Diffstat (limited to 'hw/block/virtio-blk.c')
-rw-r--r-- | hw/block/virtio-blk.c | 516
1 file changed, 341 insertions, 175 deletions
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index b19b102b42..1556c9cf5a 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -33,7 +33,9 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
     VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq);
     req->dev = s;
     req->qiov.size = 0;
+    req->in_len = 0;
     req->next = NULL;
+    req->mr_next = NULL;
     return req;
 }
 
@@ -53,7 +55,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req,
 
     trace_virtio_blk_req_complete(req, status);
     stb_p(&req->in->status, status);
-    virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
+    virtqueue_push(s->vq, &req->elem, req->in_len);
     virtio_notify(vdev, s->vq);
 }
 
@@ -84,20 +86,40 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
 
 static void virtio_blk_rw_complete(void *opaque, int ret)
 {
-    VirtIOBlockReq *req = opaque;
+    VirtIOBlockReq *next = opaque;
 
-    trace_virtio_blk_rw_complete(req, ret);
+    while (next) {
+        VirtIOBlockReq *req = next;
+        next = req->mr_next;
+        trace_virtio_blk_rw_complete(req, ret);
 
-    if (ret) {
-        int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
-        bool is_read = !(p & VIRTIO_BLK_T_OUT);
-        if (virtio_blk_handle_rw_error(req, -ret, is_read))
-            return;
-    }
+        if (req->qiov.nalloc != -1) {
+            /* If nalloc is != -1 req->qiov is a local copy of the original
+             * external iovec. It was allocated in submit_requests
+             * to be able to merge requests. */
+            qemu_iovec_destroy(&req->qiov);
+        }
 
-    virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
-    block_acct_done(blk_get_stats(req->dev->blk), &req->acct);
-    virtio_blk_free_request(req);
+        if (ret) {
+            int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
+            bool is_read = !(p & VIRTIO_BLK_T_OUT);
+            /* Note that memory may be dirtied on read failure. If the
+             * virtio request is not completed here, as is the case for
+             * BLOCK_ERROR_ACTION_STOP, the memory may not be copied
+             * correctly during live migration. While this is ugly,
+             * it is acceptable because the device is free to write to
+             * the memory until the request is completed (which will
+             * happen on the other side of the migration).
+             */
+            if (virtio_blk_handle_rw_error(req, -ret, is_read)) {
+                continue;
+            }
+        }
+
+        virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
+        block_acct_done(blk_get_stats(req->dev->blk), &req->acct);
+        virtio_blk_free_request(req);
+    }
 }
 
 static void virtio_blk_flush_complete(void *opaque, int ret)
@@ -115,6 +137,56 @@ static void virtio_blk_flush_complete(void *opaque, int ret)
     virtio_blk_free_request(req);
 }
 
+#ifdef __linux__
+
+typedef struct {
+    VirtIOBlockReq *req;
+    struct sg_io_hdr hdr;
+} VirtIOBlockIoctlReq;
+
+static void virtio_blk_ioctl_complete(void *opaque, int status)
+{
+    VirtIOBlockIoctlReq *ioctl_req = opaque;
+    VirtIOBlockReq *req = ioctl_req->req;
+    VirtIODevice *vdev = VIRTIO_DEVICE(req->dev);
+    struct virtio_scsi_inhdr *scsi;
+    struct sg_io_hdr *hdr;
+
+    scsi = (void *)req->elem.in_sg[req->elem.in_num - 2].iov_base;
+
+    if (status) {
+        status = VIRTIO_BLK_S_UNSUPP;
+        virtio_stl_p(vdev, &scsi->errors, 255);
+        goto out;
+    }
+
+    hdr = &ioctl_req->hdr;
+    /*
+     * From SCSI-Generic-HOWTO: "Some lower level drivers (e.g. ide-scsi)
+     * clear the masked_status field [hence status gets cleared too, see
+     * block/scsi_ioctl.c] even when a CHECK_CONDITION or COMMAND_TERMINATED
+     * status has occurred. However they do set DRIVER_SENSE in driver_status
+     * field. Also a (sb_len_wr > 0) indicates there is a sense buffer.
+     */
+    if (hdr->status == 0 && hdr->sb_len_wr > 0) {
+        hdr->status = CHECK_CONDITION;
+    }
+
+    virtio_stl_p(vdev, &scsi->errors,
+                 hdr->status | (hdr->msg_status << 8) |
+                 (hdr->host_status << 16) | (hdr->driver_status << 24));
+    virtio_stl_p(vdev, &scsi->residual, hdr->resid);
+    virtio_stl_p(vdev, &scsi->sense_len, hdr->sb_len_wr);
+    virtio_stl_p(vdev, &scsi->data_len, hdr->dxfer_len);
+
+out:
+    virtio_blk_req_complete(req, status);
+    virtio_blk_free_request(req);
+    g_free(ioctl_req);
+}
+
+#endif
+
 static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s)
 {
     VirtIOBlockReq *req = virtio_blk_alloc_request(s);
@@ -127,16 +199,18 @@ static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s)
     return req;
 }
 
-int virtio_blk_handle_scsi_req(VirtIOBlock *blk,
-                               VirtQueueElement *elem)
+static int virtio_blk_handle_scsi_req(VirtIOBlockReq *req)
 {
     int status = VIRTIO_BLK_S_OK;
     struct virtio_scsi_inhdr *scsi = NULL;
-    VirtIODevice *vdev = VIRTIO_DEVICE(blk);
+    VirtIODevice *vdev = VIRTIO_DEVICE(req->dev);
+    VirtQueueElement *elem = &req->elem;
+    VirtIOBlock *blk = req->dev;
 
 #ifdef __linux__
     int i;
-    struct sg_io_hdr hdr;
+    VirtIOBlockIoctlReq *ioctl_req;
+    BlockAIOCB *acb;
 #endif
 
     /*
@@ -171,71 +245,57 @@ int virtio_blk_handle_scsi_req(VirtIOBlock *blk,
     }
 
 #ifdef __linux__
-    memset(&hdr, 0, sizeof(struct sg_io_hdr));
-    hdr.interface_id = 'S';
-    hdr.cmd_len = elem->out_sg[1].iov_len;
-    hdr.cmdp = elem->out_sg[1].iov_base;
-    hdr.dxfer_len = 0;
+    ioctl_req = g_new0(VirtIOBlockIoctlReq, 1);
+    ioctl_req->req = req;
+    ioctl_req->hdr.interface_id = 'S';
+    ioctl_req->hdr.cmd_len = elem->out_sg[1].iov_len;
+    ioctl_req->hdr.cmdp = elem->out_sg[1].iov_base;
+    ioctl_req->hdr.dxfer_len = 0;
 
     if (elem->out_num > 2) {
         /*
          * If there are more than the minimally required 2 output segments
         * there is write payload starting from the third iovec.
         */
-        hdr.dxfer_direction = SG_DXFER_TO_DEV;
-        hdr.iovec_count = elem->out_num - 2;
+        ioctl_req->hdr.dxfer_direction = SG_DXFER_TO_DEV;
+        ioctl_req->hdr.iovec_count = elem->out_num - 2;
 
-        for (i = 0; i < hdr.iovec_count; i++)
-            hdr.dxfer_len += elem->out_sg[i + 2].iov_len;
+        for (i = 0; i < ioctl_req->hdr.iovec_count; i++) {
+            ioctl_req->hdr.dxfer_len += elem->out_sg[i + 2].iov_len;
+        }
 
-        hdr.dxferp = elem->out_sg + 2;
+        ioctl_req->hdr.dxferp = elem->out_sg + 2;
 
     } else if (elem->in_num > 3) {
         /*
          * If we have more than 3 input segments the guest wants to actually
         * read data.
         */
-        hdr.dxfer_direction = SG_DXFER_FROM_DEV;
-        hdr.iovec_count = elem->in_num - 3;
-        for (i = 0; i < hdr.iovec_count; i++)
-            hdr.dxfer_len += elem->in_sg[i].iov_len;
+        ioctl_req->hdr.dxfer_direction = SG_DXFER_FROM_DEV;
+        ioctl_req->hdr.iovec_count = elem->in_num - 3;
+        for (i = 0; i < ioctl_req->hdr.iovec_count; i++) {
+            ioctl_req->hdr.dxfer_len += elem->in_sg[i].iov_len;
+        }
 
-        hdr.dxferp = elem->in_sg;
+        ioctl_req->hdr.dxferp = elem->in_sg;
     } else {
         /*
          * Some SCSI commands don't actually transfer any data.
         */
-        hdr.dxfer_direction = SG_DXFER_NONE;
+        ioctl_req->hdr.dxfer_direction = SG_DXFER_NONE;
     }
 
-    hdr.sbp = elem->in_sg[elem->in_num - 3].iov_base;
-    hdr.mx_sb_len = elem->in_sg[elem->in_num - 3].iov_len;
+    ioctl_req->hdr.sbp = elem->in_sg[elem->in_num - 3].iov_base;
+    ioctl_req->hdr.mx_sb_len = elem->in_sg[elem->in_num - 3].iov_len;
 
-    status = blk_ioctl(blk->blk, SG_IO, &hdr);
-    if (status) {
+    acb = blk_aio_ioctl(blk->blk, SG_IO, &ioctl_req->hdr,
+                        virtio_blk_ioctl_complete, ioctl_req);
+    if (!acb) {
+        g_free(ioctl_req);
         status = VIRTIO_BLK_S_UNSUPP;
         goto fail;
     }
-
-    /*
-     * From SCSI-Generic-HOWTO: "Some lower level drivers (e.g. ide-scsi)
-     * clear the masked_status field [hence status gets cleared too, see
-     * block/scsi_ioctl.c] even when a CHECK_CONDITION or COMMAND_TERMINATED
-     * status has occurred. However they do set DRIVER_SENSE in driver_status
-     * field. Also a (sb_len_wr > 0) indicates there is a sense buffer.
-     */
-    if (hdr.status == 0 && hdr.sb_len_wr > 0) {
-        hdr.status = CHECK_CONDITION;
-    }
-
-    virtio_stl_p(vdev, &scsi->errors,
-                 hdr.status | (hdr.msg_status << 8) |
-                 (hdr.host_status << 16) | (hdr.driver_status << 24));
-    virtio_stl_p(vdev, &scsi->residual, hdr.resid);
-    virtio_stl_p(vdev, &scsi->sense_len, hdr.sb_len_wr);
-    virtio_stl_p(vdev, &scsi->data_len, hdr.dxfer_len);
-
-    return status;
+    return -EINPROGRESS;
 #else
     abort();
 #endif
@@ -252,29 +312,134 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
 {
     int status;
 
-    status = virtio_blk_handle_scsi_req(req->dev, &req->elem);
-    virtio_blk_req_complete(req, status);
-    virtio_blk_free_request(req);
+    status = virtio_blk_handle_scsi_req(req);
+    if (status != -EINPROGRESS) {
+        virtio_blk_req_complete(req, status);
+        virtio_blk_free_request(req);
+    }
+}
+
+static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb,
+                                   int start, int num_reqs, int niov)
+{
+    QEMUIOVector *qiov = &mrb->reqs[start]->qiov;
+    int64_t sector_num = mrb->reqs[start]->sector_num;
+    int nb_sectors = mrb->reqs[start]->qiov.size / BDRV_SECTOR_SIZE;
+    bool is_write = mrb->is_write;
+
+    if (num_reqs > 1) {
+        int i;
+        struct iovec *tmp_iov = qiov->iov;
+        int tmp_niov = qiov->niov;
+
+        /* mrb->reqs[start]->qiov was initialized from external so we can't
+         * modify it here. We need to initialize it locally and then add the
+         * external iovecs. */
+        qemu_iovec_init(qiov, niov);
+
+        for (i = 0; i < tmp_niov; i++) {
+            qemu_iovec_add(qiov, tmp_iov[i].iov_base, tmp_iov[i].iov_len);
+        }
+
+        for (i = start + 1; i < start + num_reqs; i++) {
+            qemu_iovec_concat(qiov, &mrb->reqs[i]->qiov, 0,
+                              mrb->reqs[i]->qiov.size);
+            mrb->reqs[i - 1]->mr_next = mrb->reqs[i];
+            nb_sectors += mrb->reqs[i]->qiov.size / BDRV_SECTOR_SIZE;
+        }
+        assert(nb_sectors == qiov->size / BDRV_SECTOR_SIZE);
+
+        trace_virtio_blk_submit_multireq(mrb, start, num_reqs, sector_num,
+                                         nb_sectors, is_write);
+        block_acct_merge_done(blk_get_stats(blk),
+                              is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ,
+                              num_reqs - 1);
+    }
+
+    if (is_write) {
+        blk_aio_writev(blk, sector_num, qiov, nb_sectors,
+                       virtio_blk_rw_complete, mrb->reqs[start]);
+    } else {
+        blk_aio_readv(blk, sector_num, qiov, nb_sectors,
+                      virtio_blk_rw_complete, mrb->reqs[start]);
+    }
 }
 
-void virtio_submit_multiwrite(BlockBackend *blk, MultiReqBuffer *mrb)
+static int multireq_compare(const void *a, const void *b)
 {
-    int i, ret;
+    const VirtIOBlockReq *req1 = *(VirtIOBlockReq **)a,
+                         *req2 = *(VirtIOBlockReq **)b;
 
-    if (!mrb->num_writes) {
+    /*
+     * Note that we can't simply subtract sector_num1 from sector_num2
+     * here as that could overflow the return value.
+     */
+    if (req1->sector_num > req2->sector_num) {
+        return 1;
+    } else if (req1->sector_num < req2->sector_num) {
+        return -1;
+    } else {
+        return 0;
+    }
+}
+
+void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb)
+{
+    int i = 0, start = 0, num_reqs = 0, niov = 0, nb_sectors = 0;
+    int max_xfer_len = 0;
+    int64_t sector_num = 0;
+
+    if (mrb->num_reqs == 1) {
+        submit_requests(blk, mrb, 0, 1, -1);
+        mrb->num_reqs = 0;
         return;
     }
 
-    ret = blk_aio_multiwrite(blk, mrb->blkreq, mrb->num_writes);
-    if (ret != 0) {
-        for (i = 0; i < mrb->num_writes; i++) {
-            if (mrb->blkreq[i].error) {
-                virtio_blk_rw_complete(mrb->blkreq[i].opaque, -EIO);
+    max_xfer_len = blk_get_max_transfer_length(mrb->reqs[0]->dev->blk);
+    max_xfer_len = MIN_NON_ZERO(max_xfer_len, BDRV_REQUEST_MAX_SECTORS);
+
+    qsort(mrb->reqs, mrb->num_reqs, sizeof(*mrb->reqs),
+          &multireq_compare);
+
+    for (i = 0; i < mrb->num_reqs; i++) {
+        VirtIOBlockReq *req = mrb->reqs[i];
+        if (num_reqs > 0) {
+            bool merge = true;
+
+            /* merge would exceed maximum number of IOVs */
+            if (niov + req->qiov.niov > IOV_MAX) {
+                merge = false;
+            }
+
+            /* merge would exceed maximum transfer length of backend device */
+            if (req->qiov.size / BDRV_SECTOR_SIZE + nb_sectors > max_xfer_len) {
+                merge = false;
+            }
+
+            /* requests are not sequential */
+            if (sector_num + nb_sectors != req->sector_num) {
+                merge = false;
+            }
+
+            if (!merge) {
+                submit_requests(blk, mrb, start, num_reqs, niov);
+                num_reqs = 0;
             }
         }
+
+        if (num_reqs == 0) {
+            sector_num = req->sector_num;
+            nb_sectors = niov = 0;
+            start = i;
+        }
+
+        nb_sectors += req->qiov.size / BDRV_SECTOR_SIZE;
+        niov += req->qiov.niov;
+        num_reqs++;
     }
 
-    mrb->num_writes = 0;
+    submit_requests(blk, mrb, start, num_reqs, niov);
+    mrb->num_reqs = 0;
 }
 
 static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
@@ -285,7 +450,9 @@ static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
     /*
     * Make sure all outstanding writes are posted to the backing device.
     */
-    virtio_submit_multiwrite(req->dev->blk, mrb);
+    if (mrb->is_write && mrb->num_reqs > 0) {
+        virtio_blk_submit_multireq(req->dev->blk, mrb);
+    }
     blk_aio_flush(req->dev->blk, virtio_blk_flush_complete, req);
 }
 
@@ -295,6 +462,9 @@ static bool virtio_blk_sect_range_ok(VirtIOBlock *dev,
     uint64_t nb_sectors = size >> BDRV_SECTOR_BITS;
     uint64_t total_sectors;
 
+    if (nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+        return false;
+    }
     if (sector & dev->sector_mask) {
         return false;
     }
@@ -308,60 +478,6 @@ static bool virtio_blk_sect_range_ok(VirtIOBlock *dev,
     return true;
 }
 
-static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb)
-{
-    BlockRequest *blkreq;
-    uint64_t sector;
-
-    sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector);
-
-    trace_virtio_blk_handle_write(req, sector, req->qiov.size / 512);
-
-    if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) {
-        virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
-        virtio_blk_free_request(req);
-        return;
-    }
-
-    block_acct_start(blk_get_stats(req->dev->blk), &req->acct, req->qiov.size,
-                     BLOCK_ACCT_WRITE);
-
-    if (mrb->num_writes == 32) {
-        virtio_submit_multiwrite(req->dev->blk, mrb);
-    }
-
-    blkreq = &mrb->blkreq[mrb->num_writes];
-    blkreq->sector = sector;
-    blkreq->nb_sectors = req->qiov.size / BDRV_SECTOR_SIZE;
-    blkreq->qiov = &req->qiov;
-    blkreq->cb = virtio_blk_rw_complete;
-    blkreq->opaque = req;
-    blkreq->error = 0;
-
-    mrb->num_writes++;
-}
-
-static void virtio_blk_handle_read(VirtIOBlockReq *req)
-{
-    uint64_t sector;
-
-    sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector);
-
-    trace_virtio_blk_handle_read(req, sector, req->qiov.size / 512);
-
-    if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) {
-        virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
-        virtio_blk_free_request(req);
-        return;
-    }
-
-    block_acct_start(blk_get_stats(req->dev->blk), &req->acct, req->qiov.size,
-                     BLOCK_ACCT_READ);
-    blk_aio_readv(req->dev->blk, sector, &req->qiov,
-                  req->qiov.size / BDRV_SECTOR_SIZE,
-                  virtio_blk_rw_complete, req);
-}
-
 void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
 {
     uint32_t type;
@@ -383,12 +499,13 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
 
     iov_discard_front(&iov, &out_num, sizeof(req->out));
 
-    if (in_num < 1 ||
-        in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
+    if (in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
         error_report("virtio-blk request inhdr too short");
         exit(1);
     }
 
+    /* We always touch the last byte, so just see how big in_iov is. */
+    req->in_len = iov_size(in_iov, in_num);
     req->in = (void *)in_iov[in_num - 1].iov_base
               + in_iov[in_num - 1].iov_len
               - sizeof(struct virtio_blk_inhdr);
@@ -396,11 +513,58 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
 
     type = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
 
-    if (type & VIRTIO_BLK_T_FLUSH) {
+    /* VIRTIO_BLK_T_OUT defines the command direction. VIRTIO_BLK_T_BARRIER
+     * is an optional flag. Although a guest should not send this flag if
+     * not negotiated we ignored it in the past. So keep ignoring it. */
+    switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) {
+    case VIRTIO_BLK_T_IN:
+    {
+        bool is_write = type & VIRTIO_BLK_T_OUT;
+        req->sector_num = virtio_ldq_p(VIRTIO_DEVICE(req->dev),
+                                       &req->out.sector);
+
+        if (is_write) {
+            qemu_iovec_init_external(&req->qiov, iov, out_num);
+            trace_virtio_blk_handle_write(req, req->sector_num,
+                                          req->qiov.size / BDRV_SECTOR_SIZE);
+        } else {
+            qemu_iovec_init_external(&req->qiov, in_iov, in_num);
+            trace_virtio_blk_handle_read(req, req->sector_num,
+                                         req->qiov.size / BDRV_SECTOR_SIZE);
+        }
+
+        if (!virtio_blk_sect_range_ok(req->dev, req->sector_num,
+                                      req->qiov.size)) {
+            virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
+            virtio_blk_free_request(req);
+            return;
+        }
+
+        block_acct_start(blk_get_stats(req->dev->blk),
+                         &req->acct, req->qiov.size,
+                         is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
+
+        /* merge would exceed maximum number of requests or IO direction
+         * changes */
+        if (mrb->num_reqs > 0 && (mrb->num_reqs == VIRTIO_BLK_MAX_MERGE_REQS ||
+                                  is_write != mrb->is_write ||
+                                  !req->dev->conf.request_merging)) {
+            virtio_blk_submit_multireq(req->dev->blk, mrb);
+        }
+
+        assert(mrb->num_reqs < VIRTIO_BLK_MAX_MERGE_REQS);
+        mrb->reqs[mrb->num_reqs++] = req;
+        mrb->is_write = is_write;
+        break;
+    }
+    case VIRTIO_BLK_T_FLUSH:
         virtio_blk_handle_flush(req, mrb);
-    } else if (type & VIRTIO_BLK_T_SCSI_CMD) {
+        break;
+    case VIRTIO_BLK_T_SCSI_CMD:
         virtio_blk_handle_scsi(req);
-    } else if (type & VIRTIO_BLK_T_GET_ID) {
+        break;
+    case VIRTIO_BLK_T_GET_ID:
+    {
         VirtIOBlock *s = req->dev;
 
         /*
@@ -414,14 +578,9 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
         iov_from_buf(in_iov, in_num, 0, serial, size);
         virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
         virtio_blk_free_request(req);
-    } else if (type & VIRTIO_BLK_T_OUT) {
-        qemu_iovec_init_external(&req->qiov, iov, out_num);
-        virtio_blk_handle_write(req, mrb);
-    } else if (type == VIRTIO_BLK_T_IN || type == VIRTIO_BLK_T_BARRIER) {
-        /* VIRTIO_BLK_T_IN is 0, so we can't just & it. */
-        qemu_iovec_init_external(&req->qiov, in_iov, in_num);
-        virtio_blk_handle_read(req);
-    } else {
+        break;
+    }
+    default:
         virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
         virtio_blk_free_request(req);
     }
@@ -431,9 +590,7 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 {
     VirtIOBlock *s = VIRTIO_BLK(vdev);
     VirtIOBlockReq *req;
-    MultiReqBuffer mrb = {
-        .num_writes = 0,
-    };
+    MultiReqBuffer mrb = {};
 
     /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
      * dataplane here instead of waiting for .set_status().
@@ -447,22 +604,16 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
         virtio_blk_handle_request(req, &mrb);
     }
 
-    virtio_submit_multiwrite(s->blk, &mrb);
-
-    /*
-     * FIXME: Want to check for completions before returning to guest mode,
-     * so cached reads and writes are reported as quickly as possible. But
-     * that should be done in the generic block layer.
-     */
+    if (mrb.num_reqs) {
+        virtio_blk_submit_multireq(s->blk, &mrb);
+    }
 }
 
 static void virtio_blk_dma_restart_bh(void *opaque)
 {
     VirtIOBlock *s = opaque;
     VirtIOBlockReq *req = s->rq;
-    MultiReqBuffer mrb = {
-        .num_writes = 0,
-    };
+    MultiReqBuffer mrb = {};
 
     qemu_bh_delete(s->bh);
     s->bh = NULL;
@@ -475,7 +626,9 @@ static void virtio_blk_dma_restart_bh(void *opaque)
         req = next;
     }
 
-    virtio_submit_multiwrite(s->blk, &mrb);
+    if (mrb.num_reqs) {
+        virtio_blk_submit_multireq(s->blk, &mrb);
+    }
 }
 
 static void virtio_blk_dma_restart_cb(void *opaque, int running,
@@ -497,16 +650,21 @@ static void virtio_blk_dma_restart_cb(void *opaque, int running,
 static void virtio_blk_reset(VirtIODevice *vdev)
 {
     VirtIOBlock *s = VIRTIO_BLK(vdev);
-
-    if (s->dataplane) {
-        virtio_blk_data_plane_stop(s->dataplane);
-    }
+    AioContext *ctx;
 
     /*
      * This should cancel pending requests, but can't do nicely until there
     * are per-device request lists.
     */
-    blk_drain_all();
+    ctx = blk_get_aio_context(s->blk);
+    aio_context_acquire(ctx);
+    blk_drain(s->blk);
+
+    if (s->dataplane) {
+        virtio_blk_data_plane_stop(s->dataplane);
+    }
+    aio_context_release(ctx);
+
     blk_set_enable_write_cache(s->blk, s->original_wce);
 }
 
@@ -524,11 +682,11 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
     memset(&blkcfg, 0, sizeof(blkcfg));
     virtio_stq_p(vdev, &blkcfg.capacity, capacity);
     virtio_stl_p(vdev, &blkcfg.seg_max, 128 - 2);
-    virtio_stw_p(vdev, &blkcfg.cylinders, conf->cyls);
+    virtio_stw_p(vdev, &blkcfg.geometry.cylinders, conf->cyls);
     virtio_stl_p(vdev, &blkcfg.blk_size, blk_size);
     virtio_stw_p(vdev, &blkcfg.min_io_size, conf->min_io_size / blk_size);
     virtio_stw_p(vdev, &blkcfg.opt_io_size, conf->opt_io_size / blk_size);
-    blkcfg.heads = conf->heads;
+    blkcfg.geometry.heads = conf->heads;
     /*
      * We must ensure that the block device capacity is a multiple of
     * the logical block size. If that is not the case, let's use
@@ -541,9 +699,9 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
      * per track (cylinder).
     */
     if (blk_getlength(s->blk) / conf->heads / conf->secs % blk_size) {
-        blkcfg.sectors = conf->secs & ~s->sector_mask;
+        blkcfg.geometry.sectors = conf->secs & ~s->sector_mask;
     } else {
-        blkcfg.sectors = conf->secs;
+        blkcfg.geometry.sectors = conf->secs;
    }
     blkcfg.size_max = 0;
     blkcfg.physical_block_exp = get_physical_block_exp(conf);
@@ -564,24 +722,33 @@ static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config)
     aio_context_release(blk_get_aio_context(s->blk));
 }
 
-static uint32_t virtio_blk_get_features(VirtIODevice *vdev, uint32_t features)
+static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features,
+                                        Error **errp)
 {
     VirtIOBlock *s = VIRTIO_BLK(vdev);
 
-    features |= (1 << VIRTIO_BLK_F_SEG_MAX);
-    features |= (1 << VIRTIO_BLK_F_GEOMETRY);
-    features |= (1 << VIRTIO_BLK_F_TOPOLOGY);
-    features |= (1 << VIRTIO_BLK_F_BLK_SIZE);
-    features |= (1 << VIRTIO_BLK_F_SCSI);
+    virtio_add_feature(&features, VIRTIO_BLK_F_SEG_MAX);
+    virtio_add_feature(&features, VIRTIO_BLK_F_GEOMETRY);
+    virtio_add_feature(&features, VIRTIO_BLK_F_TOPOLOGY);
+    virtio_add_feature(&features, VIRTIO_BLK_F_BLK_SIZE);
+    if (__virtio_has_feature(features, VIRTIO_F_VERSION_1)) {
+        if (s->conf.scsi) {
+            error_setg(errp, "Please set scsi=off for virtio-blk devices in order to use virtio 1.0");
+            return 0;
+        }
+    } else {
+        virtio_clear_feature(&features, VIRTIO_F_ANY_LAYOUT);
+        virtio_add_feature(&features, VIRTIO_BLK_F_SCSI);
+    }
 
     if (s->conf.config_wce) {
-        features |= (1 << VIRTIO_BLK_F_CONFIG_WCE);
+        virtio_add_feature(&features, VIRTIO_BLK_F_CONFIG_WCE);
     }
     if (blk_enable_write_cache(s->blk)) {
-        features |= (1 << VIRTIO_BLK_F_WCE);
+        virtio_add_feature(&features, VIRTIO_BLK_F_WCE);
     }
     if (blk_is_read_only(s->blk)) {
-        features |= 1 << VIRTIO_BLK_F_RO;
+        virtio_add_feature(&features, VIRTIO_BLK_F_RO);
     }
 
     return features;
@@ -590,7 +757,6 @@ static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features,
 static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
 {
     VirtIOBlock *s = VIRTIO_BLK(vdev);
-    uint32_t features;
 
     if (s->dataplane && !(status & (VIRTIO_CONFIG_S_DRIVER |
                                     VIRTIO_CONFIG_S_DRIVER_OK))) {
@@ -601,8 +767,6 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
         return;
     }
 
-    features = vdev->guest_features;
-
     /* A guest that supports VIRTIO_BLK_F_CONFIG_WCE must be able to send
      * cache flushes. Thus, the "auto writethrough" behavior is never
      * necessary for guests that support the VIRTIO_BLK_F_CONFIG_WCE feature.
@@ -618,10 +782,10 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
      *
      *     s->blk would erroneously be placed in writethrough mode.
      */
-    if (!(features & (1 << VIRTIO_BLK_F_CONFIG_WCE))) {
+    if (!virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE)) {
         aio_context_acquire(blk_get_aio_context(s->blk));
         blk_set_enable_write_cache(s->blk,
-                                   !!(features & (1 << VIRTIO_BLK_F_WCE)));
+                                   virtio_has_feature(vdev, VIRTIO_BLK_F_WCE));
         aio_context_release(blk_get_aio_context(s->blk));
     }
 }
@@ -715,8 +879,7 @@ static void virtio_blk_migration_state_changed(Notifier *notifier, void *data)
         virtio_blk_data_plane_create(VIRTIO_DEVICE(s), &s->conf,
                                      &s->dataplane, &err);
         if (err != NULL) {
-            error_report("%s", error_get_pretty(err));
-            error_free(err);
+            error_report_err(err);
         }
     }
 }
@@ -745,6 +908,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
         error_propagate(errp, err);
        return;
     }
+    blkconf_blocksizes(&conf->conf);
 
     virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK,
                 sizeof(struct virtio_blk_config));
@@ -808,6 +972,8 @@ static Property virtio_blk_properties[] = {
 #ifdef __linux__
     DEFINE_PROP_BIT("scsi", VirtIOBlock, conf.scsi, 0, true),
 #endif
+    DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0,
+                    true),
     DEFINE_PROP_BIT("x-data-plane", VirtIOBlock, conf.data_plane, 0, false),
     DEFINE_PROP_END_OF_LIST(),
 };
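
For orientation, the heart of the new virtio_blk_submit_multireq() above is a single linear pass over sector-sorted requests that flushes the current batch whenever appending the next request would break a limit. The following is a minimal standalone sketch of that batching loop, not QEMU code: Req, MAX_IOV, MAX_XFER_SECTORS, and submit() are hypothetical stand-ins for VirtIOBlockReq, IOV_MAX, the backend transfer limit, and the merged blk_aio_readv()/blk_aio_writev() call.

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for VirtIOBlockReq; only the fields the merge
 * decision inspects are modeled. */
typedef struct {
    int64_t sector_num;   /* first sector of the request */
    int nb_sectors;       /* length in sectors */
    int niov;             /* number of iovec entries */
} Req;

enum { MAX_IOV = 1024, MAX_XFER_SECTORS = 4096 };  /* assumed limits */

/* Same idea as multireq_compare(): compare instead of subtracting, since
 * the difference of two int64_t sector numbers can overflow the int
 * return value. */
static int req_compare(const void *a, const void *b)
{
    const Req *r1 = *(Req *const *)a, *r2 = *(Req *const *)b;
    return (r1->sector_num > r2->sector_num) -
           (r1->sector_num < r2->sector_num);
}

/* Placeholder for issuing one merged read or write. */
static void submit(Req **reqs, int start, int num, int64_t sector, int sectors)
{
    (void)reqs;
    printf("submit reqs[%d..%d] at sector %" PRId64 " (%d sectors)\n",
           start, start + num - 1, sector, sectors);
}

static void submit_all(Req **reqs, int n)
{
    int start = 0, num = 0, niov = 0, sectors = 0;
    int64_t sector = 0;

    qsort(reqs, n, sizeof(*reqs), req_compare);

    for (int i = 0; i < n; i++) {
        Req *r = reqs[i];
        /* Flush the batch if appending r would exceed the iovec or
         * transfer-size limit, or if r is not sequential. */
        if (num > 0 &&
            (niov + r->niov > MAX_IOV ||
             sectors + r->nb_sectors > MAX_XFER_SECTORS ||
             sector + sectors != r->sector_num)) {
            submit(reqs, start, num, sector, sectors);
            num = 0;
        }
        if (num == 0) {                 /* start a new batch at r */
            start = i;
            sector = r->sector_num;
            sectors = niov = 0;
        }
        sectors += r->nb_sectors;
        niov += r->niov;
        num++;
    }
    if (num > 0) {
        submit(reqs, start, num, sector, sectors);  /* final batch */
    }
}

int main(void)
{
    Req a = {0, 8, 1}, b = {8, 8, 1}, c = {100, 8, 1};
    Req *reqs[] = {&a, &c, &b};   /* deliberately out of order */
    submit_all(reqs, 3);          /* merges a+b; c is submitted alone */
    return 0;
}

Sorting is what makes the single pass sufficient: after qsort() any mergeable neighbors are adjacent, so the contiguity check (sector + sectors == r->sector_num) finds every batch, mirroring how the patch merges only sequential requests while the direction change and VIRTIO_BLK_MAX_MERGE_REQS checks happen earlier, in virtio_blk_handle_request().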