diff options
Diffstat (limited to 'qemu/hw/virtio/vhost.c')
-rw-r--r-- | qemu/hw/virtio/vhost.c | 213 |
1 files changed, 164 insertions, 49 deletions
diff --git a/qemu/hw/virtio/vhost.c b/qemu/hw/virtio/vhost.c index 2712c6fc0..440071815 100644 --- a/qemu/hw/virtio/vhost.c +++ b/qemu/hw/virtio/vhost.c @@ -13,11 +13,14 @@ * GNU GPL, version 2 or (at your option) any later version. */ +#include "qemu/osdep.h" +#include "qapi/error.h" #include "hw/virtio/vhost.h" #include "hw/hw.h" #include "qemu/atomic.h" #include "qemu/range.h" #include "qemu/error-report.h" +#include "qemu/memfd.h" #include <linux/vhost.h> #include "exec/address-spaces.h" #include "hw/virtio/virtio-bus.h" @@ -25,6 +28,23 @@ #include "migration/migration.h" static struct vhost_log *vhost_log; +static struct vhost_log *vhost_log_shm; + +static unsigned int used_memslots; +static QLIST_HEAD(, vhost_dev) vhost_devices = + QLIST_HEAD_INITIALIZER(vhost_devices); + +bool vhost_has_free_slot(void) +{ + unsigned int slots_limit = ~0U; + struct vhost_dev *hdev; + + QLIST_FOREACH(hdev, &vhost_devices, entry) { + unsigned int r = hdev->vhost_ops->vhost_backend_memslots_limit(hdev); + slots_limit = MIN(slots_limit, r); + } + return slots_limit > used_memslots; +} static void vhost_dev_sync_region(struct vhost_dev *dev, MemoryRegionSection *section, @@ -241,6 +261,13 @@ static void vhost_dev_assign_memory(struct vhost_dev *dev, continue; } + if (dev->vhost_ops->vhost_backend_can_merge && + !dev->vhost_ops->vhost_backend_can_merge(dev, uaddr, size, + reg->userspace_addr, + reg->memory_size)) { + continue; + } + if (merged) { --to; assert(to >= 0); @@ -286,25 +313,46 @@ static uint64_t vhost_get_log_size(struct vhost_dev *dev) } return log_size; } -static struct vhost_log *vhost_log_alloc(uint64_t size) + +static struct vhost_log *vhost_log_alloc(uint64_t size, bool share) { - struct vhost_log *log = g_malloc0(sizeof *log + size * sizeof(*(log->log))); + struct vhost_log *log; + uint64_t logsize = size * sizeof(*(log->log)); + int fd = -1; + + log = g_new0(struct vhost_log, 1); + if (share) { + log->log = qemu_memfd_alloc("vhost-log", logsize, + F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL, + &fd); + memset(log->log, 0, logsize); + } else { + log->log = g_malloc0(logsize); + } log->size = size; log->refcnt = 1; + log->fd = fd; return log; } -static struct vhost_log *vhost_log_get(uint64_t size) +static struct vhost_log *vhost_log_get(uint64_t size, bool share) { - if (!vhost_log || vhost_log->size != size) { - vhost_log = vhost_log_alloc(size); + struct vhost_log *log = share ? vhost_log_shm : vhost_log; + + if (!log || log->size != size) { + log = vhost_log_alloc(size, share); + if (share) { + vhost_log_shm = log; + } else { + vhost_log = log; + } } else { - ++vhost_log->refcnt; + ++log->refcnt; } - return vhost_log; + return log; } static void vhost_log_put(struct vhost_dev *dev, bool sync) @@ -321,20 +369,35 @@ static void vhost_log_put(struct vhost_dev *dev, bool sync) if (dev->log_size && sync) { vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1); } + if (vhost_log == log) { + g_free(log->log); vhost_log = NULL; + } else if (vhost_log_shm == log) { + qemu_memfd_free(log->log, log->size * sizeof(*(log->log)), + log->fd); + vhost_log_shm = NULL; } + g_free(log); } } -static inline void vhost_dev_log_resize(struct vhost_dev* dev, uint64_t size) +static bool vhost_dev_log_is_shared(struct vhost_dev *dev) { - struct vhost_log *log = vhost_log_get(size); + return dev->vhost_ops->vhost_requires_shm_log && + dev->vhost_ops->vhost_requires_shm_log(dev); +} + +static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size) +{ + struct vhost_log *log = vhost_log_get(size, vhost_dev_log_is_shared(dev)); uint64_t log_base = (uintptr_t)log->log; int r; - r = dev->vhost_ops->vhost_call(dev, VHOST_SET_LOG_BASE, &log_base); + /* inform backend of log switching, this must be done before + releasing the current log, to ensure no logging is lost */ + r = dev->vhost_ops->vhost_set_log_base(dev, log_base, log); assert(r >= 0); vhost_log_put(dev, true); dev->log = log; @@ -457,6 +520,7 @@ static void vhost_set_memory(MemoryListener *listener, dev->mem_changed_start_addr = MIN(dev->mem_changed_start_addr, start_addr); dev->mem_changed_end_addr = MAX(dev->mem_changed_end_addr, start_addr + size - 1); dev->memory_changed = true; + used_memslots = dev->mem->nregions; } static bool vhost_section(MemoryRegionSection *section) @@ -500,7 +564,7 @@ static void vhost_commit(MemoryListener *listener) } if (!dev->log_enabled) { - r = dev->vhost_ops->vhost_call(dev, VHOST_SET_MEM_TABLE, dev->mem); + r = dev->vhost_ops->vhost_set_mem_table(dev, dev->mem); assert(r >= 0); dev->memory_changed = false; return; @@ -513,7 +577,7 @@ static void vhost_commit(MemoryListener *listener) if (dev->log_size < log_size) { vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER); } - r = dev->vhost_ops->vhost_call(dev, VHOST_SET_MEM_TABLE, dev->mem); + r = dev->vhost_ops->vhost_set_mem_table(dev, dev->mem); assert(r >= 0); /* To log less, can only decrease log size after table update. */ if (dev->log_size > log_size + VHOST_LOG_BUFFER) { @@ -581,7 +645,7 @@ static int vhost_virtqueue_set_addr(struct vhost_dev *dev, .log_guest_addr = vq->used_phys, .flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0, }; - int r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_ADDR, &addr); + int r = dev->vhost_ops->vhost_set_vring_addr(dev, &addr); if (r < 0) { return -errno; } @@ -595,19 +659,20 @@ static int vhost_dev_set_features(struct vhost_dev *dev, bool enable_log) if (enable_log) { features |= 0x1ULL << VHOST_F_LOG_ALL; } - r = dev->vhost_ops->vhost_call(dev, VHOST_SET_FEATURES, &features); + r = dev->vhost_ops->vhost_set_features(dev, features); return r < 0 ? -errno : 0; } static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log) { - int r, t, i; + int r, t, i, idx; r = vhost_dev_set_features(dev, enable_log); if (r < 0) { goto err_features; } for (i = 0; i < dev->nvqs; ++i) { - r = vhost_virtqueue_set_addr(dev, dev->vqs + i, i, + idx = dev->vhost_ops->vhost_get_vq_index(dev, dev->vq_index + i); + r = vhost_virtqueue_set_addr(dev, dev->vqs + i, idx, enable_log); if (r < 0) { goto err_vq; @@ -616,7 +681,8 @@ static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log) return 0; err_vq: for (; i >= 0; --i) { - t = vhost_virtqueue_set_addr(dev, dev->vqs + i, i, + idx = dev->vhost_ops->vhost_get_vq_index(dev, dev->vq_index + i); + t = vhost_virtqueue_set_addr(dev, dev->vqs + i, idx, dev->log_enabled); assert(t >= 0); } @@ -691,6 +757,27 @@ static void vhost_log_stop(MemoryListener *listener, /* FIXME: implement */ } +/* The vhost driver natively knows how to handle the vrings of non + * cross-endian legacy devices and modern devices. Only legacy devices + * exposed to a bi-endian guest may require the vhost driver to use a + * specific endianness. + */ +static inline bool vhost_needs_vring_endian(VirtIODevice *vdev) +{ + if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { + return false; + } +#ifdef TARGET_IS_BIENDIAN +#ifdef HOST_WORDS_BIGENDIAN + return vdev->device_endian == VIRTIO_DEVICE_ENDIAN_LITTLE; +#else + return vdev->device_endian == VIRTIO_DEVICE_ENDIAN_BIG; +#endif +#else + return false; +#endif +} + static int vhost_virtqueue_set_vring_endian_legacy(struct vhost_dev *dev, bool is_big_endian, int vhost_vq_index) @@ -700,7 +787,7 @@ static int vhost_virtqueue_set_vring_endian_legacy(struct vhost_dev *dev, .num = is_big_endian }; - if (!dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_ENDIAN, &s)) { + if (!dev->vhost_ops->vhost_set_vring_endian(dev, &s)) { return 0; } @@ -719,7 +806,7 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, { hwaddr s, l, a; int r; - int vhost_vq_index = idx - dev->vq_index; + int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx); struct vhost_vring_file file = { .index = vhost_vq_index }; @@ -728,22 +815,20 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, }; struct VirtQueue *vvq = virtio_get_queue(vdev, idx); - assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); vq->num = state.num = virtio_queue_get_num(vdev, idx); - r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_NUM, &state); + r = dev->vhost_ops->vhost_set_vring_num(dev, &state); if (r) { return -errno; } state.num = virtio_queue_get_last_avail_idx(vdev, idx); - r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_BASE, &state); + r = dev->vhost_ops->vhost_set_vring_base(dev, &state); if (r) { return -errno; } - if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1) && - virtio_legacy_is_cross_endian(vdev)) { + if (vhost_needs_vring_endian(vdev)) { r = vhost_virtqueue_set_vring_endian_legacy(dev, virtio_is_big_endian(vdev), vhost_vq_index); @@ -789,7 +874,7 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, } file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq)); - r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_KICK, &file); + r = dev->vhost_ops->vhost_set_vring_kick(dev, &file); if (r) { r = -errno; goto fail_kick; @@ -798,6 +883,14 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, /* Clear and discard previous events if any. */ event_notifier_test_and_clear(&vq->masked_notifier); + /* Init vring in unmasked state, unless guest_notifier_mask + * will do it later. + */ + if (!vdev->use_guest_notifier_mask) { + /* TODO: check and handle errors. */ + vhost_virtqueue_mask(dev, vdev, idx, false); + } + return 0; fail_kick: @@ -822,13 +915,13 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev, struct vhost_virtqueue *vq, unsigned idx) { - int vhost_vq_index = idx - dev->vq_index; + int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx); struct vhost_vring_state state = { .index = vhost_vq_index, }; int r; - assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); - r = dev->vhost_ops->vhost_call(dev, VHOST_GET_VRING_BASE, &state); + + r = dev->vhost_ops->vhost_get_vring_base(dev, &state); if (r < 0) { fprintf(stderr, "vhost VQ %d ring restore failed: %d\n", idx, r); fflush(stderr); @@ -839,8 +932,7 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev, /* In the cross-endian case, we need to reset the vring endianness to * native as legacy devices expect so by default. */ - if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1) && - virtio_legacy_is_cross_endian(vdev)) { + if (vhost_needs_vring_endian(vdev)) { r = vhost_virtqueue_set_vring_endian_legacy(dev, !virtio_is_big_endian(vdev), vhost_vq_index); @@ -875,8 +967,9 @@ static void vhost_eventfd_del(MemoryListener *listener, static int vhost_virtqueue_init(struct vhost_dev *dev, struct vhost_virtqueue *vq, int n) { + int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, n); struct vhost_vring_file file = { - .index = n, + .index = vhost_vq_index, }; int r = event_notifier_init(&vq->masked_notifier, 0); if (r < 0) { @@ -884,7 +977,7 @@ static int vhost_virtqueue_init(struct vhost_dev *dev, } file.fd = event_notifier_get_fd(&vq->masked_notifier); - r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_CALL, &file); + r = dev->vhost_ops->vhost_set_vring_call(dev, &file); if (r) { r = -errno; goto fail_call; @@ -906,6 +999,8 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, uint64_t features; int i, r; + hdev->migration_blocker = NULL; + if (vhost_set_backend_type(hdev, backend_type) < 0) { close((uintptr_t)opaque); return -1; @@ -916,18 +1011,26 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, return -errno; } - r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_OWNER, NULL); + if (used_memslots > hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) { + fprintf(stderr, "vhost backend memory slots limit is less" + " than current number of present memory slots\n"); + close((uintptr_t)opaque); + return -1; + } + QLIST_INSERT_HEAD(&vhost_devices, hdev, entry); + + r = hdev->vhost_ops->vhost_set_owner(hdev); if (r < 0) { goto fail; } - r = hdev->vhost_ops->vhost_call(hdev, VHOST_GET_FEATURES, &features); + r = hdev->vhost_ops->vhost_get_features(hdev, &features); if (r < 0) { goto fail; } for (i = 0; i < hdev->nvqs; ++i) { - r = vhost_virtqueue_init(hdev, hdev->vqs + i, i); + r = vhost_virtqueue_init(hdev, hdev->vqs + i, hdev->vq_index + i); if (r < 0) { goto fail_vq; } @@ -949,12 +1052,21 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, .eventfd_del = vhost_eventfd_del, .priority = 10 }; - hdev->migration_blocker = NULL; - if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) { - error_setg(&hdev->migration_blocker, - "Migration disabled: vhost lacks VHOST_F_LOG_ALL feature."); + + if (hdev->migration_blocker == NULL) { + if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) { + error_setg(&hdev->migration_blocker, + "Migration disabled: vhost lacks VHOST_F_LOG_ALL feature."); + } else if (!qemu_memfd_check()) { + error_setg(&hdev->migration_blocker, + "Migration disabled: failed to allocate shared memory"); + } + } + + if (hdev->migration_blocker != NULL) { migrate_add_blocker(hdev->migration_blocker); } + hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions)); hdev->n_mem_sections = 0; hdev->mem_sections = NULL; @@ -972,6 +1084,7 @@ fail_vq: fail: r = -errno; hdev->vhost_ops->vhost_backend_cleanup(hdev); + QLIST_REMOVE(hdev, entry); return r; } @@ -989,6 +1102,7 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) g_free(hdev->mem); g_free(hdev->mem_sections); hdev->vhost_ops->vhost_backend_cleanup(hdev); + QLIST_REMOVE(hdev, entry); } /* Stop processing guest IO notifications in qemu. @@ -1066,18 +1180,17 @@ void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n, { struct VirtQueue *vvq = virtio_get_queue(vdev, n); int r, index = n - hdev->vq_index; + struct vhost_vring_file file; - assert(n >= hdev->vq_index && n < hdev->vq_index + hdev->nvqs); - - struct vhost_vring_file file = { - .index = index - }; if (mask) { + assert(vdev->use_guest_notifier_mask); file.fd = event_notifier_get_fd(&hdev->vqs[index].masked_notifier); } else { file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq)); } - r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_VRING_CALL, &file); + + file.index = hdev->vhost_ops->vhost_get_vq_index(hdev, n); + r = hdev->vhost_ops->vhost_set_vring_call(hdev, &file); assert(r >= 0); } @@ -1119,7 +1232,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) if (r < 0) { goto fail_features; } - r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_MEM_TABLE, hdev->mem); + r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem); if (r < 0) { r = -errno; goto fail_mem; @@ -1138,10 +1251,12 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) uint64_t log_base; hdev->log_size = vhost_get_log_size(hdev); - hdev->log = vhost_log_get(hdev->log_size); + hdev->log = vhost_log_get(hdev->log_size, + vhost_dev_log_is_shared(hdev)); log_base = (uintptr_t)hdev->log->log; - r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_LOG_BASE, - hdev->log_size ? &log_base : NULL); + r = hdev->vhost_ops->vhost_set_log_base(hdev, + hdev->log_size ? log_base : 0, + hdev->log); if (r < 0) { r = -errno; goto fail_log; |