Diffstat (limited to 'qemu/hw/virtio/vhost.c')
 -rw-r--r--  qemu/hw/virtio/vhost.c | 213
 1 file changed, 164 insertions(+), 49 deletions(-)
diff --git a/qemu/hw/virtio/vhost.c b/qemu/hw/virtio/vhost.c
index 2712c6fc0..440071815 100644
--- a/qemu/hw/virtio/vhost.c
+++ b/qemu/hw/virtio/vhost.c
@@ -13,11 +13,14 @@
* GNU GPL, version 2 or (at your option) any later version.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "hw/virtio/vhost.h"
#include "hw/hw.h"
#include "qemu/atomic.h"
#include "qemu/range.h"
#include "qemu/error-report.h"
+#include "qemu/memfd.h"
#include <linux/vhost.h>
#include "exec/address-spaces.h"
#include "hw/virtio/virtio-bus.h"
@@ -25,6 +28,23 @@
#include "migration/migration.h"
static struct vhost_log *vhost_log;
+static struct vhost_log *vhost_log_shm;
+
+static unsigned int used_memslots;
+static QLIST_HEAD(, vhost_dev) vhost_devices =
+ QLIST_HEAD_INITIALIZER(vhost_devices);
+
+bool vhost_has_free_slot(void)
+{
+ unsigned int slots_limit = ~0U;
+ struct vhost_dev *hdev;
+
+ QLIST_FOREACH(hdev, &vhost_devices, entry) {
+ unsigned int r = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);
+ slots_limit = MIN(slots_limit, r);
+ }
+ return slots_limit > used_memslots;
+}
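
The new vhost_has_free_slot() helper walks every registered vhost device and reports whether all backends can still accept one more memory region. A minimal sketch of a caller checking it before plugging extra memory; the function name and error handling around the call are illustrative, only vhost_has_free_slot() itself comes from this patch:

#include "hw/virtio/vhost.h"
#include "qapi/error.h"

/* Hypothetical hotplug path: refuse the new region up front if any vhost
 * backend is already at its memslot limit. */
static void example_plug_memory_region(Error **errp)
{
    if (!vhost_has_free_slot()) {
        error_setg(errp, "a used vhost backend has no free memory slots left");
        return;
    }
    /* ... map the region and let the memory listeners pick it up ... */
}
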
static void vhost_dev_sync_region(struct vhost_dev *dev,
MemoryRegionSection *section,
@@ -241,6 +261,13 @@ static void vhost_dev_assign_memory(struct vhost_dev *dev,
continue;
}
+ if (dev->vhost_ops->vhost_backend_can_merge &&
+ !dev->vhost_ops->vhost_backend_can_merge(dev, uaddr, size,
+ reg->userspace_addr,
+ reg->memory_size)) {
+ continue;
+ }
+
if (merged) {
--to;
assert(to >= 0);
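
The optional vhost_backend_can_merge() hook lets a backend veto the merge of two adjacent regions that vhost_dev_assign_memory() would otherwise coalesce. A hedged sketch of such a callback, matching the signature implied by the call above; the lookup helper is hypothetical and only illustrates the idea of requiring both ranges to come from the same host mapping:

/* Illustrative backend callback: only allow merging when both ranges live in
 * the same host mapping, so one descriptor can cover the merged region.
 * lookup_host_mapping() is a hypothetical helper, not a QEMU API. */
static bool example_backend_can_merge(struct vhost_dev *dev,
                                      uint64_t start1, uint64_t size1,
                                      uint64_t start2, uint64_t size2)
{
    void *map1 = lookup_host_mapping(start1, size1);
    void *map2 = lookup_host_mapping(start2, size2);

    return map1 != NULL && map1 == map2;
}
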
@@ -286,25 +313,46 @@ static uint64_t vhost_get_log_size(struct vhost_dev *dev)
}
return log_size;
}
-static struct vhost_log *vhost_log_alloc(uint64_t size)
+
+static struct vhost_log *vhost_log_alloc(uint64_t size, bool share)
{
- struct vhost_log *log = g_malloc0(sizeof *log + size * sizeof(*(log->log)));
+ struct vhost_log *log;
+ uint64_t logsize = size * sizeof(*(log->log));
+ int fd = -1;
+
+ log = g_new0(struct vhost_log, 1);
+ if (share) {
+ log->log = qemu_memfd_alloc("vhost-log", logsize,
+ F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
+ &fd);
+ memset(log->log, 0, logsize);
+ } else {
+ log->log = g_malloc0(logsize);
+ }
log->size = size;
log->refcnt = 1;
+ log->fd = fd;
return log;
}
-static struct vhost_log *vhost_log_get(uint64_t size)
+static struct vhost_log *vhost_log_get(uint64_t size, bool share)
{
- if (!vhost_log || vhost_log->size != size) {
- vhost_log = vhost_log_alloc(size);
+ struct vhost_log *log = share ? vhost_log_shm : vhost_log;
+
+ if (!log || log->size != size) {
+ log = vhost_log_alloc(size, share);
+ if (share) {
+ vhost_log_shm = log;
+ } else {
+ vhost_log = log;
+ }
} else {
- ++vhost_log->refcnt;
+ ++log->refcnt;
}
- return vhost_log;
+ return log;
}
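
vhost_log_get() now hands out a reference-counted log, optionally backed by a sealed memfd when the backend needs to share it with another process. A sketch of the intended get/put pairing when a device switches logs; both helpers are static to vhost.c, so this fragment only illustrates the refcounting contract rather than code from the patch:

/* Sketch: acquire the new log first, point the backend at it, then drop the
 * reference to the old one via vhost_log_put(). */
static void example_switch_log(struct vhost_dev *dev, uint64_t new_size,
                               bool share)
{
    struct vhost_log *log = vhost_log_get(new_size, share); /* alloc or refcnt++ */

    /* ... inform the backend of the new log base here ... */

    vhost_log_put(dev, true);   /* release dev->log, syncing any dirty bits */
    dev->log = log;
    dev->log_size = new_size;
}
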
static void vhost_log_put(struct vhost_dev *dev, bool sync)
@@ -321,20 +369,35 @@ static void vhost_log_put(struct vhost_dev *dev, bool sync)
if (dev->log_size && sync) {
vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1);
}
+
if (vhost_log == log) {
+ g_free(log->log);
vhost_log = NULL;
+ } else if (vhost_log_shm == log) {
+ qemu_memfd_free(log->log, log->size * sizeof(*(log->log)),
+ log->fd);
+ vhost_log_shm = NULL;
}
+
g_free(log);
}
}
-static inline void vhost_dev_log_resize(struct vhost_dev* dev, uint64_t size)
+static bool vhost_dev_log_is_shared(struct vhost_dev *dev)
{
- struct vhost_log *log = vhost_log_get(size);
+ return dev->vhost_ops->vhost_requires_shm_log &&
+ dev->vhost_ops->vhost_requires_shm_log(dev);
+}
+
+static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size)
+{
+ struct vhost_log *log = vhost_log_get(size, vhost_dev_log_is_shared(dev));
uint64_t log_base = (uintptr_t)log->log;
int r;
- r = dev->vhost_ops->vhost_call(dev, VHOST_SET_LOG_BASE, &log_base);
+ /* inform backend of log switching, this must be done before
+ releasing the current log, to ensure no logging is lost */
+ r = dev->vhost_ops->vhost_set_log_base(dev, log_base, log);
assert(r >= 0);
vhost_log_put(dev, true);
dev->log = log;
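
Two backend hooks carry the shared-log support: vhost_requires_shm_log() decides whether the log must be memfd-backed, and vhost_set_log_base() now receives the vhost_log itself so the backend can pass the fd along. A sketch of what a backend that shares its log with an external process might provide, with signatures taken from the call sites above; the send helper is hypothetical:

/* Illustrative backend hooks for a shared dirty log. */
static bool example_requires_shm_log(struct vhost_dev *dev)
{
    return true;                      /* this backend wants a memfd-backed log */
}

static int example_set_log_base(struct vhost_dev *dev, uint64_t base,
                                struct vhost_log *log)
{
    /* ship the base address and log->fd to the backend process;
     * example_send_log_fd() is a hypothetical transport call */
    return example_send_log_fd(dev, base, log->fd);
}
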
@@ -457,6 +520,7 @@ static void vhost_set_memory(MemoryListener *listener,
dev->mem_changed_start_addr = MIN(dev->mem_changed_start_addr, start_addr);
dev->mem_changed_end_addr = MAX(dev->mem_changed_end_addr, start_addr + size - 1);
dev->memory_changed = true;
+ used_memslots = dev->mem->nregions;
}
static bool vhost_section(MemoryRegionSection *section)
@@ -500,7 +564,7 @@ static void vhost_commit(MemoryListener *listener)
}
if (!dev->log_enabled) {
- r = dev->vhost_ops->vhost_call(dev, VHOST_SET_MEM_TABLE, dev->mem);
+ r = dev->vhost_ops->vhost_set_mem_table(dev, dev->mem);
assert(r >= 0);
dev->memory_changed = false;
return;
@@ -513,7 +577,7 @@ static void vhost_commit(MemoryListener *listener)
if (dev->log_size < log_size) {
vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER);
}
- r = dev->vhost_ops->vhost_call(dev, VHOST_SET_MEM_TABLE, dev->mem);
+ r = dev->vhost_ops->vhost_set_mem_table(dev, dev->mem);
assert(r >= 0);
/* To log less, can only decrease log size after table update. */
if (dev->log_size > log_size + VHOST_LOG_BUFFER) {
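
Throughout the patch the catch-all vhost_call(dev, REQUEST, arg) entry point is replaced by one typed callback per request, so each backend (kernel ioctl or vhost-user) implements the operations it supports with proper argument types. A trimmed sketch of the callback table this implies; only members that appear in the hunks of this diff are listed, and the real VhostOps in hw/virtio/vhost-backend.h carries more:

/* Sketch: per-request callbacks replacing the generic vhost_call(). */
typedef struct VhostOpsSketch {
    int (*vhost_set_mem_table)(struct vhost_dev *dev, struct vhost_memory *mem);
    int (*vhost_set_vring_addr)(struct vhost_dev *dev,
                                struct vhost_vring_addr *addr);
    int (*vhost_set_features)(struct vhost_dev *dev, uint64_t features);
    int (*vhost_get_vq_index)(struct vhost_dev *dev, int idx);
    /* ... */
} VhostOpsSketch;
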
@@ -581,7 +645,7 @@ static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
.log_guest_addr = vq->used_phys,
.flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0,
};
- int r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_ADDR, &addr);
+ int r = dev->vhost_ops->vhost_set_vring_addr(dev, &addr);
if (r < 0) {
return -errno;
}
@@ -595,19 +659,20 @@ static int vhost_dev_set_features(struct vhost_dev *dev, bool enable_log)
if (enable_log) {
features |= 0x1ULL << VHOST_F_LOG_ALL;
}
- r = dev->vhost_ops->vhost_call(dev, VHOST_SET_FEATURES, &features);
+ r = dev->vhost_ops->vhost_set_features(dev, features);
return r < 0 ? -errno : 0;
}
static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
{
- int r, t, i;
+ int r, t, i, idx;
r = vhost_dev_set_features(dev, enable_log);
if (r < 0) {
goto err_features;
}
for (i = 0; i < dev->nvqs; ++i) {
- r = vhost_virtqueue_set_addr(dev, dev->vqs + i, i,
+ idx = dev->vhost_ops->vhost_get_vq_index(dev, dev->vq_index + i);
+ r = vhost_virtqueue_set_addr(dev, dev->vqs + i, idx,
enable_log);
if (r < 0) {
goto err_vq;
@@ -616,7 +681,8 @@ static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
return 0;
err_vq:
for (; i >= 0; --i) {
- t = vhost_virtqueue_set_addr(dev, dev->vqs + i, i,
+ idx = dev->vhost_ops->vhost_get_vq_index(dev, dev->vq_index + i);
+ t = vhost_virtqueue_set_addr(dev, dev->vqs + i, idx,
dev->log_enabled);
assert(t >= 0);
}
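
Virtqueue indices handed to the backend now go through vhost_get_vq_index(), since kernel vhost addresses rings relative to the device's first queue while other backends may use absolute numbering. A sketch modelled on the computation this patch moves out of vhost.c (previously the open-coded idx - dev->vq_index plus assertion); the actual in-tree backends may differ in detail:

/* Illustrative vq-index translation for an ioctl-style backend. */
static int example_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
    return idx - dev->vq_index;       /* ring number as the backend expects it */
}
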
@@ -691,6 +757,27 @@ static void vhost_log_stop(MemoryListener *listener,
/* FIXME: implement */
}
+/* The vhost driver natively knows how to handle the vrings of non
+ * cross-endian legacy devices and modern devices. Only legacy devices
+ * exposed to a bi-endian guest may require the vhost driver to use a
+ * specific endianness.
+ */
+static inline bool vhost_needs_vring_endian(VirtIODevice *vdev)
+{
+ if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
+ return false;
+ }
+#ifdef TARGET_IS_BIENDIAN
+#ifdef HOST_WORDS_BIGENDIAN
+ return vdev->device_endian == VIRTIO_DEVICE_ENDIAN_LITTLE;
+#else
+ return vdev->device_endian == VIRTIO_DEVICE_ENDIAN_BIG;
+#endif
+#else
+ return false;
+#endif
+}
+
static int vhost_virtqueue_set_vring_endian_legacy(struct vhost_dev *dev,
bool is_big_endian,
int vhost_vq_index)
@@ -700,7 +787,7 @@ static int vhost_virtqueue_set_vring_endian_legacy(struct vhost_dev *dev,
.num = is_big_endian
};
- if (!dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_ENDIAN, &s)) {
+ if (!dev->vhost_ops->vhost_set_vring_endian(dev, &s)) {
return 0;
}
@@ -719,7 +806,7 @@ static int vhost_virtqueue_start(struct vhost_dev *dev,
{
hwaddr s, l, a;
int r;
- int vhost_vq_index = idx - dev->vq_index;
+ int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx);
struct vhost_vring_file file = {
.index = vhost_vq_index
};
@@ -728,22 +815,20 @@ static int vhost_virtqueue_start(struct vhost_dev *dev,
};
struct VirtQueue *vvq = virtio_get_queue(vdev, idx);
- assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
vq->num = state.num = virtio_queue_get_num(vdev, idx);
- r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_NUM, &state);
+ r = dev->vhost_ops->vhost_set_vring_num(dev, &state);
if (r) {
return -errno;
}
state.num = virtio_queue_get_last_avail_idx(vdev, idx);
- r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_BASE, &state);
+ r = dev->vhost_ops->vhost_set_vring_base(dev, &state);
if (r) {
return -errno;
}
- if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1) &&
- virtio_legacy_is_cross_endian(vdev)) {
+ if (vhost_needs_vring_endian(vdev)) {
r = vhost_virtqueue_set_vring_endian_legacy(dev,
virtio_is_big_endian(vdev),
vhost_vq_index);
@@ -789,7 +874,7 @@ static int vhost_virtqueue_start(struct vhost_dev *dev,
}
file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
- r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_KICK, &file);
+ r = dev->vhost_ops->vhost_set_vring_kick(dev, &file);
if (r) {
r = -errno;
goto fail_kick;
@@ -798,6 +883,14 @@ static int vhost_virtqueue_start(struct vhost_dev *dev,
/* Clear and discard previous events if any. */
event_notifier_test_and_clear(&vq->masked_notifier);
+ /* Init vring in unmasked state, unless guest_notifier_mask
+ * will do it later.
+ */
+ if (!vdev->use_guest_notifier_mask) {
+ /* TODO: check and handle errors. */
+ vhost_virtqueue_mask(dev, vdev, idx, false);
+ }
+
return 0;
fail_kick:
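
When the transport does not implement guest_notifier_mask, nothing will later call vhost_virtqueue_mask(), so the ring is unmasked here at start-up and the backend's call eventfd points straight at the guest notifier. A condensed sketch of what that unmask amounts to, lifted from vhost_virtqueue_mask() as shown later in this diff (vvq is the VirtQueue obtained above):

/* Effect of vhost_virtqueue_mask(dev, vdev, idx, false): interrupts bypass
 * the masked_notifier and go directly to the guest notifier. */
struct vhost_vring_file call = {
    .index = dev->vhost_ops->vhost_get_vq_index(dev, idx),
    .fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq)),
};
dev->vhost_ops->vhost_set_vring_call(dev, &call);
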
@@ -822,13 +915,13 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev,
struct vhost_virtqueue *vq,
unsigned idx)
{
- int vhost_vq_index = idx - dev->vq_index;
+ int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx);
struct vhost_vring_state state = {
.index = vhost_vq_index,
};
int r;
- assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
- r = dev->vhost_ops->vhost_call(dev, VHOST_GET_VRING_BASE, &state);
+
+ r = dev->vhost_ops->vhost_get_vring_base(dev, &state);
if (r < 0) {
fprintf(stderr, "vhost VQ %d ring restore failed: %d\n", idx, r);
fflush(stderr);
@@ -839,8 +932,7 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev,
/* In the cross-endian case, we need to reset the vring endianness to
* native as legacy devices expect so by default.
*/
- if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1) &&
- virtio_legacy_is_cross_endian(vdev)) {
+ if (vhost_needs_vring_endian(vdev)) {
r = vhost_virtqueue_set_vring_endian_legacy(dev,
!virtio_is_big_endian(vdev),
vhost_vq_index);
@@ -875,8 +967,9 @@ static void vhost_eventfd_del(MemoryListener *listener,
static int vhost_virtqueue_init(struct vhost_dev *dev,
struct vhost_virtqueue *vq, int n)
{
+ int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, n);
struct vhost_vring_file file = {
- .index = n,
+ .index = vhost_vq_index,
};
int r = event_notifier_init(&vq->masked_notifier, 0);
if (r < 0) {
@@ -884,7 +977,7 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
}
file.fd = event_notifier_get_fd(&vq->masked_notifier);
- r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_CALL, &file);
+ r = dev->vhost_ops->vhost_set_vring_call(dev, &file);
if (r) {
r = -errno;
goto fail_call;
@@ -906,6 +999,8 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
uint64_t features;
int i, r;
+ hdev->migration_blocker = NULL;
+
if (vhost_set_backend_type(hdev, backend_type) < 0) {
close((uintptr_t)opaque);
return -1;
@@ -916,18 +1011,26 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
return -errno;
}
- r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_OWNER, NULL);
+ if (used_memslots > hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) {
+ fprintf(stderr, "vhost backend memory slots limit is less"
+ " than current number of present memory slots\n");
+ close((uintptr_t)opaque);
+ return -1;
+ }
+ QLIST_INSERT_HEAD(&vhost_devices, hdev, entry);
+
+ r = hdev->vhost_ops->vhost_set_owner(hdev);
if (r < 0) {
goto fail;
}
- r = hdev->vhost_ops->vhost_call(hdev, VHOST_GET_FEATURES, &features);
+ r = hdev->vhost_ops->vhost_get_features(hdev, &features);
if (r < 0) {
goto fail;
}
for (i = 0; i < hdev->nvqs; ++i) {
- r = vhost_virtqueue_init(hdev, hdev->vqs + i, i);
+ r = vhost_virtqueue_init(hdev, hdev->vqs + i, hdev->vq_index + i);
if (r < 0) {
goto fail_vq;
}
@@ -949,12 +1052,21 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
.eventfd_del = vhost_eventfd_del,
.priority = 10
};
- hdev->migration_blocker = NULL;
- if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) {
- error_setg(&hdev->migration_blocker,
- "Migration disabled: vhost lacks VHOST_F_LOG_ALL feature.");
+
+ if (hdev->migration_blocker == NULL) {
+ if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) {
+ error_setg(&hdev->migration_blocker,
+ "Migration disabled: vhost lacks VHOST_F_LOG_ALL feature.");
+ } else if (!qemu_memfd_check()) {
+ error_setg(&hdev->migration_blocker,
+ "Migration disabled: failed to allocate shared memory");
+ }
+ }
+
+ if (hdev->migration_blocker != NULL) {
migrate_add_blocker(hdev->migration_blocker);
}
+
hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions));
hdev->n_mem_sections = 0;
hdev->mem_sections = NULL;
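
Instead of unconditionally blocking migration, vhost_dev_init() now records a blocker only when dirty logging is unusable: either the backend lacks VHOST_F_LOG_ALL or, for a shared log, no memfd can be allocated. A sketch of the blocker lifecycle around migrate_add_blocker() as used above; the teardown half is what a cleanup path would typically do and is shown only for illustration:

#include "qapi/error.h"
#include "migration/migration.h"

static void example_block_migration(struct vhost_dev *hdev)
{
    error_setg(&hdev->migration_blocker,
               "Migration disabled: vhost lacks VHOST_F_LOG_ALL feature.");
    migrate_add_blocker(hdev->migration_blocker);
}

static void example_unblock_migration(struct vhost_dev *hdev)
{
    if (hdev->migration_blocker) {
        migrate_del_blocker(hdev->migration_blocker);
        error_free(hdev->migration_blocker);
        hdev->migration_blocker = NULL;
    }
}
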
@@ -972,6 +1084,7 @@ fail_vq:
fail:
r = -errno;
hdev->vhost_ops->vhost_backend_cleanup(hdev);
+ QLIST_REMOVE(hdev, entry);
return r;
}
@@ -989,6 +1102,7 @@ void vhost_dev_cleanup(struct vhost_dev *hdev)
g_free(hdev->mem);
g_free(hdev->mem_sections);
hdev->vhost_ops->vhost_backend_cleanup(hdev);
+ QLIST_REMOVE(hdev, entry);
}
/* Stop processing guest IO notifications in qemu.
@@ -1066,18 +1180,17 @@ void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
{
struct VirtQueue *vvq = virtio_get_queue(vdev, n);
int r, index = n - hdev->vq_index;
+ struct vhost_vring_file file;
- assert(n >= hdev->vq_index && n < hdev->vq_index + hdev->nvqs);
-
- struct vhost_vring_file file = {
- .index = index
- };
if (mask) {
+ assert(vdev->use_guest_notifier_mask);
file.fd = event_notifier_get_fd(&hdev->vqs[index].masked_notifier);
} else {
file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
}
- r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_VRING_CALL, &file);
+
+ file.index = hdev->vhost_ops->vhost_get_vq_index(hdev, n);
+ r = hdev->vhost_ops->vhost_set_vring_call(hdev, &file);
assert(r >= 0);
}
@@ -1119,7 +1232,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
if (r < 0) {
goto fail_features;
}
- r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_MEM_TABLE, hdev->mem);
+ r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem);
if (r < 0) {
r = -errno;
goto fail_mem;
@@ -1138,10 +1251,12 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
uint64_t log_base;
hdev->log_size = vhost_get_log_size(hdev);
- hdev->log = vhost_log_get(hdev->log_size);
+ hdev->log = vhost_log_get(hdev->log_size,
+ vhost_dev_log_is_shared(hdev));
log_base = (uintptr_t)hdev->log->log;
- r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_LOG_BASE,
- hdev->log_size ? &log_base : NULL);
+ r = hdev->vhost_ops->vhost_set_log_base(hdev,
+ hdev->log_size ? log_base : 0,
+ hdev->log);
if (r < 0) {
r = -errno;
goto fail_log;