summary refs log tree commit diff stats
path: root/qemu/hw/virtio/virtio.c
diff options
context:
space:
mode:
Diffstat (limited to 'qemu/hw/virtio/virtio.c')
-rw-r--r-- qemu/hw/virtio/virtio.c 562
1 files changed, 425 insertions, 137 deletions
diff --git a/qemu/hw/virtio/virtio.c b/qemu/hw/virtio/virtio.c
index 788b556a7..30ede3d1c 100644
--- a/qemu/hw/virtio/virtio.c
+++ b/qemu/hw/virtio/virtio.c
@@ -11,8 +11,10 @@
*
*/
-#include <inttypes.h>
-
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "cpu.h"
#include "trace.h"
#include "exec/address-spaces.h"
#include "qemu/error-report.h"
@@ -60,6 +62,7 @@ typedef struct VRingUsed
typedef struct VRing
{
unsigned int num;
+ unsigned int num_default;
unsigned int align;
hwaddr desc;
hwaddr avail;
@@ -69,7 +72,15 @@ typedef struct VRing
struct VirtQueue
{
VRing vring;
+
+ /* Next head to pop */
uint16_t last_avail_idx;
+
+ /* Last avail_idx read from VQ. */
+ uint16_t shadow_avail_idx;
+
+ uint16_t used_idx;
+
/* Last used index value we have signalled on */
uint16_t signalled_used;
@@ -85,6 +96,7 @@ struct VirtQueue
uint16_t vector;
void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
+ void (*handle_aio_output)(VirtIODevice *vdev, VirtQueue *vq);
VirtIODevice *vdev;
EventNotifier guest_notifier;
EventNotifier host_notifier;
@@ -106,35 +118,15 @@ void virtio_queue_update_rings(VirtIODevice *vdev, int n)
vring->align);
}
-static inline uint64_t vring_desc_addr(VirtIODevice *vdev, hwaddr desc_pa,
- int i)
-{
- hwaddr pa;
- pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
- return virtio_ldq_phys(vdev, pa);
-}
-
-static inline uint32_t vring_desc_len(VirtIODevice *vdev, hwaddr desc_pa, int i)
+static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
+ hwaddr desc_pa, int i)
{
- hwaddr pa;
- pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
- return virtio_ldl_phys(vdev, pa);
-}
-
-static inline uint16_t vring_desc_flags(VirtIODevice *vdev, hwaddr desc_pa,
- int i)
-{
- hwaddr pa;
- pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
- return virtio_lduw_phys(vdev, pa);
-}
-
-static inline uint16_t vring_desc_next(VirtIODevice *vdev, hwaddr desc_pa,
- int i)
-{
- hwaddr pa;
- pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
- return virtio_lduw_phys(vdev, pa);
+ address_space_read(&address_space_memory, desc_pa + i * sizeof(VRingDesc),
+ MEMTXATTRS_UNSPECIFIED, (void *)desc, sizeof(VRingDesc));
+ virtio_tswap64s(vdev, &desc->addr);
+ virtio_tswap32s(vdev, &desc->len);
+ virtio_tswap16s(vdev, &desc->flags);
+ virtio_tswap16s(vdev, &desc->next);
}
static inline uint16_t vring_avail_flags(VirtQueue *vq)
@@ -148,7 +140,8 @@ static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
hwaddr pa;
pa = vq->vring.avail + offsetof(VRingAvail, idx);
- return virtio_lduw_phys(vq->vdev, pa);
+ vq->shadow_avail_idx = virtio_lduw_phys(vq->vdev, pa);
+ return vq->shadow_avail_idx;
}
static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
@@ -163,18 +156,15 @@ static inline uint16_t vring_get_used_event(VirtQueue *vq)
return vring_avail_ring(vq, vq->vring.num);
}
-static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
-{
- hwaddr pa;
- pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
- virtio_stl_phys(vq->vdev, pa, val);
-}
-
-static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
+static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
+ int i)
{
hwaddr pa;
- pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
- virtio_stl_phys(vq->vdev, pa, val);
+ virtio_tswap32s(vq->vdev, &uelem->id);
+ virtio_tswap32s(vq->vdev, &uelem->len);
+ pa = vq->vring.used + offsetof(VRingUsed, ring[i]);
+ address_space_write(&address_space_memory, pa, MEMTXATTRS_UNSPECIFIED,
+ (void *)uelem, sizeof(VRingUsedElem));
}
static uint16_t vring_used_idx(VirtQueue *vq)
@@ -189,6 +179,7 @@ static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
hwaddr pa;
pa = vq->vring.used + offsetof(VRingUsed, idx);
virtio_stw_phys(vq->vdev, pa, val);
+ vq->used_idx = val;
}
static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
@@ -220,7 +211,7 @@ static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
vq->notification = enable;
- if (virtio_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
+ if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
vring_set_avail_event(vq, vring_avail_idx(vq));
} else if (enable) {
vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
@@ -238,19 +229,23 @@ int virtio_queue_ready(VirtQueue *vq)
return vq->vring.avail != 0;
}
+/* Fetch avail_idx from VQ memory only when we really need to know if
+ * the guest has added some buffers. */
int virtio_queue_empty(VirtQueue *vq)
{
+ if (vq->shadow_avail_idx != vq->last_avail_idx) {
+ return 0;
+ }
+
return vring_avail_idx(vq) == vq->last_avail_idx;
}
-void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
- unsigned int len, unsigned int idx)
+static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
+ unsigned int len)
{
unsigned int offset;
int i;
- trace_virtqueue_fill(vq, elem, len, idx);
-
offset = 0;
for (i = 0; i < elem->in_num; i++) {
size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
@@ -266,12 +261,29 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
elem->out_sg[i].iov_len,
0, elem->out_sg[i].iov_len);
+}
- idx = (idx + vring_used_idx(vq)) % vq->vring.num;
+void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
+ unsigned int len)
+{
+ vq->last_avail_idx--;
+ virtqueue_unmap_sg(vq, elem, len);
+}
- /* Get a pointer to the next entry in the used ring. */
- vring_used_ring_id(vq, idx, elem->index);
- vring_used_ring_len(vq, idx, len);
+void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
+ unsigned int len, unsigned int idx)
+{
+ VRingUsedElem uelem;
+
+ trace_virtqueue_fill(vq, elem, len, idx);
+
+ virtqueue_unmap_sg(vq, elem, len);
+
+ idx = (idx + vq->used_idx) % vq->vring.num;
+
+ uelem.id = elem->index;
+ uelem.len = len;
+ vring_used_write(vq, &uelem, idx);
}
void virtqueue_flush(VirtQueue *vq, unsigned int count)
@@ -280,7 +292,7 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count)
/* Make sure buffer is written before we update index. */
smp_wmb();
trace_virtqueue_flush(vq, count);
- old = vring_used_idx(vq);
+ old = vq->used_idx;
new = old + count;
vring_used_idx_set(vq, new);
vq->inuse -= count;
@@ -302,7 +314,7 @@ static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
/* Check it isn't doing very strange things with descriptor numbers. */
if (num_heads > vq->vring.num) {
error_report("Guest moved used index from %u to %u",
- idx, vring_avail_idx(vq));
+ idx, vq->shadow_avail_idx);
exit(1);
}
/* On success, callers read a descriptor at vq->last_avail_idx.
@@ -331,18 +343,18 @@ static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
return head;
}
-static unsigned virtqueue_next_desc(VirtIODevice *vdev, hwaddr desc_pa,
- unsigned int i, unsigned int max)
+static unsigned virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
+ hwaddr desc_pa, unsigned int max)
{
unsigned int next;
/* If this descriptor says it doesn't chain, we're done. */
- if (!(vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_NEXT)) {
+ if (!(desc->flags & VRING_DESC_F_NEXT)) {
return max;
}
/* Check they're not leading us off end of descriptors. */
- next = vring_desc_next(vdev, desc_pa, i);
+ next = desc->next;
/* Make sure compiler knows to grab that: we don't want it changing! */
smp_wmb();
@@ -351,6 +363,7 @@ static unsigned virtqueue_next_desc(VirtIODevice *vdev, hwaddr desc_pa,
exit(1);
}
+ vring_desc_read(vdev, desc, desc_pa, next);
return next;
}
@@ -367,6 +380,7 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
while (virtqueue_num_heads(vq, idx)) {
VirtIODevice *vdev = vq->vdev;
unsigned int max, num_bufs, indirect = 0;
+ VRingDesc desc;
hwaddr desc_pa;
int i;
@@ -374,9 +388,10 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
num_bufs = total_bufs;
i = virtqueue_get_head(vq, idx++);
desc_pa = vq->vring.desc;
+ vring_desc_read(vdev, &desc, desc_pa, i);
- if (vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_INDIRECT) {
- if (vring_desc_len(vdev, desc_pa, i) % sizeof(VRingDesc)) {
+ if (desc.flags & VRING_DESC_F_INDIRECT) {
+ if (desc.len % sizeof(VRingDesc)) {
error_report("Invalid size for indirect buffer table");
exit(1);
}
@@ -389,9 +404,10 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
/* loop over the indirect descriptor table */
indirect = 1;
- max = vring_desc_len(vdev, desc_pa, i) / sizeof(VRingDesc);
- desc_pa = vring_desc_addr(vdev, desc_pa, i);
+ max = desc.len / sizeof(VRingDesc);
+ desc_pa = desc.addr;
num_bufs = i = 0;
+ vring_desc_read(vdev, &desc, desc_pa, i);
}
do {
@@ -401,15 +417,15 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
exit(1);
}
- if (vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_WRITE) {
- in_total += vring_desc_len(vdev, desc_pa, i);
+ if (desc.flags & VRING_DESC_F_WRITE) {
+ in_total += desc.len;
} else {
- out_total += vring_desc_len(vdev, desc_pa, i);
+ out_total += desc.len;
}
if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
goto done;
}
- } while ((i = virtqueue_next_desc(vdev, desc_pa, i, max)) != max);
+ } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max);
if (!indirect)
total_bufs = num_bufs;
@@ -434,98 +450,256 @@ int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
return in_bytes <= in_total && out_bytes <= out_total;
}
-void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
- size_t num_sg, int is_write)
+static void virtqueue_map_desc(unsigned int *p_num_sg, hwaddr *addr, struct iovec *iov,
+ unsigned int max_num_sg, bool is_write,
+ hwaddr pa, size_t sz)
+{
+ unsigned num_sg = *p_num_sg;
+ assert(num_sg <= max_num_sg);
+
+ while (sz) {
+ hwaddr len = sz;
+
+ if (num_sg == max_num_sg) {
+ error_report("virtio: too many write descriptors in indirect table");
+ exit(1);
+ }
+
+ iov[num_sg].iov_base = cpu_physical_memory_map(pa, &len, is_write);
+ iov[num_sg].iov_len = len;
+ addr[num_sg] = pa;
+
+ sz -= len;
+ pa += len;
+ num_sg++;
+ }
+ *p_num_sg = num_sg;
+}
+
+static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
+ unsigned int *num_sg, unsigned int max_size,
+ int is_write)
{
unsigned int i;
hwaddr len;
- if (num_sg > VIRTQUEUE_MAX_SIZE) {
- error_report("virtio: map attempt out of bounds: %zd > %d",
- num_sg, VIRTQUEUE_MAX_SIZE);
- exit(1);
- }
+ /* Note: this function MUST validate input, some callers
+ * are passing in num_sg values received over the network.
+ */
+ /* TODO: teach all callers that this can fail, and return failure instead
+ * of asserting here.
+ * When we do, we might be able to re-enable NDEBUG below.
+ */
+#ifdef NDEBUG
+#error building with NDEBUG is not supported
+#endif
+ assert(*num_sg <= max_size);
- for (i = 0; i < num_sg; i++) {
+ for (i = 0; i < *num_sg; i++) {
len = sg[i].iov_len;
sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
- if (sg[i].iov_base == NULL || len != sg[i].iov_len) {
+ if (!sg[i].iov_base) {
error_report("virtio: error trying to map MMIO memory");
exit(1);
}
+ if (len != sg[i].iov_len) {
+ error_report("virtio: unexpected memory split");
+ exit(1);
+ }
}
}
-int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
+void virtqueue_map(VirtQueueElement *elem)
+{
+ virtqueue_map_iovec(elem->in_sg, elem->in_addr, &elem->in_num,
+ VIRTQUEUE_MAX_SIZE, 1);
+ virtqueue_map_iovec(elem->out_sg, elem->out_addr, &elem->out_num,
+ VIRTQUEUE_MAX_SIZE, 0);
+}
+
+void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
+{
+ VirtQueueElement *elem;
+ size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
+ size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
+ size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
+ size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
+ size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
+ size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
+
+ assert(sz >= sizeof(VirtQueueElement));
+ elem = g_malloc(out_sg_end);
+ elem->out_num = out_num;
+ elem->in_num = in_num;
+ elem->in_addr = (void *)elem + in_addr_ofs;
+ elem->out_addr = (void *)elem + out_addr_ofs;
+ elem->in_sg = (void *)elem + in_sg_ofs;
+ elem->out_sg = (void *)elem + out_sg_ofs;
+ return elem;
+}
+
+void *virtqueue_pop(VirtQueue *vq, size_t sz)
{
unsigned int i, head, max;
hwaddr desc_pa = vq->vring.desc;
VirtIODevice *vdev = vq->vdev;
+ VirtQueueElement *elem;
+ unsigned out_num, in_num;
+ hwaddr addr[VIRTQUEUE_MAX_SIZE];
+ struct iovec iov[VIRTQUEUE_MAX_SIZE];
+ VRingDesc desc;
- if (!virtqueue_num_heads(vq, vq->last_avail_idx))
- return 0;
+ if (virtio_queue_empty(vq)) {
+ return NULL;
+ }
+ /* Needed after virtio_queue_empty(), see comment in
+ * virtqueue_num_heads(). */
+ smp_rmb();
/* When we start there are none of either input nor output. */
- elem->out_num = elem->in_num = 0;
+ out_num = in_num = 0;
max = vq->vring.num;
i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
- if (virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
+ if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
vring_set_avail_event(vq, vq->last_avail_idx);
}
- if (vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_INDIRECT) {
- if (vring_desc_len(vdev, desc_pa, i) % sizeof(VRingDesc)) {
+ vring_desc_read(vdev, &desc, desc_pa, i);
+ if (desc.flags & VRING_DESC_F_INDIRECT) {
+ if (desc.len % sizeof(VRingDesc)) {
error_report("Invalid size for indirect buffer table");
exit(1);
}
/* loop over the indirect descriptor table */
- max = vring_desc_len(vdev, desc_pa, i) / sizeof(VRingDesc);
- desc_pa = vring_desc_addr(vdev, desc_pa, i);
+ max = desc.len / sizeof(VRingDesc);
+ desc_pa = desc.addr;
i = 0;
+ vring_desc_read(vdev, &desc, desc_pa, i);
}
/* Collect all the descriptors */
do {
- struct iovec *sg;
-
- if (vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_WRITE) {
- if (elem->in_num >= ARRAY_SIZE(elem->in_sg)) {
- error_report("Too many write descriptors in indirect table");
- exit(1);
- }
- elem->in_addr[elem->in_num] = vring_desc_addr(vdev, desc_pa, i);
- sg = &elem->in_sg[elem->in_num++];
+ if (desc.flags & VRING_DESC_F_WRITE) {
+ virtqueue_map_desc(&in_num, addr + out_num, iov + out_num,
+ VIRTQUEUE_MAX_SIZE - out_num, true, desc.addr, desc.len);
} else {
- if (elem->out_num >= ARRAY_SIZE(elem->out_sg)) {
- error_report("Too many read descriptors in indirect table");
+ if (in_num) {
+ error_report("Incorrect order for descriptors");
exit(1);
}
- elem->out_addr[elem->out_num] = vring_desc_addr(vdev, desc_pa, i);
- sg = &elem->out_sg[elem->out_num++];
+ virtqueue_map_desc(&out_num, addr, iov,
+ VIRTQUEUE_MAX_SIZE, false, desc.addr, desc.len);
}
- sg->iov_len = vring_desc_len(vdev, desc_pa, i);
-
/* If we've got too many, that implies a descriptor loop. */
- if ((elem->in_num + elem->out_num) > max) {
+ if ((in_num + out_num) > max) {
error_report("Looped descriptor");
exit(1);
}
- } while ((i = virtqueue_next_desc(vdev, desc_pa, i, max)) != max);
-
- /* Now map what we have collected */
- virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
- virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);
+ } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max);
+ /* Now copy what we have collected and mapped */
+ elem = virtqueue_alloc_element(sz, out_num, in_num);
elem->index = head;
+ for (i = 0; i < out_num; i++) {
+ elem->out_addr[i] = addr[i];
+ elem->out_sg[i] = iov[i];
+ }
+ for (i = 0; i < in_num; i++) {
+ elem->in_addr[i] = addr[out_num + i];
+ elem->in_sg[i] = iov[out_num + i];
+ }
vq->inuse++;
trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
- return elem->in_num + elem->out_num;
+ return elem;
+}
+
+/* Reading and writing a structure directly to QEMUFile is *awful*, but
+ * it is what QEMU has always done by mistake. We can change it sooner
+ * or later by bumping the version number of the affected vm states.
+ * In the meantime, since the in-memory layout of VirtQueueElement
+ * has changed, we need to marshal to and from the layout that was
+ * used before the change.
+ */
+typedef struct VirtQueueElementOld {
+ unsigned int index;
+ unsigned int out_num;
+ unsigned int in_num;
+ hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
+ hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
+ struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
+ struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
+} VirtQueueElementOld;
+
+void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz)
+{
+ VirtQueueElement *elem;
+ VirtQueueElementOld data;
+ int i;
+
+ qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
+
+ elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
+ elem->index = data.index;
+
+ for (i = 0; i < elem->in_num; i++) {
+ elem->in_addr[i] = data.in_addr[i];
+ }
+
+ for (i = 0; i < elem->out_num; i++) {
+ elem->out_addr[i] = data.out_addr[i];
+ }
+
+ for (i = 0; i < elem->in_num; i++) {
+ /* Base is overwritten by virtqueue_map. */
+ elem->in_sg[i].iov_base = 0;
+ elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
+ }
+
+ for (i = 0; i < elem->out_num; i++) {
+ /* Base is overwritten by virtqueue_map. */
+ elem->out_sg[i].iov_base = 0;
+ elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
+ }
+
+ virtqueue_map(elem);
+ return elem;
+}
+
+void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem)
+{
+ VirtQueueElementOld data;
+ int i;
+
+ memset(&data, 0, sizeof(data));
+ data.index = elem->index;
+ data.in_num = elem->in_num;
+ data.out_num = elem->out_num;
+
+ for (i = 0; i < elem->in_num; i++) {
+ data.in_addr[i] = elem->in_addr[i];
+ }
+
+ for (i = 0; i < elem->out_num; i++) {
+ data.out_addr[i] = elem->out_addr[i];
+ }
+
+ for (i = 0; i < elem->in_num; i++) {
+ /* Base is overwritten by virtqueue_map when loading. Do not
+ * save it, as it would leak the QEMU address space layout. */
+ data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
+ }
+
+ for (i = 0; i < elem->out_num; i++) {
+ /* Do not save iov_base as above. */
+ data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
+ }
+ qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
}
/* virtio device */
@@ -560,7 +734,7 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
trace_virtio_set_status(vdev, val);
- if (virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) {
+ if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
val & VIRTIO_CONFIG_S_FEATURES_OK) {
int ret = virtio_validate_features(vdev);
@@ -629,10 +803,13 @@ void virtio_reset(void *opaque)
vdev->vq[i].vring.avail = 0;
vdev->vq[i].vring.used = 0;
vdev->vq[i].last_avail_idx = 0;
+ vdev->vq[i].shadow_avail_idx = 0;
+ vdev->vq[i].used_idx = 0;
virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
vdev->vq[i].signalled_used = 0;
vdev->vq[i].signalled_used_valid = false;
vdev->vq[i].notification = true;
+ vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
}
}
@@ -898,7 +1075,7 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
/* virtio-1 compliant devices cannot change the alignment */
- if (virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) {
+ if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
error_report("tried to modify queue alignment for virtio-1 device");
return;
}
@@ -912,7 +1089,17 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
virtio_queue_update_rings(vdev, n);
}
-void virtio_queue_notify_vq(VirtQueue *vq)
+static void virtio_queue_notify_aio_vq(VirtQueue *vq)
+{
+ if (vq->vring.desc && vq->handle_aio_output) {
+ VirtIODevice *vdev = vq->vdev;
+
+ trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
+ vq->handle_aio_output(vdev, vq);
+ }
+}
+
+static void virtio_queue_notify_vq(VirtQueue *vq)
{
if (vq->vring.desc && vq->handle_output) {
VirtIODevice *vdev = vq->vdev;
@@ -964,8 +1151,10 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
abort();
vdev->vq[i].vring.num = queue_size;
+ vdev->vq[i].vring.num_default = queue_size;
vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
vdev->vq[i].handle_output = handle_output;
+ vdev->vq[i].handle_aio_output = NULL;
return &vdev->vq[i];
}
@@ -977,6 +1166,7 @@ void virtio_del_queue(VirtIODevice *vdev, int n)
}
vdev->vq[n].vring.num = 0;
+ vdev->vq[n].vring.num_default = 0;
}
void virtio_irq(VirtQueue *vq)
@@ -986,32 +1176,32 @@ void virtio_irq(VirtQueue *vq)
virtio_notify_vector(vq->vdev, vq->vector);
}
-static bool vring_notify(VirtIODevice *vdev, VirtQueue *vq)
+bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
{
uint16_t old, new;
bool v;
/* We need to expose used array entries before checking used event. */
smp_mb();
/* Always notify when queue is empty (when feature acknowledge) */
- if (virtio_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
- !vq->inuse && vring_avail_idx(vq) == vq->last_avail_idx) {
+ if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
+ !vq->inuse && virtio_queue_empty(vq)) {
return true;
}
- if (!virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
+ if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
}
v = vq->signalled_used_valid;
vq->signalled_used_valid = true;
old = vq->signalled_used;
- new = vq->signalled_used = vring_used_idx(vq);
+ new = vq->signalled_used = vq->used_idx;
return !v || vring_need_event(vring_get_used_event(vq), new, old);
}
void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
- if (!vring_notify(vdev, vq)) {
+ if (!virtio_should_notify(vdev, vq)) {
return;
}
@@ -1035,7 +1225,7 @@ static bool virtio_device_endian_needed(void *opaque)
VirtIODevice *vdev = opaque;
assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
- if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) {
+ if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
return vdev->device_endian != virtio_default_endian();
}
/* Devices conforming to VIRTIO 1.0 or later are always LE. */
@@ -1056,33 +1246,38 @@ static bool virtio_virtqueue_needed(void *opaque)
return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
}
-static void put_virtqueue_state(QEMUFile *f, void *pv, size_t size)
+static bool virtio_ringsize_needed(void *opaque)
{
- VirtIODevice *vdev = pv;
+ VirtIODevice *vdev = opaque;
int i;
for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
- qemu_put_be64(f, vdev->vq[i].vring.avail);
- qemu_put_be64(f, vdev->vq[i].vring.used);
+ if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
+ return true;
+ }
}
+ return false;
}
-static int get_virtqueue_state(QEMUFile *f, void *pv, size_t size)
+static bool virtio_extra_state_needed(void *opaque)
{
- VirtIODevice *vdev = pv;
- int i;
+ VirtIODevice *vdev = opaque;
+ BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
+ VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
- for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
- vdev->vq[i].vring.avail = qemu_get_be64(f);
- vdev->vq[i].vring.used = qemu_get_be64(f);
- }
- return 0;
+ return k->has_extra_state &&
+ k->has_extra_state(qbus->parent);
}
-static VMStateInfo vmstate_info_virtqueue = {
+static const VMStateDescription vmstate_virtqueue = {
.name = "virtqueue_state",
- .get = get_virtqueue_state,
- .put = put_virtqueue_state,
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64(vring.avail, struct VirtQueue),
+ VMSTATE_UINT64(vring.used, struct VirtQueue),
+ VMSTATE_END_OF_LIST()
+ }
};
static const VMStateDescription vmstate_virtio_virtqueues = {
@@ -1091,12 +1286,74 @@ static const VMStateDescription vmstate_virtio_virtqueues = {
.minimum_version_id = 1,
.needed = &virtio_virtqueue_needed,
.fields = (VMStateField[]) {
+ VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
+ VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static const VMStateDescription vmstate_ringsize = {
+ .name = "ringsize_state",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32(vring.num_default, struct VirtQueue),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static const VMStateDescription vmstate_virtio_ringsize = {
+ .name = "virtio/ringsize",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = &virtio_ringsize_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
+ VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static int get_extra_state(QEMUFile *f, void *pv, size_t size)
+{
+ VirtIODevice *vdev = pv;
+ BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
+ VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+
+ if (!k->load_extra_state) {
+ return -1;
+ } else {
+ return k->load_extra_state(qbus->parent, f);
+ }
+}
+
+static void put_extra_state(QEMUFile *f, void *pv, size_t size)
+{
+ VirtIODevice *vdev = pv;
+ BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
+ VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+
+ k->save_extra_state(qbus->parent, f);
+}
+
+static const VMStateInfo vmstate_info_extra_state = {
+ .name = "virtqueue_extra_state",
+ .get = get_extra_state,
+ .put = put_extra_state,
+};
+
+static const VMStateDescription vmstate_virtio_extra_state = {
+ .name = "virtio/extra_state",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = &virtio_extra_state_needed,
+ .fields = (VMStateField[]) {
{
- .name = "virtqueues",
+ .name = "extra_state",
.version_id = 0,
.field_exists = NULL,
.size = 0,
- .info = &vmstate_info_virtqueue,
+ .info = &vmstate_info_extra_state,
.flags = VMS_SINGLE,
.offset = 0,
},
@@ -1138,6 +1395,8 @@ static const VMStateDescription vmstate_virtio = {
&vmstate_virtio_device_endian,
&vmstate_virtio_64bit_features,
&vmstate_virtio_virtqueues,
+ &vmstate_virtio_ringsize,
+ &vmstate_virtio_extra_state,
NULL
}
};
@@ -1264,7 +1523,7 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
num = qemu_get_be32(f);
if (num > VIRTIO_QUEUE_MAX) {
- error_report("Invalid number of PCI queues: 0x%x", num);
+ error_report("Invalid number of virtqueues: 0x%x", num);
return -1;
}
@@ -1348,6 +1607,8 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
vdev->vq[i].last_avail_idx, nheads);
return -1;
}
+ vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
+ vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
}
}
@@ -1430,6 +1691,7 @@ void virtio_init(VirtIODevice *vdev, const char *name,
vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
vdev);
vdev->device_endian = virtio_default_endian();
+ vdev->use_guest_notifier_mask = true;
}
hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
@@ -1460,7 +1722,7 @@ hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
{
return offsetof(VRingAvail, ring) +
- sizeof(uint64_t) * vdev->vq[n].vring.num;
+ sizeof(uint16_t) * vdev->vq[n].vring.num;
}
hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
@@ -1483,6 +1745,7 @@ uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
{
vdev->vq[n].last_avail_idx = idx;
+ vdev->vq[n].shadow_avail_idx = idx;
}
void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
@@ -1512,10 +1775,10 @@ void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
bool with_irqfd)
{
if (assign && !with_irqfd) {
- event_notifier_set_handler(&vq->guest_notifier,
+ event_notifier_set_handler(&vq->guest_notifier, false,
virtio_queue_guest_notifier_read);
} else {
- event_notifier_set_handler(&vq->guest_notifier, NULL);
+ event_notifier_set_handler(&vq->guest_notifier, false, NULL);
}
if (!assign) {
/* Test and clear notifier before closing it,
@@ -1529,6 +1792,31 @@ EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
return &vq->guest_notifier;
}
+static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
+{
+ VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
+ if (event_notifier_test_and_clear(n)) {
+ virtio_queue_notify_aio_vq(vq);
+ }
+}
+
+void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
+ void (*handle_output)(VirtIODevice *,
+ VirtQueue *))
+{
+ if (handle_output) {
+ vq->handle_aio_output = handle_output;
+ aio_set_event_notifier(ctx, &vq->host_notifier, true,
+ virtio_queue_host_notifier_aio_read);
+ } else {
+ aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL);
+ /* Test and clear notifier after disabling event,
+ * in case poll callback didn't have time to run. */
+ virtio_queue_host_notifier_aio_read(&vq->host_notifier);
+ vq->handle_aio_output = NULL;
+ }
+}
+
static void virtio_queue_host_notifier_read(EventNotifier *n)
{
VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
@@ -1541,10 +1829,10 @@ void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
bool set_handler)
{
if (assign && set_handler) {
- event_notifier_set_handler(&vq->host_notifier,
+ event_notifier_set_handler(&vq->host_notifier, true,
virtio_queue_host_notifier_read);
} else {
- event_notifier_set_handler(&vq->host_notifier, NULL);
+ event_notifier_set_handler(&vq->host_notifier, true, NULL);
}
if (!assign) {
/* Test and clear notifier before after disabling event,