summaryrefslogtreecommitdiffstats
path: root/kernel/tools/virtio
diff options
context:
space:
mode:
authorYunhong Jiang <yunhong.jiang@intel.com>2015-08-04 12:17:53 -0700
committerYunhong Jiang <yunhong.jiang@intel.com>2015-08-04 15:44:42 -0700
commit9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00 (patch)
tree1c9cafbcd35f783a87880a10f85d1a060db1a563 /kernel/tools/virtio
parent98260f3884f4a202f9ca5eabed40b1354c489b29 (diff)
Add the rt linux 4.1.3-rt3 as base
Import the rt linux 4.1.3-rt3 as OPNFV kvm base. It's from git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git linux-4.1.y-rt and the base is: commit 0917f823c59692d751951bf5ea699a2d1e2f26a2 Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Sat Jul 25 12:13:34 2015 +0200 Prepare v4.1.3-rt3 Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> We lose all the git history this way and it's not good. We should apply another opnfv project repo in future. Change-Id: I87543d81c9df70d99c5001fbdf646b202c19f423 Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
Diffstat (limited to 'kernel/tools/virtio')
-rw-r--r--kernel/tools/virtio/.gitignore3
-rw-r--r--kernel/tools/virtio/Makefile14
-rw-r--r--kernel/tools/virtio/asm/barrier.h14
-rw-r--r--kernel/tools/virtio/linux/bug.h10
-rw-r--r--kernel/tools/virtio/linux/device.h2
-rw-r--r--kernel/tools/virtio/linux/err.h26
-rw-r--r--kernel/tools/virtio/linux/hrtimer.h0
-rw-r--r--kernel/tools/virtio/linux/irqreturn.h1
-rw-r--r--kernel/tools/virtio/linux/kernel.h105
-rw-r--r--kernel/tools/virtio/linux/kmemleak.h3
-rw-r--r--kernel/tools/virtio/linux/module.h6
-rw-r--r--kernel/tools/virtio/linux/printk.h4
-rw-r--r--kernel/tools/virtio/linux/ratelimit.h4
-rw-r--r--kernel/tools/virtio/linux/scatterlist.h189
-rw-r--r--kernel/tools/virtio/linux/slab.h2
-rw-r--r--kernel/tools/virtio/linux/uaccess.h50
-rw-r--r--kernel/tools/virtio/linux/uio.h3
-rw-r--r--kernel/tools/virtio/linux/virtio.h68
-rw-r--r--kernel/tools/virtio/linux/virtio_byteorder.h8
-rw-r--r--kernel/tools/virtio/linux/virtio_config.h72
-rw-r--r--kernel/tools/virtio/linux/virtio_ring.h1
-rw-r--r--kernel/tools/virtio/linux/vringh.h1
-rw-r--r--kernel/tools/virtio/uapi/linux/uio.h1
-rw-r--r--kernel/tools/virtio/uapi/linux/virtio_config.h1
-rw-r--r--kernel/tools/virtio/uapi/linux/virtio_ring.h4
-rw-r--r--kernel/tools/virtio/uapi/linux/virtio_types.h1
-rw-r--r--kernel/tools/virtio/vhost_test/Makefile2
-rw-r--r--kernel/tools/virtio/vhost_test/vhost_test.c1
-rw-r--r--kernel/tools/virtio/virtio-trace/Makefile13
-rw-r--r--kernel/tools/virtio/virtio-trace/README118
-rw-r--r--kernel/tools/virtio/virtio-trace/trace-agent-ctl.c137
-rw-r--r--kernel/tools/virtio/virtio-trace/trace-agent-rw.c192
-rw-r--r--kernel/tools/virtio/virtio-trace/trace-agent.c270
-rw-r--r--kernel/tools/virtio/virtio-trace/trace-agent.h75
-rw-r--r--kernel/tools/virtio/virtio_test.c302
-rw-r--r--kernel/tools/virtio/vringh_test.c749
36 files changed, 2452 insertions, 0 deletions
diff --git a/kernel/tools/virtio/.gitignore b/kernel/tools/virtio/.gitignore
new file mode 100644
index 000000000..1cfbb0157
--- /dev/null
+++ b/kernel/tools/virtio/.gitignore
@@ -0,0 +1,3 @@
+*.d
+virtio_test
+vringh_test
diff --git a/kernel/tools/virtio/Makefile b/kernel/tools/virtio/Makefile
new file mode 100644
index 000000000..505ad51b3
--- /dev/null
+++ b/kernel/tools/virtio/Makefile
@@ -0,0 +1,14 @@
+all: test mod
+test: virtio_test vringh_test
+virtio_test: virtio_ring.o virtio_test.o
+vringh_test: vringh_test.o vringh.o virtio_ring.o
+
+CFLAGS += -g -O2 -Werror -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE
+vpath %.c ../../drivers/virtio ../../drivers/vhost
+mod:
+ ${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test
+.PHONY: all test mod clean
+clean:
+ ${RM} *.o vringh_test virtio_test vhost_test/*.o vhost_test/.*.cmd \
+ vhost_test/Module.symvers vhost_test/modules.order *.d
+-include *.d
diff --git a/kernel/tools/virtio/asm/barrier.h b/kernel/tools/virtio/asm/barrier.h
new file mode 100644
index 000000000..aff61e133
--- /dev/null
+++ b/kernel/tools/virtio/asm/barrier.h
@@ -0,0 +1,14 @@
+#if defined(__i386__) || defined(__x86_64__)
+#define barrier() asm volatile("" ::: "memory")
+#define mb() __sync_synchronize()
+
+#define smp_mb() mb()
+# define smp_rmb() barrier()
+# define smp_wmb() barrier()
+/* Weak barriers should be used. If not - it's a bug */
+# define rmb() abort()
+# define wmb() abort()
+#else
+#error Please fill in barrier macros
+#endif
+
diff --git a/kernel/tools/virtio/linux/bug.h b/kernel/tools/virtio/linux/bug.h
new file mode 100644
index 000000000..fb94f0787
--- /dev/null
+++ b/kernel/tools/virtio/linux/bug.h
@@ -0,0 +1,10 @@
+#ifndef BUG_H
+#define BUG_H
+
+#define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond))
+
+#define BUILD_BUG_ON(x)
+
+#define BUG() abort()
+
+#endif /* BUG_H */
diff --git a/kernel/tools/virtio/linux/device.h b/kernel/tools/virtio/linux/device.h
new file mode 100644
index 000000000..4ad7e1df0
--- /dev/null
+++ b/kernel/tools/virtio/linux/device.h
@@ -0,0 +1,2 @@
+#ifndef LINUX_DEVICE_H
+#endif
diff --git a/kernel/tools/virtio/linux/err.h b/kernel/tools/virtio/linux/err.h
new file mode 100644
index 000000000..e32eff8b2
--- /dev/null
+++ b/kernel/tools/virtio/linux/err.h
@@ -0,0 +1,26 @@
+#ifndef ERR_H
+#define ERR_H
+#define MAX_ERRNO 4095
+
+#define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO)
+
+static inline void * __must_check ERR_PTR(long error)
+{
+ return (void *) error;
+}
+
+static inline long __must_check PTR_ERR(const void *ptr)
+{
+ return (long) ptr;
+}
+
+static inline long __must_check IS_ERR(const void *ptr)
+{
+ return IS_ERR_VALUE((unsigned long)ptr);
+}
+
+static inline long __must_check IS_ERR_OR_NULL(const void *ptr)
+{
+ return !ptr || IS_ERR_VALUE((unsigned long)ptr);
+}
+#endif /* ERR_H */
diff --git a/kernel/tools/virtio/linux/hrtimer.h b/kernel/tools/virtio/linux/hrtimer.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/kernel/tools/virtio/linux/hrtimer.h
diff --git a/kernel/tools/virtio/linux/irqreturn.h b/kernel/tools/virtio/linux/irqreturn.h
new file mode 100644
index 000000000..a3c4e7be7
--- /dev/null
+++ b/kernel/tools/virtio/linux/irqreturn.h
@@ -0,0 +1 @@
+#include "../../../include/linux/irqreturn.h"
diff --git a/kernel/tools/virtio/linux/kernel.h b/kernel/tools/virtio/linux/kernel.h
new file mode 100644
index 000000000..1e8ce6979
--- /dev/null
+++ b/kernel/tools/virtio/linux/kernel.h
@@ -0,0 +1,105 @@
+#ifndef KERNEL_H
+#define KERNEL_H
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <stdarg.h>
+
+#include <linux/types.h>
+#include <linux/printk.h>
+#include <linux/bug.h>
+#include <errno.h>
+#include <unistd.h>
+#include <asm/barrier.h>
+
+#define CONFIG_SMP
+
+#define PAGE_SIZE getpagesize()
+#define PAGE_MASK (~(PAGE_SIZE-1))
+
+typedef unsigned long long dma_addr_t;
+typedef size_t __kernel_size_t;
+
+struct page {
+ unsigned long long dummy;
+};
+
+/* Physical == Virtual */
+#define virt_to_phys(p) ((unsigned long)p)
+#define phys_to_virt(a) ((void *)(unsigned long)(a))
+/* Page address: Virtual / 4K */
+#define page_to_phys(p) ((dma_addr_t)(unsigned long)(p))
+#define virt_to_page(p) ((struct page *)((unsigned long)p & PAGE_MASK))
+
+#define offset_in_page(p) (((unsigned long)p) % PAGE_SIZE)
+
+#define __printf(a,b) __attribute__((format(printf,a,b)))
+
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
+
+extern void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end;
+static inline void *kmalloc(size_t s, gfp_t gfp)
+{
+ if (__kmalloc_fake)
+ return __kmalloc_fake;
+ return malloc(s);
+}
+
+static inline void kfree(void *p)
+{
+ if (p >= __kfree_ignore_start && p < __kfree_ignore_end)
+ return;
+ free(p);
+}
+
+static inline void *krealloc(void *p, size_t s, gfp_t gfp)
+{
+ return realloc(p, s);
+}
+
+
+static inline unsigned long __get_free_page(gfp_t gfp)
+{
+ void *p;
+
+ posix_memalign(&p, PAGE_SIZE, PAGE_SIZE);
+ return (unsigned long)p;
+}
+
+static inline void free_page(unsigned long addr)
+{
+ free((void *)addr);
+}
+
+#define container_of(ptr, type, member) ({ \
+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+ (type *)( (char *)__mptr - offsetof(type,member) );})
+
+#define uninitialized_var(x) x = x
+
+# ifndef likely
+# define likely(x) (__builtin_expect(!!(x), 1))
+# endif
+# ifndef unlikely
+# define unlikely(x) (__builtin_expect(!!(x), 0))
+# endif
+
+#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+#ifdef DEBUG
+#define pr_debug(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+#else
+#define pr_debug(format, ...) do {} while (0)
+#endif
+#define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+#define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+
+#define min(x, y) ({ \
+ typeof(x) _min1 = (x); \
+ typeof(y) _min2 = (y); \
+ (void) (&_min1 == &_min2); \
+ _min1 < _min2 ? _min1 : _min2; })
+
+#endif /* KERNEL_H */
diff --git a/kernel/tools/virtio/linux/kmemleak.h b/kernel/tools/virtio/linux/kmemleak.h
new file mode 100644
index 000000000..c07072270
--- /dev/null
+++ b/kernel/tools/virtio/linux/kmemleak.h
@@ -0,0 +1,3 @@
+static inline void kmemleak_ignore(const void *ptr)
+{
+}
diff --git a/kernel/tools/virtio/linux/module.h b/kernel/tools/virtio/linux/module.h
new file mode 100644
index 000000000..28ce95a05
--- /dev/null
+++ b/kernel/tools/virtio/linux/module.h
@@ -0,0 +1,6 @@
+#include <linux/export.h>
+
+#define MODULE_LICENSE(__MODULE_LICENSE_value) \
+ static __attribute__((unused)) const char *__MODULE_LICENSE_name = \
+ __MODULE_LICENSE_value
+
diff --git a/kernel/tools/virtio/linux/printk.h b/kernel/tools/virtio/linux/printk.h
new file mode 100644
index 000000000..9f2423bd8
--- /dev/null
+++ b/kernel/tools/virtio/linux/printk.h
@@ -0,0 +1,4 @@
+#include "../../../include/linux/kern_levels.h"
+
+#define printk printf
+#define vprintk vprintf
diff --git a/kernel/tools/virtio/linux/ratelimit.h b/kernel/tools/virtio/linux/ratelimit.h
new file mode 100644
index 000000000..dcce1725f
--- /dev/null
+++ b/kernel/tools/virtio/linux/ratelimit.h
@@ -0,0 +1,4 @@
+#define DEFINE_RATELIMIT_STATE(name, interval_init, burst_init) int name = 0
+
+#define __ratelimit(x) (*(x))
+
diff --git a/kernel/tools/virtio/linux/scatterlist.h b/kernel/tools/virtio/linux/scatterlist.h
new file mode 100644
index 000000000..68c9e2adc
--- /dev/null
+++ b/kernel/tools/virtio/linux/scatterlist.h
@@ -0,0 +1,189 @@
+#ifndef SCATTERLIST_H
+#define SCATTERLIST_H
+#include <linux/kernel.h>
+
+struct scatterlist {
+ unsigned long page_link;
+ unsigned int offset;
+ unsigned int length;
+ dma_addr_t dma_address;
+};
+
+/* Scatterlist helpers, stolen from linux/scatterlist.h */
+#define sg_is_chain(sg) ((sg)->page_link & 0x01)
+#define sg_is_last(sg) ((sg)->page_link & 0x02)
+#define sg_chain_ptr(sg) \
+ ((struct scatterlist *) ((sg)->page_link & ~0x03))
+
+/**
+ * sg_assign_page - Assign a given page to an SG entry
+ * @sg: SG entry
+ * @page: The page
+ *
+ * Description:
+ * Assign page to sg entry. Also see sg_set_page(), the most commonly used
+ * variant.
+ *
+ **/
+static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
+{
+ unsigned long page_link = sg->page_link & 0x3;
+
+ /*
+ * In order for the low bit stealing approach to work, pages
+ * must be aligned at a 32-bit boundary as a minimum.
+ */
+ BUG_ON((unsigned long) page & 0x03);
+#ifdef CONFIG_DEBUG_SG
+ BUG_ON(sg->sg_magic != SG_MAGIC);
+ BUG_ON(sg_is_chain(sg));
+#endif
+ sg->page_link = page_link | (unsigned long) page;
+}
+
+/**
+ * sg_set_page - Set sg entry to point at given page
+ * @sg: SG entry
+ * @page: The page
+ * @len: Length of data
+ * @offset: Offset into page
+ *
+ * Description:
+ * Use this function to set an sg entry pointing at a page, never assign
+ * the page directly. We encode sg table information in the lower bits
+ * of the page pointer. See sg_page() for looking up the page belonging
+ * to an sg entry.
+ *
+ **/
+static inline void sg_set_page(struct scatterlist *sg, struct page *page,
+ unsigned int len, unsigned int offset)
+{
+ sg_assign_page(sg, page);
+ sg->offset = offset;
+ sg->length = len;
+}
+
+static inline struct page *sg_page(struct scatterlist *sg)
+{
+#ifdef CONFIG_DEBUG_SG
+ BUG_ON(sg->sg_magic != SG_MAGIC);
+ BUG_ON(sg_is_chain(sg));
+#endif
+ return (struct page *)((sg)->page_link & ~0x3);
+}
+
+/*
+ * Loop over each sg element, following the pointer to a new list if necessary
+ */
+#define for_each_sg(sglist, sg, nr, __i) \
+ for (__i = 0, sg = (sglist); __i < (nr); __i++, sg = sg_next(sg))
+
+/**
+ * sg_chain - Chain two sglists together
+ * @prv: First scatterlist
+ * @prv_nents: Number of entries in prv
+ * @sgl: Second scatterlist
+ *
+ * Description:
+ * Links @prv@ and @sgl@ together, to form a longer scatterlist.
+ *
+ **/
+static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
+ struct scatterlist *sgl)
+{
+ /*
+ * offset and length are unused for chain entry. Clear them.
+ */
+ prv[prv_nents - 1].offset = 0;
+ prv[prv_nents - 1].length = 0;
+
+ /*
+ * Set lowest bit to indicate a link pointer, and make sure to clear
+ * the termination bit if it happens to be set.
+ */
+ prv[prv_nents - 1].page_link = ((unsigned long) sgl | 0x01) & ~0x02;
+}
+
+/**
+ * sg_mark_end - Mark the end of the scatterlist
+ * @sg: SG entryScatterlist
+ *
+ * Description:
+ * Marks the passed in sg entry as the termination point for the sg
+ * table. A call to sg_next() on this entry will return NULL.
+ *
+ **/
+static inline void sg_mark_end(struct scatterlist *sg)
+{
+#ifdef CONFIG_DEBUG_SG
+ BUG_ON(sg->sg_magic != SG_MAGIC);
+#endif
+ /*
+ * Set termination bit, clear potential chain bit
+ */
+ sg->page_link |= 0x02;
+ sg->page_link &= ~0x01;
+}
+
+/**
+ * sg_unmark_end - Undo setting the end of the scatterlist
+ * @sg: SG entryScatterlist
+ *
+ * Description:
+ * Removes the termination marker from the given entry of the scatterlist.
+ *
+ **/
+static inline void sg_unmark_end(struct scatterlist *sg)
+{
+#ifdef CONFIG_DEBUG_SG
+ BUG_ON(sg->sg_magic != SG_MAGIC);
+#endif
+ sg->page_link &= ~0x02;
+}
+
+static inline struct scatterlist *sg_next(struct scatterlist *sg)
+{
+#ifdef CONFIG_DEBUG_SG
+ BUG_ON(sg->sg_magic != SG_MAGIC);
+#endif
+ if (sg_is_last(sg))
+ return NULL;
+
+ sg++;
+ if (unlikely(sg_is_chain(sg)))
+ sg = sg_chain_ptr(sg);
+
+ return sg;
+}
+
+static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents)
+{
+ memset(sgl, 0, sizeof(*sgl) * nents);
+#ifdef CONFIG_DEBUG_SG
+ {
+ unsigned int i;
+ for (i = 0; i < nents; i++)
+ sgl[i].sg_magic = SG_MAGIC;
+ }
+#endif
+ sg_mark_end(&sgl[nents - 1]);
+}
+
+static inline dma_addr_t sg_phys(struct scatterlist *sg)
+{
+ return page_to_phys(sg_page(sg)) + sg->offset;
+}
+
+static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
+ unsigned int buflen)
+{
+ sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf));
+}
+
+static inline void sg_init_one(struct scatterlist *sg,
+ const void *buf, unsigned int buflen)
+{
+ sg_init_table(sg, 1);
+ sg_set_buf(sg, buf, buflen);
+}
+#endif /* SCATTERLIST_H */
diff --git a/kernel/tools/virtio/linux/slab.h b/kernel/tools/virtio/linux/slab.h
new file mode 100644
index 000000000..81baeac8a
--- /dev/null
+++ b/kernel/tools/virtio/linux/slab.h
@@ -0,0 +1,2 @@
+#ifndef LINUX_SLAB_H
+#endif
diff --git a/kernel/tools/virtio/linux/uaccess.h b/kernel/tools/virtio/linux/uaccess.h
new file mode 100644
index 000000000..0a578fe18
--- /dev/null
+++ b/kernel/tools/virtio/linux/uaccess.h
@@ -0,0 +1,50 @@
+#ifndef UACCESS_H
+#define UACCESS_H
+extern void *__user_addr_min, *__user_addr_max;
+
+#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
+
+static inline void __chk_user_ptr(const volatile void *p, size_t size)
+{
+ assert(p >= __user_addr_min && p + size <= __user_addr_max);
+}
+
+#define put_user(x, ptr) \
+({ \
+ typeof(ptr) __pu_ptr = (ptr); \
+ __chk_user_ptr(__pu_ptr, sizeof(*__pu_ptr)); \
+ ACCESS_ONCE(*(__pu_ptr)) = x; \
+ 0; \
+})
+
+#define get_user(x, ptr) \
+({ \
+ typeof(ptr) __pu_ptr = (ptr); \
+ __chk_user_ptr(__pu_ptr, sizeof(*__pu_ptr)); \
+ x = ACCESS_ONCE(*(__pu_ptr)); \
+ 0; \
+})
+
+static void volatile_memcpy(volatile char *to, const volatile char *from,
+ unsigned long n)
+{
+ while (n--)
+ *(to++) = *(from++);
+}
+
+static inline int copy_from_user(void *to, const void __user volatile *from,
+ unsigned long n)
+{
+ __chk_user_ptr(from, n);
+ volatile_memcpy(to, from, n);
+ return 0;
+}
+
+static inline int copy_to_user(void __user volatile *to, const void *from,
+ unsigned long n)
+{
+ __chk_user_ptr(to, n);
+ volatile_memcpy(to, from, n);
+ return 0;
+}
+#endif /* UACCESS_H */
diff --git a/kernel/tools/virtio/linux/uio.h b/kernel/tools/virtio/linux/uio.h
new file mode 100644
index 000000000..cd20f0ba3
--- /dev/null
+++ b/kernel/tools/virtio/linux/uio.h
@@ -0,0 +1,3 @@
+#include <linux/kernel.h>
+
+#include "../../../include/linux/uio.h"
diff --git a/kernel/tools/virtio/linux/virtio.h b/kernel/tools/virtio/linux/virtio.h
new file mode 100644
index 000000000..a3e07016a
--- /dev/null
+++ b/kernel/tools/virtio/linux/virtio.h
@@ -0,0 +1,68 @@
+#ifndef LINUX_VIRTIO_H
+#define LINUX_VIRTIO_H
+#include <linux/scatterlist.h>
+#include <linux/kernel.h>
+
+/* TODO: empty stubs for now. Broken but enough for virtio_ring.c */
+#define list_add_tail(a, b) do {} while (0)
+#define list_del(a) do {} while (0)
+#define list_for_each_entry(a, b, c) while (0)
+/* end of stubs */
+
+struct virtio_device {
+ void *dev;
+ u64 features;
+};
+
+struct virtqueue {
+ /* TODO: commented as list macros are empty stubs for now.
+ * Broken but enough for virtio_ring.c
+ * struct list_head list; */
+ void (*callback)(struct virtqueue *vq);
+ const char *name;
+ struct virtio_device *vdev;
+ unsigned int index;
+ unsigned int num_free;
+ void *priv;
+};
+
+/* Interfaces exported by virtio_ring. */
+int virtqueue_add_sgs(struct virtqueue *vq,
+ struct scatterlist *sgs[],
+ unsigned int out_sgs,
+ unsigned int in_sgs,
+ void *data,
+ gfp_t gfp);
+
+int virtqueue_add_outbuf(struct virtqueue *vq,
+ struct scatterlist sg[], unsigned int num,
+ void *data,
+ gfp_t gfp);
+
+int virtqueue_add_inbuf(struct virtqueue *vq,
+ struct scatterlist sg[], unsigned int num,
+ void *data,
+ gfp_t gfp);
+
+bool virtqueue_kick(struct virtqueue *vq);
+
+void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
+
+void virtqueue_disable_cb(struct virtqueue *vq);
+
+bool virtqueue_enable_cb(struct virtqueue *vq);
+bool virtqueue_enable_cb_delayed(struct virtqueue *vq);
+
+void *virtqueue_detach_unused_buf(struct virtqueue *vq);
+struct virtqueue *vring_new_virtqueue(unsigned int index,
+ unsigned int num,
+ unsigned int vring_align,
+ struct virtio_device *vdev,
+ bool weak_barriers,
+ void *pages,
+ bool (*notify)(struct virtqueue *vq),
+ void (*callback)(struct virtqueue *vq),
+ const char *name);
+void vring_del_virtqueue(struct virtqueue *vq);
+
+#endif
diff --git a/kernel/tools/virtio/linux/virtio_byteorder.h b/kernel/tools/virtio/linux/virtio_byteorder.h
new file mode 100644
index 000000000..9de9e6ac1
--- /dev/null
+++ b/kernel/tools/virtio/linux/virtio_byteorder.h
@@ -0,0 +1,8 @@
+#ifndef _LINUX_VIRTIO_BYTEORDER_STUB_H
+#define _LINUX_VIRTIO_BYTEORDER_STUB_H
+
+#include <asm/byteorder.h>
+#include "../../include/linux/byteorder/generic.h"
+#include "../../include/linux/virtio_byteorder.h"
+
+#endif
diff --git a/kernel/tools/virtio/linux/virtio_config.h b/kernel/tools/virtio/linux/virtio_config.h
new file mode 100644
index 000000000..806d683ab
--- /dev/null
+++ b/kernel/tools/virtio/linux/virtio_config.h
@@ -0,0 +1,72 @@
+#include <linux/virtio_byteorder.h>
+#include <linux/virtio.h>
+#include <uapi/linux/virtio_config.h>
+
+/*
+ * __virtio_test_bit - helper to test feature bits. For use by transports.
+ * Devices should normally use virtio_has_feature,
+ * which includes more checks.
+ * @vdev: the device
+ * @fbit: the feature bit
+ */
+static inline bool __virtio_test_bit(const struct virtio_device *vdev,
+ unsigned int fbit)
+{
+ return vdev->features & (1ULL << fbit);
+}
+
+/**
+ * __virtio_set_bit - helper to set feature bits. For use by transports.
+ * @vdev: the device
+ * @fbit: the feature bit
+ */
+static inline void __virtio_set_bit(struct virtio_device *vdev,
+ unsigned int fbit)
+{
+ vdev->features |= (1ULL << fbit);
+}
+
+/**
+ * __virtio_clear_bit - helper to clear feature bits. For use by transports.
+ * @vdev: the device
+ * @fbit: the feature bit
+ */
+static inline void __virtio_clear_bit(struct virtio_device *vdev,
+ unsigned int fbit)
+{
+ vdev->features &= ~(1ULL << fbit);
+}
+
+#define virtio_has_feature(dev, feature) \
+ (__virtio_test_bit((dev), feature))
+
+static inline u16 virtio16_to_cpu(struct virtio_device *vdev, __virtio16 val)
+{
+ return __virtio16_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+}
+
+static inline __virtio16 cpu_to_virtio16(struct virtio_device *vdev, u16 val)
+{
+ return __cpu_to_virtio16(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+}
+
+static inline u32 virtio32_to_cpu(struct virtio_device *vdev, __virtio32 val)
+{
+ return __virtio32_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+}
+
+static inline __virtio32 cpu_to_virtio32(struct virtio_device *vdev, u32 val)
+{
+ return __cpu_to_virtio32(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+}
+
+static inline u64 virtio64_to_cpu(struct virtio_device *vdev, __virtio64 val)
+{
+ return __virtio64_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+}
+
+static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val)
+{
+ return __cpu_to_virtio64(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+}
+
diff --git a/kernel/tools/virtio/linux/virtio_ring.h b/kernel/tools/virtio/linux/virtio_ring.h
new file mode 100644
index 000000000..8949c4e27
--- /dev/null
+++ b/kernel/tools/virtio/linux/virtio_ring.h
@@ -0,0 +1 @@
+#include "../../../include/linux/virtio_ring.h"
diff --git a/kernel/tools/virtio/linux/vringh.h b/kernel/tools/virtio/linux/vringh.h
new file mode 100644
index 000000000..9348957be
--- /dev/null
+++ b/kernel/tools/virtio/linux/vringh.h
@@ -0,0 +1 @@
+#include "../../../include/linux/vringh.h"
diff --git a/kernel/tools/virtio/uapi/linux/uio.h b/kernel/tools/virtio/uapi/linux/uio.h
new file mode 100644
index 000000000..7230e9002
--- /dev/null
+++ b/kernel/tools/virtio/uapi/linux/uio.h
@@ -0,0 +1 @@
+#include <sys/uio.h>
diff --git a/kernel/tools/virtio/uapi/linux/virtio_config.h b/kernel/tools/virtio/uapi/linux/virtio_config.h
new file mode 100644
index 000000000..4c86675f0
--- /dev/null
+++ b/kernel/tools/virtio/uapi/linux/virtio_config.h
@@ -0,0 +1 @@
+#include "../../../../include/uapi/linux/virtio_config.h"
diff --git a/kernel/tools/virtio/uapi/linux/virtio_ring.h b/kernel/tools/virtio/uapi/linux/virtio_ring.h
new file mode 100644
index 000000000..4d99c7823
--- /dev/null
+++ b/kernel/tools/virtio/uapi/linux/virtio_ring.h
@@ -0,0 +1,4 @@
+#ifndef VIRTIO_RING_H
+#define VIRTIO_RING_H
+#include "../../../../include/uapi/linux/virtio_ring.h"
+#endif /* VIRTIO_RING_H */
diff --git a/kernel/tools/virtio/uapi/linux/virtio_types.h b/kernel/tools/virtio/uapi/linux/virtio_types.h
new file mode 100644
index 000000000..e7a1096e7
--- /dev/null
+++ b/kernel/tools/virtio/uapi/linux/virtio_types.h
@@ -0,0 +1 @@
+#include "../../include/uapi/linux/virtio_types.h"
diff --git a/kernel/tools/virtio/vhost_test/Makefile b/kernel/tools/virtio/vhost_test/Makefile
new file mode 100644
index 000000000..a1d35b81b
--- /dev/null
+++ b/kernel/tools/virtio/vhost_test/Makefile
@@ -0,0 +1,2 @@
+obj-m += vhost_test.o
+EXTRA_CFLAGS += -Idrivers/vhost
diff --git a/kernel/tools/virtio/vhost_test/vhost_test.c b/kernel/tools/virtio/vhost_test/vhost_test.c
new file mode 100644
index 000000000..18735189e
--- /dev/null
+++ b/kernel/tools/virtio/vhost_test/vhost_test.c
@@ -0,0 +1 @@
+#include "test.c"
diff --git a/kernel/tools/virtio/virtio-trace/Makefile b/kernel/tools/virtio/virtio-trace/Makefile
new file mode 100644
index 000000000..0d2381633
--- /dev/null
+++ b/kernel/tools/virtio/virtio-trace/Makefile
@@ -0,0 +1,13 @@
+CC = gcc
+CFLAGS = -O2 -Wall -pthread
+
+all: trace-agent
+
+.c.o:
+ $(CC) $(CFLAGS) -c $^ -o $@
+
+trace-agent: trace-agent.o trace-agent-ctl.o trace-agent-rw.o
+ $(CC) $(CFLAGS) -o $@ $^
+
+clean:
+ rm -f *.o trace-agent
diff --git a/kernel/tools/virtio/virtio-trace/README b/kernel/tools/virtio/virtio-trace/README
new file mode 100644
index 000000000..b64845b82
--- /dev/null
+++ b/kernel/tools/virtio/virtio-trace/README
@@ -0,0 +1,118 @@
+Trace Agent for virtio-trace
+============================
+
+Trace agent is a user tool for sending trace data of a guest to a Host in low
+overhead. Trace agent has the following functions:
+ - splice a page of ring-buffer to read_pipe without memory copying
+ - splice the page from write_pipe to virtio-console without memory copying
+ - write trace data to stdout by using -o option
+ - controlled by start/stop orders from a Host
+
+The trace agent operates as follows:
+ 1) Initialize all structures.
+ 2) Create a read/write thread per CPU. Each thread is bound to a CPU.
+ The read/write threads hold it.
+ 3) A controller thread does poll() for a start order of a host.
+ 4) After the controller of the trace agent receives a start order from a host,
+ the controller wake read/write threads.
+ 5) The read/write threads start to read trace data from ring-buffers and
+ write the data to virtio-serial.
+ 6) If the controller receives a stop order from a host, the read/write threads
+ stop to read trace data.
+
+
+Files
+=====
+
+README: this file
+Makefile: Makefile of trace agent for virtio-trace
+trace-agent.c: includes main function, sets up for operating trace agent
+trace-agent.h: includes all structures and some macros
+trace-agent-ctl.c: includes controller function for read/write threads
+trace-agent-rw.c: includes read/write threads function
+
+
+Setup
+=====
+
+To use this trace agent for virtio-trace, we need to prepare some virtio-serial
+I/Fs.
+
+1) Make FIFO in a host
+ virtio-trace uses virtio-serial pipe as trace data paths as to the number
+of CPUs and a control path, so FIFO (named pipe) should be created as follows:
+ # mkdir /tmp/virtio-trace/
+ # mkfifo /tmp/virtio-trace/trace-path-cpu{0,1,2,...,X}.{in,out}
+ # mkfifo /tmp/virtio-trace/agent-ctl-path.{in,out}
+
+For example, if a guest use three CPUs, the names are
+ trace-path-cpu{0,1,2}.{in.out}
+and
+ agent-ctl-path.{in,out}.
+
+2) Set up of virtio-serial pipe in a host
+ Add qemu option to use virtio-serial pipe.
+
+ ##virtio-serial device##
+ -device virtio-serial-pci,id=virtio-serial0\
+ ##control path##
+ -chardev pipe,id=charchannel0,path=/tmp/virtio-trace/agent-ctl-path\
+ -device virtserialport,bus=virtio-serial0.0,nr=1,chardev=charchannel0,\
+ id=channel0,name=agent-ctl-path\
+ ##data path##
+ -chardev pipe,id=charchannel1,path=/tmp/virtio-trace/trace-path-cpu0\
+ -device virtserialport,bus=virtio-serial0.0,nr=2,chardev=charchannel0,\
+ id=channel1,name=trace-path-cpu0\
+ ...
+
+If you manage guests with libvirt, add the following tags to domain XML files.
+Then, libvirt passes the same command option to qemu.
+
+ <channel type='pipe'>
+ <source path='/tmp/virtio-trace/agent-ctl-path'/>
+ <target type='virtio' name='agent-ctl-path'/>
+ <address type='virtio-serial' controller='0' bus='0' port='0'/>
+ </channel>
+ <channel type='pipe'>
+ <source path='/tmp/virtio-trace/trace-path-cpu0'/>
+ <target type='virtio' name='trace-path-cpu0'/>
+ <address type='virtio-serial' controller='0' bus='0' port='1'/>
+ </channel>
+ ...
+Here, chardev names are restricted to trace-path-cpuX and agent-ctl-path. For
+example, if a guest use three CPUs, chardev names should be trace-path-cpu0,
+trace-path-cpu1, trace-path-cpu2, and agent-ctl-path.
+
+3) Boot the guest
+ You can find some chardev in /dev/virtio-ports/ in the guest.
+
+
+Run
+===
+
+0) Build trace agent in a guest
+ $ make
+
+1) Enable ftrace in the guest
+ <Example>
+ # echo 1 > /sys/kernel/debug/tracing/events/sched/enable
+
+2) Run trace agent in the guest
+ This agent must be operated as root.
+ # ./trace-agent
+read/write threads in the agent wait for start order from host. If you add -o
+option, trace data are output via stdout in the guest.
+
+3) Open FIFO in a host
+ # cat /tmp/virtio-trace/trace-path-cpu0.out
+If a host does not open these, trace data get stuck in buffers of virtio. Then,
+the guest will stop by specification of chardev in QEMU. This blocking mode may
+be solved in the future.
+
+4) Start to read trace data by ordering from a host
+ A host injects read start order to the guest via virtio-serial.
+ # echo 1 > /tmp/virtio-trace/agent-ctl-path.in
+
+5) Stop to read trace data by ordering from a host
+ A host injects read stop order to the guest via virtio-serial.
+ # echo 0 > /tmp/virtio-trace/agent-ctl-path.in
diff --git a/kernel/tools/virtio/virtio-trace/trace-agent-ctl.c b/kernel/tools/virtio/virtio-trace/trace-agent-ctl.c
new file mode 100644
index 000000000..a2d0403c4
--- /dev/null
+++ b/kernel/tools/virtio/virtio-trace/trace-agent-ctl.c
@@ -0,0 +1,137 @@
+/*
+ * Controller of read/write threads for virtio-trace
+ *
+ * Copyright (C) 2012 Hitachi, Ltd.
+ * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com>
+ * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+ *
+ * Licensed under GPL version 2 only.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "trace-agent.h"
+
+#define HOST_MSG_SIZE 256
+#define EVENT_WAIT_MSEC 100
+
+static volatile sig_atomic_t global_signal_val;
+bool global_sig_receive; /* default false */
+bool global_run_operation; /* default false*/
+
+/* Handle SIGTERM/SIGINT/SIGQUIT to exit */
+static void signal_handler(int sig)
+{
+ global_signal_val = sig;
+}
+
+int rw_ctl_init(const char *ctl_path)
+{
+ int ctl_fd;
+
+ ctl_fd = open(ctl_path, O_RDONLY);
+ if (ctl_fd == -1) {
+ pr_err("Cannot open ctl_fd\n");
+ goto error;
+ }
+
+ return ctl_fd;
+
+error:
+ exit(EXIT_FAILURE);
+}
+
+static int wait_order(int ctl_fd)
+{
+ struct pollfd poll_fd;
+ int ret = 0;
+
+ while (!global_sig_receive) {
+ poll_fd.fd = ctl_fd;
+ poll_fd.events = POLLIN;
+
+ ret = poll(&poll_fd, 1, EVENT_WAIT_MSEC);
+
+ if (global_signal_val) {
+ global_sig_receive = true;
+ pr_info("Receive interrupt %d\n", global_signal_val);
+
+ /* Wakes rw-threads when they are sleeping */
+ if (!global_run_operation)
+ pthread_cond_broadcast(&cond_wakeup);
+
+ ret = -1;
+ break;
+ }
+
+ if (ret < 0) {
+ pr_err("Polling error\n");
+ goto error;
+ }
+
+ if (ret)
+ break;
+ };
+
+ return ret;
+
+error:
+ exit(EXIT_FAILURE);
+}
+
+/*
+ * contol read/write threads by handling global_run_operation
+ */
+void *rw_ctl_loop(int ctl_fd)
+{
+ ssize_t rlen;
+ char buf[HOST_MSG_SIZE];
+ int ret;
+
+ /* Setup signal handlers */
+ signal(SIGTERM, signal_handler);
+ signal(SIGINT, signal_handler);
+ signal(SIGQUIT, signal_handler);
+
+ while (!global_sig_receive) {
+
+ ret = wait_order(ctl_fd);
+ if (ret < 0)
+ break;
+
+ rlen = read(ctl_fd, buf, sizeof(buf));
+ if (rlen < 0) {
+ pr_err("read data error in ctl thread\n");
+ goto error;
+ }
+
+ if (rlen == 2 && buf[0] == '1') {
+ /*
+ * If host writes '1' to a control path,
+ * this controller wakes all read/write threads.
+ */
+ global_run_operation = true;
+ pthread_cond_broadcast(&cond_wakeup);
+ pr_debug("Wake up all read/write threads\n");
+ } else if (rlen == 2 && buf[0] == '0') {
+ /*
+ * If host writes '0' to a control path, read/write
+ * threads will wait for notification from Host.
+ */
+ global_run_operation = false;
+ pr_debug("Stop all read/write threads\n");
+ } else
+ pr_info("Invalid host notification: %s\n", buf);
+ }
+
+ return NULL;
+
+error:
+ exit(EXIT_FAILURE);
+}
diff --git a/kernel/tools/virtio/virtio-trace/trace-agent-rw.c b/kernel/tools/virtio/virtio-trace/trace-agent-rw.c
new file mode 100644
index 000000000..3aace5ea4
--- /dev/null
+++ b/kernel/tools/virtio/virtio-trace/trace-agent-rw.c
@@ -0,0 +1,192 @@
+/*
+ * Read/write thread of a guest agent for virtio-trace
+ *
+ * Copyright (C) 2012 Hitachi, Ltd.
+ * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com>
+ * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+ *
+ * Licensed under GPL version 2 only.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include "trace-agent.h"
+
+#define READ_WAIT_USEC 100000
+
+void *rw_thread_info_new(void)
+{
+ struct rw_thread_info *rw_ti;
+
+ rw_ti = zalloc(sizeof(struct rw_thread_info));
+ if (rw_ti == NULL) {
+ pr_err("rw_thread_info zalloc error\n");
+ exit(EXIT_FAILURE);
+ }
+
+ rw_ti->cpu_num = -1;
+ rw_ti->in_fd = -1;
+ rw_ti->out_fd = -1;
+ rw_ti->read_pipe = -1;
+ rw_ti->write_pipe = -1;
+ rw_ti->pipe_size = PIPE_INIT;
+
+ return rw_ti;
+}
+
+void *rw_thread_init(int cpu, const char *in_path, const char *out_path,
+ bool stdout_flag, unsigned long pipe_size,
+ struct rw_thread_info *rw_ti)
+{
+ int data_pipe[2];
+
+ rw_ti->cpu_num = cpu;
+
+ /* set read(input) fd */
+ rw_ti->in_fd = open(in_path, O_RDONLY);
+ if (rw_ti->in_fd == -1) {
+ pr_err("Could not open in_fd (CPU:%d)\n", cpu);
+ goto error;
+ }
+
+ /* set write(output) fd */
+ if (!stdout_flag) {
+ /* virtio-serial output mode */
+ rw_ti->out_fd = open(out_path, O_WRONLY);
+ if (rw_ti->out_fd == -1) {
+ pr_err("Could not open out_fd (CPU:%d)\n", cpu);
+ goto error;
+ }
+ } else
+ /* stdout mode */
+ rw_ti->out_fd = STDOUT_FILENO;
+
+ if (pipe2(data_pipe, O_NONBLOCK) < 0) {
+ pr_err("Could not create pipe in rw-thread(%d)\n", cpu);
+ goto error;
+ }
+
+ /*
+ * Size of pipe is 64kB in default based on fs/pipe.c.
+ * To read/write trace data speedy, pipe size is changed.
+ */
+ if (fcntl(*data_pipe, F_SETPIPE_SZ, pipe_size) < 0) {
+ pr_err("Could not change pipe size in rw-thread(%d)\n", cpu);
+ goto error;
+ }
+
+ rw_ti->read_pipe = data_pipe[1];
+ rw_ti->write_pipe = data_pipe[0];
+ rw_ti->pipe_size = pipe_size;
+
+ return NULL;
+
+error:
+ exit(EXIT_FAILURE);
+}
+
+/* Bind a thread to a cpu */
+static void bind_cpu(int cpu_num)
+{
+ cpu_set_t mask;
+
+ CPU_ZERO(&mask);
+ CPU_SET(cpu_num, &mask);
+
+ /* bind my thread to cpu_num by assigning zero to the first argument */
+ if (sched_setaffinity(0, sizeof(mask), &mask) == -1)
+ pr_err("Could not set CPU#%d affinity\n", (int)cpu_num);
+}
+
+static void *rw_thread_main(void *thread_info)
+{
+ ssize_t rlen, wlen;
+ ssize_t ret;
+ struct rw_thread_info *ts = (struct rw_thread_info *)thread_info;
+
+ bind_cpu(ts->cpu_num);
+
+ while (1) {
+ /* Wait for a read order of trace data by Host OS */
+ if (!global_run_operation) {
+ pthread_mutex_lock(&mutex_notify);
+ pthread_cond_wait(&cond_wakeup, &mutex_notify);
+ pthread_mutex_unlock(&mutex_notify);
+ }
+
+ if (global_sig_receive)
+ break;
+
+ /*
+ * Each thread read trace_pipe_raw of each cpu bounding the
+ * thread, so contention of multi-threads does not occur.
+ */
+ rlen = splice(ts->in_fd, NULL, ts->read_pipe, NULL,
+ ts->pipe_size, SPLICE_F_MOVE | SPLICE_F_MORE);
+
+ if (rlen < 0) {
+ pr_err("Splice_read in rw-thread(%d)\n", ts->cpu_num);
+ goto error;
+ } else if (rlen == 0) {
+ /*
+ * If trace data do not exist or are unreadable not
+ * for exceeding the page size, splice_read returns
+ * NULL. Then, this waits for being filled the data in a
+ * ring-buffer.
+ */
+ usleep(READ_WAIT_USEC);
+ pr_debug("Read retry(cpu:%d)\n", ts->cpu_num);
+ continue;
+ }
+
+ wlen = 0;
+
+ do {
+ ret = splice(ts->write_pipe, NULL, ts->out_fd, NULL,
+ rlen - wlen,
+ SPLICE_F_MOVE | SPLICE_F_MORE);
+
+ if (ret < 0) {
+ pr_err("Splice_write in rw-thread(%d)\n",
+ ts->cpu_num);
+ goto error;
+ } else if (ret == 0)
+ /*
+ * When host reader is not in time for reading
+ * trace data, guest will be stopped. This is
+ * because char dev in QEMU is not supported
+ * non-blocking mode. Then, writer might be
+ * sleep in that case.
+ * This sleep will be removed by supporting
+ * non-blocking mode.
+ */
+ sleep(1);
+ wlen += ret;
+ } while (wlen < rlen);
+ }
+
+ return NULL;
+
+error:
+ exit(EXIT_FAILURE);
+}
+
+
+pthread_t rw_thread_run(struct rw_thread_info *rw_ti)
+{
+ int ret;
+ pthread_t rw_thread_per_cpu;
+
+ ret = pthread_create(&rw_thread_per_cpu, NULL, rw_thread_main, rw_ti);
+ if (ret != 0) {
+ pr_err("Could not create a rw thread(%d)\n", rw_ti->cpu_num);
+ exit(EXIT_FAILURE);
+ }
+
+ return rw_thread_per_cpu;
+}
diff --git a/kernel/tools/virtio/virtio-trace/trace-agent.c b/kernel/tools/virtio/virtio-trace/trace-agent.c
new file mode 100644
index 000000000..0a0a7dd4e
--- /dev/null
+++ b/kernel/tools/virtio/virtio-trace/trace-agent.c
@@ -0,0 +1,270 @@
+/*
+ * Guest agent for virtio-trace
+ *
+ * Copyright (C) 2012 Hitachi, Ltd.
+ * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com>
+ * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+ *
+ * Licensed under GPL version 2 only.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "trace-agent.h"
+
+#define PAGE_SIZE (sysconf(_SC_PAGE_SIZE))
+#define PIPE_DEF_BUFS 16
+#define PIPE_MIN_SIZE (PAGE_SIZE*PIPE_DEF_BUFS)
+#define PIPE_MAX_SIZE (1024*1024)
+#define READ_PATH_FMT \
+ "/sys/kernel/debug/tracing/per_cpu/cpu%d/trace_pipe_raw"
+#define WRITE_PATH_FMT "/dev/virtio-ports/trace-path-cpu%d"
+#define CTL_PATH "/dev/virtio-ports/agent-ctl-path"
+
+pthread_mutex_t mutex_notify = PTHREAD_MUTEX_INITIALIZER;
+pthread_cond_t cond_wakeup = PTHREAD_COND_INITIALIZER;
+
+static int get_total_cpus(void)
+{
+ int nr_cpus = (int)sysconf(_SC_NPROCESSORS_CONF);
+
+ if (nr_cpus <= 0) {
+ pr_err("Could not read cpus\n");
+ goto error;
+ } else if (nr_cpus > MAX_CPUS) {
+ pr_err("Exceed max cpus(%d)\n", (int)MAX_CPUS);
+ goto error;
+ }
+
+ return nr_cpus;
+
+error:
+ exit(EXIT_FAILURE);
+}
+
+static void *agent_info_new(void)
+{
+ struct agent_info *s;
+ int i;
+
+ s = zalloc(sizeof(struct agent_info));
+ if (s == NULL) {
+ pr_err("agent_info zalloc error\n");
+ exit(EXIT_FAILURE);
+ }
+
+ s->pipe_size = PIPE_INIT;
+ s->use_stdout = false;
+ s->cpus = get_total_cpus();
+ s->ctl_fd = -1;
+
+ /* read/write threads init */
+ for (i = 0; i < s->cpus; i++)
+ s->rw_ti[i] = rw_thread_info_new();
+
+ return s;
+}
+
+static unsigned long parse_size(const char *arg)
+{
+ unsigned long value, round;
+ char *ptr;
+
+ value = strtoul(arg, &ptr, 10);
+ switch (*ptr) {
+ case 'K': case 'k':
+ value <<= 10;
+ break;
+ case 'M': case 'm':
+ value <<= 20;
+ break;
+ default:
+ break;
+ }
+
+ if (value > PIPE_MAX_SIZE) {
+ pr_err("Pipe size must be less than 1MB\n");
+ goto error;
+ } else if (value < PIPE_MIN_SIZE) {
+ pr_err("Pipe size must be over 64KB\n");
+ goto error;
+ }
+
+ /* Align buffer size with page unit */
+ round = value & (PAGE_SIZE - 1);
+ value = value - round;
+
+ return value;
+error:
+ return 0;
+}
+
+static void usage(char const *prg)
+{
+ pr_err("usage: %s [-h] [-o] [-s <size of pipe>]\n", prg);
+}
+
+static const char *make_path(int cpu_num, bool this_is_write_path)
+{
+ int ret;
+ char *buf;
+
+ buf = zalloc(PATH_MAX);
+ if (buf == NULL) {
+ pr_err("Could not allocate buffer\n");
+ goto error;
+ }
+
+ if (this_is_write_path)
+ /* write(output) path */
+ ret = snprintf(buf, PATH_MAX, WRITE_PATH_FMT, cpu_num);
+ else
+ /* read(input) path */
+ ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, cpu_num);
+
+ if (ret <= 0) {
+ pr_err("Failed to generate %s path(CPU#%d):%d\n",
+ this_is_write_path ? "read" : "write", cpu_num, ret);
+ goto error;
+ }
+
+ return buf;
+
+error:
+ free(buf);
+ return NULL;
+}
+
+static const char *make_input_path(int cpu_num)
+{
+ return make_path(cpu_num, false);
+}
+
+static const char *make_output_path(int cpu_num)
+{
+ return make_path(cpu_num, true);
+}
+
+static void *agent_info_init(struct agent_info *s)
+{
+ int cpu;
+ const char *in_path = NULL;
+ const char *out_path = NULL;
+
+ /* init read/write threads */
+ for (cpu = 0; cpu < s->cpus; cpu++) {
+ /* set read(input) path per read/write thread */
+ in_path = make_input_path(cpu);
+ if (in_path == NULL)
+ goto error;
+
+ /* set write(output) path per read/write thread*/
+ if (!s->use_stdout) {
+ out_path = make_output_path(cpu);
+ if (out_path == NULL)
+ goto error;
+ } else
+ /* stdout mode */
+ pr_debug("stdout mode\n");
+
+ rw_thread_init(cpu, in_path, out_path, s->use_stdout,
+ s->pipe_size, s->rw_ti[cpu]);
+ }
+
+ /* init controller of read/write threads */
+ s->ctl_fd = rw_ctl_init((const char *)CTL_PATH);
+
+ return NULL;
+
+error:
+ exit(EXIT_FAILURE);
+}
+
+static void *parse_args(int argc, char *argv[], struct agent_info *s)
+{
+ int cmd;
+ unsigned long size;
+
+ while ((cmd = getopt(argc, argv, "hos:")) != -1) {
+ switch (cmd) {
+ /* stdout mode */
+ case 'o':
+ s->use_stdout = true;
+ break;
+ /* size of pipe */
+ case 's':
+ size = parse_size(optarg);
+ if (size == 0)
+ goto error;
+ s->pipe_size = size;
+ break;
+ case 'h':
+ default:
+ usage(argv[0]);
+ goto error;
+ }
+ }
+
+ agent_info_init(s);
+
+ return NULL;
+
+error:
+ exit(EXIT_FAILURE);
+}
+
+static void agent_main_loop(struct agent_info *s)
+{
+ int cpu;
+ pthread_t rw_thread_per_cpu[MAX_CPUS];
+
+ /* Start all read/write threads */
+ for (cpu = 0; cpu < s->cpus; cpu++)
+ rw_thread_per_cpu[cpu] = rw_thread_run(s->rw_ti[cpu]);
+
+ rw_ctl_loop(s->ctl_fd);
+
+ /* Finish all read/write threads */
+ for (cpu = 0; cpu < s->cpus; cpu++) {
+ int ret;
+
+ ret = pthread_join(rw_thread_per_cpu[cpu], NULL);
+ if (ret != 0) {
+ pr_err("pthread_join() error:%d (cpu %d)\n", ret, cpu);
+ exit(EXIT_FAILURE);
+ }
+ }
+}
+
+static void agent_info_free(struct agent_info *s)
+{
+ int i;
+
+ close(s->ctl_fd);
+ for (i = 0; i < s->cpus; i++) {
+ close(s->rw_ti[i]->in_fd);
+ close(s->rw_ti[i]->out_fd);
+ close(s->rw_ti[i]->read_pipe);
+ close(s->rw_ti[i]->write_pipe);
+ free(s->rw_ti[i]);
+ }
+ free(s);
+}
+
+int main(int argc, char *argv[])
+{
+ struct agent_info *s = NULL;
+
+ s = agent_info_new();
+ parse_args(argc, argv, s);
+
+ agent_main_loop(s);
+
+ agent_info_free(s);
+
+ return 0;
+}
diff --git a/kernel/tools/virtio/virtio-trace/trace-agent.h b/kernel/tools/virtio/virtio-trace/trace-agent.h
new file mode 100644
index 000000000..8de79bfea
--- /dev/null
+++ b/kernel/tools/virtio/virtio-trace/trace-agent.h
@@ -0,0 +1,75 @@
+#ifndef __TRACE_AGENT_H__
+#define __TRACE_AGENT_H__
+#include <pthread.h>
+#include <stdbool.h>
+
+#define MAX_CPUS 256
+#define PIPE_INIT (1024*1024)
+
+/*
+ * agent_info - structure managing total information of guest agent
+ * @pipe_size: size of pipe (default 1MB)
+ * @use_stdout: set to true when o option is added (default false)
+ * @cpus: total number of CPUs
+ * @ctl_fd: fd of control path, /dev/virtio-ports/agent-ctl-path
+ * @rw_ti: structure managing information of read/write threads
+ */
+struct agent_info {
+ unsigned long pipe_size;
+ bool use_stdout;
+ int cpus;
+ int ctl_fd;
+ struct rw_thread_info *rw_ti[MAX_CPUS];
+};
+
+/*
+ * rw_thread_info - structure managing a read/write thread a cpu
+ * @cpu_num: cpu number operating this read/write thread
+ * @in_fd: fd of reading trace data path in cpu_num
+ * @out_fd: fd of writing trace data path in cpu_num
+ * @read_pipe: fd of read pipe
+ * @write_pipe: fd of write pipe
+ * @pipe_size: size of pipe (default 1MB)
+ */
+struct rw_thread_info {
+ int cpu_num;
+ int in_fd;
+ int out_fd;
+ int read_pipe;
+ int write_pipe;
+ unsigned long pipe_size;
+};
+
+/* use for stopping rw threads */
+extern bool global_sig_receive;
+
+/* use for notification */
+extern bool global_run_operation;
+extern pthread_mutex_t mutex_notify;
+extern pthread_cond_t cond_wakeup;
+
+/* for controller of read/write threads */
+extern int rw_ctl_init(const char *ctl_path);
+extern void *rw_ctl_loop(int ctl_fd);
+
+/* for trace read/write thread */
+extern void *rw_thread_info_new(void);
+extern void *rw_thread_init(int cpu, const char *in_path, const char *out_path,
+ bool stdout_flag, unsigned long pipe_size,
+ struct rw_thread_info *rw_ti);
+extern pthread_t rw_thread_run(struct rw_thread_info *rw_ti);
+
+static inline void *zalloc(size_t size)
+{
+ return calloc(1, size);
+}
+
+#define pr_err(format, ...) fprintf(stderr, format, ## __VA_ARGS__)
+#define pr_info(format, ...) fprintf(stdout, format, ## __VA_ARGS__)
+#ifdef DEBUG
+#define pr_debug(format, ...) fprintf(stderr, format, ## __VA_ARGS__)
+#else
+#define pr_debug(format, ...) do {} while (0)
+#endif
+
+#endif /*__TRACE_AGENT_H__*/
diff --git a/kernel/tools/virtio/virtio_test.c b/kernel/tools/virtio/virtio_test.c
new file mode 100644
index 000000000..e0445898f
--- /dev/null
+++ b/kernel/tools/virtio/virtio_test.c
@@ -0,0 +1,302 @@
+#define _GNU_SOURCE
+#include <getopt.h>
+#include <string.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <linux/virtio_types.h>
+#include <linux/vhost.h>
+#include <linux/virtio.h>
+#include <linux/virtio_ring.h>
+#include "../../drivers/vhost/test.h"
+
+/* Unused */
+void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end;
+
+struct vq_info {
+ int kick;
+ int call;
+ int num;
+ int idx;
+ void *ring;
+ /* copy used for control */
+ struct vring vring;
+ struct virtqueue *vq;
+};
+
+struct vdev_info {
+ struct virtio_device vdev;
+ int control;
+ struct pollfd fds[1];
+ struct vq_info vqs[1];
+ int nvqs;
+ void *buf;
+ size_t buf_size;
+ struct vhost_memory *mem;
+};
+
+bool vq_notify(struct virtqueue *vq)
+{
+ struct vq_info *info = vq->priv;
+ unsigned long long v = 1;
+ int r;
+ r = write(info->kick, &v, sizeof v);
+ assert(r == sizeof v);
+ return true;
+}
+
+void vq_callback(struct virtqueue *vq)
+{
+}
+
+
+void vhost_vq_setup(struct vdev_info *dev, struct vq_info *info)
+{
+ struct vhost_vring_state state = { .index = info->idx };
+ struct vhost_vring_file file = { .index = info->idx };
+ unsigned long long features = dev->vdev.features;
+ struct vhost_vring_addr addr = {
+ .index = info->idx,
+ .desc_user_addr = (uint64_t)(unsigned long)info->vring.desc,
+ .avail_user_addr = (uint64_t)(unsigned long)info->vring.avail,
+ .used_user_addr = (uint64_t)(unsigned long)info->vring.used,
+ };
+ int r;
+ r = ioctl(dev->control, VHOST_SET_FEATURES, &features);
+ assert(r >= 0);
+ state.num = info->vring.num;
+ r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state);
+ assert(r >= 0);
+ state.num = 0;
+ r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state);
+ assert(r >= 0);
+ r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr);
+ assert(r >= 0);
+ file.fd = info->kick;
+ r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file);
+ assert(r >= 0);
+ file.fd = info->call;
+ r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
+ assert(r >= 0);
+}
+
+static void vq_info_add(struct vdev_info *dev, int num)
+{
+ struct vq_info *info = &dev->vqs[dev->nvqs];
+ int r;
+ info->idx = dev->nvqs;
+ info->kick = eventfd(0, EFD_NONBLOCK);
+ info->call = eventfd(0, EFD_NONBLOCK);
+ r = posix_memalign(&info->ring, 4096, vring_size(num, 4096));
+ assert(r >= 0);
+ memset(info->ring, 0, vring_size(num, 4096));
+ vring_init(&info->vring, num, info->ring, 4096);
+ info->vq = vring_new_virtqueue(info->idx,
+ info->vring.num, 4096, &dev->vdev,
+ true, info->ring,
+ vq_notify, vq_callback, "test");
+ assert(info->vq);
+ info->vq->priv = info;
+ vhost_vq_setup(dev, info);
+ dev->fds[info->idx].fd = info->call;
+ dev->fds[info->idx].events = POLLIN;
+ dev->nvqs++;
+}
+
+static void vdev_info_init(struct vdev_info* dev, unsigned long long features)
+{
+ int r;
+ memset(dev, 0, sizeof *dev);
+ dev->vdev.features = features;
+ dev->buf_size = 1024;
+ dev->buf = malloc(dev->buf_size);
+ assert(dev->buf);
+ dev->control = open("/dev/vhost-test", O_RDWR);
+ assert(dev->control >= 0);
+ r = ioctl(dev->control, VHOST_SET_OWNER, NULL);
+ assert(r >= 0);
+ dev->mem = malloc(offsetof(struct vhost_memory, regions) +
+ sizeof dev->mem->regions[0]);
+ assert(dev->mem);
+ memset(dev->mem, 0, offsetof(struct vhost_memory, regions) +
+ sizeof dev->mem->regions[0]);
+ dev->mem->nregions = 1;
+ dev->mem->regions[0].guest_phys_addr = (long)dev->buf;
+ dev->mem->regions[0].userspace_addr = (long)dev->buf;
+ dev->mem->regions[0].memory_size = dev->buf_size;
+ r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
+ assert(r >= 0);
+}
+
+/* TODO: this is pretty bad: we get a cache line bounce
+ * for the wait queue on poll and another one on read,
+ * plus the read which is there just to clear the
+ * current state. */
+static void wait_for_interrupt(struct vdev_info *dev)
+{
+ int i;
+ unsigned long long val;
+ poll(dev->fds, dev->nvqs, -1);
+ for (i = 0; i < dev->nvqs; ++i)
+ if (dev->fds[i].revents & POLLIN) {
+ read(dev->fds[i].fd, &val, sizeof val);
+ }
+}
+
+static void run_test(struct vdev_info *dev, struct vq_info *vq,
+ bool delayed, int bufs)
+{
+ struct scatterlist sl;
+ long started = 0, completed = 0;
+ long completed_before;
+ int r, test = 1;
+ unsigned len;
+ long long spurious = 0;
+ r = ioctl(dev->control, VHOST_TEST_RUN, &test);
+ assert(r >= 0);
+ for (;;) {
+ virtqueue_disable_cb(vq->vq);
+ completed_before = completed;
+ do {
+ if (started < bufs) {
+ sg_init_one(&sl, dev->buf, dev->buf_size);
+ r = virtqueue_add_outbuf(vq->vq, &sl, 1,
+ dev->buf + started,
+ GFP_ATOMIC);
+ if (likely(r == 0)) {
+ ++started;
+ if (unlikely(!virtqueue_kick(vq->vq)))
+ r = -1;
+ }
+ } else
+ r = -1;
+
+ /* Flush out completed bufs if any */
+ if (virtqueue_get_buf(vq->vq, &len)) {
+ ++completed;
+ r = 0;
+ }
+
+ } while (r == 0);
+ if (completed == completed_before)
+ ++spurious;
+ assert(completed <= bufs);
+ assert(started <= bufs);
+ if (completed == bufs)
+ break;
+ if (delayed) {
+ if (virtqueue_enable_cb_delayed(vq->vq))
+ wait_for_interrupt(dev);
+ } else {
+ if (virtqueue_enable_cb(vq->vq))
+ wait_for_interrupt(dev);
+ }
+ }
+ test = 0;
+ r = ioctl(dev->control, VHOST_TEST_RUN, &test);
+ assert(r >= 0);
+ fprintf(stderr, "spurious wakeus: 0x%llx\n", spurious);
+}
+
+const char optstring[] = "h";
+const struct option longopts[] = {
+ {
+ .name = "help",
+ .val = 'h',
+ },
+ {
+ .name = "event-idx",
+ .val = 'E',
+ },
+ {
+ .name = "no-event-idx",
+ .val = 'e',
+ },
+ {
+ .name = "indirect",
+ .val = 'I',
+ },
+ {
+ .name = "no-indirect",
+ .val = 'i',
+ },
+ {
+ .name = "virtio-1",
+ .val = '1',
+ },
+ {
+ .name = "no-virtio-1",
+ .val = '0',
+ },
+ {
+ .name = "delayed-interrupt",
+ .val = 'D',
+ },
+ {
+ .name = "no-delayed-interrupt",
+ .val = 'd',
+ },
+ {
+ }
+};
+
+static void help(void)
+{
+ fprintf(stderr, "Usage: virtio_test [--help]"
+ " [--no-indirect]"
+ " [--no-event-idx]"
+ " [--no-virtio-1]"
+ " [--delayed-interrupt]"
+ "\n");
+}
+
+int main(int argc, char **argv)
+{
+ struct vdev_info dev;
+ unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
+ (1ULL << VIRTIO_RING_F_EVENT_IDX) | (1ULL << VIRTIO_F_VERSION_1);
+ int o;
+ bool delayed = false;
+
+ for (;;) {
+ o = getopt_long(argc, argv, optstring, longopts, NULL);
+ switch (o) {
+ case -1:
+ goto done;
+ case '?':
+ help();
+ exit(2);
+ case 'e':
+ features &= ~(1ULL << VIRTIO_RING_F_EVENT_IDX);
+ break;
+ case 'h':
+ help();
+ goto done;
+ case 'i':
+ features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC);
+ break;
+ case '0':
+ features &= ~(1ULL << VIRTIO_F_VERSION_1);
+ break;
+ case 'D':
+ delayed = true;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+
+done:
+ vdev_info_init(&dev, features);
+ vq_info_add(&dev, 256);
+ run_test(&dev, &dev.vqs[0], delayed, 0x100000);
+ return 0;
+}
diff --git a/kernel/tools/virtio/vringh_test.c b/kernel/tools/virtio/vringh_test.c
new file mode 100644
index 000000000..5f94f5105
--- /dev/null
+++ b/kernel/tools/virtio/vringh_test.c
@@ -0,0 +1,749 @@
+/* Simple test of virtio code, entirely in userpsace. */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <err.h>
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/virtio.h>
+#include <linux/vringh.h>
+#include <linux/virtio_ring.h>
+#include <linux/virtio_config.h>
+#include <linux/uaccess.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+
+#define USER_MEM (1024*1024)
+void *__user_addr_min, *__user_addr_max;
+void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end;
+static u64 user_addr_offset;
+
+#define RINGSIZE 256
+#define ALIGN 4096
+
+static bool never_notify_host(struct virtqueue *vq)
+{
+ abort();
+}
+
+static void never_callback_guest(struct virtqueue *vq)
+{
+ abort();
+}
+
+static bool getrange_iov(struct vringh *vrh, u64 addr, struct vringh_range *r)
+{
+ if (addr < (u64)(unsigned long)__user_addr_min - user_addr_offset)
+ return false;
+ if (addr >= (u64)(unsigned long)__user_addr_max - user_addr_offset)
+ return false;
+
+ r->start = (u64)(unsigned long)__user_addr_min - user_addr_offset;
+ r->end_incl = (u64)(unsigned long)__user_addr_max - 1 - user_addr_offset;
+ r->offset = user_addr_offset;
+ return true;
+}
+
+/* We return single byte ranges. */
+static bool getrange_slow(struct vringh *vrh, u64 addr, struct vringh_range *r)
+{
+ if (addr < (u64)(unsigned long)__user_addr_min - user_addr_offset)
+ return false;
+ if (addr >= (u64)(unsigned long)__user_addr_max - user_addr_offset)
+ return false;
+
+ r->start = addr;
+ r->end_incl = r->start;
+ r->offset = user_addr_offset;
+ return true;
+}
+
+struct guest_virtio_device {
+ struct virtio_device vdev;
+ int to_host_fd;
+ unsigned long notifies;
+};
+
+static bool parallel_notify_host(struct virtqueue *vq)
+{
+ int rc;
+ struct guest_virtio_device *gvdev;
+
+ gvdev = container_of(vq->vdev, struct guest_virtio_device, vdev);
+ rc = write(gvdev->to_host_fd, "", 1);
+ if (rc < 0)
+ return false;
+ gvdev->notifies++;
+ return true;
+}
+
+static bool no_notify_host(struct virtqueue *vq)
+{
+ return true;
+}
+
+#define NUM_XFERS (10000000)
+
+/* We aim for two "distant" cpus. */
+static void find_cpus(unsigned int *first, unsigned int *last)
+{
+ unsigned int i;
+
+ *first = -1U;
+ *last = 0;
+ for (i = 0; i < 4096; i++) {
+ cpu_set_t set;
+ CPU_ZERO(&set);
+ CPU_SET(i, &set);
+ if (sched_setaffinity(getpid(), sizeof(set), &set) == 0) {
+ if (i < *first)
+ *first = i;
+ if (i > *last)
+ *last = i;
+ }
+ }
+}
+
+/* Opencoded version for fast mode */
+static inline int vringh_get_head(struct vringh *vrh, u16 *head)
+{
+ u16 avail_idx, i;
+ int err;
+
+ err = get_user(avail_idx, &vrh->vring.avail->idx);
+ if (err)
+ return err;
+
+ if (vrh->last_avail_idx == avail_idx)
+ return 0;
+
+ /* Only get avail ring entries after they have been exposed by guest. */
+ virtio_rmb(vrh->weak_barriers);
+
+ i = vrh->last_avail_idx & (vrh->vring.num - 1);
+
+ err = get_user(*head, &vrh->vring.avail->ring[i]);
+ if (err)
+ return err;
+
+ vrh->last_avail_idx++;
+ return 1;
+}
+
+static int parallel_test(u64 features,
+ bool (*getrange)(struct vringh *vrh,
+ u64 addr, struct vringh_range *r),
+ bool fast_vringh)
+{
+ void *host_map, *guest_map;
+ int fd, mapsize, to_guest[2], to_host[2];
+ unsigned long xfers = 0, notifies = 0, receives = 0;
+ unsigned int first_cpu, last_cpu;
+ cpu_set_t cpu_set;
+ char buf[128];
+
+ /* Create real file to mmap. */
+ fd = open("/tmp/vringh_test-file", O_RDWR|O_CREAT|O_TRUNC, 0600);
+ if (fd < 0)
+ err(1, "Opening /tmp/vringh_test-file");
+
+ /* Extra room at the end for some data, and indirects */
+ mapsize = vring_size(RINGSIZE, ALIGN)
+ + RINGSIZE * 2 * sizeof(int)
+ + RINGSIZE * 6 * sizeof(struct vring_desc);
+ mapsize = (mapsize + getpagesize() - 1) & ~(getpagesize() - 1);
+ ftruncate(fd, mapsize);
+
+ /* Parent and child use separate addresses, to check our mapping logic! */
+ host_map = mmap(NULL, mapsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+ guest_map = mmap(NULL, mapsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+
+ pipe(to_guest);
+ pipe(to_host);
+
+ CPU_ZERO(&cpu_set);
+ find_cpus(&first_cpu, &last_cpu);
+ printf("Using CPUS %u and %u\n", first_cpu, last_cpu);
+ fflush(stdout);
+
+ if (fork() != 0) {
+ struct vringh vrh;
+ int status, err, rlen = 0;
+ char rbuf[5];
+
+ /* We are the host: never access guest addresses! */
+ munmap(guest_map, mapsize);
+
+ __user_addr_min = host_map;
+ __user_addr_max = __user_addr_min + mapsize;
+ user_addr_offset = host_map - guest_map;
+ assert(user_addr_offset);
+
+ close(to_guest[0]);
+ close(to_host[1]);
+
+ vring_init(&vrh.vring, RINGSIZE, host_map, ALIGN);
+ vringh_init_user(&vrh, features, RINGSIZE, true,
+ vrh.vring.desc, vrh.vring.avail, vrh.vring.used);
+ CPU_SET(first_cpu, &cpu_set);
+ if (sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set))
+ errx(1, "Could not set affinity to cpu %u", first_cpu);
+
+ while (xfers < NUM_XFERS) {
+ struct iovec host_riov[2], host_wiov[2];
+ struct vringh_iov riov, wiov;
+ u16 head, written;
+
+ if (fast_vringh) {
+ for (;;) {
+ err = vringh_get_head(&vrh, &head);
+ if (err != 0)
+ break;
+ err = vringh_need_notify_user(&vrh);
+ if (err < 0)
+ errx(1, "vringh_need_notify_user: %i",
+ err);
+ if (err) {
+ write(to_guest[1], "", 1);
+ notifies++;
+ }
+ }
+ if (err != 1)
+ errx(1, "vringh_get_head");
+ written = 0;
+ goto complete;
+ } else {
+ vringh_iov_init(&riov,
+ host_riov,
+ ARRAY_SIZE(host_riov));
+ vringh_iov_init(&wiov,
+ host_wiov,
+ ARRAY_SIZE(host_wiov));
+
+ err = vringh_getdesc_user(&vrh, &riov, &wiov,
+ getrange, &head);
+ }
+ if (err == 0) {
+ err = vringh_need_notify_user(&vrh);
+ if (err < 0)
+ errx(1, "vringh_need_notify_user: %i",
+ err);
+ if (err) {
+ write(to_guest[1], "", 1);
+ notifies++;
+ }
+
+ if (!vringh_notify_enable_user(&vrh))
+ continue;
+
+ /* Swallow all notifies at once. */
+ if (read(to_host[0], buf, sizeof(buf)) < 1)
+ break;
+
+ vringh_notify_disable_user(&vrh);
+ receives++;
+ continue;
+ }
+ if (err != 1)
+ errx(1, "vringh_getdesc_user: %i", err);
+
+ /* We simply copy bytes. */
+ if (riov.used) {
+ rlen = vringh_iov_pull_user(&riov, rbuf,
+ sizeof(rbuf));
+ if (rlen != 4)
+ errx(1, "vringh_iov_pull_user: %i",
+ rlen);
+ assert(riov.i == riov.used);
+ written = 0;
+ } else {
+ err = vringh_iov_push_user(&wiov, rbuf, rlen);
+ if (err != rlen)
+ errx(1, "vringh_iov_push_user: %i",
+ err);
+ assert(wiov.i == wiov.used);
+ written = err;
+ }
+ complete:
+ xfers++;
+
+ err = vringh_complete_user(&vrh, head, written);
+ if (err != 0)
+ errx(1, "vringh_complete_user: %i", err);
+ }
+
+ err = vringh_need_notify_user(&vrh);
+ if (err < 0)
+ errx(1, "vringh_need_notify_user: %i", err);
+ if (err) {
+ write(to_guest[1], "", 1);
+ notifies++;
+ }
+ wait(&status);
+ if (!WIFEXITED(status))
+ errx(1, "Child died with signal %i?", WTERMSIG(status));
+ if (WEXITSTATUS(status) != 0)
+ errx(1, "Child exited %i?", WEXITSTATUS(status));
+ printf("Host: notified %lu, pinged %lu\n", notifies, receives);
+ return 0;
+ } else {
+ struct guest_virtio_device gvdev;
+ struct virtqueue *vq;
+ unsigned int *data;
+ struct vring_desc *indirects;
+ unsigned int finished = 0;
+
+ /* We pass sg[]s pointing into here, but we need RINGSIZE+1 */
+ data = guest_map + vring_size(RINGSIZE, ALIGN);
+ indirects = (void *)data + (RINGSIZE + 1) * 2 * sizeof(int);
+
+ /* We are the guest. */
+ munmap(host_map, mapsize);
+
+ close(to_guest[1]);
+ close(to_host[0]);
+
+ gvdev.vdev.features = features;
+ gvdev.to_host_fd = to_host[1];
+ gvdev.notifies = 0;
+
+ CPU_SET(first_cpu, &cpu_set);
+ if (sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set))
+ err(1, "Could not set affinity to cpu %u", first_cpu);
+
+ vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &gvdev.vdev, true,
+ guest_map, fast_vringh ? no_notify_host
+ : parallel_notify_host,
+ never_callback_guest, "guest vq");
+
+ /* Don't kfree indirects. */
+ __kfree_ignore_start = indirects;
+ __kfree_ignore_end = indirects + RINGSIZE * 6;
+
+ while (xfers < NUM_XFERS) {
+ struct scatterlist sg[4];
+ unsigned int num_sg, len;
+ int *dbuf, err;
+ bool output = !(xfers % 2);
+
+ /* Consume bufs. */
+ while ((dbuf = virtqueue_get_buf(vq, &len)) != NULL) {
+ if (len == 4)
+ assert(*dbuf == finished - 1);
+ else if (!fast_vringh)
+ assert(*dbuf == finished);
+ finished++;
+ }
+
+ /* Produce a buffer. */
+ dbuf = data + (xfers % (RINGSIZE + 1));
+
+ if (output)
+ *dbuf = xfers;
+ else
+ *dbuf = -1;
+
+ switch ((xfers / sizeof(*dbuf)) % 4) {
+ case 0:
+ /* Nasty three-element sg list. */
+ sg_init_table(sg, num_sg = 3);
+ sg_set_buf(&sg[0], (void *)dbuf, 1);
+ sg_set_buf(&sg[1], (void *)dbuf + 1, 2);
+ sg_set_buf(&sg[2], (void *)dbuf + 3, 1);
+ break;
+ case 1:
+ sg_init_table(sg, num_sg = 2);
+ sg_set_buf(&sg[0], (void *)dbuf, 1);
+ sg_set_buf(&sg[1], (void *)dbuf + 1, 3);
+ break;
+ case 2:
+ sg_init_table(sg, num_sg = 1);
+ sg_set_buf(&sg[0], (void *)dbuf, 4);
+ break;
+ case 3:
+ sg_init_table(sg, num_sg = 4);
+ sg_set_buf(&sg[0], (void *)dbuf, 1);
+ sg_set_buf(&sg[1], (void *)dbuf + 1, 1);
+ sg_set_buf(&sg[2], (void *)dbuf + 2, 1);
+ sg_set_buf(&sg[3], (void *)dbuf + 3, 1);
+ break;
+ }
+
+ /* May allocate an indirect, so force it to allocate
+ * user addr */
+ __kmalloc_fake = indirects + (xfers % RINGSIZE) * 4;
+ if (output)
+ err = virtqueue_add_outbuf(vq, sg, num_sg, dbuf,
+ GFP_KERNEL);
+ else
+ err = virtqueue_add_inbuf(vq, sg, num_sg,
+ dbuf, GFP_KERNEL);
+
+ if (err == -ENOSPC) {
+ if (!virtqueue_enable_cb_delayed(vq))
+ continue;
+ /* Swallow all notifies at once. */
+ if (read(to_guest[0], buf, sizeof(buf)) < 1)
+ break;
+
+ receives++;
+ virtqueue_disable_cb(vq);
+ continue;
+ }
+
+ if (err)
+ errx(1, "virtqueue_add_in/outbuf: %i", err);
+
+ xfers++;
+ virtqueue_kick(vq);
+ }
+
+ /* Any extra? */
+ while (finished != xfers) {
+ int *dbuf;
+ unsigned int len;
+
+ /* Consume bufs. */
+ dbuf = virtqueue_get_buf(vq, &len);
+ if (dbuf) {
+ if (len == 4)
+ assert(*dbuf == finished - 1);
+ else
+ assert(len == 0);
+ finished++;
+ continue;
+ }
+
+ if (!virtqueue_enable_cb_delayed(vq))
+ continue;
+ if (read(to_guest[0], buf, sizeof(buf)) < 1)
+ break;
+
+ receives++;
+ virtqueue_disable_cb(vq);
+ }
+
+ printf("Guest: notified %lu, pinged %lu\n",
+ gvdev.notifies, receives);
+ vring_del_virtqueue(vq);
+ return 0;
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ struct virtio_device vdev;
+ struct virtqueue *vq;
+ struct vringh vrh;
+ struct scatterlist guest_sg[RINGSIZE], *sgs[2];
+ struct iovec host_riov[2], host_wiov[2];
+ struct vringh_iov riov, wiov;
+ struct vring_used_elem used[RINGSIZE];
+ char buf[28];
+ u16 head;
+ int err;
+ unsigned i;
+ void *ret;
+ bool (*getrange)(struct vringh *vrh, u64 addr, struct vringh_range *r);
+ bool fast_vringh = false, parallel = false;
+
+ getrange = getrange_iov;
+ vdev.features = 0;
+
+ while (argv[1]) {
+ if (strcmp(argv[1], "--indirect") == 0)
+ __virtio_set_bit(&vdev, VIRTIO_RING_F_INDIRECT_DESC);
+ else if (strcmp(argv[1], "--eventidx") == 0)
+ __virtio_set_bit(&vdev, VIRTIO_RING_F_EVENT_IDX);
+ else if (strcmp(argv[1], "--virtio-1") == 0)
+ __virtio_set_bit(&vdev, VIRTIO_F_VERSION_1);
+ else if (strcmp(argv[1], "--slow-range") == 0)
+ getrange = getrange_slow;
+ else if (strcmp(argv[1], "--fast-vringh") == 0)
+ fast_vringh = true;
+ else if (strcmp(argv[1], "--parallel") == 0)
+ parallel = true;
+ else
+ errx(1, "Unknown arg %s", argv[1]);
+ argv++;
+ }
+
+ if (parallel)
+ return parallel_test(vdev.features, getrange, fast_vringh);
+
+ if (posix_memalign(&__user_addr_min, PAGE_SIZE, USER_MEM) != 0)
+ abort();
+ __user_addr_max = __user_addr_min + USER_MEM;
+ memset(__user_addr_min, 0, vring_size(RINGSIZE, ALIGN));
+
+ /* Set up guest side. */
+ vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true,
+ __user_addr_min,
+ never_notify_host, never_callback_guest,
+ "guest vq");
+
+ /* Set up host side. */
+ vring_init(&vrh.vring, RINGSIZE, __user_addr_min, ALIGN);
+ vringh_init_user(&vrh, vdev.features, RINGSIZE, true,
+ vrh.vring.desc, vrh.vring.avail, vrh.vring.used);
+
+ /* No descriptor to get yet... */
+ err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
+ if (err != 0)
+ errx(1, "vringh_getdesc_user: %i", err);
+
+ /* Guest puts in a descriptor. */
+ memcpy(__user_addr_max - 1, "a", 1);
+ sg_init_table(guest_sg, 1);
+ sg_set_buf(&guest_sg[0], __user_addr_max - 1, 1);
+ sg_init_table(guest_sg+1, 1);
+ sg_set_buf(&guest_sg[1], __user_addr_max - 3, 2);
+ sgs[0] = &guest_sg[0];
+ sgs[1] = &guest_sg[1];
+
+ /* May allocate an indirect, so force it to allocate user addr */
+ __kmalloc_fake = __user_addr_min + vring_size(RINGSIZE, ALIGN);
+ err = virtqueue_add_sgs(vq, sgs, 1, 1, &err, GFP_KERNEL);
+ if (err)
+ errx(1, "virtqueue_add_sgs: %i", err);
+ __kmalloc_fake = NULL;
+
+ /* Host retreives it. */
+ vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
+ vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));
+
+ err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
+ if (err != 1)
+ errx(1, "vringh_getdesc_user: %i", err);
+
+ assert(riov.used == 1);
+ assert(riov.iov[0].iov_base == __user_addr_max - 1);
+ assert(riov.iov[0].iov_len == 1);
+ if (getrange != getrange_slow) {
+ assert(wiov.used == 1);
+ assert(wiov.iov[0].iov_base == __user_addr_max - 3);
+ assert(wiov.iov[0].iov_len == 2);
+ } else {
+ assert(wiov.used == 2);
+ assert(wiov.iov[0].iov_base == __user_addr_max - 3);
+ assert(wiov.iov[0].iov_len == 1);
+ assert(wiov.iov[1].iov_base == __user_addr_max - 2);
+ assert(wiov.iov[1].iov_len == 1);
+ }
+
+ err = vringh_iov_pull_user(&riov, buf, 5);
+ if (err != 1)
+ errx(1, "vringh_iov_pull_user: %i", err);
+ assert(buf[0] == 'a');
+ assert(riov.i == 1);
+ assert(vringh_iov_pull_user(&riov, buf, 5) == 0);
+
+ memcpy(buf, "bcdef", 5);
+ err = vringh_iov_push_user(&wiov, buf, 5);
+ if (err != 2)
+ errx(1, "vringh_iov_push_user: %i", err);
+ assert(memcmp(__user_addr_max - 3, "bc", 2) == 0);
+ assert(wiov.i == wiov.used);
+ assert(vringh_iov_push_user(&wiov, buf, 5) == 0);
+
+ /* Host is done. */
+ err = vringh_complete_user(&vrh, head, err);
+ if (err != 0)
+ errx(1, "vringh_complete_user: %i", err);
+
+ /* Guest should see used token now. */
+ __kfree_ignore_start = __user_addr_min + vring_size(RINGSIZE, ALIGN);
+ __kfree_ignore_end = __kfree_ignore_start + 1;
+ ret = virtqueue_get_buf(vq, &i);
+ if (ret != &err)
+ errx(1, "virtqueue_get_buf: %p", ret);
+ assert(i == 2);
+
+ /* Guest puts in a huge descriptor. */
+ sg_init_table(guest_sg, RINGSIZE);
+ for (i = 0; i < RINGSIZE; i++) {
+ sg_set_buf(&guest_sg[i],
+ __user_addr_max - USER_MEM/4, USER_MEM/4);
+ }
+
+ /* Fill contents with recognisable garbage. */
+ for (i = 0; i < USER_MEM/4; i++)
+ ((char *)__user_addr_max - USER_MEM/4)[i] = i;
+
+ /* This will allocate an indirect, so force it to allocate user addr */
+ __kmalloc_fake = __user_addr_min + vring_size(RINGSIZE, ALIGN);
+ err = virtqueue_add_outbuf(vq, guest_sg, RINGSIZE, &err, GFP_KERNEL);
+ if (err)
+ errx(1, "virtqueue_add_outbuf (large): %i", err);
+ __kmalloc_fake = NULL;
+
+ /* Host picks it up (allocates new iov). */
+ vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
+ vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));
+
+ err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
+ if (err != 1)
+ errx(1, "vringh_getdesc_user: %i", err);
+
+ assert(riov.max_num & VRINGH_IOV_ALLOCATED);
+ assert(riov.iov != host_riov);
+ if (getrange != getrange_slow)
+ assert(riov.used == RINGSIZE);
+ else
+ assert(riov.used == RINGSIZE * USER_MEM/4);
+
+ assert(!(wiov.max_num & VRINGH_IOV_ALLOCATED));
+ assert(wiov.used == 0);
+
+ /* Pull data back out (in odd chunks), should be as expected. */
+ for (i = 0; i < RINGSIZE * USER_MEM/4; i += 3) {
+ err = vringh_iov_pull_user(&riov, buf, 3);
+ if (err != 3 && i + err != RINGSIZE * USER_MEM/4)
+ errx(1, "vringh_iov_pull_user large: %i", err);
+ assert(buf[0] == (char)i);
+ assert(err < 2 || buf[1] == (char)(i + 1));
+ assert(err < 3 || buf[2] == (char)(i + 2));
+ }
+ assert(riov.i == riov.used);
+ vringh_iov_cleanup(&riov);
+ vringh_iov_cleanup(&wiov);
+
+ /* Complete using multi interface, just because we can. */
+ used[0].id = head;
+ used[0].len = 0;
+ err = vringh_complete_multi_user(&vrh, used, 1);
+ if (err)
+ errx(1, "vringh_complete_multi_user(1): %i", err);
+
+ /* Free up those descriptors. */
+ ret = virtqueue_get_buf(vq, &i);
+ if (ret != &err)
+ errx(1, "virtqueue_get_buf: %p", ret);
+
+ /* Add lots of descriptors. */
+ sg_init_table(guest_sg, 1);
+ sg_set_buf(&guest_sg[0], __user_addr_max - 1, 1);
+ for (i = 0; i < RINGSIZE; i++) {
+ err = virtqueue_add_outbuf(vq, guest_sg, 1, &err, GFP_KERNEL);
+ if (err)
+ errx(1, "virtqueue_add_outbuf (multiple): %i", err);
+ }
+
+ /* Now get many, and consume them all at once. */
+ vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
+ vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));
+
+ for (i = 0; i < RINGSIZE; i++) {
+ err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
+ if (err != 1)
+ errx(1, "vringh_getdesc_user: %i", err);
+ used[i].id = head;
+ used[i].len = 0;
+ }
+ /* Make sure it wraps around ring, to test! */
+ assert(vrh.vring.used->idx % RINGSIZE != 0);
+ err = vringh_complete_multi_user(&vrh, used, RINGSIZE);
+ if (err)
+ errx(1, "vringh_complete_multi_user: %i", err);
+
+ /* Free those buffers. */
+ for (i = 0; i < RINGSIZE; i++) {
+ unsigned len;
+ assert(virtqueue_get_buf(vq, &len) != NULL);
+ }
+
+ /* Test weird (but legal!) indirect. */
+ if (__virtio_test_bit(&vdev, VIRTIO_RING_F_INDIRECT_DESC)) {
+ char *data = __user_addr_max - USER_MEM/4;
+ struct vring_desc *d = __user_addr_max - USER_MEM/2;
+ struct vring vring;
+
+ /* Force creation of direct, which we modify. */
+ __virtio_clear_bit(&vdev, VIRTIO_RING_F_INDIRECT_DESC);
+ vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true,
+ __user_addr_min,
+ never_notify_host,
+ never_callback_guest,
+ "guest vq");
+
+ sg_init_table(guest_sg, 4);
+ sg_set_buf(&guest_sg[0], d, sizeof(*d)*2);
+ sg_set_buf(&guest_sg[1], d + 2, sizeof(*d)*1);
+ sg_set_buf(&guest_sg[2], data + 6, 4);
+ sg_set_buf(&guest_sg[3], d + 3, sizeof(*d)*3);
+
+ err = virtqueue_add_outbuf(vq, guest_sg, 4, &err, GFP_KERNEL);
+ if (err)
+ errx(1, "virtqueue_add_outbuf (indirect): %i", err);
+
+ vring_init(&vring, RINGSIZE, __user_addr_min, ALIGN);
+
+ /* They're used in order, but double-check... */
+ assert(vring.desc[0].addr == (unsigned long)d);
+ assert(vring.desc[1].addr == (unsigned long)(d+2));
+ assert(vring.desc[2].addr == (unsigned long)data + 6);
+ assert(vring.desc[3].addr == (unsigned long)(d+3));
+ vring.desc[0].flags |= VRING_DESC_F_INDIRECT;
+ vring.desc[1].flags |= VRING_DESC_F_INDIRECT;
+ vring.desc[3].flags |= VRING_DESC_F_INDIRECT;
+
+ /* First indirect */
+ d[0].addr = (unsigned long)data;
+ d[0].len = 1;
+ d[0].flags = VRING_DESC_F_NEXT;
+ d[0].next = 1;
+ d[1].addr = (unsigned long)data + 1;
+ d[1].len = 2;
+ d[1].flags = 0;
+
+ /* Second indirect */
+ d[2].addr = (unsigned long)data + 3;
+ d[2].len = 3;
+ d[2].flags = 0;
+
+ /* Third indirect */
+ d[3].addr = (unsigned long)data + 10;
+ d[3].len = 5;
+ d[3].flags = VRING_DESC_F_NEXT;
+ d[3].next = 1;
+ d[4].addr = (unsigned long)data + 15;
+ d[4].len = 6;
+ d[4].flags = VRING_DESC_F_NEXT;
+ d[4].next = 2;
+ d[5].addr = (unsigned long)data + 21;
+ d[5].len = 7;
+ d[5].flags = 0;
+
+ /* Host picks it up (allocates new iov). */
+ vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
+ vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));
+
+ err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
+ if (err != 1)
+ errx(1, "vringh_getdesc_user: %i", err);
+
+ if (head != 0)
+ errx(1, "vringh_getdesc_user: head %i not 0", head);
+
+ assert(riov.max_num & VRINGH_IOV_ALLOCATED);
+ if (getrange != getrange_slow)
+ assert(riov.used == 7);
+ else
+ assert(riov.used == 28);
+ err = vringh_iov_pull_user(&riov, buf, 29);
+ assert(err == 28);
+
+ /* Data should be linear. */
+ for (i = 0; i < err; i++)
+ assert(buf[i] == i);
+ vringh_iov_cleanup(&riov);
+ }
+
+ /* Don't leak memory... */
+ vring_del_virtqueue(vq);
+ free(__user_addr_min);
+
+ return 0;
+}