summaryrefslogtreecommitdiffstats
path: root/qemu/fsdev
diff options
context:
space:
mode:
authorYang Zhang <yang.z.zhang@intel.com>2015-08-28 09:58:54 +0800
committerYang Zhang <yang.z.zhang@intel.com>2015-09-01 12:44:00 +0800
commite44e3482bdb4d0ebde2d8b41830ac2cdb07948fb (patch)
tree66b09f592c55df2878107a468a91d21506104d3f /qemu/fsdev
parent9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00 (diff)
Add qemu 2.4.0
Change-Id: Ic99cbad4b61f8b127b7dc74d04576c0bcbaaf4f5 Signed-off-by: Yang Zhang <yang.z.zhang@intel.com>
Diffstat (limited to 'qemu/fsdev')
-rw-r--r--qemu/fsdev/Makefile.objs12
-rw-r--r--qemu/fsdev/file-op-9p.h150
-rw-r--r--qemu/fsdev/qemu-fsdev-dummy.c22
-rw-r--r--qemu/fsdev/qemu-fsdev-opts.c85
-rw-r--r--qemu/fsdev/qemu-fsdev.c101
-rw-r--r--qemu/fsdev/qemu-fsdev.h48
-rw-r--r--qemu/fsdev/virtfs-proxy-helper.c1166
-rw-r--r--qemu/fsdev/virtfs-proxy-helper.texi63
-rw-r--r--qemu/fsdev/virtio-9p-marshal.c323
-rw-r--r--qemu/fsdev/virtio-9p-marshal.h90
10 files changed, 2060 insertions, 0 deletions
diff --git a/qemu/fsdev/Makefile.objs b/qemu/fsdev/Makefile.objs
new file mode 100644
index 000000000..c27dad3f6
--- /dev/null
+++ b/qemu/fsdev/Makefile.objs
@@ -0,0 +1,12 @@
+# Build the real fsdev implementation only when CONFIG_VIRTIO,
+# CONFIG_VIRTFS and CONFIG_PCI are all "y" (they concatenate to "yyy").
+ifeq ($(CONFIG_VIRTIO)$(CONFIG_VIRTFS)$(CONFIG_PCI),yyy)
+# Lots of the fsdev/9pcode is pulled in by vl.c via qemu_fsdev_add.
+# only pull in the actual virtio-9p device if we also enabled virtio.
+common-obj-y = qemu-fsdev.o virtio-9p-marshal.o
+else
+# Otherwise use the stub that provides a no-op qemu_fsdev_add.
+common-obj-y = qemu-fsdev-dummy.o
+endif
+# The option tables are built in both configurations.
+common-obj-y += qemu-fsdev-opts.o
+
+# Toplevel always builds this; targets without virtio will put it in
+# common-obj-y
+common-obj-$(CONFIG_ALL) += qemu-fsdev-dummy.o
diff --git a/qemu/fsdev/file-op-9p.h b/qemu/fsdev/file-op-9p.h
new file mode 100644
index 000000000..956fda091
--- /dev/null
+++ b/qemu/fsdev/file-op-9p.h
@@ -0,0 +1,150 @@
+/*
+ * Virtio 9p
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+#ifndef _FILEOP_H
+#define _FILEOP_H
+#include <sys/types.h>
+#include <dirent.h>
+#include <sys/time.h>
+#include <utime.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+#include <sys/vfs.h>
+
+#define SM_LOCAL_MODE_BITS 0600
+#define SM_LOCAL_DIR_MODE_BITS 0700
+
+/*
+ * Ownership, mode and device number handed to the FileOperations
+ * callbacks that take an FsCred (chmod, chown, mknod, symlink, open2,
+ * mkdir).
+ */
+typedef struct FsCred
+{
+ uid_t fc_uid;
+ gid_t fc_gid;
+ mode_t fc_mode;
+ dev_t fc_rdev;
+} FsCred;
+
+struct xattr_operations;
+struct FsContext;
+struct V9fsPath;
+
+/*
+ * Optional driver extensions, embedded in FsContext as the exops member.
+ * get_st_gen takes a context, a path, a mode and a uint64_t out-pointer;
+ * presumably it fetches a file generation number -- confirm against the
+ * drivers that set it.
+ */
+typedef struct extended_ops {
+ int (*get_st_gen)(struct FsContext *, struct V9fsPath *,
+ mode_t, uint64_t *);
+} extended_ops;
+
+/* export flags */
+#define V9FS_IMMEDIATE_WRITEOUT 0x00000001
+#define V9FS_PATHNAME_FSCONTEXT 0x00000002
+/*
+ * uid/gid set on fileserver files
+ */
+#define V9FS_SM_PASSTHROUGH 0x00000004
+/*
+ * uid/gid part of xattr
+ */
+#define V9FS_SM_MAPPED 0x00000008
+/*
+ * Server will try to set uid/gid.
+ * On failure ignore the error.
+ */
+#define V9FS_SM_NONE 0x00000010
+/*
+ * uid/gid part of .virtfs_metadata namespace
+ */
+#define V9FS_SM_MAPPED_FILE 0x00000020
+#define V9FS_RDONLY 0x00000040
+#define V9FS_PROXY_SOCK_FD 0x00000080
+#define V9FS_PROXY_SOCK_NAME 0x00000100
+
+#define V9FS_SEC_MASK 0x0000003C
+
+
+typedef struct FileOperations FileOperations;
+/*
+ * Structure to store the various fsdev's passed through command line.
+ */
+typedef struct FsDriverEntry {
+ char *fsdev_id; /* id string; key compared by get_fsdev_fsentry() */
+ char *path;
+ int export_flags; /* bitmask of V9FS_* export flags */
+ FileOperations *ops; /* callback table of the selected fsdriver */
+} FsDriverEntry;
+
+/*
+ * Context passed as the first argument to every FileOperations callback
+ * except parse_opts.
+ */
+typedef struct FsContext
+{
+ uid_t uid;
+ char *fs_root;
+ int export_flags; /* bitmask of V9FS_* export flags */
+ struct xattr_operations **xops;
+ struct extended_ops exops;
+ /* fs driver specific data */
+ void *private;
+} FsContext;
+
+/*
+ * Path handle passed to the FileOperations callbacks: a data pointer
+ * and its 16-bit size. Whether size counts a trailing NUL is not
+ * visible in this header.
+ */
+typedef struct V9fsPath {
+ uint16_t size;
+ char *data;
+} V9fsPath;
+
+typedef union V9fsFidOpenState V9fsFidOpenState;
+
+void cred_init(FsCred *);
+
+/*
+ * Callback table implemented by each fsdriver (local_ops, handle_ops,
+ * synth_ops and proxy_ops are declared in qemu-fsdev.h).
+ *
+ * parse_opts receives the QemuOpts and the FsDriverEntry being set up;
+ * qemu_fsdev_add() calls it when it is non-NULL and treats a non-zero
+ * return as failure. All other callbacks take the FsContext first.
+ */
+struct FileOperations
+{
+ int (*parse_opts)(QemuOpts *, struct FsDriverEntry *);
+ int (*init)(struct FsContext *);
+ int (*lstat)(FsContext *, V9fsPath *, struct stat *);
+ ssize_t (*readlink)(FsContext *, V9fsPath *, char *, size_t);
+ int (*chmod)(FsContext *, V9fsPath *, FsCred *);
+ int (*chown)(FsContext *, V9fsPath *, FsCred *);
+ int (*mknod)(FsContext *, V9fsPath *, const char *, FsCred *);
+ int (*utimensat)(FsContext *, V9fsPath *, const struct timespec *);
+ int (*remove)(FsContext *, const char *);
+ int (*symlink)(FsContext *, const char *, V9fsPath *,
+ const char *, FsCred *);
+ int (*link)(FsContext *, V9fsPath *, V9fsPath *, const char *);
+ int (*setuid)(FsContext *, uid_t);
+ int (*close)(FsContext *, V9fsFidOpenState *);
+ int (*closedir)(FsContext *, V9fsFidOpenState *);
+ int (*opendir)(FsContext *, V9fsPath *, V9fsFidOpenState *);
+ int (*open)(FsContext *, V9fsPath *, int, V9fsFidOpenState *);
+ int (*open2)(FsContext *, V9fsPath *, const char *,
+ int, FsCred *, V9fsFidOpenState *);
+ void (*rewinddir)(FsContext *, V9fsFidOpenState *);
+ off_t (*telldir)(FsContext *, V9fsFidOpenState *);
+ int (*readdir_r)(FsContext *, V9fsFidOpenState *,
+ struct dirent *, struct dirent **);
+ void (*seekdir)(FsContext *, V9fsFidOpenState *, off_t);
+ ssize_t (*preadv)(FsContext *, V9fsFidOpenState *,
+ const struct iovec *, int, off_t);
+ ssize_t (*pwritev)(FsContext *, V9fsFidOpenState *,
+ const struct iovec *, int, off_t);
+ int (*mkdir)(FsContext *, V9fsPath *, const char *, FsCred *);
+ int (*fstat)(FsContext *, int, V9fsFidOpenState *, struct stat *);
+ int (*rename)(FsContext *, const char *, const char *);
+ int (*truncate)(FsContext *, V9fsPath *, off_t);
+ int (*fsync)(FsContext *, int, V9fsFidOpenState *, int);
+ int (*statfs)(FsContext *s, V9fsPath *path, struct statfs *stbuf);
+ ssize_t (*lgetxattr)(FsContext *, V9fsPath *,
+ const char *, void *, size_t);
+ ssize_t (*llistxattr)(FsContext *, V9fsPath *, void *, size_t);
+ int (*lsetxattr)(FsContext *, V9fsPath *,
+ const char *, void *, size_t, int);
+ int (*lremovexattr)(FsContext *, V9fsPath *, const char *);
+ int (*name_to_path)(FsContext *, V9fsPath *, const char *, V9fsPath *);
+ int (*renameat)(FsContext *ctx, V9fsPath *olddir, const char *old_name,
+ V9fsPath *newdir, const char *new_name);
+ int (*unlinkat)(FsContext *ctx, V9fsPath *dir, const char *name, int flags);
+ /* driver-private pointer; not interpreted in this header */
+ void *opaque;
+};
+
+#endif
diff --git a/qemu/fsdev/qemu-fsdev-dummy.c b/qemu/fsdev/qemu-fsdev-dummy.c
new file mode 100644
index 000000000..7dc2630a7
--- /dev/null
+++ b/qemu/fsdev/qemu-fsdev-dummy.c
@@ -0,0 +1,22 @@
+/*
+ * Virtio 9p
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Gautham R Shenoy <ego@in.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+#include <stdio.h>
+#include <string.h>
+#include "qemu-fsdev.h"
+#include "qemu/config-file.h"
+#include "qemu/module.h"
+
+/*
+ * Stub used when the real fsdev code is not built: ignores opts and
+ * reports success.
+ */
+int qemu_fsdev_add(QemuOpts *opts)
+{
+ return 0;
+}
diff --git a/qemu/fsdev/qemu-fsdev-opts.c b/qemu/fsdev/qemu-fsdev-opts.c
new file mode 100644
index 000000000..6311c7a7e
--- /dev/null
+++ b/qemu/fsdev/qemu-fsdev-opts.c
@@ -0,0 +1,85 @@
+/*
+ * Virtio 9p
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/config-file.h"
+#include "qemu/option.h"
+#include "qemu/module.h"
+
+/*
+ * Option descriptors for the "fsdev" option group. A value given
+ * without a key is taken as "fsdriver" (implied_opt_name).
+ */
+static QemuOptsList qemu_fsdev_opts = {
+ .name = "fsdev",
+ .implied_opt_name = "fsdriver",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_fsdev_opts.head),
+ .desc = {
+ {
+ .name = "fsdriver",
+ .type = QEMU_OPT_STRING,
+ }, {
+ .name = "path",
+ .type = QEMU_OPT_STRING,
+ }, {
+ .name = "security_model",
+ .type = QEMU_OPT_STRING,
+ }, {
+ .name = "writeout",
+ .type = QEMU_OPT_STRING,
+ }, {
+ .name = "readonly",
+ .type = QEMU_OPT_BOOL,
+
+ }, {
+ .name = "socket",
+ .type = QEMU_OPT_STRING,
+ }, {
+ .name = "sock_fd",
+ .type = QEMU_OPT_NUMBER,
+ },
+
+ { /*End of list */ }
+ },
+};
+
+/*
+ * Option descriptors for the "virtfs" option group: the same keys as
+ * the "fsdev" group plus "mount_tag".
+ */
+static QemuOptsList qemu_virtfs_opts = {
+ .name = "virtfs",
+ .implied_opt_name = "fsdriver",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_virtfs_opts.head),
+ .desc = {
+ {
+ .name = "fsdriver",
+ .type = QEMU_OPT_STRING,
+ }, {
+ .name = "path",
+ .type = QEMU_OPT_STRING,
+ }, {
+ .name = "mount_tag",
+ .type = QEMU_OPT_STRING,
+ }, {
+ .name = "security_model",
+ .type = QEMU_OPT_STRING,
+ }, {
+ .name = "writeout",
+ .type = QEMU_OPT_STRING,
+ }, {
+ .name = "readonly",
+ .type = QEMU_OPT_BOOL,
+ }, {
+ .name = "socket",
+ .type = QEMU_OPT_STRING,
+ }, {
+ .name = "sock_fd",
+ .type = QEMU_OPT_NUMBER,
+ },
+
+ { /*End of list */ }
+ },
+};
+
+/*
+ * Registers the "fsdev" and "virtfs" option groups; hooked in through
+ * the machine_init() macro below.
+ */
+static void fsdev_register_config(void)
+{
+ qemu_add_opts(&qemu_fsdev_opts);
+ qemu_add_opts(&qemu_virtfs_opts);
+}
+machine_init(fsdev_register_config);
diff --git a/qemu/fsdev/qemu-fsdev.c b/qemu/fsdev/qemu-fsdev.c
new file mode 100644
index 000000000..ccfec139a
--- /dev/null
+++ b/qemu/fsdev/qemu-fsdev.c
@@ -0,0 +1,101 @@
+/*
+ * Virtio 9p
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Gautham R Shenoy <ego@in.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+#include <stdio.h>
+#include <string.h>
+#include "qemu-fsdev.h"
+#include "qemu/queue.h"
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/config-file.h"
+
+/* List of fsdev entries added by qemu_fsdev_add(). */
+static QTAILQ_HEAD(FsDriverEntry_head, FsDriverListEntry) fsdriver_entries =
+ QTAILQ_HEAD_INITIALIZER(fsdriver_entries);
+
+/*
+ * Known fsdriver names and their callback tables; "handle" is only
+ * available when CONFIG_OPEN_BY_HANDLE is defined.
+ */
+static FsDriverTable FsDrivers[] = {
+ { .name = "local", .ops = &local_ops},
+#ifdef CONFIG_OPEN_BY_HANDLE
+ { .name = "handle", .ops = &handle_ops},
+#endif
+ { .name = "synth", .ops = &synth_ops},
+ { .name = "proxy", .ops = &proxy_ops},
+};
+
+/*
+ * Create an fsdev entry from the given options and append it to
+ * fsdriver_entries.
+ *
+ * The options must carry an id and an "fsdriver" naming an entry of
+ * FsDrivers[]. "writeout=immediate" sets V9FS_IMMEDIATE_WRITEOUT and
+ * "readonly" sets V9FS_RDONLY. If the driver provides parse_opts it is
+ * called last and may reject the entry.
+ *
+ * Returns 0 on success, -1 (after printing a message for the id/driver
+ * errors) on failure.
+ */
+int qemu_fsdev_add(QemuOpts *opts)
+{
+    int idx;
+    struct FsDriverListEntry *entry;
+    const char *id = qemu_opts_id(opts);
+    const char *driver_name = qemu_opt_get(opts, "fsdriver");
+    const char *writeout_mode = qemu_opt_get(opts, "writeout");
+    bool readonly = qemu_opt_get_bool(opts, "readonly", 0);
+
+    if (id == NULL) {
+        fprintf(stderr, "fsdev: No id specified\n");
+        return -1;
+    }
+    if (driver_name == NULL) {
+        fprintf(stderr, "fsdev: No fsdriver specified\n");
+        return -1;
+    }
+
+    /* linear search of the driver table by name */
+    idx = 0;
+    while (idx < ARRAY_SIZE(FsDrivers) &&
+           strcmp(FsDrivers[idx].name, driver_name) != 0) {
+        idx++;
+    }
+    if (idx == ARRAY_SIZE(FsDrivers)) {
+        fprintf(stderr, "fsdev: fsdriver %s not found\n", driver_name);
+        return -1;
+    }
+
+    entry = g_malloc0(sizeof(*entry));
+    entry->fse.fsdev_id = g_strdup(id);
+    entry->fse.ops = FsDrivers[idx].ops;
+    if (writeout_mode != NULL && strcmp(writeout_mode, "immediate") == 0) {
+        entry->fse.export_flags |= V9FS_IMMEDIATE_WRITEOUT;
+    }
+    if (readonly) {
+        entry->fse.export_flags |= V9FS_RDONLY;
+    } else {
+        entry->fse.export_flags &= ~V9FS_RDONLY;
+    }
+
+    /* let the driver consume its own options; non-zero means reject */
+    if (entry->fse.ops->parse_opts != NULL &&
+        entry->fse.ops->parse_opts(opts, &entry->fse) != 0) {
+        g_free(entry->fse.fsdev_id);
+        g_free(entry);
+        return -1;
+    }
+
+    QTAILQ_INSERT_TAIL(&fsdriver_entries, entry, next);
+    return 0;
+}
+
+/*
+ * Look up a previously added fsdev entry by id.
+ * Returns the matching FsDriverEntry, or NULL when id is NULL or no
+ * entry has that id.
+ */
+FsDriverEntry *get_fsdev_fsentry(char *id)
+{
+    struct FsDriverListEntry *cur;
+
+    if (id == NULL) {
+        return NULL;
+    }
+
+    QTAILQ_FOREACH(cur, &fsdriver_entries, next) {
+        if (!strcmp(cur->fse.fsdev_id, id)) {
+            return &cur->fse;
+        }
+    }
+    return NULL;
+}
diff --git a/qemu/fsdev/qemu-fsdev.h b/qemu/fsdev/qemu-fsdev.h
new file mode 100644
index 000000000..9fa45bf51
--- /dev/null
+++ b/qemu/fsdev/qemu-fsdev.h
@@ -0,0 +1,48 @@
+/*
+ * Virtio 9p
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Gautham R Shenoy <ego@in.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+#ifndef QEMU_FSDEV_H
+#define QEMU_FSDEV_H
+#include "qemu/option.h"
+#include "file-op-9p.h"
+
+
+/*
+ * A table to store the various file systems and their callback operations.
+ * -----------------
+ * fstype | ops
+ * -----------------
+ * local | local_ops
+ * . |
+ * . |
+ * . |
+ * . |
+ * -----------------
+ * etc
+ */
+typedef struct FsDriverTable {
+ const char *name; /* value matched against the "fsdriver" option */
+ FileOperations *ops; /* callbacks for that driver */
+} FsDriverTable;
+
+/* One configured fsdev: the entry itself plus its tail-queue linkage. */
+typedef struct FsDriverListEntry {
+ FsDriverEntry fse;
+ QTAILQ_ENTRY(FsDriverListEntry) next;
+} FsDriverListEntry;
+
+/* Add an fsdev described by opts; returns 0 on success, -1 on error. */
+int qemu_fsdev_add(QemuOpts *opts);
+/* Find an added fsdev by id; returns NULL if id is NULL or unknown. */
+FsDriverEntry *get_fsdev_fsentry(char *id);
+/* Per-driver callback tables, defined by the individual drivers. */
+extern FileOperations local_ops;
+extern FileOperations handle_ops;
+extern FileOperations synth_ops;
+extern FileOperations proxy_ops;
+#endif
diff --git a/qemu/fsdev/virtfs-proxy-helper.c b/qemu/fsdev/virtfs-proxy-helper.c
new file mode 100644
index 000000000..9097d15c9
--- /dev/null
+++ b/qemu/fsdev/virtfs-proxy-helper.c
@@ -0,0 +1,1166 @@
+/*
+ * Helper for QEMU Proxy FS Driver
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * M. Mohan Kumar <mohan@in.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <sys/resource.h>
+#include <getopt.h>
+#include <syslog.h>
+#include <sys/capability.h>
+#include <sys/fsuid.h>
+#include <sys/vfs.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#ifdef CONFIG_LINUX_MAGIC_H
+#include <linux/magic.h>
+#endif
+#include "qemu-common.h"
+#include "qemu/sockets.h"
+#include "qemu/xattr.h"
+#include "virtio-9p-marshal.h"
+#include "hw/9pfs/virtio-9p-proxy.h"
+#include "fsdev/virtio-9p-marshal.h"
+
+#define PROGNAME "virtfs-proxy-helper"
+
+#ifndef XFS_SUPER_MAGIC
+#define XFS_SUPER_MAGIC 0x58465342
+#endif
+#ifndef EXT2_SUPER_MAGIC
+#define EXT2_SUPER_MAGIC 0xEF53
+#endif
+#ifndef REISERFS_SUPER_MAGIC
+#define REISERFS_SUPER_MAGIC 0x52654973
+#endif
+#ifndef BTRFS_SUPER_MAGIC
+#define BTRFS_SUPER_MAGIC 0x9123683E
+#endif
+
+/*
+ * Long command-line options and the short option character each maps
+ * to; terminated by an all-zero entry.
+ */
+static struct option helper_opts[] = {
+ {"fd", required_argument, NULL, 'f'},
+ {"path", required_argument, NULL, 'p'},
+ {"nodaemon", no_argument, NULL, 'n'},
+ {"socket", required_argument, NULL, 's'},
+ {"uid", required_argument, NULL, 'u'},
+ {"gid", required_argument, NULL, 'g'},
+ {},
+};
+
+/* When true, do_log()/do_perror() write to syslog instead of stderr. */
+static bool is_daemon;
+static bool get_version; /* IOC getversion IOCTL supported */
+
+/*
+ * Log a printf-style message.
+ *
+ * When running as a daemon the message goes to syslog at the priority
+ * given by loglevel (previously the argument was ignored and LOG_CRIT
+ * was always used); otherwise it is written to stderr and loglevel is
+ * not used.
+ */
+static void GCC_FMT_ATTR(2, 3) do_log(int loglevel, const char *format, ...)
+{
+    va_list ap;
+
+    va_start(ap, format);
+    if (is_daemon) {
+        vsyslog(loglevel, format, ap);
+    } else {
+        vfprintf(stderr, format, ap);
+    }
+    va_end(ap);
+}
+
+/*
+ * perror()-like helper: reports "<string>:<strerror(errno)>" to syslog
+ * (LOG_CRIT) when daemonized, otherwise to stderr with a newline.
+ */
+static void do_perror(const char *string)
+{
+    const char *reason = strerror(errno);
+
+    if (!is_daemon) {
+        fprintf(stderr, "%s:%s\n", string, reason);
+        return;
+    }
+    syslog(LOG_CRIT, "%s:%s", string, reason);
+}
+
+/*
+ * Raise the given capabilities in the effective set of this process.
+ *
+ * cap_value/size: array of capabilities and its length.
+ * reset: when non-zero, start from an empty capability state and also
+ *        put the capabilities in the permitted set; when zero, start
+ *        from the current process capabilities.
+ *
+ * Returns 0 on success, -1 on failure (after reporting via do_perror).
+ */
+static int do_cap_set(cap_value_t *cap_value, int size, int reset)
+{
+    int status = -1;
+    cap_t caps;
+
+    if (!reset) {
+        caps = cap_get_proc();
+        if (caps == NULL) {
+            do_perror("cap_get_proc");
+            return -1;
+        }
+    } else {
+        /*
+         * Start with an empty set and set permitted and effective
+         */
+        caps = cap_init();
+        if (caps == NULL) {
+            do_perror("cap_init");
+            return -1;
+        }
+        if (cap_set_flag(caps, CAP_PERMITTED, size, cap_value, CAP_SET) < 0) {
+            do_perror("cap_set_flag");
+            goto out;
+        }
+    }
+
+    if (cap_set_flag(caps, CAP_EFFECTIVE, size, cap_value, CAP_SET) < 0) {
+        do_perror("cap_set_flag");
+        goto out;
+    }
+    if (cap_set_proc(caps) < 0) {
+        do_perror("cap_set_proc");
+        goto out;
+    }
+    status = 0;
+
+out:
+    cap_free(caps);
+    return status;
+}
+
+/*
+ * Reset the process capabilities to the fixed list the helper needs.
+ * Returns the result of do_cap_set(): 0 on success, -1 on failure.
+ */
+static int init_capabilities(void)
+{
+    /* the only capabilities the helper keeps */
+    cap_value_t needed_caps[] = {
+        CAP_CHOWN,
+        CAP_DAC_OVERRIDE,
+        CAP_FOWNER,
+        CAP_FSETID,
+        CAP_SETGID,
+        CAP_MKNOD,
+        CAP_SETUID,
+    };
+    int count = ARRAY_SIZE(needed_caps);
+
+    return do_cap_set(needed_caps, count, 1);
+}
+
+/*
+ * Read exactly size bytes from sockfd into buff, retrying on EINTR.
+ * Returns the number of bytes read, -EIO if the peer closed the
+ * connection first, or -errno on a read error.
+ */
+static int socket_read(int sockfd, void *buff, ssize_t size)
+{
+    char *cursor = buff;
+    ssize_t done = 0;
+
+    while (size) {
+        ssize_t got = read(sockfd, cursor, size);
+
+        if (got == 0) {
+            return -EIO;
+        }
+        if (got < 0) {
+            if (errno == EINTR) {
+                continue;
+            }
+            return -errno;
+        }
+        cursor += got;
+        done += got;
+        size -= got;
+    }
+    return done;
+}
+
+/*
+ * Write exactly size bytes from buff to sockfd, retrying on EINTR.
+ * Returns the number of bytes written or -errno on a write error.
+ */
+static int socket_write(int sockfd, void *buff, ssize_t size)
+{
+    char *cursor = buff;
+    ssize_t done = 0;
+
+    while (size) {
+        ssize_t put = write(sockfd, cursor, size);
+
+        if (put < 0) {
+            if (errno == EINTR) {
+                continue;
+            }
+            return -errno;
+        }
+        cursor += put;
+        done += put;
+        size -= put;
+    }
+    return done;
+}
+
+/*
+ * Read one request from sockfd into iovec->iov_base.
+ *
+ * First reads the PROXY_HDR_SZ-byte header and decodes type and size
+ * into *header, then reads header->size payload bytes right after the
+ * header. iovec->iov_len is updated to the number of valid bytes.
+ *
+ * Returns 0 on success, -ENOBUFS when the announced payload exceeds
+ * PROXY_MAX_IO_SZ, or the negative error from the read/unmarshal step.
+ */
+static int read_request(int sockfd, struct iovec *iovec, ProxyHeader *header)
+{
+    int rc;
+    char *base = iovec->iov_base;
+
+    /* header first */
+    iovec->iov_len = 0;
+    rc = socket_read(sockfd, base, PROXY_HDR_SZ);
+    if (rc < 0) {
+        return rc;
+    }
+    iovec->iov_len = PROXY_HDR_SZ;
+
+    rc = proxy_unmarshal(iovec, 0, "dd", &header->type, &header->size);
+    if (rc < 0) {
+        return rc;
+    }
+
+    /* a payload larger than the buffer is a fatal protocol error */
+    if (header->size > PROXY_MAX_IO_SZ) {
+        return -ENOBUFS;
+    }
+
+    rc = socket_read(sockfd, base + PROXY_HDR_SZ, header->size);
+    if (rc < 0) {
+        return rc;
+    }
+    iovec->iov_len += header->size;
+    return 0;
+}
+
+/*
+ * Send the result of an open/create request to the peer.
+ *
+ * fd >= 0: the descriptor is passed as SCM_RIGHTS ancillary data with
+ *          V9FS_FD_VALID as the payload, and the local copy is closed
+ *          afterwards whether or not the send succeeded.
+ * fd <  0: fd is a negative errno; it is sent as the payload with no
+ *          ancillary data.
+ *
+ * Returns 0 on success or -errno if sendmsg() failed. The errno value
+ * is captured before close() so that close() cannot overwrite it.
+ */
+static int send_fd(int sockfd, int fd)
+{
+    struct msghdr msg;
+    struct iovec iov;
+    int retval, data, err;
+    struct cmsghdr *cmsg;
+    union MsgControl msg_control;
+
+    iov.iov_base = &data;
+    iov.iov_len = sizeof(data);
+
+    memset(&msg, 0, sizeof(msg));
+    msg.msg_iov = &iov;
+    msg.msg_iovlen = 1;
+    /* No ancillary data on error */
+    if (fd < 0) {
+        /* fd is really negative errno if the request failed */
+        data = fd;
+    } else {
+        data = V9FS_FD_VALID;
+        msg.msg_control = &msg_control;
+        msg.msg_controllen = sizeof(msg_control);
+
+        cmsg = &msg_control.cmsg;
+        cmsg->cmsg_len = CMSG_LEN(sizeof(fd));
+        cmsg->cmsg_level = SOL_SOCKET;
+        cmsg->cmsg_type = SCM_RIGHTS;
+        memcpy(CMSG_DATA(cmsg), &fd, sizeof(fd));
+    }
+
+    do {
+        retval = sendmsg(sockfd, &msg, 0);
+    } while (retval < 0 && errno == EINTR);
+    err = (retval < 0) ? -errno : 0;
+
+    if (fd >= 0) {
+        close(fd);
+    }
+    return err;
+}
+
+/*
+ * Send a status-only reply: a header of type T_ERROR (status < 0) or
+ * T_SUCCESS, followed by the status value itself.
+ *
+ * Returns 0 on success, or the negative value returned by
+ * proxy_marshal()/socket_write() on failure.
+ */
+static int send_status(int sockfd, struct iovec *iovec, int status)
+{
+    ProxyHeader header;
+    int msg_size, rc;
+
+    header.type = (status < 0) ? T_ERROR : T_SUCCESS;
+    header.size = sizeof(status);
+
+    /* header fields and status are marshalled in a single call */
+    msg_size = proxy_marshal(iovec, 0, "ddd", header.type,
+                             header.size, status);
+    if (msg_size < 0) {
+        return msg_size;
+    }
+
+    rc = socket_write(sockfd, iovec->iov_base, msg_size);
+    return rc < 0 ? rc : 0;
+}
+
+/*
+ * from man 7 capabilities, section
+ * Effect of User ID Changes on Capabilities:
+ * If the effective user ID is changed from nonzero to 0, then the permitted
+ * set is copied to the effective set. If the effective user ID is changed
+ * from 0 to nonzero, then all capabilities are cleared from the effective
+ * set.
+ *
+ * The setfsuid/setfsgid man pages warn that changing the effective user ID may
+ * expose the program to unwanted signals, but this is not true anymore: for an
+ * unprivileged (without CAP_KILL) program to send a signal, the real or
+ * effective user ID of the sending process must equal the real or saved user
+ * ID of the target process. Even when dropping privileges, it is enough to
+ * keep the saved UID to a "privileged" value and virtfs-proxy-helper won't
+ * be exposed to signals. So just use setresuid/setresgid.
+ *
+ * setugid() switches the effective uid/gid to the requested values and
+ * stores the previous effective ids in *suid / *sgid so that resetugid()
+ * can restore them. Returns 0 on success or -errno on failure; on failure
+ * any id already changed is switched back, and the process aborts if that
+ * restore itself fails.
+ */
+static int setugid(int uid, int gid, int *suid, int *sgid)
+{
+ int retval;
+
+ /*
+ * We still need DAC_OVERRIDE because we don't change
+ * supplementary group ids, and hence may be subjected DAC rules
+ */
+ cap_value_t cap_list[] = {
+ CAP_DAC_OVERRIDE,
+ };
+
+ /* remember the current effective ids for the caller */
+ *suid = geteuid();
+ *sgid = getegid();
+
+ /* real id untouched, effective = requested, saved = previous effective */
+ if (setresgid(-1, gid, *sgid) == -1) {
+ retval = -errno;
+ goto err_out;
+ }
+
+ if (setresuid(-1, uid, *suid) == -1) {
+ retval = -errno;
+ goto err_sgid;
+ }
+
+ /* re-raise CAP_DAC_OVERRIDE unless both requested ids are 0 */
+ if (uid != 0 || gid != 0) {
+ if (do_cap_set(cap_list, ARRAY_SIZE(cap_list), 0) < 0) {
+ retval = -errno;
+ goto err_suid;
+ }
+ }
+ return 0;
+
+ /* unwind in reverse order of the changes made above */
+err_suid:
+ if (setresuid(-1, *suid, *suid) == -1) {
+ abort();
+ }
+err_sgid:
+ if (setresgid(-1, *sgid, *sgid) == -1) {
+ abort();
+ }
+err_out:
+ return retval;
+}
+
+/*
+ * This is used to reset the ugid back with the saved values
+ * There is nothing much we can do checking error values here.
+ *
+ * suid/sgid are the values setugid() stored for the caller. Both the
+ * effective and saved ids are set to them; the process aborts if either
+ * call fails.
+ */
+static void resetugid(int suid, int sgid)
+{
+ /* group id is restored first, then the user id */
+ if (setresgid(-1, sgid, sgid) == -1) {
+ abort();
+ }
+ if (setresuid(-1, suid, suid) == -1) {
+ abort();
+ }
+}
+
+/*
+ * Send a reply whose payload was already marshalled into iovec at
+ * offset PROXY_HDR_SZ.
+ *
+ * size is the payload length on success or a negative errno on failure.
+ * A size larger than PROXY_MAX_IO_SZ is turned into -ENOBUFS. On
+ * success {T_SUCCESS header, payload} is sent; on failure the error
+ * code is marshalled here and {T_ERROR header, error} is sent.
+ *
+ * Returns 0 on success or the negative error from socket_write().
+ */
+static int send_response(int sock, struct iovec *iovec, int size)
+{
+    ProxyHeader header;
+    int rc;
+
+    /* response does not fit in the buffer */
+    if (size > PROXY_MAX_IO_SZ) {
+        size = -ENOBUFS;
+    }
+
+    if (size >= 0) {
+        header.type = T_SUCCESS;
+        header.size = size;
+    } else {
+        /* the error has not been encoded yet, so encode it now */
+        header.type = T_ERROR;
+        header.size = sizeof(size);
+        proxy_marshal(iovec, PROXY_HDR_SZ, "d", size);
+    }
+
+    proxy_marshal(iovec, 0, "dd", header.type, header.size);
+    rc = socket_write(sock, iovec->iov_base, header.size + PROXY_HDR_SZ);
+    return rc < 0 ? rc : 0;
+}
+
+/*
+ * gets generation number
+ * returns -errno on failure and sizeof(generation number) on success
+ *
+ * When get_version is false no ioctl is issued and a generation number
+ * of 0 is marshalled. Otherwise, if FS_IOC_GETVERSION is available at
+ * build time, the path is unmarshalled from iovec, opened read-only and
+ * queried with that ioctl; without it the function returns -ENOTTY.
+ */
+static int do_getversion(struct iovec *iovec, struct iovec *out_iovec)
+{
+    uint64_t version;
+    int retval = -ENOTTY;
+#ifdef FS_IOC_GETVERSION
+    int fd;
+    V9fsString path;
+#endif
+
+
+    /* no need to issue ioctl */
+    if (!get_version) {
+        version = 0;
+        retval = proxy_marshal(out_iovec, PROXY_HDR_SZ, "q", version);
+        return retval;
+    }
+#ifdef FS_IOC_GETVERSION
+    /* initialise like every other handler does before unmarshalling */
+    v9fs_string_init(&path);
+    retval = proxy_unmarshal(iovec, PROXY_HDR_SZ, "s", &path);
+    if (retval < 0) {
+        return retval;
+    }
+
+    fd = open(path.data, O_RDONLY);
+    if (fd < 0) {
+        retval = -errno;
+        goto err_out;
+    }
+    if (ioctl(fd, FS_IOC_GETVERSION, &version) < 0) {
+        retval = -errno;
+    } else {
+        retval = proxy_marshal(out_iovec, PROXY_HDR_SZ, "q", version);
+    }
+    close(fd);
+err_out:
+    v9fs_string_free(&path);
+#endif
+    return retval;
+}
+
+/*
+ * Handle T_LGETXATTR / T_LLISTXATTR.
+ *
+ * The request carries a buffer size and a path (plus an attribute name
+ * for T_LGETXATTR). With size == 0 the syscall is issued with an empty
+ * buffer and its return value is marshalled as an integer; otherwise
+ * the attribute data is marshalled as a string.
+ *
+ * Returns the number of bytes placed in out_iovec, or a negative errno.
+ * A negative size from the peer is rejected with -EINVAL instead of
+ * being handed to g_malloc().
+ */
+static int do_getxattr(int type, struct iovec *iovec, struct iovec *out_iovec)
+{
+    int size = 0, offset, retval;
+    V9fsString path, name, xattr;
+
+    v9fs_string_init(&xattr);
+    v9fs_string_init(&path);
+    retval = proxy_unmarshal(iovec, PROXY_HDR_SZ, "ds", &size, &path);
+    if (retval < 0) {
+        /* release the strings, as the other handlers do on this path */
+        goto err_out;
+    }
+    if (size < 0) {
+        retval = -EINVAL;
+        goto err_out;
+    }
+    offset = PROXY_HDR_SZ + retval;
+
+    if (size) {
+        xattr.data = g_malloc(size);
+        xattr.size = size;
+    }
+    switch (type) {
+    case T_LGETXATTR:
+        v9fs_string_init(&name);
+        retval = proxy_unmarshal(iovec, offset, "s", &name);
+        if (retval > 0) {
+            retval = lgetxattr(path.data, name.data, xattr.data, size);
+            if (retval < 0) {
+                retval = -errno;
+            } else {
+                xattr.size = retval;
+            }
+        }
+        v9fs_string_free(&name);
+        break;
+    case T_LLISTXATTR:
+        retval = llistxattr(path.data, xattr.data, size);
+        if (retval < 0) {
+            retval = -errno;
+        } else {
+            xattr.size = retval;
+        }
+        break;
+    }
+    if (retval < 0) {
+        goto err_out;
+    }
+
+    if (!size) {
+        /* size query: reply with the length the syscall reported */
+        proxy_marshal(out_iovec, PROXY_HDR_SZ, "d", retval);
+        retval = sizeof(retval);
+    } else {
+        retval = proxy_marshal(out_iovec, PROXY_HDR_SZ, "s", &xattr);
+    }
+err_out:
+    v9fs_string_free(&xattr);
+    v9fs_string_free(&path);
+    return retval;
+}
+
+/*
+ * Fill *pr_stat from *stat: the structure is zeroed first, then every
+ * field the proxy protocol carries is copied across.
+ */
+static void stat_to_prstat(ProxyStat *pr_stat, struct stat *stat)
+{
+    memset(pr_stat, 0, sizeof(*pr_stat));
+
+    /* identity and ownership */
+    pr_stat->st_dev = stat->st_dev;
+    pr_stat->st_ino = stat->st_ino;
+    pr_stat->st_nlink = stat->st_nlink;
+    pr_stat->st_mode = stat->st_mode;
+    pr_stat->st_uid = stat->st_uid;
+    pr_stat->st_gid = stat->st_gid;
+    pr_stat->st_rdev = stat->st_rdev;
+
+    /* sizes */
+    pr_stat->st_size = stat->st_size;
+    pr_stat->st_blksize = stat->st_blksize;
+    pr_stat->st_blocks = stat->st_blocks;
+
+    /* timestamps, split into seconds and nanoseconds */
+    pr_stat->st_atim_sec = stat->st_atim.tv_sec;
+    pr_stat->st_atim_nsec = stat->st_atim.tv_nsec;
+    pr_stat->st_mtim_sec = stat->st_mtim.tv_sec;
+    pr_stat->st_mtim_nsec = stat->st_mtim.tv_nsec;
+    pr_stat->st_ctim_sec = stat->st_ctim.tv_sec;
+    pr_stat->st_ctim_nsec = stat->st_ctim.tv_nsec;
+}
+
+/*
+ * Fill *pr_stfs from *stfs: the structure is zeroed first, then every
+ * field is copied; the two words of f_fsid are copied individually.
+ */
+static void statfs_to_prstatfs(ProxyStatFS *pr_stfs, struct statfs *stfs)
+{
+    memset(pr_stfs, 0, sizeof(*pr_stfs));
+
+    pr_stfs->f_type = stfs->f_type;
+    pr_stfs->f_bsize = stfs->f_bsize;
+    pr_stfs->f_frsize = stfs->f_frsize;
+    pr_stfs->f_namelen = stfs->f_namelen;
+
+    /* block counts */
+    pr_stfs->f_blocks = stfs->f_blocks;
+    pr_stfs->f_bfree = stfs->f_bfree;
+    pr_stfs->f_bavail = stfs->f_bavail;
+
+    /* inode counts */
+    pr_stfs->f_files = stfs->f_files;
+    pr_stfs->f_ffree = stfs->f_ffree;
+
+    pr_stfs->f_fsid[0] = stfs->f_fsid.__val[0];
+    pr_stfs->f_fsid[1] = stfs->f_fsid.__val[1];
+}
+
+/*
+ * Gets stat/statfs information and packs it into the out_iovec structure.
+ *
+ * type selects the operation: T_LSTAT runs lstat(), T_STATFS runs
+ * statfs() on the path unmarshalled from iovec.
+ *
+ * On success returns the number of bytes packed into out_iovec,
+ * otherwise returns -errno.
+ */
+static int do_stat(int type, struct iovec *iovec, struct iovec *out_iovec)
+{
+    int retval;
+    V9fsString path;
+    ProxyStat pr_stat;
+    ProxyStatFS pr_stfs;
+    struct stat st_buf;
+    struct statfs stfs_buf;
+
+    v9fs_string_init(&path);
+    retval = proxy_unmarshal(iovec, PROXY_HDR_SZ, "s", &path);
+    if (retval < 0) {
+        /* release the string on failure, as do_readlink()/do_open() do */
+        v9fs_string_free(&path);
+        return retval;
+    }
+
+    switch (type) {
+    case T_LSTAT:
+        retval = lstat(path.data, &st_buf);
+        if (retval < 0) {
+            retval = -errno;
+        } else {
+            stat_to_prstat(&pr_stat, &st_buf);
+            retval = proxy_marshal(out_iovec, PROXY_HDR_SZ,
+                                   "qqqdddqqqqqqqqqq", pr_stat.st_dev,
+                                   pr_stat.st_ino, pr_stat.st_nlink,
+                                   pr_stat.st_mode, pr_stat.st_uid,
+                                   pr_stat.st_gid, pr_stat.st_rdev,
+                                   pr_stat.st_size, pr_stat.st_blksize,
+                                   pr_stat.st_blocks,
+                                   pr_stat.st_atim_sec, pr_stat.st_atim_nsec,
+                                   pr_stat.st_mtim_sec, pr_stat.st_mtim_nsec,
+                                   pr_stat.st_ctim_sec, pr_stat.st_ctim_nsec);
+        }
+        break;
+    case T_STATFS:
+        retval = statfs(path.data, &stfs_buf);
+        if (retval < 0) {
+            retval = -errno;
+        } else {
+            statfs_to_prstatfs(&pr_stfs, &stfs_buf);
+            retval = proxy_marshal(out_iovec, PROXY_HDR_SZ,
+                                   "qqqqqqqqqqq", pr_stfs.f_type,
+                                   pr_stfs.f_bsize, pr_stfs.f_blocks,
+                                   pr_stfs.f_bfree, pr_stfs.f_bavail,
+                                   pr_stfs.f_files, pr_stfs.f_ffree,
+                                   pr_stfs.f_fsid[0], pr_stfs.f_fsid[1],
+                                   pr_stfs.f_namelen, pr_stfs.f_frsize);
+        }
+        break;
+    }
+    v9fs_string_free(&path);
+    return retval;
+}
+
+/*
+ * Handle T_READLINK: unmarshal a path and a buffer size, read the link
+ * target and marshal it as a string into out_iovec.
+ *
+ * Returns the number of bytes placed in out_iovec, -EINVAL when the
+ * requested buffer size is not positive, or -errno from readlink().
+ */
+static int do_readlink(struct iovec *iovec, struct iovec *out_iovec)
+{
+    char *buffer;
+    int size, retval;
+    V9fsString target, path;
+
+    v9fs_string_init(&path);
+    retval = proxy_unmarshal(iovec, PROXY_HDR_SZ, "sd", &path, &size);
+    if (retval < 0) {
+        v9fs_string_free(&path);
+        return retval;
+    }
+    if (size <= 0) {
+        /*
+         * size comes from the peer: 0 would make "size - 1" wrap to a
+         * huge length once converted to size_t, and a negative value
+         * would be passed straight to g_malloc().
+         */
+        v9fs_string_free(&path);
+        return -EINVAL;
+    }
+    buffer = g_malloc(size);
+    v9fs_string_init(&target);
+    /* one byte is kept back for the terminating NUL added below */
+    retval = readlink(path.data, buffer, size - 1);
+    if (retval < 0) {
+        retval = -errno;
+    } else {
+        buffer[retval] = '\0';
+        v9fs_string_sprintf(&target, "%s", buffer);
+        retval = proxy_marshal(out_iovec, PROXY_HDR_SZ, "s", &target);
+    }
+    g_free(buffer);
+    v9fs_string_free(&target);
+    v9fs_string_free(&path);
+    return retval;
+}
+
+/*
+ * create other filesystem objects and send 0 on success
+ * return -errno on error
+ *
+ * type selects the object: T_MKNOD, T_MKDIR or T_SYMLINK. The request
+ * starts with the uid/gid to create the object as, followed by the
+ * type-specific arguments. The effective ids are switched with
+ * setugid() for the duration of the call and restored afterwards.
+ */
+static int do_create_others(int type, struct iovec *iovec)
+{
+ dev_t rdev;
+ int retval = 0;
+ int offset = PROXY_HDR_SZ;
+ V9fsString oldpath, path;
+ int mode, uid, gid, cur_uid, cur_gid;
+
+ v9fs_string_init(&path);
+ v9fs_string_init(&oldpath);
+
+ retval = proxy_unmarshal(iovec, offset, "dd", &uid, &gid);
+ if (retval < 0) {
+ return retval;
+ }
+ /* the type-specific arguments follow the uid/gid pair */
+ offset += retval;
+ retval = setugid(uid, gid, &cur_uid, &cur_gid);
+ if (retval < 0) {
+ /* ids were not changed, so skip resetugid() */
+ goto unmarshal_err_out;
+ }
+ switch (type) {
+ case T_MKNOD:
+ retval = proxy_unmarshal(iovec, offset, "sdq", &path, &mode, &rdev);
+ if (retval < 0) {
+ goto err_out;
+ }
+ retval = mknod(path.data, mode, rdev);
+ break;
+ case T_MKDIR:
+ retval = proxy_unmarshal(iovec, offset, "sd", &path, &mode);
+ if (retval < 0) {
+ goto err_out;
+ }
+ retval = mkdir(path.data, mode);
+ break;
+ case T_SYMLINK:
+ retval = proxy_unmarshal(iovec, offset, "ss", &oldpath, &path);
+ if (retval < 0) {
+ goto err_out;
+ }
+ retval = symlink(oldpath.data, path.data);
+ break;
+ }
+ /* convert a failed syscall into -errno */
+ if (retval < 0) {
+ retval = -errno;
+ }
+
+err_out:
+ resetugid(cur_uid, cur_gid);
+unmarshal_err_out:
+ v9fs_string_free(&path);
+ v9fs_string_free(&oldpath);
+ return retval;
+}
+
+/*
+ * create a file and send fd on success
+ * return -errno on error
+ *
+ * The request carries path, open flags, mode, uid and gid. The file is
+ * opened while the effective ids are switched to uid/gid via setugid();
+ * they are restored with resetugid() before returning.
+ */
+static int do_create(struct iovec *iovec)
+{
+ int ret;
+ V9fsString path;
+ int flags, mode, uid, gid, cur_uid, cur_gid;
+
+ v9fs_string_init(&path);
+ ret = proxy_unmarshal(iovec, PROXY_HDR_SZ, "sdddd",
+ &path, &flags, &mode, &uid, &gid);
+ if (ret < 0) {
+ goto unmarshal_err_out;
+ }
+ ret = setugid(uid, gid, &cur_uid, &cur_gid);
+ if (ret < 0) {
+ /* ids were not changed, so skip resetugid() */
+ goto unmarshal_err_out;
+ }
+ ret = open(path.data, flags, mode);
+ if (ret < 0) {
+ ret = -errno;
+ }
+
+ resetugid(cur_uid, cur_gid);
+unmarshal_err_out:
+ v9fs_string_free(&path);
+ return ret;
+}
+
+/*
+ * Handle T_OPEN: unmarshal a path and open flags and open the file.
+ * Returns the new file descriptor on success, or a negative errno
+ * (from unmarshalling or from open()) on failure.
+ */
+static int do_open(struct iovec *iovec)
+{
+    int flags, ret;
+    V9fsString path;
+
+    v9fs_string_init(&path);
+    ret = proxy_unmarshal(iovec, PROXY_HDR_SZ, "sd", &path, &flags);
+    if (ret >= 0) {
+        ret = open(path.data, flags);
+        if (ret < 0) {
+            ret = -errno;
+        }
+    }
+    v9fs_string_free(&path);
+    return ret;
+}
+
+/*
+ * create unix domain socket and return the descriptor
+ *
+ * Binds a stream socket at path, changes its ownership to uid:gid,
+ * waits for a single client connection and returns the connected
+ * descriptor; the listening socket is closed before returning.
+ * Returns -1 on any failure, including when path already exists or is
+ * too long for sockaddr_un.
+ */
+static int proxy_socket(const char *path, uid_t uid, gid_t gid)
+{
+ int sock, client;
+ struct sockaddr_un proxy, qemu;
+ socklen_t size;
+
+ /* requested socket already exists, refuse to start */
+ if (!access(path, F_OK)) {
+ do_log(LOG_CRIT, "socket already exists\n");
+ return -1;
+ }
+
+ /* checked here so that the strcpy() below cannot overflow sun_path */
+ if (strlen(path) >= sizeof(proxy.sun_path)) {
+ do_log(LOG_CRIT, "UNIX domain socket path exceeds %zu characters\n",
+ sizeof(proxy.sun_path));
+ return -1;
+ }
+
+ sock = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (sock < 0) {
+ do_perror("socket");
+ return -1;
+ }
+
+ /* mask other part of mode bits */
+ umask(7);
+
+ proxy.sun_family = AF_UNIX;
+ strcpy(proxy.sun_path, path);
+ if (bind(sock, (struct sockaddr *)&proxy,
+ sizeof(struct sockaddr_un)) < 0) {
+ do_perror("bind");
+ goto error;
+ }
+ if (chown(proxy.sun_path, uid, gid) < 0) {
+ do_perror("chown");
+ goto error;
+ }
+ if (listen(sock, 1) < 0) {
+ do_perror("listen");
+ goto error;
+ }
+
+ /* blocks until the single expected client connects */
+ size = sizeof(qemu);
+ client = accept(sock, (struct sockaddr *)&qemu, &size);
+ if (client < 0) {
+ do_perror("accept");
+ goto error;
+ }
+ close(sock);
+ return client;
+
+error:
+ close(sock);
+ return -1;
+}
+
+/* Print the command-line synopsis to stderr, using basename(prog). */
+static void usage(char *prog)
+{
+ fprintf(stderr, "usage: %s\n"
+ " -p|--path <path> 9p path to export\n"
+ " {-f|--fd <socket-descriptor>} socket file descriptor to be used\n"
+ " {-s|--socket <socketname> socket file used for communication\n"
+ " \t-u|--uid <uid> -g|--gid <gid>} - uid:gid combination to give "
+ " access to this socket\n"
+ " \tNote: -s & -f can not be used together\n"
+ " [-n|--nodaemon] Run as a normal program\n",
+ basename(prog));
+}
+
+/*
+ * Send the reply that matches the request type.
+ *
+ * open/create requests answer with a file descriptor (send_fd),
+ * status-only requests with send_status, and requests that return data
+ * with send_response. retval is the handler's result and out_iovec the
+ * buffer holding any marshalled payload.
+ *
+ * Returns 0 on success, -1 if sending failed or the type is unknown.
+ */
+static int process_reply(int sock, int type,
+                         struct iovec *out_iovec, int retval)
+{
+    int rc;
+
+    switch (type) {
+    case T_OPEN:
+    case T_CREATE:
+        rc = send_fd(sock, retval);
+        break;
+    case T_MKNOD:
+    case T_MKDIR:
+    case T_SYMLINK:
+    case T_LINK:
+    case T_CHMOD:
+    case T_CHOWN:
+    case T_TRUNCATE:
+    case T_UTIME:
+    case T_RENAME:
+    case T_REMOVE:
+    case T_LSETXATTR:
+    case T_LREMOVEXATTR:
+        rc = send_status(sock, out_iovec, retval);
+        break;
+    case T_LSTAT:
+    case T_STATFS:
+    case T_READLINK:
+    case T_LGETXATTR:
+    case T_LLISTXATTR:
+    case T_GETVERSION:
+        rc = send_response(sock, out_iovec, retval);
+        break;
+    default:
+        return -1;
+    }
+    return rc < 0 ? -1 : 0;
+}
+
+/*
+ * Request loop of the proxy helper.
+ *
+ * Reads one request at a time from @sock, performs the operation selected
+ * by header.type and sends the outcome back through process_reply().  The
+ * loop ends when a request cannot be read, when the request type is
+ * unknown, or when the reply cannot be sent.
+ *
+ * A failing file operation does not end the loop: its negative errno value
+ * becomes the request's result and is reported to the peer.
+ *
+ * Returns -1 once the loop has ended; there is no success return value.
+ */
+static int process_requests(int sock)
+{
+    int flags;
+    int size = 0;
+    int retval = 0;
+    uint64_t offset;
+    ProxyHeader header;
+    int mode, uid, gid;
+    V9fsString name, value;
+    /*
+     * 64-bit landing area for the 'q' fields of T_UTIME.  The fields of
+     * struct timespec may be narrower than 64 bits, so they must not be
+     * handed to the unmarshaller directly.
+     */
+    uint64_t atime_sec, atime_nsec, mtime_sec, mtime_nsec;
+    struct timespec spec[2];
+    V9fsString oldpath, path;
+    struct iovec in_iovec, out_iovec;
+
+    in_iovec.iov_base = g_malloc(PROXY_MAX_IO_SZ + PROXY_HDR_SZ);
+    in_iovec.iov_len = PROXY_MAX_IO_SZ + PROXY_HDR_SZ;
+    out_iovec.iov_base = g_malloc(PROXY_MAX_IO_SZ + PROXY_HDR_SZ);
+    out_iovec.iov_len = PROXY_MAX_IO_SZ + PROXY_HDR_SZ;
+
+    while (1) {
+        /*
+         * initialize the header type, so that we send
+         * response to proper request type.
+         */
+        header.type = 0;
+        retval = read_request(sock, &in_iovec, &header);
+        if (retval < 0) {
+            goto err_out;
+        }
+
+        switch (header.type) {
+        case T_OPEN:
+            retval = do_open(&in_iovec);
+            break;
+        case T_CREATE:
+            retval = do_create(&in_iovec);
+            break;
+        case T_MKNOD:
+        case T_MKDIR:
+        case T_SYMLINK:
+            retval = do_create_others(header.type, &in_iovec);
+            break;
+        case T_LINK:
+            v9fs_string_init(&path);
+            v9fs_string_init(&oldpath);
+            retval = proxy_unmarshal(&in_iovec, PROXY_HDR_SZ,
+                                     "ss", &oldpath, &path);
+            if (retval > 0) {
+                retval = link(oldpath.data, path.data);
+                if (retval < 0) {
+                    retval = -errno;
+                }
+            }
+            v9fs_string_free(&oldpath);
+            v9fs_string_free(&path);
+            break;
+        case T_LSTAT:
+        case T_STATFS:
+            retval = do_stat(header.type, &in_iovec, &out_iovec);
+            break;
+        case T_READLINK:
+            retval = do_readlink(&in_iovec, &out_iovec);
+            break;
+        case T_CHMOD:
+            v9fs_string_init(&path);
+            retval = proxy_unmarshal(&in_iovec, PROXY_HDR_SZ,
+                                     "sd", &path, &mode);
+            if (retval > 0) {
+                retval = chmod(path.data, mode);
+                if (retval < 0) {
+                    retval = -errno;
+                }
+            }
+            v9fs_string_free(&path);
+            break;
+        case T_CHOWN:
+            v9fs_string_init(&path);
+            retval = proxy_unmarshal(&in_iovec, PROXY_HDR_SZ, "sdd", &path,
+                                     &uid, &gid);
+            if (retval > 0) {
+                retval = lchown(path.data, uid, gid);
+                if (retval < 0) {
+                    retval = -errno;
+                }
+            }
+            v9fs_string_free(&path);
+            break;
+        case T_TRUNCATE:
+            v9fs_string_init(&path);
+            retval = proxy_unmarshal(&in_iovec, PROXY_HDR_SZ, "sq",
+                                     &path, &offset);
+            if (retval > 0) {
+                retval = truncate(path.data, offset);
+                if (retval < 0) {
+                    retval = -errno;
+                }
+            }
+            v9fs_string_free(&path);
+            break;
+        case T_UTIME:
+            v9fs_string_init(&path);
+            retval = proxy_unmarshal(&in_iovec, PROXY_HDR_SZ, "sqqqq", &path,
+                                     &atime_sec, &atime_nsec,
+                                     &mtime_sec, &mtime_nsec);
+            if (retval > 0) {
+                /* narrow to the platform's timespec field types */
+                spec[0].tv_sec = atime_sec;
+                spec[0].tv_nsec = atime_nsec;
+                spec[1].tv_sec = mtime_sec;
+                spec[1].tv_nsec = mtime_nsec;
+                retval = qemu_utimens(path.data, spec);
+                if (retval < 0) {
+                    retval = -errno;
+                }
+            }
+            v9fs_string_free(&path);
+            break;
+        case T_RENAME:
+            v9fs_string_init(&path);
+            v9fs_string_init(&oldpath);
+            retval = proxy_unmarshal(&in_iovec, PROXY_HDR_SZ,
+                                     "ss", &oldpath, &path);
+            if (retval > 0) {
+                retval = rename(oldpath.data, path.data);
+                if (retval < 0) {
+                    retval = -errno;
+                }
+            }
+            v9fs_string_free(&oldpath);
+            v9fs_string_free(&path);
+            break;
+        case T_REMOVE:
+            v9fs_string_init(&path);
+            retval = proxy_unmarshal(&in_iovec, PROXY_HDR_SZ, "s", &path);
+            if (retval > 0) {
+                retval = remove(path.data);
+                if (retval < 0) {
+                    retval = -errno;
+                }
+            }
+            v9fs_string_free(&path);
+            break;
+        case T_LGETXATTR:
+        case T_LLISTXATTR:
+            retval = do_getxattr(header.type, &in_iovec, &out_iovec);
+            break;
+        case T_LSETXATTR:
+            v9fs_string_init(&path);
+            v9fs_string_init(&name);
+            v9fs_string_init(&value);
+            retval = proxy_unmarshal(&in_iovec, PROXY_HDR_SZ, "sssdd", &path,
+                                     &name, &value, &size, &flags);
+            if (retval > 0) {
+                /*
+                 * @size travels separately from the value string.  Refuse
+                 * a size that exceeds the bytes actually received so that
+                 * lsetxattr() never reads past the value buffer.
+                 */
+                if (size < 0 || size > value.size) {
+                    retval = -EINVAL;
+                } else {
+                    retval = lsetxattr(path.data,
+                                       name.data, value.data, size, flags);
+                    if (retval < 0) {
+                        retval = -errno;
+                    }
+                }
+            }
+            v9fs_string_free(&path);
+            v9fs_string_free(&name);
+            v9fs_string_free(&value);
+            break;
+        case T_LREMOVEXATTR:
+            v9fs_string_init(&path);
+            v9fs_string_init(&name);
+            retval = proxy_unmarshal(&in_iovec,
+                                     PROXY_HDR_SZ, "ss", &path, &name);
+            if (retval > 0) {
+                retval = lremovexattr(path.data, name.data);
+                if (retval < 0) {
+                    retval = -errno;
+                }
+            }
+            v9fs_string_free(&path);
+            v9fs_string_free(&name);
+            break;
+        case T_GETVERSION:
+            retval = do_getversion(&in_iovec, &out_iovec);
+            break;
+        default:
+            goto err_out;
+        }
+
+        if (process_reply(sock, header.type, &out_iovec, retval) < 0) {
+            goto err_out;
+        }
+    }
+err_out:
+    g_free(in_iovec.iov_base);
+    g_free(out_iovec.iov_base);
+    return -1;
+}
+
+/*
+ * Entry point of the virtfs proxy helper.
+ *
+ * Parses the command line, validates the export path, optionally
+ * daemonizes, obtains the communication socket (either the descriptor
+ * given with -f or a named socket created for -s/-u/-g), chroots into the
+ * export path, calls init_capabilities() and then serves requests until
+ * the request loop ends.
+ *
+ * Exit status: command-line and path validation errors end the process
+ * early with a failure status.  A failure while setting up the socket,
+ * the chroot or the capabilities yields EXIT_FAILURE.  EXIT_SUCCESS is
+ * returned once the request loop has run and ended.
+ */
+int main(int argc, char **argv)
+{
+    int sock;
+    int status = EXIT_FAILURE;
+    uid_t own_u;
+    gid_t own_g;
+    char *rpath = NULL;
+    char *sock_name = NULL;
+    struct stat stbuf;
+    int c, option_index;
+#ifdef FS_IOC_GETVERSION
+    int retval;
+    struct statfs st_fs;
+#endif
+
+    is_daemon = true;
+    sock = -1;
+    own_u = own_g = -1;
+    while (1) {
+        option_index = 0;
+        c = getopt_long(argc, argv, "p:nh?f:s:u:g:", helper_opts,
+                        &option_index);
+        if (c == -1) {
+            break;
+        }
+        switch (c) {
+        case 'p':
+            /* a repeated option replaces the earlier value */
+            g_free(rpath);
+            rpath = g_strdup(optarg);
+            break;
+        case 'n':
+            is_daemon = false;
+            break;
+        case 'f':
+            sock = atoi(optarg);
+            break;
+        case 's':
+            g_free(sock_name);
+            sock_name = g_strdup(optarg);
+            break;
+        case 'u':
+            own_u = atoi(optarg);
+            break;
+        case 'g':
+            own_g = atoi(optarg);
+            break;
+        case '?':
+        case 'h':
+        default:
+            usage(argv[0]);
+            exit(EXIT_FAILURE);
+        }
+    }
+
+    /* Parameter validation */
+    if ((sock_name == NULL && sock == -1) || rpath == NULL) {
+        fprintf(stderr, "socket, socket descriptor or path not specified\n");
+        usage(argv[0]);
+        return -1;
+    }
+
+    if (sock_name && sock != -1) {
+        fprintf(stderr, "both named socket and socket descriptor specified\n");
+        usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+
+    if (sock_name && (own_u == -1 || own_g == -1)) {
+        fprintf(stderr, "owner uid:gid not specified, ");
+        fprintf(stderr,
+                "owner uid:gid specifies who can access the socket file\n");
+        usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+
+    if (lstat(rpath, &stbuf) < 0) {
+        fprintf(stderr, "invalid path \"%s\" specified, %s\n",
+                rpath, strerror(errno));
+        exit(EXIT_FAILURE);
+    }
+
+    if (!S_ISDIR(stbuf.st_mode)) {
+        fprintf(stderr, "specified path \"%s\" is not directory\n", rpath);
+        exit(EXIT_FAILURE);
+    }
+
+    if (is_daemon) {
+        if (daemon(0, 0) < 0) {
+            fprintf(stderr, "daemon call failed\n");
+            exit(EXIT_FAILURE);
+        }
+        openlog(PROGNAME, LOG_PID, LOG_DAEMON);
+    }
+
+    do_log(LOG_INFO, "Started\n");
+    if (sock_name) {
+        sock = proxy_socket(sock_name, own_u, own_g);
+        if (sock < 0) {
+            goto error;
+        }
+    }
+
+    get_version = false;
+#ifdef FS_IOC_GETVERSION
+    /* check whether underlying FS support IOC_GETVERSION */
+    retval = statfs(rpath, &st_fs);
+    if (!retval) {
+        switch (st_fs.f_type) {
+        case EXT2_SUPER_MAGIC:
+        case BTRFS_SUPER_MAGIC:
+        case REISERFS_SUPER_MAGIC:
+        case XFS_SUPER_MAGIC:
+            get_version = true;
+            break;
+        }
+    }
+#endif
+
+    if (chdir("/") < 0) {
+        do_perror("chdir");
+        goto error;
+    }
+    if (chroot(rpath) < 0) {
+        do_perror("chroot");
+        goto error;
+    }
+    umask(0);
+
+    if (init_capabilities() < 0) {
+        goto error;
+    }
+
+    /*
+     * The request loop only returns when the connection can no longer be
+     * served; its return value carries no further information.
+     */
+    process_requests(sock);
+    status = EXIT_SUCCESS;
+error:
+    g_free(rpath);
+    g_free(sock_name);
+    do_log(LOG_INFO, "Done\n");
+    closelog();
+    return status;
+}
diff --git a/qemu/fsdev/virtfs-proxy-helper.texi b/qemu/fsdev/virtfs-proxy-helper.texi
new file mode 100644
index 000000000..e60e3b946
--- /dev/null
+++ b/qemu/fsdev/virtfs-proxy-helper.texi
@@ -0,0 +1,63 @@
+@example
+@c man begin SYNOPSIS
+usage: virtfs-proxy-helper options
+@c man end
+@end example
+
+@c man begin DESCRIPTION
+@table @description
+Pass-through security model in QEMU 9p server needs root privilege to do
+a few file operations (like chown, chmod to any mode/uid:gid). There are two
+issues in the pass-through security model:
+
+1) TOCTTOU vulnerability: Following symbolic links in the server could
+provide access to files beyond 9p export path.
+
+2) Running QEMU with root privilege could be a security issue.
+
+To overcome the above issues, the following approach is used: A new filesystem
+type 'proxy' is introduced. Proxy FS uses a chroot + socket combination
+to close the vulnerability associated with following symbolic links.
+The intention of adding a new filesystem type is to allow qemu to run
+in non-root mode while doing privileged operations via socket IO.
+
+Proxy helper(a stand alone binary part of qemu) is invoked with
+root privileges. Proxy helper chroots into 9p export path and creates
+a socket pair or a named socket based on the command line parameter.
+QEMU and proxy helper communicate using this socket. QEMU proxy fs
+driver sends filesystem request to proxy helper and receives the
+response from it.
+
+Proxy helper is designed so that it can drop the root privilege while
+retaining only the capabilities needed for doing filesystem operations.
+
+@end table
+@c man end
+
+@c man begin OPTIONS
+The following options are supported:
+@table @option
+@item -h
+@findex -h
+Display help and exit
+@item -p|--path path
+Path to export for proxy filesystem driver
+@item -f|--fd socket-id
+Use given file descriptor as socket descriptor for communicating with
+qemu proxy fs driver. Usually a helper like libvirt will create a
+socketpair and pass one of the fds as parameter to -f|--fd
+@item -s|--socket socket-file
+Creates named socket file for communicating with qemu proxy fs driver
+@item -u|--uid uid -g|--gid gid
+uid:gid combination to give access to named socket file
+@item -n|--nodaemon
+Run as a normal program. By default program will run in daemon mode
+@end table
+@c man end
+
+@setfilename virtfs-proxy-helper
+@settitle QEMU 9p virtfs proxy filesystem helper
+
+@c man begin AUTHOR
+M. Mohan Kumar
+@c man end
diff --git a/qemu/fsdev/virtio-9p-marshal.c b/qemu/fsdev/virtio-9p-marshal.c
new file mode 100644
index 000000000..20f308b76
--- /dev/null
+++ b/qemu/fsdev/virtio-9p-marshal.c
@@ -0,0 +1,323 @@
+/*
+ * Virtio 9p backend
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include <glib.h>
+#include <glib/gprintf.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <sys/time.h>
+#include <utime.h>
+#include <sys/uio.h>
+#include <string.h>
+#include <stdint.h>
+#include <errno.h>
+
+#include "qemu/compiler.h"
+#include "virtio-9p-marshal.h"
+#include "qemu/bswap.h"
+
+/*
+ * Release the buffer owned by @str and leave @str in the empty state
+ * (no data, size 0), so that it may be freed or reused again.
+ */
+void v9fs_string_free(V9fsString *str)
+{
+    char *buf = str->data;
+
+    str->size = 0;
+    str->data = NULL;
+    g_free(buf);
+}
+
+/*
+ * Reset @str to the empty state.  This simply forwards to
+ * v9fs_string_free(), so any buffer held by @str is released.
+ */
+void v9fs_string_null(V9fsString *str)
+{
+ v9fs_string_free(str);
+}
+
+/*
+ * Replace the contents of @str with printf-style formatted text.
+ *
+ * The previous buffer is released first; the new one is allocated by
+ * g_vasprintf() and its return value is stored as the string size.
+ */
+void GCC_FMT_ATTR(2, 3)
+v9fs_string_sprintf(V9fsString *str, const char *fmt, ...)
+{
+    va_list args;
+    int len;
+
+    v9fs_string_free(str);
+
+    va_start(args, fmt);
+    len = g_vasprintf(&str->data, fmt, args);
+    va_end(args);
+
+    str->size = len;
+}
+
+/*
+ * Make @lhs a copy of the text held by @rhs.
+ *
+ * Whatever @lhs held before is released.  Copying a string onto itself
+ * is a no-op; without that guard the source buffer would be freed before
+ * it is read.
+ */
+void v9fs_string_copy(V9fsString *lhs, V9fsString *rhs)
+{
+    if (lhs == rhs) {
+        return;
+    }
+    v9fs_string_free(lhs);
+    v9fs_string_sprintf(lhs, "%s", rhs->data);
+}
+
+
+/*
+ * Copy @size bytes between the flat buffer @addr and the scatter/gather
+ * list @sg (@sg_count entries), starting @offset bytes into the list.
+ *
+ * When @pack is non-zero the bytes go from @addr into the list, otherwise
+ * from the list into @addr.
+ *
+ * Returns @size when everything was copied, or -ENOBUFS when the list
+ * ran out before @size bytes were transferred (bytes copied up to that
+ * point stay copied).
+ */
+static ssize_t v9fs_packunpack(void *addr, struct iovec *sg, int sg_count,
+                               size_t offset, size_t size, int pack)
+{
+    char *flat = addr;
+    size_t remaining = size;
+    int idx;
+
+    for (idx = 0; remaining > 0 && idx < sg_count; idx++) {
+        char *seg = sg[idx].iov_base;
+        size_t seg_len = sg[idx].iov_len;
+        size_t chunk;
+
+        if (offset >= seg_len) {
+            /* the start position lies beyond this element: skip it */
+            offset -= seg_len;
+            continue;
+        }
+
+        chunk = seg_len - offset;
+        if (chunk > remaining) {
+            chunk = remaining;
+        }
+
+        if (pack) {
+            memcpy(seg + offset, flat, chunk);
+        } else {
+            memcpy(flat, seg + offset, chunk);
+        }
+
+        flat += chunk;
+        remaining -= chunk;
+        /* every following element is used from its first byte */
+        offset = 0;
+    }
+
+    if (remaining > 0) {
+        /*
+         * We copied less than the requested size: error out.
+         */
+        return -ENOBUFS;
+    }
+    return size;
+}
+
+/*
+ * Read @size bytes from the scatter/gather list @out_sg (@out_num
+ * elements), starting at byte @offset, into @dst.  Forwards to
+ * v9fs_packunpack() with the pack flag cleared and returns its result.
+ */
+static ssize_t v9fs_unpack(void *dst, struct iovec *out_sg, int out_num,
+ size_t offset, size_t size)
+{
+ return v9fs_packunpack(dst, out_sg, out_num, offset, size, 0);
+}
+
+/*
+ * Write @size bytes from @src into the scatter/gather list @in_sg
+ * (@in_num elements), starting at byte @offset.  Forwards to
+ * v9fs_packunpack() with the pack flag set and returns its result.
+ */
+ssize_t v9fs_pack(struct iovec *in_sg, int in_num, size_t offset,
+ const void *src, size_t size)
+{
+ /* const is cast away only because the shared helper takes a plain
+ * pointer; in pack mode the helper reads from this buffer. */
+ return v9fs_packunpack((void *)src, in_sg, in_num, offset, size, 1);
+}
+
+/*
+ * Decode values from the scatter/gather list @out_sg (@out_num elements),
+ * starting at byte @offset, according to the format string @fmt.  Each
+ * format letter consumes one pointer argument that receives the value:
+ *
+ *   b  uint8_t *        w  uint16_t *       d  uint32_t *
+ *   q  uint64_t *       s  V9fsString *     Q  V9fsQID *
+ *   S  V9fsStat *       I  V9fsIattr *
+ *
+ * When @bswap is non-zero, multi-byte integers are converted from
+ * little-endian to host order.  Unknown format letters are ignored and
+ * consume no argument.
+ *
+ * A string is a 16-bit length followed by that many bytes.  The decoded
+ * V9fsString gets a freshly allocated, NUL-terminated buffer; a
+ * zero-length string decodes to an allocated empty string.
+ *
+ * Returns the number of bytes consumed, or the negative error code of
+ * the first element that could not be read.
+ */
+ssize_t v9fs_unmarshal(struct iovec *out_sg, int out_num, size_t offset,
+                       int bswap, const char *fmt, ...)
+{
+    int i;
+    va_list ap;
+    ssize_t copied = 0;
+    size_t old_offset = offset;
+
+    va_start(ap, fmt);
+    for (i = 0; fmt[i]; i++) {
+        switch (fmt[i]) {
+        case 'b': {
+            uint8_t *valp = va_arg(ap, uint8_t *);
+            copied = v9fs_unpack(valp, out_sg, out_num, offset, sizeof(*valp));
+            break;
+        }
+        case 'w': {
+            /* zero-initialized so a failed read never stores garbage */
+            uint16_t val = 0, *valp;
+            valp = va_arg(ap, uint16_t *);
+            copied = v9fs_unpack(&val, out_sg, out_num, offset, sizeof(val));
+            if (bswap) {
+                *valp = le16_to_cpu(val);
+            } else {
+                *valp = val;
+            }
+            break;
+        }
+        case 'd': {
+            uint32_t val = 0, *valp;
+            valp = va_arg(ap, uint32_t *);
+            copied = v9fs_unpack(&val, out_sg, out_num, offset, sizeof(val));
+            if (bswap) {
+                *valp = le32_to_cpu(val);
+            } else {
+                *valp = val;
+            }
+            break;
+        }
+        case 'q': {
+            uint64_t val = 0, *valp;
+            valp = va_arg(ap, uint64_t *);
+            copied = v9fs_unpack(&val, out_sg, out_num, offset, sizeof(val));
+            if (bswap) {
+                *valp = le64_to_cpu(val);
+            } else {
+                *valp = val;
+            }
+            break;
+        }
+        case 's': {
+            V9fsString *str = va_arg(ap, V9fsString *);
+            copied = v9fs_unmarshal(out_sg, out_num, offset, bswap,
+                                    "w", &str->size);
+            if (copied > 0) {
+                offset += copied;
+                str->data = g_malloc(str->size + 1);
+                copied = v9fs_unpack(str->data, out_sg, out_num, offset,
+                                     str->size);
+                /*
+                 * Reading a zero-length string copies 0 bytes, which is
+                 * success; only a negative result means failure.
+                 */
+                if (copied >= 0) {
+                    str->data[str->size] = 0;
+                } else {
+                    v9fs_string_free(str);
+                }
+            }
+            break;
+        }
+        case 'Q': {
+            V9fsQID *qidp = va_arg(ap, V9fsQID *);
+            copied = v9fs_unmarshal(out_sg, out_num, offset, bswap, "bdq",
+                                    &qidp->type, &qidp->version, &qidp->path);
+            break;
+        }
+        case 'S': {
+            V9fsStat *statp = va_arg(ap, V9fsStat *);
+            copied = v9fs_unmarshal(out_sg, out_num, offset, bswap,
+                                    "wwdQdddqsssssddd",
+                                    &statp->size, &statp->type, &statp->dev,
+                                    &statp->qid, &statp->mode, &statp->atime,
+                                    &statp->mtime, &statp->length,
+                                    &statp->name, &statp->uid, &statp->gid,
+                                    &statp->muid, &statp->extension,
+                                    &statp->n_uid, &statp->n_gid,
+                                    &statp->n_muid);
+            break;
+        }
+        case 'I': {
+            V9fsIattr *iattr = va_arg(ap, V9fsIattr *);
+            copied = v9fs_unmarshal(out_sg, out_num, offset, bswap,
+                                    "ddddqqqqq",
+                                    &iattr->valid, &iattr->mode,
+                                    &iattr->uid, &iattr->gid, &iattr->size,
+                                    &iattr->atime_sec, &iattr->atime_nsec,
+                                    &iattr->mtime_sec, &iattr->mtime_nsec);
+            break;
+        }
+        default:
+            break;
+        }
+        if (copied < 0) {
+            va_end(ap);
+            return copied;
+        }
+        offset += copied;
+    }
+    va_end(ap);
+
+    return offset - old_offset;
+}
+
+/*
+ * Encode values into the scatter/gather list @in_sg (@in_num elements),
+ * starting at byte @offset, according to the format string @fmt.
+ *
+ * Format letters and the argument each one consumes:
+ *   b, w  an int, stored as 8 / 16 bits
+ *   d     a uint32_t
+ *   q     a uint64_t
+ *   s     V9fsString *   (16-bit length followed by the string bytes)
+ *   Q     V9fsQID *
+ *   S     V9fsStat *
+ *   A     V9fsStatDotl *
+ * Unknown letters are ignored and consume no argument.
+ *
+ * When @bswap is non-zero, multi-byte integers are written through the
+ * cpu_to_le*w() helpers.
+ *
+ * Returns the number of bytes written, or the negative error code of the
+ * first element that could not be written.
+ */
+ssize_t v9fs_marshal(struct iovec *in_sg, int in_num, size_t offset,
+ int bswap, const char *fmt, ...)
+{
+ int i;
+ va_list ap;
+ ssize_t copied = 0;
+ size_t old_offset = offset;
+
+ va_start(ap, fmt);
+ for (i = 0; fmt[i]; i++) {
+ switch (fmt[i]) {
+ case 'b': {
+ /* read as int: narrower types are promoted when passed through "..." */
+ uint8_t val = va_arg(ap, int);
+ copied = v9fs_pack(in_sg, in_num, offset, &val, sizeof(val));
+ break;
+ }
+ case 'w': {
+ uint16_t val;
+ if (bswap) {
+ cpu_to_le16w(&val, va_arg(ap, int));
+ } else {
+ val = va_arg(ap, int);
+ }
+ copied = v9fs_pack(in_sg, in_num, offset, &val, sizeof(val));
+ break;
+ }
+ case 'd': {
+ uint32_t val;
+ if (bswap) {
+ cpu_to_le32w(&val, va_arg(ap, uint32_t));
+ } else {
+ val = va_arg(ap, uint32_t);
+ }
+ copied = v9fs_pack(in_sg, in_num, offset, &val, sizeof(val));
+ break;
+ }
+ case 'q': {
+ uint64_t val;
+ if (bswap) {
+ cpu_to_le64w(&val, va_arg(ap, uint64_t));
+ } else {
+ val = va_arg(ap, uint64_t);
+ }
+ copied = v9fs_pack(in_sg, in_num, offset, &val, sizeof(val));
+ break;
+ }
+ case 's': {
+ V9fsString *str = va_arg(ap, V9fsString *);
+ /* length prefix first, then the raw bytes (no terminator) */
+ copied = v9fs_marshal(in_sg, in_num, offset, bswap,
+ "w", str->size);
+ if (copied > 0) {
+ offset += copied;
+ copied = v9fs_pack(in_sg, in_num, offset, str->data, str->size);
+ }
+ break;
+ }
+ case 'Q': {
+ V9fsQID *qidp = va_arg(ap, V9fsQID *);
+ copied = v9fs_marshal(in_sg, in_num, offset, bswap, "bdq",
+ qidp->type, qidp->version, qidp->path);
+ break;
+ }
+ case 'S': {
+ V9fsStat *statp = va_arg(ap, V9fsStat *);
+ copied = v9fs_marshal(in_sg, in_num, offset, bswap,
+ "wwdQdddqsssssddd",
+ statp->size, statp->type, statp->dev,
+ &statp->qid, statp->mode, statp->atime,
+ statp->mtime, statp->length, &statp->name,
+ &statp->uid, &statp->gid, &statp->muid,
+ &statp->extension, statp->n_uid,
+ statp->n_gid, statp->n_muid);
+ break;
+ }
+ case 'A': {
+ V9fsStatDotl *statp = va_arg(ap, V9fsStatDotl *);
+ copied = v9fs_marshal(in_sg, in_num, offset, bswap,
+ "qQdddqqqqqqqqqqqqqqq",
+ statp->st_result_mask,
+ &statp->qid, statp->st_mode,
+ statp->st_uid, statp->st_gid,
+ statp->st_nlink, statp->st_rdev,
+ statp->st_size, statp->st_blksize,
+ statp->st_blocks, statp->st_atime_sec,
+ statp->st_atime_nsec, statp->st_mtime_sec,
+ statp->st_mtime_nsec, statp->st_ctime_sec,
+ statp->st_ctime_nsec, statp->st_btime_sec,
+ statp->st_btime_nsec, statp->st_gen,
+ statp->st_data_version);
+ break;
+ }
+ default:
+ break;
+ }
+ /* stop at the first element that failed and report its error code */
+ if (copied < 0) {
+ va_end(ap);
+ return copied;
+ }
+ offset += copied;
+ }
+ va_end(ap);
+
+ return offset - old_offset;
+}
diff --git a/qemu/fsdev/virtio-9p-marshal.h b/qemu/fsdev/virtio-9p-marshal.h
new file mode 100644
index 000000000..5df65a835
--- /dev/null
+++ b/qemu/fsdev/virtio-9p-marshal.h
@@ -0,0 +1,90 @@
+#ifndef _QEMU_VIRTIO_9P_MARSHAL_H
+#define _QEMU_VIRTIO_9P_MARSHAL_H
+
+/*
+ * Counted string: a 16-bit byte count plus a heap buffer that the
+ * v9fs_string_*() helpers allocate and release.
+ */
+typedef struct V9fsString
+{
+ uint16_t size; /* number of bytes in data */
+ char *data; /* string buffer, NULL when the string is unset */
+} V9fsString;
+
+/*
+ * QID value: one 8-bit, one 32-bit and one 64-bit field, carried on the
+ * wire in this order by the 'Q' marshalling format.
+ */
+typedef struct V9fsQID
+{
+ int8_t type;
+ int32_t version;
+ int64_t path;
+} V9fsQID;
+
+/*
+ * Stat record handled by the 'S' marshalling format.  The fields are
+ * declared in the order in which they are marshalled.
+ */
+typedef struct V9fsStat
+{
+ int16_t size;
+ int16_t type;
+ int32_t dev;
+ V9fsQID qid;
+ int32_t mode;
+ int32_t atime;
+ int32_t mtime;
+ int64_t length;
+ V9fsString name;
+ V9fsString uid;
+ V9fsString gid;
+ V9fsString muid;
+ /* 9p2000.u */
+ V9fsString extension;
+ int32_t n_uid;
+ int32_t n_gid;
+ int32_t n_muid;
+} V9fsStat;
+
+/*
+ * Attribute record handled by the 'I' unmarshalling format: four 32-bit
+ * fields followed by five 64-bit fields, in declaration order.
+ */
+typedef struct V9fsIattr
+{
+ int32_t valid;
+ int32_t mode;
+ int32_t uid;
+ int32_t gid;
+ int64_t size;
+ int64_t atime_sec;
+ int64_t atime_nsec;
+ int64_t mtime_sec;
+ int64_t mtime_nsec;
+} V9fsIattr;
+
+/*
+ * Stat record handled by the 'A' marshalling format.  The fields are
+ * declared in the order in which they are marshalled.
+ */
+typedef struct V9fsStatDotl {
+ uint64_t st_result_mask;
+ V9fsQID qid;
+ uint32_t st_mode;
+ uint32_t st_uid;
+ uint32_t st_gid;
+ uint64_t st_nlink;
+ uint64_t st_rdev;
+ uint64_t st_size;
+ uint64_t st_blksize;
+ uint64_t st_blocks;
+ uint64_t st_atime_sec;
+ uint64_t st_atime_nsec;
+ uint64_t st_mtime_sec;
+ uint64_t st_mtime_nsec;
+ uint64_t st_ctime_sec;
+ uint64_t st_ctime_nsec;
+ uint64_t st_btime_sec;
+ uint64_t st_btime_nsec;
+ uint64_t st_gen;
+ uint64_t st_data_version;
+} V9fsStatDotl;
+
+/*
+ * Put @str into the empty state (no buffer, size 0).  Nothing is freed,
+ * so this is meant for a V9fsString that does not own a buffer yet.
+ */
+static inline void v9fs_string_init(V9fsString *str)
+{
+    str->size = 0;
+    str->data = NULL;
+}
+/* Helpers that manage the heap buffer of a V9fsString. */
+extern void v9fs_string_free(V9fsString *str);
+extern void v9fs_string_null(V9fsString *str);
+extern void v9fs_string_sprintf(V9fsString *str, const char *fmt, ...);
+extern void v9fs_string_copy(V9fsString *lhs, V9fsString *rhs);
+
+/*
+ * Raw and format-driven transfer between C values and a scatter/gather
+ * list, starting at byte @offset of the list.
+ */
+ssize_t v9fs_pack(struct iovec *in_sg, int in_num, size_t offset,
+ const void *src, size_t size);
+ssize_t v9fs_unmarshal(struct iovec *out_sg, int out_num, size_t offset,
+ int bswap, const char *fmt, ...);
+ssize_t v9fs_marshal(struct iovec *in_sg, int in_num, size_t offset,
+ int bswap, const char *fmt, ...);
+#endif