diff options
author | Don Dugger <n0ano@n0ano.com> | 2016-06-03 03:33:22 +0000 |
---|---|---|
committer | Gerrit Code Review <gerrit@172.30.200.206> | 2016-06-03 03:33:23 +0000 |
commit | da27230f80795d0028333713f036d44c53cb0e68 (patch) | |
tree | b3d379eaf000adf72b36cb01cdf4d79c3e3f064c /qemu/net | |
parent | 0e68cb048bb8aadb14675f5d4286d8ab2fc35449 (diff) | |
parent | 437fd90c0250dee670290f9b714253671a990160 (diff) |
Merge "These changes are the raw update to qemu-2.6."
Diffstat (limited to 'qemu/net')
-rw-r--r-- | qemu/net/Makefile.objs | 3 | ||||
-rw-r--r-- | qemu/net/checksum.c | 11 | ||||
-rw-r--r-- | qemu/net/dump.c | 246 | ||||
-rw-r--r-- | qemu/net/eth.c | 1 | ||||
-rw-r--r-- | qemu/net/filter-buffer.c | 212 | ||||
-rw-r--r-- | qemu/net/filter-mirror.c | 427 | ||||
-rw-r--r-- | qemu/net/filter.c | 275 | ||||
-rw-r--r-- | qemu/net/hub.c | 5 | ||||
-rw-r--r-- | qemu/net/l2tpv3.c | 8 | ||||
-rw-r--r-- | qemu/net/net.c | 298 | ||||
-rw-r--r-- | qemu/net/netmap.c | 163 | ||||
-rw-r--r-- | qemu/net/queue.c | 25 | ||||
-rw-r--r-- | qemu/net/slirp.c | 119 | ||||
-rw-r--r-- | qemu/net/socket.c | 26 | ||||
-rw-r--r-- | qemu/net/tap-aix.c | 3 | ||||
-rw-r--r-- | qemu/net/tap-bsd.c | 42 | ||||
-rw-r--r-- | qemu/net/tap-haiku.c | 3 | ||||
-rw-r--r-- | qemu/net/tap-linux.c | 8 | ||||
-rw-r--r-- | qemu/net/tap-linux.h | 1 | ||||
-rw-r--r-- | qemu/net/tap-solaris.c | 4 | ||||
-rw-r--r-- | qemu/net/tap-win32.c | 55 | ||||
-rw-r--r-- | qemu/net/tap.c | 17 | ||||
-rw-r--r-- | qemu/net/util.c | 3 | ||||
-rw-r--r-- | qemu/net/util.h | 1 | ||||
-rw-r--r-- | qemu/net/vde.c | 6 | ||||
-rw-r--r-- | qemu/net/vhost-user.c | 185 |
26 files changed, 1799 insertions, 348 deletions
diff --git a/qemu/net/Makefile.objs b/qemu/net/Makefile.objs index ec19cb31d..b7c22fddb 100644 --- a/qemu/net/Makefile.objs +++ b/qemu/net/Makefile.objs @@ -13,3 +13,6 @@ common-obj-$(CONFIG_HAIKU) += tap-haiku.o common-obj-$(CONFIG_SLIRP) += slirp.o common-obj-$(CONFIG_VDE) += vde.o common-obj-$(CONFIG_NETMAP) += netmap.o +common-obj-y += filter.o +common-obj-y += filter-buffer.o +common-obj-y += filter-mirror.o diff --git a/qemu/net/checksum.c b/qemu/net/checksum.c index 14c08550e..d0fa424cc 100644 --- a/qemu/net/checksum.c +++ b/qemu/net/checksum.c @@ -15,6 +15,7 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ +#include "qemu/osdep.h" #include "qemu-common.h" #include "net/checksum.h" @@ -59,6 +60,11 @@ void net_checksum_calculate(uint8_t *data, int length) int hlen, plen, proto, csum_offset; uint16_t csum; + /* Ensure data has complete L2 & L3 headers. */ + if (length < 14 + 20) { + return; + } + if ((data[14] & 0xf0) != 0x40) return; /* not IPv4 */ hlen = (data[14] & 0x0f) * 4; @@ -76,8 +82,9 @@ void net_checksum_calculate(uint8_t *data, int length) return; } - if (plen < csum_offset+2) - return; + if (plen < csum_offset + 2 || 14 + hlen + plen > length) { + return; + } data[14+hlen+csum_offset] = 0; data[14+hlen+csum_offset+1] = 0; diff --git a/qemu/net/dump.c b/qemu/net/dump.c index 02c8064be..41f7673ef 100644 --- a/qemu/net/dump.c +++ b/qemu/net/dump.c @@ -22,15 +22,18 @@ * THE SOFTWARE. */ +#include "qemu/osdep.h" #include "clients.h" +#include "qapi/error.h" #include "qemu-common.h" #include "qemu/error-report.h" +#include "qemu/iov.h" #include "qemu/log.h" #include "qemu/timer.h" -#include "hub.h" +#include "qapi/visitor.h" +#include "net/filter.h" typedef struct DumpState { - NetClientState nc; int64_t start_ts; int fd; int pcap_caplen; @@ -57,28 +60,33 @@ struct pcap_sf_pkthdr { uint32_t len; }; -static ssize_t dump_receive(NetClientState *nc, const uint8_t *buf, size_t size) +static ssize_t dump_receive_iov(DumpState *s, const struct iovec *iov, int cnt) { - DumpState *s = DO_UPCAST(DumpState, nc, nc); struct pcap_sf_pkthdr hdr; int64_t ts; int caplen; + size_t size = iov_size(iov, cnt); + struct iovec dumpiov[cnt + 1]; /* Early return in case of previous error. */ if (s->fd < 0) { return size; } - ts = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), 1000000, get_ticks_per_sec()); + ts = qemu_clock_get_us(QEMU_CLOCK_VIRTUAL); caplen = size > s->pcap_caplen ? s->pcap_caplen : size; hdr.ts.tv_sec = ts / 1000000 + s->start_ts; hdr.ts.tv_usec = ts % 1000000; hdr.caplen = caplen; hdr.len = size; - if (write(s->fd, &hdr, sizeof(hdr)) != sizeof(hdr) || - write(s->fd, buf, caplen) != caplen) { - qemu_log("-net dump write error - stop dump\n"); + + dumpiov[0].iov_base = &hdr; + dumpiov[0].iov_len = sizeof(hdr); + cnt = iov_copy(&dumpiov[1], cnt, iov, cnt, 0, caplen); + + if (writev(s->fd, dumpiov, cnt + 1) != sizeof(hdr) + caplen) { + error_report("network dump write error - stopping dump"); close(s->fd); s->fd = -1; } @@ -86,27 +94,16 @@ static ssize_t dump_receive(NetClientState *nc, const uint8_t *buf, size_t size) return size; } -static void dump_cleanup(NetClientState *nc) +static void dump_cleanup(DumpState *s) { - DumpState *s = DO_UPCAST(DumpState, nc, nc); - close(s->fd); + s->fd = -1; } -static NetClientInfo net_dump_info = { - .type = NET_CLIENT_OPTIONS_KIND_DUMP, - .size = sizeof(DumpState), - .receive = dump_receive, - .cleanup = dump_cleanup, -}; - -static int net_dump_init(NetClientState *peer, const char *device, - const char *name, const char *filename, int len, - Error **errp) +static int net_dump_state_init(DumpState *s, const char *filename, + int len, Error **errp) { struct pcap_file_hdr hdr; - NetClientState *nc; - DumpState *s; struct tm tm; int fd; @@ -130,13 +127,6 @@ static int net_dump_init(NetClientState *peer, const char *device, return -1; } - nc = qemu_new_net_client(&net_dump_info, peer, device, name); - - snprintf(nc->info_str, sizeof(nc->info_str), - "dump to %s (len=%d)", filename, len); - - s = DO_UPCAST(DumpState, nc, nc); - s->fd = fd; s->pcap_caplen = len; @@ -146,16 +136,61 @@ static int net_dump_init(NetClientState *peer, const char *device, return 0; } +/* Dumping via VLAN netclient */ + +struct DumpNetClient { + NetClientState nc; + DumpState ds; +}; +typedef struct DumpNetClient DumpNetClient; + +static ssize_t dumpclient_receive(NetClientState *nc, const uint8_t *buf, + size_t size) +{ + DumpNetClient *dc = DO_UPCAST(DumpNetClient, nc, nc); + struct iovec iov = { + .iov_base = (void *)buf, + .iov_len = size + }; + + return dump_receive_iov(&dc->ds, &iov, 1); +} + +static ssize_t dumpclient_receive_iov(NetClientState *nc, + const struct iovec *iov, int cnt) +{ + DumpNetClient *dc = DO_UPCAST(DumpNetClient, nc, nc); + + return dump_receive_iov(&dc->ds, iov, cnt); +} + +static void dumpclient_cleanup(NetClientState *nc) +{ + DumpNetClient *dc = DO_UPCAST(DumpNetClient, nc, nc); + + dump_cleanup(&dc->ds); +} + +static NetClientInfo net_dump_info = { + .type = NET_CLIENT_OPTIONS_KIND_DUMP, + .size = sizeof(DumpNetClient), + .receive = dumpclient_receive, + .receive_iov = dumpclient_receive_iov, + .cleanup = dumpclient_cleanup, +}; + int net_init_dump(const NetClientOptions *opts, const char *name, NetClientState *peer, Error **errp) { - int len; + int len, rc; const char *file; char def_file[128]; const NetdevDumpOptions *dump; + NetClientState *nc; + DumpNetClient *dnc; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_DUMP); - dump = opts->dump; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_DUMP); + dump = opts->u.dump.data; assert(peer); @@ -182,5 +217,148 @@ int net_init_dump(const NetClientOptions *opts, const char *name, len = 65536; } - return net_dump_init(peer, "dump", name, file, len, errp); + nc = qemu_new_net_client(&net_dump_info, peer, "dump", name); + snprintf(nc->info_str, sizeof(nc->info_str), + "dump to %s (len=%d)", file, len); + + dnc = DO_UPCAST(DumpNetClient, nc, nc); + rc = net_dump_state_init(&dnc->ds, file, len, errp); + if (rc) { + qemu_del_net_client(nc); + } + return rc; +} + +/* Dumping via filter */ + +#define TYPE_FILTER_DUMP "filter-dump" + +#define FILTER_DUMP(obj) \ + OBJECT_CHECK(NetFilterDumpState, (obj), TYPE_FILTER_DUMP) + +struct NetFilterDumpState { + NetFilterState nfs; + DumpState ds; + char *filename; + uint32_t maxlen; +}; +typedef struct NetFilterDumpState NetFilterDumpState; + +static ssize_t filter_dump_receive_iov(NetFilterState *nf, NetClientState *sndr, + unsigned flags, const struct iovec *iov, + int iovcnt, NetPacketSent *sent_cb) +{ + NetFilterDumpState *nfds = FILTER_DUMP(nf); + + dump_receive_iov(&nfds->ds, iov, iovcnt); + return 0; } + +static void filter_dump_cleanup(NetFilterState *nf) +{ + NetFilterDumpState *nfds = FILTER_DUMP(nf); + + dump_cleanup(&nfds->ds); +} + +static void filter_dump_setup(NetFilterState *nf, Error **errp) +{ + NetFilterDumpState *nfds = FILTER_DUMP(nf); + + if (!nfds->filename) { + error_setg(errp, "dump filter needs 'file' property set!"); + return; + } + + net_dump_state_init(&nfds->ds, nfds->filename, nfds->maxlen, errp); +} + +static void filter_dump_get_maxlen(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + NetFilterDumpState *nfds = FILTER_DUMP(obj); + uint32_t value = nfds->maxlen; + + visit_type_uint32(v, name, &value, errp); +} + +static void filter_dump_set_maxlen(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + NetFilterDumpState *nfds = FILTER_DUMP(obj); + Error *local_err = NULL; + uint32_t value; + + visit_type_uint32(v, name, &value, &local_err); + if (local_err) { + goto out; + } + if (value == 0) { + error_setg(&local_err, "Property '%s.%s' doesn't take value '%u'", + object_get_typename(obj), name, value); + goto out; + } + nfds->maxlen = value; + +out: + error_propagate(errp, local_err); +} + +static char *file_dump_get_filename(Object *obj, Error **errp) +{ + NetFilterDumpState *nfds = FILTER_DUMP(obj); + + return g_strdup(nfds->filename); +} + +static void file_dump_set_filename(Object *obj, const char *value, Error **errp) +{ + NetFilterDumpState *nfds = FILTER_DUMP(obj); + + g_free(nfds->filename); + nfds->filename = g_strdup(value); +} + +static void filter_dump_instance_init(Object *obj) +{ + NetFilterDumpState *nfds = FILTER_DUMP(obj); + + nfds->maxlen = 65536; + + object_property_add(obj, "maxlen", "int", filter_dump_get_maxlen, + filter_dump_set_maxlen, NULL, NULL, NULL); + object_property_add_str(obj, "file", file_dump_get_filename, + file_dump_set_filename, NULL); +} + +static void filter_dump_instance_finalize(Object *obj) +{ + NetFilterDumpState *nfds = FILTER_DUMP(obj); + + g_free(nfds->filename); +} + +static void filter_dump_class_init(ObjectClass *oc, void *data) +{ + NetFilterClass *nfc = NETFILTER_CLASS(oc); + + nfc->setup = filter_dump_setup; + nfc->cleanup = filter_dump_cleanup; + nfc->receive_iov = filter_dump_receive_iov; +} + +static const TypeInfo filter_dump_info = { + .name = TYPE_FILTER_DUMP, + .parent = TYPE_NETFILTER, + .class_init = filter_dump_class_init, + .instance_init = filter_dump_instance_init, + .instance_finalize = filter_dump_instance_finalize, + .instance_size = sizeof(NetFilterDumpState), +}; + +static void filter_dump_register_types(void) +{ + type_register_static(&filter_dump_info); +} + +type_init(filter_dump_register_types); diff --git a/qemu/net/eth.c b/qemu/net/eth.c index 7c61132cb..7e32d274c 100644 --- a/qemu/net/eth.c +++ b/qemu/net/eth.c @@ -15,6 +15,7 @@ * */ +#include "qemu/osdep.h" #include "net/eth.h" #include "net/checksum.h" #include "qemu-common.h" diff --git a/qemu/net/filter-buffer.c b/qemu/net/filter-buffer.c new file mode 100644 index 000000000..cc6bd9444 --- /dev/null +++ b/qemu/net/filter-buffer.c @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2015 FUJITSU LIMITED + * Author: Yang Hongyang <yanghy@cn.fujitsu.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "net/filter.h" +#include "net/queue.h" +#include "qapi/error.h" +#include "qemu-common.h" +#include "qemu/timer.h" +#include "qemu/iov.h" +#include "qapi/qmp/qerror.h" +#include "qapi-visit.h" +#include "qom/object.h" + +#define TYPE_FILTER_BUFFER "filter-buffer" + +#define FILTER_BUFFER(obj) \ + OBJECT_CHECK(FilterBufferState, (obj), TYPE_FILTER_BUFFER) + +typedef struct FilterBufferState { + NetFilterState parent_obj; + + NetQueue *incoming_queue; + uint32_t interval; + QEMUTimer release_timer; +} FilterBufferState; + +static void filter_buffer_flush(NetFilterState *nf) +{ + FilterBufferState *s = FILTER_BUFFER(nf); + + if (!qemu_net_queue_flush(s->incoming_queue)) { + /* Unable to empty the queue, purge remaining packets */ + qemu_net_queue_purge(s->incoming_queue, nf->netdev); + } +} + +static void filter_buffer_release_timer(void *opaque) +{ + NetFilterState *nf = opaque; + FilterBufferState *s = FILTER_BUFFER(nf); + + /* + * Note: filter_buffer_flush() drops packets that can't be sent + * TODO: We should leave them queued. But currently there's no way + * for the next filter or receiver to notify us that it can receive + * more packets. + */ + filter_buffer_flush(nf); + /* Timer rearmed to fire again in s->interval microseconds. */ + timer_mod(&s->release_timer, + qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval); +} + +/* filter APIs */ +static ssize_t filter_buffer_receive_iov(NetFilterState *nf, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb) +{ + FilterBufferState *s = FILTER_BUFFER(nf); + + /* + * We return size when buffer a packet, the sender will take it as + * a already sent packet, so sent_cb should not be called later. + * + * FIXME: Even if the guest can't receive packets for some reasons, + * the filter can still accept packets until its internal queue is full. + * For example: + * For some reason, receiver could not receive more packets + * (.can_receive() returns zero). Without a filter, at most one packet + * will be queued in incoming queue and sender's poll will be disabled + * unit its sent_cb() was called. With a filter, it will keep receiving + * the packets without caring about the receiver. This is suboptimal. + * May need more thoughts (e.g keeping sent_cb). + */ + qemu_net_queue_append_iov(s->incoming_queue, sender, flags, + iov, iovcnt, NULL); + return iov_size(iov, iovcnt); +} + +static void filter_buffer_cleanup(NetFilterState *nf) +{ + FilterBufferState *s = FILTER_BUFFER(nf); + + if (s->interval) { + timer_del(&s->release_timer); + } + + /* flush packets */ + if (s->incoming_queue) { + filter_buffer_flush(nf); + g_free(s->incoming_queue); + } +} + +static void filter_buffer_setup_timer(NetFilterState *nf) +{ + FilterBufferState *s = FILTER_BUFFER(nf); + + if (s->interval) { + timer_init_us(&s->release_timer, QEMU_CLOCK_VIRTUAL, + filter_buffer_release_timer, nf); + /* Timer armed to fire in s->interval microseconds. */ + timer_mod(&s->release_timer, + qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval); + } +} + +static void filter_buffer_setup(NetFilterState *nf, Error **errp) +{ + FilterBufferState *s = FILTER_BUFFER(nf); + + /* + * We may want to accept zero interval when VM FT solutions like MC + * or COLO use this filter to release packets on demand. + */ + if (!s->interval) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "interval", + "a non-zero interval"); + return; + } + + s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf); + filter_buffer_setup_timer(nf); +} + +static void filter_buffer_status_changed(NetFilterState *nf, Error **errp) +{ + FilterBufferState *s = FILTER_BUFFER(nf); + + if (!nf->on) { + if (s->interval) { + timer_del(&s->release_timer); + } + filter_buffer_flush(nf); + } else { + filter_buffer_setup_timer(nf); + } +} + +static void filter_buffer_class_init(ObjectClass *oc, void *data) +{ + NetFilterClass *nfc = NETFILTER_CLASS(oc); + + nfc->setup = filter_buffer_setup; + nfc->cleanup = filter_buffer_cleanup; + nfc->receive_iov = filter_buffer_receive_iov; + nfc->status_changed = filter_buffer_status_changed; +} + +static void filter_buffer_get_interval(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + FilterBufferState *s = FILTER_BUFFER(obj); + uint32_t value = s->interval; + + visit_type_uint32(v, name, &value, errp); +} + +static void filter_buffer_set_interval(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + FilterBufferState *s = FILTER_BUFFER(obj); + Error *local_err = NULL; + uint32_t value; + + visit_type_uint32(v, name, &value, &local_err); + if (local_err) { + goto out; + } + if (!value) { + error_setg(&local_err, "Property '%s.%s' requires a positive value", + object_get_typename(obj), name); + goto out; + } + s->interval = value; + +out: + error_propagate(errp, local_err); +} + +static void filter_buffer_init(Object *obj) +{ + object_property_add(obj, "interval", "int", + filter_buffer_get_interval, + filter_buffer_set_interval, NULL, NULL, NULL); +} + +static const TypeInfo filter_buffer_info = { + .name = TYPE_FILTER_BUFFER, + .parent = TYPE_NETFILTER, + .class_init = filter_buffer_class_init, + .instance_init = filter_buffer_init, + .instance_size = sizeof(FilterBufferState), +}; + +static void register_types(void) +{ + type_register_static(&filter_buffer_info); +} + +type_init(register_types); diff --git a/qemu/net/filter-mirror.c b/qemu/net/filter-mirror.c new file mode 100644 index 000000000..c0c4dc60b --- /dev/null +++ b/qemu/net/filter-mirror.c @@ -0,0 +1,427 @@ +/* + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * Copyright (c) 2016 FUJITSU LIMITED + * Copyright (c) 2016 Intel Corporation + * + * Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "net/filter.h" +#include "net/net.h" +#include "qemu-common.h" +#include "qapi/error.h" +#include "qapi/qmp/qerror.h" +#include "qapi-visit.h" +#include "qom/object.h" +#include "qemu/main-loop.h" +#include "qemu/error-report.h" +#include "trace.h" +#include "sysemu/char.h" +#include "qemu/iov.h" +#include "qemu/sockets.h" + +#define FILTER_MIRROR(obj) \ + OBJECT_CHECK(MirrorState, (obj), TYPE_FILTER_MIRROR) + +#define FILTER_REDIRECTOR(obj) \ + OBJECT_CHECK(MirrorState, (obj), TYPE_FILTER_REDIRECTOR) + +#define TYPE_FILTER_MIRROR "filter-mirror" +#define TYPE_FILTER_REDIRECTOR "filter-redirector" +#define REDIRECTOR_MAX_LEN NET_BUFSIZE + +typedef struct MirrorState { + NetFilterState parent_obj; + char *indev; + char *outdev; + CharDriverState *chr_in; + CharDriverState *chr_out; + int state; /* 0 = getting length, 1 = getting data */ + unsigned int index; + unsigned int packet_len; + uint8_t buf[REDIRECTOR_MAX_LEN]; +} MirrorState; + +static int filter_mirror_send(CharDriverState *chr_out, + const struct iovec *iov, + int iovcnt) +{ + int ret = 0; + ssize_t size = 0; + uint32_t len = 0; + char *buf; + + size = iov_size(iov, iovcnt); + if (!size) { + return 0; + } + + len = htonl(size); + ret = qemu_chr_fe_write_all(chr_out, (uint8_t *)&len, sizeof(len)); + if (ret != sizeof(len)) { + goto err; + } + + buf = g_malloc(size); + iov_to_buf(iov, iovcnt, 0, buf, size); + ret = qemu_chr_fe_write_all(chr_out, (uint8_t *)buf, size); + g_free(buf); + if (ret != size) { + goto err; + } + + return 0; + +err: + return ret < 0 ? ret : -EIO; +} + +static void +redirector_to_filter(NetFilterState *nf, const uint8_t *buf, int len) +{ + struct iovec iov = { + .iov_base = (void *)buf, + .iov_len = len, + }; + + if (nf->direction == NET_FILTER_DIRECTION_ALL || + nf->direction == NET_FILTER_DIRECTION_TX) { + qemu_netfilter_pass_to_next(nf->netdev, 0, &iov, 1, nf); + } + + if (nf->direction == NET_FILTER_DIRECTION_ALL || + nf->direction == NET_FILTER_DIRECTION_RX) { + qemu_netfilter_pass_to_next(nf->netdev->peer, 0, &iov, 1, nf); + } +} + +static int redirector_chr_can_read(void *opaque) +{ + return REDIRECTOR_MAX_LEN; +} + +static void redirector_chr_read(void *opaque, const uint8_t *buf, int size) +{ + NetFilterState *nf = opaque; + MirrorState *s = FILTER_REDIRECTOR(nf); + unsigned int l; + + while (size > 0) { + /* reassemble a packet from the network */ + switch (s->state) { /* 0 = getting length, 1 = getting data */ + case 0: + l = 4 - s->index; + if (l > size) { + l = size; + } + memcpy(s->buf + s->index, buf, l); + buf += l; + size -= l; + s->index += l; + if (s->index == 4) { + /* got length */ + s->packet_len = ntohl(*(uint32_t *)s->buf); + s->index = 0; + s->state = 1; + } + break; + case 1: + l = s->packet_len - s->index; + if (l > size) { + l = size; + } + if (s->index + l <= sizeof(s->buf)) { + memcpy(s->buf + s->index, buf, l); + } else { + error_report("serious error: oversized packet received."); + s->index = s->state = 0; + qemu_chr_add_handlers(s->chr_in, NULL, NULL, NULL, NULL); + return; + } + + s->index += l; + buf += l; + size -= l; + if (s->index >= s->packet_len) { + s->index = 0; + s->state = 0; + redirector_to_filter(nf, s->buf, s->packet_len); + } + break; + } + } +} + +static void redirector_chr_event(void *opaque, int event) +{ + NetFilterState *nf = opaque; + MirrorState *s = FILTER_REDIRECTOR(nf); + + switch (event) { + case CHR_EVENT_CLOSED: + qemu_chr_add_handlers(s->chr_in, NULL, NULL, NULL, NULL); + break; + default: + break; + } +} + +static ssize_t filter_mirror_receive_iov(NetFilterState *nf, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb) +{ + MirrorState *s = FILTER_MIRROR(nf); + int ret; + + ret = filter_mirror_send(s->chr_out, iov, iovcnt); + if (ret) { + error_report("filter_mirror_send failed(%s)", strerror(-ret)); + } + + /* + * we don't hope this error interrupt the normal + * path of net packet, so we always return zero. + */ + return 0; +} + +static ssize_t filter_redirector_receive_iov(NetFilterState *nf, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb) +{ + MirrorState *s = FILTER_REDIRECTOR(nf); + int ret; + + if (s->chr_out) { + ret = filter_mirror_send(s->chr_out, iov, iovcnt); + if (ret) { + error_report("filter_mirror_send failed(%s)", strerror(-ret)); + } + return iov_size(iov, iovcnt); + } else { + return 0; + } +} + +static void filter_mirror_cleanup(NetFilterState *nf) +{ + MirrorState *s = FILTER_MIRROR(nf); + + if (s->chr_out) { + qemu_chr_fe_release(s->chr_out); + } +} + +static void filter_redirector_cleanup(NetFilterState *nf) +{ + MirrorState *s = FILTER_REDIRECTOR(nf); + + if (s->chr_in) { + qemu_chr_add_handlers(s->chr_in, NULL, NULL, NULL, NULL); + qemu_chr_fe_release(s->chr_in); + } + if (s->chr_out) { + qemu_chr_fe_release(s->chr_out); + } +} + +static void filter_mirror_setup(NetFilterState *nf, Error **errp) +{ + MirrorState *s = FILTER_MIRROR(nf); + + if (!s->outdev) { + error_setg(errp, "filter filter mirror needs 'outdev' " + "property set"); + return; + } + + s->chr_out = qemu_chr_find(s->outdev); + if (s->chr_out == NULL) { + error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, + "Device '%s' not found", s->outdev); + return; + } + + if (qemu_chr_fe_claim(s->chr_out) != 0) { + error_setg(errp, QERR_DEVICE_IN_USE, s->outdev); + return; + } +} + +static void filter_redirector_setup(NetFilterState *nf, Error **errp) +{ + MirrorState *s = FILTER_REDIRECTOR(nf); + + if (!s->indev && !s->outdev) { + error_setg(errp, "filter redirector needs 'indev' or " + "'outdev' at least one property set"); + return; + } else if (s->indev && s->outdev) { + if (!strcmp(s->indev, s->outdev)) { + error_setg(errp, "'indev' and 'outdev' could not be same " + "for filter redirector"); + return; + } + } + + s->state = s->index = 0; + + if (s->indev) { + s->chr_in = qemu_chr_find(s->indev); + if (s->chr_in == NULL) { + error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, + "IN Device '%s' not found", s->indev); + return; + } + + qemu_chr_fe_claim_no_fail(s->chr_in); + qemu_chr_add_handlers(s->chr_in, redirector_chr_can_read, + redirector_chr_read, redirector_chr_event, nf); + } + + if (s->outdev) { + s->chr_out = qemu_chr_find(s->outdev); + if (s->chr_out == NULL) { + error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, + "OUT Device '%s' not found", s->outdev); + return; + } + qemu_chr_fe_claim_no_fail(s->chr_out); + } +} + +static void filter_mirror_class_init(ObjectClass *oc, void *data) +{ + NetFilterClass *nfc = NETFILTER_CLASS(oc); + + nfc->setup = filter_mirror_setup; + nfc->cleanup = filter_mirror_cleanup; + nfc->receive_iov = filter_mirror_receive_iov; +} + +static void filter_redirector_class_init(ObjectClass *oc, void *data) +{ + NetFilterClass *nfc = NETFILTER_CLASS(oc); + + nfc->setup = filter_redirector_setup; + nfc->cleanup = filter_redirector_cleanup; + nfc->receive_iov = filter_redirector_receive_iov; +} + +static char *filter_redirector_get_indev(Object *obj, Error **errp) +{ + MirrorState *s = FILTER_REDIRECTOR(obj); + + return g_strdup(s->indev); +} + +static void +filter_redirector_set_indev(Object *obj, const char *value, Error **errp) +{ + MirrorState *s = FILTER_REDIRECTOR(obj); + + g_free(s->indev); + s->indev = g_strdup(value); +} + +static char *filter_mirror_get_outdev(Object *obj, Error **errp) +{ + MirrorState *s = FILTER_MIRROR(obj); + + return g_strdup(s->outdev); +} + +static void +filter_mirror_set_outdev(Object *obj, const char *value, Error **errp) +{ + MirrorState *s = FILTER_MIRROR(obj); + + g_free(s->outdev); + s->outdev = g_strdup(value); + if (!s->outdev) { + error_setg(errp, "filter filter mirror needs 'outdev' " + "property set"); + return; + } +} + +static char *filter_redirector_get_outdev(Object *obj, Error **errp) +{ + MirrorState *s = FILTER_REDIRECTOR(obj); + + return g_strdup(s->outdev); +} + +static void +filter_redirector_set_outdev(Object *obj, const char *value, Error **errp) +{ + MirrorState *s = FILTER_REDIRECTOR(obj); + + g_free(s->outdev); + s->outdev = g_strdup(value); +} + +static void filter_mirror_init(Object *obj) +{ + object_property_add_str(obj, "outdev", filter_mirror_get_outdev, + filter_mirror_set_outdev, NULL); +} + +static void filter_redirector_init(Object *obj) +{ + object_property_add_str(obj, "indev", filter_redirector_get_indev, + filter_redirector_set_indev, NULL); + object_property_add_str(obj, "outdev", filter_redirector_get_outdev, + filter_redirector_set_outdev, NULL); +} + +static void filter_mirror_fini(Object *obj) +{ + MirrorState *s = FILTER_MIRROR(obj); + + g_free(s->outdev); +} + +static void filter_redirector_fini(Object *obj) +{ + MirrorState *s = FILTER_REDIRECTOR(obj); + + g_free(s->indev); + g_free(s->outdev); +} + +static const TypeInfo filter_redirector_info = { + .name = TYPE_FILTER_REDIRECTOR, + .parent = TYPE_NETFILTER, + .class_init = filter_redirector_class_init, + .instance_init = filter_redirector_init, + .instance_finalize = filter_redirector_fini, + .instance_size = sizeof(MirrorState), +}; + +static const TypeInfo filter_mirror_info = { + .name = TYPE_FILTER_MIRROR, + .parent = TYPE_NETFILTER, + .class_init = filter_mirror_class_init, + .instance_init = filter_mirror_init, + .instance_finalize = filter_mirror_fini, + .instance_size = sizeof(MirrorState), +}; + +static void register_types(void) +{ + type_register_static(&filter_mirror_info); + type_register_static(&filter_redirector_info); +} + +type_init(register_types); diff --git a/qemu/net/filter.c b/qemu/net/filter.c new file mode 100644 index 000000000..8ac79f3b7 --- /dev/null +++ b/qemu/net/filter.c @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2015 FUJITSU LIMITED + * Author: Yang Hongyang <yanghy@cn.fujitsu.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu-common.h" +#include "qapi/qmp/qerror.h" +#include "qemu/error-report.h" + +#include "net/filter.h" +#include "net/net.h" +#include "net/vhost_net.h" +#include "qom/object_interfaces.h" +#include "qemu/iov.h" + +static inline bool qemu_can_skip_netfilter(NetFilterState *nf) +{ + return !nf->on; +} + +ssize_t qemu_netfilter_receive(NetFilterState *nf, + NetFilterDirection direction, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb) +{ + if (qemu_can_skip_netfilter(nf)) { + return 0; + } + if (nf->direction == direction || + nf->direction == NET_FILTER_DIRECTION_ALL) { + return NETFILTER_GET_CLASS(OBJECT(nf))->receive_iov( + nf, sender, flags, iov, iovcnt, sent_cb); + } + + return 0; +} + +static NetFilterState *netfilter_next(NetFilterState *nf, + NetFilterDirection dir) +{ + NetFilterState *next; + + if (dir == NET_FILTER_DIRECTION_TX) { + /* forward walk through filters */ + next = QTAILQ_NEXT(nf, next); + } else { + /* reverse order */ + next = QTAILQ_PREV(nf, NetFilterHead, next); + } + + return next; +} + +ssize_t qemu_netfilter_pass_to_next(NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + void *opaque) +{ + int ret = 0; + int direction; + NetFilterState *nf = opaque; + NetFilterState *next = NULL; + + if (!sender || !sender->peer) { + /* no receiver, or sender been deleted, no need to pass it further */ + goto out; + } + + if (nf->direction == NET_FILTER_DIRECTION_ALL) { + if (sender == nf->netdev) { + /* This packet is sent by netdev itself */ + direction = NET_FILTER_DIRECTION_TX; + } else { + direction = NET_FILTER_DIRECTION_RX; + } + } else { + direction = nf->direction; + } + + next = netfilter_next(nf, direction); + while (next) { + /* + * if qemu_netfilter_pass_to_next been called, means that + * the packet has been hold by filter and has already retured size + * to the sender, so sent_cb shouldn't be called later, just + * pass NULL to next. + */ + ret = qemu_netfilter_receive(next, direction, sender, flags, iov, + iovcnt, NULL); + if (ret) { + return ret; + } + next = netfilter_next(next, direction); + } + + /* + * We have gone through all filters, pass it to receiver. + * Do the valid check again incase sender or receiver been + * deleted while we go through filters. + */ + if (sender && sender->peer) { + qemu_net_queue_send_iov(sender->peer->incoming_queue, + sender, flags, iov, iovcnt, NULL); + } + +out: + /* no receiver, or sender been deleted */ + return iov_size(iov, iovcnt); +} + +static char *netfilter_get_netdev_id(Object *obj, Error **errp) +{ + NetFilterState *nf = NETFILTER(obj); + + return g_strdup(nf->netdev_id); +} + +static void netfilter_set_netdev_id(Object *obj, const char *str, Error **errp) +{ + NetFilterState *nf = NETFILTER(obj); + + nf->netdev_id = g_strdup(str); +} + +static int netfilter_get_direction(Object *obj, Error **errp G_GNUC_UNUSED) +{ + NetFilterState *nf = NETFILTER(obj); + return nf->direction; +} + +static void netfilter_set_direction(Object *obj, int direction, Error **errp) +{ + NetFilterState *nf = NETFILTER(obj); + nf->direction = direction; +} + +static char *netfilter_get_status(Object *obj, Error **errp) +{ + NetFilterState *nf = NETFILTER(obj); + + return nf->on ? g_strdup("on") : g_strdup("off"); +} + +static void netfilter_set_status(Object *obj, const char *str, Error **errp) +{ + NetFilterState *nf = NETFILTER(obj); + NetFilterClass *nfc = NETFILTER_GET_CLASS(obj); + + if (strcmp(str, "on") && strcmp(str, "off")) { + error_setg(errp, "Invalid value for netfilter status, " + "should be 'on' or 'off'"); + return; + } + if (nf->on == !strcmp(str, "on")) { + return; + } + nf->on = !nf->on; + if (nf->netdev && nfc->status_changed) { + nfc->status_changed(nf, errp); + } +} + +static void netfilter_init(Object *obj) +{ + NetFilterState *nf = NETFILTER(obj); + + nf->on = true; + + object_property_add_str(obj, "netdev", + netfilter_get_netdev_id, netfilter_set_netdev_id, + NULL); + object_property_add_enum(obj, "queue", "NetFilterDirection", + NetFilterDirection_lookup, + netfilter_get_direction, netfilter_set_direction, + NULL); + object_property_add_str(obj, "status", + netfilter_get_status, netfilter_set_status, + NULL); +} + +static void netfilter_complete(UserCreatable *uc, Error **errp) +{ + NetFilterState *nf = NETFILTER(uc); + NetClientState *ncs[MAX_QUEUE_NUM]; + NetFilterClass *nfc = NETFILTER_GET_CLASS(uc); + int queues; + Error *local_err = NULL; + + if (!nf->netdev_id) { + error_setg(errp, "Parameter 'netdev' is required"); + return; + } + + queues = qemu_find_net_clients_except(nf->netdev_id, ncs, + NET_CLIENT_OPTIONS_KIND_NIC, + MAX_QUEUE_NUM); + if (queues < 1) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "netdev", + "a network backend id"); + return; + } else if (queues > 1) { + error_setg(errp, "multiqueue is not supported"); + return; + } + + if (get_vhost_net(ncs[0])) { + error_setg(errp, "Vhost is not supported"); + return; + } + + nf->netdev = ncs[0]; + + if (nfc->setup) { + nfc->setup(nf, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } + QTAILQ_INSERT_TAIL(&nf->netdev->filters, nf, next); +} + +static void netfilter_finalize(Object *obj) +{ + NetFilterState *nf = NETFILTER(obj); + NetFilterClass *nfc = NETFILTER_GET_CLASS(obj); + + if (nfc->cleanup) { + nfc->cleanup(nf); + } + + if (nf->netdev && !QTAILQ_EMPTY(&nf->netdev->filters) && + nf->next.tqe_prev) { + QTAILQ_REMOVE(&nf->netdev->filters, nf, next); + } + g_free(nf->netdev_id); +} + +static void netfilter_class_init(ObjectClass *oc, void *data) +{ + UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); + + ucc->complete = netfilter_complete; +} + +static const TypeInfo netfilter_info = { + .name = TYPE_NETFILTER, + .parent = TYPE_OBJECT, + .abstract = true, + .class_size = sizeof(NetFilterClass), + .class_init = netfilter_class_init, + .instance_size = sizeof(NetFilterState), + .instance_init = netfilter_init, + .instance_finalize = netfilter_finalize, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + +static void register_types(void) +{ + type_register_static(&netfilter_info); +} + +type_init(register_types); diff --git a/qemu/net/hub.c b/qemu/net/hub.c index 3047f1276..6d90c6ee6 100644 --- a/qemu/net/hub.c +++ b/qemu/net/hub.c @@ -12,6 +12,7 @@ * */ +#include "qemu/osdep.h" #include "monitor/monitor.h" #include "net/net.h" #include "clients.h" @@ -285,9 +286,9 @@ int net_init_hubport(const NetClientOptions *opts, const char *name, { const NetdevHubPortOptions *hubport; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_HUBPORT); + assert(opts->type == NET_CLIENT_OPTIONS_KIND_HUBPORT); assert(!peer); - hubport = opts->hubport; + hubport = opts->u.hubport.data; net_hub_add_port(hubport->hubid, name); return 0; diff --git a/qemu/net/l2tpv3.c b/qemu/net/l2tpv3.c index 4f9bceecc..5c668f737 100644 --- a/qemu/net/l2tpv3.c +++ b/qemu/net/l2tpv3.c @@ -23,9 +23,9 @@ * THE SOFTWARE. */ +#include "qemu/osdep.h" #include <linux/ip.h> #include <netdb.h> -#include "config-host.h" #include "net/net.h" #include "clients.h" #include "qemu-common.h" @@ -325,7 +325,7 @@ static int l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf) if (s->cookie_is_64) { cookie = ldq_be_p(buf + s->cookie_offset); } else { - cookie = ldl_be_p(buf + s->cookie_offset); + cookie = ldl_be_p(buf + s->cookie_offset) & 0xffffffffULL; } if (cookie != s->rx_cookie) { if (!s->header_mismatch) { @@ -545,8 +545,8 @@ int net_init_l2tpv3(const NetClientOptions *opts, s->queue_tail = 0; s->header_mismatch = false; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_L2TPV3); - l2tpv3 = opts->l2tpv3; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_L2TPV3); + l2tpv3 = opts->u.l2tpv3.data; if (l2tpv3->has_ipv6 && l2tpv3->ipv6) { s->ipv6 = l2tpv3->ipv6; diff --git a/qemu/net/net.c b/qemu/net/net.c index 28a5597b8..0ad6217cb 100644 --- a/qemu/net/net.c +++ b/qemu/net/net.c @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -#include "config-host.h" +#include "qemu/osdep.h" #include "net/net.h" #include "clients.h" @@ -32,9 +32,11 @@ #include "monitor/monitor.h" #include "qemu-common.h" +#include "qemu/help_option.h" #include "qapi/qmp/qerror.h" #include "qemu/error-report.h" #include "qemu/sockets.h" +#include "qemu/cutils.h" #include "qemu/config-file.h" #include "qmp-commands.h" #include "hw/qdev.h" @@ -42,8 +44,9 @@ #include "qemu/main-loop.h" #include "qapi-visit.h" #include "qapi/opts-visitor.h" -#include "qapi/dealloc-visitor.h" #include "sysemu/sysemu.h" +#include "net/filter.h" +#include "qapi/string-output-visitor.h" /* Net bridge is currently not supported for W32. */ #if !defined(_WIN32) @@ -78,34 +81,6 @@ int default_net = 1; /***********************************************************/ /* network device redirectors */ -#if defined(DEBUG_NET) -static void hex_dump(FILE *f, const uint8_t *buf, int size) -{ - int len, i, j, c; - - for(i=0;i<size;i+=16) { - len = size - i; - if (len > 16) - len = 16; - fprintf(f, "%08x ", i); - for(j=0;j<16;j++) { - if (j < len) - fprintf(f, " %02x", buf[i+j]); - else - fprintf(f, " "); - } - fprintf(f, " "); - for(j=0;j<len;j++) { - c = buf[i+j]; - if (c < ' ' || c > '~') - c = '.'; - fprintf(f, "%c", c); - } - fprintf(f, "\n"); - } -} -#endif - static int get_str_sep(char *buf, int buf_size, const char **pp, int sep) { const char *p, *p1; @@ -285,8 +260,9 @@ static void qemu_net_client_setup(NetClientState *nc, } QTAILQ_INSERT_TAIL(&net_clients, nc, next); - nc->incoming_queue = qemu_new_net_queue(nc); + nc->incoming_queue = qemu_new_net_queue(qemu_deliver_packet_iov, nc); nc->destructor = destructor; + QTAILQ_INIT(&nc->filters); } NetClientState *qemu_new_net_client(NetClientInfo *info, @@ -384,6 +360,7 @@ void qemu_del_net_client(NetClientState *nc) { NetClientState *ncs[MAX_QUEUE_NUM]; int queues, i; + NetFilterState *nf, *next; assert(nc->info->type != NET_CLIENT_OPTIONS_KIND_NIC); @@ -395,6 +372,10 @@ void qemu_del_net_client(NetClientState *nc) MAX_QUEUE_NUM); assert(queues != 0); + QTAILQ_FOREACH_SAFE(nf, &nc->filters, next, next) { + object_unparent(OBJECT(nf)); + } + /* If there is a peer NIC, delete and cleanup client, but do not free. */ if (nc->peer && nc->peer->info->type == NET_CLIENT_OPTIONS_KIND_NIC) { NICState *nic = qemu_get_nic(nc->peer); @@ -517,20 +498,28 @@ void qemu_set_vnet_hdr_len(NetClientState *nc, int len) int qemu_set_vnet_le(NetClientState *nc, bool is_le) { +#ifdef HOST_WORDS_BIGENDIAN if (!nc || !nc->info->set_vnet_le) { return -ENOSYS; } return nc->info->set_vnet_le(nc, is_le); +#else + return 0; +#endif } int qemu_set_vnet_be(NetClientState *nc, bool is_be) { +#ifdef HOST_WORDS_BIGENDIAN + return 0; +#else if (!nc || !nc->info->set_vnet_be) { return -ENOSYS; } return nc->info->set_vnet_be(nc, is_be); +#endif } int qemu_can_send_packet(NetClientState *sender) @@ -554,36 +543,54 @@ int qemu_can_send_packet(NetClientState *sender) return 1; } -ssize_t qemu_deliver_packet(NetClientState *sender, - unsigned flags, - const uint8_t *data, - size_t size, - void *opaque) +static ssize_t filter_receive_iov(NetClientState *nc, + NetFilterDirection direction, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb) { - NetClientState *nc = opaque; - ssize_t ret; - - if (nc->link_down) { - return size; - } + ssize_t ret = 0; + NetFilterState *nf = NULL; - if (nc->receive_disabled) { - return 0; - } - - if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) { - ret = nc->info->receive_raw(nc, data, size); + if (direction == NET_FILTER_DIRECTION_TX) { + QTAILQ_FOREACH(nf, &nc->filters, next) { + ret = qemu_netfilter_receive(nf, direction, sender, flags, iov, + iovcnt, sent_cb); + if (ret) { + return ret; + } + } } else { - ret = nc->info->receive(nc, data, size); - } - - if (ret == 0) { - nc->receive_disabled = 1; + QTAILQ_FOREACH_REVERSE(nf, &nc->filters, NetFilterHead, next) { + ret = qemu_netfilter_receive(nf, direction, sender, flags, iov, + iovcnt, sent_cb); + if (ret) { + return ret; + } + } } return ret; } +static ssize_t filter_receive(NetClientState *nc, + NetFilterDirection direction, + NetClientState *sender, + unsigned flags, + const uint8_t *data, + size_t size, + NetPacketSent *sent_cb) +{ + struct iovec iov = { + .iov_base = (void *)data, + .iov_len = size + }; + + return filter_receive_iov(nc, direction, sender, flags, &iov, 1, sent_cb); +} + void qemu_purge_queued_packets(NetClientState *nc) { if (!nc->peer) { @@ -625,16 +632,30 @@ static ssize_t qemu_send_packet_async_with_flags(NetClientState *sender, NetPacketSent *sent_cb) { NetQueue *queue; + int ret; #ifdef DEBUG_NET printf("qemu_send_packet_async:\n"); - hex_dump(stdout, buf, size); + qemu_hexdump((const char *)buf, stdout, "net", size); #endif if (sender->link_down || !sender->peer) { return size; } + /* Let filters handle the packet first */ + ret = filter_receive(sender, NET_FILTER_DIRECTION_TX, + sender, flags, buf, size, sent_cb); + if (ret) { + return ret; + } + + ret = filter_receive(sender->peer, NET_FILTER_DIRECTION_RX, + sender, flags, buf, size, sent_cb); + if (ret) { + return ret; + } + queue = sender->peer->incoming_queue; return qemu_net_queue_send(queue, sender, flags, buf, size, sent_cb); @@ -660,14 +681,30 @@ ssize_t qemu_send_packet_raw(NetClientState *nc, const uint8_t *buf, int size) } static ssize_t nc_sendv_compat(NetClientState *nc, const struct iovec *iov, - int iovcnt) + int iovcnt, unsigned flags) { - uint8_t buffer[NET_BUFSIZE]; + uint8_t *buf = NULL; + uint8_t *buffer; size_t offset; + ssize_t ret; - offset = iov_to_buf(iov, iovcnt, 0, buffer, sizeof(buffer)); + if (iovcnt == 1) { + buffer = iov[0].iov_base; + offset = iov[0].iov_len; + } else { + buf = g_new(uint8_t, NET_BUFSIZE); + buffer = buf; + offset = iov_to_buf(iov, iovcnt, 0, buf, NET_BUFSIZE); + } - return nc->info->receive(nc, buffer, offset); + if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) { + ret = nc->info->receive_raw(nc, buffer, offset); + } else { + ret = nc->info->receive(nc, buffer, offset); + } + + g_free(buf); + return ret; } ssize_t qemu_deliver_packet_iov(NetClientState *sender, @@ -690,7 +727,7 @@ ssize_t qemu_deliver_packet_iov(NetClientState *sender, if (nc->info->receive_iov) { ret = nc->info->receive_iov(nc, iov, iovcnt); } else { - ret = nc_sendv_compat(nc, iov, iovcnt); + ret = nc_sendv_compat(nc, iov, iovcnt, flags); } if (ret == 0) { @@ -705,11 +742,25 @@ ssize_t qemu_sendv_packet_async(NetClientState *sender, NetPacketSent *sent_cb) { NetQueue *queue; + int ret; if (sender->link_down || !sender->peer) { return iov_size(iov, iovcnt); } + /* Let filters handle the packet first */ + ret = filter_receive_iov(sender, NET_FILTER_DIRECTION_TX, sender, + QEMU_NET_PACKET_FLAG_NONE, iov, iovcnt, sent_cb); + if (ret) { + return ret; + } + + ret = filter_receive_iov(sender->peer, NET_FILTER_DIRECTION_RX, sender, + QEMU_NET_PACKET_FLAG_NONE, iov, iovcnt, sent_cb); + if (ret) { + return ret; + } + queue = sender->peer->incoming_queue; return qemu_net_queue_send_iov(queue, sender, @@ -820,8 +871,8 @@ static int net_init_nic(const NetClientOptions *opts, const char *name, NICInfo *nd; const NetLegacyNicOptions *nic; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_NIC); - nic = opts->nic; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_NIC); + nic = opts->u.nic.data; idx = nic_get_free_idx(); if (idx == -1 || nb_nics >= MAX_NICS) { @@ -881,7 +932,7 @@ static int net_init_nic(const NetClientOptions *opts, const char *name, } -static int (* const net_client_init_fun[NET_CLIENT_OPTIONS_KIND_MAX])( +static int (* const net_client_init_fun[NET_CLIENT_OPTIONS_KIND__MAX])( const NetClientOptions *opts, const char *name, NetClientState *peer, Error **errp) = { @@ -922,9 +973,9 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp) opts = netdev->opts; name = netdev->id; - if (opts->kind == NET_CLIENT_OPTIONS_KIND_DUMP || - opts->kind == NET_CLIENT_OPTIONS_KIND_NIC || - !net_client_init_fun[opts->kind]) { + if (opts->type == NET_CLIENT_OPTIONS_KIND_DUMP || + opts->type == NET_CLIENT_OPTIONS_KIND_NIC || + !net_client_init_fun[opts->type]) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type", "a netdev backend type"); return -1; @@ -935,16 +986,16 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp) /* missing optional values have been initialized to "all bits zero" */ name = net->has_id ? net->id : net->name; - if (opts->kind == NET_CLIENT_OPTIONS_KIND_NONE) { + if (opts->type == NET_CLIENT_OPTIONS_KIND_NONE) { return 0; /* nothing to do */ } - if (opts->kind == NET_CLIENT_OPTIONS_KIND_HUBPORT) { + if (opts->type == NET_CLIENT_OPTIONS_KIND_HUBPORT) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type", "a net type"); return -1; } - if (!net_client_init_fun[opts->kind]) { + if (!net_client_init_fun[opts->type]) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type", "a net backend type (maybe it is not compiled " "into this binary)"); @@ -952,17 +1003,17 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp) } /* Do not add to a vlan if it's a nic with a netdev= parameter. */ - if (opts->kind != NET_CLIENT_OPTIONS_KIND_NIC || - !opts->nic->has_netdev) { + if (opts->type != NET_CLIENT_OPTIONS_KIND_NIC || + !opts->u.nic.data->has_netdev) { peer = net_hub_add_port(net->has_vlan ? net->vlan : 0, NULL); } } - if (net_client_init_fun[opts->kind](opts, name, peer, errp) < 0) { + if (net_client_init_fun[opts->type](opts, name, peer, errp) < 0) { /* FIXME drop when all init functions store an Error */ if (errp && !*errp) { error_setg(errp, QERR_DEVICE_INIT_FAILED, - NetClientOptionsKind_lookup[opts->kind]); + NetClientOptionsKind_lookup[opts->type]); } return -1; } @@ -970,41 +1021,63 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp) } -static void net_visit(Visitor *v, int is_netdev, void **object, Error **errp) -{ - if (is_netdev) { - visit_type_Netdev(v, (Netdev **)object, NULL, errp); - } else { - visit_type_NetLegacy(v, (NetLegacy **)object, NULL, errp); - } -} - - int net_client_init(QemuOpts *opts, int is_netdev, Error **errp) { void *object = NULL; Error *err = NULL; int ret = -1; + OptsVisitor *ov = opts_visitor_new(opts); + Visitor *v = opts_get_visitor(ov); { - OptsVisitor *ov = opts_visitor_new(opts); + /* Parse convenience option format ip6-net=fec0::0[/64] */ + const char *ip6_net = qemu_opt_get(opts, "ipv6-net"); + + if (ip6_net) { + char buf[strlen(ip6_net) + 1]; + + if (get_str_sep(buf, sizeof(buf), &ip6_net, '/') < 0) { + /* Default 64bit prefix length. */ + qemu_opt_set(opts, "ipv6-prefix", ip6_net, &error_abort); + qemu_opt_set_number(opts, "ipv6-prefixlen", 64, &error_abort); + } else { + /* User-specified prefix length. */ + unsigned long len; + int err; + + qemu_opt_set(opts, "ipv6-prefix", buf, &error_abort); + err = qemu_strtoul(ip6_net, NULL, 10, &len); + + if (err) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, + "ipv6-prefix", "a number"); + } else { + qemu_opt_set_number(opts, "ipv6-prefixlen", len, + &error_abort); + } + } + qemu_opt_unset(opts, "ipv6-net"); + } + } - net_visit(opts_get_visitor(ov), is_netdev, &object, &err); - opts_visitor_cleanup(ov); + if (is_netdev) { + visit_type_Netdev(v, NULL, (Netdev **)&object, &err); + } else { + visit_type_NetLegacy(v, NULL, (NetLegacy **)&object, &err); } if (!err) { ret = net_client_init1(object, is_netdev, &err); } - if (object) { - QapiDeallocVisitor *dv = qapi_dealloc_visitor_new(); - - net_visit(qapi_dealloc_get_visitor(dv), is_netdev, &object, NULL); - qapi_dealloc_visitor_cleanup(dv); + if (is_netdev) { + qapi_free_Netdev(object); + } else { + qapi_free_NetLegacy(object); } error_propagate(errp, err); + opts_visitor_cleanup(ov); return ret; } @@ -1123,12 +1196,49 @@ void qmp_netdev_del(const char *id, Error **errp) qemu_opts_del(opts); } +static void netfilter_print_info(Monitor *mon, NetFilterState *nf) +{ + char *str; + ObjectProperty *prop; + ObjectPropertyIterator iter; + StringOutputVisitor *ov; + + /* generate info str */ + object_property_iter_init(&iter, OBJECT(nf)); + while ((prop = object_property_iter_next(&iter))) { + if (!strcmp(prop->name, "type")) { + continue; + } + ov = string_output_visitor_new(false); + object_property_get(OBJECT(nf), string_output_get_visitor(ov), + prop->name, NULL); + str = string_output_get_string(ov); + string_output_visitor_cleanup(ov); + monitor_printf(mon, ",%s=%s", prop->name, str); + g_free(str); + } + monitor_printf(mon, "\n"); +} + void print_net_client(Monitor *mon, NetClientState *nc) { + NetFilterState *nf; + monitor_printf(mon, "%s: index=%d,type=%s,%s\n", nc->name, nc->queue_index, NetClientOptionsKind_lookup[nc->info->type], nc->info_str); + if (!QTAILQ_EMPTY(&nc->filters)) { + monitor_printf(mon, "filters:\n"); + } + QTAILQ_FOREACH(nf, &nc->filters, next) { + char *path = object_get_canonical_path_component(OBJECT(nf)); + + monitor_printf(mon, " - %s: type=%s", path, + object_get_typename(OBJECT(nf))); + netfilter_print_info(mon, nf); + g_free(path); + } } RxFilterInfoList *qmp_query_rx_filter(bool has_name, const char *name, @@ -1154,6 +1264,12 @@ RxFilterInfoList *qmp_query_rx_filter(bool has_name, const char *name, continue; } + /* only query information on queue 0 since the info is per nic, + * not per queue + */ + if (nc->queue_index != 0) + continue; + if (nc->info->query_rx_filter) { info = nc->info->query_rx_filter(nc); entry = g_malloc0(sizeof(*entry)); @@ -1216,7 +1332,7 @@ void qmp_set_link(const char *name, bool up, Error **errp) int queues, i; queues = qemu_find_net_clients_except(name, ncs, - NET_CLIENT_OPTIONS_KIND_MAX, + NET_CLIENT_OPTIONS_KIND__MAX, MAX_QUEUE_NUM); if (queues == 0) { diff --git a/qemu/net/netmap.c b/qemu/net/netmap.c index 508b82947..6cc0db5ee 100644 --- a/qemu/net/netmap.c +++ b/qemu/net/netmap.c @@ -23,11 +23,10 @@ */ +#include "qemu/osdep.h" #include <sys/ioctl.h> #include <net/if.h> #include <sys/mman.h> -#include <stdint.h> -#include <stdio.h> #define NETMAP_WITH_LIBS #include <net/netmap.h> #include <net/netmap_user.h> @@ -37,23 +36,16 @@ #include "clients.h" #include "sysemu/sysemu.h" #include "qemu/error-report.h" +#include "qapi/error.h" #include "qemu/iov.h" - -/* Private netmap device info. */ -typedef struct NetmapPriv { - int fd; - size_t memsize; - void *mem; - struct netmap_if *nifp; - struct netmap_ring *rx; - struct netmap_ring *tx; - char fdname[PATH_MAX]; /* Normally "/dev/netmap". */ - char ifname[IFNAMSIZ]; -} NetmapPriv; +#include "qemu/cutils.h" typedef struct NetmapState { NetClientState nc; - NetmapPriv me; + struct nm_desc *nmd; + char ifname[IFNAMSIZ]; + struct netmap_ring *tx; + struct netmap_ring *rx; bool read_poll; bool write_poll; struct iovec iov[IOV_MAX]; @@ -90,46 +82,23 @@ pkt_copy(const void *_src, void *_dst, int l) * Open a netmap device. We assume there is only one queue * (which is the case for the VALE bridge). */ -static int netmap_open(NetmapPriv *me) +static struct nm_desc *netmap_open(const NetdevNetmapOptions *nm_opts, + Error **errp) { - int fd; - int err; - size_t l; + struct nm_desc *nmd; struct nmreq req; - me->fd = fd = open(me->fdname, O_RDWR); - if (fd < 0) { - error_report("Unable to open netmap device '%s' (%s)", - me->fdname, strerror(errno)); - return -1; - } memset(&req, 0, sizeof(req)); - pstrcpy(req.nr_name, sizeof(req.nr_name), me->ifname); - req.nr_ringid = NETMAP_NO_TX_POLL; - req.nr_version = NETMAP_API; - err = ioctl(fd, NIOCREGIF, &req); - if (err) { - error_report("Unable to register %s: %s", me->ifname, strerror(errno)); - goto error; - } - l = me->memsize = req.nr_memsize; - - me->mem = mmap(0, l, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0); - if (me->mem == MAP_FAILED) { - error_report("Unable to mmap netmap shared memory: %s", - strerror(errno)); - me->mem = NULL; - goto error; - } - me->nifp = NETMAP_IF(me->mem, req.nr_offset); - me->tx = NETMAP_TXRING(me->nifp, 0); - me->rx = NETMAP_RXRING(me->nifp, 0); - return 0; + nmd = nm_open(nm_opts->ifname, &req, NETMAP_NO_TX_POLL, + NULL); + if (nmd == NULL) { + error_setg_errno(errp, errno, "Failed to nm_open() %s", + nm_opts->ifname); + return NULL; + } -error: - close(me->fd); - return -1; + return nmd; } static void netmap_send(void *opaque); @@ -138,7 +107,7 @@ static void netmap_writable(void *opaque); /* Set the event-loop handlers for the netmap backend. */ static void netmap_update_fd_handler(NetmapState *s) { - qemu_set_fd_handler(s->me.fd, + qemu_set_fd_handler(s->nmd->fd, s->read_poll ? netmap_send : NULL, s->write_poll ? netmap_writable : NULL, s); @@ -190,7 +159,7 @@ static ssize_t netmap_receive(NetClientState *nc, const uint8_t *buf, size_t size) { NetmapState *s = DO_UPCAST(NetmapState, nc, nc); - struct netmap_ring *ring = s->me.tx; + struct netmap_ring *ring = s->tx; uint32_t i; uint32_t idx; uint8_t *dst; @@ -220,7 +189,7 @@ static ssize_t netmap_receive(NetClientState *nc, ring->slot[i].flags = 0; pkt_copy(buf, dst, size); ring->cur = ring->head = nm_ring_next(ring, i); - ioctl(s->me.fd, NIOCTXSYNC, NULL); + ioctl(s->nmd->fd, NIOCTXSYNC, NULL); return size; } @@ -229,7 +198,7 @@ static ssize_t netmap_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) { NetmapState *s = DO_UPCAST(NetmapState, nc, nc); - struct netmap_ring *ring = s->me.tx; + struct netmap_ring *ring = s->tx; uint32_t last; uint32_t idx; uint8_t *dst; @@ -286,7 +255,7 @@ static ssize_t netmap_receive_iov(NetClientState *nc, /* Now update ring->cur and ring->head. */ ring->cur = ring->head = i; - ioctl(s->me.fd, NIOCTXSYNC, NULL); + ioctl(s->nmd->fd, NIOCTXSYNC, NULL); return iov_size(iov, iovcnt); } @@ -303,7 +272,7 @@ static void netmap_send_completed(NetClientState *nc, ssize_t len) static void netmap_send(void *opaque) { NetmapState *s = opaque; - struct netmap_ring *ring = s->me.rx; + struct netmap_ring *ring = s->rx; /* Keep sending while there are available packets into the netmap RX ring and the forwarding path towards the peer is open. */ @@ -351,27 +320,52 @@ static void netmap_cleanup(NetClientState *nc) qemu_purge_queued_packets(nc); netmap_poll(nc, false); - munmap(s->me.mem, s->me.memsize); - close(s->me.fd); - - s->me.fd = -1; + nm_close(s->nmd); + s->nmd = NULL; } /* Offloading manipulation support callbacks. */ -static bool netmap_has_ufo(NetClientState *nc) +static int netmap_fd_set_vnet_hdr_len(NetmapState *s, int len) { - return true; + struct nmreq req; + + /* Issue a NETMAP_BDG_VNET_HDR command to change the virtio-net header + * length for the netmap adapter associated to 's->ifname'. + */ + memset(&req, 0, sizeof(req)); + pstrcpy(req.nr_name, sizeof(req.nr_name), s->ifname); + req.nr_version = NETMAP_API; + req.nr_cmd = NETMAP_BDG_VNET_HDR; + req.nr_arg1 = len; + + return ioctl(s->nmd->fd, NIOCREGIF, &req); } -static bool netmap_has_vnet_hdr(NetClientState *nc) +static bool netmap_has_vnet_hdr_len(NetClientState *nc, int len) { + NetmapState *s = DO_UPCAST(NetmapState, nc, nc); + int prev_len = s->vnet_hdr_len; + + /* Check that we can set the new length. */ + if (netmap_fd_set_vnet_hdr_len(s, len)) { + return false; + } + + /* Restore the previous length. */ + if (netmap_fd_set_vnet_hdr_len(s, prev_len)) { + error_report("Failed to restore vnet-hdr length %d on %s: %s", + prev_len, s->ifname, strerror(errno)); + abort(); + } + return true; } -static bool netmap_has_vnet_hdr_len(NetClientState *nc, int len) +/* A netmap interface that supports virtio-net headers always + * supports UFO, so we use this callback also for the has_ufo hook. */ +static bool netmap_has_vnet_hdr(NetClientState *nc) { - return len == 0 || len == sizeof(struct virtio_net_hdr) || - len == sizeof(struct virtio_net_hdr_mrg_rxbuf); + return netmap_has_vnet_hdr_len(nc, sizeof(struct virtio_net_hdr)); } static void netmap_using_vnet_hdr(NetClientState *nc, bool enable) @@ -382,20 +376,11 @@ static void netmap_set_vnet_hdr_len(NetClientState *nc, int len) { NetmapState *s = DO_UPCAST(NetmapState, nc, nc); int err; - struct nmreq req; - /* Issue a NETMAP_BDG_VNET_HDR command to change the virtio-net header - * length for the netmap adapter associated to 'me->ifname'. - */ - memset(&req, 0, sizeof(req)); - pstrcpy(req.nr_name, sizeof(req.nr_name), s->me.ifname); - req.nr_version = NETMAP_API; - req.nr_cmd = NETMAP_BDG_VNET_HDR; - req.nr_arg1 = len; - err = ioctl(s->me.fd, NIOCREGIF, &req); + err = netmap_fd_set_vnet_hdr_len(s, len); if (err) { - error_report("Unable to execute NETMAP_BDG_VNET_HDR on %s: %s", - s->me.ifname, strerror(errno)); + error_report("Unable to set vnet-hdr length %d on %s: %s", + len, s->ifname, strerror(errno)); } else { /* Keep track of the current length. */ s->vnet_hdr_len = len; @@ -408,8 +393,7 @@ static void netmap_set_offload(NetClientState *nc, int csum, int tso4, int tso6, NetmapState *s = DO_UPCAST(NetmapState, nc, nc); /* Setting a virtio-net header length greater than zero automatically - * enables the offloadings. - */ + * enables the offloadings. */ if (!s->vnet_hdr_len) { netmap_set_vnet_hdr_len(nc, sizeof(struct virtio_net_hdr)); } @@ -423,7 +407,7 @@ static NetClientInfo net_netmap_info = { .receive_iov = netmap_receive_iov, .poll = netmap_poll, .cleanup = netmap_cleanup, - .has_ufo = netmap_has_ufo, + .has_ufo = netmap_has_vnet_hdr, .has_vnet_hdr = netmap_has_vnet_hdr, .has_vnet_hdr_len = netmap_has_vnet_hdr_len, .using_vnet_hdr = netmap_using_vnet_hdr, @@ -438,24 +422,25 @@ static NetClientInfo net_netmap_info = { int net_init_netmap(const NetClientOptions *opts, const char *name, NetClientState *peer, Error **errp) { - /* FIXME error_setg(errp, ...) on failure */ - const NetdevNetmapOptions *netmap_opts = opts->netmap; + const NetdevNetmapOptions *netmap_opts = opts->u.netmap.data; + struct nm_desc *nmd; NetClientState *nc; - NetmapPriv me; + Error *err = NULL; NetmapState *s; - pstrcpy(me.fdname, sizeof(me.fdname), - netmap_opts->has_devname ? netmap_opts->devname : "/dev/netmap"); - /* Set default name for the port if not supplied. */ - pstrcpy(me.ifname, sizeof(me.ifname), netmap_opts->ifname); - if (netmap_open(&me)) { + nmd = netmap_open(netmap_opts, &err); + if (err) { + error_propagate(errp, err); return -1; } /* Create the object. */ nc = qemu_new_net_client(&net_netmap_info, peer, "netmap", name); s = DO_UPCAST(NetmapState, nc, nc); - s->me = me; + s->nmd = nmd; + s->tx = NETMAP_TXRING(nmd->nifp, 0); + s->rx = NETMAP_RXRING(nmd->nifp, 0); s->vnet_hdr_len = 0; + pstrcpy(s->ifname, sizeof(s->ifname), netmap_opts->ifname); netmap_read_poll(s, true); /* Initially only poll for reads. */ return 0; diff --git a/qemu/net/queue.c b/qemu/net/queue.c index ebbe2bb93..9c32abdb8 100644 --- a/qemu/net/queue.c +++ b/qemu/net/queue.c @@ -21,6 +21,7 @@ * THE SOFTWARE. */ +#include "qemu/osdep.h" #include "net/queue.h" #include "qemu/queue.h" #include "net/net.h" @@ -52,13 +53,14 @@ struct NetQueue { void *opaque; uint32_t nq_maxlen; uint32_t nq_count; + NetQueueDeliverFunc *deliver; QTAILQ_HEAD(packets, NetPacket) packets; unsigned delivering : 1; }; -NetQueue *qemu_new_net_queue(void *opaque) +NetQueue *qemu_new_net_queue(NetQueueDeliverFunc *deliver, void *opaque) { NetQueue *queue; @@ -67,6 +69,7 @@ NetQueue *qemu_new_net_queue(void *opaque) queue->opaque = opaque; queue->nq_maxlen = 10000; queue->nq_count = 0; + queue->deliver = deliver; QTAILQ_INIT(&queue->packets); @@ -110,12 +113,12 @@ static void qemu_net_queue_append(NetQueue *queue, QTAILQ_INSERT_TAIL(&queue->packets, packet, entry); } -static void qemu_net_queue_append_iov(NetQueue *queue, - NetClientState *sender, - unsigned flags, - const struct iovec *iov, - int iovcnt, - NetPacketSent *sent_cb) +void qemu_net_queue_append_iov(NetQueue *queue, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb) { NetPacket *packet; size_t max_len = 0; @@ -152,9 +155,13 @@ static ssize_t qemu_net_queue_deliver(NetQueue *queue, size_t size) { ssize_t ret = -1; + struct iovec iov = { + .iov_base = (void *)data, + .iov_len = size + }; queue->delivering = 1; - ret = qemu_deliver_packet(sender, flags, data, size, queue->opaque); + ret = queue->deliver(sender, flags, &iov, 1, queue->opaque); queue->delivering = 0; return ret; @@ -169,7 +176,7 @@ static ssize_t qemu_net_queue_deliver_iov(NetQueue *queue, ssize_t ret = -1; queue->delivering = 1; - ret = qemu_deliver_packet_iov(sender, flags, iov, iovcnt, queue->opaque); + ret = queue->deliver(sender, flags, iov, iovcnt, queue->opaque); queue->delivering = 0; return ret; diff --git a/qemu/net/slirp.c b/qemu/net/slirp.c index 7657b38fd..31630f005 100644 --- a/qemu/net/slirp.c +++ b/qemu/net/slirp.c @@ -21,9 +21,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ +#include "qemu/osdep.h" #include "net/slirp.h" -#include "config-host.h" #ifndef _WIN32 #include <pwd.h> @@ -36,7 +36,9 @@ #include "qemu/error-report.h" #include "qemu/sockets.h" #include "slirp/libslirp.h" +#include "slirp/ip6.h" #include "sysemu/char.h" +#include "qemu/cutils.h" static int get_str_sep(char *buf, int buf_size, const char **pp, int sep) { @@ -134,11 +136,14 @@ static NetClientInfo net_slirp_info = { static int net_slirp_init(NetClientState *peer, const char *model, const char *name, int restricted, - const char *vnetwork, const char *vhost, + bool ipv4, const char *vnetwork, const char *vhost, + bool ipv6, const char *vprefix6, int vprefix6_len, + const char *vhost6, const char *vhostname, const char *tftp_export, const char *bootfile, const char *vdhcp_start, - const char *vnameserver, const char *smb_export, - const char *vsmbserver, const char **dnssearch) + const char *vnameserver, const char *vnameserver6, + const char *smb_export, const char *vsmbserver, + const char **dnssearch) { /* default settings according to historic slirp */ struct in_addr net = { .s_addr = htonl(0x0a000200) }; /* 10.0.2.0 */ @@ -146,6 +151,9 @@ static int net_slirp_init(NetClientState *peer, const char *model, struct in_addr host = { .s_addr = htonl(0x0a000202) }; /* 10.0.2.2 */ struct in_addr dhcp = { .s_addr = htonl(0x0a00020f) }; /* 10.0.2.15 */ struct in_addr dns = { .s_addr = htonl(0x0a000203) }; /* 10.0.2.3 */ + struct in6_addr ip6_prefix; + struct in6_addr ip6_host; + struct in6_addr ip6_dns; #ifndef _WIN32 struct in_addr smbsrv = { .s_addr = 0 }; #endif @@ -157,6 +165,19 @@ static int net_slirp_init(NetClientState *peer, const char *model, char *end; struct slirp_config_str *config; + if (!ipv4 && (vnetwork || vhost || vnameserver)) { + return -1; + } + + if (!ipv6 && (vprefix6 || vhost6 || vnameserver6)) { + return -1; + } + + if (!ipv4 && !ipv6) { + /* It doesn't make sense to disable both */ + return -1; + } + if (!tftp_export) { tftp_export = legacy_tftp_prefix; } @@ -235,6 +256,64 @@ static int net_slirp_init(NetClientState *peer, const char *model, } #endif +#if defined(_WIN32) && (_WIN32_WINNT < 0x0600) + /* No inet_pton helper before Vista... */ + if (vprefix6) { + /* Unsupported */ + return -1; + } + memset(&ip6_prefix, 0, sizeof(ip6_prefix)); + ip6_prefix.s6_addr[0] = 0xfe; + ip6_prefix.s6_addr[1] = 0xc0; +#else + if (!vprefix6) { + vprefix6 = "fec0::"; + } + if (!inet_pton(AF_INET6, vprefix6, &ip6_prefix)) { + return -1; + } +#endif + + if (!vprefix6_len) { + vprefix6_len = 64; + } + if (vprefix6_len < 0 || vprefix6_len > 126) { + return -1; + } + + if (vhost6) { +#if defined(_WIN32) && (_WIN32_WINNT < 0x0600) + return -1; +#else + if (!inet_pton(AF_INET6, vhost6, &ip6_host)) { + return -1; + } + if (!in6_equal_net(&ip6_prefix, &ip6_host, vprefix6_len)) { + return -1; + } +#endif + } else { + ip6_host = ip6_prefix; + ip6_host.s6_addr[15] |= 2; + } + + if (vnameserver6) { +#if defined(_WIN32) && (_WIN32_WINNT < 0x0600) + return -1; +#else + if (!inet_pton(AF_INET6, vnameserver6, &ip6_dns)) { + return -1; + } + if (!in6_equal_net(&ip6_prefix, &ip6_dns, vprefix6_len)) { + return -1; + } +#endif + } else { + ip6_dns = ip6_prefix; + ip6_dns.s6_addr[15] |= 3; + } + + nc = qemu_new_net_client(&net_slirp_info, peer, model, name); snprintf(nc->info_str, sizeof(nc->info_str), @@ -243,8 +322,10 @@ static int net_slirp_init(NetClientState *peer, const char *model, s = DO_UPCAST(SlirpState, nc, nc); - s->slirp = slirp_init(restricted, net, mask, host, vhostname, - tftp_export, bootfile, dhcp, dns, dnssearch, s); + s->slirp = slirp_init(restricted, ipv4, net, mask, host, + ipv6, ip6_prefix, vprefix6_len, ip6_host, + vhostname, tftp_export, bootfile, dhcp, + dns, ip6_dns, dnssearch, s); QTAILQ_INSERT_TAIL(&slirp_stacks, s, entry); for (config = slirp_configs; config; config = config->next) { @@ -745,9 +826,19 @@ int net_init_slirp(const NetClientOptions *opts, const char *name, int ret; const NetdevUserOptions *user; const char **dnssearch; + bool ipv4 = true, ipv6 = true; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_USER); - user = opts->user; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_USER); + user = opts->u.user.data; + + if ((user->has_ipv6 && user->ipv6 && !user->has_ipv4) || + (user->has_ipv4 && !user->ipv4)) { + ipv4 = 0; + } + if ((user->has_ipv4 && user->ipv4 && !user->has_ipv6) || + (user->has_ipv6 && !user->ipv6)) { + ipv6 = 0; + } vnet = user->has_net ? g_strdup(user->net) : user->has_ip ? g_strdup_printf("%s/24", user->ip) : @@ -760,9 +851,12 @@ int net_init_slirp(const NetClientOptions *opts, const char *name, net_init_slirp_configs(user->hostfwd, SLIRP_CFG_HOSTFWD); net_init_slirp_configs(user->guestfwd, 0); - ret = net_slirp_init(peer, "user", name, user->q_restrict, vnet, - user->host, user->hostname, user->tftp, - user->bootfile, user->dhcpstart, user->dns, user->smb, + ret = net_slirp_init(peer, "user", name, user->q_restrict, + ipv4, vnet, user->host, + ipv6, user->ipv6_prefix, user->ipv6_prefixlen, + user->ipv6_host, user->hostname, user->tftp, + user->bootfile, user->dhcpstart, + user->dns, user->ipv6_dns, user->smb, user->smbserver, dnssearch); while (slirp_configs) { @@ -784,6 +878,9 @@ int net_slirp_parse_legacy(QemuOptsList *opts_list, const char *optarg, int *ret return 0; } + error_report("The '-net channel' option is deprecated. " + "Please use '-netdev user,guestfwd=...' instead."); + /* handle legacy -net channel,port:chr */ optarg += strlen("channel,"); diff --git a/qemu/net/socket.c b/qemu/net/socket.c index b1e3b1c8d..9fa2cd8d5 100644 --- a/qemu/net/socket.c +++ b/qemu/net/socket.c @@ -21,11 +21,12 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -#include "config-host.h" +#include "qemu/osdep.h" #include "net/net.h" #include "clients.h" #include "monitor/monitor.h" +#include "qapi/error.h" #include "qemu-common.h" #include "qemu/error-report.h" #include "qemu/option.h" @@ -145,15 +146,14 @@ static void net_socket_send_completed(NetClientState *nc, ssize_t len) static void net_socket_send(void *opaque) { NetSocketState *s = opaque; - int size, err; + int size; unsigned l; uint8_t buf1[NET_BUFSIZE]; const uint8_t *buf; size = qemu_recv(s->fd, buf1, sizeof(buf1), 0); if (size < 0) { - err = socket_error(); - if (err != EWOULDBLOCK) + if (errno != EWOULDBLOCK) goto eoc; } else if (size == 0) { /* end of connection */ @@ -566,7 +566,7 @@ static int net_socket_connect_init(NetClientState *peer, const char *host_str) { NetSocketState *s; - int fd, connected, ret, err; + int fd, connected, ret; struct sockaddr_in saddr; if (parse_host_port(&saddr, host_str) < 0) @@ -583,14 +583,12 @@ static int net_socket_connect_init(NetClientState *peer, for(;;) { ret = connect(fd, (struct sockaddr *)&saddr, sizeof(saddr)); if (ret < 0) { - err = socket_error(); - if (err == EINTR || err == EWOULDBLOCK) { - } else if (err == EINPROGRESS) { - break; -#ifdef _WIN32 - } else if (err == WSAEALREADY || err == WSAEINVAL) { + if (errno == EINTR || errno == EWOULDBLOCK) { + /* continue */ + } else if (errno == EINPROGRESS || + errno == EALREADY || + errno == EINVAL) { break; -#endif } else { perror("connect"); closesocket(fd); @@ -706,8 +704,8 @@ int net_init_socket(const NetClientOptions *opts, const char *name, Error *err = NULL; const NetdevSocketOptions *sock; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_SOCKET); - sock = opts->socket; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_SOCKET); + sock = opts->u.socket.data; if (sock->has_fd + sock->has_listen + sock->has_connect + sock->has_mcast + sock->has_udp != 1) { diff --git a/qemu/net/tap-aix.c b/qemu/net/tap-aix.c index e84fc3913..0e6da6396 100644 --- a/qemu/net/tap-aix.c +++ b/qemu/net/tap-aix.c @@ -22,8 +22,9 @@ * THE SOFTWARE. */ +#include "qemu/osdep.h" +#include "qapi/error.h" #include "tap_int.h" -#include <stdio.h> int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required, int mq_required, Error **errp) diff --git a/qemu/net/tap-bsd.c b/qemu/net/tap-bsd.c index 7028d9be9..c506ac31d 100644 --- a/qemu/net/tap-bsd.c +++ b/qemu/net/tap-bsd.c @@ -22,8 +22,10 @@ * THE SOFTWARE. */ +#include "qemu/osdep.h" +#include "qapi/error.h" #include "tap_int.h" -#include "qemu-common.h" +#include "qemu/cutils.h" #include "sysemu/sysemu.h" #include "qemu/error-report.h" @@ -109,8 +111,7 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, #define PATH_NET_TAP "/dev/tap" -int tap_open(char *ifname, int ifname_size, int *vnet_hdr, - int vnet_hdr_required, int mq_required, Error **errp) +static int tap_open_clone(char *ifname, int ifname_size, Error **errp) { int fd, s, ret; struct ifreq ifr; @@ -126,7 +127,8 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, ret = ioctl(fd, TAPGIFNAME, (void *)&ifr); if (ret < 0) { error_setg_errno(errp, errno, "could not get tap interface name"); - goto error; + close(fd); + return -1; } if (ifname[0] != '\0') { @@ -135,19 +137,47 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, if (s < 0) { error_setg_errno(errp, errno, "could not open socket to set interface name"); - goto error; + close(fd); + return -1; } ifr.ifr_data = ifname; ret = ioctl(s, SIOCSIFNAME, (void *)&ifr); close(s); if (ret < 0) { error_setg(errp, "could not set tap interface name"); - goto error; + close(fd); + return -1; } } else { pstrcpy(ifname, ifname_size, ifr.ifr_name); } + return fd; +} + +int tap_open(char *ifname, int ifname_size, int *vnet_hdr, + int vnet_hdr_required, int mq_required, Error **errp) +{ + int fd = -1; + + /* If the specified tap device already exists just use it. */ + if (ifname[0] != '\0') { + char dname[100]; + snprintf(dname, sizeof dname, "/dev/%s", ifname); + TFR(fd = open(dname, O_RDWR)); + if (fd < 0 && errno != ENOENT) { + error_setg_errno(errp, errno, "could not open %s", dname); + return -1; + } + } + + if (fd < 0) { + /* Tap device not specified or does not exist. */ + if ((fd = tap_open_clone(ifname, ifname_size, errp)) < 0) { + return -1; + } + } + if (*vnet_hdr) { /* BSD doesn't have IFF_VNET_HDR */ *vnet_hdr = 0; diff --git a/qemu/net/tap-haiku.c b/qemu/net/tap-haiku.c index 2e738ec6a..b27e57e95 100644 --- a/qemu/net/tap-haiku.c +++ b/qemu/net/tap-haiku.c @@ -22,8 +22,9 @@ * THE SOFTWARE. */ +#include "qemu/osdep.h" +#include "qapi/error.h" #include "tap_int.h" -#include <stdio.h> int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required, int mq_required, Error **errp) diff --git a/qemu/net/tap-linux.c b/qemu/net/tap-linux.c index 394f2a646..a503fa9c6 100644 --- a/qemu/net/tap-linux.c +++ b/qemu/net/tap-linux.c @@ -23,6 +23,7 @@ * THE SOFTWARE. */ +#include "qemu/osdep.h" #include "tap_int.h" #include "tap-linux.h" #include "net/tap.h" @@ -31,8 +32,9 @@ #include <sys/ioctl.h> #include "sysemu/sysemu.h" -#include "qemu-common.h" +#include "qapi/error.h" #include "qemu/error-report.h" +#include "qemu/cutils.h" #define PATH_NET_TUN "/dev/net/tun" @@ -211,7 +213,7 @@ int tap_fd_set_vnet_le(int fd, int is_le) return -errno; } - error_report("TUNSETVNETLE ioctl() failed: %s.\n", strerror(errno)); + error_report("TUNSETVNETLE ioctl() failed: %s.", strerror(errno)); abort(); } @@ -228,7 +230,7 @@ int tap_fd_set_vnet_be(int fd, int is_be) return -errno; } - error_report("TUNSETVNETBE ioctl() failed: %s.\n", strerror(errno)); + error_report("TUNSETVNETBE ioctl() failed: %s.", strerror(errno)); abort(); } diff --git a/qemu/net/tap-linux.h b/qemu/net/tap-linux.h index 01dc6f8a2..1dc3a9f27 100644 --- a/qemu/net/tap-linux.h +++ b/qemu/net/tap-linux.h @@ -16,7 +16,6 @@ #ifndef QEMU_TAP_LINUX_H #define QEMU_TAP_LINUX_H -#include <stdint.h> #ifdef __linux__ #include <linux/ioctl.h> diff --git a/qemu/net/tap-solaris.c b/qemu/net/tap-solaris.c index 0f60f78dd..a2a92356c 100644 --- a/qemu/net/tap-solaris.c +++ b/qemu/net/tap-solaris.c @@ -22,10 +22,12 @@ * THE SOFTWARE. */ +#include "qemu/osdep.h" +#include "qapi/error.h" #include "tap_int.h" #include "sysemu/sysemu.h" +#include "qemu/cutils.h" -#include <sys/stat.h> #include <sys/ethernet.h> #include <sys/sockio.h> #include <netinet/arp.h> diff --git a/qemu/net/tap-win32.c b/qemu/net/tap-win32.c index 625d53c64..f1e142ace 100644 --- a/qemu/net/tap-win32.c +++ b/qemu/net/tap-win32.c @@ -26,6 +26,7 @@ * distribution); if not, see <http://www.gnu.org/licenses/>. */ +#include "qemu/osdep.h" #include "tap_int.h" #include "qemu-common.h" @@ -34,7 +35,6 @@ #include "net/tap.h" /* tap_has_ufo, ... */ #include "sysemu/sysemu.h" #include "qemu/error-report.h" -#include <stdio.h> #include <windows.h> #include <winioctl.h> @@ -77,7 +77,12 @@ //#define DEBUG_TAP_WIN32 -#define TUN_ASYNCHRONOUS_WRITES 1 +/* FIXME: The asynch write path appears to be broken at + * present. WriteFile() ignores the lpNumberOfBytesWritten parameter + * for overlapped writes, with the result we return zero bytes sent, + * and after handling a single packet, receive is disabled for this + * interface. */ +/* #define TUN_ASYNCHRONOUS_WRITES 1 */ #define TUN_BUFFER_SIZE 1560 #define TUN_MAX_BUFFER_COUNT 32 @@ -356,7 +361,8 @@ static int get_device_guid( &len); if (status != ERROR_SUCCESS || name_type != REG_SZ) { - return -1; + ++i; + continue; } else { if (is_tap_win32_dev(enum_name)) { @@ -460,27 +466,48 @@ static int tap_win32_write(tap_win32_overlapped_t *overlapped, BOOL result; DWORD error; +#ifdef TUN_ASYNCHRONOUS_WRITES result = GetOverlappedResult( overlapped->handle, &overlapped->write_overlapped, &write_size, FALSE); if (!result && GetLastError() == ERROR_IO_INCOMPLETE) WaitForSingleObject(overlapped->write_event, INFINITE); +#endif result = WriteFile(overlapped->handle, buffer, size, &write_size, &overlapped->write_overlapped); +#ifdef TUN_ASYNCHRONOUS_WRITES + /* FIXME: we can't sensibly set write_size here, without waiting + * for the IO to complete! Moreover, we can't return zero, + * because that will disable receive on this interface, and we + * also can't assume it will succeed and return the full size, + * because that will result in the buffer being reclaimed while + * the IO is in progress. */ +#error Async writes are broken. Please disable TUN_ASYNCHRONOUS_WRITES. +#else /* !TUN_ASYNCHRONOUS_WRITES */ if (!result) { - switch (error = GetLastError()) - { - case ERROR_IO_PENDING: -#ifndef TUN_ASYNCHRONOUS_WRITES - WaitForSingleObject(overlapped->write_event, INFINITE); -#endif - break; - default: - return -1; + error = GetLastError(); + if (error == ERROR_IO_PENDING) { + result = GetOverlappedResult(overlapped->handle, + &overlapped->write_overlapped, + &write_size, TRUE); } } +#endif + + if (!result) { +#ifdef DEBUG_TAP_WIN32 + LPTSTR msgbuf; + error = GetLastError(); + FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM, + NULL, error, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + &msgbuf, 0, NULL); + fprintf(stderr, "Tap-Win32: Error WriteFile %d - %s\n", error, msgbuf); + LocalFree(msgbuf); +#endif + return 0; + } return write_size; } @@ -767,8 +794,8 @@ int net_init_tap(const NetClientOptions *opts, const char *name, /* FIXME error_setg(errp, ...) on failure */ const NetdevTapOptions *tap; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_TAP); - tap = opts->tap; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_TAP); + tap = opts->u.tap.data; if (!tap->has_ifname) { error_report("tap: no interface name"); diff --git a/qemu/net/tap.c b/qemu/net/tap.c index bd01590e8..740e8a261 100644 --- a/qemu/net/tap.c +++ b/qemu/net/tap.c @@ -23,12 +23,11 @@ * THE SOFTWARE. */ +#include "qemu/osdep.h" #include "tap_int.h" -#include "config-host.h" #include <sys/ioctl.h> -#include <sys/stat.h> #include <sys/wait.h> #include <sys/socket.h> #include <net/if.h> @@ -37,7 +36,9 @@ #include "clients.h" #include "monitor/monitor.h" #include "sysemu/sysemu.h" +#include "qapi/error.h" #include "qemu-common.h" +#include "qemu/cutils.h" #include "qemu/error-report.h" #include "net/tap.h" @@ -565,8 +566,8 @@ int net_init_bridge(const NetClientOptions *opts, const char *name, TAPState *s; int fd, vnet_hdr; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_BRIDGE); - bridge = opts->bridge; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_BRIDGE); + bridge = opts->u.bridge.data; helper = bridge->has_helper ? bridge->helper : DEFAULT_BRIDGE_HELPER; br = bridge->has_br ? bridge->br : DEFAULT_BRIDGE_INTERFACE; @@ -663,7 +664,7 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, options.backend_type = VHOST_BACKEND_TYPE_KERNEL; options.net_backend = &s->nc; - if (tap->has_vhostfd || tap->has_vhostfds) { + if (vhostfdname) { vhostfd = monitor_fd_param(cur_mon, vhostfdname, &err); if (vhostfd == -1) { error_propagate(errp, err); @@ -685,7 +686,7 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, "vhost-net requested but could not be initialized"); return; } - } else if (tap->has_vhostfd || tap->has_vhostfds) { + } else if (vhostfdname) { error_setg(errp, "vhostfd= is not valid without vhost"); } } @@ -728,8 +729,8 @@ int net_init_tap(const NetClientOptions *opts, const char *name, const char *vhostfdname; char ifname[128]; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_TAP); - tap = opts->tap; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_TAP); + tap = opts->u.tap.data; queues = tap->has_queues ? tap->queues : 1; vhostfdname = tap->has_vhostfd ? tap->vhostfd : NULL; diff --git a/qemu/net/util.c b/qemu/net/util.c index 7e9507679..0b3dbfe5d 100644 --- a/qemu/net/util.c +++ b/qemu/net/util.c @@ -22,9 +22,8 @@ * THE SOFTWARE. */ +#include "qemu/osdep.h" #include "util.h" -#include <errno.h> -#include <stdlib.h> int net_parse_macaddr(uint8_t *macaddr, const char *p) { diff --git a/qemu/net/util.h b/qemu/net/util.h index 10c7da95f..60b73d372 100644 --- a/qemu/net/util.h +++ b/qemu/net/util.h @@ -25,7 +25,6 @@ #ifndef QEMU_NET_UTIL_H #define QEMU_NET_UTIL_H -#include <stdint.h> int net_parse_macaddr(uint8_t *macaddr, const char *p); diff --git a/qemu/net/vde.c b/qemu/net/vde.c index dacaa64b4..9427eaa16 100644 --- a/qemu/net/vde.c +++ b/qemu/net/vde.c @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -#include "config-host.h" +#include "qemu/osdep.h" #include <libvdeplug.h> @@ -115,8 +115,8 @@ int net_init_vde(const NetClientOptions *opts, const char *name, /* FIXME error_setg(errp, ...) on failure */ const NetdevVdeOptions *vde; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_VDE); - vde = opts->vde; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_VDE); + vde = opts->u.vde.data; /* missing optional values have been initialized to "all bits zero" */ if (net_vde_init(peer, "vde", name, vde->sock, vde->port, vde->group, diff --git a/qemu/net/vhost-user.c b/qemu/net/vhost-user.c index 93dcecd66..1b9e73a2d 100644 --- a/qemu/net/vhost-user.c +++ b/qemu/net/vhost-user.c @@ -8,12 +8,15 @@ * */ +#include "qemu/osdep.h" #include "clients.h" #include "net/vhost_net.h" #include "net/vhost-user.h" #include "sysemu/char.h" #include "qemu/config-file.h" #include "qemu/error-report.h" +#include "qmp-commands.h" +#include "trace.h" typedef struct VhostUserState { NetClientState nc; @@ -24,7 +27,6 @@ typedef struct VhostUserState { typedef struct VhostUserChardevProps { bool is_socket; bool is_unix; - bool is_server; } VhostUserChardevProps; VHostNetState *vhost_user_get_vhost_net(NetClientState *nc) @@ -39,37 +41,106 @@ static int vhost_user_running(VhostUserState *s) return (s->vhost_net) ? 1 : 0; } -static int vhost_user_start(VhostUserState *s) +static void vhost_user_stop(int queues, NetClientState *ncs[]) { - VhostNetOptions options; + VhostUserState *s; + int i; - if (vhost_user_running(s)) { - return 0; + for (i = 0; i < queues; i++) { + assert (ncs[i]->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER); + + s = DO_UPCAST(VhostUserState, nc, ncs[i]); + if (!vhost_user_running(s)) { + continue; + } + + if (s->vhost_net) { + vhost_net_cleanup(s->vhost_net); + s->vhost_net = NULL; + } } +} + +static int vhost_user_start(int queues, NetClientState *ncs[]) +{ + VhostNetOptions options; + VhostUserState *s; + int max_queues; + int i; options.backend_type = VHOST_BACKEND_TYPE_USER; - options.net_backend = &s->nc; - options.opaque = s->chr; - s->vhost_net = vhost_net_init(&options); + for (i = 0; i < queues; i++) { + assert (ncs[i]->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER); + + s = DO_UPCAST(VhostUserState, nc, ncs[i]); + if (vhost_user_running(s)) { + continue; + } + + options.net_backend = ncs[i]; + options.opaque = s->chr; + s->vhost_net = vhost_net_init(&options); + if (!s->vhost_net) { + error_report("failed to init vhost_net for queue %d", i); + goto err; + } + + if (i == 0) { + max_queues = vhost_net_get_max_queues(s->vhost_net); + if (queues > max_queues) { + error_report("you are asking more queues than supported: %d", + max_queues); + goto err; + } + } + } - return vhost_user_running(s) ? 0 : -1; + return 0; + +err: + vhost_user_stop(i + 1, ncs); + return -1; } -static void vhost_user_stop(VhostUserState *s) +static ssize_t vhost_user_receive(NetClientState *nc, const uint8_t *buf, + size_t size) { - if (vhost_user_running(s)) { - vhost_net_cleanup(s->vhost_net); + /* In case of RARP (message size is 60) notify backup to send a fake RARP. + This fake RARP will be sent by backend only for guest + without GUEST_ANNOUNCE capability. + */ + if (size == 60) { + VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc); + int r; + static int display_rarp_failure = 1; + char mac_addr[6]; + + /* extract guest mac address from the RARP message */ + memcpy(mac_addr, &buf[6], 6); + + r = vhost_net_notify_migration_done(s->vhost_net, mac_addr); + + if ((r != 0) && (display_rarp_failure)) { + fprintf(stderr, + "Vhost user backend fails to broadcast fake RARP\n"); + fflush(stderr); + display_rarp_failure = 0; + } } - s->vhost_net = 0; + return size; } static void vhost_user_cleanup(NetClientState *nc) { VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc); - vhost_user_stop(s); + if (s->vhost_net) { + vhost_net_cleanup(s->vhost_net); + s->vhost_net = NULL; + } + qemu_purge_queued_packets(nc); } @@ -90,64 +161,69 @@ static bool vhost_user_has_ufo(NetClientState *nc) static NetClientInfo net_vhost_user_info = { .type = NET_CLIENT_OPTIONS_KIND_VHOST_USER, .size = sizeof(VhostUserState), + .receive = vhost_user_receive, .cleanup = vhost_user_cleanup, .has_vnet_hdr = vhost_user_has_vnet_hdr, .has_ufo = vhost_user_has_ufo, }; -static void net_vhost_link_down(VhostUserState *s, bool link_down) -{ - s->nc.link_down = link_down; - - if (s->nc.peer) { - s->nc.peer->link_down = link_down; - } - - if (s->nc.info->link_status_changed) { - s->nc.info->link_status_changed(&s->nc); - } - - if (s->nc.peer && s->nc.peer->info->link_status_changed) { - s->nc.peer->info->link_status_changed(s->nc.peer); - } -} - static void net_vhost_user_event(void *opaque, int event) { - VhostUserState *s = opaque; + const char *name = opaque; + NetClientState *ncs[MAX_QUEUE_NUM]; + VhostUserState *s; + Error *err = NULL; + int queues; + + queues = qemu_find_net_clients_except(name, ncs, + NET_CLIENT_OPTIONS_KIND_NIC, + MAX_QUEUE_NUM); + assert(queues < MAX_QUEUE_NUM); + s = DO_UPCAST(VhostUserState, nc, ncs[0]); + trace_vhost_user_event(s->chr->label, event); switch (event) { case CHR_EVENT_OPENED: - vhost_user_start(s); - net_vhost_link_down(s, false); - error_report("chardev \"%s\" went up", s->chr->label); + if (vhost_user_start(queues, ncs) < 0) { + exit(1); + } + qmp_set_link(name, true, &err); break; case CHR_EVENT_CLOSED: - net_vhost_link_down(s, true); - vhost_user_stop(s); - error_report("chardev \"%s\" went down", s->chr->label); + qmp_set_link(name, false, &err); + vhost_user_stop(queues, ncs); break; } + + if (err) { + error_report_err(err); + } } static int net_vhost_user_init(NetClientState *peer, const char *device, - const char *name, CharDriverState *chr) + const char *name, CharDriverState *chr, + int queues) { NetClientState *nc; VhostUserState *s; + int i; - nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name); + assert(name); + assert(queues > 0); - snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user to %s", - chr->label); + for (i = 0; i < queues; i++) { + nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name); - s = DO_UPCAST(VhostUserState, nc, nc); + snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user%d to %s", + i, chr->label); - /* We don't provide a receive callback */ - s->nc.receive_disabled = 1; - s->chr = chr; + nc->queue_index = i; - qemu_chr_add_handlers(s->chr, NULL, NULL, net_vhost_user_event, s); + s = DO_UPCAST(VhostUserState, nc, nc); + s->chr = chr; + } + + qemu_chr_add_handlers(chr, NULL, NULL, net_vhost_user_event, nc[0].name); return 0; } @@ -163,7 +239,6 @@ static int net_vhost_chardev_opts(void *opaque, } else if (strcmp(name, "path") == 0) { props->is_unix = true; } else if (strcmp(name, "server") == 0) { - props->is_server = true; } else { error_setg(errp, "vhost-user does not support a chardev with option %s=%s", @@ -226,11 +301,12 @@ static int net_vhost_check_net(void *opaque, QemuOpts *opts, Error **errp) int net_init_vhost_user(const NetClientOptions *opts, const char *name, NetClientState *peer, Error **errp) { + int queues; const NetdevVhostUserOptions *vhost_user_opts; CharDriverState *chr; - assert(opts->kind == NET_CLIENT_OPTIONS_KIND_VHOST_USER); - vhost_user_opts = opts->vhost_user; + assert(opts->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER); + vhost_user_opts = opts->u.vhost_user.data; chr = net_vhost_parse_chardev(vhost_user_opts, errp); if (!chr) { @@ -243,6 +319,13 @@ int net_init_vhost_user(const NetClientOptions *opts, const char *name, return -1; } + queues = vhost_user_opts->has_queues ? vhost_user_opts->queues : 1; + if (queues < 1 || queues > MAX_QUEUE_NUM) { + error_setg(errp, + "vhost-user number of queues must be in range [1, %d]", + MAX_QUEUE_NUM); + return -1; + } - return net_vhost_user_init(peer, "vhost_user", name, chr); + return net_vhost_user_init(peer, "vhost_user", name, chr, queues); } |