From 9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00 Mon Sep 17 00:00:00 2001 From: Yunhong Jiang Date: Tue, 4 Aug 2015 12:17:53 -0700 Subject: Add the rt linux 4.1.3-rt3 as base Import the rt linux 4.1.3-rt3 as OPNFV kvm base. It's from git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git linux-4.1.y-rt and the base is: commit 0917f823c59692d751951bf5ea699a2d1e2f26a2 Author: Sebastian Andrzej Siewior Date: Sat Jul 25 12:13:34 2015 +0200 Prepare v4.1.3-rt3 Signed-off-by: Sebastian Andrzej Siewior We lose all the git history this way and it's not good. We should apply another opnfv project repo in future. Change-Id: I87543d81c9df70d99c5001fbdf646b202c19f423 Signed-off-by: Yunhong Jiang --- kernel/drivers/xen/xen-pciback/xenbus.c | 750 ++++++++++++++++++++++++++++++++ 1 file changed, 750 insertions(+) create mode 100644 kernel/drivers/xen/xen-pciback/xenbus.c (limited to 'kernel/drivers/xen/xen-pciback/xenbus.c') diff --git a/kernel/drivers/xen/xen-pciback/xenbus.c b/kernel/drivers/xen/xen-pciback/xenbus.c new file mode 100644 index 000000000..98bc345f2 --- /dev/null +++ b/kernel/drivers/xen/xen-pciback/xenbus.c @@ -0,0 +1,750 @@ +/* + * PCI Backend Xenbus Setup - handles setup with frontend and xend + * + * Author: Ryan Wilson + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include "pciback.h" + +#define INVALID_EVTCHN_IRQ (-1) +struct workqueue_struct *xen_pcibk_wq; + +static bool __read_mostly passthrough; +module_param(passthrough, bool, S_IRUGO); +MODULE_PARM_DESC(passthrough, + "Option to specify how to export PCI topology to guest:\n"\ + " 0 - (default) Hide the true PCI topology and makes the frontend\n"\ + " there is a single PCI bus with only the exported devices on it.\n"\ + " For example, a device at 03:05.0 will be re-assigned to 00:00.0\n"\ + " while second device at 02:1a.1 will be re-assigned to 00:01.1.\n"\ + " 1 - Passthrough provides a real view of the PCI topology to the\n"\ + " frontend (for example, a device at 06:01.b will still appear at\n"\ + " 06:01.b to the frontend). This is similar to how Xen 2.0.x\n"\ + " exposed PCI devices to its driver domains. This may be required\n"\ + " for drivers which depend on finding their hardward in certain\n"\ + " bus/slot locations."); + +static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev) +{ + struct xen_pcibk_device *pdev; + + pdev = kzalloc(sizeof(struct xen_pcibk_device), GFP_KERNEL); + if (pdev == NULL) + goto out; + dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev); + + pdev->xdev = xdev; + dev_set_drvdata(&xdev->dev, pdev); + + mutex_init(&pdev->dev_lock); + + pdev->sh_info = NULL; + pdev->evtchn_irq = INVALID_EVTCHN_IRQ; + pdev->be_watching = 0; + + INIT_WORK(&pdev->op_work, xen_pcibk_do_op); + + if (xen_pcibk_init_devices(pdev)) { + kfree(pdev); + pdev = NULL; + } +out: + return pdev; +} + +static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev) +{ + mutex_lock(&pdev->dev_lock); + /* Ensure the guest can't trigger our handler before removing devices */ + if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) { + unbind_from_irqhandler(pdev->evtchn_irq, pdev); + pdev->evtchn_irq = INVALID_EVTCHN_IRQ; + } + + /* If the driver domain started an op, make sure we complete it + * before releasing the shared memory */ + + /* Note, the workqueue does not use spinlocks at all.*/ + flush_workqueue(xen_pcibk_wq); + + if (pdev->sh_info != NULL) { + xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info); + pdev->sh_info = NULL; + } + mutex_unlock(&pdev->dev_lock); +} + +static void free_pdev(struct xen_pcibk_device *pdev) +{ + if (pdev->be_watching) { + unregister_xenbus_watch(&pdev->be_watch); + pdev->be_watching = 0; + } + + xen_pcibk_disconnect(pdev); + + /* N.B. This calls pcistub_put_pci_dev which does the FLR on all + * of the PCIe devices. */ + xen_pcibk_release_devices(pdev); + + dev_set_drvdata(&pdev->xdev->dev, NULL); + pdev->xdev = NULL; + + kfree(pdev); +} + +static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref, + int remote_evtchn) +{ + int err = 0; + void *vaddr; + + dev_dbg(&pdev->xdev->dev, + "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n", + gnt_ref, remote_evtchn); + + err = xenbus_map_ring_valloc(pdev->xdev, &gnt_ref, 1, &vaddr); + if (err < 0) { + xenbus_dev_fatal(pdev->xdev, err, + "Error mapping other domain page in ours."); + goto out; + } + + pdev->sh_info = vaddr; + + err = bind_interdomain_evtchn_to_irqhandler( + pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event, + 0, DRV_NAME, pdev); + if (err < 0) { + xenbus_dev_fatal(pdev->xdev, err, + "Error binding event channel to IRQ"); + goto out; + } + pdev->evtchn_irq = err; + err = 0; + + dev_dbg(&pdev->xdev->dev, "Attached!\n"); +out: + return err; +} + +static int xen_pcibk_attach(struct xen_pcibk_device *pdev) +{ + int err = 0; + int gnt_ref, remote_evtchn; + char *magic = NULL; + + + mutex_lock(&pdev->dev_lock); + /* Make sure we only do this setup once */ + if (xenbus_read_driver_state(pdev->xdev->nodename) != + XenbusStateInitialised) + goto out; + + /* Wait for frontend to state that it has published the configuration */ + if (xenbus_read_driver_state(pdev->xdev->otherend) != + XenbusStateInitialised) + goto out; + + dev_dbg(&pdev->xdev->dev, "Reading frontend config\n"); + + err = xenbus_gather(XBT_NIL, pdev->xdev->otherend, + "pci-op-ref", "%u", &gnt_ref, + "event-channel", "%u", &remote_evtchn, + "magic", NULL, &magic, NULL); + if (err) { + /* If configuration didn't get read correctly, wait longer */ + xenbus_dev_fatal(pdev->xdev, err, + "Error reading configuration from frontend"); + goto out; + } + + if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) { + xenbus_dev_fatal(pdev->xdev, -EFAULT, + "version mismatch (%s/%s) with pcifront - " + "halting " DRV_NAME, + magic, XEN_PCI_MAGIC); + err = -EFAULT; + goto out; + } + + err = xen_pcibk_do_attach(pdev, gnt_ref, remote_evtchn); + if (err) + goto out; + + dev_dbg(&pdev->xdev->dev, "Connecting...\n"); + + err = xenbus_switch_state(pdev->xdev, XenbusStateConnected); + if (err) + xenbus_dev_fatal(pdev->xdev, err, + "Error switching to connected state!"); + + dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err); +out: + mutex_unlock(&pdev->dev_lock); + + kfree(magic); + + return err; +} + +static int xen_pcibk_publish_pci_dev(struct xen_pcibk_device *pdev, + unsigned int domain, unsigned int bus, + unsigned int devfn, unsigned int devid) +{ + int err; + int len; + char str[64]; + + len = snprintf(str, sizeof(str), "vdev-%d", devid); + if (unlikely(len >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + + /* Note: The PV protocol uses %02x, don't change it */ + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str, + "%04x:%02x:%02x.%02x", domain, bus, + PCI_SLOT(devfn), PCI_FUNC(devfn)); + +out: + return err; +} + +static int xen_pcibk_export_device(struct xen_pcibk_device *pdev, + int domain, int bus, int slot, int func, + int devid) +{ + struct pci_dev *dev; + int err = 0; + + dev_dbg(&pdev->xdev->dev, "exporting dom %x bus %x slot %x func %x\n", + domain, bus, slot, func); + + dev = pcistub_get_pci_dev_by_slot(pdev, domain, bus, slot, func); + if (!dev) { + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Couldn't locate PCI device " + "(%04x:%02x:%02x.%d)! " + "perhaps already in-use?", + domain, bus, slot, func); + goto out; + } + + err = xen_pcibk_add_pci_dev(pdev, dev, devid, + xen_pcibk_publish_pci_dev); + if (err) + goto out; + + dev_info(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id); + if (xen_register_device_domain_owner(dev, + pdev->xdev->otherend_id) != 0) { + dev_err(&dev->dev, "Stealing ownership from dom%d.\n", + xen_find_device_domain_owner(dev)); + xen_unregister_device_domain_owner(dev); + xen_register_device_domain_owner(dev, pdev->xdev->otherend_id); + } + + /* TODO: It'd be nice to export a bridge and have all of its children + * get exported with it. This may be best done in xend (which will + * have to calculate resource usage anyway) but we probably want to + * put something in here to ensure that if a bridge gets given to a + * driver domain, that all devices under that bridge are not given + * to other driver domains (as he who controls the bridge can disable + * it and stop the other devices from working). + */ +out: + return err; +} + +static int xen_pcibk_remove_device(struct xen_pcibk_device *pdev, + int domain, int bus, int slot, int func) +{ + int err = 0; + struct pci_dev *dev; + + dev_dbg(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n", + domain, bus, slot, func); + + dev = xen_pcibk_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func)); + if (!dev) { + err = -EINVAL; + dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device " + "(%04x:%02x:%02x.%d)! not owned by this domain\n", + domain, bus, slot, func); + goto out; + } + + dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id); + xen_unregister_device_domain_owner(dev); + + /* N.B. This ends up calling pcistub_put_pci_dev which ends up + * doing the FLR. */ + xen_pcibk_release_pci_dev(pdev, dev, true /* use the lock. */); + +out: + return err; +} + +static int xen_pcibk_publish_pci_root(struct xen_pcibk_device *pdev, + unsigned int domain, unsigned int bus) +{ + unsigned int d, b; + int i, root_num, len, err; + char str[64]; + + dev_dbg(&pdev->xdev->dev, "Publishing pci roots\n"); + + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, + "root_num", "%d", &root_num); + if (err == 0 || err == -ENOENT) + root_num = 0; + else if (err < 0) + goto out; + + /* Verify that we haven't already published this pci root */ + for (i = 0; i < root_num; i++) { + len = snprintf(str, sizeof(str), "root-%d", i); + if (unlikely(len >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, + str, "%x:%x", &d, &b); + if (err < 0) + goto out; + if (err != 2) { + err = -EINVAL; + goto out; + } + + if (d == domain && b == bus) { + err = 0; + goto out; + } + } + + len = snprintf(str, sizeof(str), "root-%d", root_num); + if (unlikely(len >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + + dev_dbg(&pdev->xdev->dev, "writing root %d at %04x:%02x\n", + root_num, domain, bus); + + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str, + "%04x:%02x", domain, bus); + if (err) + goto out; + + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, + "root_num", "%d", (root_num + 1)); + +out: + return err; +} + +static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev) +{ + int err = 0; + int num_devs; + int domain, bus, slot, func; + int substate; + int i, len; + char state_str[64]; + char dev_str[64]; + + + dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n"); + + mutex_lock(&pdev->dev_lock); + /* Make sure we only reconfigure once */ + if (xenbus_read_driver_state(pdev->xdev->nodename) != + XenbusStateReconfiguring) + goto out; + + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d", + &num_devs); + if (err != 1) { + if (err >= 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading number of devices"); + goto out; + } + + for (i = 0; i < num_devs; i++) { + len = snprintf(state_str, sizeof(state_str), "state-%d", i); + if (unlikely(len >= (sizeof(state_str) - 1))) { + err = -ENOMEM; + xenbus_dev_fatal(pdev->xdev, err, + "String overflow while reading " + "configuration"); + goto out; + } + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str, + "%d", &substate); + if (err != 1) + substate = XenbusStateUnknown; + + switch (substate) { + case XenbusStateInitialising: + dev_dbg(&pdev->xdev->dev, "Attaching dev-%d ...\n", i); + + len = snprintf(dev_str, sizeof(dev_str), "dev-%d", i); + if (unlikely(len >= (sizeof(dev_str) - 1))) { + err = -ENOMEM; + xenbus_dev_fatal(pdev->xdev, err, + "String overflow while " + "reading configuration"); + goto out; + } + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, + dev_str, "%x:%x:%x.%x", + &domain, &bus, &slot, &func); + if (err < 0) { + xenbus_dev_fatal(pdev->xdev, err, + "Error reading device " + "configuration"); + goto out; + } + if (err != 4) { + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error parsing pci device " + "configuration"); + goto out; + } + + err = xen_pcibk_export_device(pdev, domain, bus, slot, + func, i); + if (err) + goto out; + + /* Publish pci roots. */ + err = xen_pcibk_publish_pci_roots(pdev, + xen_pcibk_publish_pci_root); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error while publish PCI root" + "buses for frontend"); + goto out; + } + + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, + state_str, "%d", + XenbusStateInitialised); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error switching substate of " + "dev-%d\n", i); + goto out; + } + break; + + case XenbusStateClosing: + dev_dbg(&pdev->xdev->dev, "Detaching dev-%d ...\n", i); + + len = snprintf(dev_str, sizeof(dev_str), "vdev-%d", i); + if (unlikely(len >= (sizeof(dev_str) - 1))) { + err = -ENOMEM; + xenbus_dev_fatal(pdev->xdev, err, + "String overflow while " + "reading configuration"); + goto out; + } + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, + dev_str, "%x:%x:%x.%x", + &domain, &bus, &slot, &func); + if (err < 0) { + xenbus_dev_fatal(pdev->xdev, err, + "Error reading device " + "configuration"); + goto out; + } + if (err != 4) { + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error parsing pci device " + "configuration"); + goto out; + } + + err = xen_pcibk_remove_device(pdev, domain, bus, slot, + func); + if (err) + goto out; + + /* TODO: If at some point we implement support for pci + * root hot-remove on pcifront side, we'll need to + * remove unnecessary xenstore nodes of pci roots here. + */ + + break; + + default: + break; + } + } + + err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error switching to reconfigured state!"); + goto out; + } + +out: + mutex_unlock(&pdev->dev_lock); + return 0; +} + +static void xen_pcibk_frontend_changed(struct xenbus_device *xdev, + enum xenbus_state fe_state) +{ + struct xen_pcibk_device *pdev = dev_get_drvdata(&xdev->dev); + + dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state); + + switch (fe_state) { + case XenbusStateInitialised: + xen_pcibk_attach(pdev); + break; + + case XenbusStateReconfiguring: + xen_pcibk_reconfigure(pdev); + break; + + case XenbusStateConnected: + /* pcifront switched its state from reconfiguring to connected. + * Then switch to connected state. + */ + xenbus_switch_state(xdev, XenbusStateConnected); + break; + + case XenbusStateClosing: + xen_pcibk_disconnect(pdev); + xenbus_switch_state(xdev, XenbusStateClosing); + break; + + case XenbusStateClosed: + xen_pcibk_disconnect(pdev); + xenbus_switch_state(xdev, XenbusStateClosed); + if (xenbus_dev_is_online(xdev)) + break; + /* fall through if not online */ + case XenbusStateUnknown: + dev_dbg(&xdev->dev, "frontend is gone! unregister device\n"); + device_unregister(&xdev->dev); + break; + + default: + break; + } +} + +static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev) +{ + /* Get configuration from xend (if available now) */ + int domain, bus, slot, func; + int err = 0; + int i, num_devs; + char dev_str[64]; + char state_str[64]; + + mutex_lock(&pdev->dev_lock); + /* It's possible we could get the call to setup twice, so make sure + * we're not already connected. + */ + if (xenbus_read_driver_state(pdev->xdev->nodename) != + XenbusStateInitWait) + goto out; + + dev_dbg(&pdev->xdev->dev, "getting be setup\n"); + + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d", + &num_devs); + if (err != 1) { + if (err >= 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading number of devices"); + goto out; + } + + for (i = 0; i < num_devs; i++) { + int l = snprintf(dev_str, sizeof(dev_str), "dev-%d", i); + if (unlikely(l >= (sizeof(dev_str) - 1))) { + err = -ENOMEM; + xenbus_dev_fatal(pdev->xdev, err, + "String overflow while reading " + "configuration"); + goto out; + } + + err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, dev_str, + "%x:%x:%x.%x", &domain, &bus, &slot, &func); + if (err < 0) { + xenbus_dev_fatal(pdev->xdev, err, + "Error reading device configuration"); + goto out; + } + if (err != 4) { + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error parsing pci device " + "configuration"); + goto out; + } + + err = xen_pcibk_export_device(pdev, domain, bus, slot, func, i); + if (err) + goto out; + + /* Switch substate of this device. */ + l = snprintf(state_str, sizeof(state_str), "state-%d", i); + if (unlikely(l >= (sizeof(state_str) - 1))) { + err = -ENOMEM; + xenbus_dev_fatal(pdev->xdev, err, + "String overflow while reading " + "configuration"); + goto out; + } + err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, state_str, + "%d", XenbusStateInitialised); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, "Error switching " + "substate of dev-%d\n", i); + goto out; + } + } + + err = xen_pcibk_publish_pci_roots(pdev, xen_pcibk_publish_pci_root); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error while publish PCI root buses " + "for frontend"); + goto out; + } + + err = xenbus_switch_state(pdev->xdev, XenbusStateInitialised); + if (err) + xenbus_dev_fatal(pdev->xdev, err, + "Error switching to initialised state!"); + +out: + mutex_unlock(&pdev->dev_lock); + if (!err) + /* see if pcifront is already configured (if not, we'll wait) */ + xen_pcibk_attach(pdev); + return err; +} + +static void xen_pcibk_be_watch(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + struct xen_pcibk_device *pdev = + container_of(watch, struct xen_pcibk_device, be_watch); + + switch (xenbus_read_driver_state(pdev->xdev->nodename)) { + case XenbusStateInitWait: + xen_pcibk_setup_backend(pdev); + break; + + default: + break; + } +} + +static int xen_pcibk_xenbus_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + int err = 0; + struct xen_pcibk_device *pdev = alloc_pdev(dev); + + if (pdev == NULL) { + err = -ENOMEM; + xenbus_dev_fatal(dev, err, + "Error allocating xen_pcibk_device struct"); + goto out; + } + + /* wait for xend to configure us */ + err = xenbus_switch_state(dev, XenbusStateInitWait); + if (err) + goto out; + + /* watch the backend node for backend configuration information */ + err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch, + xen_pcibk_be_watch); + if (err) + goto out; + + pdev->be_watching = 1; + + /* We need to force a call to our callback here in case + * xend already configured us! + */ + xen_pcibk_be_watch(&pdev->be_watch, NULL, 0); + +out: + return err; +} + +static int xen_pcibk_xenbus_remove(struct xenbus_device *dev) +{ + struct xen_pcibk_device *pdev = dev_get_drvdata(&dev->dev); + + if (pdev != NULL) + free_pdev(pdev); + + return 0; +} + +static const struct xenbus_device_id xen_pcibk_ids[] = { + {"pci"}, + {""}, +}; + +static struct xenbus_driver xen_pcibk_driver = { + .name = DRV_NAME, + .ids = xen_pcibk_ids, + .probe = xen_pcibk_xenbus_probe, + .remove = xen_pcibk_xenbus_remove, + .otherend_changed = xen_pcibk_frontend_changed, +}; + +const struct xen_pcibk_backend *__read_mostly xen_pcibk_backend; + +int __init xen_pcibk_xenbus_register(void) +{ + xen_pcibk_wq = create_workqueue("xen_pciback_workqueue"); + if (!xen_pcibk_wq) { + pr_err("%s: create xen_pciback_workqueue failed\n", __func__); + return -EFAULT; + } + xen_pcibk_backend = &xen_pcibk_vpci_backend; + if (passthrough) + xen_pcibk_backend = &xen_pcibk_passthrough_backend; + pr_info("backend is %s\n", xen_pcibk_backend->name); + return xenbus_register_backend(&xen_pcibk_driver); +} + +void __exit xen_pcibk_xenbus_unregister(void) +{ + destroy_workqueue(xen_pcibk_wq); + xenbus_unregister_driver(&xen_pcibk_driver); +} -- cgit 1.2.3-korg