diff options
author | José Pekkarinen <jose.pekkarinen@nokia.com> | 2016-04-11 10:41:07 +0300 |
---|---|---|
committer | José Pekkarinen <jose.pekkarinen@nokia.com> | 2016-04-13 08:17:18 +0300 |
commit | e09b41010ba33a20a87472ee821fa407a5b8da36 (patch) | |
tree | d10dc367189862e7ca5c592f033dc3726e1df4e3 /kernel/drivers/xen | |
parent | f93b97fd65072de626c074dbe099a1fff05ce060 (diff) |
These changes are the raw update to linux-4.4.6-rt14. Kernel sources
are taken from kernel.org, and the rt patch is taken from the rt wiki download page.
During the rebasing, the following patch collided:
Force tick interrupt and get rid of softirq magic (I70131fb85).
The colliding hunks have been dropped because their logic was already
present in the updated source.
Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769
Signed-off-by: José Pekkarinen <jose.pekkarinen@nokia.com>
Diffstat (limited to 'kernel/drivers/xen')
31 files changed, 1127 insertions, 599 deletions
diff --git a/kernel/drivers/xen/Kconfig b/kernel/drivers/xen/Kconfig index 7cd226da1..73708acce 100644 --- a/kernel/drivers/xen/Kconfig +++ b/kernel/drivers/xen/Kconfig @@ -280,4 +280,15 @@ config XEN_ACPI def_bool y depends on X86 && ACPI +config XEN_SYMS + bool "Xen symbols" + depends on X86 && XEN_DOM0 && XENFS + default y if KALLSYMS + help + Exports hypervisor symbols (along with their types and addresses) via + /proc/xen/xensyms file, similar to /proc/kallsyms + +config XEN_HAVE_VPMU + bool + endmenu diff --git a/kernel/drivers/xen/Makefile b/kernel/drivers/xen/Makefile index e293bc507..aa8a7f71f 100644 --- a/kernel/drivers/xen/Makefile +++ b/kernel/drivers/xen/Makefile @@ -1,6 +1,4 @@ -ifeq ($(filter y, $(CONFIG_ARM) $(CONFIG_ARM64)),) obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o -endif obj-$(CONFIG_X86) += fallback.o obj-y += grant-table.o features.o balloon.o manage.o preempt.o obj-y += events/ diff --git a/kernel/drivers/xen/balloon.c b/kernel/drivers/xen/balloon.c index fd933695f..12eab503e 100644 --- a/kernel/drivers/xen/balloon.c +++ b/kernel/drivers/xen/balloon.c @@ -54,6 +54,8 @@ #include <linux/memory.h> #include <linux/memory_hotplug.h> #include <linux/percpu-defs.h> +#include <linux/slab.h> +#include <linux/sysctl.h> #include <asm/page.h> #include <asm/pgalloc.h> @@ -70,16 +72,64 @@ #include <xen/features.h> #include <xen/page.h> +static int xen_hotplug_unpopulated; + +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG + +static int zero; +static int one = 1; + +static struct ctl_table balloon_table[] = { + { + .procname = "hotplug_unpopulated", + .data = &xen_hotplug_unpopulated, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, + { } +}; + +static struct ctl_table balloon_root[] = { + { + .procname = "balloon", + .mode = 0555, + .child = balloon_table, + }, + { } +}; + +static struct ctl_table xen_root[] = { + { + .procname = "xen", + .mode = 0555, + .child = balloon_root, + }, + { } 
+}; + +#endif + +/* + * Use one extent per PAGE_SIZE to avoid to break down the page into + * multiple frame. + */ +#define EXTENT_ORDER (fls(XEN_PFN_PER_PAGE) - 1) + /* * balloon_process() state: * * BP_DONE: done or nothing to do, + * BP_WAIT: wait to be rescheduled, * BP_EAGAIN: error, go to sleep, * BP_ECANCELED: error, balloon operation canceled. */ enum bp_state { BP_DONE, + BP_WAIT, BP_EAGAIN, BP_ECANCELED }; @@ -91,11 +141,12 @@ struct balloon_stats balloon_stats; EXPORT_SYMBOL_GPL(balloon_stats); /* We increase/decrease in batches which fit in a page */ -static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)]; +static xen_pfn_t frame_list[PAGE_SIZE / sizeof(xen_pfn_t)]; /* List of ballooned pages, threaded through the mem_map array. */ static LIST_HEAD(ballooned_pages); +static DECLARE_WAIT_QUEUE_HEAD(balloon_wq); /* Main work function, always executed in process context. */ static void balloon_process(struct work_struct *work); @@ -124,6 +175,7 @@ static void __balloon_append(struct page *page) list_add(&page->lru, &ballooned_pages); balloon_stats.balloon_low++; } + wake_up(&balloon_wq); } static void balloon_append(struct page *page) @@ -133,17 +185,16 @@ static void balloon_append(struct page *page) } /* balloon_retrieve: rescue a page from the balloon, if it is not empty. 
*/ -static struct page *balloon_retrieve(bool prefer_highmem) +static struct page *balloon_retrieve(bool require_lowmem) { struct page *page; if (list_empty(&ballooned_pages)) return NULL; - if (prefer_highmem) - page = list_entry(ballooned_pages.prev, struct page, lru); - else - page = list_entry(ballooned_pages.next, struct page, lru); + page = list_entry(ballooned_pages.next, struct page, lru); + if (require_lowmem && PageHighMem(page)) + return NULL; list_del(&page->lru); if (PageHighMem(page)) @@ -166,6 +217,9 @@ static struct page *balloon_next_page(struct page *page) static enum bp_state update_schedule(enum bp_state state) { + if (state == BP_WAIT) + return BP_WAIT; + if (state == BP_ECANCELED) return BP_ECANCELED; @@ -193,43 +247,75 @@ static enum bp_state update_schedule(enum bp_state state) } #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG -static long current_credit(void) +static struct resource *additional_memory_resource(phys_addr_t size) { - return balloon_stats.target_pages - balloon_stats.current_pages - - balloon_stats.hotplug_pages; + struct resource *res; + int ret; + + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) + return NULL; + + res->name = "System RAM"; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + + ret = allocate_resource(&iomem_resource, res, + size, 0, -1, + PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); + if (ret < 0) { + pr_err("Cannot allocate new System RAM resource\n"); + kfree(res); + return NULL; + } + + return res; } -static bool balloon_is_inflated(void) +static void release_memory_resource(struct resource *resource) { - if (balloon_stats.balloon_low || balloon_stats.balloon_high || - balloon_stats.balloon_hotplug) - return true; - else - return false; -} + if (!resource) + return; -/* - * reserve_additional_memory() adds memory region of size >= credit above - * max_pfn. New region is section aligned and size is modified to be multiple - * of section size. 
Those features allow optimal use of address space and - * establish proper alignment when this function is called first time after - * boot (last section not fully populated at boot time contains unused memory - * pages with PG_reserved bit not set; online_pages_range() does not allow page - * onlining in whole range if first onlined page does not have PG_reserved - * bit set). Real size of added memory is established at page onlining stage. - */ + /* + * No need to reset region to identity mapped since we now + * know that no I/O can be in this region + */ + release_resource(resource); + kfree(resource); +} -static enum bp_state reserve_additional_memory(long credit) +static enum bp_state reserve_additional_memory(void) { + long credit; + struct resource *resource; int nid, rc; - u64 hotplug_start_paddr; - unsigned long balloon_hotplug = credit; + unsigned long balloon_hotplug; - hotplug_start_paddr = PFN_PHYS(SECTION_ALIGN_UP(max_pfn)); - balloon_hotplug = round_up(balloon_hotplug, PAGES_PER_SECTION); - nid = memory_add_physaddr_to_nid(hotplug_start_paddr); + credit = balloon_stats.target_pages + balloon_stats.target_unpopulated + - balloon_stats.total_pages; + + /* + * Already hotplugged enough pages? Wait for them to be + * onlined. + */ + if (credit <= 0) + return BP_WAIT; + + balloon_hotplug = round_up(credit, PAGES_PER_SECTION); + + resource = additional_memory_resource(balloon_hotplug * PAGE_SIZE); + if (!resource) + goto err; + + nid = memory_add_physaddr_to_nid(resource->start); #ifdef CONFIG_XEN_HAVE_PVMMU + /* + * We don't support PV MMU when Linux and Xen is using + * different page granularity. 
+ */ + BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE); + /* * add_memory() will build page tables for the new memory so * the p2m must contain invalid entries so the correct @@ -242,29 +328,28 @@ static enum bp_state reserve_additional_memory(long credit) if (!xen_feature(XENFEAT_auto_translated_physmap)) { unsigned long pfn, i; - pfn = PFN_DOWN(hotplug_start_paddr); + pfn = PFN_DOWN(resource->start); for (i = 0; i < balloon_hotplug; i++) { if (!set_phys_to_machine(pfn + i, INVALID_P2M_ENTRY)) { pr_warn("set_phys_to_machine() failed, no memory added\n"); - return BP_ECANCELED; + goto err; } } } #endif - rc = add_memory(nid, hotplug_start_paddr, balloon_hotplug << PAGE_SHIFT); - + rc = add_memory_resource(nid, resource); if (rc) { pr_warn("Cannot add additional memory (%i)\n", rc); - return BP_ECANCELED; + goto err; } - balloon_hotplug -= credit; + balloon_stats.total_pages += balloon_hotplug; - balloon_stats.hotplug_pages += credit; - balloon_stats.balloon_hotplug = balloon_hotplug; - - return BP_DONE; + return BP_WAIT; + err: + release_memory_resource(resource); + return BP_ECANCELED; } static void xen_online_page(struct page *page) @@ -275,11 +360,6 @@ static void xen_online_page(struct page *page) __balloon_append(page); - if (balloon_stats.hotplug_pages) - --balloon_stats.hotplug_pages; - else - --balloon_stats.balloon_hotplug; - mutex_unlock(&balloon_mutex); } @@ -296,53 +376,34 @@ static struct notifier_block xen_memory_nb = { .priority = 0 }; #else -static long current_credit(void) +static enum bp_state reserve_additional_memory(void) { - unsigned long target = balloon_stats.target_pages; - - target = min(target, - balloon_stats.current_pages + - balloon_stats.balloon_low + - balloon_stats.balloon_high); - - return target - balloon_stats.current_pages; + balloon_stats.target_pages = balloon_stats.current_pages; + return BP_ECANCELED; } +#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */ -static bool balloon_is_inflated(void) +static long current_credit(void) { - if 
(balloon_stats.balloon_low || balloon_stats.balloon_high) - return true; - else - return false; + return balloon_stats.target_pages - balloon_stats.current_pages; } -static enum bp_state reserve_additional_memory(long credit) +static bool balloon_is_inflated(void) { - balloon_stats.target_pages = balloon_stats.current_pages; - return BP_DONE; + return balloon_stats.balloon_low || balloon_stats.balloon_high; } -#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */ static enum bp_state increase_reservation(unsigned long nr_pages) { int rc; - unsigned long pfn, i; + unsigned long i; struct page *page; struct xen_memory_reservation reservation = { .address_bits = 0, - .extent_order = 0, + .extent_order = EXTENT_ORDER, .domid = DOMID_SELF }; -#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG - if (!balloon_stats.balloon_low && !balloon_stats.balloon_high) { - nr_pages = min(nr_pages, balloon_stats.balloon_hotplug); - balloon_stats.hotplug_pages += nr_pages; - balloon_stats.balloon_hotplug -= nr_pages; - return BP_DONE; - } -#endif - if (nr_pages > ARRAY_SIZE(frame_list)) nr_pages = ARRAY_SIZE(frame_list); @@ -352,7 +413,11 @@ static enum bp_state increase_reservation(unsigned long nr_pages) nr_pages = i; break; } - frame_list[i] = page_to_pfn(page); + + /* XENMEM_populate_physmap requires a PFN based on Xen + * granularity. + */ + frame_list[i] = page_to_xen_pfn(page); page = balloon_next_page(page); } @@ -366,10 +431,16 @@ static enum bp_state increase_reservation(unsigned long nr_pages) page = balloon_retrieve(false); BUG_ON(page == NULL); - pfn = page_to_pfn(page); - #ifdef CONFIG_XEN_HAVE_PVMMU + /* + * We don't support PV MMU when Linux and Xen is using + * different page granularity. + */ + BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE); + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + unsigned long pfn = page_to_pfn(page); + set_phys_to_machine(pfn, frame_list[i]); /* Link back into the page tables if not highmem. 
*/ @@ -396,23 +467,15 @@ static enum bp_state increase_reservation(unsigned long nr_pages) static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) { enum bp_state state = BP_DONE; - unsigned long pfn, i; - struct page *page; + unsigned long i; + struct page *page, *tmp; int ret; struct xen_memory_reservation reservation = { .address_bits = 0, - .extent_order = 0, + .extent_order = EXTENT_ORDER, .domid = DOMID_SELF }; - -#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG - if (balloon_stats.hotplug_pages) { - nr_pages = min(nr_pages, balloon_stats.hotplug_pages); - balloon_stats.hotplug_pages -= nr_pages; - balloon_stats.balloon_hotplug += nr_pages; - return BP_DONE; - } -#endif + LIST_HEAD(pages); if (nr_pages > ARRAY_SIZE(frame_list)) nr_pages = ARRAY_SIZE(frame_list); @@ -425,8 +488,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) break; } scrub_page(page); - - frame_list[i] = page_to_pfn(page); + list_add(&page->lru, &pages); } /* @@ -438,14 +500,25 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) */ kmap_flush_unused(); - /* Update direct mapping, invalidate P2M, and add to balloon. */ - for (i = 0; i < nr_pages; i++) { - pfn = frame_list[i]; - frame_list[i] = pfn_to_mfn(pfn); - page = pfn_to_page(pfn); + /* + * Setup the frame, update direct mapping, invalidate P2M, + * and add to balloon. + */ + i = 0; + list_for_each_entry_safe(page, tmp, &pages, lru) { + /* XENMEM_decrease_reservation requires a GFN */ + frame_list[i++] = xen_page_to_gfn(page); #ifdef CONFIG_XEN_HAVE_PVMMU + /* + * We don't support PV MMU when Linux and Xen is using + * different page granularity. 
+ */ + BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE); + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + unsigned long pfn = page_to_pfn(page); + if (!PageHighMem(page)) { ret = HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), @@ -455,6 +528,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); } #endif + list_del(&page->lru); balloon_append(page); } @@ -472,7 +546,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) } /* - * We avoid multiple worker processes conflicting via the balloon mutex. + * As this is a work item it is guaranteed to run as a single instance only. * We may of course race updates of the target counts (which are protected * by the balloon lock), or with changes to the Xen hard limit, but we will * recover from these in time. @@ -482,16 +556,17 @@ static void balloon_process(struct work_struct *work) enum bp_state state = BP_DONE; long credit; - mutex_lock(&balloon_mutex); do { + mutex_lock(&balloon_mutex); + credit = current_credit(); if (credit > 0) { if (balloon_is_inflated()) state = increase_reservation(credit); else - state = reserve_additional_memory(credit); + state = reserve_additional_memory(); } if (credit < 0) @@ -499,17 +574,15 @@ static void balloon_process(struct work_struct *work) state = update_schedule(state); -#ifndef CONFIG_PREEMPT - if (need_resched()) - schedule(); -#endif + mutex_unlock(&balloon_mutex); + + cond_resched(); + } while (credit && state == BP_DONE); /* Schedule more work if there is some still to be done. */ if (state == BP_EAGAIN) schedule_delayed_work(&balloon_worker, balloon_stats.schedule_delay * HZ); - - mutex_unlock(&balloon_mutex); } /* Resets the Xen limit, sets new target, and kicks off processing. 
*/ @@ -521,41 +594,71 @@ void balloon_set_new_target(unsigned long target) } EXPORT_SYMBOL_GPL(balloon_set_new_target); +static int add_ballooned_pages(int nr_pages) +{ + enum bp_state st; + + if (xen_hotplug_unpopulated) { + st = reserve_additional_memory(); + if (st != BP_ECANCELED) { + mutex_unlock(&balloon_mutex); + wait_event(balloon_wq, + !list_empty(&ballooned_pages)); + mutex_lock(&balloon_mutex); + return 0; + } + } + + st = decrease_reservation(nr_pages, GFP_USER); + if (st != BP_DONE) + return -ENOMEM; + + return 0; +} + /** * alloc_xenballooned_pages - get pages that have been ballooned out * @nr_pages: Number of pages to get * @pages: pages returned - * @highmem: allow highmem pages * @return 0 on success, error otherwise */ -int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem) +int alloc_xenballooned_pages(int nr_pages, struct page **pages) { int pgno = 0; struct page *page; + int ret; + mutex_lock(&balloon_mutex); + + balloon_stats.target_unpopulated += nr_pages; + while (pgno < nr_pages) { - page = balloon_retrieve(highmem); - if (page && (highmem || !PageHighMem(page))) { + page = balloon_retrieve(true); + if (page) { pages[pgno++] = page; +#ifdef CONFIG_XEN_HAVE_PVMMU + /* + * We don't support PV MMU when Linux and Xen is using + * different page granularity. + */ + BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE); + + ret = xen_alloc_p2m_entry(page_to_pfn(page)); + if (ret < 0) + goto out_undo; +#endif } else { - enum bp_state st; - if (page) - balloon_append(page); - st = decrease_reservation(nr_pages - pgno, - highmem ? 
GFP_HIGHUSER : GFP_USER); - if (st != BP_DONE) + ret = add_ballooned_pages(nr_pages - pgno); + if (ret < 0) goto out_undo; } } mutex_unlock(&balloon_mutex); return 0; out_undo: - while (pgno) - balloon_append(pages[--pgno]); - /* Free the memory back to the kernel soon */ - schedule_delayed_work(&balloon_worker, 0); mutex_unlock(&balloon_mutex); - return -ENOMEM; + free_xenballooned_pages(pgno, pages); + return ret; } EXPORT_SYMBOL(alloc_xenballooned_pages); @@ -575,6 +678,8 @@ void free_xenballooned_pages(int nr_pages, struct page **pages) balloon_append(pages[i]); } + balloon_stats.target_unpopulated -= nr_pages; + /* The balloon may be too large now. Shrink it if needed. */ if (current_credit()) schedule_delayed_work(&balloon_worker, 0); @@ -603,6 +708,8 @@ static void __init balloon_add_region(unsigned long start_pfn, don't subtract from it. */ __balloon_append(page); } + + balloon_stats.total_pages += extra_pfn_end - start_pfn; } static int __init balloon_init(void) @@ -620,6 +727,7 @@ static int __init balloon_init(void) balloon_stats.target_pages = balloon_stats.current_pages; balloon_stats.balloon_low = 0; balloon_stats.balloon_high = 0; + balloon_stats.total_pages = balloon_stats.current_pages; balloon_stats.schedule_delay = 1; balloon_stats.max_schedule_delay = 32; @@ -627,11 +735,9 @@ static int __init balloon_init(void) balloon_stats.max_retry_count = RETRY_UNLIMITED; #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG - balloon_stats.hotplug_pages = 0; - balloon_stats.balloon_hotplug = 0; - set_online_page_callback(&xen_online_page); register_memory_notifier(&xen_memory_nb); + register_sysctl_table(xen_root); #endif /* @@ -639,9 +745,9 @@ static int __init balloon_init(void) * regions (see arch/x86/xen/setup.c). 
*/ for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) - if (xen_extra_mem[i].size) - balloon_add_region(PFN_UP(xen_extra_mem[i].start), - PFN_DOWN(xen_extra_mem[i].size)); + if (xen_extra_mem[i].n_pfns) + balloon_add_region(xen_extra_mem[i].start_pfn, + xen_extra_mem[i].n_pfns); return 0; } diff --git a/kernel/drivers/xen/biomerge.c b/kernel/drivers/xen/biomerge.c index 0edb91c0d..4da69dbf7 100644 --- a/kernel/drivers/xen/biomerge.c +++ b/kernel/drivers/xen/biomerge.c @@ -6,10 +6,18 @@ bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, const struct bio_vec *vec2) { - unsigned long mfn1 = pfn_to_mfn(page_to_pfn(vec1->bv_page)); - unsigned long mfn2 = pfn_to_mfn(page_to_pfn(vec2->bv_page)); +#if XEN_PAGE_SIZE == PAGE_SIZE + unsigned long bfn1 = pfn_to_bfn(page_to_pfn(vec1->bv_page)); + unsigned long bfn2 = pfn_to_bfn(page_to_pfn(vec2->bv_page)); return __BIOVEC_PHYS_MERGEABLE(vec1, vec2) && - ((mfn1 == mfn2) || ((mfn1+1) == mfn2)); + ((bfn1 == bfn2) || ((bfn1+1) == bfn2)); +#else + /* + * XXX: Add support for merging bio_vec when using different page + * size in Xen and Linux. 
+ */ + return 0; +#endif } EXPORT_SYMBOL(xen_biovec_phys_mergeable); diff --git a/kernel/drivers/xen/cpu_hotplug.c b/kernel/drivers/xen/cpu_hotplug.c index cc6513a17..5676aefdf 100644 --- a/kernel/drivers/xen/cpu_hotplug.c +++ b/kernel/drivers/xen/cpu_hotplug.c @@ -11,15 +11,20 @@ static void enable_hotplug_cpu(int cpu) { if (!cpu_present(cpu)) - arch_register_cpu(cpu); + xen_arch_register_cpu(cpu); set_cpu_present(cpu, true); } static void disable_hotplug_cpu(int cpu) { + if (cpu_online(cpu)) { + lock_device_hotplug(); + device_offline(get_cpu_device(cpu)); + unlock_device_hotplug(); + } if (cpu_present(cpu)) - arch_unregister_cpu(cpu); + xen_arch_unregister_cpu(cpu); set_cpu_present(cpu, false); } @@ -55,7 +60,6 @@ static void vcpu_hotplug(unsigned int cpu) enable_hotplug_cpu(cpu); break; case 0: - (void)cpu_down(cpu); disable_hotplug_cpu(cpu); break; default: @@ -102,7 +106,11 @@ static int __init setup_vcpu_hotplug_event(void) static struct notifier_block xsn_cpu = { .notifier_call = setup_cpu_watcher }; +#ifdef CONFIG_X86 if (!xen_pv_domain()) +#else + if (!xen_domain()) +#endif return -ENODEV; register_xenstore_notifier(&xsn_cpu); diff --git a/kernel/drivers/xen/events/events_base.c b/kernel/drivers/xen/events/events_base.c index 383879504..524c22146 100644 --- a/kernel/drivers/xen/events/events_base.c +++ b/kernel/drivers/xen/events/events_base.c @@ -39,12 +39,13 @@ #include <asm/irq.h> #include <asm/idle.h> #include <asm/io_apic.h> -#include <asm/xen/page.h> +#include <asm/i8259.h> #include <asm/xen/pci.h> #endif #include <asm/sync_bitops.h> #include <asm/xen/hypercall.h> #include <asm/xen/hypervisor.h> +#include <xen/page.h> #include <xen/xen.h> #include <xen/hvm.h> @@ -336,7 +337,7 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) BUG_ON(irq == -1); #ifdef CONFIG_SMP - cpumask_copy(irq_get_irq_data(irq)->affinity, cpumask_of(cpu)); + cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu)); #endif xen_evtchn_port_bind_to_cpu(info, 
cpu); @@ -373,7 +374,7 @@ static void xen_irq_init(unsigned irq) struct irq_info *info; #ifdef CONFIG_SMP /* By default all event channels notify CPU#0. */ - cpumask_copy(irq_get_irq_data(irq)->affinity, cpumask_of(0)); + cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(0)); #endif info = kzalloc(sizeof(*info), GFP_KERNEL); @@ -420,7 +421,7 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi) return xen_allocate_irq_dynamic(); /* Legacy IRQ descriptors are already allocated by the arch. */ - if (gsi < NR_IRQS_LEGACY) + if (gsi < nr_legacy_irqs()) irq = gsi; else irq = irq_alloc_desc_at(gsi, -1); @@ -446,7 +447,7 @@ static void xen_free_irq(unsigned irq) kfree(info); /* Legacy IRQ descriptors are managed by the arch. */ - if (irq < NR_IRQS_LEGACY) + if (irq < nr_legacy_irqs()) return; irq_free_desc(irq); @@ -1301,11 +1302,7 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) if (!VALID_EVTCHN(evtchn)) return -1; - /* - * Events delivered via platform PCI interrupts are always - * routed to vcpu 0 and hence cannot be rebound. - */ - if (xen_hvm_domain() && !xen_have_vector_callback) + if (!xen_support_evtchn_rebind()) return -1; /* Send future instances of this interrupt to other vcpu. 
*/ @@ -1692,7 +1689,7 @@ void __init xen_init_IRQ(void) struct physdev_pirq_eoi_gmfn eoi_gmfn; pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO); - eoi_gmfn.gmfn = virt_to_mfn(pirq_eoi_map); + eoi_gmfn.gmfn = virt_to_gfn(pirq_eoi_map); rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn); /* TODO: No PVH support for PIRQ EOI */ if (rc != 0) { diff --git a/kernel/drivers/xen/events/events_fifo.c b/kernel/drivers/xen/events/events_fifo.c index 417415d73..96a1b8da5 100644 --- a/kernel/drivers/xen/events/events_fifo.c +++ b/kernel/drivers/xen/events/events_fifo.c @@ -44,17 +44,17 @@ #include <asm/sync_bitops.h> #include <asm/xen/hypercall.h> #include <asm/xen/hypervisor.h> -#include <asm/xen/page.h> #include <xen/xen.h> #include <xen/xen-ops.h> #include <xen/events.h> #include <xen/interface/xen.h> #include <xen/interface/event_channel.h> +#include <xen/page.h> #include "events_internal.h" -#define EVENT_WORDS_PER_PAGE (PAGE_SIZE / sizeof(event_word_t)) +#define EVENT_WORDS_PER_PAGE (XEN_PAGE_SIZE / sizeof(event_word_t)) #define MAX_EVENT_ARRAY_PAGES (EVTCHN_FIFO_NR_CHANNELS / EVENT_WORDS_PER_PAGE) struct evtchn_fifo_queue { @@ -111,7 +111,7 @@ static int init_control_block(int cpu, for (i = 0; i < EVTCHN_FIFO_MAX_QUEUES; i++) q->head[i] = 0; - init_control.control_gfn = virt_to_mfn(control_block); + init_control.control_gfn = virt_to_gfn(control_block); init_control.offset = 0; init_control.vcpu = cpu; @@ -167,7 +167,7 @@ static int evtchn_fifo_setup(struct irq_info *info) /* Mask all events in this page before adding it. 
*/ init_array_page(array_page); - expand_array.array_gfn = virt_to_mfn(array_page); + expand_array.array_gfn = virt_to_gfn(array_page); ret = HYPERVISOR_event_channel_op(EVTCHNOP_expand_array, &expand_array); if (ret < 0) @@ -281,7 +281,8 @@ static void handle_irq_for_port(unsigned port) static void consume_one_event(unsigned cpu, struct evtchn_fifo_control_block *control_block, - unsigned priority, unsigned long *ready) + unsigned priority, unsigned long *ready, + bool drop) { struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu); uint32_t head; @@ -313,13 +314,17 @@ static void consume_one_event(unsigned cpu, if (head == 0) clear_bit(priority, ready); - if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) - handle_irq_for_port(port); + if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) { + if (unlikely(drop)) + pr_warn("Dropping pending event for port %u\n", port); + else + handle_irq_for_port(port); + } q->head[priority] = head; } -static void evtchn_fifo_handle_events(unsigned cpu) +static void __evtchn_fifo_handle_events(unsigned cpu, bool drop) { struct evtchn_fifo_control_block *control_block; unsigned long ready; @@ -331,11 +336,16 @@ static void evtchn_fifo_handle_events(unsigned cpu) while (ready) { q = find_first_bit(&ready, EVTCHN_FIFO_MAX_QUEUES); - consume_one_event(cpu, control_block, q, &ready); + consume_one_event(cpu, control_block, q, &ready, drop); ready |= xchg(&control_block->ready, 0); } } +static void evtchn_fifo_handle_events(unsigned cpu) +{ + __evtchn_fifo_handle_events(cpu, false); +} + static void evtchn_fifo_resume(void) { unsigned cpu; @@ -420,6 +430,9 @@ static int evtchn_fifo_cpu_notification(struct notifier_block *self, if (!per_cpu(cpu_control_block, cpu)) ret = evtchn_fifo_alloc_control_block(cpu); break; + case CPU_DEAD: + __evtchn_fifo_handle_events(cpu, true); + break; default: break; } diff --git a/kernel/drivers/xen/evtchn.c b/kernel/drivers/xen/evtchn.c index 00f40f051..38272ad24 100644 --- 
a/kernel/drivers/xen/evtchn.c +++ b/kernel/drivers/xen/evtchn.c @@ -49,6 +49,8 @@ #include <linux/init.h> #include <linux/mutex.h> #include <linux/cpu.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> #include <xen/xen.h> #include <xen/events.h> @@ -58,10 +60,10 @@ struct per_user_data { struct mutex bind_mutex; /* serialize bind/unbind operations */ struct rb_root evtchns; + unsigned int nr_evtchns; /* Notification ring, accessed via /dev/xen/evtchn. */ -#define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t)) -#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1)) + unsigned int ring_size; evtchn_port_t *ring; unsigned int ring_cons, ring_prod, ring_overflow; struct mutex ring_cons_mutex; /* protect against concurrent readers */ @@ -80,10 +82,41 @@ struct user_evtchn { bool enabled; }; +static evtchn_port_t *evtchn_alloc_ring(unsigned int size) +{ + evtchn_port_t *ring; + size_t s = size * sizeof(*ring); + + ring = kmalloc(s, GFP_KERNEL); + if (!ring) + ring = vmalloc(s); + + return ring; +} + +static void evtchn_free_ring(evtchn_port_t *ring) +{ + kvfree(ring); +} + +static unsigned int evtchn_ring_offset(struct per_user_data *u, + unsigned int idx) +{ + return idx & (u->ring_size - 1); +} + +static evtchn_port_t *evtchn_ring_entry(struct per_user_data *u, + unsigned int idx) +{ + return u->ring + evtchn_ring_offset(u, idx); +} + static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn) { struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL; + u->nr_evtchns++; + while (*new) { struct user_evtchn *this; @@ -107,6 +140,7 @@ static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn) static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn) { + u->nr_evtchns--; rb_erase(&evtchn->node, &u->evtchns); kfree(evtchn); } @@ -144,8 +178,8 @@ static irqreturn_t evtchn_interrupt(int irq, void *data) spin_lock(&u->ring_prod_lock); - if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) { - 
u->ring[EVTCHN_RING_MASK(u->ring_prod)] = evtchn->port; + if ((u->ring_prod - u->ring_cons) < u->ring_size) { + *evtchn_ring_entry(u, u->ring_prod) = evtchn->port; wmb(); /* Ensure ring contents visible */ if (u->ring_cons == u->ring_prod++) { wake_up_interruptible(&u->evtchn_wait); @@ -200,10 +234,10 @@ static ssize_t evtchn_read(struct file *file, char __user *buf, } /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */ - if (((c ^ p) & EVTCHN_RING_SIZE) != 0) { - bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) * + if (((c ^ p) & u->ring_size) != 0) { + bytes1 = (u->ring_size - evtchn_ring_offset(u, c)) * sizeof(evtchn_port_t); - bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t); + bytes2 = evtchn_ring_offset(u, p) * sizeof(evtchn_port_t); } else { bytes1 = (p - c) * sizeof(evtchn_port_t); bytes2 = 0; @@ -219,7 +253,7 @@ static ssize_t evtchn_read(struct file *file, char __user *buf, rc = -EFAULT; rmb(); /* Ensure that we see the port before we copy it. */ - if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) || + if (copy_to_user(buf, evtchn_ring_entry(u, c), bytes1) || ((bytes2 != 0) && copy_to_user(&buf[bytes1], &u->ring[0], bytes2))) goto unlock_out; @@ -278,6 +312,66 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf, return rc; } +static int evtchn_resize_ring(struct per_user_data *u) +{ + unsigned int new_size; + evtchn_port_t *new_ring, *old_ring; + unsigned int p, c; + + /* + * Ensure the ring is large enough to capture all possible + * events. i.e., one free slot for each bound event. + */ + if (u->nr_evtchns <= u->ring_size) + return 0; + + if (u->ring_size == 0) + new_size = 64; + else + new_size = 2 * u->ring_size; + + new_ring = evtchn_alloc_ring(new_size); + if (!new_ring) + return -ENOMEM; + + old_ring = u->ring; + + /* + * Access to the ring contents is serialized by either the + * prod /or/ cons lock so take both when resizing. 
+ */ + mutex_lock(&u->ring_cons_mutex); + spin_lock_irq(&u->ring_prod_lock); + + /* + * Copy the old ring contents to the new ring. + * + * If the ring contents crosses the end of the current ring, + * it needs to be copied in two chunks. + * + * +---------+ +------------------+ + * |34567 12| -> | 1234567 | + * +-----p-c-+ +------------------+ + */ + p = evtchn_ring_offset(u, u->ring_prod); + c = evtchn_ring_offset(u, u->ring_cons); + if (p < c) { + memcpy(new_ring + c, u->ring + c, (u->ring_size - c) * sizeof(*u->ring)); + memcpy(new_ring + u->ring_size, u->ring, p * sizeof(*u->ring)); + } else + memcpy(new_ring + c, u->ring + c, (p - c) * sizeof(*u->ring)); + + u->ring = new_ring; + u->ring_size = new_size; + + spin_unlock_irq(&u->ring_prod_lock); + mutex_unlock(&u->ring_cons_mutex); + + evtchn_free_ring(old_ring); + + return 0; +} + static int evtchn_bind_to_user(struct per_user_data *u, int port) { struct user_evtchn *evtchn; @@ -305,6 +399,10 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port) if (rc < 0) goto err; + rc = evtchn_resize_ring(u); + if (rc < 0) + goto err; + rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, 0, u->name, evtchn); if (rc < 0) @@ -503,13 +601,6 @@ static int evtchn_open(struct inode *inode, struct file *filp) init_waitqueue_head(&u->evtchn_wait); - u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL); - if (u->ring == NULL) { - kfree(u->name); - kfree(u); - return -ENOMEM; - } - mutex_init(&u->bind_mutex); mutex_init(&u->ring_cons_mutex); spin_lock_init(&u->ring_prod_lock); @@ -532,7 +623,7 @@ static int evtchn_release(struct inode *inode, struct file *filp) evtchn_unbind_from_user(u, evtchn); } - free_page((unsigned long)u->ring); + evtchn_free_ring(u->ring); kfree(u->name); kfree(u); diff --git a/kernel/drivers/xen/gntalloc.c b/kernel/drivers/xen/gntalloc.c index e53fe1917..4547a91bc 100644 --- a/kernel/drivers/xen/gntalloc.c +++ b/kernel/drivers/xen/gntalloc.c @@ -142,7 +142,8 @@ static int 
add_grefs(struct ioctl_gntalloc_alloc_gref *op, /* Grant foreign access to the page. */ rc = gnttab_grant_foreign_access(op->domid, - pfn_to_mfn(page_to_pfn(gref->page)), readonly); + xen_page_to_gfn(gref->page), + readonly); if (rc < 0) goto undo; gref_ids[i] = gref->gref_id = rc; @@ -493,7 +494,7 @@ static void gntalloc_vma_close(struct vm_area_struct *vma) mutex_unlock(&gref_mutex); } -static struct vm_operations_struct gntalloc_vmops = { +static const struct vm_operations_struct gntalloc_vmops = { .open = gntalloc_vma_open, .close = gntalloc_vma_close, }; diff --git a/kernel/drivers/xen/gntdev.c b/kernel/drivers/xen/gntdev.c index 4bd23bba8..1be5dd048 100644 --- a/kernel/drivers/xen/gntdev.c +++ b/kernel/drivers/xen/gntdev.c @@ -41,9 +41,9 @@ #include <xen/balloon.h> #include <xen/gntdev.h> #include <xen/events.h> +#include <xen/page.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> -#include <asm/xen/page.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Derek G. Murray <Derek.Murray@cl.cam.ac.uk>, " @@ -433,7 +433,7 @@ static struct page *gntdev_vma_find_special_page(struct vm_area_struct *vma, return map->pages[(addr - map->pages_vm_start) >> PAGE_SHIFT]; } -static struct vm_operations_struct gntdev_vmops = { +static const struct vm_operations_struct gntdev_vmops = { .open = gntdev_vma_open, .close = gntdev_vma_close, .find_special_page = gntdev_vma_find_special_page, @@ -804,7 +804,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) vma->vm_ops = &gntdev_vmops; - vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_IO; if (use_ptemod) vma->vm_flags |= VM_DONTCOPY; diff --git a/kernel/drivers/xen/grant-table.c b/kernel/drivers/xen/grant-table.c index b1c7170e5..c49f79ed5 100644 --- a/kernel/drivers/xen/grant-table.c +++ b/kernel/drivers/xen/grant-table.c @@ -138,7 +138,6 @@ static struct gnttab_free_callback *gnttab_free_callback_list; static int gnttab_expand(unsigned int 
req_entries); #define RPP (PAGE_SIZE / sizeof(grant_ref_t)) -#define SPP (PAGE_SIZE / sizeof(grant_status_t)) static inline grant_ref_t *__gnttab_entry(grant_ref_t entry) { @@ -643,7 +642,7 @@ int gnttab_setup_auto_xlat_frames(phys_addr_t addr) if (xen_auto_xlat_grant_frames.count) return -EINVAL; - vaddr = xen_remap(addr, PAGE_SIZE * max_nr_gframes); + vaddr = xen_remap(addr, XEN_PAGE_SIZE * max_nr_gframes); if (vaddr == NULL) { pr_warn("Failed to ioremap gnttab share frames (addr=%pa)!\n", &addr); @@ -655,7 +654,7 @@ int gnttab_setup_auto_xlat_frames(phys_addr_t addr) return -ENOMEM; } for (i = 0; i < max_nr_gframes; i++) - pfn[i] = PFN_DOWN(addr) + i; + pfn[i] = XEN_PFN_DOWN(addr) + i; xen_auto_xlat_grant_frames.vaddr = vaddr; xen_auto_xlat_grant_frames.pfn = pfn; @@ -688,7 +687,7 @@ int gnttab_alloc_pages(int nr_pages, struct page **pages) int i; int ret; - ret = alloc_xenballooned_pages(nr_pages, pages, false); + ret = alloc_xenballooned_pages(nr_pages, pages); if (ret < 0) return ret; @@ -777,6 +776,54 @@ void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count) } EXPORT_SYMBOL_GPL(gnttab_batch_copy); +void gnttab_foreach_grant_in_range(struct page *page, + unsigned int offset, + unsigned int len, + xen_grant_fn_t fn, + void *data) +{ + unsigned int goffset; + unsigned int glen; + unsigned long xen_pfn; + + len = min_t(unsigned int, PAGE_SIZE - offset, len); + goffset = xen_offset_in_page(offset); + + xen_pfn = page_to_xen_pfn(page) + XEN_PFN_DOWN(offset); + + while (len) { + glen = min_t(unsigned int, XEN_PAGE_SIZE - goffset, len); + fn(pfn_to_gfn(xen_pfn), goffset, glen, data); + + goffset = 0; + xen_pfn++; + len -= glen; + } +} +EXPORT_SYMBOL_GPL(gnttab_foreach_grant_in_range); + +void gnttab_foreach_grant(struct page **pages, + unsigned int nr_grefs, + xen_grant_fn_t fn, + void *data) +{ + unsigned int goffset = 0; + unsigned long xen_pfn = 0; + unsigned int i; + + for (i = 0; i < nr_grefs; i++) { + if ((i % XEN_PFN_PER_PAGE) == 0) { + xen_pfn = 
page_to_xen_pfn(pages[i / XEN_PFN_PER_PAGE]); + goffset = 0; + } + + fn(pfn_to_gfn(xen_pfn), goffset, XEN_PAGE_SIZE, data); + + goffset += XEN_PAGE_SIZE; + xen_pfn++; + } +} + int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, struct gnttab_map_grant_ref *kmap_ops, struct page **pages, unsigned int count) @@ -979,7 +1026,7 @@ static void gnttab_request_version(void) { /* Only version 1 is used, which will always be available. */ grant_table_version = 1; - grefs_per_grant_frame = PAGE_SIZE / sizeof(struct grant_entry_v1); + grefs_per_grant_frame = XEN_PAGE_SIZE / sizeof(struct grant_entry_v1); gnttab_interface = &gnttab_v1_ops; pr_info("Grant tables using version %d layout\n", grant_table_version); diff --git a/kernel/drivers/xen/manage.c b/kernel/drivers/xen/manage.c index 9e6a85104..e12bd3635 100644 --- a/kernel/drivers/xen/manage.c +++ b/kernel/drivers/xen/manage.c @@ -19,10 +19,10 @@ #include <xen/grant_table.h> #include <xen/events.h> #include <xen/hvc-console.h> +#include <xen/page.h> #include <xen/xen-ops.h> #include <asm/xen/hypercall.h> -#include <asm/xen/page.h> #include <asm/xen/hypervisor.h> enum shutdown_state { @@ -80,7 +80,7 @@ static int xen_suspend(void *data) * is resuming in a new domain. */ si->cancelled = HYPERVISOR_suspend(xen_pv_domain() - ? virt_to_mfn(xen_start_info) + ? virt_to_gfn(xen_start_info) : 0); xen_arch_post_suspend(si->cancelled); diff --git a/kernel/drivers/xen/preempt.c b/kernel/drivers/xen/preempt.c index a1800c150..08cb419eb 100644 --- a/kernel/drivers/xen/preempt.c +++ b/kernel/drivers/xen/preempt.c @@ -31,7 +31,7 @@ EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall); asmlinkage __visible void xen_maybe_preempt_hcall(void) { if (unlikely(__this_cpu_read(xen_in_preemptible_hcall) - && should_resched())) { + && need_resched())) { /* * Clear flag as we may be rescheduled on a different * cpu. 
diff --git a/kernel/drivers/xen/privcmd.c b/kernel/drivers/xen/privcmd.c index 5a296161d..df2e6f783 100644 --- a/kernel/drivers/xen/privcmd.c +++ b/kernel/drivers/xen/privcmd.c @@ -193,16 +193,16 @@ static int traverse_pages_block(unsigned nelem, size_t size, return ret; } -struct mmap_mfn_state { +struct mmap_gfn_state { unsigned long va; struct vm_area_struct *vma; domid_t domain; }; -static int mmap_mfn_range(void *data, void *state) +static int mmap_gfn_range(void *data, void *state) { struct privcmd_mmap_entry *msg = data; - struct mmap_mfn_state *st = state; + struct mmap_gfn_state *st = state; struct vm_area_struct *vma = st->vma; int rc; @@ -216,7 +216,7 @@ static int mmap_mfn_range(void *data, void *state) ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end)) return -EINVAL; - rc = xen_remap_domain_mfn_range(vma, + rc = xen_remap_domain_gfn_range(vma, msg->va & PAGE_MASK, msg->mfn, msg->npages, vma->vm_page_prot, @@ -236,7 +236,7 @@ static long privcmd_ioctl_mmap(void __user *udata) struct vm_area_struct *vma; int rc; LIST_HEAD(pagelist); - struct mmap_mfn_state state; + struct mmap_gfn_state state; /* We only support privcmd_ioctl_mmap_batch for auto translated. */ if (xen_feature(XENFEAT_auto_translated_physmap)) @@ -273,7 +273,7 @@ static long privcmd_ioctl_mmap(void __user *udata) rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry), &pagelist, - mmap_mfn_range, &state); + mmap_gfn_range, &state); out_up: @@ -299,18 +299,18 @@ struct mmap_batch_state { int global_error; int version; - /* User-space mfn array to store errors in the second pass for V1. */ - xen_pfn_t __user *user_mfn; + /* User-space gfn array to store errors in the second pass for V1. */ + xen_pfn_t __user *user_gfn; /* User-space int array to store errors in the second pass for V2. */ int __user *user_err; }; -/* auto translated dom0 note: if domU being created is PV, then mfn is - * mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP). 
+/* auto translated dom0 note: if domU being created is PV, then gfn is + * mfn(addr on bus). If it's auto xlated, then gfn is pfn (input to HAP). */ static int mmap_batch_fn(void *data, int nr, void *state) { - xen_pfn_t *mfnp = data; + xen_pfn_t *gfnp = data; struct mmap_batch_state *st = state; struct vm_area_struct *vma = st->vma; struct page **pages = vma->vm_private_data; @@ -321,8 +321,8 @@ static int mmap_batch_fn(void *data, int nr, void *state) cur_pages = &pages[st->index]; BUG_ON(nr < 0); - ret = xen_remap_domain_mfn_array(st->vma, st->va & PAGE_MASK, mfnp, nr, - (int *)mfnp, st->vma->vm_page_prot, + ret = xen_remap_domain_gfn_array(st->vma, st->va & PAGE_MASK, gfnp, nr, + (int *)gfnp, st->vma->vm_page_prot, st->domain, cur_pages); /* Adjust the global_error? */ @@ -347,22 +347,22 @@ static int mmap_return_error(int err, struct mmap_batch_state *st) if (st->version == 1) { if (err) { - xen_pfn_t mfn; + xen_pfn_t gfn; - ret = get_user(mfn, st->user_mfn); + ret = get_user(gfn, st->user_gfn); if (ret < 0) return ret; /* * V1 encodes the error codes in the 32bit top - * nibble of the mfn (with its known + * nibble of the gfn (with its known * limitations vis-a-vis 64 bit callers). */ - mfn |= (err == -ENOENT) ? + gfn |= (err == -ENOENT) ? PRIVCMD_MMAPBATCH_PAGED_ERROR : PRIVCMD_MMAPBATCH_MFN_ERROR; - return __put_user(mfn, st->user_mfn++); + return __put_user(gfn, st->user_gfn++); } else - st->user_mfn++; + st->user_gfn++; } else { /* st->version == 2 */ if (err) return __put_user(err, st->user_err++); @@ -388,7 +388,7 @@ static int mmap_return_errors(void *data, int nr, void *state) return 0; } -/* Allocate pfns that are then mapped with gmfns from foreign domid. Update +/* Allocate pfns that are then mapped with gfns from foreign domid. Update * the vma with the page info to use later. 
* Returns: 0 if success, otherwise -errno */ @@ -401,7 +401,7 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs) if (pages == NULL) return -ENOMEM; - rc = alloc_xenballooned_pages(numpgs, pages, 0); + rc = alloc_xenballooned_pages(numpgs, pages); if (rc != 0) { pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__, numpgs, rc); @@ -414,7 +414,7 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs) return 0; } -static struct vm_operations_struct privcmd_vm_ops; +static const struct vm_operations_struct privcmd_vm_ops; static long privcmd_ioctl_mmap_batch(void __user *udata, int version) { @@ -446,7 +446,7 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) return -EINVAL; } - nr_pages = m.num; + nr_pages = DIV_ROUND_UP(m.num, XEN_PFN_PER_PAGE); if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT))) return -EINVAL; @@ -494,7 +494,7 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) goto out_unlock; } if (xen_feature(XENFEAT_auto_translated_physmap)) { - ret = alloc_empty_pages(vma, m.num); + ret = alloc_empty_pages(vma, nr_pages); if (ret < 0) goto out_unlock; } else @@ -518,6 +518,7 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) state.global_error = 0; state.version = version; + BUILD_BUG_ON(((PAGE_SIZE / sizeof(xen_pfn_t)) % XEN_PFN_PER_PAGE) != 0); /* mmap_batch_fn guarantees ret == 0 */ BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t), &pagelist, mmap_batch_fn, &state)); @@ -526,7 +527,7 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) if (state.global_error) { /* Write back errors in second pass. 
*/ - state.user_mfn = (xen_pfn_t *)m.arr; + state.user_gfn = (xen_pfn_t *)m.arr; state.user_err = m.err; ret = traverse_pages_block(m.num, sizeof(xen_pfn_t), &pagelist, mmap_return_errors, &state); @@ -582,12 +583,13 @@ static void privcmd_close(struct vm_area_struct *vma) { struct page **pages = vma->vm_private_data; int numpgs = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + int numgfns = (vma->vm_end - vma->vm_start) >> XEN_PAGE_SHIFT; int rc; if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages) return; - rc = xen_unmap_domain_mfn_range(vma, numpgs, pages); + rc = xen_unmap_domain_gfn_range(vma, numgfns, pages); if (rc == 0) free_xenballooned_pages(numpgs, pages); else @@ -605,7 +607,7 @@ static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -static struct vm_operations_struct privcmd_vm_ops = { +static const struct vm_operations_struct privcmd_vm_ops = { .close = privcmd_close, .fault = privcmd_fault }; diff --git a/kernel/drivers/xen/swiotlb-xen.c b/kernel/drivers/xen/swiotlb-xen.c index 4c549323c..7399782c0 100644 --- a/kernel/drivers/xen/swiotlb-xen.c +++ b/kernel/drivers/xen/swiotlb-xen.c @@ -76,27 +76,27 @@ static unsigned long xen_io_tlb_nslabs; static u64 start_dma_addr; /* - * Both of these functions should avoid PFN_PHYS because phys_addr_t + * Both of these functions should avoid XEN_PFN_PHYS because phys_addr_t * can be 32bit when dma_addr_t is 64bit leading to a loss in * information if the shift is done before casting to 64bit. 
*/ static inline dma_addr_t xen_phys_to_bus(phys_addr_t paddr) { - unsigned long mfn = pfn_to_mfn(PFN_DOWN(paddr)); - dma_addr_t dma = (dma_addr_t)mfn << PAGE_SHIFT; + unsigned long bfn = pfn_to_bfn(XEN_PFN_DOWN(paddr)); + dma_addr_t dma = (dma_addr_t)bfn << XEN_PAGE_SHIFT; - dma |= paddr & ~PAGE_MASK; + dma |= paddr & ~XEN_PAGE_MASK; return dma; } static inline phys_addr_t xen_bus_to_phys(dma_addr_t baddr) { - unsigned long pfn = mfn_to_pfn(PFN_DOWN(baddr)); - dma_addr_t dma = (dma_addr_t)pfn << PAGE_SHIFT; + unsigned long xen_pfn = bfn_to_pfn(XEN_PFN_DOWN(baddr)); + dma_addr_t dma = (dma_addr_t)xen_pfn << XEN_PAGE_SHIFT; phys_addr_t paddr = dma; - paddr |= baddr & ~PAGE_MASK; + paddr |= baddr & ~XEN_PAGE_MASK; return paddr; } @@ -106,19 +106,19 @@ static inline dma_addr_t xen_virt_to_bus(void *address) return xen_phys_to_bus(virt_to_phys(address)); } -static int check_pages_physically_contiguous(unsigned long pfn, +static int check_pages_physically_contiguous(unsigned long xen_pfn, unsigned int offset, size_t length) { - unsigned long next_mfn; + unsigned long next_bfn; int i; int nr_pages; - next_mfn = pfn_to_mfn(pfn); - nr_pages = (offset + length + PAGE_SIZE-1) >> PAGE_SHIFT; + next_bfn = pfn_to_bfn(xen_pfn); + nr_pages = (offset + length + XEN_PAGE_SIZE-1) >> XEN_PAGE_SHIFT; for (i = 1; i < nr_pages; i++) { - if (pfn_to_mfn(++pfn) != ++next_mfn) + if (pfn_to_bfn(++xen_pfn) != ++next_bfn) return 0; } return 1; @@ -126,28 +126,27 @@ static int check_pages_physically_contiguous(unsigned long pfn, static inline int range_straddles_page_boundary(phys_addr_t p, size_t size) { - unsigned long pfn = PFN_DOWN(p); - unsigned int offset = p & ~PAGE_MASK; + unsigned long xen_pfn = XEN_PFN_DOWN(p); + unsigned int offset = p & ~XEN_PAGE_MASK; - if (offset + size <= PAGE_SIZE) + if (offset + size <= XEN_PAGE_SIZE) return 0; - if (check_pages_physically_contiguous(pfn, offset, size)) + if (check_pages_physically_contiguous(xen_pfn, offset, size)) return 0; return 1; } static 
int is_xen_swiotlb_buffer(dma_addr_t dma_addr) { - unsigned long mfn = PFN_DOWN(dma_addr); - unsigned long pfn = mfn_to_local_pfn(mfn); - phys_addr_t paddr; + unsigned long bfn = XEN_PFN_DOWN(dma_addr); + unsigned long xen_pfn = bfn_to_local_pfn(bfn); + phys_addr_t paddr = XEN_PFN_PHYS(xen_pfn); /* If the address is outside our domain, it CAN * have the same virtual address as another address * in our domain. Therefore _only_ check address within our domain. */ - if (pfn_valid(pfn)) { - paddr = PFN_PHYS(pfn); + if (pfn_valid(PFN_DOWN(paddr))) { return paddr >= virt_to_phys(xen_io_tlb_start) && paddr < virt_to_phys(xen_io_tlb_end); } @@ -311,9 +310,6 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, */ flags &= ~(__GFP_DMA | __GFP_HIGHMEM); - if (dma_alloc_from_coherent(hwdev, size, dma_handle, &ret)) - return ret; - /* On ARM this function returns an ioremap'ped virtual address for * which virt_to_phys doesn't return the corresponding physical * address. In fact on ARM virt_to_phys only works for kernel direct @@ -356,9 +352,6 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, phys_addr_t phys; u64 dma_mask = DMA_BIT_MASK(32); - if (dma_release_from_coherent(hwdev, order, vaddr)) - return; - if (hwdev && hwdev->coherent_dma_mask) dma_mask = hwdev->coherent_dma_mask; @@ -398,7 +391,7 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, */ if (dma_capable(dev, dev_addr, size) && !range_straddles_page_boundary(phys, size) && - !xen_arch_need_swiotlb(dev, PFN_DOWN(phys), PFN_DOWN(dev_addr)) && + !xen_arch_need_swiotlb(dev, phys, dev_addr) && !swiotlb_force) { /* we are not interested in the dma_addr returned by * xen_dma_map_page, only in the potential cache flushes executed @@ -557,7 +550,7 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, dma_addr_t dev_addr = xen_phys_to_bus(paddr); if (swiotlb_force || - xen_arch_need_swiotlb(hwdev, PFN_DOWN(paddr), PFN_DOWN(dev_addr)) || + 
xen_arch_need_swiotlb(hwdev, paddr, dev_addr) || !dma_capable(hwdev, dev_addr, sg->length) || range_straddles_page_boundary(paddr, sg->length)) { phys_addr_t map = swiotlb_tbl_map_single(hwdev, diff --git a/kernel/drivers/xen/sys-hypervisor.c b/kernel/drivers/xen/sys-hypervisor.c index 96453f8a8..b5a7342e0 100644 --- a/kernel/drivers/xen/sys-hypervisor.c +++ b/kernel/drivers/xen/sys-hypervisor.c @@ -20,6 +20,9 @@ #include <xen/xenbus.h> #include <xen/interface/xen.h> #include <xen/interface/version.h> +#ifdef CONFIG_XEN_HAVE_VPMU +#include <xen/interface/xenpmu.h> +#endif #define HYPERVISOR_ATTR_RO(_name) \ static struct hyp_sysfs_attr _name##_attr = __ATTR_RO(_name) @@ -368,6 +371,126 @@ static void xen_properties_destroy(void) sysfs_remove_group(hypervisor_kobj, &xen_properties_group); } +#ifdef CONFIG_XEN_HAVE_VPMU +struct pmu_mode { + const char *name; + uint32_t mode; +}; + +static struct pmu_mode pmu_modes[] = { + {"off", XENPMU_MODE_OFF}, + {"self", XENPMU_MODE_SELF}, + {"hv", XENPMU_MODE_HV}, + {"all", XENPMU_MODE_ALL} +}; + +static ssize_t pmu_mode_store(struct hyp_sysfs_attr *attr, + const char *buffer, size_t len) +{ + int ret; + struct xen_pmu_params xp; + int i; + + for (i = 0; i < ARRAY_SIZE(pmu_modes); i++) { + if (strncmp(buffer, pmu_modes[i].name, len - 1) == 0) { + xp.val = pmu_modes[i].mode; + break; + } + } + + if (i == ARRAY_SIZE(pmu_modes)) + return -EINVAL; + + xp.version.maj = XENPMU_VER_MAJ; + xp.version.min = XENPMU_VER_MIN; + ret = HYPERVISOR_xenpmu_op(XENPMU_mode_set, &xp); + if (ret) + return ret; + + return len; +} + +static ssize_t pmu_mode_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret; + struct xen_pmu_params xp; + int i; + uint32_t mode; + + xp.version.maj = XENPMU_VER_MAJ; + xp.version.min = XENPMU_VER_MIN; + ret = HYPERVISOR_xenpmu_op(XENPMU_mode_get, &xp); + if (ret) + return ret; + + mode = (uint32_t)xp.val; + for (i = 0; i < ARRAY_SIZE(pmu_modes); i++) { + if (mode == pmu_modes[i].mode) + return sprintf(buffer, 
"%s\n", pmu_modes[i].name); + } + + return -EINVAL; +} +HYPERVISOR_ATTR_RW(pmu_mode); + +static ssize_t pmu_features_store(struct hyp_sysfs_attr *attr, + const char *buffer, size_t len) +{ + int ret; + uint32_t features; + struct xen_pmu_params xp; + + ret = kstrtou32(buffer, 0, &features); + if (ret) + return ret; + + xp.val = features; + xp.version.maj = XENPMU_VER_MAJ; + xp.version.min = XENPMU_VER_MIN; + ret = HYPERVISOR_xenpmu_op(XENPMU_feature_set, &xp); + if (ret) + return ret; + + return len; +} + +static ssize_t pmu_features_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret; + struct xen_pmu_params xp; + + xp.version.maj = XENPMU_VER_MAJ; + xp.version.min = XENPMU_VER_MIN; + ret = HYPERVISOR_xenpmu_op(XENPMU_feature_get, &xp); + if (ret) + return ret; + + return sprintf(buffer, "0x%x\n", (uint32_t)xp.val); +} +HYPERVISOR_ATTR_RW(pmu_features); + +static struct attribute *xen_pmu_attrs[] = { + &pmu_mode_attr.attr, + &pmu_features_attr.attr, + NULL +}; + +static const struct attribute_group xen_pmu_group = { + .name = "pmu", + .attrs = xen_pmu_attrs, +}; + +static int __init xen_pmu_init(void) +{ + return sysfs_create_group(hypervisor_kobj, &xen_pmu_group); +} + +static void xen_pmu_destroy(void) +{ + sysfs_remove_group(hypervisor_kobj, &xen_pmu_group); +} +#endif + static int __init hyper_sysfs_init(void) { int ret; @@ -390,7 +513,15 @@ static int __init hyper_sysfs_init(void) ret = xen_properties_init(); if (ret) goto prop_out; - +#ifdef CONFIG_XEN_HAVE_VPMU + if (xen_initial_domain()) { + ret = xen_pmu_init(); + if (ret) { + xen_properties_destroy(); + goto prop_out; + } + } +#endif goto out; prop_out: @@ -407,6 +538,9 @@ out: static void __exit hyper_sysfs_exit(void) { +#ifdef CONFIG_XEN_HAVE_VPMU + xen_pmu_destroy(); +#endif xen_properties_destroy(); xen_compilation_destroy(); xen_sysfs_uuid_destroy(); diff --git a/kernel/drivers/xen/tmem.c b/kernel/drivers/xen/tmem.c index c4211a316..945fc4327 100644 --- a/kernel/drivers/xen/tmem.c +++ 
b/kernel/drivers/xen/tmem.c @@ -17,8 +17,8 @@ #include <xen/xen.h> #include <xen/interface/xen.h> +#include <xen/page.h> #include <asm/xen/hypercall.h> -#include <asm/xen/page.h> #include <asm/xen/hypervisor.h> #include <xen/tmem.h> @@ -129,21 +129,17 @@ static int xen_tmem_new_pool(struct tmem_pool_uuid uuid, /* xen generic tmem ops */ static int xen_tmem_put_page(u32 pool_id, struct tmem_oid oid, - u32 index, unsigned long pfn) + u32 index, struct page *page) { - unsigned long gmfn = xen_pv_domain() ? pfn_to_mfn(pfn) : pfn; - return xen_tmem_op(TMEM_PUT_PAGE, pool_id, oid, index, - gmfn, 0, 0, 0); + xen_page_to_gfn(page), 0, 0, 0); } static int xen_tmem_get_page(u32 pool_id, struct tmem_oid oid, - u32 index, unsigned long pfn) + u32 index, struct page *page) { - unsigned long gmfn = xen_pv_domain() ? pfn_to_mfn(pfn) : pfn; - return xen_tmem_op(TMEM_GET_PAGE, pool_id, oid, index, - gmfn, 0, 0, 0); + xen_page_to_gfn(page), 0, 0, 0); } static int xen_tmem_flush_page(u32 pool_id, struct tmem_oid oid, u32 index) @@ -173,14 +169,13 @@ static void tmem_cleancache_put_page(int pool, struct cleancache_filekey key, { u32 ind = (u32) index; struct tmem_oid oid = *(struct tmem_oid *)&key; - unsigned long pfn = page_to_pfn(page); if (pool < 0) return; if (ind != index) return; mb(); /* ensure page is quiescent; tmem may address it with an alias */ - (void)xen_tmem_put_page((u32)pool, oid, ind, pfn); + (void)xen_tmem_put_page((u32)pool, oid, ind, page); } static int tmem_cleancache_get_page(int pool, struct cleancache_filekey key, @@ -188,7 +183,6 @@ static int tmem_cleancache_get_page(int pool, struct cleancache_filekey key, { u32 ind = (u32) index; struct tmem_oid oid = *(struct tmem_oid *)&key; - unsigned long pfn = page_to_pfn(page); int ret; /* translate return values to linux semantics */ @@ -196,7 +190,7 @@ static int tmem_cleancache_get_page(int pool, struct cleancache_filekey key, return -1; if (ind != index) return -1; - ret = xen_tmem_get_page((u32)pool, oid, ind, 
pfn); + ret = xen_tmem_get_page((u32)pool, oid, ind, page); if (ret == 1) return 0; else @@ -287,7 +281,6 @@ static int tmem_frontswap_store(unsigned type, pgoff_t offset, { u64 ind64 = (u64)offset; u32 ind = (u32)offset; - unsigned long pfn = page_to_pfn(page); int pool = tmem_frontswap_poolid; int ret; @@ -296,7 +289,7 @@ static int tmem_frontswap_store(unsigned type, pgoff_t offset, if (ind64 != ind) return -1; mb(); /* ensure page is quiescent; tmem may address it with an alias */ - ret = xen_tmem_put_page(pool, oswiz(type, ind), iswiz(ind), pfn); + ret = xen_tmem_put_page(pool, oswiz(type, ind), iswiz(ind), page); /* translate Xen tmem return values to linux semantics */ if (ret == 1) return 0; @@ -313,7 +306,6 @@ static int tmem_frontswap_load(unsigned type, pgoff_t offset, { u64 ind64 = (u64)offset; u32 ind = (u32)offset; - unsigned long pfn = page_to_pfn(page); int pool = tmem_frontswap_poolid; int ret; @@ -321,7 +313,7 @@ static int tmem_frontswap_load(unsigned type, pgoff_t offset, return -1; if (ind64 != ind) return -1; - ret = xen_tmem_get_page(pool, oswiz(type, ind), iswiz(ind), pfn); + ret = xen_tmem_get_page(pool, oswiz(type, ind), iswiz(ind), page); /* translate Xen tmem return values to linux semantics */ if (ret == 1) return 0; @@ -381,21 +373,15 @@ static int __init xen_tmem_init(void) #ifdef CONFIG_FRONTSWAP if (tmem_enabled && frontswap) { char *s = ""; - struct frontswap_ops *old_ops; tmem_frontswap_poolid = -1; - old_ops = frontswap_register_ops(&tmem_frontswap_ops); - if (IS_ERR(old_ops) || old_ops) { - if (IS_ERR(old_ops)) - return PTR_ERR(old_ops); - s = " (WARNING: frontswap_ops overridden)"; - } + frontswap_register_ops(&tmem_frontswap_ops); pr_info("frontswap enabled, RAM provided by Xen Transcendent Memory%s\n", s); } #endif #ifdef CONFIG_CLEANCACHE - BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); + BUILD_BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); if (tmem_enabled && cleancache) { int 
err; diff --git a/kernel/drivers/xen/xen-acpi-cpuhotplug.c b/kernel/drivers/xen/xen-acpi-cpuhotplug.c index 3e62ee4b3..f4a369429 100644 --- a/kernel/drivers/xen/xen-acpi-cpuhotplug.c +++ b/kernel/drivers/xen/xen-acpi-cpuhotplug.c @@ -46,13 +46,7 @@ static int xen_acpi_processor_enable(struct acpi_device *device) unsigned long long value; union acpi_object object = { 0 }; struct acpi_buffer buffer = { sizeof(union acpi_object), &object }; - struct acpi_processor *pr; - - pr = acpi_driver_data(device); - if (!pr) { - pr_err(PREFIX "Cannot find driver data\n"); - return -EINVAL; - } + struct acpi_processor *pr = acpi_driver_data(device); if (!strcmp(acpi_device_hid(device), ACPI_PROCESSOR_OBJECT_HID)) { /* Declared with "Processor" statement; match ProcessorID */ @@ -77,7 +71,7 @@ static int xen_acpi_processor_enable(struct acpi_device *device) pr->id = xen_pcpu_id(pr->acpi_id); - if ((int)pr->id < 0) + if (invalid_logical_cpuid(pr->id)) /* This cpu is not presented at hypervisor, try to hotadd it */ if (ACPI_FAILURE(xen_acpi_cpu_hotadd(pr))) { pr_err(PREFIX "Hotadd CPU (acpi_id = %d) failed.\n", @@ -226,7 +220,7 @@ static acpi_status xen_acpi_cpu_hotadd(struct acpi_processor *pr) return AE_ERROR; pr->id = xen_hotadd_cpu(pr); - if ((int)pr->id < 0) + if (invalid_logical_cpuid(pr->id)) return AE_ERROR; /* diff --git a/kernel/drivers/xen/xen-acpi-processor.c b/kernel/drivers/xen/xen-acpi-processor.c index 59fc190f1..70fa43800 100644 --- a/kernel/drivers/xen/xen-acpi-processor.c +++ b/kernel/drivers/xen/xen-acpi-processor.c @@ -560,11 +560,9 @@ static int __init xen_acpi_processor_init(void) return 0; err_unregister: - for_each_possible_cpu(i) { - struct acpi_processor_performance *perf; - perf = per_cpu_ptr(acpi_perf_data, i); - acpi_processor_unregister_performance(perf, i); - } + for_each_possible_cpu(i) + acpi_processor_unregister_performance(i); + err_out: /* Freeing a NULL pointer is OK: alloc_percpu zeroes. 
*/ free_acpi_perf_data(); @@ -579,11 +577,9 @@ static void __exit xen_acpi_processor_exit(void) kfree(acpi_ids_done); kfree(acpi_id_present); kfree(acpi_id_cst_present); - for_each_possible_cpu(i) { - struct acpi_processor_performance *perf; - perf = per_cpu_ptr(acpi_perf_data, i); - acpi_processor_unregister_performance(perf, i); - } + for_each_possible_cpu(i) + acpi_processor_unregister_performance(i); + free_acpi_perf_data(); } diff --git a/kernel/drivers/xen/xen-pciback/pciback.h b/kernel/drivers/xen/xen-pciback/pciback.h index 58e38d586..4d529f3e4 100644 --- a/kernel/drivers/xen/xen-pciback/pciback.h +++ b/kernel/drivers/xen/xen-pciback/pciback.h @@ -37,6 +37,7 @@ struct xen_pcibk_device { struct xen_pci_sharedinfo *sh_info; unsigned long flags; struct work_struct op_work; + struct xen_pci_op op; }; struct xen_pcibk_dev_data { diff --git a/kernel/drivers/xen/xen-pciback/pciback_ops.c b/kernel/drivers/xen/xen-pciback/pciback_ops.c index c4a0666de..fb0221434 100644 --- a/kernel/drivers/xen/xen-pciback/pciback_ops.c +++ b/kernel/drivers/xen/xen-pciback/pciback_ops.c @@ -70,6 +70,13 @@ static void xen_pcibk_control_isr(struct pci_dev *dev, int reset) enable ? "enable" : "disable"); if (enable) { + /* + * The MSI or MSI-X should not have an IRQ handler. Otherwise + * if the guest terminates we BUG_ON in free_msi_irqs. 
+ */ + if (dev->msi_enabled || dev->msix_enabled) + goto out; + rc = request_irq(dev_data->irq, xen_pcibk_guest_interrupt, IRQF_SHARED, dev_data->irq_name, dev); @@ -144,7 +151,12 @@ int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev, if (unlikely(verbose_request)) printk(KERN_DEBUG DRV_NAME ": %s: enable MSI\n", pci_name(dev)); - status = pci_enable_msi(dev); + if (dev->msi_enabled) + status = -EALREADY; + else if (dev->msix_enabled) + status = -ENXIO; + else + status = pci_enable_msi(dev); if (status) { pr_warn_ratelimited("%s: error enabling MSI for guest %u: err %d\n", @@ -173,20 +185,23 @@ static int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev, struct pci_dev *dev, struct xen_pci_op *op) { - struct xen_pcibk_dev_data *dev_data; - if (unlikely(verbose_request)) printk(KERN_DEBUG DRV_NAME ": %s: disable MSI\n", pci_name(dev)); - pci_disable_msi(dev); + if (dev->msi_enabled) { + struct xen_pcibk_dev_data *dev_data; + + pci_disable_msi(dev); + + dev_data = pci_get_drvdata(dev); + if (dev_data) + dev_data->ack_intr = 1; + } op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; if (unlikely(verbose_request)) printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev), op->value); - dev_data = pci_get_drvdata(dev); - if (dev_data) - dev_data->ack_intr = 1; return 0; } @@ -197,13 +212,27 @@ int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev, struct xen_pcibk_dev_data *dev_data; int i, result; struct msix_entry *entries; + u16 cmd; if (unlikely(verbose_request)) printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n", pci_name(dev)); + if (op->value > SH_INFO_MAX_VEC) return -EINVAL; + if (dev->msix_enabled) + return -EALREADY; + + /* + * PCI_COMMAND_MEMORY must be enabled, otherwise we may not be able + * to access the BARs where the MSI-X entries reside. + * But VF devices are unique in which the PF needs to be checked. 
+ */ + pci_read_config_word(pci_physfn(dev), PCI_COMMAND, &cmd); + if (dev->msi_enabled || !(cmd & PCI_COMMAND_MEMORY)) + return -ENXIO; + entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL); if (entries == NULL) return -ENOMEM; @@ -245,23 +274,27 @@ static int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev, struct pci_dev *dev, struct xen_pci_op *op) { - struct xen_pcibk_dev_data *dev_data; if (unlikely(verbose_request)) printk(KERN_DEBUG DRV_NAME ": %s: disable MSI-X\n", pci_name(dev)); - pci_disable_msix(dev); + if (dev->msix_enabled) { + struct xen_pcibk_dev_data *dev_data; + + pci_disable_msix(dev); + + dev_data = pci_get_drvdata(dev); + if (dev_data) + dev_data->ack_intr = 1; + } /* * SR-IOV devices (which don't have any legacy IRQ) have * an undefined IRQ value of zero. */ op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; if (unlikely(verbose_request)) - printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n", pci_name(dev), - op->value); - dev_data = pci_get_drvdata(dev); - if (dev_data) - dev_data->ack_intr = 1; + printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n", + pci_name(dev), op->value); return 0; } #endif @@ -298,9 +331,14 @@ void xen_pcibk_do_op(struct work_struct *data) container_of(data, struct xen_pcibk_device, op_work); struct pci_dev *dev; struct xen_pcibk_dev_data *dev_data = NULL; - struct xen_pci_op *op = &pdev->sh_info->op; + struct xen_pci_op *op = &pdev->op; int test_intx = 0; +#ifdef CONFIG_PCI_MSI + unsigned int nr = 0; +#endif + *op = pdev->sh_info->op; + barrier(); dev = xen_pcibk_get_pci_dev(pdev, op->domain, op->bus, op->devfn); if (dev == NULL) @@ -326,6 +364,7 @@ void xen_pcibk_do_op(struct work_struct *data) op->err = xen_pcibk_disable_msi(pdev, dev, op); break; case XEN_PCI_OP_enable_msix: + nr = op->value; op->err = xen_pcibk_enable_msix(pdev, dev, op); break; case XEN_PCI_OP_disable_msix: @@ -342,6 +381,17 @@ void xen_pcibk_do_op(struct work_struct *data) if ((dev_data->enable_intx != test_intx)) 
xen_pcibk_control_isr(dev, 0 /* no reset */); } + pdev->sh_info->op.err = op->err; + pdev->sh_info->op.value = op->value; +#ifdef CONFIG_PCI_MSI + if (op->cmd == XEN_PCI_OP_enable_msix && op->err == 0) { + unsigned int i; + + for (i = 0; i < nr; i++) + pdev->sh_info->op.msix_entries[i].vector = + op->msix_entries[i].vector; + } +#endif /* Tell the driver domain that we're done. */ wmb(); clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); diff --git a/kernel/drivers/xen/xen-pciback/xenbus.c b/kernel/drivers/xen/xen-pciback/xenbus.c index 98bc345f2..4843741e7 100644 --- a/kernel/drivers/xen/xen-pciback/xenbus.c +++ b/kernel/drivers/xen/xen-pciback/xenbus.c @@ -44,7 +44,6 @@ static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev) dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev); pdev->xdev = xdev; - dev_set_drvdata(&xdev->dev, pdev); mutex_init(&pdev->dev_lock); @@ -58,6 +57,9 @@ static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev) kfree(pdev); pdev = NULL; } + + dev_set_drvdata(&xdev->dev, pdev); + out: return pdev; } diff --git a/kernel/drivers/xen/xen-scsiback.c b/kernel/drivers/xen/xen-scsiback.c index b7f51504f..51387d75c 100644 --- a/kernel/drivers/xen/xen-scsiback.c +++ b/kernel/drivers/xen/xen-scsiback.c @@ -49,15 +49,10 @@ #include <generated/utsrelease.h> -#include <scsi/scsi.h> -#include <scsi/scsi_dbg.h> -#include <scsi/scsi_eh.h> -#include <scsi/scsi_tcq.h> +#include <scsi/scsi_host.h> /* SG_ALL */ #include <target/target_core_base.h> #include <target/target_core_fabric.h> -#include <target/target_core_configfs.h> -#include <target/target_core_fabric_configfs.h> #include <asm/hypervisor.h> @@ -204,8 +199,6 @@ static LIST_HEAD(scsiback_free_pages); static DEFINE_MUTEX(scsiback_mutex); static LIST_HEAD(scsiback_list); -static const struct target_core_fabric_ops scsiback_ops; - static void scsiback_get(struct vscsibk_info *info) { atomic_inc(&info->nr_unreplied_reqs); @@ -400,6 +393,7 @@ static void 
scsiback_cmd_exec(struct vscsibk_pend *pending_req) memset(se_cmd, 0, sizeof(*se_cmd)); scsiback_get(pending_req->info); + se_cmd->tag = pending_req->rqid; rc = target_submit_cmd_map_sgls(se_cmd, sess, pending_req->cmnd, pending_req->sense_buffer, pending_req->v2p->lun, pending_req->data_len, 0, @@ -732,7 +726,7 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info) if (!pending_req) return 1; - ring_req = *RING_GET_REQUEST(ring, rc); + RING_COPY_REQUEST(ring, rc, &ring_req); ring->req_cons = ++rc; err = prepare_pending_reqs(info, &ring_req, pending_req); @@ -866,7 +860,8 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info, struct list_head *head = &(info->v2p_entry_lists); unsigned long flags; char *lunp; - unsigned int lun; + unsigned long long unpacked_lun; + struct se_lun *se_lun; struct scsiback_tpg *tpg_entry, *tpg = NULL; char *error = "doesn't exist"; @@ -877,24 +872,27 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info, } *lunp = 0; lunp++; - if (kstrtouint(lunp, 10, &lun) || lun >= TRANSPORT_MAX_LUNS_PER_TPG) { + err = kstrtoull(lunp, 10, &unpacked_lun); + if (err < 0) { pr_err("lun number not valid: %s\n", lunp); - return -EINVAL; + return err; } mutex_lock(&scsiback_mutex); list_for_each_entry(tpg_entry, &scsiback_list, tv_tpg_list) { if (!strcmp(phy, tpg_entry->tport->tport_name) || !strcmp(phy, tpg_entry->param_alias)) { - spin_lock(&tpg_entry->se_tpg.tpg_lun_lock); - if (tpg_entry->se_tpg.tpg_lun_list[lun]->lun_status == - TRANSPORT_LUN_STATUS_ACTIVE) { - if (!tpg_entry->tpg_nexus) - error = "nexus undefined"; - else - tpg = tpg_entry; + mutex_lock(&tpg_entry->se_tpg.tpg_lun_mutex); + hlist_for_each_entry(se_lun, &tpg_entry->se_tpg.tpg_lun_hlist, link) { + if (se_lun->unpacked_lun == unpacked_lun) { + if (!tpg_entry->tpg_nexus) + error = "nexus undefined"; + else + tpg = tpg_entry; + break; + } } - spin_unlock(&tpg_entry->se_tpg.tpg_lun_lock); + mutex_unlock(&tpg_entry->se_tpg.tpg_lun_mutex); break; } } @@ 
-906,7 +904,7 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info, mutex_unlock(&scsiback_mutex); if (!tpg) { - pr_err("%s:%d %s\n", phy, lun, error); + pr_err("%s:%llu %s\n", phy, unpacked_lun, error); return -ENODEV; } @@ -934,19 +932,19 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info, kref_init(&new->kref); new->v = *v; new->tpg = tpg; - new->lun = lun; + new->lun = unpacked_lun; list_add_tail(&new->l, head); out: spin_unlock_irqrestore(&info->v2p_lock, flags); out_free: - mutex_lock(&tpg->tv_tpg_mutex); - tpg->tv_tpg_fe_count--; - mutex_unlock(&tpg->tv_tpg_mutex); - - if (err) + if (err) { + mutex_lock(&tpg->tv_tpg_mutex); + tpg->tv_tpg_fe_count--; + mutex_unlock(&tpg->tv_tpg_mutex); kfree(new); + } return err; } @@ -1254,28 +1252,6 @@ static char *scsiback_dump_proto_id(struct scsiback_tport *tport) return "Unknown"; } -static u8 scsiback_get_fabric_proto_ident(struct se_portal_group *se_tpg) -{ - struct scsiback_tpg *tpg = container_of(se_tpg, - struct scsiback_tpg, se_tpg); - struct scsiback_tport *tport = tpg->tport; - - switch (tport->tport_proto_id) { - case SCSI_PROTOCOL_SAS: - return sas_get_fabric_proto_ident(se_tpg); - case SCSI_PROTOCOL_FCP: - return fc_get_fabric_proto_ident(se_tpg); - case SCSI_PROTOCOL_ISCSI: - return iscsi_get_fabric_proto_ident(se_tpg); - default: - pr_err("Unknown tport_proto_id: 0x%02x, using SAS emulation\n", - tport->tport_proto_id); - break; - } - - return sas_get_fabric_proto_ident(se_tpg); -} - static char *scsiback_get_fabric_wwn(struct se_portal_group *se_tpg) { struct scsiback_tpg *tpg = container_of(se_tpg, @@ -1292,102 +1268,6 @@ static u16 scsiback_get_tag(struct se_portal_group *se_tpg) return tpg->tport_tpgt; } -static u32 scsiback_get_default_depth(struct se_portal_group *se_tpg) -{ - return 1; -} - -static u32 -scsiback_get_pr_transport_id(struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code, - unsigned 
char *buf) -{ - struct scsiback_tpg *tpg = container_of(se_tpg, - struct scsiback_tpg, se_tpg); - struct scsiback_tport *tport = tpg->tport; - - switch (tport->tport_proto_id) { - case SCSI_PROTOCOL_SAS: - return sas_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); - case SCSI_PROTOCOL_FCP: - return fc_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); - case SCSI_PROTOCOL_ISCSI: - return iscsi_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); - default: - pr_err("Unknown tport_proto_id: 0x%02x, using SAS emulation\n", - tport->tport_proto_id); - break; - } - - return sas_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); -} - -static u32 -scsiback_get_pr_transport_id_len(struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code) -{ - struct scsiback_tpg *tpg = container_of(se_tpg, - struct scsiback_tpg, se_tpg); - struct scsiback_tport *tport = tpg->tport; - - switch (tport->tport_proto_id) { - case SCSI_PROTOCOL_SAS: - return sas_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); - case SCSI_PROTOCOL_FCP: - return fc_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); - case SCSI_PROTOCOL_ISCSI: - return iscsi_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); - default: - pr_err("Unknown tport_proto_id: 0x%02x, using SAS emulation\n", - tport->tport_proto_id); - break; - } - - return sas_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); -} - -static char * -scsiback_parse_pr_out_transport_id(struct se_portal_group *se_tpg, - const char *buf, - u32 *out_tid_len, - char **port_nexus_ptr) -{ - struct scsiback_tpg *tpg = container_of(se_tpg, - struct scsiback_tpg, se_tpg); - struct scsiback_tport *tport = tpg->tport; - - switch (tport->tport_proto_id) { - case SCSI_PROTOCOL_SAS: - return sas_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); - case SCSI_PROTOCOL_FCP: - 
return fc_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); - case SCSI_PROTOCOL_ISCSI: - return iscsi_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); - default: - pr_err("Unknown tport_proto_id: 0x%02x, using SAS emulation\n", - tport->tport_proto_id); - break; - } - - return sas_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); -} - static struct se_wwn * scsiback_make_tport(struct target_fabric_configfs *tf, struct config_group *group, @@ -1454,19 +1334,6 @@ static void scsiback_drop_tport(struct se_wwn *wwn) kfree(tport); } -static struct se_node_acl * -scsiback_alloc_fabric_acl(struct se_portal_group *se_tpg) -{ - return kzalloc(sizeof(struct se_node_acl), GFP_KERNEL); -} - -static void -scsiback_release_fabric_acl(struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl) -{ - kfree(se_nacl); -} - static u32 scsiback_tpg_get_inst_index(struct se_portal_group *se_tpg) { return 1; @@ -1525,14 +1392,6 @@ static void scsiback_set_default_node_attrs(struct se_node_acl *nacl) { } -static u32 scsiback_get_task_tag(struct se_cmd *se_cmd) -{ - struct vscsibk_pend *pending_req = container_of(se_cmd, - struct vscsibk_pend, se_cmd); - - return pending_req->rqid; -} - static int scsiback_get_cmd_state(struct se_cmd *se_cmd) { return 0; @@ -1578,9 +1437,10 @@ static void scsiback_aborted_task(struct se_cmd *se_cmd) { } -static ssize_t scsiback_tpg_param_show_alias(struct se_portal_group *se_tpg, +static ssize_t scsiback_tpg_param_alias_show(struct config_item *item, char *page) { + struct se_portal_group *se_tpg = param_to_tpg(item); struct scsiback_tpg *tpg = container_of(se_tpg, struct scsiback_tpg, se_tpg); ssize_t rb; @@ -1592,9 +1452,10 @@ static ssize_t scsiback_tpg_param_show_alias(struct se_portal_group *se_tpg, return rb; } -static ssize_t scsiback_tpg_param_store_alias(struct se_portal_group *se_tpg, +static ssize_t scsiback_tpg_param_alias_store(struct config_item *item, const char *page, 
size_t count) { + struct se_portal_group *se_tpg = param_to_tpg(item); struct scsiback_tpg *tpg = container_of(se_tpg, struct scsiback_tpg, se_tpg); int len; @@ -1614,10 +1475,10 @@ static ssize_t scsiback_tpg_param_store_alias(struct se_portal_group *se_tpg, return count; } -TF_TPG_PARAM_ATTR(scsiback, alias, S_IRUGO | S_IWUSR); +CONFIGFS_ATTR(scsiback_tpg_param_, alias); static struct configfs_attribute *scsiback_param_attrs[] = { - &scsiback_tpg_param_alias.attr, + &scsiback_tpg_param_attr_alias, NULL, }; @@ -1725,9 +1586,9 @@ static int scsiback_drop_nexus(struct scsiback_tpg *tpg) return 0; } -static ssize_t scsiback_tpg_show_nexus(struct se_portal_group *se_tpg, - char *page) +static ssize_t scsiback_tpg_nexus_show(struct config_item *item, char *page) { + struct se_portal_group *se_tpg = to_tpg(item); struct scsiback_tpg *tpg = container_of(se_tpg, struct scsiback_tpg, se_tpg); struct scsiback_nexus *tv_nexus; @@ -1746,10 +1607,10 @@ static ssize_t scsiback_tpg_show_nexus(struct se_portal_group *se_tpg, return ret; } -static ssize_t scsiback_tpg_store_nexus(struct se_portal_group *se_tpg, - const char *page, - size_t count) +static ssize_t scsiback_tpg_nexus_store(struct config_item *item, + const char *page, size_t count) { + struct se_portal_group *se_tpg = to_tpg(item); struct scsiback_tpg *tpg = container_of(se_tpg, struct scsiback_tpg, se_tpg); struct scsiback_tport *tport_wwn = tpg->tport; @@ -1821,26 +1682,25 @@ check_newline: return count; } -TF_TPG_BASE_ATTR(scsiback, nexus, S_IRUGO | S_IWUSR); +CONFIGFS_ATTR(scsiback_tpg_, nexus); static struct configfs_attribute *scsiback_tpg_attrs[] = { - &scsiback_tpg_nexus.attr, + &scsiback_tpg_attr_nexus, NULL, }; static ssize_t -scsiback_wwn_show_attr_version(struct target_fabric_configfs *tf, - char *page) +scsiback_wwn_version_show(struct config_item *item, char *page) { return sprintf(page, "xen-pvscsi fabric module %s on %s/%s on " UTS_RELEASE"\n", VSCSI_VERSION, utsname()->sysname, utsname()->machine); } 
-TF_WWN_ATTR_RO(scsiback, version); +CONFIGFS_ATTR_RO(scsiback_wwn_, version); static struct configfs_attribute *scsiback_wwn_attrs[] = { - &scsiback_wwn_version.attr, + &scsiback_wwn_attr_version, NULL, }; @@ -1901,8 +1761,7 @@ scsiback_make_tpg(struct se_wwn *wwn, tpg->tport = tport; tpg->tport_tpgt = tpgt; - ret = core_tpg_register(&scsiback_ops, wwn, - &tpg->se_tpg, tpg, TRANSPORT_TPG_TYPE_NORMAL); + ret = core_tpg_register(wwn, &tpg->se_tpg, tport->tport_proto_id); if (ret < 0) { kfree(tpg); return NULL; @@ -1947,23 +1806,15 @@ static const struct target_core_fabric_ops scsiback_ops = { .module = THIS_MODULE, .name = "xen-pvscsi", .get_fabric_name = scsiback_get_fabric_name, - .get_fabric_proto_ident = scsiback_get_fabric_proto_ident, .tpg_get_wwn = scsiback_get_fabric_wwn, .tpg_get_tag = scsiback_get_tag, - .tpg_get_default_depth = scsiback_get_default_depth, - .tpg_get_pr_transport_id = scsiback_get_pr_transport_id, - .tpg_get_pr_transport_id_len = scsiback_get_pr_transport_id_len, - .tpg_parse_pr_out_transport_id = scsiback_parse_pr_out_transport_id, .tpg_check_demo_mode = scsiback_check_true, .tpg_check_demo_mode_cache = scsiback_check_true, .tpg_check_demo_mode_write_protect = scsiback_check_false, .tpg_check_prod_mode_write_protect = scsiback_check_false, - .tpg_alloc_fabric_acl = scsiback_alloc_fabric_acl, - .tpg_release_fabric_acl = scsiback_release_fabric_acl, .tpg_get_inst_index = scsiback_tpg_get_inst_index, .check_stop_free = scsiback_check_stop_free, .release_cmd = scsiback_release_cmd, - .put_session = NULL, .shutdown_session = scsiback_shutdown_session, .close_session = scsiback_close_session, .sess_get_index = scsiback_sess_get_index, @@ -1971,7 +1822,6 @@ static const struct target_core_fabric_ops scsiback_ops = { .write_pending = scsiback_write_pending, .write_pending_status = scsiback_write_pending_status, .set_default_node_attributes = scsiback_set_default_node_attrs, - .get_task_tag = scsiback_get_task_tag, .get_cmd_state = 
scsiback_get_cmd_state, .queue_data_in = scsiback_queue_data_in, .queue_status = scsiback_queue_status, @@ -1986,12 +1836,6 @@ static const struct target_core_fabric_ops scsiback_ops = { .fabric_drop_tpg = scsiback_drop_tpg, .fabric_post_link = scsiback_port_link, .fabric_pre_unlink = scsiback_port_unlink, - .fabric_make_np = NULL, - .fabric_drop_np = NULL, -#if 0 - .fabric_make_nodeacl = scsiback_make_nodeacl, - .fabric_drop_nodeacl = scsiback_drop_nodeacl, -#endif .tfc_wwn_attrs = scsiback_wwn_attrs, .tfc_tpg_base_attrs = scsiback_tpg_attrs, diff --git a/kernel/drivers/xen/xenbus/xenbus_client.c b/kernel/drivers/xen/xenbus/xenbus_client.c index 658be6cc3..056da6ee1 100644 --- a/kernel/drivers/xen/xenbus/xenbus_client.c +++ b/kernel/drivers/xen/xenbus/xenbus_client.c @@ -37,7 +37,7 @@ #include <linux/vmalloc.h> #include <linux/export.h> #include <asm/xen/hypervisor.h> -#include <asm/xen/page.h> +#include <xen/page.h> #include <xen/interface/xen.h> #include <xen/interface/event_channel.h> #include <xen/balloon.h> @@ -49,6 +49,10 @@ #include "xenbus_probe.h" +#define XENBUS_PAGES(_grants) (DIV_ROUND_UP(_grants, XEN_PFN_PER_PAGE)) + +#define XENBUS_MAX_RING_PAGES (XENBUS_PAGES(XENBUS_MAX_RING_GRANTS)) + struct xenbus_map_node { struct list_head next; union { @@ -57,10 +61,11 @@ struct xenbus_map_node { } pv; struct { struct page *pages[XENBUS_MAX_RING_PAGES]; + unsigned long addrs[XENBUS_MAX_RING_GRANTS]; void *addr; } hvm; }; - grant_handle_t handles[XENBUS_MAX_RING_PAGES]; + grant_handle_t handles[XENBUS_MAX_RING_GRANTS]; unsigned int nr_handles; }; @@ -379,16 +384,16 @@ int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, int i, j; for (i = 0; i < nr_pages; i++) { - unsigned long addr = (unsigned long)vaddr + - (PAGE_SIZE * i); err = gnttab_grant_foreign_access(dev->otherend_id, - virt_to_mfn(addr), 0); + virt_to_gfn(vaddr), 0); if (err < 0) { xenbus_dev_fatal(dev, err, "granting access to ring page"); goto fail; } grefs[i] = err; + + vaddr = vaddr + 
XEN_PAGE_SIZE; } return 0; @@ -479,12 +484,12 @@ static int __xenbus_map_ring(struct xenbus_device *dev, unsigned int flags, bool *leaked) { - struct gnttab_map_grant_ref map[XENBUS_MAX_RING_PAGES]; - struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES]; + struct gnttab_map_grant_ref map[XENBUS_MAX_RING_GRANTS]; + struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS]; int i, j; int err = GNTST_okay; - if (nr_grefs > XENBUS_MAX_RING_PAGES) + if (nr_grefs > XENBUS_MAX_RING_GRANTS) return -EINVAL; for (i = 0; i < nr_grefs; i++) { @@ -540,22 +545,22 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, { struct xenbus_map_node *node; struct vm_struct *area; - pte_t *ptes[XENBUS_MAX_RING_PAGES]; - phys_addr_t phys_addrs[XENBUS_MAX_RING_PAGES]; + pte_t *ptes[XENBUS_MAX_RING_GRANTS]; + phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS]; int err = GNTST_okay; int i; bool leaked; *vaddr = NULL; - if (nr_grefs > XENBUS_MAX_RING_PAGES) + if (nr_grefs > XENBUS_MAX_RING_GRANTS) return -EINVAL; node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) return -ENOMEM; - area = alloc_vm_area(PAGE_SIZE * nr_grefs, ptes); + area = alloc_vm_area(XEN_PAGE_SIZE * nr_grefs, ptes); if (!area) { kfree(node); return -ENOMEM; @@ -591,21 +596,44 @@ failed: return err; } +struct map_ring_valloc_hvm +{ + unsigned int idx; + + /* Why do we need two arrays? 
See comment of __xenbus_map_ring */ + phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS]; + unsigned long addrs[XENBUS_MAX_RING_GRANTS]; +}; + +static void xenbus_map_ring_setup_grant_hvm(unsigned long gfn, + unsigned int goffset, + unsigned int len, + void *data) +{ + struct map_ring_valloc_hvm *info = data; + unsigned long vaddr = (unsigned long)gfn_to_virt(gfn); + + info->phys_addrs[info->idx] = vaddr; + info->addrs[info->idx] = vaddr; + + info->idx++; +} + static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, grant_ref_t *gnt_ref, unsigned int nr_grefs, void **vaddr) { struct xenbus_map_node *node; - int i; int err; void *addr; bool leaked = false; - /* Why do we need two arrays? See comment of __xenbus_map_ring */ - phys_addr_t phys_addrs[XENBUS_MAX_RING_PAGES]; - unsigned long addrs[XENBUS_MAX_RING_PAGES]; + struct map_ring_valloc_hvm info = { + .idx = 0, + }; + unsigned int nr_pages = XENBUS_PAGES(nr_grefs); - if (nr_grefs > XENBUS_MAX_RING_PAGES) + if (nr_grefs > XENBUS_MAX_RING_GRANTS) return -EINVAL; *vaddr = NULL; @@ -614,25 +642,22 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, if (!node) return -ENOMEM; - err = alloc_xenballooned_pages(nr_grefs, node->hvm.pages, - false /* lowmem */); + err = alloc_xenballooned_pages(nr_pages, node->hvm.pages); if (err) goto out_err; - for (i = 0; i < nr_grefs; i++) { - unsigned long pfn = page_to_pfn(node->hvm.pages[i]); - phys_addrs[i] = (unsigned long)pfn_to_kaddr(pfn); - addrs[i] = (unsigned long)pfn_to_kaddr(pfn); - } + gnttab_foreach_grant(node->hvm.pages, nr_grefs, + xenbus_map_ring_setup_grant_hvm, + &info); err = __xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handles, - phys_addrs, GNTMAP_host_map, &leaked); + info.phys_addrs, GNTMAP_host_map, &leaked); node->nr_handles = nr_grefs; if (err) goto out_free_ballooned_pages; - addr = vmap(node->hvm.pages, nr_grefs, VM_MAP | VM_IOREMAP, + addr = vmap(node->hvm.pages, nr_pages, VM_MAP | VM_IOREMAP, PAGE_KERNEL); if (!addr) { err = 
-ENOMEM; @@ -650,14 +675,13 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, out_xenbus_unmap_ring: if (!leaked) - xenbus_unmap_ring(dev, node->handles, node->nr_handles, - addrs); + xenbus_unmap_ring(dev, node->handles, nr_grefs, info.addrs); else pr_alert("leaking %p size %u page(s)", - addr, nr_grefs); + addr, nr_pages); out_free_ballooned_pages: if (!leaked) - free_xenballooned_pages(nr_grefs, node->hvm.pages); + free_xenballooned_pages(nr_pages, node->hvm.pages); out_err: kfree(node); return err; @@ -687,10 +711,10 @@ int xenbus_map_ring(struct xenbus_device *dev, grant_ref_t *gnt_refs, unsigned int nr_grefs, grant_handle_t *handles, unsigned long *vaddrs, bool *leaked) { - phys_addr_t phys_addrs[XENBUS_MAX_RING_PAGES]; + phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS]; int i; - if (nr_grefs > XENBUS_MAX_RING_PAGES) + if (nr_grefs > XENBUS_MAX_RING_GRANTS) return -EINVAL; for (i = 0; i < nr_grefs; i++) @@ -723,7 +747,7 @@ EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) { struct xenbus_map_node *node; - struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES]; + struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS]; unsigned int level; int i; bool leaked = false; @@ -750,7 +774,7 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) unsigned long addr; memset(&unmap[i], 0, sizeof(unmap[i])); - addr = (unsigned long)vaddr + (PAGE_SIZE * i); + addr = (unsigned long)vaddr + (XEN_PAGE_SIZE * i); unmap[i].host_addr = arbitrary_virt_to_machine( lookup_address(addr, &level)).maddr; unmap[i].dev_bus_addr = 0; @@ -783,13 +807,33 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) return err; } +struct unmap_ring_vfree_hvm +{ + unsigned int idx; + unsigned long addrs[XENBUS_MAX_RING_GRANTS]; +}; + +static void xenbus_unmap_ring_setup_grant_hvm(unsigned long gfn, + unsigned int goffset, + unsigned int len, + void 
*data) +{ + struct unmap_ring_vfree_hvm *info = data; + + info->addrs[info->idx] = (unsigned long)gfn_to_virt(gfn); + + info->idx++; +} + static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) { int rv; struct xenbus_map_node *node; void *addr; - unsigned long addrs[XENBUS_MAX_RING_PAGES]; - int i; + struct unmap_ring_vfree_hvm info = { + .idx = 0, + }; + unsigned int nr_pages; spin_lock(&xenbus_valloc_lock); list_for_each_entry(node, &xenbus_valloc_pages, next) { @@ -809,18 +853,20 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) return GNTST_bad_virt_addr; } - for (i = 0; i < node->nr_handles; i++) - addrs[i] = (unsigned long)pfn_to_kaddr(page_to_pfn(node->hvm.pages[i])); + nr_pages = XENBUS_PAGES(node->nr_handles); + + gnttab_foreach_grant(node->hvm.pages, node->nr_handles, + xenbus_unmap_ring_setup_grant_hvm, + &info); rv = xenbus_unmap_ring(dev, node->handles, node->nr_handles, - addrs); + info.addrs); if (!rv) { vunmap(vaddr); - free_xenballooned_pages(node->nr_handles, node->hvm.pages); + free_xenballooned_pages(nr_pages, node->hvm.pages); } else - WARN(1, "Leaking %p, size %u page(s)\n", vaddr, - node->nr_handles); + WARN(1, "Leaking %p, size %u page(s)\n", vaddr, nr_pages); kfree(node); return rv; @@ -841,11 +887,11 @@ int xenbus_unmap_ring(struct xenbus_device *dev, grant_handle_t *handles, unsigned int nr_handles, unsigned long *vaddrs) { - struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES]; + struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_GRANTS]; int i; int err; - if (nr_handles > XENBUS_MAX_RING_PAGES) + if (nr_handles > XENBUS_MAX_RING_GRANTS) return -EINVAL; for (i = 0; i < nr_handles; i++) diff --git a/kernel/drivers/xen/xenbus/xenbus_dev_backend.c b/kernel/drivers/xen/xenbus/xenbus_dev_backend.c index b17707ee0..ee6d9efd7 100644 --- a/kernel/drivers/xen/xenbus/xenbus_dev_backend.c +++ b/kernel/drivers/xen/xenbus/xenbus_dev_backend.c @@ -49,7 +49,7 @@ static long 
xenbus_alloc(domid_t domid) goto out_err; gnttab_grant_foreign_access_ref(GNTTAB_RESERVED_XENSTORE, domid, - virt_to_mfn(xen_store_interface), 0 /* writable */); + virt_to_gfn(xen_store_interface), 0 /* writable */); arg.dom = DOMID_SELF; arg.remote_dom = domid; diff --git a/kernel/drivers/xen/xenbus/xenbus_probe.c b/kernel/drivers/xen/xenbus/xenbus_probe.c index 5390a674b..33a31cfef 100644 --- a/kernel/drivers/xen/xenbus/xenbus_probe.c +++ b/kernel/drivers/xen/xenbus/xenbus_probe.c @@ -75,7 +75,7 @@ EXPORT_SYMBOL_GPL(xen_store_interface); enum xenstore_init xen_store_domain_type; EXPORT_SYMBOL_GPL(xen_store_domain_type); -static unsigned long xen_store_mfn; +static unsigned long xen_store_gfn; static BLOCKING_NOTIFIER_HEAD(xenstore_chain); @@ -711,9 +711,7 @@ static int __init xenstored_local_init(void) if (!page) goto out_err; - xen_store_mfn = xen_start_info->store_mfn = - pfn_to_mfn(virt_to_phys((void *)page) >> - PAGE_SHIFT); + xen_store_gfn = xen_start_info->store_mfn = virt_to_gfn((void *)page); /* Next allocate a local port which xenstored can bind to */ alloc_unbound.dom = DOMID_SELF; @@ -742,7 +740,7 @@ static int xenbus_resume_cb(struct notifier_block *nb, int err = 0; if (xen_hvm_domain()) { - uint64_t v; + uint64_t v = 0; err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); if (!err && v) @@ -787,12 +785,12 @@ static int __init xenbus_init(void) err = xenstored_local_init(); if (err) goto out_error; - xen_store_interface = mfn_to_virt(xen_store_mfn); + xen_store_interface = gfn_to_virt(xen_store_gfn); break; case XS_PV: xen_store_evtchn = xen_start_info->store_evtchn; - xen_store_mfn = xen_start_info->store_mfn; - xen_store_interface = mfn_to_virt(xen_store_mfn); + xen_store_gfn = xen_start_info->store_mfn; + xen_store_interface = gfn_to_virt(xen_store_gfn); break; case XS_HVM: err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); @@ -802,9 +800,10 @@ static int __init xenbus_init(void) err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v); if (err) goto 
out_error; - xen_store_mfn = (unsigned long)v; + xen_store_gfn = (unsigned long)v; xen_store_interface = - xen_remap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE); + xen_remap(xen_store_gfn << XEN_PAGE_SHIFT, + XEN_PAGE_SIZE); break; default: pr_warn("Xenstore state unknown\n"); diff --git a/kernel/drivers/xen/xenfs/Makefile b/kernel/drivers/xen/xenfs/Makefile index b019865fc..1a83010dd 100644 --- a/kernel/drivers/xen/xenfs/Makefile +++ b/kernel/drivers/xen/xenfs/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_XENFS) += xenfs.o xenfs-y = super.o xenfs-$(CONFIG_XEN_DOM0) += xenstored.o +xenfs-$(CONFIG_XEN_SYMS) += xensyms.o diff --git a/kernel/drivers/xen/xenfs/super.c b/kernel/drivers/xen/xenfs/super.c index 06092e0fe..8559a71f3 100644 --- a/kernel/drivers/xen/xenfs/super.c +++ b/kernel/drivers/xen/xenfs/super.c @@ -57,6 +57,9 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent) { "privcmd", &xen_privcmd_fops, S_IRUSR|S_IWUSR }, { "xsd_kva", &xsd_kva_file_ops, S_IRUSR|S_IWUSR}, { "xsd_port", &xsd_port_file_ops, S_IRUSR|S_IWUSR}, +#ifdef CONFIG_XEN_SYMS + { "xensyms", &xensyms_ops, S_IRUSR}, +#endif {""}, }; diff --git a/kernel/drivers/xen/xenfs/xenfs.h b/kernel/drivers/xen/xenfs/xenfs.h index 6b80c7779..2c5934ea9 100644 --- a/kernel/drivers/xen/xenfs/xenfs.h +++ b/kernel/drivers/xen/xenfs/xenfs.h @@ -3,5 +3,6 @@ extern const struct file_operations xsd_kva_file_ops; extern const struct file_operations xsd_port_file_ops; +extern const struct file_operations xensyms_ops; #endif /* _XENFS_XENBUS_H */ diff --git a/kernel/drivers/xen/xenfs/xensyms.c b/kernel/drivers/xen/xenfs/xensyms.c new file mode 100644 index 000000000..f8b128567 --- /dev/null +++ b/kernel/drivers/xen/xenfs/xensyms.c @@ -0,0 +1,152 @@ +#include <linux/module.h> +#include <linux/init.h> +#include <linux/seq_file.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/proc_fs.h> +#include <linux/slab.h> +#include <xen/interface/platform.h> +#include <asm/xen/hypercall.h> +#include 
<xen/xen-ops.h> +#include "xenfs.h" + + +#define XEN_KSYM_NAME_LEN 127 /* Hypervisor may have different name length */ + +struct xensyms { + struct xen_platform_op op; + char *name; + uint32_t namelen; +}; + +/* Fetch the next symbol from the hypervisor: <0 on error, 1 at end of symbols, 0 otherwise */ +static int xensyms_next_sym(struct xensyms *xs) +{ + int ret; + struct xenpf_symdata *symdata = &xs->op.u.symdata; + uint64_t symnum; + + memset(xs->name, 0, xs->namelen); + symdata->namelen = xs->namelen; + + symnum = symdata->symnum; + + ret = HYPERVISOR_dom0_op(&xs->op); + if (ret < 0) + return ret; + + /* + * If hypervisor's symbol didn't fit into the buffer then allocate + * a larger buffer and try again. + */ + if (unlikely(symdata->namelen > xs->namelen)) { + kfree(xs->name); + + xs->namelen = symdata->namelen; + xs->name = kzalloc(xs->namelen, GFP_KERNEL); + if (!xs->name) + return -ENOMEM; + + set_xen_guest_handle(symdata->name, xs->name); + symdata->symnum--; /* Rewind */ + + ret = HYPERVISOR_dom0_op(&xs->op); + if (ret < 0) + return ret; + } + + if (symdata->symnum == symnum) + /* End of symbols */ + return 1; + + return 0; +} + +static void *xensyms_start(struct seq_file *m, loff_t *pos) +{ + struct xensyms *xs = (struct xensyms *)m->private; + + xs->op.u.symdata.symnum = *pos; + + if (xensyms_next_sym(xs)) + return NULL; + + return m->private; +} + +static void *xensyms_next(struct seq_file *m, void *p, loff_t *pos) +{ + struct xensyms *xs = (struct xensyms *)m->private; + + xs->op.u.symdata.symnum = ++(*pos); + + if (xensyms_next_sym(xs)) + return NULL; + + return p; +} + +static int xensyms_show(struct seq_file *m, void *p) +{ + struct xensyms *xs = (struct xensyms *)m->private; + struct xenpf_symdata *symdata = &xs->op.u.symdata; + + seq_printf(m, "%016llx %c %s\n", symdata->address, + symdata->type, xs->name); + + return 0; +} + +static void xensyms_stop(struct seq_file *m, void *p) +{ +} + +static const struct seq_operations xensyms_seq_ops = { + .start = xensyms_start, + .next = xensyms_next, 
+ .show = xensyms_show, + .stop = xensyms_stop, +}; + +static int xensyms_open(struct inode *inode, struct file *file) +{ + struct seq_file *m; + struct xensyms *xs; + int ret; + + ret = seq_open_private(file, &xensyms_seq_ops, + sizeof(struct xensyms)); + if (ret) + return ret; + + m = file->private_data; + xs = (struct xensyms *)m->private; + + xs->namelen = XEN_KSYM_NAME_LEN + 1; + xs->name = kzalloc(xs->namelen, GFP_KERNEL); + if (!xs->name) { + seq_release_private(inode, file); + return -ENOMEM; + } + set_xen_guest_handle(xs->op.u.symdata.name, xs->name); + xs->op.cmd = XENPF_get_symbol; + xs->op.u.symdata.namelen = xs->namelen; + + return 0; +} + +static int xensyms_release(struct inode *inode, struct file *file) +{ + struct seq_file *m = file->private_data; + struct xensyms *xs = (struct xensyms *)m->private; + + kfree(xs->name); + return seq_release_private(inode, file); +} + +const struct file_operations xensyms_ops = { + .open = xensyms_open, + .read = seq_read, + .llseek = seq_lseek, + .release = xensyms_release +}; diff --git a/kernel/drivers/xen/xlate_mmu.c b/kernel/drivers/xen/xlate_mmu.c index 58a5389ae..5063c5e79 100644 --- a/kernel/drivers/xen/xlate_mmu.c +++ b/kernel/drivers/xen/xlate_mmu.c @@ -38,77 +38,122 @@ #include <xen/interface/xen.h> #include <xen/interface/memory.h> -/* map fgmfn of domid to lpfn in the current domain */ -static int map_foreign_page(unsigned long lpfn, unsigned long fgmfn, - unsigned int domid) -{ - int rc; - struct xen_add_to_physmap_range xatp = { - .domid = DOMID_SELF, - .foreign_domid = domid, - .size = 1, - .space = XENMAPSPACE_gmfn_foreign, - }; - xen_ulong_t idx = fgmfn; - xen_pfn_t gpfn = lpfn; - int err = 0; +typedef void (*xen_gfn_fn_t)(unsigned long gfn, void *data); - set_xen_guest_handle(xatp.idxs, &idx); - set_xen_guest_handle(xatp.gpfns, &gpfn); - set_xen_guest_handle(xatp.errs, &err); +/* Break the pages down into 4KB chunks and call fn for each gfn */ +static void xen_for_each_gfn(struct page **pages, 
unsigned nr_gfn, + xen_gfn_fn_t fn, void *data) +{ + unsigned long xen_pfn = 0; + struct page *page; + int i; - rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp); - return rc < 0 ? rc : err; + for (i = 0; i < nr_gfn; i++) { + if ((i % XEN_PFN_PER_PAGE) == 0) { + page = pages[i / XEN_PFN_PER_PAGE]; + xen_pfn = page_to_xen_pfn(page); + } + fn(pfn_to_gfn(xen_pfn++), data); + } } struct remap_data { - xen_pfn_t *fgmfn; /* foreign domain's gmfn */ + xen_pfn_t *fgfn; /* foreign domain's gfn */ + int nr_fgfn; /* Number of foreign gfn left to map */ pgprot_t prot; domid_t domid; struct vm_area_struct *vma; int index; struct page **pages; - struct xen_remap_mfn_info *info; + struct xen_remap_gfn_info *info; int *err_ptr; int mapped; + + /* Hypercall parameters */ + int h_errs[XEN_PFN_PER_PAGE]; + xen_ulong_t h_idxs[XEN_PFN_PER_PAGE]; + xen_pfn_t h_gpfns[XEN_PFN_PER_PAGE]; + + int h_iter; /* Iterator */ }; +static void setup_hparams(unsigned long gfn, void *data) +{ + struct remap_data *info = data; + + info->h_idxs[info->h_iter] = *info->fgfn; + info->h_gpfns[info->h_iter] = gfn; + info->h_errs[info->h_iter] = 0; + + info->h_iter++; + info->fgfn++; +} + static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr, void *data) { struct remap_data *info = data; struct page *page = info->pages[info->index++]; - unsigned long pfn = page_to_pfn(page); - pte_t pte = pte_mkspecial(pfn_pte(pfn, info->prot)); - int rc; + pte_t pte = pte_mkspecial(pfn_pte(page_to_pfn(page), info->prot)); + int rc, nr_gfn; + uint32_t i; + struct xen_add_to_physmap_range xatp = { + .domid = DOMID_SELF, + .foreign_domid = info->domid, + .space = XENMAPSPACE_gmfn_foreign, + }; - rc = map_foreign_page(pfn, *info->fgmfn, info->domid); - *info->err_ptr++ = rc; - if (!rc) { - set_pte_at(info->vma->vm_mm, addr, ptep, pte); - info->mapped++; + nr_gfn = min_t(typeof(info->nr_fgfn), XEN_PFN_PER_PAGE, info->nr_fgfn); + info->nr_fgfn -= nr_gfn; + + info->h_iter = 0; + 
xen_for_each_gfn(&page, nr_gfn, setup_hparams, info); + BUG_ON(info->h_iter != nr_gfn); + + set_xen_guest_handle(xatp.idxs, info->h_idxs); + set_xen_guest_handle(xatp.gpfns, info->h_gpfns); + set_xen_guest_handle(xatp.errs, info->h_errs); + xatp.size = nr_gfn; + + rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp); + + /* info->err_ptr expects one error status per Xen PFN */ + for (i = 0; i < nr_gfn; i++) { + int err = (rc < 0) ? rc : info->h_errs[i]; + + *(info->err_ptr++) = err; + if (!err) + info->mapped++; } - info->fgmfn++; + + /* + * Note: The hypercall will return 0 in most cases even if not all + * of the fgfns are mapped. We still have to update the pte + * as the userspace may decide to continue. + */ + if (!rc) + set_pte_at(info->vma->vm_mm, addr, ptep, pte); return 0; } int xen_xlate_remap_gfn_array(struct vm_area_struct *vma, unsigned long addr, - xen_pfn_t *mfn, int nr, + xen_pfn_t *gfn, int nr, int *err_ptr, pgprot_t prot, unsigned domid, struct page **pages) { int err; struct remap_data data; - unsigned long range = nr << PAGE_SHIFT; + unsigned long range = DIV_ROUND_UP(nr, XEN_PFN_PER_PAGE) << PAGE_SHIFT; /* Kept here for the purpose of making sure code doesn't break x86 PVOPS */ BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); - data.fgmfn = mfn; + data.fgfn = gfn; + data.nr_fgfn = nr; data.prot = prot; data.domid = domid; data.vma = vma; @@ -123,21 +168,20 @@ int xen_xlate_remap_gfn_array(struct vm_area_struct *vma, } EXPORT_SYMBOL_GPL(xen_xlate_remap_gfn_array); -int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma, - int nr, struct page **pages) +static void unmap_gfn(unsigned long gfn, void *data) { - int i; + struct xen_remove_from_physmap xrp; - for (i = 0; i < nr; i++) { - struct xen_remove_from_physmap xrp; - unsigned long pfn; + xrp.domid = DOMID_SELF; + xrp.gpfn = gfn; + (void)HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp); +} - pfn = page_to_pfn(pages[i]); +int 
xen_xlate_unmap_gfn_range(struct vm_area_struct *vma, + int nr, struct page **pages) +{ + xen_for_each_gfn(pages, nr, unmap_gfn, NULL); - xrp.domid = DOMID_SELF; - xrp.gpfn = pfn; - (void)HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp); - } return 0; } EXPORT_SYMBOL_GPL(xen_xlate_unmap_gfn_range); |