summaryrefslogtreecommitdiffstats
path: root/kernel/virt/kvm/kvm_main.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/virt/kvm/kvm_main.c')
-rw-r--r--kernel/virt/kvm/kvm_main.c597
1 files changed, 422 insertions, 175 deletions
diff --git a/kernel/virt/kvm/kvm_main.c b/kernel/virt/kvm/kvm_main.c
index de930768a..eeed326be 100644
--- a/kernel/virt/kvm/kvm_main.c
+++ b/kernel/virt/kvm/kvm_main.c
@@ -66,9 +66,18 @@
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
-static unsigned int halt_poll_ns;
+/* Architectures should define their poll value according to the halt latency */
+static unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT;
module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
+/* Default doubles per-vcpu halt_poll_ns. */
+static unsigned int halt_poll_ns_grow = 2;
+module_param(halt_poll_ns_grow, int, S_IRUGO);
+
+/* Default resets per-vcpu halt_poll_ns . */
+static unsigned int halt_poll_ns_shrink;
+module_param(halt_poll_ns_shrink, int, S_IRUGO);
+
/*
* Ordering of locks:
*
@@ -103,8 +112,7 @@ static void hardware_disable_all(void);
static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
static void kvm_release_pfn_dirty(pfn_t pfn);
-static void mark_page_dirty_in_slot(struct kvm *kvm,
- struct kvm_memory_slot *memslot, gfn_t gfn);
+static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn);
__visible bool kvm_rebooting;
EXPORT_SYMBOL_GPL(kvm_rebooting);
@@ -218,9 +226,12 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
vcpu->kvm = kvm;
vcpu->vcpu_id = id;
vcpu->pid = NULL;
- init_swait_head(&vcpu->wq);
+ init_swait_queue_head(&vcpu->wq);
kvm_async_pf_vcpu_init(vcpu);
+ vcpu->pre_pcpu = -1;
+ INIT_LIST_HEAD(&vcpu->blocked_vcpu_list);
+
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!page) {
r = -ENOMEM;
@@ -388,6 +399,36 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
return young;
}
+static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end)
+{
+ struct kvm *kvm = mmu_notifier_to_kvm(mn);
+ int young, idx;
+
+ idx = srcu_read_lock(&kvm->srcu);
+ spin_lock(&kvm->mmu_lock);
+ /*
+ * Even though we do not flush TLB, this will still adversely
+ * affect performance on pre-Haswell Intel EPT, where there is
+ * no EPT Access Bit to clear so that we have to tear down EPT
+ * tables instead. If we find this unacceptable, we can always
+ * add a parameter to kvm_age_hva so that it effectively doesn't
+ * do anything on clear_young.
+ *
+ * Also note that currently we never issue secondary TLB flushes
+ * from clear_young, leaving this job up to the regular system
+ * cadence. If we find this inaccurate, we might come up with a
+ * more sophisticated heuristic later.
+ */
+ young = kvm_age_hva(kvm, start, end);
+ spin_unlock(&kvm->mmu_lock);
+ srcu_read_unlock(&kvm->srcu, idx);
+
+ return young;
+}
+
static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long address)
@@ -420,6 +461,7 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
.invalidate_range_start = kvm_mmu_notifier_invalidate_range_start,
.invalidate_range_end = kvm_mmu_notifier_invalidate_range_end,
.clear_flush_young = kvm_mmu_notifier_clear_flush_young,
+ .clear_young = kvm_mmu_notifier_clear_young,
.test_young = kvm_mmu_notifier_test_young,
.change_pte = kvm_mmu_notifier_change_pte,
.release = kvm_mmu_notifier_release,
@@ -440,13 +482,60 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
-static void kvm_init_memslots_id(struct kvm *kvm)
+static struct kvm_memslots *kvm_alloc_memslots(void)
{
int i;
- struct kvm_memslots *slots = kvm->memslots;
+ struct kvm_memslots *slots;
+ slots = kvm_kvzalloc(sizeof(struct kvm_memslots));
+ if (!slots)
+ return NULL;
+
+ /*
+ * Init kvm generation close to the maximum to easily test the
+ * code of handling generation number wrap-around.
+ */
+ slots->generation = -150;
for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
slots->id_to_index[i] = slots->memslots[i].id = i;
+
+ return slots;
+}
+
+static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
+{
+ if (!memslot->dirty_bitmap)
+ return;
+
+ kvfree(memslot->dirty_bitmap);
+ memslot->dirty_bitmap = NULL;
+}
+
+/*
+ * Free any memory in @free but not in @dont.
+ */
+static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont)
+{
+ if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
+ kvm_destroy_dirty_bitmap(free);
+
+ kvm_arch_free_memslot(kvm, free, dont);
+
+ free->npages = 0;
+}
+
+static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots)
+{
+ struct kvm_memory_slot *memslot;
+
+ if (!slots)
+ return;
+
+ kvm_for_each_memslot(memslot, slots)
+ kvm_free_memslot(kvm, memslot, NULL);
+
+ kvfree(slots);
}
static struct kvm *kvm_create_vm(unsigned long type)
@@ -472,17 +561,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
r = -ENOMEM;
- kvm->memslots = kvm_kvzalloc(sizeof(struct kvm_memslots));
- if (!kvm->memslots)
- goto out_err_no_srcu;
-
- /*
- * Init kvm generation close to the maximum to easily test the
- * code of handling generation number wrap-around.
- */
- kvm->memslots->generation = -150;
+ for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+ kvm->memslots[i] = kvm_alloc_memslots();
+ if (!kvm->memslots[i])
+ goto out_err_no_srcu;
+ }
- kvm_init_memslots_id(kvm);
if (init_srcu_struct(&kvm->srcu))
goto out_err_no_srcu;
if (init_srcu_struct(&kvm->irq_srcu))
@@ -512,6 +596,8 @@ static struct kvm *kvm_create_vm(unsigned long type)
list_add(&kvm->vm_list, &vm_list);
spin_unlock(&kvm_lock);
+ preempt_notifier_inc();
+
return kvm;
out_err:
@@ -523,7 +609,8 @@ out_err_no_srcu:
out_err_no_disable:
for (i = 0; i < KVM_NR_BUSES; i++)
kfree(kvm->buses[i]);
- kvfree(kvm->memslots);
+ for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
+ kvm_free_memslots(kvm, kvm->memslots[i]);
kvm_arch_free_vm(kvm);
return ERR_PTR(r);
}
@@ -540,40 +627,6 @@ void *kvm_kvzalloc(unsigned long size)
return kzalloc(size, GFP_KERNEL);
}
-static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
-{
- if (!memslot->dirty_bitmap)
- return;
-
- kvfree(memslot->dirty_bitmap);
- memslot->dirty_bitmap = NULL;
-}
-
-/*
- * Free any memory in @free but not in @dont.
- */
-static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
-{
- if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
- kvm_destroy_dirty_bitmap(free);
-
- kvm_arch_free_memslot(kvm, free, dont);
-
- free->npages = 0;
-}
-
-static void kvm_free_physmem(struct kvm *kvm)
-{
- struct kvm_memslots *slots = kvm->memslots;
- struct kvm_memory_slot *memslot;
-
- kvm_for_each_memslot(memslot, slots)
- kvm_free_physmem_slot(kvm, memslot, NULL);
-
- kvfree(kvm->memslots);
-}
-
static void kvm_destroy_devices(struct kvm *kvm)
{
struct list_head *node, *tmp;
@@ -607,10 +660,12 @@ static void kvm_destroy_vm(struct kvm *kvm)
#endif
kvm_arch_destroy_vm(kvm);
kvm_destroy_devices(kvm);
- kvm_free_physmem(kvm);
+ for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
+ kvm_free_memslots(kvm, kvm->memslots[i]);
cleanup_srcu_struct(&kvm->irq_srcu);
cleanup_srcu_struct(&kvm->srcu);
kvm_arch_free_vm(kvm);
+ preempt_notifier_dec();
hardware_disable_all();
mmdrop(mm);
}
@@ -670,8 +725,6 @@ static void update_memslots(struct kvm_memslots *slots,
WARN_ON(mslots[i].id != id);
if (!new->npages) {
WARN_ON(!mslots[i].npages);
- new->base_gfn = 0;
- new->flags = 0;
if (mslots[i].npages)
slots->used_slots--;
} else {
@@ -711,7 +764,7 @@ static void update_memslots(struct kvm_memslots *slots,
slots->id_to_index[mslots[i].id] = i;
}
-static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
+static int check_memory_region_flags(const struct kvm_userspace_memory_region *mem)
{
u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
@@ -726,9 +779,9 @@ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
}
static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
- struct kvm_memslots *slots)
+ int as_id, struct kvm_memslots *slots)
{
- struct kvm_memslots *old_memslots = kvm->memslots;
+ struct kvm_memslots *old_memslots = __kvm_memslots(kvm, as_id);
/*
* Set the low bit in the generation, which disables SPTE caching
@@ -737,7 +790,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
WARN_ON(old_memslots->generation & 1);
slots->generation = old_memslots->generation + 1;
- rcu_assign_pointer(kvm->memslots, slots);
+ rcu_assign_pointer(kvm->memslots[as_id], slots);
synchronize_srcu_expedited(&kvm->srcu);
/*
@@ -747,7 +800,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
*/
slots->generation++;
- kvm_arch_memslots_updated(kvm);
+ kvm_arch_memslots_updated(kvm, slots);
return old_memslots;
}
@@ -761,7 +814,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
* Must be called holding kvm->slots_lock for write.
*/
int __kvm_set_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem)
+ const struct kvm_userspace_memory_region *mem)
{
int r;
gfn_t base_gfn;
@@ -769,6 +822,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
struct kvm_memory_slot *slot;
struct kvm_memory_slot old, new;
struct kvm_memslots *slots = NULL, *old_memslots;
+ int as_id, id;
enum kvm_mr_change change;
r = check_memory_region_flags(mem);
@@ -776,36 +830,36 @@ int __kvm_set_memory_region(struct kvm *kvm,
goto out;
r = -EINVAL;
+ as_id = mem->slot >> 16;
+ id = (u16)mem->slot;
+
/* General sanity checks */
if (mem->memory_size & (PAGE_SIZE - 1))
goto out;
if (mem->guest_phys_addr & (PAGE_SIZE - 1))
goto out;
/* We can read the guest memory with __xxx_user() later on. */
- if ((mem->slot < KVM_USER_MEM_SLOTS) &&
+ if ((id < KVM_USER_MEM_SLOTS) &&
((mem->userspace_addr & (PAGE_SIZE - 1)) ||
!access_ok(VERIFY_WRITE,
(void __user *)(unsigned long)mem->userspace_addr,
mem->memory_size)))
goto out;
- if (mem->slot >= KVM_MEM_SLOTS_NUM)
+ if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_MEM_SLOTS_NUM)
goto out;
if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
goto out;
- slot = id_to_memslot(kvm->memslots, mem->slot);
+ slot = id_to_memslot(__kvm_memslots(kvm, as_id), id);
base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
npages = mem->memory_size >> PAGE_SHIFT;
if (npages > KVM_MEM_MAX_NR_PAGES)
goto out;
- if (!npages)
- mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
-
new = old = *slot;
- new.id = mem->slot;
+ new.id = id;
new.base_gfn = base_gfn;
new.npages = npages;
new.flags = mem->flags;
@@ -828,17 +882,21 @@ int __kvm_set_memory_region(struct kvm *kvm,
goto out;
}
}
- } else if (old.npages) {
+ } else {
+ if (!old.npages)
+ goto out;
+
change = KVM_MR_DELETE;
- } else /* Modify a non-existent slot: disallowed. */
- goto out;
+ new.base_gfn = 0;
+ new.flags = 0;
+ }
if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
/* Check for overlaps */
r = -EEXIST;
- kvm_for_each_memslot(slot, kvm->memslots) {
+ kvm_for_each_memslot(slot, __kvm_memslots(kvm, as_id)) {
if ((slot->id >= KVM_USER_MEM_SLOTS) ||
- (slot->id == mem->slot))
+ (slot->id == id))
continue;
if (!((base_gfn + npages <= slot->base_gfn) ||
(base_gfn >= slot->base_gfn + slot->npages)))
@@ -867,13 +925,13 @@ int __kvm_set_memory_region(struct kvm *kvm,
slots = kvm_kvzalloc(sizeof(struct kvm_memslots));
if (!slots)
goto out_free;
- memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+ memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots));
if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
- slot = id_to_memslot(slots, mem->slot);
+ slot = id_to_memslot(slots, id);
slot->flags |= KVM_MEMSLOT_INVALID;
- old_memslots = install_new_memslots(kvm, slots);
+ old_memslots = install_new_memslots(kvm, as_id, slots);
/* slot was deleted or moved, clear iommu mapping */
kvm_iommu_unmap_pages(kvm, &old);
@@ -898,18 +956,18 @@ int __kvm_set_memory_region(struct kvm *kvm,
if (r)
goto out_slots;
- /* actual memory is freed via old in kvm_free_physmem_slot below */
+ /* actual memory is freed via old in kvm_free_memslot below */
if (change == KVM_MR_DELETE) {
new.dirty_bitmap = NULL;
memset(&new.arch, 0, sizeof(new.arch));
}
update_memslots(slots, &new);
- old_memslots = install_new_memslots(kvm, slots);
+ old_memslots = install_new_memslots(kvm, as_id, slots);
- kvm_arch_commit_memory_region(kvm, mem, &old, change);
+ kvm_arch_commit_memory_region(kvm, mem, &old, &new, change);
- kvm_free_physmem_slot(kvm, &old, &new);
+ kvm_free_memslot(kvm, &old, &new);
kvfree(old_memslots);
/*
@@ -931,14 +989,14 @@ int __kvm_set_memory_region(struct kvm *kvm,
out_slots:
kvfree(slots);
out_free:
- kvm_free_physmem_slot(kvm, &new, &old);
+ kvm_free_memslot(kvm, &new, &old);
out:
return r;
}
EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
int kvm_set_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem)
+ const struct kvm_userspace_memory_region *mem)
{
int r;
@@ -952,24 +1010,29 @@ EXPORT_SYMBOL_GPL(kvm_set_memory_region);
static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem)
{
- if (mem->slot >= KVM_USER_MEM_SLOTS)
+ if ((u16)mem->slot >= KVM_USER_MEM_SLOTS)
return -EINVAL;
+
return kvm_set_memory_region(kvm, mem);
}
int kvm_get_dirty_log(struct kvm *kvm,
struct kvm_dirty_log *log, int *is_dirty)
{
+ struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
- int r, i;
+ int r, i, as_id, id;
unsigned long n;
unsigned long any = 0;
r = -EINVAL;
- if (log->slot >= KVM_USER_MEM_SLOTS)
+ as_id = log->slot >> 16;
+ id = (u16)log->slot;
+ if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
goto out;
- memslot = id_to_memslot(kvm->memslots, log->slot);
+ slots = __kvm_memslots(kvm, as_id);
+ memslot = id_to_memslot(slots, id);
r = -ENOENT;
if (!memslot->dirty_bitmap)
goto out;
@@ -1018,17 +1081,21 @@ EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
int kvm_get_dirty_log_protect(struct kvm *kvm,
struct kvm_dirty_log *log, bool *is_dirty)
{
+ struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
- int r, i;
+ int r, i, as_id, id;
unsigned long n;
unsigned long *dirty_bitmap;
unsigned long *dirty_bitmap_buffer;
r = -EINVAL;
- if (log->slot >= KVM_USER_MEM_SLOTS)
+ as_id = log->slot >> 16;
+ id = (u16)log->slot;
+ if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
goto out;
- memslot = id_to_memslot(kvm->memslots, log->slot);
+ slots = __kvm_memslots(kvm, as_id);
+ memslot = id_to_memslot(slots, id);
dirty_bitmap = memslot->dirty_bitmap;
r = -ENOENT;
@@ -1091,6 +1158,11 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
}
EXPORT_SYMBOL_GPL(gfn_to_memslot);
+struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+ return __gfn_to_memslot(kvm_vcpu_memslots(vcpu), gfn);
+}
+
int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
{
struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn);
@@ -1166,6 +1238,12 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
}
EXPORT_SYMBOL_GPL(gfn_to_hva);
+unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+ return gfn_to_hva_many(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn, NULL);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_hva);
+
/*
* If writable is set to false, the hva returned by this function is only
* allowed to be read.
@@ -1188,6 +1266,13 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
return gfn_to_hva_memslot_prot(slot, gfn, writable);
}
+unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable)
+{
+ struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+
+ return gfn_to_hva_memslot_prot(slot, gfn, writable);
+}
+
static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, int write, struct page **page)
{
@@ -1355,9 +1440,8 @@ exit:
return pfn;
}
-static pfn_t
-__gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic,
- bool *async, bool write_fault, bool *writable)
+pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic,
+ bool *async, bool write_fault, bool *writable)
{
unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault);
@@ -1376,65 +1460,59 @@ __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic,
return hva_to_pfn(addr, atomic, async, write_fault,
writable);
}
+EXPORT_SYMBOL_GPL(__gfn_to_pfn_memslot);
-static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
- bool write_fault, bool *writable)
+pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
+ bool *writable)
{
- struct kvm_memory_slot *slot;
-
- if (async)
- *async = false;
-
- slot = gfn_to_memslot(kvm, gfn);
-
- return __gfn_to_pfn_memslot(slot, gfn, atomic, async, write_fault,
- writable);
+ return __gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn, false, NULL,
+ write_fault, writable);
}
+EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
-pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
+pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
{
- return __gfn_to_pfn(kvm, gfn, true, NULL, true, NULL);
+ return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL);
}
-EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic);
+EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot);
-pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
- bool write_fault, bool *writable)
+pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn)
{
- return __gfn_to_pfn(kvm, gfn, false, async, write_fault, writable);
+ return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL);
}
-EXPORT_SYMBOL_GPL(gfn_to_pfn_async);
+EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic);
-pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
+pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
{
- return __gfn_to_pfn(kvm, gfn, false, NULL, true, NULL);
+ return gfn_to_pfn_memslot_atomic(gfn_to_memslot(kvm, gfn), gfn);
}
-EXPORT_SYMBOL_GPL(gfn_to_pfn);
+EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic);
-pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
- bool *writable)
+pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn)
{
- return __gfn_to_pfn(kvm, gfn, false, NULL, write_fault, writable);
+ return gfn_to_pfn_memslot_atomic(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn);
}
-EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
+EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_pfn_atomic);
-pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
+pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
{
- return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL);
+ return gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn);
}
+EXPORT_SYMBOL_GPL(gfn_to_pfn);
-pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn)
+pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
- return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL);
+ return gfn_to_pfn_memslot(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn);
}
-EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic);
+EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_pfn);
-int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
- int nr_pages)
+int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
+ struct page **pages, int nr_pages)
{
unsigned long addr;
gfn_t entry;
- addr = gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, &entry);
+ addr = gfn_to_hva_many(slot, gfn, &entry);
if (kvm_is_error_hva(addr))
return -1;
@@ -1468,6 +1546,16 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
}
EXPORT_SYMBOL_GPL(gfn_to_page);
+struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+ pfn_t pfn;
+
+ pfn = kvm_vcpu_gfn_to_pfn(vcpu, gfn);
+
+ return kvm_pfn_to_page(pfn);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_page);
+
void kvm_release_page_clean(struct page *page)
{
WARN_ON(is_error_page(page));
@@ -1530,13 +1618,13 @@ static int next_segment(unsigned long len, int offset)
return len;
}
-int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
- int len)
+static int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn,
+ void *data, int offset, int len)
{
int r;
unsigned long addr;
- addr = gfn_to_hva_prot(kvm, gfn, NULL);
+ addr = gfn_to_hva_memslot_prot(slot, gfn, NULL);
if (kvm_is_error_hva(addr))
return -EFAULT;
r = __copy_from_user(data, (void __user *)addr + offset, len);
@@ -1544,8 +1632,25 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
return -EFAULT;
return 0;
}
+
+int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
+ int len)
+{
+ struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+
+ return __kvm_read_guest_page(slot, gfn, data, offset, len);
+}
EXPORT_SYMBOL_GPL(kvm_read_guest_page);
+int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data,
+ int offset, int len)
+{
+ struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+
+ return __kvm_read_guest_page(slot, gfn, data, offset, len);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_page);
+
int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len)
{
gfn_t gfn = gpa >> PAGE_SHIFT;
@@ -1566,15 +1671,33 @@ int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len)
}
EXPORT_SYMBOL_GPL(kvm_read_guest);
-int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
- unsigned long len)
+int kvm_vcpu_read_guest(struct kvm_vcpu *vcpu, gpa_t gpa, void *data, unsigned long len)
{
- int r;
- unsigned long addr;
gfn_t gfn = gpa >> PAGE_SHIFT;
+ int seg;
int offset = offset_in_page(gpa);
+ int ret;
+
+ while ((seg = next_segment(len, offset)) != 0) {
+ ret = kvm_vcpu_read_guest_page(vcpu, gfn, data, offset, seg);
+ if (ret < 0)
+ return ret;
+ offset = 0;
+ len -= seg;
+ data += seg;
+ ++gfn;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest);
+
+static int __kvm_read_guest_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
+ void *data, int offset, unsigned long len)
+{
+ int r;
+ unsigned long addr;
- addr = gfn_to_hva_prot(kvm, gfn, NULL);
+ addr = gfn_to_hva_memslot_prot(slot, gfn, NULL);
if (kvm_is_error_hva(addr))
return -EFAULT;
pagefault_disable();
@@ -1584,25 +1707,63 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
return -EFAULT;
return 0;
}
-EXPORT_SYMBOL(kvm_read_guest_atomic);
-int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
- int offset, int len)
+int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
+ unsigned long len)
+{
+ gfn_t gfn = gpa >> PAGE_SHIFT;
+ struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+ int offset = offset_in_page(gpa);
+
+ return __kvm_read_guest_atomic(slot, gfn, data, offset, len);
+}
+EXPORT_SYMBOL_GPL(kvm_read_guest_atomic);
+
+int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa,
+ void *data, unsigned long len)
+{
+ gfn_t gfn = gpa >> PAGE_SHIFT;
+ struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+ int offset = offset_in_page(gpa);
+
+ return __kvm_read_guest_atomic(slot, gfn, data, offset, len);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_atomic);
+
+static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn,
+ const void *data, int offset, int len)
{
int r;
unsigned long addr;
- addr = gfn_to_hva(kvm, gfn);
+ addr = gfn_to_hva_memslot(memslot, gfn);
if (kvm_is_error_hva(addr))
return -EFAULT;
r = __copy_to_user((void __user *)addr + offset, data, len);
if (r)
return -EFAULT;
- mark_page_dirty(kvm, gfn);
+ mark_page_dirty_in_slot(memslot, gfn);
return 0;
}
+
+int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn,
+ const void *data, int offset, int len)
+{
+ struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+
+ return __kvm_write_guest_page(slot, gfn, data, offset, len);
+}
EXPORT_SYMBOL_GPL(kvm_write_guest_page);
+int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
+ const void *data, int offset, int len)
+{
+ struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+
+ return __kvm_write_guest_page(slot, gfn, data, offset, len);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_page);
+
int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
unsigned long len)
{
@@ -1624,6 +1785,27 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
}
EXPORT_SYMBOL_GPL(kvm_write_guest);
+int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
+ unsigned long len)
+{
+ gfn_t gfn = gpa >> PAGE_SHIFT;
+ int seg;
+ int offset = offset_in_page(gpa);
+ int ret;
+
+ while ((seg = next_segment(len, offset)) != 0) {
+ ret = kvm_vcpu_write_guest_page(vcpu, gfn, data, offset, seg);
+ if (ret < 0)
+ return ret;
+ offset = 0;
+ len -= seg;
+ data += seg;
+ ++gfn;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest);
+
int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
gpa_t gpa, unsigned long len)
{
@@ -1681,7 +1863,7 @@ int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
r = __copy_to_user((void __user *)ghc->hva, data, len);
if (r)
return -EFAULT;
- mark_page_dirty_in_slot(kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT);
+ mark_page_dirty_in_slot(ghc->memslot, ghc->gpa >> PAGE_SHIFT);
return 0;
}
@@ -1739,8 +1921,7 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
}
EXPORT_SYMBOL_GPL(kvm_clear_guest);
-static void mark_page_dirty_in_slot(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
+static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot,
gfn_t gfn)
{
if (memslot && memslot->dirty_bitmap) {
@@ -1755,10 +1936,51 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
struct kvm_memory_slot *memslot;
memslot = gfn_to_memslot(kvm, gfn);
- mark_page_dirty_in_slot(kvm, memslot, gfn);
+ mark_page_dirty_in_slot(memslot, gfn);
}
EXPORT_SYMBOL_GPL(mark_page_dirty);
+void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+ struct kvm_memory_slot *memslot;
+
+ memslot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+ mark_page_dirty_in_slot(memslot, gfn);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);
+
+static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
+{
+ int old, val;
+
+ old = val = vcpu->halt_poll_ns;
+ /* 10us base */
+ if (val == 0 && halt_poll_ns_grow)
+ val = 10000;
+ else
+ val *= halt_poll_ns_grow;
+
+ if (val > halt_poll_ns)
+ val = halt_poll_ns;
+
+ vcpu->halt_poll_ns = val;
+ trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old);
+}
+
+static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu)
+{
+ int old, val;
+
+ old = val = vcpu->halt_poll_ns;
+ if (halt_poll_ns_shrink == 0)
+ val = 0;
+ else
+ val /= halt_poll_ns_shrink;
+
+ vcpu->halt_poll_ns = val;
+ trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old);
+}
+
static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
{
if (kvm_arch_vcpu_runnable(vcpu)) {
@@ -1779,13 +2001,15 @@ static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
void kvm_vcpu_block(struct kvm_vcpu *vcpu)
{
ktime_t start, cur;
- DEFINE_SWAITER(wait);
+ DECLARE_SWAITQUEUE(wait);
bool waited = false;
+ u64 block_ns;
start = cur = ktime_get();
- if (halt_poll_ns) {
- ktime_t stop = ktime_add_ns(ktime_get(), halt_poll_ns);
+ if (vcpu->halt_poll_ns) {
+ ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
+ ++vcpu->stat.halt_attempted_poll;
do {
/*
* This sets KVM_REQ_UNHALT if an interrupt
@@ -1799,8 +2023,10 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
} while (single_task_running() && ktime_before(cur, stop));
}
+ kvm_arch_vcpu_blocking(vcpu);
+
for (;;) {
- swait_prepare(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
+ prepare_to_swait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
if (kvm_vcpu_check_block(vcpu) < 0)
break;
@@ -1809,11 +2035,27 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
schedule();
}
- swait_finish(&vcpu->wq, &wait);
+ finish_swait(&vcpu->wq, &wait);
cur = ktime_get();
+ kvm_arch_vcpu_unblocking(vcpu);
out:
- trace_kvm_vcpu_wakeup(ktime_to_ns(cur) - ktime_to_ns(start), waited);
+ block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
+
+ if (halt_poll_ns) {
+ if (block_ns <= vcpu->halt_poll_ns)
+ ;
+ /* we had a long block, shrink polling */
+ else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns)
+ shrink_halt_poll_ns(vcpu);
+ /* we had a short halt and our poll time is too small */
+ else if (vcpu->halt_poll_ns < halt_poll_ns &&
+ block_ns < halt_poll_ns)
+ grow_halt_poll_ns(vcpu);
+ } else
+ vcpu->halt_poll_ns = 0;
+
+ trace_kvm_vcpu_wakeup(block_ns, waited);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_block);
@@ -1825,11 +2067,11 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
{
int me;
int cpu = vcpu->cpu;
- struct swait_head *wqp;
+ struct swait_queue_head *wqp;
wqp = kvm_arch_vcpu_wq(vcpu);
- if (swaitqueue_active(wqp)) {
- swait_wake_interruptible(wqp);
+ if (swait_active(wqp)) {
+ swake_up(wqp);
++vcpu->stat.halt_wakeup;
}
@@ -1930,7 +2172,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
continue;
if (vcpu == me)
continue;
- if (swaitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
+ if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
continue;
if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
continue;
@@ -2059,6 +2301,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
}
kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu;
+
+ /*
+ * Pairs with smp_rmb() in kvm_get_vcpu. Write kvm->vcpus
+ * before kvm->online_vcpu's incremented value.
+ */
smp_wmb();
atomic_inc(&kvm->online_vcpus);
@@ -2471,9 +2718,6 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
case KVM_CAP_USER_MEMORY:
case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
-#ifdef CONFIG_KVM_APIC_ARCHITECTURE
- case KVM_CAP_SET_BOOT_CPU_ID:
-#endif
case KVM_CAP_INTERNAL_ERROR_DATA:
#ifdef CONFIG_HAVE_KVM_MSI
case KVM_CAP_SIGNAL_MSI:
@@ -2482,12 +2726,17 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
case KVM_CAP_IRQFD:
case KVM_CAP_IRQFD_RESAMPLE:
#endif
+ case KVM_CAP_IOEVENTFD_ANY_LENGTH:
case KVM_CAP_CHECK_EXTENSION_VM:
return 1;
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
case KVM_CAP_IRQ_ROUTING:
return KVM_MAX_IRQ_ROUTES;
#endif
+#if KVM_ADDRESS_SPACE_NUM > 1
+ case KVM_CAP_MULTI_ADDRESS_SPACE:
+ return KVM_ADDRESS_SPACE_NUM;
+#endif
default:
break;
}
@@ -2565,17 +2814,6 @@ static long kvm_vm_ioctl(struct file *filp,
r = kvm_ioeventfd(kvm, &data);
break;
}
-#ifdef CONFIG_KVM_APIC_ARCHITECTURE
- case KVM_SET_BOOT_CPU_ID:
- r = 0;
- mutex_lock(&kvm->lock);
- if (atomic_read(&kvm->online_vcpus) != 0)
- r = -EBUSY;
- else
- kvm->bsp_vcpu_id = arg;
- mutex_unlock(&kvm->lock);
- break;
-#endif
#ifdef CONFIG_HAVE_KVM_MSI
case KVM_SIGNAL_MSI: {
struct kvm_msi msi;
@@ -2882,18 +3120,12 @@ static int hardware_enable_all(void)
static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
void *v)
{
- int cpu = (long)v;
-
val &= ~CPU_TASKS_FROZEN;
switch (val) {
case CPU_DYING:
- pr_info("kvm: disabling virtualization on CPU%d\n",
- cpu);
hardware_disable();
break;
case CPU_STARTING:
- pr_info("kvm: enabling virtualization on CPU%d\n",
- cpu);
hardware_enable();
break;
}
@@ -2935,10 +3167,25 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
static inline int kvm_io_bus_cmp(const struct kvm_io_range *r1,
const struct kvm_io_range *r2)
{
- if (r1->addr < r2->addr)
+ gpa_t addr1 = r1->addr;
+ gpa_t addr2 = r2->addr;
+
+ if (addr1 < addr2)
return -1;
- if (r1->addr + r1->len > r2->addr + r2->len)
+
+ /* If r2->len == 0, match the exact address. If r2->len != 0,
+ * accept any overlapping write. Any order is acceptable for
+ * overlapping ranges, because kvm_io_bus_get_first_dev ensures
+ * we process all of them.
+ */
+ if (r2->len) {
+ addr1 += r1->len;
+ addr2 += r2->len;
+ }
+
+ if (addr1 > addr2)
return 1;
+
return 0;
}
@@ -3103,7 +3350,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1)
return -ENOSPC;
- new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count + 1) *
+ new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count + 1) *
sizeof(struct kvm_io_range)), GFP_KERNEL);
if (!new_bus)
return -ENOMEM;
@@ -3135,7 +3382,7 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
if (r)
return r;
- new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count - 1) *
+ new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) *
sizeof(struct kvm_io_range)), GFP_KERNEL);
if (!new_bus)
return -ENOMEM;