From e09b41010ba33a20a87472ee821fa407a5b8da36 Mon Sep 17 00:00:00 2001
From: José Pekkarinen
Date: Mon, 11 Apr 2016 10:41:07 +0300
Subject: These changes are the raw update to linux-4.4.6-rt14. Kernel sources
 are taken from kernel.org, and the rt patch from the rt wiki download page.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

During the rebasing, the following patch collided:

Force tick interrupt and get rid of softirq magic (I70131fb85).

The colliding changes have been dropped, because their logic was
already present in the source.

Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769
Signed-off-by: José Pekkarinen
---
 kernel/virt/kvm/arm/arch_timer.c   | 210 +++++++++---
 kernel/virt/kvm/arm/trace.h        |  63 ++++
 kernel/virt/kvm/arm/vgic-v2.c      |  22 +-
 kernel/virt/kvm/arm/vgic-v3-emul.c |  56 +++-
 kernel/virt/kvm/arm/vgic-v3.c      |  29 +-
 kernel/virt/kvm/arm/vgic.c         | 651 +++++++++++++++++++++++++++----------
 6 files changed, 785 insertions(+), 246 deletions(-)
 create mode 100644 kernel/virt/kvm/arm/trace.h

diff --git a/kernel/virt/kvm/arm/arch_timer.c b/kernel/virt/kvm/arm/arch_timer.c
index 98c95f2fc..ea6064696 100644
--- a/kernel/virt/kvm/arm/arch_timer.c
+++ b/kernel/virt/kvm/arm/arch_timer.c
@@ -28,6 +28,8 @@
 #include <kvm/arm_vgic.h>
 #include <kvm/arm_arch_timer.h>
 
+#include "trace.h"
+
 static struct timecounter *timecounter;
 static struct workqueue_struct *wqueue;
 static unsigned int host_vtimer_irq;
@@ -59,18 +61,6 @@ static void timer_disarm(struct arch_timer_cpu *timer)
 	}
 }
 
-static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
-{
-	int ret;
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
-	timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK;
-	ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
-				  timer->irq->irq,
-				  timer->irq->level);
-	WARN_ON(ret);
-}
-
 static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
 {
 	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
@@ -111,13 +101,20 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
 	return HRTIMER_NORESTART;
 }
 
+static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+	return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
+		(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE);
+}
+
 bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 	cycle_t cval, now;
 
-	if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
-	    !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE))
+	if (!kvm_timer_irq_can_fire(vcpu))
 		return false;
 
 	cval = timer->cntv_cval;
@@ -126,68 +123,159 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
 	return cval <= now;
 }
 
+static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level)
+{
+	int ret;
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+	BUG_ON(!vgic_initialized(vcpu->kvm));
+
+	timer->irq.level = new_level;
+	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->map->virt_irq,
+				   timer->irq.level);
+	ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
+					 timer->map,
+					 timer->irq.level);
+	WARN_ON(ret);
+}
+
+/*
+ * Check if there was a change in the timer state (should we raise or lower
+ * the line level to the GIC).
+ */
+static int kvm_timer_update_state(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+	/*
+	 * If userspace modified the timer registers via SET_ONE_REG before
+	 * the vgic was initialized, we mustn't set the timer->irq.level value
+	 * because the guest would never see the interrupt. Instead wait
+	 * until we call this function from kvm_timer_flush_hwstate.
+	 */
+	if (!vgic_initialized(vcpu->kvm))
+		return -ENODEV;
+
+	if (kvm_timer_should_fire(vcpu) != timer->irq.level)
+		kvm_timer_update_irq(vcpu, !timer->irq.level);
+
+	return 0;
+}
+
+/*
+ * Schedule the background timer before calling kvm_vcpu_block, so that this
+ * thread is removed from its waitqueue and made runnable when there's a timer
+ * interrupt to handle.
+ */
+void kvm_timer_schedule(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	u64 ns;
+	cycle_t cval, now;
+
+	BUG_ON(timer_is_armed(timer));
+
+	/*
+	 * No need to schedule a background timer if the guest timer has
+	 * already expired, because kvm_vcpu_block will return before putting
+	 * the thread to sleep.
+	 */
+	if (kvm_timer_should_fire(vcpu))
+		return;
+
+	/*
+	 * If the timer is not capable of raising interrupts (disabled or
+	 * masked), then there's no more work for us to do.
+	 */
+	if (!kvm_timer_irq_can_fire(vcpu))
+		return;
+
+	/* The timer has not yet expired, schedule a background timer */
+	cval = timer->cntv_cval;
+	now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+
+	ns = cyclecounter_cyc2ns(timecounter->cc,
+				 cval - now,
+				 timecounter->mask,
+				 &timecounter->frac);
+	timer_arm(timer, ns);
+}
+
+void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	timer_disarm(timer);
+}
+
 /**
  * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu
  * @vcpu: The vcpu pointer
  *
- * Disarm any pending soft timers, since the world-switch code will write the
- * virtual timer state back to the physical CPU.
+ * Check if the virtual timer has expired while we were running in the host,
+ * and inject an interrupt if that was the case.
  */
 void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	bool phys_active;
+	int ret;
 
-	/*
-	 * We're about to run this vcpu again, so there is no need to
-	 * keep the background timer running, as we're about to
-	 * populate the CPU timer again.
-	 */
-	timer_disarm(timer);
+	if (kvm_timer_update_state(vcpu))
+		return;
 
 	/*
-	 * If the timer expired while we were not scheduled, now is the time
-	 * to inject it.
-	 */
-	if (kvm_timer_should_fire(vcpu))
-		kvm_timer_inject_irq(vcpu);
+	 * If we enter the guest with the virtual input level to the VGIC
+	 * asserted, then we have already told the VGIC what we need to, and
+	 * we don't need to exit from the guest until the guest deactivates
+	 * the already injected interrupt, so we should set the hardware
+	 * active state to prevent unnecessary exits from the guest.
+	 *
+	 * Also, if we enter the guest with the virtual timer interrupt active,
+	 * then it must be active on the physical distributor, because we set
+	 * the HW bit and the guest must be able to deactivate the virtual and
+	 * physical interrupt at the same time.
+	 *
+	 * Conversely, if the virtual input level is deasserted and the virtual
+	 * interrupt is not active, then always clear the hardware active state
+	 * to ensure that hardware interrupts from the timer trigger a guest
+	 * exit.
+ */ + if (timer->irq.level || kvm_vgic_map_is_active(vcpu, timer->map)) + phys_active = true; + else + phys_active = false; + + ret = irq_set_irqchip_state(timer->map->irq, + IRQCHIP_STATE_ACTIVE, + phys_active); + WARN_ON(ret); } /** * kvm_timer_sync_hwstate - sync timer state from cpu * @vcpu: The vcpu pointer * - * Check if the virtual timer was armed and either schedule a corresponding - * soft timer or inject directly if already expired. + * Check if the virtual timer has expired while we were running in the guest, + * and inject an interrupt if that was the case. */ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - cycle_t cval, now; - u64 ns; BUG_ON(timer_is_armed(timer)); - if (kvm_timer_should_fire(vcpu)) { - /* - * Timer has already expired while we were not - * looking. Inject the interrupt and carry on. - */ - kvm_timer_inject_irq(vcpu); - return; - } - - cval = timer->cntv_cval; - now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; - - ns = cyclecounter_cyc2ns(timecounter->cc, cval - now, timecounter->mask, - &timecounter->frac); - timer_arm(timer, ns); + /* + * The guest could have modified the timer registers or the timer + * could have expired, update the timer state. + */ + kvm_timer_update_state(vcpu); } -void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, - const struct kvm_irq_level *irq) +int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, + const struct kvm_irq_level *irq) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct irq_phys_map *map; /* * The vcpu timer irq number cannot be determined in @@ -195,7 +283,27 @@ void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, * kvm_vcpu_set_target(). To handle this, we determine * vcpu timer irq number when the vcpu is reset. */ - timer->irq = irq; + timer->irq.irq = irq->irq; + + /* + * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 + * and to 0 for ARMv7. We provide an implementation that always + * resets the timer to be disabled and unmasked and is compliant with + * the ARMv7 architecture. + */ + timer->cntv_ctl = 0; + kvm_timer_update_state(vcpu); + + /* + * Tell the VGIC that the virtual interrupt is tied to a + * physical interrupt. We do that once per VCPU. 
+ */ + map = kvm_vgic_map_phys_irq(vcpu, irq->irq, host_vtimer_irq); + if (WARN_ON(IS_ERR(map))) + return PTR_ERR(map); + + timer->map = map; + return 0; } void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) @@ -229,6 +337,8 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) default: return -1; } + + kvm_timer_update_state(vcpu); return 0; } @@ -335,6 +445,8 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; timer_disarm(timer); + if (timer->map) + kvm_vgic_unmap_phys_irq(vcpu, timer->map); } void kvm_timer_enable(struct kvm *kvm) diff --git a/kernel/virt/kvm/arm/trace.h b/kernel/virt/kvm/arm/trace.h new file mode 100644 index 000000000..37d8b9886 --- /dev/null +++ b/kernel/virt/kvm/arm/trace.h @@ -0,0 +1,63 @@ +#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KVM_H + +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm + +/* + * Tracepoints for vgic + */ +TRACE_EVENT(vgic_update_irq_pending, + TP_PROTO(unsigned long vcpu_id, __u32 irq, bool level), + TP_ARGS(vcpu_id, irq, level), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_id ) + __field( __u32, irq ) + __field( bool, level ) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->irq = irq; + __entry->level = level; + ), + + TP_printk("VCPU: %ld, IRQ %d, level: %d", + __entry->vcpu_id, __entry->irq, __entry->level) +); + +/* + * Tracepoints for arch_timer + */ +TRACE_EVENT(kvm_timer_update_irq, + TP_PROTO(unsigned long vcpu_id, __u32 irq, int level), + TP_ARGS(vcpu_id, irq, level), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_id ) + __field( __u32, irq ) + __field( int, level ) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->irq = irq; + __entry->level = level; + ), + + TP_printk("VCPU: %ld, IRQ %d, level %d", + __entry->vcpu_id, __entry->irq, __entry->level) +); + +#endif /* _TRACE_KVM_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../../virt/kvm/arm +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace + +/* This part must be outside protection */ +#include diff --git a/kernel/virt/kvm/arm/vgic-v2.c b/kernel/virt/kvm/arm/vgic-v2.c index f9b9c7c51..ff02f08df 100644 --- a/kernel/virt/kvm/arm/vgic-v2.c +++ b/kernel/virt/kvm/arm/vgic-v2.c @@ -48,6 +48,10 @@ static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr) lr_desc.state |= LR_STATE_ACTIVE; if (val & GICH_LR_EOI) lr_desc.state |= LR_EOI_INT; + if (val & GICH_LR_HW) { + lr_desc.state |= LR_HW; + lr_desc.hwirq = (val & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT; + } return lr_desc; } @@ -55,7 +59,9 @@ static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr) static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc) { - u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq; + u32 lr_val; + + lr_val = lr_desc.irq; if (lr_desc.state & LR_STATE_PENDING) lr_val |= GICH_LR_PENDING_BIT; @@ -64,12 +70,16 @@ static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, if (lr_desc.state & LR_EOI_INT) lr_val |= GICH_LR_EOI; + if (lr_desc.state & LR_HW) { + lr_val |= GICH_LR_HW; + lr_val |= (u32)lr_desc.hwirq << GICH_LR_PHYSID_CPUID_SHIFT; + } + + if (lr_desc.irq < VGIC_NR_SGIS) + lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT); + vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; -} -static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, - struct vgic_lr lr_desc) -{ if (!(lr_desc.state & LR_STATE_MASK)) 
 		vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr);
 	else
 		vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr &= ~(1ULL << lr);
 }
@@ -144,6 +154,7 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu)
 	 * anyway.
 	 */
 	vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
+	vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0;
 
 	/* Get the show on the road... */
 	vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
@@ -152,7 +163,6 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu)
 static const struct vgic_ops vgic_v2_ops = {
 	.get_lr			= vgic_v2_get_lr,
 	.set_lr			= vgic_v2_set_lr,
-	.sync_lr_elrsr		= vgic_v2_sync_lr_elrsr,
 	.get_elrsr		= vgic_v2_get_elrsr,
 	.get_eisr		= vgic_v2_get_eisr,
 	.clear_eisr		= vgic_v2_clear_eisr,
diff --git a/kernel/virt/kvm/arm/vgic-v3-emul.c b/kernel/virt/kvm/arm/vgic-v3-emul.c
index e9c3a7a83..e661e7fb9 100644
--- a/kernel/virt/kvm/arm/vgic-v3-emul.c
+++ b/kernel/virt/kvm/arm/vgic-v3-emul.c
@@ -76,8 +76,6 @@ static bool handle_mmio_ctlr(struct kvm_vcpu *vcpu,
 	vgic_reg_access(mmio, &reg, offset,
 			ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
 	if (mmio->is_write) {
-		if (reg & GICD_CTLR_ENABLE_SS_G0)
-			kvm_info("guest tried to enable unsupported Group0 interrupts\n");
 		vcpu->kvm->arch.vgic.enabled = !!(reg & GICD_CTLR_ENABLE_SS_G1);
 		vgic_update_state(vcpu->kvm);
 		return true;
@@ -173,6 +171,32 @@ static bool handle_mmio_clear_pending_reg_dist(struct kvm_vcpu *vcpu,
 	return false;
 }
 
+static bool handle_mmio_set_active_reg_dist(struct kvm_vcpu *vcpu,
+					    struct kvm_exit_mmio *mmio,
+					    phys_addr_t offset)
+{
+	if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8))
+		return vgic_handle_set_active_reg(vcpu->kvm, mmio, offset,
+						  vcpu->vcpu_id);
+
+	vgic_reg_access(mmio, NULL, offset,
+			ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+	return false;
+}
+
+static bool handle_mmio_clear_active_reg_dist(struct kvm_vcpu *vcpu,
+					      struct kvm_exit_mmio *mmio,
+					      phys_addr_t offset)
+{
+	if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8))
+		return vgic_handle_clear_active_reg(vcpu->kvm, mmio, offset,
+						    vcpu->vcpu_id);
+
+	vgic_reg_access(mmio, NULL, offset,
+			ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+	return false;
+}
+
 static bool handle_mmio_priority_reg_dist(struct kvm_vcpu *vcpu,
 					  struct kvm_exit_mmio *mmio,
 					  phys_addr_t offset)
@@ -428,13 +452,13 @@ static const struct vgic_io_range vgic_v3_dist_ranges[] = {
 		.base		= GICD_ISACTIVER,
 		.len		= 0x80,
 		.bits_per_irq	= 1,
-		.handle_mmio	= handle_mmio_raz_wi,
+		.handle_mmio	= handle_mmio_set_active_reg_dist,
 	},
 	{
 		.base		= GICD_ICACTIVER,
 		.len		= 0x80,
 		.bits_per_irq	= 1,
-		.handle_mmio	= handle_mmio_raz_wi,
+		.handle_mmio	= handle_mmio_clear_active_reg_dist,
 	},
 	{
 		.base		= GICD_IPRIORITYR,
@@ -561,6 +585,26 @@ static bool handle_mmio_clear_enable_reg_redist(struct kvm_vcpu *vcpu,
 					 ACCESS_WRITE_CLEARBIT);
 }
 
+static bool handle_mmio_set_active_reg_redist(struct kvm_vcpu *vcpu,
+					      struct kvm_exit_mmio *mmio,
+					      phys_addr_t offset)
+{
+	struct kvm_vcpu *redist_vcpu = mmio->private;
+
+	return vgic_handle_set_active_reg(vcpu->kvm, mmio, offset,
+					  redist_vcpu->vcpu_id);
+}
+
+static bool handle_mmio_clear_active_reg_redist(struct kvm_vcpu *vcpu,
+						struct kvm_exit_mmio *mmio,
+						phys_addr_t offset)
+{
+	struct kvm_vcpu *redist_vcpu = mmio->private;
+
+	return vgic_handle_clear_active_reg(vcpu->kvm, mmio, offset,
+					    redist_vcpu->vcpu_id);
+}
+
 static bool handle_mmio_set_pending_reg_redist(struct kvm_vcpu *vcpu,
 					       struct kvm_exit_mmio *mmio,
 					       phys_addr_t offset)
@@ -674,13 +718,13 @@ static const struct vgic_io_range vgic_redist_ranges[] = {
 		.base		= SGI_base(GICR_ISACTIVER0),
 		.len		= 0x04,
 		.bits_per_irq	= 1,
-		.handle_mmio	= handle_mmio_raz_wi,
+		.handle_mmio	= handle_mmio_set_active_reg_redist,
 	},
 	{
 		.base		= SGI_base(GICR_ICACTIVER0),
 		.len		= 0x04,
 		.bits_per_irq	= 1,
-		.handle_mmio	= handle_mmio_raz_wi,
+		.handle_mmio	= handle_mmio_clear_active_reg_redist,
 	},
 	{
 		.base		= SGI_base(GICR_IPRIORITYR0),
diff --git a/kernel/virt/kvm/arm/vgic-v3.c b/kernel/virt/kvm/arm/vgic-v3.c
index dff06021e..487d6357b 100644
--- a/kernel/virt/kvm/arm/vgic-v3.c
+++ b/kernel/virt/kvm/arm/vgic-v3.c
@@ -67,6 +67,10 @@ static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
 		lr_desc.state |= LR_STATE_ACTIVE;
 	if (val & ICH_LR_EOI)
 		lr_desc.state |= LR_EOI_INT;
+	if (val & ICH_LR_HW) {
+		lr_desc.state |= LR_HW;
+		lr_desc.hwirq = (val >> ICH_LR_PHYS_ID_SHIFT) & GENMASK(9, 0);
+	}
 
 	return lr_desc;
 }
@@ -84,10 +88,17 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
 	 * Eventually we want to make this configurable, so we may revisit
 	 * this in the future.
 	 */
-	if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
+	switch (vcpu->kvm->arch.vgic.vgic_model) {
+	case KVM_DEV_TYPE_ARM_VGIC_V3:
 		lr_val |= ICH_LR_GROUP;
-	else
-		lr_val |= (u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT;
+		break;
+	case KVM_DEV_TYPE_ARM_VGIC_V2:
+		if (lr_desc.irq < VGIC_NR_SGIS)
+			lr_val |= (u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT;
+		break;
+	default:
+		BUG();
+	}
 
 	if (lr_desc.state & LR_STATE_PENDING)
 		lr_val |= ICH_LR_PENDING_BIT;
@@ -95,13 +106,13 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
 		lr_val |= ICH_LR_ACTIVE_BIT;
 	if (lr_desc.state & LR_EOI_INT)
 		lr_val |= ICH_LR_EOI;
+	if (lr_desc.state & LR_HW) {
+		lr_val |= ICH_LR_HW;
+		lr_val |= ((u64)lr_desc.hwirq) << ICH_LR_PHYS_ID_SHIFT;
+	}
 
 	vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val;
-}
 
-static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
-				  struct vgic_lr lr_desc)
-{
 	if (!(lr_desc.state & LR_STATE_MASK))
 		vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);
 	else
 		vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr &= ~(1U << lr);
 }
@@ -178,6 +189,7 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu)
 	 * anyway.
 	 */
 	vgic_v3->vgic_vmcr = 0;
+	vgic_v3->vgic_elrsr = ~0;
 
 	/*
 	 * If we are emulating a GICv3, we do it in an non-GICv2-compatible
@@ -196,7 +208,6 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu)
 static const struct vgic_ops vgic_v3_ops = {
 	.get_lr			= vgic_v3_get_lr,
 	.set_lr			= vgic_v3_set_lr,
-	.sync_lr_elrsr		= vgic_v3_sync_lr_elrsr,
 	.get_elrsr		= vgic_v3_get_elrsr,
 	.get_eisr		= vgic_v3_get_eisr,
 	.clear_eisr		= vgic_v3_clear_eisr,
@@ -273,7 +284,7 @@ int vgic_v3_probe(struct device_node *vgic_node,
 
 	vgic->vctrl_base = NULL;
 	vgic->type = VGIC_V3;
-	vgic->max_gic_vcpus = KVM_MAX_VCPUS;
+	vgic->max_gic_vcpus = VGIC_V3_MAX_CPUS;
 
 	kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
 		 vcpu_res.start, vgic->maint_irq);
diff --git a/kernel/virt/kvm/arm/vgic.c b/kernel/virt/kvm/arm/vgic.c
index 950064a09..5d10f104f 100644
--- a/kernel/virt/kvm/arm/vgic.c
+++ b/kernel/virt/kvm/arm/vgic.c
@@ -24,10 +24,9 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/rculist.h>
 #include <linux/uaccess.h>
 
-#include <linux/irqchip/arm-gic.h>
-
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_mmu.h>
@@ -35,6 +34,9 @@
 #include <kvm/iodev.h>
 #include <kvm/arm_vgic.h>
 
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
 /*
  * How the whole thing works (courtesy of Christoffer Dall):
 *
@@ -76,14 +78,40 @@
 * cause the interrupt to become inactive in such a situation.
 * Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become
 * inactive as long as the external input line is held high.
+ *
+ *
+ * Initialization rules: there are multiple stages to the vgic
+ * initialization, both for the distributor and the CPU interfaces.
+ *
+ * Distributor:
+ *
+ * - kvm_vgic_early_init(): initialization of static data that doesn't
+ *   depend on any sizing information or emulation type. No allocation
+ *   is allowed there.
+ *
+ * - vgic_init(): allocation and initialization of the generic data
+ *   structures that depend on sizing information (number of CPUs,
+ *   number of interrupts). Also initializes the vcpu specific data
+ *   structures. Can be executed lazily for GICv2.
+ *   [to be renamed to kvm_vgic_init??]
+ *
+ * CPU Interface:
+ *
+ * - kvm_vgic_cpu_early_init(): initialization of static data that
+ *   doesn't depend on any sizing information or emulation type. No
+ *   allocation is allowed there.
 */
 
 #include "vgic.h"
 
 static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
-static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
+static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu);
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
 static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
+static u64 vgic_get_elrsr(struct kvm_vcpu *vcpu);
+static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
+						int virt_irq);
+static int compute_pending_for_cpu(struct kvm_vcpu *vcpu);
 
 static const struct vgic_ops *vgic_ops;
 static const struct vgic_params *vgic;
@@ -334,6 +362,11 @@ static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq)
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
 	vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0);
+	if (!vgic_dist_irq_get_level(vcpu, irq)) {
+		vgic_dist_irq_clear_pending(vcpu, irq);
+		if (!compute_pending_for_cpu(vcpu))
+			clear_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
+	}
 }
 
 static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
@@ -377,7 +410,7 @@ void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
 
 static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq)
 {
-	return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq);
+	return !vgic_irq_is_queued(vcpu, irq);
 }
 
 /**
@@ -631,10 +664,9 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
 	vgic_reg_access(mmio, &val, offset,
 			ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
 	if (mmio->is_write) {
-		if (offset < 8) {
-			*reg = ~0U; /* Force PPIs/SGIs to 1 */
+		/* Ignore writes to read-only SGI and PPI bits */
+		if (offset < 8)
 			return false;
-		}
 
 		val = vgic_cfg_compress(val);
 		if (offset & 4) {
@@ -660,9 +692,11 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
 void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+	u64 elrsr = vgic_get_elrsr(vcpu);
+	unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
 	int i;
 
-	for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
+	for_each_clear_bit(i, elrsr_ptr, vgic_cpu->nr_lr) {
 		struct vgic_lr lr = vgic_get_lr(vcpu, i);
 
 		/*
@@ -683,30 +717,14 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 		 * interrupt then move the active state to the
 		 * distributor tracking bit.
 		 */
-		if (lr.state & LR_STATE_ACTIVE) {
+		if (lr.state & LR_STATE_ACTIVE)
 			vgic_irq_set_active(vcpu, lr.irq);
-			lr.state &= ~LR_STATE_ACTIVE;
-		}
 
 		/*
 		 * Reestablish the pending state on the distributor and the
-		 * CPU interface.  It may have already been pending, but that
-		 * is fine, then we are only setting a few bits that were
-		 * already set.
+		 * CPU interface and mark the LR as free for other use.
 		 */
-		if (lr.state & LR_STATE_PENDING) {
-			vgic_dist_irq_set_pending(vcpu, lr.irq);
-			lr.state &= ~LR_STATE_PENDING;
-		}
-
-		vgic_set_lr(vcpu, i, lr);
-
-		/*
-		 * Mark the LR as free for other use.
-		 */
-		BUG_ON(lr.state & LR_STATE_MASK);
-		vgic_retire_lr(i, lr.irq, vcpu);
-		vgic_irq_clear_queued(vcpu, lr.irq);
+		vgic_retire_lr(i, vcpu);
 
 		/* Finally update the VGIC state. */
 		vgic_update_state(vcpu->kvm);
@@ -959,6 +977,12 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
 	pend_percpu = vcpu->arch.vgic_cpu.pending_percpu;
 	pend_shared = vcpu->arch.vgic_cpu.pending_shared;
 
+	if (!dist->enabled) {
+		bitmap_zero(pend_percpu, VGIC_NR_PRIVATE_IRQS);
+		bitmap_zero(pend_shared, nr_shared);
+		return 0;
+	}
+
 	pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id);
 	enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
 	bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS);
@@ -986,11 +1010,6 @@ void vgic_update_state(struct kvm *kvm)
 	struct kvm_vcpu *vcpu;
 	int c;
 
-	if (!dist->enabled) {
-		set_bit(0, dist->irq_pending_on_cpu);
-		return;
-	}
-
 	kvm_for_each_vcpu(c, vcpu, kvm) {
 		if (compute_pending_for_cpu(vcpu))
 			set_bit(c, dist->irq_pending_on_cpu);
@@ -1013,12 +1032,6 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr,
 	vgic_ops->set_lr(vcpu, lr, vlr);
 }
 
-static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
-			       struct vgic_lr vlr)
-{
-	vgic_ops->sync_lr_elrsr(vcpu, lr, vlr);
-}
-
 static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu)
 {
 	return vgic_ops->get_elrsr(vcpu);
@@ -1064,16 +1077,44 @@ static inline void vgic_enable(struct kvm_vcpu *vcpu)
 	vgic_ops->enable(vcpu);
 }
 
-static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
+static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu)
 {
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr);
 
+	vgic_irq_clear_queued(vcpu, vlr.irq);
+
+	/*
+	 * We must transfer the pending state back to the distributor before
+	 * retiring the LR, otherwise we may lose edge-triggered interrupts.
+ */ + if (vlr.state & LR_STATE_PENDING) { + vgic_dist_irq_set_pending(vcpu, vlr.irq); + vlr.hwirq = 0; + } + vlr.state = 0; vgic_set_lr(vcpu, lr_nr, vlr); - clear_bit(lr_nr, vgic_cpu->lr_used); - vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; - vgic_sync_lr_elrsr(vcpu, lr_nr, vlr); +} + +static bool dist_active_irq(struct kvm_vcpu *vcpu) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu); +} + +bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, struct irq_phys_map *map) +{ + int i; + + for (i = 0; i < vcpu->arch.vgic_cpu.nr_lr; i++) { + struct vgic_lr vlr = vgic_get_lr(vcpu, i); + + if (vlr.irq == map->virt_irq && vlr.state & LR_STATE_ACTIVE) + return true; + } + + return vgic_irq_is_active(vcpu, map->virt_irq); } /* @@ -1087,17 +1128,15 @@ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) */ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) { - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u64 elrsr = vgic_get_elrsr(vcpu); + unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); int lr; - for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) { + for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); - if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { - vgic_retire_lr(lr, vlr.irq, vcpu); - if (vgic_irq_is_queued(vcpu, vlr.irq)) - vgic_irq_clear_queued(vcpu, vlr.irq); - } + if (!vgic_irq_is_enabled(vcpu, vlr.irq)) + vgic_retire_lr(lr, vcpu); } } @@ -1109,7 +1148,8 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, kvm_debug("Set active, clear distributor: 0x%x\n", vlr.state); vgic_irq_clear_active(vcpu, irq); vgic_update_state(vcpu->kvm); - } else if (vgic_dist_irq_is_pending(vcpu, irq)) { + } else { + WARN_ON(!vgic_dist_irq_is_pending(vcpu, irq)); vlr.state |= LR_STATE_PENDING; kvm_debug("Set pending: 0x%x\n", vlr.state); } @@ -1117,8 +1157,25 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, if (!vgic_irq_is_edge(vcpu, irq)) vlr.state |= LR_EOI_INT; + if (vlr.irq >= VGIC_NR_SGIS) { + struct irq_phys_map *map; + map = vgic_irq_map_search(vcpu, irq); + + if (map) { + vlr.hwirq = map->phys_irq; + vlr.state |= LR_HW; + vlr.state &= ~LR_EOI_INT; + + /* + * Make sure we're not going to sample this + * again, as a HW-backed interrupt cannot be + * in the PENDING_ACTIVE stage. + */ + vgic_irq_set_queued(vcpu, irq); + } + } + vgic_set_lr(vcpu, lr_nr, vlr); - vgic_sync_lr_elrsr(vcpu, lr_nr, vlr); } /* @@ -1128,8 +1185,9 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, */ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) { - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + u64 elrsr = vgic_get_elrsr(vcpu); + unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); struct vgic_lr vlr; int lr; @@ -1140,28 +1198,22 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) kvm_debug("Queue IRQ%d\n", irq); - lr = vgic_cpu->vgic_irq_lr_map[irq]; - /* Do we have an active interrupt for the same CPUID? 
-	if (lr != LR_EMPTY) {
+	for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) {
 		vlr = vgic_get_lr(vcpu, lr);
-		if (vlr.source == sgi_source_id) {
+		if (vlr.irq == irq && vlr.source == sgi_source_id) {
 			kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq);
-			BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
 			vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
 			return true;
 		}
 	}
 
 	/* Try to use another LR for this interrupt */
-	lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used,
-			       vgic->nr_lr);
+	lr = find_first_bit(elrsr_ptr, vgic->nr_lr);
 	if (lr >= vgic->nr_lr)
 		return false;
 
 	kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
-	vgic_cpu->vgic_irq_lr_map[irq] = lr;
-	set_bit(lr, vgic_cpu->lr_used);
 
 	vlr.irq = irq;
 	vlr.source = sgi_source_id;
@@ -1217,7 +1269,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 	 * may have been serviced from another vcpu. In all cases,
 	 * move along.
 	 */
-	if (!kvm_vgic_vcpu_pending_irq(vcpu) && !kvm_vgic_vcpu_active_irq(vcpu))
+	if (!kvm_vgic_vcpu_pending_irq(vcpu) && !dist_active_irq(vcpu))
 		goto epilog;
 
 	/* SGIs */
@@ -1256,12 +1308,60 @@ epilog:
 	}
 }
 
+static int process_queued_irq(struct kvm_vcpu *vcpu,
+				   int lr, struct vgic_lr vlr)
+{
+	int pending = 0;
+
+	/*
+	 * If the IRQ was EOIed (called from vgic_process_maintenance) or it
+	 * went from active to non-active (called from vgic_sync_hwirq) it was
+	 * also ACKed and we therefore assume we can clear the soft pending
+	 * state (should it have been set) for this interrupt.
+	 *
+	 * Note: if the IRQ soft pending state was set after the IRQ was
+	 * acked, it actually shouldn't be cleared, but we have no way of
+	 * knowing that unless we start trapping ACKs when the soft-pending
+	 * state is set.
+	 */
+	vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
+
+	/*
+	 * Tell the gic to start sampling this interrupt again.
+	 */
+	vgic_irq_clear_queued(vcpu, vlr.irq);
+
+	/* Any additional pending interrupt? */
+	if (vgic_irq_is_edge(vcpu, vlr.irq)) {
+		BUG_ON(!(vlr.state & LR_HW));
+		pending = vgic_dist_irq_is_pending(vcpu, vlr.irq);
+	} else {
+		if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
+			vgic_cpu_irq_set(vcpu, vlr.irq);
+			pending = 1;
+		} else {
+			vgic_dist_irq_clear_pending(vcpu, vlr.irq);
+			vgic_cpu_irq_clear(vcpu, vlr.irq);
+		}
+	}
+
+	/*
+	 * Despite being EOIed, the LR may not have
+	 * been marked as empty.
+	 */
+	vlr.state = 0;
+	vlr.hwirq = 0;
+	vgic_set_lr(vcpu, lr, vlr);
+
+	return pending;
+}
+
 static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 {
 	u32 status = vgic_get_interrupt_status(vcpu);
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-	bool level_pending = false;
 	struct kvm *kvm = vcpu->kvm;
+	int level_pending = 0;
 
 	kvm_debug("STATUS = %08x\n", status);
 
@@ -1276,54 +1376,22 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 		for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
 			struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 
-			WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
-			spin_lock(&dist->lock);
-			vgic_irq_clear_queued(vcpu, vlr.irq);
+			WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
 			WARN_ON(vlr.state & LR_STATE_MASK);
-			vlr.state = 0;
-			vgic_set_lr(vcpu, lr, vlr);
 
-			/*
-			 * If the IRQ was EOIed it was also ACKed and we we
-			 * therefore assume we can clear the soft pending
-			 * state (should it had been set) for this interrupt.
-			 *
-			 * Note: if the IRQ soft pending state was set after
-			 * the IRQ was acked, it actually shouldn't be
-			 * cleared, but we have no way of knowing that unless
-			 * we start trapping ACKs when the soft-pending state
-			 * is set.
- */ - vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq); /* * kvm_notify_acked_irq calls kvm_set_irq() - * to reset the IRQ level. Need to release the - * lock for kvm_set_irq to grab it. + * to reset the IRQ level, which grabs the dist->lock + * so we call this before taking the dist->lock. */ - spin_unlock(&dist->lock); - kvm_notify_acked_irq(kvm, 0, vlr.irq - VGIC_NR_PRIVATE_IRQS); - spin_lock(&dist->lock); - - /* Any additional pending interrupt? */ - if (vgic_dist_irq_get_level(vcpu, vlr.irq)) { - vgic_cpu_irq_set(vcpu, vlr.irq); - level_pending = true; - } else { - vgic_dist_irq_clear_pending(vcpu, vlr.irq); - vgic_cpu_irq_clear(vcpu, vlr.irq); - } + spin_lock(&dist->lock); + level_pending |= process_queued_irq(vcpu, lr, vlr); spin_unlock(&dist->lock); - - /* - * Despite being EOIed, the LR may not have - * been marked as empty. - */ - vgic_sync_lr_elrsr(vcpu, lr, vlr); } } @@ -1341,10 +1409,31 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) return level_pending; } +/* + * Save the physical active state, and reset it to inactive. + * + * Return true if there's a pending forwarded interrupt to queue. + */ +static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + bool level_pending; + + if (!(vlr.state & LR_HW)) + return false; + + if (vlr.state & LR_STATE_ACTIVE) + return false; + + spin_lock(&dist->lock); + level_pending = process_queued_irq(vcpu, lr, vlr); + spin_unlock(&dist->lock); + return level_pending; +} + /* Sync back the VGIC state after a guest run */ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; u64 elrsr; unsigned long *elrsr_ptr; @@ -1352,23 +1441,18 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) bool level_pending; level_pending = vgic_process_maintenance(vcpu); - elrsr = vgic_get_elrsr(vcpu); - elrsr_ptr = u64_to_bitmask(&elrsr); - - /* Clear mappings for empty LRs */ - for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) { - struct vgic_lr vlr; - if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) - continue; - - vlr = vgic_get_lr(vcpu, lr); + /* Deal with HW interrupts, and clear mappings for empty LRs */ + for (lr = 0; lr < vgic->nr_lr; lr++) { + struct vgic_lr vlr = vgic_get_lr(vcpu, lr); + level_pending |= vgic_sync_hwirq(vcpu, lr, vlr); BUG_ON(vlr.irq >= dist->nr_irqs); - vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY; } /* Check if we still have something up our sleeve... 
+	elrsr = vgic_get_elrsr(vcpu);
+	elrsr_ptr = u64_to_bitmask(&elrsr);
 	pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr);
 	if (level_pending || pending < vgic->nr_lr)
 		set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
@@ -1404,17 +1488,6 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
 	return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
 }
 
-int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
-	if (!irqchip_in_kernel(vcpu->kvm))
-		return 0;
-
-	return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu);
-}
-
-
 void vgic_kick_vcpus(struct kvm *kvm)
 {
 	struct kvm_vcpu *vcpu;
@@ -1449,7 +1522,8 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
 }
 
 static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
-				   unsigned int irq_num, bool level)
+				   struct irq_phys_map *map,
+				   unsigned int irq_num, bool level)
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
 	struct kvm_vcpu *vcpu;
@@ -1457,6 +1531,11 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 	int enabled;
 	bool ret = true, can_inject = true;
 
+	trace_vgic_update_irq_pending(cpuid, irq_num, level);
+
+	if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
+		return -EINVAL;
+
 	spin_lock(&dist->lock);
 
 	vcpu = kvm_get_vcpu(kvm, cpuid);
@@ -1487,8 +1566,12 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 	} else {
 		if (level_triggered) {
 			vgic_dist_irq_clear_level(vcpu, irq_num);
-			if (!vgic_dist_irq_soft_pend(vcpu, irq_num))
+			if (!vgic_dist_irq_soft_pend(vcpu, irq_num)) {
 				vgic_dist_irq_clear_pending(vcpu, irq_num);
+				vgic_cpu_irq_clear(vcpu, irq_num);
+				if (!compute_pending_for_cpu(vcpu))
+					clear_bit(cpuid, dist->irq_pending_on_cpu);
+			}
 		}
 
 		ret = false;
@@ -1519,18 +1602,46 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 out:
 	spin_unlock(&dist->lock);
 
-	return ret ? cpuid : -EINVAL;
+	if (ret) {
+		/* kick the specified vcpu */
+		kvm_vcpu_kick(kvm_get_vcpu(kvm, cpuid));
+	}
+
+	return 0;
+}
+
+static int vgic_lazy_init(struct kvm *kvm)
+{
+	int ret = 0;
+
+	if (unlikely(!vgic_initialized(kvm))) {
+		/*
+		 * We only provide the automatic initialization of the VGIC
+		 * for the legacy case of a GICv2. Any other type must
+		 * be explicitly initialized once setup with the respective
+		 * KVM device call.
+		 */
+		if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2)
+			return -EBUSY;
+
+		mutex_lock(&kvm->lock);
+		ret = vgic_init(kvm);
+		mutex_unlock(&kvm->lock);
+	}
+
+	return ret;
 }
 
 /**
 * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
 * @kvm:     The VM structure pointer
 * @cpuid:   The CPU for PPIs
- * @irq_num: The IRQ number that is assigned to the device
+ * @irq_num: The IRQ number that is assigned to the device. This IRQ
+ *           must not be mapped to a HW interrupt.
 * @level:   Edge-triggered:  true:  to trigger the interrupt
 *			      false: to ignore the call
- *	     Level-sensitive  true:  activates an interrupt
- *			      false: deactivates an interrupt
+ *	     Level-sensitive  true:  raise the input signal
+ *			      false: lower the input signal
 *
 * The GIC is not concerned with devices being active-LOW or active-HIGH for
 * level-sensitive interrupts. You can think of the level parameter as 1
@@ -1539,39 +1650,44 @@ out:
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
 			bool level)
 {
-	int ret = 0;
-	int vcpu_id;
-
-	if (unlikely(!vgic_initialized(kvm))) {
-		/*
-		 * We only provide the automatic initialization of the VGIC
-		 * for the legacy case of a GICv2. Any other type must
-		 * be explicitly initialized once setup with the respective
-		 * KVM device call.
-		 */
-		if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) {
-			ret = -EBUSY;
-			goto out;
-		}
-		mutex_lock(&kvm->lock);
-		ret = vgic_init(kvm);
-		mutex_unlock(&kvm->lock);
+	struct irq_phys_map *map;
+	int ret;
 
-		if (ret)
-			goto out;
-	}
+	ret = vgic_lazy_init(kvm);
+	if (ret)
+		return ret;
 
-	if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
+	map = vgic_irq_map_search(kvm_get_vcpu(kvm, cpuid), irq_num);
+	if (map)
 		return -EINVAL;
 
-	vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level);
-	if (vcpu_id >= 0) {
-		/* kick the specified vcpu */
-		kvm_vcpu_kick(kvm_get_vcpu(kvm, vcpu_id));
-	}
+	return vgic_update_irq_pending(kvm, cpuid, NULL, irq_num, level);
+}
 
-out:
-	return ret;
+/**
+ * kvm_vgic_inject_mapped_irq - Inject a physically mapped IRQ to the vgic
+ * @kvm:     The VM structure pointer
+ * @cpuid:   The CPU for PPIs
+ * @map:     Pointer to an irq_phys_map structure describing the mapping
+ * @level:   Edge-triggered:  true:  to trigger the interrupt
+ *			      false: to ignore the call
+ *	     Level-sensitive  true:  raise the input signal
+ *			      false: lower the input signal
+ *
+ * The GIC is not concerned with devices being active-LOW or active-HIGH for
+ * level-sensitive interrupts. You can think of the level parameter as 1
+ * being HIGH and 0 being LOW and all devices being active-HIGH.
+ */
+int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
+			       struct irq_phys_map *map, bool level)
+{
+	int ret;
+
+	ret = vgic_lazy_init(kvm);
+	if (ret)
+		return ret;
+
+	return vgic_update_irq_pending(kvm, cpuid, map, map->virt_irq, level);
 }
 
 static irqreturn_t vgic_maintenance_handler(int irq, void *data)
@@ -1585,6 +1701,164 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
+static struct list_head *vgic_get_irq_phys_map_list(struct kvm_vcpu *vcpu,
+						    int virt_irq)
+{
+	if (virt_irq < VGIC_NR_PRIVATE_IRQS)
+		return &vcpu->arch.vgic_cpu.irq_phys_map_list;
+	else
+		return &vcpu->kvm->arch.vgic.irq_phys_map_list;
+}
+
+/**
+ * kvm_vgic_map_phys_irq - map a virtual IRQ to a physical IRQ
+ * @vcpu: The VCPU pointer
+ * @virt_irq: The virtual irq number
+ * @irq: The Linux IRQ number
+ *
+ * Establish a mapping between a guest visible irq (@virt_irq) and a
+ * Linux irq (@irq). On injection, @virt_irq will be associated with
+ * the physical interrupt represented by @irq. This mapping can be
+ * established multiple times as long as the parameters are the same.
+ *
+ * Returns a valid pointer on success, and an error pointer otherwise
+ */
+struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
+					   int virt_irq, int irq)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+	struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq);
+	struct irq_phys_map *map;
+	struct irq_phys_map_entry *entry;
+	struct irq_desc *desc;
+	struct irq_data *data;
+	int phys_irq;
+
+	desc = irq_to_desc(irq);
+	if (!desc) {
+		kvm_err("%s: no interrupt descriptor\n", __func__);
+		return ERR_PTR(-EINVAL);
+	}
+
+	data = irq_desc_get_irq_data(desc);
+	while (data->parent_data)
+		data = data->parent_data;
+
+	phys_irq = data->hwirq;
+
+	/* Create a new mapping */
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock(&dist->irq_phys_map_lock);
+
+	/* Try to match an existing mapping */
+	map = vgic_irq_map_search(vcpu, virt_irq);
+	if (map) {
+		/* Make sure this mapping matches */
+		if (map->phys_irq != phys_irq ||
+		    map->irq != irq)
+			map = ERR_PTR(-EINVAL);
+
+		/* Found an existing, valid mapping */
+		goto out;
+	}
+
+	map = &entry->map;
+	map->virt_irq = virt_irq;
+	map->phys_irq = phys_irq;
+	map->irq = irq;
+
+	list_add_tail_rcu(&entry->entry, root);
+
+out:
+	spin_unlock(&dist->irq_phys_map_lock);
+	/* If we've found a hit in the existing list, free the useless
+	 * entry */
+	if (IS_ERR(map) || map != &entry->map)
+		kfree(entry);
+	return map;
+}
+
+static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
+						int virt_irq)
+{
+	struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq);
+	struct irq_phys_map_entry *entry;
+	struct irq_phys_map *map;
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(entry, root, entry) {
+		map = &entry->map;
+		if (map->virt_irq == virt_irq) {
+			rcu_read_unlock();
+			return map;
+		}
+	}
+
+	rcu_read_unlock();
+
+	return NULL;
+}
+
+static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu)
+{
+	struct irq_phys_map_entry *entry;
+
+	entry = container_of(rcu, struct irq_phys_map_entry, rcu);
+	kfree(entry);
+}
+
+/**
+ * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping
+ * @vcpu: The VCPU pointer
+ * @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq
+ *
+ * Remove an existing mapping between virtual and physical interrupts.
+ */ +int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + struct irq_phys_map_entry *entry; + struct list_head *root; + + if (!map) + return -EINVAL; + + root = vgic_get_irq_phys_map_list(vcpu, map->virt_irq); + + spin_lock(&dist->irq_phys_map_lock); + + list_for_each_entry(entry, root, entry) { + if (&entry->map == map) { + list_del_rcu(&entry->entry); + call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu); + break; + } + } + + spin_unlock(&dist->irq_phys_map_lock); + + return 0; +} + +static void vgic_destroy_irq_phys_map(struct kvm *kvm, struct list_head *root) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct irq_phys_map_entry *entry; + + spin_lock(&dist->irq_phys_map_lock); + + list_for_each_entry(entry, root, entry) { + list_del_rcu(&entry->entry); + call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu); + } + + spin_unlock(&dist->irq_phys_map_lock); +} + void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1592,33 +1866,28 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) kfree(vgic_cpu->pending_shared); kfree(vgic_cpu->active_shared); kfree(vgic_cpu->pend_act_shared); - kfree(vgic_cpu->vgic_irq_lr_map); + vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list); vgic_cpu->pending_shared = NULL; vgic_cpu->active_shared = NULL; vgic_cpu->pend_act_shared = NULL; - vgic_cpu->vgic_irq_lr_map = NULL; } static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - - int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8; + int nr_longs = BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS); + int sz = nr_longs * sizeof(unsigned long); vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL); vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL); - vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL); if (!vgic_cpu->pending_shared || !vgic_cpu->active_shared - || !vgic_cpu->pend_act_shared - || !vgic_cpu->vgic_irq_lr_map) { + || !vgic_cpu->pend_act_shared) { kvm_vgic_vcpu_destroy(vcpu); return -ENOMEM; } - memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs); - /* * Store the number of LRs per vcpu, so we don't have to go * all the way to the distributor structure to find out. Only @@ -1629,6 +1898,17 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) return 0; } +/** + * kvm_vgic_vcpu_early_init - Earliest possible per-vcpu vgic init stage + * + * No memory allocation should be performed here, only static init. + */ +void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + INIT_LIST_HEAD(&vgic_cpu->irq_phys_map_list); +} + /** * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW * @@ -1666,6 +1946,7 @@ void kvm_vgic_destroy(struct kvm *kvm) kfree(dist->irq_spi_target); kfree(dist->irq_pending_on_cpu); kfree(dist->irq_active_on_cpu); + vgic_destroy_irq_phys_map(kvm, &dist->irq_phys_map_list); dist->irq_sgi_sources = NULL; dist->irq_spi_cpu = NULL; dist->irq_spi_target = NULL; @@ -1748,14 +2029,24 @@ int vgic_init(struct kvm *kvm) break; } - for (i = 0; i < dist->nr_irqs; i++) { - if (i < VGIC_NR_PPIS) + /* + * Enable and configure all SGIs to be edge-triggere and + * configure all PPIs as level-triggered. 
+ */ + for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { + if (i < VGIC_NR_SGIS) { + /* SGIs */ vgic_bitmap_set_irq_val(&dist->irq_enabled, vcpu->vcpu_id, i, 1); - if (i < VGIC_NR_PRIVATE_IRQS) vgic_bitmap_set_irq_val(&dist->irq_cfg, vcpu->vcpu_id, i, VGIC_CFG_EDGE); + } else if (i < VGIC_NR_PRIVATE_IRQS) { + /* PPIs */ + vgic_bitmap_set_irq_val(&dist->irq_cfg, + vcpu->vcpu_id, i, + VGIC_CFG_LEVEL); + } } vgic_enable(vcpu); @@ -1774,7 +2065,7 @@ static int init_vgic_model(struct kvm *kvm, int type) case KVM_DEV_TYPE_ARM_VGIC_V2: vgic_v2_init_emulation(kvm); break; -#ifdef CONFIG_ARM_GIC_V3 +#ifdef CONFIG_KVM_ARM_VGIC_V3 case KVM_DEV_TYPE_ARM_VGIC_V3: vgic_v3_init_emulation(kvm); break; @@ -1789,6 +2080,18 @@ static int init_vgic_model(struct kvm *kvm, int type) return 0; } +/** + * kvm_vgic_early_init - Earliest possible vgic initialization stage + * + * No memory allocation should be performed here, only static init. + */ +void kvm_vgic_early_init(struct kvm *kvm) +{ + spin_lock_init(&kvm->arch.vgic.lock); + spin_lock_init(&kvm->arch.vgic.irq_phys_map_lock); + INIT_LIST_HEAD(&kvm->arch.vgic.irq_phys_map_list); +} + int kvm_vgic_create(struct kvm *kvm, u32 type) { int i, vcpu_lock_idx = -1, ret; @@ -1834,7 +2137,6 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) if (ret) goto out_unlock; - spin_lock_init(&kvm->arch.vgic.lock); kvm->arch.vgic.in_kernel = true; kvm->arch.vgic.vgic_model = type; kvm->arch.vgic.vctrl_base = vgic->vctrl_base; @@ -1925,7 +2227,7 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) block_size = KVM_VGIC_V2_CPU_SIZE; alignment = SZ_4K; break; -#ifdef CONFIG_ARM_GIC_V3 +#ifdef CONFIG_KVM_ARM_VGIC_V3 case KVM_VGIC_V3_ADDR_TYPE_DIST: type_needed = KVM_DEV_TYPE_ARM_VGIC_V3; addr_ptr = &vgic->vgic_dist_base; @@ -2128,9 +2430,6 @@ int kvm_vgic_hyp_init(void) goto out_free_irq; } - /* Callback into for arch code for setup */ - vgic_arch_setup(vgic); - on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); return 0; -- cgit 1.2.3-korg