Home Home > GIT Browse
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Michal Suchanek <msuchanek@suse.de> 2018-10-31 13:27:38 +0100
committer Michal Suchanek <msuchanek@suse.de> 2018-10-31 23:28:44 +0100
commit 3d1fd5d32e9fc106ea96f588c394f456ecf78e55 (patch)
tree 9e248e79f8748351b9d99df9db5b37972db40692
parent 3b76f4a63266a541ee91f2de87e15573a48affad (diff)
KVM: PPC: Book3S HV: Pack VCORE IDs to access full VCPU ID space
(bsc#1061840).
-rw-r--r-- patches.arch/KVM-PPC-Book3S-HV-Pack-VCORE-IDs-to-access-full-VCPU.patch | 256
-rw-r--r-- series.conf | 1
2 files changed, 257 insertions, 0 deletions
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Pack-VCORE-IDs-to-access-full-VCPU.patch b/patches.arch/KVM-PPC-Book3S-HV-Pack-VCORE-IDs-to-access-full-VCPU.patch
new file mode 100644
index 0000000000..01eda55f06
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Pack-VCORE-IDs-to-access-full-VCPU.patch
@@ -0,0 +1,256 @@
+From 1e175d2e07c71d9574f5b1c74523abca54e2654f Mon Sep 17 00:00:00 2001
+From: Sam Bobroff <sam.bobroff@au1.ibm.com>
+Date: Wed, 25 Jul 2018 16:12:02 +1000
+Subject: [PATCH] KVM: PPC: Book3S HV: Pack VCORE IDs to access full VCPU ID
+ space
+
+References: bsc#1061840
+Patch-mainline: v4.19-rc1
+Git-commit: 1e175d2e07c71d9574f5b1c74523abca54e2654f
+
+It is not currently possible to create the full number of possible
+VCPUs (KVM_MAX_VCPUS) on Power9 with KVM-HV when the guest uses fewer
+threads per core than its core stride (or "VSMT mode"). This is
+because the VCORE ID and XIVE offsets grow beyond KVM_MAX_VCPUS
+even though the VCPU ID is less than KVM_MAX_VCPU_ID.
+
+To address this, "pack" the VCORE ID and XIVE offsets by using
+knowledge of the way the VCPU IDs will be used when there are fewer
+guest threads per core than the core stride. The primary thread of
+each core will always be used first. Then, if the guest uses more than
+one thread per core, these secondary threads will sequentially follow
+the primary in each core.
+
+So, the only way an ID above KVM_MAX_VCPUS can be seen, is if the
+VCPUs are being spaced apart, so at least half of each core is empty,
+and IDs between KVM_MAX_VCPUS and (KVM_MAX_VCPUS * 2) can be mapped
+into the second half of each core (4..7, in an 8-thread core).
+
+Similarly, if IDs above KVM_MAX_VCPUS * 2 are seen, at least 3/4 of
+each core is being left empty, and we can map down into the second and
+fourth quarters of each core (2, 3 and 6, 7 in an 8-thread core).
+
+Lastly, if IDs above KVM_MAX_VCPUS * 4 are seen, only the primary
+threads are being used and 7/8 of the core is empty, allowing use of
+the 1, 5, 3 and 7 thread slots.
+
+(Strides less than 8 are handled similarly.)
+
+This allows the VCORE ID or offset to be calculated quickly from the
+VCPU ID or XIVE server numbers, without access to the VCPU structure.
+
+[paulus@ozlabs.org - tidied up comment a little, changed some WARN_ONCE
+ to pr_devel, wrapped line, fixed id check.]
+
+Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/include/asm/kvm_book3s.h | 47 +++++++++++++++++++++++++++++++++++
+ arch/powerpc/kvm/book3s_hv.c | 27 +++++++++++++++-----
+ arch/powerpc/kvm/book3s_xive.c | 19 ++++++++------
+ 3 files changed, 80 insertions(+), 13 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
+index 1f345a0b6ba2..83a9aa3cf689 100644
+--- a/arch/powerpc/include/asm/kvm_book3s.h
++++ b/arch/powerpc/include/asm/kvm_book3s.h
+@@ -390,4 +390,51 @@ extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
+ #define SPLIT_HACK_MASK 0xff000000
+ #define SPLIT_HACK_OFFS 0xfb000000
+
++/*
++ * This packs a VCPU ID from the [0..KVM_MAX_VCPU_ID) space down to the
++ * [0..KVM_MAX_VCPUS) space, using knowledge of the guest's core stride
++ * (but not its actual threading mode, which is not available) to avoid
++ * collisions.
++ *
++ * The implementation leaves VCPU IDs from the range [0..KVM_MAX_VCPUS) (block
++ * 0) unchanged: if the guest is filling each VCORE completely then it will be
++ * using consecutive IDs and it will fill the space without any packing.
++ *
++ * For higher VCPU IDs, the packed ID is based on the VCPU ID modulo
++ * KVM_MAX_VCPUS (effectively masking off the top bits) and then an offset is
++ * added to avoid collisions.
++ *
++ * VCPU IDs in the range [KVM_MAX_VCPUS..(KVM_MAX_VCPUS*2)) (block 1) are only
++ * possible if the guest is leaving at least 1/2 of each VCORE empty, so IDs
++ * can be safely packed into the second half of each VCORE by adding an offset
++ * of (stride / 2).
++ *
++ * Similarly, if VCPU IDs in the range [(KVM_MAX_VCPUS*2)..(KVM_MAX_VCPUS*4))
++ * (blocks 2 and 3) are seen, the guest must be leaving at least 3/4 of each
++ * VCORE empty so packed IDs can be offset by (stride / 4) and (stride * 3 / 4).
++ *
++ * Finally, VCPU IDs from blocks 4..7 will only be seen if the guest is using a
++ * stride of 8 and 1 thread per core so the remaining offsets of 1, 5, 3 and 7
++ * must be free to use.
++ *
++ * (The offsets for each block are stored in block_offsets[], indexed by the
++ * block number if the stride is 8. For cases where the guest's stride is less
++ * than 8, we can re-use the block_offsets array by multiplying the block
++ * number by (MAX_SMT_THREADS / stride) to reach the correct entry.)
++ */
++static inline u32 kvmppc_pack_vcpu_id(struct kvm *kvm, u32 id)
++{
++ const int block_offsets[MAX_SMT_THREADS] = {0, 4, 2, 6, 1, 5, 3, 7};
++ int stride = kvm->arch.emul_smt_mode;
++ int block = (id / KVM_MAX_VCPUS) * (MAX_SMT_THREADS / stride);
++ u32 packed_id;
++
++ if (WARN_ONCE(block >= MAX_SMT_THREADS, "VCPU ID too large to pack"))
++ return 0;
++ packed_id = (id % KVM_MAX_VCPUS) + block_offsets[block];
++ if (WARN_ONCE(packed_id >= KVM_MAX_VCPUS, "VCPU ID packing failed"))
++ return 0;
++ return packed_id;
++}
++
+ #endif /* __ASM_KVM_BOOK3S_H__ */
+diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
+index d73b29b6aaa1..785245e09f32 100644
+--- a/arch/powerpc/kvm/book3s_hv.c
++++ b/arch/powerpc/kvm/book3s_hv.c
+@@ -1816,7 +1816,7 @@ static int threads_per_vcore(struct kvm *kvm)
+ return threads_per_subcore;
+ }
+
+-static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
++static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int id)
+ {
+ struct kvmppc_vcore *vcore;
+
+@@ -1830,7 +1830,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
+ init_swait_queue_head(&vcore->wq);
+ vcore->preempt_tb = TB_NIL;
+ vcore->lpcr = kvm->arch.lpcr;
+- vcore->first_vcpuid = core * kvm->arch.smt_mode;
++ vcore->first_vcpuid = id;
+ vcore->kvm = kvm;
+ INIT_LIST_HEAD(&vcore->preempt_list);
+
+@@ -1989,10 +1989,16 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
+ unsigned int id)
+ {
+ struct kvm_vcpu *vcpu;
+- int err;
++ int err = -EINVAL;
+ int core;
+ struct kvmppc_vcore *vcore;
+
++ if (id >= (KVM_MAX_VCPUS * kvm->arch.emul_smt_mode) &&
++ cpu_has_feature(CPU_FTR_ARCH_300)) {
++ pr_devel("DNCI: VCPU ID too high\n");
++ goto out;
++ }
++
+ err = -ENOMEM;
+ vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+ if (!vcpu)
+@@ -2048,12 +2054,21 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
+ mutex_lock(&kvm->lock);
+ vcore = NULL;
+ err = -EINVAL;
+- core = id / kvm->arch.smt_mode;
++ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
++ BUG_ON(kvm->arch.smt_mode != 1);
++ core = kvmppc_pack_vcpu_id(kvm, id);
++ } else {
++ core = id / kvm->arch.smt_mode;
++ }
+ if (core < KVM_MAX_VCORES) {
+ vcore = kvm->arch.vcores[core];
+- if (!vcore) {
++ if (vcore && cpu_has_feature(CPU_FTR_ARCH_300)) {
++ pr_devel("KVM: collision on id %u", id);
++ vcore = NULL;
++ } else if (!vcore) {
+ err = -ENOMEM;
+- vcore = kvmppc_vcore_create(kvm, core);
++ vcore = kvmppc_vcore_create(kvm,
++ id & ~(kvm->arch.smt_mode - 1));
+ kvm->arch.vcores[core] = vcore;
+ kvm->arch.online_vcores++;
+ }
+diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
+index f9818d7d3381..126f02b3ffb8 100644
+--- a/arch/powerpc/kvm/book3s_xive.c
++++ b/arch/powerpc/kvm/book3s_xive.c
+@@ -317,6 +317,11 @@ static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
+ return -EBUSY;
+ }
+
++static u32 xive_vp(struct kvmppc_xive *xive, u32 server)
++{
++ return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server);
++}
++
+ static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
+ struct kvmppc_xive_src_block *sb,
+ struct kvmppc_xive_irq_state *state)
+@@ -362,7 +367,7 @@ static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
+ */
+ if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
+ xive_native_configure_irq(hw_num,
+- xive->vp_base + state->act_server,
++ xive_vp(xive, state->act_server),
+ MASKED, state->number);
+ /* set old_p so we can track if an H_EOI was done */
+ state->old_p = true;
+@@ -418,7 +423,7 @@ static void xive_finish_unmask(struct kvmppc_xive *xive,
+ */
+ if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
+ xive_native_configure_irq(hw_num,
+- xive->vp_base + state->act_server,
++ xive_vp(xive, state->act_server),
+ state->act_priority, state->number);
+ /* If an EOI is needed, do it here */
+ if (!state->old_p)
+@@ -495,7 +500,7 @@ static int xive_target_interrupt(struct kvm *kvm,
+ kvmppc_xive_select_irq(state, &hw_num, NULL);
+
+ return xive_native_configure_irq(hw_num,
+- xive->vp_base + server,
++ xive_vp(xive, server),
+ prio, state->number);
+ }
+
+@@ -883,7 +888,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
+ * which is fine for a never started interrupt.
+ */
+ xive_native_configure_irq(hw_irq,
+- xive->vp_base + state->act_server,
++ xive_vp(xive, state->act_server),
+ state->act_priority, state->number);
+
+ /*
+@@ -959,7 +964,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
+
+ /* Reconfigure the IPI */
+ xive_native_configure_irq(state->ipi_number,
+- xive->vp_base + state->act_server,
++ xive_vp(xive, state->act_server),
+ state->act_priority, state->number);
+
+ /*
+@@ -1084,7 +1089,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
+ pr_devel("Duplicate !\n");
+ return -EEXIST;
+ }
+- if (cpu >= KVM_MAX_VCPUS) {
++ if (cpu >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
+ pr_devel("Out of bounds !\n");
+ return -EINVAL;
+ }
+@@ -1098,7 +1103,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
+ xc->xive = xive;
+ xc->vcpu = vcpu;
+ xc->server_num = cpu;
+- xc->vp_id = xive->vp_base + cpu;
++ xc->vp_id = xive_vp(xive, cpu);
+ xc->mfrr = 0xff;
+ xc->valid = true;
+
+--
+2.13.7
+
diff --git a/series.conf b/series.conf
index b25239dd0b..e745c1d98c 100644
--- a/series.conf
+++ b/series.conf
@@ -17781,6 +17781,7 @@
patches.arch/KVM-PPC-Book3S-Fix-matching-of-hardware-and-emulated.patch
patches.arch/KVM-PPC-Book3S-HV-Add-of_node_put-in-success-path.patch
patches.arch/KVM-PPC-Book3S-HV-Fix-constant-size-warning.patch
+ patches.arch/KVM-PPC-Book3S-HV-Pack-VCORE-IDs-to-access-full-VCPU.patch
patches.fixes/kvm-s390-add-etoken-support-for-guests.patch
patches.arch/kvm-nvmx-fix-fault-vector-for-vmx-operation-at-cpl-0
patches.arch/kvm-vmx-track-host_state-loaded-using-a-loaded_vmcs-pointer