author    Michal Suchanek <msuchanek@suse.de>    2018-10-31 13:27:38 +0100
committer Michal Suchanek <msuchanek@suse.de>    2018-10-31 23:28:44 +0100
commit    31d2e9ce17198ad8bdda083c3d0ed013b89d905a (patch)
tree      a75f3753df05f45fb0a253e18db5445927de3919
parent    a02fa4e3650b80126fe23c1e24bf047c85f03e97 (diff)
KVM: PPC: Book3S HV: Don't use compound_order to determine host mapping size (bsc#1061840).
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Don-t-use-compound_order-to-determ.patch  158
-rw-r--r--  series.conf                                                                1
2 files changed, 159 insertions(+), 0 deletions(-)
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Don-t-use-compound_order-to-determ.patch b/patches.arch/KVM-PPC-Book3S-HV-Don-t-use-compound_order-to-determ.patch
new file mode 100644
index 0000000000..d5c7ee3d81
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Don-t-use-compound_order-to-determ.patch
@@ -0,0 +1,158 @@
+From 71d29f43b6332badc5598c656616a62575e83342 Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Tue, 11 Sep 2018 20:48:34 +1000
+Subject: [PATCH] KVM: PPC: Book3S HV: Don't use compound_order to determine
+ host mapping size
+
+References: bsc#1061840
+Patch-mainline: v4.19-rc5
+Git-commit: 71d29f43b6332badc5598c656616a62575e83342
+
+THP paths can defer splitting compound pages until after the actual
+remap and TLB flushes to split a huge PMD/PUD. This causes radix
+partition scope page table mappings to get out of sync with the host
+qemu page table mappings.
+
+This results in random memory corruption in the guest when running
+with THP. The easiest way to reproduce it is to use the KVM balloon to
+free up a lot of memory in the guest and then shrink the balloon to
+give the memory back, while some work is being done in the guest.
+
+Cc: David Gibson <david@gibson.dropbear.id.au>
+Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
+Cc: kvm-ppc@vger.kernel.org
+Cc: linuxppc-dev@lists.ozlabs.org
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_64_mmu_radix.c | 91 ++++++++++++++--------------------
+ 1 file changed, 37 insertions(+), 54 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+index fd6e8c13685f..933c574e1cf7 100644
+--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+@@ -525,8 +525,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned long ea, unsigned long dsisr)
+ {
+ struct kvm *kvm = vcpu->kvm;
+- unsigned long mmu_seq, pte_size;
+- unsigned long gpa, gfn, hva, pfn;
++ unsigned long mmu_seq;
++ unsigned long gpa, gfn, hva;
+ struct kvm_memory_slot *memslot;
+ struct page *page = NULL;
+ long ret;
+@@ -623,9 +623,10 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ */
+ hva = gfn_to_hva_memslot(memslot, gfn);
+ if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
+- pfn = page_to_pfn(page);
+ upgrade_write = true;
+ } else {
++ unsigned long pfn;
++
+ /* Call KVM generic code to do the slow-path check */
+ pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
+ writing, upgrade_p);
+@@ -639,63 +640,45 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ }
+ }
+
+- /* See if we can insert a 1GB or 2MB large PTE here */
+- level = 0;
+- if (page && PageCompound(page)) {
+- pte_size = PAGE_SIZE << compound_order(compound_head(page));
+- if (pte_size >= PUD_SIZE &&
+- (gpa & (PUD_SIZE - PAGE_SIZE)) ==
+- (hva & (PUD_SIZE - PAGE_SIZE))) {
+- level = 2;
+- pfn &= ~((PUD_SIZE >> PAGE_SHIFT) - 1);
+- } else if (pte_size >= PMD_SIZE &&
+- (gpa & (PMD_SIZE - PAGE_SIZE)) ==
+- (hva & (PMD_SIZE - PAGE_SIZE))) {
+- level = 1;
+- pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
+- }
+- }
+-
+ /*
+- * Compute the PTE value that we need to insert.
++ * Read the PTE from the process' radix tree and use that
++ * so we get the shift and attribute bits.
+ */
+- if (page) {
+- pgflags = _PAGE_READ | _PAGE_EXEC | _PAGE_PRESENT | _PAGE_PTE |
+- _PAGE_ACCESSED;
+- if (writing || upgrade_write)
+- pgflags |= _PAGE_WRITE | _PAGE_DIRTY;
+- pte = pfn_pte(pfn, __pgprot(pgflags));
++ local_irq_disable();
++ ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
++ pte = *ptep;
++ local_irq_enable();
++
++ /* Get pte level from shift/size */
++ if (shift == PUD_SHIFT &&
++ (gpa & (PUD_SIZE - PAGE_SIZE)) ==
++ (hva & (PUD_SIZE - PAGE_SIZE))) {
++ level = 2;
++ } else if (shift == PMD_SHIFT &&
++ (gpa & (PMD_SIZE - PAGE_SIZE)) ==
++ (hva & (PMD_SIZE - PAGE_SIZE))) {
++ level = 1;
+ } else {
+- /*
+- * Read the PTE from the process' radix tree and use that
+- * so we get the attribute bits.
+- */
+- local_irq_disable();
+- ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
+- pte = *ptep;
+- local_irq_enable();
+- if (shift == PUD_SHIFT &&
+- (gpa & (PUD_SIZE - PAGE_SIZE)) ==
+- (hva & (PUD_SIZE - PAGE_SIZE))) {
+- level = 2;
+- } else if (shift == PMD_SHIFT &&
+- (gpa & (PMD_SIZE - PAGE_SIZE)) ==
+- (hva & (PMD_SIZE - PAGE_SIZE))) {
+- level = 1;
+- } else if (shift && shift != PAGE_SHIFT) {
+- /* Adjust PFN */
+- unsigned long mask = (1ul << shift) - PAGE_SIZE;
+- pte = __pte(pte_val(pte) | (hva & mask));
+- }
+- pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED);
+- if (writing || upgrade_write) {
+- if (pte_val(pte) & _PAGE_WRITE)
+- pte = __pte(pte_val(pte) | _PAGE_DIRTY);
+- } else {
+- pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY));
++ level = 0;
++ if (shift > PAGE_SHIFT) {
++ /*
++ * If the pte maps more than one page, bring over
++ * bits from the virtual address to get the real
++ * address of the specific single page we want.
++ */
++ unsigned long rpnmask = (1ul << shift) - PAGE_SIZE;
++ pte = __pte(pte_val(pte) | (hva & rpnmask));
+ }
+ }
+
++ pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED);
++ if (writing || upgrade_write) {
++ if (pte_val(pte) & _PAGE_WRITE)
++ pte = __pte(pte_val(pte) | _PAGE_DIRTY);
++ } else {
++ pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY));
++ }
++
+ /* Allocate space in the tree and write the PTE */
+ ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
+
+--
+2.13.7
+
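The core of the patch above is to stop inferring the host mapping size from compound_order(), which THP deferred splitting can invalidate after the remap, and instead trust the shift that __find_linux_pte() reports for the host PTE. Below is a minimal standalone sketch (userspace C, not kernel code) of that level-selection rule: mapping_level() is a hypothetical helper name, and the shift constants are assumptions modelling a ppc64 radix host with 64 KiB base pages, 2 MiB PMDs and 1 GiB PUDs.

    /*
     * Standalone sketch of the level-selection rule the patch adopts:
     * trust the page-table shift of the host PTE, not compound_order()
     * of the backing page.  Shift values are assumed (ppc64 radix,
     * 64 KiB / 2 MiB / 1 GiB); mapping_level() is a made-up helper.
     */
    #include <stdio.h>

    #define PAGE_SHIFT 16UL
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)
    #define PMD_SHIFT  21UL
    #define PMD_SIZE   (1UL << PMD_SHIFT)
    #define PUD_SHIFT  30UL
    #define PUD_SIZE   (1UL << PUD_SHIFT)

    /*
     * A large host mapping can only be reused as a large guest mapping
     * if the guest physical address and the host virtual address agree
     * in every bit covered by that mapping size (i.e. they are
     * congruent modulo PUD_SIZE or PMD_SIZE).  Otherwise fall back to
     * a base page (level 0).
     */
    static int mapping_level(unsigned long shift, unsigned long gpa,
                             unsigned long hva)
    {
            if (shift == PUD_SHIFT &&
                (gpa & (PUD_SIZE - PAGE_SIZE)) == (hva & (PUD_SIZE - PAGE_SIZE)))
                    return 2;       /* 1 GiB mapping */
            if (shift == PMD_SHIFT &&
                (gpa & (PMD_SIZE - PAGE_SIZE)) == (hva & (PMD_SIZE - PAGE_SIZE)))
                    return 1;       /* 2 MiB mapping */
            return 0;               /* base page */
    }

    int main(void)
    {
            /* gpa/hva congruent modulo 2 MiB, backed by a PMD: level 1. */
            printf("%d\n", mapping_level(PMD_SHIFT, 0x40200000UL, 0x7f0000200000UL));
            /* Same shift, but gpa/hva disagree inside the 2 MiB region: level 0. */
            printf("%d\n", mapping_level(PMD_SHIFT, 0x40210000UL, 0x7f0000200000UL));
            return 0;
    }

The design point this captures: the old code rounded pfn down to the start of the compound page, which is only safe while the page actually stays huge; keying off the PTE shift makes the guest mapping follow whatever the host page tables say right now.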
diff --git a/series.conf b/series.conf
index 6ad01abf75..11e1c5c59b 100644
--- a/series.conf
+++ b/series.conf
@@ -18123,6 +18123,7 @@
patches.fixes/ubifs-Check-for-name-being-NULL-while-mounting.patch
patches.fixes/Revert-ubifs-xattr-Don-t-operate-on-deleted-inodes.patch
patches.arch/KVM-PPC-Avoid-marking-DMA-mapped-pages-dirty-in-real.patch
+ patches.arch/KVM-PPC-Book3S-HV-Don-t-use-compound_order-to-determ.patch
patches.fixes/0001-x86-paravirt-Fix-some-warning-messages.patch
patches.arch/x86-mm-expand-static-page-table-for-fixmap-space
patches.arch/xen-x86-vpmu-zero-struct-pt_regs-before-calling-into-sample-handling-code
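One more detail of the patch worth illustrating is the rpnmask adjustment: when the host PTE maps more than one base page (shift > PAGE_SHIFT) but cannot be inserted as a large guest PTE, the low hva bits are folded into the real page number so the resulting PTE addresses exactly the faulting page. A hedged standalone sketch, with the same assumed 64 KiB page size as above and a plain integer standing in for the kernel's pte_t:

    #include <stdio.h>

    #define PAGE_SHIFT 16UL
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)

    /*
     * Sketch of the rpnmask adjustment from the patch: if the host PTE
     * covers more than one base page but must be inserted at level 0,
     * bring the intra-mapping bits of hva into the real page number so
     * the PTE points at the single page being faulted.  adjust_rpn() is
     * a made-up helper; 'pte' is an integer stand-in for pte_t.
     */
    static unsigned long adjust_rpn(unsigned long pte, unsigned long hva,
                                    unsigned long shift)
    {
            if (shift > PAGE_SHIFT) {
                    /* Bits above PAGE_SHIFT and below shift. */
                    unsigned long rpnmask = (1UL << shift) - PAGE_SIZE;
                    pte |= hva & rpnmask;
            }
            return pte;
    }

    int main(void)
    {
            /* A 2 MiB host PTE (shift 21) faulted at offset 0x20000 within
             * the mapping: the adjusted pte gains that page offset. */
            printf("%#lx\n", adjust_rpn(0x40200000UL, 0x7f0000220000UL, 21));
            return 0;
    }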