author    Michal Suchanek <msuchanek@suse.de>    2018-10-31 12:36:54 +0100
committer Michal Suchanek <msuchanek@suse.de>    2018-10-31 23:28:36 +0100
commit    56106d6d95c27b5aeb4cffc0773629bafad7e7d4 (patch)
tree      85b0b9280664969b5928cce82d44a5647dd401b1
parent    d1b70bf40b9d3efaed1f84d223f39f68bb199620 (diff)
KVM: PPC: Book3S HV: Fix handling of large pages in radix page fault handler (bsc#1061840).
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Fix-handling-of-large-pages-in-rad.patch  172
-rw-r--r--  series.conf  1
2 files changed, 173 insertions, 0 deletions
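
The subtlest part of the backported fix is the alignment test in the page fault
handler, visible in the hunks below. The old test masked both addresses with
PMD_MASK & PAGE_MASK, which keeps only the bits *above* the 2MB boundary and
therefore compares which 2MB region each address lives in; a guest physical
address and a host virtual address essentially never match there, so the
large-page path was dead code and every mapping was demoted to a small page.
The new test masks with PMD_SIZE - PAGE_SIZE, keeping only the page-index bits
*inside* the 2MB region, which is the alignment property that actually matters.
A standalone demonstration of the arithmetic (the PAGE/PMD constants here are
illustrative assumptions, 4 KiB base pages under 2 MiB PMD mappings, not the
target kernel's configuration):

#include <stdio.h>

#define PAGE_SHIFT 12UL
#define PAGE_SIZE  (1UL << PAGE_SHIFT)           /* 4 KiB (assumed) */
#define PAGE_MASK  (~(PAGE_SIZE - 1))
#define PMD_SHIFT  21UL
#define PMD_SIZE   (1UL << PMD_SHIFT)            /* 2 MiB (assumed) */
#define PMD_MASK   (~(PMD_SIZE - 1))

int main(void)
{
	/* A guest physical and a host virtual address that share the same
	 * offset within their respective 2 MiB regions, so a 2 MiB PTE
	 * would be a legal mapping. */
	unsigned long gpa = 0x0000000040200000UL + 0x123000UL;
	unsigned long hva = 0x00007f2a34200000UL + 0x123000UL;

	/* Old check: PMD_MASK & PAGE_MASK keeps the high bits, i.e. it
	 * compares which 2 MiB region each address is in -- practically
	 * never equal for a gpa and an hva. */
	int old_match = (gpa & PMD_MASK & PAGE_MASK) ==
			(hva & PMD_MASK & PAGE_MASK);

	/* New check: PMD_SIZE - PAGE_SIZE keeps only the page-index bits
	 * below the 2 MiB boundary -- the actual alignment constraint. */
	int new_match = (gpa & (PMD_SIZE - PAGE_SIZE)) ==
			(hva & (PMD_SIZE - PAGE_SIZE));

	printf("old check: %d, new check: %d\n", old_match, new_match);
	return 0;	/* prints "old check: 0, new check: 1" */
}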
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Fix-handling-of-large-pages-in-rad.patch b/patches.arch/KVM-PPC-Book3S-HV-Fix-handling-of-large-pages-in-rad.patch
new file mode 100644
index 0000000000..b59be4dec2
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Fix-handling-of-large-pages-in-rad.patch
@@ -0,0 +1,172 @@
+From c3856aeb29402e94ad9b3879030165cc6a4fdc56 Mon Sep 17 00:00:00 2001
+From: Paul Mackerras <paulus@ozlabs.org>
+Date: Fri, 23 Feb 2018 21:21:12 +1100
+Subject: [PATCH] KVM: PPC: Book3S HV: Fix handling of large pages in radix
+ page fault handler
+
+References: bsc#1061840
+Patch-mainline: v4.16-rc5
+Git-commit: c3856aeb29402e94ad9b3879030165cc6a4fdc56
+
+This fixes several bugs in the radix page fault handler relating to
+the way large pages in the memory backing the guest were handled.
+First, the check for large pages only checked for explicit huge pages
+and missed transparent huge pages. Then the check that the addresses
+(host virtual vs. guest physical) had appropriate alignment was
+wrong, meaning that the code never put a large page in the partition
+scoped radix tree; it was always demoted to a small page.
+
+Fixing this exposed bugs in kvmppc_create_pte(). We were never
+invalidating a 2MB PTE, which meant that if a page was initially
+faulted in without write permission and the guest then attempted
+to store to it, we would never update the PTE to have write permission.
+If we find a valid 2MB PTE in the PMD, we need to clear it and
+do a TLB invalidation before installing either the new 2MB PTE or
+a pointer to a page table page.
+
+This also corrects an assumption that get_user_pages_fast would set
+the _PAGE_DIRTY bit if we are writing, which is not true. Instead we
+mark the page dirty explicitly with set_page_dirty_lock(). This
+also means we don't need the dirty bit set on the host PTE when
+providing write access on a read fault.
+
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_64_mmu_radix.c | 69 +++++++++++++++++++++-------------
+ 1 file changed, 43 insertions(+), 26 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+index 0c854816e653..5cb4e4687107 100644
+--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+@@ -195,6 +195,12 @@ static void kvmppc_pte_free(pte_t *ptep)
+ kmem_cache_free(kvm_pte_cache, ptep);
+ }
+
++/* Like pmd_huge() and pmd_large(), but works regardless of config options */
++static inline int pmd_is_leaf(pmd_t pmd)
++{
++ return !!(pmd_val(pmd) & _PAGE_PTE);
++}
++
+ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ unsigned int level, unsigned long mmu_seq)
+ {
+@@ -219,7 +225,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ else
+ new_pmd = pmd_alloc_one(kvm->mm, gpa);
+
+- if (level == 0 && !(pmd && pmd_present(*pmd)))
++ if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd)))
+ new_ptep = kvmppc_pte_alloc();
+
+ /* Check if we might have been invalidated; let the guest retry if so */
+@@ -244,12 +250,30 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ new_pmd = NULL;
+ }
+ pmd = pmd_offset(pud, gpa);
+- if (pmd_large(*pmd)) {
+- /* Someone else has instantiated a large page here; retry */
+- ret = -EAGAIN;
+- goto out_unlock;
+- }
+- if (level == 1 && !pmd_none(*pmd)) {
++ if (pmd_is_leaf(*pmd)) {
++ unsigned long lgpa = gpa & PMD_MASK;
++
++ /*
++ * If we raced with another CPU which has just put
++ * a 2MB pte in after we saw a pte page, try again.
++ */
++ if (level == 0 && !new_ptep) {
++ ret = -EAGAIN;
++ goto out_unlock;
++ }
++ /* Valid 2MB page here already, remove it */
++ old = kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
++ ~0UL, 0, lgpa, PMD_SHIFT);
++ kvmppc_radix_tlbie_page(kvm, lgpa, PMD_SHIFT);
++ if (old & _PAGE_DIRTY) {
++ unsigned long gfn = lgpa >> PAGE_SHIFT;
++ struct kvm_memory_slot *memslot;
++ memslot = gfn_to_memslot(kvm, gfn);
++ if (memslot && memslot->dirty_bitmap)
++ kvmppc_update_dirty_map(memslot,
++ gfn, PMD_SIZE);
++ }
++ } else if (level == 1 && !pmd_none(*pmd)) {
+ /*
+ * There's a page table page here, but we wanted
+ * to install a large page. Tell the caller and let
+@@ -412,28 +436,24 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ } else {
+ page = pages[0];
+ pfn = page_to_pfn(page);
+- if (PageHuge(page)) {
+- page = compound_head(page);
+- pte_size <<= compound_order(page);
++ if (PageCompound(page)) {
++ pte_size <<= compound_order(compound_head(page));
+ /* See if we can insert a 2MB large-page PTE here */
+ if (pte_size >= PMD_SIZE &&
+- (gpa & PMD_MASK & PAGE_MASK) ==
+- (hva & PMD_MASK & PAGE_MASK)) {
++ (gpa & (PMD_SIZE - PAGE_SIZE)) ==
++ (hva & (PMD_SIZE - PAGE_SIZE))) {
+ level = 1;
+ pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
+ }
+ }
+ /* See if we can provide write access */
+ if (writing) {
+- /*
+- * We assume gup_fast has set dirty on the host PTE.
+- */
+ pgflags |= _PAGE_WRITE;
+ } else {
+ local_irq_save(flags);
+ ptep = find_current_mm_pte(current->mm->pgd,
+ hva, NULL, NULL);
+- if (ptep && pte_write(*ptep) && pte_dirty(*ptep))
++ if (ptep && pte_write(*ptep))
+ pgflags |= _PAGE_WRITE;
+ local_irq_restore(flags);
+ }
+@@ -459,18 +479,15 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ pte = pfn_pte(pfn, __pgprot(pgflags));
+ ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
+ }
+- if (ret == 0 || ret == -EAGAIN)
+- ret = RESUME_GUEST;
+
+ if (page) {
+- /*
+- * We drop pages[0] here, not page because page might
+- * have been set to the head page of a compound, but
+- * we have to drop the reference on the correct tail
+- * page to match the get inside gup()
+- */
+- put_page(pages[0]);
++ if (!ret && (pgflags & _PAGE_WRITE))
++ set_page_dirty_lock(page);
++ put_page(page);
+ }
++
++ if (ret == 0 || ret == -EAGAIN)
++ ret = RESUME_GUEST;
+ return ret;
+ }
+
+@@ -644,7 +661,7 @@ void kvmppc_free_radix(struct kvm *kvm)
+ continue;
+ pmd = pmd_offset(pud, 0);
+ for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) {
+- if (pmd_huge(*pmd)) {
++ if (pmd_is_leaf(*pmd)) {
+ pmd_clear(pmd);
+ continue;
+ }
+--
+2.13.7
+
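
The middle paragraph of the commit message describes an ordering requirement
that the kvmppc_create_pte() hunk above implements: a valid 2MB leaf PTE must
be torn down — cleared, TLB-invalidated, and its dirty state transferred to
the memslot dirty map — before either a new 2MB PTE or a page-table pointer is
installed in that PMD slot. A minimal user-space model of that ordering (an
illustration only, not kernel code; entry_t, tlb_invalidate, record_dirty and
install_entry are made-up stand-ins for the kernel primitives):

#include <stdio.h>
#include <stdbool.h>

typedef struct { bool valid; bool dirty; unsigned long target; } entry_t;

/* Stand-in for hardware TLB invalidation (kvmppc_radix_tlbie_page in the
 * real code). */
static void tlb_invalidate(unsigned long addr) { printf("tlbie %#lx\n", addr); }

/* Stand-in for transferring dirty state to the memslot dirty bitmap. */
static void record_dirty(unsigned long addr) { printf("dirty %#lx\n", addr); }

static void install_entry(entry_t *slot, unsigned long target,
			  unsigned long addr)
{
	if (slot->valid) {
		/* Tear down the old translation first: clear the entry,
		 * flush the TLB, then save its dirty state, mirroring the
		 * order the patch adds. */
		entry_t old = *slot;
		slot->valid = false;
		tlb_invalidate(addr);
		if (old.dirty)
			record_dirty(addr);
	}
	slot->target = target;
	slot->valid = true;
}

int main(void)
{
	entry_t slot = { .valid = true, .dirty = true, .target = 0x1000 };
	install_entry(&slot, 0x2000, 0x40200000UL);
	return 0;
}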
diff --git a/series.conf b/series.conf
index b332cd0a65..cdb8d940fb 100644
--- a/series.conf
+++ b/series.conf
@@ -13729,6 +13729,7 @@
patches.arch/0001-arm64-mm-fix-thinko-in-non-global-page-table-attribu.patch
patches.arch/0001-arm64-Relax-ARM_SMCCC_ARCH_WORKAROUND_1-discovery.patch
patches.arch/KVM-PPC-Book3S-Fix-compile-error-that-occurs-with-so.patch
+ patches.arch/KVM-PPC-Book3S-HV-Fix-handling-of-large-pages-in-rad.patch
patches.arch/KVM-PPC-Book3S-HV-Fix-guest-time-accounting-with-VIR.patch
patches.drivers/IB-mlx5-Fix-incorrect-size-of-klms-in-the-memory-reg.patch
patches.drivers/IB-mlx5-Avoid-passing-an-invalid-QP-type-to-firmware.patch
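
For context on the last paragraph of the commit message: the rule it relies on
applies to any caller of get_user_pages_fast(), not just this fault handler.
A hedged sketch of the pattern in the v4.x API (get_user_pages_fast,
set_page_dirty_lock and put_page are real kernel functions with the signatures
used here; do_guest_write_fault and its arguments are hypothetical):

#include <linux/mm.h>
#include <linux/errno.h>

/* Hypothetical caller illustrating the pattern: gup_fast does not
 * guarantee that _PAGE_DIRTY gets set in the host PTE, so a writer must
 * mark the page dirty itself before releasing its reference. */
static int do_guest_write_fault(unsigned long hva, int writing)
{
	struct page *page;
	int npages;

	/* v4.x signature: get_user_pages_fast(start, nr_pages, write, pages) */
	npages = get_user_pages_fast(hva, 1, writing, &page);
	if (npages != 1)
		return -EFAULT;

	/* ... map the page into the guest and let the store complete ... */

	if (writing)
		set_page_dirty_lock(page);  /* explicit, as the patch does */
	put_page(page);
	return 0;
}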