author     Michal Suchanek <msuchanek@suse.de>    2018-10-31 12:36:54 +0100
committer  Michal Suchanek <msuchanek@suse.de>    2018-10-31 23:28:41 +0100
commit     039e0a5ff35d545a815b8a32d4a682b5944b473e (patch)
tree       97f004be58c93afbb354629cbdd51f4c7aa119fc
parent     2e7f8155287ebd6b9d3e5b36e2bad85e6ba21ede (diff)
KVM: PPC: Book3S HV: Recursively unmap all page table entries
when unmapping (bsc#1061840).
-rw-r--r--  patches.arch/KVM-PPC-Book3S-HV-Recursively-unmap-all-page-table-e.patch  280
-rw-r--r--  series.conf  1
2 files changed, 281 insertions, 0 deletions
diff --git a/patches.arch/KVM-PPC-Book3S-HV-Recursively-unmap-all-page-table-e.patch b/patches.arch/KVM-PPC-Book3S-HV-Recursively-unmap-all-page-table-e.patch
new file mode 100644
index 0000000000..eeb7aaa9d1
--- /dev/null
+++ b/patches.arch/KVM-PPC-Book3S-HV-Recursively-unmap-all-page-table-e.patch
@@ -0,0 +1,280 @@
+From a5704e83aa3d672327409509b2d1bff2def72966 Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Thu, 17 May 2018 17:06:27 +1000
+Subject: [PATCH] KVM: PPC: Book3S HV: Recursively unmap all page table entries
+ when unmapping
+
+References: bsc#1061840
+Patch-mainline: v4.18-rc1
+Git-commit: a5704e83aa3d672327409509b2d1bff2def72966
+
+When partition scope mappings are unmapped with kvm_unmap_radix, the
+pte is cleared, but the page table structure is left in place. If the
+next page fault requests a different page table geometry (e.g., due to
+THP promotion or split), kvmppc_create_pte is responsible for changing
+the page tables.
+
+When a page table entry is to be converted to a large pte, the page
+table entry is cleared, the PWC flushed, then the page table it points
+to freed. This will cause pte page tables to leak when a 1GB page is
+to replace a pud entry that points to a pmd table with pte tables under it:
+The pmd table will be freed, but its pte tables will be missed.
+
+Fix this by replacing the simple clear and free code with one that
+walks down the page tables and frees children. Care must be taken to
+clear the root entry being unmapped and then flush the PWC before
+freeing any page tables, as explained in comments.
+
+This requires PWC flush to logically become a flush-all-PWC (which it
+already is in hardware, but the KVM API needs to be changed to avoid
+confusion).
+
+This code also checks that no unexpected pte entries exist in any page
+table being freed, unmapping them and emitting a WARN. This is an
+expensive operation for the pte page level, but partition scope
+changes are rare, so it's unconditional for now to iron out bugs. It
+can be put under a CONFIG option or removed after some time.
+
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/kvm/book3s_64_mmu_radix.c | 192 +++++++++++++++++++++++----------
+ 1 file changed, 138 insertions(+), 54 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+index 2c49b31ec7fb..e514370ab5ae 100644
+--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
++++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
+@@ -165,7 +165,7 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
+ asm volatile("eieio ; tlbsync ; ptesync": : :"memory");
+ }
+
+-static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr)
++static void kvmppc_radix_flush_pwc(struct kvm *kvm)
+ {
+ unsigned long rb = 0x2 << PPC_BITLSHIFT(53); /* IS = 2 */
+
+@@ -247,6 +247,139 @@ static void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte,
+ }
+ }
+
++/*
++ * kvmppc_free_p?d are used to free existing page tables, and recursively
++ * descend and clear and free children.
++ * Callers are responsible for flushing the PWC.
++ *
++ * When page tables are being unmapped/freed as part of page fault path
++ * (full == false), ptes are not expected. There is code to unmap them
++ * and emit a warning if encountered, but there may already be data
++ * corruption due to the unexpected mappings.
++ */
++static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full)
++{
++ if (full) {
++ memset(pte, 0, sizeof(long) << PTE_INDEX_SIZE);
++ } else {
++ pte_t *p = pte;
++ unsigned long it;
++
++ for (it = 0; it < PTRS_PER_PTE; ++it, ++p) {
++ if (pte_val(*p) == 0)
++ continue;
++ WARN_ON_ONCE(1);
++ kvmppc_unmap_pte(kvm, p,
++ pte_pfn(*p) << PAGE_SHIFT,
++ PAGE_SHIFT);
++ }
++ }
++
++ kvmppc_pte_free(pte);
++}
++
++static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full)
++{
++ unsigned long im;
++ pmd_t *p = pmd;
++
++ for (im = 0; im < PTRS_PER_PMD; ++im, ++p) {
++ if (!pmd_present(*p))
++ continue;
++ if (pmd_is_leaf(*p)) {
++ if (full) {
++ pmd_clear(p);
++ } else {
++ WARN_ON_ONCE(1);
++ kvmppc_unmap_pte(kvm, (pte_t *)p,
++ pte_pfn(*(pte_t *)p) << PAGE_SHIFT,
++ PMD_SHIFT);
++ }
++ } else {
++ pte_t *pte;
++
++ pte = pte_offset_map(p, 0);
++ kvmppc_unmap_free_pte(kvm, pte, full);
++ pmd_clear(p);
++ }
++ }
++ kvmppc_pmd_free(pmd);
++}
++
++static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud)
++{
++ unsigned long iu;
++ pud_t *p = pud;
++
++ for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++p) {
++ if (!pud_present(*p))
++ continue;
++ if (pud_huge(*p)) {
++ pud_clear(p);
++ } else {
++ pmd_t *pmd;
++
++ pmd = pmd_offset(p, 0);
++ kvmppc_unmap_free_pmd(kvm, pmd, true);
++ pud_clear(p);
++ }
++ }
++ pud_free(kvm->mm, pud);
++}
++
++void kvmppc_free_radix(struct kvm *kvm)
++{
++ unsigned long ig;
++ pgd_t *pgd;
++
++ if (!kvm->arch.pgtable)
++ return;
++ pgd = kvm->arch.pgtable;
++ for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) {
++ pud_t *pud;
++
++ if (!pgd_present(*pgd))
++ continue;
++ pud = pud_offset(pgd, 0);
++ kvmppc_unmap_free_pud(kvm, pud);
++ pgd_clear(pgd);
++ }
++ pgd_free(kvm->mm, kvm->arch.pgtable);
++ kvm->arch.pgtable = NULL;
++}
++
++static void kvmppc_unmap_free_pmd_entry_table(struct kvm *kvm, pmd_t *pmd,
++ unsigned long gpa)
++{
++ pte_t *pte = pte_offset_kernel(pmd, 0);
++
++ /*
++ * Clearing the pmd entry then flushing the PWC ensures that the pte
++ * page no longer be cached by the MMU, so can be freed without
++ * flushing the PWC again.
++ */
++ pmd_clear(pmd);
++ kvmppc_radix_flush_pwc(kvm);
++
++ kvmppc_unmap_free_pte(kvm, pte, false);
++}
++
++static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud,
++ unsigned long gpa)
++{
++ pmd_t *pmd = pmd_offset(pud, 0);
++
++ /*
++ * Clearing the pud entry then flushing the PWC ensures that the pmd
++ * page and any children pte pages will no longer be cached by the MMU,
++ * so can be freed without flushing the PWC again.
++ */
++ pud_clear(pud);
++ kvmppc_radix_flush_pwc(kvm);
++
++ kvmppc_unmap_free_pmd(kvm, pmd, false);
++}
++
+ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ unsigned int level, unsigned long mmu_seq)
+ {
+@@ -312,11 +445,9 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ /*
+ * There's a page table page here, but we wanted to
+ * install a large page, so remove and free the page
+- * table page. new_pmd will be NULL since level == 2.
++ * table page.
+ */
+- new_pmd = pmd_offset(pud, 0);
+- pud_clear(pud);
+- kvmppc_radix_flush_pwc(kvm, gpa);
++ kvmppc_unmap_free_pud_entry_table(kvm, pud, gpa);
+ }
+ kvmppc_radix_set_pte_at(kvm, gpa, (pte_t *)pud, pte);
+ ret = 0;
+@@ -353,11 +484,9 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
+ /*
+ * There's a page table page here, but we wanted to
+ * install a large page, so remove and free the page
+- * table page. new_ptep will be NULL since level == 1.
++ * table page.
+ */
+- new_ptep = pte_offset_kernel(pmd, 0);
+- pmd_clear(pmd);
+- kvmppc_radix_flush_pwc(kvm, gpa);
++ kvmppc_unmap_free_pmd_entry_table(kvm, pmd, gpa);
+ }
+ kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte);
+ ret = 0;
+@@ -734,51 +863,6 @@ int kvmppc_init_vm_radix(struct kvm *kvm)
+ return 0;
+ }
+
+-void kvmppc_free_radix(struct kvm *kvm)
+-{
+- unsigned long ig, iu, im;
+- pte_t *pte;
+- pmd_t *pmd;
+- pud_t *pud;
+- pgd_t *pgd;
+-
+- if (!kvm->arch.pgtable)
+- return;
+- pgd = kvm->arch.pgtable;
+- for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) {
+- if (!pgd_present(*pgd))
+- continue;
+- pud = pud_offset(pgd, 0);
+- for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++pud) {
+- if (!pud_present(*pud))
+- continue;
+- if (pud_huge(*pud)) {
+- pud_clear(pud);
+- continue;
+- }
+- pmd = pmd_offset(pud, 0);
+- for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) {
+- if (pmd_is_leaf(*pmd)) {
+- pmd_clear(pmd);
+- continue;
+- }
+- if (!pmd_present(*pmd))
+- continue;
+- pte = pte_offset_map(pmd, 0);
+- memset(pte, 0, sizeof(long) << PTE_INDEX_SIZE);
+- kvmppc_pte_free(pte);
+- pmd_clear(pmd);
+- }
+- kvmppc_pmd_free(pmd_offset(pud, 0));
+- pud_clear(pud);
+- }
+- pud_free(kvm->mm, pud_offset(pgd, 0));
+- pgd_clear(pgd);
+- }
+- pgd_free(kvm->mm, kvm->arch.pgtable);
+- kvm->arch.pgtable = NULL;
+-}
+-
+ static void pte_ctor(void *addr)
+ {
+ memset(addr, 0, RADIX_PTE_TABLE_SIZE);
+--
+2.13.7
+
diff --git a/series.conf b/series.conf
index 343f90eff8..6fd9b3ebd0 100644
--- a/series.conf
+++ b/series.conf
@@ -16729,6 +16729,7 @@
patches.arch/KVM-PPC-Fix-a-mmio_host_swabbed-uninitialized-usage-.patch
patches.arch/KVM-PPC-Book3S-HV-Lockless-tlbie-for-HPT-hcalls.patch
patches.arch/KVM-PPC-Book3S-HV-Use-a-helper-to-unmap-ptes-in-the-.patch
+ patches.arch/KVM-PPC-Book3S-HV-Recursively-unmap-all-page-table-e.patch
patches.suse/ipv6-allow-PMTU-exceptions-to-local-routes.patch
patches.suse/net-dsa-add-error-handling-for-pskb_trim_rcsum.patch
patches.drivers/ixgbe-Fix-setting-of-TC-configuration-for-macvlan-ca.patch
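
The ordering that the patch above enforces (clear the parent entry, flush the page-walk cache, only then free the detached tables, recursing through every level) can be illustrated outside the kernel. The following stand-alone C model is only an illustrative sketch, not kernel code: fake_table, walk_cache_valid, flush_walk_cache and replace_with_large_entry are invented stand-ins for the real radix page tables, the hardware PWC and the tlbie-based flush, chosen to mirror the shape of kvmppc_unmap_free_pud_entry_table in the patch.

/* Stand-alone model of "clear entry, flush walk cache, then free".
 * All names here are illustrative stand-ins, not kernel APIs.
 */
#include <stdio.h>
#include <stdlib.h>

#define ENTRIES 4

struct fake_table {
	struct fake_table *child[ENTRIES];	/* NULL or a lower-level table */
};

static int walk_cache_valid;			/* models the hardware PWC */

static void flush_walk_cache(void)		/* models a flush-all-PWC */
{
	walk_cache_valid = 0;
}

/* Recursively free a subtree that is already unreachable from the root. */
static void free_subtree(struct fake_table *t)
{
	int i;

	for (i = 0; i < ENTRIES; i++)
		if (t->child[i])
			free_subtree(t->child[i]);
	free(t);
}

/* Replace root->child[idx] (a lower-level table) with a large mapping. */
static void replace_with_large_entry(struct fake_table *root, int idx)
{
	struct fake_table *old = root->child[idx];

	root->child[idx] = NULL;	/* 1. clear the entry being replaced */
	flush_walk_cache();		/* 2. no cached walk can reach the
					 *    detached tables any more */
	if (old)
		free_subtree(old);	/* 3. every level underneath is now
					 *    safe to free, no further flush */
}

int main(void)
{
	struct fake_table *root = calloc(1, sizeof(*root));

	root->child[0] = calloc(1, sizeof(*root));		/* "pmd" level */
	root->child[0]->child[2] = calloc(1, sizeof(*root));	/* "pte" level */
	walk_cache_valid = 1;

	replace_with_large_entry(root, 0);
	printf("walk cache valid after replace: %d\n", walk_cache_valid);
	free(root);
	return 0;
}

The point of the sketch is the order of the three steps in replace_with_large_entry: because the whole subtree is detached and the walk cache flushed first, the recursive free needs no per-level flushes, which is exactly why the patch can change kvmppc_radix_flush_pwc into a single flush-all-PWC call per replaced entry.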