Home Home > GIT Browse
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichal Suchanek <msuchanek@suse.de>2019-10-02 14:46:53 +0200
committerMichal Suchanek <msuchanek@suse.de>2019-10-02 21:10:16 +0200
commitd7b6f7c2e5a4d50ac4241f891982cc337b60aab2 (patch)
tree7d286c950d59ec60e9e470300b68bf99160e7196
parent9a6b5c8544705010d6153422ac93d11cbc51b4ac (diff)
powerpc/64s/radix: Fix MADV_[FREE|DONTNEED] TLB flush miss
problem with THP (bsc#1152161 ltc#181664).
-rw-r--r--patches.suse/powerpc-64s-radix-Fix-MADV_-FREE-DONTNEED-TLB-flush-.patch201
-rw-r--r--series.conf1
2 files changed, 202 insertions, 0 deletions
diff --git a/patches.suse/powerpc-64s-radix-Fix-MADV_-FREE-DONTNEED-TLB-flush-.patch b/patches.suse/powerpc-64s-radix-Fix-MADV_-FREE-DONTNEED-TLB-flush-.patch
new file mode 100644
index 0000000000..0d260438fe
--- /dev/null
+++ b/patches.suse/powerpc-64s-radix-Fix-MADV_-FREE-DONTNEED-TLB-flush-.patch
@@ -0,0 +1,201 @@
+From 02390f66bd2362df114a0a0770d80ec33061f6d1 Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Fri, 15 Jun 2018 11:38:37 +1000
+Subject: [PATCH] powerpc/64s/radix: Fix MADV_[FREE|DONTNEED] TLB flush miss
+ problem with THP
+
+References: bsc#1152161 ltc#181664
+Patch-mainline: v4.18-rc2
+Git-commit: 02390f66bd2362df114a0a0770d80ec33061f6d1
+
+The patch 99baac21e4 ("mm: fix MADV_[FREE|DONTNEED] TLB flush miss
+problem") added a force flush mode to the mmu_gather flush, which
+unconditionally flushes the entire address range being invalidated
+(even if actual ptes only covered a smaller range), to solve a problem
+with concurrent threads invalidating the same PTEs causing them to
+miss TLBs that need flushing.
+
+This does not work with powerpc that invalidates mmu_gather batches
+according to page size. Have powerpc flush all possible page sizes in
+the range if it encounters this concurrency condition.
+
+Patch 4647706ebe ("mm: always flush VMA ranges affected by
+zap_page_range") does add a TLB flush for all page sizes on powerpc for
+the zap_page_range case, but that is to be removed and replaced with
+the mmu_gather flush to avoid redundant flushing. It is also thought to
+not cover other obscure race conditions:
+
+https://lkml.kernel.org/r/BD3A0EBE-ECF4-41D4-87FA-C755EA9AB6BD@gmail.com
+
+Hash does not have a problem because it invalidates TLBs inside the
+page table locks.
+
+Reported-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/mm/tlb-radix.c | 96 +++++++++++++++++++++++++++++--------
+ 1 file changed, 75 insertions(+), 21 deletions(-)
+
+--- a/arch/powerpc/mm/tlb-radix.c
++++ b/arch/powerpc/mm/tlb-radix.c
+@@ -445,22 +445,17 @@ EXPORT_SYMBOL(radix__flush_tlb_kernel_ra
+ static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
+ static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
+
+-void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+- unsigned long end)
++static inline void __radix__flush_tlb_range(struct mm_struct *mm,
++ unsigned long start, unsigned long end,
++ bool flush_all_sizes)
+
+ {
+- struct mm_struct *mm = vma->vm_mm;
+ unsigned long pid;
+ unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
+ unsigned long page_size = 1UL << page_shift;
+ unsigned long nr_pages = (end - start) >> page_shift;
+ bool local, full;
+
+-#ifdef CONFIG_HUGETLB_PAGE
+- if (is_vm_hugetlb_page(vma))
+- return radix__flush_hugetlb_tlb_range(vma, start, end);
+-#endif
+-
+ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+ return;
+@@ -486,37 +481,64 @@ void radix__flush_tlb_range(struct vm_ar
+ _tlbie_pid(pid, RIC_FLUSH_TLB);
+ }
+ } else {
+- bool hflush = false;
++ bool hflush = flush_all_sizes;
++ bool gflush = flush_all_sizes;
+ unsigned long hstart, hend;
++ unsigned long gstart, gend;
+
+-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+- hstart = (start + HPAGE_PMD_SIZE - 1) >> HPAGE_PMD_SHIFT;
+- hend = end >> HPAGE_PMD_SHIFT;
+- if (hstart < hend) {
+- hstart <<= HPAGE_PMD_SHIFT;
+- hend <<= HPAGE_PMD_SHIFT;
++ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+ hflush = true;
++
++ if (hflush) {
++ hstart = (start + PMD_SIZE - 1) & PMD_MASK;
++ hend = end & PMD_MASK;
++ if (hstart == hend)
++ hflush = false;
++ }
++
++ if (gflush) {
++ gstart = (start + PUD_SIZE - 1) & PUD_MASK;
++ gend = end & PUD_MASK;
++ if (gstart == gend)
++ gflush = false;
+ }
+-#endif
+
+ asm volatile("ptesync": : :"memory");
+ if (local) {
+ __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
+ if (hflush)
+ __tlbiel_va_range(hstart, hend, pid,
+- HPAGE_PMD_SIZE, MMU_PAGE_2M);
++ PMD_SIZE, MMU_PAGE_2M);
++ if (gflush)
++ __tlbiel_va_range(gstart, gend, pid,
++ PUD_SIZE, MMU_PAGE_1G);
+ asm volatile("ptesync": : :"memory");
+ } else {
+ __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
+ if (hflush)
+ __tlbie_va_range(hstart, hend, pid,
+- HPAGE_PMD_SIZE, MMU_PAGE_2M);
++ PMD_SIZE, MMU_PAGE_2M);
++ if (gflush)
++ __tlbie_va_range(gstart, gend, pid,
++ PUD_SIZE, MMU_PAGE_1G);
+ fixup_tlbie();
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+ }
+ preempt_enable();
+ }
++
++void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
++ unsigned long end)
++
++{
++#ifdef CONFIG_HUGETLB_PAGE
++ if (is_vm_hugetlb_page(vma))
++ return radix__flush_hugetlb_tlb_range(vma, start, end);
++#endif
++
++ __radix__flush_tlb_range(vma->vm_mm, start, end, false);
++}
+ EXPORT_SYMBOL(radix__flush_tlb_range);
+
+ static int radix_get_mmu_psize(int page_size)
+@@ -542,6 +564,8 @@ void radix__tlb_flush(struct mmu_gather
+ int psize = 0;
+ struct mm_struct *mm = tlb->mm;
+ int page_size = tlb->page_size;
++ unsigned long start = tlb->start;
++ unsigned long end = tlb->end;
+
+ /*
+ * if page size is not something we understand, do a full mm flush
+@@ -552,15 +576,45 @@ void radix__tlb_flush(struct mmu_gather
+ */
+ if (tlb->fullmm) {
+ radix__flush_all_mm(mm);
++#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
++ } else if (mm_tlb_flush_nested(mm)) {
++ /*
++ * If there is a concurrent invalidation that is clearing ptes,
++ * then it's possible this invalidation will miss one of those
++ * cleared ptes and miss flushing the TLB. If this invalidate
++ * returns before the other one flushes TLBs, that can result
++ * in it returning while there are still valid TLBs inside the
++ * range to be invalidated.
++ *
++ * See mm/memory.c:tlb_finish_mmu() for more details.
++ *
++ * The solution to this is ensure the entire range is always
++ * flushed here. The problem for powerpc is that the flushes
++ * are page size specific, so this "forced flush" would not
++ * do the right thing if there are a mix of page sizes in
++ * the range to be invalidated. So use __flush_tlb_range
++ * which invalidates all possible page sizes in the range.
++ *
+	 * PWC flush is probably not required because the core code
++ * shouldn't free page tables in this path, but accounting
++ * for the possibility makes us a bit more robust.
++ *
++ * need_flush_all is an uncommon case because page table
++ * teardown should be done with exclusive locks held (but
++ * after locks are dropped another invalidate could come
++ * in), it could be optimized further if necessary.
++ */
++ if (!tlb->need_flush_all)
++ __radix__flush_tlb_range(mm, start, end, true);
++ else
++ radix__flush_all_mm(mm);
++#endif
+ } else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
+ if (!tlb->need_flush_all)
+ radix__flush_tlb_mm(mm);
+ else
+ radix__flush_all_mm(mm);
+ } else {
+- unsigned long start = tlb->start;
+- unsigned long end = tlb->end;
+-
+ if (!tlb->need_flush_all)
+ radix__flush_tlb_range_psize(mm, start, end, psize);
+ else
diff --git a/series.conf b/series.conf
index 45d948f36d..4b597e036a 100644
--- a/series.conf
+++ b/series.conf
@@ -17876,6 +17876,7 @@
patches.suse/0017-arm64-dma-mapping-clear-buffers-allocated-with-FORCE.patch
patches.suse/0001-arm64-kpti-Use-early_param-for-kpti-command-line-opt.patch
patches.suse/0020-arm64-mm-Ensure-writes-to-swapper-are-ordered-wrt-su.patch
+ patches.suse/powerpc-64s-radix-Fix-MADV_-FREE-DONTNEED-TLB-flush-.patch
patches.suse/powerpc-64s-Fix-DT-CPU-features-Power9-DD2.1-logic.patch
patches.suse/block-fix-timeout-changes-for-legacy-request-drivers.patch
patches.suse/0001-block-sed-opal-Fix-a-couple-off-by-one-bugs.patch