author     Michal Suchanek <msuchanek@suse.de>  2019-10-02 14:35:36 +0200
committer  Michal Suchanek <msuchanek@suse.de>  2019-10-02 21:10:15 +0200
commit     80c5a9ebd6bcb83d0d084bafa6227630ba397966 (patch)
tree       5aa2d3cc8c414c2208c6912c7ef0b7f4b4800404
parent     ab2f8fa2a1ef238e922f9312aafe3b95bf96bb90 (diff)
powerpc/64s/radix: Optimize flush_tlb_range (bsc#1152161 ltc#181664).
- Refresh patches.suse/powerpc-mm-Fixup-tlbie-vs-store-ordering-issue-on-PO.patch.
- Refresh patches.suse/powerpc-mm-Workaround-Nest-MMU-bug-with-TLB-invalida.patch.
- Refresh patches.suse/powerpc-mm-radix-Move-the-functions-that-does-the-ac.patch.
-rw-r--r--  patches.suse/powerpc-64s-radix-Optimize-flush_tlb_range.patch            275
-rw-r--r--  patches.suse/powerpc-mm-Fixup-tlbie-vs-store-ordering-issue-on-PO.patch   19
-rw-r--r--  patches.suse/powerpc-mm-Workaround-Nest-MMU-bug-with-TLB-invalida.patch   27
-rw-r--r--  patches.suse/powerpc-mm-radix-Move-the-functions-that-does-the-ac.patch   14
-rw-r--r--  series.conf                                                                1
5 files changed, 314 insertions(+), 22 deletions(-)
diff --git a/patches.suse/powerpc-64s-radix-Optimize-flush_tlb_range.patch b/patches.suse/powerpc-64s-radix-Optimize-flush_tlb_range.patch
new file mode 100644
index 0000000000..c8ebd7ebe2
--- /dev/null
+++ b/patches.suse/powerpc-64s-radix-Optimize-flush_tlb_range.patch
@@ -0,0 +1,275 @@
+From cbf09c837720f72f5e63ab7a2d331ec6cc9a3417 Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Tue, 7 Nov 2017 18:53:07 +1100
+Subject: [PATCH] powerpc/64s/radix: Optimize flush_tlb_range
+
+References: bsc#1152161 ltc#181664
+Patch-mainline: v4.15-rc1
+Git-commit: cbf09c837720f72f5e63ab7a2d331ec6cc9a3417
+
+Currently for radix, flush_tlb_range flushes the entire PID, because
+the Linux mm code does not tell us about page size here for THP vs
+regular pages. This is quite sub-optimal for small mremap / mprotect
+/ change_protection.
+
+So implement va range flushes with two flush passes, one for each
+page size (regular and THP). The second flush has an order of magnitude
+fewer tlbie instructions than the first, so it is a relatively small
+additional cost.
+
+There is still room for improvement here with some changes to generic
+APIs: in particular, if there are mostly THP pages to be invalidated,
+the small-page flushes could be reduced.
+
+Time to mprotect 1 page of memory (after mmap, touch):
+vanilla 2.9us 1.8us
+patched 1.2us 1.6us
+
+Time to mprotect 30 pages of memory (after mmap, touch):
+vanilla 8.2us 7.2us
+patched 6.9us 17.9us
+
+Time to mprotect 34 pages of memory (after mmap, touch):
+vanilla 9.1us 8.0us
+patched 9.0us 8.0us
+
+34 pages is the point at which the invalidation switches from va
+to entire PID, which tlbie can do in a single instruction. This is
+why in the case of 30 pages, the new code runs slower for this test.
+This is a deliberate tradeoff already present in the unmap and THP
+promotion code; the idea is that the benefit comes from avoiding
+flushing the entire TLB for this PID on all threads in the system.
+
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Acked-by: Michal Suchanek <msuchanek@suse.de>
+---
+ arch/powerpc/mm/tlb-radix.c | 138 +++++++++++++++++++++++++++---------
+ 1 file changed, 103 insertions(+), 35 deletions(-)
+
+diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
+index 9916ea2fff43..b4b49de551a9 100644
+--- a/arch/powerpc/mm/tlb-radix.c
++++ b/arch/powerpc/mm/tlb-radix.c
+@@ -100,6 +100,17 @@ static inline void __tlbiel_va(unsigned long va, unsigned long pid,
+ trace_tlbie(0, 1, rb, rs, ric, prs, r);
+ }
+
++static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
++ unsigned long pid, unsigned long page_size,
++ unsigned long psize)
++{
++ unsigned long addr;
++ unsigned long ap = mmu_get_ap(psize);
++
++ for (addr = start; addr < end; addr += page_size)
++ __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
++}
++
+ static inline void _tlbiel_va(unsigned long va, unsigned long pid,
+ unsigned long psize, unsigned long ric)
+ {
+@@ -114,12 +125,8 @@ static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize)
+ {
+- unsigned long addr;
+- unsigned long ap = mmu_get_ap(psize);
+-
+ asm volatile("ptesync": : :"memory");
+- for (addr = start; addr < end; addr += page_size)
+- __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
++ __tlbiel_va_range(start, end, pid, page_size, psize);
+ asm volatile("ptesync": : :"memory");
+ }
+
+@@ -139,6 +146,17 @@ static inline void __tlbie_va(unsigned long va, unsigned long pid,
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
+ }
+
++static inline void __tlbie_va_range(unsigned long start, unsigned long end,
++ unsigned long pid, unsigned long page_size,
++ unsigned long psize)
++{
++ unsigned long addr;
++ unsigned long ap = mmu_get_ap(psize);
++
++ for (addr = start; addr < end; addr += page_size)
++ __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
++}
++
+ static inline void _tlbie_va(unsigned long va, unsigned long pid,
+ unsigned long psize, unsigned long ric)
+ {
+@@ -153,12 +171,8 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize)
+ {
+- unsigned long addr;
+- unsigned long ap = mmu_get_ap(psize);
+-
+ asm volatile("ptesync": : :"memory");
+- for (addr = start; addr < end; addr += page_size)
+- __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
++ __tlbie_va_range(start, end, pid, page_size, psize);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+
+@@ -300,17 +314,78 @@ void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
+ }
+ EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
+
++#define TLB_FLUSH_ALL -1UL
++
+ /*
+- * Currently, for range flushing, we just do a full mm flush. Because
+- * we use this in code path where we don' track the page size.
++ * Number of pages above which we invalidate the entire PID rather than
++ * flush individual pages, for local and global flushes respectively.
++ *
++ * tlbie goes out to the interconnect and individual ops are more costly.
++ * It also does not iterate over sets like the local tlbiel variant when
++ * invalidating a full PID, so it has a far lower threshold to change from
++ * individual page flushes to full-pid flushes.
+ */
++static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
++
+ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+
+ {
+ struct mm_struct *mm = vma->vm_mm;
++ unsigned long pid;
++ unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
++ unsigned long page_size = 1UL << page_shift;
++ unsigned long nr_pages = (end - start) >> page_shift;
++ bool local, full;
++
++#ifdef CONFIG_HUGETLB_PAGE
++ if (is_vm_hugetlb_page(vma))
++ return radix__flush_hugetlb_tlb_range(vma, start, end);
++#endif
+
+- radix__flush_tlb_mm(mm);
++ pid = mm->context.id;
++ if (unlikely(pid == MMU_NO_CONTEXT))
++ return;
++
++ preempt_disable();
++ local = mm_is_thread_local(mm);
++ full = (end == TLB_FLUSH_ALL || nr_pages > tlb_single_page_flush_ceiling);
++
++ if (full) {
++ if (local)
++ _tlbiel_pid(pid, RIC_FLUSH_TLB);
++ else
++ _tlbie_pid(pid, RIC_FLUSH_TLB);
++ } else {
++ bool hflush = false;
++ unsigned long hstart, hend;
++
++#ifdef CONFIG_TRANSPARENT_HUGEPAGE
++ hstart = (start + HPAGE_PMD_SIZE - 1) >> HPAGE_PMD_SHIFT;
++ hend = end >> HPAGE_PMD_SHIFT;
++ if (hstart < hend) {
++ hstart <<= HPAGE_PMD_SHIFT;
++ hend <<= HPAGE_PMD_SHIFT;
++ hflush = true;
++ }
++#endif
++
++ asm volatile("ptesync": : :"memory");
++ if (local) {
++ __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
++ if (hflush)
++ __tlbiel_va_range(hstart, hend, pid,
++ HPAGE_PMD_SIZE, MMU_PAGE_2M);
++ asm volatile("ptesync": : :"memory");
++ } else {
++ __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
++ if (hflush)
++ __tlbie_va_range(hstart, hend, pid,
++ HPAGE_PMD_SIZE, MMU_PAGE_2M);
++ asm volatile("eieio; tlbsync; ptesync": : :"memory");
++ }
++ }
++ preempt_enable();
+ }
+ EXPORT_SYMBOL(radix__flush_tlb_range);
+
+@@ -352,19 +427,14 @@ void radix__tlb_flush(struct mmu_gather *tlb)
+ radix__flush_tlb_mm(mm);
+ }
+
+-#define TLB_FLUSH_ALL -1UL
+-/*
+- * Number of pages above which we will do a bcast tlbie. Just a
+- * number at this point copied from x86
+- */
+-static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
+-
+ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int psize)
+ {
+ unsigned long pid;
+- bool local;
+- unsigned long page_size = 1UL << mmu_psize_defs[psize].shift;
++ unsigned int page_shift = mmu_psize_defs[psize].shift;
++ unsigned long page_size = 1UL << page_shift;
++ unsigned long nr_pages = (end - start) >> page_shift;
++ bool local, full;
+
+ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+@@ -372,8 +442,9 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+
+ preempt_disable();
+ local = mm_is_thread_local(mm);
+- if (end == TLB_FLUSH_ALL ||
+- (end - start) > tlb_single_page_flush_ceiling * page_size) {
++ full = (end == TLB_FLUSH_ALL || nr_pages > tlb_single_page_flush_ceiling);
++
++ if (full) {
+ if (local)
+ _tlbiel_pid(pid, RIC_FLUSH_TLB);
+ else
+@@ -391,7 +462,6 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
+ {
+ unsigned long pid, end;
+- bool local;
+
+ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+@@ -403,20 +473,18 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
+ return;
+ }
+
++ end = addr + HPAGE_PMD_SIZE;
++
++ /* Otherwise first do the PWC, then iterate the pages. */
+ preempt_disable();
+- local = mm_is_thread_local(mm);
+- /* Otherwise first do the PWC */
+- if (local)
+- _tlbiel_pid(pid, RIC_FLUSH_PWC);
+- else
+- _tlbie_pid(pid, RIC_FLUSH_PWC);
+
+- /* Then iterate the pages */
+- end = addr + HPAGE_PMD_SIZE;
+- if (local)
++ if (mm_is_thread_local(mm)) {
++ _tlbiel_pid(pid, RIC_FLUSH_PWC);
+ _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize);
+- else
++ } else {
++ _tlbie_pid(pid, RIC_FLUSH_PWC);
+ _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize);
++ }
+
+ preempt_enable();
+ }
+--
+2.23.0
+
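For reference, a minimal sketch of the decision logic this patch adds to
radix__flush_tlb_range(): below the ceiling it does a per-page pass plus a
second, much sparser 2M pass for possible THP mappings; above it, a single
full-PID tlbie. This is a userspace model, not repository code: the flush
primitives are print-only stubs, the 64K-base-page / 2M-THP constants are
assumptions, and the mask arithmetic stands in for the kernel's shift-based
alignment.

    /* Userspace model of the radix__flush_tlb_range() decision logic.
     * Assumes a 64K base page / 2M THP radix configuration. */
    #include <stdio.h>

    #define PAGE_SHIFT      16                      /* 64K base pages */
    #define PAGE_SIZE       (1UL << PAGE_SHIFT)
    #define HPAGE_PMD_SHIFT 21                      /* 2M THP */
    #define HPAGE_PMD_SIZE  (1UL << HPAGE_PMD_SHIFT)
    #define TLB_FLUSH_ALL   (-1UL)

    static unsigned long tlb_single_page_flush_ceiling = 33;

    /* Stand-in for __tlbie_va_range(): one invalidate per page in [start, end). */
    static void va_range_flush(unsigned long start, unsigned long end,
                               unsigned long page_size, const char *what)
    {
            unsigned long addr;

            for (addr = start; addr < end; addr += page_size)
                    printf("  invalidate %s page at 0x%lx\n", what, addr);
    }

    static void flush_tlb_range(unsigned long start, unsigned long end)
    {
            unsigned long nr_pages = (end - start) >> PAGE_SHIFT;
            unsigned long hstart, hend;

            /* Above the ceiling a single full-PID tlbie is cheaper. */
            if (end == TLB_FLUSH_ALL || nr_pages > tlb_single_page_flush_ceiling) {
                    printf("  invalidate entire PID (one instruction)\n");
                    return;
            }

            /* First pass: every base page in the range. */
            va_range_flush(start, end, PAGE_SIZE, "64K");

            /* Second pass: only the 2M-aligned sub-range, in case parts of
             * it are mapped with THP; far fewer invalidates than pass one. */
            hstart = (start + HPAGE_PMD_SIZE - 1) & ~(HPAGE_PMD_SIZE - 1);
            hend = end & ~(HPAGE_PMD_SIZE - 1);
            if (hstart < hend)
                    va_range_flush(hstart, hend, HPAGE_PMD_SIZE, "2M");
    }

    int main(void)
    {
            printf("4 pages:\n");
            flush_tlb_range(0x40000000UL, 0x40000000UL + 4 * PAGE_SIZE);
            printf("33 pages (covers a full 2M block):\n");
            flush_tlb_range(0x40000000UL, 0x40000000UL + 33 * PAGE_SIZE);
            printf("34 pages (above the ceiling):\n");
            flush_tlb_range(0x40000000UL, 0x40000000UL + 34 * PAGE_SIZE);
            return 0;
    }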
diff --git a/patches.suse/powerpc-mm-Fixup-tlbie-vs-store-ordering-issue-on-PO.patch b/patches.suse/powerpc-mm-Fixup-tlbie-vs-store-ordering-issue-on-PO.patch
index 9780fe56c2..3c71c5e9b6 100644
--- a/patches.suse/powerpc-mm-Fixup-tlbie-vs-store-ordering-issue-on-PO.patch
+++ b/patches.suse/powerpc-mm-Fixup-tlbie-vs-store-ordering-issue-on-PO.patch
@@ -31,10 +31,7 @@ Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
callers are __radix__flush_tlb_range_psize() (which here
calls __tlbie_va() directly, so add the fixup after that)
and radix__flush_tlb_collapsed_pmd() (in SLES15 it calls
- the wrapper directly, so it's already fixed up).
- - hunk 5: removed -- the modified function radix__flush_tlb_range()
- in SLES15 just redirects to a full flush, which calls the
- _tlbie_pid() wrapper, which is already fixed up.]
+ the wrapper directly, so it's already fixed up).]
Signed-off-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Acked-by: Michal Suchanek <msuchanek@suse.de>
@@ -215,11 +212,19 @@ index 28c980e..adf469f 100644
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
-@@ -225,6 +238,7 @@ static inline void _tlbie_va_range(unsig
+@@ -239,6 +252,7 @@ static inline void _tlbie_va_range(unsig
+ {
asm volatile("ptesync": : :"memory");
- for (addr = start; addr < end; addr += page_size)
- __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+ __tlbie_va_range(start, end, pid, page_size, psize);
+ fixup_tlbie();
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
+@@ -465,6 +479,7 @@ void radix__flush_tlb_range(struct vm_ar
+ if (hflush)
+ __tlbie_va_range(hstart, hend, pid,
+ HPAGE_PMD_SIZE, MMU_PAGE_2M);
++ fixup_tlbie();
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+ }
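The refresh above only relocates where fixup_tlbie() is called: after the
last real tlbie of a sequence and before the closing "eieio; tlbsync;
ptesync". A print-only model of that ordering follows; the fixup internals
(a ptesync plus one dummy tlbie) follow the upstream workaround but are an
assumption here, not part of this diff.

    /* Print-only model of the tlbie ordering the refresh enforces. */
    #include <stdio.h>

    static void tlbie(unsigned long va)     { printf("tlbie   va=0x%lx\n", va); }
    static void ptesync(void)               { printf("ptesync\n"); }
    static void eieio_tlbsync_ptesync(void) { printf("eieio; tlbsync; ptesync\n"); }

    /* Models fixup_tlbie(): its own ptesync, then one dummy tlbie
     * to a fixed scratch address (assumed, per the upstream fix). */
    static void fixup_tlbie(void)
    {
            ptesync();
            tlbie((1UL << 52) - 1);
    }

    /* Models _tlbie_va_range() after the refresh adds the fixup call. */
    static void tlbie_va_range(unsigned long start, unsigned long end,
                               unsigned long page_size)
    {
            unsigned long addr;

            ptesync();
            for (addr = start; addr < end; addr += page_size)
                    tlbie(addr);
            fixup_tlbie();          /* inserted by this refresh */
            eieio_tlbsync_ptesync();
    }

    int main(void)
    {
            tlbie_va_range(0x10000, 0x13000, 0x1000);
            return 0;
    }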
diff --git a/patches.suse/powerpc-mm-Workaround-Nest-MMU-bug-with-TLB-invalida.patch b/patches.suse/powerpc-mm-Workaround-Nest-MMU-bug-with-TLB-invalida.patch
index 54eb6ae72f..e0bafd6523 100644
--- a/patches.suse/powerpc-mm-Workaround-Nest-MMU-bug-with-TLB-invalida.patch
+++ b/patches.suse/powerpc-mm-Workaround-Nest-MMU-bug-with-TLB-invalida.patch
@@ -32,8 +32,7 @@ Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
just calls radix__flush_tlb_mm() which is already
patched; but there is another instance of _tlbie_pid(
(pid, RIC_FLUSH_TLB) in radix__flush_tlb_range_psize(),
- so cover that one).
- - hunk 5: removed -- this function does not exist in SLES 15.]
+ so cover that one).]
Signed-off-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Acked-by: Michal Suchanek <msuchanek@suse.de>
@@ -43,7 +42,7 @@ Acked-by: Michal Suchanek <msuchanek@suse.de>
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
-@@ -288,6 +288,16 @@ void radix__local_flush_tlb_page(struct
+@@ -302,6 +302,16 @@ void radix__local_flush_tlb_page(struct
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);
@@ -60,7 +59,7 @@ Acked-by: Michal Suchanek <msuchanek@suse.de>
#ifdef CONFIG_SMP
void radix__flush_tlb_mm(struct mm_struct *mm)
{
-@@ -298,9 +308,12 @@ void radix__flush_tlb_mm(struct mm_struc
+@@ -312,9 +322,12 @@ void radix__flush_tlb_mm(struct mm_struc
return;
preempt_disable();
@@ -76,10 +75,10 @@ Acked-by: Michal Suchanek <msuchanek@suse.de>
_tlbiel_pid(pid, RIC_FLUSH_TLB);
preempt_enable();
}
-@@ -440,10 +453,14 @@ void radix__flush_tlb_range_psize(struct
- local = mm_is_thread_local(mm);
- if (end == TLB_FLUSH_ALL ||
- (end - start) > tlb_single_page_flush_ceiling * page_size) {
+@@ -418,10 +431,14 @@ void radix__flush_tlb_range(struct vm_ar
+ full = (end == TLB_FLUSH_ALL || nr_pages > tlb_single_page_flush_ceiling);
+
+ if (full) {
- if (local)
+ if (local) {
_tlbiel_pid(pid, RIC_FLUSH_TLB);
@@ -92,5 +91,17 @@ Acked-by: Michal Suchanek <msuchanek@suse.de>
+ _tlbie_pid(pid, RIC_FLUSH_TLB);
+ }
} else {
+ bool hflush = false;
+ unsigned long hstart, hend;
+@@ -514,7 +531,10 @@ void radix__flush_tlb_range_psize(struct
+ if (local)
+ _tlbiel_pid(pid, RIC_FLUSH_TLB);
+ else
+- _tlbie_pid(pid, RIC_FLUSH_TLB);
++ if (mm_needs_flush_escalation(mm))
++ _tlbie_pid(pid, RIC_FLUSH_ALL);
++ else
++ _tlbie_pid(pid, RIC_FLUSH_TLB);
+ } else {
if (local)
_tlbiel_va_range(start, end, pid, page_size, psize);
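The escalation added in the last hunk above swaps RIC_FLUSH_TLB for
RIC_FLUSH_ALL on the global full-PID path when the context may be cached by
the Nest MMU. A sketch of that branch, with copros_attached standing in (as
an assumed field) for the kernel's mm_needs_flush_escalation() accounting:

    /* Model of the Nest MMU flush escalation on the global path. */
    #include <stdio.h>
    #include <stdbool.h>

    enum ric { RIC_FLUSH_TLB, RIC_FLUSH_PWC, RIC_FLUSH_ALL };

    struct mm {
            unsigned long pid;
            bool copros_attached;   /* stand-in for the kernel's accounting */
    };

    static bool mm_needs_flush_escalation(struct mm *mm)
    {
            return mm->copros_attached;
    }

    static void tlbie_pid(unsigned long pid, enum ric ric)
    {
            static const char *name[] = { "FLUSH_TLB", "FLUSH_PWC", "FLUSH_ALL" };

            printf("tlbie pid=%lu ric=%s\n", pid, name[ric]);
    }

    static void flush_full_pid_global(struct mm *mm)
    {
            if (mm_needs_flush_escalation(mm))
                    tlbie_pid(mm->pid, RIC_FLUSH_ALL);  /* Nest MMU workaround */
            else
                    tlbie_pid(mm->pid, RIC_FLUSH_TLB);
    }

    int main(void)
    {
            struct mm plain = { 7, false }, copro = { 9, true };

            flush_full_pid_global(&plain);
            flush_full_pid_global(&copro);
            return 0;
    }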
diff --git a/patches.suse/powerpc-mm-radix-Move-the-functions-that-does-the-ac.patch b/patches.suse/powerpc-mm-radix-Move-the-functions-that-does-the-ac.patch
index fcad39edb2..466007a689 100644
--- a/patches.suse/powerpc-mm-radix-Move-the-functions-that-does-the-ac.patch
+++ b/patches.suse/powerpc-mm-radix-Move-the-functions-that-does-the-ac.patch
@@ -78,10 +78,10 @@ Acked-by: Michal Suchanek <msuchanek@suse.de>
- trace_tlbie(0, 1, rb, rs, ric, prs, r);
-}
-
- static inline void _tlbiel_va(unsigned long va, unsigned long pid,
- unsigned long psize, unsigned long ric)
- {
-@@ -189,22 +205,6 @@ static inline void _tlbiel_va_range(unsi
+ static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize)
+@@ -196,22 +212,6 @@ static inline void _tlbiel_va_range(unsi
asm volatile("ptesync": : :"memory");
}
@@ -101,6 +101,6 @@ Acked-by: Michal Suchanek <msuchanek@suse.de>
- trace_tlbie(0, 0, rb, rs, ric, prs, r);
-}
-
- static inline void _tlbie_va(unsigned long va, unsigned long pid,
- unsigned long psize, unsigned long ric)
- {
+ static inline void __tlbie_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize)
diff --git a/series.conf b/series.conf
index 2e00271542..db2ccfab2d 100644
--- a/series.conf
+++ b/series.conf
@@ -10194,6 +10194,7 @@
patches.suse/powerpc-powernv-ioda-Remove-explicit-max-window-size.patch
patches.suse/powerpc-64s-radix-Optimize-TLB-range-flush-barriers.patch
patches.suse/powerpc-64s-radix-Implement-_tlbie-l-_va_range-flush.patch
+ patches.suse/powerpc-64s-radix-Optimize-flush_tlb_range.patch
patches.suse/powerpc-mm-radix-Fix-crashes-on-Power9-DD1-with-radix.patch
patches.suse/powerpc-kprobes-Disable-preemption-before-invoking-p.patch
patches.suse/powerpc-powernv-idle-Round-up-latency-and-residency-.patch