Home Home > GIT Browse
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMel Gorman <mgorman@suse.com>2016-05-30 13:47:33 +0100
committerMel Gorman <mgorman@suse.com>2016-05-30 13:47:33 +0100
commit26bc90716d8c723cf34738922ec9be447bde5912 (patch)
tree38f9b961d4baace09dcd3dd253a4a032080e4b3e
parenteb7390244ed2cad655e35a895fc65daa30bb9e5d (diff)
mm: make faultaround produce old ptes (tag: rpm-4.4.11-41)
(bnc#971975 VM performance -- page aging).
-rw-r--r--patches.suse/mm-make-faultaround-produce-old-ptes.patch160
-rw-r--r--series.conf3
2 files changed, 163 insertions, 0 deletions
diff --git a/patches.suse/mm-make-faultaround-produce-old-ptes.patch b/patches.suse/mm-make-faultaround-produce-old-ptes.patch
new file mode 100644
index 0000000000..5277535fc5
--- /dev/null
+++ b/patches.suse/mm-make-faultaround-produce-old-ptes.patch
@@ -0,0 +1,160 @@
+From 5f2cae8a57f51684f568a0e5122104046e897bf3 Mon Sep 17 00:00:00 2001
+From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Date: Fri, 20 May 2016 16:58:41 -0700
+Subject: [PATCH] mm: make faultaround produce old ptes
+
+References: bnc#971975 VM performance -- page aging
+Patch-mainline: v4.7
+Git-commit: 5c0a85fad949212b3e059692deecdeed74ae7ec7
+
+Currently, faultaround code produces young pte. This can screw up
+vmscan behaviour[1], as it makes vmscan think that these pages are hot
+and not push them out on first round.
+
+During sparse file access faultaround gets more pages mapped and all of
+them are young. Under memory pressure, this makes vmscan swap out anon
+pages instead, or to drop other page cache pages which otherwise stay
+resident.
+
+Modify faultaround to produce old ptes, so they can easily be reclaimed
+under memory pressure.
+
+This can to some extent defeat the purpose of faultaround on machines
+without hardware accessed bit as it will not help us with reducing the
+number of minor page faults.
+
+We may want to disable faultaround on such machines altogether, but
+that's subject for separate patchset.
+
+Minchan:
+ "I tested 512M mmap sequential word read test on non-HW access bit
+ system (i.e., ARM) and confirmed it doesn't increase minor fault any
+ more.
+
+ old: 4096 fault_around
+ minor fault: 131291
+ elapsed time: 6747645 usec
+
+ new: 65536 fault_around
+ minor fault: 131291
+ elapsed time: 6709263 usec
+
+ 0.56% benefit"
+
+[1] https://lkml.kernel.org/r/1460992636-711-1-git-send-email-vinmenon@codeaurora.org
+
+Link: http://lkml.kernel.org/r/1463488366-47723-1-git-send-email-kirill.shutemov@linux.intel.com
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Minchan Kim <minchan@kernel.org>
+Tested-by: Minchan Kim <minchan@kernel.org>
+Acked-by: Rik van Riel <riel@redhat.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Vinayak Menon <vinmenon@codeaurora.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Mel Gorman <mgorman@suse.de>
+---
+ include/linux/mm.h | 2 +-
+ mm/filemap.c | 2 +-
+ mm/memory.c | 23 ++++++++++++++++++-----
+ 3 files changed, 20 insertions(+), 7 deletions(-)
+
+diff --git a/include/linux/mm.h b/include/linux/mm.h
+index d576f37b2a76..eb4fa93d026b 100644
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -583,7 +583,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
+ }
+
+ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
+- struct page *page, pte_t *pte, bool write, bool anon);
++ struct page *page, pte_t *pte, bool write, bool anon, bool old);
+ #endif
+
+ /*
+diff --git a/mm/filemap.c b/mm/filemap.c
+index db6427f9b629..324d6198bdeb 100644
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -2221,7 +2221,7 @@ void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
+ if (file->f_ra.mmap_miss > 0)
+ file->f_ra.mmap_miss--;
+ addr = address + (page->index - vmf->pgoff) * PAGE_SIZE;
+- do_set_pte(vma, addr, page, pte, false, false);
++ do_set_pte(vma, addr, page, pte, false, false, true);
+ unlock_page(page);
+ goto next;
+ unlock:
+diff --git a/mm/memory.c b/mm/memory.c
+index 707541988edc..5752c64606f5 100644
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -2859,7 +2859,7 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
+ * vm_ops->map_pages.
+ */
+ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
+- struct page *page, pte_t *pte, bool write, bool anon)
++ struct page *page, pte_t *pte, bool write, bool anon, bool old)
+ {
+ pte_t entry;
+
+@@ -2867,6 +2867,8 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
+ entry = mk_pte(page, vma->vm_page_prot);
+ if (write)
+ entry = maybe_mkwrite(pte_mkdirty(entry), vma);
++ if (old)
++ entry = pte_mkold(entry);
+ if (anon) {
+ inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
+ page_add_new_anon_rmap(page, vma, address);
+@@ -3004,9 +3006,20 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+ */
+ if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) {
+ pte = pte_offset_map_lock(mm, pmd, address, &ptl);
+- do_fault_around(vma, address, pte, pgoff, flags);
+ if (!pte_same(*pte, orig_pte))
+ goto unlock_out;
++ do_fault_around(vma, address, pte, pgoff, flags);
++ /* Check if the fault is handled by faultaround */
++ if (!pte_same(*pte, orig_pte)) {
++ /*
++			 * Faultaround produces old ptes, but the pte we've
++			 * handled the fault for should be young.
++ */
++ pte_t entry = pte_mkyoung(*pte);
++ if (ptep_set_access_flags(vma, address, pte, entry, 0))
++ update_mmu_cache(vma, address, pte);
++ goto unlock_out;
++ }
+ pte_unmap_unlock(pte, ptl);
+ }
+
+@@ -3021,7 +3034,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+ page_cache_release(fault_page);
+ return ret;
+ }
+- do_set_pte(vma, address, fault_page, pte, false, false);
++ do_set_pte(vma, address, fault_page, pte, false, false, false);
+ unlock_page(fault_page);
+ unlock_out:
+ pte_unmap_unlock(pte, ptl);
+@@ -3072,7 +3085,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+ }
+ goto uncharge_out;
+ }
+- do_set_pte(vma, address, new_page, pte, true, true);
++ do_set_pte(vma, address, new_page, pte, true, true, false);
+ mem_cgroup_commit_charge(new_page, memcg, false);
+ lru_cache_add_active_or_unevictable(new_page, vma);
+ pte_unmap_unlock(pte, ptl);
+@@ -3125,7 +3138,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+ page_cache_release(fault_page);
+ return ret;
+ }
+- do_set_pte(vma, address, fault_page, pte, true, false);
++ do_set_pte(vma, address, fault_page, pte, true, false, false);
+ pte_unmap_unlock(pte, ptl);
+
+ if (set_page_dirty(fault_page))
diff --git a/series.conf b/series.conf
index b755877247..f655a0d1fc 100644
--- a/series.conf
+++ b/series.conf
@@ -219,6 +219,9 @@
patches.suse/mm-page_alloc-restore-the-original-nodemask-if-the-fast-path-allocation-failed.patch
patches.suse/mm-page_alloc-prevent-infinite-loop-in-buffered_rmqueue.patch
+ # -stable pending page aging
+ patches.suse/mm-make-faultaround-produce-old-ptes.patch
+
# -stable pending times/clock_gettime performance (bnc#981795)
# Patch slightly improves the situation but still short of expectations
patches.suse/sched-cpuacct-Simplify-the-cpuacct-code.patch