summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
authorMel Gorman <mgorman@suse.de>2013-11-12 15:08:32 -0800
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2013-12-08 07:29:27 -0800
commitf99510dc9a9b63ba8131e1e5eb70884db122a1bc (patch)
tree1a6cd8f06bd7ef01fb54aec0ad3f8dcb9a0b4601 /mm
parent7281bb5614bcdbf6789e9559420b814689b122e3 (diff)
downloadlinux-3.10-f99510dc9a9b63ba8131e1e5eb70884db122a1bc.tar.gz
linux-3.10-f99510dc9a9b63ba8131e1e5eb70884db122a1bc.tar.bz2
linux-3.10-f99510dc9a9b63ba8131e1e5eb70884db122a1bc.zip
mm: numa: return the number of base pages altered by protection changes
commit 72403b4a0fbdf433c1fe0127e49864658f6f6468 upstream. Commit 0255d4918480 ("mm: Account for a THP NUMA hinting update as one PTE update") was added to account for the number of PTE updates when marking pages prot_numa. task_numa_work was using the old return value to track how much address space had been updated. Altering the return value causes the scanner to do more work than it is configured or documented to in a single unit of work. This patch reverts that commit and accounts for the number of THP updates separately in vmstat. It is up to the administrator to interpret the pair of values correctly. This is a straight-forward operation and likely to only be of interest when actively debugging NUMA balancing problems. The impact of this patch is that the NUMA PTE scanner will scan slower when THP is enabled and workloads may converge slower as a result. On the flip size system CPU usage should be lower than recent tests reported. This is an illustrative example of a short single JVM specjbb test specjbb 3.12.0 3.12.0 vanilla acctupdates TPut 1 26143.00 ( 0.00%) 25747.00 ( -1.51%) TPut 7 185257.00 ( 0.00%) 183202.00 ( -1.11%) TPut 13 329760.00 ( 0.00%) 346577.00 ( 5.10%) TPut 19 442502.00 ( 0.00%) 460146.00 ( 3.99%) TPut 25 540634.00 ( 0.00%) 549053.00 ( 1.56%) TPut 31 512098.00 ( 0.00%) 519611.00 ( 1.47%) TPut 37 461276.00 ( 0.00%) 474973.00 ( 2.97%) TPut 43 403089.00 ( 0.00%) 414172.00 ( 2.75%) 3.12.0 3.12.0 vanillaacctupdates User 5169.64 5184.14 System 100.45 80.02 Elapsed 252.75 251.85 Performance is similar but note the reduction in system CPU time. While this showed a performance gain, it will not be universal but at least it'll be behaving as documented. The vmstats are obviously different but here is an obvious interpretation of them from mmtests. 3.12.0 3.12.0 vanillaacctupdates NUMA page range updates 1408326 11043064 NUMA huge PMD updates 0 21040 NUMA PTE updates 1408326 291624 "NUMA page range updates" == nr_pte_updates and is the value returned to the NUMA pte scanner. NUMA huge PMD updates were the number of THP updates which in combination can be used to calculate how many ptes were updated from userspace. Signed-off-by: Mel Gorman <mgorman@suse.de> Reported-by: Alex Thorlton <athorlton@sgi.com> Reviewed-by: Rik van Riel <riel@redhat.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Mel Gorman <mgorman@suse.de> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/mprotect.c7
-rw-r--r--mm/vmstat.c1
2 files changed, 7 insertions, 1 deletions
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 2bbb648ea73..d4d5399c7ab 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -135,6 +135,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
pmd_t *pmd;
unsigned long next;
unsigned long pages = 0;
+ unsigned long nr_huge_updates = 0;
bool all_same_node;
pmd = pmd_offset(pud, addr);
@@ -145,7 +146,8 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
split_huge_page_pmd(vma, addr, pmd);
else if (change_huge_pmd(vma, pmd, addr, newprot,
prot_numa)) {
- pages++;
+ pages += HPAGE_PMD_NR;
+ nr_huge_updates++;
continue;
}
/* fall through */
@@ -165,6 +167,9 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
change_pmd_protnuma(vma->vm_mm, addr, pmd);
} while (pmd++, addr = next, addr != end);
+ if (nr_huge_updates)
+ count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates);
+
return pages;
}
diff --git a/mm/vmstat.c b/mm/vmstat.c
index f42745e6578..10bbb5427a6 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -779,6 +779,7 @@ const char * const vmstat_text[] = {
#ifdef CONFIG_NUMA_BALANCING
"numa_pte_updates",
+ "numa_huge_pte_updates",
"numa_hint_faults",
"numa_hint_faults_local",
"numa_pages_migrated",