summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Hugh Dickins <hugh@veritas.com> 2008-02-07 00:14:13 -0800
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2008-02-07 08:42:20 -0800
commit fa1de9008c9bcce8ab5122529dd19b24c273eba2 (patch)
tree 5afd94b03265ae4d9173ef471b6d5a30ed6d4ff3
parent 436c6541b13a73790646eb11429bdc8ee50eec41 (diff)
download linux-3.10-fa1de9008c9bcce8ab5122529dd19b24c273eba2.tar.gz
linux-3.10-fa1de9008c9bcce8ab5122529dd19b24c273eba2.tar.bz2
linux-3.10-fa1de9008c9bcce8ab5122529dd19b24c273eba2.zip
memcgroup: revert swap_state mods
If we're charging rss and we're charging cache, it seems obvious that we should be charging swapcache - as has been done. But in practice that doesn't work out so well: both swapin readahead and swapoff leave the majority of pages charged to the wrong cgroup (the cgroup that happened to read them in, rather than the cgroup to which they belong).

(Which is why unuse_pte's GFP_KERNEL while holding pte lock never showed up as a problem: no allocation was ever done there, every page read being already charged to the cgroup which initiated the swapoff.)

It all works rather better if we leave the charging to do_swap_page and unuse_pte, and do nothing for swapcache itself: revert mm/swap_state.c to what it was before the memory-controller patches. This also speeds up significantly a contained process working at its limit: because it no longer needs to keep waiting for swap writeback to complete.

Is it unfair that swap pages become uncharged once they're unmapped, even though they're still clearly private to particular cgroups? For a short while, yes; but PageReclaim arranges for those pages to go to the end of the inactive list and be reclaimed soon if necessary.

shmem/tmpfs pages are a distinct case: their charging also benefits from this change, but their second life on the lists as swapcache pages may prove more unfair - that I need to check next.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: David Rientjes <rientjes@google.com>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- mm/swap_state.c 13
1 file changed, 1 insertion, 12 deletions
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 6ce0669aced..ec42f01a8d0 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -17,7 +17,6 @@
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/migrate.h>
-#include <linux/memcontrol.h>
#include <asm/pgtable.h>
@@ -75,11 +74,6 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
BUG_ON(!PageLocked(page));
BUG_ON(PageSwapCache(page));
BUG_ON(PagePrivate(page));
-
- error = mem_cgroup_cache_charge(page, current->mm, gfp_mask);
- if (error)
- goto out;
-
error = radix_tree_preload(gfp_mask);
if (!error) {
write_lock_irq(&swapper_space.tree_lock);
@@ -92,14 +86,10 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
total_swapcache_pages++;
__inc_zone_page_state(page, NR_FILE_PAGES);
INC_CACHE_INFO(add_total);
- } else {
- mem_cgroup_uncharge_page(page);
}
write_unlock_irq(&swapper_space.tree_lock);
radix_tree_preload_end();
- } else
- mem_cgroup_uncharge_page(page);
-out:
+ }
return error;
}
@@ -114,7 +104,6 @@ void __delete_from_swap_cache(struct page *page)
BUG_ON(PageWriteback(page));
BUG_ON(PagePrivate(page));
- mem_cgroup_uncharge_page(page);
radix_tree_delete(&swapper_space.page_tree, page_private(page));
set_page_private(page, 0);
ClearPageSwapCache(page);