summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Morton <akpm@linux-foundation.org>2013-09-24 15:27:37 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2013-09-24 17:00:26 -0700
commit694fbc0fe78518d06efa63910bf4ecee660e7852 (patch)
tree895244c4cc010759ce5c29333bee3cddc8c852d2
parent30361e51cae7a4df3fec89f935a450a6fe6f16fa (diff)
downloadkernel-common-694fbc0fe78518d06efa63910bf4ecee660e7852.tar.gz
kernel-common-694fbc0fe78518d06efa63910bf4ecee660e7852.tar.bz2
kernel-common-694fbc0fe78518d06efa63910bf4ecee660e7852.zip
revert "memcg: enhance memcg iterator to support predicates"
Revert commit de57780dc659 ("memcg: enhance memcg iterator to support predicates") I merged this prematurely - Michal and Johannes still disagree about the overall design direction and the future remains unclear. Cc: Michal Hocko <mhocko@suse.cz> Cc: Johannes Weiner <hannes@cmpxchg.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/memcontrol.h49
-rw-r--r--mm/memcontrol.c70
-rw-r--r--mm/vmscan.c16
3 files changed, 32 insertions, 103 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 60e95872da29..ef2b9bd7fafa 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -53,23 +53,6 @@ struct mem_cgroup_reclaim_cookie {
unsigned int generation;
};
-enum mem_cgroup_filter_t {
- VISIT, /* visit current node */
- SKIP, /* skip the current node and continue traversal */
- SKIP_TREE, /* skip the whole subtree and continue traversal */
-};
-
-/*
- * mem_cgroup_filter_t predicate might instruct mem_cgroup_iter_cond how to
- * iterate through the hierarchy tree. Each tree element is checked by the
- * predicate before it is returned by the iterator. If a filter returns
- * SKIP or SKIP_TREE then the iterator code continues traversal (with the
- * next node down the hierarchy or the next node that doesn't belong under the
- * memcg's subtree).
- */
-typedef enum mem_cgroup_filter_t
-(*mem_cgroup_iter_filter)(struct mem_cgroup *memcg, struct mem_cgroup *root);
-
#ifdef CONFIG_MEMCG
/*
* All "charge" functions with gfp_mask should use GFP_KERNEL or
@@ -137,18 +120,9 @@ mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
struct page *oldpage, struct page *newpage, bool migration_ok);
-struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
- struct mem_cgroup *prev,
- struct mem_cgroup_reclaim_cookie *reclaim,
- mem_cgroup_iter_filter cond);
-
-static inline struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
- struct mem_cgroup *prev,
- struct mem_cgroup_reclaim_cookie *reclaim)
-{
- return mem_cgroup_iter_cond(root, prev, reclaim, NULL);
-}
-
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
+ struct mem_cgroup *,
+ struct mem_cgroup_reclaim_cookie *);
void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
/*
@@ -260,8 +234,7 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
mem_cgroup_update_page_stat(page, idx, -1);
}
-enum mem_cgroup_filter_t
-mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
+bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
struct mem_cgroup *root);
void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
@@ -376,15 +349,6 @@ static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
struct page *oldpage, struct page *newpage, bool migration_ok)
{
}
-static inline struct mem_cgroup *
-mem_cgroup_iter_cond(struct mem_cgroup *root,
- struct mem_cgroup *prev,
- struct mem_cgroup_reclaim_cookie *reclaim,
- mem_cgroup_iter_filter cond)
-{
- /* first call must return non-NULL, second return NULL */
- return (struct mem_cgroup *)(unsigned long)!prev;
-}
static inline struct mem_cgroup *
mem_cgroup_iter(struct mem_cgroup *root,
@@ -471,11 +435,10 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
}
static inline
-enum mem_cgroup_filter_t
-mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
+bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
struct mem_cgroup *root)
{
- return VISIT;
+ return false;
}
static inline void mem_cgroup_split_huge_fixup(struct page *head)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 916892c2b8e0..65e7bec4b0f0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -862,15 +862,6 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
return memcg;
}
-static enum mem_cgroup_filter_t
-mem_cgroup_filter(struct mem_cgroup *memcg, struct mem_cgroup *root,
- mem_cgroup_iter_filter cond)
-{
- if (!cond)
- return VISIT;
- return cond(memcg, root);
-}
-
/*
* Returns a next (in a pre-order walk) alive memcg (with elevated css
* ref. count) or NULL if the whole root's subtree has been visited.
@@ -878,7 +869,7 @@ mem_cgroup_filter(struct mem_cgroup *memcg, struct mem_cgroup *root,
* helper function to be used by mem_cgroup_iter
*/
static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root,
- struct mem_cgroup *last_visited, mem_cgroup_iter_filter cond)
+ struct mem_cgroup *last_visited)
{
struct cgroup_subsys_state *prev_css, *next_css;
@@ -896,31 +887,11 @@ skip_node:
if (next_css) {
struct mem_cgroup *mem = mem_cgroup_from_css(next_css);
- switch (mem_cgroup_filter(mem, root, cond)) {
- case SKIP:
+ if (css_tryget(&mem->css))
+ return mem;
+ else {
prev_css = next_css;
goto skip_node;
- case SKIP_TREE:
- if (mem == root)
- return NULL;
- /*
- * css_rightmost_descendant is not an optimal way to
- * skip through a subtree (especially for imbalanced
- * trees leaning to right) but that's what we have right
- * now. More effective solution would be traversing
- * right-up for first non-NULL without calling
- * css_next_descendant_pre afterwards.
- */
- prev_css = css_rightmost_descendant(next_css);
- goto skip_node;
- case VISIT:
- if (css_tryget(&mem->css))
- return mem;
- else {
- prev_css = next_css;
- goto skip_node;
- }
- break;
}
}
@@ -984,7 +955,6 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
* @root: hierarchy root
* @prev: previously returned memcg, NULL on first invocation
* @reclaim: cookie for shared reclaim walks, NULL for full walks
- * @cond: filter for visited nodes, NULL for no filter
*
* Returns references to children of the hierarchy below @root, or
* @root itself, or %NULL after a full round-trip.
@@ -997,18 +967,15 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
* divide up the memcgs in the hierarchy among all concurrent
* reclaimers operating on the same zone and priority.
*/
-struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
struct mem_cgroup *prev,
- struct mem_cgroup_reclaim_cookie *reclaim,
- mem_cgroup_iter_filter cond)
+ struct mem_cgroup_reclaim_cookie *reclaim)
{
struct mem_cgroup *memcg = NULL;
struct mem_cgroup *last_visited = NULL;
- if (mem_cgroup_disabled()) {
- /* first call must return non-NULL, second return NULL */
- return (struct mem_cgroup *)(unsigned long)!prev;
- }
+ if (mem_cgroup_disabled())
+ return NULL;
if (!root)
root = root_mem_cgroup;
@@ -1019,9 +986,7 @@ struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
if (!root->use_hierarchy && root != root_mem_cgroup) {
if (prev)
goto out_css_put;
- if (mem_cgroup_filter(root, root, cond) == VISIT)
- return root;
- return NULL;
+ return root;
}
rcu_read_lock();
@@ -1044,7 +1009,7 @@ struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
last_visited = mem_cgroup_iter_load(iter, root, &seq);
}
- memcg = __mem_cgroup_iter_next(root, last_visited, cond);
+ memcg = __mem_cgroup_iter_next(root, last_visited);
if (reclaim) {
mem_cgroup_iter_update(iter, last_visited, memcg, seq);
@@ -1055,11 +1020,7 @@ struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
reclaim->generation = iter->generation;
}
- /*
- * We have finished the whole tree walk or no group has been
- * visited because filter told us to skip the root node.
- */
- if (!memcg && (prev || (cond && !last_visited)))
+ if (prev && !memcg)
goto out_unlock;
}
out_unlock:
@@ -1804,14 +1765,13 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
* a) it is over its soft limit
* b) any parent up the hierarchy is over its soft limit
*/
-enum mem_cgroup_filter_t
-mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
+bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
struct mem_cgroup *root)
{
struct mem_cgroup *parent = memcg;
if (res_counter_soft_limit_excess(&memcg->res))
- return VISIT;
+ return true;
/*
* If any parent up to the root in the hierarchy is over its soft limit
@@ -1819,12 +1779,12 @@ mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
*/
while ((parent = parent_mem_cgroup(parent))) {
if (res_counter_soft_limit_excess(&parent->res))
- return VISIT;
+ return true;
if (parent == root)
break;
}
- return SKIP;
+ return false;
}
static DEFINE_SPINLOCK(memcg_oom_lock);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index cdd300b81485..17a7134de4d7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2185,16 +2185,21 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
.zone = zone,
.priority = sc->priority,
};
- struct mem_cgroup *memcg = NULL;
- mem_cgroup_iter_filter filter = (soft_reclaim) ?
- mem_cgroup_soft_reclaim_eligible : NULL;
+ struct mem_cgroup *memcg;
nr_reclaimed = sc->nr_reclaimed;
nr_scanned = sc->nr_scanned;
- while ((memcg = mem_cgroup_iter_cond(root, memcg, &reclaim, filter))) {
+ memcg = mem_cgroup_iter(root, NULL, &reclaim);
+ do {
struct lruvec *lruvec;
+ if (soft_reclaim &&
+ !mem_cgroup_soft_reclaim_eligible(memcg, root)) {
+ memcg = mem_cgroup_iter(root, memcg, &reclaim);
+ continue;
+ }
+
lruvec = mem_cgroup_zone_lruvec(zone, memcg);
shrink_lruvec(lruvec, sc);
@@ -2214,7 +2219,8 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
mem_cgroup_iter_break(root, memcg);
break;
}
- }
+ memcg = mem_cgroup_iter(root, memcg, &reclaim);
+ } while (memcg);
vmpressure(sc->gfp_mask, sc->target_mem_cgroup,
sc->nr_scanned - nr_scanned,