diff options
author | Mel Gorman <mel@csn.ul.ie> | 2008-04-28 02:12:16 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-28 08:58:18 -0700 |
commit | 54a6eb5c4765aa573a030ceeba2c14e3d2ea5706 (patch) | |
tree | 547176a090beb787722a153cf2b8b942dc0e68db /mm | |
parent | 18ea7e710d2452fa726814a406779188028cf1bf (diff) | |
download | linux-3.10-54a6eb5c4765aa573a030ceeba2c14e3d2ea5706.tar.gz linux-3.10-54a6eb5c4765aa573a030ceeba2c14e3d2ea5706.tar.bz2 linux-3.10-54a6eb5c4765aa573a030ceeba2c14e3d2ea5706.zip |
mm: use two zonelist that are filtered by GFP mask
Currently a node has two sets of zonelists, one for each zone type in the
system and a second set for GFP_THISNODE allocations. Based on the zones
allowed by a gfp mask, one of these zonelists is selected. All of these
zonelists consume memory and occupy cache lines.
This patch replaces the multiple zonelists per-node with two zonelists. The
first contains all populated zones in the system, ordered by distance, for
fallback allocations when the target/preferred node has no free pages. The
second contains all populated zones in the node suitable for GFP_THISNODE
allocations.
An iterator macro is introduced called for_each_zone_zonelist() that interates
through each zone allowed by the GFP flags in the selected zonelist.
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/hugetlb.c | 8 | ||||
-rw-r--r-- | mm/oom_kill.c | 8 | ||||
-rw-r--r-- | mm/page_alloc.c | 170 | ||||
-rw-r--r-- | mm/slab.c | 8 | ||||
-rw-r--r-- | mm/slub.c | 8 | ||||
-rw-r--r-- | mm/vmscan.c | 21 |
6 files changed, 101 insertions, 122 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 51c9e2c0164..ddd141cad77 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -97,11 +97,11 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma, struct mempolicy *mpol; struct zonelist *zonelist = huge_zonelist(vma, address, htlb_alloc_mask, &mpol); - struct zone **z; + struct zone *zone, **z; - for (z = zonelist->zones; *z; z++) { - nid = zone_to_nid(*z); - if (cpuset_zone_allowed_softwall(*z, htlb_alloc_mask) && + for_each_zone_zonelist(zone, z, zonelist, MAX_NR_ZONES - 1) { + nid = zone_to_nid(zone); + if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) && !list_empty(&hugepage_freelists[nid])) { page = list_entry(hugepage_freelists[nid].next, struct page, lru); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index beb592fe938..2c93502cfcb 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -175,12 +175,14 @@ static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask) { #ifdef CONFIG_NUMA + struct zone *zone; struct zone **z; + enum zone_type high_zoneidx = gfp_zone(gfp_mask); nodemask_t nodes = node_states[N_HIGH_MEMORY]; - for (z = zonelist->zones; *z; z++) - if (cpuset_zone_allowed_softwall(*z, gfp_mask)) - node_clear(zone_to_nid(*z), nodes); + for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) + if (cpuset_zone_allowed_softwall(zone, gfp_mask)) + node_clear(zone_to_nid(zone), nodes); else return CONSTRAINT_CPUSET; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 187efd47a44..4ccb8651cf2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1378,42 +1378,29 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z) */ static struct page * get_page_from_freelist(gfp_t gfp_mask, unsigned int order, - struct zonelist *zonelist, int alloc_flags) + struct zonelist *zonelist, int high_zoneidx, int alloc_flags) { struct zone **z; struct page *page = NULL; - int classzone_idx = zone_idx(zonelist->zones[0]); + int classzone_idx; struct zone *zone, *preferred_zone; nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */ int zlc_active = 0; /* set if using zonelist_cache */ int did_zlc_setup = 0; /* just call zlc_setup() one time */ - enum zone_type highest_zoneidx = -1; /* Gets set for policy zonelists */ + + z = first_zones_zonelist(zonelist, high_zoneidx); + classzone_idx = zone_idx(*z); + preferred_zone = *z; zonelist_scan: /* * Scan zonelist, looking for a zone with enough free. * See also cpuset_zone_allowed() comment in kernel/cpuset.c. */ - z = zonelist->zones; - preferred_zone = *z; - - do { - /* - * In NUMA, this could be a policy zonelist which contains - * zones that may not be allowed by the current gfp_mask. - * Check the zone is allowed by the current flags - */ - if (unlikely(alloc_should_filter_zonelist(zonelist))) { - if (highest_zoneidx == -1) - highest_zoneidx = gfp_zone(gfp_mask); - if (zone_idx(*z) > highest_zoneidx) - continue; - } - + for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { if (NUMA_BUILD && zlc_active && !zlc_zone_worth_trying(zonelist, z, allowednodes)) continue; - zone = *z; if ((alloc_flags & ALLOC_CPUSET) && !cpuset_zone_allowed_softwall(zone, gfp_mask)) goto try_next_zone; @@ -1447,7 +1434,7 @@ try_next_zone: zlc_active = 1; did_zlc_setup = 1; } - } while (*(++z) != NULL); + } if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) { /* Disable zlc cache for second zonelist scan */ @@ -1465,6 +1452,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist) { const gfp_t wait = gfp_mask & __GFP_WAIT; + enum zone_type high_zoneidx = gfp_zone(gfp_mask); struct zone **z; struct page *page; struct reclaim_state reclaim_state; @@ -1490,7 +1478,7 @@ restart: } page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order, - zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET); + zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET); if (page) goto got_pg; @@ -1534,7 +1522,8 @@ restart: * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc. * See also cpuset_zone_allowed() comment in kernel/cpuset.c. */ - page = get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags); + page = get_page_from_freelist(gfp_mask, order, zonelist, + high_zoneidx, alloc_flags); if (page) goto got_pg; @@ -1547,7 +1536,7 @@ rebalance: nofail_alloc: /* go through the zonelist yet again, ignoring mins */ page = get_page_from_freelist(gfp_mask, order, - zonelist, ALLOC_NO_WATERMARKS); + zonelist, high_zoneidx, ALLOC_NO_WATERMARKS); if (page) goto got_pg; if (gfp_mask & __GFP_NOFAIL) { @@ -1582,7 +1571,7 @@ nofail_alloc: if (likely(did_some_progress)) { page = get_page_from_freelist(gfp_mask, order, - zonelist, alloc_flags); + zonelist, high_zoneidx, alloc_flags); if (page) goto got_pg; } else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) { @@ -1598,7 +1587,7 @@ nofail_alloc: * under heavy pressure. */ page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order, - zonelist, ALLOC_WMARK_HIGH|ALLOC_CPUSET); + zonelist, high_zoneidx, ALLOC_WMARK_HIGH|ALLOC_CPUSET); if (page) { clear_zonelist_oom(zonelist); goto got_pg; @@ -1713,14 +1702,15 @@ EXPORT_SYMBOL(free_pages); static unsigned int nr_free_zone_pages(int offset) { + struct zone **z; + struct zone *zone; + /* Just pick one node, since fallback list is circular */ unsigned int sum = 0; struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL); - struct zone **zonep = zonelist->zones; - struct zone *zone; - for (zone = *zonep++; zone; zone = *zonep++) { + for_each_zone_zonelist(zone, z, zonelist, offset) { unsigned long size = zone->present_pages; unsigned long high = zone->pages_high; if (size > high) @@ -2078,17 +2068,15 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask) */ static void build_zonelists_in_node_order(pg_data_t *pgdat, int node) { - enum zone_type i; int j; struct zonelist *zonelist; - for (i = 0; i < MAX_NR_ZONES; i++) { - zonelist = pgdat->node_zonelists + i; - for (j = 0; zonelist->zones[j] != NULL; j++) - ; - j = build_zonelists_node(NODE_DATA(node), zonelist, j, i); - zonelist->zones[j] = NULL; - } + zonelist = &pgdat->node_zonelists[0]; + for (j = 0; zonelist->zones[j] != NULL; j++) + ; + j = build_zonelists_node(NODE_DATA(node), zonelist, j, + MAX_NR_ZONES - 1); + zonelist->zones[j] = NULL; } /* @@ -2096,15 +2084,12 @@ static void build_zonelists_in_node_order(pg_data_t *pgdat, int node) */ static void build_thisnode_zonelists(pg_data_t *pgdat) { - enum zone_type i; int j; struct zonelist *zonelist; - for (i = 0; i < MAX_NR_ZONES; i++) { - zonelist = pgdat->node_zonelists + MAX_NR_ZONES + i; - j = build_zonelists_node(pgdat, zonelist, 0, i); - zonelist->zones[j] = NULL; - } + zonelist = &pgdat->node_zonelists[1]; + j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1); + zonelist->zones[j] = NULL; } /* @@ -2117,27 +2102,24 @@ static int node_order[MAX_NUMNODES]; static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes) { - enum zone_type i; int pos, j, node; int zone_type; /* needs to be signed */ struct zone *z; struct zonelist *zonelist; - for (i = 0; i < MAX_NR_ZONES; i++) { - zonelist = pgdat->node_zonelists + i; - pos = 0; - for (zone_type = i; zone_type >= 0; zone_type--) { - for (j = 0; j < nr_nodes; j++) { - node = node_order[j]; - z = &NODE_DATA(node)->node_zones[zone_type]; - if (populated_zone(z)) { - zonelist->zones[pos++] = z; - check_highest_zone(zone_type); - } + zonelist = &pgdat->node_zonelists[0]; + pos = 0; + for (zone_type = MAX_NR_ZONES - 1; zone_type >= 0; zone_type--) { + for (j = 0; j < nr_nodes; j++) { + node = node_order[j]; + z = &NODE_DATA(node)->node_zones[zone_type]; + if (populated_zone(z)) { + zonelist->zones[pos++] = z; + check_highest_zone(zone_type); } } - zonelist->zones[pos] = NULL; } + zonelist->zones[pos] = NULL; } static int default_zonelist_order(void) @@ -2264,19 +2246,15 @@ static void build_zonelists(pg_data_t *pgdat) /* Construct the zonelist performance cache - see further mmzone.h */ static void build_zonelist_cache(pg_data_t *pgdat) { - int i; - - for (i = 0; i < MAX_NR_ZONES; i++) { - struct zonelist *zonelist; - struct zonelist_cache *zlc; - struct zone **z; + struct zonelist *zonelist; + struct zonelist_cache *zlc; + struct zone **z; - zonelist = pgdat->node_zonelists + i; - zonelist->zlcache_ptr = zlc = &zonelist->zlcache; - bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST); - for (z = zonelist->zones; *z; z++) - zlc->z_to_n[z - zonelist->zones] = zone_to_nid(*z); - } + zonelist = &pgdat->node_zonelists[0]; + zonelist->zlcache_ptr = zlc = &zonelist->zlcache; + bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST); + for (z = zonelist->zones; *z; z++) + zlc->z_to_n[z - zonelist->zones] = zone_to_nid(*z); } @@ -2290,45 +2268,43 @@ static void set_zonelist_order(void) static void build_zonelists(pg_data_t *pgdat) { int node, local_node; - enum zone_type i,j; + enum zone_type j; + struct zonelist *zonelist; local_node = pgdat->node_id; - for (i = 0; i < MAX_NR_ZONES; i++) { - struct zonelist *zonelist; - zonelist = pgdat->node_zonelists + i; + zonelist = &pgdat->node_zonelists[0]; + j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1); - j = build_zonelists_node(pgdat, zonelist, 0, i); - /* - * Now we build the zonelist so that it contains the zones - * of all the other nodes. - * We don't want to pressure a particular node, so when - * building the zones for node N, we make sure that the - * zones coming right after the local ones are those from - * node N+1 (modulo N) - */ - for (node = local_node + 1; node < MAX_NUMNODES; node++) { - if (!node_online(node)) - continue; - j = build_zonelists_node(NODE_DATA(node), zonelist, j, i); - } - for (node = 0; node < local_node; node++) { - if (!node_online(node)) - continue; - j = build_zonelists_node(NODE_DATA(node), zonelist, j, i); - } - - zonelist->zones[j] = NULL; + /* + * Now we build the zonelist so that it contains the zones + * of all the other nodes. + * We don't want to pressure a particular node, so when + * building the zones for node N, we make sure that the + * zones coming right after the local ones are those from + * node N+1 (modulo N) + */ + for (node = local_node + 1; node < MAX_NUMNODES; node++) { + if (!node_online(node)) + continue; + j = build_zonelists_node(NODE_DATA(node), zonelist, j, + MAX_NR_ZONES - 1); } + for (node = 0; node < local_node; node++) { + if (!node_online(node)) + continue; + j = build_zonelists_node(NODE_DATA(node), zonelist, j, + MAX_NR_ZONES - 1); + } + + zonelist->zones[j] = NULL; } /* non-NUMA variant of zonelist performance cache - just NULL zlcache_ptr */ static void build_zonelist_cache(pg_data_t *pgdat) { - int i; - - for (i = 0; i < MAX_NR_ZONES; i++) - pgdat->node_zonelists[i].zlcache_ptr = NULL; + pgdat->node_zonelists[0].zlcache_ptr = NULL; + pgdat->node_zonelists[1].zlcache_ptr = NULL; } #endif /* CONFIG_NUMA */ diff --git a/mm/slab.c b/mm/slab.c index 5488c54b117..29851841da6 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3243,6 +3243,8 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) struct zonelist *zonelist; gfp_t local_flags; struct zone **z; + struct zone *zone; + enum zone_type high_zoneidx = gfp_zone(flags); void *obj = NULL; int nid; @@ -3257,10 +3259,10 @@ retry: * Look through allowed nodes for objects available * from existing per node queues. */ - for (z = zonelist->zones; *z && !obj; z++) { - nid = zone_to_nid(*z); + for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { + nid = zone_to_nid(zone); - if (cpuset_zone_allowed_hardwall(*z, flags) && + if (cpuset_zone_allowed_hardwall(zone, flags) && cache->nodelists[nid] && cache->nodelists[nid]->free_objects) obj = ____cache_alloc_node(cache, diff --git a/mm/slub.c b/mm/slub.c index 19ebbfb2068..80d20cc1c0f 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1285,6 +1285,8 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) #ifdef CONFIG_NUMA struct zonelist *zonelist; struct zone **z; + struct zone *zone; + enum zone_type high_zoneidx = gfp_zone(flags); struct page *page; /* @@ -1310,12 +1312,12 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) return NULL; zonelist = node_zonelist(slab_node(current->mempolicy), flags); - for (z = zonelist->zones; *z; z++) { + for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { struct kmem_cache_node *n; - n = get_node(s, zone_to_nid(*z)); + n = get_node(s, zone_to_nid(zone)); - if (n && cpuset_zone_allowed_hardwall(*z, flags) && + if (n && cpuset_zone_allowed_hardwall(zone, flags) && n->nr_partial > MIN_PARTIAL) { page = get_partial_node(n); if (page) diff --git a/mm/vmscan.c b/mm/vmscan.c index ef8551e0d2d..0515b8f4489 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1249,15 +1249,13 @@ static unsigned long shrink_zone(int priority, struct zone *zone, static unsigned long shrink_zones(int priority, struct zonelist *zonelist, struct scan_control *sc) { + enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask); unsigned long nr_reclaimed = 0; - struct zone **zones = zonelist->zones; - int i; - + struct zone **z; + struct zone *zone; sc->all_unreclaimable = 1; - for (i = 0; zones[i] != NULL; i++) { - struct zone *zone = zones[i]; - + for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { if (!populated_zone(zone)) continue; /* @@ -1311,8 +1309,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, unsigned long nr_reclaimed = 0; struct reclaim_state *reclaim_state = current->reclaim_state; unsigned long lru_pages = 0; - struct zone **zones = zonelist->zones; - int i; + struct zone **z; + struct zone *zone; + enum zone_type high_zoneidx = gfp_zone(gfp_mask); if (scan_global_lru(sc)) count_vm_event(ALLOCSTALL); @@ -1320,8 +1319,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, * mem_cgroup will not do shrink_slab. */ if (scan_global_lru(sc)) { - for (i = 0; zones[i] != NULL; i++) { - struct zone *zone = zones[i]; + for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) continue; @@ -1385,8 +1383,7 @@ out: priority = 0; if (scan_global_lru(sc)) { - for (i = 0; zones[i] != NULL; i++) { - struct zone *zone = zones[i]; + for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) continue; |