[PATCH] SPARSEMEM EXTREME

A new option for SPARSEMEM is ARCH_SPARSEMEM_EXTREME. Architecture platforms with a very sparse physical address space would likely want to select this option. For those architecture platforms that don't select the option, the code generated is equivalent to SPARSEMEM currently in -mm. I'll be posting a patch to the ia64 mailing list which uses this new SPARSEMEM feature. ARCH_SPARSEMEM_EXTREME makes mem_section a one-dimensional array of pointers to mem_sections. This two-level layout scheme is able to achieve smaller memory requirements for SPARSEMEM with the tradeoff of an additional shift and load when fetching the memory section. The current SPARSEMEM -mm implementation is a one-dimensional array of mem_sections, which is the default SPARSEMEM configuration. The patch attempts to isolate the implementation details of the physical layout of the sparsemem section array. ARCH_SPARSEMEM_EXTREME depends on 64BIT and is by default boolean false. I've boot tested ia64 configured for ARCH_SPARSEMEM_EXTREME under aim load. I've also boot tested a 4-way Opteron machine with !ARCH_SPARSEMEM_EXTREME and tested with aim. Signed-off-by: Andy Whitcroft <apw@shadowen.org> Signed-off-by: Bob Picco <bob.picco@hp.com> Signed-off-by: Dave Hansen <haveblue@us.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
author: Bob Picco <bob.picco@hp.com> 2005-09-03 15:54:26 -0700
committer: Linus Torvalds <torvalds@evo.osdl.org> 2005-09-05 00:05:38 -0700
commit: 802f192e4a600f7ef84ca25c8b818c8830acef5a (patch)
tree: 51e9a6ed164e6a2d8741af510c3954ad79bf19af /arch
parent: 0216f86dafb389c0ad97529fd45e64e883298cfd (diff)
download: linux-3.10-802f192e4a600f7ef84ca25c8b818c8830acef5a.tar.gz
linux-3.10-802f192e4a600f7ef84ca25c8b818c8830acef5a.tar.bz2
linux-3.10-802f192e4a600f7ef84ca25c8b818c8830acef5a.zip
2 files changed, 49 insertions, 21 deletions
diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c
index c02dc9809ca..b3b1e9c1770 100644
--- a/arch/ppc64/mm/init.c
+++ b/arch/ppc64/mm/init.c
@@ -552,27 +552,18 @@ void __init do_init_bootmem(void)
 	/* Add all physical memory to the bootmem map, mark each area
 	 * present.
 	 */
-	for (i=0; i < lmb.memory.cnt; i++) {
-		unsigned long base, size;
-		unsigned long start_pfn, end_pfn;
-
-		base = lmb.memory.region[i].base;
-		size = lmb.memory.region[i].size;
-
-		start_pfn = base >> PAGE_SHIFT;
-		end_pfn = start_pfn + (size >> PAGE_SHIFT);
-		memory_present(0, start_pfn, end_pfn);
-
-		free_bootmem(base, size);
-	}
+	for (i=0; i < lmb.memory.cnt; i++)
+		free_bootmem(lmb_start_pfn(&lmb.memory, i),
+			     lmb_size_bytes(&lmb.memory, i));
 
 	/* reserve the sections we're already using */
-	for (i=0; i < lmb.reserved.cnt; i++) {
-		unsigned long base = lmb.reserved.region[i].base;
-		unsigned long size = lmb.reserved.region[i].size;
+	for (i=0; i < lmb.reserved.cnt; i++)
+		reserve_bootmem(lmb_start_pfn(&lmb.reserved, i),
+				lmb_size_bytes(&lmb.reserved, i));
 
-		reserve_bootmem(base, size);
-	}
+	for (i=0; i < lmb.memory.cnt; i++)
+		memory_present(0, lmb_start_pfn(&lmb.memory, i),
+			       lmb_end_pfn(&lmb.memory, i));
 }
 
 /*
diff --git a/arch/ppc64/mm/numa.c b/arch/ppc64/mm/numa.c
index c3116f0d788..cb864b8f275 100644
--- a/arch/ppc64/mm/numa.c
+++ b/arch/ppc64/mm/numa.c
@@ -440,8 +440,6 @@ new_range:
 		for (i = start ; i < (start+size); i += MEMORY_INCREMENT)
 			numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] =
 				numa_domain;
-		memory_present(numa_domain, start >> PAGE_SHIFT,
-						(start + size) >> PAGE_SHIFT);
 
 		if (--ranges)
 			goto new_range;
@@ -483,7 +481,6 @@ static void __init setup_nonnuma(void)
 
 	for (i = 0 ; i < top_of_ram; i += MEMORY_INCREMENT)
 		numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0;
-	memory_present(0, 0, init_node_data[0].node_end_pfn);
 }
 
 static void __init dump_numa_topology(void)
@@ -695,6 +692,46 @@ new_range:
 						     size);
 			}
 		}
+		/*
+		 * This loop may look familiar, but we have to do it again
+		 * after marking our reserved memory to mark memory present
+		 * for sparsemem.
+		 */
+		addr_cells = get_mem_addr_cells();
+		size_cells = get_mem_size_cells();
+		memory = NULL;
+		while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
+			unsigned long mem_start, mem_size;
+			int numa_domain, ranges;
+			unsigned int *memcell_buf;
+			unsigned int len;
+
+			memcell_buf = (unsigned int *)get_property(memory, "reg", &len);
+			if (!memcell_buf || len <= 0)
+				continue;
+
+			ranges = memory->n_addrs;	/* ranges in cell */
+new_range2:
+			mem_start = read_n_cells(addr_cells, &memcell_buf);
+			mem_size = read_n_cells(size_cells, &memcell_buf);
+			if (numa_enabled) {
+				numa_domain = of_node_numa_domain(memory);
+				if (numa_domain  >= MAX_NUMNODES)
+					numa_domain = 0;
+			} else
+				numa_domain =  0;
+
+			if (numa_domain != nid)
+				continue;
+
+			mem_size = numa_enforce_memory_limit(mem_start, mem_size);
+			memory_present(numa_domain, mem_start >> PAGE_SHIFT,
+				       (mem_start + mem_size) >> PAGE_SHIFT);
+
+			if (--ranges)		/* process all ranges in cell */
+				goto new_range2;
+		}
+
 	}
 }
author	Bob Picco <bob.picco@hp.com>	2005-09-03 15:54:26 -0700
committer	Linus Torvalds <torvalds@evo.osdl.org>	2005-09-05 00:05:38 -0700
commit	802f192e4a600f7ef84ca25c8b818c8830acef5a (patch)
tree	51e9a6ed164e6a2d8741af510c3954ad79bf19af /arch
parent	0216f86dafb389c0ad97529fd45e64e883298cfd (diff)
download	linux-3.10-802f192e4a600f7ef84ca25c8b818c8830acef5a.tar.gz linux-3.10-802f192e4a600f7ef84ca25c8b818c8830acef5a.tar.bz2 linux-3.10-802f192e4a600f7ef84ca25c8b818c8830acef5a.zip