[PATCH] overcommit: add calculate_totalreserve_pages()

These patches are an enhancement of the OVERCOMMIT_GUESS algorithm in __vm_enough_memory().

- why the kernel needed patching

  When the kernel can't allocate anonymous pages in practice, the current OVERCOMMIT_GUESS could still return success. This implementation might be the cause of OOM kills in memory-pressure situations. If Linux runs with page reservation features like /proc/sys/vm/lowmem_reserve_ratio and without a swap region, I think the OOM kill occurs easily.

- the overall design approach in the patch

  When the OVERCOMMIT_GUESS algorithm calculates the number of free pages, the reserved free pages are regarded as non-free pages. This change helps to avoid the pitfall that the number of free pages becomes less than the number which the kernel tries to keep free.

- testing results

  I tested the patches using my test kernel module. If the patches aren't applied to the kernel, __vm_enough_memory() returns success in the situation but the actual page allocation fails. On the other hand, if the patches are applied to the kernel, the memory allocation failure is avoided since __vm_enough_memory() returns failure in the situation. I checked that on an i386 SMP machine with 16GB of memory. I haven't tested on a nommu environment yet.

This patch adds totalreserve_pages for __vm_enough_memory(). calculate_totalreserve_pages() checks the maximum lowmem_reserve pages and pages_high in each zone. Finally, the function stores the sum over all zones in totalreserve_pages. totalreserve_pages is calculated when the VM is initialized, and the variable is updated when /proc/sys/vm/lowmem_reserve_ratio or /proc/sys/vm/min_free_kbytes is changed.

Signed-off-by: Hideo Aoki <haoki@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
author: Hideo AOKI <haoki@redhat.com> 2006-04-10 22:52:59 -0700
committer: Linus Torvalds <torvalds@g5.osdl.org> 2006-04-11 06:18:32 -0700
commit: cb45b0e966cbe747b6189c15b108901cc7d6c97c (patch)
tree: 0402d4809ec175e80b083f7a713ec32c0109baad /mm
parent: e23ca00bf1b1c6c0f04702cb4d29e275ab8dc330 (diff)
download: linux-3.10-cb45b0e966cbe747b6189c15b108901cc7d6c97c.tar.gz
linux-3.10-cb45b0e966cbe747b6189c15b108901cc7d6c97c.tar.bz2
linux-3.10-cb45b0e966cbe747b6189c15b108901cc7d6c97c.zip
1 files changed, 39 insertions, 0 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b8165e037de..97d6827c7d6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -51,6 +51,7 @@ nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
 EXPORT_SYMBOL(node_possible_map);
 unsigned long totalram_pages __read_mostly;
 unsigned long totalhigh_pages __read_mostly;
+unsigned long totalreserve_pages __read_mostly;
 long nr_swap_pages;
 int percpu_pagelist_fraction;
 
@@ -2477,6 +2478,38 @@ void __init page_alloc_init(void)
 }
 
 /*
+ * calculate_totalreserve_pages - called when sysctl_lower_zone_reserve_ratio
+ *	or min_free_kbytes changes.
+ */
+static void calculate_totalreserve_pages(void)
+{
+	struct pglist_data *pgdat;
+	unsigned long reserve_pages = 0;
+	int i, j;
+
+	for_each_online_pgdat(pgdat) {
+		for (i = 0; i < MAX_NR_ZONES; i++) {
+			struct zone *zone = pgdat->node_zones + i;
+			unsigned long max = 0;
+
+			/* Find valid and maximum lowmem_reserve in the zone */
+			for (j = i; j < MAX_NR_ZONES; j++) {
+				if (zone->lowmem_reserve[j] > max)
+					max = zone->lowmem_reserve[j];
+			}
+
+			/* we treat pages_high as reserved pages. */
+			max += zone->pages_high;
+
+			if (max > zone->present_pages)
+				max = zone->present_pages;
+			reserve_pages += max;
+		}
+	}
+	totalreserve_pages = reserve_pages;
+}
+
+/*
  * setup_per_zone_lowmem_reserve - called whenever
  *	sysctl_lower_zone_reserve_ratio changes.  Ensures that each zone
  *	has a correct pages reserved value, so an adequate number of
@@ -2507,6 +2540,9 @@ static void setup_per_zone_lowmem_reserve(void)
 			}
 		}
 	}
+
+	/* update totalreserve_pages */
+	calculate_totalreserve_pages();
 }
 
 /*
@@ -2561,6 +2597,9 @@ void setup_per_zone_pages_min(void)
 		zone->pages_high  = zone->pages_min + tmp / 2;
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 	}
+
+	/* update totalreserve_pages */
+	calculate_totalreserve_pages();
 }
 
 /*
author	Hideo AOKI <haoki@redhat.com>	2006-04-10 22:52:59 -0700
committer	Linus Torvalds <torvalds@g5.osdl.org>	2006-04-11 06:18:32 -0700
commit	cb45b0e966cbe747b6189c15b108901cc7d6c97c (patch)
tree	0402d4809ec175e80b083f7a713ec32c0109baad /mm
parent	e23ca00bf1b1c6c0f04702cb4d29e275ab8dc330 (diff)
download	linux-3.10-cb45b0e966cbe747b6189c15b108901cc7d6c97c.tar.gz linux-3.10-cb45b0e966cbe747b6189c15b108901cc7d6c97c.tar.bz2 linux-3.10-cb45b0e966cbe747b6189c15b108901cc7d6c97c.zip