22 files changed, 170 insertions, 123 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 4e9937ac3529..391ffc54d136 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -29,7 +29,7 @@ config FLATMEM_MANUAL
 	  If unsure, choose this option (Flat Memory) over any other.
 
 config DISCONTIGMEM_MANUAL
-	bool "Discontigious Memory"
+	bool "Discontiguous Memory"
 	depends on ARCH_DISCONTIGMEM_ENABLE
 	help
 	  This option provides enhanced support for discontiguous
@@ -52,7 +52,7 @@ config SPARSEMEM_MANUAL
 	  memory hotplug systems.  This is normal.
 
 	  For many other systems, this will be an alternative to
-	  "Discontigious Memory".  This option provides some potential
+	  "Discontiguous Memory".  This option provides some potential
 	  performance benefits, along with decreased code complexity,
 	  but it is newer, and more experimental.
 
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 8ec4e4c2a179..a58699b6579e 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -61,17 +61,9 @@ static unsigned long __init init_bootmem_core (pg_data_t *pgdat,
 {
 	bootmem_data_t *bdata = pgdat->bdata;
 	unsigned long mapsize = ((end - start)+7)/8;
-	static struct pglist_data *pgdat_last;
-
-	pgdat->pgdat_next = NULL;
-	/* Add new nodes last so that bootmem always starts
-	   searching in the first nodes, not the last ones */
-	if (pgdat_last)
-		pgdat_last->pgdat_next = pgdat;
-	else {
-		pgdat_list = pgdat; 	
-		pgdat_last = pgdat;
-	}
+
+	pgdat->pgdat_next = pgdat_list;
+	pgdat_list = pgdat;
 
 	mapsize = ALIGN(mapsize, sizeof(long));
 	bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT);
@@ -162,10 +154,10 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
  */
 static void * __init
 __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
-		unsigned long align, unsigned long goal)
+	      unsigned long align, unsigned long goal, unsigned long limit)
 {
 	unsigned long offset, remaining_size, areasize, preferred;
-	unsigned long i, start = 0, incr, eidx;
+	unsigned long i, start = 0, incr, eidx, end_pfn = bdata->node_low_pfn;
 	void *ret;
 
 	if(!size) {
@@ -174,7 +166,14 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
 	}
 	BUG_ON(align & (align-1));
 
-	eidx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
+	if (limit && bdata->node_boot_start >= limit)
+		return NULL;
+
+        limit >>=PAGE_SHIFT;
+	if (limit && end_pfn > limit)
+		end_pfn = limit;
+
+	eidx = end_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
 	offset = 0;
 	if (align &&
 	    (bdata->node_boot_start & (align - 1UL)) != 0)
@@ -186,11 +185,12 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
 	 * first, then we try to allocate lower pages.
 	 */
 	if (goal && (goal >= bdata->node_boot_start) && 
-	    ((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) {
+	    ((goal >> PAGE_SHIFT) < end_pfn)) {
 		preferred = goal - bdata->node_boot_start;
 
 		if (bdata->last_success >= preferred)
-			preferred = bdata->last_success;
+			if (!limit || (limit && limit > bdata->last_success))
+				preferred = bdata->last_success;
 	} else
 		preferred = 0;
 
@@ -390,14 +390,15 @@ unsigned long __init free_all_bootmem (void)
 	return(free_all_bootmem_core(NODE_DATA(0)));
 }
 
-void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal)
+void * __init __alloc_bootmem_limit (unsigned long size, unsigned long align, unsigned long goal,
+				unsigned long limit)
 {
 	pg_data_t *pgdat = pgdat_list;
 	void *ptr;
 
 	for_each_pgdat(pgdat)
 		if ((ptr = __alloc_bootmem_core(pgdat->bdata, size,
-						align, goal)))
+						 align, goal, limit)))
 			return(ptr);
 
 	/*
@@ -408,14 +409,16 @@ void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned
 	return NULL;
 }
 
-void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal)
+
+void * __init __alloc_bootmem_node_limit (pg_data_t *pgdat, unsigned long size, unsigned long align,
+				     unsigned long goal, unsigned long limit)
 {
 	void *ptr;
 
-	ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal);
+	ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal, limit);
 	if (ptr)
 		return (ptr);
 
-	return __alloc_bootmem(size, align, goal);
+	return __alloc_bootmem_limit(size, align, goal, limit);
 }
 
diff --git a/mm/fremap.c b/mm/fremap.c
index 3235fb77c133..ab23a0673c35 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -89,6 +89,9 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	if (!page->mapping || page->index >= size)
 		goto err_unlock;
+	err = -ENOMEM;
+	if (page_mapcount(page) > INT_MAX/2)
+		goto err_unlock;
 
 	zap_pte(mm, vma, addr, pte);
 
diff --git a/mm/highmem.c b/mm/highmem.c
index 400911599468..90e1861e2da0 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -30,7 +30,7 @@
 
 static mempool_t *page_pool, *isa_page_pool;
 
-static void *page_pool_alloc(unsigned int __nocast gfp_mask, void *data)
+static void *page_pool_alloc(gfp_t gfp_mask, void *data)
 {
 	unsigned int gfp = gfp_mask | (unsigned int) (long) data;
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 901ac523a1c3..61d380678030 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -274,21 +274,22 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 {
 	pte_t *src_pte, *dst_pte, entry;
 	struct page *ptepage;
-	unsigned long addr = vma->vm_start;
-	unsigned long end = vma->vm_end;
+	unsigned long addr;
 
-	while (addr < end) {
+	for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
 		dst_pte = huge_pte_alloc(dst, addr);
 		if (!dst_pte)
 			goto nomem;
+		spin_lock(&src->page_table_lock);
 		src_pte = huge_pte_offset(src, addr);
-		BUG_ON(!src_pte || pte_none(*src_pte)); /* prefaulted */
-		entry = *src_pte;
-		ptepage = pte_page(entry);
-		get_page(ptepage);
-		add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
-		set_huge_pte_at(dst, addr, dst_pte, entry);
-		addr += HPAGE_SIZE;
+		if (src_pte && !pte_none(*src_pte)) {
+			entry = *src_pte;
+			ptepage = pte_page(entry);
+			get_page(ptepage);
+			add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
+			set_huge_pte_at(dst, addr, dst_pte, entry);
+		}
+		spin_unlock(&src->page_table_lock);
 	}
 	return 0;
 
@@ -323,8 +324,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 
 		page = pte_page(pte);
 		put_page(page);
+		add_mm_counter(mm, rss,  - (HPAGE_SIZE / PAGE_SIZE));
 	}
-	add_mm_counter(mm, rss,  -((end - start) >> PAGE_SHIFT));
 	flush_tlb_range(vma, start, end);
 }
 
@@ -393,6 +394,28 @@ out:
 	return ret;
 }
 
+/*
+ * On ia64 at least, it is possible to receive a hugetlb fault from a
+ * stale zero entry left in the TLB from earlier hardware prefetching.
+ * Low-level arch code should already have flushed the stale entry as
+ * part of its fault handling, but we do need to accept this minor fault
+ * and return successfully.  Whereas the "normal" case is that this is
+ * an access to a hugetlb page which has been truncated off since mmap.
+ */
+int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+			unsigned long address, int write_access)
+{
+	int ret = VM_FAULT_SIGBUS;
+	pte_t *pte;
+
+	spin_lock(&mm->page_table_lock);
+	pte = huge_pte_offset(mm, address);
+	if (pte && !pte_none(*pte))
+		ret = VM_FAULT_MINOR;
+	spin_unlock(&mm->page_table_lock);
+	return ret;
+}
+
 int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			struct page **pages, struct vm_area_struct **vmas,
 			unsigned long *position, int *length, int i)
@@ -403,6 +426,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	BUG_ON(!is_vm_hugetlb_page(vma));
 
 	vpfn = vaddr/PAGE_SIZE;
+	spin_lock(&mm->page_table_lock);
 	while (vaddr < vma->vm_end && remainder) {
 
 		if (pages) {
@@ -415,8 +439,13 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			 * indexing below to work. */
 			pte = huge_pte_offset(mm, vaddr & HPAGE_MASK);
 
-			/* hugetlb should be locked, and hence, prefaulted */
-			WARN_ON(!pte || pte_none(*pte));
+			/* the hugetlb file might have been truncated */
+			if (!pte || pte_none(*pte)) {
+				remainder = 0;
+				if (!i)
+					i = -EFAULT;
+				break;
+			}
 
 			page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
 
@@ -434,7 +463,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		--remainder;
 		++i;
 	}
-
+	spin_unlock(&mm->page_table_lock);
 	*length = remainder;
 	*position = vaddr;
 
diff --git a/mm/madvise.c b/mm/madvise.c
index 4454936f87d1..20e075d1c64c 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -83,6 +83,9 @@ static long madvise_willneed(struct vm_area_struct * vma,
 {
 	struct file *file = vma->vm_file;
 
+	if (!file)
+		return -EBADF;
+
 	if (file->f_mapping->a_ops->get_xip_page) {
 		/* no bad return value, but ignore advice */
 		return 0;
@@ -141,11 +144,7 @@ static long
 madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
 		unsigned long start, unsigned long end, int behavior)
 {
-	struct file *filp = vma->vm_file;
-	long error = -EBADF;
-
-	if (!filp)
-		goto  out;
+	long error;
 
 	switch (behavior) {
 	case MADV_NORMAL:
@@ -166,8 +165,6 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
 		error = -EINVAL;
 		break;
 	}
-		
-out:
 	return error;
 }
 
diff --git a/mm/memory.c b/mm/memory.c
index ae8161f1f459..1db40e935e55 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2045,8 +2045,8 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
 
 	inc_page_state(pgfault);
 
-	if (is_vm_hugetlb_page(vma))
-		return VM_FAULT_SIGBUS;	/* mapping truncation does this. */
+	if (unlikely(is_vm_hugetlb_page(vma)))
+		return hugetlb_fault(mm, vma, address, write_access);
 
 	/*
 	 * We need the page table lock to synchronize with kswapd
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 9033f0859aa8..37af443eb094 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -687,7 +687,7 @@ get_vma_policy(struct task_struct *task, struct vm_area_struct *vma, unsigned lo
 }
 
 /* Return a zonelist representing a mempolicy */
-static struct zonelist *zonelist_policy(unsigned int __nocast gfp, struct mempolicy *policy)
+static struct zonelist *zonelist_policy(gfp_t gfp, struct mempolicy *policy)
 {
 	int nd;
 
@@ -751,7 +751,7 @@ static unsigned offset_il_node(struct mempolicy *pol,
 
 /* Allocate a page in interleaved policy.
    Own path because it needs to do special accounting. */
-static struct page *alloc_page_interleave(unsigned int __nocast gfp, unsigned order, unsigned nid)
+static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, unsigned nid)
 {
 	struct zonelist *zl;
 	struct page *page;
@@ -789,7 +789,7 @@ static struct page *alloc_page_interleave(unsigned int __nocast gfp, unsigned or
  *	Should be called with the mm_sem of the vma hold.
  */
 struct page *
-alloc_page_vma(unsigned int __nocast gfp, struct vm_area_struct *vma, unsigned long addr)
+alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
 {
 	struct mempolicy *pol = get_vma_policy(current, vma, addr);
 
@@ -832,7 +832,7 @@ alloc_page_vma(unsigned int __nocast gfp, struct vm_area_struct *vma, unsigned l
  *	1) it's ok to take cpuset_sem (can WAIT), and
  *	2) allocating for current task (not interrupt).
  */
-struct page *alloc_pages_current(unsigned int __nocast gfp, unsigned order)
+struct page *alloc_pages_current(gfp_t gfp, unsigned order)
 {
 	struct mempolicy *pol = current->mempolicy;
 
diff --git a/mm/mempool.c b/mm/mempool.c
index 65f2957b8d51..9e377ea700b2 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -112,7 +112,7 @@ EXPORT_SYMBOL(mempool_create_node);
  * while this function is running. mempool_alloc() & mempool_free()
  * might be called (eg. from IRQ contexts) while this function executes.
  */
-int mempool_resize(mempool_t *pool, int new_min_nr, unsigned int __nocast gfp_mask)
+int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask)
 {
 	void *element;
 	void **new_elements;
@@ -200,7 +200,7 @@ EXPORT_SYMBOL(mempool_destroy);
  * *never* fails when called from process contexts. (it might
  * fail if called from an IRQ context.)
  */
-void * mempool_alloc(mempool_t *pool, unsigned int __nocast gfp_mask)
+void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
 {
 	void *element;
 	unsigned long flags;
@@ -276,7 +276,7 @@ EXPORT_SYMBOL(mempool_free);
 /*
  * A commonly used alloc and free fn.
  */
-void *mempool_alloc_slab(unsigned int __nocast gfp_mask, void *pool_data)
+void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data)
 {
 	kmem_cache_t *mem = (kmem_cache_t *) pool_data;
 	return kmem_cache_alloc(mem, gfp_mask);
diff --git a/mm/mmap.c b/mm/mmap.c
index 8b8e05f07cdb..fa11d91242e8 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1640,7 +1640,7 @@ static void unmap_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
 /*
  * Get rid of page table information in the indicated region.
  *
- * Called with the page table lock held.
+ * Called with the mm semaphore held.
  */
 static void unmap_region(struct mm_struct *mm,
 		struct vm_area_struct *vma, struct vm_area_struct *prev,
diff --git a/mm/mprotect.c b/mm/mprotect.c
index e9fbd013ad9a..57577f63b305 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -248,7 +248,8 @@ sys_mprotect(unsigned long start, size_t len, unsigned long prot)
 
 		newflags = vm_flags | (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));
 
-		if ((newflags & ~(newflags >> 4)) & 0xf) {
+		/* newflags >> 4 shift VM_MAY% in place of VM_% */
+		if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) {
 			error = -EACCES;
 			goto out;
 		}
diff --git a/mm/mremap.c b/mm/mremap.c
index a32fed454bd7..f343fc73a8bd 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -141,10 +141,10 @@ move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
 			if (dst) {
 				pte_t pte;
 				pte = ptep_clear_flush(vma, old_addr, src);
+
 				/* ZERO_PAGE can be dependant on virtual addr */
-				if (pfn_valid(pte_pfn(pte)) &&
-					pte_page(pte) == ZERO_PAGE(old_addr))
-					pte = pte_wrprotect(mk_pte(ZERO_PAGE(new_addr), new_vma->vm_page_prot));
+				pte = move_pte(pte, new_vma->vm_page_prot,
+							old_addr, new_addr);
 				set_pte_at(mm, new_addr, dst, pte);
 			} else
 				error = -ENOMEM;
diff --git a/mm/nommu.c b/mm/nommu.c
index 064d70442895..0ef241ae3763 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -157,8 +157,7 @@ void vfree(void *addr)
 	kfree(addr);
 }
 
-void *__vmalloc(unsigned long size, unsigned int __nocast gfp_mask,
-			pgprot_t prot)
+void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
 {
 	/*
 	 * kmalloc doesn't like __GFP_HIGHMEM for some reason
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index ac3bf33e5370..d348b9035955 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -263,7 +263,7 @@ static struct mm_struct *oom_kill_process(struct task_struct *p)
  * OR try to be smart about which process to kill. Note that we
  * don't have to be perfect here, we just have to be good.
  */
-void out_of_memory(unsigned int __nocast gfp_mask, int order)
+void out_of_memory(gfp_t gfp_mask, int order)
 {
 	struct mm_struct *mm = NULL;
 	task_t * p;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ae2903339e71..cc1fe2672a31 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -671,7 +671,7 @@ void fastcall free_cold_page(struct page *page)
 	free_hot_cold_page(page, 1);
 }
 
-static inline void prep_zero_page(struct page *page, int order, unsigned int __nocast gfp_flags)
+static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
 {
 	int i;
 
@@ -686,7 +686,7 @@ static inline void prep_zero_page(struct page *page, int order, unsigned int __n
  * or two.
  */
 static struct page *
-buffered_rmqueue(struct zone *zone, int order, unsigned int __nocast gfp_flags)
+buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
 {
 	unsigned long flags;
 	struct page *page = NULL;
@@ -761,7 +761,7 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 }
 
 static inline int
-should_reclaim_zone(struct zone *z, unsigned int gfp_mask)
+should_reclaim_zone(struct zone *z, gfp_t gfp_mask)
 {
 	if (!z->reclaim_pages)
 		return 0;
@@ -774,7 +774,7 @@ should_reclaim_zone(struct zone *z, unsigned int gfp_mask)
  * This is the 'heart' of the zoned buddy allocator.
  */
 struct page * fastcall
-__alloc_pages(unsigned int __nocast gfp_mask, unsigned int order,
+__alloc_pages(gfp_t gfp_mask, unsigned int order,
 		struct zonelist *zonelist)
 {
 	const int wait = gfp_mask & __GFP_WAIT;
@@ -977,7 +977,7 @@ EXPORT_SYMBOL(__alloc_pages);
 /*
  * Common helper functions.
  */
-fastcall unsigned long __get_free_pages(unsigned int __nocast gfp_mask, unsigned int order)
+fastcall unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)
 {
 	struct page * page;
 	page = alloc_pages(gfp_mask, order);
@@ -988,7 +988,7 @@ fastcall unsigned long __get_free_pages(unsigned int __nocast gfp_mask, unsigned
 
 EXPORT_SYMBOL(__get_free_pages);
 
-fastcall unsigned long get_zeroed_page(unsigned int __nocast gfp_mask)
+fastcall unsigned long get_zeroed_page(gfp_t gfp_mask)
 {
 	struct page * page;
 
diff --git a/mm/page_io.c b/mm/page_io.c
index 2e605a19ce57..330e00d6db00 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -19,7 +19,7 @@
 #include <linux/writeback.h>
 #include <asm/pgtable.h>
 
-static struct bio *get_swap_bio(unsigned int __nocast gfp_flags, pgoff_t index,
+static struct bio *get_swap_bio(gfp_t gfp_flags, pgoff_t index,
 				struct page *page, bio_end_io_t end_io)
 {
 	struct bio *bio;
diff --git a/mm/shmem.c b/mm/shmem.c
index 1f7aeb210c7b..ea064d89cda9 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -921,8 +921,7 @@ shmem_swapin(struct shmem_inode_info *info,swp_entry_t entry,unsigned long idx)
 }
 
 static inline struct page *
-shmem_alloc_page(unsigned int __nocast gfp,struct shmem_inode_info *info,
-				 unsigned long idx)
+shmem_alloc_page(gfp_t gfp,struct shmem_inode_info *info, unsigned long idx)
 {
 	return alloc_page(gfp | __GFP_ZERO);
 }
diff --git a/mm/slab.c b/mm/slab.c
index 437d3388054b..d05c678bceb3 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -308,12 +308,12 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
 #define	SIZE_L3 (1 + MAX_NUMNODES)
 
 /*
- * This function may be completely optimized away if
+ * This function must be completely optimized away if
  * a constant is passed to it. Mostly the same as
  * what is in linux/slab.h except it returns an
  * index.
  */
-static inline int index_of(const size_t size)
+static __always_inline int index_of(const size_t size)
 {
 	if (__builtin_constant_p(size)) {
 		int i = 0;
@@ -329,7 +329,8 @@ static inline int index_of(const size_t size)
 			extern void __bad_size(void);
 			__bad_size();
 		}
-	}
+	} else
+		BUG();
 	return 0;
 }
 
@@ -639,7 +640,7 @@ static enum {
 
 static DEFINE_PER_CPU(struct work_struct, reap_work);
 
-static void free_block(kmem_cache_t* cachep, void** objpp, int len);
+static void free_block(kmem_cache_t* cachep, void** objpp, int len, int node);
 static void enable_cpucache (kmem_cache_t *cachep);
 static void cache_reap (void *unused);
 static int __node_shrink(kmem_cache_t *cachep, int node);
@@ -649,8 +650,7 @@ static inline struct array_cache *ac_data(kmem_cache_t *cachep)
 	return cachep->array[smp_processor_id()];
 }
 
-static inline kmem_cache_t *__find_general_cachep(size_t size,
-						unsigned int __nocast gfpflags)
+static inline kmem_cache_t *__find_general_cachep(size_t size, gfp_t gfpflags)
 {
 	struct cache_sizes *csizep = malloc_sizes;
 
@@ -674,8 +674,7 @@ static inline kmem_cache_t *__find_general_cachep(size_t size,
 	return csizep->cs_cachep;
 }
 
-kmem_cache_t *kmem_find_general_cachep(size_t size,
-		unsigned int __nocast gfpflags)
+kmem_cache_t *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
 {
 	return __find_general_cachep(size, gfpflags);
 }
@@ -804,7 +803,7 @@ static inline void __drain_alien_cache(kmem_cache_t *cachep, struct array_cache
 
 	if (ac->avail) {
 		spin_lock(&rl3->list_lock);
-		free_block(cachep, ac->entry, ac->avail);
+		free_block(cachep, ac->entry, ac->avail, node);
 		ac->avail = 0;
 		spin_unlock(&rl3->list_lock);
 	}
@@ -925,7 +924,7 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 			/* Free limit for this kmem_list3 */
 			l3->free_limit -= cachep->batchcount;
 			if (nc)
-				free_block(cachep, nc->entry, nc->avail);
+				free_block(cachep, nc->entry, nc->avail, node);
 
 			if (!cpus_empty(mask)) {
                                 spin_unlock(&l3->list_lock);
@@ -934,7 +933,7 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 
 			if (l3->shared) {
 				free_block(cachep, l3->shared->entry,
-						l3->shared->avail);
+						l3->shared->avail, node);
 				kfree(l3->shared);
 				l3->shared = NULL;
 			}
@@ -1184,7 +1183,7 @@ __initcall(cpucache_init);
  * did not request dmaable memory, we might get it, but that
  * would be relatively rare and ignorable.
  */
-static void *kmem_getpages(kmem_cache_t *cachep, unsigned int __nocast flags, int nodeid)
+static void *kmem_getpages(kmem_cache_t *cachep, gfp_t flags, int nodeid)
 {
 	struct page *page;
 	void *addr;
@@ -1882,12 +1881,13 @@ static void do_drain(void *arg)
 {
 	kmem_cache_t *cachep = (kmem_cache_t*)arg;
 	struct array_cache *ac;
+	int node = numa_node_id();
 
 	check_irq_off();
 	ac = ac_data(cachep);
-	spin_lock(&cachep->nodelists[numa_node_id()]->list_lock);
-	free_block(cachep, ac->entry, ac->avail);
-	spin_unlock(&cachep->nodelists[numa_node_id()]->list_lock);
+	spin_lock(&cachep->nodelists[node]->list_lock);
+	free_block(cachep, ac->entry, ac->avail, node);
+	spin_unlock(&cachep->nodelists[node]->list_lock);
 	ac->avail = 0;
 }
 
@@ -2046,7 +2046,7 @@ EXPORT_SYMBOL(kmem_cache_destroy);
 
 /* Get the memory for a slab management obj. */
 static struct slab* alloc_slabmgmt(kmem_cache_t *cachep, void *objp,
-			int colour_off, unsigned int __nocast local_flags)
+			int colour_off, gfp_t local_flags)
 {
 	struct slab *slabp;
 	
@@ -2147,7 +2147,7 @@ static void set_slab_attr(kmem_cache_t *cachep, struct slab *slabp, void *objp)
  * Grow (by 1) the number of slabs within a cache.  This is called by
  * kmem_cache_alloc() when there are no active objs left in a cache.
  */
-static int cache_grow(kmem_cache_t *cachep, unsigned int __nocast flags, int nodeid)
+static int cache_grow(kmem_cache_t *cachep, gfp_t flags, int nodeid)
 {
 	struct slab	*slabp;
 	void		*objp;
@@ -2354,7 +2354,7 @@ bad:
 #define check_slabp(x,y) do { } while(0)
 #endif
 
-static void *cache_alloc_refill(kmem_cache_t *cachep, unsigned int __nocast flags)
+static void *cache_alloc_refill(kmem_cache_t *cachep, gfp_t flags)
 {
 	int batchcount;
 	struct kmem_list3 *l3;
@@ -2454,7 +2454,7 @@ alloc_done:
 }
 
 static inline void
-cache_alloc_debugcheck_before(kmem_cache_t *cachep, unsigned int __nocast flags)
+cache_alloc_debugcheck_before(kmem_cache_t *cachep, gfp_t flags)
 {
 	might_sleep_if(flags & __GFP_WAIT);
 #if DEBUG
@@ -2465,7 +2465,7 @@ cache_alloc_debugcheck_before(kmem_cache_t *cachep, unsigned int __nocast flags)
 #if DEBUG
 static void *
 cache_alloc_debugcheck_after(kmem_cache_t *cachep,
-			unsigned int __nocast flags, void *objp, void *caller)
+			gfp_t flags, void *objp, void *caller)
 {
 	if (!objp)	
 		return objp;
@@ -2508,16 +2508,12 @@ cache_alloc_debugcheck_after(kmem_cache_t *cachep,
 #define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
 #endif
 
-
-static inline void *__cache_alloc(kmem_cache_t *cachep, unsigned int __nocast flags)
+static inline void *____cache_alloc(kmem_cache_t *cachep, gfp_t flags)
 {
-	unsigned long save_flags;
 	void* objp;
 	struct array_cache *ac;
 
-	cache_alloc_debugcheck_before(cachep, flags);
-
-	local_irq_save(save_flags);
+	check_irq_off();
 	ac = ac_data(cachep);
 	if (likely(ac->avail)) {
 		STATS_INC_ALLOCHIT(cachep);
@@ -2527,6 +2523,18 @@ static inline void *__cache_alloc(kmem_cache_t *cachep, unsigned int __nocast fl
 		STATS_INC_ALLOCMISS(cachep);
 		objp = cache_alloc_refill(cachep, flags);
 	}
+	return objp;
+}
+
+static inline void *__cache_alloc(kmem_cache_t *cachep, gfp_t flags)
+{
+	unsigned long save_flags;
+	void* objp;
+
+	cache_alloc_debugcheck_before(cachep, flags);
+
+	local_irq_save(save_flags);
+	objp = ____cache_alloc(cachep, flags);
 	local_irq_restore(save_flags);
 	objp = cache_alloc_debugcheck_after(cachep, flags, objp,
 					__builtin_return_address(0));
@@ -2608,7 +2616,7 @@ done:
 /*
  * Caller needs to acquire correct kmem_list's list_lock
  */
-static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects)
+static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, int node)
 {
 	int i;
 	struct kmem_list3 *l3;
@@ -2617,14 +2625,12 @@ static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects)
 		void *objp = objpp[i];
 		struct slab *slabp;
 		unsigned int objnr;
-		int nodeid = 0;
 
 		slabp = GET_PAGE_SLAB(virt_to_page(objp));
-		nodeid = slabp->nodeid;
-		l3 = cachep->nodelists[nodeid];
+		l3 = cachep->nodelists[node];
 		list_del(&slabp->list);
 		objnr = (objp - slabp->s_mem) / cachep->objsize;
-		check_spinlock_acquired_node(cachep, nodeid);
+		check_spinlock_acquired_node(cachep, node);
 		check_slabp(cachep, slabp);
 
 
@@ -2664,13 +2670,14 @@ static void cache_flusharray(kmem_cache_t *cachep, struct array_cache *ac)
 {
 	int batchcount;
 	struct kmem_list3 *l3;
+	int node = numa_node_id();
 
 	batchcount = ac->batchcount;
 #if DEBUG
 	BUG_ON(!batchcount || batchcount > ac->avail);
 #endif
 	check_irq_off();
-	l3 = cachep->nodelists[numa_node_id()];
+	l3 = cachep->nodelists[node];
 	spin_lock(&l3->list_lock);
 	if (l3->shared) {
 		struct array_cache *shared_array = l3->shared;
@@ -2686,7 +2693,7 @@ static void cache_flusharray(kmem_cache_t *cachep, struct array_cache *ac)
 		}
 	}
 
-	free_block(cachep, ac->entry, batchcount);
+	free_block(cachep, ac->entry, batchcount, node);
 free_done:
 #if STATS
 	{
@@ -2751,7 +2758,7 @@ static inline void __cache_free(kmem_cache_t *cachep, void *objp)
 			} else {
 				spin_lock(&(cachep->nodelists[nodeid])->
 						list_lock);
-				free_block(cachep, &objp, 1);
+				free_block(cachep, &objp, 1, nodeid);
 				spin_unlock(&(cachep->nodelists[nodeid])->
 						list_lock);
 			}
@@ -2778,7 +2785,7 @@ static inline void __cache_free(kmem_cache_t *cachep, void *objp)
  * Allocate an object from this cache.  The flags are only relevant
  * if the cache has no available objects.
  */
-void *kmem_cache_alloc(kmem_cache_t *cachep, unsigned int __nocast flags)
+void *kmem_cache_alloc(kmem_cache_t *cachep, gfp_t flags)
 {
 	return __cache_alloc(cachep, flags);
 }
@@ -2839,12 +2846,12 @@ out:
  * New and improved: it will now make sure that the object gets
  * put on the correct node list so that there is no false sharing.
  */
-void *kmem_cache_alloc_node(kmem_cache_t *cachep, unsigned int __nocast flags, int nodeid)
+void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid)
 {
 	unsigned long save_flags;
 	void *ptr;
 
-	if (nodeid == numa_node_id() || nodeid == -1)
+	if (nodeid == -1)
 		return __cache_alloc(cachep, flags);
 
 	if (unlikely(!cachep->nodelists[nodeid])) {
@@ -2855,7 +2862,10 @@ void *kmem_cache_alloc_node(kmem_cache_t *cachep, unsigned int __nocast flags, i
 
 	cache_alloc_debugcheck_before(cachep, flags);
 	local_irq_save(save_flags);
-	ptr = __cache_alloc_node(cachep, flags, nodeid);
+	if (nodeid == numa_node_id())
+		ptr = ____cache_alloc(cachep, flags);
+	else
+		ptr = __cache_alloc_node(cachep, flags, nodeid);
 	local_irq_restore(save_flags);
 	ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, __builtin_return_address(0));
 
@@ -2863,7 +2873,7 @@ void *kmem_cache_alloc_node(kmem_cache_t *cachep, unsigned int __nocast flags, i
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);
 
-void *kmalloc_node(size_t size, unsigned int __nocast flags, int node)
+void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	kmem_cache_t *cachep;
 
@@ -2896,7 +2906,7 @@ EXPORT_SYMBOL(kmalloc_node);
  * platforms.  For example, on i386, it means that the memory must come
  * from the first 16MB.
  */
-void *__kmalloc(size_t size, unsigned int __nocast flags)
+void *__kmalloc(size_t size, gfp_t flags)
 {
 	kmem_cache_t *cachep;
 
@@ -2985,7 +2995,7 @@ EXPORT_SYMBOL(kmem_cache_free);
  * @size: how many bytes of memory are required.
  * @flags: the type of memory to allocate.
  */
-void *kzalloc(size_t size, unsigned int __nocast flags)
+void *kzalloc(size_t size, gfp_t flags)
 {
 	void *ret = kmalloc(size, flags);
 	if (ret)
@@ -3079,7 +3089,7 @@ static int alloc_kmemlist(kmem_cache_t *cachep)
 
 			if ((nc = cachep->nodelists[node]->shared))
 				free_block(cachep, nc->entry,
-							nc->avail);
+							nc->avail, node);
 
 			l3->shared = new;
 			if (!cachep->nodelists[node]->alien) {
@@ -3160,7 +3170,7 @@ static int do_tune_cpucache(kmem_cache_t *cachep, int limit, int batchcount,
 		if (!ccold)
 			continue;
 		spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
-		free_block(cachep, ccold->entry, ccold->avail);
+		free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i));
 		spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
 		kfree(ccold);
 	}
@@ -3240,7 +3250,7 @@ static void drain_array_locked(kmem_cache_t *cachep,
 		if (tofree > ac->avail) {
 			tofree = (ac->avail+1)/2;
 		}
-		free_block(cachep, ac->entry, tofree);
+		free_block(cachep, ac->entry, tofree, node);
 		ac->avail -= tofree;
 		memmove(ac->entry, &(ac->entry[tofree]),
 					sizeof(void*)*ac->avail);
@@ -3591,7 +3601,7 @@ unsigned int ksize(const void *objp)
  * @s: the string to duplicate
  * @gfp: the GFP mask used in the kmalloc() call when allocating memory
  */
-char *kstrdup(const char *s, unsigned int __nocast gfp)
+char *kstrdup(const char *s, gfp_t gfp)
 {
 	size_t len;
 	char *buf;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index adbc2b426c2f..132164f7d0a7 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -68,7 +68,7 @@ void show_swap_cache_info(void)
  * but sets SwapCache flag and private instead of mapping and index.
  */
 static int __add_to_swap_cache(struct page *page, swp_entry_t entry,
-			       unsigned int __nocast gfp_mask)
+			       gfp_t gfp_mask)
 {
 	int error;
 
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 0184f510aace..1dcaeda039f4 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1381,6 +1381,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
 		error = bd_claim(bdev, sys_swapon);
 		if (error < 0) {
 			bdev = NULL;
+			error = -EINVAL;
 			goto bad_swap;
 		}
 		p->old_block_size = block_size(bdev);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 13c3d82968ae..1150229b6366 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -395,7 +395,7 @@ void *vmap(struct page **pages, unsigned int count,
 
 EXPORT_SYMBOL(vmap);
 
-void *__vmalloc_area(struct vm_struct *area, unsigned int __nocast gfp_mask, pgprot_t prot)
+void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot)
 {
 	struct page **pages;
 	unsigned int nr_pages, array_size, i;
@@ -446,7 +446,7 @@ fail:
  *	allocator with @gfp_mask flags.  Map them into contiguous
  *	kernel virtual space, using a pagetable protection of @prot.
  */
-void *__vmalloc(unsigned long size, unsigned int __nocast gfp_mask, pgprot_t prot)
+void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
 {
 	struct vm_struct *area;
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0ea71e887bb6..64f9570cff56 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -511,10 +511,11 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
 		 * PageDirty _after_ making sure that the page is freeable and
 		 * not in use by anybody. 	(pagecache + us == 2)
 		 */
-		if (page_count(page) != 2 || PageDirty(page)) {
-			write_unlock_irq(&mapping->tree_lock);
-			goto keep_locked;
-		}
+		if (unlikely(page_count(page) != 2))
+			goto cannot_free;
+		smp_rmb();
+		if (unlikely(PageDirty(page)))
+			goto cannot_free;
 
 #ifdef CONFIG_SWAP
 		if (PageSwapCache(page)) {
@@ -538,6 +539,10 @@ free_it:
 			__pagevec_release_nonlru(&freed_pvec);
 		continue;
 
+cannot_free:
+		write_unlock_irq(&mapping->tree_lock);
+		goto keep_locked;
+
 activate_locked:
 		SetPageActive(page);
 		pgactivate++;