diff options
author | Jonathan Corbet <corbet@lwn.net> | 2008-07-14 15:29:34 -0600 |
---|---|---|
committer | Jonathan Corbet <corbet@lwn.net> | 2008-07-14 15:29:34 -0600 |
commit | 2fceef397f9880b212a74c418290ce69e7ac00eb (patch) | |
tree | d9cc09ab992825ef7fede4a688103503e3caf655 /mm | |
parent | feae1ef116ed381625d3731c5ae4f4ebcb3fa302 (diff) | |
parent | bce7f793daec3e65ec5c5705d2457b81fe7b5725 (diff) | |
download | linux-3.10-2fceef397f9880b212a74c418290ce69e7ac00eb.tar.gz linux-3.10-2fceef397f9880b212a74c418290ce69e7ac00eb.tar.bz2 linux-3.10-2fceef397f9880b212a74c418290ce69e7ac00eb.zip |
Merge commit 'v2.6.26' into bkl-removal
Diffstat (limited to 'mm')
-rw-r--r-- | mm/allocpercpu.c | 2 | ||||
-rw-r--r-- | mm/backing-dev.c | 12 | ||||
-rw-r--r-- | mm/bootmem.c | 6 | ||||
-rw-r--r-- | mm/hugetlb.c | 2 | ||||
-rw-r--r-- | mm/memory.c | 85 | ||||
-rw-r--r-- | mm/mempolicy.c | 6 | ||||
-rw-r--r-- | mm/migrate.c | 12 | ||||
-rw-r--r-- | mm/mmap.c | 12 | ||||
-rw-r--r-- | mm/nommu.c | 21 | ||||
-rw-r--r-- | mm/page_alloc.c | 43 | ||||
-rw-r--r-- | mm/pagewalk.c | 42 | ||||
-rw-r--r-- | mm/slab.c | 5 | ||||
-rw-r--r-- | mm/slob.c | 5 | ||||
-rw-r--r-- | mm/slub.c | 23 | ||||
-rw-r--r-- | mm/sparse-vmemmap.c | 2 | ||||
-rw-r--r-- | mm/swap.c | 4 | ||||
-rw-r--r-- | mm/vmscan.c | 2 |
17 files changed, 180 insertions, 104 deletions
diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c index f4026bae6ee..05f2b4009cc 100644 --- a/mm/allocpercpu.c +++ b/mm/allocpercpu.c @@ -1,7 +1,7 @@ /* * linux/mm/allocpercpu.c * - * Separated from slab.c August 11, 2006 Christoph Lameter <clameter@sgi.com> + * Separated from slab.c August 11, 2006 Christoph Lameter */ #include <linux/mm.h> #include <linux/module.h> diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 7c4f9e09709..f2e574dbc30 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -172,30 +172,22 @@ postcore_initcall(bdi_class_init); int bdi_register(struct backing_dev_info *bdi, struct device *parent, const char *fmt, ...) { - char *name; va_list args; int ret = 0; struct device *dev; va_start(args, fmt); - name = kvasprintf(GFP_KERNEL, fmt, args); + dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args); va_end(args); - - if (!name) - return -ENOMEM; - - dev = device_create(bdi_class, parent, MKDEV(0, 0), name); if (IS_ERR(dev)) { ret = PTR_ERR(dev); goto exit; } bdi->dev = dev; - dev_set_drvdata(bdi->dev, bdi); - bdi_debug_register(bdi, name); + bdi_debug_register(bdi, dev_name(dev)); exit: - kfree(name); return ret; } EXPORT_SYMBOL(bdi_register); diff --git a/mm/bootmem.c b/mm/bootmem.c index e8fb927392b..8d9f60e06f6 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -442,15 +442,17 @@ unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn, return init_bootmem_core(pgdat, freepfn, startpfn, endpfn); } -void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, +int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, unsigned long size, int flags) { int ret; ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); if (ret < 0) - return; + return -ENOMEM; reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); + + return 0; } void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, diff --git a/mm/hugetlb.c b/mm/hugetlb.c index bbf953eeb58..ab171274ef2 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -785,7 +785,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, continue; spin_lock(&dst->page_table_lock); - spin_lock(&src->page_table_lock); + spin_lock_nested(&src->page_table_lock, SINGLE_DEPTH_NESTING); if (!huge_pte_none(huge_ptep_get(src_pte))) { if (cow) huge_ptep_set_wrprotect(src, addr, src_pte); diff --git a/mm/memory.c b/mm/memory.c index fb5608a120e..2302d228fe0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -999,17 +999,15 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, goto no_page_table; ptep = pte_offset_map_lock(mm, pmd, address, &ptl); - if (!ptep) - goto out; pte = *ptep; if (!pte_present(pte)) - goto unlock; + goto no_page; if ((flags & FOLL_WRITE) && !pte_write(pte)) goto unlock; page = vm_normal_page(vma, address, pte); if (unlikely(!page)) - goto unlock; + goto bad_page; if (flags & FOLL_GET) get_page(page); @@ -1024,6 +1022,15 @@ unlock: out: return page; +bad_page: + pte_unmap_unlock(ptep, ptl); + return ERR_PTR(-EFAULT); + +no_page: + pte_unmap_unlock(ptep, ptl); + if (!pte_none(pte)) + return page; + /* Fall through to ZERO_PAGE handling */ no_page_table: /* * When core dumping an enormous anonymous area that nobody @@ -1038,6 +1045,26 @@ no_page_table: return page; } +/* Can we do the FOLL_ANON optimization? */ +static inline int use_zero_page(struct vm_area_struct *vma) +{ + /* + * We don't want to optimize FOLL_ANON for make_pages_present() + * when it tries to page in a VM_LOCKED region. As to VM_SHARED, + * we want to get the page from the page tables to make sure + * that we serialize and update with any other user of that + * mapping. + */ + if (vma->vm_flags & (VM_LOCKED | VM_SHARED)) + return 0; + /* + * And if we have a fault or a nopfn routine, it's not an + * anonymous region. + */ + return !vma->vm_ops || + (!vma->vm_ops->fault && !vma->vm_ops->nopfn); +} + int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int len, int write, int force, struct page **pages, struct vm_area_struct **vmas) @@ -1112,8 +1139,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, foll_flags = FOLL_TOUCH; if (pages) foll_flags |= FOLL_GET; - if (!write && !(vma->vm_flags & VM_LOCKED) && - (!vma->vm_ops || !vma->vm_ops->fault)) + if (!write && use_zero_page(vma)) foll_flags |= FOLL_ANON; do { @@ -1125,7 +1151,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, * be processed until returning to user space. */ if (unlikely(test_tsk_thread_flag(tsk, TIF_MEMDIE))) - return -ENOMEM; + return i ? i : -ENOMEM; if (write) foll_flags |= FOLL_WRITE; @@ -1159,6 +1185,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, cond_resched(); } + if (IS_ERR(page)) + return i ? i : PTR_ERR(page); if (pages) { pages[i] = page; @@ -1669,8 +1697,19 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page *dirty_page = NULL; old_page = vm_normal_page(vma, address, orig_pte); - if (!old_page) + if (!old_page) { + /* + * VM_MIXEDMAP !pfn_valid() case + * + * We should not cow pages in a shared writeable mapping. + * Just mark the pages writable as we can't do any dirty + * accounting on raw pfn maps. + */ + if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) == + (VM_WRITE|VM_SHARED)) + goto reuse; goto gotten; + } /* * Take out anonymous pages first, anonymous shared vmas are @@ -1723,6 +1762,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, } if (reuse) { +reuse: flush_cache_page(vma, address, pte_pfn(orig_pte)); entry = pte_mkyoung(orig_pte); entry = maybe_mkwrite(pte_mkdirty(entry), vma); @@ -1757,7 +1797,6 @@ gotten: page_table = pte_offset_map_lock(mm, pmd, address, &ptl); if (likely(pte_same(*page_table, orig_pte))) { if (old_page) { - page_remove_rmap(old_page, vma); if (!PageAnon(old_page)) { dec_mm_counter(mm, file_rss); inc_mm_counter(mm, anon_rss); @@ -1779,6 +1818,32 @@ gotten: lru_cache_add_active(new_page); page_add_new_anon_rmap(new_page, vma, address); + if (old_page) { + /* + * Only after switching the pte to the new page may + * we remove the mapcount here. Otherwise another + * process may come and find the rmap count decremented + * before the pte is switched to the new page, and + * "reuse" the old page writing into it while our pte + * here still points into it and can be read by other + * threads. + * + * The critical issue is to order this + * page_remove_rmap with the ptp_clear_flush above. + * Those stores are ordered by (if nothing else,) + * the barrier present in the atomic_add_negative + * in page_remove_rmap. + * + * Then the TLB flush in ptep_clear_flush ensures that + * no process can access the old page before the + * decremented mapcount is visible. And the old page + * cannot be reused until after the decremented + * mapcount is visible. So transitively, TLBs to + * old page will be flushed before it can be reused. + */ + page_remove_rmap(old_page, vma); + } + /* Free the old page.. */ new_page = old_page; ret |= VM_FAULT_WRITE; @@ -2295,8 +2360,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, vmf.flags = flags; vmf.page = NULL; - BUG_ON(vma->vm_flags & VM_PFNMAP); - ret = vma->vm_ops->fault(vma, &vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) return ret; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index a37a5034f63..c94e58b192c 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -729,7 +729,11 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask, } else { *policy = pol == &default_policy ? MPOL_DEFAULT : pol->mode; - *policy |= pol->flags; + /* + * Internal mempolicy flags must be masked off before exposing + * the policy to userspace. + */ + *policy |= (pol->flags & MPOL_MODE_FLAGS); } if (vma) { diff --git a/mm/migrate.c b/mm/migrate.c index 449d77d409f..55bd355d170 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -9,7 +9,7 @@ * IWAMOTO Toshihiro <iwamoto@valinux.co.jp> * Hirokazu Takahashi <taka@valinux.co.jp> * Dave Hansen <haveblue@us.ibm.com> - * Christoph Lameter <clameter@sgi.com> + * Christoph Lameter */ #include <linux/migrate.h> @@ -865,6 +865,11 @@ static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm, goto set_status; page = follow_page(vma, pp->addr, FOLL_GET); + + err = PTR_ERR(page); + if (IS_ERR(page)) + goto set_status; + err = -ENOENT; if (!page) goto set_status; @@ -928,6 +933,11 @@ static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm) goto set_status; page = follow_page(vma, pm->addr, 0); + + err = PTR_ERR(page); + if (IS_ERR(page)) + goto set_status; + err = -ENOENT; /* Use PageReserved to check for zero page */ if (!page || PageReserved(page)) diff --git a/mm/mmap.c b/mm/mmap.c index fac66337da2..3354fdd83d4 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -80,7 +80,7 @@ EXPORT_SYMBOL(vm_get_page_prot); int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ int sysctl_overcommit_ratio = 50; /* default is 50% */ int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; -atomic_t vm_committed_space = ATOMIC_INIT(0); +atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0); /* * Check that a process has enough memory to allocate a new virtual @@ -177,7 +177,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) * cast `allowed' as a signed long because vm_committed_space * sometimes has a negative value */ - if (atomic_read(&vm_committed_space) < (long)allowed) + if (atomic_long_read(&vm_committed_space) < (long)allowed) return 0; error: vm_unacct_memory(pages); @@ -245,10 +245,16 @@ asmlinkage unsigned long sys_brk(unsigned long brk) unsigned long rlim, retval; unsigned long newbrk, oldbrk; struct mm_struct *mm = current->mm; + unsigned long min_brk; down_write(&mm->mmap_sem); - if (brk < mm->start_brk) +#ifdef CONFIG_COMPAT_BRK + min_brk = mm->end_code; +#else + min_brk = mm->start_brk; +#endif + if (brk < min_brk) goto out; /* diff --git a/mm/nommu.c b/mm/nommu.c index ef8c62cec69..4462b6a3fcb 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -39,7 +39,7 @@ struct page *mem_map; unsigned long max_mapnr; unsigned long num_physpages; unsigned long askedalloc, realalloc; -atomic_t vm_committed_space = ATOMIC_INIT(0); +atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0); int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ int sysctl_overcommit_ratio = 50; /* default is 50% */ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; @@ -109,16 +109,23 @@ unsigned int kobjsize(const void *objp) * If the object we have should not have ksize performed on it, * return size of 0 */ - if (!objp || (unsigned long)objp >= memory_end || !((page = virt_to_page(objp)))) + if (!objp || !virt_addr_valid(objp)) return 0; + page = virt_to_head_page(objp); + + /* + * If the allocator sets PageSlab, we know the pointer came from + * kmalloc(). + */ if (PageSlab(page)) return ksize(objp); - BUG_ON(page->index < 0); - BUG_ON(page->index >= MAX_ORDER); - - return (PAGE_SIZE << page->index); + /* + * The ksize() function is only guaranteed to work for pointers + * returned by kmalloc(). So handle arbitrary pointers here. + */ + return PAGE_SIZE << compound_order(page); } /* @@ -1410,7 +1417,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) * cast `allowed' as a signed long because vm_committed_space * sometimes has a negative value */ - if (atomic_read(&vm_committed_space) < (long)allowed) + if (atomic_long_read(&vm_committed_space) < (long)allowed) return 0; error: vm_unacct_memory(pages); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 63835579323..f32fae3121f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -237,16 +237,7 @@ static void bad_page(struct page *page) printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n" KERN_EMERG "Backtrace:\n"); dump_stack(); - page->flags &= ~(1 << PG_lru | - 1 << PG_private | - 1 << PG_locked | - 1 << PG_active | - 1 << PG_dirty | - 1 << PG_reclaim | - 1 << PG_slab | - 1 << PG_swapcache | - 1 << PG_writeback | - 1 << PG_buddy ); + page->flags &= ~PAGE_FLAGS_CLEAR_WHEN_BAD; set_page_count(page, 0); reset_page_mapcount(page); page->mapping = NULL; @@ -463,16 +454,7 @@ static inline int free_pages_check(struct page *page) (page->mapping != NULL) | (page_get_page_cgroup(page) != NULL) | (page_count(page) != 0) | - (page->flags & ( - 1 << PG_lru | - 1 << PG_private | - 1 << PG_locked | - 1 << PG_active | - 1 << PG_slab | - 1 << PG_swapcache | - 1 << PG_writeback | - 1 << PG_reserved | - 1 << PG_buddy )))) + (page->flags & PAGE_FLAGS_CHECK_AT_FREE))) bad_page(page); if (PageDirty(page)) __ClearPageDirty(page); @@ -616,17 +598,7 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) (page->mapping != NULL) | (page_get_page_cgroup(page) != NULL) | (page_count(page) != 0) | - (page->flags & ( - 1 << PG_lru | - 1 << PG_private | - 1 << PG_locked | - 1 << PG_active | - 1 << PG_dirty | - 1 << PG_slab | - 1 << PG_swapcache | - 1 << PG_writeback | - 1 << PG_reserved | - 1 << PG_buddy )))) + (page->flags & PAGE_FLAGS_CHECK_AT_PREP))) bad_page(page); /* @@ -1396,6 +1368,9 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order, (void)first_zones_zonelist(zonelist, high_zoneidx, nodemask, &preferred_zone); + if (!preferred_zone) + return NULL; + classzone_idx = zone_idx(preferred_zone); zonelist_scan: @@ -2353,7 +2328,6 @@ static void build_zonelists(pg_data_t *pgdat) static void build_zonelist_cache(pg_data_t *pgdat) { pgdat->node_zonelists[0].zlcache_ptr = NULL; - pgdat->node_zonelists[1].zlcache_ptr = NULL; } #endif /* CONFIG_NUMA */ @@ -2804,7 +2778,7 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) alloc_size = zone->wait_table_hash_nr_entries * sizeof(wait_queue_head_t); - if (system_state == SYSTEM_BOOTING) { + if (!slab_is_available()) { zone->wait_table = (wait_queue_head_t *) alloc_bootmem_node(pgdat, alloc_size); } else { @@ -3378,7 +3352,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, * is used by this zone for memmap. This affects the watermark * and per-cpu initialisations */ - memmap_pages = (size * sizeof(struct page)) >> PAGE_SHIFT; + memmap_pages = + PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT; if (realsize >= memmap_pages) { realsize -= memmap_pages; printk(KERN_DEBUG diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 0afd2387e50..d5878bed784 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -3,14 +3,14 @@ #include <linux/sched.h> static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, - const struct mm_walk *walk, void *private) + struct mm_walk *walk) { pte_t *pte; int err = 0; pte = pte_offset_map(pmd, addr); for (;;) { - err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, private); + err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, walk); if (err) break; addr += PAGE_SIZE; @@ -24,7 +24,7 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, } static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, - const struct mm_walk *walk, void *private) + struct mm_walk *walk) { pmd_t *pmd; unsigned long next; @@ -35,15 +35,15 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, next = pmd_addr_end(addr, end); if (pmd_none_or_clear_bad(pmd)) { if (walk->pte_hole) - err = walk->pte_hole(addr, next, private); + err = walk->pte_hole(addr, next, walk); if (err) break; continue; } if (walk->pmd_entry) - err = walk->pmd_entry(pmd, addr, next, private); + err = walk->pmd_entry(pmd, addr, next, walk); if (!err && walk->pte_entry) - err = walk_pte_range(pmd, addr, next, walk, private); + err = walk_pte_range(pmd, addr, next, walk); if (err) break; } while (pmd++, addr = next, addr != end); @@ -52,7 +52,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, } static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, - const struct mm_walk *walk, void *private) + struct mm_walk *walk) { pud_t *pud; unsigned long next; @@ -63,15 +63,15 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) { if (walk->pte_hole) - err = walk->pte_hole(addr, next, private); + err = walk->pte_hole(addr, next, walk); if (err) break; continue; } if (walk->pud_entry) - err = walk->pud_entry(pud, addr, next, private); + err = walk->pud_entry(pud, addr, next, walk); if (!err && (walk->pmd_entry || walk->pte_entry)) - err = walk_pmd_range(pud, addr, next, walk, private); + err = walk_pmd_range(pud, addr, next, walk); if (err) break; } while (pud++, addr = next, addr != end); @@ -85,15 +85,15 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, * @addr: starting address * @end: ending address * @walk: set of callbacks to invoke for each level of the tree - * @private: private data passed to the callback function * * Recursively walk the page table for the memory area in a VMA, * calling supplied callbacks. Callbacks are called in-order (first * PGD, first PUD, first PMD, first PTE, second PTE... second PMD, * etc.). If lower-level callbacks are omitted, walking depth is reduced. * - * Each callback receives an entry pointer, the start and end of the - * associated range, and a caller-supplied private data pointer. + * Each callback receives an entry pointer and the start and end of the + * associated range, and a copy of the original mm_walk for access to + * the ->private or ->mm fields. * * No locks are taken, but the bottom level iterator will map PTE * directories from highmem if necessary. @@ -101,9 +101,8 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, * If any callback returns a non-zero value, the walk is aborted and * the return value is propagated back to the caller. Otherwise 0 is returned. */ -int walk_page_range(const struct mm_struct *mm, - unsigned long addr, unsigned long end, - const struct mm_walk *walk, void *private) +int walk_page_range(unsigned long addr, unsigned long end, + struct mm_walk *walk) { pgd_t *pgd; unsigned long next; @@ -112,21 +111,24 @@ int walk_page_range(const struct mm_struct *mm, if (addr >= end) return err; - pgd = pgd_offset(mm, addr); + if (!walk->mm) + return -EINVAL; + + pgd = pgd_offset(walk->mm, addr); do { next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) { if (walk->pte_hole) - err = walk->pte_hole(addr, next, private); + err = walk->pte_hole(addr, next, walk); if (err) break; continue; } if (walk->pgd_entry) - err = walk->pgd_entry(pgd, addr, next, private); + err = walk->pgd_entry(pgd, addr, next, walk); if (!err && (walk->pud_entry || walk->pmd_entry || walk->pte_entry)) - err = walk_pud_range(pgd, addr, next, walk, private); + err = walk_pud_range(pgd, addr, next, walk); if (err) break; } while (pgd++, addr = next, addr != end); diff --git a/mm/slab.c b/mm/slab.c index 06236e4ddc1..046607f05f3 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3263,9 +3263,12 @@ retry: if (cpuset_zone_allowed_hardwall(zone, flags) && cache->nodelists[nid] && - cache->nodelists[nid]->free_objects) + cache->nodelists[nid]->free_objects) { obj = ____cache_alloc_node(cache, flags | GFP_THISNODE, nid); + if (obj) + break; + } } if (!obj) { diff --git a/mm/slob.c b/mm/slob.c index 6038cbadf79..a3ad6671adf 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -469,8 +469,9 @@ void *__kmalloc_node(size_t size, gfp_t gfp, int node) return ZERO_SIZE_PTR; m = slob_alloc(size + align, gfp, align, node); - if (m) - *m = size; + if (!m) + return NULL; + *m = size; return (void *)m + align; } else { void *ret; diff --git a/mm/slub.c b/mm/slub.c index a505a828ef4..315c392253c 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -5,7 +5,7 @@ * The allocator synchronizes using per slab locks and only * uses a centralized lock to manage a pool of partial slabs. * - * (C) 2007 SGI, Christoph Lameter <clameter@sgi.com> + * (C) 2007 SGI, Christoph Lameter */ #include <linux/mm.h> @@ -1628,9 +1628,11 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, void **object; struct kmem_cache_cpu *c; unsigned long flags; + unsigned int objsize; local_irq_save(flags); c = get_cpu_slab(s, smp_processor_id()); + objsize = c->objsize; if (unlikely(!c->freelist || !node_match(c, node))) object = __slab_alloc(s, gfpflags, node, addr, c); @@ -1643,7 +1645,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, local_irq_restore(flags); if (unlikely((gfpflags & __GFP_ZERO) && object)) - memset(object, 0, c->objsize); + memset(object, 0, objsize); return object; } @@ -2726,9 +2728,10 @@ size_t ksize(const void *object) page = virt_to_head_page(object); - if (unlikely(!PageSlab(page))) + if (unlikely(!PageSlab(page))) { + WARN_ON(!PageCompound(page)); return PAGE_SIZE << compound_order(page); - + } s = page->slab; #ifdef CONFIG_SLUB_DEBUG @@ -2994,8 +2997,6 @@ void __init kmem_cache_init(void) create_kmalloc_cache(&kmalloc_caches[1], "kmalloc-96", 96, GFP_KERNEL); caches++; - } - if (KMALLOC_MIN_SIZE <= 128) { create_kmalloc_cache(&kmalloc_caches[2], "kmalloc-192", 192, GFP_KERNEL); caches++; @@ -3025,6 +3026,16 @@ void __init kmem_cache_init(void) for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW; + if (KMALLOC_MIN_SIZE == 128) { + /* + * The 192 byte sized cache is not used if the alignment + * is 128 byte. Redirect kmalloc to use the 256 byte cache + * instead. + */ + for (i = 128 + 8; i <= 192; i += 8) + size_index[(i - 1) / 8] = 8; + } + slab_state = UP; /* Provide the correct kmalloc names now that the caches are up */ diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 99c4f36eb8a..a91b5f8fcaf 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -1,7 +1,7 @@ /* * Virtual Memory Map support * - * (C) 2007 sgi. Christoph Lameter <clameter@sgi.com>. + * (C) 2007 sgi. Christoph Lameter. * * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn, * virt_to_page, page_address() to be implemented as a base offset diff --git a/mm/swap.c b/mm/swap.c index 91e194445a5..45c9f25a8a3 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -503,7 +503,7 @@ void vm_acct_memory(long pages) local = &__get_cpu_var(committed_space); *local += pages; if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) { - atomic_add(*local, &vm_committed_space); + atomic_long_add(*local, &vm_committed_space); *local = 0; } preempt_enable(); @@ -520,7 +520,7 @@ static int cpu_swap_callback(struct notifier_block *nfb, committed = &per_cpu(committed_space, (long)hcpu); if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { - atomic_add(*committed, &vm_committed_space); + atomic_long_add(*committed, &vm_committed_space); *committed = 0; drain_cpu_pagevecs((long)hcpu); } diff --git a/mm/vmscan.c b/mm/vmscan.c index 9a29901ad3b..967d30ccd92 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1307,7 +1307,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, struct scan_control *sc) { int priority; - int ret = 0; + unsigned long ret = 0; unsigned long total_scanned = 0; unsigned long nr_reclaimed = 0; struct reclaim_state *reclaim_state = current->reclaim_state; |