diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memcontrol.c | 25 | ||||
-rw-r--r-- | mm/mlock.c | 47 | ||||
-rw-r--r-- | mm/mmap.c | 79 | ||||
-rw-r--r-- | mm/shmem.c | 2 | ||||
-rw-r--r-- | mm/swapfile.c | 5 |
5 files changed, 65 insertions, 93 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 4d0ea3ceba6..8e4be9cb2a6 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -202,6 +202,7 @@ pcg_default_flags[NR_CHARGE_TYPE] = { static void mem_cgroup_get(struct mem_cgroup *mem); static void mem_cgroup_put(struct mem_cgroup *mem); +static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem); static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, struct page_cgroup *pc, @@ -1684,7 +1685,7 @@ move_account: /* This is for making all *used* pages to be on LRU. */ lru_add_drain_all(); ret = 0; - for_each_node_state(node, N_POSSIBLE) { + for_each_node_state(node, N_HIGH_MEMORY) { for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) { enum lru_list l; for_each_lru(l) { @@ -2193,10 +2194,23 @@ static void mem_cgroup_get(struct mem_cgroup *mem) static void mem_cgroup_put(struct mem_cgroup *mem) { - if (atomic_dec_and_test(&mem->refcnt)) + if (atomic_dec_and_test(&mem->refcnt)) { + struct mem_cgroup *parent = parent_mem_cgroup(mem); __mem_cgroup_free(mem); + if (parent) + mem_cgroup_put(parent); + } } +/* + * Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled. + */ +static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem) +{ + if (!mem->res.parent) + return NULL; + return mem_cgroup_from_res_counter(mem->res.parent, res); +} #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP static void __init enable_swap_cgroup(void) @@ -2235,6 +2249,13 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) if (parent && parent->use_hierarchy) { res_counter_init(&mem->res, &parent->res); res_counter_init(&mem->memsw, &parent->memsw); + /* + * We increment refcnt of the parent to ensure that we can + * safely access it on res_counter_charge/uncharge. + * This refcnt will be decremented when freeing this + * mem_cgroup(see mem_cgroup_put). + */ + mem_cgroup_get(parent); } else { res_counter_init(&mem->res, NULL); res_counter_init(&mem->memsw, NULL); diff --git a/mm/mlock.c b/mm/mlock.c index 2904a347e47..028ec482fdd 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -294,14 +294,10 @@ static inline int __mlock_posix_error_return(long retval) * * return number of pages [> 0] to be removed from locked_vm on success * of "special" vmas. - * - * return negative error if vma spanning @start-@range disappears while - * mmap semaphore is dropped. Unlikely? */ long mlock_vma_pages_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - struct mm_struct *mm = vma->vm_mm; int nr_pages = (end - start) / PAGE_SIZE; BUG_ON(!(vma->vm_flags & VM_LOCKED)); @@ -314,20 +310,8 @@ long mlock_vma_pages_range(struct vm_area_struct *vma, if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) || is_vm_hugetlb_page(vma) || vma == get_gate_vma(current))) { - long error; - downgrade_write(&mm->mmap_sem); - - error = __mlock_vma_pages_range(vma, start, end, 1); - up_read(&mm->mmap_sem); - /* vma can change or disappear */ - down_write(&mm->mmap_sem); - vma = find_vma(mm, start); - /* non-NULL vma must contain @start, but need to check @end */ - if (!vma || end > vma->vm_end) - return -ENOMEM; - - return 0; /* hide other errors from mmap(), et al */ + return __mlock_vma_pages_range(vma, start, end, 1); } /* @@ -438,41 +422,14 @@ success: vma->vm_flags = newflags; if (lock) { - /* - * mmap_sem is currently held for write. Downgrade the write - * lock to a read lock so that other faults, mmap scans, ... - * while we fault in all pages. - */ - downgrade_write(&mm->mmap_sem); - ret = __mlock_vma_pages_range(vma, start, end, 1); - /* - * Need to reacquire mmap sem in write mode, as our callers - * expect this. We have no support for atomically upgrading - * a sem to write, so we need to check for ranges while sem - * is unlocked. - */ - up_read(&mm->mmap_sem); - /* vma can change or disappear */ - down_write(&mm->mmap_sem); - *prev = find_vma(mm, start); - /* non-NULL *prev must contain @start, but need to check @end */ - if (!(*prev) || end > (*prev)->vm_end) - ret = -ENOMEM; - else if (ret > 0) { + if (ret > 0) { mm->locked_vm -= ret; ret = 0; } else ret = __mlock_posix_error_return(ret); /* translate if needed */ } else { - /* - * TODO: for unlocking, pages will already be resident, so - * we don't need to wait for allocations/reclaim/pagein, ... - * However, unlocking a very large region can still take a - * while. Should we downgrade the semaphore for both lock - * AND unlock ? - */ __mlock_vma_pages_range(vma, start, end, 0); } diff --git a/mm/mmap.c b/mm/mmap.c index 8d95902e9a3..214b6a258ee 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -658,6 +658,9 @@ again: remove_next = 1 + (end > next->vm_end); validate_mm(mm); } +/* Flags that can be inherited from an existing mapping when merging */ +#define VM_MERGEABLE_FLAGS (VM_CAN_NONLINEAR) + /* * If the vma has a ->close operation then the driver probably needs to release * per-vma resources, so we don't attempt to merge those. @@ -665,7 +668,7 @@ again: remove_next = 1 + (end > next->vm_end); static inline int is_mergeable_vma(struct vm_area_struct *vma, struct file *file, unsigned long vm_flags) { - if (vma->vm_flags != vm_flags) + if ((vma->vm_flags ^ vm_flags) & ~VM_MERGEABLE_FLAGS) return 0; if (vma->vm_file != file) return 0; @@ -1087,6 +1090,15 @@ int vma_wants_writenotify(struct vm_area_struct *vma) mapping_cap_account_dirty(vma->vm_file->f_mapping); } +/* + * We account for memory if it's a private writeable mapping, + * and VM_NORESERVE wasn't set. + */ +static inline int accountable_mapping(unsigned int vm_flags) +{ + return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE; +} + unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long len, unsigned long flags, unsigned int vm_flags, unsigned long pgoff, @@ -1114,36 +1126,32 @@ munmap_back: if (!may_expand_vm(mm, len >> PAGE_SHIFT)) return -ENOMEM; - if (flags & MAP_NORESERVE) + /* + * Set 'VM_NORESERVE' if we should not account for the + * memory use of this mapping. We only honor MAP_NORESERVE + * if we're allowed to overcommit memory. + */ + if ((flags & MAP_NORESERVE) && sysctl_overcommit_memory != OVERCOMMIT_NEVER) + vm_flags |= VM_NORESERVE; + if (!accountable) vm_flags |= VM_NORESERVE; - if (accountable && (!(flags & MAP_NORESERVE) || - sysctl_overcommit_memory == OVERCOMMIT_NEVER)) { - if (vm_flags & VM_SHARED) { - /* Check memory availability in shmem_file_setup? */ - vm_flags |= VM_ACCOUNT; - } else if (vm_flags & VM_WRITE) { - /* - * Private writable mapping: check memory availability - */ - charged = len >> PAGE_SHIFT; - if (security_vm_enough_memory(charged)) - return -ENOMEM; - vm_flags |= VM_ACCOUNT; - } + /* + * Private writable mapping: check memory availability + */ + if (accountable_mapping(vm_flags)) { + charged = len >> PAGE_SHIFT; + if (security_vm_enough_memory(charged)) + return -ENOMEM; + vm_flags |= VM_ACCOUNT; } /* - * Can we just expand an old private anonymous mapping? - * The VM_SHARED test is necessary because shmem_zero_setup - * will create the file object for a shared anonymous map below. + * Can we just expand an old mapping? */ - if (!file && !(vm_flags & VM_SHARED)) { - vma = vma_merge(mm, prev, addr, addr + len, vm_flags, - NULL, NULL, pgoff, NULL); - if (vma) - goto out; - } + vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff, NULL); + if (vma) + goto out; /* * Determine the object being mapped and call the appropriate @@ -1186,14 +1194,6 @@ munmap_back: goto free_vma; } - /* We set VM_ACCOUNT in a shared mapping's vm_flags, to inform - * shmem_zero_setup (perhaps called through /dev/zero's ->mmap) - * that memory reservation must be checked; but that reservation - * belongs to shared memory object, not to vma: so now clear it. - */ - if ((vm_flags & (VM_SHARED|VM_ACCOUNT)) == (VM_SHARED|VM_ACCOUNT)) - vma->vm_flags &= ~VM_ACCOUNT; - /* Can addr have changed?? * * Answer: Yes, several device drivers can do it in their @@ -1206,17 +1206,8 @@ munmap_back: if (vma_wants_writenotify(vma)) vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED); - if (file && vma_merge(mm, prev, addr, vma->vm_end, - vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) { - mpol_put(vma_policy(vma)); - kmem_cache_free(vm_area_cachep, vma); - fput(file); - if (vm_flags & VM_EXECUTABLE) - removed_exe_file_vma(mm); - } else { - vma_link(mm, vma, prev, rb_link, rb_parent); - file = vma->vm_file; - } + vma_link(mm, vma, prev, rb_link, rb_parent); + file = vma->vm_file; /* Once vma denies write, undo our temporary denial count */ if (correct_wcount) diff --git a/mm/shmem.c b/mm/shmem.c index 5d0de96c978..19d566ccdee 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2628,7 +2628,7 @@ struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags) goto close_file; #ifdef CONFIG_SHMEM - SHMEM_I(inode)->flags = flags & VM_ACCOUNT; + SHMEM_I(inode)->flags = (flags & VM_NORESERVE) ? 0 : VM_ACCOUNT; #endif d_instantiate(dentry, inode); inode->i_size = size; diff --git a/mm/swapfile.c b/mm/swapfile.c index f48b831e5e5..7e6304dfafa 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -698,8 +698,10 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, pte_t *pte; int ret = 1; - if (mem_cgroup_try_charge_swapin(vma->vm_mm, page, GFP_KERNEL, &ptr)) + if (mem_cgroup_try_charge_swapin(vma->vm_mm, page, GFP_KERNEL, &ptr)) { ret = -ENOMEM; + goto out_nolock; + } pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) { @@ -723,6 +725,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, activate_page(page); out: pte_unmap_unlock(pte, ptl); +out_nolock: return ret; } |