diff options
38 files changed, 455 insertions, 169 deletions
diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst index 5f502bf68fbc..ff456871bf4b 100644 --- a/Documentation/admin-guide/cgroup-v1/memory.rst +++ b/Documentation/admin-guide/cgroup-v1/memory.rst @@ -92,6 +92,13 @@ Brief summary of control files. memory.oom_control set/show oom controls. memory.numa_stat show the number of memory usage per numa node + memory.kmem.limit_in_bytes Deprecated knob to set and read the kernel + memory hard limit. Kernel hard limit is not + supported since 5.16. Writing any value to + do file will not have any effect same as if + nokmem kernel parameter was specified. + Kernel memory is still charged and reported + by memory.kmem.usage_in_bytes. memory.kmem.usage_in_bytes show current kernel memory allocation memory.kmem.failcnt show the number of kernel memory usage hits limits diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index f43a38ac1779..2ddc33d93b13 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -28,7 +28,7 @@ pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags); #define arch_make_huge_pte arch_make_huge_pte #define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT extern void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte); + pte_t *ptep, pte_t pte, unsigned long sz); #define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS extern int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 9c52718ea750..13fd592228b1 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -241,15 +241,8 @@ static void clear_flush(struct mm_struct *mm, flush_tlb_range(&vma, saddr, addr); } -static inline struct folio *hugetlb_swap_entry_to_folio(swp_entry_t entry) -{ - VM_BUG_ON(!is_migration_entry(entry) && !is_hwpoison_entry(entry)); - - return page_folio(pfn_to_page(swp_offset_pfn(entry))); -} - void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) + pte_t *ptep, pte_t pte, unsigned long sz) { size_t pgsize; int i; @@ -257,13 +250,10 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, unsigned long pfn, dpfn; pgprot_t hugeprot; - if (!pte_present(pte)) { - struct folio *folio; - - folio = hugetlb_swap_entry_to_folio(pte_to_swp_entry(pte)); - ncontig = num_contig_ptes(folio_size(folio), &pgsize); + ncontig = num_contig_ptes(sz, &pgsize); - for (i = 0; i < ncontig; i++, ptep++) + if (!pte_present(pte)) { + for (i = 0; i < ncontig; i++, ptep++, addr += pgsize) set_pte_at(mm, addr, ptep, pte); return; } @@ -273,7 +263,6 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, return; } - ncontig = find_num_contig(mm, addr, ptep, &pgsize); pfn = pte_pfn(pte); dpfn = pgsize >> PAGE_SHIFT; hugeprot = pte_pgprot(pte); @@ -571,5 +560,7 @@ pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t old_pte, pte_t pte) { - set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + unsigned long psize = huge_page_size(hstate_vma(vma)); + + set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize); } diff --git a/arch/parisc/include/asm/hugetlb.h b/arch/parisc/include/asm/hugetlb.h index f7f078c2872c..72daacc472a0 100644 --- a/arch/parisc/include/asm/hugetlb.h +++ b/arch/parisc/include/asm/hugetlb.h @@ -6,7 +6,7 @@ #define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte); + pte_t *ptep, pte_t pte, unsigned long sz); #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c index a8a1a7c1e16e..a9f7e21f6656 100644 --- a/arch/parisc/mm/hugetlbpage.c +++ b/arch/parisc/mm/hugetlbpage.c @@ -140,7 +140,7 @@ static void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, } void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t entry) + pte_t *ptep, pte_t entry, unsigned long sz) { __set_huge_pte_at(mm, addr, ptep, entry); } diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h index de092b04ee1a..92df40c6cc6b 100644 --- a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h @@ -46,7 +46,8 @@ static inline int check_and_get_huge_psize(int shift) } #define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT -void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, + pte_t pte, unsigned long sz); #define __HAVE_ARCH_HUGE_PTE_CLEAR static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, diff --git a/arch/powerpc/mm/book3s64/hugetlbpage.c b/arch/powerpc/mm/book3s64/hugetlbpage.c index 3bc0eb21b2a0..5a2e512e96db 100644 --- a/arch/powerpc/mm/book3s64/hugetlbpage.c +++ b/arch/powerpc/mm/book3s64/hugetlbpage.c @@ -143,11 +143,14 @@ pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t old_pte, pte_t pte) { + unsigned long psize; if (radix_enabled()) return radix__huge_ptep_modify_prot_commit(vma, addr, ptep, old_pte, pte); - set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + + psize = huge_page_size(hstate_vma(vma)); + set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize); } void __init hugetlbpage_init_defaultsize(void) diff --git a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c index 17075c78d4bc..35fd2a95be24 100644 --- a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c +++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c @@ -47,6 +47,7 @@ void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma, pte_t old_pte, pte_t pte) { struct mm_struct *mm = vma->vm_mm; + unsigned long psize = huge_page_size(hstate_vma(vma)); /* * POWER9 NMMU must flush the TLB after clearing the PTE before @@ -58,5 +59,5 @@ void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma, atomic_read(&mm->context.copros) > 0) radix__flush_hugetlb_page(vma, addr); - set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize); } diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index dbbfe897455d..a642a7929892 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -91,7 +91,8 @@ static int __ref __early_map_kernel_hugepage(unsigned long va, phys_addr_t pa, if (new && WARN_ON(pte_present(*ptep) && pgprot_val(prot))) return -EINVAL; - set_huge_pte_at(&init_mm, va, ptep, pte_mkhuge(pfn_pte(pa >> PAGE_SHIFT, prot))); + set_huge_pte_at(&init_mm, va, ptep, + pte_mkhuge(pfn_pte(pa >> PAGE_SHIFT, prot)), psize); return 0; } diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 3f86fd217690..3ba9fe411604 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -288,7 +288,8 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, } #if defined(CONFIG_PPC_8xx) -void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, + pte_t pte, unsigned long sz) { pmd_t *pmd = pmd_off(mm, addr); pte_basic_t val; diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h index 34e24f078cc1..4c5b0e929890 100644 --- a/arch/riscv/include/asm/hugetlb.h +++ b/arch/riscv/include/asm/hugetlb.h @@ -18,7 +18,8 @@ void huge_pte_clear(struct mm_struct *mm, unsigned long addr, #define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT void set_huge_pte_at(struct mm_struct *mm, - unsigned long addr, pte_t *ptep, pte_t pte); + unsigned long addr, pte_t *ptep, pte_t pte, + unsigned long sz); #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR pte_t huge_ptep_get_and_clear(struct mm_struct *mm, diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index 96225a8533ad..e4a2ace92dbe 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -180,7 +180,8 @@ pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags) void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, - pte_t pte) + pte_t pte, + unsigned long sz) { int i, pte_num; diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index f07267875a19..deb198a61039 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -16,6 +16,8 @@ #define hugepages_supported() (MACHINE_HAS_EDAT1) void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, unsigned long sz); +void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); pte_t huge_ptep_get(pte_t *ptep); pte_t huge_ptep_get_and_clear(struct mm_struct *mm, @@ -65,7 +67,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, int changed = !pte_same(huge_ptep_get(ptep), pte); if (changed) { huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); - set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + __set_huge_pte_at(vma->vm_mm, addr, ptep, pte); } return changed; } @@ -74,7 +76,7 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep); - set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte)); + __set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte)); } static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot) diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index c718f2a0de94..297a6d897d5a 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -142,7 +142,7 @@ static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste) __storage_key_init_range(paddr, paddr + size - 1); } -void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, +void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { unsigned long rste; @@ -163,6 +163,12 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, set_pte(ptep, __pte(rste)); } +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, unsigned long sz) +{ + __set_huge_pte_at(mm, addr, ptep, pte); +} + pte_t huge_ptep_get(pte_t *ptep) { return __rste_to_pte(pte_val(*ptep)); diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h index 0a26cca24232..c714ca6a05aa 100644 --- a/arch/sparc/include/asm/hugetlb.h +++ b/arch/sparc/include/asm/hugetlb.h @@ -14,6 +14,8 @@ extern struct pud_huge_patch_entry __pud_huge_patch, __pud_huge_patch_end; #define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, unsigned long sz); +void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR @@ -32,7 +34,7 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t old_pte = *ptep; - set_huge_pte_at(mm, addr, ptep, pte_wrprotect(old_pte)); + __set_huge_pte_at(mm, addr, ptep, pte_wrprotect(old_pte)); } #define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS @@ -42,7 +44,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, { int changed = !pte_same(*ptep, pte); if (changed) { - set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + __set_huge_pte_at(vma->vm_mm, addr, ptep, pte); flush_tlb_page(vma, addr); } return changed; diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index d7018823206c..b432500c13a5 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -328,7 +328,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, return pte_offset_huge(pmd, addr); } -void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, +void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { unsigned int nptes, orig_shift, shift; @@ -364,6 +364,12 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, orig_shift); } +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry, unsigned long sz) +{ + __set_huge_pte_at(mm, addr, ptep, entry); +} + pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index d6ad98ca1288..e02b179ec659 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -955,6 +955,14 @@ static inline int pte_same(pte_t a, pte_t b) return a.pte == b.pte; } +static inline pte_t pte_next_pfn(pte_t pte) +{ + if (__pte_needs_invert(pte_val(pte))) + return __pte(pte_val(pte) - (1UL << PFN_PTE_SHIFT)); + return __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT)); +} +#define pte_next_pfn pte_next_pfn + static inline int pte_present(pte_t a) { return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 43b2a2851ba3..206812ce544a 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -345,10 +345,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) /* there's now no turning back... the old userspace image is dead, * defunct, deceased, etc. */ + SET_PERSONALITY(exec_params.hdr); if (elf_check_fdpic(&exec_params.hdr)) - set_personality(PER_LINUX_FDPIC); - else - set_personality(PER_LINUX); + current->personality |= PER_LINUX_FDPIC; if (elf_read_implies_exec(&exec_params.hdr, executable_stack)) current->personality |= READ_IMPLIES_EXEC; diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 48fe71d309cb..8beb2730929d 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -73,10 +73,8 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, struct the_nilfs *nilfs = inode->i_sb->s_fs_info; err = nilfs_dat_translate(nilfs->ns_dat, vbn, &pbn); - if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */ - brelse(bh); + if (unlikely(err)) /* -EIO, -ENOMEM, -ENOENT */ goto failed; - } } lock_buffer(bh); @@ -102,6 +100,8 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, failed: unlock_page(bh->b_page); put_page(bh->b_page); + if (unlikely(err)) + brelse(bh); return err; } diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h index 4da02798a00b..6dcf4d576970 100644 --- a/include/asm-generic/hugetlb.h +++ b/include/asm-generic/hugetlb.h @@ -76,7 +76,7 @@ static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, #ifndef __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) + pte_t *ptep, pte_t pte, unsigned long sz) { set_pte_at(mm, addr, ptep, pte); } diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 5b2626063f4f..a30686e649f7 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -984,7 +984,9 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t old_pte, pte_t pte) { - set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + unsigned long psize = huge_page_size(hstate_vma(vma)); + + set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize); } #endif @@ -1173,7 +1175,7 @@ static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, } static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) + pte_t *ptep, pte_t pte, unsigned long sz) { } diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index e41c70ac7744..d01e850b570f 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -428,6 +428,8 @@ struct ma_wr_state { #define MAS_ROOT ((struct maple_enode *)5UL) #define MAS_NONE ((struct maple_enode *)9UL) #define MAS_PAUSE ((struct maple_enode *)17UL) +#define MAS_OVERFLOW ((struct maple_enode *)33UL) +#define MAS_UNDERFLOW ((struct maple_enode *)65UL) #define MA_ERROR(err) \ ((struct maple_enode *)(((unsigned long)err << 2) | 2UL)) @@ -511,6 +513,15 @@ static inline bool mas_is_paused(const struct ma_state *mas) return mas->node == MAS_PAUSE; } +/* Check if the mas is pointing to a node or not */ +static inline bool mas_is_active(struct ma_state *mas) +{ + if ((unsigned long)mas->node >= MAPLE_RESERVED_RANGE) + return true; + + return false; +} + /** * mas_reset() - Reset a Maple Tree operation state. * @mas: Maple Tree operation state. diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 1fba072b3dac..af7639c3b0a3 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -206,6 +206,14 @@ static inline int pmd_young(pmd_t pmd) #endif #ifndef set_ptes + +#ifndef pte_next_pfn +static inline pte_t pte_next_pfn(pte_t pte) +{ + return __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT)); +} +#endif + /** * set_ptes - Map consecutive pages to a contiguous range of addresses. * @mm: Address space to map the pages into. @@ -231,7 +239,7 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr, if (--nr == 0) break; ptep++; - pte = __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT)); + pte = pte_next_pfn(pte); } arch_leave_lazy_mmu_mode(); } diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 03a7932cde0a..2f675ef045d4 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -740,6 +740,17 @@ subsys_initcall(crash_notes_memory_init); #define pr_fmt(fmt) "crash hp: " fmt /* + * Different than kexec/kdump loading/unloading/jumping/shrinking which + * usually rarely happen, there will be many crash hotplug events notified + * during one short period, e.g one memory board is hot added and memory + * regions are online. So mutex lock __crash_hotplug_lock is used to + * serialize the crash hotplug handling specifically. + */ +DEFINE_MUTEX(__crash_hotplug_lock); +#define crash_hotplug_lock() mutex_lock(&__crash_hotplug_lock) +#define crash_hotplug_unlock() mutex_unlock(&__crash_hotplug_lock) + +/* * This routine utilized when the crash_hotplug sysfs node is read. * It reflects the kernel's ability/permission to update the crash * elfcorehdr directly. @@ -748,9 +759,11 @@ int crash_check_update_elfcorehdr(void) { int rc = 0; + crash_hotplug_lock(); /* Obtain lock while reading crash information */ if (!kexec_trylock()) { pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n"); + crash_hotplug_unlock(); return 0; } if (kexec_crash_image) { @@ -761,6 +774,7 @@ int crash_check_update_elfcorehdr(void) } /* Release lock now that update complete */ kexec_unlock(); + crash_hotplug_unlock(); return rc; } @@ -783,9 +797,11 @@ static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu) { struct kimage *image; + crash_hotplug_lock(); /* Obtain lock while changing crash information */ if (!kexec_trylock()) { pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n"); + crash_hotplug_unlock(); return; } @@ -852,6 +868,7 @@ static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu) out: /* Release lock now that update complete */ kexec_unlock(); + crash_hotplug_unlock(); } static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index ee1ff0c59fd7..0e00a84e8e8f 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -256,6 +256,22 @@ bool mas_is_err(struct ma_state *mas) return xa_is_err(mas->node); } +static __always_inline bool mas_is_overflow(struct ma_state *mas) +{ + if (unlikely(mas->node == MAS_OVERFLOW)) + return true; + + return false; +} + +static __always_inline bool mas_is_underflow(struct ma_state *mas) +{ + if (unlikely(mas->node == MAS_UNDERFLOW)) + return true; + + return false; +} + static inline bool mas_searchable(struct ma_state *mas) { if (mas_is_none(mas)) @@ -4415,10 +4431,13 @@ no_entry: * * @mas: The maple state * @max: The minimum starting range + * @empty: Can be empty + * @set_underflow: Set the @mas->node to underflow state on limit. * * Return: The entry in the previous slot which is possibly NULL */ -static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty) +static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty, + bool set_underflow) { void *entry; void __rcu **slots; @@ -4435,7 +4454,6 @@ retry: if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) goto retry; -again: if (mas->min <= min) { pivot = mas_safe_min(mas, pivots, mas->offset); @@ -4443,9 +4461,10 @@ again: goto retry; if (pivot <= min) - return NULL; + goto underflow; } +again: if (likely(mas->offset)) { mas->offset--; mas->last = mas->index - 1; @@ -4457,7 +4476,7 @@ again: } if (mas_is_none(mas)) - return NULL; + goto underflow; mas->last = mas->max; node = mas_mn(mas); @@ -4474,10 +4493,19 @@ again: if (likely(entry)) return entry; - if (!empty) + if (!empty) { + if (mas->index <= min) + goto underflow; + goto again; + } return entry; + +underflow: + if (set_underflow) + mas->node = MAS_UNDERFLOW; + return NULL; } /* @@ -4567,10 +4595,13 @@ no_entry: * @mas: The maple state * @max: The maximum starting range * @empty: Can be empty + * @set_overflow: Should @mas->node be set to overflow when the limit is + * reached. * * Return: The entry in the next slot which is possibly NULL */ -static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty) +static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty, + bool set_overflow) { void __rcu **slots; unsigned long *pivots; @@ -4589,22 +4620,22 @@ retry: if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) goto retry; -again: if (mas->max >= max) { if (likely(mas->offset < data_end)) pivot = pivots[mas->offset]; else - return NULL; /* must be mas->max */ + goto overflow; if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) goto retry; if (pivot >= max) - return NULL; + goto overflow; } if (likely(mas->offset < data_end)) { mas->index = pivots[mas->offset] + 1; +again: mas->offset++; if (likely(mas->offset < data_end)) mas->last = pivots[mas->offset]; @@ -4616,8 +4647,11 @@ again: goto retry; } - if (mas_is_none(mas)) + if (WARN_ON_ONCE(mas_is_none(mas))) { + mas->node = MAS_OVERFLOW; return NULL; + goto overflow; + } mas->offset = 0; mas->index = mas->min; @@ -4636,12 +4670,20 @@ again: return entry; if (!empty) { - if (!mas->offset) - data_end = 2; + if (mas->last >= max) + goto overflow; + + mas->index = mas->last + 1; + /* Node cannot end on NULL, so it's safe to short-cut here */ goto again; } return entry; + +overflow: + if (set_overflow) + mas->node = MAS_OVERFLOW; + return NULL; } /* @@ -4651,17 +4693,20 @@ again: * * Set the @mas->node to the next entry and the range_start to * the beginning value for the entry. Does not check beyond @limit. - * Sets @mas->index and @mas->last to the limit if it is hit. + * Sets @mas->index and @mas->last to the range, Does not update @mas->index and + * @mas->last on overflow. * Restarts on dead nodes. * * Return: the next entry or %NULL. */ static inline void *mas_next_entry(struct ma_state *mas, unsigned long limit) { - if (mas->last >= limit) + if (mas->last >= limit) { + mas->node = MAS_OVERFLOW; return NULL; + } - return mas_next_slot(mas, limit, false); + return mas_next_slot(mas, limit, false, true); } /* @@ -4837,7 +4882,7 @@ void *mas_walk(struct ma_state *mas) { void *entry; - if (mas_is_none(mas) || mas_is_paused(mas) || mas_is_ptr(mas)) + if (!mas_is_active(mas) || !mas_is_start(mas)) mas->node = MAS_START; retry: entry = mas_state_walk(mas); @@ -5294,14 +5339,22 @@ static inline void mte_destroy_walk(struct maple_enode *enode, static void mas_wr_store_setup(struct ma_wr_state *wr_mas) { - if (mas_is_start(wr_mas->mas)) - return; + if (!mas_is_active(wr_mas->mas)) { + if (mas_is_start(wr_mas->mas)) + return; - if (unlikely(mas_is_paused(wr_mas->mas))) - goto reset; + if (unlikely(mas_is_paused(wr_mas->mas))) + goto reset; - if (unlikely(mas_is_none(wr_mas->mas))) - goto reset; + if (unlikely(mas_is_none(wr_mas->mas))) + goto reset; + + if (unlikely(mas_is_overflow(wr_mas->mas))) + goto reset; + + if (unlikely(mas_is_underflow(wr_mas->mas))) + goto reset; + } /* * A less strict version of mas_is_span_wr() where we allow spanning @@ -5595,8 +5648,25 @@ static inline bool mas_next_setup(struct ma_state *mas, unsigned long max, { bool was_none = mas_is_none(mas); - if (mas_is_none(mas) || mas_is_paused(mas)) + if (unlikely(mas->last >= max)) { + mas->node = MAS_OVERFLOW; + return true; + } + + if (mas_is_active(mas)) + return false; + + if (mas_is_none(mas) || mas_is_paused(mas)) { + mas->node = MAS_START; + } else if (mas_is_overflow(mas)) { + /* Overflowed before, but the max changed */ mas->node = MAS_START; + } else if (mas_is_underflow(mas)) { + mas->node = MAS_START; + *entry = mas_walk(mas); + if (*entry) + return true; + } if (mas_is_start(mas)) *entry = mas_walk(mas); /* Retries on dead nodes handled by mas_walk */ @@ -5615,6 +5685,7 @@ static inline bool mas_next_setup(struct ma_state *mas, unsigned long max, if (mas_is_none(mas)) return true; + return false; } @@ -5637,7 +5708,7 @@ void *mas_next(struct ma_state *mas, unsigned long max) return entry; /* Retries on dead nodes handled by mas_next_slot */ - return mas_next_slot(mas, max, false); + return mas_next_slot(mas, max, false, true); } EXPORT_SYMBOL_GPL(mas_next); @@ -5660,7 +5731,7 @@ void *mas_next_range(struct ma_state *mas, unsigned long max) return entry; /* Retries on dead nodes handled by mas_next_slot */ - return mas_next_slot(mas, max, true); + return mas_next_slot(mas, max, true, true); } EXPORT_SYMBOL_GPL(mas_next_range); @@ -5691,18 +5762,31 @@ EXPORT_SYMBOL_GPL(mt_next); static inline bool mas_prev_setup(struct ma_state *mas, unsigned long min, void **entry) { - if (mas->index <= min) - goto none; + if (unlikely(mas->index <= min)) { + mas->node = MAS_UNDERFLOW; + return true; + } - if (mas_is_none(mas) || mas_is_paused(mas)) + if (mas_is_active(mas)) + return false; + + if (mas_is_overflow(mas)) { mas->node = MAS_START; + *entry = mas_walk(mas); + if (*entry) + return true; + } - if (mas_is_start(mas)) { - mas_walk(mas); - if (!mas->index) - goto none; + if (mas_is_none(mas) || mas_is_paused(mas)) { + mas->node = MAS_START; + } else if (mas_is_underflow(mas)) { + /* underflowed before but the min changed */ + mas->node = MAS_START; } + if (mas_is_start(mas)) + mas_walk(mas); + if (unlikely(mas_is_ptr(mas))) { if (!mas->index) goto none; @@ -5747,7 +5831,7 @@ void *mas_prev(struct ma_state *mas, unsigned long min) if (mas_prev_setup(mas, min, &entry)) return entry; - return mas_prev_slot(mas, min, false); + return mas_prev_slot(mas, min, false, true); } EXPORT_SYMBOL_GPL(mas_prev); @@ -5770,7 +5854,7 @@ void *mas_prev_range(struct ma_state *mas, unsigned long min) if (mas_prev_setup(mas, min, &entry)) return entry; - return mas_prev_slot(mas, min, true); + return mas_prev_slot(mas, min, true, true); } EXPORT_SYMBOL_GPL(mas_prev_range); @@ -5828,24 +5912,35 @@ EXPORT_SYMBOL_GPL(mas_pause); static inline bool mas_find_setup(struct ma_state *mas, unsigned long max, void **entry) { - *entry = NULL; + if (mas_is_active(mas)) { + if (mas->last < max) + return false; - if (unlikely(mas_is_none(mas))) { + return true; + } + + if (mas_is_paused(mas)) { if (unlikely(mas->last >= max)) return true; - mas->index = mas->last; + mas->index = ++mas->last; mas->node = MAS_START; - } else if (unlikely(mas_is_paused(mas))) { + } else if (mas_is_none(mas)) { if (unlikely(mas->last >= max)) return true; + mas->index = mas->last; mas->node = MAS_START; - mas->index = ++mas->last; - } else if (unlikely(mas_is_ptr(mas))) - goto ptr_out_of_range; + } else if (mas_is_overflow(mas) || mas_is_underflow(mas)) { + if (mas->index > max) { + mas->node = MAS_OVERFLOW; + return true; + } + + mas->node = MAS_START; + } - if (unlikely(mas_is_start(mas))) { + if (mas_is_start(mas)) { /* First run or continue */ if (mas->index > max) return true; @@ -5895,7 +5990,7 @@ void *mas_find(struct ma_state *mas, unsigned long max) return entry; /* Retries on dead nodes handled by mas_next_slot */ - return mas_next_slot(mas, max, false); + return mas_next_slot(mas, max, false, false); } EXPORT_SYMBOL_GPL(mas_find); @@ -5913,13 +6008,13 @@ EXPORT_SYMBOL_GPL(mas_find); */ void *mas_find_range(struct ma_state *mas, unsigned long max) { - void *entry; + void *entry = NULL; if (mas_find_setup(mas, max, &entry)) return entry; /* Retries on dead nodes handled by mas_next_slot */ - return mas_next_slot(mas, max, true); + return mas_next_slot(mas, max, true, false); } EXPORT_SYMBOL_GPL(mas_find_range); @@ -5934,26 +6029,36 @@ EXPORT_SYMBOL_GPL(mas_find_range); static inline bool mas_find_rev_setup(struct ma_state *mas, unsigned long min, void **entry) { - *entry = NULL; - - if (unlikely(mas_is_none(mas))) { - if (mas->index <= min) - goto none; + if (mas_is_active(mas)) { + if (mas->index > min) + return false; - mas->last = mas->index; - mas->node = MAS_START; + return true; } - if (unlikely(mas_is_paused(mas))) { + if (mas_is_paused(mas)) { if (unlikely(mas->index <= min)) { mas->node = MAS_NONE; return true; } mas->node = MAS_START; mas->last = --mas->index; + } else if (mas_is_none(mas)) { + if (mas->index <= min) + goto none; + + mas->last = mas->index; + mas->node = MAS_START; + } else if (mas_is_underflow(mas) || mas_is_overflow(mas)) { + if (mas->last <= min) { + mas->node = MAS_UNDERFLOW; + return true; + } + + mas->node = MAS_START; } - if (unlikely(mas_is_start(mas))) { + if (mas_is_start(mas)) { /* First run or continue */ if (mas->index < min) return true; @@ -6004,13 +6109,13 @@ none: */ void *mas_find_rev(struct ma_state *mas, unsigned long min) { - void *entry; + void *entry = NULL; if (mas_find_rev_setup(mas, min, &entry)) return entry; /* Retries on dead nodes handled by mas_prev_slot */ - return mas_prev_slot(mas, min, false); + return mas_prev_slot(mas, min, false, false); } EXPORT_SYMBOL_GPL(mas_find_rev); @@ -6030,13 +6135,13 @@ EXPORT_SYMBOL_GPL(mas_find_rev); */ void *mas_find_range_rev(struct ma_state *mas, unsigned long min) { - void *entry; + void *entry = NULL; if (mas_find_rev_setup(mas, min, &entry)) return entry; /* Retries on dead nodes handled by mas_prev_slot */ - return mas_prev_slot(mas, min, true); + return mas_prev_slot(mas, min, true, false); } EXPORT_SYMBOL_GPL(mas_find_range_rev); diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c index 0674aebd4423..06959165e2f9 100644 --- a/lib/test_maple_tree.c +++ b/lib/test_maple_tree.c @@ -2166,7 +2166,7 @@ static noinline void __init next_prev_test(struct maple_tree *mt) MT_BUG_ON(mt, val != NULL); MT_BUG_ON(mt, mas.index != 0); MT_BUG_ON(mt, mas.last != 5); - MT_BUG_ON(mt, mas.node != MAS_NONE); + MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW); mas.index = 0; mas.last = 5; @@ -2917,6 +2917,7 @@ static noinline void __init check_empty_area_fill(struct maple_tree *mt) * exists MAS_NONE active range * exists active active range * DNE active active set to last range + * ERANGE active MAS_OVERFLOW last range * * Function ENTRY Start Result index & last * mas_prev() @@ -2945,6 +2946,7 @@ static noinline void __init check_empty_area_fill(struct maple_tree *mt) * any MAS_ROOT MAS_NONE 0 * exists active active range * DNE active active last range + * ERANGE active MAS_UNDERFLOW last range * * Function ENTRY Start Result index & last * mas_find() @@ -2955,7 +2957,7 @@ static noinline void __init check_empty_area_fill(struct maple_tree *mt) * DNE MAS_START MAS_NONE 0 * DNE MAS_PAUSE MAS_NONE 0 * DNE MAS_ROOT MAS_NONE 0 - * DNE MAS_NONE MAS_NONE 0 + * DNE MAS_NONE MAS_NONE 1 * if index == 0 * exists MAS_START MAS_ROOT 0 * exists MAS_PAUSE MAS_ROOT 0 @@ -2967,7 +2969,7 @@ static noinline void __init check_empty_area_fill(struct maple_tree *mt) * DNE MAS_START active set to max * exists MAS_PAUSE active range * DNE MAS_PAUSE active set to max - * exists MAS_NONE active range + * exists MAS_NONE active range (start at last) * exists active active range * DNE active active last range (max < last) * @@ -2992,7 +2994,7 @@ static noinline void __init check_empty_area_fill(struct maple_tree *mt) * DNE MAS_START active set to min * exists MAS_PAUSE active range * DNE MAS_PAUSE active set to min - * exists MAS_NONE active range + * exists MAS_NONE active range (start at index) * exists active active range * DNE active active last range (min > index) * @@ -3039,10 +3041,10 @@ static noinline void __init check_state_handling(struct maple_tree *mt) mtree_store_range(mt, 0, 0, ptr, GFP_KERNEL); mas_lock(&mas); - /* prev: Start -> none */ + /* prev: Start -> underflow*/ entry = mas_prev(&mas, 0); MT_BUG_ON(mt, entry != NULL); - MT_BUG_ON(mt, mas.node != MAS_NONE); + MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW); /* prev: Start -> root */ mas_set(&mas, 10); @@ -3069,7 +3071,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.node != MAS_NONE); - /* next: start -> none */ + /* next: start -> none*/ mas_set(&mas, 10); entry = mas_next(&mas, ULONG_MAX); MT_BUG_ON(mt, mas.index != 1); @@ -3268,25 +3270,46 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, mas.last != 0x2500); MT_BUG_ON(mt, !mas_active(mas)); - /* next:active -> active out of range*/ + /* next:active -> active beyond data */ entry = mas_next(&mas, 0x2999); MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0x2501); MT_BUG_ON(mt, mas.last != 0x2fff); MT_BUG_ON(mt, !mas_active(mas)); - /* Continue after out of range*/ + /* Continue after last range ends after max */ entry = mas_next(&mas, ULONG_MAX); MT_BUG_ON(mt, entry != ptr3); MT_BUG_ON(mt, mas.index != 0x3000); MT_BUG_ON(mt, mas.last != 0x3500); MT_BUG_ON(mt, !mas_active(mas)); - /* next:active -> active out of range*/ + /* next:active -> active continued */ + entry = mas_next(&mas, ULONG_MAX); + MT_BUG_ON(mt, entry != NULL); + MT_BUG_ON(mt, mas.index != 0x3501); + MT_BUG_ON(mt, mas.last != ULONG_MAX); + MT_BUG_ON(mt, !mas_active(mas)); + + /* next:active -> overflow */ entry = mas_next(&mas, ULONG_MAX); MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0x3501); MT_BUG_ON(mt, mas.last != ULONG_MAX); + MT_BUG_ON(mt, mas.node != MAS_OVERFLOW); + + /* next:overflow -> overflow */ + entry = mas_next(&mas, ULONG_MAX); + MT_BUG_ON(mt, entry != NULL); + MT_BUG_ON(mt, mas.index != 0x3501); + MT_BUG_ON(mt, mas.last != ULONG_MAX); + MT_BUG_ON(mt, mas.node != MAS_OVERFLOW); + + /* prev:overflow -> active */ + entry = mas_prev(&mas, 0); + MT_BUG_ON(mt, entry != ptr3); + MT_BUG_ON(mt, mas.index != 0x3000); + MT_BUG_ON(mt, mas.last != 0x3500); MT_BUG_ON(mt, !mas_active(mas)); /* next: none -> active, skip value at location */ @@ -3307,11 +3330,46 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, mas.last != 0x1500); MT_BUG_ON(mt, !mas_active(mas)); - /* prev:active -> active out of range*/ + /* prev:active -> active spanning end range */ + entry = mas_prev(&mas, 0x0100); + MT_BUG_ON(mt, entry != NULL); + MT_BUG_ON(mt, mas.index != 0); + MT_BUG_ON(mt, mas.last != 0x0FFF); + MT_BUG_ON(mt, !mas_active(mas)); + + /* prev:active -> underflow */ + entry = mas_prev(&mas, 0); + MT_BUG_ON(mt, entry != NULL); + MT_BUG_ON(mt, mas.index != 0); + MT_BUG_ON(mt, mas.last != 0x0FFF); + MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW); + + /* prev:underflow -> underflow */ entry = mas_prev(&mas, 0); MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0); MT_BUG_ON(mt, mas.last != 0x0FFF); + MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW); + + /* next:underflow -> active */ + entry = mas_next(&mas, ULONG_MAX); + MT_BUG_ON(mt, entry != ptr); + MT_BUG_ON(mt, mas.index != 0x1000); + MT_BUG_ON(mt, mas.last != 0x1500); + MT_BUG_ON(mt, !mas_active(mas)); + + /* prev:first value -> underflow */ + entry = mas_prev(&mas, 0x1000); + MT_BUG_ON(mt, entry != NULL); + MT_BUG_ON(mt, mas.index != 0x1000); + MT_BUG_ON(mt, mas.last != 0x1500); + MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW); + + /* find:underflow -> first value */ + entry = mas_find(&mas, ULONG_MAX); + MT_BUG_ON(mt, entry != ptr); + MT_BUG_ON(mt, mas.index != 0x1000); + MT_BUG_ON(mt, mas.last != 0x1500); MT_BUG_ON(mt, !mas_active(mas)); /* prev: pause ->active */ @@ -3325,14 +3383,14 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, mas.last != 0x2500); MT_BUG_ON(mt, !mas_active(mas)); - /* prev:active -> active out of range*/ + /* prev:active -> active spanning min */ entry = mas_prev(&mas, 0x1600); MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0x1501); MT_BUG_ON(mt, mas.last != 0x1FFF); MT_BUG_ON(mt, !mas_active(mas)); - /* prev: active ->active, continue*/ + /* prev: active ->active, continue */ entry = mas_prev(&mas, 0); MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0x1000); @@ -3379,7 +3437,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, mas.last != 0x2FFF); MT_BUG_ON(mt, !mas_active(mas)); - /* find: none ->active */ + /* find: overflow ->active */ entry = mas_find(&mas, 0x5000); MT_BUG_ON(mt, entry != ptr3); MT_BUG_ON(mt, mas.index != 0x3000); @@ -3778,7 +3836,6 @@ static int __init maple_tree_seed(void) check_empty_area_fill(&tree); mtree_destroy(&tree); - mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); check_state_handling(&tree); mtree_destroy(&tree); diff --git a/mm/damon/vaddr-test.h b/mm/damon/vaddr-test.h index c4b455b5ee30..dcf1ca6b31cc 100644 --- a/mm/damon/vaddr-test.h +++ b/mm/damon/vaddr-test.h @@ -148,6 +148,8 @@ static void damon_do_test_apply_three_regions(struct kunit *test, KUNIT_EXPECT_EQ(test, r->ar.start, expected[i * 2]); KUNIT_EXPECT_EQ(test, r->ar.end, expected[i * 2 + 1]); } + + damon_destroy_target(t); } /* diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index 4c81a9dbd044..cf8a9fc5c9d1 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -341,13 +341,14 @@ static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm, bool referenced = false; pte_t entry = huge_ptep_get(pte); struct folio *folio = pfn_folio(pte_pfn(entry)); + unsigned long psize = huge_page_size(hstate_vma(vma)); folio_get(folio); if (pte_young(entry)) { referenced = true; entry = pte_mkold(entry); - set_huge_pte_at(mm, addr, pte, entry); + set_huge_pte_at(mm, addr, pte, entry, psize); } #ifdef CONFIG_MMU_NOTIFIER diff --git a/mm/filemap.c b/mm/filemap.c index 4ea4387053e8..f0a15ce1bd1b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3503,7 +3503,7 @@ skip: if (count) { set_pte_range(vmf, folio, page, count, addr); folio_ref_add(folio, count); - if (in_range(vmf->address, addr, count)) + if (in_range(vmf->address, addr, count * PAGE_SIZE)) ret = VM_FAULT_NOPAGE; } @@ -3517,7 +3517,7 @@ skip: if (count) { set_pte_range(vmf, folio, page, count, addr); folio_ref_add(folio, count); - if (in_range(vmf->address, addr, count)) + if (in_range(vmf->address, addr, count * PAGE_SIZE)) ret = VM_FAULT_NOPAGE; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ba6d39b71cb1..52d26072dfda 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4980,7 +4980,7 @@ static bool is_hugetlb_entry_hwpoisoned(pte_t pte) static void hugetlb_install_folio(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr, - struct folio *new_folio, pte_t old) + struct folio *new_folio, pte_t old, unsigned long sz) { pte_t newpte = make_huge_pte(vma, &new_folio->page, 1); @@ -4988,7 +4988,7 @@ hugetlb_install_folio(struct vm_area_struct *vma, pte_t *ptep, unsigned long add hugepage_add_new_anon_rmap(new_folio, vma, addr); if (userfaultfd_wp(vma) && huge_pte_uffd_wp(old)) newpte = huge_pte_mkuffd_wp(newpte); - set_huge_pte_at(vma->vm_mm, addr, ptep, newpte); + set_huge_pte_at(vma->vm_mm, addr, ptep, newpte, sz); hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm); folio_set_hugetlb_migratable(new_folio); } @@ -5065,7 +5065,7 @@ again: } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) { if (!userfaultfd_wp(dst_vma)) entry = huge_pte_clear_uffd_wp(entry); - set_huge_pte_at(dst, addr, dst_pte, entry); + set_huge_pte_at(dst, addr, dst_pte, entry, sz); } else if (unlikely(is_hugetlb_entry_migration(entry))) { swp_entry_t swp_entry = pte_to_swp_entry(entry); bool uffd_wp = pte_swp_uffd_wp(entry); @@ -5080,18 +5080,18 @@ again: entry = swp_entry_to_pte(swp_entry); if (userfaultfd_wp(src_vma) && uffd_wp) entry = pte_swp_mkuffd_wp(entry); - set_huge_pte_at(src, addr, src_pte, entry); + set_huge_pte_at(src, addr, src_pte, entry, sz); } if (!userfaultfd_wp(dst_vma)) entry = huge_pte_clear_uffd_wp(entry); - set_huge_pte_at(dst, addr, dst_pte, entry); + set_huge_pte_at(dst, addr, dst_pte, entry, sz); } else if (unlikely(is_pte_marker(entry))) { pte_marker marker = copy_pte_marker( pte_to_swp_entry(entry), dst_vma); if (marker) set_huge_pte_at(dst, addr, dst_pte, - make_pte_marker(marker)); + make_pte_marker(marker), sz); } else { entry = huge_ptep_get(src_pte); pte_folio = page_folio(pte_page(entry)); @@ -5145,7 +5145,7 @@ again: goto again; } hugetlb_install_folio(dst_vma, dst_pte, addr, - new_folio, src_pte_old); + new_folio, src_pte_old, sz); spin_unlock(src_ptl); spin_unlock(dst_ptl); continue; @@ -5166,7 +5166,7 @@ again: if (!userfaultfd_wp(dst_vma)) entry = huge_pte_clear_uffd_wp(entry); - set_huge_pte_at(dst, addr, dst_pte, entry); + set_huge_pte_at(dst, addr, dst_pte, entry, sz); hugetlb_count_add(npages, dst); } spin_unlock(src_ptl); @@ -5184,7 +5184,8 @@ again: } static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr, - unsigned long new_addr, pte_t *src_pte, pte_t *dst_pte) + unsigned long new_addr, pte_t *src_pte, pte_t *dst_pte, + unsigned long sz) { struct hstate *h = hstate_vma(vma); struct mm_struct *mm = vma->vm_mm; @@ -5202,7 +5203,7 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr, spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); pte = huge_ptep_get_and_clear(mm, old_addr, src_pte); - set_huge_pte_at(mm, new_addr, dst_pte, pte); + set_huge_pte_at(mm, new_addr, dst_pte, pte, sz); if (src_ptl != dst_ptl) spin_unlock(src_ptl); @@ -5259,7 +5260,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma, if (!dst_pte) break; - move_huge_pte(vma, old_addr, new_addr, src_pte, dst_pte); + move_huge_pte(vma, old_addr, new_addr, src_pte, dst_pte, sz); } if (shared_pmd) @@ -5337,7 +5338,8 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct if (pte_swp_uffd_wp_any(pte) && !(zap_flags & ZAP_FLAG_DROP_MARKER)) set_huge_pte_at(mm, address, ptep, - make_pte_marker(PTE_MARKER_UFFD_WP)); + make_pte_marker(PTE_MARKER_UFFD_WP), + sz); else huge_pte_clear(mm, address, ptep, sz); spin_unlock(ptl); @@ -5371,7 +5373,8 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct if (huge_pte_uffd_wp(pte) && !(zap_flags & ZAP_FLAG_DROP_MARKER)) set_huge_pte_at(mm, address, ptep, - make_pte_marker(PTE_MARKER_UFFD_WP)); + make_pte_marker(PTE_MARKER_UFFD_WP), + sz); hugetlb_count_sub(pages_per_huge_page(h), mm); page_remove_rmap(page, vma, true); @@ -5676,7 +5679,7 @@ retry_avoidcopy: hugepage_add_new_anon_rmap(new_folio, vma, haddr); if (huge_pte_uffd_wp(pte)) newpte = huge_pte_mkuffd_wp(newpte); - set_huge_pte_at(mm, haddr, ptep, newpte); + set_huge_pte_at(mm, haddr, ptep, newpte, huge_page_size(h)); folio_set_hugetlb_migratable(new_folio); /* Make the old page be freed below */ new_folio = old_folio; @@ -5972,7 +5975,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, */ if (unlikely(pte_marker_uffd_wp(old_pte))) new_pte = huge_pte_mkuffd_wp(new_pte); - set_huge_pte_at(mm, haddr, ptep, new_pte); + set_huge_pte_at(mm, haddr, ptep, new_pte, huge_page_size(h)); hugetlb_count_add(pages_per_huge_page(h), mm); if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) { @@ -6261,7 +6264,8 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte, } _dst_pte = make_pte_marker(PTE_MARKER_POISONED); - set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte); + set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte, + huge_page_size(h)); /* No need to invalidate - it was non-present before */ update_mmu_cache(dst_vma, dst_addr, dst_pte); @@ -6412,7 +6416,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte, if (wp_enabled) _dst_pte = huge_pte_mkuffd_wp(_dst_pte); - set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte); + set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte, huge_page_size(h)); hugetlb_count_add(pages_per_huge_page(h), dst_mm); @@ -6598,7 +6602,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma, else if (uffd_wp_resolve) newpte = pte_swp_clear_uffd_wp(newpte); if (!pte_same(pte, newpte)) - set_huge_pte_at(mm, address, ptep, newpte); + set_huge_pte_at(mm, address, ptep, newpte, psize); } else if (unlikely(is_pte_marker(pte))) { /* No other markers apply for now. */ WARN_ON_ONCE(!pte_marker_uffd_wp(pte)); @@ -6623,7 +6627,8 @@ long hugetlb_change_protection(struct vm_area_struct *vma, if (unlikely(uffd_wp)) /* Safe to modify directly (none->non-present). */ set_huge_pte_at(mm, address, ptep, - make_pte_marker(PTE_MARKER_UFFD_WP)); + make_pte_marker(PTE_MARKER_UFFD_WP), + psize); } spin_unlock(ptl); } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d13dde2f8b56..5b009b233ab8 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3867,6 +3867,13 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of, case _MEMSWAP: ret = mem_cgroup_resize_max(memcg, nr_pages, true); break; + case _KMEM: + pr_warn_once("kmem.limit_in_bytes is deprecated and will be removed. " + "Writing any value to this file has no effect. " + "Please report your usecase to linux-mm@kvack.org if you " + "depend on this functionality.\n"); + ret = 0; + break; case _TCP: ret = memcg_update_tcp_max(memcg, nr_pages); break; @@ -5078,6 +5085,12 @@ static struct cftype mem_cgroup_legacy_files[] = { }, #endif { + .name = "kmem.limit_in_bytes", + .private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT), + .write = mem_cgroup_write, + .read_u64 = mem_cgroup_read_u64, + }, + { .name = "kmem.usage_in_bytes", .private = MEMFILE_PRIVATE(_KMEM, RES_USAGE), .read_u64 = mem_cgroup_read_u64, diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 42b5567e3773..f1b00d6ac7ee 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -426,6 +426,7 @@ struct queue_pages { unsigned long start; unsigned long end; struct vm_area_struct *first; + bool has_unmovable; }; /* @@ -446,9 +447,8 @@ static inline bool queue_folio_required(struct folio *folio, /* * queue_folios_pmd() has three possible return values: * 0 - folios are placed on the right node or queued successfully, or - * special page is met, i.e. huge zero page. - * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were - * specified. + * special page is met, i.e. zero page, or unmovable page is found + * but continue walking (indicated by queue_pages.has_unmovable). * -EIO - is migration entry or only MPOL_MF_STRICT was specified and an * existing folio was already on a node that does not follow the * policy. @@ -479,7 +479,7 @@ static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { if (!vma_migratable(walk->vma) || migrate_folio_add(folio, qp->pagelist, flags)) { - ret = 1; + qp->has_unmovable = true; goto unlock; } } else @@ -495,9 +495,8 @@ unlock: * * queue_folios_pte_range() has three possible return values: * 0 - folios are placed on the right node or queued successfully, or - * special page is met, i.e. zero page. - * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were - * specified. + * special page is met, i.e. zero page, or unmovable page is found + * but continue walking (indicated by queue_pages.has_unmovable). * -EIO - only MPOL_MF_STRICT was specified and an existing folio was already * on a node that does not follow the policy. */ @@ -508,7 +507,6 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, struct folio *folio; struct queue_pages *qp = walk->private; unsigned long flags = qp->flags; - bool has_unmovable = false; pte_t *pte, *mapped_pte; pte_t ptent; spinlock_t *ptl; @@ -538,11 +536,12 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, if (!queue_folio_required(folio, qp)) continue; if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { - /* MPOL_MF_STRICT must be specified if we get here */ - if (!vma_migratable(vma)) { - has_unmovable = true; - break; - } + /* + * MPOL_MF_STRICT must be specified if we get here. + * Continue walking vmas due to MPOL_MF_MOVE* flags. + */ + if (!vma_migratable(vma)) + qp->has_unmovable = true; /* * Do not abort immediately since there may be @@ -550,16 +549,13 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, * need migrate other LRU pages. */ if (migrate_folio_add(folio, qp->pagelist, flags)) - has_unmovable = true; + qp->has_unmovable = true; } else break; } pte_unmap_unlock(mapped_pte, ptl); cond_resched(); - if (has_unmovable) - return 1; - return addr != end ? -EIO : 0; } @@ -599,7 +595,7 @@ static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask, * Detecting misplaced folio but allow migrating folios which * have been queued. */ - ret = 1; + qp->has_unmovable = true; goto unlock; } @@ -620,7 +616,7 @@ static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask, * Failed to isolate folio but allow migrating pages * which have been queued. */ - ret = 1; + qp->has_unmovable = true; } unlock: spin_unlock(ptl); @@ -756,12 +752,15 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, .start = start, .end = end, .first = NULL, + .has_unmovable = false, }; const struct mm_walk_ops *ops = lock_vma ? &queue_pages_lock_vma_walk_ops : &queue_pages_walk_ops; err = walk_page_range(mm, start, end, ops, &qp); + if (qp.has_unmovable) + err = 1; if (!qp.first) /* whole range in hole */ err = -EFAULT; @@ -1358,7 +1357,7 @@ static long do_mbind(unsigned long start, unsigned long len, putback_movable_pages(&pagelist); } - if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT))) + if (((ret > 0) || nr_failed) && (flags & MPOL_MF_STRICT)) err = -EIO; } else { up_out: diff --git a/mm/migrate.c b/mm/migrate.c index b7fa020003f3..2053b54556ca 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -243,7 +243,9 @@ static bool remove_migration_pte(struct folio *folio, #ifdef CONFIG_HUGETLB_PAGE if (folio_test_hugetlb(folio)) { - unsigned int shift = huge_page_shift(hstate_vma(vma)); + struct hstate *h = hstate_vma(vma); + unsigned int shift = huge_page_shift(h); + unsigned long psize = huge_page_size(h); pte = arch_make_huge_pte(pte, shift, vma->vm_flags); if (folio_test_anon(folio)) @@ -251,7 +253,8 @@ static bool remove_migration_pte(struct folio *folio, rmap_flags); else page_dup_file_rmap(new, true); - set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte); + set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte, + psize); } else #endif { diff --git a/mm/rmap.c b/mm/rmap.c index ec7f8e6c9e48..9f795b93cf40 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1480,6 +1480,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, struct mmu_notifier_range range; enum ttu_flags flags = (enum ttu_flags)(long)arg; unsigned long pfn; + unsigned long hsz = 0; /* * When racing against e.g. zap_pte_range() on another cpu, @@ -1511,6 +1512,9 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, */ adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end); + + /* We need the huge page size for set_huge_pte_at() */ + hsz = huge_page_size(hstate_vma(vma)); } mmu_notifier_invalidate_range_start(&range); @@ -1628,7 +1632,8 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, pteval = swp_entry_to_pte(make_hwpoison_entry(subpage)); if (folio_test_hugetlb(folio)) { hugetlb_count_sub(folio_nr_pages(folio), mm); - set_huge_pte_at(mm, address, pvmw.pte, pteval); + set_huge_pte_at(mm, address, pvmw.pte, pteval, + hsz); } else { dec_mm_counter(mm, mm_counter(&folio->page)); set_pte_at(mm, address, pvmw.pte, pteval); @@ -1820,6 +1825,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, struct mmu_notifier_range range; enum ttu_flags flags = (enum ttu_flags)(long)arg; unsigned long pfn; + unsigned long hsz = 0; /* * When racing against e.g. zap_pte_range() on another cpu, @@ -1855,6 +1861,9 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, */ adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end); + + /* We need the huge page size for set_huge_pte_at() */ + hsz = huge_page_size(hstate_vma(vma)); } mmu_notifier_invalidate_range_start(&range); @@ -2020,7 +2029,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, pteval = swp_entry_to_pte(make_hwpoison_entry(subpage)); if (folio_test_hugetlb(folio)) { hugetlb_count_sub(folio_nr_pages(folio), mm); - set_huge_pte_at(mm, address, pvmw.pte, pteval); + set_huge_pte_at(mm, address, pvmw.pte, pteval, + hsz); } else { dec_mm_counter(mm, mm_counter(&folio->page)); set_pte_at(mm, address, pvmw.pte, pteval); @@ -2044,7 +2054,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, if (arch_unmap_one(mm, vma, address, pteval) < 0) { if (folio_test_hugetlb(folio)) - set_huge_pte_at(mm, address, pvmw.pte, pteval); + set_huge_pte_at(mm, address, pvmw.pte, + pteval, hsz); else set_pte_at(mm, address, pvmw.pte, pteval); ret = false; @@ -2058,7 +2069,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, if (anon_exclusive && page_try_share_anon_rmap(subpage)) { if (folio_test_hugetlb(folio)) - set_huge_pte_at(mm, address, pvmw.pte, pteval); + set_huge_pte_at(mm, address, pvmw.pte, + pteval, hsz); else set_pte_at(mm, address, pvmw.pte, pteval); ret = false; @@ -2090,7 +2102,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, if (pte_uffd_wp(pteval)) swp_pte = pte_swp_mkuffd_wp(swp_pte); if (folio_test_hugetlb(folio)) - set_huge_pte_at(mm, address, pvmw.pte, swp_pte); + set_huge_pte_at(mm, address, pvmw.pte, swp_pte, + hsz); else set_pte_at(mm, address, pvmw.pte, swp_pte); trace_set_migration_pte(address, pte_val(swp_pte), diff --git a/mm/vmalloc.c b/mm/vmalloc.c index ef8599d394fd..a3fedb3ee0db 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -111,7 +111,7 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, pte_t entry = pfn_pte(pfn, prot); entry = arch_make_huge_pte(entry, ilog2(size), 0); - set_huge_pte_at(&init_mm, addr, pte, entry); + set_huge_pte_at(&init_mm, addr, pte, entry, size); pfn += PFN_DOWN(size); continue; } diff --git a/mm/zswap.c b/mm/zswap.c index 412b1409a0d7..083c693602b8 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1219,6 +1219,19 @@ bool zswap_store(struct folio *folio) return false; /* + * If this is a duplicate, it must be removed before attempting to store + * it, otherwise, if the store fails the old page won't be removed from + * the tree, and it might be written back overriding the new data. + */ + spin_lock(&tree->lock); + dupentry = zswap_rb_search(&tree->rbroot, offset); + if (dupentry) { + zswap_duplicate_entry++; + zswap_invalidate_entry(tree, dupentry); + } + spin_unlock(&tree->lock); + + /* * XXX: zswap reclaim does not work with cgroups yet. Without a * cgroup-aware entry LRU, we will push out entries system-wide based on * local cgroup limits. @@ -1333,7 +1346,14 @@ insert_entry: /* map */ spin_lock(&tree->lock); + /* + * A duplicate entry should have been removed at the beginning of this + * function. Since the swap entry should be pinned, if a duplicate is + * found again here it means that something went wrong in the swap + * cache. + */ while (zswap_rb_insert(&tree->rbroot, entry, &dupentry) == -EEXIST) { + WARN_ON(1); zswap_duplicate_entry++; zswap_invalidate_entry(tree, dupentry); } diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh index a5cb4b09a46c..0899019a7fcb 100755 --- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh +++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh @@ -25,7 +25,7 @@ if [[ "$1" == "-cgroup-v2" ]]; then fi if [[ $cgroup2 ]]; then - cgroup_path=$(mount -t cgroup2 | head -1 | awk -e '{print $3}') + cgroup_path=$(mount -t cgroup2 | head -1 | awk '{print $3}') if [[ -z "$cgroup_path" ]]; then cgroup_path=/dev/cgroup/memory mount -t cgroup2 none $cgroup_path @@ -33,7 +33,7 @@ if [[ $cgroup2 ]]; then fi echo "+hugetlb" >$cgroup_path/cgroup.subtree_control else - cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}') + cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}') if [[ -z "$cgroup_path" ]]; then cgroup_path=/dev/cgroup/memory mount -t cgroup memory,hugetlb $cgroup_path diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh index bf2d2a684edf..14d26075c863 100755 --- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh +++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh @@ -20,7 +20,7 @@ fi if [[ $cgroup2 ]]; then - CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk -e '{print $3}') + CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk '{print $3}') if [[ -z "$CGROUP_ROOT" ]]; then CGROUP_ROOT=/dev/cgroup/memory mount -t cgroup2 none $CGROUP_ROOT @@ -28,7 +28,7 @@ if [[ $cgroup2 ]]; then fi echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control else - CGROUP_ROOT=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}') + CGROUP_ROOT=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}') if [[ -z "$CGROUP_ROOT" ]]; then CGROUP_ROOT=/dev/cgroup/memory mount -t cgroup memory,hugetlb $CGROUP_ROOT |