summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- include/linux/mmu_notifier.h 47
-rw-r--r-- mm/filemap_xip.c 4
-rw-r--r-- mm/huge_memory.c 42
-rw-r--r-- mm/hugetlb.c 21
-rw-r--r-- mm/memory.c 28
-rw-r--r-- mm/mremap.c 8
-rw-r--r-- mm/rmap.c 18
7 files changed, 92 insertions, 76 deletions
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 4b7183e9806..bc823c4c028 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -246,50 +246,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
__mmu_notifier_mm_destroy(mm);
}
-/*
- * These two macros will sometime replace ptep_clear_flush.
- * ptep_clear_flush is implemented as macro itself, so this also is
- * implemented as a macro until ptep_clear_flush will converted to an
- * inline function, to diminish the risk of compilation failure. The
- * invalidate_page method over time can be moved outside the PT lock
- * and these two macros can be later removed.
- */
-#define ptep_clear_flush_notify(__vma, __address, __ptep) \
-({ \
- pte_t __pte; \
- struct vm_area_struct *___vma = __vma; \
- unsigned long ___address = __address; \
- __pte = ptep_clear_flush(___vma, ___address, __ptep); \
- mmu_notifier_invalidate_page(___vma->vm_mm, ___address); \
- __pte; \
-})
-
-#define pmdp_clear_flush_notify(__vma, __address, __pmdp) \
-({ \
- pmd_t __pmd; \
- struct vm_area_struct *___vma = __vma; \
- unsigned long ___address = __address; \
- VM_BUG_ON(__address & ~HPAGE_PMD_MASK); \
- mmu_notifier_invalidate_range_start(___vma->vm_mm, ___address, \
- (__address)+HPAGE_PMD_SIZE);\
- __pmd = pmdp_clear_flush(___vma, ___address, __pmdp); \
- mmu_notifier_invalidate_range_end(___vma->vm_mm, ___address, \
- (__address)+HPAGE_PMD_SIZE); \
- __pmd; \
-})
-
-#define pmdp_splitting_flush_notify(__vma, __address, __pmdp) \
-({ \
- struct vm_area_struct *___vma = __vma; \
- unsigned long ___address = __address; \
- VM_BUG_ON(__address & ~HPAGE_PMD_MASK); \
- mmu_notifier_invalidate_range_start(___vma->vm_mm, ___address, \
- (__address)+HPAGE_PMD_SIZE);\
- pmdp_splitting_flush(___vma, ___address, __pmdp); \
- mmu_notifier_invalidate_range_end(___vma->vm_mm, ___address, \
- (__address)+HPAGE_PMD_SIZE); \
-})
-
#define ptep_clear_flush_young_notify(__vma, __address, __ptep) \
({ \
int __young; \
@@ -380,9 +336,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
#define ptep_clear_flush_young_notify ptep_clear_flush_young
#define pmdp_clear_flush_young_notify pmdp_clear_flush_young
-#define ptep_clear_flush_notify ptep_clear_flush
-#define pmdp_clear_flush_notify pmdp_clear_flush
-#define pmdp_splitting_flush_notify pmdp_splitting_flush
#define set_pte_at_notify set_pte_at
#endif /* CONFIG_MMU_NOTIFIER */
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index a52daee11d3..a912da6ddfd 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -192,11 +192,13 @@ retry:
if (pte) {
/* Nuke the page table entry. */
flush_cache_page(vma, address, pte_pfn(*pte));
- pteval = ptep_clear_flush_notify(vma, address, pte);
+ pteval = ptep_clear_flush(vma, address, pte);
page_remove_rmap(page);
dec_mm_counter(mm, MM_FILEPAGES);
BUG_ON(pte_dirty(pteval));
pte_unmap_unlock(pte, ptl);
+ /* must invalidate_page _before_ freeing the page */
+ mmu_notifier_invalidate_page(mm, address);
page_cache_release(page);
}
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0e7740923fb..08a943b9cf9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -787,6 +787,8 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
pmd_t _pmd;
int ret = 0, i;
struct page **pages;
+ unsigned long mmun_start; /* For mmu_notifiers */
+ unsigned long mmun_end; /* For mmu_notifiers */
pages = kmalloc(sizeof(struct page *) * HPAGE_PMD_NR,
GFP_KERNEL);
@@ -823,12 +825,16 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
cond_resched();
}
+ mmun_start = haddr;
+ mmun_end = haddr + HPAGE_PMD_SIZE;
+ mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
spin_lock(&mm->page_table_lock);
if (unlikely(!pmd_same(*pmd, orig_pmd)))
goto out_free_pages;
VM_BUG_ON(!PageHead(page));
- pmdp_clear_flush_notify(vma, haddr, pmd);
+ pmdp_clear_flush(vma, haddr, pmd);
/* leave pmd empty until pte is filled */
pgtable = pgtable_trans_huge_withdraw(mm);
@@ -851,6 +857,8 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
page_remove_rmap(page);
spin_unlock(&mm->page_table_lock);
+ mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+
ret |= VM_FAULT_WRITE;
put_page(page);
@@ -859,6 +867,7 @@ out:
out_free_pages:
spin_unlock(&mm->page_table_lock);
+ mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
mem_cgroup_uncharge_start();
for (i = 0; i < HPAGE_PMD_NR; i++) {
mem_cgroup_uncharge_page(pages[i]);
@@ -875,6 +884,8 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
int ret = 0;
struct page *page, *new_page;
unsigned long haddr;
+ unsigned long mmun_start; /* For mmu_notifiers */
+ unsigned long mmun_end; /* For mmu_notifiers */
VM_BUG_ON(!vma->anon_vma);
spin_lock(&mm->page_table_lock);
@@ -925,20 +936,24 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
copy_user_huge_page(new_page, page, haddr, vma, HPAGE_PMD_NR);
__SetPageUptodate(new_page);
+ mmun_start = haddr;
+ mmun_end = haddr + HPAGE_PMD_SIZE;
+ mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
spin_lock(&mm->page_table_lock);
put_page(page);
if (unlikely(!pmd_same(*pmd, orig_pmd))) {
spin_unlock(&mm->page_table_lock);
mem_cgroup_uncharge_page(new_page);
put_page(new_page);
- goto out;
+ goto out_mn;
} else {
pmd_t entry;
VM_BUG_ON(!PageHead(page));
entry = mk_pmd(new_page, vma->vm_page_prot);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
entry = pmd_mkhuge(entry);
- pmdp_clear_flush_notify(vma, haddr, pmd);
+ pmdp_clear_flush(vma, haddr, pmd);
page_add_new_anon_rmap(new_page, vma, haddr);
set_pmd_at(mm, haddr, pmd, entry);
update_mmu_cache(vma, address, pmd);
@@ -946,10 +961,14 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
put_page(page);
ret |= VM_FAULT_WRITE;
}
-out_unlock:
spin_unlock(&mm->page_table_lock);
+out_mn:
+ mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
out:
return ret;
+out_unlock:
+ spin_unlock(&mm->page_table_lock);
+ return ret;
}
struct page *follow_trans_huge_pmd(struct mm_struct *mm,
@@ -1162,7 +1181,11 @@ static int __split_huge_page_splitting(struct page *page,
struct mm_struct *mm = vma->vm_mm;
pmd_t *pmd;
int ret = 0;
+ /* For mmu_notifiers */
+ const unsigned long mmun_start = address;
+ const unsigned long mmun_end = address + HPAGE_PMD_SIZE;
+ mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
spin_lock(&mm->page_table_lock);
pmd = page_check_address_pmd(page, mm, address,
PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG);
@@ -1174,10 +1197,11 @@ static int __split_huge_page_splitting(struct page *page,
* and it won't wait on the anon_vma->root->mutex to
* serialize against split_huge_page*.
*/
- pmdp_splitting_flush_notify(vma, address, pmd);
+ pmdp_splitting_flush(vma, address, pmd);
ret = 1;
}
spin_unlock(&mm->page_table_lock);
+ mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
return ret;
}
@@ -1898,6 +1922,8 @@ static void collapse_huge_page(struct mm_struct *mm,
spinlock_t *ptl;
int isolated;
unsigned long hstart, hend;
+ unsigned long mmun_start; /* For mmu_notifiers */
+ unsigned long mmun_end; /* For mmu_notifiers */
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
@@ -1952,6 +1978,9 @@ static void collapse_huge_page(struct mm_struct *mm,
pte = pte_offset_map(pmd, address);
ptl = pte_lockptr(mm, pmd);
+ mmun_start = address;
+ mmun_end = address + HPAGE_PMD_SIZE;
+ mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
spin_lock(&mm->page_table_lock); /* probably unnecessary */
/*
* After this gup_fast can't run anymore. This also removes
@@ -1959,8 +1988,9 @@ static void collapse_huge_page(struct mm_struct *mm,
* huge and small TLB entries for the same virtual address
* to avoid the risk of CPU bugs in that area.
*/
- _pmd = pmdp_clear_flush_notify(vma, address, pmd);
+ _pmd = pmdp_clear_flush(vma, address, pmd);
spin_unlock(&mm->page_table_lock);
+ mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
spin_lock(ptl);
isolated = __collapse_huge_page_isolate(vma, address, pte);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index de5d1dcf34f..993f7c1820a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2355,13 +2355,15 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
struct page *page;
struct hstate *h = hstate_vma(vma);
unsigned long sz = huge_page_size(h);
+ const unsigned long mmun_start = start; /* For mmu_notifiers */
+ const unsigned long mmun_end = end; /* For mmu_notifiers */
WARN_ON(!is_vm_hugetlb_page(vma));
BUG_ON(start & ~huge_page_mask(h));
BUG_ON(end & ~huge_page_mask(h));
tlb_start_vma(tlb, vma);
- mmu_notifier_invalidate_range_start(mm, start, end);
+ mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
again:
spin_lock(&mm->page_table_lock);
for (address = start; address < end; address += sz) {
@@ -2425,7 +2427,7 @@ again:
if (address < end && !ref_page)
goto again;
}
- mmu_notifier_invalidate_range_end(mm, start, end);
+ mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
tlb_end_vma(tlb, vma);
}
@@ -2525,6 +2527,8 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
struct page *old_page, *new_page;
int avoidcopy;
int outside_reserve = 0;
+ unsigned long mmun_start; /* For mmu_notifiers */
+ unsigned long mmun_end; /* For mmu_notifiers */
old_page = pte_page(pte);
@@ -2611,6 +2615,9 @@ retry_avoidcopy:
pages_per_huge_page(h));
__SetPageUptodate(new_page);
+ mmun_start = address & huge_page_mask(h);
+ mmun_end = mmun_start + huge_page_size(h);
+ mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
/*
* Retake the page_table_lock to check for racing updates
* before the page tables are altered
@@ -2619,9 +2626,6 @@ retry_avoidcopy:
ptep = huge_pte_offset(mm, address & huge_page_mask(h));
if (likely(pte_same(huge_ptep_get(ptep), pte))) {
/* Break COW */
- mmu_notifier_invalidate_range_start(mm,
- address & huge_page_mask(h),
- (address & huge_page_mask(h)) + huge_page_size(h));
huge_ptep_clear_flush(vma, address, ptep);
set_huge_pte_at(mm, address, ptep,
make_huge_pte(vma, new_page, 1));
@@ -2629,10 +2633,11 @@ retry_avoidcopy:
hugepage_add_new_anon_rmap(new_page, vma, address);
/* Make the old page be freed below */
new_page = old_page;
- mmu_notifier_invalidate_range_end(mm,
- address & huge_page_mask(h),
- (address & huge_page_mask(h)) + huge_page_size(h));
}
+ spin_unlock(&mm->page_table_lock);
+ mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+ /* Caller expects lock to be held */
+ spin_lock(&mm->page_table_lock);
page_cache_release(new_page);
page_cache_release(old_page);
return 0;
diff --git a/mm/memory.c b/mm/memory.c
index 5f5d1f039bf..b03a4a21c1d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -712,7 +712,7 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
add_taint(TAINT_BAD_PAGE);
}
-static inline int is_cow_mapping(vm_flags_t flags)
+static inline bool is_cow_mapping(vm_flags_t flags)
{
return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
}
@@ -1039,6 +1039,9 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
unsigned long next;
unsigned long addr = vma->vm_start;
unsigned long end = vma->vm_end;
+ unsigned long mmun_start; /* For mmu_notifiers */
+ unsigned long mmun_end; /* For mmu_notifiers */
+ bool is_cow;
int ret;
/*
@@ -1072,8 +1075,12 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
* parent mm. And a permission downgrade will only happen if
* is_cow_mapping() returns true.
*/
- if (is_cow_mapping(vma->vm_flags))
- mmu_notifier_invalidate_range_start(src_mm, addr, end);
+ is_cow = is_cow_mapping(vma->vm_flags);
+ mmun_start = addr;
+ mmun_end = end;
+ if (is_cow)
+ mmu_notifier_invalidate_range_start(src_mm, mmun_start,
+ mmun_end);
ret = 0;
dst_pgd = pgd_offset(dst_mm, addr);
@@ -1089,9 +1096,8 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
}
} while (dst_pgd++, src_pgd++, addr = next, addr != end);
- if (is_cow_mapping(vma->vm_flags))
- mmu_notifier_invalidate_range_end(src_mm,
- vma->vm_start, end);
+ if (is_cow)
+ mmu_notifier_invalidate_range_end(src_mm, mmun_start, mmun_end);
return ret;
}
@@ -2516,7 +2522,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
spinlock_t *ptl, pte_t orig_pte)
__releases(ptl)
{
- struct page *old_page, *new_page;
+ struct page *old_page, *new_page = NULL;
pte_t entry;
int ret = 0;
int page_mkwrite = 0;
@@ -2760,10 +2766,14 @@ gotten:
} else
mem_cgroup_uncharge_page(new_page);
- if (new_page)
- page_cache_release(new_page);
unlock:
pte_unmap_unlock(page_table, ptl);
+ if (new_page) {
+ if (new_page == old_page)
+ /* cow happened, notify before releasing old_page */
+ mmu_notifier_invalidate_page(mm, address);
+ page_cache_release(new_page);
+ }
if (old_page) {
/*
* Don't let another task, with possibly unlocked vma,
diff --git a/mm/mremap.c b/mm/mremap.c
index 3b639a4b26b..1b61c2d3307 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -149,11 +149,15 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
unsigned long extent, next, old_end;
pmd_t *old_pmd, *new_pmd;
bool need_flush = false;
+ unsigned long mmun_start; /* For mmu_notifiers */
+ unsigned long mmun_end; /* For mmu_notifiers */
old_end = old_addr + len;
flush_cache_range(vma, old_addr, old_end);
- mmu_notifier_invalidate_range_start(vma->vm_mm, old_addr, old_end);
+ mmun_start = old_addr;
+ mmun_end = old_end;
+ mmu_notifier_invalidate_range_start(vma->vm_mm, mmun_start, mmun_end);
for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
cond_resched();
@@ -197,7 +201,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
if (likely(need_flush))
flush_tlb_range(vma, old_end-len, old_addr);
- mmu_notifier_invalidate_range_end(vma->vm_mm, old_end-len, old_end);
+ mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
return len + old_addr - old_end; /* how much done */
}
diff --git a/mm/rmap.c b/mm/rmap.c
index bf03149f495..7df7984d476 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -884,7 +884,7 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
pte_t entry;
flush_cache_page(vma, address, pte_pfn(*pte));
- entry = ptep_clear_flush_notify(vma, address, pte);
+ entry = ptep_clear_flush(vma, address, pte);
entry = pte_wrprotect(entry);
entry = pte_mkclean(entry);
set_pte_at(mm, address, pte, entry);
@@ -892,6 +892,9 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
}
pte_unmap_unlock(pte, ptl);
+
+ if (ret)
+ mmu_notifier_invalidate_page(mm, address);
out:
return ret;
}
@@ -1212,7 +1215,7 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
/* Nuke the page table entry. */
flush_cache_page(vma, address, page_to_pfn(page));
- pteval = ptep_clear_flush_notify(vma, address, pte);
+ pteval = ptep_clear_flush(vma, address, pte);
/* Move the dirty bit to the physical page now the pte is gone. */
if (pte_dirty(pteval))
@@ -1274,6 +1277,8 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
out_unmap:
pte_unmap_unlock(pte, ptl);
+ if (ret != SWAP_FAIL)
+ mmu_notifier_invalidate_page(mm, address);
out:
return ret;
@@ -1338,6 +1343,8 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
spinlock_t *ptl;
struct page *page;
unsigned long address;
+ unsigned long mmun_start; /* For mmu_notifiers */
+ unsigned long mmun_end; /* For mmu_notifiers */
unsigned long end;
int ret = SWAP_AGAIN;
int locked_vma = 0;
@@ -1361,6 +1368,10 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
if (!pmd_present(*pmd))
return ret;
+ mmun_start = address;
+ mmun_end = end;
+ mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
/*
* If we can acquire the mmap_sem for read, and vma is VM_LOCKED,
* keep the sem while scanning the cluster for mlocking pages.
@@ -1394,7 +1405,7 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
/* Nuke the page table entry. */
flush_cache_page(vma, address, pte_pfn(*pte));
- pteval = ptep_clear_flush_notify(vma, address, pte);
+ pteval = ptep_clear_flush(vma, address, pte);
/* If nonlinear, store the file page offset in the pte. */
if (page->index != linear_page_index(vma, address))
@@ -1410,6 +1421,7 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
(*mapcount)--;
}
pte_unmap_unlock(pte - 1, ptl);
+ mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
if (locked_vma)
up_read(&vma->vm_mm->mmap_sem);
return ret;