From 25e4df5bae333a06cd2c9b88baf14432652dc9f7 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Sat, 30 Sep 2006 23:29:34 -0700 Subject: [PATCH] paravirt: combine flush accessed dirty.patch Remove ptep_test_and_clear_{dirty|young} from i386, and instead use the dominating functions, ptep_clear_flush_{dirty|young}. This allows the TLB page flush to be contained in the same macro, and allows for an eager optimization - if reading the PTE initially returned dirty/accessed, we can assume the fact that no subsequent update to the PTE which cleared accessed / dirty has occurred, as the only way A/D bits can change without holding the page table lock is if a remote processor clears them. This eliminates an extra branch which came from the generic version of the code, as we know that no other CPU could have cleared the A/D bit, so the flush will always be needed. We still export these two defines, even though we do not actually define the macros in the i386 code: #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY The reason for this is that the only use of these functions is within the generic clear_flush functions, and we want a strong guarantee that there are no other users of these functions, so we want to prevent the generic code from defining them for us. Signed-off-by: Zachary Amsden Cc: Rusty Russell Cc: Jeremy Fitzhardinge Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-i386/pgtable.h | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) (limited to 'include/asm-i386/pgtable.h') diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 541b3e23433..94c87ff4b5a 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -262,21 +262,36 @@ do { \ } \ } while (0) +/* + * We don't actually have these, but we want to advertise them so that + * we can encompass the flush here. + */ #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY -static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) -{ - if (!pte_dirty(*ptep)) - return 0; - return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low); -} - #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) -{ - if (!pte_young(*ptep)) - return 0; - return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low); -} + +#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH +#define ptep_clear_flush_dirty(vma, address, ptep) \ +({ \ + int __dirty; \ + __dirty = pte_dirty(*(ptep)); \ + if (__dirty) { \ + clear_bit(_PAGE_BIT_DIRTY, &(ptep)->pte_low); \ + flush_tlb_page(vma, address); \ + } \ + __dirty; \ +}) + +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH +#define ptep_clear_flush_young(vma, address, ptep) \ +({ \ + int __young; \ + __young = pte_young(*(ptep)); \ + if (__young) { \ + clear_bit(_PAGE_BIT_ACCESSED, &(ptep)->pte_low); \ + flush_tlb_page(vma, address); \ + } \ + __young; \ +}) #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full) -- cgit v1.2.3 From 23002d88be309a7c78db69363c9d933a29a3b0bb Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Sat, 30 Sep 2006 23:29:35 -0700 Subject: [PATCH] paravirt: kpte flush Create a new PTE function which combines clearing a kernel PTE with the subsequent flush. This allows the two to be easily combined into a single hypercall or paravirt-op. More subtly, reverse the order of the flush for kmap_atomic. Instead of flushing on establishing a mapping, flush on clearing a mapping. This eliminates the possibility of leaving stale kmap entries which may still have valid TLB mappings. This is required for direct mode hypervisors, which need to reprotect all mappings of a given page when changing the page type from a normal page to a protected page (such as a page table or descriptor table page). But it also provides some nicer semantics for real hardware, by providing extra debug-proofing against using stale mappings, as well as ensuring that no stale mappings exist when changing the cacheability attributes of a page, which could lead to cache conflicts when two different types of mappings exist for the same page. Signed-off-by: Zachary Amsden Cc: Rusty Russell Cc: Jeremy Fitzhardinge Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-i386/pgtable.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/asm-i386/pgtable.h') diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 94c87ff4b5a..ee9696d2f67 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -441,6 +441,13 @@ extern pte_t *lookup_address(unsigned long address); #define pte_unmap_nested(pte) do { } while (0) #endif +/* Clear a kernel PTE and flush it from the TLB */ +#define kpte_clear_flush(ptep, vaddr) \ +do { \ + pte_clear(&init_mm, vaddr, ptep); \ + __flush_tlb_one(vaddr); \ +} while (0) + /* * The i386 doesn't have any external MMU info: the kernel page * tables contain all the necessary information. -- cgit v1.2.3 From d6d861e3c963b4077c83e078e3e300c4b81f93e7 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Sat, 30 Sep 2006 23:29:36 -0700 Subject: [PATCH] paravirt: optimize ptep establish for pae The ptep_establish macro is only used on user-level PTEs, for P->P mapping changes. Since these always happen under protection of the pagetable lock, the strong synchronization of a 64-bit cmpxchg is not needed, in fact, not even a lock prefix needs to be used. We can simply instead clear the P-bit, followed by a normal set. The write ordering is still important to avoid the possibility of the TLB snooping a partially written PTE and getting a bad mapping installed. Signed-off-by: Zachary Amsden Cc: Rusty Russell Cc: Jeremy Fitzhardinge Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-i386/pgtable.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/asm-i386/pgtable.h') diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index ee9696d2f67..8cb708a6bed 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -269,6 +269,17 @@ do { \ #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +/* + * Rules for using ptep_establish: the pte MUST be a user pte, and + * must be a present->present transition. + */ +#define __HAVE_ARCH_PTEP_ESTABLISH +#define ptep_establish(vma, address, ptep, pteval) \ +do { \ + set_pte_present((vma)->vm_mm, address, ptep, pteval); \ + flush_tlb_page(vma, address); \ +} while (0) + #define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH #define ptep_clear_flush_dirty(vma, address, ptep) \ ({ \ -- cgit v1.2.3 From 789e6ac0a7cbbb38402293256a295302fd8a1100 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Sat, 30 Sep 2006 23:29:38 -0700 Subject: [PATCH] paravirt: update pte hook Add a pte_update_hook which notifies about pte changes that have been made without using the set_pte / clear_pte interfaces. This allows shadow mode hypervisors which do not trap on page table access to maintain synchronized shadows. It also turns out, there was one pte update in PAE mode that wasn't using any accessor interface at all for setting NX protection. Considering it is PAE specific, and the accessor is i386 specific, I didn't want to add a generic encapsulation of this behavior yet. Signed-off-by: Zachary Amsden Cc: Rusty Russell Cc: Jeremy Fitzhardinge Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-i386/pgtable.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/asm-i386/pgtable.h') diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 8cb708a6bed..7d398f493dd 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -246,6 +246,23 @@ static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return p # include #endif +/* + * Rules for using pte_update - it must be called after any PTE update which + * has not been done using the set_pte / clear_pte interfaces. It is used by + * shadow mode hypervisors to resynchronize the shadow page tables. Kernel PTE + * updates should either be sets, clears, or set_pte_atomic for P->P + * transitions, which means this hook should only be called for user PTEs. + * This hook implies a P->P protection or access change has taken place, which + * requires a subsequent TLB flush. The notification can optionally be delayed + * until the TLB flush event by using the pte_update_defer form of the + * interface, but care must be taken to assure that the flush happens while + * still holding the same page table lock so that the shadow and primary pages + * do not become out of sync on SMP. + */ +#define pte_update(mm, addr, ptep) do { } while (0) +#define pte_update_defer(mm, addr, ptep) do { } while (0) + + /* * We only update the dirty/accessed state if we set * the dirty bit by hand in the kernel, since the hardware @@ -258,6 +275,7 @@ static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return p do { \ if (dirty) { \ (ptep)->pte_low = (entry).pte_low; \ + pte_update_defer((vma)->vm_mm, (addr), (ptep)); \ flush_tlb_page(vma, address); \ } \ } while (0) @@ -287,6 +305,7 @@ do { \ __dirty = pte_dirty(*(ptep)); \ if (__dirty) { \ clear_bit(_PAGE_BIT_DIRTY, &(ptep)->pte_low); \ + pte_update_defer((vma)->vm_mm, (addr), (ptep)); \ flush_tlb_page(vma, address); \ } \ __dirty; \ @@ -299,6 +318,7 @@ do { \ __young = pte_young(*(ptep)); \ if (__young) { \ clear_bit(_PAGE_BIT_ACCESSED, &(ptep)->pte_low); \ + pte_update_defer((vma)->vm_mm, (addr), (ptep)); \ flush_tlb_page(vma, address); \ } \ __young; \ @@ -321,6 +341,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { clear_bit(_PAGE_BIT_RW, &ptep->pte_low); + pte_update(mm, addr, ptep); } /* -- cgit v1.2.3