author:    Chris Metcalf <cmetcalf@tilera.com>  2011-02-28 16:37:34 -0500
committer: Chris Metcalf <cmetcalf@tilera.com>  2011-03-10 13:17:53 -0500
commit:    76c567fbba50c3da2f4d40e2e551bab26cfd4381
tree:      6e3c92a266d0ec255e1930adf5ba5268cd71dee9  /arch/tile/include
parent:    09c17eab075ceeafb53935d858c575b6776394d1
arch/tile: support 4KB page size as well as 64KB
The Tilera architecture traditionally supports 64KB page sizes to improve TLB utilization and performance when the hardware is used primarily to run a single application. For more generic server scenarios, it can be beneficial to run with 4KB page sizes, so this commit allows that to be specified (by modifying the arch/tile/include/hv/pagesize.h header).

As part of this change, we also reworked the PTE management slightly so that all PTE writes go through a __set_pte() function, where we can do some additional validation. The set_pte_order() function was eliminated since its "order" argument wasn't being used.

One bug uncovered was in the PCI DMA code, which wasn't properly flushing the specified range. This was benign with 64KB pages, but with 4KB pages we were getting some larger flushes wrong.

The per-cpu memory reservation code also needed updating to conform with the newer percpu infrastructure; before, it always chose 64KB, and that was always correct. With 4KB granularity we now have to pay closer attention and reserve the amount of memory that will be requested when the percpu code starts allocating.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
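To illustrate the selection point, a hypothetical sketch of the relevant part of arch/tile/include/hv/pagesize.h; only the two macro names are taken from the diff below, and the header's actual contents are maintained with the hypervisor:

    /* Illustrative only: log2 of the small (normal) page size.
     * 16 selects the traditional 64KB pages; 12 selects 4KB pages. */
    #define HV_LOG2_PAGE_SIZE_SMALL 16

    /* Illustrative only: log2 of the large (huge) page size; 24 = 16MB. */
    #define HV_LOG2_PAGE_SIZE_LARGE 24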
Diffstat (limited to 'arch/tile/include')
-rw-r--r--  arch/tile/include/asm/hugetlb.h      |  2
-rw-r--r--  arch/tile/include/asm/page.h         | 34
-rw-r--r--  arch/tile/include/asm/pgalloc.h      |  7
-rw-r--r--  arch/tile/include/asm/pgtable.h      | 31
-rw-r--r--  arch/tile/include/asm/pgtable_32.h   |  8
-rw-r--r--  arch/tile/include/asm/stack.h        |  3
-rw-r--r--  arch/tile/include/asm/thread_info.h  |  1
7 files changed, 39 insertions(+), 47 deletions(-)
diff --git a/arch/tile/include/asm/hugetlb.h b/arch/tile/include/asm/hugetlb.h
index 0521c277bbd..d396d180516 100644
--- a/arch/tile/include/asm/hugetlb.h
+++ b/arch/tile/include/asm/hugetlb.h
@@ -54,7 +54,7 @@ static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
- set_pte_order(ptep, pte, HUGETLB_PAGE_ORDER);
+ set_pte(ptep, pte);
}
static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
index 7979a45430d..3eb53525bf9 100644
--- a/arch/tile/include/asm/page.h
+++ b/arch/tile/include/asm/page.h
@@ -16,10 +16,11 @@
#define _ASM_TILE_PAGE_H
#include <linux/const.h>
+#include <hv/pagesize.h>
/* PAGE_SHIFT and HPAGE_SHIFT determine the page sizes. */
-#define PAGE_SHIFT 16
-#define HPAGE_SHIFT 24
+#define PAGE_SHIFT HV_LOG2_PAGE_SIZE_SMALL
+#define HPAGE_SHIFT HV_LOG2_PAGE_SIZE_LARGE
#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT)
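Plugging in the two supported values of HV_LOG2_PAGE_SIZE_SMALL (and a large-page shift of 24, matching the removed HPAGE_SHIFT constant), the sizes work out as:

    PAGE_SHIFT  = 12  ->  PAGE_SIZE  = 4KB
    PAGE_SHIFT  = 16  ->  PAGE_SIZE  = 64KB
    HPAGE_SHIFT = 24  ->  HPAGE_SIZE = 16MB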
@@ -29,25 +30,18 @@
#ifdef __KERNEL__
-#include <hv/hypervisor.h>
-#include <arch/chip.h>
-
/*
- * The {,H}PAGE_SHIFT values must match the HV_LOG2_PAGE_SIZE_xxx
- * definitions in <hv/hypervisor.h>. We validate this at build time
- * here, and again at runtime during early boot. We provide a
- * separate definition since userspace doesn't have <hv/hypervisor.h>.
- *
- * Be careful to distinguish PAGE_SHIFT from HV_PTE_INDEX_PFN, since
- * they are the same on i386 but not TILE.
+ * If the Kconfig doesn't specify, set a maximum zone order that
+ * is enough so that we can create huge pages from small pages given
+ * the respective sizes of the two page types. See <linux/mmzone.h>.
*/
-#if HV_LOG2_PAGE_SIZE_SMALL != PAGE_SHIFT
-# error Small page size mismatch in Linux
-#endif
-#if HV_LOG2_PAGE_SIZE_LARGE != HPAGE_SHIFT
-# error Huge page size mismatch in Linux
+#ifndef CONFIG_FORCE_MAX_ZONEORDER
+#define CONFIG_FORCE_MAX_ZONEORDER (HPAGE_SHIFT - PAGE_SHIFT + 1)
#endif
+#include <hv/hypervisor.h>
+#include <arch/chip.h>
+
#ifndef __ASSEMBLY__
#include <linux/types.h>
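The CONFIG_FORCE_MAX_ZONEORDER fallback in the hunk above is sized so the buddy allocator can assemble a huge page from small pages: building an HPAGE_SIZE block takes an allocation of order HPAGE_SHIFT - PAGE_SHIFT, and since the maximum order is exclusive, the fallback adds 1. Worked out for the two page sizes:

    64KB pages: (24 - 16) + 1 =  9
     4KB pages: (24 - 12) + 1 = 13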
@@ -81,12 +75,6 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
* Hypervisor page tables are made of the same basic structure.
*/
-typedef __u64 pteval_t;
-typedef __u64 pmdval_t;
-typedef __u64 pudval_t;
-typedef __u64 pgdval_t;
-typedef __u64 pgprotval_t;
-
typedef HV_PTE pte_t;
typedef HV_PTE pgd_t;
typedef HV_PTE pgprot_t;
diff --git a/arch/tile/include/asm/pgalloc.h b/arch/tile/include/asm/pgalloc.h
index cf52791a550..e919c0bdc22 100644
--- a/arch/tile/include/asm/pgalloc.h
+++ b/arch/tile/include/asm/pgalloc.h
@@ -41,9 +41,9 @@
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
#ifdef CONFIG_64BIT
- set_pte_order(pmdp, pmd, L2_USER_PGTABLE_ORDER);
+ set_pte(pmdp, pmd);
#else
- set_pte_order(&pmdp->pud.pgd, pmd.pud.pgd, L2_USER_PGTABLE_ORDER);
+ set_pte(&pmdp->pud.pgd, pmd.pud.pgd);
#endif
}
@@ -100,6 +100,9 @@ pte_t *get_prealloc_pte(unsigned long pfn);
/* During init, we can shatter kernel huge pages if needed. */
void shatter_pmd(pmd_t *pmd);
+/* After init, a more complex technique is required. */
+void shatter_huge_page(unsigned long addr);
+
#ifdef __tilegx__
/* We share a single page allocator for both L1 and L2 page tables. */
#if HV_L1_SIZE != HV_L2_SIZE
diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h
index a6604e9485d..1a20b7ef8ea 100644
--- a/arch/tile/include/asm/pgtable.h
+++ b/arch/tile/include/asm/pgtable.h
@@ -233,15 +233,23 @@ static inline void __pte_clear(pte_t *ptep)
#define pgd_ERROR(e) \
pr_err("%s:%d: bad pgd 0x%016llx.\n", __FILE__, __LINE__, pgd_val(e))
+/* Return PA and protection info for a given kernel VA. */
+int va_to_cpa_and_pte(void *va, phys_addr_t *cpa, pte_t *pte);
+
+/*
+ * __set_pte() ensures we write the 64-bit PTE with 32-bit words in
+ * the right order on 32-bit platforms and also allows us to write
+ * hooks to check valid PTEs, etc., if we want.
+ */
+void __set_pte(pte_t *ptep, pte_t pte);
+
/*
- * set_pte_order() sets the given PTE and also sanity-checks the
+ * set_pte() sets the given PTE and also sanity-checks the
* requested PTE against the page homecaching. Unspecified parts
* of the PTE are filled in when it is written to memory, i.e. all
* caching attributes if "!forcecache", or the home cpu if "anyhome".
*/
-extern void set_pte_order(pte_t *ptep, pte_t pte, int order);
-
-#define set_pte(ptep, pteval) set_pte_order(ptep, pteval, 0)
+extern void set_pte(pte_t *ptep, pte_t pte);
#define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval)
#define set_pte_atomic(pteptr, pteval) set_pte(pteptr, pteval)
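For context, a plausible sketch of __set_pte() as the comment above describes it; the function body lives outside this diff, so the details below (the bit-index guard, the use of barrier()) are assumptions rather than the committed code:

    /* Hedged sketch: assumes the PRESENT and MIGRATING bits sit in the
     * low 32 bits of the 64-bit PTE, so writing the low word last means
     * a concurrent reader never sees a half-written but valid PTE. */
    void __set_pte(pte_t *ptep, pte_t pte)
    {
    #ifdef __tilegx__
        *ptep = pte;                /* a single 64-bit store suffices */
    #else
    # if HV_PTE_INDEX_PRESENT >= 32 || HV_PTE_INDEX_MIGRATING >= 32
    #  error Must write the present and migrating bits last
    # endif
        ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32);
        barrier();                  /* order the two 32-bit stores */
        ((u32 *)ptep)[0] = (u32)(pte_val(pte));
    #endif
    }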
@@ -293,21 +301,6 @@ extern void check_mm_caching(struct mm_struct *prev, struct mm_struct *next);
#define __swp_entry_to_pte(swp) ((pte_t) { (((long long) ((swp).val)) << 32) })
/*
- * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
- *
- * dst - pointer to pgd range anwhere on a pgd page
- * src - ""
- * count - the number of pgds to copy.
- *
- * dst and src can be on the same page, but the range must not overlap,
- * and must not cross a page boundary.
- */
-static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
-{
- memcpy(dst, src, count * sizeof(pgd_t));
-}
-
-/*
* Conversion functions: convert a page and protection to a page entry,
* and a page entry and page directory to the page they refer to.
*/
diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h
index 53ec3488474..9f98529761f 100644
--- a/arch/tile/include/asm/pgtable_32.h
+++ b/arch/tile/include/asm/pgtable_32.h
@@ -24,6 +24,7 @@
#define PGDIR_SIZE HV_PAGE_SIZE_LARGE
#define PGDIR_MASK (~(PGDIR_SIZE-1))
#define PTRS_PER_PGD (1 << (32 - PGDIR_SHIFT))
+#define SIZEOF_PGD (PTRS_PER_PGD * sizeof(pgd_t))
/*
* The level-2 index is defined by the difference between the huge
@@ -33,6 +34,7 @@
* this nomenclature is somewhat confusing.
*/
#define PTRS_PER_PTE (1 << (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL))
+#define SIZEOF_PTE (PTRS_PER_PTE * sizeof(pte_t))
#ifndef __ASSEMBLY__
@@ -94,7 +96,6 @@ static inline int pgd_addr_invalid(unsigned long addr)
*/
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
extern int ptep_test_and_clear_young(struct vm_area_struct *,
unsigned long addr, pte_t *);
@@ -110,6 +111,11 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
return pte;
}
+static inline void __set_pmd(pmd_t *pmdp, pmd_t pmdval)
+{
+ set_pte(&pmdp->pud.pgd, pmdval.pud.pgd);
+}
+
/* Create a pmd from a PTFN. */
static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot)
{
diff --git a/arch/tile/include/asm/stack.h b/arch/tile/include/asm/stack.h
index f908473c322..4d97a2db932 100644
--- a/arch/tile/include/asm/stack.h
+++ b/arch/tile/include/asm/stack.h
@@ -18,13 +18,14 @@
#include <linux/types.h>
#include <linux/sched.h>
#include <asm/backtrace.h>
+#include <asm/page.h>
#include <hv/hypervisor.h>
/* Everything we need to keep track of a backtrace iteration */
struct KBacktraceIterator {
BacktraceIterator it;
struct task_struct *task; /* task we are backtracing */
- HV_PTE *pgtable; /* page table for user space access */
+ pte_t *pgtable; /* page table for user space access */
int end; /* iteration complete. */
int new_context; /* new context is starting */
int profile; /* profiling, so stop on async intrpt */
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
index 3872f2b345d..9e8e9c4dfa2 100644
--- a/arch/tile/include/asm/thread_info.h
+++ b/arch/tile/include/asm/thread_info.h
@@ -68,6 +68,7 @@ struct thread_info {
#else
#define THREAD_SIZE_ORDER (0)
#endif
+#define THREAD_SIZE_PAGES (1 << THREAD_SIZE_ORDER)
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
#define LOG2_THREAD_SIZE (PAGE_SHIFT + THREAD_SIZE_ORDER)
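THREAD_SIZE_PAGES simply restates the existing stack-size macros in units of pages; in the order-0 branch shown above it evaluates as:

    THREAD_SIZE_PAGES = 1 << 0 = 1
    THREAD_SIZE       = PAGE_SIZE << 0 = PAGE_SIZE
    LOG2_THREAD_SIZE  = PAGE_SHIFT + 0 = PAGE_SHIFT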