9 files changed, 530 insertions, 0 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 3ad2c4af099..295b5abee72 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -50,6 +50,10 @@ config SCHED_NO_NO_OMIT_FRAME_POINTER
 	bool
 	default y
 
+config IA64_UNCACHED_ALLOCATOR
+	bool
+	select GENERIC_ALLOCATOR
+
 choice
 	prompt "System type"
 	default IA64_GENERIC
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index c1a02bbc252..4c73d8ba2e3 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_SMP)		+= smp.o smpboot.o domain.o
 obj-$(CONFIG_PERFMON)		+= perfmon_default_smpl.o
 obj-$(CONFIG_IA64_CYCLONE)	+= cyclone.o
 obj-$(CONFIG_IA64_MCA_RECOVERY)	+= mca_recovery.o
+obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR)	+= uncached.o
 mca_recovery-y			+= mca_drv.o mca_drv_asm.o
 
 # The gate DSO image is built using a special linker script.
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 4a3b1aac43e..179f230816e 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -410,6 +410,38 @@ efi_memmap_walk (efi_freemem_callback_t callback, void *arg)
 }
 
 /*
+ * Walk the EFI memory map to pull out leftover pages in the lower
+ * memory regions which do not end up in the regular memory map and
+ * stick them into the uncached allocator
+ *
+ * The regular walk function is significantly more complex than the
+ * uncached walk which means it really doesn't make sense to try and
+ * marge the two.
+ */
+void __init
+efi_memmap_walk_uc (efi_freemem_callback_t callback)
+{
+	void *efi_map_start, *efi_map_end, *p;
+	efi_memory_desc_t *md;
+	u64 efi_desc_size, start, end;
+
+	efi_map_start = __va(ia64_boot_param->efi_memmap);
+	efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
+	efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+		md = p;
+		if (md->attribute == EFI_MEMORY_UC) {
+			start = PAGE_ALIGN(md->phys_addr);
+			end = PAGE_ALIGN((md->phys_addr+(md->num_pages << EFI_PAGE_SHIFT)) & PAGE_MASK);
+			if ((*callback)(start, end, NULL) < 0)
+				return;
+		}
+	}
+}
+
+
+/*
  * Look for the PAL_CODE region reported by EFI and maps it using an
  * ITR to enable safe PAL calls in virtual mode.  See IA-64 Processor
  * Abstraction Layer chapter 11 in ADAG
diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c
new file mode 100644
index 00000000000..490dfc9ab47
--- /dev/null
+++ b/arch/ia64/kernel/uncached.c
@@ -0,0 +1,246 @@
+/*
+ * Copyright (C) 2001-2005 Silicon Graphics, Inc.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * A simple uncached page allocator using the generic allocator. This
+ * allocator first utilizes the spare (spill) pages found in the EFI
+ * memmap and will then start converting cached pages to uncached ones
+ * at a granule at a time. Node awareness is implemented by having a
+ * pool of pages per node.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/efi.h>
+#include <linux/genalloc.h>
+#include <asm/page.h>
+#include <asm/pal.h>
+#include <asm/system.h>
+#include <asm/pgtable.h>
+#include <asm/atomic.h>
+#include <asm/tlbflush.h>
+#include <asm/sn/arch.h>
+
+#define DEBUG	0
+
+#if DEBUG
+#define dprintk			printk
+#else
+#define dprintk(x...)		do { } while (0)
+#endif
+
+void __init efi_memmap_walk_uc (efi_freemem_callback_t callback);
+
+#define MAX_UNCACHED_GRANULES	5
+static int allocated_granules;
+
+struct gen_pool *uncached_pool[MAX_NUMNODES];
+
+
+static void uncached_ipi_visibility(void *data)
+{
+	int status;
+
+	status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL);
+	if ((status != PAL_VISIBILITY_OK) &&
+	    (status != PAL_VISIBILITY_OK_REMOTE_NEEDED))
+		printk(KERN_DEBUG "pal_prefetch_visibility() returns %i on "
+		       "CPU %i\n", status, get_cpu());
+}
+
+
+static void uncached_ipi_mc_drain(void *data)
+{
+	int status;
+	status = ia64_pal_mc_drain();
+	if (status)
+		printk(KERN_WARNING "ia64_pal_mc_drain() failed with %i on "
+		       "CPU %i\n", status, get_cpu());
+}
+
+
+static unsigned long
+uncached_get_new_chunk(struct gen_pool *poolp)
+{
+	struct page *page;
+	void *tmp;
+	int status, i;
+	unsigned long addr, node;
+
+	if (allocated_granules >= MAX_UNCACHED_GRANULES)
+		return 0;
+
+	node = poolp->private;
+	page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO,
+				IA64_GRANULE_SHIFT-PAGE_SHIFT);
+
+	dprintk(KERN_INFO "get_new_chunk page %p, addr %lx\n",
+		page, (unsigned long)(page-vmem_map) << PAGE_SHIFT);
+
+	/*
+	 * Do magic if no mem on local node! XXX
+	 */
+	if (!page)
+		return 0;
+	tmp = page_address(page);
+
+	/*
+	 * There's a small race here where it's possible for someone to
+	 * access the page through /dev/mem halfway through the conversion
+	 * to uncached - not sure it's really worth bothering about
+	 */
+	for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++)
+		SetPageUncached(&page[i]);
+
+	flush_tlb_kernel_range(tmp, tmp + IA64_GRANULE_SIZE);
+
+	status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL);
+
+	dprintk(KERN_INFO "pal_prefetch_visibility() returns %i on cpu %i\n",
+		status, get_cpu());
+
+	if (!status) {
+		status = smp_call_function(uncached_ipi_visibility, NULL, 0, 1);
+		if (status)
+			printk(KERN_WARNING "smp_call_function failed for "
+			       "uncached_ipi_visibility! (%i)\n", status);
+	}
+
+	if (ia64_platform_is("sn2"))
+		sn_flush_all_caches((unsigned long)tmp, IA64_GRANULE_SIZE);
+	else
+		flush_icache_range((unsigned long)tmp,
+				   (unsigned long)tmp+IA64_GRANULE_SIZE);
+
+	ia64_pal_mc_drain();
+	status = smp_call_function(uncached_ipi_mc_drain, NULL, 0, 1);
+	if (status)
+		printk(KERN_WARNING "smp_call_function failed for "
+		       "uncached_ipi_mc_drain! (%i)\n", status);
+
+	addr = (unsigned long)tmp - PAGE_OFFSET + __IA64_UNCACHED_OFFSET;
+
+	allocated_granules++;
+	return addr;
+}
+
+
+/*
+ * uncached_alloc_page
+ *
+ * Allocate 1 uncached page. Allocates on the requested node. If no
+ * uncached pages are available on the requested node, roundrobin starting
+ * with higher nodes.
+ */
+unsigned long
+uncached_alloc_page(int nid)
+{
+	unsigned long maddr;
+
+	maddr = gen_pool_alloc(uncached_pool[nid], PAGE_SIZE);
+
+	dprintk(KERN_DEBUG "uncached_alloc_page returns %lx on node %i\n",
+		maddr, nid);
+
+	/*
+	 * If no memory is availble on our local node, try the
+	 * remaining nodes in the system.
+	 */
+	if (!maddr) {
+		int i;
+
+		for (i = MAX_NUMNODES - 1; i >= 0; i--) {
+			if (i == nid || !node_online(i))
+				continue;
+			maddr = gen_pool_alloc(uncached_pool[i], PAGE_SIZE);
+			dprintk(KERN_DEBUG "uncached_alloc_page alternate search "
+				"returns %lx on node %i\n", maddr, i);
+			if (maddr) {
+				break;
+			}
+		}
+	}
+
+	return maddr;
+}
+EXPORT_SYMBOL(uncached_alloc_page);
+
+
+/*
+ * uncached_free_page
+ *
+ * Free a single uncached page.
+ */
+void
+uncached_free_page(unsigned long maddr)
+{
+	int node;
+
+	node = nasid_to_cnodeid(NASID_GET(maddr));
+
+	dprintk(KERN_DEBUG "uncached_free_page(%lx) on node %i\n", maddr, node);
+
+	if ((maddr & (0XFUL << 60)) != __IA64_UNCACHED_OFFSET)
+		panic("uncached_free_page invalid address %lx\n", maddr);
+
+	gen_pool_free(uncached_pool[node], maddr, PAGE_SIZE);
+}
+EXPORT_SYMBOL(uncached_free_page);
+
+
+/*
+ * uncached_build_memmap,
+ *
+ * Called at boot time to build a map of pages that can be used for
+ * memory special operations.
+ */
+static int __init
+uncached_build_memmap(unsigned long start, unsigned long end, void *arg)
+{
+	long length;
+	unsigned long vstart, vend;
+	int node;
+
+	length = end - start;
+	vstart = start + __IA64_UNCACHED_OFFSET;
+	vend = end + __IA64_UNCACHED_OFFSET;
+
+	dprintk(KERN_ERR "uncached_build_memmap(%lx %lx)\n", start, end);
+
+	memset((char *)vstart, 0, length);
+
+	node = nasid_to_cnodeid(NASID_GET(start));
+
+	for (; vstart < vend ; vstart += PAGE_SIZE) {
+		dprintk(KERN_INFO "sticking %lx into the pool!\n", vstart);
+		gen_pool_free(uncached_pool[node], vstart, PAGE_SIZE);
+	}
+
+	return 0;
+}
+
+
+static int __init uncached_init(void) {
+	int i;
+
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		if (!node_online(i))
+			continue;
+		uncached_pool[i] = gen_pool_create(0, IA64_GRANULE_SHIFT,
+						   &uncached_get_new_chunk, i);
+	}
+
+	efi_memmap_walk_uc(uncached_build_memmap);
+
+	return 0;
+}
+
+__initcall(uncached_init);
diff --git a/include/asm-ia64/uncached.h b/include/asm-ia64/uncached.h
new file mode 100644
index 00000000000..b82d923b73c
--- /dev/null
+++ b/include/asm-ia64/uncached.h
@@ -0,0 +1,12 @@
+/*
+ * Copyright (C) 2001-2005 Silicon Graphics, Inc.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * Prototypes for the uncached page allocator
+ */
+
+extern unsigned long uncached_alloc_page(int nid);
+extern void uncached_free_page(unsigned long);
diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h
new file mode 100644
index 00000000000..7fd0576a445
--- /dev/null
+++ b/include/linux/genalloc.h
@@ -0,0 +1,40 @@
+/*
+ * Basic general purpose allocator for managing special purpose memory
+ * not managed by the regular kmalloc/kfree interface.
+ * Uses for this includes on-device special memory, uncached memory
+ * etc.
+ *
+ * This code is based on the buddy allocator found in the sym53c8xx_2
+ * driver, adapted for general purpose use.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+
+#include <linux/spinlock.h>
+
+#define ALLOC_MIN_SHIFT		5 /* 32 bytes minimum */
+/*
+ *  Link between free memory chunks of a given size.
+ */
+struct gen_pool_link {
+	struct gen_pool_link *next;
+};
+
+/*
+ *  Memory pool descriptor.
+ */
+struct gen_pool {
+	spinlock_t lock;
+	unsigned long (*get_new_chunk)(struct gen_pool *);
+	struct gen_pool *next;
+	struct gen_pool_link *h;
+	unsigned long private;
+	int max_chunk_shift;
+};
+
+unsigned long gen_pool_alloc(struct gen_pool *poolp, int size);
+void gen_pool_free(struct gen_pool *mp, unsigned long ptr, int size);
+struct gen_pool *gen_pool_create(int nr_chunks, int max_chunk_shift,
+				 unsigned long (*fp)(struct gen_pool *),
+				 unsigned long data);
diff --git a/lib/Kconfig b/lib/Kconfig
index eeb45225248..2d4d4e3bc4a 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -40,6 +40,12 @@ config ZLIB_DEFLATE
 	tristate
 
 #
+# Generic allocator support is selected if needed
+#
+config GENERIC_ALLOCATOR
+	boolean
+
+#
 # reed solomon support is select'ed if needed
 #
 config REED_SOLOMON
diff --git a/lib/Makefile b/lib/Makefile
index 5f10cb89840..dcb4231916e 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_CRC_CCITT)	+= crc-ccitt.o
 obj-$(CONFIG_CRC32)	+= crc32.o
 obj-$(CONFIG_LIBCRC32C)	+= libcrc32c.o
 obj-$(CONFIG_GENERIC_IOMAP) += iomap.o
+obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o
 
 obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/
 obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/
diff --git a/lib/genalloc.c b/lib/genalloc.c
new file mode 100644
index 00000000000..d6d30d2e716
--- /dev/null
+++ b/lib/genalloc.c
@@ -0,0 +1,188 @@
+/*
+ * Basic general purpose allocator for managing special purpose memory
+ * not managed by the regular kmalloc/kfree interface.
+ * Uses for this includes on-device special memory, uncached memory
+ * etc.
+ *
+ * This code is based on the buddy allocator found in the sym53c8xx_2
+ * driver Copyright (C) 1999-2001  Gerard Roudier <groudier@free.fr>,
+ * and adapted for general purpose use.
+ *
+ * Copyright 2005 (C) Jes Sorensen <jes@trained-monkey.org>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/genalloc.h>
+
+#include <asm/page.h>
+
+
+struct gen_pool *gen_pool_create(int nr_chunks, int max_chunk_shift,
+				 unsigned long (*fp)(struct gen_pool *),
+				 unsigned long data)
+{
+	struct gen_pool *poolp;
+	unsigned long tmp;
+	int i;
+
+	/*
+	 * This is really an arbitrary limit, +10 is enough for
+	 * IA64_GRANULE_SHIFT, aka 16MB. If anyone needs a large limit
+	 * this can be increased without problems.
+	 */
+	if ((max_chunk_shift > (PAGE_SHIFT + 10)) ||
+	    ((max_chunk_shift < ALLOC_MIN_SHIFT) && max_chunk_shift))
+		return NULL;
+
+	if (!max_chunk_shift)
+		max_chunk_shift = PAGE_SHIFT;
+
+	poolp = kmalloc(sizeof(struct gen_pool), GFP_KERNEL);
+	if (!poolp)
+		return NULL;
+	memset(poolp, 0, sizeof(struct gen_pool));
+	poolp->h = kmalloc(sizeof(struct gen_pool_link) *
+			   (max_chunk_shift - ALLOC_MIN_SHIFT + 1),
+			   GFP_KERNEL);
+	if (!poolp->h) {
+		printk(KERN_WARNING "gen_pool_alloc() failed to allocate\n");
+		kfree(poolp);
+		return NULL;
+	}
+	memset(poolp->h, 0, sizeof(struct gen_pool_link) *
+	       (max_chunk_shift - ALLOC_MIN_SHIFT + 1));
+
+	spin_lock_init(&poolp->lock);
+	poolp->get_new_chunk = fp;
+	poolp->max_chunk_shift = max_chunk_shift;
+	poolp->private = data;
+
+	for (i = 0; i < nr_chunks; i++) {
+		tmp = poolp->get_new_chunk(poolp);
+		printk(KERN_INFO "allocated %lx\n", tmp);
+		if (!tmp)
+			break;
+		gen_pool_free(poolp, tmp, (1 << poolp->max_chunk_shift));
+	}
+
+	return poolp;
+}
+EXPORT_SYMBOL(gen_pool_create);
+
+
+/*
+ *  Simple power of two buddy-like generic allocator.
+ *  Provides naturally aligned memory chunks.
+ */
+unsigned long gen_pool_alloc(struct gen_pool *poolp, int size)
+{
+	int j, i, s, max_chunk_size;
+	unsigned long a, flags;
+	struct gen_pool_link *h = poolp->h;
+
+	max_chunk_size = 1 << poolp->max_chunk_shift;
+
+	if (size > max_chunk_size)
+		return 0;
+
+	i = 0;
+
+	size = max(size, 1 << ALLOC_MIN_SHIFT);
+	s = roundup_pow_of_two(size);
+
+	j = i;
+
+	spin_lock_irqsave(&poolp->lock, flags);
+	while (!h[j].next) {
+		if (s == max_chunk_size) {
+			struct gen_pool_link *ptr;
+			spin_unlock_irqrestore(&poolp->lock, flags);
+			ptr = (struct gen_pool_link *)poolp->get_new_chunk(poolp);
+			spin_lock_irqsave(&poolp->lock, flags);
+			h[j].next = ptr;
+			if (h[j].next)
+				h[j].next->next = NULL;
+			break;
+		}
+		j++;
+		s <<= 1;
+	}
+	a = (unsigned long) h[j].next;
+	if (a) {
+		h[j].next = h[j].next->next;
+		/*
+		 * This should be split into a seperate function doing
+		 * the chunk split in order to support custom
+		 * handling memory not physically accessible by host
+		 */
+		while (j > i) {
+			j -= 1;
+			s >>= 1;
+			h[j].next = (struct gen_pool_link *) (a + s);
+			h[j].next->next = NULL;
+		}
+	}
+	spin_unlock_irqrestore(&poolp->lock, flags);
+	return a;
+}
+EXPORT_SYMBOL(gen_pool_alloc);
+
+
+/*
+ *  Counter-part of the generic allocator.
+ */
+void gen_pool_free(struct gen_pool *poolp, unsigned long ptr, int size)
+{
+	struct gen_pool_link *q;
+	struct gen_pool_link *h = poolp->h;
+	unsigned long a, b, flags;
+	int i, s, max_chunk_size;
+
+	max_chunk_size = 1 << poolp->max_chunk_shift;
+
+	if (size > max_chunk_size)
+		return;
+
+	i = 0;
+
+	size = max(size, 1 << ALLOC_MIN_SHIFT);
+	s = roundup_pow_of_two(size);
+
+	a = ptr;
+
+	spin_lock_irqsave(&poolp->lock, flags);
+	while (1) {
+		if (s == max_chunk_size) {
+			((struct gen_pool_link *)a)->next = h[i].next;
+			h[i].next = (struct gen_pool_link *)a;
+			break;
+		}
+		b = a ^ s;
+		q = &h[i];
+
+		while (q->next && q->next != (struct gen_pool_link *)b)
+			q = q->next;
+
+		if (!q->next) {
+			((struct gen_pool_link *)a)->next = h[i].next;
+			h[i].next = (struct gen_pool_link *)a;
+			break;
+		}
+		q->next = q->next->next;
+		a = a & b;
+		s <<= 1;
+		i++;
+	}
+	spin_unlock_irqrestore(&poolp->lock, flags);
+}
+EXPORT_SYMBOL(gen_pool_free);