889 files changed, 27463 insertions, 24021 deletions
diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index 058937bf5a7..b1834166922 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -452,10 +452,14 @@
 #define __NR_fanotify_init		494
 #define __NR_fanotify_mark		495
 #define __NR_prlimit64			496
+#define __NR_name_to_handle_at		497
+#define __NR_open_by_handle_at		498
+#define __NR_clock_adjtime		499
+#define __NR_syncfs			500
 
 #ifdef __KERNEL__
 
-#define NR_SYSCALLS			497
+#define NR_SYSCALLS			501
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 42aa078a5e4..5a621c6d22a 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -585,8 +585,7 @@ handle_ipi(struct pt_regs *regs)
 
 		switch (which) {
 		case IPI_RESCHEDULE:
-			/* Reschedule callback.  Everything to be done
-			   is done by the interrupt return path.  */
+			scheduler_ipi();
 			break;
 
 		case IPI_CALL_FUNC:
diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S
index a6a1de9db16..15f999d41c7 100644
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S
@@ -498,23 +498,27 @@ sys_call_table:
 	.quad sys_ni_syscall			/* sys_timerfd */
 	.quad sys_eventfd
 	.quad sys_recvmmsg
-	.quad sys_fallocate				/* 480 */
+	.quad sys_fallocate			/* 480 */
 	.quad sys_timerfd_create
 	.quad sys_timerfd_settime
 	.quad sys_timerfd_gettime
 	.quad sys_signalfd4
-	.quad sys_eventfd2				/* 485 */
+	.quad sys_eventfd2			/* 485 */
 	.quad sys_epoll_create1
 	.quad sys_dup3
 	.quad sys_pipe2
 	.quad sys_inotify_init1
-	.quad sys_preadv				/* 490 */
+	.quad sys_preadv			/* 490 */
 	.quad sys_pwritev
 	.quad sys_rt_tgsigqueueinfo
 	.quad sys_perf_event_open
 	.quad sys_fanotify_init
-	.quad sys_fanotify_mark				/* 495 */
+	.quad sys_fanotify_mark			/* 495 */
 	.quad sys_prlimit64
+	.quad sys_name_to_handle_at
+	.quad sys_open_by_handle_at
+	.quad sys_clock_adjtime
+	.quad sys_syncfs			/* 500 */
 
 	.size sys_call_table, . - sys_call_table
 	.type sys_call_table, @object
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index 918e8e0b72f..818e74ed45d 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -375,8 +375,7 @@ static struct clocksource clocksource_rpcc = {
 
 static inline void register_rpcc_clocksource(long cycle_freq)
 {
-	clocksource_calc_mult_shift(&clocksource_rpcc, cycle_freq, 4);
-	clocksource_register(&clocksource_rpcc);
+	clocksource_register_hz(&clocksource_rpcc, cycle_freq);
 }
 #else /* !CONFIG_SMP */
 static inline void register_rpcc_clocksource(long cycle_freq)
diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S
index 433be2a24f3..3d890a98a08 100644
--- a/arch/alpha/kernel/vmlinux.lds.S
+++ b/arch/alpha/kernel/vmlinux.lds.S
@@ -46,6 +46,7 @@ SECTIONS
 	__init_end = .;
 	/* Freed after init ends here */
 
+	_sdata = .;	/* Start of rw data section */
 	_data = .;
 	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
 
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 8ebbb511c78..0c6852d9350 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -74,7 +74,7 @@ ZTEXTADDR	:= $(CONFIG_ZBOOT_ROM_TEXT)
 ZBSSADDR	:= $(CONFIG_ZBOOT_ROM_BSS)
 else
 ZTEXTADDR	:= 0
-ZBSSADDR	:= ALIGN(4)
+ZBSSADDR	:= ALIGN(8)
 endif
 
 SEDFLAGS	= s/TEXT_START/$(ZTEXTADDR)/;s/BSS_START/$(ZBSSADDR)/
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index adf583cd0c3..49f5b2eaaa8 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -179,15 +179,14 @@ not_angel:
 		bl	cache_on
 
 restart:	adr	r0, LC0
-		ldmia	r0, {r1, r2, r3, r5, r6, r9, r11, r12}
-		ldr	sp, [r0, #32]
+		ldmia	r0, {r1, r2, r3, r6, r9, r11, r12}
+		ldr	sp, [r0, #28]
 
 		/*
 		 * We might be running at a different address.  We need
 		 * to fix up various pointers.
 		 */
 		sub	r0, r0, r1		@ calculate the delta offset
-		add	r5, r5, r0		@ _start
 		add	r6, r6, r0		@ _edata
 
 #ifndef CONFIG_ZBOOT_ROM
@@ -206,31 +205,40 @@ restart:	adr	r0, LC0
 /*
  * Check to see if we will overwrite ourselves.
  *   r4  = final kernel address
- *   r5  = start of this image
  *   r9  = size of decompressed image
  *   r10 = end of this image, including  bss/stack/malloc space if non XIP
  * We basically want:
- *   r4 >= r10 -> OK
- *   r4 + image length <= r5 -> OK
+ *   r4 - 16k page directory >= r10 -> OK
+ *   r4 + image length <= current position (pc) -> OK
  */
+		add	r10, r10, #16384
 		cmp	r4, r10
 		bhs	wont_overwrite
 		add	r10, r4, r9
-		cmp	r10, r5
+   ARM(		cmp	r10, pc		)
+ THUMB(		mov	lr, pc		)
+ THUMB(		cmp	r10, lr		)
 		bls	wont_overwrite
 
 /*
  * Relocate ourselves past the end of the decompressed kernel.
- *   r5  = start of this image
  *   r6  = _edata
  *   r10 = end of the decompressed kernel
  * Because we always copy ahead, we need to do it from the end and go
  * backward in case the source and destination overlap.
  */
-		/* Round up to next 256-byte boundary. */
-		add	r10, r10, #256
+		/*
+		 * Bump to the next 256-byte boundary with the size of
+		 * the relocation code added. This avoids overwriting
+		 * ourself when the offset is small.
+		 */
+		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
 		bic	r10, r10, #255
 
+		/* Get start of code we want to copy and align it down. */
+		adr	r5, restart
+		bic	r5, r5, #31
+
 		sub	r9, r6, r5		@ size to copy
 		add	r9, r9, #31		@ rounded up to a multiple
 		bic	r9, r9, #31		@ ... of 32 bytes
@@ -245,6 +253,11 @@ restart:	adr	r0, LC0
 		/* Preserve offset to relocated code. */
 		sub	r6, r9, r6
 
+#ifndef CONFIG_ZBOOT_ROM
+		/* cache_clean_flush may use the stack, so relocate it */
+		add	sp, sp, r6
+#endif
+
 		bl	cache_clean_flush
 
 		adr	r0, BSYM(restart)
@@ -333,7 +346,6 @@ not_relocated:	mov	r0, #0
 LC0:		.word	LC0			@ r1
 		.word	__bss_start		@ r2
 		.word	_end			@ r3
-		.word	_start			@ r5
 		.word	_edata			@ r6
 		.word	_image_size		@ r9
 		.word	_got_start		@ r11
@@ -1062,6 +1074,7 @@ memdump:	mov	r12, r0
 #endif
 
 		.ltorg
+reloc_code_end:
 
 		.align
 		.section ".stack", "aw", %nobits
diff --git a/arch/arm/boot/compressed/vmlinux.lds.in b/arch/arm/boot/compressed/vmlinux.lds.in
index 5309909d728..ea80abe7884 100644
--- a/arch/arm/boot/compressed/vmlinux.lds.in
+++ b/arch/arm/boot/compressed/vmlinux.lds.in
@@ -54,6 +54,7 @@ SECTIONS
   .bss			: { *(.bss) }
   _end = .;
 
+  . = ALIGN(8);		/* the stack must be 64-bit aligned */
   .stack		: { *(.stack) }
 
   .stab 0		: { *(.stab) }
diff --git a/arch/arm/common/vic.c b/arch/arm/common/vic.c
index 113085a7712..7aa4262ada7 100644
--- a/arch/arm/common/vic.c
+++ b/arch/arm/common/vic.c
@@ -22,17 +22,16 @@
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/io.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/device.h>
 #include <linux/amba/bus.h>
 
 #include <asm/mach/irq.h>
 #include <asm/hardware/vic.h>
 
-#if defined(CONFIG_PM)
+#ifdef CONFIG_PM
 /**
  * struct vic_device - VIC PM device
- * @sysdev: The system device which is registered.
  * @irq: The IRQ number for the base of the VIC.
  * @base: The register base for the VIC.
  * @resume_sources: A bitmask of interrupts for resume.
@@ -43,8 +42,6 @@
  * @protect: Save for VIC_PROTECT.
  */
 struct vic_device {
-	struct sys_device sysdev;
-
 	void __iomem	*base;
 	int		irq;
 	u32		resume_sources;
@@ -59,11 +56,6 @@ struct vic_device {
 static struct vic_device vic_devices[CONFIG_ARM_VIC_NR];
 
 static int vic_id;
-
-static inline struct vic_device *to_vic(struct sys_device *sys)
-{
-	return container_of(sys, struct vic_device, sysdev);
-}
 #endif /* CONFIG_PM */
 
 /**
@@ -85,10 +77,9 @@ static void vic_init2(void __iomem *base)
 	writel(32, base + VIC_PL190_DEF_VECT_ADDR);
 }
 
-#if defined(CONFIG_PM)
-static int vic_class_resume(struct sys_device *dev)
+#ifdef CONFIG_PM
+static void resume_one_vic(struct vic_device *vic)
 {
-	struct vic_device *vic = to_vic(dev);
 	void __iomem *base = vic->base;
 
 	printk(KERN_DEBUG "%s: resuming vic at %p\n", __func__, base);
@@ -107,13 +98,18 @@ static int vic_class_resume(struct sys_device *dev)
 
 	writel(vic->soft_int, base + VIC_INT_SOFT);
 	writel(~vic->soft_int, base + VIC_INT_SOFT_CLEAR);
+}
 
-	return 0;
+static void vic_resume(void)
+{
+	int id;
+
+	for (id = vic_id - 1; id >= 0; id--)
+		resume_one_vic(vic_devices + id);
 }
 
-static int vic_class_suspend(struct sys_device *dev, pm_message_t state)
+static void suspend_one_vic(struct vic_device *vic)
 {
-	struct vic_device *vic = to_vic(dev);
 	void __iomem *base = vic->base;
 
 	printk(KERN_DEBUG "%s: suspending vic at %p\n", __func__, base);
@@ -128,14 +124,21 @@ static int vic_class_suspend(struct sys_device *dev, pm_message_t state)
 
 	writel(vic->resume_irqs, base + VIC_INT_ENABLE);
 	writel(~vic->resume_irqs, base + VIC_INT_ENABLE_CLEAR);
+}
+
+static int vic_suspend(void)
+{
+	int id;
+
+	for (id = 0; id < vic_id; id++)
+		suspend_one_vic(vic_devices + id);
 
 	return 0;
 }
 
-struct sysdev_class vic_class = {
-	.name		= "vic",
-	.suspend	= vic_class_suspend,
-	.resume		= vic_class_resume,
+struct syscore_ops vic_syscore_ops = {
+	.suspend	= vic_suspend,
+	.resume		= vic_resume,
 };
 
 /**
@@ -147,30 +150,8 @@ struct sysdev_class vic_class = {
 */
 static int __init vic_pm_init(void)
 {
-	struct vic_device *dev = vic_devices;
-	int err;
-	int id;
-
-	if (vic_id == 0)
-		return 0;
-
-	err = sysdev_class_register(&vic_class);
-	if (err) {
-		printk(KERN_ERR "%s: cannot register class\n", __func__);
-		return err;
-	}
-
-	for (id = 0; id < vic_id; id++, dev++) {
-		dev->sysdev.id = id;
-		dev->sysdev.cls = &vic_class;
-
-		err = sysdev_register(&dev->sysdev);
-		if (err) {
-			printk(KERN_ERR "%s: failed to register device\n",
-			       __func__);
-			return err;
-		}
-	}
+	if (vic_id > 0)
+		register_syscore_ops(&vic_syscore_ops);
 
 	return 0;
 }
diff --git a/arch/arm/configs/at91x40_defconfig b/arch/arm/configs/at91x40_defconfig
new file mode 100644
index 00000000000..c55e9212fcb
--- /dev/null
+++ b/arch/arm/configs/at91x40_defconfig
@@ -0,0 +1,48 @@
+CONFIG_EXPERIMENTAL=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_EMBEDDED=y
+# CONFIG_HOTPLUG is not set
+# CONFIG_ELF_CORE is not set
+# CONFIG_FUTEX is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLAB=y
+# CONFIG_LBDAF is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+# CONFIG_MMU is not set
+CONFIG_ARCH_AT91=y
+CONFIG_ARCH_AT91X40=y
+CONFIG_MACH_AT91EB01=y
+CONFIG_AT91_EARLY_USART0=y
+CONFIG_CPU_ARM7TDMI=y
+CONFIG_SET_MEM_PARAM=y
+CONFIG_DRAM_BASE=0x01000000
+CONFIG_DRAM_SIZE=0x00400000
+CONFIG_FLASH_MEM_BASE=0x01400000
+CONFIG_PROCESSOR_ID=0x14000040
+CONFIG_ZBOOT_ROM_TEXT=0x0
+CONFIG_ZBOOT_ROM_BSS=0x0
+CONFIG_BINFMT_FLAT=y
+# CONFIG_SUSPEND is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_MTD=y
+CONFIG_MTD_PARTITIONS=y
+CONFIG_MTD_CHAR=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_RAM=y
+CONFIG_MTD_ROM=y
+CONFIG_BLK_DEV_RAM=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_DEVKMEM is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_EXT2_FS=y
+# CONFIG_DNOTIFY is not set
+CONFIG_ROMFS_FS=y
+# CONFIG_ENABLE_MUST_CHECK is not set
diff --git a/arch/arm/include/asm/i8253.h b/arch/arm/include/asm/i8253.h
new file mode 100644
index 00000000000..70656b69d5c
--- /dev/null
+++ b/arch/arm/include/asm/i8253.h
@@ -0,0 +1,15 @@
+#ifndef __ASMARM_I8253_H
+#define __ASMARM_I8253_H
+
+/* i8253A PIT registers */
+#define PIT_MODE	0x43
+#define PIT_CH0		0x40
+
+#define PIT_LATCH	((PIT_TICK_RATE + HZ / 2) / HZ)
+
+extern raw_spinlock_t i8253_lock;
+
+#define outb_pit	outb_p
+#define inb_pit		inb_p
+
+#endif
diff --git a/arch/arm/include/asm/kprobes.h b/arch/arm/include/asm/kprobes.h
index bb8a19bd582..e46bdd0097e 100644
--- a/arch/arm/include/asm/kprobes.h
+++ b/arch/arm/include/asm/kprobes.h
@@ -39,10 +39,13 @@ typedef u32 kprobe_opcode_t;
 struct kprobe;
 typedef void (kprobe_insn_handler_t)(struct kprobe *, struct pt_regs *);
 
+typedef unsigned long (kprobe_check_cc)(unsigned long);
+
 /* Architecture specific copy of original instruction. */
 struct arch_specific_insn {
 	kprobe_opcode_t		*insn;
 	kprobe_insn_handler_t	*insn_handler;
+	kprobe_check_cc		*insn_check_cc;
 };
 
 struct prev_kprobe {
diff --git a/arch/arm/include/asm/mach/time.h b/arch/arm/include/asm/mach/time.h
index 883f6be5117..d5adaae5ee2 100644
--- a/arch/arm/include/asm/mach/time.h
+++ b/arch/arm/include/asm/mach/time.h
@@ -34,7 +34,6 @@
  *   timer interrupt which may be pending.
  */
 struct sys_timer {
-	struct sys_device	dev;
 	void			(*init)(void);
 	void			(*suspend)(void);
 	void			(*resume)(void);
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index 885be097769..832888d0c20 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -159,7 +159,7 @@ extern unsigned int user_debug;
 #include <mach/barriers.h>
 #elif defined(CONFIG_ARM_DMA_MEM_BUFFERABLE) || defined(CONFIG_SMP)
 #define mb()		do { dsb(); outer_sync(); } while (0)
-#define rmb()		dmb()
+#define rmb()		dsb()
 #define wmb()		mb()
 #else
 #include <asm/memory.h>
diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index 23891317dc4..15eeff6aea0 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -34,9 +34,6 @@
  *
  *   *) If the PC is written to by the instruction, the
  *      instruction must be fully simulated in software.
- *      If it is a conditional instruction, the handler
- *      will use insn[0] to copy its condition code to
- *	set r0 to 1 and insn[1] to "mov pc, lr" to return.
  *
  *   *) Otherwise, a modified form of the instruction is
  *      directly executed.  Its handler calls the
@@ -68,13 +65,17 @@
 
 #define branch_displacement(insn) sign_extend(((insn) & 0xffffff) << 2, 25)
 
+#define is_r15(insn, bitpos) (((insn) & (0xf << bitpos)) == (0xf << bitpos))
+
+/*
+ * Test if load/store instructions writeback the address register.
+ * if P (bit 24) == 0 or W (bit 21) == 1
+ */
+#define is_writeback(insn) ((insn ^ 0x01000000) & 0x01200000)
+
 #define PSR_fs	(PSR_f|PSR_s)
 
 #define KPROBE_RETURN_INSTRUCTION	0xe1a0f00e	/* mov pc, lr */
-#define SET_R0_TRUE_INSTRUCTION		0xe3a00001	/* mov	r0, #1 */
-
-#define	truecc_insn(insn)	(((insn) & 0xf0000000) | \
-				 (SET_R0_TRUE_INSTRUCTION & 0x0fffffff))
 
 typedef long (insn_0arg_fn_t)(void);
 typedef long (insn_1arg_fn_t)(long);
@@ -419,14 +420,10 @@ insnslot_llret_4arg_rwflags(long r0, long r1, long r2, long r3, long *cpsr,
 
 static void __kprobes simulate_bbl(struct kprobe *p, struct pt_regs *regs)
 {
-	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
 	kprobe_opcode_t insn = p->opcode;
 	long iaddr = (long)p->addr;
 	int disp  = branch_displacement(insn);
 
-	if (!insnslot_1arg_rflags(0, regs->ARM_cpsr, i_fn))
-		return;
-
 	if (insn & (1 << 24))
 		regs->ARM_lr = iaddr + 4;
 
@@ -446,14 +443,10 @@ static void __kprobes simulate_blx1(struct kprobe *p, struct pt_regs *regs)
 
 static void __kprobes simulate_blx2bx(struct kprobe *p, struct pt_regs *regs)
 {
-	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
 	kprobe_opcode_t insn = p->opcode;
 	int rm = insn & 0xf;
 	long rmv = regs->uregs[rm];
 
-	if (!insnslot_1arg_rflags(0, regs->ARM_cpsr, i_fn))
-		return;
-
 	if (insn & (1 << 5))
 		regs->ARM_lr = (long)p->addr + 4;
 
@@ -463,9 +456,16 @@ static void __kprobes simulate_blx2bx(struct kprobe *p, struct pt_regs *regs)
 		regs->ARM_cpsr |= PSR_T_BIT;
 }
 
+static void __kprobes simulate_mrs(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	int rd = (insn >> 12) & 0xf;
+	unsigned long mask = 0xf8ff03df; /* Mask out execution state */
+	regs->uregs[rd] = regs->ARM_cpsr & mask;
+}
+
 static void __kprobes simulate_ldm1stm1(struct kprobe *p, struct pt_regs *regs)
 {
-	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
 	kprobe_opcode_t insn = p->opcode;
 	int rn = (insn >> 16) & 0xf;
 	int lbit = insn & (1 << 20);
@@ -476,9 +476,6 @@ static void __kprobes simulate_ldm1stm1(struct kprobe *p, struct pt_regs *regs)
 	int reg_bit_vector;
 	int reg_count;
 
-	if (!insnslot_1arg_rflags(0, regs->ARM_cpsr, i_fn))
-		return;
-
 	reg_count = 0;
 	reg_bit_vector = insn & 0xffff;
 	while (reg_bit_vector) {
@@ -510,11 +507,6 @@ static void __kprobes simulate_ldm1stm1(struct kprobe *p, struct pt_regs *regs)
 
 static void __kprobes simulate_stm1_pc(struct kprobe *p, struct pt_regs *regs)
 {
-	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
-
-	if (!insnslot_1arg_rflags(0, regs->ARM_cpsr, i_fn))
-		return;
-
 	regs->ARM_pc = (long)p->addr + str_pc_offset;
 	simulate_ldm1stm1(p, regs);
 	regs->ARM_pc = (long)p->addr + 4;
@@ -525,24 +517,16 @@ static void __kprobes simulate_mov_ipsp(struct kprobe *p, struct pt_regs *regs)
 	regs->uregs[12] = regs->uregs[13];
 }
 
-static void __kprobes emulate_ldcstc(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	int rn = (insn >> 16) & 0xf;
-	long rnv = regs->uregs[rn];
-
-	/* Save Rn in case of writeback. */
-	regs->uregs[rn] = insnslot_1arg_rflags(rnv, regs->ARM_cpsr, i_fn);
-}
-
 static void __kprobes emulate_ldrd(struct kprobe *p, struct pt_regs *regs)
 {
 	insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0];
 	kprobe_opcode_t insn = p->opcode;
+	long ppc = (long)p->addr + 8;
 	int rd = (insn >> 12) & 0xf;
 	int rn = (insn >> 16) & 0xf;
 	int rm = insn & 0xf;  /* rm may be invalid, don't care. */
+	long rmv = (rm == 15) ? ppc : regs->uregs[rm];
+	long rnv = (rn == 15) ? ppc : regs->uregs[rn];
 
 	/* Not following the C calling convention here, so need asm(). */
 	__asm__ __volatile__ (
@@ -554,29 +538,36 @@ static void __kprobes emulate_ldrd(struct kprobe *p, struct pt_regs *regs)
 		"str	r0, %[rn]	\n\t"	/* in case of writeback */
 		"str	r2, %[rd0]	\n\t"
 		"str	r3, %[rd1]	\n\t"
-		: [rn]  "+m" (regs->uregs[rn]),
+		: [rn]  "+m" (rnv),
 		  [rd0] "=m" (regs->uregs[rd]),
 		  [rd1] "=m" (regs->uregs[rd+1])
-		: [rm]   "m" (regs->uregs[rm]),
+		: [rm]   "m" (rmv),
 		  [cpsr] "r" (regs->ARM_cpsr),
 		  [i_fn] "r" (i_fn)
 		: "r0", "r1", "r2", "r3", "lr", "cc"
 	);
+	if (is_writeback(insn))
+		regs->uregs[rn] = rnv;
 }
 
 static void __kprobes emulate_strd(struct kprobe *p, struct pt_regs *regs)
 {
 	insn_4arg_fn_t *i_fn = (insn_4arg_fn_t *)&p->ainsn.insn[0];
 	kprobe_opcode_t insn = p->opcode;
+	long ppc = (long)p->addr + 8;
 	int rd = (insn >> 12) & 0xf;
 	int rn = (insn >> 16) & 0xf;
 	int rm  = insn & 0xf;
-	long rnv = regs->uregs[rn];
-	long rmv = regs->uregs[rm];  /* rm/rmv may be invalid, don't care. */
+	long rnv = (rn == 15) ? ppc : regs->uregs[rn];
+	/* rm/rmv may be invalid, don't care. */
+	long rmv = (rm == 15) ? ppc : regs->uregs[rm];
+	long rnv_wb;
 
-	regs->uregs[rn] = insnslot_4arg_rflags(rnv, rmv, regs->uregs[rd],
+	rnv_wb = insnslot_4arg_rflags(rnv, rmv, regs->uregs[rd],
 					       regs->uregs[rd+1],
 					       regs->ARM_cpsr, i_fn);
+	if (is_writeback(insn))
+		regs->uregs[rn] = rnv_wb;
 }
 
 static void __kprobes emulate_ldr(struct kprobe *p, struct pt_regs *regs)
@@ -630,31 +621,6 @@ static void __kprobes emulate_str(struct kprobe *p, struct pt_regs *regs)
 		regs->uregs[rn] = rnv_wb;  /* Save Rn in case of writeback. */
 }
 
-static void __kprobes emulate_mrrc(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_llret_0arg_fn_t *i_fn = (insn_llret_0arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	union reg_pair fnr;
-	int rd = (insn >> 12) & 0xf;
-	int rn = (insn >> 16) & 0xf;
-
-	fnr.dr = insnslot_llret_0arg_rflags(regs->ARM_cpsr, i_fn);
-	regs->uregs[rn] = fnr.r0;
-	regs->uregs[rd] = fnr.r1;
-}
-
-static void __kprobes emulate_mcrr(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	int rd = (insn >> 12) & 0xf;
-	int rn = (insn >> 16) & 0xf;
-	long rnv = regs->uregs[rn];
-	long rdv = regs->uregs[rd];
-
-	insnslot_2arg_rflags(rnv, rdv, regs->ARM_cpsr, i_fn);
-}
-
 static void __kprobes emulate_sat(struct kprobe *p, struct pt_regs *regs)
 {
 	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
@@ -688,32 +654,32 @@ static void __kprobes emulate_none(struct kprobe *p, struct pt_regs *regs)
 	insnslot_0arg_rflags(regs->ARM_cpsr, i_fn);
 }
 
-static void __kprobes emulate_rd12(struct kprobe *p, struct pt_regs *regs)
+static void __kprobes emulate_nop(struct kprobe *p, struct pt_regs *regs)
 {
-	insn_0arg_fn_t *i_fn = (insn_0arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	int rd = (insn >> 12) & 0xf;
-
-	regs->uregs[rd] = insnslot_0arg_rflags(regs->ARM_cpsr, i_fn);
 }
 
-static void __kprobes emulate_ird12(struct kprobe *p, struct pt_regs *regs)
+static void __kprobes
+emulate_rd12_modify(struct kprobe *p, struct pt_regs *regs)
 {
 	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
 	kprobe_opcode_t insn = p->opcode;
-	int ird = (insn >> 12) & 0xf;
+	int rd = (insn >> 12) & 0xf;
+	long rdv = regs->uregs[rd];
 
-	insnslot_1arg_rflags(regs->uregs[ird], regs->ARM_cpsr, i_fn);
+	regs->uregs[rd] = insnslot_1arg_rflags(rdv, regs->ARM_cpsr, i_fn);
 }
 
-static void __kprobes emulate_rn16(struct kprobe *p, struct pt_regs *regs)
+static void __kprobes
+emulate_rd12rn0_modify(struct kprobe *p, struct pt_regs *regs)
 {
-	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
+	insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0];
 	kprobe_opcode_t insn = p->opcode;
-	int rn = (insn >> 16) & 0xf;
+	int rd = (insn >> 12) & 0xf;
+	int rn = insn & 0xf;
+	long rdv = regs->uregs[rd];
 	long rnv = regs->uregs[rn];
 
-	insnslot_1arg_rflags(rnv, regs->ARM_cpsr, i_fn);
+	regs->uregs[rd] = insnslot_2arg_rflags(rdv, rnv, regs->ARM_cpsr, i_fn);
 }
 
 static void __kprobes emulate_rd12rm0(struct kprobe *p, struct pt_regs *regs)
@@ -819,6 +785,17 @@ emulate_alu_imm_rwflags(struct kprobe *p, struct pt_regs *regs)
 }
 
 static void __kprobes
+emulate_alu_tests_imm(struct kprobe *p, struct pt_regs *regs)
+{
+	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
+	kprobe_opcode_t insn = p->opcode;
+	int rn = (insn >> 16) & 0xf;
+	long rnv = (rn == 15) ? (long)p->addr + 8 : regs->uregs[rn];
+
+	insnslot_1arg_rwflags(rnv, &regs->ARM_cpsr, i_fn);
+}
+
+static void __kprobes
 emulate_alu_rflags(struct kprobe *p, struct pt_regs *regs)
 {
 	insn_3arg_fn_t *i_fn = (insn_3arg_fn_t *)&p->ainsn.insn[0];
@@ -854,14 +831,34 @@ emulate_alu_rwflags(struct kprobe *p, struct pt_regs *regs)
 		insnslot_3arg_rwflags(rnv, rmv, rsv, &regs->ARM_cpsr, i_fn);
 }
 
+static void __kprobes
+emulate_alu_tests(struct kprobe *p, struct pt_regs *regs)
+{
+	insn_3arg_fn_t *i_fn = (insn_3arg_fn_t *)&p->ainsn.insn[0];
+	kprobe_opcode_t insn = p->opcode;
+	long ppc = (long)p->addr + 8;
+	int rn = (insn >> 16) & 0xf;
+	int rs = (insn >> 8) & 0xf;	/* rs/rsv may be invalid, don't care. */
+	int rm = insn & 0xf;
+	long rnv = (rn == 15) ? ppc : regs->uregs[rn];
+	long rmv = (rm == 15) ? ppc : regs->uregs[rm];
+	long rsv = regs->uregs[rs];
+
+	insnslot_3arg_rwflags(rnv, rmv, rsv, &regs->ARM_cpsr, i_fn);
+}
+
 static enum kprobe_insn __kprobes
 prep_emulate_ldr_str(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
-	int ibit = (insn & (1 << 26)) ? 25 : 22;
+	int not_imm = (insn & (1 << 26)) ? (insn & (1 << 25))
+					 : (~insn & (1 << 22));
+
+	if (is_writeback(insn) && is_r15(insn, 16))
+		return INSN_REJECTED;	/* Writeback to PC */
 
 	insn &= 0xfff00fff;
 	insn |= 0x00001000;	/* Rn = r0, Rd = r1 */
-	if (insn & (1 << ibit)) {
+	if (not_imm) {
 		insn &= ~0xf;
 		insn |= 2;	/* Rm = r2 */
 	}
@@ -871,20 +868,40 @@ prep_emulate_ldr_str(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 }
 
 static enum kprobe_insn __kprobes
-prep_emulate_rd12rm0(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+prep_emulate_rd12_modify(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
-	insn &= 0xffff0ff0;	/* Rd = r0, Rm = r0 */
+	if (is_r15(insn, 12))
+		return INSN_REJECTED;	/* Rd is PC */
+
+	insn &= 0xffff0fff;	/* Rd = r0 */
 	asi->insn[0] = insn;
-	asi->insn_handler = emulate_rd12rm0;
+	asi->insn_handler = emulate_rd12_modify;
 	return INSN_GOOD;
 }
 
 static enum kprobe_insn __kprobes
-prep_emulate_rd12(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+prep_emulate_rd12rn0_modify(kprobe_opcode_t insn,
+			    struct arch_specific_insn *asi)
 {
-	insn &= 0xffff0fff;	/* Rd = r0 */
+	if (is_r15(insn, 12))
+		return INSN_REJECTED;	/* Rd is PC */
+
+	insn &= 0xffff0ff0;	/* Rd = r0 */
+	insn |= 0x00000001;	/* Rn = r1 */
+	asi->insn[0] = insn;
+	asi->insn_handler = emulate_rd12rn0_modify;
+	return INSN_GOOD;
+}
+
+static enum kprobe_insn __kprobes
+prep_emulate_rd12rm0(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+{
+	if (is_r15(insn, 12))
+		return INSN_REJECTED;	/* Rd is PC */
+
+	insn &= 0xffff0ff0;	/* Rd = r0, Rm = r0 */
 	asi->insn[0] = insn;
-	asi->insn_handler = emulate_rd12;
+	asi->insn_handler = emulate_rd12rm0;
 	return INSN_GOOD;
 }
 
@@ -892,6 +909,9 @@ static enum kprobe_insn __kprobes
 prep_emulate_rd12rn16rm0_wflags(kprobe_opcode_t insn,
 				struct arch_specific_insn *asi)
 {
+	if (is_r15(insn, 12))
+		return INSN_REJECTED;	/* Rd is PC */
+
 	insn &= 0xfff00ff0;	/* Rd = r0, Rn = r0 */
 	insn |= 0x00000001;	/* Rm = r1 */
 	asi->insn[0] = insn;
@@ -903,6 +923,9 @@ static enum kprobe_insn __kprobes
 prep_emulate_rd16rs8rm0_wflags(kprobe_opcode_t insn,
 			       struct arch_specific_insn *asi)
 {
+	if (is_r15(insn, 16))
+		return INSN_REJECTED;	/* Rd is PC */
+
 	insn &= 0xfff0f0f0;	/* Rd = r0, Rs = r0 */
 	insn |= 0x00000001;	/* Rm = r1          */
 	asi->insn[0] = insn;
@@ -914,6 +937,9 @@ static enum kprobe_insn __kprobes
 prep_emulate_rd16rn12rs8rm0_wflags(kprobe_opcode_t insn,
 				   struct arch_specific_insn *asi)
 {
+	if (is_r15(insn, 16))
+		return INSN_REJECTED;	/* Rd is PC */
+
 	insn &= 0xfff000f0;	/* Rd = r0, Rn = r0 */
 	insn |= 0x00000102;	/* Rs = r1, Rm = r2 */
 	asi->insn[0] = insn;
@@ -925,6 +951,9 @@ static enum kprobe_insn __kprobes
 prep_emulate_rdhi16rdlo12rs8rm0_wflags(kprobe_opcode_t insn,
 				       struct arch_specific_insn *asi)
 {
+	if (is_r15(insn, 16) || is_r15(insn, 12))
+		return INSN_REJECTED;	/* RdHi or RdLo is PC */
+
 	insn &= 0xfff000f0;	/* RdHi = r0, RdLo = r1 */
 	insn |= 0x00001203;	/* Rs = r2, Rm = r3 */
 	asi->insn[0] = insn;
@@ -945,20 +974,13 @@ prep_emulate_rdhi16rdlo12rs8rm0_wflags(kprobe_opcode_t insn,
 static enum kprobe_insn __kprobes
 space_1111(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
-	/* CPS mmod == 1 : 1111 0001 0000 xx10 xxxx xxxx xx0x xxxx */
-	/* RFE           : 1111 100x x0x1 xxxx xxxx 1010 xxxx xxxx */
-	/* SRS           : 1111 100x x1x0 1101 xxxx 0101 xxxx xxxx */
-	if ((insn & 0xfff30020) == 0xf1020000 ||
-	    (insn & 0xfe500f00) == 0xf8100a00 ||
-	    (insn & 0xfe5f0f00) == 0xf84d0500)
-		return INSN_REJECTED;
-
-	/* PLD : 1111 01x1 x101 xxxx xxxx xxxx xxxx xxxx : */
-	if ((insn & 0xfd700000) == 0xf4500000) {
-		insn &= 0xfff0ffff;	/* Rn = r0 */
-		asi->insn[0] = insn;
-		asi->insn_handler = emulate_rn16;
-		return INSN_GOOD;
+	/* memory hint : 1111 0100 x001 xxxx xxxx xxxx xxxx xxxx : */
+	/* PLDI        : 1111 0100 x101 xxxx xxxx xxxx xxxx xxxx : */
+	/* PLDW        : 1111 0101 x001 xxxx xxxx xxxx xxxx xxxx : */
+	/* PLD         : 1111 0101 x101 xxxx xxxx xxxx xxxx xxxx : */
+	if ((insn & 0xfe300000) == 0xf4100000) {
+		asi->insn_handler = emulate_nop;
+		return INSN_GOOD_NO_SLOT;
 	}
 
 	/* BLX(1) : 1111 101x xxxx xxxx xxxx xxxx xxxx xxxx : */
@@ -967,41 +989,22 @@ space_1111(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 		return INSN_GOOD_NO_SLOT;
 	}
 
-	/* SETEND : 1111 0001 0000 0001 xxxx xxxx 0000 xxxx */
-	/* CDP2   : 1111 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */
-	if ((insn & 0xffff00f0) == 0xf1010000 ||
-	    (insn & 0xff000010) == 0xfe000000) {
-		asi->insn[0] = insn;
-		asi->insn_handler = emulate_none;
-		return INSN_GOOD;
-	}
+	/* CPS   : 1111 0001 0000 xxx0 xxxx xxxx xx0x xxxx */
+	/* SETEND: 1111 0001 0000 0001 xxxx xxxx 0000 xxxx */
 
+	/* SRS   : 1111 100x x1x0 xxxx xxxx xxxx xxxx xxxx */
+	/* RFE   : 1111 100x x0x1 xxxx xxxx xxxx xxxx xxxx */
+
+	/* Coprocessor instructions... */
 	/* MCRR2 : 1111 1100 0100 xxxx xxxx xxxx xxxx xxxx : (Rd != Rn) */
 	/* MRRC2 : 1111 1100 0101 xxxx xxxx xxxx xxxx xxxx : (Rd != Rn) */
-	if ((insn & 0xffe00000) == 0xfc400000) {
-		insn &= 0xfff00fff;	/* Rn = r0 */
-		insn |= 0x00001000;	/* Rd = r1 */
-		asi->insn[0] = insn;
-		asi->insn_handler =
-			(insn & (1 << 20)) ? emulate_mrrc : emulate_mcrr;
-		return INSN_GOOD;
-	}
+	/* LDC2  : 1111 110x xxx1 xxxx xxxx xxxx xxxx xxxx */
+	/* STC2  : 1111 110x xxx0 xxxx xxxx xxxx xxxx xxxx */
+	/* CDP2  : 1111 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */
+	/* MCR2  : 1111 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */
+	/* MRC2  : 1111 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */
 
-	/* LDC2 : 1111 110x xxx1 xxxx xxxx xxxx xxxx xxxx */
-	/* STC2 : 1111 110x xxx0 xxxx xxxx xxxx xxxx xxxx */
-	if ((insn & 0xfe000000) == 0xfc000000) {
-		insn &= 0xfff0ffff;      /* Rn = r0 */
-		asi->insn[0] = insn;
-		asi->insn_handler = emulate_ldcstc;
-		return INSN_GOOD;
-	}
-
-	/* MCR2 : 1111 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */
-	/* MRC2 : 1111 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */
-	insn &= 0xffff0fff;	/* Rd = r0 */
-	asi->insn[0]      = insn;
-	asi->insn_handler = (insn & (1 << 20)) ? emulate_rd12 : emulate_ird12;
-	return INSN_GOOD;
+	return INSN_REJECTED;
 }
 
 static enum kprobe_insn __kprobes
@@ -1010,19 +1013,18 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	/* cccc 0001 0xx0 xxxx xxxx xxxx xxxx xxx0 xxxx */
 	if ((insn & 0x0f900010) == 0x01000000) {
 
-		/* BXJ  : cccc 0001 0010 xxxx xxxx xxxx 0010 xxxx */
-		/* MSR  : cccc 0001 0x10 xxxx xxxx xxxx 0000 xxxx */
-		if ((insn & 0x0ff000f0) == 0x01200020 ||
-		    (insn & 0x0fb000f0) == 0x01200000)
-			return INSN_REJECTED;
-
-		/* MRS : cccc 0001 0x00 xxxx xxxx xxxx 0000 xxxx */
-		if ((insn & 0x0fb00010) == 0x01000000)
-			return prep_emulate_rd12(insn, asi);
+		/* MRS cpsr : cccc 0001 0000 xxxx xxxx xxxx 0000 xxxx */
+		if ((insn & 0x0ff000f0) == 0x01000000) {
+			if (is_r15(insn, 12))
+				return INSN_REJECTED;	/* Rd is PC */
+			asi->insn_handler = simulate_mrs;
+			return INSN_GOOD_NO_SLOT;
+		}
 
 		/* SMLALxy : cccc 0001 0100 xxxx xxxx xxxx 1xx0 xxxx */
 		if ((insn & 0x0ff00090) == 0x01400080)
-			return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn, asi);
+			return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn,
+									asi);
 
 		/* SMULWy : cccc 0001 0010 xxxx xxxx xxxx 1x10 xxxx */
 		/* SMULxy : cccc 0001 0110 xxxx xxxx xxxx 1xx0 xxxx */
@@ -1031,24 +1033,29 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 			return prep_emulate_rd16rs8rm0_wflags(insn, asi);
 
 		/* SMLAxy : cccc 0001 0000 xxxx xxxx xxxx 1xx0 xxxx : Q */
-		/* SMLAWy : cccc 0001 0010 xxxx xxxx xxxx 0x00 xxxx : Q */
-		return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
+		/* SMLAWy : cccc 0001 0010 xxxx xxxx xxxx 1x00 xxxx : Q */
+		if ((insn & 0x0ff00090) == 0x01000080 ||
+		    (insn & 0x0ff000b0) == 0x01200080)
+			return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
+
+		/* BXJ      : cccc 0001 0010 xxxx xxxx xxxx 0010 xxxx */
+		/* MSR      : cccc 0001 0x10 xxxx xxxx xxxx 0000 xxxx */
+		/* MRS spsr : cccc 0001 0100 xxxx xxxx xxxx 0000 xxxx */
 
+		/* Other instruction encodings aren't yet defined */
+		return INSN_REJECTED;
 	}
 
 	/* cccc 0001 0xx0 xxxx xxxx xxxx xxxx 0xx1 xxxx */
 	else if ((insn & 0x0f900090) == 0x01000010) {
 
-		/* BKPT : 1110 0001 0010 xxxx xxxx xxxx 0111 xxxx */
-		if ((insn & 0xfff000f0) == 0xe1200070)
-			return INSN_REJECTED;
-
 		/* BLX(2) : cccc 0001 0010 xxxx xxxx xxxx 0011 xxxx */
 		/* BX     : cccc 0001 0010 xxxx xxxx xxxx 0001 xxxx */
 		if ((insn & 0x0ff000d0) == 0x01200010) {
-			asi->insn[0] = truecc_insn(insn);
+			if ((insn & 0x0ff000ff) == 0x0120003f)
+				return INSN_REJECTED; /* BLX pc */
 			asi->insn_handler = simulate_blx2bx;
-			return INSN_GOOD;
+			return INSN_GOOD_NO_SLOT;
 		}
 
 		/* CLZ : cccc 0001 0110 xxxx xxxx xxxx 0001 xxxx */
@@ -1059,17 +1066,27 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 		/* QSUB    : cccc 0001 0010 xxxx xxxx xxxx 0101 xxxx :Q */
 		/* QDADD   : cccc 0001 0100 xxxx xxxx xxxx 0101 xxxx :Q */
 		/* QDSUB   : cccc 0001 0110 xxxx xxxx xxxx 0101 xxxx :Q */
-		return prep_emulate_rd12rn16rm0_wflags(insn, asi);
+		if ((insn & 0x0f9000f0) == 0x01000050)
+			return prep_emulate_rd12rn16rm0_wflags(insn, asi);
+
+		/* BKPT : 1110 0001 0010 xxxx xxxx xxxx 0111 xxxx */
+		/* SMC  : cccc 0001 0110 xxxx xxxx xxxx 0111 xxxx */
+
+		/* Other instruction encodings aren't yet defined */
+		return INSN_REJECTED;
 	}
 
 	/* cccc 0000 xxxx xxxx xxxx xxxx xxxx 1001 xxxx */
-	else if ((insn & 0x0f000090) == 0x00000090) {
+	else if ((insn & 0x0f0000f0) == 0x00000090) {
 
 		/* MUL    : cccc 0000 0000 xxxx xxxx xxxx 1001 xxxx :   */
 		/* MULS   : cccc 0000 0001 xxxx xxxx xxxx 1001 xxxx :cc */
 		/* MLA    : cccc 0000 0010 xxxx xxxx xxxx 1001 xxxx :   */
 		/* MLAS   : cccc 0000 0011 xxxx xxxx xxxx 1001 xxxx :cc */
 		/* UMAAL  : cccc 0000 0100 xxxx xxxx xxxx 1001 xxxx :   */
+		/* undef  : cccc 0000 0101 xxxx xxxx xxxx 1001 xxxx :   */
+		/* MLS    : cccc 0000 0110 xxxx xxxx xxxx 1001 xxxx :   */
+		/* undef  : cccc 0000 0111 xxxx xxxx xxxx 1001 xxxx :   */
 		/* UMULL  : cccc 0000 1000 xxxx xxxx xxxx 1001 xxxx :   */
 		/* UMULLS : cccc 0000 1001 xxxx xxxx xxxx 1001 xxxx :cc */
 		/* UMLAL  : cccc 0000 1010 xxxx xxxx xxxx 1001 xxxx :   */
@@ -1078,13 +1095,15 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 		/* SMULLS : cccc 0000 1101 xxxx xxxx xxxx 1001 xxxx :cc */
 		/* SMLAL  : cccc 0000 1110 xxxx xxxx xxxx 1001 xxxx :   */
 		/* SMLALS : cccc 0000 1111 xxxx xxxx xxxx 1001 xxxx :cc */
-		if ((insn & 0x0fe000f0) == 0x00000090) {
-		       return prep_emulate_rd16rs8rm0_wflags(insn, asi);
-		} else if  ((insn & 0x0fe000f0) == 0x00200090) {
-		       return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
-		} else {
-		       return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn, asi);
-		}
+		if ((insn & 0x00d00000) == 0x00500000)
+			return INSN_REJECTED;
+		else if ((insn & 0x00e00000) == 0x00000000)
+			return prep_emulate_rd16rs8rm0_wflags(insn, asi);
+		else if ((insn & 0x00a00000) == 0x00200000)
+			return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
+		else
+			return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn,
+									asi);
 	}
 
 	/* cccc 000x xxxx xxxx xxxx xxxx xxxx 1xx1 xxxx */
@@ -1092,23 +1111,45 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 
 		/* SWP   : cccc 0001 0000 xxxx xxxx xxxx 1001 xxxx */
 		/* SWPB  : cccc 0001 0100 xxxx xxxx xxxx 1001 xxxx */
-		/* LDRD  : cccc 000x xxx0 xxxx xxxx xxxx 1101 xxxx */
-		/* STRD  : cccc 000x xxx0 xxxx xxxx xxxx 1111 xxxx */
+		/* ???   : cccc 0001 0x01 xxxx xxxx xxxx 1001 xxxx */
+		/* ???   : cccc 0001 0x10 xxxx xxxx xxxx 1001 xxxx */
+		/* ???   : cccc 0001 0x11 xxxx xxxx xxxx 1001 xxxx */
 		/* STREX : cccc 0001 1000 xxxx xxxx xxxx 1001 xxxx */
 		/* LDREX : cccc 0001 1001 xxxx xxxx xxxx 1001 xxxx */
+		/* STREXD: cccc 0001 1010 xxxx xxxx xxxx 1001 xxxx */
+		/* LDREXD: cccc 0001 1011 xxxx xxxx xxxx 1001 xxxx */
+		/* STREXB: cccc 0001 1100 xxxx xxxx xxxx 1001 xxxx */
+		/* LDREXB: cccc 0001 1101 xxxx xxxx xxxx 1001 xxxx */
+		/* STREXH: cccc 0001 1110 xxxx xxxx xxxx 1001 xxxx */
+		/* LDREXH: cccc 0001 1111 xxxx xxxx xxxx 1001 xxxx */
+
+		/* LDRD  : cccc 000x xxx0 xxxx xxxx xxxx 1101 xxxx */
+		/* STRD  : cccc 000x xxx0 xxxx xxxx xxxx 1111 xxxx */
 		/* LDRH  : cccc 000x xxx1 xxxx xxxx xxxx 1011 xxxx */
 		/* STRH  : cccc 000x xxx0 xxxx xxxx xxxx 1011 xxxx */
 		/* LDRSB : cccc 000x xxx1 xxxx xxxx xxxx 1101 xxxx */
 		/* LDRSH : cccc 000x xxx1 xxxx xxxx xxxx 1111 xxxx */
-		if ((insn & 0x0fb000f0) == 0x01000090) {
-			/* SWP/SWPB */
-			return prep_emulate_rd12rn16rm0_wflags(insn, asi);
+		if ((insn & 0x0f0000f0) == 0x01000090) {
+			if ((insn & 0x0fb000f0) == 0x01000090) {
+				/* SWP/SWPB */
+				return prep_emulate_rd12rn16rm0_wflags(insn,
+									asi);
+			} else {
+				/* STREX/LDREX variants and unallocaed space */
+				return INSN_REJECTED;
+			}
+
 		} else if ((insn & 0x0e1000d0) == 0x00000d0) {
 			/* STRD/LDRD */
+			if ((insn & 0x0000e000) == 0x0000e000)
+				return INSN_REJECTED;	/* Rd is LR or PC */
+			if (is_writeback(insn) && is_r15(insn, 16))
+				return INSN_REJECTED;	/* Writeback to PC */
+
 			insn &= 0xfff00fff;
 			insn |= 0x00002000;	/* Rn = r0, Rd = r2 */
-			if (insn & (1 << 22)) {
-				/* I bit */
+			if (!(insn & (1 << 22))) {
+				/* Register index */
 				insn &= ~0xf;
 				insn |= 1;	/* Rm = r1 */
 			}
@@ -1118,6 +1159,9 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 			return INSN_GOOD;
 		}
 
+		/* LDRH/STRH/LDRSB/LDRSH */
+		if (is_r15(insn, 12))
+			return INSN_REJECTED;	/* Rd is PC */
 		return prep_emulate_ldr_str(insn, asi);
 	}
 
@@ -1125,7 +1169,7 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 
 	/*
 	 * ALU op with S bit and Rd == 15 :
-	 * 	cccc 000x xxx1 xxxx 1111 xxxx xxxx xxxx
+	 *	cccc 000x xxx1 xxxx 1111 xxxx xxxx xxxx
 	 */
 	if ((insn & 0x0e10f000) == 0x0010f000)
 		return INSN_REJECTED;
@@ -1154,22 +1198,61 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 		insn |= 0x00000200;     /* Rs = r2 */
 	}
 	asi->insn[0] = insn;
-	asi->insn_handler = (insn & (1 << 20)) ?  /* S-bit */
+
+	if ((insn & 0x0f900000) == 0x01100000) {
+		/*
+		 * TST : cccc 0001 0001 xxxx xxxx xxxx xxxx xxxx
+		 * TEQ : cccc 0001 0011 xxxx xxxx xxxx xxxx xxxx
+		 * CMP : cccc 0001 0101 xxxx xxxx xxxx xxxx xxxx
+		 * CMN : cccc 0001 0111 xxxx xxxx xxxx xxxx xxxx
+		 */
+		asi->insn_handler = emulate_alu_tests;
+	} else {
+		/* ALU ops which write to Rd */
+		asi->insn_handler = (insn & (1 << 20)) ?  /* S-bit */
 				emulate_alu_rwflags : emulate_alu_rflags;
+	}
 	return INSN_GOOD;
 }
 
 static enum kprobe_insn __kprobes
 space_cccc_001x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
+	/* MOVW  : cccc 0011 0000 xxxx xxxx xxxx xxxx xxxx */
+	/* MOVT  : cccc 0011 0100 xxxx xxxx xxxx xxxx xxxx */
+	if ((insn & 0x0fb00000) == 0x03000000)
+		return prep_emulate_rd12_modify(insn, asi);
+
+	/* hints : cccc 0011 0010 0000 xxxx xxxx xxxx xxxx */
+	if ((insn & 0x0fff0000) == 0x03200000) {
+		unsigned op2 = insn & 0x000000ff;
+		if (op2 == 0x01 || op2 == 0x04) {
+			/* YIELD : cccc 0011 0010 0000 xxxx xxxx 0000 0001 */
+			/* SEV   : cccc 0011 0010 0000 xxxx xxxx 0000 0100 */
+			asi->insn[0] = insn;
+			asi->insn_handler = emulate_none;
+			return INSN_GOOD;
+		} else if (op2 <= 0x03) {
+			/* NOP   : cccc 0011 0010 0000 xxxx xxxx 0000 0000 */
+			/* WFE   : cccc 0011 0010 0000 xxxx xxxx 0000 0010 */
+			/* WFI   : cccc 0011 0010 0000 xxxx xxxx 0000 0011 */
+			/*
+			 * We make WFE and WFI true NOPs to avoid stalls due
+			 * to missing events whilst processing the probe.
+			 */
+			asi->insn_handler = emulate_nop;
+			return INSN_GOOD_NO_SLOT;
+		}
+		/* For DBG and unallocated hints it's safest to reject them */
+		return INSN_REJECTED;
+	}
+
 	/*
 	 * MSR   : cccc 0011 0x10 xxxx xxxx xxxx xxxx xxxx
-	 * Undef : cccc 0011 0100 xxxx xxxx xxxx xxxx xxxx
 	 * ALU op with S bit and Rd == 15 :
 	 *	   cccc 001x xxx1 xxxx 1111 xxxx xxxx xxxx
 	 */
 	if ((insn & 0x0fb00000) == 0x03200000 ||	/* MSR */
-	    (insn & 0x0ff00000) == 0x03400000 ||	/* Undef */
 	    (insn & 0x0e10f000) == 0x0210f000)		/* ALU s-bit, R15  */
 		return INSN_REJECTED;
 
@@ -1180,10 +1263,22 @@ space_cccc_001x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	 * *S (bit 20) updates condition codes
 	 * ADC/SBC/RSC reads the C flag
 	 */
-	insn &= 0xffff0fff;	/* Rd = r0 */
+	insn &= 0xfff00fff;	/* Rn = r0 and Rd = r0 */
 	asi->insn[0] = insn;
-	asi->insn_handler = (insn & (1 << 20)) ?  /* S-bit */
+
+	if ((insn & 0x0f900000) == 0x03100000) {
+		/*
+		 * TST : cccc 0011 0001 xxxx xxxx xxxx xxxx xxxx
+		 * TEQ : cccc 0011 0011 xxxx xxxx xxxx xxxx xxxx
+		 * CMP : cccc 0011 0101 xxxx xxxx xxxx xxxx xxxx
+		 * CMN : cccc 0011 0111 xxxx xxxx xxxx xxxx xxxx
+		 */
+		asi->insn_handler = emulate_alu_tests_imm;
+	} else {
+		/* ALU ops which write to Rd */
+		asi->insn_handler = (insn & (1 << 20)) ?  /* S-bit */
 			emulate_alu_imm_rwflags : emulate_alu_imm_rflags;
+	}
 	return INSN_GOOD;
 }
 
@@ -1192,6 +1287,8 @@ space_cccc_0110__1(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
 	/* SEL : cccc 0110 1000 xxxx xxxx xxxx 1011 xxxx GE: !!! */
 	if ((insn & 0x0ff000f0) == 0x068000b0) {
+		if (is_r15(insn, 12))
+			return INSN_REJECTED;	/* Rd is PC */
 		insn &= 0xfff00ff0;	/* Rd = r0, Rn = r0 */
 		insn |= 0x00000001;	/* Rm = r1 */
 		asi->insn[0] = insn;
@@ -1205,6 +1302,8 @@ space_cccc_0110__1(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	/* USAT16 : cccc 0110 1110 xxxx xxxx xxxx 0011 xxxx :Q */
 	if ((insn & 0x0fa00030) == 0x06a00010 ||
 	    (insn & 0x0fb000f0) == 0x06a00030) {
+		if (is_r15(insn, 12))
+			return INSN_REJECTED;	/* Rd is PC */
 		insn &= 0xffff0ff0;	/* Rd = r0, Rm = r0 */
 		asi->insn[0] = insn;
 		asi->insn_handler = emulate_sat;
@@ -1213,57 +1312,101 @@ space_cccc_0110__1(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 
 	/* REV    : cccc 0110 1011 xxxx xxxx xxxx 0011 xxxx */
 	/* REV16  : cccc 0110 1011 xxxx xxxx xxxx 1011 xxxx */
+	/* RBIT   : cccc 0110 1111 xxxx xxxx xxxx 0011 xxxx */
 	/* REVSH  : cccc 0110 1111 xxxx xxxx xxxx 1011 xxxx */
 	if ((insn & 0x0ff00070) == 0x06b00030 ||
-	    (insn & 0x0ff000f0) == 0x06f000b0)
+	    (insn & 0x0ff00070) == 0x06f00030)
 		return prep_emulate_rd12rm0(insn, asi);
 
+	/* ???       : cccc 0110 0000 xxxx xxxx xxxx xxx1 xxxx :   */
 	/* SADD16    : cccc 0110 0001 xxxx xxxx xxxx 0001 xxxx :GE */
 	/* SADDSUBX  : cccc 0110 0001 xxxx xxxx xxxx 0011 xxxx :GE */
 	/* SSUBADDX  : cccc 0110 0001 xxxx xxxx xxxx 0101 xxxx :GE */
 	/* SSUB16    : cccc 0110 0001 xxxx xxxx xxxx 0111 xxxx :GE */
 	/* SADD8     : cccc 0110 0001 xxxx xxxx xxxx 1001 xxxx :GE */
+	/* ???       : cccc 0110 0001 xxxx xxxx xxxx 1011 xxxx :   */
+	/* ???       : cccc 0110 0001 xxxx xxxx xxxx 1101 xxxx :   */
 	/* SSUB8     : cccc 0110 0001 xxxx xxxx xxxx 1111 xxxx :GE */
 	/* QADD16    : cccc 0110 0010 xxxx xxxx xxxx 0001 xxxx :   */
 	/* QADDSUBX  : cccc 0110 0010 xxxx xxxx xxxx 0011 xxxx :   */
 	/* QSUBADDX  : cccc 0110 0010 xxxx xxxx xxxx 0101 xxxx :   */
 	/* QSUB16    : cccc 0110 0010 xxxx xxxx xxxx 0111 xxxx :   */
 	/* QADD8     : cccc 0110 0010 xxxx xxxx xxxx 1001 xxxx :   */
+	/* ???       : cccc 0110 0010 xxxx xxxx xxxx 1011 xxxx :   */
+	/* ???       : cccc 0110 0010 xxxx xxxx xxxx 1101 xxxx :   */
 	/* QSUB8     : cccc 0110 0010 xxxx xxxx xxxx 1111 xxxx :   */
 	/* SHADD16   : cccc 0110 0011 xxxx xxxx xxxx 0001 xxxx :   */
 	/* SHADDSUBX : cccc 0110 0011 xxxx xxxx xxxx 0011 xxxx :   */
 	/* SHSUBADDX : cccc 0110 0011 xxxx xxxx xxxx 0101 xxxx :   */
 	/* SHSUB16   : cccc 0110 0011 xxxx xxxx xxxx 0111 xxxx :   */
 	/* SHADD8    : cccc 0110 0011 xxxx xxxx xxxx 1001 xxxx :   */
+	/* ???       : cccc 0110 0011 xxxx xxxx xxxx 1011 xxxx :   */
+	/* ???       : cccc 0110 0011 xxxx xxxx xxxx 1101 xxxx :   */
 	/* SHSUB8    : cccc 0110 0011 xxxx xxxx xxxx 1111 xxxx :   */
+	/* ???       : cccc 0110 0100 xxxx xxxx xxxx xxx1 xxxx :   */
 	/* UADD16    : cccc 0110 0101 xxxx xxxx xxxx 0001 xxxx :GE */
 	/* UADDSUBX  : cccc 0110 0101 xxxx xxxx xxxx 0011 xxxx :GE */
 	/* USUBADDX  : cccc 0110 0101 xxxx xxxx xxxx 0101 xxxx :GE */
 	/* USUB16    : cccc 0110 0101 xxxx xxxx xxxx 0111 xxxx :GE */
 	/* UADD8     : cccc 0110 0101 xxxx xxxx xxxx 1001 xxxx :GE */
+	/* ???       : cccc 0110 0101 xxxx xxxx xxxx 1011 xxxx :   */
+	/* ???       : cccc 0110 0101 xxxx xxxx xxxx 1101 xxxx :   */
 	/* USUB8     : cccc 0110 0101 xxxx xxxx xxxx 1111 xxxx :GE */
 	/* UQADD16   : cccc 0110 0110 xxxx xxxx xxxx 0001 xxxx :   */
 	/* UQADDSUBX : cccc 0110 0110 xxxx xxxx xxxx 0011 xxxx :   */
 	/* UQSUBADDX : cccc 0110 0110 xxxx xxxx xxxx 0101 xxxx :   */
 	/* UQSUB16   : cccc 0110 0110 xxxx xxxx xxxx 0111 xxxx :   */
 	/* UQADD8    : cccc 0110 0110 xxxx xxxx xxxx 1001 xxxx :   */
+	/* ???       : cccc 0110 0110 xxxx xxxx xxxx 1011 xxxx :   */
+	/* ???       : cccc 0110 0110 xxxx xxxx xxxx 1101 xxxx :   */
 	/* UQSUB8    : cccc 0110 0110 xxxx xxxx xxxx 1111 xxxx :   */
 	/* UHADD16   : cccc 0110 0111 xxxx xxxx xxxx 0001 xxxx :   */
 	/* UHADDSUBX : cccc 0110 0111 xxxx xxxx xxxx 0011 xxxx :   */
 	/* UHSUBADDX : cccc 0110 0111 xxxx xxxx xxxx 0101 xxxx :   */
 	/* UHSUB16   : cccc 0110 0111 xxxx xxxx xxxx 0111 xxxx :   */
 	/* UHADD8    : cccc 0110 0111 xxxx xxxx xxxx 1001 xxxx :   */
+	/* ???       : cccc 0110 0111 xxxx xxxx xxxx 1011 xxxx :   */
+	/* ???       : cccc 0110 0111 xxxx xxxx xxxx 1101 xxxx :   */
 	/* UHSUB8    : cccc 0110 0111 xxxx xxxx xxxx 1111 xxxx :   */
+	if ((insn & 0x0f800010) == 0x06000010) {
+		if ((insn & 0x00300000) == 0x00000000 ||
+		    (insn & 0x000000e0) == 0x000000a0 ||
+		    (insn & 0x000000e0) == 0x000000c0)
+			return INSN_REJECTED;	/* Unallocated space */
+		return prep_emulate_rd12rn16rm0_wflags(insn, asi);
+	}
+
 	/* PKHBT     : cccc 0110 1000 xxxx xxxx xxxx x001 xxxx :   */
 	/* PKHTB     : cccc 0110 1000 xxxx xxxx xxxx x101 xxxx :   */
+	if ((insn & 0x0ff00030) == 0x06800010)
+		return prep_emulate_rd12rn16rm0_wflags(insn, asi);
+
 	/* SXTAB16   : cccc 0110 1000 xxxx xxxx xxxx 0111 xxxx :   */
-	/* SXTB      : cccc 0110 1010 xxxx xxxx xxxx 0111 xxxx :   */
+	/* SXTB16    : cccc 0110 1000 1111 xxxx xxxx 0111 xxxx :   */
+	/* ???       : cccc 0110 1001 xxxx xxxx xxxx 0111 xxxx :   */
 	/* SXTAB     : cccc 0110 1010 xxxx xxxx xxxx 0111 xxxx :   */
+	/* SXTB      : cccc 0110 1010 1111 xxxx xxxx 0111 xxxx :   */
 	/* SXTAH     : cccc 0110 1011 xxxx xxxx xxxx 0111 xxxx :   */
+	/* SXTH      : cccc 0110 1011 1111 xxxx xxxx 0111 xxxx :   */
 	/* UXTAB16   : cccc 0110 1100 xxxx xxxx xxxx 0111 xxxx :   */
+	/* UXTB16    : cccc 0110 1100 1111 xxxx xxxx 0111 xxxx :   */
+	/* ???       : cccc 0110 1101 xxxx xxxx xxxx 0111 xxxx :   */
 	/* UXTAB     : cccc 0110 1110 xxxx xxxx xxxx 0111 xxxx :   */
+	/* UXTB      : cccc 0110 1110 1111 xxxx xxxx 0111 xxxx :   */
 	/* UXTAH     : cccc 0110 1111 xxxx xxxx xxxx 0111 xxxx :   */
-	return prep_emulate_rd12rn16rm0_wflags(insn, asi);
+	/* UXTH      : cccc 0110 1111 1111 xxxx xxxx 0111 xxxx :   */
+	if ((insn & 0x0f8000f0) == 0x06800070) {
+		if ((insn & 0x00300000) == 0x00100000)
+			return INSN_REJECTED;	/* Unallocated space */
+
+		if ((insn & 0x000f0000) == 0x000f0000)
+			return prep_emulate_rd12rm0(insn, asi);
+		else
+			return prep_emulate_rd12rn16rm0_wflags(insn, asi);
+	}
+
+	/* Other instruction encodings aren't yet defined */
+	return INSN_REJECTED;
 }
 
 static enum kprobe_insn __kprobes
@@ -1273,29 +1416,49 @@ space_cccc_0111__1(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	if ((insn & 0x0ff000f0) == 0x03f000f0)
 		return INSN_REJECTED;
 
-	/* USADA8 : cccc 0111 1000 xxxx xxxx xxxx 0001 xxxx */
-	/* USAD8  : cccc 0111 1000 xxxx 1111 xxxx 0001 xxxx */
-	if ((insn & 0x0ff000f0) == 0x07800010)
-		 return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
-
 	/* SMLALD : cccc 0111 0100 xxxx xxxx xxxx 00x1 xxxx */
 	/* SMLSLD : cccc 0111 0100 xxxx xxxx xxxx 01x1 xxxx */
 	if ((insn & 0x0ff00090) == 0x07400010)
 		return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn, asi);
 
 	/* SMLAD  : cccc 0111 0000 xxxx xxxx xxxx 00x1 xxxx :Q */
+	/* SMUAD  : cccc 0111 0000 xxxx 1111 xxxx 00x1 xxxx :Q */
 	/* SMLSD  : cccc 0111 0000 xxxx xxxx xxxx 01x1 xxxx :Q */
+	/* SMUSD  : cccc 0111 0000 xxxx 1111 xxxx 01x1 xxxx :  */
 	/* SMMLA  : cccc 0111 0101 xxxx xxxx xxxx 00x1 xxxx :  */
-	/* SMMLS  : cccc 0111 0101 xxxx xxxx xxxx 11x1 xxxx :  */
+	/* SMMUL  : cccc 0111 0101 xxxx 1111 xxxx 00x1 xxxx :  */
+	/* USADA8 : cccc 0111 1000 xxxx xxxx xxxx 0001 xxxx :  */
+	/* USAD8  : cccc 0111 1000 xxxx 1111 xxxx 0001 xxxx :  */
 	if ((insn & 0x0ff00090) == 0x07000010 ||
 	    (insn & 0x0ff000d0) == 0x07500010 ||
-	    (insn & 0x0ff000d0) == 0x075000d0)
+	    (insn & 0x0ff000f0) == 0x07800010) {
+
+		if ((insn & 0x0000f000) == 0x0000f000)
+			return prep_emulate_rd16rs8rm0_wflags(insn, asi);
+		else
+			return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
+	}
+
+	/* SMMLS  : cccc 0111 0101 xxxx xxxx xxxx 11x1 xxxx :  */
+	if ((insn & 0x0ff000d0) == 0x075000d0)
 		return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
 
-	/* SMUSD  : cccc 0111 0000 xxxx xxxx xxxx 01x1 xxxx :  */
-	/* SMUAD  : cccc 0111 0000 xxxx 1111 xxxx 00x1 xxxx :Q */
-	/* SMMUL  : cccc 0111 0101 xxxx 1111 xxxx 00x1 xxxx :  */
-	return prep_emulate_rd16rs8rm0_wflags(insn, asi);
+	/* SBFX   : cccc 0111 101x xxxx xxxx xxxx x101 xxxx :  */
+	/* UBFX   : cccc 0111 111x xxxx xxxx xxxx x101 xxxx :  */
+	if ((insn & 0x0fa00070) == 0x07a00050)
+		return prep_emulate_rd12rm0(insn, asi);
+
+	/* BFI    : cccc 0111 110x xxxx xxxx xxxx x001 xxxx :  */
+	/* BFC    : cccc 0111 110x xxxx xxxx xxxx x001 1111 :  */
+	if ((insn & 0x0fe00070) == 0x07c00010) {
+
+		if ((insn & 0x0000000f) == 0x0000000f)
+			return prep_emulate_rd12_modify(insn, asi);
+		else
+			return prep_emulate_rd12rn0_modify(insn, asi);
+	}
+
+	return INSN_REJECTED;
 }
 
 static enum kprobe_insn __kprobes
@@ -1309,6 +1472,10 @@ space_cccc_01xx(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	/* STRB  : cccc 01xx x1x0 xxxx xxxx xxxx xxxx xxxx */
 	/* STRBT : cccc 01x0 x110 xxxx xxxx xxxx xxxx xxxx */
 	/* STRT  : cccc 01x0 x010 xxxx xxxx xxxx xxxx xxxx */
+
+	if ((insn & 0x00500000) == 0x00500000 && is_r15(insn, 12))
+		return INSN_REJECTED;	/* LDRB into PC */
+
 	return prep_emulate_ldr_str(insn, asi);
 }
 
@@ -1323,10 +1490,9 @@ space_cccc_100x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 
 	/* LDM(1) : cccc 100x x0x1 xxxx xxxx xxxx xxxx xxxx */
 	/* STM(1) : cccc 100x x0x0 xxxx xxxx xxxx xxxx xxxx */
-	asi->insn[0] = truecc_insn(insn);
 	asi->insn_handler = ((insn & 0x108000) == 0x008000) ? /* STM & R15 */
 				simulate_stm1_pc : simulate_ldm1stm1;
-	return INSN_GOOD;
+	return INSN_GOOD_NO_SLOT;
 }
 
 static enum kprobe_insn __kprobes
@@ -1334,58 +1500,117 @@ space_cccc_101x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
 	/* B  : cccc 1010 xxxx xxxx xxxx xxxx xxxx xxxx */
 	/* BL : cccc 1011 xxxx xxxx xxxx xxxx xxxx xxxx */
-	asi->insn[0] = truecc_insn(insn);
 	asi->insn_handler = simulate_bbl;
-	return INSN_GOOD;
+	return INSN_GOOD_NO_SLOT;
 }
 
 static enum kprobe_insn __kprobes
-space_cccc_1100_010x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+space_cccc_11xx(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
+	/* Coprocessor instructions... */
 	/* MCRR : cccc 1100 0100 xxxx xxxx xxxx xxxx xxxx : (Rd!=Rn) */
 	/* MRRC : cccc 1100 0101 xxxx xxxx xxxx xxxx xxxx : (Rd!=Rn) */
-	insn &= 0xfff00fff;
-	insn |= 0x00001000;	/* Rn = r0, Rd = r1 */
-	asi->insn[0] = insn;
-	asi->insn_handler = (insn & (1 << 20)) ? emulate_mrrc : emulate_mcrr;
-	return INSN_GOOD;
+	/* LDC  : cccc 110x xxx1 xxxx xxxx xxxx xxxx xxxx */
+	/* STC  : cccc 110x xxx0 xxxx xxxx xxxx xxxx xxxx */
+	/* CDP  : cccc 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */
+	/* MCR  : cccc 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */
+	/* MRC  : cccc 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */
+
+	/* SVC  : cccc 1111 xxxx xxxx xxxx xxxx xxxx xxxx */
+
+	return INSN_REJECTED;
 }
 
-static enum kprobe_insn __kprobes
-space_cccc_110x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+static unsigned long __kprobes __check_eq(unsigned long cpsr)
 {
-	/* LDC : cccc 110x xxx1 xxxx xxxx xxxx xxxx xxxx */
-	/* STC : cccc 110x xxx0 xxxx xxxx xxxx xxxx xxxx */
-	insn &= 0xfff0ffff;	/* Rn = r0 */
-	asi->insn[0] = insn;
-	asi->insn_handler = emulate_ldcstc;
-	return INSN_GOOD;
+	return cpsr & PSR_Z_BIT;
 }
 
-static enum kprobe_insn __kprobes
-space_cccc_111x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+static unsigned long __kprobes __check_ne(unsigned long cpsr)
 {
-	/* BKPT : 1110 0001 0010 xxxx xxxx xxxx 0111 xxxx */
-	/* SWI  : cccc 1111 xxxx xxxx xxxx xxxx xxxx xxxx */
-	if ((insn & 0xfff000f0) == 0xe1200070 ||
-	    (insn & 0x0f000000) == 0x0f000000)
-		return INSN_REJECTED;
+	return (~cpsr) & PSR_Z_BIT;
+}
 
-	/* CDP : cccc 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */
-	if ((insn & 0x0f000010) == 0x0e000000) {
-		asi->insn[0] = insn;
-		asi->insn_handler = emulate_none;
-		return INSN_GOOD;
-	}
+static unsigned long __kprobes __check_cs(unsigned long cpsr)
+{
+	return cpsr & PSR_C_BIT;
+}
 
-	/* MCR : cccc 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */
-	/* MRC : cccc 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */
-	insn &= 0xffff0fff;	/* Rd = r0 */
-	asi->insn[0] = insn;
-	asi->insn_handler = (insn & (1 << 20)) ? emulate_rd12 : emulate_ird12;
-	return INSN_GOOD;
+static unsigned long __kprobes __check_cc(unsigned long cpsr)
+{
+	return (~cpsr) & PSR_C_BIT;
+}
+
+static unsigned long __kprobes __check_mi(unsigned long cpsr)
+{
+	return cpsr & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_pl(unsigned long cpsr)
+{
+	return (~cpsr) & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_vs(unsigned long cpsr)
+{
+	return cpsr & PSR_V_BIT;
+}
+
+static unsigned long __kprobes __check_vc(unsigned long cpsr)
+{
+	return (~cpsr) & PSR_V_BIT;
+}
+
+static unsigned long __kprobes __check_hi(unsigned long cpsr)
+{
+	cpsr &= ~(cpsr >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
+	return cpsr & PSR_C_BIT;
 }
 
+static unsigned long __kprobes __check_ls(unsigned long cpsr)
+{
+	cpsr &= ~(cpsr >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
+	return (~cpsr) & PSR_C_BIT;
+}
+
+static unsigned long __kprobes __check_ge(unsigned long cpsr)
+{
+	cpsr ^= (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+	return (~cpsr) & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_lt(unsigned long cpsr)
+{
+	cpsr ^= (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+	return cpsr & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_gt(unsigned long cpsr)
+{
+	unsigned long temp = cpsr ^ (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+	temp |= (cpsr << 1);			 /* PSR_N_BIT |= PSR_Z_BIT */
+	return (~temp) & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_le(unsigned long cpsr)
+{
+	unsigned long temp = cpsr ^ (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+	temp |= (cpsr << 1);			 /* PSR_N_BIT |= PSR_Z_BIT */
+	return temp & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_al(unsigned long cpsr)
+{
+	return true;
+}
+
+static kprobe_check_cc * const condition_checks[16] = {
+	&__check_eq, &__check_ne, &__check_cs, &__check_cc,
+	&__check_mi, &__check_pl, &__check_vs, &__check_vc,
+	&__check_hi, &__check_ls, &__check_ge, &__check_lt,
+	&__check_gt, &__check_le, &__check_al, &__check_al
+};
+
 /* Return:
  *   INSN_REJECTED     If instruction is one not allowed to kprobe,
  *   INSN_GOOD         If instruction is supported and uses instruction slot,
@@ -1401,133 +1626,45 @@ space_cccc_111x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 enum kprobe_insn __kprobes
 arm_kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
+	asi->insn_check_cc = condition_checks[insn>>28];
 	asi->insn[1] = KPROBE_RETURN_INSTRUCTION;
 
-	if ((insn & 0xf0000000) == 0xf0000000) {
+	if ((insn & 0xf0000000) == 0xf0000000)
 
 		return space_1111(insn, asi);
 
-	} else if ((insn & 0x0e000000) == 0x00000000) {
+	else if ((insn & 0x0e000000) == 0x00000000)
 
 		return space_cccc_000x(insn, asi);
 
-	} else if ((insn & 0x0e000000) == 0x02000000) {
+	else if ((insn & 0x0e000000) == 0x02000000)
 
 		return space_cccc_001x(insn, asi);
 
-	} else if ((insn & 0x0f000010) == 0x06000010) {
+	else if ((insn & 0x0f000010) == 0x06000010)
 
 		return space_cccc_0110__1(insn, asi);
 
-	} else if ((insn & 0x0f000010) == 0x07000010) {
+	else if ((insn & 0x0f000010) == 0x07000010)
 
 		return space_cccc_0111__1(insn, asi);
 
-	} else if ((insn & 0x0c000000) == 0x04000000) {
+	else if ((insn & 0x0c000000) == 0x04000000)
 
 		return space_cccc_01xx(insn, asi);
 
-	} else if ((insn & 0x0e000000) == 0x08000000) {
+	else if ((insn & 0x0e000000) == 0x08000000)
 
 		return space_cccc_100x(insn, asi);
 
-	} else if ((insn & 0x0e000000) == 0x0a000000) {
+	else if ((insn & 0x0e000000) == 0x0a000000)
 
 		return space_cccc_101x(insn, asi);
 
-	} else if ((insn & 0x0fe00000) == 0x0c400000) {
-
-		return space_cccc_1100_010x(insn, asi);
-
-	} else if ((insn & 0x0e000000) == 0x0c000000) {
-
-		return space_cccc_110x(insn, asi);
-
-	}
-
-	return space_cccc_111x(insn, asi);
+	return space_cccc_11xx(insn, asi);
 }
 
 void __init arm_kprobe_decode_init(void)
 {
 	find_str_pc_offset();
 }
-
-
-/*
- * All ARM instructions listed below.
- *
- * Instructions and their general purpose registers are given.
- * If a particular register may not use R15, it is prefixed with a "!".
- * If marked with a "*" means the value returned by reading R15
- * is implementation defined.
- *
- * ADC/ADD/AND/BIC/CMN/CMP/EOR/MOV/MVN/ORR/RSB/RSC/SBC/SUB/TEQ
- *     TST: Rd, Rn, Rm, !Rs
- * BX: Rm
- * BLX(2): !Rm
- * BX: Rm (R15 legal, but discouraged)
- * BXJ: !Rm,
- * CLZ: !Rd, !Rm
- * CPY: Rd, Rm
- * LDC/2,STC/2 immediate offset & unindex: Rn
- * LDC/2,STC/2 immediate pre/post-indexed: !Rn
- * LDM(1/3): !Rn, register_list
- * LDM(2): !Rn, !register_list
- * LDR,STR,PLD immediate offset: Rd, Rn
- * LDR,STR,PLD register offset: Rd, Rn, !Rm
- * LDR,STR,PLD scaled register offset: Rd, !Rn, !Rm
- * LDR,STR immediate pre/post-indexed: Rd, !Rn
- * LDR,STR register pre/post-indexed: Rd, !Rn, !Rm
- * LDR,STR scaled register pre/post-indexed: Rd, !Rn, !Rm
- * LDRB,STRB immediate offset: !Rd, Rn
- * LDRB,STRB register offset: !Rd, Rn, !Rm
- * LDRB,STRB scaled register offset: !Rd, !Rn, !Rm
- * LDRB,STRB immediate pre/post-indexed: !Rd, !Rn
- * LDRB,STRB register pre/post-indexed: !Rd, !Rn, !Rm
- * LDRB,STRB scaled register pre/post-indexed: !Rd, !Rn, !Rm
- * LDRT,LDRBT,STRBT immediate pre/post-indexed: !Rd, !Rn
- * LDRT,LDRBT,STRBT register pre/post-indexed: !Rd, !Rn, !Rm
- * LDRT,LDRBT,STRBT scaled register pre/post-indexed: !Rd, !Rn, !Rm
- * LDRH/SH/SB/D,STRH/SH/SB/D immediate offset: !Rd, Rn
- * LDRH/SH/SB/D,STRH/SH/SB/D register offset: !Rd, Rn, !Rm
- * LDRH/SH/SB/D,STRH/SH/SB/D immediate pre/post-indexed: !Rd, !Rn
- * LDRH/SH/SB/D,STRH/SH/SB/D register pre/post-indexed: !Rd, !Rn, !Rm
- * LDREX: !Rd, !Rn
- * MCR/2: !Rd
- * MCRR/2,MRRC/2: !Rd, !Rn
- * MLA: !Rd, !Rn, !Rm, !Rs
- * MOV: Rd
- * MRC/2: !Rd (if Rd==15, only changes cond codes, not the register)
- * MRS,MSR: !Rd
- * MUL: !Rd, !Rm, !Rs
- * PKH{BT,TB}: !Rd, !Rn, !Rm
- * QDADD,[U]QADD/16/8/SUBX: !Rd, !Rm, !Rn
- * QDSUB,[U]QSUB/16/8/ADDX: !Rd, !Rm, !Rn
- * REV/16/SH: !Rd, !Rm
- * RFE: !Rn
- * {S,U}[H]ADD{16,8,SUBX},{S,U}[H]SUB{16,8,ADDX}: !Rd, !Rn, !Rm
- * SEL: !Rd, !Rn, !Rm
- * SMLA<x><y>,SMLA{D,W<y>},SMLSD,SMML{A,S}: !Rd, !Rn, !Rm, !Rs
- * SMLAL<x><y>,SMLA{D,LD},SMLSLD,SMMULL,SMULW<y>: !RdHi, !RdLo, !Rm, !Rs
- * SMMUL,SMUAD,SMUL<x><y>,SMUSD: !Rd, !Rm, !Rs
- * SSAT/16: !Rd, !Rm
- * STM(1/2): !Rn, register_list* (R15 in reg list not recommended)
- * STRT immediate pre/post-indexed: Rd*, !Rn
- * STRT register pre/post-indexed: Rd*, !Rn, !Rm
- * STRT scaled register pre/post-indexed: Rd*, !Rn, !Rm
- * STREX: !Rd, !Rn, !Rm
- * SWP/B: !Rd, !Rn, !Rm
- * {S,U}XTA{B,B16,H}: !Rd, !Rn, !Rm
- * {S,U}XT{B,B16,H}: !Rd, !Rm
- * UM{AA,LA,UL}L: !RdHi, !RdLo, !Rm, !Rs
- * USA{D8,A8,T,T16}: !Rd, !Rm, !Rs
- *
- * May transfer control by writing R15 (possible mode changes or alternate
- * mode accesses marked by "*"):
- * ALU op (* with s-bit), B, BL, BKPT, BLX(1/2), BX, BXJ, CPS*, CPY,
- * LDM(1), LDM(2/3)*, LDR, MOV, RFE*, SWI*
- *
- * Instructions that do not take general registers, nor transfer control:
- * CDP/2, SETEND, SRS*
- */
diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c
index 2ba7deb3072..1656c87501c 100644
--- a/arch/arm/kernel/kprobes.c
+++ b/arch/arm/kernel/kprobes.c
@@ -134,7 +134,8 @@ static void __kprobes singlestep(struct kprobe *p, struct pt_regs *regs,
 				 struct kprobe_ctlblk *kcb)
 {
 	regs->ARM_pc += 4;
-	p->ainsn.insn_handler(p, regs);
+	if (p->ainsn.insn_check_cc(regs->ARM_cpsr))
+		p->ainsn.insn_handler(p, regs);
 }
 
 /*
diff --git a/arch/arm/kernel/leds.c b/arch/arm/kernel/leds.c
index 31a316c1777..0f107dcb034 100644
--- a/arch/arm/kernel/leds.c
+++ b/arch/arm/kernel/leds.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <asm/leds.h>
 
@@ -69,36 +70,37 @@ static ssize_t leds_store(struct sys_device *dev,
 
 static SYSDEV_ATTR(event, 0200, NULL, leds_store);
 
-static int leds_suspend(struct sys_device *dev, pm_message_t state)
+static struct sysdev_class leds_sysclass = {
+	.name		= "leds",
+};
+
+static struct sys_device leds_device = {
+	.id		= 0,
+	.cls		= &leds_sysclass,
+};
+
+static int leds_suspend(void)
 {
 	leds_event(led_stop);
 	return 0;
 }
 
-static int leds_resume(struct sys_device *dev)
+static void leds_resume(void)
 {
 	leds_event(led_start);
-	return 0;
 }
 
-static int leds_shutdown(struct sys_device *dev)
+static void leds_shutdown(void)
 {
 	leds_event(led_halted);
-	return 0;
 }
 
-static struct sysdev_class leds_sysclass = {
-	.name		= "leds",
+static struct syscore_ops leds_syscore_ops = {
 	.shutdown	= leds_shutdown,
 	.suspend	= leds_suspend,
 	.resume		= leds_resume,
 };
 
-static struct sys_device leds_device = {
-	.id		= 0,
-	.cls		= &leds_sysclass,
-};
-
 static int __init leds_init(void)
 {
 	int ret;
@@ -107,6 +109,8 @@ static int __init leds_init(void)
 		ret = sysdev_register(&leds_device);
 	if (ret == 0)
 		ret = sysdev_create_file(&leds_device, &attr_event);
+	if (ret == 0)
+		register_syscore_ops(&leds_syscore_ops);
 	return ret;
 }
 
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 979da3947f4..139e3c82736 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -746,7 +746,8 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 
 	tail = (struct frame_tail __user *)regs->ARM_fp - 1;
 
-	while (tail && !((unsigned long)tail & 0x3))
+	while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+	       tail && !((unsigned long)tail & 0x3))
 		tail = user_backtrace(tail, entry);
 }
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 2bf27f364d0..8182f45ca49 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -767,12 +767,20 @@ long arch_ptrace(struct task_struct *child, long request,
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 		case PTRACE_GETHBPREGS:
+			if (ptrace_get_breakpoints(child) < 0)
+				return -ESRCH;
+
 			ret = ptrace_gethbpregs(child, addr,
 						(unsigned long __user *)data);
+			ptrace_put_breakpoints(child);
 			break;
 		case PTRACE_SETHBPREGS:
+			if (ptrace_get_breakpoints(child) < 0)
+				return -ESRCH;
+
 			ret = ptrace_sethbpregs(child, addr,
 						(unsigned long __user *)data);
+			ptrace_put_breakpoints(child);
 			break;
 #endif
 
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index cb839831764..0340224cf73 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -597,19 +597,13 @@ setup_rt_frame(int usig, struct k_sigaction *ka, siginfo_t *info,
 	return err;
 }
 
-static inline void setup_syscall_restart(struct pt_regs *regs)
-{
-	regs->ARM_r0 = regs->ARM_ORIG_r0;
-	regs->ARM_pc -= thumb_mode(regs) ? 2 : 4;
-}
-
 /*
  * OK, we're invoking a handler
  */	
 static int
 handle_signal(unsigned long sig, struct k_sigaction *ka,
 	      siginfo_t *info, sigset_t *oldset,
-	      struct pt_regs * regs, int syscall)
+	      struct pt_regs * regs)
 {
 	struct thread_info *thread = current_thread_info();
 	struct task_struct *tsk = current;
@@ -617,26 +611,6 @@ handle_signal(unsigned long sig, struct k_sigaction *ka,
 	int ret;
 
 	/*
-	 * If we were from a system call, check for system call restarting...
-	 */
-	if (syscall) {
-		switch (regs->ARM_r0) {
-		case -ERESTART_RESTARTBLOCK:
-		case -ERESTARTNOHAND:
-			regs->ARM_r0 = -EINTR;
-			break;
-		case -ERESTARTSYS:
-			if (!(ka->sa.sa_flags & SA_RESTART)) {
-				regs->ARM_r0 = -EINTR;
-				break;
-			}
-			/* fallthrough */
-		case -ERESTARTNOINTR:
-			setup_syscall_restart(regs);
-		}
-	}
-
-	/*
 	 * translate the signal
 	 */
 	if (usig < 32 && thread->exec_domain && thread->exec_domain->signal_invmap)
@@ -685,6 +659,7 @@ handle_signal(unsigned long sig, struct k_sigaction *ka,
  */
 static void do_signal(struct pt_regs *regs, int syscall)
 {
+	unsigned int retval = 0, continue_addr = 0, restart_addr = 0;
 	struct k_sigaction ka;
 	siginfo_t info;
 	int signr;
@@ -698,18 +673,61 @@ static void do_signal(struct pt_regs *regs, int syscall)
 	if (!user_mode(regs))
 		return;
 
+	/*
+	 * If we were from a system call, check for system call restarting...
+	 */
+	if (syscall) {
+		continue_addr = regs->ARM_pc;
+		restart_addr = continue_addr - (thumb_mode(regs) ? 2 : 4);
+		retval = regs->ARM_r0;
+
+		/*
+		 * Prepare for system call restart.  We do this here so that a
+		 * debugger will see the already changed PSW.
+		 */
+		switch (retval) {
+		case -ERESTARTNOHAND:
+		case -ERESTARTSYS:
+		case -ERESTARTNOINTR:
+			regs->ARM_r0 = regs->ARM_ORIG_r0;
+			regs->ARM_pc = restart_addr;
+			break;
+		case -ERESTART_RESTARTBLOCK:
+			regs->ARM_r0 = -EINTR;
+			break;
+		}
+	}
+
 	if (try_to_freeze())
 		goto no_signal;
 
+	/*
+	 * Get the signal to deliver.  When running under ptrace, at this
+	 * point the debugger may change all our registers ...
+	 */
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 	if (signr > 0) {
 		sigset_t *oldset;
 
+		/*
+		 * Depending on the signal settings we may need to revert the
+		 * decision to restart the system call.  But skip this if a
+		 * debugger has chosen to restart at a different PC.
+		 */
+		if (regs->ARM_pc == restart_addr) {
+			if (retval == -ERESTARTNOHAND
+			    || (retval == -ERESTARTSYS
+				&& !(ka.sa.sa_flags & SA_RESTART))) {
+				regs->ARM_r0 = -EINTR;
+				regs->ARM_pc = continue_addr;
+			}
+		}
+
 		if (test_thread_flag(TIF_RESTORE_SIGMASK))
 			oldset = &current->saved_sigmask;
 		else
 			oldset = &current->blocked;
-		if (handle_signal(signr, &ka, &info, oldset, regs, syscall) == 0) {
+		if (handle_signal(signr, &ka, &info, oldset, regs) == 0) {
 			/*
 			 * A signal was successfully delivered; the saved
 			 * sigmask will have been stored in the signal frame,
@@ -723,11 +741,14 @@ static void do_signal(struct pt_regs *regs, int syscall)
 	}
 
  no_signal:
-	/*
-	 * No signal to deliver to the process - restart the syscall.
-	 */
 	if (syscall) {
-		if (regs->ARM_r0 == -ERESTART_RESTARTBLOCK) {
+		/*
+		 * Handle restarting a different system call.  As above,
+		 * if a debugger has chosen to restart at a different PC,
+		 * ignore the restart.
+		 */
+		if (retval == -ERESTART_RESTARTBLOCK
+		    && regs->ARM_pc == continue_addr) {
 			if (thumb_mode(regs)) {
 				regs->ARM_r7 = __NR_restart_syscall - __NR_SYSCALL_BASE;
 				regs->ARM_pc -= 2;
@@ -750,11 +771,6 @@ static void do_signal(struct pt_regs *regs, int syscall)
 #endif
 			}
 		}
-		if (regs->ARM_r0 == -ERESTARTNOHAND ||
-		    regs->ARM_r0 == -ERESTARTSYS ||
-		    regs->ARM_r0 == -ERESTARTNOINTR) {
-			setup_syscall_restart(regs);
-		}
 
 		/* If there's no signal to deliver, we just put the saved sigmask
 		 * back.
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 8fe05ad932e..007a0a950e7 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -479,7 +479,7 @@ static void broadcast_timer_set_mode(enum clock_event_mode mode,
 {
 }
 
-static void broadcast_timer_setup(struct clock_event_device *evt)
+static void __cpuinit broadcast_timer_setup(struct clock_event_device *evt)
 {
 	evt->name	= "dummy_timer";
 	evt->features	= CLOCK_EVT_FEAT_ONESHOT |
@@ -560,10 +560,7 @@ asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs)
 		break;
 
 	case IPI_RESCHEDULE:
-		/*
-		 * nothing more to do - eveything is
-		 * done on the interrupt return path
-		 */
+		scheduler_ipi();
 		break;
 
 	case IPI_CALL_FUNC:
diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c
index 4ad8da15ef2..af0aaebf4de 100644
--- a/arch/arm/kernel/sys_oabi-compat.c
+++ b/arch/arm/kernel/sys_oabi-compat.c
@@ -311,7 +311,7 @@ asmlinkage long sys_oabi_semtimedop(int semid,
 	long err;
 	int i;
 
-	if (nsops < 1)
+	if (nsops < 1 || nsops > SEMOPM)
 		return -EINVAL;
 	sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL);
 	if (!sops)
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index 1ff46cabc7e..cb634c3e28e 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -21,7 +21,7 @@
 #include <linux/timex.h>
 #include <linux/errno.h>
 #include <linux/profile.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/timer.h>
 #include <linux/irq.h>
 
@@ -115,48 +115,37 @@ void timer_tick(void)
 #endif
 
 #if defined(CONFIG_PM) && !defined(CONFIG_GENERIC_CLOCKEVENTS)
-static int timer_suspend(struct sys_device *dev, pm_message_t state)
+static int timer_suspend(void)
 {
-	struct sys_timer *timer = container_of(dev, struct sys_timer, dev);
-
-	if (timer->suspend != NULL)
-		timer->suspend();
+	if (system_timer->suspend)
+		system_timer->suspend();
 
 	return 0;
 }
 
-static int timer_resume(struct sys_device *dev)
+static void timer_resume(void)
 {
-	struct sys_timer *timer = container_of(dev, struct sys_timer, dev);
-
-	if (timer->resume != NULL)
-		timer->resume();
-
-	return 0;
+	if (system_timer->resume)
+		system_timer->resume();
 }
 #else
 #define timer_suspend NULL
 #define timer_resume NULL
 #endif
 
-static struct sysdev_class timer_sysclass = {
-	.name		= "timer",
+static struct syscore_ops timer_syscore_ops = {
 	.suspend	= timer_suspend,
 	.resume		= timer_resume,
 };
 
-static int __init timer_init_sysfs(void)
+static int __init timer_init_syscore_ops(void)
 {
-	int ret = sysdev_class_register(&timer_sysclass);
-	if (ret == 0) {
-		system_timer->dev.cls = &timer_sysclass;
-		ret = sysdev_register(&system_timer->dev);
-	}
+	register_syscore_ops(&timer_syscore_ops);
 
-	return ret;
+	return 0;
 }
 
-device_initcall(timer_init_sysfs);
+device_initcall(timer_init_syscore_ops);
 
 void __init time_init(void)
 {
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 3b54ad19d48..d52eec268b4 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -234,7 +234,6 @@ static int __die(const char *str, int err, struct thread_info *thread, struct pt
 
 	printk(KERN_EMERG "Internal error: %s: %x [#%d]" S_PREEMPT S_SMP "\n",
 	       str, err, ++die_counter);
-	sysfs_printk_last_file();
 
 	/* trap and error numbers are mostly meaningless on ARM */
 	ret = notify_die(DIE_OOPS, str, regs, err, tsk->thread.trap_no, SIGSEGV);
diff --git a/arch/arm/mach-at91/Kconfig b/arch/arm/mach-at91/Kconfig
index 19390231a0e..2d299bf5d72 100644
--- a/arch/arm/mach-at91/Kconfig
+++ b/arch/arm/mach-at91/Kconfig
@@ -83,6 +83,7 @@ config ARCH_AT91CAP9
 	select CPU_ARM926T
 	select GENERIC_CLOCKEVENTS
 	select HAVE_FB_ATMEL
+	select HAVE_NET_MACB
 
 config ARCH_AT572D940HF
 	bool "AT572D940HF"
diff --git a/arch/arm/mach-at91/board-eb01.c b/arch/arm/mach-at91/board-eb01.c
index 1f9d3cb64c5..d8df59a3426 100644
--- a/arch/arm/mach-at91/board-eb01.c
+++ b/arch/arm/mach-at91/board-eb01.c
@@ -30,6 +30,11 @@
 #include <mach/board.h>
 #include "generic.h"
 
+static void __init at91eb01_init_irq(void)
+{
+	at91x40_init_interrupts(NULL);
+}
+
 static void __init at91eb01_map_io(void)
 {
 	at91x40_initialize(40000000);
@@ -38,7 +43,7 @@ static void __init at91eb01_map_io(void)
 MACHINE_START(AT91EB01, "Atmel AT91 EB01")
 	/* Maintainer: Greg Ungerer <gerg@snapgear.com> */
 	.timer		= &at91x40_timer,
-	.init_irq	= at91x40_init_interrupts,
+	.init_irq	= at91eb01_init_irq,
 	.map_io		= at91eb01_map_io,
 MACHINE_END
 
diff --git a/arch/arm/mach-at91/include/mach/cpu.h b/arch/arm/mach-at91/include/mach/cpu.h
index 3bef931d0b1..0700f212530 100644
--- a/arch/arm/mach-at91/include/mach/cpu.h
+++ b/arch/arm/mach-at91/include/mach/cpu.h
@@ -27,6 +27,7 @@
 #define ARCH_ID_AT91SAM9G45	0x819b05a0
 #define ARCH_ID_AT91SAM9G45MRL	0x819b05a2	/* aka 9G45-ES2 & non ES lots */
 #define ARCH_ID_AT91SAM9G45ES	0x819b05a1	/* 9G45-ES (Engineering Sample) */
+#define ARCH_ID_AT91SAM9X5	0x819a05a0
 #define ARCH_ID_AT91CAP9	0x039A03A0
 
 #define ARCH_ID_AT91SAM9XE128	0x329973a0
@@ -55,6 +56,12 @@ static inline unsigned long at91_cpu_fully_identify(void)
 #define ARCH_EXID_AT91SAM9G46	0x00000003
 #define ARCH_EXID_AT91SAM9G45	0x00000004
 
+#define ARCH_EXID_AT91SAM9G15	0x00000000
+#define ARCH_EXID_AT91SAM9G35	0x00000001
+#define ARCH_EXID_AT91SAM9X35	0x00000002
+#define ARCH_EXID_AT91SAM9G25	0x00000003
+#define ARCH_EXID_AT91SAM9X25	0x00000004
+
 static inline unsigned long at91_exid_identify(void)
 {
 	return at91_sys_read(AT91_DBGU_EXID);
@@ -143,6 +150,27 @@ static inline unsigned long at91cap9_rev_identify(void)
 #define cpu_is_at91sam9m11()	(0)
 #endif
 
+#ifdef CONFIG_ARCH_AT91SAM9X5
+#define cpu_is_at91sam9x5()	(at91_cpu_identify() == ARCH_ID_AT91SAM9X5)
+#define cpu_is_at91sam9g15()	(cpu_is_at91sam9x5() && \
+				(at91_exid_identify() == ARCH_EXID_AT91SAM9G15))
+#define cpu_is_at91sam9g35()	(cpu_is_at91sam9x5() && \
+				(at91_exid_identify() == ARCH_EXID_AT91SAM9G35))
+#define cpu_is_at91sam9x35()	(cpu_is_at91sam9x5() && \
+				(at91_exid_identify() == ARCH_EXID_AT91SAM9X35))
+#define cpu_is_at91sam9g25()	(cpu_is_at91sam9x5() && \
+				(at91_exid_identify() == ARCH_EXID_AT91SAM9G25))
+#define cpu_is_at91sam9x25()	(cpu_is_at91sam9x5() && \
+				(at91_exid_identify() == ARCH_EXID_AT91SAM9X25))
+#else
+#define cpu_is_at91sam9x5()	(0)
+#define cpu_is_at91sam9g15()	(0)
+#define cpu_is_at91sam9g35()	(0)
+#define cpu_is_at91sam9x35()	(0)
+#define cpu_is_at91sam9g25()	(0)
+#define cpu_is_at91sam9x25()	(0)
+#endif
+
 #ifdef CONFIG_ARCH_AT91CAP9
 #define cpu_is_at91cap9()	(at91_cpu_identify() == ARCH_ID_AT91CAP9)
 #define cpu_is_at91cap9_revB()	(at91cap9_rev_identify() == ARCH_REVISION_CAP9_B)
diff --git a/arch/arm/mach-davinci/Kconfig b/arch/arm/mach-davinci/Kconfig
index 32f147998cd..c0deacae778 100644
--- a/arch/arm/mach-davinci/Kconfig
+++ b/arch/arm/mach-davinci/Kconfig
@@ -63,6 +63,7 @@ config MACH_DAVINCI_EVM
 	depends on ARCH_DAVINCI_DM644x
 	select MISC_DEVICES
 	select EEPROM_AT24
+	select I2C
 	help
 	  Configure this option to specify the whether the board used
 	  for development is a DM644x EVM
@@ -72,6 +73,7 @@ config MACH_SFFSDR
 	depends on ARCH_DAVINCI_DM644x
 	select MISC_DEVICES
 	select EEPROM_AT24
+	select I2C
 	help
 	  Say Y here to select the Lyrtech Small Form Factor
 	  Software Defined Radio (SFFSDR) board.
@@ -105,6 +107,7 @@ config MACH_DAVINCI_DM6467_EVM
 	select MACH_DAVINCI_DM6467TEVM
 	select MISC_DEVICES
 	select EEPROM_AT24
+	select I2C
 	help
 	  Configure this option to specify the whether the board used
 	  for development is a DM6467 EVM
@@ -118,6 +121,7 @@ config MACH_DAVINCI_DM365_EVM
 	depends on ARCH_DAVINCI_DM365
 	select MISC_DEVICES
 	select EEPROM_AT24
+	select I2C
 	help
 	  Configure this option to specify whether the board used
 	  for development is a DM365 EVM
@@ -129,6 +133,7 @@ config MACH_DAVINCI_DA830_EVM
 	select GPIO_PCF857X
 	select MISC_DEVICES
 	select EEPROM_AT24
+	select I2C
 	help
 	  Say Y here to select the TI DA830/OMAP-L137/AM17x Evaluation Module.
 
@@ -205,6 +210,7 @@ config MACH_MITYOMAPL138
 	depends on ARCH_DAVINCI_DA850
 	select MISC_DEVICES
 	select EEPROM_AT24
+	select I2C
 	help
 	  Say Y here to select the Critical Link MityDSP-L138/MityARM-1808
 	  System on Module.  Information on this SoM may be found at
diff --git a/arch/arm/mach-davinci/board-mityomapl138.c b/arch/arm/mach-davinci/board-mityomapl138.c
index 2aa79c54f98..606a6f27ed6 100644
--- a/arch/arm/mach-davinci/board-mityomapl138.c
+++ b/arch/arm/mach-davinci/board-mityomapl138.c
@@ -29,7 +29,7 @@
 #include <mach/mux.h>
 #include <mach/spi.h>
 
-#define MITYOMAPL138_PHY_ID		"0:03"
+#define MITYOMAPL138_PHY_ID		""
 
 #define FACTORY_CONFIG_MAGIC	0x012C0138
 #define FACTORY_CONFIG_VERSION	0x00010001
@@ -414,7 +414,7 @@ static struct resource mityomapl138_nandflash_resource[] = {
 
 static struct platform_device mityomapl138_nandflash_device = {
 	.name		= "davinci_nand",
-	.id		= 0,
+	.id		= 1,
 	.dev		= {
 		.platform_data	= &mityomapl138_nandflash_data,
 	},
diff --git a/arch/arm/mach-davinci/cpufreq.c b/arch/arm/mach-davinci/cpufreq.c
index 0a95be1512b..41669ecc1f9 100644
--- a/arch/arm/mach-davinci/cpufreq.c
+++ b/arch/arm/mach-davinci/cpufreq.c
@@ -94,9 +94,7 @@ static int davinci_target(struct cpufreq_policy *policy,
 	if (freqs.old == freqs.new)
 		return ret;
 
-	cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER,
-			dev_driver_string(cpufreq.dev),
-			"transition: %u --> %u\n", freqs.old, freqs.new);
+	dev_dbg(&cpufreq.dev, "transition: %u --> %u\n", freqs.old, freqs.new);
 
 	ret = cpufreq_frequency_table_target(policy, pdata->freq_table,
 						freqs.new, relation, &idx);
diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c
index 625d4b66718..58a02dc7b15 100644
--- a/arch/arm/mach-davinci/devices-da8xx.c
+++ b/arch/arm/mach-davinci/devices-da8xx.c
@@ -39,7 +39,8 @@
 #define DA8XX_GPIO_BASE			0x01e26000
 #define DA8XX_I2C1_BASE			0x01e28000
 #define DA8XX_SPI0_BASE			0x01c41000
-#define DA8XX_SPI1_BASE			0x01f0e000
+#define DA830_SPI1_BASE			0x01e12000
+#define DA850_SPI1_BASE			0x01f0e000
 
 #define DA8XX_EMAC_CTRL_REG_OFFSET	0x3000
 #define DA8XX_EMAC_MOD_REG_OFFSET	0x2000
@@ -762,8 +763,8 @@ static struct resource da8xx_spi0_resources[] = {
 
 static struct resource da8xx_spi1_resources[] = {
 	[0] = {
-		.start	= DA8XX_SPI1_BASE,
-		.end	= DA8XX_SPI1_BASE + SZ_4K - 1,
+		.start	= DA830_SPI1_BASE,
+		.end	= DA830_SPI1_BASE + SZ_4K - 1,
 		.flags	= IORESOURCE_MEM,
 	},
 	[1] = {
@@ -832,5 +833,10 @@ int __init da8xx_register_spi(int instance, struct spi_board_info *info,
 
 	da8xx_spi_pdata[instance].num_chipselect = len;
 
+	if (instance == 1 && cpu_is_davinci_da850()) {
+		da8xx_spi1_resources[0].start = DA850_SPI1_BASE;
+		da8xx_spi1_resources[0].end = DA850_SPI1_BASE + SZ_4K - 1;
+	}
+
 	return platform_device_register(&da8xx_spi_device[instance]);
 }
diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index f6801223964..a3a94e9c937 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -314,7 +314,7 @@ static struct clk timer2_clk = {
 	.name = "timer2",
 	.parent = &pll1_aux_clk,
 	.lpsc = DAVINCI_LPSC_TIMER2,
-	.usecount = 1,              /* REVISIT: why can't' this be disabled? */
+	.usecount = 1,              /* REVISIT: why can't this be disabled? */
 };
 
 static struct clk timer3_clk = {
diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c
index 5f8a6542418..4c82c271629 100644
--- a/arch/arm/mach-davinci/dm644x.c
+++ b/arch/arm/mach-davinci/dm644x.c
@@ -274,7 +274,7 @@ static struct clk timer2_clk = {
 	.name = "timer2",
 	.parent = &pll1_aux_clk,
 	.lpsc = DAVINCI_LPSC_TIMER2,
-	.usecount = 1,              /* REVISIT: why can't' this be disabled? */
+	.usecount = 1,              /* REVISIT: why can't this be disabled? */
 };
 
 static struct clk_lookup dm644x_clks[] = {
diff --git a/arch/arm/mach-davinci/include/mach/debug-macro.S b/arch/arm/mach-davinci/include/mach/debug-macro.S
index 9f1befc5ac3..f8b7ea4f623 100644
--- a/arch/arm/mach-davinci/include/mach/debug-macro.S
+++ b/arch/arm/mach-davinci/include/mach/debug-macro.S
@@ -24,6 +24,9 @@
 
 #define UART_SHIFT	2
 
+#define davinci_uart_v2p(x)	((x) - PAGE_OFFSET + PLAT_PHYS_OFFSET)
+#define davinci_uart_p2v(x)	((x) - PLAT_PHYS_OFFSET + PAGE_OFFSET)
+
 		.pushsection .data
 davinci_uart_phys:	.word	0
 davinci_uart_virt:	.word	0
@@ -34,7 +37,7 @@ davinci_uart_virt:	.word	0
 		/* Use davinci_uart_phys/virt if already configured */
 10:		mrc	p15, 0, \rp, c1, c0
 		tst	\rp, #1			@ MMU enabled?
-		ldreq	\rp, =__virt_to_phys(davinci_uart_phys)
+		ldreq	\rp, =davinci_uart_v2p(davinci_uart_phys)
 		ldrne	\rp, =davinci_uart_phys
 		add	\rv, \rp, #4		@ davinci_uart_virt
 		ldr	\rp, [\rp, #0]
@@ -48,18 +51,18 @@ davinci_uart_virt:	.word	0
 		tst	\rp, #1			@ MMU enabled?
 
 		/* Copy uart phys address from decompressor uart info */
-		ldreq	\rv, =__virt_to_phys(davinci_uart_phys)
+		ldreq	\rv, =davinci_uart_v2p(davinci_uart_phys)
 		ldrne	\rv, =davinci_uart_phys
 		ldreq	\rp, =DAVINCI_UART_INFO
-		ldrne	\rp, =__phys_to_virt(DAVINCI_UART_INFO)
+		ldrne	\rp, =davinci_uart_p2v(DAVINCI_UART_INFO)
 		ldr	\rp, [\rp, #0]
 		str	\rp, [\rv]
 
 		/* Copy uart virt address from decompressor uart info */
-		ldreq	\rv, =__virt_to_phys(davinci_uart_virt)
+		ldreq	\rv, =davinci_uart_v2p(davinci_uart_virt)
 		ldrne	\rv, =davinci_uart_virt
 		ldreq	\rp, =DAVINCI_UART_INFO
-		ldrne	\rp, =__phys_to_virt(DAVINCI_UART_INFO)
+		ldrne	\rp, =davinci_uart_p2v(DAVINCI_UART_INFO)
 		ldr	\rp, [\rp, #4]
 		str	\rp, [\rv]
 
diff --git a/arch/arm/mach-davinci/include/mach/serial.h b/arch/arm/mach-davinci/include/mach/serial.h
index 8051110b8ac..c9e6ce185a6 100644
--- a/arch/arm/mach-davinci/include/mach/serial.h
+++ b/arch/arm/mach-davinci/include/mach/serial.h
@@ -22,7 +22,7 @@
  *
  * This area sits just below the page tables (see arch/arm/kernel/head.S).
  */
-#define DAVINCI_UART_INFO	(PHYS_OFFSET + 0x3ff8)
+#define DAVINCI_UART_INFO	(PLAT_PHYS_OFFSET + 0x3ff8)
 
 #define DAVINCI_UART0_BASE	(IO_PHYS + 0x20000)
 #define DAVINCI_UART1_BASE	(IO_PHYS + 0x20400)
diff --git a/arch/arm/mach-exynos4/pm.c b/arch/arm/mach-exynos4/pm.c
index 10d917d9e3a..8755ca8dd48 100644
--- a/arch/arm/mach-exynos4/pm.c
+++ b/arch/arm/mach-exynos4/pm.c
@@ -16,6 +16,7 @@
 
 #include <linux/init.h>
 #include <linux/suspend.h>
+#include <linux/syscore_ops.h>
 #include <linux/io.h>
 
 #include <asm/cacheflush.h>
@@ -372,7 +373,27 @@ void exynos4_scu_enable(void __iomem *scu_base)
 	flush_cache_all();
 }
 
-static int exynos4_pm_resume(struct sys_device *dev)
+static struct sysdev_driver exynos4_pm_driver = {
+	.add		= exynos4_pm_add,
+};
+
+static __init int exynos4_pm_drvinit(void)
+{
+	unsigned int tmp;
+
+	s3c_pm_init();
+
+	/* All wakeup disable */
+
+	tmp = __raw_readl(S5P_WAKEUP_MASK);
+	tmp |= ((0xFF << 8) | (0x1F << 1));
+	__raw_writel(tmp, S5P_WAKEUP_MASK);
+
+	return sysdev_driver_register(&exynos4_sysclass, &exynos4_pm_driver);
+}
+arch_initcall(exynos4_pm_drvinit);
+
+static void exynos4_pm_resume(void)
 {
 	/* For release retention */
 
@@ -394,27 +415,15 @@ static int exynos4_pm_resume(struct sys_device *dev)
 	/* enable L2X0*/
 	writel_relaxed(1, S5P_VA_L2CC + L2X0_CTRL);
 #endif
-
-	return 0;
 }
 
-static struct sysdev_driver exynos4_pm_driver = {
-	.add		= exynos4_pm_add,
+static struct syscore_ops exynos4_pm_syscore_ops = {
 	.resume		= exynos4_pm_resume,
 };
 
-static __init int exynos4_pm_drvinit(void)
+static __init int exynos4_pm_syscore_init(void)
 {
-	unsigned int tmp;
-
-	s3c_pm_init();
-
-	/* All wakeup disable */
-
-	tmp = __raw_readl(S5P_WAKEUP_MASK);
-	tmp |= ((0xFF << 8) | (0x1F << 1));
-	__raw_writel(tmp, S5P_WAKEUP_MASK);
-
-	return sysdev_driver_register(&exynos4_sysclass, &exynos4_pm_driver);
+	register_syscore_ops(&exynos4_pm_syscore_ops);
+	return 0;
 }
-arch_initcall(exynos4_pm_drvinit);
+arch_initcall(exynos4_pm_syscore_init);
diff --git a/arch/arm/mach-footbridge/Kconfig b/arch/arm/mach-footbridge/Kconfig
index bdd257921cf..46adca068f2 100644
--- a/arch/arm/mach-footbridge/Kconfig
+++ b/arch/arm/mach-footbridge/Kconfig
@@ -4,6 +4,7 @@ menu "Footbridge Implementations"
 
 config ARCH_CATS
 	bool "CATS"
+	select CLKSRC_I8253
 	select FOOTBRIDGE_HOST
 	select ISA
 	select ISA_DMA
@@ -59,6 +60,7 @@ config ARCH_EBSA285_HOST
 
 config ARCH_NETWINDER
 	bool "NetWinder"
+	select CLKSRC_I8253
 	select FOOTBRIDGE_HOST
 	select ISA
 	select ISA_DMA
diff --git a/arch/arm/mach-footbridge/isa-timer.c b/arch/arm/mach-footbridge/isa-timer.c
index 441c6ce0d55..7020f1a3fec 100644
--- a/arch/arm/mach-footbridge/isa-timer.c
+++ b/arch/arm/mach-footbridge/isa-timer.c
@@ -10,53 +10,16 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/io.h>
+#include <linux/spinlock.h>
 #include <linux/timex.h>
 
 #include <asm/irq.h>
-
+#include <asm/i8253.h>
 #include <asm/mach/time.h>
 
 #include "common.h"
 
-#define PIT_MODE	0x43
-#define PIT_CH0		0x40
-
-#define PIT_LATCH	((PIT_TICK_RATE + HZ / 2) / HZ)
-
-static cycle_t pit_read(struct clocksource *cs)
-{
-	unsigned long flags;
-	static int old_count;
-	static u32 old_jifs;
-	int count;
-	u32 jifs;
-
-	raw_local_irq_save(flags);
-
-	jifs = jiffies;
-	outb_p(0x00, PIT_MODE);		/* latch the count */
-	count = inb_p(PIT_CH0);		/* read the latched count */
-	count |= inb_p(PIT_CH0) << 8;
-
-	if (count > old_count && jifs == old_jifs)
-		count = old_count;
-
-	old_count = count;
-	old_jifs = jifs;
-
-	raw_local_irq_restore(flags);
-
-	count = (PIT_LATCH - 1) - count;
-
-	return (cycle_t)(jifs * PIT_LATCH) + count;
-}
-
-static struct clocksource pit_cs = {
-	.name		= "pit",
-	.rating		= 110,
-	.read		= pit_read,
-	.mask		= CLOCKSOURCE_MASK(32),
-};
+DEFINE_RAW_SPINLOCK(i8253_lock);
 
 static void pit_set_mode(enum clock_event_mode mode,
 	struct clock_event_device *evt)
@@ -121,7 +84,7 @@ static void __init isa_timer_init(void)
 	pit_ce.max_delta_ns = clockevent_delta2ns(0x7fff, &pit_ce);
 	pit_ce.min_delta_ns = clockevent_delta2ns(0x000f, &pit_ce);
 
-	clocksource_register_hz(&pit_cs, PIT_TICK_RATE);
+	clocksource_i8253_init();
 
 	setup_irq(pit_ce.irq, &pit_timer_irq);
 	clockevents_register_device(&pit_ce);
diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c
index 980803ff348..d3e96451529 100644
--- a/arch/arm/mach-integrator/integrator_ap.c
+++ b/arch/arm/mach-integrator/integrator_ap.c
@@ -24,7 +24,7 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/string.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/amba/bus.h>
 #include <linux/amba/kmi.h>
 #include <linux/clocksource.h>
@@ -180,13 +180,13 @@ static void __init ap_init_irq(void)
 #ifdef CONFIG_PM
 static unsigned long ic_irq_enable;
 
-static int irq_suspend(struct sys_device *dev, pm_message_t state)
+static int irq_suspend(void)
 {
 	ic_irq_enable = readl(VA_IC_BASE + IRQ_ENABLE);
 	return 0;
 }
 
-static int irq_resume(struct sys_device *dev)
+static void irq_resume(void)
 {
 	/* disable all irq sources */
 	writel(-1, VA_CMIC_BASE + IRQ_ENABLE_CLEAR);
@@ -194,33 +194,25 @@ static int irq_resume(struct sys_device *dev)
 	writel(-1, VA_IC_BASE + FIQ_ENABLE_CLEAR);
 
 	writel(ic_irq_enable, VA_IC_BASE + IRQ_ENABLE_SET);
-	return 0;
 }
 #else
 #define irq_suspend NULL
 #define irq_resume NULL
 #endif
 
-static struct sysdev_class irq_class = {
-	.name		= "irq",
+static struct syscore_ops irq_syscore_ops = {
 	.suspend	= irq_suspend,
 	.resume		= irq_resume,
 };
 
-static struct sys_device irq_device = {
-	.id	= 0,
-	.cls	= &irq_class,
-};
-
-static int __init irq_init_sysfs(void)
+static int __init irq_syscore_init(void)
 {
-	int ret = sysdev_class_register(&irq_class);
-	if (ret == 0)
-		ret = sysdev_register(&irq_device);
-	return ret;
+	register_syscore_ops(&irq_syscore_ops);
+
+	return 0;
 }
 
-device_initcall(irq_init_sysfs);
+device_initcall(irq_syscore_init);
 
 /*
  * Flash handling.
diff --git a/arch/arm/mach-mx3/mach-vpr200.c b/arch/arm/mach-mx3/mach-vpr200.c
index 2cf390fbd98..47a69cbc31a 100644
--- a/arch/arm/mach-mx3/mach-vpr200.c
+++ b/arch/arm/mach-mx3/mach-vpr200.c
@@ -257,11 +257,16 @@ static const struct fsl_usb2_platform_data otg_device_pdata __initconst = {
 	.workaround	= FLS_USB2_WORKAROUND_ENGCM09152,
 };
 
+static int vpr200_usbh_init(struct platform_device *pdev)
+{
+	return mx35_initialize_usb_hw(pdev->id,
+			MXC_EHCI_INTERFACE_SINGLE_UNI | MXC_EHCI_INTERNAL_PHY);
+}
+
 /* USB HOST config */
 static const struct mxc_usbh_platform_data usb_host_pdata __initconst = {
-	.portsc		= MXC_EHCI_MODE_SERIAL,
-	.flags		= MXC_EHCI_INTERFACE_SINGLE_UNI |
-			  MXC_EHCI_INTERNAL_PHY,
+	.init = vpr200_usbh_init,
+	.portsc = MXC_EHCI_MODE_SERIAL,
 };
 
 static struct platform_device *devices[] __initdata = {
diff --git a/arch/arm/mach-mx5/board-mx53_loco.c b/arch/arm/mach-mx5/board-mx53_loco.c
index 10a1bea1054..6206b1191fe 100644
--- a/arch/arm/mach-mx5/board-mx53_loco.c
+++ b/arch/arm/mach-mx5/board-mx53_loco.c
@@ -193,7 +193,7 @@ static iomux_v3_cfg_t mx53_loco_pads[] = {
 	.wakeup		= wake,					\
 }
 
-static const struct gpio_keys_button loco_buttons[] __initconst = {
+static struct gpio_keys_button loco_buttons[] = {
 	GPIO_BUTTON(MX53_LOCO_POWER, KEY_POWER, 1, "power", 0),
 	GPIO_BUTTON(MX53_LOCO_UI1, KEY_VOLUMEUP, 1, "volume-up", 0),
 	GPIO_BUTTON(MX53_LOCO_UI2, KEY_VOLUMEDOWN, 1, "volume-down", 0),
diff --git a/arch/arm/mach-mxs/clock-mx28.c b/arch/arm/mach-mxs/clock-mx28.c
index 1ad97fed1e9..5dcc59d5b9e 100644
--- a/arch/arm/mach-mxs/clock-mx28.c
+++ b/arch/arm/mach-mxs/clock-mx28.c
@@ -295,11 +295,11 @@ static int name##_set_rate(struct clk *clk, unsigned long rate)		\
 	unsigned long diff, parent_rate, calc_rate;			\
 	int i;								\
 									\
-	parent_rate = clk_get_rate(clk->parent);			\
 	div_max = BM_CLKCTRL_##dr##_DIV >> BP_CLKCTRL_##dr##_DIV;	\
 	bm_busy = BM_CLKCTRL_##dr##_BUSY;				\
 									\
 	if (clk->parent == &ref_xtal_clk) {				\
+		parent_rate = clk_get_rate(clk->parent);		\
 		div = DIV_ROUND_UP(parent_rate, rate);			\
 		if (clk == &cpu_clk) {					\
 			div_max = BM_CLKCTRL_CPU_DIV_XTAL >>		\
@@ -309,6 +309,11 @@ static int name##_set_rate(struct clk *clk, unsigned long rate)		\
 		if (div == 0 || div > div_max)				\
 			return -EINVAL;					\
 	} else {							\
+		/*							\
+		 * hack alert: this block modifies clk->parent, too,	\
+		 * so the base to use it the grand parent.		\
+		 */							\
+		parent_rate = clk_get_rate(clk->parent->parent);	\
 		rate >>= PARENT_RATE_SHIFT;				\
 		parent_rate >>= PARENT_RATE_SHIFT;			\
 		diff = parent_rate;					\
diff --git a/arch/arm/mach-omap1/pm_bus.c b/arch/arm/mach-omap1/pm_bus.c
index 6588c22b8a6..fe31d933f0e 100644
--- a/arch/arm/mach-omap1/pm_bus.c
+++ b/arch/arm/mach-omap1/pm_bus.c
@@ -24,75 +24,50 @@
 #ifdef CONFIG_PM_RUNTIME
 static int omap1_pm_runtime_suspend(struct device *dev)
 {
-	struct clk *iclk, *fclk;
-	int ret = 0;
+	int ret;
 
 	dev_dbg(dev, "%s\n", __func__);
 
 	ret = pm_generic_runtime_suspend(dev);
+	if (ret)
+		return ret;
 
-	fclk = clk_get(dev, "fck");
-	if (!IS_ERR(fclk)) {
-		clk_disable(fclk);
-		clk_put(fclk);
-	}
-
-	iclk = clk_get(dev, "ick");
-	if (!IS_ERR(iclk)) {
-		clk_disable(iclk);
-		clk_put(iclk);
+	ret = pm_runtime_clk_suspend(dev);
+	if (ret) {
+		pm_generic_runtime_resume(dev);
+		return ret;
 	}
 
 	return 0;
-};
+}
 
 static int omap1_pm_runtime_resume(struct device *dev)
 {
-	struct clk *iclk, *fclk;
-
 	dev_dbg(dev, "%s\n", __func__);
 
-	iclk = clk_get(dev, "ick");
-	if (!IS_ERR(iclk)) {
-		clk_enable(iclk);
-		clk_put(iclk);
-	}
+	pm_runtime_clk_resume(dev);
+	return pm_generic_runtime_resume(dev);
+}
 
-	fclk = clk_get(dev, "fck");
-	if (!IS_ERR(fclk)) {
-		clk_enable(fclk);
-		clk_put(fclk);
-	}
+static struct dev_power_domain default_power_domain = {
+	.ops = {
+		.runtime_suspend = omap1_pm_runtime_suspend,
+		.runtime_resume = omap1_pm_runtime_resume,
+		USE_PLATFORM_PM_SLEEP_OPS
+	},
+};
 
-	return pm_generic_runtime_resume(dev);
+static struct pm_clk_notifier_block platform_bus_notifier = {
+	.pwr_domain = &default_power_domain,
+	.con_ids = { "ick", "fck", NULL, },
 };
 
 static int __init omap1_pm_runtime_init(void)
 {
-	const struct dev_pm_ops *pm;
-	struct dev_pm_ops *omap_pm;
-
 	if (!cpu_class_is_omap1())
 		return -ENODEV;
 
-	pm = platform_bus_get_pm_ops();
-	if (!pm) {
-		pr_err("%s: unable to get dev_pm_ops from platform_bus\n",
-			__func__);
-		return -ENODEV;
-	}
-
-	omap_pm = kmemdup(pm, sizeof(struct dev_pm_ops), GFP_KERNEL);
-	if (!omap_pm) {
-		pr_err("%s: unable to alloc memory for new dev_pm_ops\n",
-			__func__);
-		return -ENOMEM;
-	}
-
-	omap_pm->runtime_suspend = omap1_pm_runtime_suspend;
-	omap_pm->runtime_resume = omap1_pm_runtime_resume;
-
-	platform_bus_set_pm_ops(omap_pm);
+	pm_runtime_clk_add_notifier(&platform_bus_type, &platform_bus_notifier);
 
 	return 0;
 }
diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index a45cd640968..66dfbccacd2 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -59,16 +59,16 @@ endif
 # Power Management
 ifeq ($(CONFIG_PM),y)
 obj-$(CONFIG_ARCH_OMAP2)		+= pm24xx.o
-obj-$(CONFIG_ARCH_OMAP2)		+= sleep24xx.o pm_bus.o
+obj-$(CONFIG_ARCH_OMAP2)		+= sleep24xx.o
 obj-$(CONFIG_ARCH_OMAP3)		+= pm34xx.o sleep34xx.o \
-					   cpuidle34xx.o pm_bus.o
-obj-$(CONFIG_ARCH_OMAP4)		+= pm44xx.o pm_bus.o
+					   cpuidle34xx.o
+obj-$(CONFIG_ARCH_OMAP4)		+= pm44xx.o
 obj-$(CONFIG_PM_DEBUG)			+= pm-debug.o
 obj-$(CONFIG_OMAP_SMARTREFLEX)          += sr_device.o smartreflex.o
 obj-$(CONFIG_OMAP_SMARTREFLEX_CLASS3)	+= smartreflex-class3.o
 
 AFLAGS_sleep24xx.o			:=-Wa,-march=armv6
-AFLAGS_sleep34xx.o			:=-Wa,-march=armv7-a
+AFLAGS_sleep34xx.o			:=-Wa,-march=armv7-a$(plus_sec)
 
 ifeq ($(CONFIG_PM_VERBOSE),y)
 CFLAGS_pm_bus.o				+= -DDEBUG
diff --git a/arch/arm/mach-omap2/board-rx51.c b/arch/arm/mach-omap2/board-rx51.c
index e964895b80e..f8ba20a14e6 100644
--- a/arch/arm/mach-omap2/board-rx51.c
+++ b/arch/arm/mach-omap2/board-rx51.c
@@ -141,14 +141,19 @@ static void __init rx51_init(void)
 static void __init rx51_map_io(void)
 {
 	omap2_set_globals_3xxx();
-	rx51_video_mem_init();
 	omap34xx_map_common_io();
 }
 
+static void __init rx51_reserve(void)
+{
+	rx51_video_mem_init();
+	omap_reserve();
+}
+
 MACHINE_START(NOKIA_RX51, "Nokia RX-51 board")
 	/* Maintainer: Lauri Leukkunen <lauri.leukkunen@nokia.com> */
 	.boot_params	= 0x80000100,
-	.reserve	= omap_reserve,
+	.reserve	= rx51_reserve,
 	.map_io		= rx51_map_io,
 	.init_early	= rx51_init_early,
 	.init_irq	= omap_init_irq,
diff --git a/arch/arm/mach-omap2/clkt34xx_dpll3m2.c b/arch/arm/mach-omap2/clkt34xx_dpll3m2.c
index b2b1e37bb6b..d6e34dd9e7e 100644
--- a/arch/arm/mach-omap2/clkt34xx_dpll3m2.c
+++ b/arch/arm/mach-omap2/clkt34xx_dpll3m2.c
@@ -115,6 +115,7 @@ int omap3_core_dpll_m2_set_rate(struct clk *clk, unsigned long rate)
 				  sdrc_cs0->rfr_ctrl, sdrc_cs0->actim_ctrla,
 				  sdrc_cs0->actim_ctrlb, sdrc_cs0->mr,
 				  0, 0, 0, 0);
+	clk->rate = rate;
 
 	return 0;
 }
diff --git a/arch/arm/mach-omap2/clock44xx_data.c b/arch/arm/mach-omap2/clock44xx_data.c
index 276992d3b7f..8c965671b4d 100644
--- a/arch/arm/mach-omap2/clock44xx_data.c
+++ b/arch/arm/mach-omap2/clock44xx_data.c
@@ -3116,14 +3116,9 @@ static struct omap_clk omap44xx_clks[] = {
 	CLK(NULL,	"dsp_fck",			&dsp_fck,	CK_443X),
 	CLK("omapdss_dss",	"sys_clk",			&dss_sys_clk,	CK_443X),
 	CLK("omapdss_dss",	"tv_clk",			&dss_tv_clk,	CK_443X),
-	CLK("omapdss_dss",	"dss_clk",			&dss_dss_clk,	CK_443X),
 	CLK("omapdss_dss",	"video_clk",			&dss_48mhz_clk,	CK_443X),
-	CLK("omapdss_dss",	"fck",				&dss_fck,	CK_443X),
-	/*
-	 * On OMAP4, DSS ick is a dummy clock; this is needed for compatibility
-	 * with OMAP2/3.
-	 */
-	CLK("omapdss_dss",	"ick",				&dummy_ck,	CK_443X),
+	CLK("omapdss_dss",	"fck",				&dss_dss_clk,	CK_443X),
+	CLK("omapdss_dss",	"ick",				&dss_fck,	CK_443X),
 	CLK(NULL,	"efuse_ctrl_cust_fck",		&efuse_ctrl_cust_fck,	CK_443X),
 	CLK(NULL,	"emif1_fck",			&emif1_fck,	CK_443X),
 	CLK(NULL,	"emif2_fck",			&emif2_fck,	CK_443X),
diff --git a/arch/arm/mach-omap2/cm2xxx_3xxx.c b/arch/arm/mach-omap2/cm2xxx_3xxx.c
index 9d0dec806e9..38830d8d478 100644
--- a/arch/arm/mach-omap2/cm2xxx_3xxx.c
+++ b/arch/arm/mach-omap2/cm2xxx_3xxx.c
@@ -247,6 +247,7 @@ struct omap3_cm_regs {
 	u32 per_cm_clksel;
 	u32 emu_cm_clksel;
 	u32 emu_cm_clkstctrl;
+	u32 pll_cm_autoidle;
 	u32 pll_cm_autoidle2;
 	u32 pll_cm_clksel4;
 	u32 pll_cm_clksel5;
@@ -319,6 +320,15 @@ void omap3_cm_save_context(void)
 		omap2_cm_read_mod_reg(OMAP3430_EMU_MOD, CM_CLKSEL1);
 	cm_context.emu_cm_clkstctrl =
 		omap2_cm_read_mod_reg(OMAP3430_EMU_MOD, OMAP2_CM_CLKSTCTRL);
+	/*
+	 * As per erratum i671, ROM code does not respect the PER DPLL
+	 * programming scheme if CM_AUTOIDLE_PLL.AUTO_PERIPH_DPLL == 1.
+	 * In this case, even though this register has been saved in
+	 * scratchpad contents, we need to restore AUTO_PERIPH_DPLL
+	 * by ourselves. So, we need to save it anyway.
+	 */
+	cm_context.pll_cm_autoidle =
+		omap2_cm_read_mod_reg(PLL_MOD, CM_AUTOIDLE);
 	cm_context.pll_cm_autoidle2 =
 		omap2_cm_read_mod_reg(PLL_MOD, CM_AUTOIDLE2);
 	cm_context.pll_cm_clksel4 =
@@ -441,6 +451,13 @@ void omap3_cm_restore_context(void)
 			       CM_CLKSEL1);
 	omap2_cm_write_mod_reg(cm_context.emu_cm_clkstctrl, OMAP3430_EMU_MOD,
 			       OMAP2_CM_CLKSTCTRL);
+	/*
+	 * As per erratum i671, ROM code does not respect the PER DPLL
+	 * programming scheme if CM_AUTOIDLE_PLL.AUTO_PERIPH_DPLL == 1.
+	 * In this case, we need to restore AUTO_PERIPH_DPLL by ourselves.
+	 */
+	omap2_cm_write_mod_reg(cm_context.pll_cm_autoidle, PLL_MOD,
+			       CM_AUTOIDLE);
 	omap2_cm_write_mod_reg(cm_context.pll_cm_autoidle2, PLL_MOD,
 			       CM_AUTOIDLE2);
 	omap2_cm_write_mod_reg(cm_context.pll_cm_clksel4, PLL_MOD,
diff --git a/arch/arm/mach-omap2/control.c b/arch/arm/mach-omap2/control.c
index 69527941902..da53ba3917c 100644
--- a/arch/arm/mach-omap2/control.c
+++ b/arch/arm/mach-omap2/control.c
@@ -316,8 +316,14 @@ void omap3_save_scratchpad_contents(void)
 			omap2_cm_read_mod_reg(WKUP_MOD, CM_CLKSEL);
 	prcm_block_contents.cm_clken_pll =
 			omap2_cm_read_mod_reg(PLL_MOD, CM_CLKEN);
+	/*
+	 * As per erratum i671, ROM code does not respect the PER DPLL
+	 * programming scheme if CM_AUTOIDLE_PLL..AUTO_PERIPH_DPLL == 1.
+	 * Then,  in anycase, clear these bits to avoid extra latencies.
+	 */
 	prcm_block_contents.cm_autoidle_pll =
-			omap2_cm_read_mod_reg(PLL_MOD, OMAP3430_CM_AUTOIDLE_PLL);
+			omap2_cm_read_mod_reg(PLL_MOD, CM_AUTOIDLE) &
+			~OMAP3430_AUTO_PERIPH_DPLL_MASK;
 	prcm_block_contents.cm_clksel1_pll =
 			omap2_cm_read_mod_reg(PLL_MOD, OMAP3430_CM_CLKSEL1_PLL);
 	prcm_block_contents.cm_clksel2_pll =
diff --git a/arch/arm/mach-omap2/omap_hwmod_2420_data.c b/arch/arm/mach-omap2/omap_hwmod_2420_data.c
index 8eb3ce1bbfb..c4d0ae87d62 100644
--- a/arch/arm/mach-omap2/omap_hwmod_2420_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_2420_data.c
@@ -1639,6 +1639,7 @@ static struct omap_hwmod_ocp_if *omap2420_gpio1_slaves[] = {
 
 static struct omap_hwmod omap2420_gpio1_hwmod = {
 	.name		= "gpio1",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap242x_gpio1_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap242x_gpio1_irqs),
 	.main_clk	= "gpios_fck",
@@ -1669,6 +1670,7 @@ static struct omap_hwmod_ocp_if *omap2420_gpio2_slaves[] = {
 
 static struct omap_hwmod omap2420_gpio2_hwmod = {
 	.name		= "gpio2",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap242x_gpio2_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap242x_gpio2_irqs),
 	.main_clk	= "gpios_fck",
@@ -1699,6 +1701,7 @@ static struct omap_hwmod_ocp_if *omap2420_gpio3_slaves[] = {
 
 static struct omap_hwmod omap2420_gpio3_hwmod = {
 	.name		= "gpio3",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap242x_gpio3_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap242x_gpio3_irqs),
 	.main_clk	= "gpios_fck",
@@ -1729,6 +1732,7 @@ static struct omap_hwmod_ocp_if *omap2420_gpio4_slaves[] = {
 
 static struct omap_hwmod omap2420_gpio4_hwmod = {
 	.name		= "gpio4",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap242x_gpio4_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap242x_gpio4_irqs),
 	.main_clk	= "gpios_fck",
@@ -1782,7 +1786,7 @@ static struct omap_hwmod_irq_info omap2420_dma_system_irqs[] = {
 static struct omap_hwmod_addr_space omap2420_dma_system_addrs[] = {
 	{
 		.pa_start	= 0x48056000,
-		.pa_end		= 0x4a0560ff,
+		.pa_end		= 0x48056fff,
 		.flags		= ADDR_TYPE_RT
 	},
 };
diff --git a/arch/arm/mach-omap2/omap_hwmod_2430_data.c b/arch/arm/mach-omap2/omap_hwmod_2430_data.c
index e6e3810db77..9682dd519f8 100644
--- a/arch/arm/mach-omap2/omap_hwmod_2430_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_2430_data.c
@@ -1742,6 +1742,7 @@ static struct omap_hwmod_ocp_if *omap2430_gpio1_slaves[] = {
 
 static struct omap_hwmod omap2430_gpio1_hwmod = {
 	.name		= "gpio1",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap243x_gpio1_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap243x_gpio1_irqs),
 	.main_clk	= "gpios_fck",
@@ -1772,6 +1773,7 @@ static struct omap_hwmod_ocp_if *omap2430_gpio2_slaves[] = {
 
 static struct omap_hwmod omap2430_gpio2_hwmod = {
 	.name		= "gpio2",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap243x_gpio2_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap243x_gpio2_irqs),
 	.main_clk	= "gpios_fck",
@@ -1802,6 +1804,7 @@ static struct omap_hwmod_ocp_if *omap2430_gpio3_slaves[] = {
 
 static struct omap_hwmod omap2430_gpio3_hwmod = {
 	.name		= "gpio3",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap243x_gpio3_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap243x_gpio3_irqs),
 	.main_clk	= "gpios_fck",
@@ -1832,6 +1835,7 @@ static struct omap_hwmod_ocp_if *omap2430_gpio4_slaves[] = {
 
 static struct omap_hwmod omap2430_gpio4_hwmod = {
 	.name		= "gpio4",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap243x_gpio4_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap243x_gpio4_irqs),
 	.main_clk	= "gpios_fck",
@@ -1862,6 +1866,7 @@ static struct omap_hwmod_ocp_if *omap2430_gpio5_slaves[] = {
 
 static struct omap_hwmod omap2430_gpio5_hwmod = {
 	.name		= "gpio5",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap243x_gpio5_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap243x_gpio5_irqs),
 	.main_clk	= "gpio5_fck",
@@ -1915,7 +1920,7 @@ static struct omap_hwmod_irq_info omap2430_dma_system_irqs[] = {
 static struct omap_hwmod_addr_space omap2430_dma_system_addrs[] = {
 	{
 		.pa_start	= 0x48056000,
-		.pa_end		= 0x4a0560ff,
+		.pa_end		= 0x48056fff,
 		.flags		= ADDR_TYPE_RT
 	},
 };
diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index b98e2dfcba2..909a84de668 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -2141,6 +2141,7 @@ static struct omap_hwmod_ocp_if *omap3xxx_gpio1_slaves[] = {
 
 static struct omap_hwmod omap3xxx_gpio1_hwmod = {
 	.name		= "gpio1",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap3xxx_gpio1_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap3xxx_gpio1_irqs),
 	.main_clk	= "gpio1_ick",
@@ -2177,6 +2178,7 @@ static struct omap_hwmod_ocp_if *omap3xxx_gpio2_slaves[] = {
 
 static struct omap_hwmod omap3xxx_gpio2_hwmod = {
 	.name		= "gpio2",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap3xxx_gpio2_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap3xxx_gpio2_irqs),
 	.main_clk	= "gpio2_ick",
@@ -2213,6 +2215,7 @@ static struct omap_hwmod_ocp_if *omap3xxx_gpio3_slaves[] = {
 
 static struct omap_hwmod omap3xxx_gpio3_hwmod = {
 	.name		= "gpio3",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap3xxx_gpio3_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap3xxx_gpio3_irqs),
 	.main_clk	= "gpio3_ick",
@@ -2249,6 +2252,7 @@ static struct omap_hwmod_ocp_if *omap3xxx_gpio4_slaves[] = {
 
 static struct omap_hwmod omap3xxx_gpio4_hwmod = {
 	.name		= "gpio4",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap3xxx_gpio4_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap3xxx_gpio4_irqs),
 	.main_clk	= "gpio4_ick",
@@ -2285,6 +2289,7 @@ static struct omap_hwmod_ocp_if *omap3xxx_gpio5_slaves[] = {
 
 static struct omap_hwmod omap3xxx_gpio5_hwmod = {
 	.name		= "gpio5",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap3xxx_gpio5_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap3xxx_gpio5_irqs),
 	.main_clk	= "gpio5_ick",
@@ -2321,6 +2326,7 @@ static struct omap_hwmod_ocp_if *omap3xxx_gpio6_slaves[] = {
 
 static struct omap_hwmod omap3xxx_gpio6_hwmod = {
 	.name		= "gpio6",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap3xxx_gpio6_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap3xxx_gpio6_irqs),
 	.main_clk	= "gpio6_ick",
@@ -2386,7 +2392,7 @@ static struct omap_hwmod_irq_info omap3xxx_dma_system_irqs[] = {
 static struct omap_hwmod_addr_space omap3xxx_dma_system_addrs[] = {
 	{
 		.pa_start	= 0x48056000,
-		.pa_end		= 0x4a0560ff,
+		.pa_end		= 0x48056fff,
 		.flags		= ADDR_TYPE_RT
 	},
 };
diff --git a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
index 3e88dd3f8ef..abc548a0c98 100644
--- a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
@@ -885,7 +885,7 @@ static struct omap_hwmod_ocp_if *omap44xx_dma_system_masters[] = {
 static struct omap_hwmod_addr_space omap44xx_dma_system_addrs[] = {
 	{
 		.pa_start	= 0x4a056000,
-		.pa_end		= 0x4a0560ff,
+		.pa_end		= 0x4a056fff,
 		.flags		= ADDR_TYPE_RT
 	},
 };
diff --git a/arch/arm/mach-omap2/omap_l3_smx.c b/arch/arm/mach-omap2/omap_l3_smx.c
index 5f2da7565b6..4321e793892 100644
--- a/arch/arm/mach-omap2/omap_l3_smx.c
+++ b/arch/arm/mach-omap2/omap_l3_smx.c
@@ -196,11 +196,11 @@ static irqreturn_t omap3_l3_app_irq(int irq, void *_l3)
 		/* No timeout error for debug sources */
 	}
 
-	base = ((l3->rt) + (*(omap3_l3_bases[int_type] + err_source)));
-
 	/* identify the error source */
 	for (err_source = 0; !(status & (1 << err_source)); err_source++)
 									;
+
+	base = l3->rt + *(omap3_l3_bases[int_type] + err_source);
 	error = omap3_l3_readll(base, L3_ERROR_LOG);
 
 	if (error) {
diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c
index 30af3351c2d..49486f522dc 100644
--- a/arch/arm/mach-omap2/pm.c
+++ b/arch/arm/mach-omap2/pm.c
@@ -89,6 +89,7 @@ static void omap2_init_processor_devices(void)
 	if (cpu_is_omap44xx()) {
 		_init_omap_device("l3_main_1", &l3_dev);
 		_init_omap_device("dsp", &dsp_dev);
+		_init_omap_device("iva", &iva_dev);
 	} else {
 		_init_omap_device("l3_main", &l3_dev);
 	}
diff --git a/arch/arm/mach-omap2/pm_bus.c b/arch/arm/mach-omap2/pm_bus.c
deleted file mode 100644
index 5acd2ab298b..00000000000
--- a/arch/arm/mach-omap2/pm_bus.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Runtime PM support code for OMAP
- *
- * Author: Kevin Hilman, Deep Root Systems, LLC
- *
- * Copyright (C) 2010 Texas Instruments, Inc.
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- */
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/io.h>
-#include <linux/pm_runtime.h>
-#include <linux/platform_device.h>
-#include <linux/mutex.h>
-
-#include <plat/omap_device.h>
-#include <plat/omap-pm.h>
-
-#ifdef CONFIG_PM_RUNTIME
-static int omap_pm_runtime_suspend(struct device *dev)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	int r, ret = 0;
-
-	dev_dbg(dev, "%s\n", __func__);
-
-	ret = pm_generic_runtime_suspend(dev);
-
-	if (!ret && dev->parent == &omap_device_parent) {
-		r = omap_device_idle(pdev);
-		WARN_ON(r);
-	}
-
-	return ret;
-};
-
-static int omap_pm_runtime_resume(struct device *dev)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	int r;
-
-	dev_dbg(dev, "%s\n", __func__);
-
-	if (dev->parent == &omap_device_parent) {
-		r = omap_device_enable(pdev);
-		WARN_ON(r);
-	}
-
-	return pm_generic_runtime_resume(dev);
-};
-#else
-#define omap_pm_runtime_suspend NULL
-#define omap_pm_runtime_resume NULL
-#endif /* CONFIG_PM_RUNTIME */
-
-static int __init omap_pm_runtime_init(void)
-{
-	const struct dev_pm_ops *pm;
-	struct dev_pm_ops *omap_pm;
-
-	pm = platform_bus_get_pm_ops();
-	if (!pm) {
-		pr_err("%s: unable to get dev_pm_ops from platform_bus\n",
-			__func__);
-		return -ENODEV;
-	}
-
-	omap_pm = kmemdup(pm, sizeof(struct dev_pm_ops), GFP_KERNEL);
-	if (!omap_pm) {
-		pr_err("%s: unable to alloc memory for new dev_pm_ops\n",
-			__func__);
-		return -ENOMEM;
-	}
-
-	omap_pm->runtime_suspend = omap_pm_runtime_suspend;
-	omap_pm->runtime_resume = omap_pm_runtime_resume;
-
-	platform_bus_set_pm_ops(omap_pm);
-
-	return 0;
-}
-core_initcall(omap_pm_runtime_init);
diff --git a/arch/arm/mach-omap2/voltage.c b/arch/arm/mach-omap2/voltage.c
index 6fb520999b6..0c1552d9d99 100644
--- a/arch/arm/mach-omap2/voltage.c
+++ b/arch/arm/mach-omap2/voltage.c
@@ -114,7 +114,6 @@ static int __init _config_common_vdd_data(struct omap_vdd_info *vdd)
 	sys_clk_speed /= 1000;
 
 	/* Generic voltage parameters */
-	vdd->curr_volt = 1200000;
 	vdd->volt_scale = vp_forceupdate_scale_voltage;
 	vdd->vp_enabled = false;
 
diff --git a/arch/arm/mach-pxa/balloon3.c b/arch/arm/mach-pxa/balloon3.c
index bfbecec6d05..810a982a66f 100644
--- a/arch/arm/mach-pxa/balloon3.c
+++ b/arch/arm/mach-pxa/balloon3.c
@@ -15,7 +15,6 @@
 
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <linux/bitops.h>
diff --git a/arch/arm/mach-pxa/clock-pxa2xx.c b/arch/arm/mach-pxa/clock-pxa2xx.c
index 1ce09044849..1d5859d9a0e 100644
--- a/arch/arm/mach-pxa/clock-pxa2xx.c
+++ b/arch/arm/mach-pxa/clock-pxa2xx.c
@@ -9,7 +9,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <mach/pxa2xx-regs.h>
 
@@ -33,32 +33,22 @@ const struct clkops clk_pxa2xx_cken_ops = {
 #ifdef CONFIG_PM
 static uint32_t saved_cken;
 
-static int pxa2xx_clock_suspend(struct sys_device *d, pm_message_t state)
+static int pxa2xx_clock_suspend(void)
 {
 	saved_cken = CKEN;
 	return 0;
 }
 
-static int pxa2xx_clock_resume(struct sys_device *d)
+static void pxa2xx_clock_resume(void)
 {
 	CKEN = saved_cken;
-	return 0;
 }
 #else
 #define pxa2xx_clock_suspend	NULL
 #define pxa2xx_clock_resume	NULL
 #endif
 
-struct sysdev_class pxa2xx_clock_sysclass = {
-	.name		= "pxa2xx-clock",
+struct syscore_ops pxa2xx_clock_syscore_ops = {
 	.suspend	= pxa2xx_clock_suspend,
 	.resume		= pxa2xx_clock_resume,
 };
-
-static int __init pxa2xx_clock_init(void)
-{
-	if (cpu_is_pxa2xx())
-		return sysdev_class_register(&pxa2xx_clock_sysclass);
-	return 0;
-}
-postcore_initcall(pxa2xx_clock_init);
diff --git a/arch/arm/mach-pxa/clock-pxa3xx.c b/arch/arm/mach-pxa/clock-pxa3xx.c
index 3f864cd0bd2..2a37a9a8f62 100644
--- a/arch/arm/mach-pxa/clock-pxa3xx.c
+++ b/arch/arm/mach-pxa/clock-pxa3xx.c
@@ -10,6 +10,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/io.h>
+#include <linux/syscore_ops.h>
 
 #include <mach/smemc.h>
 #include <mach/pxa3xx-regs.h>
@@ -182,7 +183,7 @@ const struct clkops clk_pxa3xx_pout_ops = {
 static uint32_t cken[2];
 static uint32_t accr;
 
-static int pxa3xx_clock_suspend(struct sys_device *d, pm_message_t state)
+static int pxa3xx_clock_suspend(void)
 {
 	cken[0] = CKENA;
 	cken[1] = CKENB;
@@ -190,28 +191,18 @@ static int pxa3xx_clock_suspend(struct sys_device *d, pm_message_t state)
 	return 0;
 }
 
-static int pxa3xx_clock_resume(struct sys_device *d)
+static void pxa3xx_clock_resume(void)
 {
 	ACCR = accr;
 	CKENA = cken[0];
 	CKENB = cken[1];
-	return 0;
 }
 #else
 #define pxa3xx_clock_suspend	NULL
 #define pxa3xx_clock_resume	NULL
 #endif
 
-struct sysdev_class pxa3xx_clock_sysclass = {
-	.name		= "pxa3xx-clock",
+struct syscore_ops pxa3xx_clock_syscore_ops = {
 	.suspend	= pxa3xx_clock_suspend,
 	.resume		= pxa3xx_clock_resume,
 };
-
-static int __init pxa3xx_clock_init(void)
-{
-	if (cpu_is_pxa3xx() || cpu_is_pxa95x())
-		return sysdev_class_register(&pxa3xx_clock_sysclass);
-	return 0;
-}
-postcore_initcall(pxa3xx_clock_init);
diff --git a/arch/arm/mach-pxa/clock.h b/arch/arm/mach-pxa/clock.h
index f9f349a21b5..1f2fb9c43f0 100644
--- a/arch/arm/mach-pxa/clock.h
+++ b/arch/arm/mach-pxa/clock.h
@@ -1,5 +1,5 @@
 #include <linux/clkdev.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 struct clkops {
 	void			(*enable)(struct clk *);
@@ -54,7 +54,7 @@ extern const struct clkops clk_pxa2xx_cken_ops;
 void clk_pxa2xx_cken_enable(struct clk *clk);
 void clk_pxa2xx_cken_disable(struct clk *clk);
 
-extern struct sysdev_class pxa2xx_clock_sysclass;
+extern struct syscore_ops pxa2xx_clock_syscore_ops;
 
 #if defined(CONFIG_PXA3xx) || defined(CONFIG_PXA95x)
 #define DEFINE_PXA3_CKEN(_name, _cken, _rate, _delay)	\
@@ -74,5 +74,6 @@ extern const struct clkops clk_pxa3xx_smemc_ops;
 extern void clk_pxa3xx_cken_enable(struct clk *);
 extern void clk_pxa3xx_cken_disable(struct clk *);
 
-extern struct sysdev_class pxa3xx_clock_sysclass;
+extern struct syscore_ops pxa3xx_clock_syscore_ops;
+
 #endif
diff --git a/arch/arm/mach-pxa/cm-x270.c b/arch/arm/mach-pxa/cm-x270.c
index b88d601a809..13518a70539 100644
--- a/arch/arm/mach-pxa/cm-x270.c
+++ b/arch/arm/mach-pxa/cm-x270.c
@@ -10,7 +10,6 @@
  */
 
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
 #include <linux/irq.h>
 #include <linux/gpio.h>
 #include <linux/delay.h>
diff --git a/arch/arm/mach-pxa/cm-x2xx.c b/arch/arm/mach-pxa/cm-x2xx.c
index 8225e2e58c6..a1099678247 100644
--- a/arch/arm/mach-pxa/cm-x2xx.c
+++ b/arch/arm/mach-pxa/cm-x2xx.c
@@ -10,7 +10,7 @@
  */
 
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/irq.h>
 #include <linux/gpio.h>
 
@@ -388,7 +388,7 @@ static inline void cmx2xx_init_display(void) {}
 #ifdef CONFIG_PM
 static unsigned long sleep_save_msc[10];
 
-static int cmx2xx_suspend(struct sys_device *dev, pm_message_t state)
+static int cmx2xx_suspend(void)
 {
 	cmx2xx_pci_suspend();
 
@@ -412,7 +412,7 @@ static int cmx2xx_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static int cmx2xx_resume(struct sys_device *dev)
+static void cmx2xx_resume(void)
 {
 	cmx2xx_pci_resume();
 
@@ -420,27 +420,18 @@ static int cmx2xx_resume(struct sys_device *dev)
 	__raw_writel(sleep_save_msc[0], MSC0);
 	__raw_writel(sleep_save_msc[1], MSC1);
 	__raw_writel(sleep_save_msc[2], MSC2);
-
-	return 0;
 }
 
-static struct sysdev_class cmx2xx_pm_sysclass = {
-	.name = "pm",
+static struct syscore_ops cmx2xx_pm_syscore_ops = {
 	.resume = cmx2xx_resume,
 	.suspend = cmx2xx_suspend,
 };
 
-static struct sys_device cmx2xx_pm_device = {
-	.cls = &cmx2xx_pm_sysclass,
-};
-
 static int __init cmx2xx_pm_init(void)
 {
-	int error;
-	error = sysdev_class_register(&cmx2xx_pm_sysclass);
-	if (error == 0)
-		error = sysdev_register(&cmx2xx_pm_device);
-	return error;
+	register_syscore_ops(&cmx2xx_pm_syscore_ops);
+
+	return 0;
 }
 #else
 static int __init cmx2xx_pm_init(void) { return 0; }
diff --git a/arch/arm/mach-pxa/colibri-evalboard.c b/arch/arm/mach-pxa/colibri-evalboard.c
index 81c3c433e2d..d28e802e244 100644
--- a/arch/arm/mach-pxa/colibri-evalboard.c
+++ b/arch/arm/mach-pxa/colibri-evalboard.c
@@ -13,7 +13,6 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
 #include <linux/interrupt.h>
 #include <linux/gpio.h>
 #include <asm/mach-types.h>
diff --git a/arch/arm/mach-pxa/colibri-pxa270-income.c b/arch/arm/mach-pxa/colibri-pxa270-income.c
index 44c1b77ece6..80538b8806e 100644
--- a/arch/arm/mach-pxa/colibri-pxa270-income.c
+++ b/arch/arm/mach-pxa/colibri-pxa270-income.c
@@ -22,7 +22,6 @@
 #include <linux/platform_device.h>
 #include <linux/pwm_backlight.h>
 #include <linux/i2c/pxa-i2c.h>
-#include <linux/sysdev.h>
 
 #include <asm/irq.h>
 #include <asm/mach-types.h>
diff --git a/arch/arm/mach-pxa/colibri-pxa270.c b/arch/arm/mach-pxa/colibri-pxa270.c
index 6fc5d328ba7..7545a48ed88 100644
--- a/arch/arm/mach-pxa/colibri-pxa270.c
+++ b/arch/arm/mach-pxa/colibri-pxa270.c
@@ -17,7 +17,6 @@
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/physmap.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
 #include <linux/ucb1400.h>
 
 #include <asm/mach/arch.h>
diff --git a/arch/arm/mach-pxa/generic.h b/arch/arm/mach-pxa/generic.h
index a079d8baa45..e6c9344a95a 100644
--- a/arch/arm/mach-pxa/generic.h
+++ b/arch/arm/mach-pxa/generic.h
@@ -61,10 +61,10 @@ extern unsigned pxa3xx_get_clk_frequency_khz(int);
 #define pxa3xx_get_clk_frequency_khz(x)		(0)
 #endif
 
-extern struct sysdev_class pxa_irq_sysclass;
-extern struct sysdev_class pxa_gpio_sysclass;
-extern struct sysdev_class pxa2xx_mfp_sysclass;
-extern struct sysdev_class pxa3xx_mfp_sysclass;
+extern struct syscore_ops pxa_irq_syscore_ops;
+extern struct syscore_ops pxa_gpio_syscore_ops;
+extern struct syscore_ops pxa2xx_mfp_syscore_ops;
+extern struct syscore_ops pxa3xx_mfp_syscore_ops;
 
 void __init pxa_set_ffuart_info(void *info);
 void __init pxa_set_btuart_info(void *info);
diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c
index 6de0ad0eea6..9cdcca59792 100644
--- a/arch/arm/mach-pxa/hx4700.c
+++ b/arch/arm/mach-pxa/hx4700.c
@@ -711,7 +711,7 @@ static struct regulator_consumer_supply bq24022_consumers[] = {
 static struct regulator_init_data bq24022_init_data = {
 	.constraints = {
 		.max_uA         = 500000,
-		.valid_ops_mask = REGULATOR_CHANGE_CURRENT,
+		.valid_ops_mask = REGULATOR_CHANGE_CURRENT|REGULATOR_CHANGE_STATUS,
 	},
 	.num_consumer_supplies  = ARRAY_SIZE(bq24022_consumers),
 	.consumer_supplies      = bq24022_consumers,
diff --git a/arch/arm/mach-pxa/irq.c b/arch/arm/mach-pxa/irq.c
index 6251e3f5c62..32ed551bf9c 100644
--- a/arch/arm/mach-pxa/irq.c
+++ b/arch/arm/mach-pxa/irq.c
@@ -15,7 +15,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/io.h>
 #include <linux/irq.h>
 
@@ -183,7 +183,7 @@ void __init pxa_init_irq(int irq_nr, set_wake_t fn)
 static unsigned long saved_icmr[MAX_INTERNAL_IRQS/32];
 static unsigned long saved_ipr[MAX_INTERNAL_IRQS];
 
-static int pxa_irq_suspend(struct sys_device *dev, pm_message_t state)
+static int pxa_irq_suspend(void)
 {
 	int i;
 
@@ -202,7 +202,7 @@ static int pxa_irq_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static int pxa_irq_resume(struct sys_device *dev)
+static void pxa_irq_resume(void)
 {
 	int i;
 
@@ -218,22 +218,13 @@ static int pxa_irq_resume(struct sys_device *dev)
 			__raw_writel(saved_ipr[i], IRQ_BASE + IPR(i));
 
 	__raw_writel(1, IRQ_BASE + ICCR);
-	return 0;
 }
 #else
 #define pxa_irq_suspend		NULL
 #define pxa_irq_resume		NULL
 #endif
 
-struct sysdev_class pxa_irq_sysclass = {
-	.name		= "irq",
+struct syscore_ops pxa_irq_syscore_ops = {
 	.suspend	= pxa_irq_suspend,
 	.resume		= pxa_irq_resume,
 };
-
-static int __init pxa_irq_init(void)
-{
-	return sysdev_class_register(&pxa_irq_sysclass);
-}
-
-core_initcall(pxa_irq_init);
diff --git a/arch/arm/mach-pxa/lpd270.c b/arch/arm/mach-pxa/lpd270.c
index f5de541725b..6cf8180bf5b 100644
--- a/arch/arm/mach-pxa/lpd270.c
+++ b/arch/arm/mach-pxa/lpd270.c
@@ -15,7 +15,7 @@
 
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <linux/bitops.h>
@@ -159,30 +159,22 @@ static void __init lpd270_init_irq(void)
 
 
 #ifdef CONFIG_PM
-static int lpd270_irq_resume(struct sys_device *dev)
+static void lpd270_irq_resume(void)
 {
 	__raw_writew(lpd270_irq_enabled, LPD270_INT_MASK);
-	return 0;
 }
 
-static struct sysdev_class lpd270_irq_sysclass = {
-	.name = "cpld_irq",
+static struct syscore_ops lpd270_irq_syscore_ops = {
 	.resume = lpd270_irq_resume,
 };
 
-static struct sys_device lpd270_irq_device = {
-	.cls = &lpd270_irq_sysclass,
-};
-
 static int __init lpd270_irq_device_init(void)
 {
-	int ret = -ENODEV;
 	if (machine_is_logicpd_pxa270()) {
-		ret = sysdev_class_register(&lpd270_irq_sysclass);
-		if (ret == 0)
-			ret = sysdev_register(&lpd270_irq_device);
+		register_syscore_ops(&lpd270_irq_syscore_ops);
+		return 0;
 	}
-	return ret;
+	return -ENODEV;
 }
 
 device_initcall(lpd270_irq_device_init);
diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c
index 3ede978c83d..e10ddb82714 100644
--- a/arch/arm/mach-pxa/lubbock.c
+++ b/arch/arm/mach-pxa/lubbock.c
@@ -15,7 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/major.h>
 #include <linux/fb.h>
 #include <linux/interrupt.h>
@@ -176,31 +176,22 @@ static void __init lubbock_init_irq(void)
 
 #ifdef CONFIG_PM
 
-static int lubbock_irq_resume(struct sys_device *dev)
+static void lubbock_irq_resume(void)
 {
 	LUB_IRQ_MASK_EN = lubbock_irq_enabled;
-	return 0;
 }
 
-static struct sysdev_class lubbock_irq_sysclass = {
-	.name = "cpld_irq",
+static struct syscore_ops lubbock_irq_syscore_ops = {
 	.resume = lubbock_irq_resume,
 };
 
-static struct sys_device lubbock_irq_device = {
-	.cls = &lubbock_irq_sysclass,
-};
-
 static int __init lubbock_irq_device_init(void)
 {
-	int ret = -ENODEV;
-
 	if (machine_is_lubbock()) {
-		ret = sysdev_class_register(&lubbock_irq_sysclass);
-		if (ret == 0)
-			ret = sysdev_register(&lubbock_irq_device);
+		register_syscore_ops(&lubbock_irq_syscore_ops);
+		return 0;
 	}
-	return ret;
+	return -ENODEV;
 }
 
 device_initcall(lubbock_irq_device_init);
diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c
index a72993dde2b..9984ef70bd7 100644
--- a/arch/arm/mach-pxa/magician.c
+++ b/arch/arm/mach-pxa/magician.c
@@ -599,7 +599,7 @@ static struct regulator_consumer_supply bq24022_consumers[] = {
 static struct regulator_init_data bq24022_init_data = {
 	.constraints = {
 		.max_uA         = 500000,
-		.valid_ops_mask = REGULATOR_CHANGE_CURRENT,
+		.valid_ops_mask = REGULATOR_CHANGE_CURRENT | REGULATOR_CHANGE_STATUS,
 	},
 	.num_consumer_supplies  = ARRAY_SIZE(bq24022_consumers),
 	.consumer_supplies      = bq24022_consumers,
diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c
index 95163baca29..3479e2b3b51 100644
--- a/arch/arm/mach-pxa/mainstone.c
+++ b/arch/arm/mach-pxa/mainstone.c
@@ -15,7 +15,7 @@
 
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <linux/bitops.h>
@@ -185,31 +185,21 @@ static void __init mainstone_init_irq(void)
 
 #ifdef CONFIG_PM
 
-static int mainstone_irq_resume(struct sys_device *dev)
+static void mainstone_irq_resume(void)
 {
 	MST_INTMSKENA = mainstone_irq_enabled;
-	return 0;
 }
 
-static struct sysdev_class mainstone_irq_sysclass = {
-	.name = "cpld_irq",
+static struct syscore_ops mainstone_irq_syscore_ops = {
 	.resume = mainstone_irq_resume,
 };
 
-static struct sys_device mainstone_irq_device = {
-	.cls = &mainstone_irq_sysclass,
-};
-
 static int __init mainstone_irq_device_init(void)
 {
-	int ret = -ENODEV;
+	if (machine_is_mainstone())
+		register_syscore_ops(&mainstone_irq_syscore_ops);
 
-	if (machine_is_mainstone()) {
-		ret = sysdev_class_register(&mainstone_irq_sysclass);
-		if (ret == 0)
-			ret = sysdev_register(&mainstone_irq_device);
-	}
-	return ret;
+	return 0;
 }
 
 device_initcall(mainstone_irq_device_init);
diff --git a/arch/arm/mach-pxa/mfp-pxa2xx.c b/arch/arm/mach-pxa/mfp-pxa2xx.c
index 1d1419b7345..87ae3129f4f 100644
--- a/arch/arm/mach-pxa/mfp-pxa2xx.c
+++ b/arch/arm/mach-pxa/mfp-pxa2xx.c
@@ -16,7 +16,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <mach/gpio.h>
 #include <mach/pxa2xx-regs.h>
@@ -338,7 +338,7 @@ static unsigned long saved_gafr[2][4];
 static unsigned long saved_gpdr[4];
 static unsigned long saved_pgsr[4];
 
-static int pxa2xx_mfp_suspend(struct sys_device *d, pm_message_t state)
+static int pxa2xx_mfp_suspend(void)
 {
 	int i;
 
@@ -365,7 +365,7 @@ static int pxa2xx_mfp_suspend(struct sys_device *d, pm_message_t state)
 	return 0;
 }
 
-static int pxa2xx_mfp_resume(struct sys_device *d)
+static void pxa2xx_mfp_resume(void)
 {
 	int i;
 
@@ -376,15 +376,13 @@ static int pxa2xx_mfp_resume(struct sys_device *d)
 		PGSR(i) = saved_pgsr[i];
 	}
 	PSSR = PSSR_RDH | PSSR_PH;
-	return 0;
 }
 #else
 #define pxa2xx_mfp_suspend	NULL
 #define pxa2xx_mfp_resume	NULL
 #endif
 
-struct sysdev_class pxa2xx_mfp_sysclass = {
-	.name		= "mfp",
+struct syscore_ops pxa2xx_mfp_syscore_ops = {
 	.suspend	= pxa2xx_mfp_suspend,
 	.resume		= pxa2xx_mfp_resume,
 };
@@ -409,6 +407,6 @@ static int __init pxa2xx_mfp_init(void)
 	for (i = 0; i <= gpio_to_bank(pxa_last_gpio); i++)
 		gpdr_lpm[i] = GPDR(i * 32);
 
-	return sysdev_class_register(&pxa2xx_mfp_sysclass);
+	return 0;
 }
 postcore_initcall(pxa2xx_mfp_init);
diff --git a/arch/arm/mach-pxa/mfp-pxa3xx.c b/arch/arm/mach-pxa/mfp-pxa3xx.c
index 7a270eecd48..89863a01ecd 100644
--- a/arch/arm/mach-pxa/mfp-pxa3xx.c
+++ b/arch/arm/mach-pxa/mfp-pxa3xx.c
@@ -17,7 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/io.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <mach/hardware.h>
 #include <mach/mfp-pxa3xx.h>
@@ -31,13 +31,13 @@
  * a pull-down mode if they're an active low chip select, and we're
  * just entering standby.
  */
-static int pxa3xx_mfp_suspend(struct sys_device *d, pm_message_t state)
+static int pxa3xx_mfp_suspend(void)
 {
 	mfp_config_lpm();
 	return 0;
 }
 
-static int pxa3xx_mfp_resume(struct sys_device *d)
+static void pxa3xx_mfp_resume(void)
 {
 	mfp_config_run();
 
@@ -47,24 +47,13 @@ static int pxa3xx_mfp_resume(struct sys_device *d)
 	 * preserve them here in case they will be referenced later
 	 */
 	ASCR &= ~(ASCR_RDH | ASCR_D1S | ASCR_D2S | ASCR_D3S);
-	return 0;
 }
 #else
 #define pxa3xx_mfp_suspend	NULL
 #define pxa3xx_mfp_resume	NULL
 #endif
 
-struct sysdev_class pxa3xx_mfp_sysclass = {
-	.name		= "mfp",
+struct syscore_ops pxa3xx_mfp_syscore_ops = {
 	.suspend	= pxa3xx_mfp_suspend,
-	.resume 	= pxa3xx_mfp_resume,
+	.resume		= pxa3xx_mfp_resume,
 };
-
-static int __init mfp_init_devicefs(void)
-{
-	if (cpu_is_pxa3xx())
-		return sysdev_class_register(&pxa3xx_mfp_sysclass);
-
-	return 0;
-}
-postcore_initcall(mfp_init_devicefs);
diff --git a/arch/arm/mach-pxa/mioa701.c b/arch/arm/mach-pxa/mioa701.c
index 23925db8ff7..e3470137c93 100644
--- a/arch/arm/mach-pxa/mioa701.c
+++ b/arch/arm/mach-pxa/mioa701.c
@@ -22,7 +22,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/input.h>
 #include <linux/delay.h>
 #include <linux/gpio_keys.h>
@@ -488,7 +488,7 @@ static void install_bootstrap(void)
 }
 
 
-static int mioa701_sys_suspend(struct sys_device *sysdev, pm_message_t state)
+static int mioa701_sys_suspend(void)
 {
 	int i = 0, is_bt_on;
 	u32 *mem_resume_vector	= phys_to_virt(RESUME_VECTOR_ADDR);
@@ -514,7 +514,7 @@ static int mioa701_sys_suspend(struct sys_device *sysdev, pm_message_t state)
 	return 0;
 }
 
-static int mioa701_sys_resume(struct sys_device *sysdev)
+static void mioa701_sys_resume(void)
 {
 	int i = 0;
 	u32 *mem_resume_vector	= phys_to_virt(RESUME_VECTOR_ADDR);
@@ -527,43 +527,18 @@ static int mioa701_sys_resume(struct sys_device *sysdev)
 	*mem_resume_enabler = save_buffer[i++];
 	*mem_resume_bt	    = save_buffer[i++];
 	*mem_resume_unknown = save_buffer[i++];
-
-	return 0;
 }
 
-static struct sysdev_class mioa701_sysclass = {
-	.name = "mioa701",
-};
-
-static struct sys_device sysdev_bootstrap = {
-	.cls		= &mioa701_sysclass,
-};
-
-static struct sysdev_driver driver_bootstrap = {
-	.suspend	= &mioa701_sys_suspend,
-	.resume		= &mioa701_sys_resume,
+static struct syscore_ops mioa701_syscore_ops = {
+	.suspend	= mioa701_sys_suspend,
+	.resume		= mioa701_sys_resume,
 };
 
 static int __init bootstrap_init(void)
 {
-	int rc;
 	int save_size = mioa701_bootstrap_lg + (sizeof(u32) * 3);
 
-	rc = sysdev_class_register(&mioa701_sysclass);
-	if (rc) {
-		printk(KERN_ERR "Failed registering mioa701 sys class\n");
-		return -ENODEV;
-	}
-	rc = sysdev_register(&sysdev_bootstrap);
-	if (rc) {
-		printk(KERN_ERR "Failed registering mioa701 sys device\n");
-		return -ENODEV;
-	}
-	rc = sysdev_driver_register(&mioa701_sysclass, &driver_bootstrap);
-	if (rc) {
-		printk(KERN_ERR "Failed registering PMU sys driver\n");
-		return -ENODEV;
-	}
+	register_syscore_ops(&mioa701_syscore_ops);
 
 	save_buffer = kmalloc(save_size, GFP_KERNEL);
 	if (!save_buffer)
@@ -576,9 +551,7 @@ static int __init bootstrap_init(void)
 static void bootstrap_exit(void)
 {
 	kfree(save_buffer);
-	sysdev_driver_unregister(&mioa701_sysclass, &driver_bootstrap);
-	sysdev_unregister(&sysdev_bootstrap);
-	sysdev_class_unregister(&mioa701_sysclass);
+	unregister_syscore_ops(&mioa701_syscore_ops);
 
 	printk(KERN_CRIT "Unregistering mioa701 suspend will hang next"
 	       "resume !!!\n");
diff --git a/arch/arm/mach-pxa/palmld.c b/arch/arm/mach-pxa/palmld.c
index a6f898cbfac..4061ecddee7 100644
--- a/arch/arm/mach-pxa/palmld.c
+++ b/arch/arm/mach-pxa/palmld.c
@@ -24,7 +24,6 @@
 #include <linux/gpio.h>
 #include <linux/wm97xx.h>
 #include <linux/power_supply.h>
-#include <linux/sysdev.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/physmap.h>
diff --git a/arch/arm/mach-pxa/palmtreo.c b/arch/arm/mach-pxa/palmtreo.c
index 8aadad55fbe..20d1b18b173 100644
--- a/arch/arm/mach-pxa/palmtreo.c
+++ b/arch/arm/mach-pxa/palmtreo.c
@@ -25,7 +25,6 @@
 #include <linux/pwm_backlight.h>
 #include <linux/gpio.h>
 #include <linux/power_supply.h>
-#include <linux/sysdev.h>
 #include <linux/w1-gpio.h>
 
 #include <asm/mach-types.h>
diff --git a/arch/arm/mach-pxa/palmz72.c b/arch/arm/mach-pxa/palmz72.c
index 3b8a4f37dbb..65f24f0b77e 100644
--- a/arch/arm/mach-pxa/palmz72.c
+++ b/arch/arm/mach-pxa/palmz72.c
@@ -19,7 +19,7 @@
  */
 
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/delay.h>
 #include <linux/irq.h>
 #include <linux/gpio_keys.h>
@@ -233,9 +233,9 @@ static struct palmz72_resume_info palmz72_resume_info = {
 
 static unsigned long store_ptr;
 
-/* sys_device for Palm Zire 72 PM */
+/* syscore_ops for Palm Zire 72 PM */
 
-static int palmz72_pm_suspend(struct sys_device *dev, pm_message_t msg)
+static int palmz72_pm_suspend(void)
 {
 	/* setup the resume_info struct for the original bootloader */
 	palmz72_resume_info.resume_addr = (u32) cpu_resume;
@@ -249,31 +249,23 @@ static int palmz72_pm_suspend(struct sys_device *dev, pm_message_t msg)
 	return 0;
 }
 
-static int palmz72_pm_resume(struct sys_device *dev)
+static void palmz72_pm_resume(void)
 {
 	*PALMZ72_SAVE_DWORD = store_ptr;
-	return 0;
 }
 
-static struct sysdev_class palmz72_pm_sysclass = {
-	.name = "palmz72_pm",
+static struct syscore_ops palmz72_pm_syscore_ops = {
 	.suspend = palmz72_pm_suspend,
 	.resume = palmz72_pm_resume,
 };
 
-static struct sys_device palmz72_pm_device = {
-	.cls = &palmz72_pm_sysclass,
-};
-
 static int __init palmz72_pm_init(void)
 {
-	int ret = -ENODEV;
 	if (machine_is_palmz72()) {
-		ret = sysdev_class_register(&palmz72_pm_sysclass);
-		if (ret == 0)
-			ret = sysdev_register(&palmz72_pm_device);
+		register_syscore_ops(&palmz72_pm_syscore_ops);
+		return 0;
 	}
-	return ret;
+	return -ENODEV;
 }
 
 device_initcall(palmz72_pm_init);
diff --git a/arch/arm/mach-pxa/pxa25x.c b/arch/arm/mach-pxa/pxa25x.c
index a4af8c52d7e..fed363cec9c 100644
--- a/arch/arm/mach-pxa/pxa25x.c
+++ b/arch/arm/mach-pxa/pxa25x.c
@@ -21,7 +21,7 @@
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/suspend.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/irq.h>
 
 #include <asm/mach/map.h>
@@ -350,21 +350,9 @@ static struct platform_device *pxa25x_devices[] __initdata = {
 	&pxa_device_asoc_platform,
 };
 
-static struct sys_device pxa25x_sysdev[] = {
-	{
-		.cls	= &pxa_irq_sysclass,
-	}, {
-		.cls	= &pxa2xx_mfp_sysclass,
-	}, {
-		.cls	= &pxa_gpio_sysclass,
-	}, {
-		.cls	= &pxa2xx_clock_sysclass,
-	}
-};
-
 static int __init pxa25x_init(void)
 {
-	int i, ret = 0;
+	int ret = 0;
 
 	if (cpu_is_pxa25x()) {
 
@@ -377,11 +365,10 @@ static int __init pxa25x_init(void)
 
 		pxa25x_init_pm();
 
-		for (i = 0; i < ARRAY_SIZE(pxa25x_sysdev); i++) {
-			ret = sysdev_register(&pxa25x_sysdev[i]);
-			if (ret)
-				pr_err("failed to register sysdev[%d]\n", i);
-		}
+		register_syscore_ops(&pxa_irq_syscore_ops);
+		register_syscore_ops(&pxa2xx_mfp_syscore_ops);
+		register_syscore_ops(&pxa_gpio_syscore_ops);
+		register_syscore_ops(&pxa2xx_clock_syscore_ops);
 
 		ret = platform_add_devices(pxa25x_devices,
 					   ARRAY_SIZE(pxa25x_devices));
diff --git a/arch/arm/mach-pxa/pxa27x.c b/arch/arm/mach-pxa/pxa27x.c
index 909756eaf4b..2fecbec58d8 100644
--- a/arch/arm/mach-pxa/pxa27x.c
+++ b/arch/arm/mach-pxa/pxa27x.c
@@ -16,7 +16,7 @@
 #include <linux/init.h>
 #include <linux/suspend.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/io.h>
 #include <linux/irq.h>
 #include <linux/i2c/pxa-i2c.h>
@@ -428,21 +428,9 @@ static struct platform_device *devices[] __initdata = {
 	&pxa27x_device_pwm1,
 };
 
-static struct sys_device pxa27x_sysdev[] = {
-	{
-		.cls	= &pxa_irq_sysclass,
-	}, {
-		.cls	= &pxa2xx_mfp_sysclass,
-	}, {
-		.cls	= &pxa_gpio_sysclass,
-	}, {
-		.cls	= &pxa2xx_clock_sysclass,
-	}
-};
-
 static int __init pxa27x_init(void)
 {
-	int i, ret = 0;
+	int ret = 0;
 
 	if (cpu_is_pxa27x()) {
 
@@ -455,11 +443,10 @@ static int __init pxa27x_init(void)
 
 		pxa27x_init_pm();
 
-		for (i = 0; i < ARRAY_SIZE(pxa27x_sysdev); i++) {
-			ret = sysdev_register(&pxa27x_sysdev[i]);
-			if (ret)
-				pr_err("failed to register sysdev[%d]\n", i);
-		}
+		register_syscore_ops(&pxa_irq_syscore_ops);
+		register_syscore_ops(&pxa2xx_mfp_syscore_ops);
+		register_syscore_ops(&pxa_gpio_syscore_ops);
+		register_syscore_ops(&pxa2xx_clock_syscore_ops);
 
 		ret = platform_add_devices(devices, ARRAY_SIZE(devices));
 	}
diff --git a/arch/arm/mach-pxa/pxa3xx.c b/arch/arm/mach-pxa/pxa3xx.c
index 8dd10739115..8521d7d6f1d 100644
--- a/arch/arm/mach-pxa/pxa3xx.c
+++ b/arch/arm/mach-pxa/pxa3xx.c
@@ -20,7 +20,7 @@
 #include <linux/platform_device.h>
 #include <linux/irq.h>
 #include <linux/io.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/i2c/pxa-i2c.h>
 
 #include <asm/mach/map.h>
@@ -427,21 +427,9 @@ static struct platform_device *devices[] __initdata = {
 	&pxa27x_device_pwm1,
 };
 
-static struct sys_device pxa3xx_sysdev[] = {
-	{
-		.cls	= &pxa_irq_sysclass,
-	}, {
-		.cls	= &pxa3xx_mfp_sysclass,
-	}, {
-		.cls	= &pxa_gpio_sysclass,
-	}, {
-		.cls	= &pxa3xx_clock_sysclass,
-	}
-};
-
 static int __init pxa3xx_init(void)
 {
-	int i, ret = 0;
+	int ret = 0;
 
 	if (cpu_is_pxa3xx()) {
 
@@ -462,11 +450,10 @@ static int __init pxa3xx_init(void)
 
 		pxa3xx_init_pm();
 
-		for (i = 0; i < ARRAY_SIZE(pxa3xx_sysdev); i++) {
-			ret = sysdev_register(&pxa3xx_sysdev[i]);
-			if (ret)
-				pr_err("failed to register sysdev[%d]\n", i);
-		}
+		register_syscore_ops(&pxa_irq_syscore_ops);
+		register_syscore_ops(&pxa3xx_mfp_syscore_ops);
+		register_syscore_ops(&pxa_gpio_syscore_ops);
+		register_syscore_ops(&pxa3xx_clock_syscore_ops);
 
 		ret = platform_add_devices(devices, ARRAY_SIZE(devices));
 	}
diff --git a/arch/arm/mach-pxa/pxa95x.c b/arch/arm/mach-pxa/pxa95x.c
index 23b229bd06e..ecc82a330fa 100644
--- a/arch/arm/mach-pxa/pxa95x.c
+++ b/arch/arm/mach-pxa/pxa95x.c
@@ -18,7 +18,7 @@
 #include <linux/i2c/pxa-i2c.h>
 #include <linux/irq.h>
 #include <linux/io.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <mach/hardware.h>
 #include <mach/gpio.h>
@@ -260,16 +260,6 @@ static struct platform_device *devices[] __initdata = {
 	&pxa27x_device_pwm1,
 };
 
-static struct sys_device pxa95x_sysdev[] = {
-	{
-		.cls	= &pxa_irq_sysclass,
-	}, {
-		.cls	= &pxa_gpio_sysclass,
-	}, {
-		.cls	= &pxa3xx_clock_sysclass,
-	}
-};
-
 static int __init pxa95x_init(void)
 {
 	int ret = 0, i;
@@ -293,11 +283,9 @@ static int __init pxa95x_init(void)
 		if ((ret = pxa_init_dma(IRQ_DMA, 32)))
 			return ret;
 
-		for (i = 0; i < ARRAY_SIZE(pxa95x_sysdev); i++) {
-			ret = sysdev_register(&pxa95x_sysdev[i]);
-			if (ret)
-				pr_err("failed to register sysdev[%d]\n", i);
-		}
+		register_syscore_ops(&pxa_irq_syscore_ops);
+		register_syscore_ops(&pxa_gpio_syscore_ops);
+		register_syscore_ops(&pxa3xx_clock_syscore_ops);
 
 		ret = platform_add_devices(devices, ARRAY_SIZE(devices));
 	}
diff --git a/arch/arm/mach-pxa/raumfeld.c b/arch/arm/mach-pxa/raumfeld.c
index cd1861351f7..d130f77b6d1 100644
--- a/arch/arm/mach-pxa/raumfeld.c
+++ b/arch/arm/mach-pxa/raumfeld.c
@@ -18,7 +18,6 @@
 
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/sysdev.h>
 #include <linux/platform_device.h>
 #include <linux/interrupt.h>
 #include <linux/gpio.h>
diff --git a/arch/arm/mach-pxa/smemc.c b/arch/arm/mach-pxa/smemc.c
index 232b7316ec0..79923058d10 100644
--- a/arch/arm/mach-pxa/smemc.c
+++ b/arch/arm/mach-pxa/smemc.c
@@ -6,7 +6,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/io.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <mach/hardware.h>
 #include <mach/smemc.h>
@@ -16,7 +16,7 @@ static unsigned long msc[2];
 static unsigned long sxcnfg, memclkcfg;
 static unsigned long csadrcfg[4];
 
-static int pxa3xx_smemc_suspend(struct sys_device *dev, pm_message_t state)
+static int pxa3xx_smemc_suspend(void)
 {
 	msc[0] = __raw_readl(MSC0);
 	msc[1] = __raw_readl(MSC1);
@@ -30,7 +30,7 @@ static int pxa3xx_smemc_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static int pxa3xx_smemc_resume(struct sys_device *dev)
+static void pxa3xx_smemc_resume(void)
 {
 	__raw_writel(msc[0], MSC0);
 	__raw_writel(msc[1], MSC1);
@@ -40,34 +40,19 @@ static int pxa3xx_smemc_resume(struct sys_device *dev)
 	__raw_writel(csadrcfg[1], CSADRCFG1);
 	__raw_writel(csadrcfg[2], CSADRCFG2);
 	__raw_writel(csadrcfg[3], CSADRCFG3);
-
-	return 0;
 }
 
-static struct sysdev_class smemc_sysclass = {
-	.name		= "smemc",
+static struct syscore_ops smemc_syscore_ops = {
 	.suspend	= pxa3xx_smemc_suspend,
 	.resume		= pxa3xx_smemc_resume,
 };
 
-static struct sys_device smemc_sysdev = {
-	.id		= 0,
-	.cls		= &smemc_sysclass,
-};
-
 static int __init smemc_init(void)
 {
-	int ret = 0;
+	if (cpu_is_pxa3xx())
+		register_syscore_ops(&smemc_syscore_ops);
 
-	if (cpu_is_pxa3xx()) {
-		ret = sysdev_class_register(&smemc_sysclass);
-		if (ret)
-			return ret;
-
-		ret = sysdev_register(&smemc_sysdev);
-	}
-
-	return ret;
+	return 0;
 }
 subsys_initcall(smemc_init);
 #endif
diff --git a/arch/arm/mach-pxa/trizeps4.c b/arch/arm/mach-pxa/trizeps4.c
index b9cfbebdfe9..687417a9369 100644
--- a/arch/arm/mach-pxa/trizeps4.c
+++ b/arch/arm/mach-pxa/trizeps4.c
@@ -15,7 +15,6 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <linux/bitops.h>
diff --git a/arch/arm/mach-pxa/viper.c b/arch/arm/mach-pxa/viper.c
index b523f119e0f..903218eab56 100644
--- a/arch/arm/mach-pxa/viper.c
+++ b/arch/arm/mach-pxa/viper.c
@@ -44,6 +44,7 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/physmap.h>
+#include <linux/syscore_ops.h>
 
 #include <mach/pxa25x.h>
 #include <mach/audio.h>
@@ -130,20 +131,19 @@ static u8 viper_hw_version(void)
 	return v1;
 }
 
-/* CPU sysdev */
-static int viper_cpu_suspend(struct sys_device *sysdev, pm_message_t state)
+/* CPU system core operations. */
+static int viper_cpu_suspend(void)
 {
 	viper_icr_set_bit(VIPER_ICR_R_DIS);
 	return 0;
 }
 
-static int viper_cpu_resume(struct sys_device *sysdev)
+static void viper_cpu_resume(void)
 {
 	viper_icr_clear_bit(VIPER_ICR_R_DIS);
-	return 0;
 }
 
-static struct sysdev_driver viper_cpu_sysdev_driver = {
+static struct syscore_ops viper_cpu_syscore_ops = {
 	.suspend	= viper_cpu_suspend,
 	.resume		= viper_cpu_resume,
 };
@@ -945,7 +945,7 @@ static void __init viper_init(void)
 	viper_init_vcore_gpios();
 	viper_init_cpufreq();
 
-	sysdev_driver_register(&cpu_sysdev_class, &viper_cpu_sysdev_driver);
+	register_syscore_ops(&viper_cpu_syscore_ops);
 
 	if (version) {
 		pr_info("viper: hardware v%di%d detected. "
diff --git a/arch/arm/mach-pxa/vpac270.c b/arch/arm/mach-pxa/vpac270.c
index f71d377c864..67bd41488bf 100644
--- a/arch/arm/mach-pxa/vpac270.c
+++ b/arch/arm/mach-pxa/vpac270.c
@@ -16,7 +16,6 @@
 #include <linux/gpio_keys.h>
 #include <linux/input.h>
 #include <linux/gpio.h>
-#include <linux/sysdev.h>
 #include <linux/usb/gpio_vbus.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
diff --git a/arch/arm/mach-realview/include/mach/barriers.h b/arch/arm/mach-realview/include/mach/barriers.h
index 0c5d749d7b5..9a732195aa1 100644
--- a/arch/arm/mach-realview/include/mach/barriers.h
+++ b/arch/arm/mach-realview/include/mach/barriers.h
@@ -4,5 +4,5 @@
  * operation to deadlock the system.
  */
 #define mb()		dsb()
-#define rmb()		dmb()
+#define rmb()		dsb()
 #define wmb()		mb()
diff --git a/arch/arm/mach-s3c2410/irq.c b/arch/arm/mach-s3c2410/irq.c
index 5e2f3533205..2854129f8cc 100644
--- a/arch/arm/mach-s3c2410/irq.c
+++ b/arch/arm/mach-s3c2410/irq.c
@@ -23,38 +23,12 @@
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/ioport.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <plat/cpu.h>
 #include <plat/pm.h>
 
-static int s3c2410_irq_add(struct sys_device *sysdev)
-{
-	return 0;
-}
-
-static struct sysdev_driver s3c2410_irq_driver = {
-	.add		= s3c2410_irq_add,
+struct syscore_ops s3c24xx_irq_syscore_ops = {
 	.suspend	= s3c24xx_irq_suspend,
 	.resume		= s3c24xx_irq_resume,
 };
-
-static int __init s3c2410_irq_init(void)
-{
-	return sysdev_driver_register(&s3c2410_sysclass, &s3c2410_irq_driver);
-}
-
-arch_initcall(s3c2410_irq_init);
-
-static struct sysdev_driver s3c2410a_irq_driver = {
-	.add		= s3c2410_irq_add,
-	.suspend	= s3c24xx_irq_suspend,
-	.resume		= s3c24xx_irq_resume,
-};
-
-static int __init s3c2410a_irq_init(void)
-{
-	return sysdev_driver_register(&s3c2410a_sysclass, &s3c2410a_irq_driver);
-}
-
-arch_initcall(s3c2410a_irq_init);
diff --git a/arch/arm/mach-s3c2410/mach-bast.c b/arch/arm/mach-s3c2410/mach-bast.c
index 2970ea9f7c2..1e2d536adda 100644
--- a/arch/arm/mach-s3c2410/mach-bast.c
+++ b/arch/arm/mach-s3c2410/mach-bast.c
@@ -17,7 +17,7 @@
 #include <linux/timer.h>
 #include <linux/init.h>
 #include <linux/gpio.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/serial_core.h>
 #include <linux/platform_device.h>
 #include <linux/dm9000.h>
@@ -214,17 +214,16 @@ static struct s3c2410_uartcfg bast_uartcfgs[] __initdata = {
 /* NAND Flash on BAST board */
 
 #ifdef CONFIG_PM
-static int bast_pm_suspend(struct sys_device *sd, pm_message_t state)
+static int bast_pm_suspend(void)
 {
 	/* ensure that an nRESET is not generated on resume. */
 	gpio_direction_output(S3C2410_GPA(21), 1);
 	return 0;
 }
 
-static int bast_pm_resume(struct sys_device *sd)
+static void bast_pm_resume(void)
 {
 	s3c_gpio_cfgpin(S3C2410_GPA(21), S3C2410_GPA21_nRSTOUT);
-	return 0;
 }
 
 #else
@@ -232,16 +231,11 @@ static int bast_pm_resume(struct sys_device *sd)
 #define bast_pm_resume NULL
 #endif
 
-static struct sysdev_class bast_pm_sysclass = {
-	.name		= "mach-bast",
+static struct syscore_ops bast_pm_syscore_ops = {
 	.suspend	= bast_pm_suspend,
 	.resume		= bast_pm_resume,
 };
 
-static struct sys_device bast_pm_sysdev = {
-	.cls		= &bast_pm_sysclass,
-};
-
 static int smartmedia_map[] = { 0 };
 static int chip0_map[] = { 1 };
 static int chip1_map[] = { 2 };
@@ -642,8 +636,7 @@ static void __init bast_map_io(void)
 
 static void __init bast_init(void)
 {
-	sysdev_class_register(&bast_pm_sysclass);
-	sysdev_register(&bast_pm_sysdev);
+	register_syscore_ops(&bast_pm_syscore_ops);
 
 	s3c_i2c0_set_platdata(&bast_i2c_info);
 	s3c_nand_set_platdata(&bast_nand_info);
diff --git a/arch/arm/mach-s3c2410/pm.c b/arch/arm/mach-s3c2410/pm.c
index 725636fc4dc..4728f9aa7df 100644
--- a/arch/arm/mach-s3c2410/pm.c
+++ b/arch/arm/mach-s3c2410/pm.c
@@ -25,6 +25,7 @@
 #include <linux/errno.h>
 #include <linux/time.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/gpio.h>
 #include <linux/io.h>
 
@@ -92,7 +93,7 @@ static void s3c2410_pm_prepare(void)
 	}
 }
 
-static int s3c2410_pm_resume(struct sys_device *dev)
+static void s3c2410_pm_resume(void)
 {
 	unsigned long tmp;
 
@@ -104,10 +105,12 @@ static int s3c2410_pm_resume(struct sys_device *dev)
 
 	if ( machine_is_aml_m5900() )
 		s3c2410_gpio_setpin(S3C2410_GPF(2), 0);
-
-	return 0;
 }
 
+struct syscore_ops s3c2410_pm_syscore_ops = {
+	.resume		= s3c2410_pm_resume,
+};
+
 static int s3c2410_pm_add(struct sys_device *dev)
 {
 	pm_cpu_prep = s3c2410_pm_prepare;
@@ -119,7 +122,6 @@ static int s3c2410_pm_add(struct sys_device *dev)
 #if defined(CONFIG_CPU_S3C2410)
 static struct sysdev_driver s3c2410_pm_driver = {
 	.add		= s3c2410_pm_add,
-	.resume		= s3c2410_pm_resume,
 };
 
 /* register ourselves */
@@ -133,7 +135,6 @@ arch_initcall(s3c2410_pm_drvinit);
 
 static struct sysdev_driver s3c2410a_pm_driver = {
 	.add		= s3c2410_pm_add,
-	.resume		= s3c2410_pm_resume,
 };
 
 static int __init s3c2410a_pm_drvinit(void)
@@ -147,7 +148,6 @@ arch_initcall(s3c2410a_pm_drvinit);
 #if defined(CONFIG_CPU_S3C2440)
 static struct sysdev_driver s3c2440_pm_driver = {
 	.add		= s3c2410_pm_add,
-	.resume		= s3c2410_pm_resume,
 };
 
 static int __init s3c2440_pm_drvinit(void)
@@ -161,7 +161,6 @@ arch_initcall(s3c2440_pm_drvinit);
 #if defined(CONFIG_CPU_S3C2442)
 static struct sysdev_driver s3c2442_pm_driver = {
 	.add		= s3c2410_pm_add,
-	.resume		= s3c2410_pm_resume,
 };
 
 static int __init s3c2442_pm_drvinit(void)
diff --git a/arch/arm/mach-s3c2410/s3c2410.c b/arch/arm/mach-s3c2410/s3c2410.c
index adc90a3c589..f1d3bd8f6f1 100644
--- a/arch/arm/mach-s3c2410/s3c2410.c
+++ b/arch/arm/mach-s3c2410/s3c2410.c
@@ -19,6 +19,7 @@
 #include <linux/gpio.h>
 #include <linux/clk.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/serial_core.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
@@ -40,6 +41,7 @@
 #include <plat/devs.h>
 #include <plat/clock.h>
 #include <plat/pll.h>
+#include <plat/pm.h>
 
 #include <plat/gpio-core.h>
 #include <plat/gpio-cfg.h>
@@ -168,6 +170,9 @@ int __init s3c2410_init(void)
 {
 	printk("S3C2410: Initialising architecture\n");
 
+	register_syscore_ops(&s3c2410_pm_syscore_ops);
+	register_syscore_ops(&s3c24xx_irq_syscore_ops);
+
 	return sysdev_register(&s3c2410_sysdev);
 }
 
diff --git a/arch/arm/mach-s3c2412/irq.c b/arch/arm/mach-s3c2412/irq.c
index f3355d2ec63..1a1aa220972 100644
--- a/arch/arm/mach-s3c2412/irq.c
+++ b/arch/arm/mach-s3c2412/irq.c
@@ -202,8 +202,6 @@ static int s3c2412_irq_add(struct sys_device *sysdev)
 
 static struct sysdev_driver s3c2412_irq_driver = {
 	.add		= s3c2412_irq_add,
-	.suspend	= s3c24xx_irq_suspend,
-	.resume		= s3c24xx_irq_resume,
 };
 
 static int s3c2412_irq_init(void)
diff --git a/arch/arm/mach-s3c2412/mach-jive.c b/arch/arm/mach-s3c2412/mach-jive.c
index 923e01bdf01..85dcaeb9e62 100644
--- a/arch/arm/mach-s3c2412/mach-jive.c
+++ b/arch/arm/mach-s3c2412/mach-jive.c
@@ -17,7 +17,7 @@
 #include <linux/timer.h>
 #include <linux/init.h>
 #include <linux/gpio.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/serial_core.h>
 #include <linux/platform_device.h>
 #include <linux/i2c.h>
@@ -486,7 +486,7 @@ static struct s3c2410_udc_mach_info jive_udc_cfg __initdata = {
 /* Jive power management device */
 
 #ifdef CONFIG_PM
-static int jive_pm_suspend(struct sys_device *sd, pm_message_t state)
+static int jive_pm_suspend(void)
 {
 	/* Write the magic value u-boot uses to check for resume into
 	 * the INFORM0 register, and ensure INFORM1 is set to the
@@ -498,10 +498,9 @@ static int jive_pm_suspend(struct sys_device *sd, pm_message_t state)
 	return 0;
 }
 
-static int jive_pm_resume(struct sys_device *sd)
+static void jive_pm_resume(void)
 {
 	__raw_writel(0x0, S3C2412_INFORM0);
-	return 0;
 }
 
 #else
@@ -509,16 +508,11 @@ static int jive_pm_resume(struct sys_device *sd)
 #define jive_pm_resume NULL
 #endif
 
-static struct sysdev_class jive_pm_sysclass = {
-	.name		= "jive-pm",
+static struct syscore_ops jive_pm_syscore_ops = {
 	.suspend	= jive_pm_suspend,
 	.resume		= jive_pm_resume,
 };
 
-static struct sys_device jive_pm_sysdev = {
-	.cls		= &jive_pm_sysclass,
-};
-
 static void __init jive_map_io(void)
 {
 	s3c24xx_init_io(jive_iodesc, ARRAY_SIZE(jive_iodesc));
@@ -536,10 +530,9 @@ static void jive_power_off(void)
 
 static void __init jive_machine_init(void)
 {
-	/* register system devices for managing low level suspend */
+	/* register system core operations for managing low level suspend */
 
-	sysdev_class_register(&jive_pm_sysclass);
-	sysdev_register(&jive_pm_sysdev);
+	register_syscore_ops(&jive_pm_syscore_ops);
 
 	/* write our sleep configurations for the IO. Pull down all unused
 	 * IO, ensure that we have turned off all peripherals we do not
diff --git a/arch/arm/mach-s3c2412/pm.c b/arch/arm/mach-s3c2412/pm.c
index a7417c479ff..752b13a7b3d 100644
--- a/arch/arm/mach-s3c2412/pm.c
+++ b/arch/arm/mach-s3c2412/pm.c
@@ -17,6 +17,7 @@
 #include <linux/timer.h>
 #include <linux/init.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
 
@@ -86,13 +87,24 @@ static struct sleep_save s3c2412_sleep[] = {
 	SAVE_ITEM(S3C2413_GPJSLPCON),
 };
 
-static int s3c2412_pm_suspend(struct sys_device *dev, pm_message_t state)
+static struct sysdev_driver s3c2412_pm_driver = {
+	.add		= s3c2412_pm_add,
+};
+
+static __init int s3c2412_pm_init(void)
+{
+	return sysdev_driver_register(&s3c2412_sysclass, &s3c2412_pm_driver);
+}
+
+arch_initcall(s3c2412_pm_init);
+
+static int s3c2412_pm_suspend(void)
 {
 	s3c_pm_do_save(s3c2412_sleep, ARRAY_SIZE(s3c2412_sleep));
 	return 0;
 }
 
-static int s3c2412_pm_resume(struct sys_device *dev)
+static void s3c2412_pm_resume(void)
 {
 	unsigned long tmp;
 
@@ -102,18 +114,9 @@ static int s3c2412_pm_resume(struct sys_device *dev)
 	__raw_writel(tmp, S3C2412_PWRCFG);
 
 	s3c_pm_do_restore(s3c2412_sleep, ARRAY_SIZE(s3c2412_sleep));
-	return 0;
 }
 
-static struct sysdev_driver s3c2412_pm_driver = {
-	.add		= s3c2412_pm_add,
+struct syscore_ops s3c2412_pm_syscore_ops = {
 	.suspend	= s3c2412_pm_suspend,
 	.resume		= s3c2412_pm_resume,
 };
-
-static __init int s3c2412_pm_init(void)
-{
-	return sysdev_driver_register(&s3c2412_sysclass, &s3c2412_pm_driver);
-}
-
-arch_initcall(s3c2412_pm_init);
diff --git a/arch/arm/mach-s3c2412/s3c2412.c b/arch/arm/mach-s3c2412/s3c2412.c
index 4c6df51ddf3..ef0958d3e5c 100644
--- a/arch/arm/mach-s3c2412/s3c2412.c
+++ b/arch/arm/mach-s3c2412/s3c2412.c
@@ -19,6 +19,7 @@
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/serial_core.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
@@ -244,5 +245,8 @@ int __init s3c2412_init(void)
 {
 	printk("S3C2412: Initialising architecture\n");
 
+	register_syscore_ops(&s3c2412_pm_syscore_ops);
+	register_syscore_ops(&s3c24xx_irq_syscore_ops);
+
 	return sysdev_register(&s3c2412_sysdev);
 }
diff --git a/arch/arm/mach-s3c2416/irq.c b/arch/arm/mach-s3c2416/irq.c
index 77b38f2381c..28ad20d4244 100644
--- a/arch/arm/mach-s3c2416/irq.c
+++ b/arch/arm/mach-s3c2416/irq.c
@@ -236,8 +236,6 @@ static int __init s3c2416_irq_add(struct sys_device *sysdev)
 
 static struct sysdev_driver s3c2416_irq_driver = {
 	.add		= s3c2416_irq_add,
-	.suspend	= s3c24xx_irq_suspend,
-	.resume		= s3c24xx_irq_resume,
 };
 
 static int __init s3c2416_irq_init(void)
diff --git a/arch/arm/mach-s3c2416/pm.c b/arch/arm/mach-s3c2416/pm.c
index 4a04205b04d..41db2b21e21 100644
--- a/arch/arm/mach-s3c2416/pm.c
+++ b/arch/arm/mach-s3c2416/pm.c
@@ -11,6 +11,7 @@
 */
 
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/io.h>
 
 #include <asm/cacheflush.h>
@@ -55,30 +56,26 @@ static int s3c2416_pm_add(struct sys_device *sysdev)
 	return 0;
 }
 
-static int s3c2416_pm_suspend(struct sys_device *dev, pm_message_t state)
+static struct sysdev_driver s3c2416_pm_driver = {
+	.add		= s3c2416_pm_add,
+};
+
+static __init int s3c2416_pm_init(void)
 {
-	return 0;
+	return sysdev_driver_register(&s3c2416_sysclass, &s3c2416_pm_driver);
 }
 
-static int s3c2416_pm_resume(struct sys_device *dev)
+arch_initcall(s3c2416_pm_init);
+
+
+static void s3c2416_pm_resume(void)
 {
 	/* unset the return-from-sleep amd inform flags */
 	__raw_writel(0x0, S3C2443_PWRMODE);
 	__raw_writel(0x0, S3C2412_INFORM0);
 	__raw_writel(0x0, S3C2412_INFORM1);
-
-	return 0;
 }
 
-static struct sysdev_driver s3c2416_pm_driver = {
-	.add		= s3c2416_pm_add,
-	.suspend	= s3c2416_pm_suspend,
+struct syscore_ops s3c2416_pm_syscore_ops = {
 	.resume		= s3c2416_pm_resume,
 };
-
-static __init int s3c2416_pm_init(void)
-{
-	return sysdev_driver_register(&s3c2416_sysclass, &s3c2416_pm_driver);
-}
-
-arch_initcall(s3c2416_pm_init);
diff --git a/arch/arm/mach-s3c2416/s3c2416.c b/arch/arm/mach-s3c2416/s3c2416.c
index ba7fd873743..494ce913dc9 100644
--- a/arch/arm/mach-s3c2416/s3c2416.c
+++ b/arch/arm/mach-s3c2416/s3c2416.c
@@ -32,6 +32,7 @@
 #include <linux/platform_device.h>
 #include <linux/serial_core.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/clk.h>
 #include <linux/io.h>
 
@@ -54,6 +55,7 @@
 #include <plat/devs.h>
 #include <plat/cpu.h>
 #include <plat/sdhci.h>
+#include <plat/pm.h>
 
 #include <plat/iic-core.h>
 #include <plat/fb-core.h>
@@ -95,6 +97,9 @@ int __init s3c2416_init(void)
 
 	s3c_fb_setname("s3c2443-fb");
 
+	register_syscore_ops(&s3c2416_pm_syscore_ops);
+	register_syscore_ops(&s3c24xx_irq_syscore_ops);
+
 	return sysdev_register(&s3c2416_sysdev);
 }
 
diff --git a/arch/arm/mach-s3c2440/mach-osiris.c b/arch/arm/mach-s3c2440/mach-osiris.c
index 14dc6789775..d8853639331 100644
--- a/arch/arm/mach-s3c2440/mach-osiris.c
+++ b/arch/arm/mach-s3c2440/mach-osiris.c
@@ -17,7 +17,7 @@
 #include <linux/init.h>
 #include <linux/gpio.h>
 #include <linux/device.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/serial_core.h>
 #include <linux/clk.h>
 #include <linux/i2c.h>
@@ -284,7 +284,7 @@ static struct platform_device osiris_pcmcia = {
 #ifdef CONFIG_PM
 static unsigned char pm_osiris_ctrl0;
 
-static int osiris_pm_suspend(struct sys_device *sd, pm_message_t state)
+static int osiris_pm_suspend(void)
 {
 	unsigned int tmp;
 
@@ -304,7 +304,7 @@ static int osiris_pm_suspend(struct sys_device *sd, pm_message_t state)
 	return 0;
 }
 
-static int osiris_pm_resume(struct sys_device *sd)
+static void osiris_pm_resume(void)
 {
 	if (pm_osiris_ctrl0 & OSIRIS_CTRL0_FIX8)
 		__raw_writeb(OSIRIS_CTRL1_FIX8, OSIRIS_VA_CTRL1);
@@ -312,8 +312,6 @@ static int osiris_pm_resume(struct sys_device *sd)
 	__raw_writeb(pm_osiris_ctrl0, OSIRIS_VA_CTRL0);
 
 	s3c_gpio_cfgpin(S3C2410_GPA(21), S3C2410_GPA21_nRSTOUT);
-
-	return 0;
 }
 
 #else
@@ -321,16 +319,11 @@ static int osiris_pm_resume(struct sys_device *sd)
 #define osiris_pm_resume NULL
 #endif
 
-static struct sysdev_class osiris_pm_sysclass = {
-	.name		= "mach-osiris",
+static struct syscore_ops osiris_pm_syscore_ops = {
 	.suspend	= osiris_pm_suspend,
 	.resume		= osiris_pm_resume,
 };
 
-static struct sys_device osiris_pm_sysdev = {
-	.cls		= &osiris_pm_sysclass,
-};
-
 /* Link for DVS driver to TPS65011 */
 
 static void osiris_tps_release(struct device *dev)
@@ -439,8 +432,7 @@ static void __init osiris_map_io(void)
 
 static void __init osiris_init(void)
 {
-	sysdev_class_register(&osiris_pm_sysclass);
-	sysdev_register(&osiris_pm_sysdev);
+	register_syscore_ops(&osiris_pm_syscore_ops);
 
 	s3c_i2c0_set_platdata(NULL);
 	s3c_nand_set_platdata(&osiris_nand_info);
diff --git a/arch/arm/mach-s3c2440/s3c2440.c b/arch/arm/mach-s3c2440/s3c2440.c
index f7663f731ea..ce99ff72838 100644
--- a/arch/arm/mach-s3c2440/s3c2440.c
+++ b/arch/arm/mach-s3c2440/s3c2440.c
@@ -19,6 +19,7 @@
 #include <linux/platform_device.h>
 #include <linux/serial_core.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/gpio.h>
 #include <linux/clk.h>
 #include <linux/io.h>
@@ -33,6 +34,7 @@
 #include <plat/devs.h>
 #include <plat/cpu.h>
 #include <plat/s3c244x.h>
+#include <plat/pm.h>
 
 #include <plat/gpio-core.h>
 #include <plat/gpio-cfg.h>
@@ -51,6 +53,12 @@ int __init s3c2440_init(void)
 	s3c_device_wdt.resource[1].start = IRQ_S3C2440_WDT;
 	s3c_device_wdt.resource[1].end   = IRQ_S3C2440_WDT;
 
+	/* register suspend/resume handlers */
+
+	register_syscore_ops(&s3c2410_pm_syscore_ops);
+	register_syscore_ops(&s3c244x_pm_syscore_ops);
+	register_syscore_ops(&s3c24xx_irq_syscore_ops);
+
 	/* register our system device for everything else */
 
 	return sysdev_register(&s3c2440_sysdev);
diff --git a/arch/arm/mach-s3c2440/s3c2442.c b/arch/arm/mach-s3c2440/s3c2442.c
index ecf81354655..6224bad4d60 100644
--- a/arch/arm/mach-s3c2440/s3c2442.c
+++ b/arch/arm/mach-s3c2440/s3c2442.c
@@ -29,6 +29,7 @@
 #include <linux/err.h>
 #include <linux/device.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/interrupt.h>
 #include <linux/ioport.h>
 #include <linux/mutex.h>
@@ -45,6 +46,7 @@
 #include <plat/clock.h>
 #include <plat/cpu.h>
 #include <plat/s3c244x.h>
+#include <plat/pm.h>
 
 #include <plat/gpio-core.h>
 #include <plat/gpio-cfg.h>
@@ -167,6 +169,10 @@ int __init s3c2442_init(void)
 {
 	printk("S3C2442: Initialising architecture\n");
 
+	register_syscore_ops(&s3c2410_pm_syscore_ops);
+	register_syscore_ops(&s3c244x_pm_syscore_ops);
+	register_syscore_ops(&s3c24xx_irq_syscore_ops);
+
 	return sysdev_register(&s3c2442_sysdev);
 }
 
diff --git a/arch/arm/mach-s3c2440/s3c244x-irq.c b/arch/arm/mach-s3c2440/s3c244x-irq.c
index de07c2feaa3..c63e8f26d90 100644
--- a/arch/arm/mach-s3c2440/s3c244x-irq.c
+++ b/arch/arm/mach-s3c2440/s3c244x-irq.c
@@ -116,8 +116,6 @@ static int s3c244x_irq_add(struct sys_device *sysdev)
 
 static struct sysdev_driver s3c2440_irq_driver = {
 	.add		= s3c244x_irq_add,
-	.suspend	= s3c24xx_irq_suspend,
-	.resume		= s3c24xx_irq_resume,
 };
 
 static int s3c2440_irq_init(void)
@@ -129,8 +127,6 @@ arch_initcall(s3c2440_irq_init);
 
 static struct sysdev_driver s3c2442_irq_driver = {
 	.add		= s3c244x_irq_add,
-	.suspend	= s3c24xx_irq_suspend,
-	.resume		= s3c24xx_irq_resume,
 };
 
 
diff --git a/arch/arm/mach-s3c2440/s3c244x.c b/arch/arm/mach-s3c2440/s3c244x.c
index 90c1707b9c9..7e8a23d2098 100644
--- a/arch/arm/mach-s3c2440/s3c244x.c
+++ b/arch/arm/mach-s3c2440/s3c244x.c
@@ -19,6 +19,7 @@
 #include <linux/serial_core.h>
 #include <linux/platform_device.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/clk.h>
 #include <linux/io.h>
 
@@ -134,45 +135,14 @@ void __init s3c244x_init_clocks(int xtal)
 	s3c2410_baseclk_add();
 }
 
-#ifdef CONFIG_PM
-
-static struct sleep_save s3c244x_sleep[] = {
-	SAVE_ITEM(S3C2440_DSC0),
-	SAVE_ITEM(S3C2440_DSC1),
-	SAVE_ITEM(S3C2440_GPJDAT),
-	SAVE_ITEM(S3C2440_GPJCON),
-	SAVE_ITEM(S3C2440_GPJUP)
-};
-
-static int s3c244x_suspend(struct sys_device *dev, pm_message_t state)
-{
-	s3c_pm_do_save(s3c244x_sleep, ARRAY_SIZE(s3c244x_sleep));
-	return 0;
-}
-
-static int s3c244x_resume(struct sys_device *dev)
-{
-	s3c_pm_do_restore(s3c244x_sleep, ARRAY_SIZE(s3c244x_sleep));
-	return 0;
-}
-
-#else
-#define s3c244x_suspend NULL
-#define s3c244x_resume  NULL
-#endif
-
 /* Since the S3C2442 and S3C2440 share  items, put both sysclasses here */
 
 struct sysdev_class s3c2440_sysclass = {
 	.name		= "s3c2440-core",
-	.suspend	= s3c244x_suspend,
-	.resume		= s3c244x_resume
 };
 
 struct sysdev_class s3c2442_sysclass = {
 	.name		= "s3c2442-core",
-	.suspend	= s3c244x_suspend,
-	.resume		= s3c244x_resume
 };
 
 /* need to register class before we actually register the device, and
@@ -194,3 +164,33 @@ static int __init s3c2442_core_init(void)
 }
 
 core_initcall(s3c2442_core_init);
+
+
+#ifdef CONFIG_PM
+static struct sleep_save s3c244x_sleep[] = {
+	SAVE_ITEM(S3C2440_DSC0),
+	SAVE_ITEM(S3C2440_DSC1),
+	SAVE_ITEM(S3C2440_GPJDAT),
+	SAVE_ITEM(S3C2440_GPJCON),
+	SAVE_ITEM(S3C2440_GPJUP)
+};
+
+static int s3c244x_suspend(void)
+{
+	s3c_pm_do_save(s3c244x_sleep, ARRAY_SIZE(s3c244x_sleep));
+	return 0;
+}
+
+static void s3c244x_resume(void)
+{
+	s3c_pm_do_restore(s3c244x_sleep, ARRAY_SIZE(s3c244x_sleep));
+}
+#else
+#define s3c244x_suspend NULL
+#define s3c244x_resume  NULL
+#endif
+
+struct syscore_ops s3c244x_pm_syscore_ops = {
+	.suspend	= s3c244x_suspend,
+	.resume		= s3c244x_resume,
+};
diff --git a/arch/arm/mach-s3c64xx/irq-pm.c b/arch/arm/mach-s3c64xx/irq-pm.c
index da1bec64b9d..8bec61e242c 100644
--- a/arch/arm/mach-s3c64xx/irq-pm.c
+++ b/arch/arm/mach-s3c64xx/irq-pm.c
@@ -13,7 +13,7 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/interrupt.h>
 #include <linux/serial_core.h>
 #include <linux/irq.h>
@@ -54,7 +54,7 @@ static struct irq_grp_save {
 
 static u32 irq_uart_mask[CONFIG_SERIAL_SAMSUNG_UARTS];
 
-static int s3c64xx_irq_pm_suspend(struct sys_device *dev, pm_message_t state)
+static int s3c64xx_irq_pm_suspend(void)
 {
 	struct irq_grp_save *grp = eint_grp_save;
 	int i;
@@ -75,7 +75,7 @@ static int s3c64xx_irq_pm_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static int s3c64xx_irq_pm_resume(struct sys_device *dev)
+static void s3c64xx_irq_pm_resume(void)
 {
 	struct irq_grp_save *grp = eint_grp_save;
 	int i;
@@ -94,18 +94,18 @@ static int s3c64xx_irq_pm_resume(struct sys_device *dev)
 	}
 
 	S3C_PMDBG("%s: IRQ configuration restored\n", __func__);
-	return 0;
 }
 
-static struct sysdev_driver s3c64xx_irq_driver = {
+struct syscore_ops s3c64xx_irq_syscore_ops = {
 	.suspend = s3c64xx_irq_pm_suspend,
 	.resume	 = s3c64xx_irq_pm_resume,
 };
 
-static int __init s3c64xx_irq_pm_init(void)
+static __init int s3c64xx_syscore_init(void)
 {
-	return sysdev_driver_register(&s3c64xx_sysclass, &s3c64xx_irq_driver);
-}
+	register_syscore_ops(&s3c64xx_irq_syscore_ops);
 
-arch_initcall(s3c64xx_irq_pm_init);
+	return 0;
+}
 
+core_initcall(s3c64xx_syscore_init);
diff --git a/arch/arm/mach-s5pv210/pm.c b/arch/arm/mach-s5pv210/pm.c
index 549d7924fd4..24febae3d4c 100644
--- a/arch/arm/mach-s5pv210/pm.c
+++ b/arch/arm/mach-s5pv210/pm.c
@@ -16,6 +16,7 @@
 
 #include <linux/init.h>
 #include <linux/suspend.h>
+#include <linux/syscore_ops.h>
 #include <linux/io.h>
 
 #include <plat/cpu.h>
@@ -140,7 +141,17 @@ static int s5pv210_pm_add(struct sys_device *sysdev)
 	return 0;
 }
 
-static int s5pv210_pm_resume(struct sys_device *dev)
+static struct sysdev_driver s5pv210_pm_driver = {
+	.add		= s5pv210_pm_add,
+};
+
+static __init int s5pv210_pm_drvinit(void)
+{
+	return sysdev_driver_register(&s5pv210_sysclass, &s5pv210_pm_driver);
+}
+arch_initcall(s5pv210_pm_drvinit);
+
+static void s5pv210_pm_resume(void)
 {
 	u32 tmp;
 
@@ -150,17 +161,15 @@ static int s5pv210_pm_resume(struct sys_device *dev)
 	__raw_writel(tmp , S5P_OTHERS);
 
 	s3c_pm_do_restore_core(s5pv210_core_save, ARRAY_SIZE(s5pv210_core_save));
-
-	return 0;
 }
 
-static struct sysdev_driver s5pv210_pm_driver = {
-	.add		= s5pv210_pm_add,
+static struct syscore_ops s5pv210_pm_syscore_ops = {
 	.resume		= s5pv210_pm_resume,
 };
 
-static __init int s5pv210_pm_drvinit(void)
+static __init int s5pv210_pm_syscore_init(void)
 {
-	return sysdev_driver_register(&s5pv210_sysclass, &s5pv210_pm_driver);
+	register_syscore_ops(&s5pv210_pm_syscore_ops);
+	return 0;
 }
-arch_initcall(s5pv210_pm_drvinit);
+arch_initcall(s5pv210_pm_syscore_init);
diff --git a/arch/arm/mach-sa1100/irq.c b/arch/arm/mach-sa1100/irq.c
index 423ddb3d65e..dfbf824a69f 100644
--- a/arch/arm/mach-sa1100/irq.c
+++ b/arch/arm/mach-sa1100/irq.c
@@ -14,7 +14,7 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/ioport.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <mach/hardware.h>
 #include <asm/mach/irq.h>
@@ -234,7 +234,7 @@ static struct sa1100irq_state {
 	unsigned int	iccr;
 } sa1100irq_state;
 
-static int sa1100irq_suspend(struct sys_device *dev, pm_message_t state)
+static int sa1100irq_suspend(void)
 {
 	struct sa1100irq_state *st = &sa1100irq_state;
 
@@ -264,7 +264,7 @@ static int sa1100irq_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static int sa1100irq_resume(struct sys_device *dev)
+static void sa1100irq_resume(void)
 {
 	struct sa1100irq_state *st = &sa1100irq_state;
 
@@ -277,24 +277,17 @@ static int sa1100irq_resume(struct sys_device *dev)
 
 		ICMR = st->icmr;
 	}
-	return 0;
 }
 
-static struct sysdev_class sa1100irq_sysclass = {
-	.name		= "sa11x0-irq",
+static struct syscore_ops sa1100irq_syscore_ops = {
 	.suspend	= sa1100irq_suspend,
 	.resume		= sa1100irq_resume,
 };
 
-static struct sys_device sa1100irq_device = {
-	.id		= 0,
-	.cls		= &sa1100irq_sysclass,
-};
-
 static int __init sa1100irq_init_devicefs(void)
 {
-	sysdev_class_register(&sa1100irq_sysclass);
-	return sysdev_register(&sa1100irq_device);
+	register_syscore_ops(&sa1100irq_syscore_ops);
+	return 0;
 }
 
 device_initcall(sa1100irq_init_devicefs);
diff --git a/arch/arm/mach-shmobile/pm_runtime.c b/arch/arm/mach-shmobile/pm_runtime.c
index 94912d3944d..2d1b67a59e4 100644
--- a/arch/arm/mach-shmobile/pm_runtime.c
+++ b/arch/arm/mach-shmobile/pm_runtime.c
@@ -18,152 +18,41 @@
 #include <linux/clk.h>
 #include <linux/sh_clk.h>
 #include <linux/bitmap.h>
+#include <linux/slab.h>
 
 #ifdef CONFIG_PM_RUNTIME
-#define BIT_ONCE 0
-#define BIT_ACTIVE 1
-#define BIT_CLK_ENABLED 2
 
-struct pm_runtime_data {
-	unsigned long flags;
-	struct clk *clk;
-};
-
-static void __devres_release(struct device *dev, void *res)
-{
-	struct pm_runtime_data *prd = res;
-
-	dev_dbg(dev, "__devres_release()\n");
-
-	if (test_bit(BIT_CLK_ENABLED, &prd->flags))
-		clk_disable(prd->clk);
-
-	if (test_bit(BIT_ACTIVE, &prd->flags))
-		clk_put(prd->clk);
-}
-
-static struct pm_runtime_data *__to_prd(struct device *dev)
-{
-	return devres_find(dev, __devres_release, NULL, NULL);
-}
-
-static void platform_pm_runtime_init(struct device *dev,
-				     struct pm_runtime_data *prd)
-{
-	if (prd && !test_and_set_bit(BIT_ONCE, &prd->flags)) {
-		prd->clk = clk_get(dev, NULL);
-		if (!IS_ERR(prd->clk)) {
-			set_bit(BIT_ACTIVE, &prd->flags);
-			dev_info(dev, "clocks managed by runtime pm\n");
-		}
-	}
-}
-
-static void platform_pm_runtime_bug(struct device *dev,
-				    struct pm_runtime_data *prd)
-{
-	if (prd && !test_and_set_bit(BIT_ONCE, &prd->flags))
-		dev_err(dev, "runtime pm suspend before resume\n");
-}
-
-int platform_pm_runtime_suspend(struct device *dev)
-{
-	struct pm_runtime_data *prd = __to_prd(dev);
-
-	dev_dbg(dev, "platform_pm_runtime_suspend()\n");
-
-	platform_pm_runtime_bug(dev, prd);
-
-	if (prd && test_bit(BIT_ACTIVE, &prd->flags)) {
-		clk_disable(prd->clk);
-		clear_bit(BIT_CLK_ENABLED, &prd->flags);
-	}
-
-	return 0;
-}
-
-int platform_pm_runtime_resume(struct device *dev)
-{
-	struct pm_runtime_data *prd = __to_prd(dev);
-
-	dev_dbg(dev, "platform_pm_runtime_resume()\n");
-
-	platform_pm_runtime_init(dev, prd);
-
-	if (prd && test_bit(BIT_ACTIVE, &prd->flags)) {
-		clk_enable(prd->clk);
-		set_bit(BIT_CLK_ENABLED, &prd->flags);
-	}
-
-	return 0;
-}
-
-int platform_pm_runtime_idle(struct device *dev)
+static int default_platform_runtime_idle(struct device *dev)
 {
 	/* suspend synchronously to disable clocks immediately */
 	return pm_runtime_suspend(dev);
 }
 
-static int platform_bus_notify(struct notifier_block *nb,
-			       unsigned long action, void *data)
-{
-	struct device *dev = data;
-	struct pm_runtime_data *prd;
-
-	dev_dbg(dev, "platform_bus_notify() %ld !\n", action);
-
-	if (action == BUS_NOTIFY_BIND_DRIVER) {
-		prd = devres_alloc(__devres_release, sizeof(*prd), GFP_KERNEL);
-		if (prd)
-			devres_add(dev, prd);
-		else
-			dev_err(dev, "unable to alloc memory for runtime pm\n");
-	}
-
-	return 0;
-}
-
-#else /* CONFIG_PM_RUNTIME */
-
-static int platform_bus_notify(struct notifier_block *nb,
-			       unsigned long action, void *data)
-{
-	struct device *dev = data;
-	struct clk *clk;
+static struct dev_power_domain default_power_domain = {
+	.ops = {
+		.runtime_suspend = pm_runtime_clk_suspend,
+		.runtime_resume = pm_runtime_clk_resume,
+		.runtime_idle = default_platform_runtime_idle,
+		USE_PLATFORM_PM_SLEEP_OPS
+	},
+};
 
-	dev_dbg(dev, "platform_bus_notify() %ld !\n", action);
+#define DEFAULT_PWR_DOMAIN_PTR	(&default_power_domain)
 
-	switch (action) {
-	case BUS_NOTIFY_BIND_DRIVER:
-		clk = clk_get(dev, NULL);
-		if (!IS_ERR(clk)) {
-			clk_enable(clk);
-			clk_put(clk);
-			dev_info(dev, "runtime pm disabled, clock forced on\n");
-		}
-		break;
-	case BUS_NOTIFY_UNBOUND_DRIVER:
-		clk = clk_get(dev, NULL);
-		if (!IS_ERR(clk)) {
-			clk_disable(clk);
-			clk_put(clk);
-			dev_info(dev, "runtime pm disabled, clock forced off\n");
-		}
-		break;
-	}
+#else
 
-	return 0;
-}
+#define DEFAULT_PWR_DOMAIN_PTR	NULL
 
 #endif /* CONFIG_PM_RUNTIME */
 
-static struct notifier_block platform_bus_notifier = {
-	.notifier_call = platform_bus_notify
+static struct pm_clk_notifier_block platform_bus_notifier = {
+	.pwr_domain = DEFAULT_PWR_DOMAIN_PTR,
+	.con_ids = { NULL, },
 };
 
 static int __init sh_pm_runtime_init(void)
 {
-	bus_register_notifier(&platform_bus_type, &platform_bus_notifier);
+	pm_runtime_clk_add_notifier(&platform_bus_type, &platform_bus_notifier);
 	return 0;
 }
 core_initcall(sh_pm_runtime_init);
diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig
index 3cdeffc97b4..5ec1846aa1d 100644
--- a/arch/arm/mach-tegra/Kconfig
+++ b/arch/arm/mach-tegra/Kconfig
@@ -27,12 +27,14 @@ comment "Tegra board type"
 
 config MACH_HARMONY
        bool "Harmony board"
+       select MACH_HAS_SND_SOC_TEGRA_WM8903
        help
          Support for nVidia Harmony development platform
 
 config MACH_KAEN
        bool "Kaen board"
        select MACH_SEABOARD
+       select MACH_HAS_SND_SOC_TEGRA_WM8903
        help
          Support for the Kaen version of Seaboard
 
@@ -43,6 +45,7 @@ config MACH_PAZ00
 
 config MACH_SEABOARD
        bool "Seaboard board"
+       select MACH_HAS_SND_SOC_TEGRA_WM8903
        help
          Support for nVidia Seaboard development platform. It will
 	 also be included for some of the derivative boards that
diff --git a/arch/arm/mach-tegra/board-harmony.c b/arch/arm/mach-tegra/board-harmony.c
index 75c918a86a3..30e18bc6064 100644
--- a/arch/arm/mach-tegra/board-harmony.c
+++ b/arch/arm/mach-tegra/board-harmony.c
@@ -34,7 +34,7 @@
 #include <asm/mach/time.h>
 #include <asm/setup.h>
 
-#include <mach/harmony_audio.h>
+#include <mach/tegra_wm8903_pdata.h>
 #include <mach/iomap.h>
 #include <mach/irqs.h>
 #include <mach/sdhci.h>
@@ -67,15 +67,16 @@ static struct platform_device debug_uart = {
 	},
 };
 
-static struct harmony_audio_platform_data harmony_audio_pdata = {
+static struct tegra_wm8903_platform_data harmony_audio_pdata = {
 	.gpio_spkr_en		= TEGRA_GPIO_SPKR_EN,
 	.gpio_hp_det		= TEGRA_GPIO_HP_DET,
+	.gpio_hp_mute		= -1,
 	.gpio_int_mic_en	= TEGRA_GPIO_INT_MIC_EN,
 	.gpio_ext_mic_en	= TEGRA_GPIO_EXT_MIC_EN,
 };
 
 static struct platform_device harmony_audio_device = {
-	.name	= "tegra-snd-harmony",
+	.name	= "tegra-snd-wm8903",
 	.id	= 0,
 	.dev	= {
 		.platform_data  = &harmony_audio_pdata,
diff --git a/arch/arm/mach-tegra/include/mach/barriers.h b/arch/arm/mach-tegra/include/mach/barriers.h
index cc115174899..425b42e91ef 100644
--- a/arch/arm/mach-tegra/include/mach/barriers.h
+++ b/arch/arm/mach-tegra/include/mach/barriers.h
@@ -23,7 +23,7 @@
 
 #include <asm/outercache.h>
 
-#define rmb()		dmb()
+#define rmb()		dsb()
 #define wmb()		do { dsb(); outer_sync(); } while (0)
 #define mb()		wmb()
 
diff --git a/arch/arm/mach-tegra/include/mach/harmony_audio.h b/arch/arm/mach-tegra/include/mach/tegra_wm8903_pdata.h
index af086500ab7..9d293344a7f 100644
--- a/arch/arm/mach-tegra/include/mach/harmony_audio.h
+++ b/arch/arm/mach-tegra/include/mach/tegra_wm8903_pdata.h
@@ -1,5 +1,5 @@
 /*
- * arch/arm/mach-tegra/include/mach/harmony_audio.h
+ * arch/arm/mach-tegra/include/mach/tegra_wm8903_pdata.h
  *
  * Copyright 2011 NVIDIA, Inc.
  *
@@ -14,9 +14,10 @@
  *
  */
 
-struct harmony_audio_platform_data {
+struct tegra_wm8903_platform_data {
 	int gpio_spkr_en;
 	int gpio_hp_det;
+	int gpio_hp_mute;
 	int gpio_int_mic_en;
 	int gpio_ext_mic_en;
 };
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index e5f6fc42834..e591513bb53 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -392,7 +392,7 @@ free_memmap(unsigned long start_pfn, unsigned long end_pfn)
 	 * Convert start_pfn/end_pfn to a struct page pointer.
 	 */
 	start_pg = pfn_to_page(start_pfn - 1) + 1;
-	end_pg = pfn_to_page(end_pfn);
+	end_pg = pfn_to_page(end_pfn - 1) + 1;
 
 	/*
 	 * Convert to physical addresses, and
@@ -426,6 +426,14 @@ static void __init free_unused_memmap(struct meminfo *mi)
 
 		bank_start = bank_pfn_start(bank);
 
+#ifdef CONFIG_SPARSEMEM
+		/*
+		 * Take care not to free memmap entries that don't exist
+		 * due to SPARSEMEM sections which aren't present.
+		 */
+		bank_start = min(bank_start,
+				 ALIGN(prev_bank_end, PAGES_PER_SECTION));
+#endif
 		/*
 		 * If we had a previous bank, and there is a space
 		 * between the current bank and the previous, free it.
@@ -440,6 +448,12 @@ static void __init free_unused_memmap(struct meminfo *mi)
 		 */
 		prev_bank_end = ALIGN(bank_pfn_end(bank), MAX_ORDER_NR_PAGES);
 	}
+
+#ifdef CONFIG_SPARSEMEM
+	if (!IS_ALIGNED(prev_bank_end, PAGES_PER_SECTION))
+		free_memmap(prev_bank_end,
+			    ALIGN(prev_bank_end, PAGES_PER_SECTION));
+#endif
 }
 
 static void __init free_highpages(void)
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index ce233bcbf50..42af97664c9 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -395,7 +395,7 @@ ENTRY(xscale_dma_a0_map_area)
 	teq	r2, #DMA_TO_DEVICE
 	beq	xscale_dma_clean_range
 	b	xscale_dma_flush_range
-ENDPROC(xscsale_dma_a0_map_area)
+ENDPROC(xscale_dma_a0_map_area)
 
 /*
  *	dma_unmap_area(start, size, dir)
diff --git a/arch/arm/plat-mxc/gpio.c b/arch/arm/plat-mxc/gpio.c
index 7a107246fd9..6cd6d7f686f 100644
--- a/arch/arm/plat-mxc/gpio.c
+++ b/arch/arm/plat-mxc/gpio.c
@@ -295,6 +295,12 @@ static int mxc_gpio_direction_output(struct gpio_chip *chip,
 	return 0;
 }
 
+/*
+ * This lock class tells lockdep that GPIO irqs are in a different
+ * category than their parents, so it won't report false recursion.
+ */
+static struct lock_class_key gpio_lock_class;
+
 int __init mxc_gpio_init(struct mxc_gpio_port *port, int cnt)
 {
 	int i, j;
@@ -311,6 +317,7 @@ int __init mxc_gpio_init(struct mxc_gpio_port *port, int cnt)
 		__raw_writel(~0, port[i].base + GPIO_ISR);
 		for (j = port[i].virtual_irq_start;
 			j < port[i].virtual_irq_start + 32; j++) {
+			irq_set_lockdep_class(j, &gpio_lock_class);
 			irq_set_chip_and_handler(j, &gpio_irq_chip,
 						 handle_level_irq);
 			set_irq_flags(j, IRQF_VALID);
diff --git a/arch/arm/plat-mxc/ssi-fiq.S b/arch/arm/plat-mxc/ssi-fiq.S
index 4ddce565b35..8397a2dd19f 100644
--- a/arch/arm/plat-mxc/ssi-fiq.S
+++ b/arch/arm/plat-mxc/ssi-fiq.S
@@ -124,6 +124,8 @@ imx_ssi_fiq_start:
 1:
 		@ return from FIQ
 		subs	pc, lr, #4
+
+		.align
 imx_ssi_fiq_base:
 		.word 0x0
 imx_ssi_fiq_rx_buffer:
diff --git a/arch/arm/plat-omap/gpio.c b/arch/arm/plat-omap/gpio.c
index d2adcdda23c..bd9e32187ea 100644
--- a/arch/arm/plat-omap/gpio.c
+++ b/arch/arm/plat-omap/gpio.c
@@ -17,7 +17,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/io.h>
@@ -1372,9 +1372,7 @@ static const struct dev_pm_ops omap_mpuio_dev_pm_ops = {
 	.resume_noirq = omap_mpuio_resume_noirq,
 };
 
-/* use platform_driver for this, now that there's no longer any
- * point to sys_device (other than not disturbing old code).
- */
+/* use platform_driver for this. */
 static struct platform_driver omap_mpuio_driver = {
 	.driver		= {
 		.name	= "mpuio",
@@ -1745,7 +1743,7 @@ static int __devinit omap_gpio_probe(struct platform_device *pdev)
 }
 
 #if defined(CONFIG_ARCH_OMAP16XX) || defined(CONFIG_ARCH_OMAP2PLUS)
-static int omap_gpio_suspend(struct sys_device *dev, pm_message_t mesg)
+static int omap_gpio_suspend(void)
 {
 	int i;
 
@@ -1795,12 +1793,12 @@ static int omap_gpio_suspend(struct sys_device *dev, pm_message_t mesg)
 	return 0;
 }
 
-static int omap_gpio_resume(struct sys_device *dev)
+static void omap_gpio_resume(void)
 {
 	int i;
 
 	if (!cpu_class_is_omap2() && !cpu_is_omap16xx())
-		return 0;
+		return;
 
 	for (i = 0; i < gpio_bank_count; i++) {
 		struct gpio_bank *bank = &gpio_bank[i];
@@ -1836,21 +1834,13 @@ static int omap_gpio_resume(struct sys_device *dev)
 		__raw_writel(bank->saved_wakeup, wake_set);
 		spin_unlock_irqrestore(&bank->lock, flags);
 	}
-
-	return 0;
 }
 
-static struct sysdev_class omap_gpio_sysclass = {
-	.name		= "gpio",
+static struct syscore_ops omap_gpio_syscore_ops = {
 	.suspend	= omap_gpio_suspend,
 	.resume		= omap_gpio_resume,
 };
 
-static struct sys_device omap_gpio_device = {
-	.id		= 0,
-	.cls		= &omap_gpio_sysclass,
-};
-
 #endif
 
 #ifdef CONFIG_ARCH_OMAP2PLUS
@@ -2108,21 +2098,14 @@ postcore_initcall(omap_gpio_drv_reg);
 
 static int __init omap_gpio_sysinit(void)
 {
-	int ret = 0;
-
 	mpuio_init();
 
 #if defined(CONFIG_ARCH_OMAP16XX) || defined(CONFIG_ARCH_OMAP2PLUS)
-	if (cpu_is_omap16xx() || cpu_class_is_omap2()) {
-		if (ret == 0) {
-			ret = sysdev_class_register(&omap_gpio_sysclass);
-			if (ret == 0)
-				ret = sysdev_register(&omap_gpio_device);
-		}
-	}
+	if (cpu_is_omap16xx() || cpu_class_is_omap2())
+		register_syscore_ops(&omap_gpio_syscore_ops);
 #endif
 
-	return ret;
+	return 0;
 }
 
 arch_initcall(omap_gpio_sysinit);
diff --git a/arch/arm/plat-omap/iommu.c b/arch/arm/plat-omap/iommu.c
index 8a51fd58f65..34fc31ee908 100644
--- a/arch/arm/plat-omap/iommu.c
+++ b/arch/arm/plat-omap/iommu.c
@@ -793,6 +793,8 @@ static irqreturn_t iommu_fault_handler(int irq, void *data)
 	clk_enable(obj->clk);
 	errs = iommu_report_fault(obj, &da);
 	clk_disable(obj->clk);
+	if (errs == 0)
+		return IRQ_HANDLED;
 
 	/* Fault callback or TLB/PTE Dynamic loading */
 	if (obj->isr && !obj->isr(obj, da, errs, obj->isr_priv))
diff --git a/arch/arm/plat-omap/omap_device.c b/arch/arm/plat-omap/omap_device.c
index 9bbda9acb73..a37b8eb65b7 100644
--- a/arch/arm/plat-omap/omap_device.c
+++ b/arch/arm/plat-omap/omap_device.c
@@ -536,6 +536,28 @@ int omap_early_device_register(struct omap_device *od)
 	return 0;
 }
 
+static int _od_runtime_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+
+	return omap_device_idle(pdev);
+}
+
+static int _od_runtime_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+
+	return omap_device_enable(pdev);
+}
+
+static struct dev_power_domain omap_device_power_domain = {
+	.ops = {
+		.runtime_suspend = _od_runtime_suspend,
+		.runtime_resume = _od_runtime_resume,
+		USE_PLATFORM_PM_SLEEP_OPS
+	}
+};
+
 /**
  * omap_device_register - register an omap_device with one omap_hwmod
  * @od: struct omap_device * to register
@@ -549,6 +571,7 @@ int omap_device_register(struct omap_device *od)
 	pr_debug("omap_device: %s: registering\n", od->pdev.name);
 
 	od->pdev.dev.parent = &omap_device_parent;
+	od->pdev.dev.pwr_domain = &omap_device_power_domain;
 	return platform_device_register(&od->pdev);
 }
 
diff --git a/arch/arm/plat-pxa/gpio.c b/arch/arm/plat-pxa/gpio.c
index dce088f4567..48ebb9479b6 100644
--- a/arch/arm/plat-pxa/gpio.c
+++ b/arch/arm/plat-pxa/gpio.c
@@ -15,7 +15,7 @@
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/io.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/slab.h>
 
 #include <mach/gpio.h>
@@ -295,7 +295,7 @@ void __init pxa_init_gpio(int mux_irq, int start, int end, set_wake_t fn)
 }
 
 #ifdef CONFIG_PM
-static int pxa_gpio_suspend(struct sys_device *dev, pm_message_t state)
+static int pxa_gpio_suspend(void)
 {
 	struct pxa_gpio_chip *c;
 	int gpio;
@@ -312,7 +312,7 @@ static int pxa_gpio_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static int pxa_gpio_resume(struct sys_device *dev)
+static void pxa_gpio_resume(void)
 {
 	struct pxa_gpio_chip *c;
 	int gpio;
@@ -326,22 +326,13 @@ static int pxa_gpio_resume(struct sys_device *dev)
 		__raw_writel(c->saved_gfer, c->regbase + GFER_OFFSET);
 		__raw_writel(c->saved_gpdr, c->regbase + GPDR_OFFSET);
 	}
-	return 0;
 }
 #else
 #define pxa_gpio_suspend	NULL
 #define pxa_gpio_resume		NULL
 #endif
 
-struct sysdev_class pxa_gpio_sysclass = {
-	.name		= "gpio",
+struct syscore_ops pxa_gpio_syscore_ops = {
 	.suspend	= pxa_gpio_suspend,
 	.resume		= pxa_gpio_resume,
 };
-
-static int __init pxa_gpio_init(void)
-{
-	return sysdev_class_register(&pxa_gpio_sysclass);
-}
-
-core_initcall(pxa_gpio_init);
diff --git a/arch/arm/plat-pxa/mfp.c b/arch/arm/plat-pxa/mfp.c
index a9aa5ad3f4e..be12eadcce2 100644
--- a/arch/arm/plat-pxa/mfp.c
+++ b/arch/arm/plat-pxa/mfp.c
@@ -17,7 +17,6 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/io.h>
-#include <linux/sysdev.h>
 
 #include <plat/mfp.h>
 
diff --git a/arch/arm/plat-s3c24xx/dma.c b/arch/arm/plat-s3c24xx/dma.c
index 27ea852e337..c10d10c56e2 100644
--- a/arch/arm/plat-s3c24xx/dma.c
+++ b/arch/arm/plat-s3c24xx/dma.c
@@ -22,7 +22,7 @@
 #include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/slab.h>
 #include <linux/errno.h>
 #include <linux/io.h>
@@ -1195,19 +1195,12 @@ int s3c2410_dma_getposition(unsigned int channel, dma_addr_t *src, dma_addr_t *d
 
 EXPORT_SYMBOL(s3c2410_dma_getposition);
 
-static inline struct s3c2410_dma_chan *to_dma_chan(struct sys_device *dev)
-{
-	return container_of(dev, struct s3c2410_dma_chan, dev);
-}
-
-/* system device class */
+/* system core operations */
 
 #ifdef CONFIG_PM
 
-static int s3c2410_dma_suspend(struct sys_device *dev, pm_message_t state)
+static void s3c2410_dma_suspend_chan(s3c2410_dma_chan *cp)
 {
-	struct s3c2410_dma_chan *cp = to_dma_chan(dev);
-
 	printk(KERN_DEBUG "suspending dma channel %d\n", cp->number);
 
 	if (dma_rdreg(cp, S3C2410_DMA_DMASKTRIG) & S3C2410_DMASKTRIG_ON) {
@@ -1222,13 +1215,21 @@ static int s3c2410_dma_suspend(struct sys_device *dev, pm_message_t state)
 
 		s3c2410_dma_dostop(cp);
 	}
+}
+
+static int s3c2410_dma_suspend(void)
+{
+	struct s3c2410_dma_chan *cp = s3c2410_chans;
+	int channel;
+
+	for (channel = 0; channel < dma_channels; cp++, channel++)
+		s3c2410_dma_suspend_chan(cp);
 
 	return 0;
 }
 
-static int s3c2410_dma_resume(struct sys_device *dev)
+static void s3c2410_dma_resume_chan(struct s3c2410_dma_chan *cp)
 {
-	struct s3c2410_dma_chan *cp = to_dma_chan(dev);
 	unsigned int no = cp->number | DMACH_LOW_LEVEL;
 
 	/* restore channel's hardware configuration */
@@ -1249,13 +1250,21 @@ static int s3c2410_dma_resume(struct sys_device *dev)
 	return 0;
 }
 
+static void s3c2410_dma_resume(void)
+{
+	struct s3c2410_dma_chan *cp = s3c2410_chans + dma_channels - 1;
+	int channel;
+
+	for (channel = dma_channels - 1; channel >= 0; cp++, channel--)
+		s3c2410_dma_resume_chan(cp);
+}
+
 #else
 #define s3c2410_dma_suspend NULL
 #define s3c2410_dma_resume  NULL
 #endif /* CONFIG_PM */
 
-struct sysdev_class dma_sysclass = {
-	.name		= "s3c24xx-dma",
+struct syscore_ops dma_syscore_ops = {
 	.suspend	= s3c2410_dma_suspend,
 	.resume		= s3c2410_dma_resume,
 };
@@ -1269,39 +1278,14 @@ static void s3c2410_dma_cache_ctor(void *p)
 
 /* initialisation code */
 
-static int __init s3c24xx_dma_sysclass_init(void)
+static int __init s3c24xx_dma_syscore_init(void)
 {
-	int ret = sysdev_class_register(&dma_sysclass);
-
-	if (ret != 0)
-		printk(KERN_ERR "dma sysclass registration failed\n");
-
-	return ret;
-}
-
-core_initcall(s3c24xx_dma_sysclass_init);
-
-static int __init s3c24xx_dma_sysdev_register(void)
-{
-	struct s3c2410_dma_chan *cp = s3c2410_chans;
-	int channel, ret;
-
-	for (channel = 0; channel < dma_channels; cp++, channel++) {
-		cp->dev.cls = &dma_sysclass;
-		cp->dev.id  = channel;
-		ret = sysdev_register(&cp->dev);
-
-		if (ret) {
-			printk(KERN_ERR "error registering dev for dma %d\n",
-			       channel);
- 			return ret;
-		}
-	}
+	register_syscore_ops(&dma_syscore_ops);
 
 	return 0;
 }
 
-late_initcall(s3c24xx_dma_sysdev_register);
+late_initcall(s3c24xx_dma_syscore_init);
 
 int __init s3c24xx_dma_init(unsigned int channels, unsigned int irq,
 			    unsigned int stride)
diff --git a/arch/arm/plat-s3c24xx/irq-pm.c b/arch/arm/plat-s3c24xx/irq-pm.c
index c3624d89863..0efb2e2848c 100644
--- a/arch/arm/plat-s3c24xx/irq-pm.c
+++ b/arch/arm/plat-s3c24xx/irq-pm.c
@@ -14,7 +14,6 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
-#include <linux/sysdev.h>
 #include <linux/irq.h>
 
 #include <plat/cpu.h>
@@ -65,7 +64,7 @@ static unsigned long save_extint[3];
 static unsigned long save_eintflt[4];
 static unsigned long save_eintmask;
 
-int s3c24xx_irq_suspend(struct sys_device *dev, pm_message_t state)
+int s3c24xx_irq_suspend(void)
 {
 	unsigned int i;
 
@@ -81,7 +80,7 @@ int s3c24xx_irq_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-int s3c24xx_irq_resume(struct sys_device *dev)
+void s3c24xx_irq_resume(void)
 {
 	unsigned int i;
 
@@ -93,6 +92,4 @@ int s3c24xx_irq_resume(struct sys_device *dev)
 
 	s3c_pm_do_restore(irq_save, ARRAY_SIZE(irq_save));
 	__raw_writel(save_eintmask, S3C24XX_EINTMASK);
-
-	return 0;
 }
diff --git a/arch/arm/plat-s5p/irq-pm.c b/arch/arm/plat-s5p/irq-pm.c
index 5259ad458bc..327acb3a446 100644
--- a/arch/arm/plat-s5p/irq-pm.c
+++ b/arch/arm/plat-s5p/irq-pm.c
@@ -16,7 +16,6 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
-#include <linux/sysdev.h>
 
 #include <plat/cpu.h>
 #include <plat/irqs.h>
@@ -77,17 +76,15 @@ static struct sleep_save eint_save[] = {
 	SAVE_ITEM(S5P_EINT_MASK(3)),
 };
 
-int s3c24xx_irq_suspend(struct sys_device *dev, pm_message_t state)
+int s3c24xx_irq_suspend(void)
 {
 	s3c_pm_do_save(eint_save, ARRAY_SIZE(eint_save));
 
 	return 0;
 }
 
-int s3c24xx_irq_resume(struct sys_device *dev)
+void s3c24xx_irq_resume(void)
 {
 	s3c_pm_do_restore(eint_save, ARRAY_SIZE(eint_save));
-
-	return 0;
 }
 
diff --git a/arch/arm/plat-samsung/include/plat/cpu.h b/arch/arm/plat-samsung/include/plat/cpu.h
index cedfff51c82..3aedac0034b 100644
--- a/arch/arm/plat-samsung/include/plat/cpu.h
+++ b/arch/arm/plat-samsung/include/plat/cpu.h
@@ -68,6 +68,12 @@ extern void s3c24xx_init_uartdevs(char *name,
 struct sys_timer;
 extern struct sys_timer s3c24xx_timer;
 
+extern struct syscore_ops s3c2410_pm_syscore_ops;
+extern struct syscore_ops s3c2412_pm_syscore_ops;
+extern struct syscore_ops s3c2416_pm_syscore_ops;
+extern struct syscore_ops s3c244x_pm_syscore_ops;
+extern struct syscore_ops s3c64xx_irq_syscore_ops;
+
 /* system device classes */
 
 extern struct sysdev_class s3c2410_sysclass;
diff --git a/arch/arm/plat-samsung/include/plat/pm.h b/arch/arm/plat-samsung/include/plat/pm.h
index 937cc2ace51..7fb6f6be8c8 100644
--- a/arch/arm/plat-samsung/include/plat/pm.h
+++ b/arch/arm/plat-samsung/include/plat/pm.h
@@ -103,14 +103,16 @@ extern void s3c_pm_do_restore_core(struct sleep_save *ptr, int count);
 
 #ifdef CONFIG_PM
 extern int s3c_irqext_wake(struct irq_data *data, unsigned int state);
-extern int s3c24xx_irq_suspend(struct sys_device *dev, pm_message_t state);
-extern int s3c24xx_irq_resume(struct sys_device *dev);
+extern int s3c24xx_irq_suspend(void);
+extern void s3c24xx_irq_resume(void);
 #else
 #define s3c_irqext_wake NULL
 #define s3c24xx_irq_suspend NULL
 #define s3c24xx_irq_resume  NULL
 #endif
 
+extern struct syscore_ops s3c24xx_irq_syscore_ops;
+
 /* PM debug functions */
 
 #ifdef CONFIG_SAMSUNG_PM_DEBUG
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index f74695075e6..f25e7ec8941 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -398,9 +398,9 @@ static void vfp_enable(void *unused)
 }
 
 #ifdef CONFIG_PM
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
-static int vfp_pm_suspend(struct sys_device *dev, pm_message_t state)
+static int vfp_pm_suspend(void)
 {
 	struct thread_info *ti = current_thread_info();
 	u32 fpexc = fmrx(FPEXC);
@@ -420,34 +420,25 @@ static int vfp_pm_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static int vfp_pm_resume(struct sys_device *dev)
+static void vfp_pm_resume(void)
 {
 	/* ensure we have access to the vfp */
 	vfp_enable(NULL);
 
 	/* and disable it to ensure the next usage restores the state */
 	fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN);
-
-	return 0;
 }
 
-static struct sysdev_class vfp_pm_sysclass = {
-	.name		= "vfp",
+static struct syscore_ops vfp_pm_syscore_ops = {
 	.suspend	= vfp_pm_suspend,
 	.resume		= vfp_pm_resume,
 };
 
-static struct sys_device vfp_pm_sysdev = {
-	.cls	= &vfp_pm_sysclass,
-};
-
 static void vfp_pm_init(void)
 {
-	sysdev_class_register(&vfp_pm_sysclass);
-	sysdev_register(&vfp_pm_sysdev);
+	register_syscore_ops(&vfp_pm_syscore_ops);
 }
 
-
 #else
 static inline void vfp_pm_init(void) { }
 #endif /* CONFIG_PM */
diff --git a/arch/avr32/mach-at32ap/intc.c b/arch/avr32/mach-at32ap/intc.c
index 21ce35f33aa..3e3646186c9 100644
--- a/arch/avr32/mach-at32ap/intc.c
+++ b/arch/avr32/mach-at32ap/intc.c
@@ -12,7 +12,7 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <asm/io.h>
 
@@ -21,7 +21,6 @@
 struct intc {
 	void __iomem		*regs;
 	struct irq_chip		chip;
-	struct sys_device	sysdev;
 #ifdef CONFIG_PM
 	unsigned long		suspend_ipr;
 	unsigned long		saved_ipr[64];
@@ -146,9 +145,8 @@ void intc_set_suspend_handler(unsigned long offset)
 	intc0.suspend_ipr = offset;
 }
 
-static int intc_suspend(struct sys_device *sdev, pm_message_t state)
+static int intc_suspend(void)
 {
-	struct intc *intc = container_of(sdev, struct intc, sysdev);
 	int i;
 
 	if (unlikely(!irqs_disabled())) {
@@ -156,28 +154,25 @@ static int intc_suspend(struct sys_device *sdev, pm_message_t state)
 		return -EINVAL;
 	}
 
-	if (unlikely(!intc->suspend_ipr)) {
+	if (unlikely(!intc0.suspend_ipr)) {
 		pr_err("intc_suspend: suspend_ipr not initialized\n");
 		return -EINVAL;
 	}
 
 	for (i = 0; i < 64; i++) {
-		intc->saved_ipr[i] = intc_readl(intc, INTPR0 + 4 * i);
-		intc_writel(intc, INTPR0 + 4 * i, intc->suspend_ipr);
+		intc0.saved_ipr[i] = intc_readl(&intc0, INTPR0 + 4 * i);
+		intc_writel(&intc0, INTPR0 + 4 * i, intc0.suspend_ipr);
 	}
 
 	return 0;
 }
 
-static int intc_resume(struct sys_device *sdev)
+static int intc_resume(void)
 {
-	struct intc *intc = container_of(sdev, struct intc, sysdev);
 	int i;
 
-	WARN_ON(!irqs_disabled());
-
 	for (i = 0; i < 64; i++)
-		intc_writel(intc, INTPR0 + 4 * i, intc->saved_ipr[i]);
+		intc_writel(&intc0, INTPR0 + 4 * i, intc0.saved_ipr[i]);
 
 	return 0;
 }
@@ -186,27 +181,18 @@ static int intc_resume(struct sys_device *sdev)
 #define intc_resume	NULL
 #endif
 
-static struct sysdev_class intc_class = {
-	.name		= "intc",
+static struct syscore_ops intc_syscore_ops = {
 	.suspend	= intc_suspend,
 	.resume		= intc_resume,
 };
 
-static int __init intc_init_sysdev(void)
+static int __init intc_init_syscore(void)
 {
-	int ret;
-
-	ret = sysdev_class_register(&intc_class);
-	if (ret)
-		return ret;
+	register_syscore_ops(&intc_syscore_ops);
 
-	intc0.sysdev.id = 0;
-	intc0.sysdev.cls = &intc_class;
-	ret = sysdev_register(&intc0.sysdev);
-
-	return ret;
+	return 0;
 }
-device_initcall(intc_init_sysdev);
+device_initcall(intc_init_syscore);
 
 unsigned long intc_get_pending(unsigned int group)
 {
diff --git a/arch/blackfin/kernel/nmi.c b/arch/blackfin/kernel/nmi.c
index 0b5f72f17fd..401eb1d8e3b 100644
--- a/arch/blackfin/kernel/nmi.c
+++ b/arch/blackfin/kernel/nmi.c
@@ -12,7 +12,7 @@
 
 #include <linux/bitops.h>
 #include <linux/hardirq.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/pm.h>
 #include <linux/nmi.h>
 #include <linux/smp.h>
@@ -196,43 +196,31 @@ void touch_nmi_watchdog(void)
 
 /* Suspend/resume support */
 #ifdef CONFIG_PM
-static int nmi_wdt_suspend(struct sys_device *dev, pm_message_t state)
+static int nmi_wdt_suspend(void)
 {
 	nmi_wdt_stop();
 	return 0;
 }
 
-static int nmi_wdt_resume(struct sys_device *dev)
+static void nmi_wdt_resume(void)
 {
 	if (nmi_active)
 		nmi_wdt_start();
-	return 0;
 }
 
-static struct sysdev_class nmi_sysclass = {
-	.name		= DRV_NAME,
+static struct syscore_ops nmi_syscore_ops = {
 	.resume		= nmi_wdt_resume,
 	.suspend	= nmi_wdt_suspend,
 };
 
-static struct sys_device device_nmi_wdt = {
-	.id	= 0,
-	.cls	= &nmi_sysclass,
-};
-
-static int __init init_nmi_wdt_sysfs(void)
+static int __init init_nmi_wdt_syscore(void)
 {
-	int error;
-
-	if (!nmi_active)
-		return 0;
+	if (nmi_active)
+		register_syscore_ops(&nmi_syscore_ops);
 
-	error = sysdev_class_register(&nmi_sysclass);
-	if (!error)
-		error = sysdev_register(&device_nmi_wdt);
-	return error;
+	return 0;
 }
-late_initcall(init_nmi_wdt_sysfs);
+late_initcall(init_nmi_wdt_syscore);
 
 #endif	/* CONFIG_PM */
 
diff --git a/arch/blackfin/kernel/time-ts.c b/arch/blackfin/kernel/time-ts.c
index cdb4beb6bc8..9e9b60d969d 100644
--- a/arch/blackfin/kernel/time-ts.c
+++ b/arch/blackfin/kernel/time-ts.c
@@ -23,29 +23,6 @@
 #include <asm/gptimers.h>
 #include <asm/nmi.h>
 
-/* Accelerators for sched_clock()
- * convert from cycles(64bits) => nanoseconds (64bits)
- *  basic equation:
- *		ns = cycles / (freq / ns_per_sec)
- *		ns = cycles * (ns_per_sec / freq)
- *		ns = cycles * (10^9 / (cpu_khz * 10^3))
- *		ns = cycles * (10^6 / cpu_khz)
- *
- *	Then we use scaling math (suggested by george@mvista.com) to get:
- *		ns = cycles * (10^6 * SC / cpu_khz) / SC
- *		ns = cycles * cyc2ns_scale / SC
- *
- *	And since SC is a constant power of two, we can convert the div
- *  into a shift.
- *
- *  We can use khz divisor instead of mhz to keep a better precision, since
- *  cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
- *  (mathieu.desnoyers@polymtl.ca)
- *
- *			-johnstul@us.ibm.com "math is hard, lets go shopping!"
- */
-
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
 
 #if defined(CONFIG_CYCLES_CLOCKSOURCE)
 
@@ -63,7 +40,6 @@ static struct clocksource bfin_cs_cycles = {
 	.rating		= 400,
 	.read		= bfin_read_cycles,
 	.mask		= CLOCKSOURCE_MASK(64),
-	.shift		= CYC2NS_SCALE_FACTOR,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -75,10 +51,7 @@ static inline unsigned long long bfin_cs_cycles_sched_clock(void)
 
 static int __init bfin_cs_cycles_init(void)
 {
-	bfin_cs_cycles.mult = \
-		clocksource_hz2mult(get_cclk(), bfin_cs_cycles.shift);
-
-	if (clocksource_register(&bfin_cs_cycles))
+	if (clocksource_register_hz(&bfin_cs_cycles, get_cclk()))
 		panic("failed to register clocksource");
 
 	return 0;
@@ -111,7 +84,6 @@ static struct clocksource bfin_cs_gptimer0 = {
 	.rating		= 350,
 	.read		= bfin_read_gptimer0,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift		= CYC2NS_SCALE_FACTOR,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -125,10 +97,7 @@ static int __init bfin_cs_gptimer0_init(void)
 {
 	setup_gptimer0();
 
-	bfin_cs_gptimer0.mult = \
-		clocksource_hz2mult(get_sclk(), bfin_cs_gptimer0.shift);
-
-	if (clocksource_register(&bfin_cs_gptimer0))
+	if (clocksource_register_hz(&bfin_cs_gptimer0, get_sclk()))
 		panic("failed to register clocksource");
 
 	return 0;
diff --git a/arch/blackfin/mach-common/dpmc.c b/arch/blackfin/mach-common/dpmc.c
index 382099fd556..5e4112e518a 100644
--- a/arch/blackfin/mach-common/dpmc.c
+++ b/arch/blackfin/mach-common/dpmc.c
@@ -19,9 +19,6 @@
 
 #define DRIVER_NAME "bfin dpmc"
 
-#define dprintk(msg...) \
-	cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, DRIVER_NAME, msg)
-
 struct bfin_dpmc_platform_data *pdata;
 
 /**
diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c
index 8bce5ed031e..1fbd94c4445 100644
--- a/arch/blackfin/mach-common/smp.c
+++ b/arch/blackfin/mach-common/smp.c
@@ -177,6 +177,9 @@ static irqreturn_t ipi_handler_int1(int irq, void *dev_instance)
 	while (msg_queue->count) {
 		msg = &msg_queue->ipi_message[msg_queue->head];
 		switch (msg->type) {
+		case BFIN_IPI_RESCHEDULE:
+			scheduler_ipi();
+			break;
 		case BFIN_IPI_CALL_FUNC:
 			spin_unlock_irqrestore(&msg_queue->lock, flags);
 			ipi_call_function(cpu, msg);
diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c
index 4c9e3e1ba5d..66cc75657e2 100644
--- a/arch/cris/arch-v32/kernel/smp.c
+++ b/arch/cris/arch-v32/kernel/smp.c
@@ -342,15 +342,18 @@ irqreturn_t crisv32_ipi_interrupt(int irq, void *dev_id)
 
 	ipi = REG_RD(intr_vect, irq_regs[smp_processor_id()], rw_ipi);
 
+	if (ipi.vector & IPI_SCHEDULE) {
+		scheduler_ipi();
+	}
 	if (ipi.vector & IPI_CALL) {
-	         func(info);
+		func(info);
 	}
 	if (ipi.vector & IPI_FLUSH_TLB) {
-		     if (flush_mm == FLUSH_ALL)
-			 __flush_tlb_all();
-		     else if (flush_vma == FLUSH_ALL)
+		if (flush_mm == FLUSH_ALL)
+			__flush_tlb_all();
+		else if (flush_vma == FLUSH_ALL)
 			__flush_tlb_mm(flush_mm);
-		     else
+		else
 			__flush_tlb_page(flush_vma, flush_addr);
 	}
 
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index ab3ccab132c..80241fe03f5 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -37,6 +37,7 @@
 #include <linux/crash_dump.h>
 #include <linux/iommu-helper.h>
 #include <linux/dma-mapping.h>
+#include <linux/prefetch.h>
 
 #include <asm/delay.h>		/* ia64_get_itc() */
 #include <asm/io.h>
diff --git a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c
index 22f61526a8e..f09b174244d 100644
--- a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c
+++ b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c
@@ -23,8 +23,6 @@
 #include <linux/acpi.h>
 #include <acpi/processor.h>
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg)
-
 MODULE_AUTHOR("Venkatesh Pallipadi");
 MODULE_DESCRIPTION("ACPI Processor P-States Driver");
 MODULE_LICENSE("GPL");
@@ -47,12 +45,12 @@ processor_set_pstate (
 {
 	s64 retval;
 
-	dprintk("processor_set_pstate\n");
+	pr_debug("processor_set_pstate\n");
 
 	retval = ia64_pal_set_pstate((u64)value);
 
 	if (retval) {
-		dprintk("Failed to set freq to 0x%x, with error 0x%lx\n",
+		pr_debug("Failed to set freq to 0x%x, with error 0x%lx\n",
 		        value, retval);
 		return -ENODEV;
 	}
@@ -67,14 +65,14 @@ processor_get_pstate (
 	u64	pstate_index = 0;
 	s64 	retval;
 
-	dprintk("processor_get_pstate\n");
+	pr_debug("processor_get_pstate\n");
 
 	retval = ia64_pal_get_pstate(&pstate_index,
 	                             PAL_GET_PSTATE_TYPE_INSTANT);
 	*value = (u32) pstate_index;
 
 	if (retval)
-		dprintk("Failed to get current freq with "
+		pr_debug("Failed to get current freq with "
 			"error 0x%lx, idx 0x%x\n", retval, *value);
 
 	return (int)retval;
@@ -90,7 +88,7 @@ extract_clock (
 {
 	unsigned long i;
 
-	dprintk("extract_clock\n");
+	pr_debug("extract_clock\n");
 
 	for (i = 0; i < data->acpi_data.state_count; i++) {
 		if (value == data->acpi_data.states[i].status)
@@ -110,7 +108,7 @@ processor_get_freq (
 	cpumask_t		saved_mask;
 	unsigned long 		clock_freq;
 
-	dprintk("processor_get_freq\n");
+	pr_debug("processor_get_freq\n");
 
 	saved_mask = current->cpus_allowed;
 	set_cpus_allowed_ptr(current, cpumask_of(cpu));
@@ -148,7 +146,7 @@ processor_set_freq (
 	cpumask_t		saved_mask;
 	int			retval;
 
-	dprintk("processor_set_freq\n");
+	pr_debug("processor_set_freq\n");
 
 	saved_mask = current->cpus_allowed;
 	set_cpus_allowed_ptr(current, cpumask_of(cpu));
@@ -159,16 +157,16 @@ processor_set_freq (
 
 	if (state == data->acpi_data.state) {
 		if (unlikely(data->resume)) {
-			dprintk("Called after resume, resetting to P%d\n", state);
+			pr_debug("Called after resume, resetting to P%d\n", state);
 			data->resume = 0;
 		} else {
-			dprintk("Already at target state (P%d)\n", state);
+			pr_debug("Already at target state (P%d)\n", state);
 			retval = 0;
 			goto migrate_end;
 		}
 	}
 
-	dprintk("Transitioning from P%d to P%d\n",
+	pr_debug("Transitioning from P%d to P%d\n",
 		data->acpi_data.state, state);
 
 	/* cpufreq frequency struct */
@@ -186,7 +184,7 @@ processor_set_freq (
 
 	value = (u32) data->acpi_data.states[state].control;
 
-	dprintk("Transitioning to state: 0x%08x\n", value);
+	pr_debug("Transitioning to state: 0x%08x\n", value);
 
 	ret = processor_set_pstate(value);
 	if (ret) {
@@ -219,7 +217,7 @@ acpi_cpufreq_get (
 {
 	struct cpufreq_acpi_io *data = acpi_io_data[cpu];
 
-	dprintk("acpi_cpufreq_get\n");
+	pr_debug("acpi_cpufreq_get\n");
 
 	return processor_get_freq(data, cpu);
 }
@@ -235,7 +233,7 @@ acpi_cpufreq_target (
 	unsigned int next_state = 0;
 	unsigned int result = 0;
 
-	dprintk("acpi_cpufreq_setpolicy\n");
+	pr_debug("acpi_cpufreq_setpolicy\n");
 
 	result = cpufreq_frequency_table_target(policy,
 			data->freq_table, target_freq, relation, &next_state);
@@ -255,7 +253,7 @@ acpi_cpufreq_verify (
 	unsigned int result = 0;
 	struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
 
-	dprintk("acpi_cpufreq_verify\n");
+	pr_debug("acpi_cpufreq_verify\n");
 
 	result = cpufreq_frequency_table_verify(policy,
 			data->freq_table);
@@ -273,7 +271,7 @@ acpi_cpufreq_cpu_init (
 	struct cpufreq_acpi_io	*data;
 	unsigned int		result = 0;
 
-	dprintk("acpi_cpufreq_cpu_init\n");
+	pr_debug("acpi_cpufreq_cpu_init\n");
 
 	data = kzalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL);
 	if (!data)
@@ -288,7 +286,7 @@ acpi_cpufreq_cpu_init (
 
 	/* capability check */
 	if (data->acpi_data.state_count <= 1) {
-		dprintk("No P-States\n");
+		pr_debug("No P-States\n");
 		result = -ENODEV;
 		goto err_unreg;
 	}
@@ -297,7 +295,7 @@ acpi_cpufreq_cpu_init (
 					ACPI_ADR_SPACE_FIXED_HARDWARE) ||
 	    (data->acpi_data.status_register.space_id !=
 					ACPI_ADR_SPACE_FIXED_HARDWARE)) {
-		dprintk("Unsupported address space [%d, %d]\n",
+		pr_debug("Unsupported address space [%d, %d]\n",
 			(u32) (data->acpi_data.control_register.space_id),
 			(u32) (data->acpi_data.status_register.space_id));
 		result = -ENODEV;
@@ -348,7 +346,7 @@ acpi_cpufreq_cpu_init (
 	       "activated.\n", cpu);
 
 	for (i = 0; i < data->acpi_data.state_count; i++)
-		dprintk("     %cP%d: %d MHz, %d mW, %d uS, %d uS, 0x%x 0x%x\n",
+		pr_debug("     %cP%d: %d MHz, %d mW, %d uS, %d uS, 0x%x 0x%x\n",
 			(i == data->acpi_data.state?'*':' '), i,
 			(u32) data->acpi_data.states[i].core_frequency,
 			(u32) data->acpi_data.states[i].power,
@@ -383,7 +381,7 @@ acpi_cpufreq_cpu_exit (
 {
 	struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
 
-	dprintk("acpi_cpufreq_cpu_exit\n");
+	pr_debug("acpi_cpufreq_cpu_exit\n");
 
 	if (data) {
 		cpufreq_frequency_table_put_attr(policy->cpu);
@@ -418,7 +416,7 @@ static struct cpufreq_driver acpi_cpufreq_driver = {
 static int __init
 acpi_cpufreq_init (void)
 {
-	dprintk("acpi_cpufreq_init\n");
+	pr_debug("acpi_cpufreq_init\n");
 
  	return cpufreq_register_driver(&acpi_cpufreq_driver);
 }
@@ -427,7 +425,7 @@ acpi_cpufreq_init (void)
 static void __exit
 acpi_cpufreq_exit (void)
 {
-	dprintk("acpi_cpufreq_exit\n");
+	pr_debug("acpi_cpufreq_exit\n");
 
 	cpufreq_unregister_driver(&acpi_cpufreq_driver);
 	return;
diff --git a/arch/ia64/kernel/cyclone.c b/arch/ia64/kernel/cyclone.c
index 1b811c61bdc..f64097b5118 100644
--- a/arch/ia64/kernel/cyclone.c
+++ b/arch/ia64/kernel/cyclone.c
@@ -31,8 +31,6 @@ static struct clocksource clocksource_cyclone = {
         .rating         = 300,
         .read           = read_cyclone,
         .mask           = (1LL << 40) - 1,
-        .mult           = 0, /*to be calculated*/
-        .shift          = 16,
         .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -118,9 +116,7 @@ int __init init_cyclone_clock(void)
 	/* initialize last tick */
 	cyclone_mc = cyclone_timer;
 	clocksource_cyclone.fsys_mmio = cyclone_timer;
-	clocksource_cyclone.mult = clocksource_hz2mult(CYCLONE_TIMER_FREQ,
-						clocksource_cyclone.shift);
-	clocksource_register(&clocksource_cyclone);
+	clocksource_register_hz(&clocksource_cyclone, CYCLONE_TIMER_FREQ);
 
 	return 0;
 }
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index 5b704740f16..782c3a357f2 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -31,6 +31,7 @@
 #include <linux/irq.h>
 #include <linux/ratelimit.h>
 #include <linux/acpi.h>
+#include <linux/sched.h>
 
 #include <asm/delay.h>
 #include <asm/intrinsics.h>
@@ -496,6 +497,7 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
 			smp_local_flush_tlb();
 			kstat_incr_irqs_this_cpu(irq, desc);
 		} else if (unlikely(IS_RESCHEDULE(vector))) {
+			scheduler_ipi();
 			kstat_incr_irqs_this_cpu(irq, desc);
 		} else {
 			ia64_setreg(_IA64_REG_CR_TPR, vector);
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 156ad803d5b..04440cc09b4 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -73,8 +73,6 @@ static struct clocksource clocksource_itc = {
 	.rating         = 350,
 	.read           = itc_get_cycles,
 	.mask           = CLOCKSOURCE_MASK(64),
-	.mult           = 0, /*to be calculated*/
-	.shift          = 16,
 	.flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 #ifdef CONFIG_PARAVIRT
 	.resume		= paravirt_clocksource_resume,
@@ -365,11 +363,8 @@ ia64_init_itm (void)
 	ia64_cpu_local_tick();
 
 	if (!itc_clocksource) {
-		/* Sort out mult/shift values: */
-		clocksource_itc.mult =
-			clocksource_hz2mult(local_cpu_data->itc_freq,
-						clocksource_itc.shift);
-		clocksource_register(&clocksource_itc);
+		clocksource_register_hz(&clocksource_itc,
+						local_cpu_data->itc_freq);
 		itc_clocksource = &clocksource_itc;
 	}
 }
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index 787de4a77d8..53c0ba004e9 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -209,6 +209,7 @@ SECTIONS {
 	data : {
 	} :data
 	.data : AT(ADDR(.data) - LOAD_OFFSET) {
+		_sdata  =  .;
 		INIT_TASK_DATA(PAGE_SIZE)
 		CACHELINE_ALIGNED_DATA(SMP_CACHE_BYTES)
 		READ_MOSTLY_DATA(SMP_CACHE_BYTES)
diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h
index f6c5617e16a..b214b5b0432 100644
--- a/arch/ia64/kvm/vti.h
+++ b/arch/ia64/kvm/vti.h
@@ -83,13 +83,13 @@
 union vac {
 	unsigned long value;
 	struct {
-		int a_int:1;
-		int a_from_int_cr:1;
-		int a_to_int_cr:1;
-		int a_from_psr:1;
-		int a_from_cpuid:1;
-		int a_cover:1;
-		int a_bsw:1;
+		unsigned int a_int:1;
+		unsigned int a_from_int_cr:1;
+		unsigned int a_to_int_cr:1;
+		unsigned int a_from_psr:1;
+		unsigned int a_from_cpuid:1;
+		unsigned int a_cover:1;
+		unsigned int a_bsw:1;
 		long reserved:57;
 	};
 };
@@ -97,12 +97,12 @@ union vac {
 union vdc {
 	unsigned long value;
 	struct {
-		int d_vmsw:1;
-		int d_extint:1;
-		int d_ibr_dbr:1;
-		int d_pmc:1;
-		int d_to_pmd:1;
-		int d_itm:1;
+		unsigned int d_vmsw:1;
+		unsigned int d_extint:1;
+		unsigned int d_ibr_dbr:1;
+		unsigned int d_pmc:1;
+		unsigned int d_to_pmd:1;
+		unsigned int d_itm:1;
 		long reserved:58;
 	};
 };
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 0799fea4c58..20b35937612 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -10,6 +10,7 @@
 #include <linux/interrupt.h>
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
+#include <linux/prefetch.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
diff --git a/arch/ia64/oprofile/backtrace.c b/arch/ia64/oprofile/backtrace.c
index 5cdd7e4a597..f7b798993ce 100644
--- a/arch/ia64/oprofile/backtrace.c
+++ b/arch/ia64/oprofile/backtrace.c
@@ -29,7 +29,7 @@ typedef struct
 	unsigned int depth;
 	struct pt_regs *regs;
 	struct unw_frame_info frame;
-	u64 *prev_pfs_loc;	/* state for WAR for old spinlock ool code */
+	unsigned long *prev_pfs_loc;	/* state for WAR for old spinlock ool code */
 } ia64_backtrace_t;
 
 /* Returns non-zero if the PC is in the Interrupt Vector Table */
diff --git a/arch/ia64/sn/kernel/sn2/timer.c b/arch/ia64/sn/kernel/sn2/timer.c
index 21d6f09e344..c34efda122e 100644
--- a/arch/ia64/sn/kernel/sn2/timer.c
+++ b/arch/ia64/sn/kernel/sn2/timer.c
@@ -33,8 +33,6 @@ static struct clocksource clocksource_sn2 = {
         .rating         = 450,
         .read           = read_sn2,
         .mask           = (1LL << 55) - 1,
-        .mult           = 0,
-        .shift          = 10,
         .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -57,9 +55,7 @@ ia64_sn_udelay (unsigned long usecs)
 void __init sn_timer_init(void)
 {
 	clocksource_sn2.fsys_mmio = RTC_COUNTER_ADDR;
-	clocksource_sn2.mult = clocksource_hz2mult(sn_rtc_cycles_per_second,
-							clocksource_sn2.shift);
-	clocksource_register(&clocksource_sn2);
+	clocksource_register_hz(&clocksource_sn2, sn_rtc_cycles_per_second);
 
 	ia64_udelay = &ia64_sn_udelay;
 }
diff --git a/arch/ia64/xen/irq_xen.c b/arch/ia64/xen/irq_xen.c
index 108bb858acf..b279e142c63 100644
--- a/arch/ia64/xen/irq_xen.c
+++ b/arch/ia64/xen/irq_xen.c
@@ -92,6 +92,8 @@ static unsigned short saved_irq_cnt;
 static int xen_slab_ready;
 
 #ifdef CONFIG_SMP
+#include <linux/sched.h>
+
 /* Dummy stub. Though we may check XEN_RESCHEDULE_VECTOR before __do_IRQ,
  * it ends up to issue several memory accesses upon percpu data and
  * thus adds unnecessary traffic to other paths.
@@ -99,7 +101,13 @@ static int xen_slab_ready;
 static irqreturn_t
 xen_dummy_handler(int irq, void *dev_id)
 {
+	return IRQ_HANDLED;
+}
 
+static irqreturn_t
+xen_resched_handler(int irq, void *dev_id)
+{
+	scheduler_ipi();
 	return IRQ_HANDLED;
 }
 
@@ -110,7 +118,7 @@ static struct irqaction xen_ipi_irqaction = {
 };
 
 static struct irqaction xen_resched_irqaction = {
-	.handler =	xen_dummy_handler,
+	.handler =	xen_resched_handler,
 	.flags =	IRQF_DISABLED,
 	.name =		"resched"
 };
diff --git a/arch/m32r/kernel/smp.c b/arch/m32r/kernel/smp.c
index 31cef20b299..fc10b39893d 100644
--- a/arch/m32r/kernel/smp.c
+++ b/arch/m32r/kernel/smp.c
@@ -122,8 +122,6 @@ void smp_send_reschedule(int cpu_id)
  *
  * Description:  This routine executes on CPU which received
  *               'RESCHEDULE_IPI'.
- *               Rescheduling is processed at the exit of interrupt
- *               operation.
  *
  * Born on Date: 2002.02.05
  *
@@ -138,7 +136,7 @@ void smp_send_reschedule(int cpu_id)
  *==========================================================================*/
 void smp_reschedule_interrupt(void)
 {
-	/* nothing to do */
+	scheduler_ipi();
 }
 
 /*==========================================================================*
diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S
index c194d64cdbb..cf95aec7746 100644
--- a/arch/m32r/kernel/vmlinux.lds.S
+++ b/arch/m32r/kernel/vmlinux.lds.S
@@ -44,6 +44,7 @@ SECTIONS
   EXCEPTION_TABLE(16)
   NOTES
 
+  _sdata = .;			/* Start of data section */
   RODATA
   RW_DATA_SECTION(32, PAGE_SIZE, THREAD_SIZE)
   _edata = .;			/* End of data section */
diff --git a/arch/m68k/atari/atakeyb.c b/arch/m68k/atari/atakeyb.c
index b995513d527..95022b04b62 100644
--- a/arch/m68k/atari/atakeyb.c
+++ b/arch/m68k/atari/atakeyb.c
@@ -36,13 +36,10 @@
 
 /* Hook for MIDI serial driver */
 void (*atari_MIDI_interrupt_hook) (void);
-/* Hook for mouse driver */
-void (*atari_mouse_interrupt_hook) (char *);
 /* Hook for keyboard inputdev  driver */
 void (*atari_input_keyboard_interrupt_hook) (unsigned char, char);
 /* Hook for mouse inputdev  driver */
 void (*atari_input_mouse_interrupt_hook) (char *);
-EXPORT_SYMBOL(atari_mouse_interrupt_hook);
 EXPORT_SYMBOL(atari_input_keyboard_interrupt_hook);
 EXPORT_SYMBOL(atari_input_mouse_interrupt_hook);
 
@@ -263,8 +260,8 @@ repeat:
 			kb_state.buf[kb_state.len++] = scancode;
 			if (kb_state.len == 3) {
 				kb_state.state = KEYBOARD;
-				if (atari_mouse_interrupt_hook)
-					atari_mouse_interrupt_hook(kb_state.buf);
+				if (atari_input_mouse_interrupt_hook)
+					atari_input_mouse_interrupt_hook(kb_state.buf);
 			}
 			break;
 
@@ -575,7 +572,7 @@ int atari_keyb_init(void)
 	kb_state.len = 0;
 
 	error = request_irq(IRQ_MFP_ACIA, atari_keyboard_interrupt,
-			    IRQ_TYPE_SLOW, "keyboard/mouse/MIDI",
+			    IRQ_TYPE_SLOW, "keyboard,mouse,MIDI",
 			    atari_keyboard_interrupt);
 	if (error)
 		return error;
diff --git a/arch/m68k/atari/stdma.c b/arch/m68k/atari/stdma.c
index 604329fafbb..ddbf43ca885 100644
--- a/arch/m68k/atari/stdma.c
+++ b/arch/m68k/atari/stdma.c
@@ -180,7 +180,7 @@ void __init stdma_init(void)
 {
 	stdma_isr = NULL;
 	if (request_irq(IRQ_MFP_FDC, stdma_int, IRQ_TYPE_SLOW | IRQF_SHARED,
-			"ST-DMA: floppy/ACSI/IDE/Falcon-SCSI", stdma_int))
+			"ST-DMA floppy,ACSI,IDE,Falcon-SCSI", stdma_int))
 		pr_err("Couldn't register ST-DMA interrupt\n");
 }
 
diff --git a/arch/m68k/include/asm/atarikb.h b/arch/m68k/include/asm/atarikb.h
index 546e7da5804..68f3622bf59 100644
--- a/arch/m68k/include/asm/atarikb.h
+++ b/arch/m68k/include/asm/atarikb.h
@@ -34,8 +34,6 @@ void ikbd_joystick_disable(void);
 
 /* Hook for MIDI serial driver */
 extern void (*atari_MIDI_interrupt_hook) (void);
-/* Hook for mouse driver */
-extern void (*atari_mouse_interrupt_hook) (char *);
 /* Hook for keyboard inputdev  driver */
 extern void (*atari_input_keyboard_interrupt_hook) (unsigned char, char);
 /* Hook for mouse inputdev  driver */
diff --git a/arch/m68k/include/asm/bitops_mm.h b/arch/m68k/include/asm/bitops_mm.h
index 9d69f6e6236..e9020f88a74 100644
--- a/arch/m68k/include/asm/bitops_mm.h
+++ b/arch/m68k/include/asm/bitops_mm.h
@@ -181,14 +181,15 @@ static inline int find_first_zero_bit(const unsigned long *vaddr,
 {
 	const unsigned long *p = vaddr;
 	int res = 32;
+	unsigned int words;
 	unsigned long num;
 
 	if (!size)
 		return 0;
 
-	size = (size + 31) >> 5;
+	words = (size + 31) >> 5;
 	while (!(num = ~*p++)) {
-		if (!--size)
+		if (!--words)
 			goto out;
 	}
 
@@ -196,7 +197,8 @@ static inline int find_first_zero_bit(const unsigned long *vaddr,
 			      : "=d" (res) : "d" (num & -num));
 	res ^= 31;
 out:
-	return ((long)p - (long)vaddr - 4) * 8 + res;
+	res += ((long)p - (long)vaddr - 4) * 8;
+	return res < size ? res : size;
 }
 
 static inline int find_next_zero_bit(const unsigned long *vaddr, int size,
@@ -215,27 +217,32 @@ static inline int find_next_zero_bit(const unsigned long *vaddr, int size,
 		/* Look for zero in first longword */
 		__asm__ __volatile__ ("bfffo %1{#0,#0},%0"
 				      : "=d" (res) : "d" (num & -num));
-		if (res < 32)
-			return offset + (res ^ 31);
+		if (res < 32) {
+			offset += res ^ 31;
+			return offset < size ? offset : size;
+		}
 		offset += 32;
+
+		if (offset >= size)
+			return size;
 	}
 	/* No zero yet, search remaining full bytes for a zero */
-	res = find_first_zero_bit(p, size - ((long)p - (long)vaddr) * 8);
-	return offset + res;
+	return offset + find_first_zero_bit(p, size - offset);
 }
 
 static inline int find_first_bit(const unsigned long *vaddr, unsigned size)
 {
 	const unsigned long *p = vaddr;
 	int res = 32;
+	unsigned int words;
 	unsigned long num;
 
 	if (!size)
 		return 0;
 
-	size = (size + 31) >> 5;
+	words = (size + 31) >> 5;
 	while (!(num = *p++)) {
-		if (!--size)
+		if (!--words)
 			goto out;
 	}
 
@@ -243,7 +250,8 @@ static inline int find_first_bit(const unsigned long *vaddr, unsigned size)
 			      : "=d" (res) : "d" (num & -num));
 	res ^= 31;
 out:
-	return ((long)p - (long)vaddr - 4) * 8 + res;
+	res += ((long)p - (long)vaddr - 4) * 8;
+	return res < size ? res : size;
 }
 
 static inline int find_next_bit(const unsigned long *vaddr, int size,
@@ -262,13 +270,17 @@ static inline int find_next_bit(const unsigned long *vaddr, int size,
 		/* Look for one in first longword */
 		__asm__ __volatile__ ("bfffo %1{#0,#0},%0"
 				      : "=d" (res) : "d" (num & -num));
-		if (res < 32)
-			return offset + (res ^ 31);
+		if (res < 32) {
+			offset += res ^ 31;
+			return offset < size ? offset : size;
+		}
 		offset += 32;
+
+		if (offset >= size)
+			return size;
 	}
 	/* No one yet, search remaining full bytes for a one */
-	res = find_first_bit(p, size - ((long)p - (long)vaddr) * 8);
-	return offset + res;
+	return offset + find_first_bit(p, size - offset);
 }
 
 /*
@@ -366,23 +378,25 @@ static inline int test_bit_le(int nr, const void *vaddr)
 static inline int find_first_zero_bit_le(const void *vaddr, unsigned size)
 {
 	const unsigned long *p = vaddr, *addr = vaddr;
-	int res;
+	int res = 0;
+	unsigned int words;
 
 	if (!size)
 		return 0;
 
-	size = (size >> 5) + ((size & 31) > 0);
-	while (*p++ == ~0UL)
-	{
-		if (--size == 0)
-			return (p - addr) << 5;
+	words = (size >> 5) + ((size & 31) > 0);
+	while (*p++ == ~0UL) {
+		if (--words == 0)
+			goto out;
 	}
 
 	--p;
 	for (res = 0; res < 32; res++)
 		if (!test_bit_le(res, p))
 			break;
-	return (p - addr) * 32 + res;
+out:
+	res += (p - addr) * 32;
+	return res < size ? res : size;
 }
 
 static inline unsigned long find_next_zero_bit_le(const void *addr,
@@ -400,10 +414,15 @@ static inline unsigned long find_next_zero_bit_le(const void *addr,
 		offset -= bit;
 		/* Look for zero in first longword */
 		for (res = bit; res < 32; res++)
-			if (!test_bit_le(res, p))
-				return offset + res;
+			if (!test_bit_le(res, p)) {
+				offset += res;
+				return offset < size ? offset : size;
+			}
 		p++;
 		offset += 32;
+
+		if (offset >= size)
+			return size;
 	}
 	/* No zero yet, search remaining full bytes for a zero */
 	return offset + find_first_zero_bit_le(p, size - offset);
@@ -412,22 +431,25 @@ static inline unsigned long find_next_zero_bit_le(const void *addr,
 static inline int find_first_bit_le(const void *vaddr, unsigned size)
 {
 	const unsigned long *p = vaddr, *addr = vaddr;
-	int res;
+	int res = 0;
+	unsigned int words;
 
 	if (!size)
 		return 0;
 
-	size = (size >> 5) + ((size & 31) > 0);
+	words = (size >> 5) + ((size & 31) > 0);
 	while (*p++ == 0UL) {
-		if (--size == 0)
-			return (p - addr) << 5;
+		if (--words == 0)
+			goto out;
 	}
 
 	--p;
 	for (res = 0; res < 32; res++)
 		if (test_bit_le(res, p))
 			break;
-	return (p - addr) * 32 + res;
+out:
+	res += (p - addr) * 32;
+	return res < size ? res : size;
 }
 
 static inline unsigned long find_next_bit_le(const void *addr,
@@ -445,10 +467,15 @@ static inline unsigned long find_next_bit_le(const void *addr,
 		offset -= bit;
 		/* Look for one in first longword */
 		for (res = bit; res < 32; res++)
-			if (test_bit_le(res, p))
-				return offset + res;
+			if (test_bit_le(res, p)) {
+				offset += res;
+				return offset < size ? offset : size;
+			}
 		p++;
 		offset += 32;
+
+		if (offset >= size)
+			return size;
 	}
 	/* No set bit yet, search remaining full bytes for a set bit */
 	return offset + find_first_bit_le(p, size - offset);
diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h
index 29e17907d9f..f3b649de2a1 100644
--- a/arch/m68k/include/asm/unistd.h
+++ b/arch/m68k/include/asm/unistd.h
@@ -22,7 +22,7 @@
 #define __NR_mknod		 14
 #define __NR_chmod		 15
 #define __NR_chown		 16
-#define __NR_break		 17
+/*#define __NR_break		 17*/
 #define __NR_oldstat		 18
 #define __NR_lseek		 19
 #define __NR_getpid		 20
@@ -36,11 +36,11 @@
 #define __NR_oldfstat		 28
 #define __NR_pause		 29
 #define __NR_utime		 30
-#define __NR_stty		 31
-#define __NR_gtty		 32
+/*#define __NR_stty		 31*/
+/*#define __NR_gtty		 32*/
 #define __NR_access		 33
 #define __NR_nice		 34
-#define __NR_ftime		 35
+/*#define __NR_ftime		 35*/
 #define __NR_sync		 36
 #define __NR_kill		 37
 #define __NR_rename		 38
@@ -49,7 +49,7 @@
 #define __NR_dup		 41
 #define __NR_pipe		 42
 #define __NR_times		 43
-#define __NR_prof		 44
+/*#define __NR_prof		 44*/
 #define __NR_brk		 45
 #define __NR_setgid		 46
 #define __NR_getgid		 47
@@ -58,13 +58,13 @@
 #define __NR_getegid		 50
 #define __NR_acct		 51
 #define __NR_umount2		 52
-#define __NR_lock		 53
+/*#define __NR_lock		 53*/
 #define __NR_ioctl		 54
 #define __NR_fcntl		 55
-#define __NR_mpx		 56
+/*#define __NR_mpx		 56*/
 #define __NR_setpgid		 57
-#define __NR_ulimit		 58
-#define __NR_oldolduname	 59
+/*#define __NR_ulimit		 58*/
+/*#define __NR_oldolduname	 59*/
 #define __NR_umask		 60
 #define __NR_chroot		 61
 #define __NR_ustat		 62
@@ -103,10 +103,10 @@
 #define __NR_fchown		 95
 #define __NR_getpriority	 96
 #define __NR_setpriority	 97
-#define __NR_profil		 98
+/*#define __NR_profil		 98*/
 #define __NR_statfs		 99
 #define __NR_fstatfs		100
-#define __NR_ioperm		101
+/*#define __NR_ioperm		101*/
 #define __NR_socketcall		102
 #define __NR_syslog		103
 #define __NR_setitimer		104
@@ -114,11 +114,11 @@
 #define __NR_stat		106
 #define __NR_lstat		107
 #define __NR_fstat		108
-#define __NR_olduname		109
-#define __NR_iopl		/* 110 */ not supported
+/*#define __NR_olduname		109*/
+/*#define __NR_iopl		110*/ /* not supported */
 #define __NR_vhangup		111
-#define __NR_idle		/* 112 */ Obsolete
-#define __NR_vm86		/* 113 */ not supported
+/*#define __NR_idle		112*/ /* Obsolete */
+/*#define __NR_vm86		113*/ /* not supported */
 #define __NR_wait4		114
 #define __NR_swapoff		115
 #define __NR_sysinfo		116
@@ -132,17 +132,17 @@
 #define __NR_adjtimex		124
 #define __NR_mprotect		125
 #define __NR_sigprocmask	126
-#define __NR_create_module	127
+/*#define __NR_create_module	127*/
 #define __NR_init_module	128
 #define __NR_delete_module	129
-#define __NR_get_kernel_syms	130
+/*#define __NR_get_kernel_syms	130*/
 #define __NR_quotactl		131
 #define __NR_getpgid		132
 #define __NR_fchdir		133
 #define __NR_bdflush		134
 #define __NR_sysfs		135
 #define __NR_personality	136
-#define __NR_afs_syscall	137 /* Syscall for Andrew File System */
+/*#define __NR_afs_syscall	137*/ /* Syscall for Andrew File System */
 #define __NR_setfsuid		138
 #define __NR_setfsgid		139
 #define __NR__llseek		140
@@ -172,7 +172,7 @@
 #define __NR_setresuid		164
 #define __NR_getresuid		165
 #define __NR_getpagesize	166
-#define __NR_query_module	167
+/*#define __NR_query_module	167*/
 #define __NR_poll		168
 #define __NR_nfsservctl		169
 #define __NR_setresgid		170
@@ -193,8 +193,8 @@
 #define __NR_capset		185
 #define __NR_sigaltstack	186
 #define __NR_sendfile		187
-#define __NR_getpmsg		188	/* some people actually want streams */
-#define __NR_putpmsg		189	/* some people actually want streams */
+/*#define __NR_getpmsg		188*/	/* some people actually want streams */
+/*#define __NR_putpmsg		189*/	/* some people actually want streams */
 #define __NR_vfork		190
 #define __NR_ugetrlimit		191
 #define __NR_mmap2		192
@@ -223,6 +223,8 @@
 #define __NR_setfsuid32		215
 #define __NR_setfsgid32		216
 #define __NR_pivot_root		217
+/* 218*/
+/* 219*/
 #define __NR_getdents64		220
 #define __NR_gettid		221
 #define __NR_tkill		222
@@ -281,7 +283,7 @@
 #define __NR_mq_notify		275
 #define __NR_mq_getsetattr	276
 #define __NR_waitid		277
-#define __NR_vserver		278
+/*#define __NR_vserver		278*/
 #define __NR_add_key		279
 #define __NR_request_key	280
 #define __NR_keyctl		281
diff --git a/arch/m68k/kernel/Makefile_mm b/arch/m68k/kernel/Makefile_mm
index 55d5d6b680a..aced6780457 100644
--- a/arch/m68k/kernel/Makefile_mm
+++ b/arch/m68k/kernel/Makefile_mm
@@ -10,7 +10,7 @@ endif
 extra-y	+= vmlinux.lds
 
 obj-y	:= entry.o process.o traps.o ints.o signal.o ptrace.o module.o \
-	   sys_m68k.o time.o setup.o m68k_ksyms.o devres.o
+	   sys_m68k.o time.o setup.o m68k_ksyms.o devres.o syscalltable.o
 
 devres-y = ../../../kernel/irq/devres.o
 
diff --git a/arch/m68k/kernel/entry_mm.S b/arch/m68k/kernel/entry_mm.S
index 1359ee65957..bd0ec05263b 100644
--- a/arch/m68k/kernel/entry_mm.S
+++ b/arch/m68k/kernel/entry_mm.S
@@ -407,351 +407,3 @@ resume:
 
 	rts
 
-.data
-ALIGN
-sys_call_table:
-	.long sys_restart_syscall	/* 0 - old "setup()" system call, used for restarting */
-	.long sys_exit
-	.long sys_fork
-	.long sys_read
-	.long sys_write
-	.long sys_open		/* 5 */
-	.long sys_close
-	.long sys_waitpid
-	.long sys_creat
-	.long sys_link
-	.long sys_unlink	/* 10 */
-	.long sys_execve
-	.long sys_chdir
-	.long sys_time
-	.long sys_mknod
-	.long sys_chmod		/* 15 */
-	.long sys_chown16
-	.long sys_ni_syscall				/* old break syscall holder */
-	.long sys_stat
-	.long sys_lseek
-	.long sys_getpid	/* 20 */
-	.long sys_mount
-	.long sys_oldumount
-	.long sys_setuid16
-	.long sys_getuid16
-	.long sys_stime		/* 25 */
-	.long sys_ptrace
-	.long sys_alarm
-	.long sys_fstat
-	.long sys_pause
-	.long sys_utime		/* 30 */
-	.long sys_ni_syscall				/* old stty syscall holder */
-	.long sys_ni_syscall				/* old gtty syscall holder */
-	.long sys_access
-	.long sys_nice
-	.long sys_ni_syscall	/* 35 */	/* old ftime syscall holder */
-	.long sys_sync
-	.long sys_kill
-	.long sys_rename
-	.long sys_mkdir
-	.long sys_rmdir		/* 40 */
-	.long sys_dup
-	.long sys_pipe
-	.long sys_times
-	.long sys_ni_syscall				/* old prof syscall holder */
-	.long sys_brk		/* 45 */
-	.long sys_setgid16
-	.long sys_getgid16
-	.long sys_signal
-	.long sys_geteuid16
-	.long sys_getegid16	/* 50 */
-	.long sys_acct
-	.long sys_umount				/* recycled never used phys() */
-	.long sys_ni_syscall				/* old lock syscall holder */
-	.long sys_ioctl
-	.long sys_fcntl		/* 55 */
-	.long sys_ni_syscall				/* old mpx syscall holder */
-	.long sys_setpgid
-	.long sys_ni_syscall				/* old ulimit syscall holder */
-	.long sys_ni_syscall
-	.long sys_umask		/* 60 */
-	.long sys_chroot
-	.long sys_ustat
-	.long sys_dup2
-	.long sys_getppid
-	.long sys_getpgrp	/* 65 */
-	.long sys_setsid
-	.long sys_sigaction
-	.long sys_sgetmask
-	.long sys_ssetmask
-	.long sys_setreuid16	/* 70 */
-	.long sys_setregid16
-	.long sys_sigsuspend
-	.long sys_sigpending
-	.long sys_sethostname
-	.long sys_setrlimit	/* 75 */
-	.long sys_old_getrlimit
-	.long sys_getrusage
-	.long sys_gettimeofday
-	.long sys_settimeofday
-	.long sys_getgroups16	/* 80 */
-	.long sys_setgroups16
-	.long sys_old_select
-	.long sys_symlink
-	.long sys_lstat
-	.long sys_readlink	/* 85 */
-	.long sys_uselib
-	.long sys_swapon
-	.long sys_reboot
-	.long sys_old_readdir
-	.long sys_old_mmap	/* 90 */
-	.long sys_munmap
-	.long sys_truncate
-	.long sys_ftruncate
-	.long sys_fchmod
-	.long sys_fchown16	/* 95 */
-	.long sys_getpriority
-	.long sys_setpriority
-	.long sys_ni_syscall				/* old profil syscall holder */
-	.long sys_statfs
-	.long sys_fstatfs	/* 100 */
-	.long sys_ni_syscall				/* ioperm for i386 */
-	.long sys_socketcall
-	.long sys_syslog
-	.long sys_setitimer
-	.long sys_getitimer	/* 105 */
-	.long sys_newstat
-	.long sys_newlstat
-	.long sys_newfstat
-	.long sys_ni_syscall
-	.long sys_ni_syscall	/* 110 */	/* iopl for i386 */
-	.long sys_vhangup
-	.long sys_ni_syscall				/* obsolete idle() syscall */
-	.long sys_ni_syscall				/* vm86old for i386 */
-	.long sys_wait4
-	.long sys_swapoff	/* 115 */
-	.long sys_sysinfo
-	.long sys_ipc
-	.long sys_fsync
-	.long sys_sigreturn
-	.long sys_clone		/* 120 */
-	.long sys_setdomainname
-	.long sys_newuname
-	.long sys_cacheflush				/* modify_ldt for i386 */
-	.long sys_adjtimex
-	.long sys_mprotect	/* 125 */
-	.long sys_sigprocmask
-	.long sys_ni_syscall		/* old "create_module" */
-	.long sys_init_module
-	.long sys_delete_module
-	.long sys_ni_syscall	/* 130 - old "get_kernel_syms" */
-	.long sys_quotactl
-	.long sys_getpgid
-	.long sys_fchdir
-	.long sys_bdflush
-	.long sys_sysfs		/* 135 */
-	.long sys_personality
-	.long sys_ni_syscall				/* for afs_syscall */
-	.long sys_setfsuid16
-	.long sys_setfsgid16
-	.long sys_llseek	/* 140 */
-	.long sys_getdents
-	.long sys_select
-	.long sys_flock
-	.long sys_msync
-	.long sys_readv		/* 145 */
-	.long sys_writev
-	.long sys_getsid
-	.long sys_fdatasync
-	.long sys_sysctl
-	.long sys_mlock		/* 150 */
-	.long sys_munlock
-	.long sys_mlockall
-	.long sys_munlockall
-	.long sys_sched_setparam
-	.long sys_sched_getparam	/* 155 */
-	.long sys_sched_setscheduler
-	.long sys_sched_getscheduler
-	.long sys_sched_yield
-	.long sys_sched_get_priority_max
-	.long sys_sched_get_priority_min  /* 160 */
-	.long sys_sched_rr_get_interval
-	.long sys_nanosleep
-	.long sys_mremap
-	.long sys_setresuid16
-	.long sys_getresuid16	/* 165 */
-	.long sys_getpagesize
-	.long sys_ni_syscall		/* old sys_query_module */
-	.long sys_poll
-	.long sys_nfsservctl
-	.long sys_setresgid16	/* 170 */
-	.long sys_getresgid16
-	.long sys_prctl
-	.long sys_rt_sigreturn
-	.long sys_rt_sigaction
-	.long sys_rt_sigprocmask	/* 175 */
-	.long sys_rt_sigpending
-	.long sys_rt_sigtimedwait
-	.long sys_rt_sigqueueinfo
-	.long sys_rt_sigsuspend
-	.long sys_pread64	/* 180 */
-	.long sys_pwrite64
-	.long sys_lchown16;
-	.long sys_getcwd
-	.long sys_capget
-	.long sys_capset	/* 185 */
-	.long sys_sigaltstack
-	.long sys_sendfile
-	.long sys_ni_syscall				/* streams1 */
-	.long sys_ni_syscall				/* streams2 */
-	.long sys_vfork		/* 190 */
-	.long sys_getrlimit
-	.long sys_mmap2
-	.long sys_truncate64
-	.long sys_ftruncate64
-	.long sys_stat64	/* 195 */
-	.long sys_lstat64
-	.long sys_fstat64
-	.long sys_chown
-	.long sys_getuid
-	.long sys_getgid	/* 200 */
-	.long sys_geteuid
-	.long sys_getegid
-	.long sys_setreuid
-	.long sys_setregid
-	.long sys_getgroups	/* 205 */
-	.long sys_setgroups
-	.long sys_fchown
-	.long sys_setresuid
-	.long sys_getresuid
-	.long sys_setresgid	/* 210 */
-	.long sys_getresgid
-	.long sys_lchown
-	.long sys_setuid
-	.long sys_setgid
-	.long sys_setfsuid	/* 215 */
-	.long sys_setfsgid
-	.long sys_pivot_root
-	.long sys_ni_syscall
-	.long sys_ni_syscall
-	.long sys_getdents64	/* 220 */
-	.long sys_gettid
-	.long sys_tkill
-	.long sys_setxattr
-	.long sys_lsetxattr
-	.long sys_fsetxattr	/* 225 */
-	.long sys_getxattr
-	.long sys_lgetxattr
-	.long sys_fgetxattr
-	.long sys_listxattr
-	.long sys_llistxattr	/* 230 */
-	.long sys_flistxattr
-	.long sys_removexattr
-	.long sys_lremovexattr
-	.long sys_fremovexattr
-	.long sys_futex		/* 235 */
-	.long sys_sendfile64
-	.long sys_mincore
-	.long sys_madvise
-	.long sys_fcntl64
-	.long sys_readahead	/* 240 */
-	.long sys_io_setup
-	.long sys_io_destroy
-	.long sys_io_getevents
-	.long sys_io_submit
-	.long sys_io_cancel	/* 245 */
-	.long sys_fadvise64
-	.long sys_exit_group
-	.long sys_lookup_dcookie
-	.long sys_epoll_create
-	.long sys_epoll_ctl	/* 250 */
-	.long sys_epoll_wait
-	.long sys_remap_file_pages
-	.long sys_set_tid_address
-	.long sys_timer_create
-	.long sys_timer_settime	/* 255 */
-	.long sys_timer_gettime
-	.long sys_timer_getoverrun
-	.long sys_timer_delete
-	.long sys_clock_settime
-	.long sys_clock_gettime	/* 260 */
-	.long sys_clock_getres
-	.long sys_clock_nanosleep
-	.long sys_statfs64
-	.long sys_fstatfs64
-	.long sys_tgkill	/* 265 */
-	.long sys_utimes
-	.long sys_fadvise64_64
-	.long sys_mbind
-	.long sys_get_mempolicy
-	.long sys_set_mempolicy	/* 270 */
-	.long sys_mq_open
-	.long sys_mq_unlink
-	.long sys_mq_timedsend
-	.long sys_mq_timedreceive
-	.long sys_mq_notify	/* 275 */
-	.long sys_mq_getsetattr
-	.long sys_waitid
-	.long sys_ni_syscall	/* for sys_vserver */
-	.long sys_add_key
-	.long sys_request_key	/* 280 */
-	.long sys_keyctl
-	.long sys_ioprio_set
-	.long sys_ioprio_get
-	.long sys_inotify_init
-	.long sys_inotify_add_watch	/* 285 */
-	.long sys_inotify_rm_watch
-	.long sys_migrate_pages
-	.long sys_openat
-	.long sys_mkdirat
-	.long sys_mknodat		/* 290 */
-	.long sys_fchownat
-	.long sys_futimesat
-	.long sys_fstatat64
-	.long sys_unlinkat
-	.long sys_renameat		/* 295 */
-	.long sys_linkat
-	.long sys_symlinkat
-	.long sys_readlinkat
-	.long sys_fchmodat
-	.long sys_faccessat		/* 300 */
-	.long sys_ni_syscall		/* Reserved for pselect6 */
-	.long sys_ni_syscall		/* Reserved for ppoll */
-	.long sys_unshare
-	.long sys_set_robust_list
-	.long sys_get_robust_list	/* 305 */
-	.long sys_splice
-	.long sys_sync_file_range
-	.long sys_tee
-	.long sys_vmsplice
-	.long sys_move_pages		/* 310 */
-	.long sys_sched_setaffinity
-	.long sys_sched_getaffinity
-	.long sys_kexec_load
-	.long sys_getcpu
-	.long sys_epoll_pwait		/* 315 */
-	.long sys_utimensat
-	.long sys_signalfd
-	.long sys_timerfd_create
-	.long sys_eventfd
-	.long sys_fallocate		/* 320 */
-	.long sys_timerfd_settime
-	.long sys_timerfd_gettime
-	.long sys_signalfd4
-	.long sys_eventfd2
-	.long sys_epoll_create1		/* 325 */
-	.long sys_dup3
-	.long sys_pipe2
-	.long sys_inotify_init1
-	.long sys_preadv
-	.long sys_pwritev		/* 330 */
-	.long sys_rt_tgsigqueueinfo
-	.long sys_perf_event_open
-	.long sys_get_thread_area
-	.long sys_set_thread_area
-	.long sys_atomic_cmpxchg_32	/* 335 */
-	.long sys_atomic_barrier
-	.long sys_fanotify_init
-	.long sys_fanotify_mark
-	.long sys_prlimit64
-	.long sys_name_to_handle_at	/* 340 */
-	.long sys_open_by_handle_at
-	.long sys_clock_adjtime
-	.long sys_syncfs
-
diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S
index 9b8393d8adb..5909e392cb1 100644
--- a/arch/m68k/kernel/syscalltable.S
+++ b/arch/m68k/kernel/syscalltable.S
@@ -1,6 +1,4 @@
 /*
- *  linux/arch/m68knommu/kernel/syscalltable.S
- *
  *  Copyright (C) 2002, Greg Ungerer (gerg@snapgear.com)
  *
  *  Based on older entry.S files, the following copyrights apply:
@@ -9,171 +7,176 @@
  *                      Kenneth Albanowski <kjahds@kjahds.com>,
  *  Copyright (C) 2000  Lineo Inc. (www.lineo.com) 
  *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  Linux/m68k support by Hamish Macdonald
  */
 
 #include <linux/sys.h>
 #include <linux/linkage.h>
-#include <asm/unistd.h>
 
-.text
+#ifndef CONFIG_MMU
+#define sys_mmap2		sys_mmap_pgoff
+#endif
+
+.section .rodata
 ALIGN
 ENTRY(sys_call_table)
-	.long sys_restart_syscall	/* 0  -  old "setup()" system call */
+	.long sys_restart_syscall	/* 0 - old "setup()" system call, used for restarting */
 	.long sys_exit
 	.long sys_fork
 	.long sys_read
 	.long sys_write
-	.long sys_open		/* 5 */
+	.long sys_open			/* 5 */
 	.long sys_close
 	.long sys_waitpid
 	.long sys_creat
 	.long sys_link
-	.long sys_unlink	/* 10 */
+	.long sys_unlink		/* 10 */
 	.long sys_execve
 	.long sys_chdir
 	.long sys_time
 	.long sys_mknod
-	.long sys_chmod		/* 15 */
+	.long sys_chmod			/* 15 */
 	.long sys_chown16
-	.long sys_ni_syscall	/* old break syscall holder */
+	.long sys_ni_syscall		/* old break syscall holder */
 	.long sys_stat
 	.long sys_lseek
-	.long sys_getpid	/* 20 */
+	.long sys_getpid		/* 20 */
 	.long sys_mount
 	.long sys_oldumount
 	.long sys_setuid16
 	.long sys_getuid16
-	.long sys_stime		/* 25 */
+	.long sys_stime			/* 25 */
 	.long sys_ptrace
 	.long sys_alarm
 	.long sys_fstat
 	.long sys_pause
-	.long sys_utime		/* 30 */
-	.long sys_ni_syscall	/* old stty syscall holder */
-	.long sys_ni_syscall	/* old gtty syscall holder */
+	.long sys_utime			/* 30 */
+	.long sys_ni_syscall		/* old stty syscall holder */
+	.long sys_ni_syscall		/* old gtty syscall holder */
 	.long sys_access
 	.long sys_nice
-	.long sys_ni_syscall	/* 35 */ /* old ftime syscall holder */
+	.long sys_ni_syscall		/* 35 - old ftime syscall holder */
 	.long sys_sync
 	.long sys_kill
 	.long sys_rename
 	.long sys_mkdir
-	.long sys_rmdir		/* 40 */
+	.long sys_rmdir			/* 40 */
 	.long sys_dup
 	.long sys_pipe
 	.long sys_times
-	.long sys_ni_syscall	/* old prof syscall holder */
-	.long sys_brk		/* 45 */
+	.long sys_ni_syscall		/* old prof syscall holder */
+	.long sys_brk			/* 45 */
 	.long sys_setgid16
 	.long sys_getgid16
 	.long sys_signal
 	.long sys_geteuid16
-	.long sys_getegid16	/* 50 */
+	.long sys_getegid16		/* 50 */
 	.long sys_acct
-	.long sys_umount	/* recycled never used phys() */
-	.long sys_ni_syscall	/* old lock syscall holder */
+	.long sys_umount		/* recycled never used phys() */
+	.long sys_ni_syscall		/* old lock syscall holder */
 	.long sys_ioctl
-	.long sys_fcntl		/* 55 */
-	.long sys_ni_syscall	/* old mpx syscall holder */
+	.long sys_fcntl			/* 55 */
+	.long sys_ni_syscall		/* old mpx syscall holder */
 	.long sys_setpgid
-	.long sys_ni_syscall	/* old ulimit syscall holder */
+	.long sys_ni_syscall		/* old ulimit syscall holder */
 	.long sys_ni_syscall
-	.long sys_umask		/* 60 */
+	.long sys_umask			/* 60 */
 	.long sys_chroot
 	.long sys_ustat
 	.long sys_dup2
 	.long sys_getppid
-	.long sys_getpgrp	/* 65 */
+	.long sys_getpgrp		/* 65 */
 	.long sys_setsid
 	.long sys_sigaction
 	.long sys_sgetmask
 	.long sys_ssetmask
-	.long sys_setreuid16	/* 70 */
+	.long sys_setreuid16		/* 70 */
 	.long sys_setregid16
 	.long sys_sigsuspend
 	.long sys_sigpending
 	.long sys_sethostname
-	.long sys_setrlimit	/* 75 */
+	.long sys_setrlimit		/* 75 */
 	.long sys_old_getrlimit
 	.long sys_getrusage
 	.long sys_gettimeofday
 	.long sys_settimeofday
-	.long sys_getgroups16	/* 80 */
+	.long sys_getgroups16		/* 80 */
 	.long sys_setgroups16
 	.long sys_old_select
 	.long sys_symlink
 	.long sys_lstat
-	.long sys_readlink	/* 85 */
+	.long sys_readlink		/* 85 */
 	.long sys_uselib
-	.long sys_ni_syscall	/* sys_swapon */
+	.long sys_swapon
 	.long sys_reboot
 	.long sys_old_readdir
-	.long sys_old_mmap	/* 90 */
+	.long sys_old_mmap		/* 90 */
 	.long sys_munmap
 	.long sys_truncate
 	.long sys_ftruncate
 	.long sys_fchmod
-	.long sys_fchown16	/* 95 */
+	.long sys_fchown16		/* 95 */
 	.long sys_getpriority
 	.long sys_setpriority
-	.long sys_ni_syscall	/* old profil syscall holder */
+	.long sys_ni_syscall		/* old profil syscall holder */
 	.long sys_statfs
-	.long sys_fstatfs	/* 100 */
-	.long sys_ni_syscall	/* ioperm for i386 */
+	.long sys_fstatfs		/* 100 */
+	.long sys_ni_syscall		/* ioperm for i386 */
 	.long sys_socketcall
 	.long sys_syslog
 	.long sys_setitimer
-	.long sys_getitimer	/* 105 */
+	.long sys_getitimer		/* 105 */
 	.long sys_newstat
 	.long sys_newlstat
 	.long sys_newfstat
 	.long sys_ni_syscall
-	.long sys_ni_syscall	/* iopl for i386 */ /* 110 */
+	.long sys_ni_syscall		/* 110 - iopl for i386 */
 	.long sys_vhangup
-	.long sys_ni_syscall	/* obsolete idle() syscall */
-	.long sys_ni_syscall	/* vm86old for i386 */
+	.long sys_ni_syscall		/* obsolete idle() syscall */
+	.long sys_ni_syscall		/* vm86old for i386 */
 	.long sys_wait4
-	.long sys_ni_syscall	/* 115 */ /* sys_swapoff */
+	.long sys_swapoff		/* 115 */
 	.long sys_sysinfo
 	.long sys_ipc
 	.long sys_fsync
 	.long sys_sigreturn
-	.long sys_clone		/* 120 */
+	.long sys_clone			/* 120 */
 	.long sys_setdomainname
 	.long sys_newuname
-	.long sys_cacheflush	/* modify_ldt for i386 */
+	.long sys_cacheflush		/* modify_ldt for i386 */
 	.long sys_adjtimex
-	.long sys_ni_syscall	/* 125 */ /* sys_mprotect */
+	.long sys_mprotect		/* 125 */
 	.long sys_sigprocmask
-	.long sys_ni_syscall	/* old "creat_module" */
+	.long sys_ni_syscall		/* old "create_module" */
 	.long sys_init_module
 	.long sys_delete_module
-	.long sys_ni_syscall	/* 130: old "get_kernel_syms" */
+	.long sys_ni_syscall		/* 130 - old "get_kernel_syms" */
 	.long sys_quotactl
 	.long sys_getpgid
 	.long sys_fchdir
 	.long sys_bdflush
-	.long sys_sysfs		/* 135 */
+	.long sys_sysfs			/* 135 */
 	.long sys_personality
-	.long sys_ni_syscall	/* for afs_syscall */
+	.long sys_ni_syscall		/* for afs_syscall */
 	.long sys_setfsuid16
 	.long sys_setfsgid16
-	.long sys_llseek	/* 140 */
+	.long sys_llseek		/* 140 */
 	.long sys_getdents
 	.long sys_select
 	.long sys_flock
-	.long sys_ni_syscall	/* sys_msync */
-	.long sys_readv		/* 145 */
+	.long sys_msync
+	.long sys_readv			/* 145 */
 	.long sys_writev
 	.long sys_getsid
 	.long sys_fdatasync
 	.long sys_sysctl
-	.long sys_ni_syscall	/* 150 */ /* sys_mlock */
-	.long sys_ni_syscall	/* sys_munlock */
-	.long sys_ni_syscall	/* sys_mlockall */
-	.long sys_ni_syscall	/* sys_munlockall */
+	.long sys_mlock			/* 150 */
+	.long sys_munlock
+	.long sys_mlockall
+	.long sys_munlockall
 	.long sys_sched_setparam
-	.long sys_sched_getparam /* 155 */
+	.long sys_sched_getparam	/* 155 */
 	.long sys_sched_setscheduler
 	.long sys_sched_getscheduler
 	.long sys_sched_yield
@@ -181,124 +184,124 @@ ENTRY(sys_call_table)
 	.long sys_sched_get_priority_min  /* 160 */
 	.long sys_sched_rr_get_interval
 	.long sys_nanosleep
-	.long sys_ni_syscall	/* sys_mremap */
+	.long sys_mremap
 	.long sys_setresuid16
-	.long sys_getresuid16	/* 165 */
-	.long sys_getpagesize	/* sys_getpagesize */
-	.long sys_ni_syscall	/* old "query_module" */
+	.long sys_getresuid16		/* 165 */
+	.long sys_getpagesize
+	.long sys_ni_syscall		/* old "query_module" */
 	.long sys_poll
-	.long sys_ni_syscall	/* sys_nfsservctl */
-	.long sys_setresgid16	/* 170 */
+	.long sys_nfsservctl
+	.long sys_setresgid16		/* 170 */
 	.long sys_getresgid16
 	.long sys_prctl
 	.long sys_rt_sigreturn
 	.long sys_rt_sigaction
-	.long sys_rt_sigprocmask /* 175 */
+	.long sys_rt_sigprocmask	/* 175 */
 	.long sys_rt_sigpending
 	.long sys_rt_sigtimedwait
 	.long sys_rt_sigqueueinfo
 	.long sys_rt_sigsuspend
-	.long sys_pread64	/* 180 */
+	.long sys_pread64		/* 180 */
 	.long sys_pwrite64
 	.long sys_lchown16
 	.long sys_getcwd
 	.long sys_capget
-	.long sys_capset	/* 185 */
+	.long sys_capset		/* 185 */
 	.long sys_sigaltstack
 	.long sys_sendfile
-	.long sys_ni_syscall	/* streams1 */
-	.long sys_ni_syscall	/* streams2 */
-	.long sys_vfork		/* 190 */
+	.long sys_ni_syscall		/* streams1 */
+	.long sys_ni_syscall		/* streams2 */
+	.long sys_vfork			/* 190 */
 	.long sys_getrlimit
-	.long sys_mmap_pgoff
+	.long sys_mmap2
 	.long sys_truncate64
 	.long sys_ftruncate64
-	.long sys_stat64	/* 195 */
+	.long sys_stat64		/* 195 */
 	.long sys_lstat64
 	.long sys_fstat64
 	.long sys_chown
 	.long sys_getuid
-	.long sys_getgid	/* 200 */
+	.long sys_getgid		/* 200 */
 	.long sys_geteuid
 	.long sys_getegid
 	.long sys_setreuid
 	.long sys_setregid
-	.long sys_getgroups	/* 205 */
+	.long sys_getgroups		/* 205 */
 	.long sys_setgroups
 	.long sys_fchown
 	.long sys_setresuid
 	.long sys_getresuid
-	.long sys_setresgid	/* 210 */
+	.long sys_setresgid		/* 210 */
 	.long sys_getresgid
 	.long sys_lchown
 	.long sys_setuid
 	.long sys_setgid
-	.long sys_setfsuid	/* 215 */
+	.long sys_setfsuid		/* 215 */
 	.long sys_setfsgid
 	.long sys_pivot_root
 	.long sys_ni_syscall
 	.long sys_ni_syscall
-	.long sys_getdents64	/* 220 */
+	.long sys_getdents64		/* 220 */
 	.long sys_gettid
 	.long sys_tkill
 	.long sys_setxattr
 	.long sys_lsetxattr
-	.long sys_fsetxattr	/* 225 */
+	.long sys_fsetxattr		/* 225 */
 	.long sys_getxattr
 	.long sys_lgetxattr
 	.long sys_fgetxattr
 	.long sys_listxattr
-	.long sys_llistxattr	/* 230 */
+	.long sys_llistxattr		/* 230 */
 	.long sys_flistxattr
 	.long sys_removexattr
 	.long sys_lremovexattr
 	.long sys_fremovexattr
-	.long sys_futex		/* 235 */
+	.long sys_futex			/* 235 */
 	.long sys_sendfile64
-	.long sys_ni_syscall	/* sys_mincore */
-	.long sys_ni_syscall	/* sys_madvise */
+	.long sys_mincore
+	.long sys_madvise
 	.long sys_fcntl64
-	.long sys_readahead	/* 240 */
+	.long sys_readahead		/* 240 */
 	.long sys_io_setup
 	.long sys_io_destroy
 	.long sys_io_getevents
 	.long sys_io_submit
-	.long sys_io_cancel	/* 245 */
+	.long sys_io_cancel		/* 245 */
 	.long sys_fadvise64
 	.long sys_exit_group
 	.long sys_lookup_dcookie
 	.long sys_epoll_create
-	.long sys_epoll_ctl	/* 250 */
+	.long sys_epoll_ctl		/* 250 */
 	.long sys_epoll_wait
-	.long sys_ni_syscall	/* sys_remap_file_pages */
+	.long sys_remap_file_pages
 	.long sys_set_tid_address
 	.long sys_timer_create
-	.long sys_timer_settime	/* 255 */
+	.long sys_timer_settime		/* 255 */
 	.long sys_timer_gettime
 	.long sys_timer_getoverrun
 	.long sys_timer_delete
 	.long sys_clock_settime
-	.long sys_clock_gettime	/* 260 */
+	.long sys_clock_gettime		/* 260 */
 	.long sys_clock_getres
 	.long sys_clock_nanosleep
 	.long sys_statfs64
 	.long sys_fstatfs64
-	.long sys_tgkill	/* 265 */
+	.long sys_tgkill		/* 265 */
 	.long sys_utimes
 	.long sys_fadvise64_64
-	.long sys_mbind	
+	.long sys_mbind
 	.long sys_get_mempolicy
-	.long sys_set_mempolicy	/* 270 */
+	.long sys_set_mempolicy		/* 270 */
 	.long sys_mq_open
 	.long sys_mq_unlink
 	.long sys_mq_timedsend
 	.long sys_mq_timedreceive
-	.long sys_mq_notify	/* 275 */
+	.long sys_mq_notify		/* 275 */
 	.long sys_mq_getsetattr
 	.long sys_waitid
-	.long sys_ni_syscall	/* for sys_vserver */
+	.long sys_ni_syscall		/* for sys_vserver */
 	.long sys_add_key
-	.long sys_request_key	/* 280 */
+	.long sys_request_key		/* 280 */
 	.long sys_keyctl
 	.long sys_ioprio_set
 	.long sys_ioprio_get
@@ -319,8 +322,8 @@ ENTRY(sys_call_table)
 	.long sys_readlinkat
 	.long sys_fchmodat
 	.long sys_faccessat		/* 300 */
-	.long sys_ni_syscall		/* Reserved for pselect6 */
-	.long sys_ni_syscall		/* Reserved for ppoll */
+	.long sys_pselect6
+	.long sys_ppoll
 	.long sys_unshare
 	.long sys_set_robust_list
 	.long sys_get_robust_list	/* 305 */
@@ -363,7 +366,3 @@ ENTRY(sys_call_table)
 	.long sys_clock_adjtime
 	.long sys_syncfs
 
-	.rept NR_syscalls-(.-sys_call_table)/4
-		.long sys_ni_syscall
-	.endr
-
diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds
index 878be5f38ca..d0993594f55 100644
--- a/arch/m68k/kernel/vmlinux-std.lds
+++ b/arch/m68k/kernel/vmlinux-std.lds
@@ -25,6 +25,8 @@ SECTIONS
 
   EXCEPTION_TABLE(16)
 
+  _sdata = .;			/* Start of data section */
+
   RODATA
 
   RW_DATA_SECTION(16, PAGE_SIZE, THREAD_SIZE)
diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds
index 1ad6b7ad2c1..8080469ee6c 100644
--- a/arch/m68k/kernel/vmlinux-sun3.lds
+++ b/arch/m68k/kernel/vmlinux-sun3.lds
@@ -25,6 +25,7 @@ SECTIONS
   _etext = .;			/* End of text section */
 
   EXCEPTION_TABLE(16) :data
+  _sdata = .;			/* Start of rw data section */
   RW_DATA_SECTION(16, PAGE_SIZE, THREAD_SIZE) :data
   /* End of data goes *here* so that freeing init code works properly. */
   _edata = .;
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 02b7a03e422..8b3db1c587f 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -300,6 +300,8 @@ void __init paging_init(void)
 		zones_size[ZONE_DMA] = m68k_memory[i].size >> PAGE_SHIFT;
 		free_area_init_node(i, zones_size,
 				    m68k_memory[i].addr >> PAGE_SHIFT, NULL);
+		if (node_present_pages(i))
+			node_set_state(i, N_NORMAL_MEMORY);
 	}
 }
 
diff --git a/arch/microblaze/kernel/timer.c b/arch/microblaze/kernel/timer.c
index d8a214f11ac..e5550ce4e0e 100644
--- a/arch/microblaze/kernel/timer.c
+++ b/arch/microblaze/kernel/timer.c
@@ -217,16 +217,12 @@ static struct clocksource clocksource_microblaze = {
 	.rating		= 300,
 	.read		= microblaze_read,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift		= 8, /* I can shift it */
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
 static int __init microblaze_clocksource_init(void)
 {
-	clocksource_microblaze.mult =
-			clocksource_hz2mult(timer_clock_freq,
-						clocksource_microblaze.shift);
-	if (clocksource_register(&clocksource_microblaze))
+	if (clocksource_register_hz(&clocksource_microblaze, timer_clock_freq))
 		panic("failed to register clocksource");
 
 	/* stop timer1 */
diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms
index 7ff9b549204..aef6c917b45 100644
--- a/arch/mips/Kbuild.platforms
+++ b/arch/mips/Kbuild.platforms
@@ -11,6 +11,7 @@ platforms += dec
 platforms += emma
 platforms += jazz
 platforms += jz4740
+platforms += lantiq
 platforms += lasat
 platforms += loongson
 platforms += mipssim
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 8e256cc5dcd..cef1a854487 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -212,6 +212,24 @@ config MACH_JZ4740
 	select HAVE_PWM
 	select HAVE_CLK
 
+config LANTIQ
+	bool "Lantiq based platforms"
+	select DMA_NONCOHERENT
+	select IRQ_CPU
+	select CEVT_R4K
+	select CSRC_R4K
+	select SYS_HAS_CPU_MIPS32_R1
+	select SYS_HAS_CPU_MIPS32_R2
+	select SYS_SUPPORTS_BIG_ENDIAN
+	select SYS_SUPPORTS_32BIT_KERNEL
+	select SYS_SUPPORTS_MULTITHREADING
+	select SYS_HAS_EARLY_PRINTK
+	select ARCH_REQUIRE_GPIOLIB
+	select SWAP_IO_SPACE
+	select BOOT_RAW
+	select HAVE_CLK
+	select MIPS_MACHINE
+
 config LASAT
 	bool "LASAT Networks platforms"
 	select CEVT_R4K
@@ -736,6 +754,33 @@ config CAVIUM_OCTEON_REFERENCE_BOARD
 		Hikari
 	  Say Y here for most Octeon reference boards.
 
+config NLM_XLR_BOARD
+	bool "Netlogic XLR/XLS based systems"
+	depends on EXPERIMENTAL
+	select BOOT_ELF32
+	select NLM_COMMON
+	select NLM_XLR
+	select SYS_HAS_CPU_XLR
+	select SYS_SUPPORTS_SMP
+	select HW_HAS_PCI
+	select SWAP_IO_SPACE
+	select SYS_SUPPORTS_32BIT_KERNEL
+	select SYS_SUPPORTS_64BIT_KERNEL
+	select 64BIT_PHYS_ADDR
+	select SYS_SUPPORTS_BIG_ENDIAN
+	select SYS_SUPPORTS_HIGHMEM
+	select DMA_COHERENT
+	select NR_CPUS_DEFAULT_32
+	select CEVT_R4K
+	select CSRC_R4K
+	select IRQ_CPU
+	select ZONE_DMA if 64BIT
+	select SYNC_R4K
+	select SYS_HAS_EARLY_PRINTK
+	help
+	  Support for systems based on Netlogic XLR and XLS processors.
+	  Say Y here if you have a XLR or XLS based board.
+
 endchoice
 
 source "arch/mips/alchemy/Kconfig"
@@ -743,6 +788,7 @@ source "arch/mips/ath79/Kconfig"
 source "arch/mips/bcm63xx/Kconfig"
 source "arch/mips/jazz/Kconfig"
 source "arch/mips/jz4740/Kconfig"
+source "arch/mips/lantiq/Kconfig"
 source "arch/mips/lasat/Kconfig"
 source "arch/mips/pmc-sierra/Kconfig"
 source "arch/mips/powertv/Kconfig"
@@ -752,6 +798,7 @@ source "arch/mips/txx9/Kconfig"
 source "arch/mips/vr41xx/Kconfig"
 source "arch/mips/cavium-octeon/Kconfig"
 source "arch/mips/loongson/Kconfig"
+source "arch/mips/netlogic/Kconfig"
 
 endmenu
 
@@ -997,9 +1044,6 @@ config IRQ_GT641XX
 config IRQ_GIC
 	bool
 
-config IRQ_CPU_OCTEON
-	bool
-
 config MIPS_BOARDS_GEN
 	bool
 
@@ -1359,8 +1403,6 @@ config CPU_SB1
 config CPU_CAVIUM_OCTEON
 	bool "Cavium Octeon processor"
 	depends on SYS_HAS_CPU_CAVIUM_OCTEON
-	select IRQ_CPU
-	select IRQ_CPU_OCTEON
 	select CPU_HAS_PREFETCH
 	select CPU_SUPPORTS_64BIT_KERNEL
 	select SYS_SUPPORTS_SMP
@@ -1425,6 +1467,17 @@ config CPU_BMIPS5000
 	help
 	  Broadcom BMIPS5000 processors.
 
+config CPU_XLR
+	bool "Netlogic XLR SoC"
+	depends on SYS_HAS_CPU_XLR
+	select CPU_SUPPORTS_32BIT_KERNEL
+	select CPU_SUPPORTS_64BIT_KERNEL
+	select CPU_SUPPORTS_HIGHMEM
+	select WEAK_ORDERING
+	select WEAK_REORDERING_BEYOND_LLSC
+	select CPU_SUPPORTS_HUGEPAGES
+	help
+	  Netlogic Microsystems XLR/XLS processors.
 endchoice
 
 if CPU_LOONGSON2F
@@ -1555,6 +1608,9 @@ config SYS_HAS_CPU_BMIPS4380
 config SYS_HAS_CPU_BMIPS5000
 	bool
 
+config SYS_HAS_CPU_XLR
+	bool
+
 #
 # CPU may reorder R->R, R->W, W->R, W->W
 # Reordering beyond LL and SC is handled in WEAK_REORDERING_BEYOND_LLSC
@@ -2339,6 +2395,7 @@ config MMU
 
 config I8253
 	bool
+	select CLKSRC_I8253
 	select MIPS_EXTERNAL_TIMER
 
 config ZONE_DMA32
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 53e3514ba10..884819cd060 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -191,6 +191,18 @@ endif
 #
 include $(srctree)/arch/mips/Kbuild.platforms
 
+#
+# NETLOGIC SOC Common (common)
+#
+cflags-$(CONFIG_NLM_COMMON)		+= -I$(srctree)/arch/mips/include/asm/mach-netlogic
+cflags-$(CONFIG_NLM_COMMON)		+= -I$(srctree)/arch/mips/include/asm/netlogic
+
+#
+# NETLOGIC XLR/XLS SoC, Simulator and boards
+#
+core-$(CONFIG_NLM_XLR)      		+= arch/mips/netlogic/xlr/
+load-$(CONFIG_NLM_XLR_BOARD)		+= 0xffffffff84000000
+
 cflags-y			+= -I$(srctree)/arch/mips/include/asm/mach-generic
 drivers-$(CONFIG_PCI)		+= arch/mips/pci/
 
diff --git a/arch/mips/alchemy/common/dbdma.c b/arch/mips/alchemy/common/dbdma.c
index ca0506a8585..3a5abb54d50 100644
--- a/arch/mips/alchemy/common/dbdma.c
+++ b/arch/mips/alchemy/common/dbdma.c
@@ -36,7 +36,7 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <asm/mach-au1x00/au1000.h>
 #include <asm/mach-au1x00/au1xxx_dbdma.h>
 
@@ -58,7 +58,8 @@ static DEFINE_SPINLOCK(au1xxx_dbdma_spin_lock);
 /* I couldn't find a macro that did this... */
 #define ALIGN_ADDR(x, a)	((((u32)(x)) + (a-1)) & ~(a-1))
 
-static dbdma_global_t *dbdma_gptr = (dbdma_global_t *)DDMA_GLOBAL_BASE;
+static dbdma_global_t *dbdma_gptr =
+			(dbdma_global_t *)KSEG1ADDR(AU1550_DBDMA_CONF_PHYS_ADDR);
 static int dbdma_initialized;
 
 static dbdev_tab_t dbdev_tab[] = {
@@ -299,7 +300,7 @@ u32 au1xxx_dbdma_chan_alloc(u32 srcid, u32 destid,
 	if (ctp != NULL) {
 		memset(ctp, 0, sizeof(chan_tab_t));
 		ctp->chan_index = chan = i;
-		dcp = DDMA_CHANNEL_BASE;
+		dcp = KSEG1ADDR(AU1550_DBDMA_PHYS_ADDR);
 		dcp += (0x0100 * chan);
 		ctp->chan_ptr = (au1x_dma_chan_t *)dcp;
 		cp = (au1x_dma_chan_t *)dcp;
@@ -958,105 +959,75 @@ u32 au1xxx_dbdma_put_dscr(u32 chanid, au1x_ddma_desc_t *dscr)
 }
 
 
-struct alchemy_dbdma_sysdev {
-	struct sys_device sysdev;
-	u32 pm_regs[NUM_DBDMA_CHANS + 1][6];
-};
+static unsigned long alchemy_dbdma_pm_data[NUM_DBDMA_CHANS + 1][6];
 
-static int alchemy_dbdma_suspend(struct sys_device *dev,
-				 pm_message_t state)
+static int alchemy_dbdma_suspend(void)
 {
-	struct alchemy_dbdma_sysdev *sdev =
-		container_of(dev, struct alchemy_dbdma_sysdev, sysdev);
 	int i;
-	u32 addr;
+	void __iomem *addr;
 
-	addr = DDMA_GLOBAL_BASE;
-	sdev->pm_regs[0][0] = au_readl(addr + 0x00);
-	sdev->pm_regs[0][1] = au_readl(addr + 0x04);
-	sdev->pm_regs[0][2] = au_readl(addr + 0x08);
-	sdev->pm_regs[0][3] = au_readl(addr + 0x0c);
+	addr = (void __iomem *)KSEG1ADDR(AU1550_DBDMA_CONF_PHYS_ADDR);
+	alchemy_dbdma_pm_data[0][0] = __raw_readl(addr + 0x00);
+	alchemy_dbdma_pm_data[0][1] = __raw_readl(addr + 0x04);
+	alchemy_dbdma_pm_data[0][2] = __raw_readl(addr + 0x08);
+	alchemy_dbdma_pm_data[0][3] = __raw_readl(addr + 0x0c);
 
 	/* save channel configurations */
-	for (i = 1, addr = DDMA_CHANNEL_BASE; i <= NUM_DBDMA_CHANS; i++) {
-		sdev->pm_regs[i][0] = au_readl(addr + 0x00);
-		sdev->pm_regs[i][1] = au_readl(addr + 0x04);
-		sdev->pm_regs[i][2] = au_readl(addr + 0x08);
-		sdev->pm_regs[i][3] = au_readl(addr + 0x0c);
-		sdev->pm_regs[i][4] = au_readl(addr + 0x10);
-		sdev->pm_regs[i][5] = au_readl(addr + 0x14);
+	addr = (void __iomem *)KSEG1ADDR(AU1550_DBDMA_PHYS_ADDR);
+	for (i = 1; i <= NUM_DBDMA_CHANS; i++) {
+		alchemy_dbdma_pm_data[i][0] = __raw_readl(addr + 0x00);
+		alchemy_dbdma_pm_data[i][1] = __raw_readl(addr + 0x04);
+		alchemy_dbdma_pm_data[i][2] = __raw_readl(addr + 0x08);
+		alchemy_dbdma_pm_data[i][3] = __raw_readl(addr + 0x0c);
+		alchemy_dbdma_pm_data[i][4] = __raw_readl(addr + 0x10);
+		alchemy_dbdma_pm_data[i][5] = __raw_readl(addr + 0x14);
 
 		/* halt channel */
-		au_writel(sdev->pm_regs[i][0] & ~1, addr + 0x00);
-		au_sync();
-		while (!(au_readl(addr + 0x14) & 1))
-			au_sync();
+		__raw_writel(alchemy_dbdma_pm_data[i][0] & ~1, addr + 0x00);
+		wmb();
+		while (!(__raw_readl(addr + 0x14) & 1))
+			wmb();
 
 		addr += 0x100;	/* next channel base */
 	}
 	/* disable channel interrupts */
-	au_writel(0, DDMA_GLOBAL_BASE + 0x0c);
-	au_sync();
+	addr = (void __iomem *)KSEG1ADDR(AU1550_DBDMA_CONF_PHYS_ADDR);
+	__raw_writel(0, addr + 0x0c);
+	wmb();
 
 	return 0;
 }
 
-static int alchemy_dbdma_resume(struct sys_device *dev)
+static void alchemy_dbdma_resume(void)
 {
-	struct alchemy_dbdma_sysdev *sdev =
-		container_of(dev, struct alchemy_dbdma_sysdev, sysdev);
 	int i;
-	u32 addr;
+	void __iomem *addr;
 
-	addr = DDMA_GLOBAL_BASE;
-	au_writel(sdev->pm_regs[0][0], addr + 0x00);
-	au_writel(sdev->pm_regs[0][1], addr + 0x04);
-	au_writel(sdev->pm_regs[0][2], addr + 0x08);
-	au_writel(sdev->pm_regs[0][3], addr + 0x0c);
+	addr = (void __iomem *)KSEG1ADDR(AU1550_DBDMA_CONF_PHYS_ADDR);
+	__raw_writel(alchemy_dbdma_pm_data[0][0], addr + 0x00);
+	__raw_writel(alchemy_dbdma_pm_data[0][1], addr + 0x04);
+	__raw_writel(alchemy_dbdma_pm_data[0][2], addr + 0x08);
+	__raw_writel(alchemy_dbdma_pm_data[0][3], addr + 0x0c);
 
 	/* restore channel configurations */
-	for (i = 1, addr = DDMA_CHANNEL_BASE; i <= NUM_DBDMA_CHANS; i++) {
-		au_writel(sdev->pm_regs[i][0], addr + 0x00);
-		au_writel(sdev->pm_regs[i][1], addr + 0x04);
-		au_writel(sdev->pm_regs[i][2], addr + 0x08);
-		au_writel(sdev->pm_regs[i][3], addr + 0x0c);
-		au_writel(sdev->pm_regs[i][4], addr + 0x10);
-		au_writel(sdev->pm_regs[i][5], addr + 0x14);
-		au_sync();
+	addr = (void __iomem *)KSEG1ADDR(AU1550_DBDMA_PHYS_ADDR);
+	for (i = 1; i <= NUM_DBDMA_CHANS; i++) {
+		__raw_writel(alchemy_dbdma_pm_data[i][0], addr + 0x00);
+		__raw_writel(alchemy_dbdma_pm_data[i][1], addr + 0x04);
+		__raw_writel(alchemy_dbdma_pm_data[i][2], addr + 0x08);
+		__raw_writel(alchemy_dbdma_pm_data[i][3], addr + 0x0c);
+		__raw_writel(alchemy_dbdma_pm_data[i][4], addr + 0x10);
+		__raw_writel(alchemy_dbdma_pm_data[i][5], addr + 0x14);
+		wmb();
 		addr += 0x100;	/* next channel base */
 	}
-
-	return 0;
 }
 
-static struct sysdev_class alchemy_dbdma_sysdev_class = {
-	.name		= "dbdma",
+static struct syscore_ops alchemy_dbdma_syscore_ops = {
 	.suspend	= alchemy_dbdma_suspend,
 	.resume		= alchemy_dbdma_resume,
 };
 
-static int __init alchemy_dbdma_sysdev_init(void)
-{
-	struct alchemy_dbdma_sysdev *sdev;
-	int ret;
-
-	ret = sysdev_class_register(&alchemy_dbdma_sysdev_class);
-	if (ret)
-		return ret;
-
-	sdev = kzalloc(sizeof(struct alchemy_dbdma_sysdev), GFP_KERNEL);
-	if (!sdev)
-		return -ENOMEM;
-
-	sdev->sysdev.id = -1;
-	sdev->sysdev.cls = &alchemy_dbdma_sysdev_class;
-	ret = sysdev_register(&sdev->sysdev);
-	if (ret)
-		kfree(sdev);
-
-	return ret;
-}
-
 static int __init au1xxx_dbdma_init(void)
 {
 	int irq_nr, ret;
@@ -1084,11 +1055,7 @@ static int __init au1xxx_dbdma_init(void)
 	else {
 		dbdma_initialized = 1;
 		printk(KERN_INFO "Alchemy DBDMA initialized\n");
-		ret = alchemy_dbdma_sysdev_init();
-		if (ret) {
-			printk(KERN_ERR "DBDMA PM init failed\n");
-			ret = 0;
-		}
+		register_syscore_ops(&alchemy_dbdma_syscore_ops);
 	}
 
 	return ret;
diff --git a/arch/mips/alchemy/common/dma.c b/arch/mips/alchemy/common/dma.c
index d5278877891..347980e79a8 100644
--- a/arch/mips/alchemy/common/dma.c
+++ b/arch/mips/alchemy/common/dma.c
@@ -58,6 +58,9 @@
  * returned from request_dma.
  */
 
+/* DMA Channel register block spacing */
+#define DMA_CHANNEL_LEN		0x00000100
+
 DEFINE_SPINLOCK(au1000_dma_spin_lock);
 
 struct dma_chan au1000_dma_table[NUM_AU1000_DMA_CHANNELS] = {
@@ -77,22 +80,23 @@ static const struct dma_dev {
 	unsigned int fifo_addr;
 	unsigned int dma_mode;
 } dma_dev_table[DMA_NUM_DEV] = {
-	{UART0_ADDR + UART_TX, 0},
-	{UART0_ADDR + UART_RX, 0},
-	{0, 0},
-	{0, 0},
-	{AC97C_DATA, DMA_DW16 },          /* coherent */
-	{AC97C_DATA, DMA_DR | DMA_DW16 }, /* coherent */
-	{UART3_ADDR + UART_TX, DMA_DW8 | DMA_NC},
-	{UART3_ADDR + UART_RX, DMA_DR | DMA_DW8 | DMA_NC},
-	{USBD_EP0RD, DMA_DR | DMA_DW8 | DMA_NC},
-	{USBD_EP0WR, DMA_DW8 | DMA_NC},
-	{USBD_EP2WR, DMA_DW8 | DMA_NC},
-	{USBD_EP3WR, DMA_DW8 | DMA_NC},
-	{USBD_EP4RD, DMA_DR | DMA_DW8 | DMA_NC},
-	{USBD_EP5RD, DMA_DR | DMA_DW8 | DMA_NC},
-	{I2S_DATA, DMA_DW32 | DMA_NC},
-	{I2S_DATA, DMA_DR | DMA_DW32 | DMA_NC}
+	{ AU1000_UART0_PHYS_ADDR + 0x04, DMA_DW8 },		/* UART0_TX */
+	{ AU1000_UART0_PHYS_ADDR + 0x00, DMA_DW8 | DMA_DR },	/* UART0_RX */
+	{ 0, 0 },	/* DMA_REQ0 */
+	{ 0, 0 },	/* DMA_REQ1 */
+	{ AU1000_AC97_PHYS_ADDR + 0x08, DMA_DW16 },		/* AC97 TX c */
+	{ AU1000_AC97_PHYS_ADDR + 0x08, DMA_DW16 | DMA_DR },	/* AC97 RX c */
+	{ AU1000_UART3_PHYS_ADDR + 0x04, DMA_DW8 | DMA_NC },	/* UART3_TX */
+	{ AU1000_UART3_PHYS_ADDR + 0x00, DMA_DW8 | DMA_NC | DMA_DR }, /* UART3_RX */
+	{ AU1000_USBD_PHYS_ADDR + 0x00, DMA_DW8 | DMA_NC | DMA_DR }, /* EP0RD */
+	{ AU1000_USBD_PHYS_ADDR + 0x04, DMA_DW8 | DMA_NC }, /* EP0WR */
+	{ AU1000_USBD_PHYS_ADDR + 0x08, DMA_DW8 | DMA_NC }, /* EP2WR */
+	{ AU1000_USBD_PHYS_ADDR + 0x0c, DMA_DW8 | DMA_NC }, /* EP3WR */
+	{ AU1000_USBD_PHYS_ADDR + 0x10, DMA_DW8 | DMA_NC | DMA_DR }, /* EP4RD */
+	{ AU1000_USBD_PHYS_ADDR + 0x14, DMA_DW8 | DMA_NC | DMA_DR }, /* EP5RD */
+	/* on Au1500, these 2 are DMA_REQ2/3 (GPIO208/209) instead! */
+	{ AU1000_I2S_PHYS_ADDR + 0x00, DMA_DW32 | DMA_NC},	/* I2S TX */
+	{ AU1000_I2S_PHYS_ADDR + 0x00, DMA_DW32 | DMA_NC | DMA_DR}, /* I2S RX */
 };
 
 int au1000_dma_read_proc(char *buf, char **start, off_t fpos,
@@ -123,10 +127,10 @@ int au1000_dma_read_proc(char *buf, char **start, off_t fpos,
 
 /* Device FIFO addresses and default DMA modes - 2nd bank */
 static const struct dma_dev dma_dev_table_bank2[DMA_NUM_DEV_BANK2] = {
-	{ SD0_XMIT_FIFO, DMA_DS | DMA_DW8 },		/* coherent */
-	{ SD0_RECV_FIFO, DMA_DS | DMA_DR | DMA_DW8 },	/* coherent */
-	{ SD1_XMIT_FIFO, DMA_DS | DMA_DW8 },		/* coherent */
-	{ SD1_RECV_FIFO, DMA_DS | DMA_DR | DMA_DW8 }	/* coherent */
+	{ AU1100_SD0_PHYS_ADDR + 0x00, DMA_DS | DMA_DW8 },		/* coherent */
+	{ AU1100_SD0_PHYS_ADDR + 0x04, DMA_DS | DMA_DW8 | DMA_DR },	/* coherent */
+	{ AU1100_SD1_PHYS_ADDR + 0x00, DMA_DS | DMA_DW8 },		/* coherent */
+	{ AU1100_SD1_PHYS_ADDR + 0x04, DMA_DS | DMA_DW8 | DMA_DR }	/* coherent */
 };
 
 void dump_au1000_dma_channel(unsigned int dmanr)
@@ -202,7 +206,7 @@ int request_au1000_dma(int dev_id, const char *dev_str,
 	}
 
 	/* fill it in */
-	chan->io = DMA_CHANNEL_BASE + i * DMA_CHANNEL_LEN;
+	chan->io = KSEG1ADDR(AU1000_DMA_PHYS_ADDR) + i * DMA_CHANNEL_LEN;
 	chan->dev_id = dev_id;
 	chan->dev_str = dev_str;
 	chan->fifo_addr = dev->fifo_addr;
diff --git a/arch/mips/alchemy/common/irq.c b/arch/mips/alchemy/common/irq.c
index 55dd7c88851..8b60ba0675e 100644
--- a/arch/mips/alchemy/common/irq.c
+++ b/arch/mips/alchemy/common/irq.c
@@ -30,7 +30,7 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/slab.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <asm/irq_cpu.h>
 #include <asm/mipsregs.h>
@@ -39,6 +39,36 @@
 #include <asm/mach-pb1x00/pb1000.h>
 #endif
 
+/* Interrupt Controller register offsets */
+#define IC_CFG0RD	0x40
+#define IC_CFG0SET	0x40
+#define IC_CFG0CLR	0x44
+#define IC_CFG1RD	0x48
+#define IC_CFG1SET	0x48
+#define IC_CFG1CLR	0x4C
+#define IC_CFG2RD	0x50
+#define IC_CFG2SET	0x50
+#define IC_CFG2CLR	0x54
+#define IC_REQ0INT	0x54
+#define IC_SRCRD	0x58
+#define IC_SRCSET	0x58
+#define IC_SRCCLR	0x5C
+#define IC_REQ1INT	0x5C
+#define IC_ASSIGNRD	0x60
+#define IC_ASSIGNSET	0x60
+#define IC_ASSIGNCLR	0x64
+#define IC_WAKERD	0x68
+#define IC_WAKESET	0x68
+#define IC_WAKECLR	0x6C
+#define IC_MASKRD	0x70
+#define IC_MASKSET	0x70
+#define IC_MASKCLR	0x74
+#define IC_RISINGRD	0x78
+#define IC_RISINGCLR	0x78
+#define IC_FALLINGRD	0x7C
+#define IC_FALLINGCLR	0x7C
+#define IC_TESTBIT	0x80
+
 static int au1x_ic_settype(struct irq_data *d, unsigned int flow_type);
 
 /* NOTE on interrupt priorities: The original writers of this code said:
@@ -221,89 +251,101 @@ struct au1xxx_irqmap au1200_irqmap[] __initdata = {
 static void au1x_ic0_unmask(struct irq_data *d)
 {
 	unsigned int bit = d->irq - AU1000_INTC0_INT_BASE;
-	au_writel(1 << bit, IC0_MASKSET);
-	au_writel(1 << bit, IC0_WAKESET);
-	au_sync();
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR);
+
+	__raw_writel(1 << bit, base + IC_MASKSET);
+	__raw_writel(1 << bit, base + IC_WAKESET);
+	wmb();
 }
 
 static void au1x_ic1_unmask(struct irq_data *d)
 {
 	unsigned int bit = d->irq - AU1000_INTC1_INT_BASE;
-	au_writel(1 << bit, IC1_MASKSET);
-	au_writel(1 << bit, IC1_WAKESET);
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR);
+
+	__raw_writel(1 << bit, base + IC_MASKSET);
+	__raw_writel(1 << bit, base + IC_WAKESET);
 
 /* very hacky. does the pb1000 cpld auto-disable this int?
  * nowhere in the current kernel sources is it disabled.	--mlau
  */
 #if defined(CONFIG_MIPS_PB1000)
 	if (d->irq == AU1000_GPIO15_INT)
-		au_writel(0x4000, PB1000_MDR); /* enable int */
+		__raw_writel(0x4000, (void __iomem *)PB1000_MDR); /* enable int */
 #endif
-	au_sync();
+	wmb();
 }
 
 static void au1x_ic0_mask(struct irq_data *d)
 {
 	unsigned int bit = d->irq - AU1000_INTC0_INT_BASE;
-	au_writel(1 << bit, IC0_MASKCLR);
-	au_writel(1 << bit, IC0_WAKECLR);
-	au_sync();
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR);
+
+	__raw_writel(1 << bit, base + IC_MASKCLR);
+	__raw_writel(1 << bit, base + IC_WAKECLR);
+	wmb();
 }
 
 static void au1x_ic1_mask(struct irq_data *d)
 {
 	unsigned int bit = d->irq - AU1000_INTC1_INT_BASE;
-	au_writel(1 << bit, IC1_MASKCLR);
-	au_writel(1 << bit, IC1_WAKECLR);
-	au_sync();
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR);
+
+	__raw_writel(1 << bit, base + IC_MASKCLR);
+	__raw_writel(1 << bit, base + IC_WAKECLR);
+	wmb();
 }
 
 static void au1x_ic0_ack(struct irq_data *d)
 {
 	unsigned int bit = d->irq - AU1000_INTC0_INT_BASE;
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR);
 
 	/*
 	 * This may assume that we don't get interrupts from
 	 * both edges at once, or if we do, that we don't care.
 	 */
-	au_writel(1 << bit, IC0_FALLINGCLR);
-	au_writel(1 << bit, IC0_RISINGCLR);
-	au_sync();
+	__raw_writel(1 << bit, base + IC_FALLINGCLR);
+	__raw_writel(1 << bit, base + IC_RISINGCLR);
+	wmb();
 }
 
 static void au1x_ic1_ack(struct irq_data *d)
 {
 	unsigned int bit = d->irq - AU1000_INTC1_INT_BASE;
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR);
 
 	/*
 	 * This may assume that we don't get interrupts from
 	 * both edges at once, or if we do, that we don't care.
 	 */
-	au_writel(1 << bit, IC1_FALLINGCLR);
-	au_writel(1 << bit, IC1_RISINGCLR);
-	au_sync();
+	__raw_writel(1 << bit, base + IC_FALLINGCLR);
+	__raw_writel(1 << bit, base + IC_RISINGCLR);
+	wmb();
 }
 
 static void au1x_ic0_maskack(struct irq_data *d)
 {
 	unsigned int bit = d->irq - AU1000_INTC0_INT_BASE;
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR);
 
-	au_writel(1 << bit, IC0_WAKECLR);
-	au_writel(1 << bit, IC0_MASKCLR);
-	au_writel(1 << bit, IC0_RISINGCLR);
-	au_writel(1 << bit, IC0_FALLINGCLR);
-	au_sync();
+	__raw_writel(1 << bit, base + IC_WAKECLR);
+	__raw_writel(1 << bit, base + IC_MASKCLR);
+	__raw_writel(1 << bit, base + IC_RISINGCLR);
+	__raw_writel(1 << bit, base + IC_FALLINGCLR);
+	wmb();
 }
 
 static void au1x_ic1_maskack(struct irq_data *d)
 {
 	unsigned int bit = d->irq - AU1000_INTC1_INT_BASE;
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR);
 
-	au_writel(1 << bit, IC1_WAKECLR);
-	au_writel(1 << bit, IC1_MASKCLR);
-	au_writel(1 << bit, IC1_RISINGCLR);
-	au_writel(1 << bit, IC1_FALLINGCLR);
-	au_sync();
+	__raw_writel(1 << bit, base + IC_WAKECLR);
+	__raw_writel(1 << bit, base + IC_MASKCLR);
+	__raw_writel(1 << bit, base + IC_RISINGCLR);
+	__raw_writel(1 << bit, base + IC_FALLINGCLR);
+	wmb();
 }
 
 static int au1x_ic1_setwake(struct irq_data *d, unsigned int on)
@@ -318,13 +360,13 @@ static int au1x_ic1_setwake(struct irq_data *d, unsigned int on)
 		return -EINVAL;
 
 	local_irq_save(flags);
-	wakemsk = au_readl(SYS_WAKEMSK);
+	wakemsk = __raw_readl((void __iomem *)SYS_WAKEMSK);
 	if (on)
 		wakemsk |= 1 << bit;
 	else
 		wakemsk &= ~(1 << bit);
-	au_writel(wakemsk, SYS_WAKEMSK);
-	au_sync();
+	__raw_writel(wakemsk, (void __iomem *)SYS_WAKEMSK);
+	wmb();
 	local_irq_restore(flags);
 
 	return 0;
@@ -356,81 +398,74 @@ static struct irq_chip au1x_ic1_chip = {
 static int au1x_ic_settype(struct irq_data *d, unsigned int flow_type)
 {
 	struct irq_chip *chip;
-	unsigned long icr[6];
-	unsigned int bit, ic, irq = d->irq;
+	unsigned int bit, irq = d->irq;
 	irq_flow_handler_t handler = NULL;
 	unsigned char *name = NULL;
+	void __iomem *base;
 	int ret;
 
 	if (irq >= AU1000_INTC1_INT_BASE) {
 		bit = irq - AU1000_INTC1_INT_BASE;
 		chip = &au1x_ic1_chip;
-		ic = 1;
+		base = (void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR);
 	} else {
 		bit = irq - AU1000_INTC0_INT_BASE;
 		chip = &au1x_ic0_chip;
-		ic = 0;
+		base = (void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR);
 	}
 
 	if (bit > 31)
 		return -EINVAL;
 
-	icr[0] = ic ? IC1_CFG0SET : IC0_CFG0SET;
-	icr[1] = ic ? IC1_CFG1SET : IC0_CFG1SET;
-	icr[2] = ic ? IC1_CFG2SET : IC0_CFG2SET;
-	icr[3] = ic ? IC1_CFG0CLR : IC0_CFG0CLR;
-	icr[4] = ic ? IC1_CFG1CLR : IC0_CFG1CLR;
-	icr[5] = ic ? IC1_CFG2CLR : IC0_CFG2CLR;
-
 	ret = 0;
 
 	switch (flow_type) {	/* cfgregs 2:1:0 */
 	case IRQ_TYPE_EDGE_RISING:	/* 0:0:1 */
-		au_writel(1 << bit, icr[5]);
-		au_writel(1 << bit, icr[4]);
-		au_writel(1 << bit, icr[0]);
+		__raw_writel(1 << bit, base + IC_CFG2CLR);
+		__raw_writel(1 << bit, base + IC_CFG1CLR);
+		__raw_writel(1 << bit, base + IC_CFG0SET);
 		handler = handle_edge_irq;
 		name = "riseedge";
 		break;
 	case IRQ_TYPE_EDGE_FALLING:	/* 0:1:0 */
-		au_writel(1 << bit, icr[5]);
-		au_writel(1 << bit, icr[1]);
-		au_writel(1 << bit, icr[3]);
+		__raw_writel(1 << bit, base + IC_CFG2CLR);
+		__raw_writel(1 << bit, base + IC_CFG1SET);
+		__raw_writel(1 << bit, base + IC_CFG0CLR);
 		handler = handle_edge_irq;
 		name = "falledge";
 		break;
 	case IRQ_TYPE_EDGE_BOTH:	/* 0:1:1 */
-		au_writel(1 << bit, icr[5]);
-		au_writel(1 << bit, icr[1]);
-		au_writel(1 << bit, icr[0]);
+		__raw_writel(1 << bit, base + IC_CFG2CLR);
+		__raw_writel(1 << bit, base + IC_CFG1SET);
+		__raw_writel(1 << bit, base + IC_CFG0SET);
 		handler = handle_edge_irq;
 		name = "bothedge";
 		break;
 	case IRQ_TYPE_LEVEL_HIGH:	/* 1:0:1 */
-		au_writel(1 << bit, icr[2]);
-		au_writel(1 << bit, icr[4]);
-		au_writel(1 << bit, icr[0]);
+		__raw_writel(1 << bit, base + IC_CFG2SET);
+		__raw_writel(1 << bit, base + IC_CFG1CLR);
+		__raw_writel(1 << bit, base + IC_CFG0SET);
 		handler = handle_level_irq;
 		name = "hilevel";
 		break;
 	case IRQ_TYPE_LEVEL_LOW:	/* 1:1:0 */
-		au_writel(1 << bit, icr[2]);
-		au_writel(1 << bit, icr[1]);
-		au_writel(1 << bit, icr[3]);
+		__raw_writel(1 << bit, base + IC_CFG2SET);
+		__raw_writel(1 << bit, base + IC_CFG1SET);
+		__raw_writel(1 << bit, base + IC_CFG0CLR);
 		handler = handle_level_irq;
 		name = "lowlevel";
 		break;
 	case IRQ_TYPE_NONE:		/* 0:0:0 */
-		au_writel(1 << bit, icr[5]);
-		au_writel(1 << bit, icr[4]);
-		au_writel(1 << bit, icr[3]);
+		__raw_writel(1 << bit, base + IC_CFG2CLR);
+		__raw_writel(1 << bit, base + IC_CFG1CLR);
+		__raw_writel(1 << bit, base + IC_CFG0CLR);
 		break;
 	default:
 		ret = -EINVAL;
 	}
 	__irq_set_chip_handler_name_locked(d->irq, chip, handler, name);
 
-	au_sync();
+	wmb();
 
 	return ret;
 }
@@ -444,21 +479,21 @@ asmlinkage void plat_irq_dispatch(void)
 		off = MIPS_CPU_IRQ_BASE + 7;
 		goto handle;
 	} else if (pending & CAUSEF_IP2) {
-		s = IC0_REQ0INT;
+		s = KSEG1ADDR(AU1000_IC0_PHYS_ADDR) + IC_REQ0INT;
 		off = AU1000_INTC0_INT_BASE;
 	} else if (pending & CAUSEF_IP3) {
-		s = IC0_REQ1INT;
+		s = KSEG1ADDR(AU1000_IC0_PHYS_ADDR) + IC_REQ1INT;
 		off = AU1000_INTC0_INT_BASE;
 	} else if (pending & CAUSEF_IP4) {
-		s = IC1_REQ0INT;
+		s = KSEG1ADDR(AU1000_IC1_PHYS_ADDR) + IC_REQ0INT;
 		off = AU1000_INTC1_INT_BASE;
 	} else if (pending & CAUSEF_IP5) {
-		s = IC1_REQ1INT;
+		s = KSEG1ADDR(AU1000_IC1_PHYS_ADDR) + IC_REQ1INT;
 		off = AU1000_INTC1_INT_BASE;
 	} else
 		goto spurious;
 
-	s = au_readl(s);
+	s = __raw_readl((void __iomem *)s);
 	if (unlikely(!s)) {
 spurious:
 		spurious_interrupt();
@@ -469,48 +504,42 @@ handle:
 	do_IRQ(off);
 }
 
+
+static inline void ic_init(void __iomem *base)
+{
+	/* initialize interrupt controller to a safe state */
+	__raw_writel(0xffffffff, base + IC_CFG0CLR);
+	__raw_writel(0xffffffff, base + IC_CFG1CLR);
+	__raw_writel(0xffffffff, base + IC_CFG2CLR);
+	__raw_writel(0xffffffff, base + IC_MASKCLR);
+	__raw_writel(0xffffffff, base + IC_ASSIGNCLR);
+	__raw_writel(0xffffffff, base + IC_WAKECLR);
+	__raw_writel(0xffffffff, base + IC_SRCSET);
+	__raw_writel(0xffffffff, base + IC_FALLINGCLR);
+	__raw_writel(0xffffffff, base + IC_RISINGCLR);
+	__raw_writel(0x00000000, base + IC_TESTBIT);
+	wmb();
+}
+
 static void __init au1000_init_irq(struct au1xxx_irqmap *map)
 {
 	unsigned int bit, irq_nr;
-	int i;
-
-	/*
-	 * Initialize interrupt controllers to a safe state.
-	 */
-	au_writel(0xffffffff, IC0_CFG0CLR);
-	au_writel(0xffffffff, IC0_CFG1CLR);
-	au_writel(0xffffffff, IC0_CFG2CLR);
-	au_writel(0xffffffff, IC0_MASKCLR);
-	au_writel(0xffffffff, IC0_ASSIGNCLR);
-	au_writel(0xffffffff, IC0_WAKECLR);
-	au_writel(0xffffffff, IC0_SRCSET);
-	au_writel(0xffffffff, IC0_FALLINGCLR);
-	au_writel(0xffffffff, IC0_RISINGCLR);
-	au_writel(0x00000000, IC0_TESTBIT);
-
-	au_writel(0xffffffff, IC1_CFG0CLR);
-	au_writel(0xffffffff, IC1_CFG1CLR);
-	au_writel(0xffffffff, IC1_CFG2CLR);
-	au_writel(0xffffffff, IC1_MASKCLR);
-	au_writel(0xffffffff, IC1_ASSIGNCLR);
-	au_writel(0xffffffff, IC1_WAKECLR);
-	au_writel(0xffffffff, IC1_SRCSET);
-	au_writel(0xffffffff, IC1_FALLINGCLR);
-	au_writel(0xffffffff, IC1_RISINGCLR);
-	au_writel(0x00000000, IC1_TESTBIT);
+	void __iomem *base;
 
+	ic_init((void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR));
+	ic_init((void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR));
 	mips_cpu_irq_init();
 
 	/* register all 64 possible IC0+IC1 irq sources as type "none".
 	 * Use set_irq_type() to set edge/level behaviour at runtime.
 	 */
-	for (i = AU1000_INTC0_INT_BASE;
-	     (i < AU1000_INTC0_INT_BASE + 32); i++)
-		au1x_ic_settype(irq_get_irq_data(i), IRQ_TYPE_NONE);
+	for (irq_nr = AU1000_INTC0_INT_BASE;
+	     (irq_nr < AU1000_INTC0_INT_BASE + 32); irq_nr++)
+		au1x_ic_settype(irq_get_irq_data(irq_nr), IRQ_TYPE_NONE);
 
-	for (i = AU1000_INTC1_INT_BASE;
-	     (i < AU1000_INTC1_INT_BASE + 32); i++)
-		au1x_ic_settype(irq_get_irq_data(i), IRQ_TYPE_NONE);
+	for (irq_nr = AU1000_INTC1_INT_BASE;
+	     (irq_nr < AU1000_INTC1_INT_BASE + 32); irq_nr++)
+		au1x_ic_settype(irq_get_irq_data(irq_nr), IRQ_TYPE_NONE);
 
 	/*
 	 * Initialize IC0, which is fixed per processor.
@@ -520,13 +549,13 @@ static void __init au1000_init_irq(struct au1xxx_irqmap *map)
 
 		if (irq_nr >= AU1000_INTC1_INT_BASE) {
 			bit = irq_nr - AU1000_INTC1_INT_BASE;
-			if (map->im_request)
-				au_writel(1 << bit, IC1_ASSIGNSET);
+			base = (void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR);
 		} else {
 			bit = irq_nr - AU1000_INTC0_INT_BASE;
-			if (map->im_request)
-				au_writel(1 << bit, IC0_ASSIGNSET);
+			base = (void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR);
 		}
+		if (map->im_request)
+			__raw_writel(1 << bit, base + IC_ASSIGNSET);
 
 		au1x_ic_settype(irq_get_irq_data(irq_nr), map->im_type);
 		++map;
@@ -556,90 +585,62 @@ void __init arch_init_irq(void)
 	}
 }
 
-struct alchemy_ic_sysdev {
-	struct sys_device sysdev;
-	void __iomem *base;
-	unsigned long pmdata[7];
-};
 
-static int alchemy_ic_suspend(struct sys_device *dev, pm_message_t state)
-{
-	struct alchemy_ic_sysdev *icdev =
-			container_of(dev, struct alchemy_ic_sysdev, sysdev);
+static unsigned long alchemy_ic_pmdata[7 * 2];
 
-	icdev->pmdata[0] = __raw_readl(icdev->base + IC_CFG0RD);
-	icdev->pmdata[1] = __raw_readl(icdev->base + IC_CFG1RD);
-	icdev->pmdata[2] = __raw_readl(icdev->base + IC_CFG2RD);
-	icdev->pmdata[3] = __raw_readl(icdev->base + IC_SRCRD);
-	icdev->pmdata[4] = __raw_readl(icdev->base + IC_ASSIGNRD);
-	icdev->pmdata[5] = __raw_readl(icdev->base + IC_WAKERD);
-	icdev->pmdata[6] = __raw_readl(icdev->base + IC_MASKRD);
-
-	return 0;
+static inline void alchemy_ic_suspend_one(void __iomem *base, unsigned long *d)
+{
+	d[0] = __raw_readl(base + IC_CFG0RD);
+	d[1] = __raw_readl(base + IC_CFG1RD);
+	d[2] = __raw_readl(base + IC_CFG2RD);
+	d[3] = __raw_readl(base + IC_SRCRD);
+	d[4] = __raw_readl(base + IC_ASSIGNRD);
+	d[5] = __raw_readl(base + IC_WAKERD);
+	d[6] = __raw_readl(base + IC_MASKRD);
+	ic_init(base);		/* shut it up too while at it */
 }
 
-static int alchemy_ic_resume(struct sys_device *dev)
+static inline void alchemy_ic_resume_one(void __iomem *base, unsigned long *d)
 {
-	struct alchemy_ic_sysdev *icdev =
-			container_of(dev, struct alchemy_ic_sysdev, sysdev);
-
-	__raw_writel(0xffffffff, icdev->base + IC_MASKCLR);
-	__raw_writel(0xffffffff, icdev->base + IC_CFG0CLR);
-	__raw_writel(0xffffffff, icdev->base + IC_CFG1CLR);
-	__raw_writel(0xffffffff, icdev->base + IC_CFG2CLR);
-	__raw_writel(0xffffffff, icdev->base + IC_SRCCLR);
-	__raw_writel(0xffffffff, icdev->base + IC_ASSIGNCLR);
-	__raw_writel(0xffffffff, icdev->base + IC_WAKECLR);
-	__raw_writel(0xffffffff, icdev->base + IC_RISINGCLR);
-	__raw_writel(0xffffffff, icdev->base + IC_FALLINGCLR);
-	__raw_writel(0x00000000, icdev->base + IC_TESTBIT);
-	wmb();
-	__raw_writel(icdev->pmdata[0], icdev->base + IC_CFG0SET);
-	__raw_writel(icdev->pmdata[1], icdev->base + IC_CFG1SET);
-	__raw_writel(icdev->pmdata[2], icdev->base + IC_CFG2SET);
-	__raw_writel(icdev->pmdata[3], icdev->base + IC_SRCSET);
-	__raw_writel(icdev->pmdata[4], icdev->base + IC_ASSIGNSET);
-	__raw_writel(icdev->pmdata[5], icdev->base + IC_WAKESET);
+	ic_init(base);
+
+	__raw_writel(d[0], base + IC_CFG0SET);
+	__raw_writel(d[1], base + IC_CFG1SET);
+	__raw_writel(d[2], base + IC_CFG2SET);
+	__raw_writel(d[3], base + IC_SRCSET);
+	__raw_writel(d[4], base + IC_ASSIGNSET);
+	__raw_writel(d[5], base + IC_WAKESET);
 	wmb();
 
-	__raw_writel(icdev->pmdata[6], icdev->base + IC_MASKSET);
+	__raw_writel(d[6], base + IC_MASKSET);
 	wmb();
+}
 
+static int alchemy_ic_suspend(void)
+{
+	alchemy_ic_suspend_one((void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR),
+			       alchemy_ic_pmdata);
+	alchemy_ic_suspend_one((void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR),
+			       &alchemy_ic_pmdata[7]);
 	return 0;
 }
 
-static struct sysdev_class alchemy_ic_sysdev_class = {
-	.name		= "ic",
+static void alchemy_ic_resume(void)
+{
+	alchemy_ic_resume_one((void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR),
+			      &alchemy_ic_pmdata[7]);
+	alchemy_ic_resume_one((void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR),
+			      alchemy_ic_pmdata);
+}
+
+static struct syscore_ops alchemy_ic_syscore_ops = {
 	.suspend	= alchemy_ic_suspend,
 	.resume		= alchemy_ic_resume,
 };
 
-static int __init alchemy_ic_sysdev_init(void)
+static int __init alchemy_ic_pm_init(void)
 {
-	struct alchemy_ic_sysdev *icdev;
-	unsigned long icbase[2] = { IC0_PHYS_ADDR, IC1_PHYS_ADDR };
-	int err, i;
-
-	err = sysdev_class_register(&alchemy_ic_sysdev_class);
-	if (err)
-		return err;
-
-	for (i = 0; i < 2; i++) {
-		icdev = kzalloc(sizeof(struct alchemy_ic_sysdev), GFP_KERNEL);
-		if (!icdev)
-			return -ENOMEM;
-
-		icdev->base = ioremap(icbase[i], 0x1000);
-
-		icdev->sysdev.id = i;
-		icdev->sysdev.cls = &alchemy_ic_sysdev_class;
-		err = sysdev_register(&icdev->sysdev);
-		if (err) {
-			kfree(icdev);
-			return err;
-		}
-	}
-
+	register_syscore_ops(&alchemy_ic_syscore_ops);
 	return 0;
 }
-device_initcall(alchemy_ic_sysdev_init);
+device_initcall(alchemy_ic_pm_init);
diff --git a/arch/mips/alchemy/common/platform.c b/arch/mips/alchemy/common/platform.c
index 9e7814db3d0..3b2c18b1434 100644
--- a/arch/mips/alchemy/common/platform.c
+++ b/arch/mips/alchemy/common/platform.c
@@ -13,9 +13,10 @@
 
 #include <linux/dma-mapping.h>
 #include <linux/etherdevice.h>
+#include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/serial_8250.h>
-#include <linux/init.h>
+#include <linux/slab.h>
 
 #include <asm/mach-au1x00/au1xxx.h>
 #include <asm/mach-au1x00/au1xxx_dbdma.h>
@@ -30,21 +31,12 @@ static void alchemy_8250_pm(struct uart_port *port, unsigned int state,
 #ifdef CONFIG_SERIAL_8250
 	switch (state) {
 	case 0:
-		if ((__raw_readl(port->membase + UART_MOD_CNTRL) & 3) != 3) {
-			/* power-on sequence as suggested in the databooks */
-			__raw_writel(0, port->membase + UART_MOD_CNTRL);
-			wmb();
-			__raw_writel(1, port->membase + UART_MOD_CNTRL);
-			wmb();
-		}
-		__raw_writel(3, port->membase + UART_MOD_CNTRL); /* full on */
-		wmb();
+		alchemy_uart_enable(CPHYSADDR(port->membase));
 		serial8250_do_pm(port, state, old_state);
 		break;
 	case 3:		/* power off */
 		serial8250_do_pm(port, state, old_state);
-		__raw_writel(0, port->membase + UART_MOD_CNTRL);
-		wmb();
+		alchemy_uart_disable(CPHYSADDR(port->membase));
 		break;
 	default:
 		serial8250_do_pm(port, state, old_state);
@@ -65,38 +57,60 @@ static void alchemy_8250_pm(struct uart_port *port, unsigned int state,
 		.pm		= alchemy_8250_pm,		\
 	}
 
-static struct plat_serial8250_port au1x00_uart_data[] = {
-#if defined(CONFIG_SOC_AU1000)
-	PORT(UART0_PHYS_ADDR, AU1000_UART0_INT),
-	PORT(UART1_PHYS_ADDR, AU1000_UART1_INT),
-	PORT(UART2_PHYS_ADDR, AU1000_UART2_INT),
-	PORT(UART3_PHYS_ADDR, AU1000_UART3_INT),
-#elif defined(CONFIG_SOC_AU1500)
-	PORT(UART0_PHYS_ADDR, AU1500_UART0_INT),
-	PORT(UART3_PHYS_ADDR, AU1500_UART3_INT),
-#elif defined(CONFIG_SOC_AU1100)
-	PORT(UART0_PHYS_ADDR, AU1100_UART0_INT),
-	PORT(UART1_PHYS_ADDR, AU1100_UART1_INT),
-	PORT(UART3_PHYS_ADDR, AU1100_UART3_INT),
-#elif defined(CONFIG_SOC_AU1550)
-	PORT(UART0_PHYS_ADDR, AU1550_UART0_INT),
-	PORT(UART1_PHYS_ADDR, AU1550_UART1_INT),
-	PORT(UART3_PHYS_ADDR, AU1550_UART3_INT),
-#elif defined(CONFIG_SOC_AU1200)
-	PORT(UART0_PHYS_ADDR, AU1200_UART0_INT),
-	PORT(UART1_PHYS_ADDR, AU1200_UART1_INT),
-#endif
-	{ },
+static struct plat_serial8250_port au1x00_uart_data[][4] __initdata = {
+	[ALCHEMY_CPU_AU1000] = {
+		PORT(AU1000_UART0_PHYS_ADDR, AU1000_UART0_INT),
+		PORT(AU1000_UART1_PHYS_ADDR, AU1000_UART1_INT),
+		PORT(AU1000_UART2_PHYS_ADDR, AU1000_UART2_INT),
+		PORT(AU1000_UART3_PHYS_ADDR, AU1000_UART3_INT),
+	},
+	[ALCHEMY_CPU_AU1500] = {
+		PORT(AU1000_UART0_PHYS_ADDR, AU1500_UART0_INT),
+		PORT(AU1000_UART3_PHYS_ADDR, AU1500_UART3_INT),
+	},
+	[ALCHEMY_CPU_AU1100] = {
+		PORT(AU1000_UART0_PHYS_ADDR, AU1100_UART0_INT),
+		PORT(AU1000_UART1_PHYS_ADDR, AU1100_UART1_INT),
+		PORT(AU1000_UART3_PHYS_ADDR, AU1100_UART3_INT),
+	},
+	[ALCHEMY_CPU_AU1550] = {
+		PORT(AU1000_UART0_PHYS_ADDR, AU1550_UART0_INT),
+		PORT(AU1000_UART1_PHYS_ADDR, AU1550_UART1_INT),
+		PORT(AU1000_UART3_PHYS_ADDR, AU1550_UART3_INT),
+	},
+	[ALCHEMY_CPU_AU1200] = {
+		PORT(AU1000_UART0_PHYS_ADDR, AU1200_UART0_INT),
+		PORT(AU1000_UART1_PHYS_ADDR, AU1200_UART1_INT),
+	},
 };
 
 static struct platform_device au1xx0_uart_device = {
 	.name			= "serial8250",
 	.id			= PLAT8250_DEV_AU1X00,
-	.dev			= {
-		.platform_data	= au1x00_uart_data,
-	},
 };
 
+static void __init alchemy_setup_uarts(int ctype)
+{
+	unsigned int uartclk = get_au1x00_uart_baud_base() * 16;
+	int s = sizeof(struct plat_serial8250_port);
+	int c = alchemy_get_uarts(ctype);
+	struct plat_serial8250_port *ports;
+
+	ports = kzalloc(s * (c + 1), GFP_KERNEL);
+	if (!ports) {
+		printk(KERN_INFO "Alchemy: no memory for UART data\n");
+		return;
+	}
+	memcpy(ports, au1x00_uart_data[ctype], s * c);
+	au1xx0_uart_device.dev.platform_data = ports;
+
+	/* Fill up uartclk. */
+	for (s = 0; s < c; s++)
+		ports[s].uartclk = uartclk;
+	if (platform_device_register(&au1xx0_uart_device))
+		printk(KERN_INFO "Alchemy: failed to register UARTs\n");
+}
+
 /* OHCI (USB full speed host controller) */
 static struct resource au1xxx_usb_ohci_resources[] = {
 	[0] = {
@@ -269,8 +283,8 @@ extern struct au1xmmc_platform_data au1xmmc_platdata[2];
 
 static struct resource au1200_mmc0_resources[] = {
 	[0] = {
-		.start          = SD0_PHYS_ADDR,
-		.end            = SD0_PHYS_ADDR + 0x7ffff,
+		.start          = AU1100_SD0_PHYS_ADDR,
+		.end            = AU1100_SD0_PHYS_ADDR + 0xfff,
 		.flags          = IORESOURCE_MEM,
 	},
 	[1] = {
@@ -305,8 +319,8 @@ static struct platform_device au1200_mmc0_device = {
 #ifndef CONFIG_MIPS_DB1200
 static struct resource au1200_mmc1_resources[] = {
 	[0] = {
-		.start          = SD1_PHYS_ADDR,
-		.end            = SD1_PHYS_ADDR + 0x7ffff,
+		.start          = AU1100_SD1_PHYS_ADDR,
+		.end            = AU1100_SD1_PHYS_ADDR + 0xfff,
 		.flags          = IORESOURCE_MEM,
 	},
 	[1] = {
@@ -359,15 +373,16 @@ static struct platform_device pbdb_smbus_device = {
 #endif
 
 /* Macro to help defining the Ethernet MAC resources */
+#define MAC_RES_COUNT	3	/* MAC regs base, MAC enable reg, MAC INT */
 #define MAC_RES(_base, _enable, _irq)			\
 	{						\
-		.start	= CPHYSADDR(_base),		\
-		.end	= CPHYSADDR(_base + 0xffff),	\
+		.start	= _base,			\
+		.end	= _base + 0xffff,		\
 		.flags	= IORESOURCE_MEM,		\
 	},						\
 	{						\
-		.start	= CPHYSADDR(_enable),		\
-		.end	= CPHYSADDR(_enable + 0x3),	\
+		.start	= _enable,			\
+		.end	= _enable + 0x3,		\
 		.flags	= IORESOURCE_MEM,		\
 	},						\
 	{						\
@@ -376,19 +391,29 @@ static struct platform_device pbdb_smbus_device = {
 		.flags	= IORESOURCE_IRQ		\
 	}
 
-static struct resource au1xxx_eth0_resources[] = {
-#if defined(CONFIG_SOC_AU1000)
-	MAC_RES(AU1000_ETH0_BASE, AU1000_MAC0_ENABLE, AU1000_MAC0_DMA_INT),
-#elif defined(CONFIG_SOC_AU1100)
-	MAC_RES(AU1100_ETH0_BASE, AU1100_MAC0_ENABLE, AU1100_MAC0_DMA_INT),
-#elif defined(CONFIG_SOC_AU1550)
-	MAC_RES(AU1550_ETH0_BASE, AU1550_MAC0_ENABLE, AU1550_MAC0_DMA_INT),
-#elif defined(CONFIG_SOC_AU1500)
-	MAC_RES(AU1500_ETH0_BASE, AU1500_MAC0_ENABLE, AU1500_MAC0_DMA_INT),
-#endif
+static struct resource au1xxx_eth0_resources[][MAC_RES_COUNT] __initdata = {
+	[ALCHEMY_CPU_AU1000] = {
+		MAC_RES(AU1000_MAC0_PHYS_ADDR,
+			AU1000_MACEN_PHYS_ADDR,
+			AU1000_MAC0_DMA_INT)
+	},
+	[ALCHEMY_CPU_AU1500] = {
+		MAC_RES(AU1500_MAC0_PHYS_ADDR,
+			AU1500_MACEN_PHYS_ADDR,
+			AU1500_MAC0_DMA_INT)
+	},
+	[ALCHEMY_CPU_AU1100] = {
+		MAC_RES(AU1000_MAC0_PHYS_ADDR,
+			AU1000_MACEN_PHYS_ADDR,
+			AU1100_MAC0_DMA_INT)
+	},
+	[ALCHEMY_CPU_AU1550] = {
+		MAC_RES(AU1000_MAC0_PHYS_ADDR,
+			AU1000_MACEN_PHYS_ADDR,
+			AU1550_MAC0_DMA_INT)
+	},
 };
 
-
 static struct au1000_eth_platform_data au1xxx_eth0_platform_data = {
 	.phy1_search_mac0 = 1,
 };
@@ -396,20 +421,26 @@ static struct au1000_eth_platform_data au1xxx_eth0_platform_data = {
 static struct platform_device au1xxx_eth0_device = {
 	.name		= "au1000-eth",
 	.id		= 0,
-	.num_resources	= ARRAY_SIZE(au1xxx_eth0_resources),
-	.resource	= au1xxx_eth0_resources,
+	.num_resources	= MAC_RES_COUNT,
 	.dev.platform_data = &au1xxx_eth0_platform_data,
 };
 
-#ifndef CONFIG_SOC_AU1100
-static struct resource au1xxx_eth1_resources[] = {
-#if defined(CONFIG_SOC_AU1000)
-	MAC_RES(AU1000_ETH1_BASE, AU1000_MAC1_ENABLE, AU1000_MAC1_DMA_INT),
-#elif defined(CONFIG_SOC_AU1550)
-	MAC_RES(AU1550_ETH1_BASE, AU1550_MAC1_ENABLE, AU1550_MAC1_DMA_INT),
-#elif defined(CONFIG_SOC_AU1500)
-	MAC_RES(AU1500_ETH1_BASE, AU1500_MAC1_ENABLE, AU1500_MAC1_DMA_INT),
-#endif
+static struct resource au1xxx_eth1_resources[][MAC_RES_COUNT] __initdata = {
+	[ALCHEMY_CPU_AU1000] = {
+		MAC_RES(AU1000_MAC1_PHYS_ADDR,
+			AU1000_MACEN_PHYS_ADDR + 4,
+			AU1000_MAC1_DMA_INT)
+	},
+	[ALCHEMY_CPU_AU1500] = {
+		MAC_RES(AU1500_MAC1_PHYS_ADDR,
+			AU1500_MACEN_PHYS_ADDR + 4,
+			AU1500_MAC1_DMA_INT)
+	},
+	[ALCHEMY_CPU_AU1550] = {
+		MAC_RES(AU1000_MAC1_PHYS_ADDR,
+			AU1000_MACEN_PHYS_ADDR + 4,
+			AU1550_MAC1_DMA_INT)
+	},
 };
 
 static struct au1000_eth_platform_data au1xxx_eth1_platform_data = {
@@ -419,11 +450,9 @@ static struct au1000_eth_platform_data au1xxx_eth1_platform_data = {
 static struct platform_device au1xxx_eth1_device = {
 	.name		= "au1000-eth",
 	.id		= 1,
-	.num_resources	= ARRAY_SIZE(au1xxx_eth1_resources),
-	.resource	= au1xxx_eth1_resources,
+	.num_resources	= MAC_RES_COUNT,
 	.dev.platform_data = &au1xxx_eth1_platform_data,
 };
-#endif
 
 void __init au1xxx_override_eth_cfg(unsigned int port,
 			struct au1000_eth_platform_data *eth_data)
@@ -434,15 +463,65 @@ void __init au1xxx_override_eth_cfg(unsigned int port,
 	if (port == 0)
 		memcpy(&au1xxx_eth0_platform_data, eth_data,
 			sizeof(struct au1000_eth_platform_data));
-#ifndef CONFIG_SOC_AU1100
 	else
 		memcpy(&au1xxx_eth1_platform_data, eth_data,
 			sizeof(struct au1000_eth_platform_data));
-#endif
+}
+
+static void __init alchemy_setup_macs(int ctype)
+{
+	int ret, i;
+	unsigned char ethaddr[6];
+	struct resource *macres;
+
+	/* Handle 1st MAC */
+	if (alchemy_get_macs(ctype) < 1)
+		return;
+
+	macres = kmalloc(sizeof(struct resource) * MAC_RES_COUNT, GFP_KERNEL);
+	if (!macres) {
+		printk(KERN_INFO "Alchemy: no memory for MAC0 resources\n");
+		return;
+	}
+	memcpy(macres, au1xxx_eth0_resources[ctype],
+	       sizeof(struct resource) * MAC_RES_COUNT);
+	au1xxx_eth0_device.resource = macres;
+
+	i = prom_get_ethernet_addr(ethaddr);
+	if (!i && !is_valid_ether_addr(au1xxx_eth0_platform_data.mac))
+		memcpy(au1xxx_eth0_platform_data.mac, ethaddr, 6);
+
+	ret = platform_device_register(&au1xxx_eth0_device);
+	if (!ret)
+		printk(KERN_INFO "Alchemy: failed to register MAC0\n");
+
+
+	/* Handle 2nd MAC */
+	if (alchemy_get_macs(ctype) < 2)
+		return;
+
+	macres = kmalloc(sizeof(struct resource) * MAC_RES_COUNT, GFP_KERNEL);
+	if (!macres) {
+		printk(KERN_INFO "Alchemy: no memory for MAC1 resources\n");
+		return;
+	}
+	memcpy(macres, au1xxx_eth1_resources[ctype],
+	       sizeof(struct resource) * MAC_RES_COUNT);
+	au1xxx_eth1_device.resource = macres;
+
+	ethaddr[5] += 1;	/* next addr for 2nd MAC */
+	if (!i && !is_valid_ether_addr(au1xxx_eth1_platform_data.mac))
+		memcpy(au1xxx_eth1_platform_data.mac, ethaddr, 6);
+
+	/* Register second MAC if enabled in pinfunc */
+	if (!(au_readl(SYS_PINFUNC) & (u32)SYS_PF_NI2)) {
+		ret = platform_device_register(&au1xxx_eth1_device);
+		if (ret)
+			printk(KERN_INFO "Alchemy: failed to register MAC1\n");
+	}
 }
 
 static struct platform_device *au1xxx_platform_devices[] __initdata = {
-	&au1xx0_uart_device,
 	&au1xxx_usb_ohci_device,
 #ifdef CONFIG_FB_AU1100
 	&au1100_lcd_device,
@@ -460,36 +539,17 @@ static struct platform_device *au1xxx_platform_devices[] __initdata = {
 #ifdef SMBUS_PSC_BASE
 	&pbdb_smbus_device,
 #endif
-	&au1xxx_eth0_device,
 };
 
 static int __init au1xxx_platform_init(void)
 {
-	unsigned int uartclk = get_au1x00_uart_baud_base() * 16;
-	int err, i;
-	unsigned char ethaddr[6];
+	int err, ctype = alchemy_get_cputype();
 
-	/* Fill up uartclk. */
-	for (i = 0; au1x00_uart_data[i].flags; i++)
-		au1x00_uart_data[i].uartclk = uartclk;
-
-	/* use firmware-provided mac addr if available and necessary */
-	i = prom_get_ethernet_addr(ethaddr);
-	if (!i && !is_valid_ether_addr(au1xxx_eth0_platform_data.mac))
-		memcpy(au1xxx_eth0_platform_data.mac, ethaddr, 6);
+	alchemy_setup_uarts(ctype);
+	alchemy_setup_macs(ctype);
 
 	err = platform_add_devices(au1xxx_platform_devices,
 				   ARRAY_SIZE(au1xxx_platform_devices));
-#ifndef CONFIG_SOC_AU1100
-	ethaddr[5] += 1;	/* next addr for 2nd MAC */
-	if (!i && !is_valid_ether_addr(au1xxx_eth1_platform_data.mac))
-		memcpy(au1xxx_eth1_platform_data.mac, ethaddr, 6);
-
-	/* Register second MAC if enabled in pinfunc */
-	if (!err && !(au_readl(SYS_PINFUNC) & (u32)SYS_PF_NI2))
-		err = platform_device_register(&au1xxx_eth1_device);
-#endif
-
 	return err;
 }
 
diff --git a/arch/mips/alchemy/common/setup.c b/arch/mips/alchemy/common/setup.c
index 561e5da2658..1b887c86841 100644
--- a/arch/mips/alchemy/common/setup.c
+++ b/arch/mips/alchemy/common/setup.c
@@ -52,8 +52,6 @@ void __init plat_mem_setup(void)
 	/* this is faster than wasting cycles trying to approximate it */
 	preset_lpj = (est_freq >> 1) / HZ;
 
-	board_setup();  /* board specific setup */
-
 	if (au1xxx_cpu_needs_config_od())
 		/* Various early Au1xx0 errata corrected by this */
 		set_c0_config(1 << 19); /* Set Config[OD] */
@@ -61,6 +59,8 @@ void __init plat_mem_setup(void)
 		/* Clear to obtain best system bus performance */
 		clear_c0_config(1 << 19); /* Clear Config[OD] */
 
+	board_setup();  /* board specific setup */
+
 	/* IO/MEM resources. */
 	set_io_port_base(0);
 	ioport_resource.start = IOPORT_RESOURCE_START;
diff --git a/arch/mips/alchemy/common/time.c b/arch/mips/alchemy/common/time.c
index 2aecb2fdf98..d5da6adbf63 100644
--- a/arch/mips/alchemy/common/time.c
+++ b/arch/mips/alchemy/common/time.c
@@ -141,8 +141,7 @@ static int __init alchemy_time_init(unsigned int m2int)
 		goto cntr_err;
 
 	/* register counter1 clocksource and event device */
-	clocksource_set_clock(&au1x_counter1_clocksource, 32768);
-	clocksource_register(&au1x_counter1_clocksource);
+	clocksource_register_hz(&au1x_counter1_clocksource, 32768);
 
 	cd->shift = 32;
 	cd->mult = div_sc(32768, NSEC_PER_SEC, cd->shift);
diff --git a/arch/mips/alchemy/devboards/db1200/setup.c b/arch/mips/alchemy/devboards/db1200/setup.c
index 4a8980027ec..1dac4f27d33 100644
--- a/arch/mips/alchemy/devboards/db1200/setup.c
+++ b/arch/mips/alchemy/devboards/db1200/setup.c
@@ -23,6 +23,13 @@ void __init board_setup(void)
 	unsigned long freq0, clksrc, div, pfc;
 	unsigned short whoami;
 
+	/* Set Config[OD] (disable overlapping bus transaction):
+	 * This gets rid of a _lot_ of spurious interrupts (especially
+	 * wrt. IDE); but incurs ~10% performance hit in some
+	 * cpu-bound applications.
+	 */
+	set_c0_config(1 << 19);
+
 	bcsr_init(DB1200_BCSR_PHYS_ADDR,
 		  DB1200_BCSR_PHYS_ADDR + DB1200_BCSR_HEXLED_OFS);
 
diff --git a/arch/mips/alchemy/devboards/db1x00/board_setup.c b/arch/mips/alchemy/devboards/db1x00/board_setup.c
index 05f120ff90f..5c956fe8760 100644
--- a/arch/mips/alchemy/devboards/db1x00/board_setup.c
+++ b/arch/mips/alchemy/devboards/db1x00/board_setup.c
@@ -127,13 +127,10 @@ const char *get_system_type(void)
 void __init board_setup(void)
 {
 	unsigned long bcsr1, bcsr2;
-	u32 pin_func;
 
 	bcsr1 = DB1000_BCSR_PHYS_ADDR;
 	bcsr2 = DB1000_BCSR_PHYS_ADDR + DB1000_BCSR_HEXLED_OFS;
 
-	pin_func = 0;
-
 #ifdef CONFIG_MIPS_DB1000
 	printk(KERN_INFO "AMD Alchemy Au1000/Db1000 Board\n");
 #endif
@@ -164,12 +161,16 @@ void __init board_setup(void)
 	/* Not valid for Au1550 */
 #if defined(CONFIG_IRDA) && \
    (defined(CONFIG_SOC_AU1000) || defined(CONFIG_SOC_AU1100))
-	/* Set IRFIRSEL instead of GPIO15 */
-	pin_func = au_readl(SYS_PINFUNC) | SYS_PF_IRF;
-	au_writel(pin_func, SYS_PINFUNC);
-	/* Power off until the driver is in use */
-	bcsr_mod(BCSR_RESETS, BCSR_RESETS_IRDA_MODE_MASK,
-				BCSR_RESETS_IRDA_MODE_OFF);
+	{
+		u32 pin_func;
+
+		/* Set IRFIRSEL instead of GPIO15 */
+		pin_func = au_readl(SYS_PINFUNC) | SYS_PF_IRF;
+		au_writel(pin_func, SYS_PINFUNC);
+		/* Power off until the driver is in use */
+		bcsr_mod(BCSR_RESETS, BCSR_RESETS_IRDA_MODE_MASK,
+			 BCSR_RESETS_IRDA_MODE_OFF);
+	}
 #endif
 	bcsr_write(BCSR_PCMCIA, 0);	/* turn off PCMCIA power */
 
@@ -177,31 +178,35 @@ void __init board_setup(void)
 	alchemy_gpio1_input_enable();
 
 #ifdef CONFIG_MIPS_MIRAGE
-	/* GPIO[20] is output */
-	alchemy_gpio_direction_output(20, 0);
+	{
+		u32 pin_func;
 
-	/* Set GPIO[210:208] instead of SSI_0 */
-	pin_func = au_readl(SYS_PINFUNC) | SYS_PF_S0;
+		/* GPIO[20] is output */
+		alchemy_gpio_direction_output(20, 0);
 
-	/* Set GPIO[215:211] for LEDs */
-	pin_func |= 5 << 2;
+		/* Set GPIO[210:208] instead of SSI_0 */
+		pin_func = au_readl(SYS_PINFUNC) | SYS_PF_S0;
 
-	/* Set GPIO[214:213] for more LEDs */
-	pin_func |= 5 << 12;
+		/* Set GPIO[215:211] for LEDs */
+		pin_func |= 5 << 2;
 
-	/* Set GPIO[207:200] instead of PCMCIA/LCD */
-	pin_func |= SYS_PF_LCD | SYS_PF_PC;
-	au_writel(pin_func, SYS_PINFUNC);
+		/* Set GPIO[214:213] for more LEDs */
+		pin_func |= 5 << 12;
 
-	/*
-	 * Enable speaker amplifier.  This should
-	 * be part of the audio driver.
-	 */
-	alchemy_gpio_direction_output(209, 1);
+		/* Set GPIO[207:200] instead of PCMCIA/LCD */
+		pin_func |= SYS_PF_LCD | SYS_PF_PC;
+		au_writel(pin_func, SYS_PINFUNC);
 
-	pm_power_off = mirage_power_off;
-	_machine_halt = mirage_power_off;
-	_machine_restart = (void(*)(char *))mips_softreset;
+		/*
+		 * Enable speaker amplifier.  This should
+		 * be part of the audio driver.
+		 */
+		alchemy_gpio_direction_output(209, 1);
+
+		pm_power_off = mirage_power_off;
+		_machine_halt = mirage_power_off;
+		_machine_restart = (void(*)(char *))mips_softreset;
+	}
 #endif
 
 #ifdef CONFIG_MIPS_BOSPORUS
diff --git a/arch/mips/alchemy/devboards/pb1000/board_setup.c b/arch/mips/alchemy/devboards/pb1000/board_setup.c
index 2d85c4b5be0..e64fdcbf75d 100644
--- a/arch/mips/alchemy/devboards/pb1000/board_setup.c
+++ b/arch/mips/alchemy/devboards/pb1000/board_setup.c
@@ -65,7 +65,7 @@ void __init board_setup(void)
 
 	/* Set AUX clock to 12 MHz * 8 = 96 MHz */
 	au_writel(8, SYS_AUXPLL);
-	au_writel(0, SYS_PINSTATERD);
+	alchemy_gpio1_input_enable();
 	udelay(100);
 
 #if defined(CONFIG_USB_OHCI_HCD) || defined(CONFIG_USB_OHCI_HCD_MODULE)
diff --git a/arch/mips/alchemy/devboards/pb1500/board_setup.c b/arch/mips/alchemy/devboards/pb1500/board_setup.c
index 83f46215eb0..3b4fa320696 100644
--- a/arch/mips/alchemy/devboards/pb1500/board_setup.c
+++ b/arch/mips/alchemy/devboards/pb1500/board_setup.c
@@ -56,7 +56,7 @@ void __init board_setup(void)
 	sys_clksrc = sys_freqctrl = pin_func = 0;
 	/* Set AUX clock to 12 MHz * 8 = 96 MHz */
 	au_writel(8, SYS_AUXPLL);
-	au_writel(0, SYS_PINSTATERD);
+	alchemy_gpio1_input_enable();
 	udelay(100);
 
 	/* GPIO201 is input for PCMCIA card detect */
diff --git a/arch/mips/alchemy/devboards/prom.c b/arch/mips/alchemy/devboards/prom.c
index baeb2138505..e5306b56da6 100644
--- a/arch/mips/alchemy/devboards/prom.c
+++ b/arch/mips/alchemy/devboards/prom.c
@@ -62,5 +62,5 @@ void __init prom_init(void)
 
 void prom_putchar(unsigned char c)
 {
-    alchemy_uart_putchar(UART0_PHYS_ADDR, c);
+	alchemy_uart_putchar(AU1000_UART0_PHYS_ADDR, c);
 }
diff --git a/arch/mips/alchemy/gpr/board_setup.c b/arch/mips/alchemy/gpr/board_setup.c
index ad2e3f13793..5f8f0691ed2 100644
--- a/arch/mips/alchemy/gpr/board_setup.c
+++ b/arch/mips/alchemy/gpr/board_setup.c
@@ -36,9 +36,6 @@
 
 #include <prom.h>
 
-#define UART1_ADDR	KSEG1ADDR(UART1_PHYS_ADDR)
-#define UART3_ADDR	KSEG1ADDR(UART3_PHYS_ADDR)
-
 char irq_tab_alchemy[][5] __initdata = {
 	[0] = { -1, AU1500_PCI_INTA, AU1500_PCI_INTB, 0xff, 0xff },
 };
@@ -67,18 +64,15 @@ static void gpr_power_off(void)
 
 void __init board_setup(void)
 {
-	printk(KERN_INFO "Tarpeze ITS GPR board\n");
+	printk(KERN_INFO "Trapeze ITS GPR board\n");
 
 	pm_power_off = gpr_power_off;
 	_machine_halt = gpr_power_off;
 	_machine_restart = gpr_reset;
 
-	/* Enable UART3 */
-	au_writel(0x1, UART3_ADDR + UART_MOD_CNTRL);/* clock enable (CE) */
-	au_writel(0x3, UART3_ADDR + UART_MOD_CNTRL); /* CE and "enable" */
-	/* Enable UART1 */
-	au_writel(0x1, UART1_ADDR + UART_MOD_CNTRL); /* clock enable (CE) */
-	au_writel(0x3, UART1_ADDR + UART_MOD_CNTRL); /* CE and "enable" */
+	/* Enable UART1/3 */
+	alchemy_uart_enable(AU1000_UART3_PHYS_ADDR);
+	alchemy_uart_enable(AU1000_UART1_PHYS_ADDR);
 
 	/* Take away Reset of UMTS-card */
 	alchemy_gpio_direction_output(215, 1);
diff --git a/arch/mips/alchemy/gpr/init.c b/arch/mips/alchemy/gpr/init.c
index f044f4c541d..229aafae680 100644
--- a/arch/mips/alchemy/gpr/init.c
+++ b/arch/mips/alchemy/gpr/init.c
@@ -59,5 +59,5 @@ void __init prom_init(void)
 
 void prom_putchar(unsigned char c)
 {
-	alchemy_uart_putchar(UART0_PHYS_ADDR, c);
+	alchemy_uart_putchar(AU1000_UART0_PHYS_ADDR, c);
 }
diff --git a/arch/mips/alchemy/mtx-1/board_setup.c b/arch/mips/alchemy/mtx-1/board_setup.c
index cf436ab679a..3ae984cf98c 100644
--- a/arch/mips/alchemy/mtx-1/board_setup.c
+++ b/arch/mips/alchemy/mtx-1/board_setup.c
@@ -87,7 +87,7 @@ void __init board_setup(void)
 	au_writel(SYS_PF_NI2, SYS_PINFUNC);
 
 	/* Initialize GPIO */
-	au_writel(0xFFFFFFFF, SYS_TRIOUTCLR);
+	au_writel(~0, KSEG1ADDR(AU1000_SYS_PHYS_ADDR) + SYS_TRIOUTCLR);
 	alchemy_gpio_direction_output(0, 0);	/* Disable M66EN (PCI 66MHz) */
 	alchemy_gpio_direction_output(3, 1);	/* Disable PCI CLKRUN# */
 	alchemy_gpio_direction_output(1, 1);	/* Enable EXT_IO3 */
diff --git a/arch/mips/alchemy/mtx-1/init.c b/arch/mips/alchemy/mtx-1/init.c
index f8d25575fa0..2e81cc7f342 100644
--- a/arch/mips/alchemy/mtx-1/init.c
+++ b/arch/mips/alchemy/mtx-1/init.c
@@ -62,5 +62,5 @@ void __init prom_init(void)
 
 void prom_putchar(unsigned char c)
 {
-	alchemy_uart_putchar(UART0_PHYS_ADDR, c);
+	alchemy_uart_putchar(AU1000_UART0_PHYS_ADDR, c);
 }
diff --git a/arch/mips/alchemy/mtx-1/platform.c b/arch/mips/alchemy/mtx-1/platform.c
index 956f946218c..55628e390fd 100644
--- a/arch/mips/alchemy/mtx-1/platform.c
+++ b/arch/mips/alchemy/mtx-1/platform.c
@@ -53,8 +53,8 @@ static struct platform_device mtx1_button = {
 
 static struct resource mtx1_wdt_res[] = {
 	[0] = {
-		.start	= 15,
-		.end	= 15,
+		.start	= 215,
+		.end	= 215,
 		.name	= "mtx1-wdt-gpio",
 		.flags	= IORESOURCE_IRQ,
 	}
diff --git a/arch/mips/alchemy/xxs1500/board_setup.c b/arch/mips/alchemy/xxs1500/board_setup.c
index febfb0fb089..81e57fad07a 100644
--- a/arch/mips/alchemy/xxs1500/board_setup.c
+++ b/arch/mips/alchemy/xxs1500/board_setup.c
@@ -66,13 +66,10 @@ void __init board_setup(void)
 	au_writel(pin_func, SYS_PINFUNC);
 
 	/* Enable UART */
-	au_writel(0x01, UART3_ADDR + UART_MOD_CNTRL); /* clock enable (CE) */
-	mdelay(10);
-	au_writel(0x03, UART3_ADDR + UART_MOD_CNTRL); /* CE and "enable" */
-	mdelay(10);
-
-	/* Enable DTR = USB power up */
-	au_writel(0x01, UART3_ADDR + UART_MCR); /* UART_MCR_DTR is 0x01??? */
+	alchemy_uart_enable(AU1000_UART3_PHYS_ADDR);
+	/* Enable DTR (MCR bit 0) = USB power up */
+	__raw_writel(1, (void __iomem *)KSEG1ADDR(AU1000_UART3_PHYS_ADDR + 0x18));
+	wmb();
 
 #ifdef CONFIG_PCI
 #if defined(__MIPSEB__)
diff --git a/arch/mips/alchemy/xxs1500/init.c b/arch/mips/alchemy/xxs1500/init.c
index 15125c2fda7..0ee02cfa989 100644
--- a/arch/mips/alchemy/xxs1500/init.c
+++ b/arch/mips/alchemy/xxs1500/init.c
@@ -51,14 +51,13 @@ void __init prom_init(void)
 	prom_init_cmdline();
 
 	memsize_str = prom_getenv("memsize");
-	if (!memsize_str)
+	if (!memsize_str || strict_strtoul(memsize_str, 0, &memsize))
 		memsize = 0x04000000;
-	else
-		strict_strtoul(memsize_str, 0, &memsize);
+
 	add_memory_region(0, memsize, BOOT_MEM_RAM);
 }
 
 void prom_putchar(unsigned char c)
 {
-	alchemy_uart_putchar(UART0_PHYS_ADDR, c);
+	alchemy_uart_putchar(AU1000_UART0_PHYS_ADDR, c);
 }
diff --git a/arch/mips/ar7/gpio.c b/arch/mips/ar7/gpio.c
index 425dfa5d6e1..bb571bcdb8f 100644
--- a/arch/mips/ar7/gpio.c
+++ b/arch/mips/ar7/gpio.c
@@ -325,9 +325,7 @@ int __init ar7_gpio_init(void)
 		size = 0x1f;
 	}
 
-	gpch->regs = ioremap_nocache(AR7_REGS_GPIO,
-					AR7_REGS_GPIO + 0x10);
-
+	gpch->regs = ioremap_nocache(AR7_REGS_GPIO, size);
 	if (!gpch->regs) {
 		printk(KERN_ERR "%s: failed to ioremap regs\n",
 					gpch->chip.label);
diff --git a/arch/mips/bcm47xx/nvram.c b/arch/mips/bcm47xx/nvram.c
index e5b6615731e..54db815bc86 100644
--- a/arch/mips/bcm47xx/nvram.c
+++ b/arch/mips/bcm47xx/nvram.c
@@ -3,6 +3,7 @@
  *
  * Copyright (C) 2005 Broadcom Corporation
  * Copyright (C) 2006 Felix Fietkau <nbd@openwrt.org>
+ * Copyright (C) 2010-2011 Hauke Mehrtens <hauke@hauke-m.de>
  *
  * This program is free software; you can redistribute  it and/or modify it
  * under  the terms of  the GNU General  Public License as published by the
@@ -23,7 +24,7 @@
 static char nvram_buf[NVRAM_SPACE];
 
 /* Probe for NVRAM header */
-static void __init early_nvram_init(void)
+static void early_nvram_init(void)
 {
 	struct ssb_mipscore *mcore = &ssb_bcm47xx.mipscore;
 	struct nvram_header *header;
diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c
index c95f90bf734..73b529b5743 100644
--- a/arch/mips/bcm47xx/setup.c
+++ b/arch/mips/bcm47xx/setup.c
@@ -3,6 +3,7 @@
  *  Copyright (C) 2006 Felix Fietkau <nbd@openwrt.org>
  *  Copyright (C) 2006 Michael Buesch <mb@bu3sch.de>
  *  Copyright (C) 2010 Waldemar Brodkorb <wbx@openadk.org>
+ *  Copyright (C) 2010-2011 Hauke Mehrtens <hauke@hauke-m.de>
  *
  *  This program is free software; you can redistribute  it and/or modify it
  *  under  the terms of  the GNU General  Public License as published by the
@@ -57,10 +58,49 @@ static void bcm47xx_machine_halt(void)
 }
 
 #define READ_FROM_NVRAM(_outvar, name, buf) \
-	if (nvram_getenv(name, buf, sizeof(buf)) >= 0)\
+	if (nvram_getprefix(prefix, name, buf, sizeof(buf)) >= 0)\
 		sprom->_outvar = simple_strtoul(buf, NULL, 0);
 
-static void bcm47xx_fill_sprom(struct ssb_sprom *sprom)
+#define READ_FROM_NVRAM2(_outvar, name1, name2, buf) \
+	if (nvram_getprefix(prefix, name1, buf, sizeof(buf)) >= 0 || \
+	    nvram_getprefix(prefix, name2, buf, sizeof(buf)) >= 0)\
+		sprom->_outvar = simple_strtoul(buf, NULL, 0);
+
+static inline int nvram_getprefix(const char *prefix, char *name,
+				  char *buf, int len)
+{
+	if (prefix) {
+		char key[100];
+
+		snprintf(key, sizeof(key), "%s%s", prefix, name);
+		return nvram_getenv(key, buf, len);
+	}
+
+	return nvram_getenv(name, buf, len);
+}
+
+static u32 nvram_getu32(const char *name, char *buf, int len)
+{
+	int rv;
+	char key[100];
+	u16 var0, var1;
+
+	snprintf(key, sizeof(key), "%s0", name);
+	rv = nvram_getenv(key, buf, len);
+	/* return 0 here so this looks like unset */
+	if (rv < 0)
+		return 0;
+	var0 = simple_strtoul(buf, NULL, 0);
+
+	snprintf(key, sizeof(key), "%s1", name);
+	rv = nvram_getenv(key, buf, len);
+	if (rv < 0)
+		return 0;
+	var1 = simple_strtoul(buf, NULL, 0);
+	return var1 << 16 | var0;
+}
+
+static void bcm47xx_fill_sprom(struct ssb_sprom *sprom, const char *prefix)
 {
 	char buf[100];
 	u32 boardflags;
@@ -69,11 +109,12 @@ static void bcm47xx_fill_sprom(struct ssb_sprom *sprom)
 
 	sprom->revision = 1; /* Fallback: Old hardware does not define this. */
 	READ_FROM_NVRAM(revision, "sromrev", buf);
-	if (nvram_getenv("il0macaddr", buf, sizeof(buf)) >= 0)
+	if (nvram_getprefix(prefix, "il0macaddr", buf, sizeof(buf)) >= 0 ||
+	    nvram_getprefix(prefix, "macaddr", buf, sizeof(buf)) >= 0)
 		nvram_parse_macaddr(buf, sprom->il0mac);
-	if (nvram_getenv("et0macaddr", buf, sizeof(buf)) >= 0)
+	if (nvram_getprefix(prefix, "et0macaddr", buf, sizeof(buf)) >= 0)
 		nvram_parse_macaddr(buf, sprom->et0mac);
-	if (nvram_getenv("et1macaddr", buf, sizeof(buf)) >= 0)
+	if (nvram_getprefix(prefix, "et1macaddr", buf, sizeof(buf)) >= 0)
 		nvram_parse_macaddr(buf, sprom->et1mac);
 	READ_FROM_NVRAM(et0phyaddr, "et0phyaddr", buf);
 	READ_FROM_NVRAM(et1phyaddr, "et1phyaddr", buf);
@@ -95,20 +136,36 @@ static void bcm47xx_fill_sprom(struct ssb_sprom *sprom)
 	READ_FROM_NVRAM(pa1hib0, "pa1hib0", buf);
 	READ_FROM_NVRAM(pa1hib2, "pa1hib1", buf);
 	READ_FROM_NVRAM(pa1hib1, "pa1hib2", buf);
-	READ_FROM_NVRAM(gpio0, "wl0gpio0", buf);
-	READ_FROM_NVRAM(gpio1, "wl0gpio1", buf);
-	READ_FROM_NVRAM(gpio2, "wl0gpio2", buf);
-	READ_FROM_NVRAM(gpio3, "wl0gpio3", buf);
-	READ_FROM_NVRAM(maxpwr_bg, "pa0maxpwr", buf);
-	READ_FROM_NVRAM(maxpwr_al, "pa1lomaxpwr", buf);
-	READ_FROM_NVRAM(maxpwr_a, "pa1maxpwr", buf);
-	READ_FROM_NVRAM(maxpwr_ah, "pa1himaxpwr", buf);
-	READ_FROM_NVRAM(itssi_a, "pa1itssit", buf);
-	READ_FROM_NVRAM(itssi_bg, "pa0itssit", buf);
+	READ_FROM_NVRAM2(gpio0, "ledbh0", "wl0gpio0", buf);
+	READ_FROM_NVRAM2(gpio1, "ledbh1", "wl0gpio1", buf);
+	READ_FROM_NVRAM2(gpio2, "ledbh2", "wl0gpio2", buf);
+	READ_FROM_NVRAM2(gpio3, "ledbh3", "wl0gpio3", buf);
+	READ_FROM_NVRAM2(maxpwr_bg, "maxp2ga0", "pa0maxpwr", buf);
+	READ_FROM_NVRAM2(maxpwr_al, "maxp5gla0", "pa1lomaxpwr", buf);
+	READ_FROM_NVRAM2(maxpwr_a, "maxp5ga0", "pa1maxpwr", buf);
+	READ_FROM_NVRAM2(maxpwr_ah, "maxp5gha0", "pa1himaxpwr", buf);
+	READ_FROM_NVRAM2(itssi_bg, "itt5ga0", "pa0itssit", buf);
+	READ_FROM_NVRAM2(itssi_a, "itt2ga0", "pa1itssit", buf);
 	READ_FROM_NVRAM(tri2g, "tri2g", buf);
 	READ_FROM_NVRAM(tri5gl, "tri5gl", buf);
 	READ_FROM_NVRAM(tri5g, "tri5g", buf);
 	READ_FROM_NVRAM(tri5gh, "tri5gh", buf);
+	READ_FROM_NVRAM(txpid2g[0], "txpid2ga0", buf);
+	READ_FROM_NVRAM(txpid2g[1], "txpid2ga1", buf);
+	READ_FROM_NVRAM(txpid2g[2], "txpid2ga2", buf);
+	READ_FROM_NVRAM(txpid2g[3], "txpid2ga3", buf);
+	READ_FROM_NVRAM(txpid5g[0], "txpid5ga0", buf);
+	READ_FROM_NVRAM(txpid5g[1], "txpid5ga1", buf);
+	READ_FROM_NVRAM(txpid5g[2], "txpid5ga2", buf);
+	READ_FROM_NVRAM(txpid5g[3], "txpid5ga3", buf);
+	READ_FROM_NVRAM(txpid5gl[0], "txpid5gla0", buf);
+	READ_FROM_NVRAM(txpid5gl[1], "txpid5gla1", buf);
+	READ_FROM_NVRAM(txpid5gl[2], "txpid5gla2", buf);
+	READ_FROM_NVRAM(txpid5gl[3], "txpid5gla3", buf);
+	READ_FROM_NVRAM(txpid5gh[0], "txpid5gha0", buf);
+	READ_FROM_NVRAM(txpid5gh[1], "txpid5gha1", buf);
+	READ_FROM_NVRAM(txpid5gh[2], "txpid5gha2", buf);
+	READ_FROM_NVRAM(txpid5gh[3], "txpid5gha3", buf);
 	READ_FROM_NVRAM(rxpo2g, "rxpo2g", buf);
 	READ_FROM_NVRAM(rxpo5g, "rxpo5g", buf);
 	READ_FROM_NVRAM(rssisav2g, "rssisav2g", buf);
@@ -120,19 +177,27 @@ static void bcm47xx_fill_sprom(struct ssb_sprom *sprom)
 	READ_FROM_NVRAM(rssismf5g, "rssismf5g", buf);
 	READ_FROM_NVRAM(bxa5g, "bxa5g", buf);
 	READ_FROM_NVRAM(cck2gpo, "cck2gpo", buf);
-	READ_FROM_NVRAM(ofdm2gpo, "ofdm2gpo", buf);
-	READ_FROM_NVRAM(ofdm5glpo, "ofdm5glpo", buf);
-	READ_FROM_NVRAM(ofdm5gpo, "ofdm5gpo", buf);
-	READ_FROM_NVRAM(ofdm5ghpo, "ofdm5ghpo", buf);
 
-	if (nvram_getenv("boardflags", buf, sizeof(buf)) >= 0) {
+	sprom->ofdm2gpo = nvram_getu32("ofdm2gpo", buf, sizeof(buf));
+	sprom->ofdm5glpo = nvram_getu32("ofdm5glpo", buf, sizeof(buf));
+	sprom->ofdm5gpo = nvram_getu32("ofdm5gpo", buf, sizeof(buf));
+	sprom->ofdm5ghpo = nvram_getu32("ofdm5ghpo", buf, sizeof(buf));
+
+	READ_FROM_NVRAM(antenna_gain.ghz24.a0, "ag0", buf);
+	READ_FROM_NVRAM(antenna_gain.ghz24.a1, "ag1", buf);
+	READ_FROM_NVRAM(antenna_gain.ghz24.a2, "ag2", buf);
+	READ_FROM_NVRAM(antenna_gain.ghz24.a3, "ag3", buf);
+	memcpy(&sprom->antenna_gain.ghz5, &sprom->antenna_gain.ghz24,
+	       sizeof(sprom->antenna_gain.ghz5));
+
+	if (nvram_getprefix(prefix, "boardflags", buf, sizeof(buf)) >= 0) {
 		boardflags = simple_strtoul(buf, NULL, 0);
 		if (boardflags) {
 			sprom->boardflags_lo = (boardflags & 0x0000FFFFU);
 			sprom->boardflags_hi = (boardflags & 0xFFFF0000U) >> 16;
 		}
 	}
-	if (nvram_getenv("boardflags2", buf, sizeof(buf)) >= 0) {
+	if (nvram_getprefix(prefix, "boardflags2", buf, sizeof(buf)) >= 0) {
 		boardflags = simple_strtoul(buf, NULL, 0);
 		if (boardflags) {
 			sprom->boardflags2_lo = (boardflags & 0x0000FFFFU);
@@ -141,6 +206,22 @@ static void bcm47xx_fill_sprom(struct ssb_sprom *sprom)
 	}
 }
 
+int bcm47xx_get_sprom(struct ssb_bus *bus, struct ssb_sprom *out)
+{
+	char prefix[10];
+
+	if (bus->bustype == SSB_BUSTYPE_PCI) {
+		snprintf(prefix, sizeof(prefix), "pci/%u/%u/",
+			 bus->host_pci->bus->number + 1,
+			 PCI_SLOT(bus->host_pci->devfn));
+		bcm47xx_fill_sprom(out, prefix);
+		return 0;
+	} else {
+		printk(KERN_WARNING "bcm47xx: unable to fill SPROM for given bustype.\n");
+		return -EINVAL;
+	}
+}
+
 static int bcm47xx_get_invariants(struct ssb_bus *bus,
 				   struct ssb_init_invariants *iv)
 {
@@ -158,7 +239,7 @@ static int bcm47xx_get_invariants(struct ssb_bus *bus,
 	if (nvram_getenv("boardrev", buf, sizeof(buf)) >= 0)
 		iv->boardinfo.rev = (u16)simple_strtoul(buf, NULL, 0);
 
-	bcm47xx_fill_sprom(&iv->sprom);
+	bcm47xx_fill_sprom(&iv->sprom, NULL);
 
 	if (nvram_getenv("cardbus", buf, sizeof(buf)) >= 0)
 		iv->has_cardbus_slot = !!simple_strtoul(buf, NULL, 10);
@@ -172,6 +253,11 @@ void __init plat_mem_setup(void)
 	char buf[100];
 	struct ssb_mipscore *mcore;
 
+	err = ssb_arch_register_fallback_sprom(&bcm47xx_get_sprom);
+	if (err)
+		printk(KERN_WARNING "bcm47xx: someone else already registered"
+			" a ssb SPROM callback handler (err %d)\n", err);
+
 	err = ssb_bus_ssbbus_register(&ssb_bcm47xx, SSB_ENUM_BASE,
 				      bcm47xx_get_invariants);
 	if (err)
diff --git a/arch/mips/bcm63xx/boards/board_bcm963xx.c b/arch/mips/bcm63xx/boards/board_bcm963xx.c
index 8dba8cfb752..40b223b603b 100644
--- a/arch/mips/bcm63xx/boards/board_bcm963xx.c
+++ b/arch/mips/bcm63xx/boards/board_bcm963xx.c
@@ -643,6 +643,17 @@ static struct ssb_sprom bcm63xx_sprom = {
 	.boardflags_lo		= 0x2848,
 	.boardflags_hi		= 0x0000,
 };
+
+int bcm63xx_get_fallback_sprom(struct ssb_bus *bus, struct ssb_sprom *out)
+{
+	if (bus->bustype == SSB_BUSTYPE_PCI) {
+		memcpy(out, &bcm63xx_sprom, sizeof(struct ssb_sprom));
+		return 0;
+	} else {
+		printk(KERN_ERR PFX "unable to fill SPROM for given bustype.\n");
+		return -EINVAL;
+	}
+}
 #endif
 
 /*
@@ -793,8 +804,9 @@ void __init board_prom_init(void)
 	if (!board_get_mac_address(bcm63xx_sprom.il0mac)) {
 		memcpy(bcm63xx_sprom.et0mac, bcm63xx_sprom.il0mac, ETH_ALEN);
 		memcpy(bcm63xx_sprom.et1mac, bcm63xx_sprom.il0mac, ETH_ALEN);
-		if (ssb_arch_set_fallback_sprom(&bcm63xx_sprom) < 0)
-			printk(KERN_ERR "failed to register fallback SPROM\n");
+		if (ssb_arch_register_fallback_sprom(
+				&bcm63xx_get_fallback_sprom) < 0)
+			printk(KERN_ERR PFX "failed to register fallback SPROM\n");
 	}
 #endif
 }
diff --git a/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c b/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c
index 88c9d963be8..9a6243676e2 100644
--- a/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c
+++ b/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c
@@ -16,8 +16,8 @@
 
 int main(int argc, char *argv[])
 {
+	unsigned long long vmlinux_size, vmlinux_load_addr, vmlinuz_load_addr;
 	struct stat sb;
-	uint64_t vmlinux_size, vmlinux_load_addr, vmlinuz_load_addr;
 
 	if (argc != 3) {
 		fprintf(stderr, "Usage: %s <pathname> <vmlinux_load_addr>\n",
diff --git a/arch/mips/boot/compressed/uart-alchemy.c b/arch/mips/boot/compressed/uart-alchemy.c
index 1bff22fa089..eb063e6dead 100644
--- a/arch/mips/boot/compressed/uart-alchemy.c
+++ b/arch/mips/boot/compressed/uart-alchemy.c
@@ -3,5 +3,5 @@
 void putc(char c)
 {
 	/* all current (Jan. 2010) in-kernel boards */
-	alchemy_uart_putchar(UART0_PHYS_ADDR, c);
+	alchemy_uart_putchar(AU1000_UART0_PHYS_ADDR, c);
 }
diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig
index caae2285816..cad555ebeca 100644
--- a/arch/mips/cavium-octeon/Kconfig
+++ b/arch/mips/cavium-octeon/Kconfig
@@ -1,11 +1,7 @@
-config CAVIUM_OCTEON_SPECIFIC_OPTIONS
-	bool "Enable Octeon specific options"
-	depends on CPU_CAVIUM_OCTEON
-	default "y"
+if CPU_CAVIUM_OCTEON
 
 config CAVIUM_CN63XXP1
 	bool "Enable CN63XXP1 errata worarounds"
-	depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS
 	default "n"
 	help
 	  The CN63XXP1 chip requires build time workarounds to
@@ -16,7 +12,6 @@ config CAVIUM_CN63XXP1
 
 config CAVIUM_OCTEON_2ND_KERNEL
 	bool "Build the kernel to be used as a 2nd kernel on the same chip"
-	depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS
 	default "n"
 	help
 	  This option configures this kernel to be linked at a different
@@ -26,7 +21,6 @@ config CAVIUM_OCTEON_2ND_KERNEL
 
 config CAVIUM_OCTEON_HW_FIX_UNALIGNED
 	bool "Enable hardware fixups of unaligned loads and stores"
-	depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS
 	default "y"
 	help
 	  Configure the Octeon hardware to automatically fix unaligned loads
@@ -38,7 +32,6 @@ config CAVIUM_OCTEON_HW_FIX_UNALIGNED
 
 config CAVIUM_OCTEON_CVMSEG_SIZE
 	int "Number of L1 cache lines reserved for CVMSEG memory"
-	depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS
 	range 0 54
 	default 1
 	help
@@ -50,7 +43,6 @@ config CAVIUM_OCTEON_CVMSEG_SIZE
 
 config CAVIUM_OCTEON_LOCK_L2
 	bool "Lock often used kernel code in the L2"
-	depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS
 	default "y"
 	help
 	  Enable locking parts of the kernel into the L2 cache.
@@ -93,7 +85,6 @@ config CAVIUM_OCTEON_LOCK_L2_MEMCPY
 config ARCH_SPARSEMEM_ENABLE
 	def_bool y
 	select SPARSEMEM_STATIC
-	depends on CPU_CAVIUM_OCTEON
 
 config CAVIUM_OCTEON_HELPER
 	def_bool y
@@ -107,6 +98,8 @@ config NEED_SG_DMA_LENGTH
 
 config SWIOTLB
 	def_bool y
-	depends on CPU_CAVIUM_OCTEON
 	select IOMMU_HELPER
 	select NEED_SG_DMA_LENGTH
+
+
+endif # CPU_CAVIUM_OCTEON
diff --git a/arch/mips/cavium-octeon/csrc-octeon.c b/arch/mips/cavium-octeon/csrc-octeon.c
index 26bf71130bf..29d56afbb02 100644
--- a/arch/mips/cavium-octeon/csrc-octeon.c
+++ b/arch/mips/cavium-octeon/csrc-octeon.c
@@ -105,8 +105,7 @@ unsigned long long notrace sched_clock(void)
 void __init plat_time_init(void)
 {
 	clocksource_mips.rating = 300;
-	clocksource_set_clock(&clocksource_mips, octeon_get_clock_rate());
-	clocksource_register(&clocksource_mips);
+	clocksource_register_hz(&clocksource_mips, octeon_get_clock_rate());
 }
 
 static u64 octeon_udelay_factor;
diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index 0707fae3f0e..2d9028f1474 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -288,7 +288,6 @@ void octeon_user_io_init(void)
 	union octeon_cvmemctl cvmmemctl;
 	union cvmx_iob_fau_timeout fau_timeout;
 	union cvmx_pow_nw_tim nm_tim;
-	uint64_t cvmctl;
 
 	/* Get the current settings for CP0_CVMMEMCTL_REG */
 	cvmmemctl.u64 = read_c0_cvmmemctl();
@@ -392,12 +391,6 @@ void octeon_user_io_init(void)
 			  CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE,
 			  CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE * 128);
 
-	/* Move the performance counter interrupts to IRQ 6 */
-	cvmctl = read_c0_cvmctl();
-	cvmctl &= ~(7 << 7);
-	cvmctl |= 6 << 7;
-	write_c0_cvmctl(cvmctl);
-
 	/* Set a default for the hardware timeouts */
 	fau_timeout.u64 = 0;
 	fau_timeout.s.tout_val = 0xfff;
diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c
index ba78b21cc8d..8b606423bbd 100644
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c
@@ -37,13 +37,15 @@ static irqreturn_t mailbox_interrupt(int irq, void *dev_id)
 	uint64_t action;
 
 	/* Load the mailbox register to figure out what we're supposed to do */
-	action = cvmx_read_csr(CVMX_CIU_MBOX_CLRX(coreid));
+	action = cvmx_read_csr(CVMX_CIU_MBOX_CLRX(coreid)) & 0xffff;
 
 	/* Clear the mailbox to clear the interrupt */
 	cvmx_write_csr(CVMX_CIU_MBOX_CLRX(coreid), action);
 
 	if (action & SMP_CALL_FUNCTION)
 		smp_call_function_interrupt();
+	if (action & SMP_RESCHEDULE_YOURSELF)
+		scheduler_ipi();
 
 	/* Check if we've been told to flush the icache */
 	if (action & SMP_ICACHE_FLUSH)
@@ -200,16 +202,15 @@ void octeon_prepare_cpus(unsigned int max_cpus)
 	if (labi->labi_signature != LABI_SIGNATURE)
 		panic("The bootloader version on this board is incorrect.");
 #endif
-
-	cvmx_write_csr(CVMX_CIU_MBOX_CLRX(cvmx_get_core_num()), 0xffffffff);
+	/*
+	 * Only the low order mailbox bits are used for IPIs, leave
+	 * the other bits alone.
+	 */
+	cvmx_write_csr(CVMX_CIU_MBOX_CLRX(cvmx_get_core_num()), 0xffff);
 	if (request_irq(OCTEON_IRQ_MBOX0, mailbox_interrupt, IRQF_DISABLED,
-			"mailbox0", mailbox_interrupt)) {
+			"SMP-IPI", mailbox_interrupt)) {
 		panic("Cannot request_irq(OCTEON_IRQ_MBOX0)\n");
 	}
-	if (request_irq(OCTEON_IRQ_MBOX1, mailbox_interrupt, IRQF_DISABLED,
-			"mailbox1", mailbox_interrupt)) {
-		panic("Cannot request_irq(OCTEON_IRQ_MBOX1)\n");
-	}
 }
 
 /**
diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig
index 167c1d07b80..b6acd2f256b 100644
--- a/arch/mips/configs/lemote2f_defconfig
+++ b/arch/mips/configs/lemote2f_defconfig
@@ -86,8 +86,8 @@ CONFIG_NET_SCHED=y
 CONFIG_NET_EMATCH=y
 CONFIG_NET_CLS_ACT=y
 CONFIG_BT=m
-CONFIG_BT_L2CAP=m
-CONFIG_BT_SCO=m
+CONFIG_BT_L2CAP=y
+CONFIG_BT_SCO=y
 CONFIG_BT_RFCOMM=m
 CONFIG_BT_RFCOMM_TTY=y
 CONFIG_BT_BNEP=m
@@ -329,7 +329,7 @@ CONFIG_USB_LED=m
 CONFIG_USB_GADGET=m
 CONFIG_USB_GADGET_M66592=y
 CONFIG_MMC=m
-CONFIG_LEDS_CLASS=m
+CONFIG_LEDS_CLASS=y
 CONFIG_STAGING=y
 # CONFIG_STAGING_EXCLUDE_BUILD is not set
 CONFIG_FB_SM7XX=y
diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig
index 7270f3183bd..5527abbb7de 100644
--- a/arch/mips/configs/malta_defconfig
+++ b/arch/mips/configs/malta_defconfig
@@ -374,7 +374,7 @@ CONFIG_FB_CIRRUS=y
 # CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_HID=m
-CONFIG_LEDS_CLASS=m
+CONFIG_LEDS_CLASS=y
 CONFIG_LEDS_TRIGGER_TIMER=m
 CONFIG_LEDS_TRIGGER_IDE_DISK=y
 CONFIG_LEDS_TRIGGER_HEARTBEAT=m
diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig
index a97a42c6b2c..37862b2ce36 100644
--- a/arch/mips/configs/mtx1_defconfig
+++ b/arch/mips/configs/mtx1_defconfig
@@ -225,8 +225,8 @@ CONFIG_TOSHIBA_FIR=m
 CONFIG_VLSI_FIR=m
 CONFIG_MCS_FIR=m
 CONFIG_BT=m
-CONFIG_BT_L2CAP=m
-CONFIG_BT_SCO=m
+CONFIG_BT_L2CAP=y
+CONFIG_BT_SCO=y
 CONFIG_BT_RFCOMM=m
 CONFIG_BT_RFCOMM_TTY=y
 CONFIG_BT_BNEP=m
diff --git a/arch/mips/configs/nlm_xlr_defconfig b/arch/mips/configs/nlm_xlr_defconfig
new file mode 100644
index 00000000000..e4b399fdaa6
--- /dev/null
+++ b/arch/mips/configs/nlm_xlr_defconfig
@@ -0,0 +1,574 @@
+CONFIG_NLM_XLR_BOARD=y
+CONFIG_HIGHMEM=y
+CONFIG_KSM=y
+CONFIG_DEFAULT_MMAP_MIN_ADDR=65536
+CONFIG_SMP=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_PREEMPT_VOLUNTARY=y
+CONFIG_KEXEC=y
+CONFIG_EXPERIMENTAL=y
+CONFIG_CROSS_COMPILE="mips64-unknown-linux-gnu-"
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_AUDIT=y
+CONFIG_NAMESPACES=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="usr/dev_file_list usr/rootfs"
+CONFIG_RD_BZIP2=y
+CONFIG_RD_LZMA=y
+CONFIG_INITRAMFS_COMPRESSION_GZIP=y
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_EXPERT=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_ELF_CORE is not set
+# CONFIG_PCSPKR_PLATFORM is not set
+# CONFIG_PERF_EVENTS is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_INTEGRITY=y
+CONFIG_BINFMT_MISC=m
+CONFIG_PM_RUNTIME=y
+CONFIG_PM_DEBUG=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_NET_IPIP=m
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_XFRM_MODE_TRANSPORT=m
+CONFIG_INET_XFRM_MODE_TUNNEL=m
+CONFIG_INET_XFRM_MODE_BEET=m
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_HSTCP=m
+CONFIG_TCP_CONG_HYBLA=m
+CONFIG_TCP_CONG_SCALABLE=m
+CONFIG_TCP_CONG_LP=m
+CONFIG_TCP_CONG_VENO=m
+CONFIG_TCP_CONG_YEAH=m
+CONFIG_TCP_CONG_ILLINOIS=m
+CONFIG_TCP_MD5SIG=y
+CONFIG_IPV6=y
+CONFIG_IPV6_PRIVACY=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_INET6_XFRM_MODE_TRANSPORT=m
+CONFIG_INET6_XFRM_MODE_TUNNEL=m
+CONFIG_INET6_XFRM_MODE_BEET=m
+CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
+CONFIG_IPV6_SIT=m
+CONFIG_IPV6_TUNNEL=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NETLABEL=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CT_PROTO_UDPLITE=m
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SANE=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NETFILTER_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
+CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_NOTRACK=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_TRACE=m
+CONFIG_NETFILTER_XT_TARGET_SECMARK=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_OSF=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_IPV6=y
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_DH=m
+CONFIG_IP_VS_SH=m
+CONFIG_IP_VS_SED=m
+CONFIG_IP_VS_NQ=m
+CONFIG_IP_VS_FTP=m
+CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_IP_NF_QUEUE=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_LOG=m
+CONFIG_IP_NF_TARGET_ULOG=m
+CONFIG_NF_NAT=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_TARGET_NETMAP=m
+CONFIG_IP_NF_TARGET_REDIRECT=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_CLUSTERIP=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_SECURITY=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_IP6_NF_QUEUE=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_AH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_MH=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_TARGET_HL=m
+CONFIG_IP6_NF_TARGET_LOG=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP6_NF_SECURITY=m
+CONFIG_DECNET_NF_GRABULATOR=m
+CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_BRIDGE_EBT_BROUTE=m
+CONFIG_BRIDGE_EBT_T_FILTER=m
+CONFIG_BRIDGE_EBT_T_NAT=m
+CONFIG_BRIDGE_EBT_802_3=m
+CONFIG_BRIDGE_EBT_AMONG=m
+CONFIG_BRIDGE_EBT_ARP=m
+CONFIG_BRIDGE_EBT_IP=m
+CONFIG_BRIDGE_EBT_IP6=m
+CONFIG_BRIDGE_EBT_LIMIT=m
+CONFIG_BRIDGE_EBT_MARK=m
+CONFIG_BRIDGE_EBT_PKTTYPE=m
+CONFIG_BRIDGE_EBT_STP=m
+CONFIG_BRIDGE_EBT_VLAN=m
+CONFIG_BRIDGE_EBT_ARPREPLY=m
+CONFIG_BRIDGE_EBT_DNAT=m
+CONFIG_BRIDGE_EBT_MARK_T=m
+CONFIG_BRIDGE_EBT_REDIRECT=m
+CONFIG_BRIDGE_EBT_SNAT=m
+CONFIG_BRIDGE_EBT_LOG=m
+CONFIG_BRIDGE_EBT_ULOG=m
+CONFIG_BRIDGE_EBT_NFLOG=m
+CONFIG_IP_DCCP=m
+CONFIG_RDS=m
+CONFIG_RDS_TCP=m
+CONFIG_TIPC=m
+CONFIG_ATM=m
+CONFIG_ATM_CLIP=m
+CONFIG_ATM_LANE=m
+CONFIG_ATM_MPOA=m
+CONFIG_ATM_BR2684=m
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q_GVRP=y
+CONFIG_DECNET=m
+CONFIG_LLC2=m
+CONFIG_IPX=m
+CONFIG_ATALK=m
+CONFIG_DEV_APPLETALK=m
+CONFIG_IPDDP=m
+CONFIG_IPDDP_ENCAP=y
+CONFIG_IPDDP_DECAP=y
+CONFIG_X25=m
+CONFIG_LAPB=m
+CONFIG_ECONET=m
+CONFIG_ECONET_AUNUDP=y
+CONFIG_ECONET_NATIVE=y
+CONFIG_WAN_ROUTER=m
+CONFIG_PHONET=m
+CONFIG_IEEE802154=m
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_ATM=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_MULTIQ=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_FLOW=m
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_CMP=m
+CONFIG_NET_EMATCH_NBYTE=m
+CONFIG_NET_EMATCH_U32=m
+CONFIG_NET_EMATCH_META=m
+CONFIG_NET_EMATCH_TEXT=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_DCB=y
+CONFIG_NET_PKTGEN=m
+# CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_STANDALONE is not set
+CONFIG_CONNECTOR=y
+CONFIG_MTD=m
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_OSD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=65536
+CONFIG_CDROM_PKTCDVD=y
+CONFIG_MISC_DEVICES=y
+CONFIG_RAID_ATTRS=m
+CONFIG_SCSI=y
+CONFIG_SCSI_TGT=m
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_CHR_DEV_OSST=m
+CONFIG_BLK_DEV_SR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_CHR_DEV_SCH=m
+CONFIG_SCSI_MULTI_LUN=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SCAN_ASYNC=y
+CONFIG_SCSI_SPI_ATTRS=m
+CONFIG_SCSI_FC_TGT_ATTRS=y
+CONFIG_SCSI_SAS_LIBSAS=m
+CONFIG_SCSI_SRP_ATTRS=m
+CONFIG_SCSI_SRP_TGT_ATTRS=y
+CONFIG_ISCSI_TCP=m
+CONFIG_LIBFCOE=m
+CONFIG_SCSI_DEBUG=m
+CONFIG_SCSI_DH=y
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_HP_SW=m
+CONFIG_SCSI_DH_EMC=m
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_SCSI_OSD_INITIATOR=m
+CONFIG_SCSI_OSD_ULD=m
+# CONFIG_INPUT_MOUSEDEV is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_INPUT_EVBUG=m
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO_I8042 is not set
+CONFIG_SERIO_SERPORT=m
+CONFIG_SERIO_LIBPS2=y
+CONFIG_SERIO_RAW=m
+CONFIG_VT_HW_CONSOLE_BINDING=y
+CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
+CONFIG_LEGACY_PTY_COUNT=0
+CONFIG_SERIAL_NONSTANDARD=y
+CONFIG_N_HDLC=m
+# CONFIG_DEVKMEM is not set
+CONFIG_STALDRV=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=48
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_8250_RSA=y
+CONFIG_HW_RANDOM=y
+CONFIG_HW_RANDOM_TIMERIOMEM=m
+CONFIG_RAW_DRIVER=m
+# CONFIG_HWMON is not set
+# CONFIG_VGA_CONSOLE is not set
+# CONFIG_HID_SUPPORT is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_UIO=y
+CONFIG_UIO_PDRV=m
+CONFIG_UIO_PDRV_GENIRQ=m
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
+CONFIG_OCFS2_FS=m
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_NILFS2_FS=m
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+# CONFIG_PRINT_QUOTA_WARNING is not set
+CONFIG_QFMT_V1=m
+CONFIG_QFMT_V2=m
+CONFIG_AUTOFS4_FS=m
+CONFIG_FUSE_FS=y
+CONFIG_CUSE=m
+CONFIG_FSCACHE=m
+CONFIG_FSCACHE_STATS=y
+CONFIG_FSCACHE_HISTOGRAM=y
+CONFIG_CACHEFILES=m
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_NTFS_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_CONFIGFS_FS=y
+CONFIG_ADFS_FS=m
+CONFIG_AFFS_FS=m
+CONFIG_ECRYPT_FS=y
+CONFIG_HFS_FS=m
+CONFIG_HFSPLUS_FS=m
+CONFIG_BEFS_FS=m
+CONFIG_BFS_FS=m
+CONFIG_EFS_FS=m
+CONFIG_CRAMFS=m
+CONFIG_SQUASHFS=m
+CONFIG_VXFS_FS=m
+CONFIG_MINIX_FS=m
+CONFIG_OMFS_FS=m
+CONFIG_HPFS_FS=m
+CONFIG_QNX4FS_FS=m
+CONFIG_ROMFS_FS=m
+CONFIG_SYSV_FS=m
+CONFIG_UFS_FS=m
+CONFIG_EXOFS_FS=m
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_FSCACHE=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_CIFS=m
+CONFIG_CIFS_WEAK_PW_HASH=y
+CONFIG_CIFS_UPCALL=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+CONFIG_CIFS_DFS_UPCALL=y
+CONFIG_CIFS_EXPERIMENTAL=y
+CONFIG_NCP_FS=m
+CONFIG_NCPFS_PACKET_SIGNING=y
+CONFIG_NCPFS_IOCTL_LOCKING=y
+CONFIG_NCPFS_STRONG=y
+CONFIG_NCPFS_NFS_NS=y
+CONFIG_NCPFS_OS2_NS=y
+CONFIG_NCPFS_NLS=y
+CONFIG_NCPFS_EXTRAS=y
+CONFIG_CODA_FS=m
+CONFIG_AFS_FS=m
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_ACORN_PARTITION=y
+CONFIG_ACORN_PARTITION_ICS=y
+CONFIG_ACORN_PARTITION_RISCIX=y
+CONFIG_OSF_PARTITION=y
+CONFIG_AMIGA_PARTITION=y
+CONFIG_ATARI_PARTITION=y
+CONFIG_MAC_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_LDM_PARTITION=y
+CONFIG_SGI_PARTITION=y
+CONFIG_ULTRIX_PARTITION=y
+CONFIG_SUN_PARTITION=y
+CONFIG_KARMA_PARTITION=y
+CONFIG_EFI_PARTITION=y
+CONFIG_SYSV68_PARTITION=y
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="cp437"
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_737=m
+CONFIG_NLS_CODEPAGE_775=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_CODEPAGE_852=m
+CONFIG_NLS_CODEPAGE_855=m
+CONFIG_NLS_CODEPAGE_857=m
+CONFIG_NLS_CODEPAGE_860=m
+CONFIG_NLS_CODEPAGE_861=m
+CONFIG_NLS_CODEPAGE_862=m
+CONFIG_NLS_CODEPAGE_863=m
+CONFIG_NLS_CODEPAGE_864=m
+CONFIG_NLS_CODEPAGE_865=m
+CONFIG_NLS_CODEPAGE_866=m
+CONFIG_NLS_CODEPAGE_869=m
+CONFIG_NLS_CODEPAGE_936=m
+CONFIG_NLS_CODEPAGE_950=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_CODEPAGE_949=m
+CONFIG_NLS_CODEPAGE_874=m
+CONFIG_NLS_ISO8859_8=m
+CONFIG_NLS_CODEPAGE_1250=m
+CONFIG_NLS_CODEPAGE_1251=m
+CONFIG_NLS_ASCII=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_KOI8_R=m
+CONFIG_NLS_KOI8_U=m
+CONFIG_PRINTK_TIME=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_UNUSED_SYMBOLS=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_SCHEDSTATS=y
+CONFIG_TIMER_STATS=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_SYSCTL_SYSCALL_CHECK=y
+CONFIG_SCHED_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_KGDB=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_LSM_MMAP_MIN_ADDR=0
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
+CONFIG_SECURITY_SELINUX_DISABLE=y
+CONFIG_SECURITY_SMACK=y
+CONFIG_SECURITY_TOMOYO=y
+CONFIG_CRYPTO_NULL=m
+CONFIG_CRYPTO_CRYPTD=m
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_CCM=m
+CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CTS=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD128=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_RMD256=m
+CONFIG_CRYPTO_RMD320=m
+CONFIG_CRYPTO_SHA256=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAMELLIA=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_FCRYPT=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SALSA20=m
+CONFIG_CRYPTO_SEED=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_ZLIB=m
+CONFIG_CRYPTO_LZO=m
+CONFIG_CRC_CCITT=m
+CONFIG_CRC7=m
diff --git a/arch/mips/include/asm/cache.h b/arch/mips/include/asm/cache.h
index 650ac9ba734..b4db69fbc40 100644
--- a/arch/mips/include/asm/cache.h
+++ b/arch/mips/include/asm/cache.h
@@ -17,6 +17,6 @@
 #define SMP_CACHE_SHIFT		L1_CACHE_SHIFT
 #define SMP_CACHE_BYTES		L1_CACHE_BYTES
 
-#define __read_mostly __attribute__((__section__(".data.read_mostly")))
+#define __read_mostly __attribute__((__section__(".data..read_mostly")))
 
 #endif /* _ASM_CACHE_H */
diff --git a/arch/mips/include/asm/cevt-r4k.h b/arch/mips/include/asm/cevt-r4k.h
index fa4328f9124..65f9bdd02f1 100644
--- a/arch/mips/include/asm/cevt-r4k.h
+++ b/arch/mips/include/asm/cevt-r4k.h
@@ -14,6 +14,9 @@
 #ifndef __ASM_CEVT_R4K_H
 #define __ASM_CEVT_R4K_H
 
+#include <linux/clockchips.h>
+#include <asm/time.h>
+
 DECLARE_PER_CPU(struct clock_event_device, mips_clockevent_device);
 
 void mips_event_handler(struct clock_event_device *dev);
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index e7af793e536..5f95a4bfc73 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -33,6 +33,7 @@
 #define PRID_COMP_TOSHIBA	0x070000
 #define PRID_COMP_LSI		0x080000
 #define PRID_COMP_LEXRA		0x0b0000
+#define PRID_COMP_NETLOGIC	0x0c0000
 #define PRID_COMP_CAVIUM	0x0d0000
 #define PRID_COMP_INGENIC	0xd00000
 
@@ -142,6 +143,31 @@
 #define PRID_IMP_JZRISC        0x0200
 
 /*
+ * These are the PRID's for when 23:16 == PRID_COMP_NETLOGIC
+ */
+#define PRID_IMP_NETLOGIC_XLR732	0x0000
+#define PRID_IMP_NETLOGIC_XLR716	0x0200
+#define PRID_IMP_NETLOGIC_XLR532	0x0900
+#define PRID_IMP_NETLOGIC_XLR308	0x0600
+#define PRID_IMP_NETLOGIC_XLR532C	0x0800
+#define PRID_IMP_NETLOGIC_XLR516C	0x0a00
+#define PRID_IMP_NETLOGIC_XLR508C	0x0b00
+#define PRID_IMP_NETLOGIC_XLR308C	0x0f00
+#define PRID_IMP_NETLOGIC_XLS608	0x8000
+#define PRID_IMP_NETLOGIC_XLS408	0x8800
+#define PRID_IMP_NETLOGIC_XLS404	0x8c00
+#define PRID_IMP_NETLOGIC_XLS208	0x8e00
+#define PRID_IMP_NETLOGIC_XLS204	0x8f00
+#define PRID_IMP_NETLOGIC_XLS108	0xce00
+#define PRID_IMP_NETLOGIC_XLS104	0xcf00
+#define PRID_IMP_NETLOGIC_XLS616B	0x4000
+#define PRID_IMP_NETLOGIC_XLS608B	0x4a00
+#define PRID_IMP_NETLOGIC_XLS416B	0x4400
+#define PRID_IMP_NETLOGIC_XLS412B	0x4c00
+#define PRID_IMP_NETLOGIC_XLS408B	0x4e00
+#define PRID_IMP_NETLOGIC_XLS404B	0x4f00
+
+/*
  * Definitions for 7:0 on legacy processors
  */
 
@@ -234,6 +260,7 @@ enum cpu_type_enum {
 	 */
 	CPU_5KC, CPU_20KC, CPU_25KF, CPU_SB1, CPU_SB1A, CPU_LOONGSON2,
 	CPU_CAVIUM_OCTEON, CPU_CAVIUM_OCTEON_PLUS, CPU_CAVIUM_OCTEON2,
+	CPU_XLR,
 
 	CPU_LAST
 };
diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h
index 655f849bd08..7aa37ddfca4 100644
--- a/arch/mips/include/asm/dma-mapping.h
+++ b/arch/mips/include/asm/dma-mapping.h
@@ -5,7 +5,9 @@
 #include <asm/cache.h>
 #include <asm-generic/dma-coherent.h>
 
+#ifndef CONFIG_SGI_IP27	/* Kludge to fix 2.6.39 build for IP27 */
 #include <dma-coherence.h>
+#endif
 
 extern struct dma_map_ops *mips_dma_map_ops;
 
diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h
index f5e85601532..c565b7c3f0b 100644
--- a/arch/mips/include/asm/hugetlb.h
+++ b/arch/mips/include/asm/hugetlb.h
@@ -70,6 +70,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 					 unsigned long addr, pte_t *ptep)
 {
+	flush_tlb_mm(vma->vm_mm);
 }
 
 static inline int huge_pte_none(pte_t pte)
diff --git a/arch/mips/include/asm/i8253.h b/arch/mips/include/asm/i8253.h
index 48bb8237299..9ad011366f7 100644
--- a/arch/mips/include/asm/i8253.h
+++ b/arch/mips/include/asm/i8253.h
@@ -12,8 +12,13 @@
 #define PIT_CH0			0x40
 #define PIT_CH2			0x42
 
+#define PIT_LATCH		LATCH
+
 extern raw_spinlock_t i8253_lock;
 
 extern void setup_pit_timer(void);
 
+#define inb_pit inb_p
+#define outb_pit outb_p
+
 #endif /* __ASM_I8253_H */
diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h
index 7622ccf7507..1881b316ca4 100644
--- a/arch/mips/include/asm/jump_label.h
+++ b/arch/mips/include/asm/jump_label.h
@@ -20,16 +20,18 @@
 #define WORD_INSN ".word"
 #endif
 
-#define JUMP_LABEL(key, label)						\
-	do {								\
-		asm goto("1:\tnop\n\t"					\
-			"nop\n\t"					\
-			".pushsection __jump_table,  \"a\"\n\t"		\
-			WORD_INSN " 1b, %l[" #label "], %0\n\t"		\
-			".popsection\n\t"				\
-			: :  "i" (key) :  : label);			\
-	} while (0)
-
+static __always_inline bool arch_static_branch(struct jump_label_key *key)
+{
+	asm goto("1:\tnop\n\t"
+		"nop\n\t"
+		".pushsection __jump_table,  \"aw\"\n\t"
+		WORD_INSN " 1b, %l[l_yes], %0\n\t"
+		".popsection\n\t"
+		: :  "i" (key) : : l_yes);
+	return false;
+l_yes:
+	return true;
+}
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/mips/include/asm/mach-au1x00/au1000.h b/arch/mips/include/asm/mach-au1x00/au1000.h
index a6976619160..f260ebed713 100644
--- a/arch/mips/include/asm/mach-au1x00/au1000.h
+++ b/arch/mips/include/asm/mach-au1x00/au1000.h
@@ -161,6 +161,45 @@ static inline int alchemy_get_cputype(void)
 	return ALCHEMY_CPU_UNKNOWN;
 }
 
+/* return number of uarts on a given cputype */
+static inline int alchemy_get_uarts(int type)
+{
+	switch (type) {
+	case ALCHEMY_CPU_AU1000:
+		return 4;
+	case ALCHEMY_CPU_AU1500:
+	case ALCHEMY_CPU_AU1200:
+		return 2;
+	case ALCHEMY_CPU_AU1100:
+	case ALCHEMY_CPU_AU1550:
+		return 3;
+	}
+	return 0;
+}
+
+/* enable an UART block if it isn't already */
+static inline void alchemy_uart_enable(u32 uart_phys)
+{
+	void __iomem *addr = (void __iomem *)KSEG1ADDR(uart_phys);
+
+	/* reset, enable clock, deassert reset */
+	if ((__raw_readl(addr + 0x100) & 3) != 3) {
+		__raw_writel(0, addr + 0x100);
+		wmb();
+		__raw_writel(1, addr + 0x100);
+		wmb();
+	}
+	__raw_writel(3, addr + 0x100);
+	wmb();
+}
+
+static inline void alchemy_uart_disable(u32 uart_phys)
+{
+	void __iomem *addr = (void __iomem *)KSEG1ADDR(uart_phys);
+	__raw_writel(0, addr + 0x100);	/* UART_MOD_CNTRL */
+	wmb();
+}
+
 static inline void alchemy_uart_putchar(u32 uart_phys, u8 c)
 {
 	void __iomem *base = (void __iomem *)KSEG1ADDR(uart_phys);
@@ -180,6 +219,20 @@ static inline void alchemy_uart_putchar(u32 uart_phys, u8 c)
 	wmb();
 }
 
+/* return number of ethernet MACs on a given cputype */
+static inline int alchemy_get_macs(int type)
+{
+	switch (type) {
+	case ALCHEMY_CPU_AU1000:
+	case ALCHEMY_CPU_AU1500:
+	case ALCHEMY_CPU_AU1550:
+		return 2;
+	case ALCHEMY_CPU_AU1100:
+		return 1;
+	}
+	return 0;
+}
+
 /* arch/mips/au1000/common/clocks.c */
 extern void set_au1x00_speed(unsigned int new_freq);
 extern unsigned int get_au1x00_speed(void);
@@ -630,38 +683,42 @@ enum soc_au1200_ints {
 
 /*
  * Physical base addresses for integrated peripherals
+ * 0..au1000 1..au1500 2..au1100 3..au1550 4..au1200
  */
 
+#define AU1000_AC97_PHYS_ADDR		0x10000000 /* 012 */
+#define AU1000_USBD_PHYS_ADDR		0x10200000 /* 0123 */
+#define AU1000_IC0_PHYS_ADDR		0x10400000 /* 01234 */
+#define AU1000_MAC0_PHYS_ADDR		0x10500000 /* 023 */
+#define AU1000_MAC1_PHYS_ADDR		0x10510000 /* 023 */
+#define AU1000_MACEN_PHYS_ADDR		0x10520000 /* 023 */
+#define AU1100_SD0_PHYS_ADDR		0x10600000 /* 24 */
+#define AU1100_SD1_PHYS_ADDR		0x10680000 /* 24 */
+#define AU1000_I2S_PHYS_ADDR		0x11000000 /* 02 */
+#define AU1500_MAC0_PHYS_ADDR		0x11500000 /* 1 */
+#define AU1500_MAC1_PHYS_ADDR		0x11510000 /* 1 */
+#define AU1500_MACEN_PHYS_ADDR		0x11520000 /* 1 */
+#define AU1000_UART0_PHYS_ADDR		0x11100000 /* 01234 */
+#define AU1000_UART1_PHYS_ADDR		0x11200000 /* 0234 */
+#define AU1000_UART2_PHYS_ADDR		0x11300000 /* 0 */
+#define AU1000_UART3_PHYS_ADDR		0x11400000 /* 0123 */
+#define AU1500_GPIO2_PHYS_ADDR		0x11700000 /* 1234 */
+#define AU1000_IC1_PHYS_ADDR		0x11800000 /* 01234 */
+#define AU1000_SYS_PHYS_ADDR		0x11900000 /* 01234 */
+#define AU1000_DMA_PHYS_ADDR		0x14002000 /* 012 */
+#define AU1550_DBDMA_PHYS_ADDR		0x14002000 /* 34 */
+#define AU1550_DBDMA_CONF_PHYS_ADDR	0x14003000 /* 34 */
+#define AU1000_MACDMA0_PHYS_ADDR	0x14004000 /* 0123 */
+#define AU1000_MACDMA1_PHYS_ADDR	0x14004200 /* 0123 */
+
+
 #ifdef CONFIG_SOC_AU1000
 #define	MEM_PHYS_ADDR		0x14000000
 #define	STATIC_MEM_PHYS_ADDR	0x14001000
-#define	DMA0_PHYS_ADDR		0x14002000
-#define	DMA1_PHYS_ADDR		0x14002100
-#define	DMA2_PHYS_ADDR		0x14002200
-#define	DMA3_PHYS_ADDR		0x14002300
-#define	DMA4_PHYS_ADDR		0x14002400
-#define	DMA5_PHYS_ADDR		0x14002500
-#define	DMA6_PHYS_ADDR		0x14002600
-#define	DMA7_PHYS_ADDR		0x14002700
-#define	IC0_PHYS_ADDR		0x10400000
-#define	IC1_PHYS_ADDR		0x11800000
-#define	AC97_PHYS_ADDR		0x10000000
 #define	USBH_PHYS_ADDR		0x10100000
-#define	USBD_PHYS_ADDR		0x10200000
 #define	IRDA_PHYS_ADDR		0x10300000
-#define	MAC0_PHYS_ADDR		0x10500000
-#define	MAC1_PHYS_ADDR		0x10510000
-#define	MACEN_PHYS_ADDR		0x10520000
-#define	MACDMA0_PHYS_ADDR	0x14004000
-#define	MACDMA1_PHYS_ADDR	0x14004200
-#define	I2S_PHYS_ADDR		0x11000000
-#define	UART0_PHYS_ADDR		0x11100000
-#define	UART1_PHYS_ADDR		0x11200000
-#define	UART2_PHYS_ADDR		0x11300000
-#define	UART3_PHYS_ADDR		0x11400000
 #define	SSI0_PHYS_ADDR		0x11600000
 #define	SSI1_PHYS_ADDR		0x11680000
-#define	SYS_PHYS_ADDR		0x11900000
 #define PCMCIA_IO_PHYS_ADDR	0xF00000000ULL
 #define PCMCIA_ATTR_PHYS_ADDR	0xF40000000ULL
 #define PCMCIA_MEM_PHYS_ADDR	0xF80000000ULL
@@ -672,30 +729,8 @@ enum soc_au1200_ints {
 #ifdef CONFIG_SOC_AU1500
 #define	MEM_PHYS_ADDR		0x14000000
 #define	STATIC_MEM_PHYS_ADDR	0x14001000
-#define	DMA0_PHYS_ADDR		0x14002000
-#define	DMA1_PHYS_ADDR		0x14002100
-#define	DMA2_PHYS_ADDR		0x14002200
-#define	DMA3_PHYS_ADDR		0x14002300
-#define	DMA4_PHYS_ADDR		0x14002400
-#define	DMA5_PHYS_ADDR		0x14002500
-#define	DMA6_PHYS_ADDR		0x14002600
-#define	DMA7_PHYS_ADDR		0x14002700
-#define	IC0_PHYS_ADDR		0x10400000
-#define	IC1_PHYS_ADDR		0x11800000
-#define	AC97_PHYS_ADDR		0x10000000
 #define	USBH_PHYS_ADDR		0x10100000
-#define	USBD_PHYS_ADDR		0x10200000
 #define PCI_PHYS_ADDR		0x14005000
-#define	MAC0_PHYS_ADDR		0x11500000
-#define	MAC1_PHYS_ADDR		0x11510000
-#define	MACEN_PHYS_ADDR		0x11520000
-#define	MACDMA0_PHYS_ADDR	0x14004000
-#define	MACDMA1_PHYS_ADDR	0x14004200
-#define	I2S_PHYS_ADDR		0x11000000
-#define	UART0_PHYS_ADDR		0x11100000
-#define	UART3_PHYS_ADDR		0x11400000
-#define GPIO2_PHYS_ADDR		0x11700000
-#define	SYS_PHYS_ADDR		0x11900000
 #define PCI_MEM_PHYS_ADDR	0x400000000ULL
 #define PCI_IO_PHYS_ADDR	0x500000000ULL
 #define PCI_CONFIG0_PHYS_ADDR	0x600000000ULL
@@ -710,34 +745,10 @@ enum soc_au1200_ints {
 #ifdef CONFIG_SOC_AU1100
 #define	MEM_PHYS_ADDR		0x14000000
 #define	STATIC_MEM_PHYS_ADDR	0x14001000
-#define	DMA0_PHYS_ADDR		0x14002000
-#define	DMA1_PHYS_ADDR		0x14002100
-#define	DMA2_PHYS_ADDR		0x14002200
-#define	DMA3_PHYS_ADDR		0x14002300
-#define	DMA4_PHYS_ADDR		0x14002400
-#define	DMA5_PHYS_ADDR		0x14002500
-#define	DMA6_PHYS_ADDR		0x14002600
-#define	DMA7_PHYS_ADDR		0x14002700
-#define	IC0_PHYS_ADDR		0x10400000
-#define SD0_PHYS_ADDR		0x10600000
-#define SD1_PHYS_ADDR		0x10680000
-#define	IC1_PHYS_ADDR		0x11800000
-#define	AC97_PHYS_ADDR		0x10000000
 #define	USBH_PHYS_ADDR		0x10100000
-#define	USBD_PHYS_ADDR		0x10200000
 #define	IRDA_PHYS_ADDR		0x10300000
-#define	MAC0_PHYS_ADDR		0x10500000
-#define	MACEN_PHYS_ADDR		0x10520000
-#define	MACDMA0_PHYS_ADDR	0x14004000
-#define	MACDMA1_PHYS_ADDR	0x14004200
-#define	I2S_PHYS_ADDR		0x11000000
-#define	UART0_PHYS_ADDR		0x11100000
-#define	UART1_PHYS_ADDR		0x11200000
-#define	UART3_PHYS_ADDR		0x11400000
 #define	SSI0_PHYS_ADDR		0x11600000
 #define	SSI1_PHYS_ADDR		0x11680000
-#define GPIO2_PHYS_ADDR		0x11700000
-#define	SYS_PHYS_ADDR		0x11900000
 #define LCD_PHYS_ADDR		0x15000000
 #define PCMCIA_IO_PHYS_ADDR	0xF00000000ULL
 #define PCMCIA_ATTR_PHYS_ADDR	0xF40000000ULL
@@ -749,22 +760,8 @@ enum soc_au1200_ints {
 #ifdef CONFIG_SOC_AU1550
 #define	MEM_PHYS_ADDR		0x14000000
 #define	STATIC_MEM_PHYS_ADDR	0x14001000
-#define	IC0_PHYS_ADDR		0x10400000
-#define	IC1_PHYS_ADDR		0x11800000
 #define	USBH_PHYS_ADDR		0x14020000
-#define	USBD_PHYS_ADDR		0x10200000
 #define PCI_PHYS_ADDR		0x14005000
-#define	MAC0_PHYS_ADDR		0x10500000
-#define	MAC1_PHYS_ADDR		0x10510000
-#define	MACEN_PHYS_ADDR		0x10520000
-#define	MACDMA0_PHYS_ADDR	0x14004000
-#define	MACDMA1_PHYS_ADDR	0x14004200
-#define	UART0_PHYS_ADDR		0x11100000
-#define	UART1_PHYS_ADDR		0x11200000
-#define	UART3_PHYS_ADDR		0x11400000
-#define GPIO2_PHYS_ADDR		0x11700000
-#define	SYS_PHYS_ADDR		0x11900000
-#define	DDMA_PHYS_ADDR		0x14002000
 #define PE_PHYS_ADDR		0x14008000
 #define PSC0_PHYS_ADDR		0x11A00000
 #define PSC1_PHYS_ADDR		0x11B00000
@@ -786,19 +783,10 @@ enum soc_au1200_ints {
 #define	STATIC_MEM_PHYS_ADDR	0x14001000
 #define AES_PHYS_ADDR		0x10300000
 #define CIM_PHYS_ADDR		0x14004000
-#define	IC0_PHYS_ADDR		0x10400000
-#define	IC1_PHYS_ADDR		0x11800000
 #define USBM_PHYS_ADDR		0x14020000
 #define	USBH_PHYS_ADDR		0x14020100
-#define	UART0_PHYS_ADDR		0x11100000
-#define	UART1_PHYS_ADDR		0x11200000
-#define GPIO2_PHYS_ADDR		0x11700000
-#define	SYS_PHYS_ADDR		0x11900000
-#define	DDMA_PHYS_ADDR		0x14002000
 #define PSC0_PHYS_ADDR	 	0x11A00000
 #define PSC1_PHYS_ADDR	 	0x11B00000
-#define SD0_PHYS_ADDR		0x10600000
-#define SD1_PHYS_ADDR		0x10680000
 #define LCD_PHYS_ADDR		0x15000000
 #define SWCNT_PHYS_ADDR		0x1110010C
 #define MAEFE_PHYS_ADDR		0x14012000
@@ -835,183 +823,43 @@ enum soc_au1200_ints {
 #endif
 
 
-/* Interrupt Controller register offsets */
-#define IC_CFG0RD		0x40
-#define IC_CFG0SET		0x40
-#define IC_CFG0CLR		0x44
-#define IC_CFG1RD		0x48
-#define IC_CFG1SET		0x48
-#define IC_CFG1CLR		0x4C
-#define IC_CFG2RD		0x50
-#define IC_CFG2SET		0x50
-#define IC_CFG2CLR		0x54
-#define IC_REQ0INT		0x54
-#define IC_SRCRD		0x58
-#define IC_SRCSET		0x58
-#define IC_SRCCLR		0x5C
-#define IC_REQ1INT		0x5C
-#define IC_ASSIGNRD		0x60
-#define IC_ASSIGNSET		0x60
-#define IC_ASSIGNCLR		0x64
-#define IC_WAKERD		0x68
-#define IC_WAKESET		0x68
-#define IC_WAKECLR		0x6C
-#define IC_MASKRD		0x70
-#define IC_MASKSET		0x70
-#define IC_MASKCLR		0x74
-#define IC_RISINGRD		0x78
-#define IC_RISINGCLR		0x78
-#define IC_FALLINGRD		0x7C
-#define IC_FALLINGCLR		0x7C
-#define IC_TESTBIT		0x80
-
-
-/* Interrupt Controller 0 */
-#define IC0_CFG0RD		0xB0400040
-#define IC0_CFG0SET		0xB0400040
-#define IC0_CFG0CLR		0xB0400044
-
-#define IC0_CFG1RD		0xB0400048
-#define IC0_CFG1SET		0xB0400048
-#define IC0_CFG1CLR		0xB040004C
-
-#define IC0_CFG2RD		0xB0400050
-#define IC0_CFG2SET		0xB0400050
-#define IC0_CFG2CLR		0xB0400054
-
-#define IC0_REQ0INT		0xB0400054
-#define IC0_SRCRD		0xB0400058
-#define IC0_SRCSET		0xB0400058
-#define IC0_SRCCLR		0xB040005C
-#define IC0_REQ1INT		0xB040005C
-
-#define IC0_ASSIGNRD		0xB0400060
-#define IC0_ASSIGNSET		0xB0400060
-#define IC0_ASSIGNCLR		0xB0400064
-
-#define IC0_WAKERD		0xB0400068
-#define IC0_WAKESET		0xB0400068
-#define IC0_WAKECLR		0xB040006C
-
-#define IC0_MASKRD		0xB0400070
-#define IC0_MASKSET		0xB0400070
-#define IC0_MASKCLR		0xB0400074
-
-#define IC0_RISINGRD		0xB0400078
-#define IC0_RISINGCLR		0xB0400078
-#define IC0_FALLINGRD		0xB040007C
-#define IC0_FALLINGCLR		0xB040007C
-
-#define IC0_TESTBIT		0xB0400080
-
-/* Interrupt Controller 1 */
-#define IC1_CFG0RD		0xB1800040
-#define IC1_CFG0SET		0xB1800040
-#define IC1_CFG0CLR		0xB1800044
-
-#define IC1_CFG1RD		0xB1800048
-#define IC1_CFG1SET		0xB1800048
-#define IC1_CFG1CLR		0xB180004C
-
-#define IC1_CFG2RD		0xB1800050
-#define IC1_CFG2SET		0xB1800050
-#define IC1_CFG2CLR		0xB1800054
-
-#define IC1_REQ0INT		0xB1800054
-#define IC1_SRCRD		0xB1800058
-#define IC1_SRCSET		0xB1800058
-#define IC1_SRCCLR		0xB180005C
-#define IC1_REQ1INT		0xB180005C
-
-#define IC1_ASSIGNRD            0xB1800060
-#define IC1_ASSIGNSET           0xB1800060
-#define IC1_ASSIGNCLR           0xB1800064
-
-#define IC1_WAKERD		0xB1800068
-#define IC1_WAKESET		0xB1800068
-#define IC1_WAKECLR		0xB180006C
-
-#define IC1_MASKRD		0xB1800070
-#define IC1_MASKSET		0xB1800070
-#define IC1_MASKCLR		0xB1800074
-
-#define IC1_RISINGRD		0xB1800078
-#define IC1_RISINGCLR		0xB1800078
-#define IC1_FALLINGRD		0xB180007C
-#define IC1_FALLINGCLR		0xB180007C
-
-#define IC1_TESTBIT		0xB1800080
 
 
 /* Au1000 */
 #ifdef CONFIG_SOC_AU1000
 
-#define UART0_ADDR		0xB1100000
-#define UART3_ADDR		0xB1400000
-
 #define USB_OHCI_BASE		0x10100000	/* phys addr for ioremap */
 #define USB_HOST_CONFIG 	0xB017FFFC
 #define FOR_PLATFORM_C_USB_HOST_INT AU1000_USB_HOST_INT
-
-#define AU1000_ETH0_BASE	0xB0500000
-#define AU1000_ETH1_BASE	0xB0510000
-#define AU1000_MAC0_ENABLE	0xB0520000
-#define AU1000_MAC1_ENABLE	0xB0520004
-#define NUM_ETH_INTERFACES 2
 #endif /* CONFIG_SOC_AU1000 */
 
 /* Au1500 */
 #ifdef CONFIG_SOC_AU1500
 
-#define UART0_ADDR		0xB1100000
-#define UART3_ADDR		0xB1400000
-
 #define USB_OHCI_BASE		0x10100000	/* phys addr for ioremap */
 #define USB_HOST_CONFIG 	0xB017fffc
 #define FOR_PLATFORM_C_USB_HOST_INT AU1500_USB_HOST_INT
-
-#define AU1500_ETH0_BASE	0xB1500000
-#define AU1500_ETH1_BASE	0xB1510000
-#define AU1500_MAC0_ENABLE	0xB1520000
-#define AU1500_MAC1_ENABLE	0xB1520004
-#define NUM_ETH_INTERFACES 2
 #endif /* CONFIG_SOC_AU1500 */
 
 /* Au1100 */
 #ifdef CONFIG_SOC_AU1100
 
-#define UART0_ADDR		0xB1100000
-#define UART3_ADDR		0xB1400000
-
 #define USB_OHCI_BASE		0x10100000	/* phys addr for ioremap */
 #define USB_HOST_CONFIG 	0xB017FFFC
 #define FOR_PLATFORM_C_USB_HOST_INT AU1100_USB_HOST_INT
-
-#define AU1100_ETH0_BASE	0xB0500000
-#define AU1100_MAC0_ENABLE	0xB0520000
-#define NUM_ETH_INTERFACES 1
 #endif /* CONFIG_SOC_AU1100 */
 
 #ifdef CONFIG_SOC_AU1550
-#define UART0_ADDR		0xB1100000
 
 #define USB_OHCI_BASE		0x14020000	/* phys addr for ioremap */
 #define USB_OHCI_LEN		0x00060000
 #define USB_HOST_CONFIG 	0xB4027ffc
 #define FOR_PLATFORM_C_USB_HOST_INT AU1550_USB_HOST_INT
-
-#define AU1550_ETH0_BASE	0xB0500000
-#define AU1550_ETH1_BASE	0xB0510000
-#define AU1550_MAC0_ENABLE	0xB0520000
-#define AU1550_MAC1_ENABLE	0xB0520004
-#define NUM_ETH_INTERFACES 2
 #endif /* CONFIG_SOC_AU1550 */
 
 
 #ifdef CONFIG_SOC_AU1200
 
-#define UART0_ADDR		0xB1100000
-
 #define USB_UOC_BASE		0x14020020
 #define USB_UOC_LEN		0x20
 #define USB_OHCI_BASE		0x14020100
@@ -1504,22 +1352,6 @@ enum soc_au1200_ints {
 #define SYS_PINFUNC_S1B 	(1 << 2)
 #endif
 
-#define SYS_TRIOUTRD		0xB1900100
-#define SYS_TRIOUTCLR		0xB1900100
-#define SYS_OUTPUTRD		0xB1900108
-#define SYS_OUTPUTSET		0xB1900108
-#define SYS_OUTPUTCLR		0xB190010C
-#define SYS_PINSTATERD		0xB1900110
-#define SYS_PININPUTEN		0xB1900110
-
-/* GPIO2, Au1500, Au1550 only */
-#define GPIO2_BASE		0xB1700000
-#define GPIO2_DIR		(GPIO2_BASE + 0)
-#define GPIO2_OUTPUT		(GPIO2_BASE + 8)
-#define GPIO2_PINSTATE		(GPIO2_BASE + 0xC)
-#define GPIO2_INTENABLE 	(GPIO2_BASE + 0x10)
-#define GPIO2_ENABLE		(GPIO2_BASE + 0x14)
-
 /* Power Management */
 #define SYS_SCRATCH0		0xB1900018
 #define SYS_SCRATCH1		0xB190001C
@@ -1635,12 +1467,6 @@ enum soc_au1200_ints {
 #  define AC97C_RS		(1 << 1)
 #  define AC97C_CE		(1 << 0)
 
-/* Secure Digital (SD) Controller */
-#define SD0_XMIT_FIFO	0xB0600000
-#define SD0_RECV_FIFO	0xB0600004
-#define SD1_XMIT_FIFO	0xB0680000
-#define SD1_RECV_FIFO	0xB0680004
-
 #if defined(CONFIG_SOC_AU1500) || defined(CONFIG_SOC_AU1550)
 /* Au1500 PCI Controller */
 #define Au1500_CFG_BASE 	0xB4005000	/* virtual, KSEG1 addr */
diff --git a/arch/mips/include/asm/mach-au1x00/au1000_dma.h b/arch/mips/include/asm/mach-au1x00/au1000_dma.h
index c333b4e1cd4..59f5b55b220 100644
--- a/arch/mips/include/asm/mach-au1x00/au1000_dma.h
+++ b/arch/mips/include/asm/mach-au1x00/au1000_dma.h
@@ -37,10 +37,6 @@
 
 #define NUM_AU1000_DMA_CHANNELS	8
 
-/* DMA Channel Base Addresses */
-#define DMA_CHANNEL_BASE	0xB4002000
-#define DMA_CHANNEL_LEN		0x00000100
-
 /* DMA Channel Register Offsets */
 #define DMA_MODE_SET		0x00000000
 #define DMA_MODE_READ		DMA_MODE_SET
diff --git a/arch/mips/include/asm/mach-au1x00/au1xxx_dbdma.h b/arch/mips/include/asm/mach-au1x00/au1xxx_dbdma.h
index c8a553a36ba..2fdacfe85e2 100644
--- a/arch/mips/include/asm/mach-au1x00/au1xxx_dbdma.h
+++ b/arch/mips/include/asm/mach-au1x00/au1xxx_dbdma.h
@@ -37,14 +37,6 @@
 
 #ifndef _LANGUAGE_ASSEMBLY
 
-/*
- * The DMA base addresses.
- * The channels are every 256 bytes (0x0100) from the channel 0 base.
- * Interrupt status/enable is bits 15:0 for channels 15 to zero.
- */
-#define DDMA_GLOBAL_BASE	0xb4003000
-#define DDMA_CHANNEL_BASE	0xb4002000
-
 typedef volatile struct dbdma_global {
 	u32	ddma_config;
 	u32	ddma_intstat;
diff --git a/arch/mips/include/asm/mach-au1x00/gpio-au1000.h b/arch/mips/include/asm/mach-au1x00/gpio-au1000.h
index 62d2f136d94..1f41a522906 100644
--- a/arch/mips/include/asm/mach-au1x00/gpio-au1000.h
+++ b/arch/mips/include/asm/mach-au1x00/gpio-au1000.h
@@ -24,6 +24,23 @@
 
 #define MAKE_IRQ(intc, off)	(AU1000_INTC##intc##_INT_BASE + (off))
 
+/* GPIO1 registers within SYS_ area */
+#define SYS_TRIOUTRD		0x100
+#define SYS_TRIOUTCLR		0x100
+#define SYS_OUTPUTRD		0x108
+#define SYS_OUTPUTSET		0x108
+#define SYS_OUTPUTCLR		0x10C
+#define SYS_PINSTATERD		0x110
+#define SYS_PININPUTEN		0x110
+
+/* register offsets within GPIO2 block */
+#define GPIO2_DIR		0x00
+#define GPIO2_OUTPUT		0x08
+#define GPIO2_PINSTATE		0x0C
+#define GPIO2_INTENABLE		0x10
+#define GPIO2_ENABLE		0x14
+
+struct gpio;
 
 static inline int au1000_gpio1_to_irq(int gpio)
 {
@@ -200,23 +217,26 @@ static inline int au1200_irq_to_gpio(int irq)
  */
 static inline void alchemy_gpio1_set_value(int gpio, int v)
 {
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_SYS_PHYS_ADDR);
 	unsigned long mask = 1 << (gpio - ALCHEMY_GPIO1_BASE);
 	unsigned long r = v ? SYS_OUTPUTSET : SYS_OUTPUTCLR;
-	au_writel(mask, r);
-	au_sync();
+	__raw_writel(mask, base + r);
+	wmb();
 }
 
 static inline int alchemy_gpio1_get_value(int gpio)
 {
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_SYS_PHYS_ADDR);
 	unsigned long mask = 1 << (gpio - ALCHEMY_GPIO1_BASE);
-	return au_readl(SYS_PINSTATERD) & mask;
+	return __raw_readl(base + SYS_PINSTATERD) & mask;
 }
 
 static inline int alchemy_gpio1_direction_input(int gpio)
 {
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_SYS_PHYS_ADDR);
 	unsigned long mask = 1 << (gpio - ALCHEMY_GPIO1_BASE);
-	au_writel(mask, SYS_TRIOUTCLR);
-	au_sync();
+	__raw_writel(mask, base + SYS_TRIOUTCLR);
+	wmb();
 	return 0;
 }
 
@@ -257,27 +277,31 @@ static inline int alchemy_gpio1_to_irq(int gpio)
  */
 static inline void __alchemy_gpio2_mod_dir(int gpio, int to_out)
 {
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1500_GPIO2_PHYS_ADDR);
 	unsigned long mask = 1 << (gpio - ALCHEMY_GPIO2_BASE);
-	unsigned long d = au_readl(GPIO2_DIR);
+	unsigned long d = __raw_readl(base + GPIO2_DIR);
+
 	if (to_out)
 		d |= mask;
 	else
 		d &= ~mask;
-	au_writel(d, GPIO2_DIR);
-	au_sync();
+	__raw_writel(d, base + GPIO2_DIR);
+	wmb();
 }
 
 static inline void alchemy_gpio2_set_value(int gpio, int v)
 {
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1500_GPIO2_PHYS_ADDR);
 	unsigned long mask;
 	mask = ((v) ? 0x00010001 : 0x00010000) << (gpio - ALCHEMY_GPIO2_BASE);
-	au_writel(mask, GPIO2_OUTPUT);
-	au_sync();
+	__raw_writel(mask, base + GPIO2_OUTPUT);
+	wmb();
 }
 
 static inline int alchemy_gpio2_get_value(int gpio)
 {
-	return au_readl(GPIO2_PINSTATE) & (1 << (gpio - ALCHEMY_GPIO2_BASE));
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1500_GPIO2_PHYS_ADDR);
+	return __raw_readl(base + GPIO2_PINSTATE) & (1 << (gpio - ALCHEMY_GPIO2_BASE));
 }
 
 static inline int alchemy_gpio2_direction_input(int gpio)
@@ -329,21 +353,23 @@ static inline int alchemy_gpio2_to_irq(int gpio)
  */
 static inline void alchemy_gpio1_input_enable(void)
 {
-	au_writel(0, SYS_PININPUTEN);	/* the write op is key */
-	au_sync();
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_SYS_PHYS_ADDR);
+	__raw_writel(0, base + SYS_PININPUTEN);	/* the write op is key */
+	wmb();
 }
 
 /* GPIO2 shared interrupts and control */
 
 static inline void __alchemy_gpio2_mod_int(int gpio2, int en)
 {
-	unsigned long r = au_readl(GPIO2_INTENABLE);
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1500_GPIO2_PHYS_ADDR);
+	unsigned long r = __raw_readl(base + GPIO2_INTENABLE);
 	if (en)
 		r |= 1 << gpio2;
 	else
 		r &= ~(1 << gpio2);
-	au_writel(r, GPIO2_INTENABLE);
-	au_sync();
+	__raw_writel(r, base + GPIO2_INTENABLE);
+	wmb();
 }
 
 /**
@@ -418,10 +444,11 @@ static inline void alchemy_gpio2_disable_int(int gpio2)
  */
 static inline void alchemy_gpio2_enable(void)
 {
-	au_writel(3, GPIO2_ENABLE);	/* reset, clock enabled */
-	au_sync();
-	au_writel(1, GPIO2_ENABLE);	/* clock enabled */
-	au_sync();
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1500_GPIO2_PHYS_ADDR);
+	__raw_writel(3, base + GPIO2_ENABLE);	/* reset, clock enabled */
+	wmb();
+	__raw_writel(1, base + GPIO2_ENABLE);	/* clock enabled */
+	wmb();
 }
 
 /**
@@ -431,8 +458,9 @@ static inline void alchemy_gpio2_enable(void)
  */
 static inline void alchemy_gpio2_disable(void)
 {
-	au_writel(2, GPIO2_ENABLE);	/* reset, clock disabled */
-	au_sync();
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1500_GPIO2_PHYS_ADDR);
+	__raw_writel(2, base + GPIO2_ENABLE);	/* reset, clock disabled */
+	wmb();
 }
 
 /**********************************************************************/
@@ -556,6 +584,16 @@ static inline void gpio_set_value(int gpio, int v)
 	alchemy_gpio_set_value(gpio, v);
 }
 
+static inline int gpio_get_value_cansleep(unsigned gpio)
+{
+	return gpio_get_value(gpio);
+}
+
+static inline void gpio_set_value_cansleep(unsigned gpio, int value)
+{
+	gpio_set_value(gpio, value);
+}
+
 static inline int gpio_is_valid(int gpio)
 {
 	return alchemy_gpio_is_valid(gpio);
@@ -581,10 +619,50 @@ static inline int gpio_request(unsigned gpio, const char *label)
 	return 0;
 }
 
+static inline int gpio_request_one(unsigned gpio,
+					unsigned long flags, const char *label)
+{
+	return 0;
+}
+
+static inline int gpio_request_array(struct gpio *array, size_t num)
+{
+	return 0;
+}
+
 static inline void gpio_free(unsigned gpio)
 {
 }
 
+static inline void gpio_free_array(struct gpio *array, size_t num)
+{
+}
+
+static inline int gpio_set_debounce(unsigned gpio, unsigned debounce)
+{
+	return -ENOSYS;
+}
+
+static inline int gpio_export(unsigned gpio, bool direction_may_change)
+{
+	return -ENOSYS;
+}
+
+static inline int gpio_export_link(struct device *dev, const char *name,
+				   unsigned gpio)
+{
+	return -ENOSYS;
+}
+
+static inline int gpio_sysfs_set_active_low(unsigned gpio, int value)
+{
+	return -ENOSYS;
+}
+
+static inline void gpio_unexport(unsigned gpio)
+{
+}
+
 #endif	/* !CONFIG_ALCHEMY_GPIO_INDIRECT */
 
 
diff --git a/arch/mips/include/asm/mach-bcm47xx/nvram.h b/arch/mips/include/asm/mach-bcm47xx/nvram.h
index 9759588ba3c..184d5ecb5f5 100644
--- a/arch/mips/include/asm/mach-bcm47xx/nvram.h
+++ b/arch/mips/include/asm/mach-bcm47xx/nvram.h
@@ -39,8 +39,16 @@ extern int nvram_getenv(char *name, char *val, size_t val_len);
 
 static inline void nvram_parse_macaddr(char *buf, u8 *macaddr)
 {
-	sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", &macaddr[0], &macaddr[1],
-	       &macaddr[2], &macaddr[3], &macaddr[4], &macaddr[5]);
+	if (strchr(buf, ':'))
+		sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", &macaddr[0],
+			&macaddr[1], &macaddr[2], &macaddr[3], &macaddr[4],
+			&macaddr[5]);
+	else if (strchr(buf, '-'))
+		sscanf(buf, "%hhx-%hhx-%hhx-%hhx-%hhx-%hhx", &macaddr[0],
+			&macaddr[1], &macaddr[2], &macaddr[3], &macaddr[4],
+			&macaddr[5]);
+	else
+		printk(KERN_WARNING "Can not parse mac address: %s\n", buf);
 }
 
 #endif
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h b/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h
index 32978d32561..ed72e6a26b7 100644
--- a/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h
+++ b/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h
@@ -88,7 +88,7 @@ struct bcm_tag {
 	char kernel_crc[CRC_LEN];
 	/* 228-235: Unused at present */
 	char reserved1[8];
-	/* 236-239: CRC32 of header excluding tagVersion */
+	/* 236-239: CRC32 of header excluding last 20 bytes */
 	char header_crc[CRC_LEN];
 	/* 240-255: Unused at present */
 	char reserved2[16];
diff --git a/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h b/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h
index 0b2b5eb22e9..dedef7d2b01 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h
@@ -63,6 +63,11 @@
 	# CN30XX Disable instruction prefetching
 	or  v0, v0, 0x2000
 skip:
+	# First clear off CvmCtl[IPPCI] bit and move the performance
+	# counters interrupt to IRQ 6
+	li	v1, ~(7 << 7)
+	and	v0, v0, v1
+	ori	v0, v0, (6 << 7)
 	# Write the cavium control register
 	dmtc0   v0, CP0_CVMCTL_REG
 	sync
diff --git a/arch/mips/include/asm/mach-lantiq/lantiq.h b/arch/mips/include/asm/mach-lantiq/lantiq.h
new file mode 100644
index 00000000000..ce2f02929d2
--- /dev/null
+++ b/arch/mips/include/asm/mach-lantiq/lantiq.h
@@ -0,0 +1,63 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+#ifndef _LANTIQ_H__
+#define _LANTIQ_H__
+
+#include <linux/irq.h>
+
+/* generic reg access functions */
+#define ltq_r32(reg)		__raw_readl(reg)
+#define ltq_w32(val, reg)	__raw_writel(val, reg)
+#define ltq_w32_mask(clear, set, reg)	\
+	ltq_w32((ltq_r32(reg) & ~(clear)) | (set), reg)
+#define ltq_r8(reg)		__raw_readb(reg)
+#define ltq_w8(val, reg)	__raw_writeb(val, reg)
+
+/* register access macros for EBU and CGU */
+#define ltq_ebu_w32(x, y)	ltq_w32((x), ltq_ebu_membase + (y))
+#define ltq_ebu_r32(x)		ltq_r32(ltq_ebu_membase + (x))
+#define ltq_cgu_w32(x, y)	ltq_w32((x), ltq_cgu_membase + (y))
+#define ltq_cgu_r32(x)		ltq_r32(ltq_cgu_membase + (x))
+
+extern __iomem void *ltq_ebu_membase;
+extern __iomem void *ltq_cgu_membase;
+
+extern unsigned int ltq_get_cpu_ver(void);
+extern unsigned int ltq_get_soc_type(void);
+
+/* clock speeds */
+#define CLOCK_60M	60000000
+#define CLOCK_83M	83333333
+#define CLOCK_111M	111111111
+#define CLOCK_133M	133333333
+#define CLOCK_167M	166666667
+#define CLOCK_200M	200000000
+#define CLOCK_266M	266666666
+#define CLOCK_333M	333333333
+#define CLOCK_400M	400000000
+
+/* spinlock all ebu i/o */
+extern spinlock_t ebu_lock;
+
+/* some irq helpers */
+extern void ltq_disable_irq(struct irq_data *data);
+extern void ltq_mask_and_ack_irq(struct irq_data *data);
+extern void ltq_enable_irq(struct irq_data *data);
+
+/* find out what caused the last cpu reset */
+extern int ltq_reset_cause(void);
+#define LTQ_RST_CAUSE_WDTRST	0x20
+
+#define IOPORT_RESOURCE_START	0x10000000
+#define IOPORT_RESOURCE_END	0xffffffff
+#define IOMEM_RESOURCE_START	0x10000000
+#define IOMEM_RESOURCE_END	0xffffffff
+#define LTQ_FLASH_START		0x10000000
+#define LTQ_FLASH_MAX		0x04000000
+
+#endif
diff --git a/arch/mips/include/asm/mach-lantiq/lantiq_platform.h b/arch/mips/include/asm/mach-lantiq/lantiq_platform.h
new file mode 100644
index 00000000000..a305f1d0259
--- /dev/null
+++ b/arch/mips/include/asm/mach-lantiq/lantiq_platform.h
@@ -0,0 +1,53 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _LANTIQ_PLATFORM_H__
+#define _LANTIQ_PLATFORM_H__
+
+#include <linux/mtd/partitions.h>
+#include <linux/socket.h>
+
+/* struct used to pass info to the pci core */
+enum {
+	PCI_CLOCK_INT = 0,
+	PCI_CLOCK_EXT
+};
+
+#define PCI_EXIN0	0x0001
+#define PCI_EXIN1	0x0002
+#define PCI_EXIN2	0x0004
+#define PCI_EXIN3	0x0008
+#define PCI_EXIN4	0x0010
+#define PCI_EXIN5	0x0020
+#define PCI_EXIN_MAX	6
+
+#define PCI_GNT1	0x0040
+#define PCI_GNT2	0x0080
+#define PCI_GNT3	0x0100
+#define PCI_GNT4	0x0200
+
+#define PCI_REQ1	0x0400
+#define PCI_REQ2	0x0800
+#define PCI_REQ3	0x1000
+#define PCI_REQ4	0x2000
+#define PCI_REQ_SHIFT	10
+#define PCI_REQ_MASK	0xf
+
+struct ltq_pci_data {
+	int clock;
+	int gpio;
+	int irq[16];
+};
+
+/* struct used to pass info to network drivers */
+struct ltq_eth_data {
+	struct sockaddr mac;
+	int mii_mode;
+};
+
+#endif
diff --git a/arch/mips/include/asm/mach-lantiq/war.h b/arch/mips/include/asm/mach-lantiq/war.h
new file mode 100644
index 00000000000..01b08ef368d
--- /dev/null
+++ b/arch/mips/include/asm/mach-lantiq/war.h
@@ -0,0 +1,24 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ */
+#ifndef __ASM_MIPS_MACH_LANTIQ_WAR_H
+#define __ASM_MIPS_MACH_LANTIQ_WAR_H
+
+#define R4600_V1_INDEX_ICACHEOP_WAR     0
+#define R4600_V1_HIT_CACHEOP_WAR        0
+#define R4600_V2_HIT_CACHEOP_WAR        0
+#define R5432_CP0_INTERRUPT_WAR         0
+#define BCM1250_M3_WAR                  0
+#define SIBYTE_1956_WAR                 0
+#define MIPS4K_ICACHE_REFILL_WAR        0
+#define MIPS_CACHE_SYNC_WAR             0
+#define TX49XX_ICACHE_INDEX_INV_WAR     0
+#define RM9000_CDEX_SMP_WAR             0
+#define ICACHE_REFILLS_WORKAROUND_WAR   0
+#define R10000_LLSC_WAR                 0
+#define MIPS34K_MISSED_ITLB_WAR         0
+
+#endif
diff --git a/arch/mips/include/asm/mach-lantiq/xway/irq.h b/arch/mips/include/asm/mach-lantiq/xway/irq.h
new file mode 100644
index 00000000000..a1471d2dd0d
--- /dev/null
+++ b/arch/mips/include/asm/mach-lantiq/xway/irq.h
@@ -0,0 +1,18 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef __LANTIQ_IRQ_H
+#define __LANTIQ_IRQ_H
+
+#include <lantiq_irq.h>
+
+#define NR_IRQS 256
+
+#include_next <irq.h>
+
+#endif
diff --git a/arch/mips/include/asm/mach-lantiq/xway/lantiq_irq.h b/arch/mips/include/asm/mach-lantiq/xway/lantiq_irq.h
new file mode 100644
index 00000000000..b4465a888e2
--- /dev/null
+++ b/arch/mips/include/asm/mach-lantiq/xway/lantiq_irq.h
@@ -0,0 +1,66 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _LANTIQ_XWAY_IRQ_H__
+#define _LANTIQ_XWAY_IRQ_H__
+
+#define INT_NUM_IRQ0		8
+#define INT_NUM_IM0_IRL0	(INT_NUM_IRQ0 + 0)
+#define INT_NUM_IM1_IRL0	(INT_NUM_IRQ0 + 32)
+#define INT_NUM_IM2_IRL0	(INT_NUM_IRQ0 + 64)
+#define INT_NUM_IM3_IRL0	(INT_NUM_IRQ0 + 96)
+#define INT_NUM_IM4_IRL0	(INT_NUM_IRQ0 + 128)
+#define INT_NUM_IM_OFFSET	(INT_NUM_IM1_IRL0 - INT_NUM_IM0_IRL0)
+
+#define LTQ_ASC_TIR(x)		(INT_NUM_IM3_IRL0 + (x * 8))
+#define LTQ_ASC_RIR(x)		(INT_NUM_IM3_IRL0 + (x * 8) + 1)
+#define LTQ_ASC_EIR(x)		(INT_NUM_IM3_IRL0 + (x * 8) + 2)
+
+#define LTQ_ASC_ASE_TIR		INT_NUM_IM2_IRL0
+#define LTQ_ASC_ASE_RIR		(INT_NUM_IM2_IRL0 + 2)
+#define LTQ_ASC_ASE_EIR		(INT_NUM_IM2_IRL0 + 3)
+
+#define LTQ_SSC_TIR		(INT_NUM_IM0_IRL0 + 15)
+#define LTQ_SSC_RIR		(INT_NUM_IM0_IRL0 + 14)
+#define LTQ_SSC_EIR		(INT_NUM_IM0_IRL0 + 16)
+
+#define LTQ_MEI_DYING_GASP_INT	(INT_NUM_IM1_IRL0 + 21)
+#define LTQ_MEI_INT		(INT_NUM_IM1_IRL0 + 23)
+
+#define LTQ_TIMER6_INT		(INT_NUM_IM1_IRL0 + 23)
+#define LTQ_USB_INT		(INT_NUM_IM1_IRL0 + 22)
+#define LTQ_USB_OC_INT		(INT_NUM_IM4_IRL0 + 23)
+
+#define MIPS_CPU_TIMER_IRQ		7
+
+#define LTQ_DMA_CH0_INT		(INT_NUM_IM2_IRL0)
+#define LTQ_DMA_CH1_INT		(INT_NUM_IM2_IRL0 + 1)
+#define LTQ_DMA_CH2_INT		(INT_NUM_IM2_IRL0 + 2)
+#define LTQ_DMA_CH3_INT		(INT_NUM_IM2_IRL0 + 3)
+#define LTQ_DMA_CH4_INT		(INT_NUM_IM2_IRL0 + 4)
+#define LTQ_DMA_CH5_INT		(INT_NUM_IM2_IRL0 + 5)
+#define LTQ_DMA_CH6_INT		(INT_NUM_IM2_IRL0 + 6)
+#define LTQ_DMA_CH7_INT		(INT_NUM_IM2_IRL0 + 7)
+#define LTQ_DMA_CH8_INT		(INT_NUM_IM2_IRL0 + 8)
+#define LTQ_DMA_CH9_INT		(INT_NUM_IM2_IRL0 + 9)
+#define LTQ_DMA_CH10_INT	(INT_NUM_IM2_IRL0 + 10)
+#define LTQ_DMA_CH11_INT	(INT_NUM_IM2_IRL0 + 11)
+#define LTQ_DMA_CH12_INT	(INT_NUM_IM2_IRL0 + 25)
+#define LTQ_DMA_CH13_INT	(INT_NUM_IM2_IRL0 + 26)
+#define LTQ_DMA_CH14_INT	(INT_NUM_IM2_IRL0 + 27)
+#define LTQ_DMA_CH15_INT	(INT_NUM_IM2_IRL0 + 28)
+#define LTQ_DMA_CH16_INT	(INT_NUM_IM2_IRL0 + 29)
+#define LTQ_DMA_CH17_INT	(INT_NUM_IM2_IRL0 + 30)
+#define LTQ_DMA_CH18_INT	(INT_NUM_IM2_IRL0 + 16)
+#define LTQ_DMA_CH19_INT	(INT_NUM_IM2_IRL0 + 21)
+
+#define LTQ_PPE_MBOX_INT	(INT_NUM_IM2_IRL0 + 24)
+
+#define INT_NUM_IM4_IRL14	(INT_NUM_IM4_IRL0 + 14)
+
+#endif
diff --git a/arch/mips/include/asm/mach-lantiq/xway/lantiq_soc.h b/arch/mips/include/asm/mach-lantiq/xway/lantiq_soc.h
new file mode 100644
index 00000000000..8a3c6be669d
--- /dev/null
+++ b/arch/mips/include/asm/mach-lantiq/xway/lantiq_soc.h
@@ -0,0 +1,141 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _LTQ_XWAY_H__
+#define _LTQ_XWAY_H__
+
+#ifdef CONFIG_SOC_TYPE_XWAY
+
+#include <lantiq.h>
+
+/* Chip IDs */
+#define SOC_ID_DANUBE1		0x129
+#define SOC_ID_DANUBE2		0x12B
+#define SOC_ID_TWINPASS		0x12D
+#define SOC_ID_AMAZON_SE	0x152
+#define SOC_ID_ARX188		0x16C
+#define SOC_ID_ARX168		0x16D
+#define SOC_ID_ARX182		0x16F
+
+/* SoC Types */
+#define SOC_TYPE_DANUBE		0x01
+#define SOC_TYPE_TWINPASS	0x02
+#define SOC_TYPE_AR9		0x03
+#define SOC_TYPE_VR9		0x04
+#define SOC_TYPE_AMAZON_SE	0x05
+
+/* ASC0/1 - serial port */
+#define LTQ_ASC0_BASE_ADDR	0x1E100400
+#define LTQ_ASC1_BASE_ADDR	0x1E100C00
+#define LTQ_ASC_SIZE		0x400
+
+/* RCU - reset control unit */
+#define LTQ_RCU_BASE_ADDR	0x1F203000
+#define LTQ_RCU_SIZE		0x1000
+
+/* GPTU - general purpose timer unit */
+#define LTQ_GPTU_BASE_ADDR	0x18000300
+#define LTQ_GPTU_SIZE		0x100
+
+/* EBU - external bus unit */
+#define LTQ_EBU_GPIO_START	0x14000000
+#define LTQ_EBU_GPIO_SIZE	0x1000
+
+#define LTQ_EBU_BASE_ADDR	0x1E105300
+#define LTQ_EBU_SIZE		0x100
+
+#define LTQ_EBU_BUSCON0		0x0060
+#define LTQ_EBU_PCC_CON		0x0090
+#define LTQ_EBU_PCC_IEN		0x00A4
+#define LTQ_EBU_PCC_ISTAT	0x00A0
+#define LTQ_EBU_BUSCON1		0x0064
+#define LTQ_EBU_ADDRSEL1	0x0024
+#define EBU_WRDIS		0x80000000
+
+/* CGU - clock generation unit */
+#define LTQ_CGU_BASE_ADDR	0x1F103000
+#define LTQ_CGU_SIZE		0x1000
+
+/* ICU - interrupt control unit */
+#define LTQ_ICU_BASE_ADDR	0x1F880200
+#define LTQ_ICU_SIZE		0x100
+
+/* EIU - external interrupt unit */
+#define LTQ_EIU_BASE_ADDR	0x1F101000
+#define LTQ_EIU_SIZE		0x1000
+
+/* PMU - power management unit */
+#define LTQ_PMU_BASE_ADDR	0x1F102000
+#define LTQ_PMU_SIZE		0x1000
+
+#define PMU_DMA			0x0020
+#define PMU_USB			0x8041
+#define PMU_LED			0x0800
+#define PMU_GPT			0x1000
+#define PMU_PPE			0x2000
+#define PMU_FPI			0x4000
+#define PMU_SWITCH		0x10000000
+
+/* ETOP - ethernet */
+#define LTQ_ETOP_BASE_ADDR	0x1E180000
+#define LTQ_ETOP_SIZE		0x40000
+
+/* DMA */
+#define LTQ_DMA_BASE_ADDR	0x1E104100
+#define LTQ_DMA_SIZE		0x800
+
+/* PCI */
+#define PCI_CR_BASE_ADDR	0x1E105400
+#define PCI_CR_SIZE		0x400
+
+/* WDT */
+#define LTQ_WDT_BASE_ADDR	0x1F8803F0
+#define LTQ_WDT_SIZE		0x10
+
+/* STP - serial to parallel conversion unit */
+#define LTQ_STP_BASE_ADDR	0x1E100BB0
+#define LTQ_STP_SIZE		0x40
+
+/* GPIO */
+#define LTQ_GPIO0_BASE_ADDR	0x1E100B10
+#define LTQ_GPIO1_BASE_ADDR	0x1E100B40
+#define LTQ_GPIO2_BASE_ADDR	0x1E100B70
+#define LTQ_GPIO_SIZE		0x30
+
+/* SSC */
+#define LTQ_SSC_BASE_ADDR	0x1e100800
+#define LTQ_SSC_SIZE		0x100
+
+/* MEI - dsl core */
+#define LTQ_MEI_BASE_ADDR	0x1E116000
+
+/* DEU - data encryption unit */
+#define LTQ_DEU_BASE_ADDR	0x1E103100
+
+/* MPS - multi processor unit (voice) */
+#define LTQ_MPS_BASE_ADDR	(KSEG1 + 0x1F107000)
+#define LTQ_MPS_CHIPID		((u32 *)(LTQ_MPS_BASE_ADDR + 0x0344))
+
+/* request a non-gpio and set the PIO config */
+extern int  ltq_gpio_request(unsigned int pin, unsigned int alt0,
+	unsigned int alt1, unsigned int dir, const char *name);
+extern void ltq_pmu_enable(unsigned int module);
+extern void ltq_pmu_disable(unsigned int module);
+
+static inline int ltq_is_ar9(void)
+{
+	return (ltq_get_soc_type() == SOC_TYPE_AR9);
+}
+
+static inline int ltq_is_vr9(void)
+{
+	return (ltq_get_soc_type() == SOC_TYPE_VR9);
+}
+
+#endif /* CONFIG_SOC_TYPE_XWAY */
+#endif /* _LTQ_XWAY_H__ */
diff --git a/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h b/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h
new file mode 100644
index 00000000000..872943a4b90
--- /dev/null
+++ b/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h
@@ -0,0 +1,60 @@
+/*
+ *   This program is free software; you can redistribute it and/or modify it
+ *   under the terms of the GNU General Public License version 2 as published
+ *   by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+ *
+ *   Copyright (C) 2011 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef LTQ_DMA_H__
+#define LTQ_DMA_H__
+
+#define LTQ_DESC_SIZE		0x08	/* each descriptor is 64bit */
+#define LTQ_DESC_NUM		0x40	/* 64 descriptors / channel */
+
+#define LTQ_DMA_OWN		BIT(31)	/* owner bit */
+#define LTQ_DMA_C		BIT(30) /* complete bit */
+#define LTQ_DMA_SOP		BIT(29) /* start of packet */
+#define LTQ_DMA_EOP		BIT(28) /* end of packet */
+#define LTQ_DMA_TX_OFFSET(x)	((x & 0x1f) << 23) /* data bytes offset */
+#define LTQ_DMA_RX_OFFSET(x)	((x & 0x7) << 23) /* data bytes offset */
+#define LTQ_DMA_SIZE_MASK	(0xffff) /* the size field is 16 bit */
+
+struct ltq_dma_desc {
+	u32 ctl;
+	u32 addr;
+};
+
+struct ltq_dma_channel {
+	int nr;				/* the channel number */
+	int irq;			/* the mapped irq */
+	int desc;			/* the current descriptor */
+	struct ltq_dma_desc *desc_base;	/* the descriptor base */
+	int phys;			/* physical addr */
+};
+
+enum {
+	DMA_PORT_ETOP = 0,
+	DMA_PORT_DEU,
+};
+
+extern void ltq_dma_enable_irq(struct ltq_dma_channel *ch);
+extern void ltq_dma_disable_irq(struct ltq_dma_channel *ch);
+extern void ltq_dma_ack_irq(struct ltq_dma_channel *ch);
+extern void ltq_dma_open(struct ltq_dma_channel *ch);
+extern void ltq_dma_close(struct ltq_dma_channel *ch);
+extern void ltq_dma_alloc_tx(struct ltq_dma_channel *ch);
+extern void ltq_dma_alloc_rx(struct ltq_dma_channel *ch);
+extern void ltq_dma_free(struct ltq_dma_channel *ch);
+extern void ltq_dma_init_port(int p);
+
+#endif
diff --git a/arch/mips/include/asm/mach-netlogic/cpu-feature-overrides.h b/arch/mips/include/asm/mach-netlogic/cpu-feature-overrides.h
new file mode 100644
index 00000000000..3b728275b9b
--- /dev/null
+++ b/arch/mips/include/asm/mach-netlogic/cpu-feature-overrides.h
@@ -0,0 +1,47 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2011 Netlogic Microsystems
+ * Copyright (C) 2003 Ralf Baechle
+ */
+#ifndef __ASM_MACH_NETLOGIC_CPU_FEATURE_OVERRIDES_H
+#define __ASM_MACH_NETLOGIC_CPU_FEATURE_OVERRIDES_H
+
+#define cpu_has_4kex		1
+#define cpu_has_4k_cache	1
+#define cpu_has_watch		1
+#define cpu_has_mips16		0
+#define cpu_has_counter		1
+#define cpu_has_divec		1
+#define cpu_has_vce		0
+#define cpu_has_cache_cdex_p	0
+#define cpu_has_cache_cdex_s	0
+#define cpu_has_prefetch	1
+#define cpu_has_mcheck		1
+#define cpu_has_ejtag		1
+
+#define cpu_has_llsc		1
+#define cpu_has_vtag_icache	0
+#define cpu_has_dc_aliases	0
+#define cpu_has_ic_fills_f_dc	0
+#define cpu_has_dsp		0
+#define cpu_has_mipsmt		0
+#define cpu_has_userlocal	0
+#define cpu_icache_snoops_remote_store	0
+
+#define cpu_has_nofpuex		0
+#define cpu_has_64bits		1
+
+#define cpu_has_mips32r1	1
+#define cpu_has_mips32r2	0
+#define cpu_has_mips64r1	1
+#define cpu_has_mips64r2	0
+
+#define cpu_has_inclusive_pcaches	0
+
+#define cpu_dcache_line_size()	32
+#define cpu_icache_line_size()	32
+
+#endif /* __ASM_MACH_NETLOGIC_CPU_FEATURE_OVERRIDES_H */
diff --git a/arch/mips/include/asm/mach-netlogic/irq.h b/arch/mips/include/asm/mach-netlogic/irq.h
new file mode 100644
index 00000000000..b5902458e7c
--- /dev/null
+++ b/arch/mips/include/asm/mach-netlogic/irq.h
@@ -0,0 +1,14 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2011 Netlogic Microsystems.
+ */
+#ifndef __ASM_NETLOGIC_IRQ_H
+#define __ASM_NETLOGIC_IRQ_H
+
+#define NR_IRQS			64
+#define MIPS_CPU_IRQ_BASE	0
+
+#endif /* __ASM_NETLOGIC_IRQ_H */
diff --git a/arch/mips/include/asm/mach-netlogic/war.h b/arch/mips/include/asm/mach-netlogic/war.h
new file mode 100644
index 00000000000..22da8932735
--- /dev/null
+++ b/arch/mips/include/asm/mach-netlogic/war.h
@@ -0,0 +1,26 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2011 Netlogic Microsystems.
+ * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
+ */
+#ifndef __ASM_MIPS_MACH_NLM_WAR_H
+#define __ASM_MIPS_MACH_NLM_WAR_H
+
+#define R4600_V1_INDEX_ICACHEOP_WAR	0
+#define R4600_V1_HIT_CACHEOP_WAR	0
+#define R4600_V2_HIT_CACHEOP_WAR	0
+#define R5432_CP0_INTERRUPT_WAR		0
+#define BCM1250_M3_WAR			0
+#define SIBYTE_1956_WAR			0
+#define MIPS4K_ICACHE_REFILL_WAR	0
+#define MIPS_CACHE_SYNC_WAR		0
+#define TX49XX_ICACHE_INDEX_INV_WAR	0
+#define RM9000_CDEX_SMP_WAR		0
+#define ICACHE_REFILLS_WORKAROUND_WAR	0
+#define R10000_LLSC_WAR			0
+#define MIPS34K_MISSED_ITLB_WAR		0
+
+#endif /* __ASM_MIPS_MACH_NLM_WAR_H */
diff --git a/arch/mips/include/asm/module.h b/arch/mips/include/asm/module.h
index d94085a3eaf..bc01a02cacd 100644
--- a/arch/mips/include/asm/module.h
+++ b/arch/mips/include/asm/module.h
@@ -118,6 +118,8 @@ search_module_dbetables(unsigned long addr)
 #define MODULE_PROC_FAMILY "LOONGSON2 "
 #elif defined CONFIG_CPU_CAVIUM_OCTEON
 #define MODULE_PROC_FAMILY "OCTEON "
+#elif defined CONFIG_CPU_XLR
+#define MODULE_PROC_FAMILY "XLR "
 #else
 #error MODULE_PROC_FAMILY undefined for your processor configuration
 #endif
diff --git a/arch/mips/include/asm/netlogic/interrupt.h b/arch/mips/include/asm/netlogic/interrupt.h
new file mode 100644
index 00000000000..a85aadb6cfd
--- /dev/null
+++ b/arch/mips/include/asm/netlogic/interrupt.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ASM_NLM_INTERRUPT_H
+#define _ASM_NLM_INTERRUPT_H
+
+/* Defines for the IRQ numbers */
+
+#define IRQ_IPI_SMP_FUNCTION	3
+#define IRQ_IPI_SMP_RESCHEDULE	4
+#define IRQ_MSGRING		6
+#define IRQ_TIMER		7
+
+#endif
diff --git a/arch/mips/include/asm/netlogic/mips-extns.h b/arch/mips/include/asm/netlogic/mips-extns.h
new file mode 100644
index 00000000000..8c53d0ba4bf
--- /dev/null
+++ b/arch/mips/include/asm/netlogic/mips-extns.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ASM_NLM_MIPS_EXTS_H
+#define _ASM_NLM_MIPS_EXTS_H
+
+/*
+ * XLR and XLP interrupt request and interrupt mask registers
+ */
+#define read_c0_eirr()		__read_64bit_c0_register($9, 6)
+#define read_c0_eimr()		__read_64bit_c0_register($9, 7)
+#define write_c0_eirr(val)	__write_64bit_c0_register($9, 6, val)
+
+/*
+ * Writing EIMR in 32 bit is a special case, the lower 8 bit of the
+ * EIMR is shadowed in the status register, so we cannot save and
+ * restore status register for split read.
+ */
+#define write_c0_eimr(val)						\
+do {									\
+	if (sizeof(unsigned long) == 4)	{				\
+		unsigned long __flags;					\
+									\
+		local_irq_save(__flags);				\
+		__asm__ __volatile__(					\
+			".set\tmips64\n\t"				\
+			"dsll\t%L0, %L0, 32\n\t"			\
+			"dsrl\t%L0, %L0, 32\n\t"			\
+			"dsll\t%M0, %M0, 32\n\t"			\
+			"or\t%L0, %L0, %M0\n\t"				\
+			"dmtc0\t%L0, $9, 7\n\t"				\
+			".set\tmips0"					\
+			: : "r" (val));					\
+		__flags = (__flags & 0xffff00ff) | (((val) & 0xff) << 8);\
+		local_irq_restore(__flags);				\
+	} else								\
+		__write_64bit_c0_register($9, 7, (val));		\
+} while (0)
+
+static inline int hard_smp_processor_id(void)
+{
+	return __read_32bit_c0_register($15, 1) & 0x3ff;
+}
+
+#endif /*_ASM_NLM_MIPS_EXTS_H */
diff --git a/arch/mips/include/asm/netlogic/psb-bootinfo.h b/arch/mips/include/asm/netlogic/psb-bootinfo.h
new file mode 100644
index 00000000000..6878307f0ee
--- /dev/null
+++ b/arch/mips/include/asm/netlogic/psb-bootinfo.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ASM_NETLOGIC_BOOTINFO_H
+#define _ASM_NETLOGIC_BOOTINFO_H
+
+struct psb_info {
+	uint64_t boot_level;
+	uint64_t io_base;
+	uint64_t output_device;
+	uint64_t uart_print;
+	uint64_t led_output;
+	uint64_t init;
+	uint64_t exit;
+	uint64_t warm_reset;
+	uint64_t wakeup;
+	uint64_t online_cpu_map;
+	uint64_t master_reentry_sp;
+	uint64_t master_reentry_gp;
+	uint64_t master_reentry_fn;
+	uint64_t slave_reentry_fn;
+	uint64_t magic_dword;
+	uint64_t uart_putchar;
+	uint64_t size;
+	uint64_t uart_getchar;
+	uint64_t nmi_handler;
+	uint64_t psb_version;
+	uint64_t mac_addr;
+	uint64_t cpu_frequency;
+	uint64_t board_version;
+	uint64_t malloc;
+	uint64_t free;
+	uint64_t global_shmem_addr;
+	uint64_t global_shmem_size;
+	uint64_t psb_os_cpu_map;
+	uint64_t userapp_cpu_map;
+	uint64_t wakeup_os;
+	uint64_t psb_mem_map;
+	uint64_t board_major_version;
+	uint64_t board_minor_version;
+	uint64_t board_manf_revision;
+	uint64_t board_serial_number;
+	uint64_t psb_physaddr_map;
+	uint64_t xlr_loaderip_config;
+	uint64_t bldr_envp;
+	uint64_t avail_mem_map;
+};
+
+enum {
+	NETLOGIC_IO_SPACE = 0x10,
+	PCIX_IO_SPACE,
+	PCIX_CFG_SPACE,
+	PCIX_MEMORY_SPACE,
+	HT_IO_SPACE,
+	HT_CFG_SPACE,
+	HT_MEMORY_SPACE,
+	SRAM_SPACE,
+	FLASH_CONTROLLER_SPACE
+};
+
+#define NLM_MAX_ARGS	64
+#define NLM_MAX_ENVS	32
+
+/* This is what netlboot passes and linux boot_mem_map is subtly different */
+#define NLM_BOOT_MEM_MAP_MAX	32
+struct nlm_boot_mem_map {
+	int nr_map;
+	struct nlm_boot_mem_map_entry {
+		uint64_t addr;		/* start of memory segment */
+		uint64_t size;		/* size of memory segment */
+		uint32_t type;		/* type of memory segment */
+	} map[NLM_BOOT_MEM_MAP_MAX];
+};
+
+/* Pointer to saved boot loader info */
+extern struct psb_info nlm_prom_info;
+
+#endif
diff --git a/arch/mips/include/asm/netlogic/xlr/gpio.h b/arch/mips/include/asm/netlogic/xlr/gpio.h
new file mode 100644
index 00000000000..51f6ad4aeb1
--- /dev/null
+++ b/arch/mips/include/asm/netlogic/xlr/gpio.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ASM_NLM_GPIO_H
+#define _ASM_NLM_GPIO_H
+
+#define NETLOGIC_GPIO_INT_EN_REG		0
+#define NETLOGIC_GPIO_INPUT_INVERSION_REG	1
+#define NETLOGIC_GPIO_IO_DIR_REG		2
+#define NETLOGIC_GPIO_IO_DATA_WR_REG		3
+#define NETLOGIC_GPIO_IO_DATA_RD_REG		4
+
+#define NETLOGIC_GPIO_SWRESET_REG		8
+#define NETLOGIC_GPIO_DRAM1_CNTRL_REG		9
+#define NETLOGIC_GPIO_DRAM1_RATIO_REG		10
+#define NETLOGIC_GPIO_DRAM1_RESET_REG		11
+#define NETLOGIC_GPIO_DRAM1_STATUS_REG		12
+#define NETLOGIC_GPIO_DRAM2_CNTRL_REG		13
+#define NETLOGIC_GPIO_DRAM2_RATIO_REG		14
+#define NETLOGIC_GPIO_DRAM2_RESET_REG		15
+#define NETLOGIC_GPIO_DRAM2_STATUS_REG		16
+
+#define NETLOGIC_GPIO_PWRON_RESET_CFG_REG	21
+#define NETLOGIC_GPIO_BIST_ALL_GO_STATUS_REG	24
+#define NETLOGIC_GPIO_BIST_CPU_GO_STATUS_REG	25
+#define NETLOGIC_GPIO_BIST_DEV_GO_STATUS_REG	26
+
+#define NETLOGIC_GPIO_FUSE_BANK_REG		35
+#define NETLOGIC_GPIO_CPU_RESET_REG		40
+#define NETLOGIC_GPIO_RNG_REG			43
+
+#define NETLOGIC_PWRON_RESET_PCMCIA_BOOT	17
+#define NETLOGIC_GPIO_LED_BITMAP	0x1700000
+#define NETLOGIC_GPIO_LED_0_SHIFT		20
+#define NETLOGIC_GPIO_LED_1_SHIFT		24
+
+#define NETLOGIC_GPIO_LED_OUTPUT_CODE_RESET	0x01
+#define NETLOGIC_GPIO_LED_OUTPUT_CODE_HARD_RESET 0x02
+#define NETLOGIC_GPIO_LED_OUTPUT_CODE_SOFT_RESET 0x03
+#define NETLOGIC_GPIO_LED_OUTPUT_CODE_MAIN	0x04
+
+#endif
diff --git a/arch/mips/include/asm/netlogic/xlr/iomap.h b/arch/mips/include/asm/netlogic/xlr/iomap.h
new file mode 100644
index 00000000000..2e3a4dd5304
--- /dev/null
+++ b/arch/mips/include/asm/netlogic/xlr/iomap.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ASM_NLM_IOMAP_H
+#define _ASM_NLM_IOMAP_H
+
+#define DEFAULT_NETLOGIC_IO_BASE           CKSEG1ADDR(0x1ef00000)
+#define NETLOGIC_IO_DDR2_CHN0_OFFSET       0x01000
+#define NETLOGIC_IO_DDR2_CHN1_OFFSET       0x02000
+#define NETLOGIC_IO_DDR2_CHN2_OFFSET       0x03000
+#define NETLOGIC_IO_DDR2_CHN3_OFFSET       0x04000
+#define NETLOGIC_IO_PIC_OFFSET             0x08000
+#define NETLOGIC_IO_UART_0_OFFSET          0x14000
+#define NETLOGIC_IO_UART_1_OFFSET          0x15100
+
+#define NETLOGIC_IO_SIZE                   0x1000
+
+#define NETLOGIC_IO_BRIDGE_OFFSET          0x00000
+
+#define NETLOGIC_IO_RLD2_CHN0_OFFSET       0x05000
+#define NETLOGIC_IO_RLD2_CHN1_OFFSET       0x06000
+
+#define NETLOGIC_IO_SRAM_OFFSET            0x07000
+
+#define NETLOGIC_IO_PCIX_OFFSET            0x09000
+#define NETLOGIC_IO_HT_OFFSET              0x0A000
+
+#define NETLOGIC_IO_SECURITY_OFFSET        0x0B000
+
+#define NETLOGIC_IO_GMAC_0_OFFSET          0x0C000
+#define NETLOGIC_IO_GMAC_1_OFFSET          0x0D000
+#define NETLOGIC_IO_GMAC_2_OFFSET          0x0E000
+#define NETLOGIC_IO_GMAC_3_OFFSET          0x0F000
+
+/* XLS devices */
+#define NETLOGIC_IO_GMAC_4_OFFSET          0x20000
+#define NETLOGIC_IO_GMAC_5_OFFSET          0x21000
+#define NETLOGIC_IO_GMAC_6_OFFSET          0x22000
+#define NETLOGIC_IO_GMAC_7_OFFSET          0x23000
+
+#define NETLOGIC_IO_PCIE_0_OFFSET          0x1E000
+#define NETLOGIC_IO_PCIE_1_OFFSET          0x1F000
+#define NETLOGIC_IO_SRIO_0_OFFSET          0x1E000
+#define NETLOGIC_IO_SRIO_1_OFFSET          0x1F000
+
+#define NETLOGIC_IO_USB_0_OFFSET           0x24000
+#define NETLOGIC_IO_USB_1_OFFSET           0x25000
+
+#define NETLOGIC_IO_COMP_OFFSET            0x1D000
+/* end XLS devices */
+
+/* XLR devices */
+#define NETLOGIC_IO_SPI4_0_OFFSET          0x10000
+#define NETLOGIC_IO_XGMAC_0_OFFSET         0x11000
+#define NETLOGIC_IO_SPI4_1_OFFSET          0x12000
+#define NETLOGIC_IO_XGMAC_1_OFFSET         0x13000
+/* end XLR devices */
+
+#define NETLOGIC_IO_I2C_0_OFFSET           0x16000
+#define NETLOGIC_IO_I2C_1_OFFSET           0x17000
+
+#define NETLOGIC_IO_GPIO_OFFSET            0x18000
+#define NETLOGIC_IO_FLASH_OFFSET           0x19000
+#define NETLOGIC_IO_TB_OFFSET              0x1C000
+
+#define NETLOGIC_CPLD_OFFSET               KSEG1ADDR(0x1d840000)
+
+/*
+ * Base Address (Virtual) of the PCI Config address space
+ * For now, choose 256M phys in kseg1 = 0xA0000000 + (1<<28)
+ * Config space spans 256 (num of buses) * 256 (num functions) * 256 bytes
+ * ie 1<<24 = 16M
+ */
+#define DEFAULT_PCI_CONFIG_BASE         0x18000000
+#define DEFAULT_HT_TYPE0_CFG_BASE       0x16000000
+#define DEFAULT_HT_TYPE1_CFG_BASE       0x17000000
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+typedef volatile __u32 nlm_reg_t;
+extern unsigned long netlogic_io_base;
+
+/* FIXME read once in write_reg */
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+#define netlogic_read_reg(base, offset)		((base)[(offset)])
+#define netlogic_write_reg(base, offset, value)	((base)[(offset)] = (value))
+#else
+#define netlogic_read_reg(base, offset)		(be32_to_cpu((base)[(offset)]))
+#define netlogic_write_reg(base, offset, value) \
+				((base)[(offset)] = cpu_to_be32((value)))
+#endif
+
+#define netlogic_read_reg_le32(base, offset) (le32_to_cpu((base)[(offset)]))
+#define netlogic_write_reg_le32(base, offset, value) \
+				((base)[(offset)] = cpu_to_le32((value)))
+#define netlogic_io_mmio(offset) ((nlm_reg_t *)(netlogic_io_base+(offset)))
+#endif /* __ASSEMBLY__ */
+#endif
diff --git a/arch/mips/include/asm/netlogic/xlr/pic.h b/arch/mips/include/asm/netlogic/xlr/pic.h
new file mode 100644
index 00000000000..5cceb746f08
--- /dev/null
+++ b/arch/mips/include/asm/netlogic/xlr/pic.h
@@ -0,0 +1,231 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ASM_NLM_XLR_PIC_H
+#define _ASM_NLM_XLR_PIC_H
+
+#define PIC_CLKS_PER_SEC		66666666ULL
+/* PIC hardware interrupt numbers */
+#define PIC_IRT_WD_INDEX		0
+#define PIC_IRT_TIMER_0_INDEX		1
+#define PIC_IRT_TIMER_1_INDEX		2
+#define PIC_IRT_TIMER_2_INDEX		3
+#define PIC_IRT_TIMER_3_INDEX		4
+#define PIC_IRT_TIMER_4_INDEX		5
+#define PIC_IRT_TIMER_5_INDEX		6
+#define PIC_IRT_TIMER_6_INDEX		7
+#define PIC_IRT_TIMER_7_INDEX		8
+#define PIC_IRT_CLOCK_INDEX		PIC_IRT_TIMER_7_INDEX
+#define PIC_IRT_UART_0_INDEX		9
+#define PIC_IRT_UART_1_INDEX		10
+#define PIC_IRT_I2C_0_INDEX		11
+#define PIC_IRT_I2C_1_INDEX		12
+#define PIC_IRT_PCMCIA_INDEX		13
+#define PIC_IRT_GPIO_INDEX		14
+#define PIC_IRT_HYPER_INDEX		15
+#define PIC_IRT_PCIX_INDEX		16
+/* XLS */
+#define PIC_IRT_CDE_INDEX		15
+#define PIC_IRT_BRIDGE_TB_XLS_INDEX	16
+/* XLS */
+#define PIC_IRT_GMAC0_INDEX		17
+#define PIC_IRT_GMAC1_INDEX		18
+#define PIC_IRT_GMAC2_INDEX		19
+#define PIC_IRT_GMAC3_INDEX		20
+#define PIC_IRT_XGS0_INDEX		21
+#define PIC_IRT_XGS1_INDEX		22
+#define PIC_IRT_HYPER_FATAL_INDEX	23
+#define PIC_IRT_PCIX_FATAL_INDEX	24
+#define PIC_IRT_BRIDGE_AERR_INDEX	25
+#define PIC_IRT_BRIDGE_BERR_INDEX	26
+#define PIC_IRT_BRIDGE_TB_XLR_INDEX	27
+#define PIC_IRT_BRIDGE_AERR_NMI_INDEX	28
+/* XLS */
+#define PIC_IRT_GMAC4_INDEX		21
+#define PIC_IRT_GMAC5_INDEX		22
+#define PIC_IRT_GMAC6_INDEX		23
+#define PIC_IRT_GMAC7_INDEX		24
+#define PIC_IRT_BRIDGE_ERR_INDEX	25
+#define PIC_IRT_PCIE_LINK0_INDEX	26
+#define PIC_IRT_PCIE_LINK1_INDEX	27
+#define PIC_IRT_PCIE_LINK2_INDEX	23
+#define PIC_IRT_PCIE_LINK3_INDEX	24
+#define PIC_IRT_PCIE_XLSB0_LINK2_INDEX	28
+#define PIC_IRT_PCIE_XLSB0_LINK3_INDEX	29
+#define PIC_IRT_SRIO_LINK0_INDEX	26
+#define PIC_IRT_SRIO_LINK1_INDEX	27
+#define PIC_IRT_SRIO_LINK2_INDEX	28
+#define PIC_IRT_SRIO_LINK3_INDEX	29
+#define PIC_IRT_PCIE_INT_INDEX		28
+#define PIC_IRT_PCIE_FATAL_INDEX	29
+#define PIC_IRT_GPIO_B_INDEX		30
+#define PIC_IRT_USB_INDEX		31
+/* XLS */
+#define PIC_NUM_IRTS			32
+
+
+#define PIC_CLOCK_TIMER			7
+
+/* PIC Registers */
+#define PIC_CTRL			0x00
+#define PIC_IPI				0x04
+#define PIC_INT_ACK			0x06
+
+#define WD_MAX_VAL_0			0x08
+#define WD_MAX_VAL_1			0x09
+#define WD_MASK_0			0x0a
+#define WD_MASK_1			0x0b
+#define WD_HEARBEAT_0			0x0c
+#define WD_HEARBEAT_1			0x0d
+
+#define PIC_IRT_0_BASE			0x40
+#define PIC_IRT_1_BASE			0x80
+#define PIC_TIMER_MAXVAL_0_BASE		0x100
+#define PIC_TIMER_MAXVAL_1_BASE		0x110
+#define PIC_TIMER_COUNT_0_BASE		0x120
+#define PIC_TIMER_COUNT_1_BASE		0x130
+
+#define PIC_IRT_0(picintr)      (PIC_IRT_0_BASE + (picintr))
+#define PIC_IRT_1(picintr)	(PIC_IRT_1_BASE + (picintr))
+
+#define PIC_TIMER_MAXVAL_0(i)	(PIC_TIMER_MAXVAL_0_BASE + (i))
+#define PIC_TIMER_MAXVAL_1(i)	(PIC_TIMER_MAXVAL_1_BASE + (i))
+#define PIC_TIMER_COUNT_0(i)	(PIC_TIMER_COUNT_0_BASE + (i))
+#define PIC_TIMER_COUNT_1(i)	(PIC_TIMER_COUNT_0_BASE + (i))
+
+/*
+ * Mapping between hardware interrupt numbers and IRQs on CPU
+ * we use a simple scheme to map PIC interrupts 0-31 to IRQs
+ * 8-39. This leaves the IRQ 0-7 for cpu interrupts like
+ * count/compare and FMN
+ */
+#define PIC_IRQ_BASE            8
+#define PIC_INTR_TO_IRQ(i)      (PIC_IRQ_BASE + (i))
+#define PIC_IRQ_TO_INTR(i)      ((i) - PIC_IRQ_BASE)
+
+#define PIC_IRT_FIRST_IRQ	PIC_IRQ_BASE
+#define PIC_WD_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_WD_INDEX)
+#define PIC_TIMER_0_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_TIMER_0_INDEX)
+#define PIC_TIMER_1_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_TIMER_1_INDEX)
+#define PIC_TIMER_2_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_TIMER_2_INDEX)
+#define PIC_TIMER_3_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_TIMER_3_INDEX)
+#define PIC_TIMER_4_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_TIMER_4_INDEX)
+#define PIC_TIMER_5_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_TIMER_5_INDEX)
+#define PIC_TIMER_6_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_TIMER_6_INDEX)
+#define PIC_TIMER_7_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_TIMER_7_INDEX)
+#define PIC_CLOCK_IRQ		(PIC_TIMER_7_IRQ)
+#define PIC_UART_0_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_UART_0_INDEX)
+#define PIC_UART_1_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_UART_1_INDEX)
+#define PIC_I2C_0_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_I2C_0_INDEX)
+#define PIC_I2C_1_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_I2C_1_INDEX)
+#define PIC_PCMCIA_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_PCMCIA_INDEX)
+#define PIC_GPIO_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GPIO_INDEX)
+#define PIC_HYPER_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_HYPER_INDEX)
+#define PIC_PCIX_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_PCIX_INDEX)
+/* XLS */
+#define PIC_CDE_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_CDE_INDEX)
+#define PIC_BRIDGE_TB_XLS_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_BRIDGE_TB_XLS_INDEX)
+/* end XLS */
+#define PIC_GMAC_0_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GMAC0_INDEX)
+#define PIC_GMAC_1_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GMAC1_INDEX)
+#define PIC_GMAC_2_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GMAC2_INDEX)
+#define PIC_GMAC_3_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GMAC3_INDEX)
+#define PIC_XGS_0_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_XGS0_INDEX)
+#define PIC_XGS_1_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_XGS1_INDEX)
+#define PIC_HYPER_FATAL_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_HYPER_FATAL_INDEX)
+#define PIC_PCIX_FATAL_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_PCIX_FATAL_INDEX)
+#define PIC_BRIDGE_AERR_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_BRIDGE_AERR_INDEX)
+#define PIC_BRIDGE_BERR_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_BRIDGE_BERR_INDEX)
+#define PIC_BRIDGE_TB_XLR_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_BRIDGE_TB_XLR_INDEX)
+#define PIC_BRIDGE_AERR_NMI_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_BRIDGE_AERR_NMI_INDEX)
+/* XLS defines */
+#define PIC_GMAC_4_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GMAC4_INDEX)
+#define PIC_GMAC_5_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GMAC5_INDEX)
+#define PIC_GMAC_6_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GMAC6_INDEX)
+#define PIC_GMAC_7_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GMAC7_INDEX)
+#define PIC_BRIDGE_ERR_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_BRIDGE_ERR_INDEX)
+#define PIC_PCIE_LINK0_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_PCIE_LINK0_INDEX)
+#define PIC_PCIE_LINK1_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_PCIE_LINK1_INDEX)
+#define PIC_PCIE_LINK2_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_PCIE_LINK2_INDEX)
+#define PIC_PCIE_LINK3_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_PCIE_LINK3_INDEX)
+#define PIC_PCIE_XLSB0_LINK2_IRQ PIC_INTR_TO_IRQ(PIC_IRT_PCIE_XLSB0_LINK2_INDEX)
+#define PIC_PCIE_XLSB0_LINK3_IRQ PIC_INTR_TO_IRQ(PIC_IRT_PCIE_XLSB0_LINK3_INDEX)
+#define PIC_SRIO_LINK0_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_SRIO_LINK0_INDEX)
+#define PIC_SRIO_LINK1_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_SRIO_LINK1_INDEX)
+#define PIC_SRIO_LINK2_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_SRIO_LINK2_INDEX)
+#define PIC_SRIO_LINK3_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_SRIO_LINK3_INDEX)
+#define PIC_PCIE_INT_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_PCIE_INT__INDEX)
+#define PIC_PCIE_FATAL_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_PCIE_FATAL_INDEX)
+#define PIC_GPIO_B_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GPIO_B_INDEX)
+#define PIC_USB_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_USB_INDEX)
+#define PIC_IRT_LAST_IRQ	PIC_USB_IRQ
+/* end XLS */
+
+#ifndef __ASSEMBLY__
+static inline void pic_send_ipi(u32 ipi)
+{
+	nlm_reg_t *mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET);
+
+	netlogic_write_reg(mmio, PIC_IPI, ipi);
+}
+
+static inline u32 pic_read_control(void)
+{
+	nlm_reg_t *mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET);
+
+	return netlogic_read_reg(mmio, PIC_CTRL);
+}
+
+static inline void pic_write_control(u32 control)
+{
+	nlm_reg_t *mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET);
+
+	netlogic_write_reg(mmio, PIC_CTRL, control);
+}
+
+static inline void pic_update_control(u32 control)
+{
+	nlm_reg_t *mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET);
+
+	netlogic_write_reg(mmio, PIC_CTRL,
+		(control | netlogic_read_reg(mmio, PIC_CTRL)));
+}
+
+#define PIC_IRQ_IS_EDGE_TRIGGERED(irq)	(((irq) >= PIC_TIMER_0_IRQ) && \
+					((irq) <= PIC_TIMER_7_IRQ))
+#define PIC_IRQ_IS_IRT(irq)		(((irq) >= PIC_IRT_FIRST_IRQ) && \
+					((irq) <= PIC_IRT_LAST_IRQ))
+#endif
+
+#endif /* _ASM_NLM_XLR_PIC_H */
diff --git a/arch/mips/include/asm/netlogic/xlr/xlr.h b/arch/mips/include/asm/netlogic/xlr/xlr.h
new file mode 100644
index 00000000000..3e6372692a0
--- /dev/null
+++ b/arch/mips/include/asm/netlogic/xlr/xlr.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ASM_NLM_XLR_H
+#define _ASM_NLM_XLR_H
+
+/* Platform UART functions */
+struct uart_port;
+unsigned int nlm_xlr_uart_in(struct uart_port *, int);
+void nlm_xlr_uart_out(struct uart_port *, int, int);
+
+/* SMP support functions */
+struct irq_desc;
+void nlm_smp_function_ipi_handler(unsigned int irq, struct irq_desc *desc);
+void nlm_smp_resched_ipi_handler(unsigned int irq, struct irq_desc *desc);
+int nlm_wakeup_secondary_cpus(u32 wakeup_mask);
+void nlm_smp_irq_init(void);
+void nlm_boot_smp_nmi(void);
+void prom_pre_boot_secondary_cpus(void);
+
+extern struct plat_smp_ops nlm_smp_ops;
+extern unsigned long nlm_common_ebase;
+
+/* XLS B silicon "Rook" */
+static inline unsigned int nlm_chip_is_xls_b(void)
+{
+	uint32_t prid = read_c0_prid();
+
+	return ((prid & 0xf000) == 0x4000);
+}
+
+/*
+ *  XLR chip types
+ */
+ /* The XLS product line has chip versions 0x[48c]? */
+static inline unsigned int nlm_chip_is_xls(void)
+{
+	uint32_t prid = read_c0_prid();
+
+	return ((prid & 0xf000) == 0x8000 || (prid & 0xf000) == 0x4000 ||
+		(prid & 0xf000) == 0xc000);
+}
+
+#endif /* _ASM_NLM_XLR_H */
diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h
index 9f1b8dba2c8..de39b1f343e 100644
--- a/arch/mips/include/asm/ptrace.h
+++ b/arch/mips/include/asm/ptrace.h
@@ -141,7 +141,8 @@ extern int ptrace_set_watch_regs(struct task_struct *child,
 #define instruction_pointer(regs) ((regs)->cp0_epc)
 #define profile_pc(regs) instruction_pointer(regs)
 
-extern asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit);
+extern asmlinkage void syscall_trace_enter(struct pt_regs *regs);
+extern asmlinkage void syscall_trace_leave(struct pt_regs *regs);
 
 extern NORET_TYPE void die(const char *, struct pt_regs *) ATTRIB_NORET;
 
diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index d71160de4d1..97f8bf6639e 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h
@@ -149,6 +149,9 @@ register struct thread_info *__current_thread_info __asm__("$28");
 #define _TIF_FPUBOUND		(1<<TIF_FPUBOUND)
 #define _TIF_LOAD_WATCH		(1<<TIF_LOAD_WATCH)
 
+/* work to do in syscall_trace_leave() */
+#define _TIF_WORK_SYSCALL_EXIT	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT)
+
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK		(0x0000ffef &				\
 					~(_TIF_SECCOMP | _TIF_SYSCALL_AUDIT))
diff --git a/arch/mips/include/asm/time.h b/arch/mips/include/asm/time.h
index c7f1bfef157..bc14447e69b 100644
--- a/arch/mips/include/asm/time.h
+++ b/arch/mips/include/asm/time.h
@@ -84,12 +84,6 @@ static inline int init_mips_clocksource(void)
 #endif
 }
 
-static inline void clocksource_set_clock(struct clocksource *cs,
-					 unsigned int clock)
-{
-	clocksource_calc_mult_shift(cs, clock, 4);
-}
-
 static inline void clockevent_set_clock(struct clock_event_device *cd,
 					unsigned int clock)
 {
diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c
index 9ce9f64cb76..2d8e447cb82 100644
--- a/arch/mips/jazz/jazzdma.c
+++ b/arch/mips/jazz/jazzdma.c
@@ -211,7 +211,7 @@ EXPORT_SYMBOL(vdma_free);
  */
 int vdma_remap(unsigned long laddr, unsigned long paddr, unsigned long size)
 {
-	int first, pages, npages;
+	int first, pages;
 
 	if (laddr > 0xffffff) {
 		if (vdma_debug)
@@ -228,8 +228,7 @@ int vdma_remap(unsigned long laddr, unsigned long paddr, unsigned long size)
 		return -EINVAL;	/* invalid physical address */
 	}
 
-	npages = pages =
-	    (((paddr & (VDMA_PAGESIZE - 1)) + size) >> 12) + 1;
+	pages = (((paddr & (VDMA_PAGESIZE - 1)) + size) >> 12) + 1;
 	first = laddr >> 12;
 	if (vdma_debug)
 		printk("vdma_remap: first=%x, pages=%x\n", first, pages);
diff --git a/arch/mips/jz4740/dma.c b/arch/mips/jz4740/dma.c
index 5ebe75a6835..d7feb898692 100644
--- a/arch/mips/jz4740/dma.c
+++ b/arch/mips/jz4740/dma.c
@@ -242,9 +242,7 @@ EXPORT_SYMBOL_GPL(jz4740_dma_get_residue);
 
 static void jz4740_dma_chan_irq(struct jz4740_dma_chan *dma)
 {
-	uint32_t status;
-
-	status = jz4740_dma_read(JZ_REG_DMA_STATUS_CTRL(dma->id));
+	(void) jz4740_dma_read(JZ_REG_DMA_STATUS_CTRL(dma->id));
 
 	jz4740_dma_write_mask(JZ_REG_DMA_STATUS_CTRL(dma->id), 0,
 		JZ_DMA_STATUS_CTRL_ENABLE | JZ_DMA_STATUS_CTRL_TRANSFER_DONE);
diff --git a/arch/mips/jz4740/setup.c b/arch/mips/jz4740/setup.c
index 6a9e14dab91..d97cfbf882f 100644
--- a/arch/mips/jz4740/setup.c
+++ b/arch/mips/jz4740/setup.c
@@ -1,5 +1,6 @@
 /*
  *  Copyright (C) 2009-2010, Lars-Peter Clausen <lars@metafoo.de>
+ *  Copyright (C) 2011, Maarten ter Huurne <maarten@treewalker.org>
  *  JZ4740 setup code
  *
  *  This program is free software; you can redistribute it and/or modify it
@@ -14,13 +15,44 @@
  */
 
 #include <linux/init.h>
+#include <linux/io.h>
 #include <linux/kernel.h>
 
+#include <asm/bootinfo.h>
+
+#include <asm/mach-jz4740/base.h>
+
 #include "reset.h"
 
+
+#define JZ4740_EMC_SDRAM_CTRL 0x80
+
+
+static void __init jz4740_detect_mem(void)
+{
+	void __iomem *jz_emc_base;
+	u32 ctrl, bus, bank, rows, cols;
+	phys_t size;
+
+	jz_emc_base = ioremap(JZ4740_EMC_BASE_ADDR, 0x100);
+	ctrl = readl(jz_emc_base + JZ4740_EMC_SDRAM_CTRL);
+	bus = 2 - ((ctrl >> 31) & 1);
+	bank = 1 + ((ctrl >> 19) & 1);
+	cols = 8 + ((ctrl >> 26) & 7);
+	rows = 11 + ((ctrl >> 20) & 3);
+	printk(KERN_DEBUG
+		"SDRAM preconfigured: bus:%u bank:%u rows:%u cols:%u\n",
+		bus, bank, rows, cols);
+	iounmap(jz_emc_base);
+
+	size = 1 << (bus + bank + cols + rows);
+	add_memory_region(0, size, BOOT_MEM_RAM);
+}
+
 void __init plat_mem_setup(void)
 {
 	jz4740_reset_init();
+	jz4740_detect_mem();
 }
 
 const char *get_system_type(void)
diff --git a/arch/mips/jz4740/time.c b/arch/mips/jz4740/time.c
index fe01678d94f..f83c2dd07a2 100644
--- a/arch/mips/jz4740/time.c
+++ b/arch/mips/jz4740/time.c
@@ -89,7 +89,7 @@ static int jz4740_clockevent_set_next(unsigned long evt,
 
 static struct clock_event_device jz4740_clockevent = {
 	.name = "jz4740-timer",
-	.features = CLOCK_EVT_FEAT_PERIODIC,
+	.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.set_next_event = jz4740_clockevent_set_next,
 	.set_mode = jz4740_clockevent_set_mode,
 	.rating = 200,
@@ -121,8 +121,7 @@ void __init plat_time_init(void)
 
 	clockevents_register_device(&jz4740_clockevent);
 
-	clocksource_set_clock(&jz4740_clocksource, clk_rate);
-	ret = clocksource_register(&jz4740_clocksource);
+	ret = clocksource_register_hz(&jz4740_clocksource, clk_rate);
 
 	if (ret)
 		printk(KERN_ERR "Failed to register clocksource: %d\n", ret);
diff --git a/arch/mips/jz4740/timer.c b/arch/mips/jz4740/timer.c
index b2c01512905..654d5c3900b 100644
--- a/arch/mips/jz4740/timer.c
+++ b/arch/mips/jz4740/timer.c
@@ -27,11 +27,13 @@ void jz4740_timer_enable_watchdog(void)
 {
 	writel(BIT(16), jz4740_timer_base + JZ_REG_TIMER_STOP_CLEAR);
 }
+EXPORT_SYMBOL_GPL(jz4740_timer_enable_watchdog);
 
 void jz4740_timer_disable_watchdog(void)
 {
 	writel(BIT(16), jz4740_timer_base + JZ_REG_TIMER_STOP_SET);
 }
+EXPORT_SYMBOL_GPL(jz4740_timer_disable_watchdog);
 
 void __init jz4740_timer_init(void)
 {
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index cedee2bcbd1..83bba332bbf 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -52,6 +52,7 @@ obj-$(CONFIG_CPU_TX39XX)	+= r2300_fpu.o r2300_switch.o
 obj-$(CONFIG_CPU_TX49XX)	+= r4k_fpu.o r4k_switch.o
 obj-$(CONFIG_CPU_VR41XX)	+= r4k_fpu.o r4k_switch.o
 obj-$(CONFIG_CPU_CAVIUM_OCTEON)	+= octeon_switch.o
+obj-$(CONFIG_CPU_XLR)		+= r4k_fpu.o r4k_switch.o
 
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_SMP_UP)		+= smp-up.o
diff --git a/arch/mips/kernel/cevt-txx9.c b/arch/mips/kernel/cevt-txx9.c
index 0b7377361e2..f0ab92a1b05 100644
--- a/arch/mips/kernel/cevt-txx9.c
+++ b/arch/mips/kernel/cevt-txx9.c
@@ -51,8 +51,7 @@ void __init txx9_clocksource_init(unsigned long baseaddr,
 {
 	struct txx9_tmr_reg __iomem *tmrptr;
 
-	clocksource_set_clock(&txx9_clocksource.cs, TIMER_CLK(imbusclk));
-	clocksource_register(&txx9_clocksource.cs);
+	clocksource_register_hz(&txx9_clocksource.cs, TIMER_CLK(imbusclk));
 
 	tmrptr = ioremap(baseaddr, sizeof(struct txx9_tmr_reg));
 	__raw_writel(TCR_BASE, &tmrptr->tcr);
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index f65d4c8c65a..bb133d10b14 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -291,6 +291,12 @@ static inline int cpu_has_confreg(void)
 #endif
 }
 
+static inline void set_elf_platform(int cpu, const char *plat)
+{
+	if (cpu == 0)
+		__elf_platform = plat;
+}
+
 /*
  * Get the FPU Implementation/Revision.
  */
@@ -614,6 +620,16 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
 	case PRID_IMP_LOONGSON2:
 		c->cputype = CPU_LOONGSON2;
 		__cpu_name[cpu] = "ICT Loongson-2";
+
+		switch (c->processor_id & PRID_REV_MASK) {
+		case PRID_REV_LOONGSON2E:
+			set_elf_platform(cpu, "loongson2e");
+			break;
+		case PRID_REV_LOONGSON2F:
+			set_elf_platform(cpu, "loongson2f");
+			break;
+		}
+
 		c->isa_level = MIPS_CPU_ISA_III;
 		c->options = R4K_OPTS |
 			     MIPS_CPU_FPU | MIPS_CPU_LLSC |
@@ -911,12 +927,14 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu)
 	case PRID_IMP_BMIPS32_REV8:
 		c->cputype = CPU_BMIPS32;
 		__cpu_name[cpu] = "Broadcom BMIPS32";
+		set_elf_platform(cpu, "bmips32");
 		break;
 	case PRID_IMP_BMIPS3300:
 	case PRID_IMP_BMIPS3300_ALT:
 	case PRID_IMP_BMIPS3300_BUG:
 		c->cputype = CPU_BMIPS3300;
 		__cpu_name[cpu] = "Broadcom BMIPS3300";
+		set_elf_platform(cpu, "bmips3300");
 		break;
 	case PRID_IMP_BMIPS43XX: {
 		int rev = c->processor_id & 0xff;
@@ -925,15 +943,18 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu)
 				rev <= PRID_REV_BMIPS4380_HI) {
 			c->cputype = CPU_BMIPS4380;
 			__cpu_name[cpu] = "Broadcom BMIPS4380";
+			set_elf_platform(cpu, "bmips4380");
 		} else {
 			c->cputype = CPU_BMIPS4350;
 			__cpu_name[cpu] = "Broadcom BMIPS4350";
+			set_elf_platform(cpu, "bmips4350");
 		}
 		break;
 	}
 	case PRID_IMP_BMIPS5000:
 		c->cputype = CPU_BMIPS5000;
 		__cpu_name[cpu] = "Broadcom BMIPS5000";
+		set_elf_platform(cpu, "bmips5000");
 		c->options |= MIPS_CPU_ULRI;
 		break;
 	}
@@ -956,14 +977,12 @@ static inline void cpu_probe_cavium(struct cpuinfo_mips *c, unsigned int cpu)
 		c->cputype = CPU_CAVIUM_OCTEON_PLUS;
 		__cpu_name[cpu] = "Cavium Octeon+";
 platform:
-		if (cpu == 0)
-			__elf_platform = "octeon";
+		set_elf_platform(cpu, "octeon");
 		break;
 	case PRID_IMP_CAVIUM_CN63XX:
 		c->cputype = CPU_CAVIUM_OCTEON2;
 		__cpu_name[cpu] = "Cavium Octeon II";
-		if (cpu == 0)
-			__elf_platform = "octeon2";
+		set_elf_platform(cpu, "octeon2");
 		break;
 	default:
 		printk(KERN_INFO "Unknown Octeon chip!\n");
@@ -988,6 +1007,59 @@ static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu)
 	}
 }
 
+static inline void cpu_probe_netlogic(struct cpuinfo_mips *c, int cpu)
+{
+	decode_configs(c);
+
+	c->options = (MIPS_CPU_TLB       |
+			MIPS_CPU_4KEX    |
+			MIPS_CPU_COUNTER |
+			MIPS_CPU_DIVEC   |
+			MIPS_CPU_WATCH   |
+			MIPS_CPU_EJTAG   |
+			MIPS_CPU_LLSC);
+
+	switch (c->processor_id & 0xff00) {
+	case PRID_IMP_NETLOGIC_XLR732:
+	case PRID_IMP_NETLOGIC_XLR716:
+	case PRID_IMP_NETLOGIC_XLR532:
+	case PRID_IMP_NETLOGIC_XLR308:
+	case PRID_IMP_NETLOGIC_XLR532C:
+	case PRID_IMP_NETLOGIC_XLR516C:
+	case PRID_IMP_NETLOGIC_XLR508C:
+	case PRID_IMP_NETLOGIC_XLR308C:
+		c->cputype = CPU_XLR;
+		__cpu_name[cpu] = "Netlogic XLR";
+		break;
+
+	case PRID_IMP_NETLOGIC_XLS608:
+	case PRID_IMP_NETLOGIC_XLS408:
+	case PRID_IMP_NETLOGIC_XLS404:
+	case PRID_IMP_NETLOGIC_XLS208:
+	case PRID_IMP_NETLOGIC_XLS204:
+	case PRID_IMP_NETLOGIC_XLS108:
+	case PRID_IMP_NETLOGIC_XLS104:
+	case PRID_IMP_NETLOGIC_XLS616B:
+	case PRID_IMP_NETLOGIC_XLS608B:
+	case PRID_IMP_NETLOGIC_XLS416B:
+	case PRID_IMP_NETLOGIC_XLS412B:
+	case PRID_IMP_NETLOGIC_XLS408B:
+	case PRID_IMP_NETLOGIC_XLS404B:
+		c->cputype = CPU_XLR;
+		__cpu_name[cpu] = "Netlogic XLS";
+		break;
+
+	default:
+		printk(KERN_INFO "Unknown Netlogic chip id [%02x]!\n",
+		       c->processor_id);
+		c->cputype = CPU_XLR;
+		break;
+	}
+
+	c->isa_level = MIPS_CPU_ISA_M64R1;
+	c->tlbsize = ((read_c0_config1() >> 25) & 0x3f) + 1;
+}
+
 #ifdef CONFIG_64BIT
 /* For use by uaccess.h */
 u64 __ua_limit;
@@ -1035,6 +1107,9 @@ __cpuinit void cpu_probe(void)
 	case PRID_COMP_INGENIC:
 		cpu_probe_ingenic(c, cpu);
 		break;
+	case PRID_COMP_NETLOGIC:
+		cpu_probe_netlogic(c, cpu);
+		break;
 	}
 
 	BUG_ON(!__cpu_name[cpu]);
diff --git a/arch/mips/kernel/csrc-bcm1480.c b/arch/mips/kernel/csrc-bcm1480.c
index 51489f8a825..f96f99c794a 100644
--- a/arch/mips/kernel/csrc-bcm1480.c
+++ b/arch/mips/kernel/csrc-bcm1480.c
@@ -49,6 +49,5 @@ void __init sb1480_clocksource_init(void)
 
 	plldiv = G_BCM1480_SYS_PLL_DIV(__raw_readq(IOADDR(A_SCD_SYSTEM_CFG)));
 	zbbus = ((plldiv >> 1) * 50000000) + ((plldiv & 1) * 25000000);
-	clocksource_set_clock(cs, zbbus);
-	clocksource_register(cs);
+	clocksource_register_hz(cs, zbbus);
 }
diff --git a/arch/mips/kernel/csrc-ioasic.c b/arch/mips/kernel/csrc-ioasic.c
index 23da108506b..46bd7fa98d6 100644
--- a/arch/mips/kernel/csrc-ioasic.c
+++ b/arch/mips/kernel/csrc-ioasic.c
@@ -59,7 +59,5 @@ void __init dec_ioasic_clocksource_init(void)
 	printk(KERN_INFO "I/O ASIC clock frequency %dHz\n", freq);
 
 	clocksource_dec.rating = 200 + freq / 10000000;
-	clocksource_set_clock(&clocksource_dec, freq);
-
-	clocksource_register(&clocksource_dec);
+	clocksource_register_hz(&clocksource_dec, freq);
 }
diff --git a/arch/mips/kernel/csrc-powertv.c b/arch/mips/kernel/csrc-powertv.c
index a27c16c8690..2e7c5232da8 100644
--- a/arch/mips/kernel/csrc-powertv.c
+++ b/arch/mips/kernel/csrc-powertv.c
@@ -78,9 +78,7 @@ static void __init powertv_c0_hpt_clocksource_init(void)
 
 	clocksource_mips.rating = 200 + mips_hpt_frequency / 10000000;
 
-	clocksource_set_clock(&clocksource_mips, mips_hpt_frequency);
-
-	clocksource_register(&clocksource_mips);
+	clocksource_register_hz(&clocksource_mips, mips_hpt_frequency);
 }
 
 /**
@@ -130,43 +128,16 @@ static struct clocksource clocksource_tim_c = {
 /**
  * powertv_tim_c_clocksource_init - set up a clock source for the TIM_C clock
  *
- * The hard part here is coming up with a constant k and shift s such that
- * the 48-bit TIM_C value multiplied by k doesn't overflow and that value,
- * when shifted right by s, yields the corresponding number of nanoseconds.
  * We know that TIM_C counts at 27 MHz/8, so each cycle corresponds to
- * 1 / (27,000,000/8) seconds. Multiply that by a billion and you get the
- * number of nanoseconds. Since the TIM_C value has 48 bits and the math is
- * done in 64 bits, avoiding an overflow means that k must be less than
- * 64 - 48 = 16 bits.
+ * 1 / (27,000,000/8) seconds.
  */
 static void __init powertv_tim_c_clocksource_init(void)
 {
-	int			prescale;
-	unsigned long		dividend;
-	unsigned long		k;
-	int			s;
-	const int		max_k_bits = (64 - 48) - 1;
-	const unsigned long	billion = 1000000000;
 	const unsigned long	counts_per_second = 27000000 / 8;
 
-	prescale = BITS_PER_LONG - ilog2(billion) - 1;
-	dividend = billion << prescale;
-	k = dividend / counts_per_second;
-	s = ilog2(k) - max_k_bits;
-
-	if (s < 0)
-		s = prescale;
-
-	else {
-		k >>= s;
-		s += prescale;
-	}
-
-	clocksource_tim_c.mult = k;
-	clocksource_tim_c.shift = s;
 	clocksource_tim_c.rating = 200;
 
-	clocksource_register(&clocksource_tim_c);
+	clocksource_register_hz(&clocksource_tim_c, counts_per_second);
 	tim_c = (struct tim_c *) asic_reg_addr(tim_ch);
 }
 
diff --git a/arch/mips/kernel/csrc-r4k.c b/arch/mips/kernel/csrc-r4k.c
index e95a3cd48ee..decd1fa38d5 100644
--- a/arch/mips/kernel/csrc-r4k.c
+++ b/arch/mips/kernel/csrc-r4k.c
@@ -30,9 +30,7 @@ int __init init_r4k_clocksource(void)
 	/* Calculate a somewhat reasonable rating value */
 	clocksource_mips.rating = 200 + mips_hpt_frequency / 10000000;
 
-	clocksource_set_clock(&clocksource_mips, mips_hpt_frequency);
-
-	clocksource_register(&clocksource_mips);
+	clocksource_register_hz(&clocksource_mips, mips_hpt_frequency);
 
 	return 0;
 }
diff --git a/arch/mips/kernel/csrc-sb1250.c b/arch/mips/kernel/csrc-sb1250.c
index d14d3d1907f..e9606d90768 100644
--- a/arch/mips/kernel/csrc-sb1250.c
+++ b/arch/mips/kernel/csrc-sb1250.c
@@ -65,6 +65,5 @@ void __init sb1250_clocksource_init(void)
 		     IOADDR(A_SCD_TIMER_REGISTER(SB1250_HPT_NUM,
 						 R_SCD_TIMER_CFG)));
 
-	clocksource_set_clock(cs, V_SCD_TIMER_FREQ);
-	clocksource_register(cs);
+	clocksource_register_hz(cs, V_SCD_TIMER_FREQ);
 }
diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index ffa331029e0..37acfa036d4 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S
@@ -167,14 +167,13 @@ work_notifysig:				# deal with pending signals and
 FEXPORT(syscall_exit_work_partial)
 	SAVE_STATIC
 syscall_exit_work:
-	li	t0, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT
+	li	t0, _TIF_WORK_SYSCALL_EXIT
 	and	t0, a2			# a2 is preloaded with TI_FLAGS
 	beqz	t0, work_pending	# trace bit set?
-	local_irq_enable		# could let do_syscall_trace()
+	local_irq_enable		# could let syscall_trace_leave()
 					# call schedule() instead
 	move	a0, sp
-	li	a1, 1
-	jal	do_syscall_trace
+	jal	syscall_trace_leave
 	b	resume_userspace
 
 #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_MIPS_MT)
diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
index 94ca2b018af..feb8021a305 100644
--- a/arch/mips/kernel/ftrace.c
+++ b/arch/mips/kernel/ftrace.c
@@ -23,6 +23,7 @@
 
 #define JAL 0x0c000000		/* jump & link: ip --> ra, jump to target */
 #define ADDR_MASK 0x03ffffff	/*  op_code|addr : 31...26|25 ....0 */
+#define JUMP_RANGE_MASK ((1UL << 28) - 1)
 
 #define INSN_NOP 0x00000000	/* nop */
 #define INSN_JAL(addr)	\
@@ -44,12 +45,12 @@ static inline void ftrace_dyn_arch_init_insns(void)
 
 	/* jal (ftrace_caller + 8), jump over the first two instruction */
 	buf = (u32 *)&insn_jal_ftrace_caller;
-	uasm_i_jal(&buf, (FTRACE_ADDR + 8));
+	uasm_i_jal(&buf, (FTRACE_ADDR + 8) & JUMP_RANGE_MASK);
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	/* j ftrace_graph_caller */
 	buf = (u32 *)&insn_j_ftrace_graph_caller;
-	uasm_i_j(&buf, (unsigned long)ftrace_graph_caller);
+	uasm_i_j(&buf, (unsigned long)ftrace_graph_caller & JUMP_RANGE_MASK);
 #endif
 }
 
diff --git a/arch/mips/kernel/i8253.c b/arch/mips/kernel/i8253.c
index 2392a7a296d..391221b6a6a 100644
--- a/arch/mips/kernel/i8253.c
+++ b/arch/mips/kernel/i8253.c
@@ -125,87 +125,11 @@ void __init setup_pit_timer(void)
 	setup_irq(0, &irq0);
 }
 
-/*
- * Since the PIT overflows every tick, its not very useful
- * to just read by itself. So use jiffies to emulate a free
- * running counter:
- */
-static cycle_t pit_read(struct clocksource *cs)
-{
-	unsigned long flags;
-	int count;
-	u32 jifs;
-	static int old_count;
-	static u32 old_jifs;
-
-	raw_spin_lock_irqsave(&i8253_lock, flags);
-	/*
-	 * Although our caller may have the read side of xtime_lock,
-	 * this is now a seqlock, and we are cheating in this routine
-	 * by having side effects on state that we cannot undo if
-	 * there is a collision on the seqlock and our caller has to
-	 * retry.  (Namely, old_jifs and old_count.)  So we must treat
-	 * jiffies as volatile despite the lock.  We read jiffies
-	 * before latching the timer count to guarantee that although
-	 * the jiffies value might be older than the count (that is,
-	 * the counter may underflow between the last point where
-	 * jiffies was incremented and the point where we latch the
-	 * count), it cannot be newer.
-	 */
-	jifs = jiffies;
-	outb_p(0x00, PIT_MODE);	/* latch the count ASAP */
-	count = inb_p(PIT_CH0);	/* read the latched count */
-	count |= inb_p(PIT_CH0) << 8;
-
-	/* VIA686a test code... reset the latch if count > max + 1 */
-	if (count > LATCH) {
-		outb_p(0x34, PIT_MODE);
-		outb_p(LATCH & 0xff, PIT_CH0);
-		outb(LATCH >> 8, PIT_CH0);
-		count = LATCH - 1;
-	}
-
-	/*
-	 * It's possible for count to appear to go the wrong way for a
-	 * couple of reasons:
-	 *
-	 *  1. The timer counter underflows, but we haven't handled the
-	 *     resulting interrupt and incremented jiffies yet.
-	 *  2. Hardware problem with the timer, not giving us continuous time,
-	 *     the counter does small "jumps" upwards on some Pentium systems,
-	 *     (see c't 95/10 page 335 for Neptun bug.)
-	 *
-	 * Previous attempts to handle these cases intelligently were
-	 * buggy, so we just do the simple thing now.
-	 */
-	if (count > old_count && jifs == old_jifs) {
-		count = old_count;
-	}
-	old_count = count;
-	old_jifs = jifs;
-
-	raw_spin_unlock_irqrestore(&i8253_lock, flags);
-
-	count = (LATCH - 1) - count;
-
-	return (cycle_t)(jifs * LATCH) + count;
-}
-
-static struct clocksource clocksource_pit = {
-	.name	= "pit",
-	.rating = 110,
-	.read	= pit_read,
-	.mask	= CLOCKSOURCE_MASK(32),
-	.mult	= 0,
-	.shift	= 20,
-};
-
 static int __init init_pit_clocksource(void)
 {
 	if (num_possible_cpus() > 1) /* PIT does not scale! */
 		return 0;
 
-	clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20);
-	return clocksource_register(&clocksource_pit);
+	return clocksource_i8253_init();
 }
 arch_initcall(init_pit_clocksource);
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index d21c388c011..4e6ea1ffad4 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -533,15 +533,10 @@ static inline int audit_arch(void)
  * Notification of system call entry/exit
  * - triggered by current->work.syscall_trace
  */
-asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit)
+asmlinkage void syscall_trace_enter(struct pt_regs *regs)
 {
 	/* do the secure computing check first */
-	if (!entryexit)
-		secure_computing(regs->regs[2]);
-
-	if (unlikely(current->audit_context) && entryexit)
-		audit_syscall_exit(AUDITSC_RESULT(regs->regs[2]),
-		                   regs->regs[2]);
+	secure_computing(regs->regs[2]);
 
 	if (!(current->ptrace & PT_PTRACED))
 		goto out;
@@ -565,8 +560,40 @@ asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit)
 	}
 
 out:
-	if (unlikely(current->audit_context) && !entryexit)
+	if (unlikely(current->audit_context))
 		audit_syscall_entry(audit_arch(), regs->regs[2],
 				    regs->regs[4], regs->regs[5],
 				    regs->regs[6], regs->regs[7]);
 }
+
+/*
+ * Notification of system call entry/exit
+ * - triggered by current->work.syscall_trace
+ */
+asmlinkage void syscall_trace_leave(struct pt_regs *regs)
+{
+	if (unlikely(current->audit_context))
+		audit_syscall_exit(AUDITSC_RESULT(regs->regs[7]),
+		                   -regs->regs[2]);
+
+	if (!(current->ptrace & PT_PTRACED))
+		return;
+
+	if (!test_thread_flag(TIF_SYSCALL_TRACE))
+		return;
+
+	/* The 0x80 provides a way for the tracing parent to distinguish
+	   between a syscall stop and SIGTRAP delivery */
+	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ?
+	                         0x80 : 0));
+
+	/*
+	 * this isn't the same as continuing with a signal, but it will do
+	 * for normal use.  strace only continues with a signal if the
+	 * stopping signal is not SIGTRAP.  -brl
+	 */
+	if (current->exit_code) {
+		send_sig(current->exit_code, current, 1);
+		current->exit_code = 0;
+	}
+}
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 7f5468b38d4..7a8e1dd7f6f 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -88,8 +88,7 @@ syscall_trace_entry:
 	SAVE_STATIC
 	move	s0, t2
 	move	a0, sp
-	li	a1, 0
-	jal	do_syscall_trace
+	jal	syscall_trace_enter
 
 	move	t0, s0
 	RESTORE_STATIC
@@ -565,7 +564,7 @@ einval:	li	v0, -ENOSYS
 	sys	sys_ioprio_get		2	/* 4315 */
 	sys	sys_utimensat		4
 	sys	sys_signalfd		3
-	sys	sys_ni_syscall		0
+	sys	sys_ni_syscall		0	/* was timerfd */
 	sys	sys_eventfd		1
 	sys	sys_fallocate		6	/* 4320 */
 	sys	sys_timerfd_create	2
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index a2e1fcbc41d..2d31c83224f 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -91,8 +91,7 @@ syscall_trace_entry:
 	SAVE_STATIC
 	move	s0, t2
 	move	a0, sp
-	li	a1, 0
-	jal	do_syscall_trace
+	jal	syscall_trace_enter
 
 	move	t0, s0
 	RESTORE_STATIC
@@ -404,7 +403,7 @@ sys_call_table:
 	PTR	sys_ioprio_get
 	PTR	sys_utimensat			/* 5275 */
 	PTR	sys_signalfd
-	PTR	sys_ni_syscall
+	PTR	sys_ni_syscall			/* was timerfd */
 	PTR	sys_eventfd
 	PTR	sys_fallocate
 	PTR	sys_timerfd_create		/* 5280 */
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index b2c7624995b..38a0503b9a4 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -89,8 +89,7 @@ n32_syscall_trace_entry:
 	SAVE_STATIC
 	move	s0, t2
 	move	a0, sp
-	li	a1, 0
-	jal	do_syscall_trace
+	jal	syscall_trace_enter
 
 	move	t0, s0
 	RESTORE_STATIC
@@ -403,7 +402,7 @@ EXPORT(sysn32_call_table)
 	PTR	sys_ioprio_get
 	PTR	compat_sys_utimensat
 	PTR	compat_sys_signalfd		/* 6280 */
-	PTR	sys_ni_syscall
+	PTR	sys_ni_syscall			/* was timerfd */
 	PTR	sys_eventfd
 	PTR	sys_fallocate
 	PTR	sys_timerfd_create
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 049a9c8c49a..91ea5e4041d 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -123,8 +123,7 @@ trace_a_syscall:
 
 	move	s0, t2			# Save syscall pointer
 	move	a0, sp
-	li	a1, 0
-	jal	do_syscall_trace
+	jal	syscall_trace_enter
 
 	move	t0, s0
 	RESTORE_STATIC
@@ -522,7 +521,7 @@ sys_call_table:
 	PTR	sys_ioprio_get			/* 4315 */
 	PTR	compat_sys_utimensat
 	PTR	compat_sys_signalfd
-	PTR	sys_ni_syscall
+	PTR	sys_ni_syscall			/* was timerfd */
 	PTR	sys_eventfd
 	PTR	sys32_fallocate			/* 4320 */
 	PTR	sys_timerfd_create
diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index 5a88cc4ccd5..cedac463374 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c
@@ -929,7 +929,7 @@ static void post_direct_ipi(int cpu, struct smtc_ipi *pipi)
 
 static void ipi_resched_interrupt(void)
 {
-	/* Return from interrupt should be enough to cause scheduler check */
+	scheduler_ipi();
 }
 
 static void ipi_call_interrupt(void)
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 58beabf50b3..d02765708dd 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -10,12 +10,9 @@
 #include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/linkage.h>
-#include <linux/mm.h>
 #include <linux/fs.h>
 #include <linux/smp.h>
-#include <linux/mman.h>
 #include <linux/ptrace.h>
-#include <linux/sched.h>
 #include <linux/string.h>
 #include <linux/syscalls.h>
 #include <linux/file.h>
@@ -25,11 +22,9 @@
 #include <linux/msg.h>
 #include <linux/shm.h>
 #include <linux/compiler.h>
-#include <linux/module.h>
 #include <linux/ipc.h>
 #include <linux/uaccess.h>
 #include <linux/slab.h>
-#include <linux/random.h>
 #include <linux/elf.h>
 
 #include <asm/asm.h>
@@ -66,121 +61,6 @@ out:
 	return res;
 }
 
-unsigned long shm_align_mask = PAGE_SIZE - 1;	/* Sane caches */
-
-EXPORT_SYMBOL(shm_align_mask);
-
-#define COLOUR_ALIGN(addr,pgoff)				\
-	((((addr) + shm_align_mask) & ~shm_align_mask) +	\
-	 (((pgoff) << PAGE_SHIFT) & shm_align_mask))
-
-unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
-	unsigned long len, unsigned long pgoff, unsigned long flags)
-{
-	struct vm_area_struct * vmm;
-	int do_color_align;
-	unsigned long task_size;
-
-#ifdef CONFIG_32BIT
-	task_size = TASK_SIZE;
-#else /* Must be CONFIG_64BIT*/
-	task_size = test_thread_flag(TIF_32BIT_ADDR) ? TASK_SIZE32 : TASK_SIZE;
-#endif
-
-	if (len > task_size)
-		return -ENOMEM;
-
-	if (flags & MAP_FIXED) {
-		/* Even MAP_FIXED mappings must reside within task_size.  */
-		if (task_size - len < addr)
-			return -EINVAL;
-
-		/*
-		 * We do not accept a shared mapping if it would violate
-		 * cache aliasing constraints.
-		 */
-		if ((flags & MAP_SHARED) &&
-		    ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask))
-			return -EINVAL;
-		return addr;
-	}
-
-	do_color_align = 0;
-	if (filp || (flags & MAP_SHARED))
-		do_color_align = 1;
-	if (addr) {
-		if (do_color_align)
-			addr = COLOUR_ALIGN(addr, pgoff);
-		else
-			addr = PAGE_ALIGN(addr);
-		vmm = find_vma(current->mm, addr);
-		if (task_size - len >= addr &&
-		    (!vmm || addr + len <= vmm->vm_start))
-			return addr;
-	}
-	addr = current->mm->mmap_base;
-	if (do_color_align)
-		addr = COLOUR_ALIGN(addr, pgoff);
-	else
-		addr = PAGE_ALIGN(addr);
-
-	for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) {
-		/* At this point:  (!vmm || addr < vmm->vm_end). */
-		if (task_size - len < addr)
-			return -ENOMEM;
-		if (!vmm || addr + len <= vmm->vm_start)
-			return addr;
-		addr = vmm->vm_end;
-		if (do_color_align)
-			addr = COLOUR_ALIGN(addr, pgoff);
-	}
-}
-
-void arch_pick_mmap_layout(struct mm_struct *mm)
-{
-	unsigned long random_factor = 0UL;
-
-	if (current->flags & PF_RANDOMIZE) {
-		random_factor = get_random_int();
-		random_factor = random_factor << PAGE_SHIFT;
-		if (TASK_IS_32BIT_ADDR)
-			random_factor &= 0xfffffful;
-		else
-			random_factor &= 0xffffffful;
-	}
-
-	mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
-	mm->get_unmapped_area = arch_get_unmapped_area;
-	mm->unmap_area = arch_unmap_area;
-}
-
-static inline unsigned long brk_rnd(void)
-{
-	unsigned long rnd = get_random_int();
-
-	rnd = rnd << PAGE_SHIFT;
-	/* 8MB for 32bit, 256MB for 64bit */
-	if (TASK_IS_32BIT_ADDR)
-		rnd = rnd & 0x7ffffful;
-	else
-		rnd = rnd & 0xffffffful;
-
-	return rnd;
-}
-
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-	unsigned long base = mm->brk;
-	unsigned long ret;
-
-	ret = PAGE_ALIGN(base + brk_rnd());
-
-	if (ret < mm->brk)
-		return mm->brk;
-
-	return ret;
-}
-
 SYSCALL_DEFINE6(mips_mmap, unsigned long, addr, unsigned long, len,
 	unsigned long, prot, unsigned long, flags, unsigned long,
 	fd, off_t, offset)
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 71350f7f2d8..e9b3af27d84 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -374,7 +374,8 @@ void __noreturn die(const char *str, struct pt_regs *regs)
 	unsigned long dvpret = dvpe();
 #endif /* CONFIG_MIPS_MT_SMTC */
 
-	notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV);
+	if (notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV) == NOTIFY_STOP)
+		sig = 0;
 
 	console_verbose();
 	spin_lock_irq(&die_lock);
@@ -383,9 +384,6 @@ void __noreturn die(const char *str, struct pt_regs *regs)
 	mips_mt_regdump(dvpret);
 #endif /* CONFIG_MIPS_MT_SMTC */
 
-	if (notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV) == NOTIFY_STOP)
-		sig = 0;
-
 	printk("%s[#%d]:\n", str, ++die_counter);
 	show_registers(regs);
 	add_taint(TAINT_DIE);
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
index 832afbb8758..01af3876cf9 100644
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -65,15 +65,18 @@ SECTIONS
 	NOTES :text :note
 	.dummy : { *(.dummy) } :text
 
+	_sdata = .;			/* Start of data section */
 	RODATA
 
 	/* writeable */
+	_sdata = .;				/* Start of data section */
 	.data : {	/* Data */
 		. = . + DATAOFFSET;		/* for CONFIG_MAPPED_KERNEL */
 
 		INIT_TASK_DATA(PAGE_SIZE)
 		NOSAVE_DATA
 		CACHELINE_ALIGNED_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT)
+		READ_MOSTLY_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT)
 		DATA_DATA
 		CONSTRUCTORS
 	}
diff --git a/arch/mips/lantiq/Kconfig b/arch/mips/lantiq/Kconfig
new file mode 100644
index 00000000000..3fccf210451
--- /dev/null
+++ b/arch/mips/lantiq/Kconfig
@@ -0,0 +1,23 @@
+if LANTIQ
+
+config SOC_TYPE_XWAY
+	bool
+	default n
+
+choice
+	prompt "SoC Type"
+	default SOC_XWAY
+
+config SOC_AMAZON_SE
+	bool "Amazon SE"
+	select SOC_TYPE_XWAY
+
+config SOC_XWAY
+	bool "XWAY"
+	select SOC_TYPE_XWAY
+	select HW_HAS_PCI
+endchoice
+
+source "arch/mips/lantiq/xway/Kconfig"
+
+endif
diff --git a/arch/mips/lantiq/Makefile b/arch/mips/lantiq/Makefile
new file mode 100644
index 00000000000..e5dae0e24b0
--- /dev/null
+++ b/arch/mips/lantiq/Makefile
@@ -0,0 +1,11 @@
+# Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 as published
+# by the Free Software Foundation.
+
+obj-y := irq.o setup.o clk.o prom.o devices.o
+
+obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+
+obj-$(CONFIG_SOC_TYPE_XWAY) += xway/
diff --git a/arch/mips/lantiq/Platform b/arch/mips/lantiq/Platform
new file mode 100644
index 00000000000..f3dff05722d
--- /dev/null
+++ b/arch/mips/lantiq/Platform
@@ -0,0 +1,8 @@
+#
+# Lantiq
+#
+
+platform-$(CONFIG_LANTIQ)	+= lantiq/
+cflags-$(CONFIG_LANTIQ)		+= -I$(srctree)/arch/mips/include/asm/mach-lantiq
+load-$(CONFIG_LANTIQ)		= 0xffffffff80002000
+cflags-$(CONFIG_SOC_TYPE_XWAY)	+= -I$(srctree)/arch/mips/include/asm/mach-lantiq/xway
diff --git a/arch/mips/lantiq/clk.c b/arch/mips/lantiq/clk.c
new file mode 100644
index 00000000000..94560899d13
--- /dev/null
+++ b/arch/mips/lantiq/clk.c
@@ -0,0 +1,140 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ * Copyright (C) 2010 Thomas Langer <thomas.langer@lantiq.com>
+ * Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/list.h>
+
+#include <asm/time.h>
+#include <asm/irq.h>
+#include <asm/div64.h>
+
+#include <lantiq_soc.h>
+
+#include "clk.h"
+
+struct clk {
+	const char *name;
+	unsigned long rate;
+	unsigned long (*get_rate) (void);
+};
+
+static struct clk *cpu_clk;
+static int cpu_clk_cnt;
+
+/* lantiq socs have 3 static clocks */
+static struct clk cpu_clk_generic[] = {
+	{
+		.name = "cpu",
+		.get_rate = ltq_get_cpu_hz,
+	}, {
+		.name = "fpi",
+		.get_rate = ltq_get_fpi_hz,
+	}, {
+		.name = "io",
+		.get_rate = ltq_get_io_region_clock,
+	},
+};
+
+static struct resource ltq_cgu_resource = {
+	.name	= "cgu",
+	.start	= LTQ_CGU_BASE_ADDR,
+	.end	= LTQ_CGU_BASE_ADDR + LTQ_CGU_SIZE - 1,
+	.flags	= IORESOURCE_MEM,
+};
+
+/* remapped clock register range */
+void __iomem *ltq_cgu_membase;
+
+void clk_init(void)
+{
+	cpu_clk = cpu_clk_generic;
+	cpu_clk_cnt = ARRAY_SIZE(cpu_clk_generic);
+}
+
+static inline int clk_good(struct clk *clk)
+{
+	return clk && !IS_ERR(clk);
+}
+
+unsigned long clk_get_rate(struct clk *clk)
+{
+	if (unlikely(!clk_good(clk)))
+		return 0;
+
+	if (clk->rate != 0)
+		return clk->rate;
+
+	if (clk->get_rate != NULL)
+		return clk->get_rate();
+
+	return 0;
+}
+EXPORT_SYMBOL(clk_get_rate);
+
+struct clk *clk_get(struct device *dev, const char *id)
+{
+	int i;
+
+	for (i = 0; i < cpu_clk_cnt; i++)
+		if (!strcmp(id, cpu_clk[i].name))
+			return &cpu_clk[i];
+	BUG();
+	return ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL(clk_get);
+
+void clk_put(struct clk *clk)
+{
+	/* not used */
+}
+EXPORT_SYMBOL(clk_put);
+
+static inline u32 ltq_get_counter_resolution(void)
+{
+	u32 res;
+
+	__asm__ __volatile__(
+		".set   push\n"
+		".set   mips32r2\n"
+		"rdhwr  %0, $3\n"
+		".set pop\n"
+		: "=&r" (res)
+		: /* no input */
+		: "memory");
+
+	return res;
+}
+
+void __init plat_time_init(void)
+{
+	struct clk *clk;
+
+	if (insert_resource(&iomem_resource, &ltq_cgu_resource) < 0)
+		panic("Failed to insert cgu memory\n");
+
+	if (request_mem_region(ltq_cgu_resource.start,
+			resource_size(&ltq_cgu_resource), "cgu") < 0)
+		panic("Failed to request cgu memory\n");
+
+	ltq_cgu_membase = ioremap_nocache(ltq_cgu_resource.start,
+				resource_size(&ltq_cgu_resource));
+	if (!ltq_cgu_membase) {
+		pr_err("Failed to remap cgu memory\n");
+		unreachable();
+	}
+	clk = clk_get(0, "cpu");
+	mips_hpt_frequency = clk_get_rate(clk) / ltq_get_counter_resolution();
+	write_c0_compare(read_c0_count());
+	clk_put(clk);
+}
diff --git a/arch/mips/lantiq/clk.h b/arch/mips/lantiq/clk.h
new file mode 100644
index 00000000000..3328925f2c3
--- /dev/null
+++ b/arch/mips/lantiq/clk.h
@@ -0,0 +1,18 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ * Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _LTQ_CLK_H__
+#define _LTQ_CLK_H__
+
+extern void clk_init(void);
+
+extern unsigned long ltq_get_cpu_hz(void);
+extern unsigned long ltq_get_fpi_hz(void);
+extern unsigned long ltq_get_io_region_clock(void);
+
+#endif
diff --git a/arch/mips/lantiq/devices.c b/arch/mips/lantiq/devices.c
new file mode 100644
index 00000000000..7b82c34cb16
--- /dev/null
+++ b/arch/mips/lantiq/devices.c
@@ -0,0 +1,122 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/platform_device.h>
+#include <linux/leds.h>
+#include <linux/etherdevice.h>
+#include <linux/reboot.h>
+#include <linux/time.h>
+#include <linux/io.h>
+#include <linux/gpio.h>
+#include <linux/leds.h>
+
+#include <asm/bootinfo.h>
+#include <asm/irq.h>
+
+#include <lantiq_soc.h>
+
+#include "devices.h"
+
+/* nor flash */
+static struct resource ltq_nor_resource = {
+	.name	= "nor",
+	.start	= LTQ_FLASH_START,
+	.end	= LTQ_FLASH_START + LTQ_FLASH_MAX - 1,
+	.flags  = IORESOURCE_MEM,
+};
+
+static struct platform_device ltq_nor = {
+	.name		= "ltq_nor",
+	.resource	= &ltq_nor_resource,
+	.num_resources	= 1,
+};
+
+void __init ltq_register_nor(struct physmap_flash_data *data)
+{
+	ltq_nor.dev.platform_data = data;
+	platform_device_register(&ltq_nor);
+}
+
+/* watchdog */
+static struct resource ltq_wdt_resource = {
+	.name	= "watchdog",
+	.start  = LTQ_WDT_BASE_ADDR,
+	.end    = LTQ_WDT_BASE_ADDR + LTQ_WDT_SIZE - 1,
+	.flags  = IORESOURCE_MEM,
+};
+
+void __init ltq_register_wdt(void)
+{
+	platform_device_register_simple("ltq_wdt", 0, &ltq_wdt_resource, 1);
+}
+
+/* asc ports */
+static struct resource ltq_asc0_resources[] = {
+	{
+		.name	= "asc0",
+		.start  = LTQ_ASC0_BASE_ADDR,
+		.end    = LTQ_ASC0_BASE_ADDR + LTQ_ASC_SIZE - 1,
+		.flags  = IORESOURCE_MEM,
+	},
+	IRQ_RES(tx, LTQ_ASC_TIR(0)),
+	IRQ_RES(rx, LTQ_ASC_RIR(0)),
+	IRQ_RES(err, LTQ_ASC_EIR(0)),
+};
+
+static struct resource ltq_asc1_resources[] = {
+	{
+		.name	= "asc1",
+		.start  = LTQ_ASC1_BASE_ADDR,
+		.end    = LTQ_ASC1_BASE_ADDR + LTQ_ASC_SIZE - 1,
+		.flags  = IORESOURCE_MEM,
+	},
+	IRQ_RES(tx, LTQ_ASC_TIR(1)),
+	IRQ_RES(rx, LTQ_ASC_RIR(1)),
+	IRQ_RES(err, LTQ_ASC_EIR(1)),
+};
+
+void __init ltq_register_asc(int port)
+{
+	switch (port) {
+	case 0:
+		platform_device_register_simple("ltq_asc", 0,
+			ltq_asc0_resources, ARRAY_SIZE(ltq_asc0_resources));
+		break;
+	case 1:
+		platform_device_register_simple("ltq_asc", 1,
+			ltq_asc1_resources, ARRAY_SIZE(ltq_asc1_resources));
+		break;
+	default:
+		break;
+	}
+}
+
+#ifdef CONFIG_PCI
+/* pci */
+static struct platform_device ltq_pci = {
+	.name		= "ltq_pci",
+	.num_resources	= 0,
+};
+
+void __init ltq_register_pci(struct ltq_pci_data *data)
+{
+	ltq_pci.dev.platform_data = data;
+	platform_device_register(&ltq_pci);
+}
+#else
+void __init ltq_register_pci(struct ltq_pci_data *data)
+{
+	pr_err("kernel is compiled without PCI support\n");
+}
+#endif
diff --git a/arch/mips/lantiq/devices.h b/arch/mips/lantiq/devices.h
new file mode 100644
index 00000000000..2947bb19a52
--- /dev/null
+++ b/arch/mips/lantiq/devices.h
@@ -0,0 +1,23 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _LTQ_DEVICES_H__
+#define _LTQ_DEVICES_H__
+
+#include <lantiq_platform.h>
+#include <linux/mtd/physmap.h>
+
+#define IRQ_RES(resname, irq) \
+	{.name = #resname, .start = (irq), .flags = IORESOURCE_IRQ}
+
+extern void ltq_register_nor(struct physmap_flash_data *data);
+extern void ltq_register_wdt(void);
+extern void ltq_register_asc(int port);
+extern void ltq_register_pci(struct ltq_pci_data *data);
+
+#endif
diff --git a/arch/mips/lantiq/early_printk.c b/arch/mips/lantiq/early_printk.c
new file mode 100644
index 00000000000..972e05f8763
--- /dev/null
+++ b/arch/mips/lantiq/early_printk.c
@@ -0,0 +1,33 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+#include <linux/cpu.h>
+
+#include <lantiq.h>
+#include <lantiq_soc.h>
+
+/* no ioremap possible at this early stage, lets use KSEG1 instead  */
+#define LTQ_ASC_BASE	KSEG1ADDR(LTQ_ASC1_BASE_ADDR)
+#define ASC_BUF		1024
+#define LTQ_ASC_FSTAT	((u32 *)(LTQ_ASC_BASE + 0x0048))
+#define LTQ_ASC_TBUF	((u32 *)(LTQ_ASC_BASE + 0x0020))
+#define TXMASK		0x3F00
+#define TXOFFSET	8
+
+void prom_putchar(char c)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	do { } while ((ltq_r32(LTQ_ASC_FSTAT) & TXMASK) >> TXOFFSET);
+	if (c == '\n')
+		ltq_w32('\r', LTQ_ASC_TBUF);
+	ltq_w32(c, LTQ_ASC_TBUF);
+	local_irq_restore(flags);
+}
diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c
new file mode 100644
index 00000000000..fc89795cafd
--- /dev/null
+++ b/arch/mips/lantiq/irq.c
@@ -0,0 +1,326 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ * Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2010 Thomas Langer <thomas.langer@lantiq.com>
+ */
+
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+
+#include <asm/bootinfo.h>
+#include <asm/irq_cpu.h>
+
+#include <lantiq_soc.h>
+#include <irq.h>
+
+/* register definitions */
+#define LTQ_ICU_IM0_ISR		0x0000
+#define LTQ_ICU_IM0_IER		0x0008
+#define LTQ_ICU_IM0_IOSR	0x0010
+#define LTQ_ICU_IM0_IRSR	0x0018
+#define LTQ_ICU_IM0_IMR		0x0020
+#define LTQ_ICU_IM1_ISR		0x0028
+#define LTQ_ICU_OFFSET		(LTQ_ICU_IM1_ISR - LTQ_ICU_IM0_ISR)
+
+#define LTQ_EIU_EXIN_C		0x0000
+#define LTQ_EIU_EXIN_INIC	0x0004
+#define LTQ_EIU_EXIN_INEN	0x000C
+
+/* irq numbers used by the external interrupt unit (EIU) */
+#define LTQ_EIU_IR0		(INT_NUM_IM4_IRL0 + 30)
+#define LTQ_EIU_IR1		(INT_NUM_IM3_IRL0 + 31)
+#define LTQ_EIU_IR2		(INT_NUM_IM1_IRL0 + 26)
+#define LTQ_EIU_IR3		INT_NUM_IM1_IRL0
+#define LTQ_EIU_IR4		(INT_NUM_IM1_IRL0 + 1)
+#define LTQ_EIU_IR5		(INT_NUM_IM1_IRL0 + 2)
+#define LTQ_EIU_IR6		(INT_NUM_IM2_IRL0 + 30)
+
+#define MAX_EIU			6
+
+/* irqs generated by device attached to the EBU need to be acked in
+ * a special manner
+ */
+#define LTQ_ICU_EBU_IRQ		22
+
+#define ltq_icu_w32(x, y)	ltq_w32((x), ltq_icu_membase + (y))
+#define ltq_icu_r32(x)		ltq_r32(ltq_icu_membase + (x))
+
+#define ltq_eiu_w32(x, y)	ltq_w32((x), ltq_eiu_membase + (y))
+#define ltq_eiu_r32(x)		ltq_r32(ltq_eiu_membase + (x))
+
+static unsigned short ltq_eiu_irq[MAX_EIU] = {
+	LTQ_EIU_IR0,
+	LTQ_EIU_IR1,
+	LTQ_EIU_IR2,
+	LTQ_EIU_IR3,
+	LTQ_EIU_IR4,
+	LTQ_EIU_IR5,
+};
+
+static struct resource ltq_icu_resource = {
+	.name	= "icu",
+	.start	= LTQ_ICU_BASE_ADDR,
+	.end	= LTQ_ICU_BASE_ADDR + LTQ_ICU_SIZE - 1,
+	.flags	= IORESOURCE_MEM,
+};
+
+static struct resource ltq_eiu_resource = {
+	.name	= "eiu",
+	.start	= LTQ_EIU_BASE_ADDR,
+	.end	= LTQ_EIU_BASE_ADDR + LTQ_ICU_SIZE - 1,
+	.flags	= IORESOURCE_MEM,
+};
+
+static void __iomem *ltq_icu_membase;
+static void __iomem *ltq_eiu_membase;
+
+void ltq_disable_irq(struct irq_data *d)
+{
+	u32 ier = LTQ_ICU_IM0_IER;
+	int irq_nr = d->irq - INT_NUM_IRQ0;
+
+	ier += LTQ_ICU_OFFSET * (irq_nr / INT_NUM_IM_OFFSET);
+	irq_nr %= INT_NUM_IM_OFFSET;
+	ltq_icu_w32(ltq_icu_r32(ier) & ~(1 << irq_nr), ier);
+}
+
+void ltq_mask_and_ack_irq(struct irq_data *d)
+{
+	u32 ier = LTQ_ICU_IM0_IER;
+	u32 isr = LTQ_ICU_IM0_ISR;
+	int irq_nr = d->irq - INT_NUM_IRQ0;
+
+	ier += LTQ_ICU_OFFSET * (irq_nr / INT_NUM_IM_OFFSET);
+	isr += LTQ_ICU_OFFSET * (irq_nr / INT_NUM_IM_OFFSET);
+	irq_nr %= INT_NUM_IM_OFFSET;
+	ltq_icu_w32(ltq_icu_r32(ier) & ~(1 << irq_nr), ier);
+	ltq_icu_w32((1 << irq_nr), isr);
+}
+
+static void ltq_ack_irq(struct irq_data *d)
+{
+	u32 isr = LTQ_ICU_IM0_ISR;
+	int irq_nr = d->irq - INT_NUM_IRQ0;
+
+	isr += LTQ_ICU_OFFSET * (irq_nr / INT_NUM_IM_OFFSET);
+	irq_nr %= INT_NUM_IM_OFFSET;
+	ltq_icu_w32((1 << irq_nr), isr);
+}
+
+void ltq_enable_irq(struct irq_data *d)
+{
+	u32 ier = LTQ_ICU_IM0_IER;
+	int irq_nr = d->irq - INT_NUM_IRQ0;
+
+	ier += LTQ_ICU_OFFSET  * (irq_nr / INT_NUM_IM_OFFSET);
+	irq_nr %= INT_NUM_IM_OFFSET;
+	ltq_icu_w32(ltq_icu_r32(ier) | (1 << irq_nr), ier);
+}
+
+static unsigned int ltq_startup_eiu_irq(struct irq_data *d)
+{
+	int i;
+	int irq_nr = d->irq - INT_NUM_IRQ0;
+
+	ltq_enable_irq(d);
+	for (i = 0; i < MAX_EIU; i++) {
+		if (irq_nr == ltq_eiu_irq[i]) {
+			/* low level - we should really handle set_type */
+			ltq_eiu_w32(ltq_eiu_r32(LTQ_EIU_EXIN_C) |
+				(0x6 << (i * 4)), LTQ_EIU_EXIN_C);
+			/* clear all pending */
+			ltq_eiu_w32(ltq_eiu_r32(LTQ_EIU_EXIN_INIC) & ~(1 << i),
+				LTQ_EIU_EXIN_INIC);
+			/* enable */
+			ltq_eiu_w32(ltq_eiu_r32(LTQ_EIU_EXIN_INEN) | (1 << i),
+				LTQ_EIU_EXIN_INEN);
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static void ltq_shutdown_eiu_irq(struct irq_data *d)
+{
+	int i;
+	int irq_nr = d->irq - INT_NUM_IRQ0;
+
+	ltq_disable_irq(d);
+	for (i = 0; i < MAX_EIU; i++) {
+		if (irq_nr == ltq_eiu_irq[i]) {
+			/* disable */
+			ltq_eiu_w32(ltq_eiu_r32(LTQ_EIU_EXIN_INEN) & ~(1 << i),
+				LTQ_EIU_EXIN_INEN);
+			break;
+		}
+	}
+}
+
+static struct irq_chip ltq_irq_type = {
+	"icu",
+	.irq_enable = ltq_enable_irq,
+	.irq_disable = ltq_disable_irq,
+	.irq_unmask = ltq_enable_irq,
+	.irq_ack = ltq_ack_irq,
+	.irq_mask = ltq_disable_irq,
+	.irq_mask_ack = ltq_mask_and_ack_irq,
+};
+
+static struct irq_chip ltq_eiu_type = {
+	"eiu",
+	.irq_startup = ltq_startup_eiu_irq,
+	.irq_shutdown = ltq_shutdown_eiu_irq,
+	.irq_enable = ltq_enable_irq,
+	.irq_disable = ltq_disable_irq,
+	.irq_unmask = ltq_enable_irq,
+	.irq_ack = ltq_ack_irq,
+	.irq_mask = ltq_disable_irq,
+	.irq_mask_ack = ltq_mask_and_ack_irq,
+};
+
+static void ltq_hw_irqdispatch(int module)
+{
+	u32 irq;
+
+	irq = ltq_icu_r32(LTQ_ICU_IM0_IOSR + (module * LTQ_ICU_OFFSET));
+	if (irq == 0)
+		return;
+
+	/* silicon bug causes only the msb set to 1 to be valid. all
+	 * other bits might be bogus
+	 */
+	irq = __fls(irq);
+	do_IRQ((int)irq + INT_NUM_IM0_IRL0 + (INT_NUM_IM_OFFSET * module));
+
+	/* if this is a EBU irq, we need to ack it or get a deadlock */
+	if ((irq == LTQ_ICU_EBU_IRQ) && (module == 0))
+		ltq_ebu_w32(ltq_ebu_r32(LTQ_EBU_PCC_ISTAT) | 0x10,
+			LTQ_EBU_PCC_ISTAT);
+}
+
+#define DEFINE_HWx_IRQDISPATCH(x)					\
+	static void ltq_hw ## x ## _irqdispatch(void)			\
+	{								\
+		ltq_hw_irqdispatch(x);					\
+	}
+DEFINE_HWx_IRQDISPATCH(0)
+DEFINE_HWx_IRQDISPATCH(1)
+DEFINE_HWx_IRQDISPATCH(2)
+DEFINE_HWx_IRQDISPATCH(3)
+DEFINE_HWx_IRQDISPATCH(4)
+
+static void ltq_hw5_irqdispatch(void)
+{
+	do_IRQ(MIPS_CPU_TIMER_IRQ);
+}
+
+asmlinkage void plat_irq_dispatch(void)
+{
+	unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM;
+	unsigned int i;
+
+	if (pending & CAUSEF_IP7) {
+		do_IRQ(MIPS_CPU_TIMER_IRQ);
+		goto out;
+	} else {
+		for (i = 0; i < 5; i++) {
+			if (pending & (CAUSEF_IP2 << i)) {
+				ltq_hw_irqdispatch(i);
+				goto out;
+			}
+		}
+	}
+	pr_alert("Spurious IRQ: CAUSE=0x%08x\n", read_c0_status());
+
+out:
+	return;
+}
+
+static struct irqaction cascade = {
+	.handler = no_action,
+	.flags = IRQF_DISABLED,
+	.name = "cascade",
+};
+
+void __init arch_init_irq(void)
+{
+	int i;
+
+	if (insert_resource(&iomem_resource, &ltq_icu_resource) < 0)
+		panic("Failed to insert icu memory\n");
+
+	if (request_mem_region(ltq_icu_resource.start,
+			resource_size(&ltq_icu_resource), "icu") < 0)
+		panic("Failed to request icu memory\n");
+
+	ltq_icu_membase = ioremap_nocache(ltq_icu_resource.start,
+				resource_size(&ltq_icu_resource));
+	if (!ltq_icu_membase)
+		panic("Failed to remap icu memory\n");
+
+	if (insert_resource(&iomem_resource, &ltq_eiu_resource) < 0)
+		panic("Failed to insert eiu memory\n");
+
+	if (request_mem_region(ltq_eiu_resource.start,
+			resource_size(&ltq_eiu_resource), "eiu") < 0)
+		panic("Failed to request eiu memory\n");
+
+	ltq_eiu_membase = ioremap_nocache(ltq_eiu_resource.start,
+				resource_size(&ltq_eiu_resource));
+	if (!ltq_eiu_membase)
+		panic("Failed to remap eiu memory\n");
+
+	/* make sure all irqs are turned off by default */
+	for (i = 0; i < 5; i++)
+		ltq_icu_w32(0, LTQ_ICU_IM0_IER + (i * LTQ_ICU_OFFSET));
+
+	/* clear all possibly pending interrupts */
+	ltq_icu_w32(~0, LTQ_ICU_IM0_ISR + (i * LTQ_ICU_OFFSET));
+
+	mips_cpu_irq_init();
+
+	for (i = 2; i <= 6; i++)
+		setup_irq(i, &cascade);
+
+	if (cpu_has_vint) {
+		pr_info("Setting up vectored interrupts\n");
+		set_vi_handler(2, ltq_hw0_irqdispatch);
+		set_vi_handler(3, ltq_hw1_irqdispatch);
+		set_vi_handler(4, ltq_hw2_irqdispatch);
+		set_vi_handler(5, ltq_hw3_irqdispatch);
+		set_vi_handler(6, ltq_hw4_irqdispatch);
+		set_vi_handler(7, ltq_hw5_irqdispatch);
+	}
+
+	for (i = INT_NUM_IRQ0;
+		i <= (INT_NUM_IRQ0 + (5 * INT_NUM_IM_OFFSET)); i++)
+		if ((i == LTQ_EIU_IR0) || (i == LTQ_EIU_IR1) ||
+			(i == LTQ_EIU_IR2))
+			irq_set_chip_and_handler(i, &ltq_eiu_type,
+				handle_level_irq);
+		/* EIU3-5 only exist on ar9 and vr9 */
+		else if (((i == LTQ_EIU_IR3) || (i == LTQ_EIU_IR4) ||
+			(i == LTQ_EIU_IR5)) && (ltq_is_ar9() || ltq_is_vr9()))
+			irq_set_chip_and_handler(i, &ltq_eiu_type,
+				handle_level_irq);
+		else
+			irq_set_chip_and_handler(i, &ltq_irq_type,
+				handle_level_irq);
+
+#if !defined(CONFIG_MIPS_MT_SMP) && !defined(CONFIG_MIPS_MT_SMTC)
+	set_c0_status(IE_IRQ0 | IE_IRQ1 | IE_IRQ2 |
+		IE_IRQ3 | IE_IRQ4 | IE_IRQ5);
+#else
+	set_c0_status(IE_SW0 | IE_SW1 | IE_IRQ0 | IE_IRQ1 |
+		IE_IRQ2 | IE_IRQ3 | IE_IRQ4 | IE_IRQ5);
+#endif
+}
+
+unsigned int __cpuinit get_c0_compare_int(void)
+{
+	return CP0_LEGACY_COMPARE_IRQ;
+}
diff --git a/arch/mips/lantiq/machtypes.h b/arch/mips/lantiq/machtypes.h
new file mode 100644
index 00000000000..7e01b8c484e
--- /dev/null
+++ b/arch/mips/lantiq/machtypes.h
@@ -0,0 +1,20 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _LANTIQ_MACH_H__
+#define _LANTIQ_MACH_H__
+
+#include <asm/mips_machine.h>
+
+enum lantiq_mach_type {
+	LTQ_MACH_GENERIC = 0,
+	LTQ_MACH_EASY50712,	/* Danube evaluation board */
+	LTQ_MACH_EASY50601,	/* Amazon SE evaluation board */
+};
+
+#endif
diff --git a/arch/mips/lantiq/prom.c b/arch/mips/lantiq/prom.c
new file mode 100644
index 00000000000..56ba007bf1e
--- /dev/null
+++ b/arch/mips/lantiq/prom.c
@@ -0,0 +1,71 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ * Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/module.h>
+#include <linux/clk.h>
+#include <asm/bootinfo.h>
+#include <asm/time.h>
+
+#include <lantiq.h>
+
+#include "prom.h"
+#include "clk.h"
+
+static struct ltq_soc_info soc_info;
+
+unsigned int ltq_get_cpu_ver(void)
+{
+	return soc_info.rev;
+}
+EXPORT_SYMBOL(ltq_get_cpu_ver);
+
+unsigned int ltq_get_soc_type(void)
+{
+	return soc_info.type;
+}
+EXPORT_SYMBOL(ltq_get_soc_type);
+
+const char *get_system_type(void)
+{
+	return soc_info.sys_type;
+}
+
+void prom_free_prom_memory(void)
+{
+}
+
+static void __init prom_init_cmdline(void)
+{
+	int argc = fw_arg0;
+	char **argv = (char **) KSEG1ADDR(fw_arg1);
+	int i;
+
+	for (i = 0; i < argc; i++) {
+		char *p = (char *)  KSEG1ADDR(argv[i]);
+
+		if (p && *p) {
+			strlcat(arcs_cmdline, p, sizeof(arcs_cmdline));
+			strlcat(arcs_cmdline, " ", sizeof(arcs_cmdline));
+		}
+	}
+}
+
+void __init prom_init(void)
+{
+	struct clk *clk;
+
+	ltq_soc_detect(&soc_info);
+	clk_init();
+	clk = clk_get(0, "cpu");
+	snprintf(soc_info.sys_type, LTQ_SYS_TYPE_LEN - 1, "%s rev1.%d",
+		soc_info.name, soc_info.rev);
+	clk_put(clk);
+	soc_info.sys_type[LTQ_SYS_TYPE_LEN - 1] = '\0';
+	pr_info("SoC: %s\n", soc_info.sys_type);
+	prom_init_cmdline();
+}
diff --git a/arch/mips/lantiq/prom.h b/arch/mips/lantiq/prom.h
new file mode 100644
index 00000000000..b4229d94280
--- /dev/null
+++ b/arch/mips/lantiq/prom.h
@@ -0,0 +1,25 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ * Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _LTQ_PROM_H__
+#define _LTQ_PROM_H__
+
+#define LTQ_SYS_TYPE_LEN	0x100
+
+struct ltq_soc_info {
+	unsigned char *name;
+	unsigned int rev;
+	unsigned int partnum;
+	unsigned int type;
+	unsigned char sys_type[LTQ_SYS_TYPE_LEN];
+};
+
+extern void ltq_soc_detect(struct ltq_soc_info *i);
+extern void ltq_soc_setup(void);
+
+#endif
diff --git a/arch/mips/lantiq/setup.c b/arch/mips/lantiq/setup.c
new file mode 100644
index 00000000000..9b8af77ed0f
--- /dev/null
+++ b/arch/mips/lantiq/setup.c
@@ -0,0 +1,66 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ * Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <asm/bootinfo.h>
+
+#include <lantiq_soc.h>
+
+#include "machtypes.h"
+#include "devices.h"
+#include "prom.h"
+
+void __init plat_mem_setup(void)
+{
+	/* assume 16M as default incase uboot fails to pass proper ramsize */
+	unsigned long memsize = 16;
+	char **envp = (char **) KSEG1ADDR(fw_arg2);
+
+	ioport_resource.start = IOPORT_RESOURCE_START;
+	ioport_resource.end = IOPORT_RESOURCE_END;
+	iomem_resource.start = IOMEM_RESOURCE_START;
+	iomem_resource.end = IOMEM_RESOURCE_END;
+
+	set_io_port_base((unsigned long) KSEG1);
+
+	while (*envp) {
+		char *e = (char *)KSEG1ADDR(*envp);
+		if (!strncmp(e, "memsize=", 8)) {
+			e += 8;
+			if (strict_strtoul(e, 0, &memsize))
+				pr_warn("bad memsize specified\n");
+		}
+		envp++;
+	}
+	memsize *= 1024 * 1024;
+	add_memory_region(0x00000000, memsize, BOOT_MEM_RAM);
+}
+
+static int __init
+lantiq_setup(void)
+{
+	ltq_soc_setup();
+	mips_machine_setup();
+	return 0;
+}
+
+arch_initcall(lantiq_setup);
+
+static void __init
+lantiq_generic_init(void)
+{
+	/* Nothing to do */
+}
+
+MIPS_MACHINE(LTQ_MACH_GENERIC,
+	     "Generic",
+	     "Generic Lantiq based board",
+	     lantiq_generic_init);
diff --git a/arch/mips/lantiq/xway/Kconfig b/arch/mips/lantiq/xway/Kconfig
new file mode 100644
index 00000000000..2b857de3662
--- /dev/null
+++ b/arch/mips/lantiq/xway/Kconfig
@@ -0,0 +1,23 @@
+if SOC_XWAY
+
+menu "MIPS Machine"
+
+config LANTIQ_MACH_EASY50712
+	bool "Easy50712 - Danube"
+	default y
+
+endmenu
+
+endif
+
+if SOC_AMAZON_SE
+
+menu "MIPS Machine"
+
+config LANTIQ_MACH_EASY50601
+	bool "Easy50601 - Amazon SE"
+	default y
+
+endmenu
+
+endif
diff --git a/arch/mips/lantiq/xway/Makefile b/arch/mips/lantiq/xway/Makefile
new file mode 100644
index 00000000000..c517f2e7756
--- /dev/null
+++ b/arch/mips/lantiq/xway/Makefile
@@ -0,0 +1,7 @@
+obj-y := pmu.o ebu.o reset.o gpio.o gpio_stp.o gpio_ebu.o devices.o dma.o
+
+obj-$(CONFIG_SOC_XWAY) += clk-xway.o prom-xway.o setup-xway.o
+obj-$(CONFIG_SOC_AMAZON_SE) += clk-ase.o prom-ase.o setup-ase.o
+
+obj-$(CONFIG_LANTIQ_MACH_EASY50712) += mach-easy50712.o
+obj-$(CONFIG_LANTIQ_MACH_EASY50601) += mach-easy50601.o
diff --git a/arch/mips/lantiq/xway/clk-ase.c b/arch/mips/lantiq/xway/clk-ase.c
new file mode 100644
index 00000000000..22d823acd53
--- /dev/null
+++ b/arch/mips/lantiq/xway/clk-ase.c
@@ -0,0 +1,48 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2011 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/clk.h>
+
+#include <asm/time.h>
+#include <asm/irq.h>
+#include <asm/div64.h>
+
+#include <lantiq_soc.h>
+
+/* cgu registers */
+#define LTQ_CGU_SYS	0x0010
+
+unsigned int ltq_get_io_region_clock(void)
+{
+	return CLOCK_133M;
+}
+EXPORT_SYMBOL(ltq_get_io_region_clock);
+
+unsigned int ltq_get_fpi_bus_clock(int fpi)
+{
+	return CLOCK_133M;
+}
+EXPORT_SYMBOL(ltq_get_fpi_bus_clock);
+
+unsigned int ltq_get_cpu_hz(void)
+{
+	if (ltq_cgu_r32(LTQ_CGU_SYS) & (1 << 5))
+		return CLOCK_266M;
+	else
+		return CLOCK_133M;
+}
+EXPORT_SYMBOL(ltq_get_cpu_hz);
+
+unsigned int ltq_get_fpi_hz(void)
+{
+	return CLOCK_133M;
+}
+EXPORT_SYMBOL(ltq_get_fpi_hz);
diff --git a/arch/mips/lantiq/xway/clk-xway.c b/arch/mips/lantiq/xway/clk-xway.c
new file mode 100644
index 00000000000..ddd39593c58
--- /dev/null
+++ b/arch/mips/lantiq/xway/clk-xway.c
@@ -0,0 +1,223 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/clk.h>
+
+#include <asm/time.h>
+#include <asm/irq.h>
+#include <asm/div64.h>
+
+#include <lantiq_soc.h>
+
+static unsigned int ltq_ram_clocks[] = {
+	CLOCK_167M, CLOCK_133M, CLOCK_111M, CLOCK_83M };
+#define DDR_HZ ltq_ram_clocks[ltq_cgu_r32(LTQ_CGU_SYS) & 0x3]
+
+#define BASIC_FREQUENCY_1	35328000
+#define BASIC_FREQUENCY_2	36000000
+#define BASIS_REQUENCY_USB	12000000
+
+#define GET_BITS(x, msb, lsb) \
+	(((x) & ((1 << ((msb) + 1)) - 1)) >> (lsb))
+
+#define LTQ_CGU_PLL0_CFG	0x0004
+#define LTQ_CGU_PLL1_CFG	0x0008
+#define LTQ_CGU_PLL2_CFG	0x000C
+#define LTQ_CGU_SYS		0x0010
+#define LTQ_CGU_UPDATE		0x0014
+#define LTQ_CGU_IF_CLK		0x0018
+#define LTQ_CGU_OSC_CON		0x001C
+#define LTQ_CGU_SMD		0x0020
+#define LTQ_CGU_CT1SR		0x0028
+#define LTQ_CGU_CT2SR		0x002C
+#define LTQ_CGU_PCMCR		0x0030
+#define LTQ_CGU_PCI_CR		0x0034
+#define LTQ_CGU_PD_PC		0x0038
+#define LTQ_CGU_FMR		0x003C
+
+#define CGU_PLL0_PHASE_DIVIDER_ENABLE	\
+	(ltq_cgu_r32(LTQ_CGU_PLL0_CFG) & (1 << 31))
+#define CGU_PLL0_BYPASS			\
+	(ltq_cgu_r32(LTQ_CGU_PLL0_CFG) & (1 << 30))
+#define CGU_PLL0_CFG_DSMSEL		\
+	(ltq_cgu_r32(LTQ_CGU_PLL0_CFG) & (1 << 28))
+#define CGU_PLL0_CFG_FRAC_EN		\
+	(ltq_cgu_r32(LTQ_CGU_PLL0_CFG) & (1 << 27))
+#define CGU_PLL1_SRC			\
+	(ltq_cgu_r32(LTQ_CGU_PLL1_CFG) & (1 << 31))
+#define CGU_PLL2_PHASE_DIVIDER_ENABLE	\
+	(ltq_cgu_r32(LTQ_CGU_PLL2_CFG) & (1 << 20))
+#define CGU_SYS_FPI_SEL			(1 << 6)
+#define CGU_SYS_DDR_SEL			0x3
+#define CGU_PLL0_SRC			(1 << 29)
+
+#define CGU_PLL0_CFG_PLLK	GET_BITS(ltq_cgu_r32(LTQ_CGU_PLL0_CFG), 26, 17)
+#define CGU_PLL0_CFG_PLLN	GET_BITS(ltq_cgu_r32(LTQ_CGU_PLL0_CFG), 12, 6)
+#define CGU_PLL0_CFG_PLLM	GET_BITS(ltq_cgu_r32(LTQ_CGU_PLL0_CFG), 5, 2)
+#define CGU_PLL2_SRC		GET_BITS(ltq_cgu_r32(LTQ_CGU_PLL2_CFG), 18, 17)
+#define CGU_PLL2_CFG_INPUT_DIV	GET_BITS(ltq_cgu_r32(LTQ_CGU_PLL2_CFG), 16, 13)
+
+static unsigned int ltq_get_pll0_fdiv(void);
+
+static inline unsigned int get_input_clock(int pll)
+{
+	switch (pll) {
+	case 0:
+		if (ltq_cgu_r32(LTQ_CGU_PLL0_CFG) & CGU_PLL0_SRC)
+			return BASIS_REQUENCY_USB;
+		else if (CGU_PLL0_PHASE_DIVIDER_ENABLE)
+			return BASIC_FREQUENCY_1;
+		else
+			return BASIC_FREQUENCY_2;
+	case 1:
+		if (CGU_PLL1_SRC)
+			return BASIS_REQUENCY_USB;
+		else if (CGU_PLL0_PHASE_DIVIDER_ENABLE)
+			return BASIC_FREQUENCY_1;
+		else
+			return BASIC_FREQUENCY_2;
+	case 2:
+		switch (CGU_PLL2_SRC) {
+		case 0:
+			return ltq_get_pll0_fdiv();
+		case 1:
+			return CGU_PLL2_PHASE_DIVIDER_ENABLE ?
+				BASIC_FREQUENCY_1 :
+				BASIC_FREQUENCY_2;
+		case 2:
+			return BASIS_REQUENCY_USB;
+		}
+	default:
+		return 0;
+	}
+}
+
+static inline unsigned int cal_dsm(int pll, unsigned int num, unsigned int den)
+{
+	u64 res, clock = get_input_clock(pll);
+
+	res = num * clock;
+	do_div(res, den);
+	return res;
+}
+
+static inline unsigned int mash_dsm(int pll, unsigned int M, unsigned int N,
+	unsigned int K)
+{
+	unsigned int num = ((N + 1) << 10) + K;
+	unsigned int den = (M + 1) << 10;
+
+	return cal_dsm(pll, num, den);
+}
+
+static inline unsigned int ssff_dsm_1(int pll, unsigned int M, unsigned int N,
+	unsigned int K)
+{
+	unsigned int num = ((N + 1) << 11) + K + 512;
+	unsigned int den = (M + 1) << 11;
+
+	return cal_dsm(pll, num, den);
+}
+
+static inline unsigned int ssff_dsm_2(int pll, unsigned int M, unsigned int N,
+	unsigned int K)
+{
+	unsigned int num = K >= 512 ?
+		((N + 1) << 12) + K - 512 : ((N + 1) << 12) + K + 3584;
+	unsigned int den = (M + 1) << 12;
+
+	return cal_dsm(pll, num, den);
+}
+
+static inline unsigned int dsm(int pll, unsigned int M, unsigned int N,
+	unsigned int K, unsigned int dsmsel, unsigned int phase_div_en)
+{
+	if (!dsmsel)
+		return mash_dsm(pll, M, N, K);
+	else if (!phase_div_en)
+		return mash_dsm(pll, M, N, K);
+	else
+		return ssff_dsm_2(pll, M, N, K);
+}
+
+static inline unsigned int ltq_get_pll0_fosc(void)
+{
+	if (CGU_PLL0_BYPASS)
+		return get_input_clock(0);
+	else
+		return !CGU_PLL0_CFG_FRAC_EN
+			? dsm(0, CGU_PLL0_CFG_PLLM, CGU_PLL0_CFG_PLLN, 0,
+				CGU_PLL0_CFG_DSMSEL,
+				CGU_PLL0_PHASE_DIVIDER_ENABLE)
+			: dsm(0, CGU_PLL0_CFG_PLLM, CGU_PLL0_CFG_PLLN,
+				CGU_PLL0_CFG_PLLK, CGU_PLL0_CFG_DSMSEL,
+				CGU_PLL0_PHASE_DIVIDER_ENABLE);
+}
+
+static unsigned int ltq_get_pll0_fdiv(void)
+{
+	unsigned int div = CGU_PLL2_CFG_INPUT_DIV + 1;
+
+	return (ltq_get_pll0_fosc() + (div >> 1)) / div;
+}
+
+unsigned int ltq_get_io_region_clock(void)
+{
+	unsigned int ret = ltq_get_pll0_fosc();
+
+	switch (ltq_cgu_r32(LTQ_CGU_PLL2_CFG) & CGU_SYS_DDR_SEL) {
+	default:
+	case 0:
+		return (ret + 1) / 2;
+	case 1:
+		return (ret * 2 + 2) / 5;
+	case 2:
+		return (ret + 1) / 3;
+	case 3:
+		return (ret + 2) / 4;
+	}
+}
+EXPORT_SYMBOL(ltq_get_io_region_clock);
+
+unsigned int ltq_get_fpi_bus_clock(int fpi)
+{
+	unsigned int ret = ltq_get_io_region_clock();
+
+	if ((fpi == 2) && (ltq_cgu_r32(LTQ_CGU_SYS) & CGU_SYS_FPI_SEL))
+		ret >>= 1;
+	return ret;
+}
+EXPORT_SYMBOL(ltq_get_fpi_bus_clock);
+
+unsigned int ltq_get_cpu_hz(void)
+{
+	switch (ltq_cgu_r32(LTQ_CGU_SYS) & 0xc) {
+	case 0:
+		return CLOCK_333M;
+	case 4:
+		return DDR_HZ;
+	case 8:
+		return DDR_HZ << 1;
+	default:
+		return DDR_HZ >> 1;
+	}
+}
+EXPORT_SYMBOL(ltq_get_cpu_hz);
+
+unsigned int ltq_get_fpi_hz(void)
+{
+	unsigned int ddr_clock = DDR_HZ;
+
+	if (ltq_cgu_r32(LTQ_CGU_SYS) & 0x40)
+		return ddr_clock >> 1;
+	return ddr_clock;
+}
+EXPORT_SYMBOL(ltq_get_fpi_hz);
diff --git a/arch/mips/lantiq/xway/devices.c b/arch/mips/lantiq/xway/devices.c
new file mode 100644
index 00000000000..e09e789dfc2
--- /dev/null
+++ b/arch/mips/lantiq/xway/devices.c
@@ -0,0 +1,121 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/mtd/physmap.h>
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/platform_device.h>
+#include <linux/leds.h>
+#include <linux/etherdevice.h>
+#include <linux/reboot.h>
+#include <linux/time.h>
+#include <linux/io.h>
+#include <linux/gpio.h>
+#include <linux/leds.h>
+
+#include <asm/bootinfo.h>
+#include <asm/irq.h>
+
+#include <lantiq_soc.h>
+#include <lantiq_irq.h>
+#include <lantiq_platform.h>
+
+#include "devices.h"
+
+/* gpio */
+static struct resource ltq_gpio_resource[] = {
+	{
+		.name	= "gpio0",
+		.start  = LTQ_GPIO0_BASE_ADDR,
+		.end    = LTQ_GPIO0_BASE_ADDR + LTQ_GPIO_SIZE - 1,
+		.flags  = IORESOURCE_MEM,
+	}, {
+		.name	= "gpio1",
+		.start  = LTQ_GPIO1_BASE_ADDR,
+		.end    = LTQ_GPIO1_BASE_ADDR + LTQ_GPIO_SIZE - 1,
+		.flags  = IORESOURCE_MEM,
+	}, {
+		.name	= "gpio2",
+		.start  = LTQ_GPIO2_BASE_ADDR,
+		.end    = LTQ_GPIO2_BASE_ADDR + LTQ_GPIO_SIZE - 1,
+		.flags  = IORESOURCE_MEM,
+	}
+};
+
+void __init ltq_register_gpio(void)
+{
+	platform_device_register_simple("ltq_gpio", 0,
+		&ltq_gpio_resource[0], 1);
+	platform_device_register_simple("ltq_gpio", 1,
+		&ltq_gpio_resource[1], 1);
+
+	/* AR9 and VR9 have an extra gpio block */
+	if (ltq_is_ar9() || ltq_is_vr9()) {
+		platform_device_register_simple("ltq_gpio", 2,
+			&ltq_gpio_resource[2], 1);
+	}
+}
+
+/* serial to parallel conversion */
+static struct resource ltq_stp_resource = {
+	.name   = "stp",
+	.start  = LTQ_STP_BASE_ADDR,
+	.end    = LTQ_STP_BASE_ADDR + LTQ_STP_SIZE - 1,
+	.flags  = IORESOURCE_MEM,
+};
+
+void __init ltq_register_gpio_stp(void)
+{
+	platform_device_register_simple("ltq_stp", 0, &ltq_stp_resource, 1);
+}
+
+/* asc ports - amazon se has its own serial mapping */
+static struct resource ltq_ase_asc_resources[] = {
+	{
+		.name	= "asc0",
+		.start  = LTQ_ASC1_BASE_ADDR,
+		.end    = LTQ_ASC1_BASE_ADDR + LTQ_ASC_SIZE - 1,
+		.flags  = IORESOURCE_MEM,
+	},
+	IRQ_RES(tx, LTQ_ASC_ASE_TIR),
+	IRQ_RES(rx, LTQ_ASC_ASE_RIR),
+	IRQ_RES(err, LTQ_ASC_ASE_EIR),
+};
+
+void __init ltq_register_ase_asc(void)
+{
+	platform_device_register_simple("ltq_asc", 0,
+		ltq_ase_asc_resources, ARRAY_SIZE(ltq_ase_asc_resources));
+}
+
+/* ethernet */
+static struct resource ltq_etop_resources = {
+	.name	= "etop",
+	.start	= LTQ_ETOP_BASE_ADDR,
+	.end	= LTQ_ETOP_BASE_ADDR + LTQ_ETOP_SIZE - 1,
+	.flags	= IORESOURCE_MEM,
+};
+
+static struct platform_device ltq_etop = {
+	.name		= "ltq_etop",
+	.resource	= &ltq_etop_resources,
+	.num_resources	= 1,
+};
+
+void __init
+ltq_register_etop(struct ltq_eth_data *eth)
+{
+	if (eth) {
+		ltq_etop.dev.platform_data = eth;
+		platform_device_register(&ltq_etop);
+	}
+}
diff --git a/arch/mips/lantiq/xway/devices.h b/arch/mips/lantiq/xway/devices.h
new file mode 100644
index 00000000000..e90493471bc
--- /dev/null
+++ b/arch/mips/lantiq/xway/devices.h
@@ -0,0 +1,20 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _LTQ_DEVICES_XWAY_H__
+#define _LTQ_DEVICES_XWAY_H__
+
+#include "../devices.h"
+#include <linux/phy.h>
+
+extern void ltq_register_gpio(void);
+extern void ltq_register_gpio_stp(void);
+extern void ltq_register_ase_asc(void);
+extern void ltq_register_etop(struct ltq_eth_data *eth);
+
+#endif
diff --git a/arch/mips/lantiq/xway/dma.c b/arch/mips/lantiq/xway/dma.c
new file mode 100644
index 00000000000..4278a459d6c
--- /dev/null
+++ b/arch/mips/lantiq/xway/dma.c
@@ -0,0 +1,253 @@
+/*
+ *   This program is free software; you can redistribute it and/or modify it
+ *   under the terms of the GNU General Public License version 2 as published
+ *   by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+ *
+ *   Copyright (C) 2011 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/dma-mapping.h>
+
+#include <lantiq_soc.h>
+#include <xway_dma.h>
+
+#define LTQ_DMA_CTRL		0x10
+#define LTQ_DMA_CPOLL		0x14
+#define LTQ_DMA_CS		0x18
+#define LTQ_DMA_CCTRL		0x1C
+#define LTQ_DMA_CDBA		0x20
+#define LTQ_DMA_CDLEN		0x24
+#define LTQ_DMA_CIS		0x28
+#define LTQ_DMA_CIE		0x2C
+#define LTQ_DMA_PS		0x40
+#define LTQ_DMA_PCTRL		0x44
+#define LTQ_DMA_IRNEN		0xf4
+
+#define DMA_DESCPT		BIT(3)		/* descriptor complete irq */
+#define DMA_TX			BIT(8)		/* TX channel direction */
+#define DMA_CHAN_ON		BIT(0)		/* channel on / off bit */
+#define DMA_PDEN		BIT(6)		/* enable packet drop */
+#define DMA_CHAN_RST		BIT(1)		/* channel on / off bit */
+#define DMA_RESET		BIT(0)		/* channel on / off bit */
+#define DMA_IRQ_ACK		0x7e		/* IRQ status register */
+#define DMA_POLL		BIT(31)		/* turn on channel polling */
+#define DMA_CLK_DIV4		BIT(6)		/* polling clock divider */
+#define DMA_2W_BURST		BIT(1)		/* 2 word burst length */
+#define DMA_MAX_CHANNEL		20		/* the soc has 20 channels */
+#define DMA_ETOP_ENDIANESS	(0xf << 8) /* endianess swap etop channels */
+#define DMA_WEIGHT	(BIT(17) | BIT(16))	/* default channel wheight */
+
+#define ltq_dma_r32(x)			ltq_r32(ltq_dma_membase + (x))
+#define ltq_dma_w32(x, y)		ltq_w32(x, ltq_dma_membase + (y))
+#define ltq_dma_w32_mask(x, y, z)	ltq_w32_mask(x, y, \
+						ltq_dma_membase + (z))
+
+static struct resource ltq_dma_resource = {
+	.name	= "dma",
+	.start	= LTQ_DMA_BASE_ADDR,
+	.end	= LTQ_DMA_BASE_ADDR + LTQ_DMA_SIZE - 1,
+	.flags  = IORESOURCE_MEM,
+};
+
+static void __iomem *ltq_dma_membase;
+
+void
+ltq_dma_enable_irq(struct ltq_dma_channel *ch)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	ltq_dma_w32(ch->nr, LTQ_DMA_CS);
+	ltq_dma_w32_mask(0, 1 << ch->nr, LTQ_DMA_IRNEN);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(ltq_dma_enable_irq);
+
+void
+ltq_dma_disable_irq(struct ltq_dma_channel *ch)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	ltq_dma_w32(ch->nr, LTQ_DMA_CS);
+	ltq_dma_w32_mask(1 << ch->nr, 0, LTQ_DMA_IRNEN);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(ltq_dma_disable_irq);
+
+void
+ltq_dma_ack_irq(struct ltq_dma_channel *ch)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	ltq_dma_w32(ch->nr, LTQ_DMA_CS);
+	ltq_dma_w32(DMA_IRQ_ACK, LTQ_DMA_CIS);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(ltq_dma_ack_irq);
+
+void
+ltq_dma_open(struct ltq_dma_channel *ch)
+{
+	unsigned long flag;
+
+	local_irq_save(flag);
+	ltq_dma_w32(ch->nr, LTQ_DMA_CS);
+	ltq_dma_w32_mask(0, DMA_CHAN_ON, LTQ_DMA_CCTRL);
+	ltq_dma_enable_irq(ch);
+	local_irq_restore(flag);
+}
+EXPORT_SYMBOL_GPL(ltq_dma_open);
+
+void
+ltq_dma_close(struct ltq_dma_channel *ch)
+{
+	unsigned long flag;
+
+	local_irq_save(flag);
+	ltq_dma_w32(ch->nr, LTQ_DMA_CS);
+	ltq_dma_w32_mask(DMA_CHAN_ON, 0, LTQ_DMA_CCTRL);
+	ltq_dma_disable_irq(ch);
+	local_irq_restore(flag);
+}
+EXPORT_SYMBOL_GPL(ltq_dma_close);
+
+static void
+ltq_dma_alloc(struct ltq_dma_channel *ch)
+{
+	unsigned long flags;
+
+	ch->desc = 0;
+	ch->desc_base = dma_alloc_coherent(NULL,
+				LTQ_DESC_NUM * LTQ_DESC_SIZE,
+				&ch->phys, GFP_ATOMIC);
+	memset(ch->desc_base, 0, LTQ_DESC_NUM * LTQ_DESC_SIZE);
+
+	local_irq_save(flags);
+	ltq_dma_w32(ch->nr, LTQ_DMA_CS);
+	ltq_dma_w32(ch->phys, LTQ_DMA_CDBA);
+	ltq_dma_w32(LTQ_DESC_NUM, LTQ_DMA_CDLEN);
+	ltq_dma_w32_mask(DMA_CHAN_ON, 0, LTQ_DMA_CCTRL);
+	wmb();
+	ltq_dma_w32_mask(0, DMA_CHAN_RST, LTQ_DMA_CCTRL);
+	while (ltq_dma_r32(LTQ_DMA_CCTRL) & DMA_CHAN_RST)
+		;
+	local_irq_restore(flags);
+}
+
+void
+ltq_dma_alloc_tx(struct ltq_dma_channel *ch)
+{
+	unsigned long flags;
+
+	ltq_dma_alloc(ch);
+
+	local_irq_save(flags);
+	ltq_dma_w32(DMA_DESCPT, LTQ_DMA_CIE);
+	ltq_dma_w32_mask(0, 1 << ch->nr, LTQ_DMA_IRNEN);
+	ltq_dma_w32(DMA_WEIGHT | DMA_TX, LTQ_DMA_CCTRL);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(ltq_dma_alloc_tx);
+
+void
+ltq_dma_alloc_rx(struct ltq_dma_channel *ch)
+{
+	unsigned long flags;
+
+	ltq_dma_alloc(ch);
+
+	local_irq_save(flags);
+	ltq_dma_w32(DMA_DESCPT, LTQ_DMA_CIE);
+	ltq_dma_w32_mask(0, 1 << ch->nr, LTQ_DMA_IRNEN);
+	ltq_dma_w32(DMA_WEIGHT, LTQ_DMA_CCTRL);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(ltq_dma_alloc_rx);
+
+void
+ltq_dma_free(struct ltq_dma_channel *ch)
+{
+	if (!ch->desc_base)
+		return;
+	ltq_dma_close(ch);
+	dma_free_coherent(NULL, LTQ_DESC_NUM * LTQ_DESC_SIZE,
+		ch->desc_base, ch->phys);
+}
+EXPORT_SYMBOL_GPL(ltq_dma_free);
+
+void
+ltq_dma_init_port(int p)
+{
+	ltq_dma_w32(p, LTQ_DMA_PS);
+	switch (p) {
+	case DMA_PORT_ETOP:
+		/*
+		 * Tell the DMA engine to swap the endianess of data frames and
+		 * drop packets if the channel arbitration fails.
+		 */
+		ltq_dma_w32_mask(0, DMA_ETOP_ENDIANESS | DMA_PDEN,
+			LTQ_DMA_PCTRL);
+		break;
+
+	case DMA_PORT_DEU:
+		ltq_dma_w32((DMA_2W_BURST << 4) | (DMA_2W_BURST << 2),
+			LTQ_DMA_PCTRL);
+		break;
+
+	default:
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(ltq_dma_init_port);
+
+int __init
+ltq_dma_init(void)
+{
+	int i;
+
+	/* insert and request the memory region */
+	if (insert_resource(&iomem_resource, &ltq_dma_resource) < 0)
+		panic("Failed to insert dma memory\n");
+
+	if (request_mem_region(ltq_dma_resource.start,
+			resource_size(&ltq_dma_resource), "dma") < 0)
+		panic("Failed to request dma memory\n");
+
+	/* remap dma register range */
+	ltq_dma_membase = ioremap_nocache(ltq_dma_resource.start,
+				resource_size(&ltq_dma_resource));
+	if (!ltq_dma_membase)
+		panic("Failed to remap dma memory\n");
+
+	/* power up and reset the dma engine */
+	ltq_pmu_enable(PMU_DMA);
+	ltq_dma_w32_mask(0, DMA_RESET, LTQ_DMA_CTRL);
+
+	/* disable all interrupts */
+	ltq_dma_w32(0, LTQ_DMA_IRNEN);
+
+	/* reset/configure each channel */
+	for (i = 0; i < DMA_MAX_CHANNEL; i++) {
+		ltq_dma_w32(i, LTQ_DMA_CS);
+		ltq_dma_w32(DMA_CHAN_RST, LTQ_DMA_CCTRL);
+		ltq_dma_w32(DMA_POLL | DMA_CLK_DIV4, LTQ_DMA_CPOLL);
+		ltq_dma_w32_mask(DMA_CHAN_ON, 0, LTQ_DMA_CCTRL);
+	}
+	return 0;
+}
+
+postcore_initcall(ltq_dma_init);
diff --git a/arch/mips/lantiq/xway/ebu.c b/arch/mips/lantiq/xway/ebu.c
new file mode 100644
index 00000000000..66eb52fa50a
--- /dev/null
+++ b/arch/mips/lantiq/xway/ebu.c
@@ -0,0 +1,53 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  EBU - the external bus unit attaches PCI, NOR and NAND
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/ioport.h>
+
+#include <lantiq_soc.h>
+
+/* all access to the ebu must be locked */
+DEFINE_SPINLOCK(ebu_lock);
+EXPORT_SYMBOL_GPL(ebu_lock);
+
+static struct resource ltq_ebu_resource = {
+	.name	= "ebu",
+	.start	= LTQ_EBU_BASE_ADDR,
+	.end	= LTQ_EBU_BASE_ADDR + LTQ_EBU_SIZE - 1,
+	.flags	= IORESOURCE_MEM,
+};
+
+/* remapped base addr of the clock unit and external bus unit */
+void __iomem *ltq_ebu_membase;
+
+static int __init lantiq_ebu_init(void)
+{
+	/* insert and request the memory region */
+	if (insert_resource(&iomem_resource, &ltq_ebu_resource) < 0)
+		panic("Failed to insert ebu memory\n");
+
+	if (request_mem_region(ltq_ebu_resource.start,
+			resource_size(&ltq_ebu_resource), "ebu") < 0)
+		panic("Failed to request ebu memory\n");
+
+	/* remap ebu register range */
+	ltq_ebu_membase = ioremap_nocache(ltq_ebu_resource.start,
+				resource_size(&ltq_ebu_resource));
+	if (!ltq_ebu_membase)
+		panic("Failed to remap ebu memory\n");
+
+	/* make sure to unprotect the memory region where flash is located */
+	ltq_ebu_w32(ltq_ebu_r32(LTQ_EBU_BUSCON0) & ~EBU_WRDIS, LTQ_EBU_BUSCON0);
+	return 0;
+}
+
+postcore_initcall(lantiq_ebu_init);
diff --git a/arch/mips/lantiq/xway/gpio.c b/arch/mips/lantiq/xway/gpio.c
new file mode 100644
index 00000000000..a321451a545
--- /dev/null
+++ b/arch/mips/lantiq/xway/gpio.c
@@ -0,0 +1,195 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+#include <linux/ioport.h>
+#include <linux/io.h>
+
+#include <lantiq_soc.h>
+
+#define LTQ_GPIO_OUT		0x00
+#define LTQ_GPIO_IN		0x04
+#define LTQ_GPIO_DIR		0x08
+#define LTQ_GPIO_ALTSEL0	0x0C
+#define LTQ_GPIO_ALTSEL1	0x10
+#define LTQ_GPIO_OD		0x14
+
+#define PINS_PER_PORT		16
+#define MAX_PORTS		3
+
+#define ltq_gpio_getbit(m, r, p)	(!!(ltq_r32(m + r) & (1 << p)))
+#define ltq_gpio_setbit(m, r, p)	ltq_w32_mask(0, (1 << p), m + r)
+#define ltq_gpio_clearbit(m, r, p)	ltq_w32_mask((1 << p), 0, m + r)
+
+struct ltq_gpio {
+	void __iomem *membase;
+	struct gpio_chip chip;
+};
+
+static struct ltq_gpio ltq_gpio_port[MAX_PORTS];
+
+int gpio_to_irq(unsigned int gpio)
+{
+	return -EINVAL;
+}
+EXPORT_SYMBOL(gpio_to_irq);
+
+int irq_to_gpio(unsigned int gpio)
+{
+	return -EINVAL;
+}
+EXPORT_SYMBOL(irq_to_gpio);
+
+int ltq_gpio_request(unsigned int pin, unsigned int alt0,
+	unsigned int alt1, unsigned int dir, const char *name)
+{
+	int id = 0;
+
+	if (pin >= (MAX_PORTS * PINS_PER_PORT))
+		return -EINVAL;
+	if (gpio_request(pin, name)) {
+		pr_err("failed to setup lantiq gpio: %s\n", name);
+		return -EBUSY;
+	}
+	if (dir)
+		gpio_direction_output(pin, 1);
+	else
+		gpio_direction_input(pin);
+	while (pin >= PINS_PER_PORT) {
+		pin -= PINS_PER_PORT;
+		id++;
+	}
+	if (alt0)
+		ltq_gpio_setbit(ltq_gpio_port[id].membase,
+			LTQ_GPIO_ALTSEL0, pin);
+	else
+		ltq_gpio_clearbit(ltq_gpio_port[id].membase,
+			LTQ_GPIO_ALTSEL0, pin);
+	if (alt1)
+		ltq_gpio_setbit(ltq_gpio_port[id].membase,
+			LTQ_GPIO_ALTSEL1, pin);
+	else
+		ltq_gpio_clearbit(ltq_gpio_port[id].membase,
+			LTQ_GPIO_ALTSEL1, pin);
+	return 0;
+}
+EXPORT_SYMBOL(ltq_gpio_request);
+
+static void ltq_gpio_set(struct gpio_chip *chip, unsigned int offset, int value)
+{
+	struct ltq_gpio *ltq_gpio = container_of(chip, struct ltq_gpio, chip);
+
+	if (value)
+		ltq_gpio_setbit(ltq_gpio->membase, LTQ_GPIO_OUT, offset);
+	else
+		ltq_gpio_clearbit(ltq_gpio->membase, LTQ_GPIO_OUT, offset);
+}
+
+static int ltq_gpio_get(struct gpio_chip *chip, unsigned int offset)
+{
+	struct ltq_gpio *ltq_gpio = container_of(chip, struct ltq_gpio, chip);
+
+	return ltq_gpio_getbit(ltq_gpio->membase, LTQ_GPIO_IN, offset);
+}
+
+static int ltq_gpio_direction_input(struct gpio_chip *chip, unsigned int offset)
+{
+	struct ltq_gpio *ltq_gpio = container_of(chip, struct ltq_gpio, chip);
+
+	ltq_gpio_clearbit(ltq_gpio->membase, LTQ_GPIO_OD, offset);
+	ltq_gpio_clearbit(ltq_gpio->membase, LTQ_GPIO_DIR, offset);
+
+	return 0;
+}
+
+static int ltq_gpio_direction_output(struct gpio_chip *chip,
+	unsigned int offset, int value)
+{
+	struct ltq_gpio *ltq_gpio = container_of(chip, struct ltq_gpio, chip);
+
+	ltq_gpio_setbit(ltq_gpio->membase, LTQ_GPIO_OD, offset);
+	ltq_gpio_setbit(ltq_gpio->membase, LTQ_GPIO_DIR, offset);
+	ltq_gpio_set(chip, offset, value);
+
+	return 0;
+}
+
+static int ltq_gpio_req(struct gpio_chip *chip, unsigned offset)
+{
+	struct ltq_gpio *ltq_gpio = container_of(chip, struct ltq_gpio, chip);
+
+	ltq_gpio_clearbit(ltq_gpio->membase, LTQ_GPIO_ALTSEL0, offset);
+	ltq_gpio_clearbit(ltq_gpio->membase, LTQ_GPIO_ALTSEL1, offset);
+	return 0;
+}
+
+static int ltq_gpio_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+
+	if (pdev->id >= MAX_PORTS) {
+		dev_err(&pdev->dev, "invalid gpio port %d\n",
+			pdev->id);
+		return -EINVAL;
+	}
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "failed to get memory for gpio port %d\n",
+			pdev->id);
+		return -ENOENT;
+	}
+	res = devm_request_mem_region(&pdev->dev, res->start,
+		resource_size(res), dev_name(&pdev->dev));
+	if (!res) {
+		dev_err(&pdev->dev,
+			"failed to request memory for gpio port %d\n",
+			pdev->id);
+		return -EBUSY;
+	}
+	ltq_gpio_port[pdev->id].membase = devm_ioremap_nocache(&pdev->dev,
+		res->start, resource_size(res));
+	if (!ltq_gpio_port[pdev->id].membase) {
+		dev_err(&pdev->dev, "failed to remap memory for gpio port %d\n",
+			pdev->id);
+		return -ENOMEM;
+	}
+	ltq_gpio_port[pdev->id].chip.label = "ltq_gpio";
+	ltq_gpio_port[pdev->id].chip.direction_input = ltq_gpio_direction_input;
+	ltq_gpio_port[pdev->id].chip.direction_output =
+		ltq_gpio_direction_output;
+	ltq_gpio_port[pdev->id].chip.get = ltq_gpio_get;
+	ltq_gpio_port[pdev->id].chip.set = ltq_gpio_set;
+	ltq_gpio_port[pdev->id].chip.request = ltq_gpio_req;
+	ltq_gpio_port[pdev->id].chip.base = PINS_PER_PORT * pdev->id;
+	ltq_gpio_port[pdev->id].chip.ngpio = PINS_PER_PORT;
+	platform_set_drvdata(pdev, &ltq_gpio_port[pdev->id]);
+	return gpiochip_add(&ltq_gpio_port[pdev->id].chip);
+}
+
+static struct platform_driver
+ltq_gpio_driver = {
+	.probe = ltq_gpio_probe,
+	.driver = {
+		.name = "ltq_gpio",
+		.owner = THIS_MODULE,
+	},
+};
+
+int __init ltq_gpio_init(void)
+{
+	int ret = platform_driver_register(&ltq_gpio_driver);
+
+	if (ret)
+		pr_info("ltq_gpio : Error registering platfom driver!");
+	return ret;
+}
+
+postcore_initcall(ltq_gpio_init);
diff --git a/arch/mips/lantiq/xway/gpio_ebu.c b/arch/mips/lantiq/xway/gpio_ebu.c
new file mode 100644
index 00000000000..a479355abdb
--- /dev/null
+++ b/arch/mips/lantiq/xway/gpio_ebu.c
@@ -0,0 +1,126 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/platform_device.h>
+#include <linux/mutex.h>
+#include <linux/gpio.h>
+#include <linux/io.h>
+
+#include <lantiq_soc.h>
+
+/*
+ * By attaching hardware latches to the EBU it is possible to create output
+ * only gpios. This driver configures a special memory address, which when
+ * written to outputs 16 bit to the latches.
+ */
+
+#define LTQ_EBU_BUSCON	0x1e7ff		/* 16 bit access, slowest timing */
+#define LTQ_EBU_WP	0x80000000	/* write protect bit */
+
+/* we keep a shadow value of the last value written to the ebu */
+static int ltq_ebu_gpio_shadow = 0x0;
+static void __iomem *ltq_ebu_gpio_membase;
+
+static void ltq_ebu_apply(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ebu_lock, flags);
+	ltq_ebu_w32(LTQ_EBU_BUSCON, LTQ_EBU_BUSCON1);
+	*((__u16 *)ltq_ebu_gpio_membase) = ltq_ebu_gpio_shadow;
+	ltq_ebu_w32(LTQ_EBU_BUSCON | LTQ_EBU_WP, LTQ_EBU_BUSCON1);
+	spin_unlock_irqrestore(&ebu_lock, flags);
+}
+
+static void ltq_ebu_set(struct gpio_chip *chip, unsigned offset, int value)
+{
+	if (value)
+		ltq_ebu_gpio_shadow |= (1 << offset);
+	else
+		ltq_ebu_gpio_shadow &= ~(1 << offset);
+	ltq_ebu_apply();
+}
+
+static int ltq_ebu_direction_output(struct gpio_chip *chip, unsigned offset,
+	int value)
+{
+	ltq_ebu_set(chip, offset, value);
+
+	return 0;
+}
+
+static struct gpio_chip ltq_ebu_chip = {
+	.label = "ltq_ebu",
+	.direction_output = ltq_ebu_direction_output,
+	.set = ltq_ebu_set,
+	.base = 72,
+	.ngpio = 16,
+	.can_sleep = 1,
+	.owner = THIS_MODULE,
+};
+
+static int ltq_ebu_probe(struct platform_device *pdev)
+{
+	int ret = 0;
+	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+	if (!res) {
+		dev_err(&pdev->dev, "failed to get memory resource\n");
+		return -ENOENT;
+	}
+
+	res = devm_request_mem_region(&pdev->dev, res->start,
+		resource_size(res), dev_name(&pdev->dev));
+	if (!res) {
+		dev_err(&pdev->dev, "failed to request memory resource\n");
+		return -EBUSY;
+	}
+
+	ltq_ebu_gpio_membase = devm_ioremap_nocache(&pdev->dev, res->start,
+		resource_size(res));
+	if (!ltq_ebu_gpio_membase) {
+		dev_err(&pdev->dev, "Failed to ioremap mem region\n");
+		return -ENOMEM;
+	}
+
+	/* grab the default shadow value passed form the platform code */
+	ltq_ebu_gpio_shadow = (unsigned int) pdev->dev.platform_data;
+
+	/* tell the ebu controller which memory address we will be using */
+	ltq_ebu_w32(pdev->resource->start | 0x1, LTQ_EBU_ADDRSEL1);
+
+	/* write protect the region */
+	ltq_ebu_w32(LTQ_EBU_BUSCON | LTQ_EBU_WP, LTQ_EBU_BUSCON1);
+
+	ret = gpiochip_add(&ltq_ebu_chip);
+	if (!ret)
+		ltq_ebu_apply();
+	return ret;
+}
+
+static struct platform_driver ltq_ebu_driver = {
+	.probe = ltq_ebu_probe,
+	.driver = {
+		.name = "ltq_ebu",
+		.owner = THIS_MODULE,
+	},
+};
+
+static int __init ltq_ebu_init(void)
+{
+	int ret = platform_driver_register(&ltq_ebu_driver);
+
+	if (ret)
+		pr_info("ltq_ebu : Error registering platfom driver!");
+	return ret;
+}
+
+postcore_initcall(ltq_ebu_init);
diff --git a/arch/mips/lantiq/xway/gpio_stp.c b/arch/mips/lantiq/xway/gpio_stp.c
new file mode 100644
index 00000000000..67d59d69034
--- /dev/null
+++ b/arch/mips/lantiq/xway/gpio_stp.c
@@ -0,0 +1,157 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2007 John Crispin <blogic@openwrt.org>
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/platform_device.h>
+#include <linux/mutex.h>
+#include <linux/io.h>
+#include <linux/gpio.h>
+
+#include <lantiq_soc.h>
+
+#define LTQ_STP_CON0		0x00
+#define LTQ_STP_CON1		0x04
+#define LTQ_STP_CPU0		0x08
+#define LTQ_STP_CPU1		0x0C
+#define LTQ_STP_AR		0x10
+
+#define LTQ_STP_CON_SWU		(1 << 31)
+#define LTQ_STP_2HZ		0
+#define LTQ_STP_4HZ		(1 << 23)
+#define LTQ_STP_8HZ		(2 << 23)
+#define LTQ_STP_10HZ		(3 << 23)
+#define LTQ_STP_SPEED_MASK	(0xf << 23)
+#define LTQ_STP_UPD_FPI		(1 << 31)
+#define LTQ_STP_UPD_MASK	(3 << 30)
+#define LTQ_STP_ADSL_SRC	(3 << 24)
+
+#define LTQ_STP_GROUP0		(1 << 0)
+
+#define LTQ_STP_RISING		0
+#define LTQ_STP_FALLING		(1 << 26)
+#define LTQ_STP_EDGE_MASK	(1 << 26)
+
+#define ltq_stp_r32(reg)	__raw_readl(ltq_stp_membase + reg)
+#define ltq_stp_w32(val, reg)	__raw_writel(val, ltq_stp_membase + reg)
+#define ltq_stp_w32_mask(clear, set, reg) \
+		ltq_w32((ltq_r32(ltq_stp_membase + reg) & ~(clear)) | (set), \
+		ltq_stp_membase + (reg))
+
+static int ltq_stp_shadow = 0xffff;
+static void __iomem *ltq_stp_membase;
+
+static void ltq_stp_set(struct gpio_chip *chip, unsigned offset, int value)
+{
+	if (value)
+		ltq_stp_shadow |= (1 << offset);
+	else
+		ltq_stp_shadow &= ~(1 << offset);
+	ltq_stp_w32(ltq_stp_shadow, LTQ_STP_CPU0);
+}
+
+static int ltq_stp_direction_output(struct gpio_chip *chip, unsigned offset,
+	int value)
+{
+	ltq_stp_set(chip, offset, value);
+
+	return 0;
+}
+
+static struct gpio_chip ltq_stp_chip = {
+	.label = "ltq_stp",
+	.direction_output = ltq_stp_direction_output,
+	.set = ltq_stp_set,
+	.base = 48,
+	.ngpio = 24,
+	.can_sleep = 1,
+	.owner = THIS_MODULE,
+};
+
+static int ltq_stp_hw_init(void)
+{
+	/* the 3 pins used to control the external stp */
+	ltq_gpio_request(4, 1, 0, 1, "stp-st");
+	ltq_gpio_request(5, 1, 0, 1, "stp-d");
+	ltq_gpio_request(6, 1, 0, 1, "stp-sh");
+
+	/* sane defaults */
+	ltq_stp_w32(0, LTQ_STP_AR);
+	ltq_stp_w32(0, LTQ_STP_CPU0);
+	ltq_stp_w32(0, LTQ_STP_CPU1);
+	ltq_stp_w32(LTQ_STP_CON_SWU, LTQ_STP_CON0);
+	ltq_stp_w32(0, LTQ_STP_CON1);
+
+	/* rising or falling edge */
+	ltq_stp_w32_mask(LTQ_STP_EDGE_MASK, LTQ_STP_FALLING, LTQ_STP_CON0);
+
+	/* per default stp 15-0 are set */
+	ltq_stp_w32_mask(0, LTQ_STP_GROUP0, LTQ_STP_CON1);
+
+	/* stp are update periodically by the FPI bus */
+	ltq_stp_w32_mask(LTQ_STP_UPD_MASK, LTQ_STP_UPD_FPI, LTQ_STP_CON1);
+
+	/* set stp update speed */
+	ltq_stp_w32_mask(LTQ_STP_SPEED_MASK, LTQ_STP_8HZ, LTQ_STP_CON1);
+
+	/* tell the hardware that pin (led) 0 and 1 are controlled
+	 *  by the dsl arc
+	 */
+	ltq_stp_w32_mask(0, LTQ_STP_ADSL_SRC, LTQ_STP_CON0);
+
+	ltq_pmu_enable(PMU_LED);
+	return 0;
+}
+
+static int __devinit ltq_stp_probe(struct platform_device *pdev)
+{
+	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	int ret = 0;
+
+	if (!res)
+		return -ENOENT;
+	res = devm_request_mem_region(&pdev->dev, res->start,
+		resource_size(res), dev_name(&pdev->dev));
+	if (!res) {
+		dev_err(&pdev->dev, "failed to request STP memory\n");
+		return -EBUSY;
+	}
+	ltq_stp_membase = devm_ioremap_nocache(&pdev->dev, res->start,
+		resource_size(res));
+	if (!ltq_stp_membase) {
+		dev_err(&pdev->dev, "failed to remap STP memory\n");
+		return -ENOMEM;
+	}
+	ret = gpiochip_add(&ltq_stp_chip);
+	if (!ret)
+		ret = ltq_stp_hw_init();
+
+	return ret;
+}
+
+static struct platform_driver ltq_stp_driver = {
+	.probe = ltq_stp_probe,
+	.driver = {
+		.name = "ltq_stp",
+		.owner = THIS_MODULE,
+	},
+};
+
+int __init ltq_stp_init(void)
+{
+	int ret = platform_driver_register(&ltq_stp_driver);
+
+	if (ret)
+		pr_info("ltq_stp: error registering platfom driver");
+	return ret;
+}
+
+postcore_initcall(ltq_stp_init);
diff --git a/arch/mips/lantiq/xway/mach-easy50601.c b/arch/mips/lantiq/xway/mach-easy50601.c
new file mode 100644
index 00000000000..d5aaf637ab1
--- /dev/null
+++ b/arch/mips/lantiq/xway/mach-easy50601.c
@@ -0,0 +1,57 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/physmap.h>
+#include <linux/input.h>
+
+#include <lantiq.h>
+
+#include "../machtypes.h"
+#include "devices.h"
+
+static struct mtd_partition easy50601_partitions[] = {
+	{
+		.name	= "uboot",
+		.offset	= 0x0,
+		.size	= 0x10000,
+	},
+	{
+		.name	= "uboot_env",
+		.offset	= 0x10000,
+		.size	= 0x10000,
+	},
+	{
+		.name	= "linux",
+		.offset	= 0x20000,
+		.size	= 0xE0000,
+	},
+	{
+		.name	= "rootfs",
+		.offset	= 0x100000,
+		.size	= 0x300000,
+	},
+};
+
+static struct physmap_flash_data easy50601_flash_data = {
+	.nr_parts	= ARRAY_SIZE(easy50601_partitions),
+	.parts		= easy50601_partitions,
+};
+
+static void __init easy50601_init(void)
+{
+	ltq_register_nor(&easy50601_flash_data);
+}
+
+MIPS_MACHINE(LTQ_MACH_EASY50601,
+			"EASY50601",
+			"EASY50601 Eval Board",
+			easy50601_init);
diff --git a/arch/mips/lantiq/xway/mach-easy50712.c b/arch/mips/lantiq/xway/mach-easy50712.c
new file mode 100644
index 00000000000..ea5027b3239
--- /dev/null
+++ b/arch/mips/lantiq/xway/mach-easy50712.c
@@ -0,0 +1,74 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/physmap.h>
+#include <linux/input.h>
+#include <linux/phy.h>
+
+#include <lantiq_soc.h>
+#include <irq.h>
+
+#include "../machtypes.h"
+#include "devices.h"
+
+static struct mtd_partition easy50712_partitions[] = {
+	{
+		.name	= "uboot",
+		.offset	= 0x0,
+		.size	= 0x10000,
+	},
+	{
+		.name	= "uboot_env",
+		.offset	= 0x10000,
+		.size	= 0x10000,
+	},
+	{
+		.name	= "linux",
+		.offset	= 0x20000,
+		.size	= 0xe0000,
+	},
+	{
+		.name	= "rootfs",
+		.offset	= 0x100000,
+		.size	= 0x300000,
+	},
+};
+
+static struct physmap_flash_data easy50712_flash_data = {
+	.nr_parts	= ARRAY_SIZE(easy50712_partitions),
+	.parts		= easy50712_partitions,
+};
+
+static struct ltq_pci_data ltq_pci_data = {
+	.clock	= PCI_CLOCK_INT,
+	.gpio	= PCI_GNT1 | PCI_REQ1,
+	.irq	= {
+		[14] = INT_NUM_IM0_IRL0 + 22,
+	},
+};
+
+static struct ltq_eth_data ltq_eth_data = {
+	.mii_mode = PHY_INTERFACE_MODE_MII,
+};
+
+static void __init easy50712_init(void)
+{
+	ltq_register_gpio_stp();
+	ltq_register_nor(&easy50712_flash_data);
+	ltq_register_pci(&ltq_pci_data);
+	ltq_register_etop(&ltq_eth_data);
+}
+
+MIPS_MACHINE(LTQ_MACH_EASY50712,
+	     "EASY50712",
+	     "EASY50712 Eval Board",
+	      easy50712_init);
diff --git a/arch/mips/lantiq/xway/pmu.c b/arch/mips/lantiq/xway/pmu.c
new file mode 100644
index 00000000000..9d69f01e352
--- /dev/null
+++ b/arch/mips/lantiq/xway/pmu.c
@@ -0,0 +1,70 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/ioport.h>
+
+#include <lantiq_soc.h>
+
+/* PMU - the power management unit allows us to turn part of the core
+ * on and off
+ */
+
+/* the enable / disable registers */
+#define LTQ_PMU_PWDCR	0x1C
+#define LTQ_PMU_PWDSR	0x20
+
+#define ltq_pmu_w32(x, y)	ltq_w32((x), ltq_pmu_membase + (y))
+#define ltq_pmu_r32(x)		ltq_r32(ltq_pmu_membase + (x))
+
+static struct resource ltq_pmu_resource = {
+	.name	= "pmu",
+	.start	= LTQ_PMU_BASE_ADDR,
+	.end	= LTQ_PMU_BASE_ADDR + LTQ_PMU_SIZE - 1,
+	.flags	= IORESOURCE_MEM,
+};
+
+static void __iomem *ltq_pmu_membase;
+
+void ltq_pmu_enable(unsigned int module)
+{
+	int err = 1000000;
+
+	ltq_pmu_w32(ltq_pmu_r32(LTQ_PMU_PWDCR) & ~module, LTQ_PMU_PWDCR);
+	do {} while (--err && (ltq_pmu_r32(LTQ_PMU_PWDSR) & module));
+
+	if (!err)
+		panic("activating PMU module failed!\n");
+}
+EXPORT_SYMBOL(ltq_pmu_enable);
+
+void ltq_pmu_disable(unsigned int module)
+{
+	ltq_pmu_w32(ltq_pmu_r32(LTQ_PMU_PWDCR) | module, LTQ_PMU_PWDCR);
+}
+EXPORT_SYMBOL(ltq_pmu_disable);
+
+int __init ltq_pmu_init(void)
+{
+	if (insert_resource(&iomem_resource, &ltq_pmu_resource) < 0)
+		panic("Failed to insert pmu memory\n");
+
+	if (request_mem_region(ltq_pmu_resource.start,
+			resource_size(&ltq_pmu_resource), "pmu") < 0)
+		panic("Failed to request pmu memory\n");
+
+	ltq_pmu_membase = ioremap_nocache(ltq_pmu_resource.start,
+				resource_size(&ltq_pmu_resource));
+	if (!ltq_pmu_membase)
+		panic("Failed to remap pmu memory\n");
+	return 0;
+}
+
+core_initcall(ltq_pmu_init);
diff --git a/arch/mips/lantiq/xway/prom-ase.c b/arch/mips/lantiq/xway/prom-ase.c
new file mode 100644
index 00000000000..abe49f4db57
--- /dev/null
+++ b/arch/mips/lantiq/xway/prom-ase.c
@@ -0,0 +1,39 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/module.h>
+#include <linux/clk.h>
+#include <asm/bootinfo.h>
+#include <asm/time.h>
+
+#include <lantiq_soc.h>
+
+#include "../prom.h"
+
+#define SOC_AMAZON_SE	"Amazon_SE"
+
+#define PART_SHIFT	12
+#define PART_MASK	0x0FFFFFFF
+#define REV_SHIFT	28
+#define REV_MASK	0xF0000000
+
+void __init ltq_soc_detect(struct ltq_soc_info *i)
+{
+	i->partnum = (ltq_r32(LTQ_MPS_CHIPID) & PART_MASK) >> PART_SHIFT;
+	i->rev = (ltq_r32(LTQ_MPS_CHIPID) & REV_MASK) >> REV_SHIFT;
+	switch (i->partnum) {
+	case SOC_ID_AMAZON_SE:
+		i->name = SOC_AMAZON_SE;
+		i->type = SOC_TYPE_AMAZON_SE;
+		break;
+
+	default:
+		unreachable();
+		break;
+	}
+}
diff --git a/arch/mips/lantiq/xway/prom-xway.c b/arch/mips/lantiq/xway/prom-xway.c
new file mode 100644
index 00000000000..1686692ac24
--- /dev/null
+++ b/arch/mips/lantiq/xway/prom-xway.c
@@ -0,0 +1,54 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/module.h>
+#include <linux/clk.h>
+#include <asm/bootinfo.h>
+#include <asm/time.h>
+
+#include <lantiq_soc.h>
+
+#include "../prom.h"
+
+#define SOC_DANUBE	"Danube"
+#define SOC_TWINPASS	"Twinpass"
+#define SOC_AR9		"AR9"
+
+#define PART_SHIFT	12
+#define PART_MASK	0x0FFFFFFF
+#define REV_SHIFT	28
+#define REV_MASK	0xF0000000
+
+void __init ltq_soc_detect(struct ltq_soc_info *i)
+{
+	i->partnum = (ltq_r32(LTQ_MPS_CHIPID) & PART_MASK) >> PART_SHIFT;
+	i->rev = (ltq_r32(LTQ_MPS_CHIPID) & REV_MASK) >> REV_SHIFT;
+	switch (i->partnum) {
+	case SOC_ID_DANUBE1:
+	case SOC_ID_DANUBE2:
+		i->name = SOC_DANUBE;
+		i->type = SOC_TYPE_DANUBE;
+		break;
+
+	case SOC_ID_TWINPASS:
+		i->name = SOC_TWINPASS;
+		i->type = SOC_TYPE_DANUBE;
+		break;
+
+	case SOC_ID_ARX188:
+	case SOC_ID_ARX168:
+	case SOC_ID_ARX182:
+		i->name = SOC_AR9;
+		i->type = SOC_TYPE_AR9;
+		break;
+
+	default:
+		unreachable();
+		break;
+	}
+}
diff --git a/arch/mips/lantiq/xway/reset.c b/arch/mips/lantiq/xway/reset.c
new file mode 100644
index 00000000000..a1be36d0e49
--- /dev/null
+++ b/arch/mips/lantiq/xway/reset.c
@@ -0,0 +1,91 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/pm.h>
+#include <linux/module.h>
+#include <asm/reboot.h>
+
+#include <lantiq_soc.h>
+
+#define ltq_rcu_w32(x, y)	ltq_w32((x), ltq_rcu_membase + (y))
+#define ltq_rcu_r32(x)		ltq_r32(ltq_rcu_membase + (x))
+
+/* register definitions */
+#define LTQ_RCU_RST		0x0010
+#define LTQ_RCU_RST_ALL		0x40000000
+
+#define LTQ_RCU_RST_STAT	0x0014
+#define LTQ_RCU_STAT_SHIFT	26
+
+static struct resource ltq_rcu_resource = {
+	.name   = "rcu",
+	.start  = LTQ_RCU_BASE_ADDR,
+	.end    = LTQ_RCU_BASE_ADDR + LTQ_RCU_SIZE - 1,
+	.flags  = IORESOURCE_MEM,
+};
+
+/* remapped base addr of the reset control unit */
+static void __iomem *ltq_rcu_membase;
+
+/* This function is used by the watchdog driver */
+int ltq_reset_cause(void)
+{
+	u32 val = ltq_rcu_r32(LTQ_RCU_RST_STAT);
+	return val >> LTQ_RCU_STAT_SHIFT;
+}
+EXPORT_SYMBOL_GPL(ltq_reset_cause);
+
+static void ltq_machine_restart(char *command)
+{
+	pr_notice("System restart\n");
+	local_irq_disable();
+	ltq_rcu_w32(ltq_rcu_r32(LTQ_RCU_RST) | LTQ_RCU_RST_ALL, LTQ_RCU_RST);
+	unreachable();
+}
+
+static void ltq_machine_halt(void)
+{
+	pr_notice("System halted.\n");
+	local_irq_disable();
+	unreachable();
+}
+
+static void ltq_machine_power_off(void)
+{
+	pr_notice("Please turn off the power now.\n");
+	local_irq_disable();
+	unreachable();
+}
+
+static int __init mips_reboot_setup(void)
+{
+	/* insert and request the memory region */
+	if (insert_resource(&iomem_resource, &ltq_rcu_resource) < 0)
+		panic("Failed to insert rcu memory\n");
+
+	if (request_mem_region(ltq_rcu_resource.start,
+			resource_size(&ltq_rcu_resource), "rcu") < 0)
+		panic("Failed to request rcu memory\n");
+
+	/* remap rcu register range */
+	ltq_rcu_membase = ioremap_nocache(ltq_rcu_resource.start,
+				resource_size(&ltq_rcu_resource));
+	if (!ltq_rcu_membase)
+		panic("Failed to remap rcu memory\n");
+
+	_machine_restart = ltq_machine_restart;
+	_machine_halt = ltq_machine_halt;
+	pm_power_off = ltq_machine_power_off;
+
+	return 0;
+}
+
+arch_initcall(mips_reboot_setup);
diff --git a/arch/mips/lantiq/xway/setup-ase.c b/arch/mips/lantiq/xway/setup-ase.c
new file mode 100644
index 00000000000..f6f326798a3
--- /dev/null
+++ b/arch/mips/lantiq/xway/setup-ase.c
@@ -0,0 +1,19 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2011 John Crispin <blogic@openwrt.org>
+ */
+
+#include <lantiq_soc.h>
+
+#include "../prom.h"
+#include "devices.h"
+
+void __init ltq_soc_setup(void)
+{
+	ltq_register_ase_asc();
+	ltq_register_gpio();
+	ltq_register_wdt();
+}
diff --git a/arch/mips/lantiq/xway/setup-xway.c b/arch/mips/lantiq/xway/setup-xway.c
new file mode 100644
index 00000000000..c292f643a85
--- /dev/null
+++ b/arch/mips/lantiq/xway/setup-xway.c
@@ -0,0 +1,20 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2011 John Crispin <blogic@openwrt.org>
+ */
+
+#include <lantiq_soc.h>
+
+#include "../prom.h"
+#include "devices.h"
+
+void __init ltq_soc_setup(void)
+{
+	ltq_register_asc(0);
+	ltq_register_asc(1);
+	ltq_register_gpio();
+	ltq_register_wdt();
+}
diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile
index 2adead5a8a3..b2cad4fd5fc 100644
--- a/arch/mips/lib/Makefile
+++ b/arch/mips/lib/Makefile
@@ -28,6 +28,7 @@ obj-$(CONFIG_CPU_TX39XX)	+= r3k_dump_tlb.o
 obj-$(CONFIG_CPU_TX49XX)	+= dump_tlb.o
 obj-$(CONFIG_CPU_VR41XX)	+= dump_tlb.o
 obj-$(CONFIG_CPU_CAVIUM_OCTEON)	+= dump_tlb.o
+obj-$(CONFIG_CPU_XLR)		+= dump_tlb.o
 
 # libgcc-style stuff needed in the kernel
 obj-y += ashldi3.o ashrdi3.o cmpdi2.o lshrdi3.o ucmpdi2.o
diff --git a/arch/mips/loongson/common/cs5536/cs5536_mfgpt.c b/arch/mips/loongson/common/cs5536/cs5536_mfgpt.c
index 8c807c96519..0cb1b9760e3 100644
--- a/arch/mips/loongson/common/cs5536/cs5536_mfgpt.c
+++ b/arch/mips/loongson/common/cs5536/cs5536_mfgpt.c
@@ -201,8 +201,6 @@ static struct clocksource clocksource_mfgpt = {
 	.rating = 120, /* Functional for real use, but not desired */
 	.read = mfgpt_read,
 	.mask = CLOCKSOURCE_MASK(32),
-	.mult = 0,
-	.shift = 22,
 };
 
 int __init init_mfgpt_clocksource(void)
@@ -210,8 +208,7 @@ int __init init_mfgpt_clocksource(void)
 	if (num_possible_cpus() > 1)	/* MFGPT does not scale! */
 		return 0;
 
-	clocksource_mfgpt.mult = clocksource_hz2mult(MFGPT_TICK_RATE, 22);
-	return clocksource_register(&clocksource_mfgpt);
+	return clocksource_register_hz(&clocksource_mfgpt, MFGPT_TICK_RATE);
 }
 
 arch_initcall(init_mfgpt_clocksource);
diff --git a/arch/mips/loongson/common/env.c b/arch/mips/loongson/common/env.c
index 11b193f848f..d93830ad611 100644
--- a/arch/mips/loongson/common/env.c
+++ b/arch/mips/loongson/common/env.c
@@ -29,9 +29,10 @@ unsigned long memsize, highmemsize;
 
 #define parse_even_earlier(res, option, p)				\
 do {									\
-	int ret;							\
+	unsigned int tmp __maybe_unused;				\
+									\
 	if (strncmp(option, (char *)p, strlen(option)) == 0)		\
-		ret = strict_strtol((char *)p + strlen(option"="), 10, &res); \
+		tmp = strict_strtol((char *)p + strlen(option"="), 10, &res); \
 } while (0)
 
 void __init prom_init_env(void)
diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile
index d679c772d08..4d8c1623eee 100644
--- a/arch/mips/mm/Makefile
+++ b/arch/mips/mm/Makefile
@@ -3,7 +3,8 @@
 #
 
 obj-y				+= cache.o dma-default.o extable.o fault.o \
-				   init.o tlbex.o tlbex-fault.o uasm.o page.o
+				   init.o mmap.o tlbex.o tlbex-fault.o uasm.o \
+				   page.o
 
 obj-$(CONFIG_32BIT)		+= ioremap.o pgtable-32.o
 obj-$(CONFIG_64BIT)		+= pgtable-64.o
@@ -29,6 +30,7 @@ obj-$(CONFIG_CPU_TX39XX)	+= c-tx39.o tlb-r3k.o
 obj-$(CONFIG_CPU_TX49XX)	+= c-r4k.o cex-gen.o tlb-r4k.o
 obj-$(CONFIG_CPU_VR41XX)	+= c-r4k.o cex-gen.o tlb-r4k.o
 obj-$(CONFIG_CPU_CAVIUM_OCTEON)	+= c-octeon.o cex-oct.o tlb-r4k.o
+obj-$(CONFIG_CPU_XLR)		+= c-r4k.o tlb-r4k.o cex-gen.o
 
 obj-$(CONFIG_IP22_CPU_SCACHE)	+= sc-ip22.o
 obj-$(CONFIG_R5000_CPU_SCACHE)  += sc-r5k.o
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index fd5cb57905d..eeb642e4066 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -1006,6 +1006,7 @@ static void __cpuinit probe_pcache(void)
 	case CPU_25KF:
 	case CPU_SB1:
 	case CPU_SB1A:
+	case CPU_XLR:
 		c->dcache.flags |= MIPS_CACHE_PINDEX;
 		break;
 
@@ -1075,7 +1076,6 @@ static int __cpuinit probe_scache(void)
 	unsigned long flags, addr, begin, end, pow2;
 	unsigned int config = read_c0_config();
 	struct cpuinfo_mips *c = &current_cpu_data;
-	int tmp;
 
 	if (config & CONF_SC)
 		return 0;
@@ -1108,7 +1108,6 @@ static int __cpuinit probe_scache(void)
 
 	/* Now search for the wrap around point. */
 	pow2 = (128 * 1024);
-	tmp = 0;
 	for (addr = begin + (128 * 1024); addr < end; addr = begin + pow2) {
 		cache_op(Index_Load_Tag_SD, addr);
 		__asm__ __volatile__("nop; nop; nop; nop;"); /* hazard... */
diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
new file mode 100644
index 00000000000..ae3c20a9556
--- /dev/null
+++ b/arch/mips/mm/mmap.c
@@ -0,0 +1,122 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2011 Wind River Systems,
+ *   written by Ralf Baechle <ralf@linux-mips.org>
+ */
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/sched.h>
+
+unsigned long shm_align_mask = PAGE_SIZE - 1;	/* Sane caches */
+
+EXPORT_SYMBOL(shm_align_mask);
+
+#define COLOUR_ALIGN(addr,pgoff)				\
+	((((addr) + shm_align_mask) & ~shm_align_mask) +	\
+	 (((pgoff) << PAGE_SHIFT) & shm_align_mask))
+
+unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
+	unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	struct vm_area_struct * vmm;
+	int do_color_align;
+
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
+	if (flags & MAP_FIXED) {
+		/* Even MAP_FIXED mappings must reside within TASK_SIZE.  */
+		if (TASK_SIZE - len < addr)
+			return -EINVAL;
+
+		/*
+		 * We do not accept a shared mapping if it would violate
+		 * cache aliasing constraints.
+		 */
+		if ((flags & MAP_SHARED) &&
+		    ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask))
+			return -EINVAL;
+		return addr;
+	}
+
+	do_color_align = 0;
+	if (filp || (flags & MAP_SHARED))
+		do_color_align = 1;
+	if (addr) {
+		if (do_color_align)
+			addr = COLOUR_ALIGN(addr, pgoff);
+		else
+			addr = PAGE_ALIGN(addr);
+		vmm = find_vma(current->mm, addr);
+		if (TASK_SIZE - len >= addr &&
+		    (!vmm || addr + len <= vmm->vm_start))
+			return addr;
+	}
+	addr = current->mm->mmap_base;
+	if (do_color_align)
+		addr = COLOUR_ALIGN(addr, pgoff);
+	else
+		addr = PAGE_ALIGN(addr);
+
+	for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) {
+		/* At this point:  (!vmm || addr < vmm->vm_end). */
+		if (TASK_SIZE - len < addr)
+			return -ENOMEM;
+		if (!vmm || addr + len <= vmm->vm_start)
+			return addr;
+		addr = vmm->vm_end;
+		if (do_color_align)
+			addr = COLOUR_ALIGN(addr, pgoff);
+	}
+}
+
+void arch_pick_mmap_layout(struct mm_struct *mm)
+{
+	unsigned long random_factor = 0UL;
+
+	if (current->flags & PF_RANDOMIZE) {
+		random_factor = get_random_int();
+		random_factor = random_factor << PAGE_SHIFT;
+		if (TASK_IS_32BIT_ADDR)
+			random_factor &= 0xfffffful;
+		else
+			random_factor &= 0xffffffful;
+	}
+
+	mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
+	mm->get_unmapped_area = arch_get_unmapped_area;
+	mm->unmap_area = arch_unmap_area;
+}
+
+static inline unsigned long brk_rnd(void)
+{
+	unsigned long rnd = get_random_int();
+
+	rnd = rnd << PAGE_SHIFT;
+	/* 8MB for 32bit, 256MB for 64bit */
+	if (TASK_IS_32BIT_ADDR)
+		rnd = rnd & 0x7ffffful;
+	else
+		rnd = rnd & 0xffffffful;
+
+	return rnd;
+}
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+	unsigned long base = mm->brk;
+	unsigned long ret;
+
+	ret = PAGE_ALIGN(base + brk_rnd());
+
+	if (ret < mm->brk)
+		return mm->brk;
+
+	return ret;
+}
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 5ef294fbb6e..424ed4b92e6 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -404,6 +404,7 @@ static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l,
 	case CPU_5KC:
 	case CPU_TX49XX:
 	case CPU_PR4450:
+	case CPU_XLR:
 		uasm_i_nop(p);
 		tlbw(p);
 		break;
@@ -1151,8 +1152,8 @@ static void __cpuinit build_r4000_tlb_refill_handler(void)
 	struct uasm_reloc *r = relocs;
 	u32 *f;
 	unsigned int final_len;
-	struct mips_huge_tlb_info htlb_info;
-	enum vmalloc64_mode vmalloc_mode;
+	struct mips_huge_tlb_info htlb_info __maybe_unused;
+	enum vmalloc64_mode vmalloc_mode __maybe_unused;
 
 	memset(tlb_handler, 0, sizeof(tlb_handler));
 	memset(labels, 0, sizeof(labels));
diff --git a/arch/mips/mti-malta/malta-init.c b/arch/mips/mti-malta/malta-init.c
index 414f0c99b19..31180c321a1 100644
--- a/arch/mips/mti-malta/malta-init.c
+++ b/arch/mips/mti-malta/malta-init.c
@@ -193,8 +193,6 @@ extern struct plat_smp_ops msmtc_smp_ops;
 
 void __init prom_init(void)
 {
-	int result;
-
 	prom_argc = fw_arg0;
 	_prom_argv = (int *) fw_arg1;
 	_prom_envp = (int *) fw_arg2;
@@ -360,20 +358,14 @@ void __init prom_init(void)
 #ifdef CONFIG_SERIAL_8250_CONSOLE
 	console_config();
 #endif
-	/* Early detection of CMP support */
-	result = gcmp_probe(GCMP_BASE_ADDR, GCMP_ADDRSPACE_SZ);
-
 #ifdef CONFIG_MIPS_CMP
-	if (result)
+	/* Early detection of CMP support */
+	if (gcmp_probe(GCMP_BASE_ADDR, GCMP_ADDRSPACE_SZ))
 		register_smp_ops(&cmp_smp_ops);
+	else
 #endif
 #ifdef CONFIG_MIPS_MT_SMP
-#ifdef CONFIG_MIPS_CMP
-	if (!result)
 		register_smp_ops(&vsmp_smp_ops);
-#else
-	register_smp_ops(&vsmp_smp_ops);
-#endif
 #endif
 #ifdef CONFIG_MIPS_MT_SMTC
 	register_smp_ops(&msmtc_smp_ops);
diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c
index 9027061f0ea..1d36c511a7a 100644
--- a/arch/mips/mti-malta/malta-int.c
+++ b/arch/mips/mti-malta/malta-int.c
@@ -56,7 +56,6 @@ static DEFINE_RAW_SPINLOCK(mips_irq_lock);
 static inline int mips_pcibios_iack(void)
 {
 	int irq;
-	u32 dummy;
 
 	/*
 	 * Determine highest priority pending interrupt by performing
@@ -83,7 +82,7 @@ static inline int mips_pcibios_iack(void)
 		BONITO_PCIMAP_CFG = 0x20000;
 
 		/* Flush Bonito register block */
-		dummy = BONITO_PCIMAP_CFG;
+		(void) BONITO_PCIMAP_CFG;
 		iob();    /* sync */
 
 		irq = __raw_readl((u32 *)_pcictrl_bonito_pcicfg);
@@ -309,6 +308,8 @@ static void ipi_call_dispatch(void)
 
 static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id)
 {
+	scheduler_ipi();
+
 	return IRQ_HANDLED;
 }
 
diff --git a/arch/mips/netlogic/Kconfig b/arch/mips/netlogic/Kconfig
new file mode 100644
index 00000000000..a5ca743613f
--- /dev/null
+++ b/arch/mips/netlogic/Kconfig
@@ -0,0 +1,5 @@
+config NLM_COMMON
+	bool
+
+config NLM_XLR
+	bool
diff --git a/arch/mips/netlogic/xlr/Makefile b/arch/mips/netlogic/xlr/Makefile
new file mode 100644
index 00000000000..9bd3f731f62
--- /dev/null
+++ b/arch/mips/netlogic/xlr/Makefile
@@ -0,0 +1,5 @@
+obj-y				+= setup.o platform.o irq.o setup.o time.o
+obj-$(CONFIG_SMP)		+= smp.o smpboot.o
+obj-$(CONFIG_EARLY_PRINTK)	+= xlr_console.o
+
+EXTRA_CFLAGS			+= -Werror
diff --git a/arch/mips/netlogic/xlr/irq.c b/arch/mips/netlogic/xlr/irq.c
new file mode 100644
index 00000000000..1446d58e364
--- /dev/null
+++ b/arch/mips/netlogic/xlr/irq.c
@@ -0,0 +1,300 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+
+#include <asm/mipsregs.h>
+
+#include <asm/netlogic/xlr/iomap.h>
+#include <asm/netlogic/xlr/pic.h>
+#include <asm/netlogic/xlr/xlr.h>
+
+#include <asm/netlogic/interrupt.h>
+#include <asm/netlogic/mips-extns.h>
+
+static u64 nlm_irq_mask;
+static DEFINE_SPINLOCK(nlm_pic_lock);
+
+static void xlr_pic_enable(struct irq_data *d)
+{
+	nlm_reg_t *mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET);
+	unsigned long flags;
+	nlm_reg_t reg;
+	int irq = d->irq;
+
+	WARN(!PIC_IRQ_IS_IRT(irq), "Bad irq %d", irq);
+
+	spin_lock_irqsave(&nlm_pic_lock, flags);
+	reg = netlogic_read_reg(mmio, PIC_IRT_1_BASE + irq - PIC_IRQ_BASE);
+	netlogic_write_reg(mmio, PIC_IRT_1_BASE + irq - PIC_IRQ_BASE,
+			  reg | (1 << 6) | (1 << 30) | (1 << 31));
+	spin_unlock_irqrestore(&nlm_pic_lock, flags);
+}
+
+static void xlr_pic_mask(struct irq_data *d)
+{
+	nlm_reg_t *mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET);
+	unsigned long flags;
+	nlm_reg_t reg;
+	int irq = d->irq;
+
+	WARN(!PIC_IRQ_IS_IRT(irq), "Bad irq %d", irq);
+
+	spin_lock_irqsave(&nlm_pic_lock, flags);
+	reg = netlogic_read_reg(mmio, PIC_IRT_1_BASE + irq - PIC_IRQ_BASE);
+	netlogic_write_reg(mmio, PIC_IRT_1_BASE + irq - PIC_IRQ_BASE,
+			  reg | (1 << 6) | (1 << 30) | (0 << 31));
+	spin_unlock_irqrestore(&nlm_pic_lock, flags);
+}
+
+#ifdef CONFIG_PCI
+/* Extra ACK needed for XLR on chip PCI controller */
+static void xlr_pci_ack(struct irq_data *d)
+{
+	nlm_reg_t *pci_mmio = netlogic_io_mmio(NETLOGIC_IO_PCIX_OFFSET);
+
+	netlogic_read_reg(pci_mmio, (0x140 >> 2));
+}
+
+/* Extra ACK needed for XLS on chip PCIe controller */
+static void xls_pcie_ack(struct irq_data *d)
+{
+	nlm_reg_t *pcie_mmio_le = netlogic_io_mmio(NETLOGIC_IO_PCIE_1_OFFSET);
+
+	switch (d->irq) {
+	case PIC_PCIE_LINK0_IRQ:
+		netlogic_write_reg(pcie_mmio_le, (0x90 >> 2), 0xffffffff);
+		break;
+	case PIC_PCIE_LINK1_IRQ:
+		netlogic_write_reg(pcie_mmio_le, (0x94 >> 2), 0xffffffff);
+		break;
+	case PIC_PCIE_LINK2_IRQ:
+		netlogic_write_reg(pcie_mmio_le, (0x190 >> 2), 0xffffffff);
+		break;
+	case PIC_PCIE_LINK3_IRQ:
+		netlogic_write_reg(pcie_mmio_le, (0x194 >> 2), 0xffffffff);
+		break;
+	}
+}
+
+/* For XLS B silicon, the 3,4 PCI interrupts are different */
+static void xls_pcie_ack_b(struct irq_data *d)
+{
+	nlm_reg_t *pcie_mmio_le = netlogic_io_mmio(NETLOGIC_IO_PCIE_1_OFFSET);
+
+	switch (d->irq) {
+	case PIC_PCIE_LINK0_IRQ:
+		netlogic_write_reg(pcie_mmio_le, (0x90 >> 2), 0xffffffff);
+		break;
+	case PIC_PCIE_LINK1_IRQ:
+		netlogic_write_reg(pcie_mmio_le, (0x94 >> 2), 0xffffffff);
+		break;
+	case PIC_PCIE_XLSB0_LINK2_IRQ:
+		netlogic_write_reg(pcie_mmio_le, (0x190 >> 2), 0xffffffff);
+		break;
+	case PIC_PCIE_XLSB0_LINK3_IRQ:
+		netlogic_write_reg(pcie_mmio_le, (0x194 >> 2), 0xffffffff);
+		break;
+	}
+}
+#endif
+
+static void xlr_pic_ack(struct irq_data *d)
+{
+	unsigned long flags;
+	nlm_reg_t *mmio;
+	int irq = d->irq;
+	void *hd = irq_data_get_irq_handler_data(d);
+
+	WARN(!PIC_IRQ_IS_IRT(irq), "Bad irq %d", irq);
+
+	if (hd) {
+		void (*extra_ack)(void *) = hd;
+		extra_ack(d);
+	}
+	mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET);
+	spin_lock_irqsave(&nlm_pic_lock, flags);
+	netlogic_write_reg(mmio, PIC_INT_ACK, (1 << (irq - PIC_IRQ_BASE)));
+	spin_unlock_irqrestore(&nlm_pic_lock, flags);
+}
+
+/*
+ * This chip definition handles interrupts routed thru the XLR
+ * hardware PIC, currently IRQs 8-39 are mapped to hardware intr
+ * 0-31 wired the XLR PIC
+ */
+static struct irq_chip xlr_pic = {
+	.name		= "XLR-PIC",
+	.irq_enable	= xlr_pic_enable,
+	.irq_mask	= xlr_pic_mask,
+	.irq_ack	= xlr_pic_ack,
+};
+
+static void rsvd_irq_handler(struct irq_data *d)
+{
+	WARN(d->irq >= PIC_IRQ_BASE, "Bad irq %d", d->irq);
+}
+
+/*
+ * Chip definition for CPU originated interrupts(timer, msg) and
+ * IPIs
+ */
+struct irq_chip nlm_cpu_intr = {
+	.name		= "XLR-CPU-INTR",
+	.irq_enable	= rsvd_irq_handler,
+	.irq_mask	= rsvd_irq_handler,
+	.irq_ack	= rsvd_irq_handler,
+};
+
+void __init init_xlr_irqs(void)
+{
+	nlm_reg_t *mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET);
+	uint32_t thread_mask = 1;
+	int level, i;
+
+	pr_info("Interrupt thread mask [%x]\n", thread_mask);
+	for (i = 0; i < PIC_NUM_IRTS; i++) {
+		level = PIC_IRQ_IS_EDGE_TRIGGERED(i);
+
+		/* Bind all PIC irqs to boot cpu */
+		netlogic_write_reg(mmio, PIC_IRT_0_BASE + i, thread_mask);
+
+		/*
+		 * Use local scheduling and high polarity for all IRTs
+		 * Invalidate all IRTs, by default
+		 */
+		netlogic_write_reg(mmio, PIC_IRT_1_BASE + i,
+				(level << 30) | (1 << 6) | (PIC_IRQ_BASE + i));
+	}
+
+	/* Make all IRQs as level triggered by default */
+	for (i = 0; i < NR_IRQS; i++) {
+		if (PIC_IRQ_IS_IRT(i))
+			irq_set_chip_and_handler(i, &xlr_pic, handle_level_irq);
+		else
+			irq_set_chip_and_handler(i, &nlm_cpu_intr,
+						handle_level_irq);
+	}
+#ifdef CONFIG_SMP
+	irq_set_chip_and_handler(IRQ_IPI_SMP_FUNCTION, &nlm_cpu_intr,
+			 nlm_smp_function_ipi_handler);
+	irq_set_chip_and_handler(IRQ_IPI_SMP_RESCHEDULE, &nlm_cpu_intr,
+			 nlm_smp_resched_ipi_handler);
+	nlm_irq_mask |=
+	    ((1ULL << IRQ_IPI_SMP_FUNCTION) | (1ULL << IRQ_IPI_SMP_RESCHEDULE));
+#endif
+
+#ifdef CONFIG_PCI
+	/*
+	 * For PCI interrupts, we need to ack the PIC controller too, overload
+	 * irq handler data to do this
+	 */
+	if (nlm_chip_is_xls()) {
+		if (nlm_chip_is_xls_b()) {
+			irq_set_handler_data(PIC_PCIE_LINK0_IRQ,
+							xls_pcie_ack_b);
+			irq_set_handler_data(PIC_PCIE_LINK1_IRQ,
+							xls_pcie_ack_b);
+			irq_set_handler_data(PIC_PCIE_XLSB0_LINK2_IRQ,
+							xls_pcie_ack_b);
+			irq_set_handler_data(PIC_PCIE_XLSB0_LINK3_IRQ,
+							xls_pcie_ack_b);
+		} else {
+			irq_set_handler_data(PIC_PCIE_LINK0_IRQ, xls_pcie_ack);
+			irq_set_handler_data(PIC_PCIE_LINK1_IRQ, xls_pcie_ack);
+			irq_set_handler_data(PIC_PCIE_LINK2_IRQ, xls_pcie_ack);
+			irq_set_handler_data(PIC_PCIE_LINK3_IRQ, xls_pcie_ack);
+		}
+	} else {
+		/* XLR PCI controller ACK */
+		irq_set_handler_data(PIC_PCIE_XLSB0_LINK3_IRQ, xlr_pci_ack);
+	}
+#endif
+	/* unmask all PIC related interrupts. If no handler is installed by the
+	 * drivers, it'll just ack the interrupt and return
+	 */
+	for (i = PIC_IRT_FIRST_IRQ; i <= PIC_IRT_LAST_IRQ; i++)
+		nlm_irq_mask |= (1ULL << i);
+
+	nlm_irq_mask |= (1ULL << IRQ_TIMER);
+}
+
+void __init arch_init_irq(void)
+{
+	/* Initialize the irq descriptors */
+	init_xlr_irqs();
+	write_c0_eimr(nlm_irq_mask);
+}
+
+void __cpuinit nlm_smp_irq_init(void)
+{
+	/* set interrupt mask for non-zero cpus */
+	write_c0_eimr(nlm_irq_mask);
+}
+
+asmlinkage void plat_irq_dispatch(void)
+{
+	uint64_t eirr;
+	int i;
+
+	eirr = read_c0_eirr() & read_c0_eimr();
+	if (!eirr)
+		return;
+
+	/* no need of EIRR here, writing compare clears interrupt */
+	if (eirr & (1 << IRQ_TIMER)) {
+		do_IRQ(IRQ_TIMER);
+		return;
+	}
+
+	/* use dcltz: optimize below code */
+	for (i = 63; i != -1; i--) {
+		if (eirr & (1ULL << i))
+			break;
+	}
+	if (i == -1) {
+		pr_err("no interrupt !!\n");
+		return;
+	}
+
+	/* Ack eirr */
+	write_c0_eirr(1ULL << i);
+
+	do_IRQ(i);
+}
diff --git a/arch/mips/netlogic/xlr/platform.c b/arch/mips/netlogic/xlr/platform.c
new file mode 100644
index 00000000000..609ec253464
--- /dev/null
+++ b/arch/mips/netlogic/xlr/platform.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright 2011, Netlogic Microsystems.
+ * Copyright 2004, Matt Porter <mporter@kernel.crashing.org>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/resource.h>
+#include <linux/serial_8250.h>
+#include <linux/serial_reg.h>
+
+#include <asm/netlogic/xlr/iomap.h>
+#include <asm/netlogic/xlr/pic.h>
+#include <asm/netlogic/xlr/xlr.h>
+
+unsigned int nlm_xlr_uart_in(struct uart_port *p, int offset)
+{
+	nlm_reg_t *mmio;
+	unsigned int value;
+
+	/* XLR uart does not need any mapping of regs */
+	mmio = (nlm_reg_t *)(p->membase + (offset << p->regshift));
+	value = netlogic_read_reg(mmio, 0);
+
+	/* See XLR/XLS errata */
+	if (offset == UART_MSR)
+		value ^= 0xF0;
+	else if (offset == UART_MCR)
+		value ^= 0x3;
+
+	return value;
+}
+
+void nlm_xlr_uart_out(struct uart_port *p, int offset, int value)
+{
+	nlm_reg_t *mmio;
+
+	/* XLR uart does not need any mapping of regs */
+	mmio = (nlm_reg_t *)(p->membase + (offset << p->regshift));
+
+	/* See XLR/XLS errata */
+	if (offset == UART_MSR)
+		value ^= 0xF0;
+	else if (offset == UART_MCR)
+		value ^= 0x3;
+
+	netlogic_write_reg(mmio, 0, value);
+}
+
+#define PORT(_irq)					\
+	{						\
+		.irq		= _irq,			\
+		.regshift	= 2,			\
+		.iotype		= UPIO_MEM32,		\
+		.flags		= (UPF_SKIP_TEST |	\
+			 UPF_FIXED_TYPE | UPF_BOOT_AUTOCONF),\
+		.uartclk	= PIC_CLKS_PER_SEC,	\
+		.type		= PORT_16550A,		\
+		.serial_in	= nlm_xlr_uart_in,	\
+		.serial_out	= nlm_xlr_uart_out,	\
+	}
+
+static struct plat_serial8250_port xlr_uart_data[] = {
+	PORT(PIC_UART_0_IRQ),
+	PORT(PIC_UART_1_IRQ),
+	{},
+};
+
+static struct platform_device uart_device = {
+	.name		= "serial8250",
+	.id		= PLAT8250_DEV_PLATFORM,
+	.dev = {
+		.platform_data = xlr_uart_data,
+	},
+};
+
+static int __init nlm_uart_init(void)
+{
+	nlm_reg_t *mmio;
+
+	mmio = netlogic_io_mmio(NETLOGIC_IO_UART_0_OFFSET);
+	xlr_uart_data[0].membase = (void __iomem *)mmio;
+	xlr_uart_data[0].mapbase = CPHYSADDR((unsigned long)mmio);
+
+	mmio = netlogic_io_mmio(NETLOGIC_IO_UART_1_OFFSET);
+	xlr_uart_data[1].membase = (void __iomem *)mmio;
+	xlr_uart_data[1].mapbase = CPHYSADDR((unsigned long)mmio);
+
+	return platform_device_register(&uart_device);
+}
+
+arch_initcall(nlm_uart_init);
diff --git a/arch/mips/netlogic/xlr/setup.c b/arch/mips/netlogic/xlr/setup.c
new file mode 100644
index 00000000000..482802569e7
--- /dev/null
+++ b/arch/mips/netlogic/xlr/setup.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/serial_8250.h>
+#include <linux/pm.h>
+
+#include <asm/reboot.h>
+#include <asm/time.h>
+#include <asm/bootinfo.h>
+#include <asm/smp-ops.h>
+
+#include <asm/netlogic/interrupt.h>
+#include <asm/netlogic/psb-bootinfo.h>
+
+#include <asm/netlogic/xlr/xlr.h>
+#include <asm/netlogic/xlr/iomap.h>
+#include <asm/netlogic/xlr/pic.h>
+#include <asm/netlogic/xlr/gpio.h>
+
+unsigned long netlogic_io_base = (unsigned long)(DEFAULT_NETLOGIC_IO_BASE);
+unsigned long nlm_common_ebase = 0x0;
+struct psb_info nlm_prom_info;
+
+static void nlm_early_serial_setup(void)
+{
+	struct uart_port s;
+	nlm_reg_t *uart_base;
+
+	uart_base = netlogic_io_mmio(NETLOGIC_IO_UART_0_OFFSET);
+	memset(&s, 0, sizeof(s));
+	s.flags		= ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST;
+	s.iotype	= UPIO_MEM32;
+	s.regshift	= 2;
+	s.irq		= PIC_UART_0_IRQ;
+	s.uartclk	= PIC_CLKS_PER_SEC;
+	s.serial_in	= nlm_xlr_uart_in;
+	s.serial_out	= nlm_xlr_uart_out;
+	s.mapbase	= (unsigned long)uart_base;
+	s.membase	= (unsigned char __iomem *)uart_base;
+	early_serial_setup(&s);
+}
+
+static void nlm_linux_exit(void)
+{
+	nlm_reg_t *mmio;
+
+	mmio = netlogic_io_mmio(NETLOGIC_IO_GPIO_OFFSET);
+	/* trigger a chip reset by writing 1 to GPIO_SWRESET_REG */
+	netlogic_write_reg(mmio, NETLOGIC_GPIO_SWRESET_REG, 1);
+	for ( ; ; )
+		cpu_wait();
+}
+
+void __init plat_mem_setup(void)
+{
+	panic_timeout	= 5;
+	_machine_restart = (void (*)(char *))nlm_linux_exit;
+	_machine_halt	= nlm_linux_exit;
+	pm_power_off	= nlm_linux_exit;
+}
+
+const char *get_system_type(void)
+{
+	return "Netlogic XLR/XLS Series";
+}
+
+void __init prom_free_prom_memory(void)
+{
+	/* Nothing yet */
+}
+
+static void build_arcs_cmdline(int *argv)
+{
+	int i, remain, len;
+	char *arg;
+
+	remain = sizeof(arcs_cmdline) - 1;
+	arcs_cmdline[0] = '\0';
+	for (i = 0; argv[i] != 0; i++) {
+		arg = (char *)(long)argv[i];
+		len = strlen(arg);
+		if (len + 1 > remain)
+			break;
+		strcat(arcs_cmdline, arg);
+		strcat(arcs_cmdline, " ");
+		remain -=  len + 1;
+	}
+
+	/* Add the default options here */
+	if ((strstr(arcs_cmdline, "console=")) == NULL) {
+		arg = "console=ttyS0,38400 ";
+		len = strlen(arg);
+		if (len > remain)
+			goto fail;
+		strcat(arcs_cmdline, arg);
+		remain -= len;
+	}
+#ifdef CONFIG_BLK_DEV_INITRD
+	if ((strstr(arcs_cmdline, "rdinit=")) == NULL) {
+		arg = "rdinit=/sbin/init ";
+		len = strlen(arg);
+		if (len > remain)
+			goto fail;
+		strcat(arcs_cmdline, arg);
+		remain -= len;
+	}
+#endif
+	return;
+fail:
+	panic("Cannot add %s, command line too big!", arg);
+}
+
+static void prom_add_memory(void)
+{
+	struct nlm_boot_mem_map *bootm;
+	u64 start, size;
+	u64 pref_backup = 512;  /* avoid pref walking beyond end */
+	int i;
+
+	bootm = (void *)(long)nlm_prom_info.psb_mem_map;
+	for (i = 0; i < bootm->nr_map; i++) {
+		if (bootm->map[i].type != BOOT_MEM_RAM)
+			continue;
+		start = bootm->map[i].addr;
+		size   = bootm->map[i].size;
+
+		/* Work around for using bootloader mem */
+		if (i == 0 && start == 0 && size == 0x0c000000)
+			size = 0x0ff00000;
+
+		add_memory_region(start, size - pref_backup, BOOT_MEM_RAM);
+	}
+}
+
+void __init prom_init(void)
+{
+	int *argv, *envp;		/* passed as 32 bit ptrs */
+	struct psb_info *prom_infop;
+
+	/* truncate to 32 bit and sign extend all args */
+	argv = (int *)(long)(int)fw_arg1;
+	envp = (int *)(long)(int)fw_arg2;
+	prom_infop = (struct psb_info *)(long)(int)fw_arg3;
+
+	nlm_prom_info = *prom_infop;
+
+	nlm_early_serial_setup();
+	build_arcs_cmdline(argv);
+	nlm_common_ebase = read_c0_ebase() & (~((1 << 12) - 1));
+	prom_add_memory();
+
+#ifdef CONFIG_SMP
+	nlm_wakeup_secondary_cpus(nlm_prom_info.online_cpu_map);
+	register_smp_ops(&nlm_smp_ops);
+#endif
+}
diff --git a/arch/mips/netlogic/xlr/smp.c b/arch/mips/netlogic/xlr/smp.c
new file mode 100644
index 00000000000..b495a7f1433
--- /dev/null
+++ b/arch/mips/netlogic/xlr/smp.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/irq.h>
+
+#include <asm/mmu_context.h>
+
+#include <asm/netlogic/interrupt.h>
+#include <asm/netlogic/mips-extns.h>
+
+#include <asm/netlogic/xlr/iomap.h>
+#include <asm/netlogic/xlr/pic.h>
+#include <asm/netlogic/xlr/xlr.h>
+
+void core_send_ipi(int logical_cpu, unsigned int action)
+{
+	int cpu = cpu_logical_map(logical_cpu);
+	u32 tid = cpu & 0x3;
+	u32 pid = (cpu >> 2) & 0x07;
+	u32 ipi = (tid << 16) | (pid << 20);
+
+	if (action & SMP_CALL_FUNCTION)
+		ipi |= IRQ_IPI_SMP_FUNCTION;
+	else if (action & SMP_RESCHEDULE_YOURSELF)
+		ipi |= IRQ_IPI_SMP_RESCHEDULE;
+	else
+		return;
+
+	pic_send_ipi(ipi);
+}
+
+void nlm_send_ipi_single(int cpu, unsigned int action)
+{
+	core_send_ipi(cpu, action);
+}
+
+void nlm_send_ipi_mask(const struct cpumask *mask, unsigned int action)
+{
+	int cpu;
+
+	for_each_cpu(cpu, mask) {
+		core_send_ipi(cpu, action);
+	}
+}
+
+/* IRQ_IPI_SMP_FUNCTION Handler */
+void nlm_smp_function_ipi_handler(unsigned int irq, struct irq_desc *desc)
+{
+	smp_call_function_interrupt();
+}
+
+/* IRQ_IPI_SMP_RESCHEDULE  handler */
+void nlm_smp_resched_ipi_handler(unsigned int irq, struct irq_desc *desc)
+{
+	set_need_resched();
+}
+
+void nlm_common_ipi_handler(int irq, struct pt_regs *regs)
+{
+	if (irq == IRQ_IPI_SMP_FUNCTION) {
+		smp_call_function_interrupt();
+	} else {
+		/* Announce that we are for reschduling */
+		set_need_resched();
+	}
+}
+
+/*
+ * Called before going into mips code, early cpu init
+ */
+void nlm_early_init_secondary(void)
+{
+	write_c0_ebase((uint32_t)nlm_common_ebase);
+	/* TLB partition here later */
+}
+
+/*
+ * Code to run on secondary just after probing the CPU
+ */
+static void __cpuinit nlm_init_secondary(void)
+{
+	nlm_smp_irq_init();
+}
+
+void nlm_smp_finish(void)
+{
+#ifdef notyet
+	nlm_common_msgring_cpu_init();
+#endif
+}
+
+void nlm_cpus_done(void)
+{
+}
+
+/*
+ * Boot all other cpus in the system, initialize them, and bring them into
+ * the boot function
+ */
+int nlm_cpu_unblock[NR_CPUS];
+int nlm_cpu_ready[NR_CPUS];
+unsigned long nlm_next_gp;
+unsigned long nlm_next_sp;
+cpumask_t phys_cpu_present_map;
+
+void nlm_boot_secondary(int logical_cpu, struct task_struct *idle)
+{
+	unsigned long gp = (unsigned long)task_thread_info(idle);
+	unsigned long sp = (unsigned long)__KSTK_TOS(idle);
+	int cpu = cpu_logical_map(logical_cpu);
+
+	nlm_next_sp = sp;
+	nlm_next_gp = gp;
+
+	/* barrier */
+	__sync();
+	nlm_cpu_unblock[cpu] = 1;
+}
+
+void __init nlm_smp_setup(void)
+{
+	unsigned int boot_cpu;
+	int num_cpus, i;
+
+	boot_cpu = hard_smp_processor_id();
+	cpus_clear(phys_cpu_present_map);
+
+	cpu_set(boot_cpu, phys_cpu_present_map);
+	__cpu_number_map[boot_cpu] = 0;
+	__cpu_logical_map[0] = boot_cpu;
+	cpu_set(0, cpu_possible_map);
+
+	num_cpus = 1;
+	for (i = 0; i < NR_CPUS; i++) {
+		if (nlm_cpu_ready[i]) {
+			cpu_set(i, phys_cpu_present_map);
+			__cpu_number_map[i] = num_cpus;
+			__cpu_logical_map[num_cpus] = i;
+			cpu_set(num_cpus, cpu_possible_map);
+			++num_cpus;
+		}
+	}
+
+	pr_info("Phys CPU present map: %lx, possible map %lx\n",
+		(unsigned long)phys_cpu_present_map.bits[0],
+		(unsigned long)cpu_possible_map.bits[0]);
+
+	pr_info("Detected %i Slave CPU(s)\n", num_cpus);
+}
+
+void nlm_prepare_cpus(unsigned int max_cpus)
+{
+}
+
+struct plat_smp_ops nlm_smp_ops = {
+	.send_ipi_single	= nlm_send_ipi_single,
+	.send_ipi_mask		= nlm_send_ipi_mask,
+	.init_secondary		= nlm_init_secondary,
+	.smp_finish		= nlm_smp_finish,
+	.cpus_done		= nlm_cpus_done,
+	.boot_secondary		= nlm_boot_secondary,
+	.smp_setup		= nlm_smp_setup,
+	.prepare_cpus		= nlm_prepare_cpus,
+};
+
+unsigned long secondary_entry_point;
+
+int nlm_wakeup_secondary_cpus(u32 wakeup_mask)
+{
+	unsigned int tid, pid, ipi, i, boot_cpu;
+	void *reset_vec;
+
+	secondary_entry_point = (unsigned long)prom_pre_boot_secondary_cpus;
+	reset_vec = (void *)CKSEG1ADDR(0x1fc00000);
+	memcpy(reset_vec, nlm_boot_smp_nmi, 0x80);
+	boot_cpu = hard_smp_processor_id();
+
+	for (i = 0; i < NR_CPUS; i++) {
+		if (i == boot_cpu)
+			continue;
+		if (wakeup_mask & (1u << i)) {
+			tid = i & 0x3;
+			pid = (i >> 2) & 0x7;
+			ipi = (tid << 16) | (pid << 20) | (1 << 8);
+			pic_send_ipi(ipi);
+		}
+	}
+
+	return 0;
+}
diff --git a/arch/mips/netlogic/xlr/smpboot.S b/arch/mips/netlogic/xlr/smpboot.S
new file mode 100644
index 00000000000..b8e074402c9
--- /dev/null
+++ b/arch/mips/netlogic/xlr/smpboot.S
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/regdef.h>
+#include <asm/mipsregs.h>
+
+
+/* Don't jump to linux function from Bootloader stack. Change it
+ * here. Kernel might allocate bootloader memory before all the CPUs are
+ * brought up (eg: Inode cache region) and we better don't overwrite this
+ * memory
+ */
+NESTED(prom_pre_boot_secondary_cpus, 16, sp)
+	.set	mips64
+	mfc0	t0, $15, 1	# read ebase
+	andi	t0, 0x1f	# t0 has the processor_id()
+	sll	t0, 2		# offset in cpu array
+
+	PTR_LA	t1, nlm_cpu_ready # mark CPU ready
+	PTR_ADDU t1, t0
+	li	t2, 1
+	sw	t2, 0(t1)
+
+	PTR_LA	t1, nlm_cpu_unblock
+	PTR_ADDU t1, t0
+1:	lw	t2, 0(t1)	# wait till unblocked
+	beqz	t2, 1b
+	nop
+
+	PTR_LA	t1, nlm_next_sp
+	PTR_L	sp, 0(t1)
+	PTR_LA	t1, nlm_next_gp
+	PTR_L	gp, 0(t1)
+
+	PTR_LA	t0, nlm_early_init_secondary
+	jalr	t0
+	nop
+
+	PTR_LA	t0, smp_bootstrap
+	jr	t0
+	nop
+END(prom_pre_boot_secondary_cpus)
+
+NESTED(nlm_boot_smp_nmi, 0, sp)
+	.set push
+	.set noat
+	.set mips64
+	.set noreorder
+
+	/* Clear the  NMI and BEV bits */
+	MFC0	k0, CP0_STATUS
+	li 	k1, 0xffb7ffff
+	and	k0, k0, k1
+	MTC0	k0, CP0_STATUS
+
+	PTR_LA  k1, secondary_entry_point
+	PTR_L	k0, 0(k1)
+	jr	k0
+	nop
+	.set pop
+END(nlm_boot_smp_nmi)
diff --git a/arch/mips/netlogic/xlr/time.c b/arch/mips/netlogic/xlr/time.c
new file mode 100644
index 00000000000..0d81b262593
--- /dev/null
+++ b/arch/mips/netlogic/xlr/time.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/init.h>
+
+#include <asm/time.h>
+#include <asm/netlogic/interrupt.h>
+#include <asm/netlogic/psb-bootinfo.h>
+
+unsigned int __cpuinit get_c0_compare_int(void)
+{
+	return IRQ_TIMER;
+}
+
+void __init plat_time_init(void)
+{
+	mips_hpt_frequency = nlm_prom_info.cpu_frequency;
+	pr_info("MIPS counter frequency [%ld]\n",
+		(unsigned long)mips_hpt_frequency);
+}
diff --git a/arch/mips/netlogic/xlr/xlr_console.c b/arch/mips/netlogic/xlr/xlr_console.c
new file mode 100644
index 00000000000..759df069220
--- /dev/null
+++ b/arch/mips/netlogic/xlr/xlr_console.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/types.h>
+#include <asm/netlogic/xlr/iomap.h>
+
+void prom_putchar(char c)
+{
+	nlm_reg_t *mmio;
+
+	mmio = netlogic_io_mmio(NETLOGIC_IO_UART_0_OFFSET);
+	while (netlogic_read_reg(mmio, 0x5) == 0)
+		;
+	netlogic_write_reg(mmio, 0x0, c);
+}
diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile
index c9209ca6c8e..4df87993744 100644
--- a/arch/mips/pci/Makefile
+++ b/arch/mips/pci/Makefile
@@ -41,6 +41,7 @@ obj-$(CONFIG_SIBYTE_SB1250)	+= fixup-sb1250.o pci-sb1250.o
 obj-$(CONFIG_SIBYTE_BCM112X)	+= fixup-sb1250.o pci-sb1250.o
 obj-$(CONFIG_SIBYTE_BCM1x80)	+= pci-bcm1480.o pci-bcm1480ht.o
 obj-$(CONFIG_SNI_RM)		+= fixup-sni.o ops-sni.o
+obj-$(CONFIG_SOC_XWAY)		+= pci-lantiq.o ops-lantiq.o
 obj-$(CONFIG_TANBAC_TB0219)	+= fixup-tb0219.o
 obj-$(CONFIG_TANBAC_TB0226)	+= fixup-tb0226.o
 obj-$(CONFIG_TANBAC_TB0287)	+= fixup-tb0287.o
@@ -55,6 +56,7 @@ obj-$(CONFIG_ZAO_CAPCELLA)	+= fixup-capcella.o
 obj-$(CONFIG_WR_PPMC)		+= fixup-wrppmc.o
 obj-$(CONFIG_MIKROTIK_RB532)	+= pci-rc32434.o ops-rc32434.o fixup-rc32434.o
 obj-$(CONFIG_CPU_CAVIUM_OCTEON)	+= pci-octeon.o pcie-octeon.o
+obj-$(CONFIG_NLM_XLR)		+= pci-xlr.o
 
 ifdef CONFIG_PCI_MSI
 obj-$(CONFIG_CPU_CAVIUM_OCTEON)	+= msi-octeon.o
diff --git a/arch/mips/pci/ops-lantiq.c b/arch/mips/pci/ops-lantiq.c
new file mode 100644
index 00000000000..1f2afb55cc7
--- /dev/null
+++ b/arch/mips/pci/ops-lantiq.c
@@ -0,0 +1,116 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/mm.h>
+#include <asm/addrspace.h>
+#include <linux/vmalloc.h>
+
+#include <lantiq_soc.h>
+
+#include "pci-lantiq.h"
+
+#define LTQ_PCI_CFG_BUSNUM_SHF 16
+#define LTQ_PCI_CFG_DEVNUM_SHF 11
+#define LTQ_PCI_CFG_FUNNUM_SHF 8
+
+#define PCI_ACCESS_READ  0
+#define PCI_ACCESS_WRITE 1
+
+static int ltq_pci_config_access(unsigned char access_type, struct pci_bus *bus,
+	unsigned int devfn, unsigned int where, u32 *data)
+{
+	unsigned long cfg_base;
+	unsigned long flags;
+	u32 temp;
+
+	/* we support slot from 0 to 15 dev_fn & 0x68 (AD29) is the
+	   SoC itself */
+	if ((bus->number != 0) || ((devfn & 0xf8) > 0x78)
+		|| ((devfn & 0xf8) == 0) || ((devfn & 0xf8) == 0x68))
+		return 1;
+
+	spin_lock_irqsave(&ebu_lock, flags);
+
+	cfg_base = (unsigned long) ltq_pci_mapped_cfg;
+	cfg_base |= (bus->number << LTQ_PCI_CFG_BUSNUM_SHF) | (devfn <<
+			LTQ_PCI_CFG_FUNNUM_SHF) | (where & ~0x3);
+
+	/* Perform access */
+	if (access_type == PCI_ACCESS_WRITE) {
+		ltq_w32(swab32(*data), ((u32 *)cfg_base));
+	} else {
+		*data = ltq_r32(((u32 *)(cfg_base)));
+		*data = swab32(*data);
+	}
+	wmb();
+
+	/* clean possible Master abort */
+	cfg_base = (unsigned long) ltq_pci_mapped_cfg;
+	cfg_base |= (0x0 << LTQ_PCI_CFG_FUNNUM_SHF) + 4;
+	temp = ltq_r32(((u32 *)(cfg_base)));
+	temp = swab32(temp);
+	cfg_base = (unsigned long) ltq_pci_mapped_cfg;
+	cfg_base |= (0x68 << LTQ_PCI_CFG_FUNNUM_SHF) + 4;
+	ltq_w32(temp, ((u32 *)cfg_base));
+
+	spin_unlock_irqrestore(&ebu_lock, flags);
+
+	if (((*data) == 0xffffffff) && (access_type == PCI_ACCESS_READ))
+		return 1;
+
+	return 0;
+}
+
+int ltq_pci_read_config_dword(struct pci_bus *bus, unsigned int devfn,
+	int where, int size, u32 *val)
+{
+	u32 data = 0;
+
+	if (ltq_pci_config_access(PCI_ACCESS_READ, bus, devfn, where, &data))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (size == 1)
+		*val = (data >> ((where & 3) << 3)) & 0xff;
+	else if (size == 2)
+		*val = (data >> ((where & 3) << 3)) & 0xffff;
+	else
+		*val = data;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+int ltq_pci_write_config_dword(struct pci_bus *bus, unsigned int devfn,
+	int where, int size, u32 val)
+{
+	u32 data = 0;
+
+	if (size == 4) {
+		data = val;
+	} else {
+		if (ltq_pci_config_access(PCI_ACCESS_READ, bus,
+				devfn, where, &data))
+			return PCIBIOS_DEVICE_NOT_FOUND;
+
+		if (size == 1)
+			data = (data & ~(0xff << ((where & 3) << 3))) |
+				(val << ((where & 3) << 3));
+		else if (size == 2)
+			data = (data & ~(0xffff << ((where & 3) << 3))) |
+				(val << ((where & 3) << 3));
+	}
+
+	if (ltq_pci_config_access(PCI_ACCESS_WRITE, bus, devfn, where, &data))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	return PCIBIOS_SUCCESSFUL;
+}
diff --git a/arch/mips/pci/pci-lantiq.c b/arch/mips/pci/pci-lantiq.c
new file mode 100644
index 00000000000..603d7493e96
--- /dev/null
+++ b/arch/mips/pci/pci-lantiq.c
@@ -0,0 +1,297 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/platform_device.h>
+
+#include <asm/pci.h>
+#include <asm/gpio.h>
+#include <asm/addrspace.h>
+
+#include <lantiq_soc.h>
+#include <lantiq_irq.h>
+#include <lantiq_platform.h>
+
+#include "pci-lantiq.h"
+
+#define LTQ_PCI_CFG_BASE		0x17000000
+#define LTQ_PCI_CFG_SIZE		0x00008000
+#define LTQ_PCI_MEM_BASE		0x18000000
+#define LTQ_PCI_MEM_SIZE		0x02000000
+#define LTQ_PCI_IO_BASE			0x1AE00000
+#define LTQ_PCI_IO_SIZE			0x00200000
+
+#define PCI_CR_FCI_ADDR_MAP0		0x00C0
+#define PCI_CR_FCI_ADDR_MAP1		0x00C4
+#define PCI_CR_FCI_ADDR_MAP2		0x00C8
+#define PCI_CR_FCI_ADDR_MAP3		0x00CC
+#define PCI_CR_FCI_ADDR_MAP4		0x00D0
+#define PCI_CR_FCI_ADDR_MAP5		0x00D4
+#define PCI_CR_FCI_ADDR_MAP6		0x00D8
+#define PCI_CR_FCI_ADDR_MAP7		0x00DC
+#define PCI_CR_CLK_CTRL			0x0000
+#define PCI_CR_PCI_MOD			0x0030
+#define PCI_CR_PC_ARB			0x0080
+#define PCI_CR_FCI_ADDR_MAP11hg		0x00E4
+#define PCI_CR_BAR11MASK		0x0044
+#define PCI_CR_BAR12MASK		0x0048
+#define PCI_CR_BAR13MASK		0x004C
+#define PCI_CS_BASE_ADDR1		0x0010
+#define PCI_CR_PCI_ADDR_MAP11		0x0064
+#define PCI_CR_FCI_BURST_LENGTH		0x00E8
+#define PCI_CR_PCI_EOI			0x002C
+#define PCI_CS_STS_CMD			0x0004
+
+#define PCI_MASTER0_REQ_MASK_2BITS	8
+#define PCI_MASTER1_REQ_MASK_2BITS	10
+#define PCI_MASTER2_REQ_MASK_2BITS	12
+#define INTERNAL_ARB_ENABLE_BIT		0
+
+#define LTQ_CGU_IFCCR		0x0018
+#define LTQ_CGU_PCICR		0x0034
+
+#define ltq_pci_w32(x, y)	ltq_w32((x), ltq_pci_membase + (y))
+#define ltq_pci_r32(x)		ltq_r32(ltq_pci_membase + (x))
+
+#define ltq_pci_cfg_w32(x, y)	ltq_w32((x), ltq_pci_mapped_cfg + (y))
+#define ltq_pci_cfg_r32(x)	ltq_r32(ltq_pci_mapped_cfg + (x))
+
+struct ltq_pci_gpio_map {
+	int pin;
+	int alt0;
+	int alt1;
+	int dir;
+	char *name;
+};
+
+/* the pci core can make use of the following gpios */
+static struct ltq_pci_gpio_map ltq_pci_gpio_map[] = {
+	{ 0, 1, 0, 0, "pci-exin0" },
+	{ 1, 1, 0, 0, "pci-exin1" },
+	{ 2, 1, 0, 0, "pci-exin2" },
+	{ 39, 1, 0, 0, "pci-exin3" },
+	{ 10, 1, 0, 0, "pci-exin4" },
+	{ 9, 1, 0, 0, "pci-exin5" },
+	{ 30, 1, 0, 1, "pci-gnt1" },
+	{ 23, 1, 0, 1, "pci-gnt2" },
+	{ 19, 1, 0, 1, "pci-gnt3" },
+	{ 38, 1, 0, 1, "pci-gnt4" },
+	{ 29, 1, 0, 0, "pci-req1" },
+	{ 31, 1, 0, 0, "pci-req2" },
+	{ 3, 1, 0, 0, "pci-req3" },
+	{ 37, 1, 0, 0, "pci-req4" },
+};
+
+__iomem void *ltq_pci_mapped_cfg;
+static __iomem void *ltq_pci_membase;
+
+int (*ltqpci_plat_dev_init)(struct pci_dev *dev) = NULL;
+
+/* Since the PCI REQ pins can be reused for other functionality, make it
+   possible to exclude those from interpretation by the PCI controller */
+static int ltq_pci_req_mask = 0xf;
+
+static int *ltq_pci_irq_map;
+
+struct pci_ops ltq_pci_ops = {
+	.read	= ltq_pci_read_config_dword,
+	.write	= ltq_pci_write_config_dword
+};
+
+static struct resource pci_io_resource = {
+	.name	= "pci io space",
+	.start	= LTQ_PCI_IO_BASE,
+	.end	= LTQ_PCI_IO_BASE + LTQ_PCI_IO_SIZE - 1,
+	.flags	= IORESOURCE_IO
+};
+
+static struct resource pci_mem_resource = {
+	.name	= "pci memory space",
+	.start	= LTQ_PCI_MEM_BASE,
+	.end	= LTQ_PCI_MEM_BASE + LTQ_PCI_MEM_SIZE - 1,
+	.flags	= IORESOURCE_MEM
+};
+
+static struct pci_controller ltq_pci_controller = {
+	.pci_ops	= &ltq_pci_ops,
+	.mem_resource	= &pci_mem_resource,
+	.mem_offset	= 0x00000000UL,
+	.io_resource	= &pci_io_resource,
+	.io_offset	= 0x00000000UL,
+};
+
+int pcibios_plat_dev_init(struct pci_dev *dev)
+{
+	if (ltqpci_plat_dev_init)
+		return ltqpci_plat_dev_init(dev);
+
+	return 0;
+}
+
+static u32 ltq_calc_bar11mask(void)
+{
+	u32 mem, bar11mask;
+
+	/* BAR11MASK value depends on available memory on system. */
+	mem = num_physpages * PAGE_SIZE;
+	bar11mask = (0x0ffffff0 & ~((1 << (fls(mem) - 1)) - 1)) | 8;
+
+	return bar11mask;
+}
+
+static void ltq_pci_setup_gpio(int gpio)
+{
+	int i;
+	for (i = 0; i < ARRAY_SIZE(ltq_pci_gpio_map); i++) {
+		if (gpio & (1 << i)) {
+			ltq_gpio_request(ltq_pci_gpio_map[i].pin,
+				ltq_pci_gpio_map[i].alt0,
+				ltq_pci_gpio_map[i].alt1,
+				ltq_pci_gpio_map[i].dir,
+				ltq_pci_gpio_map[i].name);
+		}
+	}
+	ltq_gpio_request(21, 0, 0, 1, "pci-reset");
+	ltq_pci_req_mask = (gpio >> PCI_REQ_SHIFT) & PCI_REQ_MASK;
+}
+
+static int __devinit ltq_pci_startup(struct ltq_pci_data *conf)
+{
+	u32 temp_buffer;
+
+	/* set clock to 33Mhz */
+	ltq_cgu_w32(ltq_cgu_r32(LTQ_CGU_IFCCR) & ~0xf00000, LTQ_CGU_IFCCR);
+	ltq_cgu_w32(ltq_cgu_r32(LTQ_CGU_IFCCR) | 0x800000, LTQ_CGU_IFCCR);
+
+	/* external or internal clock ? */
+	if (conf->clock) {
+		ltq_cgu_w32(ltq_cgu_r32(LTQ_CGU_IFCCR) & ~(1 << 16),
+			LTQ_CGU_IFCCR);
+		ltq_cgu_w32((1 << 30), LTQ_CGU_PCICR);
+	} else {
+		ltq_cgu_w32(ltq_cgu_r32(LTQ_CGU_IFCCR) | (1 << 16),
+			LTQ_CGU_IFCCR);
+		ltq_cgu_w32((1 << 31) | (1 << 30), LTQ_CGU_PCICR);
+	}
+
+	/* setup pci clock and gpis used by pci */
+	ltq_pci_setup_gpio(conf->gpio);
+
+	/* enable auto-switching between PCI and EBU */
+	ltq_pci_w32(0xa, PCI_CR_CLK_CTRL);
+
+	/* busy, i.e. configuration is not done, PCI access has to be retried */
+	ltq_pci_w32(ltq_pci_r32(PCI_CR_PCI_MOD) & ~(1 << 24), PCI_CR_PCI_MOD);
+	wmb();
+	/* BUS Master/IO/MEM access */
+	ltq_pci_cfg_w32(ltq_pci_cfg_r32(PCI_CS_STS_CMD) | 7, PCI_CS_STS_CMD);
+
+	/* enable external 2 PCI masters */
+	temp_buffer = ltq_pci_r32(PCI_CR_PC_ARB);
+	temp_buffer &= (~(ltq_pci_req_mask << 16));
+	/* enable internal arbiter */
+	temp_buffer |= (1 << INTERNAL_ARB_ENABLE_BIT);
+	/* enable internal PCI master reqest */
+	temp_buffer &= (~(3 << PCI_MASTER0_REQ_MASK_2BITS));
+
+	/* enable EBU request */
+	temp_buffer &= (~(3 << PCI_MASTER1_REQ_MASK_2BITS));
+
+	/* enable all external masters request */
+	temp_buffer &= (~(3 << PCI_MASTER2_REQ_MASK_2BITS));
+	ltq_pci_w32(temp_buffer, PCI_CR_PC_ARB);
+	wmb();
+
+	/* setup BAR memory regions */
+	ltq_pci_w32(0x18000000, PCI_CR_FCI_ADDR_MAP0);
+	ltq_pci_w32(0x18400000, PCI_CR_FCI_ADDR_MAP1);
+	ltq_pci_w32(0x18800000, PCI_CR_FCI_ADDR_MAP2);
+	ltq_pci_w32(0x18c00000, PCI_CR_FCI_ADDR_MAP3);
+	ltq_pci_w32(0x19000000, PCI_CR_FCI_ADDR_MAP4);
+	ltq_pci_w32(0x19400000, PCI_CR_FCI_ADDR_MAP5);
+	ltq_pci_w32(0x19800000, PCI_CR_FCI_ADDR_MAP6);
+	ltq_pci_w32(0x19c00000, PCI_CR_FCI_ADDR_MAP7);
+	ltq_pci_w32(0x1ae00000, PCI_CR_FCI_ADDR_MAP11hg);
+	ltq_pci_w32(ltq_calc_bar11mask(), PCI_CR_BAR11MASK);
+	ltq_pci_w32(0, PCI_CR_PCI_ADDR_MAP11);
+	ltq_pci_w32(0, PCI_CS_BASE_ADDR1);
+	/* both TX and RX endian swap are enabled */
+	ltq_pci_w32(ltq_pci_r32(PCI_CR_PCI_EOI) | 3, PCI_CR_PCI_EOI);
+	wmb();
+	ltq_pci_w32(ltq_pci_r32(PCI_CR_BAR12MASK) | 0x80000000,
+		PCI_CR_BAR12MASK);
+	ltq_pci_w32(ltq_pci_r32(PCI_CR_BAR13MASK) | 0x80000000,
+		PCI_CR_BAR13MASK);
+	/*use 8 dw burst length */
+	ltq_pci_w32(0x303, PCI_CR_FCI_BURST_LENGTH);
+	ltq_pci_w32(ltq_pci_r32(PCI_CR_PCI_MOD) | (1 << 24), PCI_CR_PCI_MOD);
+	wmb();
+
+	/* setup irq line */
+	ltq_ebu_w32(ltq_ebu_r32(LTQ_EBU_PCC_CON) | 0xc, LTQ_EBU_PCC_CON);
+	ltq_ebu_w32(ltq_ebu_r32(LTQ_EBU_PCC_IEN) | 0x10, LTQ_EBU_PCC_IEN);
+
+	/* toggle reset pin */
+	__gpio_set_value(21, 0);
+	wmb();
+	mdelay(1);
+	__gpio_set_value(21, 1);
+	return 0;
+}
+
+int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+	if (ltq_pci_irq_map[slot])
+		return ltq_pci_irq_map[slot];
+	printk(KERN_ERR "lq_pci: trying to map irq for unknown slot %d\n",
+		slot);
+
+	return 0;
+}
+
+static int __devinit ltq_pci_probe(struct platform_device *pdev)
+{
+	struct ltq_pci_data *ltq_pci_data =
+		(struct ltq_pci_data *) pdev->dev.platform_data;
+	pci_probe_only = 0;
+	ltq_pci_irq_map = ltq_pci_data->irq;
+	ltq_pci_membase = ioremap_nocache(PCI_CR_BASE_ADDR, PCI_CR_SIZE);
+	ltq_pci_mapped_cfg =
+		ioremap_nocache(LTQ_PCI_CFG_BASE, LTQ_PCI_CFG_BASE);
+	ltq_pci_controller.io_map_base =
+		(unsigned long)ioremap(LTQ_PCI_IO_BASE, LTQ_PCI_IO_SIZE - 1);
+	ltq_pci_startup(ltq_pci_data);
+	register_pci_controller(&ltq_pci_controller);
+
+	return 0;
+}
+
+static struct platform_driver
+ltq_pci_driver = {
+	.probe = ltq_pci_probe,
+	.driver = {
+		.name = "ltq_pci",
+		.owner = THIS_MODULE,
+	},
+};
+
+int __init pcibios_init(void)
+{
+	int ret = platform_driver_register(&ltq_pci_driver);
+	if (ret)
+		printk(KERN_INFO "ltq_pci: Error registering platfom driver!");
+	return ret;
+}
+
+arch_initcall(pcibios_init);
diff --git a/arch/mips/pci/pci-lantiq.h b/arch/mips/pci/pci-lantiq.h
new file mode 100644
index 00000000000..66bf6cd6be3
--- /dev/null
+++ b/arch/mips/pci/pci-lantiq.h
@@ -0,0 +1,18 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _LTQ_PCI_H__
+#define _LTQ_PCI_H__
+
+extern __iomem void *ltq_pci_mapped_cfg;
+extern int ltq_pci_read_config_dword(struct pci_bus *bus,
+	unsigned int devfn, int where, int size, u32 *val);
+extern int ltq_pci_write_config_dword(struct pci_bus *bus,
+	unsigned int devfn, int where, int size, u32 val);
+
+#endif
diff --git a/arch/mips/pci/pci-xlr.c b/arch/mips/pci/pci-xlr.c
new file mode 100644
index 00000000000..38fece16c43
--- /dev/null
+++ b/arch/mips/pci/pci-xlr.c
@@ -0,0 +1,214 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/console.h>
+
+#include <asm/io.h>
+
+#include <asm/netlogic/interrupt.h>
+#include <asm/netlogic/xlr/iomap.h>
+#include <asm/netlogic/xlr/pic.h>
+#include <asm/netlogic/xlr/xlr.h>
+
+static void *pci_config_base;
+
+#define	pci_cfg_addr(bus, devfn, off) (((bus) << 16) | ((devfn) << 8) | (off))
+
+/* PCI ops */
+static inline u32 pci_cfg_read_32bit(struct pci_bus *bus, unsigned int devfn,
+	int where)
+{
+	u32 data;
+	u32 *cfgaddr;
+
+	cfgaddr = (u32 *)(pci_config_base +
+			pci_cfg_addr(bus->number, devfn, where & ~3));
+	data = *cfgaddr;
+	return cpu_to_le32(data);
+}
+
+static inline void pci_cfg_write_32bit(struct pci_bus *bus, unsigned int devfn,
+	int where, u32 data)
+{
+	u32 *cfgaddr;
+
+	cfgaddr = (u32 *)(pci_config_base +
+			pci_cfg_addr(bus->number, devfn, where & ~3));
+	*cfgaddr = cpu_to_le32(data);
+}
+
+static int nlm_pcibios_read(struct pci_bus *bus, unsigned int devfn,
+	int where, int size, u32 *val)
+{
+	u32 data;
+
+	if ((size == 2) && (where & 1))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+	else if ((size == 4) && (where & 3))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	data = pci_cfg_read_32bit(bus, devfn, where);
+
+	if (size == 1)
+		*val = (data >> ((where & 3) << 3)) & 0xff;
+	else if (size == 2)
+		*val = (data >> ((where & 3) << 3)) & 0xffff;
+	else
+		*val = data;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+
+static int nlm_pcibios_write(struct pci_bus *bus, unsigned int devfn,
+		int where, int size, u32 val)
+{
+	u32 data;
+
+	if ((size == 2) && (where & 1))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+	else if ((size == 4) && (where & 3))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	data = pci_cfg_read_32bit(bus, devfn, where);
+
+	if (size == 1)
+		data = (data & ~(0xff << ((where & 3) << 3))) |
+			(val << ((where & 3) << 3));
+	else if (size == 2)
+		data = (data & ~(0xffff << ((where & 3) << 3))) |
+			(val << ((where & 3) << 3));
+	else
+		data = val;
+
+	pci_cfg_write_32bit(bus, devfn, where, data);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+struct pci_ops nlm_pci_ops = {
+	.read  = nlm_pcibios_read,
+	.write = nlm_pcibios_write
+};
+
+static struct resource nlm_pci_mem_resource = {
+	.name           = "XLR PCI MEM",
+	.start          = 0xd0000000UL,	/* 256MB PCI mem @ 0xd000_0000 */
+	.end            = 0xdfffffffUL,
+	.flags          = IORESOURCE_MEM,
+};
+
+static struct resource nlm_pci_io_resource = {
+	.name           = "XLR IO MEM",
+	.start          = 0x10000000UL,	/* 16MB PCI IO @ 0x1000_0000 */
+	.end            = 0x100fffffUL,
+	.flags          = IORESOURCE_IO,
+};
+
+struct pci_controller nlm_pci_controller = {
+	.index          = 0,
+	.pci_ops        = &nlm_pci_ops,
+	.mem_resource   = &nlm_pci_mem_resource,
+	.mem_offset     = 0x00000000UL,
+	.io_resource    = &nlm_pci_io_resource,
+	.io_offset      = 0x00000000UL,
+};
+
+int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+	if (!nlm_chip_is_xls())
+		return	PIC_PCIX_IRQ;	/* for XLR just one IRQ*/
+
+	/*
+	 * For XLS PCIe, there is an IRQ per Link, find out which
+	 * link the device is on to assign interrupts
+	*/
+	if (dev->bus->self == NULL)
+		return 0;
+
+	switch	(dev->bus->self->devfn) {
+	case 0x0:
+		return PIC_PCIE_LINK0_IRQ;
+	case 0x8:
+		return PIC_PCIE_LINK1_IRQ;
+	case 0x10:
+		if (nlm_chip_is_xls_b())
+			return PIC_PCIE_XLSB0_LINK2_IRQ;
+		else
+			return PIC_PCIE_LINK2_IRQ;
+	case 0x18:
+		if (nlm_chip_is_xls_b())
+			return PIC_PCIE_XLSB0_LINK3_IRQ;
+		else
+			return PIC_PCIE_LINK3_IRQ;
+	}
+	WARN(1, "Unexpected devfn %d\n", dev->bus->self->devfn);
+	return 0;
+}
+
+/* Do platform specific device initialization at pci_enable_device() time */
+int pcibios_plat_dev_init(struct pci_dev *dev)
+{
+	return 0;
+}
+
+static int __init pcibios_init(void)
+{
+	/* PSB assigns PCI resources */
+	pci_probe_only = 1;
+	pci_config_base = ioremap(DEFAULT_PCI_CONFIG_BASE, 16 << 20);
+
+	/* Extend IO port for memory mapped io */
+	ioport_resource.start =  0;
+	ioport_resource.end   = ~0;
+
+	set_io_port_base(CKSEG1);
+	nlm_pci_controller.io_map_base = CKSEG1;
+
+	pr_info("Registering XLR/XLS PCIX/PCIE Controller.\n");
+	register_pci_controller(&nlm_pci_controller);
+
+	return 0;
+}
+
+arch_initcall(pcibios_init);
+
+struct pci_fixup pcibios_fixups[] = {
+	{0}
+};
diff --git a/arch/mips/pmc-sierra/msp71xx/msp_irq_per.c b/arch/mips/pmc-sierra/msp71xx/msp_irq_per.c
index f9b9dcdfa9d..98fd0099d96 100644
--- a/arch/mips/pmc-sierra/msp71xx/msp_irq_per.c
+++ b/arch/mips/pmc-sierra/msp71xx/msp_irq_per.c
@@ -97,7 +97,7 @@ static int msp_per_irq_set_affinity(struct irq_data *d,
 
 static struct irq_chip msp_per_irq_controller = {
 	.name = "MSP_PER",
-	.irq_enable = unmask_per_irq.
+	.irq_enable = unmask_per_irq,
 	.irq_disable = mask_per_irq,
 	.irq_ack = msp_per_irq_ack,
 #ifdef CONFIG_SMP
diff --git a/arch/mips/pmc-sierra/yosemite/smp.c b/arch/mips/pmc-sierra/yosemite/smp.c
index efc9e889b34..2608752898c 100644
--- a/arch/mips/pmc-sierra/yosemite/smp.c
+++ b/arch/mips/pmc-sierra/yosemite/smp.c
@@ -55,6 +55,8 @@ void titan_mailbox_irq(void)
 
 		if (status & 0x2)
 			smp_call_function_interrupt();
+		if (status & 0x4)
+			scheduler_ipi();
 		break;
 
 	case 1:
@@ -63,6 +65,8 @@ void titan_mailbox_irq(void)
 
 		if (status & 0x2)
 			smp_call_function_interrupt();
+		if (status & 0x4)
+			scheduler_ipi();
 		break;
 	}
 }
diff --git a/arch/mips/power/hibernate.S b/arch/mips/power/hibernate.S
index dbb5c7b4b70..f8a751c0328 100644
--- a/arch/mips/power/hibernate.S
+++ b/arch/mips/power/hibernate.S
@@ -35,7 +35,7 @@ LEAF(swsusp_arch_resume)
 0:
 	PTR_L t1, PBE_ADDRESS(t0)   /* source */
 	PTR_L t2, PBE_ORIG_ADDRESS(t0) /* destination */
-	PTR_ADDIU t3, t1, PAGE_SIZE
+	PTR_ADDU t3, t1, PAGE_SIZE
 1:
 	REG_L t8, (t1)
 	REG_S t8, (t2)
diff --git a/arch/mips/sgi-ip22/ip22-platform.c b/arch/mips/sgi-ip22/ip22-platform.c
index deddbf0ebe5..698904daf90 100644
--- a/arch/mips/sgi-ip22/ip22-platform.c
+++ b/arch/mips/sgi-ip22/ip22-platform.c
@@ -132,7 +132,7 @@ static struct platform_device eth1_device = {
  */
 static int __init sgiseeq_devinit(void)
 {
-	unsigned int tmp;
+	unsigned int pbdma __maybe_unused;
 	int res, i;
 
 	eth0_pd.hpc = hpc3c0;
@@ -151,7 +151,7 @@ static int __init sgiseeq_devinit(void)
 
 	/* Second HPC is missing? */
 	if (ip22_is_fullhouse() ||
-	    get_dbe(tmp, (unsigned int *)&hpc3c1->pbdma[1]))
+	    get_dbe(pbdma, (unsigned int *)&hpc3c1->pbdma[1]))
 		return 0;
 
 	sgimc->giopar |= SGIMC_GIOPAR_MASTEREXP1 | SGIMC_GIOPAR_EXP164 |
diff --git a/arch/mips/sgi-ip22/ip22-time.c b/arch/mips/sgi-ip22/ip22-time.c
index 603fc91c103..1a94c989418 100644
--- a/arch/mips/sgi-ip22/ip22-time.c
+++ b/arch/mips/sgi-ip22/ip22-time.c
@@ -32,7 +32,7 @@
 static unsigned long dosample(void)
 {
 	u32 ct0, ct1;
-	u8 msb, lsb;
+	u8 msb;
 
 	/* Start the counter. */
 	sgint->tcword = (SGINT_TCWORD_CNT2 | SGINT_TCWORD_CALL |
@@ -46,7 +46,7 @@ static unsigned long dosample(void)
 	/* Latch and spin until top byte of counter2 is zero */
 	do {
 		writeb(SGINT_TCWORD_CNT2 | SGINT_TCWORD_CLAT, &sgint->tcword);
-		lsb = readb(&sgint->tcnt2);
+		(void) readb(&sgint->tcnt2);
 		msb = readb(&sgint->tcnt2);
 		ct1 = read_c0_count();
 	} while (msb);
diff --git a/arch/mips/sgi-ip27/ip27-hubio.c b/arch/mips/sgi-ip27/ip27-hubio.c
index a1fa4abb3f6..cd0d5b06cd8 100644
--- a/arch/mips/sgi-ip27/ip27-hubio.c
+++ b/arch/mips/sgi-ip27/ip27-hubio.c
@@ -29,7 +29,6 @@ unsigned long hub_pio_map(cnodeid_t cnode, xwidgetnum_t widget,
 			  unsigned long xtalk_addr, size_t size)
 {
 	nasid_t nasid = COMPACT_TO_NASID_NODEID(cnode);
-	volatile hubreg_t junk;
 	unsigned i;
 
 	/* use small-window mapping if possible */
@@ -64,7 +63,7 @@ unsigned long hub_pio_map(cnodeid_t cnode, xwidgetnum_t widget,
 		 * after we write it.
 		 */
 		IIO_ITTE_PUT(nasid, i, HUB_PIO_MAP_TO_MEM, widget, xtalk_addr);
-		junk = HUB_L(IIO_ITTE_GET(nasid, i));
+		(void) HUB_L(IIO_ITTE_GET(nasid, i));
 
 		return NODE_BWIN_BASE(nasid, widget) + (xtalk_addr % BWIN_SIZE);
 	}
diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c
index 0a04603d577..b18b04e4857 100644
--- a/arch/mips/sgi-ip27/ip27-irq.c
+++ b/arch/mips/sgi-ip27/ip27-irq.c
@@ -147,8 +147,10 @@ static void ip27_do_irq_mask0(void)
 #ifdef CONFIG_SMP
 	if (pend0 & (1UL << CPU_RESCHED_A_IRQ)) {
 		LOCAL_HUB_CLR_INTR(CPU_RESCHED_A_IRQ);
+		scheduler_ipi();
 	} else if (pend0 & (1UL << CPU_RESCHED_B_IRQ)) {
 		LOCAL_HUB_CLR_INTR(CPU_RESCHED_B_IRQ);
+		scheduler_ipi();
 	} else if (pend0 & (1UL << CPU_CALL_A_IRQ)) {
 		LOCAL_HUB_CLR_INTR(CPU_CALL_A_IRQ);
 		smp_call_function_interrupt();
diff --git a/arch/mips/sgi-ip27/ip27-klnuma.c b/arch/mips/sgi-ip27/ip27-klnuma.c
index c3d30a88daf..1d1919a44e8 100644
--- a/arch/mips/sgi-ip27/ip27-klnuma.c
+++ b/arch/mips/sgi-ip27/ip27-klnuma.c
@@ -54,11 +54,8 @@ void __init setup_replication_mask(void)
 
 static __init void set_ktext_source(nasid_t client_nasid, nasid_t server_nasid)
 {
-	cnodeid_t client_cnode;
 	kern_vars_t *kvp;
 
-	client_cnode = NASID_TO_COMPACT_NODEID(client_nasid);
-
 	kvp = &hub_data(client_nasid)->kern_vars;
 
 	KERN_VARS_ADDR(client_nasid) = (unsigned long)kvp;
diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c
index a152538d3c9..ef74f3267f9 100644
--- a/arch/mips/sgi-ip27/ip27-timer.c
+++ b/arch/mips/sgi-ip27/ip27-timer.c
@@ -66,18 +66,7 @@ static int rt_next_event(unsigned long delta, struct clock_event_device *evt)
 static void rt_set_mode(enum clock_event_mode mode,
 		struct clock_event_device *evt)
 {
-	switch (mode) {
-	case CLOCK_EVT_MODE_ONESHOT:
-		/* The only mode supported */
-		break;
-
-	case CLOCK_EVT_MODE_PERIODIC:
-	case CLOCK_EVT_MODE_UNUSED:
-	case CLOCK_EVT_MODE_SHUTDOWN:
-	case CLOCK_EVT_MODE_RESUME:
-		/* Nothing to do  */
-		break;
-	}
+	/* Nothing to do ...  */
 }
 
 int rt_timer_irq;
@@ -174,8 +163,7 @@ static void __init hub_rt_clocksource_init(void)
 {
 	struct clocksource *cs = &hub_rt_clocksource;
 
-	clocksource_set_clock(cs, CYCLES_PER_SEC);
-	clocksource_register(cs);
+	clocksource_register_hz(cs, CYCLES_PER_SEC);
 }
 
 void __init plat_time_init(void)
diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c
index 47b347c992e..d667875be56 100644
--- a/arch/mips/sibyte/bcm1480/smp.c
+++ b/arch/mips/sibyte/bcm1480/smp.c
@@ -20,6 +20,7 @@
 #include <linux/delay.h>
 #include <linux/smp.h>
 #include <linux/kernel_stat.h>
+#include <linux/sched.h>
 
 #include <asm/mmu_context.h>
 #include <asm/io.h>
@@ -189,10 +190,8 @@ void bcm1480_mailbox_interrupt(void)
 	/* Clear the mailbox to clear the interrupt */
 	__raw_writeq(((u64)action)<<48, mailbox_0_clear_regs[cpu]);
 
-	/*
-	 * Nothing to do for SMP_RESCHEDULE_YOURSELF; returning from the
-	 * interrupt will do the reschedule for us
-	 */
+	if (action & SMP_RESCHEDULE_YOURSELF)
+		scheduler_ipi();
 
 	if (action & SMP_CALL_FUNCTION)
 		smp_call_function_interrupt();
diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c
index c00a5cb1128..38e7f6bd792 100644
--- a/arch/mips/sibyte/sb1250/smp.c
+++ b/arch/mips/sibyte/sb1250/smp.c
@@ -21,6 +21,7 @@
 #include <linux/interrupt.h>
 #include <linux/smp.h>
 #include <linux/kernel_stat.h>
+#include <linux/sched.h>
 
 #include <asm/mmu_context.h>
 #include <asm/io.h>
@@ -177,10 +178,8 @@ void sb1250_mailbox_interrupt(void)
 	/* Clear the mailbox to clear the interrupt */
 	____raw_writeq(((u64)action) << 48, mailbox_clear_regs[cpu]);
 
-	/*
-	 * Nothing to do for SMP_RESCHEDULE_YOURSELF; returning from the
-	 * interrupt will do the reschedule for us
-	 */
+	if (action & SMP_RESCHEDULE_YOURSELF)
+		scheduler_ipi();
 
 	if (action & SMP_CALL_FUNCTION)
 		smp_call_function_interrupt();
diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c
index c76151b5656..0904d4d30cb 100644
--- a/arch/mips/sni/time.c
+++ b/arch/mips/sni/time.c
@@ -95,7 +95,7 @@ static void __init sni_a20r_timer_setup(void)
 static __init unsigned long dosample(void)
 {
 	u32 ct0, ct1;
-	volatile u8 msb, lsb;
+	volatile u8 msb;
 
 	/* Start the counter. */
 	outb_p(0x34, 0x43);
@@ -108,7 +108,7 @@ static __init unsigned long dosample(void)
 	/* Latch and spin until top byte of counter0 is zero */
 	do {
 		outb(0x00, 0x43);
-		lsb = inb(0x40);
+		(void) inb(0x40);
 		msb = inb(0x40);
 		ct1 = read_c0_count();
 	} while (msb);
diff --git a/arch/mips/txx9/generic/setup_tx4939.c b/arch/mips/txx9/generic/setup_tx4939.c
index 3dc19f48295..e9f95dcde37 100644
--- a/arch/mips/txx9/generic/setup_tx4939.c
+++ b/arch/mips/txx9/generic/setup_tx4939.c
@@ -318,19 +318,15 @@ void __init tx4939_sio_init(unsigned int sclk, unsigned int cts_mask)
 }
 
 #if defined(CONFIG_TC35815) || defined(CONFIG_TC35815_MODULE)
-static int tx4939_get_eth_speed(struct net_device *dev)
+static u32 tx4939_get_eth_speed(struct net_device *dev)
 {
-	struct ethtool_cmd cmd = { ETHTOOL_GSET };
-	int speed = 100;	/* default 100Mbps */
-	int err;
-	if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings)
-		return speed;
-	err = dev->ethtool_ops->get_settings(dev, &cmd);
-	if (err < 0)
-		return speed;
-	speed = cmd.speed == SPEED_100 ? 100 : 10;
-	return speed;
+	struct ethtool_cmd cmd;
+	if (dev_ethtool_get_settings(dev, &cmd))
+		return 100;	/* default 100Mbps */
+
+	return ethtool_cmd_speed(&cmd);
 }
+
 static int tx4939_netdev_event(struct notifier_block *this,
 			       unsigned long event,
 			       void *ptr)
@@ -343,8 +339,7 @@ static int tx4939_netdev_event(struct notifier_block *this,
 		else if (dev->irq == TXX9_IRQ_BASE + TX4939_IR_ETH(1))
 			bit = TX4939_PCFG_SPEED1;
 		if (bit) {
-			int speed = tx4939_get_eth_speed(dev);
-			if (speed == 100)
+			if (tx4939_get_eth_speed(dev) == 100)
 				txx9_set64(&tx4939_ccfgptr->pcfg, bit);
 			else
 				txx9_clear64(&tx4939_ccfgptr->pcfg, bit);
diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c
index 226c826a219..83fb2791223 100644
--- a/arch/mn10300/kernel/smp.c
+++ b/arch/mn10300/kernel/smp.c
@@ -494,14 +494,11 @@ void smp_send_stop(void)
  * @irq: The interrupt number.
  * @dev_id: The device ID.
  *
- * We need do nothing here, since the scheduling will be effected on our way
- * back through entry.S.
- *
  * Returns IRQ_HANDLED to indicate we handled the interrupt successfully.
  */
 static irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
 {
-	/* do nothing */
+	scheduler_ipi();
 	return IRQ_HANDLED;
 }
 
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index d18328b3f93..da601dd34c0 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -3,6 +3,7 @@
 
 #include <linux/mm.h>
 #include <linux/uaccess.h>
+#include <asm/tlbflush.h>
 
 /* The usual comment is "Caches aren't brain-dead on the <architecture>".
  * Unfortunately, that doesn't apply to PA-RISC. */
@@ -112,8 +113,10 @@ void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr);
 static inline void
 flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr)
 {
-	if (PageAnon(page))
+	if (PageAnon(page)) {
+		flush_tlb_page(vma, vmaddr);
 		flush_dcache_page_asm(page_to_phys(page), vmaddr);
+	}
 }
 
 #ifdef CONFIG_DEBUG_RODATA
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 5d7b8ce9fdf..22dadeb5869 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -177,7 +177,10 @@ struct vm_area_struct;
 
 #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE |  _PAGE_DIRTY | _PAGE_ACCESSED)
 #define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define _PAGE_KERNEL	(_PAGE_PRESENT | _PAGE_EXEC | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED)
+#define _PAGE_KERNEL_RO	(_PAGE_PRESENT | _PAGE_READ | _PAGE_DIRTY | _PAGE_ACCESSED)
+#define _PAGE_KERNEL_EXEC	(_PAGE_KERNEL_RO | _PAGE_EXEC)
+#define _PAGE_KERNEL_RWX	(_PAGE_KERNEL_EXEC | _PAGE_WRITE)
+#define _PAGE_KERNEL		(_PAGE_KERNEL_RO | _PAGE_WRITE)
 
 /* The pgd/pmd contains a ptr (in phys addr space); since all pgds/pmds
  * are page-aligned, we don't care about the PAGE_OFFSET bits, except
@@ -208,7 +211,9 @@ struct vm_area_struct;
 #define PAGE_COPY       PAGE_EXECREAD
 #define PAGE_RWX        __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_EXEC |_PAGE_ACCESSED)
 #define PAGE_KERNEL	__pgprot(_PAGE_KERNEL)
-#define PAGE_KERNEL_RO	__pgprot(_PAGE_KERNEL & ~_PAGE_WRITE)
+#define PAGE_KERNEL_EXEC	__pgprot(_PAGE_KERNEL_EXEC)
+#define PAGE_KERNEL_RWX	__pgprot(_PAGE_KERNEL_RWX)
+#define PAGE_KERNEL_RO	__pgprot(_PAGE_KERNEL_RO)
 #define PAGE_KERNEL_UNC	__pgprot(_PAGE_KERNEL | _PAGE_NO_CACHE)
 #define PAGE_GATEWAY    __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_GATEWAY| _PAGE_READ)
 
diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index 3eb82c2a5ec..9cbc2c3bf63 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -814,8 +814,14 @@
 #define __NR_recvmmsg		(__NR_Linux + 319)
 #define __NR_accept4		(__NR_Linux + 320)
 #define __NR_prlimit64		(__NR_Linux + 321)
-
-#define __NR_Linux_syscalls	(__NR_prlimit64 + 1)
+#define __NR_fanotify_init	(__NR_Linux + 322)
+#define __NR_fanotify_mark	(__NR_Linux + 323)
+#define __NR_clock_adjtime	(__NR_Linux + 324)
+#define __NR_name_to_handle_at	(__NR_Linux + 325)
+#define __NR_open_by_handle_at	(__NR_Linux + 326)
+#define __NR_syncfs		(__NR_Linux + 327)
+
+#define __NR_Linux_syscalls	(__NR_syncfs + 1)
 
 
 #define __IGNORE_select		/* newselect */
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 3f11331c277..83335f3da5f 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -304,10 +304,20 @@ void flush_dcache_page(struct page *page)
 		offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
 		addr = mpnt->vm_start + offset;
 
+		/* The TLB is the engine of coherence on parisc: The
+		 * CPU is entitled to speculate any page with a TLB
+		 * mapping, so here we kill the mapping then flush the
+		 * page along a special flush only alias mapping.
+		 * This guarantees that the page is no-longer in the
+		 * cache for any process and nor may it be
+		 * speculatively read in (until the user or kernel
+		 * specifically accesses it, of course) */
+
+		flush_tlb_page(mpnt, addr);
 		if (old_addr == 0 || (old_addr & (SHMLBA - 1)) != (addr & (SHMLBA - 1))) {
 			__flush_cache_page(mpnt, addr, page_to_phys(page));
 			if (old_addr)
-				printk(KERN_ERR "INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %s\n", old_addr, addr, mpnt->vm_file ? mpnt->vm_file->f_path.dentry->d_name.name : "(null)");
+				printk(KERN_ERR "INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %s\n", old_addr, addr, mpnt->vm_file ? (char *)mpnt->vm_file->f_path.dentry->d_name.name : "(null)");
 			old_addr = addr;
 		}
 	}
@@ -499,6 +509,7 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long
 {
 	BUG_ON(!vma->vm_mm->context);
 
+	flush_tlb_page(vma, vmaddr);
 	__flush_cache_page(vma, vmaddr, page_to_phys(pfn_to_page(pfn)));
 
 }
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index ead8d2a1034..6f059443914 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -692,6 +692,9 @@ ENTRY(fault_vector_11)
 END(fault_vector_11)
 
 #endif
+	/* Fault vector is separately protected and *must* be on its own page */
+	.align		PAGE_SIZE
+ENTRY(end_fault_vector)
 
 	.import		handle_interruption,code
 	.import		do_cpu_irq_mask,code
diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S
index 145c5e4caaa..37aabd772fb 100644
--- a/arch/parisc/kernel/head.S
+++ b/arch/parisc/kernel/head.S
@@ -106,8 +106,9 @@ $bss_loop:
 #endif
 
 
-	/* Now initialize the PTEs themselves */
-	ldo		0+_PAGE_KERNEL(%r0),%r3 /* Hardwired 0 phys addr start */
+	/* Now initialize the PTEs themselves.  We use RWX for
+	 * everything ... it will get remapped correctly later */
+	ldo		0+_PAGE_KERNEL_RWX(%r0),%r3 /* Hardwired 0 phys addr start */
 	ldi		(1<<(KERNEL_INITIAL_ORDER-PAGE_SHIFT)),%r11 /* PFN count */
 	load32		PA(pg0),%r1
 
diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c
index 6e81bb596e5..cedbbb8b18d 100644
--- a/arch/parisc/kernel/module.c
+++ b/arch/parisc/kernel/module.c
@@ -61,8 +61,10 @@
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <linux/bug.h>
+#include <linux/mm.h>
 #include <linux/slab.h>
 
+#include <asm/pgtable.h>
 #include <asm/unwind.h>
 
 #if 0
@@ -214,7 +216,13 @@ void *module_alloc(unsigned long size)
 {
 	if (size == 0)
 		return NULL;
-	return vmalloc(size);
+	/* using RWX means less protection for modules, but it's
+	 * easier than trying to map the text, data, init_text and
+	 * init_data correctly */
+	return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
+				    GFP_KERNEL | __GFP_HIGHMEM,
+				    PAGE_KERNEL_RWX, -1,
+				    __builtin_return_address(0));
 }
 
 #ifndef CONFIG_64BIT
diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S
index a85823668cb..93ff3d90edd 100644
--- a/arch/parisc/kernel/pacache.S
+++ b/arch/parisc/kernel/pacache.S
@@ -817,10 +817,7 @@ ENTRY(purge_kernel_dcache_page)
 	.procend
 ENDPROC(purge_kernel_dcache_page)
 
-
-	.export flush_user_dcache_range_asm
-
-flush_user_dcache_range_asm:
+ENTRY(flush_user_dcache_range_asm)
 	.proc
 	.callinfo NO_CALLS
 	.entry
@@ -839,6 +836,7 @@ flush_user_dcache_range_asm:
 	.exit
 
 	.procend
+ENDPROC(flush_user_dcache_range_asm)
 
 ENTRY(flush_kernel_dcache_range_asm)
 	.proc
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index 69d63d354ef..828305f19cf 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -155,10 +155,7 @@ ipi_interrupt(int irq, void *dev_id)
 				
 			case IPI_RESCHEDULE:
 				smp_debug(100, KERN_DEBUG "CPU%d IPI_RESCHEDULE\n", this_cpu);
-				/*
-				 * Reschedule callback.  Everything to be
-				 * done is done by the interrupt return path.
-				 */
+				scheduler_ipi();
 				break;
 
 			case IPI_CALL_FUNC:
diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c
index 88a0ad14a9c..dc9a6246232 100644
--- a/arch/parisc/kernel/sys_parisc32.c
+++ b/arch/parisc/kernel/sys_parisc32.c
@@ -228,3 +228,11 @@ asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo,
         return sys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo,
                              ((loff_t)lenhi << 32) | lenlo);
 }
+
+asmlinkage long compat_sys_fanotify_mark(int fan_fd, int flags, u32 mask_hi,
+					 u32 mask_lo, int fd,
+					 const char __user *pathname)
+{
+	return sys_fanotify_mark(fan_fd, flags, ((u64)mask_hi << 32) | mask_lo,
+				 fd, pathname);
+}
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 4be85ee10b8..a5b02ce4d41 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -420,6 +420,12 @@
 	ENTRY_COMP(recvmmsg)
 	ENTRY_SAME(accept4)		/* 320 */
 	ENTRY_SAME(prlimit64)
+	ENTRY_SAME(fanotify_init)
+	ENTRY_COMP(fanotify_mark)
+	ENTRY_COMP(clock_adjtime)
+	ENTRY_SAME(name_to_handle_at)	/* 325 */
+	ENTRY_COMP(open_by_handle_at)
+	ENTRY_SAME(syncfs)
 
 	/* Nothing yet */
 
diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S
index 8f1e4efd143..e1a55849bfa 100644
--- a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S
@@ -69,6 +69,9 @@ SECTIONS
 	/* End of text section */
 	_etext = .;
 
+	/* Start of data section */
+	_sdata = .;
+
 	RODATA
 
 	/* writeable */
@@ -134,6 +137,7 @@ SECTIONS
 	. = ALIGN(16384);
 	__init_begin = .;
 	INIT_TEXT_SECTION(16384)
+	. = ALIGN(PAGE_SIZE);
 	INIT_DATA_SECTION(16)
 	/* we have to discard exit text and such at runtime, not link time */
 	.exit.text :
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index b7ed8d7a9b3..5fa1e273006 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -266,8 +266,10 @@ static void __init setup_bootmem(void)
 	}
 	memset(pfnnid_map, 0xff, sizeof(pfnnid_map));
 
-	for (i = 0; i < npmem_ranges; i++)
+	for (i = 0; i < npmem_ranges; i++) {
+		node_set_state(i, N_NORMAL_MEMORY);
 		node_set_online(i);
+	}
 #endif
 
 	/*
@@ -369,24 +371,158 @@ static void __init setup_bootmem(void)
 	request_resource(&sysram_resources[0], &pdcdata_resource);
 }
 
+static void __init map_pages(unsigned long start_vaddr,
+			     unsigned long start_paddr, unsigned long size,
+			     pgprot_t pgprot, int force)
+{
+	pgd_t *pg_dir;
+	pmd_t *pmd;
+	pte_t *pg_table;
+	unsigned long end_paddr;
+	unsigned long start_pmd;
+	unsigned long start_pte;
+	unsigned long tmp1;
+	unsigned long tmp2;
+	unsigned long address;
+	unsigned long vaddr;
+	unsigned long ro_start;
+	unsigned long ro_end;
+	unsigned long fv_addr;
+	unsigned long gw_addr;
+	extern const unsigned long fault_vector_20;
+	extern void * const linux_gateway_page;
+
+	ro_start = __pa((unsigned long)_text);
+	ro_end   = __pa((unsigned long)&data_start);
+	fv_addr  = __pa((unsigned long)&fault_vector_20) & PAGE_MASK;
+	gw_addr  = __pa((unsigned long)&linux_gateway_page) & PAGE_MASK;
+
+	end_paddr = start_paddr + size;
+
+	pg_dir = pgd_offset_k(start_vaddr);
+
+#if PTRS_PER_PMD == 1
+	start_pmd = 0;
+#else
+	start_pmd = ((start_vaddr >> PMD_SHIFT) & (PTRS_PER_PMD - 1));
+#endif
+	start_pte = ((start_vaddr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1));
+
+	address = start_paddr;
+	vaddr = start_vaddr;
+	while (address < end_paddr) {
+#if PTRS_PER_PMD == 1
+		pmd = (pmd_t *)__pa(pg_dir);
+#else
+		pmd = (pmd_t *)pgd_address(*pg_dir);
+
+		/*
+		 * pmd is physical at this point
+		 */
+
+		if (!pmd) {
+			pmd = (pmd_t *) alloc_bootmem_low_pages_node(NODE_DATA(0), PAGE_SIZE << PMD_ORDER);
+			pmd = (pmd_t *) __pa(pmd);
+		}
+
+		pgd_populate(NULL, pg_dir, __va(pmd));
+#endif
+		pg_dir++;
+
+		/* now change pmd to kernel virtual addresses */
+
+		pmd = (pmd_t *)__va(pmd) + start_pmd;
+		for (tmp1 = start_pmd; tmp1 < PTRS_PER_PMD; tmp1++, pmd++) {
+
+			/*
+			 * pg_table is physical at this point
+			 */
+
+			pg_table = (pte_t *)pmd_address(*pmd);
+			if (!pg_table) {
+				pg_table = (pte_t *)
+					alloc_bootmem_low_pages_node(NODE_DATA(0), PAGE_SIZE);
+				pg_table = (pte_t *) __pa(pg_table);
+			}
+
+			pmd_populate_kernel(NULL, pmd, __va(pg_table));
+
+			/* now change pg_table to kernel virtual addresses */
+
+			pg_table = (pte_t *) __va(pg_table) + start_pte;
+			for (tmp2 = start_pte; tmp2 < PTRS_PER_PTE; tmp2++, pg_table++) {
+				pte_t pte;
+
+				/*
+				 * Map the fault vector writable so we can
+				 * write the HPMC checksum.
+				 */
+				if (force)
+					pte =  __mk_pte(address, pgprot);
+				else if (core_kernel_text(vaddr) &&
+					 address != fv_addr)
+					pte = __mk_pte(address, PAGE_KERNEL_EXEC);
+				else
+#if defined(CONFIG_PARISC_PAGE_SIZE_4KB)
+				if (address >= ro_start && address < ro_end
+							&& address != fv_addr
+							&& address != gw_addr)
+					pte = __mk_pte(address, PAGE_KERNEL_RO);
+				else
+#endif
+					pte = __mk_pte(address, pgprot);
+
+				if (address >= end_paddr) {
+					if (force)
+						break;
+					else
+						pte_val(pte) = 0;
+				}
+
+				set_pte(pg_table, pte);
+
+				address += PAGE_SIZE;
+				vaddr += PAGE_SIZE;
+			}
+			start_pte = 0;
+
+			if (address >= end_paddr)
+			    break;
+		}
+		start_pmd = 0;
+	}
+}
+
 void free_initmem(void)
 {
 	unsigned long addr;
 	unsigned long init_begin = (unsigned long)__init_begin;
 	unsigned long init_end = (unsigned long)__init_end;
 
-#ifdef CONFIG_DEBUG_KERNEL
+	/* The init text pages are marked R-X.  We have to
+	 * flush the icache and mark them RW-
+	 *
+	 * This is tricky, because map_pages is in the init section.
+	 * Do a dummy remap of the data section first (the data
+	 * section is already PAGE_KERNEL) to pull in the TLB entries
+	 * for map_kernel */
+	map_pages(init_begin, __pa(init_begin), init_end - init_begin,
+		  PAGE_KERNEL_RWX, 1);
+	/* now remap at PAGE_KERNEL since the TLB is pre-primed to execute
+	 * map_pages */
+	map_pages(init_begin, __pa(init_begin), init_end - init_begin,
+		  PAGE_KERNEL, 1);
+
+	/* force the kernel to see the new TLB entries */
+	__flush_tlb_range(0, init_begin, init_end);
 	/* Attempt to catch anyone trying to execute code here
 	 * by filling the page with BRK insns.
 	 */
 	memset((void *)init_begin, 0x00, init_end - init_begin);
+	/* finally dump all the instructions which were cached, since the
+	 * pages are no-longer executable */
 	flush_icache_range(init_begin, init_end);
-#endif
 	
-	/* align __init_begin and __init_end to page size,
-	   ignoring linker script where we might have tried to save RAM */
-	init_begin = PAGE_ALIGN(init_begin);
-	init_end = PAGE_ALIGN(init_end);
 	for (addr = init_begin; addr < init_end; addr += PAGE_SIZE) {
 		ClearPageReserved(virt_to_page(addr));
 		init_page_count(virt_to_page(addr));
@@ -616,114 +752,6 @@ void show_mem(unsigned int filter)
 #endif
 }
 
-
-static void __init map_pages(unsigned long start_vaddr, unsigned long start_paddr, unsigned long size, pgprot_t pgprot)
-{
-	pgd_t *pg_dir;
-	pmd_t *pmd;
-	pte_t *pg_table;
-	unsigned long end_paddr;
-	unsigned long start_pmd;
-	unsigned long start_pte;
-	unsigned long tmp1;
-	unsigned long tmp2;
-	unsigned long address;
-	unsigned long ro_start;
-	unsigned long ro_end;
-	unsigned long fv_addr;
-	unsigned long gw_addr;
-	extern const unsigned long fault_vector_20;
-	extern void * const linux_gateway_page;
-
-	ro_start = __pa((unsigned long)_text);
-	ro_end   = __pa((unsigned long)&data_start);
-	fv_addr  = __pa((unsigned long)&fault_vector_20) & PAGE_MASK;
-	gw_addr  = __pa((unsigned long)&linux_gateway_page) & PAGE_MASK;
-
-	end_paddr = start_paddr + size;
-
-	pg_dir = pgd_offset_k(start_vaddr);
-
-#if PTRS_PER_PMD == 1
-	start_pmd = 0;
-#else
-	start_pmd = ((start_vaddr >> PMD_SHIFT) & (PTRS_PER_PMD - 1));
-#endif
-	start_pte = ((start_vaddr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1));
-
-	address = start_paddr;
-	while (address < end_paddr) {
-#if PTRS_PER_PMD == 1
-		pmd = (pmd_t *)__pa(pg_dir);
-#else
-		pmd = (pmd_t *)pgd_address(*pg_dir);
-
-		/*
-		 * pmd is physical at this point
-		 */
-
-		if (!pmd) {
-			pmd = (pmd_t *) alloc_bootmem_low_pages_node(NODE_DATA(0),PAGE_SIZE << PMD_ORDER);
-			pmd = (pmd_t *) __pa(pmd);
-		}
-
-		pgd_populate(NULL, pg_dir, __va(pmd));
-#endif
-		pg_dir++;
-
-		/* now change pmd to kernel virtual addresses */
-
-		pmd = (pmd_t *)__va(pmd) + start_pmd;
-		for (tmp1 = start_pmd; tmp1 < PTRS_PER_PMD; tmp1++,pmd++) {
-
-			/*
-			 * pg_table is physical at this point
-			 */
-
-			pg_table = (pte_t *)pmd_address(*pmd);
-			if (!pg_table) {
-				pg_table = (pte_t *)
-					alloc_bootmem_low_pages_node(NODE_DATA(0),PAGE_SIZE);
-				pg_table = (pte_t *) __pa(pg_table);
-			}
-
-			pmd_populate_kernel(NULL, pmd, __va(pg_table));
-
-			/* now change pg_table to kernel virtual addresses */
-
-			pg_table = (pte_t *) __va(pg_table) + start_pte;
-			for (tmp2 = start_pte; tmp2 < PTRS_PER_PTE; tmp2++,pg_table++) {
-				pte_t pte;
-
-				/*
-				 * Map the fault vector writable so we can
-				 * write the HPMC checksum.
-				 */
-#if defined(CONFIG_PARISC_PAGE_SIZE_4KB)
-				if (address >= ro_start && address < ro_end
-							&& address != fv_addr
-							&& address != gw_addr)
-				    pte = __mk_pte(address, PAGE_KERNEL_RO);
-				else
-#endif
-				    pte = __mk_pte(address, pgprot);
-
-				if (address >= end_paddr)
-					pte_val(pte) = 0;
-
-				set_pte(pg_table, pte);
-
-				address += PAGE_SIZE;
-			}
-			start_pte = 0;
-
-			if (address >= end_paddr)
-			    break;
-		}
-		start_pmd = 0;
-	}
-}
-
 /*
  * pagetable_init() sets up the page tables
  *
@@ -748,14 +776,14 @@ static void __init pagetable_init(void)
 		size = pmem_ranges[range].pages << PAGE_SHIFT;
 
 		map_pages((unsigned long)__va(start_paddr), start_paddr,
-			size, PAGE_KERNEL);
+			  size, PAGE_KERNEL, 0);
 	}
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	if (initrd_end && initrd_end > mem_limit) {
 		printk(KERN_INFO "initrd: mapping %08lx-%08lx\n", initrd_start, initrd_end);
 		map_pages(initrd_start, __pa(initrd_start),
-			initrd_end - initrd_start, PAGE_KERNEL);
+			  initrd_end - initrd_start, PAGE_KERNEL, 0);
 	}
 #endif
 
@@ -780,7 +808,7 @@ static void __init gateway_init(void)
 	 */
 
 	map_pages(linux_gateway_page_addr, __pa(&linux_gateway_page),
-		PAGE_SIZE, PAGE_GATEWAY);
+		  PAGE_SIZE, PAGE_GATEWAY, 1);
 }
 
 #ifdef CONFIG_HPUX
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 8f4d50b0adf..a3128ca0fe1 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -193,6 +193,12 @@ config SYS_SUPPORTS_APM_EMULATION
 	default y if PMAC_APM_EMU
 	bool
 
+config EPAPR_BOOT
+	bool
+	help
+	  Used to allow a board to specify it wants an ePAPR compliant wrapper.
+	default n
+
 config DEFAULT_UIMAGE
 	bool
 	help
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 2d38a50e66b..a597dd77b90 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -267,6 +267,11 @@ config PPC_EARLY_DEBUG_USBGECKO
 	  Select this to enable early debugging for Nintendo GameCube/Wii
 	  consoles via an external USB Gecko adapter.
 
+config PPC_EARLY_DEBUG_WSP
+	bool "Early debugging via WSP's internal UART"
+	depends on PPC_WSP
+	select PPC_UDBG_16550
+
 endchoice
 
 config PPC_EARLY_DEBUG_44x_PHYSLOW
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 89178164af5..c26200b40a4 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -69,7 +69,8 @@ src-wlib := string.S crt0.S crtsavres.S stdio.c main.c \
 		cpm-serial.c stdlib.c mpc52xx-psc.c planetcore.c uartlite.c \
 		fsl-soc.c mpc8xx.c pq2.c ugecon.c
 src-plat := of.c cuboot-52xx.c cuboot-824x.c cuboot-83xx.c cuboot-85xx.c holly.c \
-		cuboot-ebony.c cuboot-hotfoot.c treeboot-ebony.c prpmc2800.c \
+		cuboot-ebony.c cuboot-hotfoot.c epapr.c treeboot-ebony.c \
+		prpmc2800.c \
 		ps3-head.S ps3-hvcall.S ps3.c treeboot-bamboo.c cuboot-8xx.c \
 		cuboot-pq2.c cuboot-sequoia.c treeboot-walnut.c \
 		cuboot-bamboo.c cuboot-mpc7448hpc2.c cuboot-taishan.c \
@@ -127,7 +128,7 @@ quiet_cmd_bootas = BOOTAS  $@
       cmd_bootas = $(CROSS32CC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $<
 
 quiet_cmd_bootar = BOOTAR  $@
-      cmd_bootar = $(CROSS32AR) -cr $@.$$$$ $(filter-out FORCE,$^); mv $@.$$$$ $@
+      cmd_bootar = $(CROSS32AR) -cr$(KBUILD_ARFLAGS) $@.$$$$ $(filter-out FORCE,$^); mv $@.$$$$ $@
 
 $(obj-libfdt): $(obj)/%.o: $(srctree)/scripts/dtc/libfdt/%.c FORCE
 	$(call if_changed_dep,bootcc)
@@ -182,6 +183,7 @@ image-$(CONFIG_PPC_HOLLY)		+= dtbImage.holly
 image-$(CONFIG_PPC_PRPMC2800)		+= dtbImage.prpmc2800
 image-$(CONFIG_PPC_ISERIES)		+= zImage.iseries
 image-$(CONFIG_DEFAULT_UIMAGE)		+= uImage
+image-$(CONFIG_EPAPR_BOOT)		+= zImage.epapr
 
 #
 # Targets which embed a device tree blob
diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S
index f1c4dfc635b..0f7428a37ef 100644
--- a/arch/powerpc/boot/crt0.S
+++ b/arch/powerpc/boot/crt0.S
@@ -6,16 +6,28 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  *
- * NOTE: this code runs in 32 bit mode and is packaged as ELF32.
+ * NOTE: this code runs in 32 bit mode, is position-independent,
+ * and is packaged as ELF32.
  */
 
 #include "ppc_asm.h"
 
 	.text
-	/* a procedure descriptor used when booting this as a COFF file */
+	/* A procedure descriptor used when booting this as a COFF file.
+	 * When making COFF, this comes first in the link and we're
+	 * linked at 0x500000.
+	 */
 	.globl	_zimage_start_opd
 _zimage_start_opd:
-	.long	_zimage_start, 0, 0, 0
+	.long	0x500000, 0, 0, 0
+
+p_start:	.long	_start
+p_etext:	.long	_etext
+p_bss_start:	.long	__bss_start
+p_end:		.long	_end
+
+	.weak	_platform_stack_top
+p_pstack:	.long	_platform_stack_top
 
 	.weak	_zimage_start
 	.globl	_zimage_start
@@ -24,37 +36,65 @@ _zimage_start:
 _zimage_start_lib:
 	/* Work out the offset between the address we were linked at
 	   and the address where we're running. */
-	bl	1f
-1:	mflr	r0
-	lis	r9,1b@ha
-	addi	r9,r9,1b@l
-	subf.	r0,r9,r0
-	beq	3f		/* if running at same address as linked */
+	bl	.+4
+p_base:	mflr	r10		/* r10 now points to runtime addr of p_base */
+	/* grab the link address of the dynamic section in r11 */
+	addis	r11,r10,(_GLOBAL_OFFSET_TABLE_-p_base)@ha
+	lwz	r11,(_GLOBAL_OFFSET_TABLE_-p_base)@l(r11)
+	cmpwi	r11,0
+	beq	3f		/* if not linked -pie */
+	/* get the runtime address of the dynamic section in r12 */
+	.weak	__dynamic_start
+	addis	r12,r10,(__dynamic_start-p_base)@ha
+	addi	r12,r12,(__dynamic_start-p_base)@l
+	subf	r11,r11,r12	/* runtime - linktime offset */
+
+	/* The dynamic section contains a series of tagged entries.
+	 * We need the RELA and RELACOUNT entries. */
+RELA = 7
+RELACOUNT = 0x6ffffff9
+	li	r9,0
+	li	r0,0
+9:	lwz	r8,0(r12)	/* get tag */
+	cmpwi	r8,0
+	beq	10f		/* end of list */
+	cmpwi	r8,RELA
+	bne	11f
+	lwz	r9,4(r12)	/* get RELA pointer in r9 */
+	b	12f
+11:	addis	r8,r8,(-RELACOUNT)@ha
+	cmpwi	r8,RELACOUNT@l
+	bne	12f
+	lwz	r0,4(r12)	/* get RELACOUNT value in r0 */
+12:	addi	r12,r12,8
+	b	9b
 
-	/* The .got2 section contains a list of addresses, so add
-	   the address offset onto each entry. */
-	lis	r9,__got2_start@ha
-	addi	r9,r9,__got2_start@l
-	lis	r8,__got2_end@ha
-	addi	r8,r8,__got2_end@l
-	subf.	r8,r9,r8
+	/* The relocation section contains a list of relocations.
+	 * We now do the R_PPC_RELATIVE ones, which point to words
+	 * which need to be initialized with addend + offset.
+	 * The R_PPC_RELATIVE ones come first and there are RELACOUNT
+	 * of them. */
+10:	/* skip relocation if we don't have both */
+	cmpwi	r0,0
 	beq	3f
-	srwi.	r8,r8,2
-	mtctr	r8
-	add	r9,r0,r9
-2:	lwz	r8,0(r9)
-	add	r8,r8,r0
-	stw	r8,0(r9)
-	addi	r9,r9,4
+	cmpwi	r9,0
+	beq	3f
+
+	add	r9,r9,r11	/* Relocate RELA pointer */
+	mtctr	r0
+2:	lbz	r0,4+3(r9)	/* ELF32_R_INFO(reloc->r_info) */
+	cmpwi	r0,22		/* R_PPC_RELATIVE */
+	bne	3f
+	lwz	r12,0(r9)	/* reloc->r_offset */
+	lwz	r0,8(r9)	/* reloc->r_addend */
+	add	r0,r0,r11
+	stwx	r0,r11,r12
+	addi	r9,r9,12
 	bdnz	2b
 
 	/* Do a cache flush for our text, in case the loader didn't */
-3:	lis	r9,_start@ha
-	addi	r9,r9,_start@l
-	add	r9,r0,r9
-	lis	r8,_etext@ha
-	addi	r8,r8,_etext@l
-	add	r8,r0,r8
+3:	lwz	r9,p_start-p_base(r10)	/* note: these are relocated now */
+	lwz	r8,p_etext-p_base(r10)
 4:	dcbf	r0,r9
 	icbi	r0,r9
 	addi	r9,r9,0x20
@@ -64,27 +104,19 @@ _zimage_start_lib:
 	isync
 
 	/* Clear the BSS */
-	lis	r9,__bss_start@ha
-	addi	r9,r9,__bss_start@l
-	add	r9,r0,r9
-	lis	r8,_end@ha
-	addi	r8,r8,_end@l
-	add	r8,r0,r8
-	li	r10,0
-5:	stw	r10,0(r9)
+	lwz	r9,p_bss_start-p_base(r10)
+	lwz	r8,p_end-p_base(r10)
+	li	r0,0
+5:	stw	r0,0(r9)
 	addi	r9,r9,4
 	cmplw	cr0,r9,r8
 	blt	5b
 
 	/* Possibly set up a custom stack */
-.weak	_platform_stack_top
-	lis	r8,_platform_stack_top@ha
-	addi	r8,r8,_platform_stack_top@l
+	lwz	r8,p_pstack-p_base(r10)
 	cmpwi	r8,0
 	beq	6f
-	add	r8,r0,r8
 	lwz	r1,0(r8)
-	add	r1,r0,r1
 	li	r0,0
 	stwu	r0,-16(r1)	/* establish a stack frame */
 6:
diff --git a/arch/powerpc/boot/dts/p1020rdb.dts b/arch/powerpc/boot/dts/p1020rdb.dts
index e0668f87779..d6a8ae45813 100644
--- a/arch/powerpc/boot/dts/p1020rdb.dts
+++ b/arch/powerpc/boot/dts/p1020rdb.dts
@@ -9,12 +9,11 @@
  * option) any later version.
  */
 
-/dts-v1/;
+/include/ "p1020si.dtsi"
+
 / {
-	model = "fsl,P1020";
+	model = "fsl,P1020RDB";
 	compatible = "fsl,P1020RDB";
-	#address-cells = <2>;
-	#size-cells = <2>;
 
 	aliases {
 		serial0 = &serial0;
@@ -26,34 +25,11 @@
 		pci1 = &pci1;
 	};
 
-	cpus {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		PowerPC,P1020@0 {
-			device_type = "cpu";
-			reg = <0x0>;
-			next-level-cache = <&L2>;
-		};
-
-		PowerPC,P1020@1 {
-			device_type = "cpu";
-			reg = <0x1>;
-			next-level-cache = <&L2>;
-		};
-	};
-
 	memory {
 		device_type = "memory";
 	};
 
 	localbus@ffe05000 {
-		#address-cells = <2>;
-		#size-cells = <1>;
-		compatible = "fsl,p1020-elbc", "fsl,elbc", "simple-bus";
-		reg = <0 0xffe05000 0 0x1000>;
-		interrupts = <19 2>;
-		interrupt-parent = <&mpic>;
 
 		/* NOR, NAND Flashes and Vitesse 5 port L2 switch */
 		ranges = <0x0 0x0 0x0 0xef000000 0x01000000
@@ -165,88 +141,14 @@
 	};
 
 	soc@ffe00000 {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		device_type = "soc";
-		compatible = "fsl,p1020-immr", "simple-bus";
-		ranges = <0x0  0x0 0xffe00000 0x100000>;
-		bus-frequency = <0>;		// Filled out by uboot.
-
-		ecm-law@0 {
-			compatible = "fsl,ecm-law";
-			reg = <0x0 0x1000>;
-			fsl,num-laws = <12>;
-		};
-
-		ecm@1000 {
-			compatible = "fsl,p1020-ecm", "fsl,ecm";
-			reg = <0x1000 0x1000>;
-			interrupts = <16 2>;
-			interrupt-parent = <&mpic>;
-		};
-
-		memory-controller@2000 {
-			compatible = "fsl,p1020-memory-controller";
-			reg = <0x2000 0x1000>;
-			interrupt-parent = <&mpic>;
-			interrupts = <16 2>;
-		};
-
 		i2c@3000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <0>;
-			compatible = "fsl-i2c";
-			reg = <0x3000 0x100>;
-			interrupts = <43 2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
 			rtc@68 {
 				compatible = "dallas,ds1339";
 				reg = <0x68>;
 			};
 		};
 
-		i2c@3100 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <1>;
-			compatible = "fsl-i2c";
-			reg = <0x3100 0x100>;
-			interrupts = <43 2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
-		};
-
-		serial0: serial@4500 {
-			cell-index = <0>;
-			device_type = "serial";
-			compatible = "ns16550";
-			reg = <0x4500 0x100>;
-			clock-frequency = <0>;
-			interrupts = <42 2>;
-			interrupt-parent = <&mpic>;
-		};
-
-		serial1: serial@4600 {
-			cell-index = <1>;
-			device_type = "serial";
-			compatible = "ns16550";
-			reg = <0x4600 0x100>;
-			clock-frequency = <0>;
-			interrupts = <42 2>;
-			interrupt-parent = <&mpic>;
-		};
-
 		spi@7000 {
-			cell-index = <0>;
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl,espi";
-			reg = <0x7000 0x1000>;
-			interrupts = <59 0x2>;
-			interrupt-parent = <&mpic>;
-			mode = "cpu";
 
 			fsl_m25p80@0 {
 				#address-cells = <1>;
@@ -294,66 +196,7 @@
 			};
 		};
 
-		gpio: gpio-controller@f000 {
-			#gpio-cells = <2>;
-			compatible = "fsl,mpc8572-gpio";
-			reg = <0xf000 0x100>;
-			interrupts = <47 0x2>;
-			interrupt-parent = <&mpic>;
-			gpio-controller;
-		};
-
-		L2: l2-cache-controller@20000 {
-			compatible = "fsl,p1020-l2-cache-controller";
-			reg = <0x20000 0x1000>;
-			cache-line-size = <32>;	// 32 bytes
-			cache-size = <0x40000>; // L2,256K
-			interrupt-parent = <&mpic>;
-			interrupts = <16 2>;
-		};
-
-		dma@21300 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			compatible = "fsl,eloplus-dma";
-			reg = <0x21300 0x4>;
-			ranges = <0x0 0x21100 0x200>;
-			cell-index = <0>;
-			dma-channel@0 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x0 0x80>;
-				cell-index = <0>;
-				interrupt-parent = <&mpic>;
-				interrupts = <20 2>;
-			};
-			dma-channel@80 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x80 0x80>;
-				cell-index = <1>;
-				interrupt-parent = <&mpic>;
-				interrupts = <21 2>;
-			};
-			dma-channel@100 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x100 0x80>;
-				cell-index = <2>;
-				interrupt-parent = <&mpic>;
-				interrupts = <22 2>;
-			};
-			dma-channel@180 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x180 0x80>;
-				cell-index = <3>;
-				interrupt-parent = <&mpic>;
-				interrupts = <23 2>;
-			};
-		};
-
 		mdio@24000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl,etsec2-mdio";
-			reg = <0x24000 0x1000 0xb0030 0x4>;
 
 			phy0: ethernet-phy@0 {
 				interrupt-parent = <&mpic>;
@@ -369,10 +212,6 @@
 		};
 
 		mdio@25000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl,etsec2-tbi";
-			reg = <0x25000 0x1000 0xb1030 0x4>;
 
 			tbi0: tbi-phy@11 {
 				reg = <0x11>;
@@ -381,97 +220,25 @@
 		};
 
 		enet0: ethernet@b0000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "fsl,etsec2";
-			fsl,num_rx_queues = <0x8>;
-			fsl,num_tx_queues = <0x8>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupt-parent = <&mpic>;
 			fixed-link = <1 1 1000 0 0>;
 			phy-connection-type = "rgmii-id";
 
-			queue-group@0 {
-				#address-cells = <1>;
-				#size-cells = <1>;
-				reg = <0xb0000 0x1000>;
-				interrupts = <29 2 30 2 34 2>;
-			};
-
-			queue-group@1 {
-				#address-cells = <1>;
-				#size-cells = <1>;
-				reg = <0xb4000 0x1000>;
-				interrupts = <17 2 18 2 24 2>;
-			};
 		};
 
 		enet1: ethernet@b1000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "fsl,etsec2";
-			fsl,num_rx_queues = <0x8>;
-			fsl,num_tx_queues = <0x8>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupt-parent = <&mpic>;
 			phy-handle = <&phy0>;
 			tbi-handle = <&tbi0>;
 			phy-connection-type = "sgmii";
 
-			queue-group@0 {
-				#address-cells = <1>;
-				#size-cells = <1>;
-				reg = <0xb1000 0x1000>;
-				interrupts = <35 2 36 2 40 2>;
-			};
-
-			queue-group@1 {
-				#address-cells = <1>;
-				#size-cells = <1>;
-				reg = <0xb5000 0x1000>;
-				interrupts = <51 2 52 2 67 2>;
-			};
 		};
 
 		enet2: ethernet@b2000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "fsl,etsec2";
-			fsl,num_rx_queues = <0x8>;
-			fsl,num_tx_queues = <0x8>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupt-parent = <&mpic>;
 			phy-handle = <&phy1>;
 			phy-connection-type = "rgmii-id";
 
-			queue-group@0 {
-				#address-cells = <1>;
-				#size-cells = <1>;
-				reg = <0xb2000 0x1000>;
-				interrupts = <31 2 32 2 33 2>;
-			};
-
-			queue-group@1 {
-				#address-cells = <1>;
-				#size-cells = <1>;
-				reg = <0xb6000 0x1000>;
-				interrupts = <25 2 26 2 27 2>;
-			};
 		};
 
 		usb@22000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl-usb2-dr";
-			reg = <0x22000 0x1000>;
-			interrupt-parent = <&mpic>;
-			interrupts = <28 0x2>;
 			phy_type = "ulpi";
 		};
 
@@ -481,82 +248,23 @@
 		   it enables USB2. OTOH, U-Boot does create a new node
 		   when there isn't any. So, just comment it out.
 		usb@23000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl-usb2-dr";
-			reg = <0x23000 0x1000>;
-			interrupt-parent = <&mpic>;
-			interrupts = <46 0x2>;
 			phy_type = "ulpi";
 		};
 		*/
 
-		sdhci@2e000 {
-			compatible = "fsl,p1020-esdhc", "fsl,esdhc";
-			reg = <0x2e000 0x1000>;
-			interrupts = <72 0x2>;
-			interrupt-parent = <&mpic>;
-			/* Filled in by U-Boot */
-			clock-frequency = <0>;
-		};
-
-		crypto@30000 {
-			compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4",
-				     "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
-			reg = <0x30000 0x10000>;
-			interrupts = <45 2 58 2>;
-			interrupt-parent = <&mpic>;
-			fsl,num-channels = <4>;
-			fsl,channel-fifo-len = <24>;
-			fsl,exec-units-mask = <0xbfe>;
-			fsl,descriptor-types-mask = <0x3ab0ebf>;
-		};
-
-		mpic: pic@40000 {
-			interrupt-controller;
-			#address-cells = <0>;
-			#interrupt-cells = <2>;
-			reg = <0x40000 0x40000>;
-			compatible = "chrp,open-pic";
-			device_type = "open-pic";
-		};
-
-		msi@41600 {
-			compatible = "fsl,p1020-msi", "fsl,mpic-msi";
-			reg = <0x41600 0x80>;
-			msi-available-ranges = <0 0x100>;
-			interrupts = <
-				0xe0 0
-				0xe1 0
-				0xe2 0
-				0xe3 0
-				0xe4 0
-				0xe5 0
-				0xe6 0
-				0xe7 0>;
-			interrupt-parent = <&mpic>;
-		};
-
-		global-utilities@e0000 {	//global utilities block
-			compatible = "fsl,p1020-guts";
-			reg = <0xe0000 0x1000>;
-			fsl,has-rstcr;
-		};
 	};
 
 	pci0: pcie@ffe09000 {
-		compatible = "fsl,mpc8548-pcie";
-		device_type = "pci";
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0 0xffe09000 0 0x1000>;
-		bus-range = <0 255>;
 		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
-		clock-frequency = <33333333>;
-		interrupt-parent = <&mpic>;
-		interrupts = <16 2>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x4 0x1
+			0000 0x0 0x0 0x2 &mpic 0x5 0x1
+			0000 0x0 0x0 0x3 &mpic 0x6 0x1
+			0000 0x0 0x0 0x4 &mpic 0x7 0x1
+			>;
 		pcie@0 {
 			reg = <0x0 0x0 0x0 0x0 0x0>;
 			#size-cells = <2>;
@@ -573,18 +281,16 @@
 	};
 
 	pci1: pcie@ffe0a000 {
-		compatible = "fsl,mpc8548-pcie";
-		device_type = "pci";
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0 0xffe0a000 0 0x1000>;
-		bus-range = <0 255>;
 		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
-		clock-frequency = <33333333>;
-		interrupt-parent = <&mpic>;
-		interrupts = <16 2>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x0 0x1
+			0000 0x0 0x0 0x2 &mpic 0x1 0x1
+			0000 0x0 0x0 0x3 &mpic 0x2 0x1
+			0000 0x0 0x0 0x4 &mpic 0x3 0x1
+			>;
 		pcie@0 {
 			reg = <0x0 0x0 0x0 0x0 0x0>;
 			#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/p1020rdb_camp_core0.dts b/arch/powerpc/boot/dts/p1020rdb_camp_core0.dts
new file mode 100644
index 00000000000..f0bf7f42f09
--- /dev/null
+++ b/arch/powerpc/boot/dts/p1020rdb_camp_core0.dts
@@ -0,0 +1,213 @@
+/*
+ * P1020 RDB  Core0 Device Tree Source in CAMP mode.
+ *
+ * In CAMP mode, each core needs to have its own dts. Only mpic and L2 cache
+ * can be shared, all the other devices must be assigned to one core only.
+ * This dts file allows core0 to have memory, l2, i2c, spi, gpio, tdm, dma, usb,
+ * eth1, eth2, sdhc, crypto, global-util, message, pci0, pci1, msi.
+ *
+ * Please note to add "-b 0" for core0's dts compiling.
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+/include/ "p1020si.dtsi"
+
+/ {
+	model = "fsl,P1020RDB";
+	compatible = "fsl,P1020RDB", "fsl,MPC85XXRDB-CAMP";
+
+	aliases {
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		serial0 = &serial0;
+		pci0 = &pci0;
+		pci1 = &pci1;
+	};
+
+	cpus {
+		PowerPC,P1020@1 {
+		status = "disabled";
+		};
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	localbus@ffe05000 {
+		status = "disabled";
+	};
+
+	soc@ffe00000 {
+		i2c@3000 {
+			rtc@68 {
+				compatible = "dallas,ds1339";
+				reg = <0x68>;
+			};
+		};
+
+		serial1: serial@4600 {
+			status = "disabled";
+		};
+
+		spi@7000 {
+			fsl_m25p80@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "fsl,espi-flash";
+				reg = <0>;
+				linux,modalias = "fsl_m25p80";
+				spi-max-frequency = <40000000>;
+
+				partition@0 {
+					/* 512KB for u-boot Bootloader Image */
+					reg = <0x0 0x00080000>;
+					label = "SPI (RO) U-Boot Image";
+					read-only;
+				};
+
+				partition@80000 {
+					/* 512KB for DTB Image */
+					reg = <0x00080000 0x00080000>;
+					label = "SPI (RO) DTB Image";
+					read-only;
+				};
+
+				partition@100000 {
+					/* 4MB for Linux Kernel Image */
+					reg = <0x00100000 0x00400000>;
+					label = "SPI (RO) Linux Kernel Image";
+					read-only;
+				};
+
+				partition@500000 {
+					/* 4MB for Compressed RFS Image */
+					reg = <0x00500000 0x00400000>;
+					label = "SPI (RO) Compressed RFS Image";
+					read-only;
+				};
+
+				partition@900000 {
+					/* 7MB for JFFS2 based RFS */
+					reg = <0x00900000 0x00700000>;
+					label = "SPI (RW) JFFS2 RFS";
+				};
+			};
+		};
+
+		mdio@24000 {
+			phy0: ethernet-phy@0 {
+				interrupt-parent = <&mpic>;
+				interrupts = <3 1>;
+				reg = <0x0>;
+			};
+			phy1: ethernet-phy@1 {
+				interrupt-parent = <&mpic>;
+				interrupts = <2 1>;
+				reg = <0x1>;
+			};
+		};
+
+		mdio@25000 {
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		enet0: ethernet@b0000 {
+			status = "disabled";
+		};
+
+		enet1: ethernet@b1000 {
+			phy-handle = <&phy0>;
+			tbi-handle = <&tbi0>;
+			phy-connection-type = "sgmii";
+		};
+
+		enet2: ethernet@b2000 {
+			phy-handle = <&phy1>;
+			phy-connection-type = "rgmii-id";
+		};
+
+		usb@22000 {
+			phy_type = "ulpi";
+		};
+
+		/* USB2 is shared with localbus, so it must be disabled
+		   by default. We can't put 'status = "disabled";' here
+		   since U-Boot doesn't clear the status property when
+		   it enables USB2. OTOH, U-Boot does create a new node
+		   when there isn't any. So, just comment it out.
+		usb@23000 {
+			phy_type = "ulpi";
+		};
+		*/
+
+		mpic: pic@40000 {
+			protected-sources = <
+			42 29 30 34	/* serial1, enet0-queue-group0 */
+			17 18 24 45	/* enet0-queue-group1, crypto */
+			>;
+		};
+
+	};
+
+	pci0: pcie@ffe09000 {
+		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x4 0x1
+			0000 0x0 0x0 0x2 &mpic 0x5 0x1
+			0000 0x0 0x0 0x3 &mpic 0x6 0x1
+			0000 0x0 0x0 0x4 &mpic 0x7 0x1
+			>;
+		pcie@0 {
+			reg = <0x0 0x0 0x0 0x0 0x0>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			device_type = "pci";
+			ranges = <0x2000000 0x0 0xa0000000
+				  0x2000000 0x0 0xa0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci1: pcie@ffe0a000 {
+		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x0 0x1
+			0000 0x0 0x0 0x2 &mpic 0x1 0x1
+			0000 0x0 0x0 0x3 &mpic 0x2 0x1
+			0000 0x0 0x0 0x4 &mpic 0x3 0x1
+			>;
+		pcie@0 {
+			reg = <0x0 0x0 0x0 0x0 0x0>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			device_type = "pci";
+			ranges = <0x2000000 0x0 0x80000000
+				  0x2000000 0x0 0x80000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/p1020rdb_camp_core1.dts b/arch/powerpc/boot/dts/p1020rdb_camp_core1.dts
new file mode 100644
index 00000000000..6ec02204a44
--- /dev/null
+++ b/arch/powerpc/boot/dts/p1020rdb_camp_core1.dts
@@ -0,0 +1,148 @@
+/*
+ * P1020 RDB Core1 Device Tree Source in CAMP mode.
+ *
+ * In CAMP mode, each core needs to have its own dts. Only mpic and L2 cache
+ * can be shared, all the other devices must be assigned to one core only.
+ * This dts allows core1 to have l2, eth0, crypto.
+ *
+ * Please note to add "-b 1" for core1's dts compiling.
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+/include/ "p1020si.dtsi"
+
+/ {
+	model = "fsl,P1020RDB";
+	compatible = "fsl,P1020RDB", "fsl,MPC85XXRDB-CAMP";
+
+	aliases {
+		ethernet0 = &enet0;
+		serial0 = &serial1;
+		};
+
+	cpus {
+		PowerPC,P1020@0 {
+		status = "disabled";
+		};
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	localbus@ffe05000 {
+		status = "disabled";
+	};
+
+	soc@ffe00000 {
+		ecm-law@0 {
+			status = "disabled";
+		};
+
+		ecm@1000 {
+			status = "disabled";
+		};
+
+		memory-controller@2000 {
+			status = "disabled";
+		};
+
+		i2c@3000 {
+			status = "disabled";
+		};
+
+		i2c@3100 {
+			status = "disabled";
+		};
+
+		serial0: serial@4500 {
+			status = "disabled";
+		};
+
+		spi@7000 {
+			status = "disabled";
+		};
+
+		gpio: gpio-controller@f000 {
+			status = "disabled";
+		};
+
+		dma@21300 {
+			status = "disabled";
+		};
+
+		mdio@24000 {
+			status = "disabled";
+		};
+
+		mdio@25000 {
+			status = "disabled";
+		};
+
+		enet0: ethernet@b0000 {
+			fixed-link = <1 1 1000 0 0>;
+			phy-connection-type = "rgmii-id";
+
+		};
+
+		enet1: ethernet@b1000 {
+			status = "disabled";
+		};
+
+		enet2: ethernet@b2000 {
+			status = "disabled";
+		};
+
+		usb@22000 {
+			status = "disabled";
+		};
+
+		sdhci@2e000 {
+			status = "disabled";
+		};
+
+		mpic: pic@40000 {
+			protected-sources = <
+			16 		/* ecm, mem, L2, pci0, pci1 */
+			43 42 59	/* i2c, serial0, spi */
+			47 63 62 	/* gpio, tdm */
+			20 21 22 23	/* dma */
+			03 02 		/* mdio */
+			35 36 40	/* enet1-queue-group0 */
+			51 52 67	/* enet1-queue-group1 */
+			31 32 33	/* enet2-queue-group0 */
+			25 26 27	/* enet2-queue-group1 */
+			28 72 58 	/* usb, sdhci, crypto */
+			0xb0 0xb1 0xb2	/* message */
+			0xb3 0xb4 0xb5
+			0xb6 0xb7
+			0xe0 0xe1 0xe2	/* msi */
+			0xe3 0xe4 0xe5
+			0xe6 0xe7		/* sdhci, crypto , pci */
+			>;
+		};
+
+		msi@41600 {
+			status = "disabled";
+		};
+
+		global-utilities@e0000 {	//global utilities block
+			status = "disabled";
+		};
+
+	};
+
+	pci0: pcie@ffe09000 {
+		status = "disabled";
+	};
+
+	pci1: pcie@ffe0a000 {
+		status = "disabled";
+	};
+};
diff --git a/arch/powerpc/boot/dts/p1020si.dtsi b/arch/powerpc/boot/dts/p1020si.dtsi
new file mode 100644
index 00000000000..5c5acb66c3f
--- /dev/null
+++ b/arch/powerpc/boot/dts/p1020si.dtsi
@@ -0,0 +1,377 @@
+/*
+ * P1020si Device Tree Source
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+/dts-v1/;
+/ {
+	compatible = "fsl,P1020";
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,P1020@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			next-level-cache = <&L2>;
+		};
+
+		PowerPC,P1020@1 {
+			device_type = "cpu";
+			reg = <0x1>;
+			next-level-cache = <&L2>;
+		};
+	};
+
+	localbus@ffe05000 {
+		#address-cells = <2>;
+		#size-cells = <1>;
+		compatible = "fsl,p1020-elbc", "fsl,elbc", "simple-bus";
+		reg = <0 0xffe05000 0 0x1000>;
+		interrupts = <19 2>;
+		interrupt-parent = <&mpic>;
+	};
+
+	soc@ffe00000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "fsl,p1020-immr", "simple-bus";
+		ranges = <0x0  0x0 0xffe00000 0x100000>;
+		bus-frequency = <0>;		// Filled out by uboot.
+
+		ecm-law@0 {
+			compatible = "fsl,ecm-law";
+			reg = <0x0 0x1000>;
+			fsl,num-laws = <12>;
+		};
+
+		ecm@1000 {
+			compatible = "fsl,p1020-ecm", "fsl,ecm";
+			reg = <0x1000 0x1000>;
+			interrupts = <16 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		memory-controller@2000 {
+			compatible = "fsl,p1020-memory-controller";
+			reg = <0x2000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <16 2>;
+		};
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+		};
+
+		i2c@3100 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <1>;
+			compatible = "fsl-i2c";
+			reg = <0x3100 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x4500 0x100>;
+			clock-frequency = <0>;
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x4600 0x100>;
+			clock-frequency = <0>;
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		spi@7000 {
+			cell-index = <0>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,espi";
+			reg = <0x7000 0x1000>;
+			interrupts = <59 0x2>;
+			interrupt-parent = <&mpic>;
+			mode = "cpu";
+		};
+
+		gpio: gpio-controller@f000 {
+			#gpio-cells = <2>;
+			compatible = "fsl,mpc8572-gpio";
+			reg = <0xf000 0x100>;
+			interrupts = <47 0x2>;
+			interrupt-parent = <&mpic>;
+			gpio-controller;
+		};
+
+		L2: l2-cache-controller@20000 {
+			compatible = "fsl,p1020-l2-cache-controller";
+			reg = <0x20000 0x1000>;
+			cache-line-size = <32>;	// 32 bytes
+			cache-size = <0x40000>; // L2,256K
+			interrupt-parent = <&mpic>;
+			interrupts = <16 2>;
+		};
+
+		dma@21300 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,eloplus-dma";
+			reg = <0x21300 0x4>;
+			ranges = <0x0 0x21100 0x200>;
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <20 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <21 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <22 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <23 2>;
+			};
+		};
+
+		mdio@24000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,etsec2-mdio";
+			reg = <0x24000 0x1000 0xb0030 0x4>;
+
+		};
+
+		mdio@25000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,etsec2-tbi";
+			reg = <0x25000 0x1000 0xb1030 0x4>;
+
+		};
+
+		enet0: ethernet@b0000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "fsl,etsec2";
+			fsl,num_rx_queues = <0x8>;
+			fsl,num_tx_queues = <0x8>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupt-parent = <&mpic>;
+
+			queue-group@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				reg = <0xb0000 0x1000>;
+				interrupts = <29 2 30 2 34 2>;
+			};
+
+			queue-group@1 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				reg = <0xb4000 0x1000>;
+				interrupts = <17 2 18 2 24 2>;
+			};
+		};
+
+		enet1: ethernet@b1000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "fsl,etsec2";
+			fsl,num_rx_queues = <0x8>;
+			fsl,num_tx_queues = <0x8>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupt-parent = <&mpic>;
+
+			queue-group@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				reg = <0xb1000 0x1000>;
+				interrupts = <35 2 36 2 40 2>;
+			};
+
+			queue-group@1 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				reg = <0xb5000 0x1000>;
+				interrupts = <51 2 52 2 67 2>;
+			};
+		};
+
+		enet2: ethernet@b2000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "fsl,etsec2";
+			fsl,num_rx_queues = <0x8>;
+			fsl,num_tx_queues = <0x8>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupt-parent = <&mpic>;
+
+			queue-group@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				reg = <0xb2000 0x1000>;
+				interrupts = <31 2 32 2 33 2>;
+			};
+
+			queue-group@1 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				reg = <0xb6000 0x1000>;
+				interrupts = <25 2 26 2 27 2>;
+			};
+		};
+
+		usb@22000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl-usb2-dr";
+			reg = <0x22000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <28 0x2>;
+		};
+
+		/* USB2 is shared with localbus, so it must be disabled
+		   by default. We can't put 'status = "disabled";' here
+		   since U-Boot doesn't clear the status property when
+		   it enables USB2. OTOH, U-Boot does create a new node
+		   when there isn't any. So, just comment it out.
+		usb@23000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl-usb2-dr";
+			reg = <0x23000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <46 0x2>;
+			phy_type = "ulpi";
+		};
+		*/
+
+		sdhci@2e000 {
+			compatible = "fsl,p1020-esdhc", "fsl,esdhc";
+			reg = <0x2e000 0x1000>;
+			interrupts = <72 0x2>;
+			interrupt-parent = <&mpic>;
+			/* Filled in by U-Boot */
+			clock-frequency = <0>;
+		};
+
+		crypto@30000 {
+			compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4",
+				     "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
+			reg = <0x30000 0x10000>;
+			interrupts = <45 2 58 2>;
+			interrupt-parent = <&mpic>;
+			fsl,num-channels = <4>;
+			fsl,channel-fifo-len = <24>;
+			fsl,exec-units-mask = <0xbfe>;
+			fsl,descriptor-types-mask = <0x3ab0ebf>;
+		};
+
+		mpic: pic@40000 {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0x40000 0x40000>;
+			compatible = "chrp,open-pic";
+			device_type = "open-pic";
+		};
+
+		msi@41600 {
+			compatible = "fsl,p1020-msi", "fsl,mpic-msi";
+			reg = <0x41600 0x80>;
+			msi-available-ranges = <0 0x100>;
+			interrupts = <
+				0xe0 0
+				0xe1 0
+				0xe2 0
+				0xe3 0
+				0xe4 0
+				0xe5 0
+				0xe6 0
+				0xe7 0>;
+			interrupt-parent = <&mpic>;
+		};
+
+		global-utilities@e0000 {	//global utilities block
+			compatible = "fsl,p1020-guts","fsl,p2020-guts";
+			reg = <0xe0000 0x1000>;
+			fsl,has-rstcr;
+		};
+	};
+
+	pci0: pcie@ffe09000 {
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xffe09000 0 0x1000>;
+		bus-range = <0 255>;
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <16 2>;
+	};
+
+	pci1: pcie@ffe0a000 {
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xffe0a000 0 0x1000>;
+		bus-range = <0 255>;
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <16 2>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/p1022ds.dts b/arch/powerpc/boot/dts/p1022ds.dts
index 59ef405c1c9..4f685a779f4 100644
--- a/arch/powerpc/boot/dts/p1022ds.dts
+++ b/arch/powerpc/boot/dts/p1022ds.dts
@@ -52,7 +52,7 @@
 		#size-cells = <1>;
 		compatible = "fsl,p1022-elbc", "fsl,elbc", "simple-bus";
 		reg = <0 0xffe05000 0 0x1000>;
-		interrupts = <19 2>;
+		interrupts = <19 2 0 0>;
 
 		ranges = <0x0 0x0 0xf 0xe8000000 0x08000000
 			  0x1 0x0 0xf 0xe0000000 0x08000000
@@ -157,7 +157,7 @@
 			 * IRQ8 is generated if the "EVENT" switch is pressed
 			 * and PX_CTL[EVESEL] is set to 00.
 			 */
-			interrupts = <8 8>;
+			interrupts = <8 8 0 0>;
 		};
 	};
 
@@ -178,13 +178,13 @@
 		ecm@1000 {
 			compatible = "fsl,p1022-ecm", "fsl,ecm";
 			reg = <0x1000 0x1000>;
-			interrupts = <16 2>;
+			interrupts = <16 2 0 0>;
 		};
 
 		memory-controller@2000 {
 			compatible = "fsl,p1022-memory-controller";
 			reg = <0x2000 0x1000>;
-			interrupts = <16 2>;
+			interrupts = <16 2 0 0>;
 		};
 
 		i2c@3000 {
@@ -193,7 +193,7 @@
 			cell-index = <0>;
 			compatible = "fsl-i2c";
 			reg = <0x3000 0x100>;
-			interrupts = <43 2>;
+			interrupts = <43 2 0 0>;
 			dfsrr;
 		};
 
@@ -203,7 +203,7 @@
 			cell-index = <1>;
 			compatible = "fsl-i2c";
 			reg = <0x3100 0x100>;
-			interrupts = <43 2>;
+			interrupts = <43 2 0 0>;
 			dfsrr;
 
 			wm8776:codec@1a {
@@ -220,7 +220,7 @@
 			compatible = "ns16550";
 			reg = <0x4500 0x100>;
 			clock-frequency = <0>;
-			interrupts = <42 2>;
+			interrupts = <42 2 0 0>;
 		};
 
 		serial1: serial@4600 {
@@ -229,7 +229,7 @@
 			compatible = "ns16550";
 			reg = <0x4600 0x100>;
 			clock-frequency = <0>;
-			interrupts = <42 2>;
+			interrupts = <42 2 0 0>;
 		};
 
 		spi@7000 {
@@ -238,7 +238,7 @@
 			#size-cells = <0>;
 			compatible = "fsl,espi";
 			reg = <0x7000 0x1000>;
-			interrupts = <59 0x2>;
+			interrupts = <59 0x2 0 0>;
 			espi,num-ss-bits = <4>;
 			mode = "cpu";
 
@@ -275,7 +275,7 @@
 			compatible = "fsl,mpc8610-ssi";
 			cell-index = <0>;
 			reg = <0x15000 0x100>;
-			interrupts = <75 2>;
+			interrupts = <75 2 0 0>;
 			fsl,mode = "i2s-slave";
 			codec-handle = <&wm8776>;
 			fsl,playback-dma = <&dma00>;
@@ -294,25 +294,25 @@
 				compatible = "fsl,ssi-dma-channel";
 				reg = <0x0 0x80>;
 				cell-index = <0>;
-				interrupts = <76 2>;
+				interrupts = <76 2 0 0>;
 			};
 			dma01: dma-channel@80 {
 				compatible = "fsl,ssi-dma-channel";
 				reg = <0x80 0x80>;
 				cell-index = <1>;
-				interrupts = <77 2>;
+				interrupts = <77 2 0 0>;
 			};
 			dma-channel@100 {
 				compatible = "fsl,eloplus-dma-channel";
 				reg = <0x100 0x80>;
 				cell-index = <2>;
-				interrupts = <78 2>;
+				interrupts = <78 2 0 0>;
 			};
 			dma-channel@180 {
 				compatible = "fsl,eloplus-dma-channel";
 				reg = <0x180 0x80>;
 				cell-index = <3>;
-				interrupts = <79 2>;
+				interrupts = <79 2 0 0>;
 			};
 		};
 
@@ -320,7 +320,7 @@
 			#gpio-cells = <2>;
 			compatible = "fsl,mpc8572-gpio";
 			reg = <0xf000 0x100>;
-			interrupts = <47 0x2>;
+			interrupts = <47 0x2 0 0>;
 			gpio-controller;
 		};
 
@@ -329,7 +329,7 @@
 			reg = <0x20000 0x1000>;
 			cache-line-size = <32>;	// 32 bytes
 			cache-size = <0x40000>; // L2, 256K
-			interrupts = <16 2>;
+			interrupts = <16 2 0 0>;
 		};
 
 		dma@21300 {
@@ -343,25 +343,25 @@
 				compatible = "fsl,eloplus-dma-channel";
 				reg = <0x0 0x80>;
 				cell-index = <0>;
-				interrupts = <20 2>;
+				interrupts = <20 2 0 0>;
 			};
 			dma-channel@80 {
 				compatible = "fsl,eloplus-dma-channel";
 				reg = <0x80 0x80>;
 				cell-index = <1>;
-				interrupts = <21 2>;
+				interrupts = <21 2 0 0>;
 			};
 			dma-channel@100 {
 				compatible = "fsl,eloplus-dma-channel";
 				reg = <0x100 0x80>;
 				cell-index = <2>;
-				interrupts = <22 2>;
+				interrupts = <22 2 0 0>;
 			};
 			dma-channel@180 {
 				compatible = "fsl,eloplus-dma-channel";
 				reg = <0x180 0x80>;
 				cell-index = <3>;
-				interrupts = <23 2>;
+				interrupts = <23 2 0 0>;
 			};
 		};
 
@@ -370,7 +370,7 @@
 			#size-cells = <0>;
 			compatible = "fsl-usb2-dr";
 			reg = <0x22000 0x1000>;
-			interrupts = <28 0x2>;
+			interrupts = <28 0x2 0 0>;
 			phy_type = "ulpi";
 		};
 
@@ -381,11 +381,11 @@
 			reg = <0x24000 0x1000 0xb0030 0x4>;
 
 			phy0: ethernet-phy@0 {
-				interrupts = <3 1>;
+				interrupts = <3 1 0 0>;
 				reg = <0x1>;
 			};
 			phy1: ethernet-phy@1 {
-				interrupts = <9 1>;
+				interrupts = <9 1 0 0>;
 				reg = <0x2>;
 			};
 		};
@@ -416,13 +416,13 @@
 				#address-cells = <1>;
 				#size-cells = <1>;
 				reg = <0xB0000 0x1000>;
-				interrupts = <29 2 30 2 34 2>;
+				interrupts = <29 2 0 0 30 2 0 0 34 2 0 0>;
 			};
 			queue-group@1{
 				#address-cells = <1>;
 				#size-cells = <1>;
 				reg = <0xB4000 0x1000>;
-				interrupts = <17 2 18 2 24 2>;
+				interrupts = <17 2 0 0 18 2 0 0 24 2 0 0>;
 			};
 		};
 
@@ -443,20 +443,20 @@
 				#address-cells = <1>;
 				#size-cells = <1>;
 				reg = <0xB1000 0x1000>;
-				interrupts = <35 2 36 2 40 2>;
+				interrupts = <35 2 0 0 36 2 0 0 40 2 0 0>;
 			};
 			queue-group@1{
 				#address-cells = <1>;
 				#size-cells = <1>;
 				reg = <0xB5000 0x1000>;
-				interrupts = <51 2 52 2 67 2>;
+				interrupts = <51 2 0 0 52 2 0 0 67 2 0 0>;
 			};
 		};
 
 		sdhci@2e000 {
 			compatible = "fsl,p1022-esdhc", "fsl,esdhc";
 			reg = <0x2e000 0x1000>;
-			interrupts = <72 0x2>;
+			interrupts = <72 0x2 0 0>;
 			fsl,sdhci-auto-cmd12;
 			/* Filled in by U-Boot */
 			clock-frequency = <0>;
@@ -467,7 +467,7 @@
 				     "fsl,sec2.4", "fsl,sec2.2", "fsl,sec2.1",
 				     "fsl,sec2.0";
 			reg = <0x30000 0x10000>;
-			interrupts = <45 2 58 2>;
+			interrupts = <45 2 0 0 58 2 0 0>;
 			fsl,num-channels = <4>;
 			fsl,channel-fifo-len = <24>;
 			fsl,exec-units-mask = <0x97c>;
@@ -478,14 +478,14 @@
 			compatible = "fsl,p1022-sata", "fsl,pq-sata-v2";
 			reg = <0x18000 0x1000>;
 			cell-index = <1>;
-			interrupts = <74 0x2>;
+			interrupts = <74 0x2 0 0>;
 		};
 
 		sata@19000 {
 			compatible = "fsl,p1022-sata", "fsl,pq-sata-v2";
 			reg = <0x19000 0x1000>;
 			cell-index = <2>;
-			interrupts = <41 0x2>;
+			interrupts = <41 0x2 0 0>;
 		};
 
 		power@e0070{
@@ -496,21 +496,33 @@
 		display@10000 {
 			compatible = "fsl,diu", "fsl,p1022-diu";
 			reg = <0x10000 1000>;
-			interrupts = <64 2>;
+			interrupts = <64 2 0 0>;
 		};
 
 		timer@41100 {
 			compatible = "fsl,mpic-global-timer";
-			reg = <0x41100 0x204>;
-			interrupts = <0xf7 0x2>;
+			reg = <0x41100 0x100 0x41300 4>;
+			interrupts = <0 0 3 0
+			              1 0 3 0
+			              2 0 3 0
+			              3 0 3 0>;
+		};
+
+		timer@42100 {
+			compatible = "fsl,mpic-global-timer";
+			reg = <0x42100 0x100 0x42300 4>;
+			interrupts = <4 0 3 0
+			              5 0 3 0
+			              6 0 3 0
+			              7 0 3 0>;
 		};
 
 		mpic: pic@40000 {
 			interrupt-controller;
 			#address-cells = <0>;
-			#interrupt-cells = <2>;
+			#interrupt-cells = <4>;
 			reg = <0x40000 0x40000>;
-			compatible = "chrp,open-pic";
+			compatible = "fsl,mpic";
 			device_type = "open-pic";
 		};
 
@@ -519,14 +531,14 @@
 			reg = <0x41600 0x80>;
 			msi-available-ranges = <0 0x100>;
 			interrupts = <
-				0xe0 0
-				0xe1 0
-				0xe2 0
-				0xe3 0
-				0xe4 0
-				0xe5 0
-				0xe6 0
-				0xe7 0>;
+				0xe0 0 0 0
+				0xe1 0 0 0
+				0xe2 0 0 0
+				0xe3 0 0 0
+				0xe4 0 0 0
+				0xe5 0 0 0
+				0xe6 0 0 0
+				0xe7 0 0 0>;
 		};
 
 		global-utilities@e0000 {	//global utilities block
@@ -547,7 +559,7 @@
 		ranges = <0x2000000 0x0 0xa0000000 0xc 0x20000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0xf 0xffc10000 0x0 0x10000>;
 		clock-frequency = <33333333>;
-		interrupts = <16 2>;
+		interrupts = <16 2 0 0>;
 		interrupt-map-mask = <0xf800 0 0 7>;
 		interrupt-map = <
 			/* IDSEL 0x0 */
@@ -582,7 +594,7 @@
 		ranges = <0x2000000 0x0 0xc0000000 0xc 0x40000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0xf 0xffc20000 0x0 0x10000>;
 		clock-frequency = <33333333>;
-		interrupts = <16 2>;
+		interrupts = <16 2 0 0>;
 		interrupt-map-mask = <0xf800 0 0 7>;
 		interrupt-map = <
 			/* IDSEL 0x0 */
@@ -618,7 +630,7 @@
 		ranges = <0x2000000 0x0 0x80000000 0xc 0x00000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0xf 0xffc00000 0x0 0x10000>;
 		clock-frequency = <33333333>;
-		interrupts = <16 2>;
+		interrupts = <16 2 0 0>;
 		interrupt-map-mask = <0xf800 0 0 7>;
 		interrupt-map = <
 			/* IDSEL 0x0 */
diff --git a/arch/powerpc/boot/dts/p2020ds.dts b/arch/powerpc/boot/dts/p2020ds.dts
index 11019142813..2bcf3683d22 100644
--- a/arch/powerpc/boot/dts/p2020ds.dts
+++ b/arch/powerpc/boot/dts/p2020ds.dts
@@ -1,7 +1,7 @@
 /*
  * P2020 DS Device Tree Source
  *
- * Copyright 2009 Freescale Semiconductor Inc.
+ * Copyright 2009-2011 Freescale Semiconductor Inc.
  *
  * This program is free software; you can redistribute  it and/or modify it
  * under  the terms of  the GNU General  Public License as published by the
@@ -9,12 +9,11 @@
  * option) any later version.
  */
 
-/dts-v1/;
+/include/ "p2020si.dtsi"
+
 / {
-	model = "fsl,P2020";
+	model = "fsl,P2020DS";
 	compatible = "fsl,P2020DS";
-	#address-cells = <2>;
-	#size-cells = <2>;
 
 	aliases {
 		ethernet0 = &enet0;
@@ -27,35 +26,13 @@
 		pci2 = &pci2;
 	};
 
-	cpus {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		PowerPC,P2020@0 {
-			device_type = "cpu";
-			reg = <0x0>;
-			next-level-cache = <&L2>;
-		};
-
-		PowerPC,P2020@1 {
-			device_type = "cpu";
-			reg = <0x1>;
-			next-level-cache = <&L2>;
-		};
-	};
 
 	memory {
 		device_type = "memory";
 	};
 
 	localbus@ffe05000 {
-		#address-cells = <2>;
-		#size-cells = <1>;
 		compatible = "fsl,elbc", "simple-bus";
-		reg = <0 0xffe05000 0 0x1000>;
-		interrupts = <19 2>;
-		interrupt-parent = <&mpic>;
-
 		ranges = <0x0 0x0 0x0 0xe8000000 0x08000000
 			  0x1 0x0 0x0 0xe0000000 0x08000000
 			  0x2 0x0 0x0 0xffa00000 0x00040000
@@ -158,352 +135,77 @@
 	};
 
 	soc@ffe00000 {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		device_type = "soc";
-		compatible = "fsl,p2020-immr", "simple-bus";
-		ranges = <0x0 0 0xffe00000 0x100000>;
-		bus-frequency = <0>;		// Filled out by uboot.
-
-		ecm-law@0 {
-			compatible = "fsl,ecm-law";
-			reg = <0x0 0x1000>;
-			fsl,num-laws = <12>;
-		};
-
-		ecm@1000 {
-			compatible = "fsl,p2020-ecm", "fsl,ecm";
-			reg = <0x1000 0x1000>;
-			interrupts = <17 2>;
-			interrupt-parent = <&mpic>;
-		};
-
-		memory-controller@2000 {
-			compatible = "fsl,p2020-memory-controller";
-			reg = <0x2000 0x1000>;
-			interrupt-parent = <&mpic>;
-			interrupts = <18 2>;
-		};
-
-		i2c@3000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <0>;
-			compatible = "fsl-i2c";
-			reg = <0x3000 0x100>;
-			interrupts = <43 2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
-		};
-
-		i2c@3100 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <1>;
-			compatible = "fsl-i2c";
-			reg = <0x3100 0x100>;
-			interrupts = <43 2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
-		};
 
-		serial0: serial@4500 {
-			cell-index = <0>;
-			device_type = "serial";
-			compatible = "ns16550";
-			reg = <0x4500 0x100>;
-			clock-frequency = <0>;
-			interrupts = <42 2>;
-			interrupt-parent = <&mpic>;
-		};
-
-		serial1: serial@4600 {
-			cell-index = <1>;
-			device_type = "serial";
-			compatible = "ns16550";
-			reg = <0x4600 0x100>;
-			clock-frequency = <0>;
-			interrupts = <42 2>;
-			interrupt-parent = <&mpic>;
-		};
-
-		spi@7000 {
-			compatible = "fsl,espi";
-			reg = <0x7000 0x1000>;
-			interrupts = <59 0x2>;
-			interrupt-parent = <&mpic>;
+		usb@22000 {
+			phy_type = "ulpi";
 		};
 
-		dma@c300 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			compatible = "fsl,eloplus-dma";
-			reg = <0xc300 0x4>;
-			ranges = <0x0 0xc100 0x200>;
-			cell-index = <1>;
-			dma-channel@0 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x0 0x80>;
-				cell-index = <0>;
+		mdio@24520 {
+			phy0: ethernet-phy@0 {
 				interrupt-parent = <&mpic>;
-				interrupts = <76 2>;
+				interrupts = <3 1>;
+				reg = <0x0>;
 			};
-			dma-channel@80 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x80 0x80>;
-				cell-index = <1>;
+			phy1: ethernet-phy@1 {
 				interrupt-parent = <&mpic>;
-				interrupts = <77 2>;
+				interrupts = <3 1>;
+				reg = <0x1>;
 			};
-			dma-channel@100 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x100 0x80>;
-				cell-index = <2>;
+			phy2: ethernet-phy@2 {
 				interrupt-parent = <&mpic>;
-				interrupts = <78 2>;
+				interrupts = <3 1>;
+				reg = <0x2>;
 			};
-			dma-channel@180 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x180 0x80>;
-				cell-index = <3>;
-				interrupt-parent = <&mpic>;
-				interrupts = <79 2>;
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
 			};
-		};
 
-		gpio: gpio-controller@f000 {
-			#gpio-cells = <2>;
-			compatible = "fsl,mpc8572-gpio";
-			reg = <0xf000 0x100>;
-			interrupts = <47 0x2>;
-			interrupt-parent = <&mpic>;
-			gpio-controller;
 		};
 
-		L2: l2-cache-controller@20000 {
-			compatible = "fsl,p2020-l2-cache-controller";
-			reg = <0x20000 0x1000>;
-			cache-line-size = <32>;	// 32 bytes
-			cache-size = <0x80000>; // L2, 512k
-			interrupt-parent = <&mpic>;
-			interrupts = <16 2>;
+		mdio@25520 {
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
-		dma@21300 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			compatible = "fsl,eloplus-dma";
-			reg = <0x21300 0x4>;
-			ranges = <0x0 0x21100 0x200>;
-			cell-index = <0>;
-			dma-channel@0 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x0 0x80>;
-				cell-index = <0>;
-				interrupt-parent = <&mpic>;
-				interrupts = <20 2>;
-			};
-			dma-channel@80 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x80 0x80>;
-				cell-index = <1>;
-				interrupt-parent = <&mpic>;
-				interrupts = <21 2>;
+		mdio@26520 {
+			tbi2: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
 			};
-			dma-channel@100 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x100 0x80>;
-				cell-index = <2>;
-				interrupt-parent = <&mpic>;
-				interrupts = <22 2>;
-			};
-			dma-channel@180 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x180 0x80>;
-				cell-index = <3>;
-				interrupt-parent = <&mpic>;
-				interrupts = <23 2>;
-			};
-		};
 
-		usb@22000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl-usb2-dr";
-			reg = <0x22000 0x1000>;
-			interrupt-parent = <&mpic>;
-			interrupts = <28 0x2>;
-			phy_type = "ulpi";
 		};
 
 		enet0: ethernet@24000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <0>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x24000 0x1000>;
-			ranges = <0x0 0x24000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <29 2 30 2 34 2>;
-			interrupt-parent = <&mpic>;
 			tbi-handle = <&tbi0>;
 			phy-handle = <&phy0>;
 			phy-connection-type = "rgmii-id";
-
-			mdio@520 {
-				#address-cells = <1>;
-				#size-cells = <0>;
-				compatible = "fsl,gianfar-mdio";
-				reg = <0x520 0x20>;
-
-				phy0: ethernet-phy@0 {
-					interrupt-parent = <&mpic>;
-					interrupts = <3 1>;
-					reg = <0x0>;
-				};
-				phy1: ethernet-phy@1 {
-					interrupt-parent = <&mpic>;
-					interrupts = <3 1>;
-					reg = <0x1>;
-				};
-				phy2: ethernet-phy@2 {
-					interrupt-parent = <&mpic>;
-					interrupts = <3 1>;
-					reg = <0x2>;
-				};
-				tbi0: tbi-phy@11 {
-					reg = <0x11>;
-					device_type = "tbi-phy";
-				};
-			};
 		};
 
 		enet1: ethernet@25000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <1>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x25000 0x1000>;
-			ranges = <0x0 0x25000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <35 2 36 2 40 2>;
-			interrupt-parent = <&mpic>;
 			tbi-handle = <&tbi1>;
 			phy-handle = <&phy1>;
 			phy-connection-type = "rgmii-id";
 
-			mdio@520 {
-				#address-cells = <1>;
-				#size-cells = <0>;
-				compatible = "fsl,gianfar-tbi";
-				reg = <0x520 0x20>;
-
-				tbi1: tbi-phy@11 {
-					reg = <0x11>;
-					device_type = "tbi-phy";
-				};
-			};
 		};
 
 		enet2: ethernet@26000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <2>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x26000 0x1000>;
-			ranges = <0x0 0x26000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <31 2 32 2 33 2>;
-			interrupt-parent = <&mpic>;
 			tbi-handle = <&tbi2>;
 			phy-handle = <&phy2>;
 			phy-connection-type = "rgmii-id";
-
-			mdio@520 {
-				#address-cells = <1>;
-				#size-cells = <0>;
-				compatible = "fsl,gianfar-tbi";
-				reg = <0x520 0x20>;
-
-				tbi2: tbi-phy@11 {
-					reg = <0x11>;
-					device_type = "tbi-phy";
-				};
-			};
-		};
-
-		sdhci@2e000 {
-			compatible = "fsl,p2020-esdhc", "fsl,esdhc";
-			reg = <0x2e000 0x1000>;
-			interrupts = <72 0x2>;
-			interrupt-parent = <&mpic>;
-			/* Filled in by U-Boot */
-			clock-frequency = <0>;
-		};
-
-		crypto@30000 {
-			compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4",
-				     "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
-			reg = <0x30000 0x10000>;
-			interrupts = <45 2 58 2>;
-			interrupt-parent = <&mpic>;
-			fsl,num-channels = <4>;
-			fsl,channel-fifo-len = <24>;
-			fsl,exec-units-mask = <0xbfe>;
-			fsl,descriptor-types-mask = <0x3ab0ebf>;
 		};
 
-		mpic: pic@40000 {
-			interrupt-controller;
-			#address-cells = <0>;
-			#interrupt-cells = <2>;
-			reg = <0x40000 0x40000>;
-			compatible = "chrp,open-pic";
-			device_type = "open-pic";
-		};
 
 		msi@41600 {
 			compatible = "fsl,mpic-msi";
-			reg = <0x41600 0x80>;
-			msi-available-ranges = <0 0x100>;
-			interrupts = <
-				0xe0 0
-				0xe1 0
-				0xe2 0
-				0xe3 0
-				0xe4 0
-				0xe5 0
-				0xe6 0
-				0xe7 0>;
-			interrupt-parent = <&mpic>;
-		};
-
-		global-utilities@e0000 {	//global utilities block
-			compatible = "fsl,p2020-guts";
-			reg = <0xe0000 0x1000>;
-			fsl,has-rstcr;
 		};
 	};
 
 	pci0: pcie@ffe08000 {
-		compatible = "fsl,mpc8548-pcie";
-		device_type = "pci";
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0 0xffe08000 0 0x1000>;
-		bus-range = <0 255>;
 		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
-		clock-frequency = <33333333>;
-		interrupt-parent = <&mpic>;
-		interrupts = <24 2>;
 		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
 		interrupt-map = <
 			/* IDSEL 0x0 */
@@ -528,18 +230,8 @@
 	};
 
 	pci1: pcie@ffe09000 {
-		compatible = "fsl,mpc8548-pcie";
-		device_type = "pci";
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0 0xffe09000 0 0x1000>;
-		bus-range = <0 255>;
 		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
-		clock-frequency = <33333333>;
-		interrupt-parent = <&mpic>;
-		interrupts = <25 2>;
 		interrupt-map-mask = <0xff00 0x0 0x0 0x7>;
 		interrupt-map = <
 
@@ -667,18 +359,8 @@
 	};
 
 	pci2: pcie@ffe0a000 {
-		compatible = "fsl,mpc8548-pcie";
-		device_type = "pci";
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0 0xffe0a000 0 0x1000>;
-		bus-range = <0 255>;
 		ranges = <0x2000000 0x0 0xc0000000 0 0xc0000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc20000 0x0 0x10000>;
-		clock-frequency = <33333333>;
-		interrupt-parent = <&mpic>;
-		interrupts = <26 2>;
 		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
 		interrupt-map = <
 			/* IDSEL 0x0 */
diff --git a/arch/powerpc/boot/dts/p2020rdb.dts b/arch/powerpc/boot/dts/p2020rdb.dts
index e2d48fd4416..3782a58f13b 100644
--- a/arch/powerpc/boot/dts/p2020rdb.dts
+++ b/arch/powerpc/boot/dts/p2020rdb.dts
@@ -9,12 +9,11 @@
  * option) any later version.
  */
 
-/dts-v1/;
+/include/ "p2020si.dtsi"
+
 / {
-	model = "fsl,P2020";
+	model = "fsl,P2020RDB";
 	compatible = "fsl,P2020RDB";
-	#address-cells = <2>;
-	#size-cells = <2>;
 
 	aliases {
 		ethernet0 = &enet0;
@@ -26,34 +25,11 @@
 		pci1 = &pci1;
 	};
 
-	cpus {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		PowerPC,P2020@0 {
-			device_type = "cpu";
-			reg = <0x0>;
-			next-level-cache = <&L2>;
-		};
-
-		PowerPC,P2020@1 {
-			device_type = "cpu";
-			reg = <0x1>;
-			next-level-cache = <&L2>;
-		};
-	};
-
 	memory {
 		device_type = "memory";
 	};
 
 	localbus@ffe05000 {
-		#address-cells = <2>;
-		#size-cells = <1>;
-		compatible = "fsl,p2020-elbc", "fsl,elbc", "simple-bus";
-		reg = <0 0xffe05000 0 0x1000>;
-		interrupts = <19 2>;
-		interrupt-parent = <&mpic>;
 
 		/* NOR and NAND Flashes */
 		ranges = <0x0 0x0 0x0 0xef000000 0x01000000
@@ -165,90 +141,16 @@
 	};
 
 	soc@ffe00000 {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		device_type = "soc";
-		compatible = "fsl,p2020-immr", "simple-bus";
-		ranges = <0x0  0x0 0xffe00000 0x100000>;
-		bus-frequency = <0>;		// Filled out by uboot.
-
-		ecm-law@0 {
-			compatible = "fsl,ecm-law";
-			reg = <0x0 0x1000>;
-			fsl,num-laws = <12>;
-		};
-
-		ecm@1000 {
-			compatible = "fsl,p2020-ecm", "fsl,ecm";
-			reg = <0x1000 0x1000>;
-			interrupts = <17 2>;
-			interrupt-parent = <&mpic>;
-		};
-
-		memory-controller@2000 {
-			compatible = "fsl,p2020-memory-controller";
-			reg = <0x2000 0x1000>;
-			interrupt-parent = <&mpic>;
-			interrupts = <18 2>;
-		};
-
 		i2c@3000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <0>;
-			compatible = "fsl-i2c";
-			reg = <0x3000 0x100>;
-			interrupts = <43 2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
 			rtc@68 {
 				compatible = "dallas,ds1339";
 				reg = <0x68>;
 			};
 		};
 
-		i2c@3100 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <1>;
-			compatible = "fsl-i2c";
-			reg = <0x3100 0x100>;
-			interrupts = <43 2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
-		};
-
-		serial0: serial@4500 {
-			cell-index = <0>;
-			device_type = "serial";
-			compatible = "ns16550";
-			reg = <0x4500 0x100>;
-			clock-frequency = <0>;
-			interrupts = <42 2>;
-			interrupt-parent = <&mpic>;
-		};
-
-		serial1: serial@4600 {
-			cell-index = <1>;
-			device_type = "serial";
-			compatible = "ns16550";
-			reg = <0x4600 0x100>;
-			clock-frequency = <0>;
-			interrupts = <42 2>;
-			interrupt-parent = <&mpic>;
-		};
+	spi@7000 {
 
-		spi@7000 {
-			cell-index = <0>;
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl,espi";
-			reg = <0x7000 0x1000>;
-			interrupts = <59 0x2>;
-			interrupt-parent = <&mpic>;
-			mode = "cpu";
-
-			fsl_m25p80@0 {
+		fsl_m25p80@0 {
 				#address-cells = <1>;
 				#size-cells = <1>;
 				compatible = "fsl,espi-flash";
@@ -294,254 +196,68 @@
 			};
 		};
 
-		dma@c300 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			compatible = "fsl,eloplus-dma";
-			reg = <0xc300 0x4>;
-			ranges = <0x0 0xc100 0x200>;
-			cell-index = <1>;
-			dma-channel@0 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x0 0x80>;
-				cell-index = <0>;
-				interrupt-parent = <&mpic>;
-				interrupts = <76 2>;
-			};
-			dma-channel@80 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x80 0x80>;
-				cell-index = <1>;
-				interrupt-parent = <&mpic>;
-				interrupts = <77 2>;
-			};
-			dma-channel@100 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x100 0x80>;
-				cell-index = <2>;
-				interrupt-parent = <&mpic>;
-				interrupts = <78 2>;
-			};
-			dma-channel@180 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x180 0x80>;
-				cell-index = <3>;
-				interrupt-parent = <&mpic>;
-				interrupts = <79 2>;
-			};
-		};
-
-		gpio: gpio-controller@f000 {
-			#gpio-cells = <2>;
-			compatible = "fsl,mpc8572-gpio";
-			reg = <0xf000 0x100>;
-			interrupts = <47 0x2>;
-			interrupt-parent = <&mpic>;
-			gpio-controller;
-		};
-
-		L2: l2-cache-controller@20000 {
-			compatible = "fsl,p2020-l2-cache-controller";
-			reg = <0x20000 0x1000>;
-			cache-line-size = <32>;	// 32 bytes
-			cache-size = <0x80000>; // L2,512K
-			interrupt-parent = <&mpic>;
-			interrupts = <16 2>;
+		usb@22000 {
+			phy_type = "ulpi";
 		};
 
-		dma@21300 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			compatible = "fsl,eloplus-dma";
-			reg = <0x21300 0x4>;
-			ranges = <0x0 0x21100 0x200>;
-			cell-index = <0>;
-			dma-channel@0 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x0 0x80>;
-				cell-index = <0>;
+		mdio@24520 {
+			phy0: ethernet-phy@0 {
 				interrupt-parent = <&mpic>;
-				interrupts = <20 2>;
-			};
-			dma-channel@80 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x80 0x80>;
-				cell-index = <1>;
-				interrupt-parent = <&mpic>;
-				interrupts = <21 2>;
-			};
-			dma-channel@100 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x100 0x80>;
-				cell-index = <2>;
-				interrupt-parent = <&mpic>;
-				interrupts = <22 2>;
-			};
-			dma-channel@180 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x180 0x80>;
-				cell-index = <3>;
+				interrupts = <3 1>;
+				reg = <0x0>;
+				};
+			phy1: ethernet-phy@1 {
 				interrupt-parent = <&mpic>;
-				interrupts = <23 2>;
+				interrupts = <3 1>;
+				reg = <0x1>;
+				};
+		};
+
+		mdio@25520 {
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
 			};
 		};
 
-		usb@22000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl-usb2-dr";
-			reg = <0x22000 0x1000>;
-			interrupt-parent = <&mpic>;
-			interrupts = <28 0x2>;
-			phy_type = "ulpi";
+		mdio@26520 {
+			status = "disabled";
 		};
 
 		enet0: ethernet@24000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <0>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x24000 0x1000>;
-			ranges = <0x0 0x24000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <29 2 30 2 34 2>;
-			interrupt-parent = <&mpic>;
 			fixed-link = <1 1 1000 0 0>;
 			phy-connection-type = "rgmii-id";
-
-			mdio@520 {
-				#address-cells = <1>;
-				#size-cells = <0>;
-				compatible = "fsl,gianfar-mdio";
-				reg = <0x520 0x20>;
-
-				phy0: ethernet-phy@0 {
-					interrupt-parent = <&mpic>;
-					interrupts = <3 1>;
-					reg = <0x0>;
-				};
-				phy1: ethernet-phy@1 {
-					interrupt-parent = <&mpic>;
-					interrupts = <3 1>;
-					reg = <0x1>;
-				};
-			};
 		};
 
 		enet1: ethernet@25000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <1>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x25000 0x1000>;
-			ranges = <0x0 0x25000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <35 2 36 2 40 2>;
-			interrupt-parent = <&mpic>;
 			tbi-handle = <&tbi0>;
 			phy-handle = <&phy0>;
 			phy-connection-type = "sgmii";
-
-			mdio@520 {
-				#address-cells = <1>;
-				#size-cells = <0>;
-				compatible = "fsl,gianfar-tbi";
-				reg = <0x520 0x20>;
-
-				tbi0: tbi-phy@11 {
-					reg = <0x11>;
-					device_type = "tbi-phy";
-				};
-			};
 		};
 
 		enet2: ethernet@26000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <2>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x26000 0x1000>;
-			ranges = <0x0 0x26000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <31 2 32 2 33 2>;
-			interrupt-parent = <&mpic>;
 			phy-handle = <&phy1>;
 			phy-connection-type = "rgmii-id";
 		};
 
-		sdhci@2e000 {
-			compatible = "fsl,p2020-esdhc", "fsl,esdhc";
-			reg = <0x2e000 0x1000>;
-			interrupts = <72 0x2>;
-			interrupt-parent = <&mpic>;
-			/* Filled in by U-Boot */
-			clock-frequency = <0>;
-		};
-
-		crypto@30000 {
-			compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4",
-				     "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
-			reg = <0x30000 0x10000>;
-			interrupts = <45 2 58 2>;
-			interrupt-parent = <&mpic>;
-			fsl,num-channels = <4>;
-			fsl,channel-fifo-len = <24>;
-			fsl,exec-units-mask = <0xbfe>;
-			fsl,descriptor-types-mask = <0x3ab0ebf>;
-		};
-
-		mpic: pic@40000 {
-			interrupt-controller;
-			#address-cells = <0>;
-			#interrupt-cells = <2>;
-			reg = <0x40000 0x40000>;
-			compatible = "chrp,open-pic";
-			device_type = "open-pic";
-		};
-
-		msi@41600 {
-			compatible = "fsl,p2020-msi", "fsl,mpic-msi";
-			reg = <0x41600 0x80>;
-			msi-available-ranges = <0 0x100>;
-			interrupts = <
-				0xe0 0
-				0xe1 0
-				0xe2 0
-				0xe3 0
-				0xe4 0
-				0xe5 0
-				0xe6 0
-				0xe7 0>;
-			interrupt-parent = <&mpic>;
-		};
+	};
 
-		global-utilities@e0000 {	//global utilities block
-			compatible = "fsl,p2020-guts";
-			reg = <0xe0000 0x1000>;
-			fsl,has-rstcr;
-		};
+	pci0: pcie@ffe08000 {
+		status = "disabled";
 	};
 
-	pci0: pcie@ffe09000 {
-		compatible = "fsl,mpc8548-pcie";
-		device_type = "pci";
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0 0xffe09000 0 0x1000>;
-		bus-range = <0 255>;
+	pci1: pcie@ffe09000 {
 		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
-		clock-frequency = <33333333>;
-		interrupt-parent = <&mpic>;
-		interrupts = <25 2>;
-		pcie@0 {
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x4 0x1
+			0000 0x0 0x0 0x2 &mpic 0x5 0x1
+			0000 0x0 0x0 0x3 &mpic 0x6 0x1
+			0000 0x0 0x0 0x4 &mpic 0x7 0x1
+			>;
+			pcie@0 {
 			reg = <0x0 0x0 0x0 0x0 0x0>;
 			#size-cells = <2>;
 			#address-cells = <3>;
@@ -556,19 +272,17 @@
 		};
 	};
 
-	pci1: pcie@ffe0a000 {
-		compatible = "fsl,mpc8548-pcie";
-		device_type = "pci";
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0 0xffe0a000 0 0x1000>;
-		bus-range = <0 255>;
+	pci2: pcie@ffe0a000 {
 		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
-		clock-frequency = <33333333>;
-		interrupt-parent = <&mpic>;
-		interrupts = <26 2>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x0 0x1
+			0000 0x0 0x0 0x2 &mpic 0x1 0x1
+			0000 0x0 0x0 0x3 &mpic 0x2 0x1
+			0000 0x0 0x0 0x4 &mpic 0x3 0x1
+			>;
 		pcie@0 {
 			reg = <0x0 0x0 0x0 0x0 0x0>;
 			#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts b/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts
index b69c3a5dc85..fc8ddddfccb 100644
--- a/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts
+++ b/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts
@@ -14,12 +14,11 @@
  * option) any later version.
  */
 
-/dts-v1/;
+/include/ "p2020si.dtsi"
+
 / {
-	model = "fsl,P2020";
+	model = "fsl,P2020RDB";
 	compatible = "fsl,P2020RDB", "fsl,MPC85XXRDB-CAMP";
-	#address-cells = <2>;
-	#size-cells = <2>;
 
 	aliases {
 		ethernet1 = &enet1;
@@ -29,91 +28,33 @@
 	};
 
 	cpus {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		PowerPC,P2020@0 {
-			device_type = "cpu";
-			reg = <0x0>;
-			next-level-cache = <&L2>;
+		PowerPC,P2020@1 {
+		status = "disabled";
 		};
+
 	};
 
 	memory {
 		device_type = "memory";
 	};
 
-	soc@ffe00000 {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		device_type = "soc";
-		compatible = "fsl,p2020-immr", "simple-bus";
-		ranges = <0x0  0x0 0xffe00000 0x100000>;
-		bus-frequency = <0>;		// Filled out by uboot.
-
-		ecm-law@0 {
-			compatible = "fsl,ecm-law";
-			reg = <0x0 0x1000>;
-			fsl,num-laws = <12>;
-		};
-
-		ecm@1000 {
-			compatible = "fsl,p2020-ecm", "fsl,ecm";
-			reg = <0x1000 0x1000>;
-			interrupts = <17 2>;
-			interrupt-parent = <&mpic>;
-		};
-
-		memory-controller@2000 {
-			compatible = "fsl,p2020-memory-controller";
-			reg = <0x2000 0x1000>;
-			interrupt-parent = <&mpic>;
-			interrupts = <18 2>;
-		};
+	localbus@ffe05000 {
+		status = "disabled";
+	};
 
+	soc@ffe00000 {
 		i2c@3000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <0>;
-			compatible = "fsl-i2c";
-			reg = <0x3000 0x100>;
-			interrupts = <43 2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
 			rtc@68 {
 				compatible = "dallas,ds1339";
 				reg = <0x68>;
 			};
 		};
 
-		i2c@3100 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <1>;
-			compatible = "fsl-i2c";
-			reg = <0x3100 0x100>;
-			interrupts = <43 2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
-		};
-
-		serial0: serial@4500 {
-			cell-index = <0>;
-			device_type = "serial";
-			compatible = "ns16550";
-			reg = <0x4500 0x100>;
-			clock-frequency = <0>;
+		serial1: serial@4600 {
+			status = "disabled";
 		};
 
 		spi@7000 {
-			cell-index = <0>;
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl,espi";
-			reg = <0x7000 0x1000>;
-			interrupts = <59 0x2>;
-			interrupt-parent = <&mpic>;
-			mode = "cpu";
 
 			fsl_m25p80@0 {
 				#address-cells = <1>;
@@ -161,76 +102,15 @@
 			};
 		};
 
-		gpio: gpio-controller@f000 {
-			#gpio-cells = <2>;
-			compatible = "fsl,mpc8572-gpio";
-			reg = <0xf000 0x100>;
-			interrupts = <47 0x2>;
-			interrupt-parent = <&mpic>;
-			gpio-controller;
-		};
-
-		L2: l2-cache-controller@20000 {
-			compatible = "fsl,p2020-l2-cache-controller";
-			reg = <0x20000 0x1000>;
-			cache-line-size = <32>;	// 32 bytes
-			cache-size = <0x80000>; // L2,512K
-			interrupt-parent = <&mpic>;
-			interrupts = <16 2>;
-		};
-
-		dma@21300 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			compatible = "fsl,eloplus-dma";
-			reg = <0x21300 0x4>;
-			ranges = <0x0 0x21100 0x200>;
-			cell-index = <0>;
-			dma-channel@0 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x0 0x80>;
-				cell-index = <0>;
-				interrupt-parent = <&mpic>;
-				interrupts = <20 2>;
-			};
-			dma-channel@80 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x80 0x80>;
-				cell-index = <1>;
-				interrupt-parent = <&mpic>;
-				interrupts = <21 2>;
-			};
-			dma-channel@100 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x100 0x80>;
-				cell-index = <2>;
-				interrupt-parent = <&mpic>;
-				interrupts = <22 2>;
-			};
-			dma-channel@180 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x180 0x80>;
-				cell-index = <3>;
-				interrupt-parent = <&mpic>;
-				interrupts = <23 2>;
-			};
+		dma@c300 {
+			status = "disabled";
 		};
 
 		usb@22000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl-usb2-dr";
-			reg = <0x22000 0x1000>;
-			interrupt-parent = <&mpic>;
-			interrupts = <28 0x2>;
 			phy_type = "ulpi";
 		};
 
 		mdio@24520 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl,gianfar-mdio";
-			reg = <0x24520 0x20>;
 
 			phy0: ethernet-phy@0 {
 				interrupt-parent = <&mpic>;
@@ -245,29 +125,21 @@
 		};
 
 		mdio@25520 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl,gianfar-tbi";
-			reg = <0x26520 0x20>;
-
 			tbi0: tbi-phy@11 {
 				reg = <0x11>;
 				device_type = "tbi-phy";
 			};
 		};
 
+		mdio@26520 {
+			status = "disabled";
+		};
+
+		enet0: ethernet@24000 {
+			status = "disabled";
+		};
+
 		enet1: ethernet@25000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <1>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x25000 0x1000>;
-			ranges = <0x0 0x25000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <35 2 36 2 40 2>;
-			interrupt-parent = <&mpic>;
 			tbi-handle = <&tbi0>;
 			phy-handle = <&phy0>;
 			phy-connection-type = "sgmii";
@@ -275,49 +147,12 @@
 		};
 
 		enet2: ethernet@26000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <2>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x26000 0x1000>;
-			ranges = <0x0 0x26000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <31 2 32 2 33 2>;
-			interrupt-parent = <&mpic>;
 			phy-handle = <&phy1>;
 			phy-connection-type = "rgmii-id";
 		};
 
-		sdhci@2e000 {
-			compatible = "fsl,p2020-esdhc", "fsl,esdhc";
-			reg = <0x2e000 0x1000>;
-			interrupts = <72 0x2>;
-			interrupt-parent = <&mpic>;
-			/* Filled in by U-Boot */
-			clock-frequency = <0>;
-		};
-
-		crypto@30000 {
-			compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4",
-				     "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
-			reg = <0x30000 0x10000>;
-			interrupts = <45 2 58 2>;
-			interrupt-parent = <&mpic>;
-			fsl,num-channels = <4>;
-			fsl,channel-fifo-len = <24>;
-			fsl,exec-units-mask = <0xbfe>;
-			fsl,descriptor-types-mask = <0x3ab0ebf>;
-		};
 
 		mpic: pic@40000 {
-			interrupt-controller;
-			#address-cells = <0>;
-			#interrupt-cells = <2>;
-			reg = <0x40000 0x40000>;
-			compatible = "chrp,open-pic";
-			device_type = "open-pic";
 			protected-sources = <
 			42 76 77 78 79 /* serial1 , dma2 */
 			29 30 34 26 /* enet0, pci1 */
@@ -326,26 +161,28 @@
 			>;
 		};
 
-		global-utilities@e0000 {
-			compatible = "fsl,p2020-guts";
-			reg = <0xe0000 0x1000>;
-			fsl,has-rstcr;
+		msi@41600 {
+			status = "disabled";
 		};
+
+
 	};
 
-	pci0: pcie@ffe09000 {
-		compatible = "fsl,mpc8548-pcie";
-		device_type = "pci";
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0 0xffe09000 0 0x1000>;
-		bus-range = <0 255>;
+	pci0: pcie@ffe08000 {
+		status = "disabled";
+	};
+
+	pci1: pcie@ffe09000 {
 		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
-		clock-frequency = <33333333>;
-		interrupt-parent = <&mpic>;
-		interrupts = <25 2>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x4 0x1
+			0000 0x0 0x0 0x2 &mpic 0x5 0x1
+			0000 0x0 0x0 0x3 &mpic 0x6 0x1
+			0000 0x0 0x0 0x4 &mpic 0x7 0x1
+			>;
 		pcie@0 {
 			reg = <0x0 0x0 0x0 0x0 0x0>;
 			#size-cells = <2>;
@@ -360,4 +197,8 @@
 				  0x0 0x100000>;
 		};
 	};
+
+	pci2: pcie@ffe0a000 {
+		status = "disabled";
+	};
 };
diff --git a/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts b/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts
index 7a31d46c01b..261c34ba45e 100644
--- a/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts
+++ b/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts
@@ -15,27 +15,21 @@
  * option) any later version.
  */
 
-/dts-v1/;
+/include/ "p2020si.dtsi"
+
 / {
-	model = "fsl,P2020";
+	model = "fsl,P2020RDB";
 	compatible = "fsl,P2020RDB", "fsl,MPC85XXRDB-CAMP";
-	#address-cells = <2>;
-	#size-cells = <2>;
 
 	aliases {
 		ethernet0 = &enet0;
-		serial0 = &serial0;
+		serial0 = &serial1;
 		pci1 = &pci1;
 	};
 
 	cpus {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		PowerPC,P2020@1 {
-			device_type = "cpu";
-			reg = <0x1>;
-			next-level-cache = <&L2>;
+		PowerPC,P2020@0 {
+		status = "disabled";
 		};
 	};
 
@@ -43,20 +37,37 @@
 		device_type = "memory";
 	};
 
+	localbus@ffe05000 {
+		status = "disabled";
+	};
+
 	soc@ffe00000 {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		device_type = "soc";
-		compatible = "fsl,p2020-immr", "simple-bus";
-		ranges = <0x0  0x0 0xffe00000 0x100000>;
-		bus-frequency = <0>;		// Filled out by uboot.
-
-		serial0: serial@4600 {
-			cell-index = <1>;
-			device_type = "serial";
-			compatible = "ns16550";
-			reg = <0x4600 0x100>;
-			clock-frequency = <0>;
+		ecm-law@0 {
+			status = "disabled";
+		};
+
+		ecm@1000 {
+			status = "disabled";
+		};
+
+		memory-controller@2000 {
+			status = "disabled";
+		};
+
+		i2c@3000 {
+			status = "disabled";
+		};
+
+		i2c@3100 {
+			status = "disabled";
+		};
+
+		serial0: serial@4500 {
+			status = "disabled";
+		};
+
+		spi@7000 {
+			status = "disabled";
 		};
 
 		dma@c300 {
@@ -96,6 +107,10 @@
 			};
 		};
 
+		gpio: gpio-controller@f000 {
+			status = "disabled";
+		};
+
 		L2: l2-cache-controller@20000 {
 			compatible = "fsl,p2020-l2-cache-controller";
 			reg = <0x20000 0x1000>;
@@ -104,31 +119,49 @@
 			interrupt-parent = <&mpic>;
 		};
 
+		dma@21300 {
+			status = "disabled";
+		};
+
+		usb@22000 {
+			status = "disabled";
+		};
+
+		mdio@24520 {
+			status = "disabled";
+		};
+
+		mdio@25520 {
+			status = "disabled";
+		};
+
+		mdio@26520 {
+			status = "disabled";
+		};
 
 		enet0: ethernet@24000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <0>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x24000 0x1000>;
-			ranges = <0x0 0x24000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <29 2 30 2 34 2>;
-			interrupt-parent = <&mpic>;
 			fixed-link = <1 1 1000 0 0>;
 			phy-connection-type = "rgmii-id";
 
 		};
 
+		enet1: ethernet@25000 {
+			status = "disabled";
+		};
+
+		enet2: ethernet@26000 {
+			status = "disabled";
+		};
+
+		sdhci@2e000 {
+			status = "disabled";
+		};
+
+		crypto@30000 {
+			status = "disabled";
+		};
+
 		mpic: pic@40000 {
-			interrupt-controller;
-			#address-cells = <0>;
-			#interrupt-cells = <2>;
-			reg = <0x40000 0x40000>;
-			compatible = "chrp,open-pic";
-			device_type = "open-pic";
 			protected-sources = <
 			17 18 43 42 59 47 /*ecm, mem, i2c, serial0, spi,gpio */
 			16 20 21 22 23 28 	/* L2, dma1, USB */
@@ -152,21 +185,32 @@
 				0xe7 0>;
 			interrupt-parent = <&mpic>;
 		};
+
+		global-utilities@e0000 {	//global utilities block
+			status = "disabled";
+		};
+
 	};
 
-	pci1: pcie@ffe0a000 {
-		compatible = "fsl,mpc8548-pcie";
-		device_type = "pci";
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0 0xffe0a000 0 0x1000>;
-		bus-range = <0 255>;
+	pci0: pcie@ffe08000 {
+		status = "disabled";
+	};
+
+	pci1: pcie@ffe09000 {
+		status = "disabled";
+	};
+
+	pci2: pcie@ffe0a000 {
 		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
-		clock-frequency = <33333333>;
-		interrupt-parent = <&mpic>;
-		interrupts = <26 2>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x0 0x1
+			0000 0x0 0x0 0x2 &mpic 0x1 0x1
+			0000 0x0 0x0 0x3 &mpic 0x2 0x1
+			0000 0x0 0x0 0x4 &mpic 0x3 0x1
+			>;
 		pcie@0 {
 			reg = <0x0 0x0 0x0 0x0 0x0>;
 			#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/p2020si.dtsi b/arch/powerpc/boot/dts/p2020si.dtsi
new file mode 100644
index 00000000000..6def17f265d
--- /dev/null
+++ b/arch/powerpc/boot/dts/p2020si.dtsi
@@ -0,0 +1,382 @@
+/*
+ * P2020 Device Tree Source
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+/dts-v1/;
+/ {
+	compatible = "fsl,P2020";
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,P2020@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			next-level-cache = <&L2>;
+		};
+
+		PowerPC,P2020@1 {
+			device_type = "cpu";
+			reg = <0x1>;
+			next-level-cache = <&L2>;
+		};
+	};
+
+	localbus@ffe05000 {
+		#address-cells = <2>;
+		#size-cells = <1>;
+		compatible = "fsl,p2020-elbc", "fsl,elbc", "simple-bus";
+		reg = <0 0xffe05000 0 0x1000>;
+		interrupts = <19 2>;
+		interrupt-parent = <&mpic>;
+	};
+
+	soc@ffe00000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "fsl,p2020-immr", "simple-bus";
+		ranges = <0x0  0x0 0xffe00000 0x100000>;
+		bus-frequency = <0>;		// Filled out by uboot.
+
+		ecm-law@0 {
+			compatible = "fsl,ecm-law";
+			reg = <0x0 0x1000>;
+			fsl,num-laws = <12>;
+		};
+
+		ecm@1000 {
+			compatible = "fsl,p2020-ecm", "fsl,ecm";
+			reg = <0x1000 0x1000>;
+			interrupts = <17 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		memory-controller@2000 {
+			compatible = "fsl,p2020-memory-controller";
+			reg = <0x2000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <18 2>;
+		};
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+		};
+
+		i2c@3100 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <1>;
+			compatible = "fsl-i2c";
+			reg = <0x3100 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x4500 0x100>;
+			clock-frequency = <0>;
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x4600 0x100>;
+			clock-frequency = <0>;
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		spi@7000 {
+			cell-index = <0>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,espi";
+			reg = <0x7000 0x1000>;
+			interrupts = <59 0x2>;
+			interrupt-parent = <&mpic>;
+			mode = "cpu";
+		};
+
+		dma@c300 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,eloplus-dma";
+			reg = <0xc300 0x4>;
+			ranges = <0x0 0xc100 0x200>;
+			cell-index = <1>;
+			dma-channel@0 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <76 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <77 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <78 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <79 2>;
+			};
+		};
+
+		gpio: gpio-controller@f000 {
+			#gpio-cells = <2>;
+			compatible = "fsl,mpc8572-gpio";
+			reg = <0xf000 0x100>;
+			interrupts = <47 0x2>;
+			interrupt-parent = <&mpic>;
+			gpio-controller;
+		};
+
+		L2: l2-cache-controller@20000 {
+			compatible = "fsl,p2020-l2-cache-controller";
+			reg = <0x20000 0x1000>;
+			cache-line-size = <32>;	// 32 bytes
+			cache-size = <0x80000>; // L2,512K
+			interrupt-parent = <&mpic>;
+			interrupts = <16 2>;
+		};
+
+		dma@21300 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,eloplus-dma";
+			reg = <0x21300 0x4>;
+			ranges = <0x0 0x21100 0x200>;
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <20 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <21 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <22 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <23 2>;
+			};
+		};
+
+		usb@22000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl-usb2-dr";
+			reg = <0x22000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <28 0x2>;
+		};
+
+		mdio@24520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-mdio";
+			reg = <0x24520 0x20>;
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x26520 0x20>;
+		};
+
+		mdio@26520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x520 0x20>;
+		};
+
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			ranges = <0x0 0x24000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <29 2 30 2 34 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		enet1: ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <35 2 36 2 40 2>;
+			interrupt-parent = <&mpic>;
+
+		};
+
+		enet2: ethernet@26000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <2>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x26000 0x1000>;
+			ranges = <0x0 0x26000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <31 2 32 2 33 2>;
+			interrupt-parent = <&mpic>;
+
+		};
+
+		sdhci@2e000 {
+			compatible = "fsl,p2020-esdhc", "fsl,esdhc";
+			reg = <0x2e000 0x1000>;
+			interrupts = <72 0x2>;
+			interrupt-parent = <&mpic>;
+			/* Filled in by U-Boot */
+			clock-frequency = <0>;
+		};
+
+		crypto@30000 {
+			compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4",
+				     "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
+			reg = <0x30000 0x10000>;
+			interrupts = <45 2 58 2>;
+			interrupt-parent = <&mpic>;
+			fsl,num-channels = <4>;
+			fsl,channel-fifo-len = <24>;
+			fsl,exec-units-mask = <0xbfe>;
+			fsl,descriptor-types-mask = <0x3ab0ebf>;
+		};
+
+		mpic: pic@40000 {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0x40000 0x40000>;
+			compatible = "chrp,open-pic";
+			device_type = "open-pic";
+		};
+
+		msi@41600 {
+			compatible = "fsl,p2020-msi", "fsl,mpic-msi";
+			reg = <0x41600 0x80>;
+			msi-available-ranges = <0 0x100>;
+			interrupts = <
+				0xe0 0
+				0xe1 0
+				0xe2 0
+				0xe3 0
+				0xe4 0
+				0xe5 0
+				0xe6 0
+				0xe7 0>;
+			interrupt-parent = <&mpic>;
+		};
+
+		global-utilities@e0000 {	//global utilities block
+			compatible = "fsl,p2020-guts";
+			reg = <0xe0000 0x1000>;
+			fsl,has-rstcr;
+		};
+	};
+
+	pci0: pcie@ffe08000 {
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xffe08000 0 0x1000>;
+		bus-range = <0 255>;
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <24 2>;
+	};
+
+	pci1: pcie@ffe09000 {
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xffe09000 0 0x1000>;
+		bus-range = <0 255>;
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <25 2>;
+	};
+
+	pci2: pcie@ffe0a000 {
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xffe0a000 0 0x1000>;
+		bus-range = <0 255>;
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <26 2>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/p4080ds.dts b/arch/powerpc/boot/dts/p4080ds.dts
index 5b7fc29dd6c..927f94d16e9 100644
--- a/arch/powerpc/boot/dts/p4080ds.dts
+++ b/arch/powerpc/boot/dts/p4080ds.dts
@@ -1,7 +1,7 @@
 /*
  * P4080DS Device Tree Source
  *
- * Copyright 2009 Freescale Semiconductor Inc.
+ * Copyright 2009-2011 Freescale Semiconductor Inc.
  *
  * This program is free software; you can redistribute	it and/or modify it
  * under  the terms of	the GNU General	 Public License as published by the
@@ -33,6 +33,17 @@
 		dma1 = &dma1;
 		sdhc = &sdhc;
 
+		crypto = &crypto;
+		sec_jr0 = &sec_jr0;
+		sec_jr1 = &sec_jr1;
+		sec_jr2 = &sec_jr2;
+		sec_jr3 = &sec_jr3;
+		rtic_a = &rtic_a;
+		rtic_b = &rtic_b;
+		rtic_c = &rtic_c;
+		rtic_d = &rtic_d;
+		sec_mon = &sec_mon;
+
 		rio0 = &rapidio0;
 	};
 
@@ -410,6 +421,79 @@
 			dr_mode = "host";
 			phy_type = "ulpi";
 		};
+
+		crypto: crypto@300000 {
+			compatible = "fsl,sec-v4.0";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			reg = <0x300000 0x10000>;
+			ranges = <0 0x300000 0x10000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <92 2>;
+
+			sec_jr0: jr@1000 {
+				compatible = "fsl,sec-v4.0-job-ring";
+				reg = <0x1000 0x1000>;
+				interrupt-parent = <&mpic>;
+				interrupts = <88 2>;
+			};
+
+			sec_jr1: jr@2000 {
+				compatible = "fsl,sec-v4.0-job-ring";
+				reg = <0x2000 0x1000>;
+				interrupt-parent = <&mpic>;
+				interrupts = <89 2>;
+			};
+
+			sec_jr2: jr@3000 {
+				compatible = "fsl,sec-v4.0-job-ring";
+				reg = <0x3000 0x1000>;
+				interrupt-parent = <&mpic>;
+				interrupts = <90 2>;
+			};
+
+			sec_jr3: jr@4000 {
+				compatible = "fsl,sec-v4.0-job-ring";
+				reg = <0x4000 0x1000>;
+				interrupt-parent = <&mpic>;
+				interrupts = <91 2>;
+			};
+
+			rtic@6000 {
+				compatible = "fsl,sec-v4.0-rtic";
+				#address-cells = <1>;
+				#size-cells = <1>;
+				reg = <0x6000 0x100>;
+				ranges = <0x0 0x6100 0xe00>;
+
+				rtic_a: rtic-a@0 {
+					compatible = "fsl,sec-v4.0-rtic-memory";
+					reg = <0x00 0x20 0x100 0x80>;
+				};
+
+				rtic_b: rtic-b@20 {
+					compatible = "fsl,sec-v4.0-rtic-memory";
+					reg = <0x20 0x20 0x200 0x80>;
+				};
+
+				rtic_c: rtic-c@40 {
+					compatible = "fsl,sec-v4.0-rtic-memory";
+					reg = <0x40 0x20 0x300 0x80>;
+				};
+
+				rtic_d: rtic-d@60 {
+					compatible = "fsl,sec-v4.0-rtic-memory";
+					reg = <0x60 0x20 0x500 0x80>;
+				};
+			};
+		};
+
+		sec_mon: sec_mon@314000 {
+			compatible = "fsl,sec-v4.0-mon";
+			reg = <0x314000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <93 2>;
+		};
 	};
 
 	rapidio0: rapidio@ffe0c0000 {
diff --git a/arch/powerpc/boot/epapr.c b/arch/powerpc/boot/epapr.c
new file mode 100644
index 00000000000..06c1961bd12
--- /dev/null
+++ b/arch/powerpc/boot/epapr.c
@@ -0,0 +1,66 @@
+/*
+ * Bootwrapper for ePAPR compliant firmwares
+ *
+ * Copyright 2010 David Gibson <david@gibson.dropbear.id.au>, IBM Corporation.
+ *
+ * Based on earlier bootwrappers by:
+ * (c) Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp,\
+ *   and
+ * Scott Wood <scottwood@freescale.com>
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "io.h"
+#include <libfdt.h>
+
+BSS_STACK(4096);
+
+#define EPAPR_SMAGIC	0x65504150
+#define EPAPR_EMAGIC	0x45504150
+
+static unsigned epapr_magic;
+static unsigned long ima_size;
+static unsigned long fdt_addr;
+
+static void platform_fixups(void)
+{
+	if ((epapr_magic != EPAPR_EMAGIC)
+	    && (epapr_magic != EPAPR_SMAGIC))
+		fatal("r6 contained 0x%08x instead of ePAPR magic number\n",
+		      epapr_magic);
+
+	if (ima_size < (unsigned long)_end)
+		printf("WARNING: Image loaded outside IMA!"
+		       " (_end=%p, ima_size=0x%lx)\n", _end, ima_size);
+	if (ima_size < fdt_addr)
+		printf("WARNING: Device tree address is outside IMA!"
+		       "(fdt_addr=0x%lx, ima_size=0x%lx)\n", fdt_addr,
+		       ima_size);
+	if (ima_size < fdt_addr + fdt_totalsize((void *)fdt_addr))
+		printf("WARNING: Device tree extends outside IMA!"
+		       " (fdt_addr=0x%lx, size=0x%x, ima_size=0x%lx\n",
+		       fdt_addr, fdt_totalsize((void *)fdt_addr), ima_size);
+}
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+		   unsigned long r6, unsigned long r7)
+{
+	epapr_magic = r6;
+	ima_size = r7;
+	fdt_addr = r3;
+
+	/* FIXME: we should process reserve entries */
+
+	simple_alloc_init(_end, ima_size - (unsigned long)_end, 32, 64);
+
+	fdt_init((void *)fdt_addr);
+
+	serial_console_init();
+	platform_ops.fixups = platform_fixups;
+}
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index cb97e7511d7..c74531af72c 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -39,6 +39,7 @@ dts=
 cacheit=
 binary=
 gzip=.gz
+pie=
 
 # cross-compilation prefix
 CROSS=
@@ -157,9 +158,10 @@ pmac|chrp)
     platformo=$object/of.o
     ;;
 coff)
-    platformo=$object/of.o
+    platformo="$object/crt0.o $object/of.o"
     lds=$object/zImage.coff.lds
     link_address='0x500000'
+    pie=
     ;;
 miboot|uboot)
     # miboot and U-boot want just the bare bits, not an ELF binary
@@ -208,6 +210,7 @@ ps3)
     ksection=.kernel:vmlinux.bin
     isection=.kernel:initrd
     link_address=''
+    pie=
     ;;
 ep88xc|ep405|ep8248e)
     platformo="$object/fixed-head.o $object/$platform.o"
@@ -244,6 +247,10 @@ gamecube|wii)
 treeboot-iss4xx-mpic)
     platformo="$object/treeboot-iss4xx.o"
     ;;
+epapr)
+    link_address='0x20000000'
+    pie=-pie
+    ;;
 esac
 
 vmz="$tmpdir/`basename \"$kernel\"`.$ext"
@@ -251,7 +258,7 @@ if [ -z "$cacheit" -o ! -f "$vmz$gzip" -o "$vmz$gzip" -ot "$kernel" ]; then
     ${CROSS}objcopy $objflags "$kernel" "$vmz.$$"
 
     if [ -n "$gzip" ]; then
-        gzip -f -9 "$vmz.$$"
+        gzip -n -f -9 "$vmz.$$"
     fi
 
     if [ -n "$cacheit" ]; then
@@ -310,9 +317,9 @@ fi
 
 if [ "$platform" != "miboot" ]; then
     if [ -n "$link_address" ] ; then
-        text_start="-Ttext $link_address --defsym _start=$link_address"
+        text_start="-Ttext $link_address"
     fi
-    ${CROSS}ld -m elf32ppc -T $lds $text_start -o "$ofile" \
+    ${CROSS}ld -m elf32ppc -T $lds $text_start $pie -o "$ofile" \
 	$platformo $tmp $object/wrapper.a
     rm $tmp
 fi
@@ -336,7 +343,7 @@ coff)
     $objbin/hack-coff "$ofile"
     ;;
 cuboot*)
-    gzip -f -9 "$ofile"
+    gzip -n -f -9 "$ofile"
     ${MKIMAGE} -A ppc -O linux -T kernel -C gzip -a "$base" -e "$entry" \
             $uboot_version -d "$ofile".gz "$ofile"
     ;;
@@ -383,6 +390,6 @@ ps3)
 
     odir="$(dirname "$ofile.bin")"
     rm -f "$odir/otheros.bld"
-    gzip --force -9 --stdout "$ofile.bin" > "$odir/otheros.bld"
+    gzip -n --force -9 --stdout "$ofile.bin" > "$odir/otheros.bld"
     ;;
 esac
diff --git a/arch/powerpc/boot/zImage.coff.lds.S b/arch/powerpc/boot/zImage.coff.lds.S
index 856dc78b14e..de4c9e3c934 100644
--- a/arch/powerpc/boot/zImage.coff.lds.S
+++ b/arch/powerpc/boot/zImage.coff.lds.S
@@ -3,13 +3,13 @@ ENTRY(_zimage_start_opd)
 EXTERN(_zimage_start_opd)
 SECTIONS
 {
-  _start = .;
   .text      :
   {
+    _start = .;
     *(.text)
     *(.fixup)
+    _etext = .;
   }
-  _etext = .;
   . = ALIGN(4096);
   .data    :
   {
@@ -17,9 +17,7 @@ SECTIONS
     *(.data*)
     *(__builtin_*)
     *(.sdata*)
-    __got2_start = .;
     *(.got2)
-    __got2_end = .;
 
     _dtb_start = .;
     *(.kernel:dtb)
diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S
index 0962d62bdb5..2bd8731f136 100644
--- a/arch/powerpc/boot/zImage.lds.S
+++ b/arch/powerpc/boot/zImage.lds.S
@@ -3,49 +3,64 @@ ENTRY(_zimage_start)
 EXTERN(_zimage_start)
 SECTIONS
 {
-  _start = .;
   .text      :
   {
+    _start = .;
     *(.text)
     *(.fixup)
+    _etext = .;
   }
-  _etext = .;
   . = ALIGN(4096);
   .data    :
   {
     *(.rodata*)
     *(.data*)
     *(.sdata*)
-    __got2_start = .;
     *(.got2)
-    __got2_end = .;
   }
+  .dynsym : { *(.dynsym) }
+  .dynstr : { *(.dynstr) }
+  .dynamic :
+  {
+    __dynamic_start = .;
+    *(.dynamic)
+  }
+  .hash : { *(.hash) }
+  .interp : { *(.interp) }
+  .rela.dyn : { *(.rela*) }
 
   . = ALIGN(8);
-  _dtb_start = .;
-  .kernel:dtb : { *(.kernel:dtb) }
-  _dtb_end = .;
-
-  . = ALIGN(4096);
-  _vmlinux_start =  .;
-  .kernel:vmlinux.strip : { *(.kernel:vmlinux.strip) }
-  _vmlinux_end =  .;
+  .kernel:dtb :
+  {
+    _dtb_start = .;
+    *(.kernel:dtb)
+    _dtb_end = .;
+  }
 
   . = ALIGN(4096);
-  _initrd_start =  .;
-  .kernel:initrd : { *(.kernel:initrd) }
-  _initrd_end =  .;
+  .kernel:vmlinux.strip :
+  {
+    _vmlinux_start =  .;
+    *(.kernel:vmlinux.strip)
+    _vmlinux_end =  .;
+  }
 
   . = ALIGN(4096);
-  _edata  =  .;
+  .kernel:initrd :
+  {
+    _initrd_start =  .;
+    *(.kernel:initrd)
+    _initrd_end =  .;
+  }
 
   . = ALIGN(4096);
-  __bss_start = .;
   .bss       :
   {
-   *(.sbss)
-   *(.bss)
+    _edata  =  .;
+    __bss_start = .;
+    *(.sbss)
+    *(.bss)
+    *(COMMON)
+    _end = . ;
   }
-  . = ALIGN(4096);
-  _end = . ;
 }
diff --git a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig
index c683bce4c26..126ef1b08a0 100644
--- a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig
@@ -104,7 +104,6 @@ CONFIG_ROOT_NFS=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_CRYPTO_PCBC=m
diff --git a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig
index a721cd3d793..abcf00ad939 100644
--- a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig
@@ -101,7 +101,6 @@ CONFIG_ROOT_NFS=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_CRYPTO_PCBC=m
diff --git a/arch/powerpc/configs/85xx/mpc8540_ads_defconfig b/arch/powerpc/configs/85xx/mpc8540_ads_defconfig
index 55e0725500d..11662c217ac 100644
--- a/arch/powerpc/configs/85xx/mpc8540_ads_defconfig
+++ b/arch/powerpc/configs/85xx/mpc8540_ads_defconfig
@@ -58,7 +58,6 @@ CONFIG_PARTITION_ADVANCED=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_DEBUG_MUTEXES=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/mpc8560_ads_defconfig b/arch/powerpc/configs/85xx/mpc8560_ads_defconfig
index d724095530a..ebe9b30b072 100644
--- a/arch/powerpc/configs/85xx/mpc8560_ads_defconfig
+++ b/arch/powerpc/configs/85xx/mpc8560_ads_defconfig
@@ -59,7 +59,6 @@ CONFIG_PARTITION_ADVANCED=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_DEBUG_MUTEXES=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig
index 4b44beaa21a..eb25229b387 100644
--- a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig
+++ b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig
@@ -63,7 +63,6 @@ CONFIG_PARTITION_ADVANCED=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_DEBUG_MUTEXES=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig b/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig
index b614508d6fd..f51c7ebc181 100644
--- a/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig
+++ b/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig
@@ -168,7 +168,6 @@ CONFIG_MAC_PARTITION=y
 CONFIG_CRC_T10DIF=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 CONFIG_DEBUG_INFO=y
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
diff --git a/arch/powerpc/configs/c2k_defconfig b/arch/powerpc/configs/c2k_defconfig
index f9e6a3ea5a6..2a84fd7f631 100644
--- a/arch/powerpc/configs/c2k_defconfig
+++ b/arch/powerpc/configs/c2k_defconfig
@@ -132,8 +132,8 @@ CONFIG_NET_CLS_RSVP=m
 CONFIG_NET_CLS_RSVP6=m
 CONFIG_NET_CLS_IND=y
 CONFIG_BT=m
-CONFIG_BT_L2CAP=m
-CONFIG_BT_SCO=m
+CONFIG_BT_L2CAP=y
+CONFIG_BT_SCO=y
 CONFIG_BT_RFCOMM=m
 CONFIG_BT_RFCOMM_TTY=y
 CONFIG_BT_BNEP=m
diff --git a/arch/powerpc/configs/e55xx_smp_defconfig b/arch/powerpc/configs/e55xx_smp_defconfig
index 9fa1613e5e2..d32283555b5 100644
--- a/arch/powerpc/configs/e55xx_smp_defconfig
+++ b/arch/powerpc/configs/e55xx_smp_defconfig
@@ -6,10 +6,10 @@ CONFIG_NR_CPUS=2
 CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_SPARSE_IRQ=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14
-CONFIG_SYSFS_DEPRECATED_V2=y
 CONFIG_BLK_DEV_INITRD=y
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_EXPERT=y
@@ -25,8 +25,32 @@ CONFIG_P5020_DS=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_BINFMT_MISC=m
-CONFIG_SPARSE_IRQ=y
 # CONFIG_PCI is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_NET_IPIP=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_ARPD=y
+CONFIG_INET_ESP=y
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+CONFIG_IPV6=y
+CONFIG_IP_SCTP=m
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_PROC_DEVICETREE=y
 CONFIG_BLK_DEV_LOOP=y
@@ -34,6 +58,9 @@ CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=131072
 CONFIG_MISC_DEVICES=y
 CONFIG_EEPROM_LEGACY=y
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=y
+CONFIG_NET_ETHERNET=y
 CONFIG_INPUT_FF_MEMLESS=m
 # CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
@@ -64,22 +91,14 @@ CONFIG_NLS=y
 CONFIG_NLS_UTF8=m
 CONFIG_CRC_T10DIF=y
 CONFIG_CRC_ITU_T=m
-CONFIG_LIBCRC32C=m
 CONFIG_FRAME_WARN=1024
 CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 CONFIG_DEBUG_INFO=y
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_VIRQ_DEBUG=y
-CONFIG_CRYPTO=y
-CONFIG_CRYPTO_CBC=y
 CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_SHA1=m
-CONFIG_CRYPTO_DES=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_DEV_TALITOS=y
diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig
index c06a86c3309..96b89df7752 100644
--- a/arch/powerpc/configs/mpc85xx_defconfig
+++ b/arch/powerpc/configs/mpc85xx_defconfig
@@ -204,7 +204,6 @@ CONFIG_CRC_T10DIF=y
 CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 CONFIG_DEBUG_INFO=y
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig
index 942ced90557..de65841aa04 100644
--- a/arch/powerpc/configs/mpc85xx_smp_defconfig
+++ b/arch/powerpc/configs/mpc85xx_smp_defconfig
@@ -206,7 +206,6 @@ CONFIG_CRC_T10DIF=y
 CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 CONFIG_DEBUG_INFO=y
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
diff --git a/arch/powerpc/configs/mpc86xx_defconfig b/arch/powerpc/configs/mpc86xx_defconfig
index 038a308cbfc..a1cc8179e9f 100644
--- a/arch/powerpc/configs/mpc86xx_defconfig
+++ b/arch/powerpc/configs/mpc86xx_defconfig
@@ -171,7 +171,6 @@ CONFIG_MAC_PARTITION=y
 CONFIG_CRC_T10DIF=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 CONFIG_DEBUG_INFO=y
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig
index ac4fc41035f..f8b394a76ac 100644
--- a/arch/powerpc/configs/pmac32_defconfig
+++ b/arch/powerpc/configs/pmac32_defconfig
@@ -112,8 +112,8 @@ CONFIG_IRDA_CACHE_LAST_LSAP=y
 CONFIG_IRDA_FAST_RR=y
 CONFIG_IRTTY_SIR=m
 CONFIG_BT=m
-CONFIG_BT_L2CAP=m
-CONFIG_BT_SCO=m
+CONFIG_BT_L2CAP=y
+CONFIG_BT_SCO=y
 CONFIG_BT_RFCOMM=m
 CONFIG_BT_RFCOMM_TTY=y
 CONFIG_BT_BNEP=m
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index 0a10fb009ef..214208924a9 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -351,8 +351,8 @@ CONFIG_VLSI_FIR=m
 CONFIG_VIA_FIR=m
 CONFIG_MCS_FIR=m
 CONFIG_BT=m
-CONFIG_BT_L2CAP=m
-CONFIG_BT_SCO=m
+CONFIG_BT_L2CAP=y
+CONFIG_BT_SCO=y
 CONFIG_BT_RFCOMM=m
 CONFIG_BT_RFCOMM_TTY=y
 CONFIG_BT_BNEP=m
diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig
index caba919f65d..6472322bf13 100644
--- a/arch/powerpc/configs/ps3_defconfig
+++ b/arch/powerpc/configs/ps3_defconfig
@@ -52,8 +52,8 @@ CONFIG_IP_PNP_DHCP=y
 # CONFIG_INET_DIAG is not set
 CONFIG_IPV6=y
 CONFIG_BT=m
-CONFIG_BT_L2CAP=m
-CONFIG_BT_SCO=m
+CONFIG_BT_L2CAP=y
+CONFIG_BT_SCO=y
 CONFIG_BT_RFCOMM=m
 CONFIG_BT_RFCOMM_TTY=y
 CONFIG_BT_BNEP=m
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 249ddd0a27c..7de13865508 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -146,12 +146,18 @@ CONFIG_SCSI_MULTI_LUN=y
 CONFIG_SCSI_CONSTANTS=y
 CONFIG_SCSI_FC_ATTRS=y
 CONFIG_SCSI_SAS_ATTRS=m
+CONFIG_SCSI_CXGB3_ISCSI=m
+CONFIG_SCSI_CXGB4_ISCSI=m
+CONFIG_SCSI_BNX2_ISCSI=m
+CONFIG_SCSI_BNX2_ISCSI=m
+CONFIG_BE2ISCSI=m
 CONFIG_SCSI_IBMVSCSI=y
 CONFIG_SCSI_IBMVFC=m
 CONFIG_SCSI_SYM53C8XX_2=y
 CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
 CONFIG_SCSI_IPR=y
 CONFIG_SCSI_QLA_FC=m
+CONFIG_SCSI_QLA_ISCSI=m
 CONFIG_SCSI_LPFC=m
 CONFIG_ATA=y
 # CONFIG_ATA_SFF is not set
@@ -197,6 +203,8 @@ CONFIG_S2IO=m
 CONFIG_MYRI10GE=m
 CONFIG_NETXEN_NIC=m
 CONFIG_MLX4_EN=m
+CONFIG_QLGE=m
+CONFIG_BE2NET=m
 CONFIG_PPP=m
 CONFIG_PPP_ASYNC=m
 CONFIG_PPP_SYNC_TTY=m
diff --git a/arch/powerpc/include/asm/8xx_immap.h b/arch/powerpc/include/asm/8xx_immap.h
index 6b6dc20b0be..bdf0563ba42 100644
--- a/arch/powerpc/include/asm/8xx_immap.h
+++ b/arch/powerpc/include/asm/8xx_immap.h
@@ -393,8 +393,8 @@ typedef struct fec {
 	uint	fec_addr_low;		/* lower 32 bits of station address	*/
 	ushort	fec_addr_high;		/* upper 16 bits of station address	*/
 	ushort	res1;			/* reserved				*/
-	uint	fec_hash_table_high;	/* upper 32-bits of hash table		*/
-	uint	fec_hash_table_low;	/* lower 32-bits of hash table		*/
+	uint	fec_grp_hash_table_high;	/* upper 32-bits of hash table		*/
+	uint	fec_grp_hash_table_low;	/* lower 32-bits of hash table		*/
 	uint	fec_r_des_start;	/* beginning of Rx descriptor ring	*/
 	uint	fec_x_des_start;	/* beginning of Tx descriptor ring	*/
 	uint	fec_r_buff_size;	/* Rx buffer size			*/
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 1833d1a07e7..c0d842cfd01 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -157,6 +157,7 @@ extern const char *powerpc_base_platform;
 #define CPU_FTR_476_DD2			ASM_CONST(0x0000000000010000)
 #define CPU_FTR_NEED_COHERENT		ASM_CONST(0x0000000000020000)
 #define CPU_FTR_NO_BTIC			ASM_CONST(0x0000000000040000)
+#define CPU_FTR_DEBUG_LVL_EXC		ASM_CONST(0x0000000000080000)
 #define CPU_FTR_NODSISRALIGN		ASM_CONST(0x0000000000100000)
 #define CPU_FTR_PPC_LE			ASM_CONST(0x0000000000200000)
 #define CPU_FTR_REAL_LE			ASM_CONST(0x0000000000400000)
@@ -178,22 +179,18 @@ extern const char *powerpc_base_platform;
 #define LONG_ASM_CONST(x)		0
 #endif
 
-#define CPU_FTR_SLB			LONG_ASM_CONST(0x0000000100000000)
-#define CPU_FTR_16M_PAGE		LONG_ASM_CONST(0x0000000200000000)
-#define CPU_FTR_TLBIEL			LONG_ASM_CONST(0x0000000400000000)
+
+#define CPU_FTR_HVMODE_206		LONG_ASM_CONST(0x0000000800000000)
+#define CPU_FTR_CFAR			LONG_ASM_CONST(0x0000001000000000)
 #define CPU_FTR_IABR			LONG_ASM_CONST(0x0000002000000000)
 #define CPU_FTR_MMCRA			LONG_ASM_CONST(0x0000004000000000)
 #define CPU_FTR_CTRL			LONG_ASM_CONST(0x0000008000000000)
 #define CPU_FTR_SMT			LONG_ASM_CONST(0x0000010000000000)
-#define CPU_FTR_LOCKLESS_TLBIE		LONG_ASM_CONST(0x0000040000000000)
-#define CPU_FTR_CI_LARGE_PAGE		LONG_ASM_CONST(0x0000100000000000)
 #define CPU_FTR_PAUSE_ZERO		LONG_ASM_CONST(0x0000200000000000)
 #define CPU_FTR_PURR			LONG_ASM_CONST(0x0000400000000000)
 #define CPU_FTR_CELL_TB_BUG		LONG_ASM_CONST(0x0000800000000000)
 #define CPU_FTR_SPURR			LONG_ASM_CONST(0x0001000000000000)
 #define CPU_FTR_DSCR			LONG_ASM_CONST(0x0002000000000000)
-#define CPU_FTR_1T_SEGMENT		LONG_ASM_CONST(0x0004000000000000)
-#define CPU_FTR_NO_SLBIE_B		LONG_ASM_CONST(0x0008000000000000)
 #define CPU_FTR_VSX			LONG_ASM_CONST(0x0010000000000000)
 #define CPU_FTR_SAO			LONG_ASM_CONST(0x0020000000000000)
 #define CPU_FTR_CP_USE_DCBTZ		LONG_ASM_CONST(0x0040000000000000)
@@ -202,12 +199,14 @@ extern const char *powerpc_base_platform;
 #define CPU_FTR_STCX_CHECKS_ADDRESS	LONG_ASM_CONST(0x0200000000000000)
 #define CPU_FTR_POPCNTB			LONG_ASM_CONST(0x0400000000000000)
 #define CPU_FTR_POPCNTD			LONG_ASM_CONST(0x0800000000000000)
+#define CPU_FTR_ICSWX			LONG_ASM_CONST(0x1000000000000000)
 
 #ifndef __ASSEMBLY__
 
-#define CPU_FTR_PPCAS_ARCH_V2	(CPU_FTR_SLB | \
-				 CPU_FTR_TLBIEL | CPU_FTR_NOEXECUTE | \
-				 CPU_FTR_NODSISRALIGN | CPU_FTR_16M_PAGE)
+#define CPU_FTR_PPCAS_ARCH_V2	(CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN)
+
+#define MMU_FTR_PPCAS_ARCH_V2 	(MMU_FTR_SLB | MMU_FTR_TLBIEL | \
+				 MMU_FTR_16M_PAGE)
 
 /* We only set the altivec features if the kernel was compiled with altivec
  * support
@@ -387,7 +386,8 @@ extern const char *powerpc_base_platform;
 	    CPU_FTR_DBELL)
 #define CPU_FTRS_E5500	(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
 	    CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
-	    CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD)
+	    CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
+	    CPU_FTR_DEBUG_LVL_EXC)
 #define CPU_FTRS_GENERIC_32	(CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN)
 
 /* 64-bit CPUs */
@@ -407,44 +407,45 @@ extern const char *powerpc_base_platform;
 #define CPU_FTRS_POWER5	(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
-	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
-	    CPU_FTR_PURR | CPU_FTR_STCX_CHECKS_ADDRESS | \
-	    CPU_FTR_POPCNTB)
+	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_PURR | \
+	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB)
 #define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
-	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
+	    CPU_FTR_COHERENT_ICACHE | \
 	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
 	    CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \
-	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB)
+	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_CFAR)
 #define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
-	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
+	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_HVMODE_206 |\
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
-	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
+	    CPU_FTR_COHERENT_ICACHE | \
 	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
 	    CPU_FTR_DSCR | CPU_FTR_SAO  | CPU_FTR_ASYM_SMT | \
-	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD)
+	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
+	    CPU_FTR_ICSWX | CPU_FTR_CFAR)
 #define CPU_FTRS_CELL	(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
-	    CPU_FTR_PAUSE_ZERO | CPU_FTR_CI_LARGE_PAGE | \
-	    CPU_FTR_CELL_TB_BUG | CPU_FTR_CP_USE_DCBTZ | \
+	    CPU_FTR_PAUSE_ZERO  | CPU_FTR_CELL_TB_BUG | CPU_FTR_CP_USE_DCBTZ | \
 	    CPU_FTR_UNALIGNED_LD_STD)
 #define CPU_FTRS_PA6T (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
-	    CPU_FTR_PPCAS_ARCH_V2 | \
-	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_CI_LARGE_PAGE | \
-	    CPU_FTR_PURR | CPU_FTR_REAL_LE | CPU_FTR_NO_SLBIE_B)
+	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | \
+	    CPU_FTR_PURR | CPU_FTR_REAL_LE)
 #define CPU_FTRS_COMPATIBLE	(CPU_FTR_USE_TB | CPU_FTR_PPCAS_ARCH_V2)
 
+#define CPU_FTRS_A2 (CPU_FTR_USE_TB | CPU_FTR_SMT | CPU_FTR_DBELL | \
+		     CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN)
+
 #ifdef __powerpc64__
 #ifdef CONFIG_PPC_BOOK3E
-#define CPU_FTRS_POSSIBLE	(CPU_FTRS_E5500)
+#define CPU_FTRS_POSSIBLE	(CPU_FTRS_E5500 | CPU_FTRS_A2)
 #else
 #define CPU_FTRS_POSSIBLE	\
 	    (CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 |	\
 	    CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_POWER6 |	\
 	    CPU_FTRS_POWER7 | CPU_FTRS_CELL | CPU_FTRS_PA6T |		\
-	    CPU_FTR_1T_SEGMENT | CPU_FTR_VSX)
+	    CPU_FTR_VSX)
 #endif
 #else
 enum {
@@ -487,7 +488,7 @@ enum {
 
 #ifdef __powerpc64__
 #ifdef CONFIG_PPC_BOOK3E
-#define CPU_FTRS_ALWAYS		(CPU_FTRS_E5500)
+#define CPU_FTRS_ALWAYS		(CPU_FTRS_E5500 & CPU_FTRS_A2)
 #else
 #define CPU_FTRS_ALWAYS		\
 	    (CPU_FTRS_POWER3 & CPU_FTRS_RS64 & CPU_FTRS_POWER4 &	\
diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h
index f71bb4c118b..ce516e5eb0d 100644
--- a/arch/powerpc/include/asm/cputhreads.h
+++ b/arch/powerpc/include/asm/cputhreads.h
@@ -37,16 +37,16 @@ extern cpumask_t threads_core_mask;
  * This can typically be used for things like IPI for tlb invalidations
  * since those need to be done only once per core/TLB
  */
-static inline cpumask_t cpu_thread_mask_to_cores(cpumask_t threads)
+static inline cpumask_t cpu_thread_mask_to_cores(const struct cpumask *threads)
 {
 	cpumask_t	tmp, res;
 	int		i;
 
-	res = CPU_MASK_NONE;
+	cpumask_clear(&res);
 	for (i = 0; i < NR_CPUS; i += threads_per_core) {
-		cpus_shift_left(tmp, threads_core_mask, i);
-		if (cpus_intersects(threads, tmp))
-			cpu_set(i, res);
+		cpumask_shift_left(&tmp, &threads_core_mask, i);
+		if (cpumask_intersects(threads, &tmp))
+			cpumask_set_cpu(i, &res);
 	}
 	return res;
 }
@@ -58,7 +58,7 @@ static inline int cpu_nr_cores(void)
 
 static inline cpumask_t cpu_online_cores_map(void)
 {
-	return cpu_thread_mask_to_cores(cpu_online_map);
+	return cpu_thread_mask_to_cores(cpu_online_mask);
 }
 
 #ifdef CONFIG_SMP
diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h
index 0893ab9343a..9c70d0ca96d 100644
--- a/arch/powerpc/include/asm/dbell.h
+++ b/arch/powerpc/include/asm/dbell.h
@@ -27,9 +27,8 @@ enum ppc_dbell {
 	PPC_G_DBELL_MC = 4,	/* guest mcheck doorbell */
 };
 
-extern void doorbell_message_pass(int target, int msg);
+extern void doorbell_cause_ipi(int cpu, unsigned long data);
 extern void doorbell_exception(struct pt_regs *regs);
-extern void doorbell_check_self(void);
 extern void doorbell_setup_this_cpu(void);
 
 static inline void ppc_msgsnd(enum ppc_dbell type, u32 flags, u32 tag)
diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h
index f0fb4fc1f6e..45921672b97 100644
--- a/arch/powerpc/include/asm/emulated_ops.h
+++ b/arch/powerpc/include/asm/emulated_ops.h
@@ -52,6 +52,10 @@ extern struct ppc_emulated {
 #ifdef CONFIG_VSX
 	struct ppc_emulated_entry vsx;
 #endif
+#ifdef CONFIG_PPC64
+	struct ppc_emulated_entry mfdscr;
+	struct ppc_emulated_entry mtdscr;
+#endif
 } ppc_emulated;
 
 extern u32 ppc_warn_emulated;
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 7778d6f0c87..f5dfe3411f6 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -46,6 +46,7 @@
 #define EX_CCR		60
 #define EX_R3		64
 #define EX_LR		72
+#define EX_CFAR		80
 
 /*
  * We're short on space and time in the exception prolog, so we can't
@@ -56,30 +57,40 @@
 #define LOAD_HANDLER(reg, label)					\
 	addi	reg,reg,(label)-_stext;	/* virt addr of handler ... */
 
-#define EXCEPTION_PROLOG_1(area)				\
-	mfspr	r13,SPRN_SPRG_PACA;	/* get paca address into r13 */	\
+/* Exception register prefixes */
+#define EXC_HV	H
+#define EXC_STD
+
+#define EXCEPTION_PROLOG_1(area)					\
+	GET_PACA(r13);							\
 	std	r9,area+EX_R9(r13);	/* save r9 - r12 */		\
 	std	r10,area+EX_R10(r13);					\
 	std	r11,area+EX_R11(r13);					\
 	std	r12,area+EX_R12(r13);					\
-	mfspr	r9,SPRN_SPRG_SCRATCH0;					\
+	BEGIN_FTR_SECTION_NESTED(66);					\
+	mfspr	r10,SPRN_CFAR;						\
+	std	r10,area+EX_CFAR(r13);					\
+	END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66);		\
+	GET_SCRATCH0(r9);						\
 	std	r9,area+EX_R13(r13);					\
 	mfcr	r9
 
-#define EXCEPTION_PROLOG_PSERIES_1(label)				\
+#define __EXCEPTION_PROLOG_PSERIES_1(label, h)				\
 	ld	r12,PACAKBASE(r13);	/* get high part of &label */	\
 	ld	r10,PACAKMSR(r13);	/* get MSR value for kernel */	\
-	mfspr	r11,SPRN_SRR0;		/* save SRR0 */			\
+	mfspr	r11,SPRN_##h##SRR0;	/* save SRR0 */			\
 	LOAD_HANDLER(r12,label)						\
-	mtspr	SPRN_SRR0,r12;						\
-	mfspr	r12,SPRN_SRR1;		/* and SRR1 */			\
-	mtspr	SPRN_SRR1,r10;						\
-	rfid;								\
+	mtspr	SPRN_##h##SRR0,r12;					\
+	mfspr	r12,SPRN_##h##SRR1;	/* and SRR1 */			\
+	mtspr	SPRN_##h##SRR1,r10;					\
+	h##rfid;							\
 	b	.	/* prevent speculative execution */
+#define EXCEPTION_PROLOG_PSERIES_1(label, h) \
+	__EXCEPTION_PROLOG_PSERIES_1(label, h)
 
-#define EXCEPTION_PROLOG_PSERIES(area, label)				\
+#define EXCEPTION_PROLOG_PSERIES(area, label, h)			\
 	EXCEPTION_PROLOG_1(area);					\
-	EXCEPTION_PROLOG_PSERIES_1(label);
+	EXCEPTION_PROLOG_PSERIES_1(label, h);
 
 /*
  * The common exception prolog is used for all except a few exceptions
@@ -98,10 +109,11 @@
 	beq-	1f;							   \
 	ld	r1,PACAKSAVE(r13);	/* kernel stack to use		*/ \
 1:	cmpdi	cr1,r1,0;		/* check if r1 is in userspace	*/ \
-	bge-	cr1,2f;			/* abort if it is		*/ \
-	b	3f;							   \
-2:	li	r1,(n);			/* will be reloaded later	*/ \
+	blt+	cr1,3f;			/* abort if it is		*/ \
+	li	r1,(n);			/* will be reloaded later	*/ \
 	sth	r1,PACA_TRAP_SAVE(r13);					   \
+	std	r3,area+EX_R3(r13);					   \
+	addi	r3,r13,area;		/* r3 -> where regs are saved*/	   \
 	b	bad_stack;						   \
 3:	std	r9,_CCR(r1);		/* save CR in stackframe	*/ \
 	std	r11,_NIP(r1);		/* save SRR0 in stackframe	*/ \
@@ -123,6 +135,10 @@
 	std	r9,GPR11(r1);						   \
 	std	r10,GPR12(r1);						   \
 	std	r11,GPR13(r1);						   \
+	BEGIN_FTR_SECTION_NESTED(66);					   \
+	ld	r10,area+EX_CFAR(r13);					   \
+	std	r10,ORIG_GPR3(r1);					   \
+	END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66);		   \
 	ld	r2,PACATOC(r13);	/* get kernel TOC into r2	*/ \
 	mflr	r9;			/* save LR in stackframe	*/ \
 	std	r9,_LINK(r1);						   \
@@ -143,57 +159,62 @@
 /*
  * Exception vectors.
  */
-#define STD_EXCEPTION_PSERIES(n, label)			\
-	. = n;						\
+#define STD_EXCEPTION_PSERIES(loc, vec, label)		\
+	. = loc;					\
 	.globl label##_pSeries;				\
 label##_pSeries:					\
 	HMT_MEDIUM;					\
-	DO_KVM	n;					\
-	mtspr	SPRN_SPRG_SCRATCH0,r13;		/* save r13 */	\
-	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common)
+	DO_KVM	vec;					\
+	SET_SCRATCH0(r13);		/* save r13 */		\
+	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, EXC_STD)
 
-#define HSTD_EXCEPTION_PSERIES(n, label)		\
-	. = n;						\
-	.globl label##_pSeries;				\
-label##_pSeries:					\
+#define STD_EXCEPTION_HV(loc, vec, label)		\
+	. = loc;					\
+	.globl label##_hv;				\
+label##_hv:						\
 	HMT_MEDIUM;					\
-	mtspr	SPRN_SPRG_SCRATCH0,r20;	/* save r20 */	\
-	mfspr	r20,SPRN_HSRR0;		/* copy HSRR0 to SRR0 */ \
-	mtspr	SPRN_SRR0,r20;				\
-	mfspr	r20,SPRN_HSRR1;		/* copy HSRR0 to SRR0 */ \
-	mtspr	SPRN_SRR1,r20;				\
-	mfspr	r20,SPRN_SPRG_SCRATCH0;	/* restore r20 */ \
-	mtspr	SPRN_SPRG_SCRATCH0,r13;		/* save r13 */	\
-	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common)
+	DO_KVM	vec;					\
+	SET_SCRATCH0(r13);	/* save r13 */		\
+	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, EXC_HV)
 
-
-#define MASKABLE_EXCEPTION_PSERIES(n, label)				\
-	. = n;								\
-	.globl label##_pSeries;						\
-label##_pSeries:							\
+#define __MASKABLE_EXCEPTION_PSERIES(vec, label, h)			\
 	HMT_MEDIUM;							\
-	DO_KVM	n;							\
-	mtspr	SPRN_SPRG_SCRATCH0,r13;	/* save r13 */			\
-	mfspr	r13,SPRN_SPRG_PACA;	/* get paca address into r13 */	\
+	DO_KVM	vec;							\
+	SET_SCRATCH0(r13);    /* save r13 */				\
+	GET_PACA(r13);							\
 	std	r9,PACA_EXGEN+EX_R9(r13);	/* save r9, r10 */	\
 	std	r10,PACA_EXGEN+EX_R10(r13);				\
 	lbz	r10,PACASOFTIRQEN(r13);					\
 	mfcr	r9;							\
 	cmpwi	r10,0;							\
-	beq	masked_interrupt;					\
-	mfspr	r10,SPRN_SPRG_SCRATCH0;					\
+	beq	masked_##h##interrupt;					\
+	GET_SCRATCH0(r10);						\
 	std	r10,PACA_EXGEN+EX_R13(r13);				\
 	std	r11,PACA_EXGEN+EX_R11(r13);				\
 	std	r12,PACA_EXGEN+EX_R12(r13);				\
 	ld	r12,PACAKBASE(r13);	/* get high part of &label */	\
 	ld	r10,PACAKMSR(r13);	/* get MSR value for kernel */	\
-	mfspr	r11,SPRN_SRR0;		/* save SRR0 */			\
+	mfspr	r11,SPRN_##h##SRR0;	/* save SRR0 */			\
 	LOAD_HANDLER(r12,label##_common)				\
-	mtspr	SPRN_SRR0,r12;						\
-	mfspr	r12,SPRN_SRR1;		/* and SRR1 */			\
-	mtspr	SPRN_SRR1,r10;						\
-	rfid;								\
+	mtspr	SPRN_##h##SRR0,r12;					\
+	mfspr	r12,SPRN_##h##SRR1;	/* and SRR1 */			\
+	mtspr	SPRN_##h##SRR1,r10;					\
+	h##rfid;							\
 	b	.	/* prevent speculative execution */
+#define _MASKABLE_EXCEPTION_PSERIES(vec, label, h)			\
+	__MASKABLE_EXCEPTION_PSERIES(vec, label, h)
+
+#define MASKABLE_EXCEPTION_PSERIES(loc, vec, label)			\
+	. = loc;							\
+	.globl label##_pSeries;						\
+label##_pSeries:							\
+	_MASKABLE_EXCEPTION_PSERIES(vec, label, EXC_STD)
+
+#define MASKABLE_EXCEPTION_HV(loc, vec, label)				\
+	. = loc;							\
+	.globl label##_hv;						\
+label##_hv:								\
+	_MASKABLE_EXCEPTION_PSERIES(vec, label, EXC_HV)
 
 #ifdef CONFIG_PPC_ISERIES
 #define DISABLE_INTS				\
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index 921a8470e18..9a67a38bf7b 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -49,7 +49,7 @@ label##5:							\
 	FTR_ENTRY_OFFSET label##2b-label##5b;			\
 	FTR_ENTRY_OFFSET label##3b-label##5b;			\
 	FTR_ENTRY_OFFSET label##4b-label##5b;			\
-	.ifgt (label##4b-label##3b)-(label##2b-label##1b);	\
+	.ifgt (label##4b- label##3b)-(label##2b- label##1b);	\
 	.error "Feature section else case larger than body";	\
 	.endif;							\
 	.popsection;
@@ -146,6 +146,19 @@ label##5:							\
 
 #ifndef __ASSEMBLY__
 
+#define ASM_FTR_IF(section_if, section_else, msk, val)	\
+	stringify_in_c(BEGIN_FTR_SECTION)			\
+	section_if "; "						\
+	stringify_in_c(FTR_SECTION_ELSE)			\
+	section_else "; "					\
+	stringify_in_c(ALT_FTR_SECTION_END((msk), (val)))
+
+#define ASM_FTR_IFSET(section_if, section_else, msk)	\
+	ASM_FTR_IF(section_if, section_else, (msk), (msk))
+
+#define ASM_FTR_IFCLR(section_if, section_else, msk)	\
+	ASM_FTR_IF(section_if, section_else, (msk), 0)
+
 #define ASM_MMU_FTR_IF(section_if, section_else, msk, val)	\
 	stringify_in_c(BEGIN_MMU_FTR_SECTION)			\
 	section_if "; "						\
diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index 4ef662e4a31..3a6c586c4e4 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -47,6 +47,7 @@
 #define FW_FEATURE_BEAT		ASM_CONST(0x0000000001000000)
 #define FW_FEATURE_CMO		ASM_CONST(0x0000000002000000)
 #define FW_FEATURE_VPHN		ASM_CONST(0x0000000004000000)
+#define FW_FEATURE_XCMO		ASM_CONST(0x0000000008000000)
 
 #ifndef __ASSEMBLY__
 
@@ -60,7 +61,7 @@ enum {
 		FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN |
 		FW_FEATURE_BULK_REMOVE | FW_FEATURE_XDABR |
 		FW_FEATURE_MULTITCE | FW_FEATURE_SPLPAR | FW_FEATURE_LPAR |
-		FW_FEATURE_CMO | FW_FEATURE_VPHN,
+		FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO,
 	FW_FEATURE_PSERIES_ALWAYS = 0,
 	FW_FEATURE_ISERIES_POSSIBLE = FW_FEATURE_ISERIES | FW_FEATURE_LPAR,
 	FW_FEATURE_ISERIES_ALWAYS = FW_FEATURE_ISERIES | FW_FEATURE_LPAR,
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 8edec710cc6..852b8c1c09d 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -102,6 +102,7 @@
 #define H_ANDCOND		(1UL<<(63-33))
 #define H_ICACHE_INVALIDATE	(1UL<<(63-40))	/* icbi, etc.  (ignored for IO pages) */
 #define H_ICACHE_SYNCHRONIZE	(1UL<<(63-41))	/* dcbst, icbi, etc (ignored for IO pages */
+#define H_COALESCE_CAND	(1UL<<(63-42))	/* page is a good candidate for coalescing */
 #define H_ZERO_PAGE		(1UL<<(63-48))	/* zero the page before mapping (ignored for IO pages) */
 #define H_COPY_PAGE		(1UL<<(63-49))
 #define H_N			(1UL<<(63-61))
@@ -234,6 +235,7 @@
 #define H_GET_MPP		0x2D4
 #define H_HOME_NODE_ASSOCIATIVITY 0x2EC
 #define H_BEST_ENERGY		0x2F4
+#define H_GET_MPP_X		0x314
 #define MAX_HCALL_OPCODE	H_BEST_ENERGY
 
 #ifndef __ASSEMBLY__
@@ -312,6 +314,16 @@ struct hvcall_mpp_data {
 
 int h_get_mpp(struct hvcall_mpp_data *);
 
+struct hvcall_mpp_x_data {
+	unsigned long coalesced_bytes;
+	unsigned long pool_coalesced_bytes;
+	unsigned long pool_purr_cycles;
+	unsigned long pool_spurr_cycles;
+	unsigned long reserved[3];
+};
+
+int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data);
+
 #ifdef CONFIG_PPC_PSERIES
 extern int CMO_PrPSP;
 extern int CMO_SecPSP;
diff --git a/arch/powerpc/platforms/cell/io-workarounds.h b/arch/powerpc/include/asm/io-workarounds.h
index 6efc7782ebf..fbae4928692 100644
--- a/arch/powerpc/platforms/cell/io-workarounds.h
+++ b/arch/powerpc/include/asm/io-workarounds.h
@@ -31,7 +31,6 @@ struct iowa_bus {
 	void   *private;
 };
 
-void __devinit io_workaround_init(void);
 void __devinit iowa_register_bus(struct pci_controller *, struct ppc_pci_io *,
 				 int (*)(struct iowa_bus *, void *), void *);
 struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR);
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 001f2f11c19..45698d55cd6 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -2,6 +2,8 @@
 #define _ASM_POWERPC_IO_H
 #ifdef __KERNEL__
 
+#define ARCH_HAS_IOREMAP_WC
+
 /*
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -481,10 +483,16 @@ __do_out_asm(_rec_outl, "stwbrx")
 				_memcpy_fromio(dst,PCI_FIX_ADDR(src),n)
 #endif /* !CONFIG_EEH */
 
-#ifdef CONFIG_PPC_INDIRECT_IO
-#define DEF_PCI_HOOK(x)		x
+#ifdef CONFIG_PPC_INDIRECT_PIO
+#define DEF_PCI_HOOK_pio(x)	x
+#else
+#define DEF_PCI_HOOK_pio(x)	NULL
+#endif
+
+#ifdef CONFIG_PPC_INDIRECT_MMIO
+#define DEF_PCI_HOOK_mem(x)	x
 #else
-#define DEF_PCI_HOOK(x)		NULL
+#define DEF_PCI_HOOK_mem(x)	NULL
 #endif
 
 /* Structure containing all the hooks */
@@ -504,7 +512,7 @@ extern struct ppc_pci_io {
 #define DEF_PCI_AC_RET(name, ret, at, al, space, aa)		\
 static inline ret name at					\
 {								\
-	if (DEF_PCI_HOOK(ppc_pci_io.name) != NULL)		\
+	if (DEF_PCI_HOOK_##space(ppc_pci_io.name) != NULL)	\
 		return ppc_pci_io.name al;			\
 	return __do_##name al;					\
 }
@@ -512,7 +520,7 @@ static inline ret name at					\
 #define DEF_PCI_AC_NORET(name, at, al, space, aa)		\
 static inline void name at					\
 {								\
-	if (DEF_PCI_HOOK(ppc_pci_io.name) != NULL)		\
+	if (DEF_PCI_HOOK_##space(ppc_pci_io.name) != NULL)		\
 		ppc_pci_io.name al;				\
 	else							\
 		__do_##name al;					\
@@ -616,12 +624,13 @@ static inline void iosync(void)
  * * ioremap is the standard one and provides non-cacheable guarded mappings
  *   and can be hooked by the platform via ppc_md
  *
- * * ioremap_flags allows to specify the page flags as an argument and can
- *   also be hooked by the platform via ppc_md. ioremap_prot is the exact
- *   same thing as ioremap_flags.
+ * * ioremap_prot allows to specify the page flags as an argument and can
+ *   also be hooked by the platform via ppc_md.
  *
  * * ioremap_nocache is identical to ioremap
  *
+ * * ioremap_wc enables write combining
+ *
  * * iounmap undoes such a mapping and can be hooked
  *
  * * __ioremap_at (and the pending __iounmap_at) are low level functions to
@@ -629,7 +638,7 @@ static inline void iosync(void)
  *   currently be hooked. Must be page aligned.
  *
  * * __ioremap is the low level implementation used by ioremap and
- *   ioremap_flags and cannot be hooked (but can be used by a hook on one
+ *   ioremap_prot and cannot be hooked (but can be used by a hook on one
  *   of the previous ones)
  *
  * * __ioremap_caller is the same as above but takes an explicit caller
@@ -640,10 +649,10 @@ static inline void iosync(void)
  *
  */
 extern void __iomem *ioremap(phys_addr_t address, unsigned long size);
-extern void __iomem *ioremap_flags(phys_addr_t address, unsigned long size,
-				   unsigned long flags);
+extern void __iomem *ioremap_prot(phys_addr_t address, unsigned long size,
+				  unsigned long flags);
+extern void __iomem *ioremap_wc(phys_addr_t address, unsigned long size);
 #define ioremap_nocache(addr, size)	ioremap((addr), (size))
-#define ioremap_prot(addr, size, prot)	ioremap_flags((addr), (size), (prot))
 
 extern void iounmap(volatile void __iomem *addr);
 
diff --git a/arch/powerpc/include/asm/io_event_irq.h b/arch/powerpc/include/asm/io_event_irq.h
new file mode 100644
index 00000000000..b1a9a1be3c2
--- /dev/null
+++ b/arch/powerpc/include/asm/io_event_irq.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2010, 2011 Mark Nelson and Tseng-Hui (Frank) Lin, IBM Corporation
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _ASM_POWERPC_IO_EVENT_IRQ_H
+#define _ASM_POWERPC_IO_EVENT_IRQ_H
+
+#include <linux/types.h>
+#include <linux/notifier.h>
+
+#define PSERIES_IOEI_RPC_MAX_LEN 216
+
+#define PSERIES_IOEI_TYPE_ERR_DETECTED		0x01
+#define PSERIES_IOEI_TYPE_ERR_RECOVERED		0x02
+#define PSERIES_IOEI_TYPE_EVENT			0x03
+#define PSERIES_IOEI_TYPE_RPC_PASS_THRU		0x04
+
+#define PSERIES_IOEI_SUBTYPE_NOT_APP		0x00
+#define PSERIES_IOEI_SUBTYPE_REBALANCE_REQ	0x01
+#define PSERIES_IOEI_SUBTYPE_NODE_ONLINE	0x03
+#define PSERIES_IOEI_SUBTYPE_NODE_OFFLINE	0x04
+#define PSERIES_IOEI_SUBTYPE_DUMP_SIZE_CHANGE	0x05
+#define PSERIES_IOEI_SUBTYPE_TORRENT_IRV_UPDATE	0x06
+#define PSERIES_IOEI_SUBTYPE_TORRENT_HFI_CFGED	0x07
+
+#define PSERIES_IOEI_SCOPE_NOT_APP		0x00
+#define PSERIES_IOEI_SCOPE_RIO_HUB		0x36
+#define PSERIES_IOEI_SCOPE_RIO_BRIDGE		0x37
+#define PSERIES_IOEI_SCOPE_PHB			0x38
+#define PSERIES_IOEI_SCOPE_EADS_GLOBAL		0x39
+#define PSERIES_IOEI_SCOPE_EADS_SLOT		0x3A
+#define PSERIES_IOEI_SCOPE_TORRENT_HUB		0x3B
+#define PSERIES_IOEI_SCOPE_SERVICE_PROC		0x51
+
+/* Platform Event Log Format, Version 6, data portition of IO event section */
+struct pseries_io_event {
+	uint8_t event_type;		/* 0x00 IO-Event Type		*/
+	uint8_t rpc_data_len;		/* 0x01 RPC data length		*/
+	uint8_t scope;			/* 0x02 Error/Event Scope	*/
+	uint8_t event_subtype;		/* 0x03 I/O-Event Sub-Type	*/
+	uint32_t drc_index;		/* 0x04 DRC Index		*/
+	uint8_t rpc_data[PSERIES_IOEI_RPC_MAX_LEN];
+					/* 0x08 RPC Data (0-216 bytes,	*/
+					/* padded to 4 bytes alignment)	*/
+};
+
+extern struct atomic_notifier_head pseries_ioei_notifier_list;
+
+#endif /* _ASM_POWERPC_IO_EVENT_IRQ_H */
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index 67ab5fb7d15..1bff591f7f7 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -88,9 +88,6 @@ struct irq_host_ops {
 	/* Dispose of such a mapping */
 	void (*unmap)(struct irq_host *h, unsigned int virq);
 
-	/* Update of such a mapping  */
-	void (*remap)(struct irq_host *h, unsigned int virq, irq_hw_number_t hw);
-
 	/* Translate device-tree interrupt specifier from raw format coming
 	 * from the firmware to a irq_hw_number_t (interrupt line number) and
 	 * type (sense) that can be passed to set_irq_type(). In the absence
@@ -128,19 +125,10 @@ struct irq_host {
 	struct device_node	*of_node;
 };
 
-/* The main irq map itself is an array of NR_IRQ entries containing the
- * associate host and irq number. An entry with a host of NULL is free.
- * An entry can be allocated if it's free, the allocator always then sets
- * hwirq first to the host's invalid irq number and then fills ops.
- */
-struct irq_map_entry {
-	irq_hw_number_t	hwirq;
-	struct irq_host	*host;
-};
-
-extern struct irq_map_entry irq_map[NR_IRQS];
-
+struct irq_data;
+extern irq_hw_number_t irqd_to_hwirq(struct irq_data *d);
 extern irq_hw_number_t virq_to_hw(unsigned int virq);
+extern bool virq_is_host(unsigned int virq, struct irq_host *host);
 
 /**
  * irq_alloc_host - Allocate a new irq_host data structure
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index f54408d995b..8a33698c61b 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -76,7 +76,7 @@ extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *));
 extern cpumask_t cpus_in_sr;
 static inline int kexec_sr_activated(int cpu)
 {
-	return cpu_isset(cpu,cpus_in_sr);
+	return cpumask_test_cpu(cpu, &cpus_in_sr);
 }
 
 struct kimage;
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index 18ea6963ad7..d2ca5ed3877 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -45,6 +45,114 @@ struct kvm_regs {
 	__u64 gpr[32];
 };
 
+#define KVM_SREGS_E_IMPL_NONE	0
+#define KVM_SREGS_E_IMPL_FSL	1
+
+#define KVM_SREGS_E_FSL_PIDn	(1 << 0) /* PID1/PID2 */
+
+/*
+ * Feature bits indicate which sections of the sregs struct are valid,
+ * both in KVM_GET_SREGS and KVM_SET_SREGS.  On KVM_SET_SREGS, registers
+ * corresponding to unset feature bits will not be modified.  This allows
+ * restoring a checkpoint made without that feature, while keeping the
+ * default values of the new registers.
+ *
+ * KVM_SREGS_E_BASE contains:
+ * CSRR0/1 (refers to SRR2/3 on 40x)
+ * ESR
+ * DEAR
+ * MCSR
+ * TSR
+ * TCR
+ * DEC
+ * TB
+ * VRSAVE (USPRG0)
+ */
+#define KVM_SREGS_E_BASE		(1 << 0)
+
+/*
+ * KVM_SREGS_E_ARCH206 contains:
+ *
+ * PIR
+ * MCSRR0/1
+ * DECAR
+ * IVPR
+ */
+#define KVM_SREGS_E_ARCH206		(1 << 1)
+
+/*
+ * Contains EPCR, plus the upper half of 64-bit registers
+ * that are 32-bit on 32-bit implementations.
+ */
+#define KVM_SREGS_E_64			(1 << 2)
+
+#define KVM_SREGS_E_SPRG8		(1 << 3)
+#define KVM_SREGS_E_MCIVPR		(1 << 4)
+
+/*
+ * IVORs are used -- contains IVOR0-15, plus additional IVORs
+ * in combination with an appropriate feature bit.
+ */
+#define KVM_SREGS_E_IVOR		(1 << 5)
+
+/*
+ * Contains MAS0-4, MAS6-7, TLBnCFG, MMUCFG.
+ * Also TLBnPS if MMUCFG[MAVN] = 1.
+ */
+#define KVM_SREGS_E_ARCH206_MMU		(1 << 6)
+
+/* DBSR, DBCR, IAC, DAC, DVC */
+#define KVM_SREGS_E_DEBUG		(1 << 7)
+
+/* Enhanced debug -- DSRR0/1, SPRG9 */
+#define KVM_SREGS_E_ED			(1 << 8)
+
+/* Embedded Floating Point (SPE) -- IVOR32-34 if KVM_SREGS_E_IVOR */
+#define KVM_SREGS_E_SPE			(1 << 9)
+
+/* External Proxy (EXP) -- EPR */
+#define KVM_SREGS_EXP			(1 << 10)
+
+/* External PID (E.PD) -- EPSC/EPLC */
+#define KVM_SREGS_E_PD			(1 << 11)
+
+/* Processor Control (E.PC) -- IVOR36-37 if KVM_SREGS_E_IVOR */
+#define KVM_SREGS_E_PC			(1 << 12)
+
+/* Page table (E.PT) -- EPTCFG */
+#define KVM_SREGS_E_PT			(1 << 13)
+
+/* Embedded Performance Monitor (E.PM) -- IVOR35 if KVM_SREGS_E_IVOR */
+#define KVM_SREGS_E_PM			(1 << 14)
+
+/*
+ * Special updates:
+ *
+ * Some registers may change even while a vcpu is not running.
+ * To avoid losing these changes, by default these registers are
+ * not updated by KVM_SET_SREGS.  To force an update, set the bit
+ * in u.e.update_special corresponding to the register to be updated.
+ *
+ * The update_special field is zero on return from KVM_GET_SREGS.
+ *
+ * When restoring a checkpoint, the caller can set update_special
+ * to 0xffffffff to ensure that everything is restored, even new features
+ * that the caller doesn't know about.
+ */
+#define KVM_SREGS_E_UPDATE_MCSR		(1 << 0)
+#define KVM_SREGS_E_UPDATE_TSR		(1 << 1)
+#define KVM_SREGS_E_UPDATE_DEC		(1 << 2)
+#define KVM_SREGS_E_UPDATE_DBSR		(1 << 3)
+
+/*
+ * In KVM_SET_SREGS, reserved/pad fields must be left untouched from a
+ * previous KVM_GET_REGS.
+ *
+ * Unless otherwise indicated, setting any register with KVM_SET_SREGS
+ * directly sets its value.  It does not trigger any special semantics such
+ * as write-one-to-clear.  Calling KVM_SET_SREGS on an unmodified struct
+ * just received from KVM_GET_SREGS is always a no-op.
+ */
 struct kvm_sregs {
 	__u32 pvr;
 	union {
@@ -62,6 +170,82 @@ struct kvm_sregs {
 				__u64 dbat[8]; 
 			} ppc32;
 		} s;
+		struct {
+			union {
+				struct { /* KVM_SREGS_E_IMPL_FSL */
+					__u32 features; /* KVM_SREGS_E_FSL_ */
+					__u32 svr;
+					__u64 mcar;
+					__u32 hid0;
+
+					/* KVM_SREGS_E_FSL_PIDn */
+					__u32 pid1, pid2;
+				} fsl;
+				__u8 pad[256];
+			} impl;
+
+			__u32 features; /* KVM_SREGS_E_ */
+			__u32 impl_id;	/* KVM_SREGS_E_IMPL_ */
+			__u32 update_special; /* KVM_SREGS_E_UPDATE_ */
+			__u32 pir;	/* read-only */
+			__u64 sprg8;
+			__u64 sprg9;	/* E.ED */
+			__u64 csrr0;
+			__u64 dsrr0;	/* E.ED */
+			__u64 mcsrr0;
+			__u32 csrr1;
+			__u32 dsrr1;	/* E.ED */
+			__u32 mcsrr1;
+			__u32 esr;
+			__u64 dear;
+			__u64 ivpr;
+			__u64 mcivpr;
+			__u64 mcsr;	/* KVM_SREGS_E_UPDATE_MCSR */
+
+			__u32 tsr;	/* KVM_SREGS_E_UPDATE_TSR */
+			__u32 tcr;
+			__u32 decar;
+			__u32 dec;	/* KVM_SREGS_E_UPDATE_DEC */
+
+			/*
+			 * Userspace can read TB directly, but the
+			 * value reported here is consistent with "dec".
+			 *
+			 * Read-only.
+			 */
+			__u64 tb;
+
+			__u32 dbsr;	/* KVM_SREGS_E_UPDATE_DBSR */
+			__u32 dbcr[3];
+			__u32 iac[4];
+			__u32 dac[2];
+			__u32 dvc[2];
+			__u8 num_iac;	/* read-only */
+			__u8 num_dac;	/* read-only */
+			__u8 num_dvc;	/* read-only */
+			__u8 pad;
+
+			__u32 epr;	/* EXP */
+			__u32 vrsave;	/* a.k.a. USPRG0 */
+			__u32 epcr;	/* KVM_SREGS_E_64 */
+
+			__u32 mas0;
+			__u32 mas1;
+			__u64 mas2;
+			__u64 mas7_3;
+			__u32 mas4;
+			__u32 mas6;
+
+			__u32 ivor_low[16]; /* IVOR0-15 */
+			__u32 ivor_high[18]; /* IVOR32+, plus room to expand */
+
+			__u32 mmucfg;	/* read-only */
+			__u32 eptcfg;	/* E.PT, read-only */
+			__u32 tlbcfg[4];/* read-only */
+			__u32 tlbps[4]; /* read-only */
+
+			__u32 eplc, epsc; /* E.PD */
+		} e;
 		__u8 pad[1020];
 	} u;
 };
diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h
index d22d39942a9..a0e57618ff3 100644
--- a/arch/powerpc/include/asm/kvm_44x.h
+++ b/arch/powerpc/include/asm/kvm_44x.h
@@ -61,7 +61,6 @@ static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu)
 	return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu);
 }
 
-void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid);
 void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu);
 void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu);
 
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 5b750467439..0951b17f4eb 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -59,6 +59,7 @@
 #define BOOK3S_INTERRUPT_INST_SEGMENT	0x480
 #define BOOK3S_INTERRUPT_EXTERNAL	0x500
 #define BOOK3S_INTERRUPT_EXTERNAL_LEVEL	0x501
+#define BOOK3S_INTERRUPT_EXTERNAL_HV	0x502
 #define BOOK3S_INTERRUPT_ALIGNMENT	0x600
 #define BOOK3S_INTERRUPT_PROGRAM	0x700
 #define BOOK3S_INTERRUPT_FP_UNAVAIL	0x800
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 36fdb3aff30..d5a8a386163 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -34,6 +34,7 @@
 	    (\intno == BOOK3S_INTERRUPT_DATA_SEGMENT) || \
 	    (\intno == BOOK3S_INTERRUPT_INST_SEGMENT) || \
 	    (\intno == BOOK3S_INTERRUPT_EXTERNAL) || \
+	    (\intno == BOOK3S_INTERRUPT_EXTERNAL_HV) || \
 	    (\intno == BOOK3S_INTERRUPT_ALIGNMENT) || \
 	    (\intno == BOOK3S_INTERRUPT_PROGRAM) || \
 	    (\intno == BOOK3S_INTERRUPT_FP_UNAVAIL) || \
diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h
index 7fea26fffb2..7a2a565f88c 100644
--- a/arch/powerpc/include/asm/kvm_e500.h
+++ b/arch/powerpc/include/asm/kvm_e500.h
@@ -43,6 +43,7 @@ struct kvmppc_vcpu_e500 {
 
 	u32 host_pid[E500_PID_NUM];
 	u32 pid[E500_PID_NUM];
+	u32 svr;
 
 	u32 mas0;
 	u32 mas1;
@@ -58,6 +59,7 @@ struct kvmppc_vcpu_e500 {
 	u32 hid1;
 	u32 tlb0cfg;
 	u32 tlb1cfg;
+	u64 mcar;
 
 	struct kvm_vcpu vcpu;
 };
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index bba3b9b72a3..186f150b9b8 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -223,6 +223,7 @@ struct kvm_vcpu_arch {
 	ulong hflags;
 	ulong guest_owned_ext;
 #endif
+	u32 vrsave; /* also USPRG0 */
 	u32 mmucr;
 	ulong sprg4;
 	ulong sprg5;
@@ -232,6 +233,9 @@ struct kvm_vcpu_arch {
 	ulong csrr1;
 	ulong dsrr0;
 	ulong dsrr1;
+	ulong mcsrr0;
+	ulong mcsrr1;
+	ulong mcsr;
 	ulong esr;
 	u32 dec;
 	u32 decar;
@@ -255,6 +259,7 @@ struct kvm_vcpu_arch {
 	u32 dbsr;
 
 #ifdef CONFIG_KVM_EXIT_TIMING
+	struct mutex exit_timing_lock;
 	struct kvmppc_exit_timing timing_exit;
 	struct kvmppc_exit_timing timing_last_enter;
 	u32 last_exit_type;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index ecb3bc74c34..9345238edec 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -61,6 +61,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run,
                                       struct kvm_vcpu *vcpu);
 extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
 extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
+extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
 
 /* Core-specific hooks */
 
@@ -142,4 +143,12 @@ static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value)
 	return r;
 }
 
+void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+
+void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+
+void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
+
 #endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index a077adc0b35..e0298d26ce5 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -210,6 +210,8 @@ struct dtl_entry {
 #define DISPATCH_LOG_BYTES	4096	/* bytes per cpu */
 #define N_DISPATCH_LOG		(DISPATCH_LOG_BYTES / sizeof(struct dtl_entry))
 
+extern struct kmem_cache *dtl_cache;
+
 /*
  * When CONFIG_VIRT_CPU_ACCOUNTING = y, the cpu accounting code controls
  * reading from the dispatch trace log.  If other code wants to consume
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index e4f01915fbb..47cacddb14c 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -29,21 +29,6 @@ struct file;
 struct pci_controller;
 struct kimage;
 
-#ifdef CONFIG_SMP
-struct smp_ops_t {
-	void  (*message_pass)(int target, int msg);
-	int   (*probe)(void);
-	void  (*kick_cpu)(int nr);
-	void  (*setup_cpu)(int nr);
-	void  (*bringup_done)(void);
-	void  (*take_timebase)(void);
-	void  (*give_timebase)(void);
-	int   (*cpu_disable)(void);
-	void  (*cpu_die)(unsigned int nr);
-	int   (*cpu_bootable)(unsigned int nr);
-};
-#endif
-
 struct machdep_calls {
 	char		*name;
 #ifdef CONFIG_PPC64
@@ -267,6 +252,7 @@ struct machdep_calls {
 
 extern void e500_idle(void);
 extern void power4_idle(void);
+extern void power7_idle(void);
 extern void ppc6xx_idle(void);
 extern void book3e_idle(void);
 
@@ -311,12 +297,6 @@ extern sys_ctrler_t sys_ctrler;
 
 #endif /* CONFIG_PPC_PMAC */
 
-#ifdef CONFIG_SMP
-/* Poor default implementations */
-extern void __devinit smp_generic_give_timebase(void);
-extern void __devinit smp_generic_take_timebase(void);
-#endif /* CONFIG_SMP */
-
 
 /* Functions to produce codes on the leds.
  * The SRC code should be unique for the message category and should
diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
index 17194fcd404..3ea0f9a259d 100644
--- a/arch/powerpc/include/asm/mmu-book3e.h
+++ b/arch/powerpc/include/asm/mmu-book3e.h
@@ -43,6 +43,7 @@
 #define MAS0_TLBSEL(x)		(((x) << 28) & 0x30000000)
 #define MAS0_ESEL(x)		(((x) << 16) & 0x0FFF0000)
 #define MAS0_NV(x)		((x) & 0x00000FFF)
+#define MAS0_ESEL_MASK		0x0FFF0000
 #define MAS0_HES		0x00004000
 #define MAS0_WQ_ALLWAYS		0x00000000
 #define MAS0_WQ_COND		0x00001000
@@ -137,6 +138,21 @@
 #define MMUCSR0_TLB2PS	0x00078000	/* TLB2 Page Size */
 #define MMUCSR0_TLB3PS	0x00780000	/* TLB3 Page Size */
 
+/* MMUCFG bits */
+#define MMUCFG_MAVN_NASK	0x00000003
+#define MMUCFG_MAVN_V1_0	0x00000000
+#define MMUCFG_MAVN_V2_0	0x00000001
+#define MMUCFG_NTLB_MASK	0x0000000c
+#define MMUCFG_NTLB_SHIFT	2
+#define MMUCFG_PIDSIZE_MASK	0x000007c0
+#define MMUCFG_PIDSIZE_SHIFT	6
+#define MMUCFG_TWC		0x00008000
+#define MMUCFG_LRAT		0x00010000
+#define MMUCFG_RASIZE_MASK	0x00fe0000
+#define MMUCFG_RASIZE_SHIFT	17
+#define MMUCFG_LPIDSIZE_MASK	0x0f000000
+#define MMUCFG_LPIDSIZE_SHIFT	24
+
 /* TLBnCFG encoding */
 #define TLBnCFG_N_ENTRY		0x00000fff	/* number of entries */
 #define TLBnCFG_HES		0x00002000	/* HW select supported */
@@ -229,6 +245,10 @@ extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
 extern int mmu_linear_psize;
 extern int mmu_vmemmap_psize;
 
+#ifdef CONFIG_PPC64
+extern unsigned long linear_map_top;
+#endif
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_MMU_BOOK3E_H_ */
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index ae7b3efec8e..d865bd909c7 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -408,6 +408,7 @@ static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
 #endif /* CONFIG_PPC_SUBPAGE_PROT */
 
 typedef unsigned long mm_context_id_t;
+struct spinlock;
 
 typedef struct {
 	mm_context_id_t id;
@@ -423,6 +424,11 @@ typedef struct {
 #ifdef CONFIG_PPC_SUBPAGE_PROT
 	struct subpage_prot_table spt;
 #endif /* CONFIG_PPC_SUBPAGE_PROT */
+#ifdef CONFIG_PPC_ICSWX
+	struct spinlock *cop_lockp; /* guard acop and cop_pid */
+	unsigned long acop;	/* mask of enabled coprocessor types */
+	unsigned int cop_pid;	/* pid value used with coprocessors */
+#endif /* CONFIG_PPC_ICSWX */
 } mm_context_t;
 
 
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index bb40a06d3b7..4138b21ae80 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -56,11 +56,6 @@
  */
 #define MMU_FTR_NEED_DTLB_SW_LRU	ASM_CONST(0x00200000)
 
-/* This indicates that the processor uses the ISA 2.06 server tlbie
- * mnemonics
- */
-#define MMU_FTR_TLBIE_206		ASM_CONST(0x00400000)
-
 /* Enable use of TLB reservation.  Processor should support tlbsrx.
  * instruction and MAS0[WQ].
  */
@@ -70,6 +65,53 @@
  */
 #define MMU_FTR_USE_PAIRED_MAS		ASM_CONST(0x01000000)
 
+/* MMU is SLB-based
+ */
+#define MMU_FTR_SLB			ASM_CONST(0x02000000)
+
+/* Support 16M large pages
+ */
+#define MMU_FTR_16M_PAGE		ASM_CONST(0x04000000)
+
+/* Supports TLBIEL variant
+ */
+#define MMU_FTR_TLBIEL			ASM_CONST(0x08000000)
+
+/* Supports tlbies w/o locking
+ */
+#define MMU_FTR_LOCKLESS_TLBIE		ASM_CONST(0x10000000)
+
+/* Large pages can be marked CI
+ */
+#define MMU_FTR_CI_LARGE_PAGE		ASM_CONST(0x20000000)
+
+/* 1T segments available
+ */
+#define MMU_FTR_1T_SEGMENT		ASM_CONST(0x40000000)
+
+/* Doesn't support the B bit (1T segment) in SLBIE
+ */
+#define MMU_FTR_NO_SLBIE_B		ASM_CONST(0x80000000)
+
+/* MMU feature bit sets for various CPUs */
+#define MMU_FTRS_DEFAULT_HPTE_ARCH_V2	\
+	MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2
+#define MMU_FTRS_POWER4		MMU_FTRS_DEFAULT_HPTE_ARCH_V2
+#define MMU_FTRS_PPC970		MMU_FTRS_POWER4
+#define MMU_FTRS_POWER5		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
+#define MMU_FTRS_POWER6		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
+#define MMU_FTRS_POWER7		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
+#define MMU_FTRS_CELL		MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
+				MMU_FTR_CI_LARGE_PAGE
+#define MMU_FTRS_PA6T		MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
+				MMU_FTR_CI_LARGE_PAGE | MMU_FTR_NO_SLBIE_B
+#define MMU_FTRS_A2		MMU_FTR_TYPE_3E | MMU_FTR_USE_TLBILX | \
+				MMU_FTR_USE_TLBIVAX_BCAST | \
+				MMU_FTR_LOCK_BCAST_INVAL | \
+				MMU_FTR_USE_TLBRSRV | \
+				MMU_FTR_USE_PAIRED_MAS | \
+				MMU_FTR_TLBIEL | \
+				MMU_FTR_16M_PAGE
 #ifndef __ASSEMBLY__
 #include <asm/cputable.h>
 
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 81fb41289d6..a73668a5f30 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -32,6 +32,10 @@ extern void __destroy_context(unsigned long context_id);
 extern void mmu_context_init(void);
 #endif
 
+extern void switch_cop(struct mm_struct *next);
+extern int use_cop(unsigned long acop, struct mm_struct *mm);
+extern void drop_cop(unsigned long acop, struct mm_struct *mm);
+
 /*
  * switch_mm is the entry point called from the architecture independent
  * code in kernel/sched.c
@@ -55,6 +59,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	if (prev == next)
 		return;
 
+#ifdef CONFIG_PPC_ICSWX
+	/* Switch coprocessor context only if prev or next uses a coprocessor */
+	if (prev->context.acop || next->context.acop)
+		switch_cop(next);
+#endif /* CONFIG_PPC_ICSWX */
+
 	/* We must stop all altivec streams before changing the HW
 	 * context
 	 */
@@ -67,7 +77,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	 * sub architectures.
 	 */
 #ifdef CONFIG_PPC_STD_MMU_64
-	if (cpu_has_feature(CPU_FTR_SLB))
+	if (mmu_has_feature(MMU_FTR_SLB))
 		switch_slb(tsk, next);
 	else
 		switch_stab(tsk, next);
diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h
index 7005ee0b074..df18989e78d 100644
--- a/arch/powerpc/include/asm/mpic.h
+++ b/arch/powerpc/include/asm/mpic.h
@@ -3,7 +3,6 @@
 #ifdef __KERNEL__
 
 #include <linux/irq.h>
-#include <linux/sysdev.h>
 #include <asm/dcr.h>
 #include <asm/msi_bitmap.h>
 
@@ -263,6 +262,7 @@ struct mpic
 #ifdef CONFIG_SMP
 	struct irq_chip		hc_ipi;
 #endif
+	struct irq_chip		hc_tm;
 	const char		*name;
 	/* Flags */
 	unsigned int		flags;
@@ -281,7 +281,7 @@ struct mpic
 
 	/* vector numbers used for internal sources (ipi/timers) */
 	unsigned int		ipi_vecs[4];
-	unsigned int		timer_vecs[4];
+	unsigned int		timer_vecs[8];
 
 	/* Spurious vector to program into unused sources */
 	unsigned int		spurious_vec;
@@ -320,8 +320,6 @@ struct mpic
 	/* link */
 	struct mpic		*next;
 
-	struct sys_device	sysdev;
-
 #ifdef CONFIG_PM
 	struct mpic_irq_save	*save_data;
 #endif
@@ -371,6 +369,8 @@ struct mpic
  * NOTE: This flag trumps MPIC_WANTS_RESET.
  */
 #define MPIC_NO_RESET			0x00004000
+/* Freescale MPIC (compatible includes "fsl,mpic") */
+#define MPIC_FSL			0x00008000
 
 /* MPIC HW modification ID */
 #define MPIC_REGSET_MASK		0xf0000000
diff --git a/arch/powerpc/include/asm/pSeries_reconfig.h b/arch/powerpc/include/asm/pSeries_reconfig.h
index d4b4bfa26fb..89d2f99c1bf 100644
--- a/arch/powerpc/include/asm/pSeries_reconfig.h
+++ b/arch/powerpc/include/asm/pSeries_reconfig.h
@@ -18,13 +18,18 @@
 extern int pSeries_reconfig_notifier_register(struct notifier_block *);
 extern void pSeries_reconfig_notifier_unregister(struct notifier_block *);
 extern struct blocking_notifier_head pSeries_reconfig_chain;
+/* Not the best place to put this, will be fixed when we move some
+ * of the rtas suspend-me stuff to pseries */
+extern void pSeries_coalesce_init(void);
 #else /* !CONFIG_PPC_PSERIES */
 static inline int pSeries_reconfig_notifier_register(struct notifier_block *nb)
 {
 	return 0;
 }
 static inline void pSeries_reconfig_notifier_unregister(struct notifier_block *nb) { }
+static inline void pSeries_coalesce_init(void) { }
 #endif /* CONFIG_PPC_PSERIES */
 
+
 #endif /* __KERNEL__ */
 #endif /* _PPC64_PSERIES_RECONFIG_H */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index ec57540cd7a..74126765106 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -92,9 +92,9 @@ struct paca_struct {
 	 * Now, starting in cacheline 2, the exception save areas
 	 */
 	/* used for most interrupts/exceptions */
-	u64 exgen[10] __attribute__((aligned(0x80)));
-	u64 exmc[10];		/* used for machine checks */
-	u64 exslb[10];		/* used for SLB/segment table misses
+	u64 exgen[11] __attribute__((aligned(0x80)));
+	u64 exmc[11];		/* used for machine checks */
+	u64 exslb[11];		/* used for SLB/segment table misses
  				 * on the linear mapping */
 	/* SLB related definitions */
 	u16 vmalloc_sllp;
@@ -106,7 +106,8 @@ struct paca_struct {
 	pgd_t *pgd;			/* Current PGD */
 	pgd_t *kernel_pgd;		/* Kernel PGD */
 	u64 exgen[8] __attribute__((aligned(0x80)));
-	u64 extlb[EX_TLB_SIZE*3] __attribute__((aligned(0x80)));
+	/* We can have up to 3 levels of reentrancy in the TLB miss handler */
+	u64 extlb[3][EX_TLB_SIZE / sizeof(u64)] __attribute__((aligned(0x80)));
 	u64 exmc[8];		/* used for machine checks */
 	u64 excrit[8];		/* used for crit interrupts */
 	u64 exdbg[8];		/* used for debug interrupts */
@@ -125,7 +126,7 @@ struct paca_struct {
 	struct task_struct *__current;	/* Pointer to current */
 	u64 kstack;			/* Saved Kernel stack addr */
 	u64 stab_rr;			/* stab/slb round-robin counter */
-	u64 saved_r1;			/* r1 save for RTAS calls */
+	u64 saved_r1;			/* r1 save for RTAS calls or PM */
 	u64 saved_msr;			/* MSR saved here by enter_rtas */
 	u16 trap_save;			/* Used when bad stack is encountered */
 	u8 soft_enabled;		/* irq soft-enable flag */
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index 812b2cd80ae..9356262fd3c 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -59,24 +59,7 @@ static __inline__ void clear_page(void *addr)
 	: "ctr", "memory");
 }
 
-extern void copy_4K_page(void *to, void *from);
-
-#ifdef CONFIG_PPC_64K_PAGES
-static inline void copy_page(void *to, void *from)
-{
-	unsigned int i;
-	for (i=0; i < (1 << (PAGE_SHIFT - 12)); i++) {
-		copy_4K_page(to, from);
-		to += 4096;
-		from += 4096;
-	}
-}
-#else /* CONFIG_PPC_64K_PAGES */
-static inline void copy_page(void *to, void *from)
-{
-	copy_4K_page(to, from);
-}
-#endif /* CONFIG_PPC_64K_PAGES */
+extern void copy_page(void *to, void *from);
 
 /* Log 2 of page table size */
 extern u64 ppc64_pft_size;
@@ -130,7 +113,7 @@ extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize);
 extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
 				  unsigned long len, unsigned int psize);
 
-#define slice_mm_new_context(mm)	((mm)->context.id == 0)
+#define slice_mm_new_context(mm)	((mm)->context.id == MMU_NO_CONTEXT)
 
 #endif /* __ASSEMBLY__ */
 #else
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index 2b09cd522d3..81576ee0cfb 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -257,21 +257,20 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 				      pte_t *ptep)
 {
-	unsigned long old;
 
-       	if ((pte_val(*ptep) & _PAGE_RW) == 0)
-       		return;
-	old = pte_update(mm, addr, ptep, _PAGE_RW, 0);
+	if ((pte_val(*ptep) & _PAGE_RW) == 0)
+		return;
+
+	pte_update(mm, addr, ptep, _PAGE_RW, 0);
 }
 
 static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 					   unsigned long addr, pte_t *ptep)
 {
-	unsigned long old;
-
 	if ((pte_val(*ptep) & _PAGE_RW) == 0)
 		return;
-	old = pte_update(mm, addr, ptep, _PAGE_RW, 1);
+
+	pte_update(mm, addr, ptep, _PAGE_RW, 1);
 }
 
 /*
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 1255569387b..e472659d906 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -41,6 +41,10 @@
 #define PPC_INST_RFCI			0x4c000066
 #define PPC_INST_RFDI			0x4c00004e
 #define PPC_INST_RFMCI			0x4c00004c
+#define PPC_INST_MFSPR_DSCR		0x7c1102a6
+#define PPC_INST_MFSPR_DSCR_MASK	0xfc1fffff
+#define PPC_INST_MTSPR_DSCR		0x7c1103a6
+#define PPC_INST_MTSPR_DSCR_MASK	0xfc1fffff
 
 #define PPC_INST_STRING			0x7c00042a
 #define PPC_INST_STRING_MASK		0xfc0007fe
@@ -56,6 +60,17 @@
 #define PPC_INST_TLBSRX_DOT		0x7c0006a5
 #define PPC_INST_XXLOR			0xf0000510
 
+#define PPC_INST_NAP			0x4c000364
+#define PPC_INST_SLEEP			0x4c0003a4
+
+/* A2 specific instructions */
+#define PPC_INST_ERATWE			0x7c0001a6
+#define PPC_INST_ERATRE			0x7c000166
+#define PPC_INST_ERATILX		0x7c000066
+#define PPC_INST_ERATIVAX		0x7c000666
+#define PPC_INST_ERATSX			0x7c000126
+#define PPC_INST_ERATSX_DOT		0x7c000127
+
 /* macros to insert fields into opcodes */
 #define __PPC_RA(a)	(((a) & 0x1f) << 16)
 #define __PPC_RB(b)	(((b) & 0x1f) << 11)
@@ -67,6 +82,8 @@
 #define __PPC_XT(s)	__PPC_XS(s)
 #define __PPC_T_TLB(t)	(((t) & 0x3) << 21)
 #define __PPC_WC(w)	(((w) & 0x3) << 21)
+#define __PPC_WS(w)	(((w) & 0x1f) << 11)
+
 /*
  * Only use the larx hint bit on 64bit CPUs. e500v1/v2 based CPUs will treat a
  * larx with EH set as an illegal instruction.
@@ -113,6 +130,21 @@
 #define PPC_TLBIVAX(a,b)	stringify_in_c(.long PPC_INST_TLBIVAX | \
 					__PPC_RA(a) | __PPC_RB(b))
 
+#define PPC_ERATWE(s, a, w)	stringify_in_c(.long PPC_INST_ERATWE | \
+					__PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w))
+#define PPC_ERATRE(s, a, w)	stringify_in_c(.long PPC_INST_ERATRE | \
+					__PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w))
+#define PPC_ERATILX(t, a, b)	stringify_in_c(.long PPC_INST_ERATILX | \
+					__PPC_T_TLB(t) | __PPC_RA(a) | \
+					__PPC_RB(b))
+#define PPC_ERATIVAX(s, a, b)	stringify_in_c(.long PPC_INST_ERATIVAX | \
+					__PPC_RS(s) | __PPC_RA(a) | __PPC_RB(b))
+#define PPC_ERATSX(t, a, w)	stringify_in_c(.long PPC_INST_ERATSX | \
+					__PPC_RS(t) | __PPC_RA(a) | __PPC_RB(b))
+#define PPC_ERATSX_DOT(t, a, w)	stringify_in_c(.long PPC_INST_ERATSX_DOT | \
+					__PPC_RS(t) | __PPC_RA(a) | __PPC_RB(b))
+
+
 /*
  * Define what the VSX XX1 form instructions will look like, then add
  * the 128 bit load store instructions based on that.
@@ -126,4 +158,7 @@
 #define XXLOR(t, a, b)		stringify_in_c(.long PPC_INST_XXLOR | \
 					       VSX_XX3((t), (a), (b)))
 
+#define PPC_NAP			stringify_in_c(.long PPC_INST_NAP)
+#define PPC_SLEEP		stringify_in_c(.long PPC_INST_SLEEP)
+
 #endif /* _ASM_POWERPC_PPC_OPCODE_H */
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 98210067c1c..1b422381fc1 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -170,6 +170,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
 #define HMT_MEDIUM	or	2,2,2
 #define HMT_MEDIUM_HIGH or	5,5,5		# medium high priority
 #define HMT_HIGH	or	3,3,3
+#define HMT_EXTRA_HIGH	or	7,7,7		# power7 only
 
 #ifdef __KERNEL__
 #ifdef CONFIG_PPC64
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index de1967a1ff5..d50c2b6d9bc 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -238,6 +238,10 @@ struct thread_struct {
 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
 	void*		kvm_shadow_vcpu; /* KVM internal data */
 #endif /* CONFIG_KVM_BOOK3S_32_HANDLER */
+#ifdef CONFIG_PPC64
+	unsigned long	dscr;
+	int		dscr_inherit;
+#endif
 };
 
 #define ARCH_MIN_TASKALIGN 16
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 7e4abebe76c..c5cae0dd176 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -99,17 +99,23 @@
 #define MSR_LE		__MASK(MSR_LE_LG)	/* Little Endian */
 
 #if defined(CONFIG_PPC_BOOK3S_64)
+#define MSR_64BIT	MSR_SF
+
 /* Server variant */
 #define MSR_		MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV
-#define MSR_KERNEL      MSR_ | MSR_SF
+#define MSR_KERNEL	MSR_ | MSR_64BIT
 #define MSR_USER32	MSR_ | MSR_PR | MSR_EE
-#define MSR_USER64	MSR_USER32 | MSR_SF
+#define MSR_USER64	MSR_USER32 | MSR_64BIT
 #elif defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_8xx)
 /* Default MSR for kernel mode. */
 #define MSR_KERNEL	(MSR_ME|MSR_RI|MSR_IR|MSR_DR)
 #define MSR_USER	(MSR_KERNEL|MSR_PR|MSR_EE)
 #endif
 
+#ifndef MSR_64BIT
+#define MSR_64BIT	0
+#endif
+
 /* Floating Point Status and Control Register (FPSCR) Fields */
 #define FPSCR_FX	0x80000000	/* FPU exception summary */
 #define FPSCR_FEX	0x40000000	/* FPU enabled exception summary */
@@ -182,6 +188,8 @@
 
 #define SPRN_CTR	0x009	/* Count Register */
 #define SPRN_DSCR	0x11
+#define SPRN_CFAR	0x1c	/* Come From Address Register */
+#define SPRN_ACOP	0x1F	/* Available Coprocessor Register */
 #define SPRN_CTRLF	0x088
 #define SPRN_CTRLT	0x098
 #define   CTRL_CT	0xc0000000	/* current thread */
@@ -210,8 +218,43 @@
 #define SPRN_TBWL	0x11C	/* Time Base Lower Register (super, R/W) */
 #define SPRN_TBWU	0x11D	/* Time Base Upper Register (super, R/W) */
 #define SPRN_SPURR	0x134	/* Scaled PURR */
+#define SPRN_HSPRG0	0x130	/* Hypervisor Scratch 0 */
+#define SPRN_HSPRG1	0x131	/* Hypervisor Scratch 1 */
+#define SPRN_HDSISR     0x132
+#define SPRN_HDAR       0x133
+#define SPRN_HDEC	0x136	/* Hypervisor Decrementer */
 #define SPRN_HIOR	0x137	/* 970 Hypervisor interrupt offset */
+#define SPRN_RMOR	0x138	/* Real mode offset register */
+#define SPRN_HRMOR	0x139	/* Real mode offset register */
+#define SPRN_HSRR0	0x13A	/* Hypervisor Save/Restore 0 */
+#define SPRN_HSRR1	0x13B	/* Hypervisor Save/Restore 1 */
 #define SPRN_LPCR	0x13E	/* LPAR Control Register */
+#define   LPCR_VPM0	(1ul << (63-0))
+#define   LPCR_VPM1	(1ul << (63-1))
+#define   LPCR_ISL	(1ul << (63-2))
+#define   LPCR_DPFD_SH	(63-11)
+#define   LPCR_VRMA_L	(1ul << (63-12))
+#define   LPCR_VRMA_LP0	(1ul << (63-15))
+#define   LPCR_VRMA_LP1	(1ul << (63-16))
+#define   LPCR_RMLS    0x1C000000      /* impl dependent rmo limit sel */
+#define   LPCR_ILE     0x02000000      /* !HV irqs set MSR:LE */
+#define   LPCR_PECE	0x00007000	/* powersave exit cause enable */
+#define     LPCR_PECE0	0x00004000	/* ext. exceptions can cause exit */
+#define     LPCR_PECE1	0x00002000	/* decrementer can cause exit */
+#define     LPCR_PECE2	0x00001000	/* machine check etc can cause exit */
+#define   LPCR_MER	0x00000800	/* Mediated External Exception */
+#define   LPCR_LPES0   0x00000008      /* LPAR Env selector 0 */
+#define   LPCR_LPES1   0x00000004      /* LPAR Env selector 1 */
+#define   LPCR_RMI     0x00000002      /* real mode is cache inhibit */
+#define   LPCR_HDICE   0x00000001      /* Hyp Decr enable (HV,PR,EE) */
+#define SPRN_LPID	0x13F	/* Logical Partition Identifier */
+#define	SPRN_HMER	0x150	/* Hardware m? error recovery */
+#define	SPRN_HMEER	0x151	/* Hardware m? enable error recovery */
+#define	SPRN_HEIR	0x153	/* Hypervisor Emulated Instruction Register */
+#define SPRN_TLBINDEXR	0x154	/* P7 TLB control register */
+#define SPRN_TLBVPNR	0x155	/* P7 TLB control register */
+#define SPRN_TLBRPNR	0x156	/* P7 TLB control register */
+#define SPRN_TLBLPIDR	0x157	/* P7 TLB control register */
 #define SPRN_DBAT0L	0x219	/* Data BAT 0 Lower Register */
 #define SPRN_DBAT0U	0x218	/* Data BAT 0 Upper Register */
 #define SPRN_DBAT1L	0x21B	/* Data BAT 1 Lower Register */
@@ -434,16 +477,23 @@
 #define SPRN_SRR0	0x01A	/* Save/Restore Register 0 */
 #define SPRN_SRR1	0x01B	/* Save/Restore Register 1 */
 #define   SRR1_WAKEMASK		0x00380000 /* reason for wakeup */
-#define   SRR1_WAKERESET	0x00380000 /* System reset */
 #define   SRR1_WAKESYSERR	0x00300000 /* System error */
 #define   SRR1_WAKEEE		0x00200000 /* External interrupt */
 #define   SRR1_WAKEMT		0x00280000 /* mtctrl */
+#define	  SRR1_WAKEHMI		0x00280000 /* Hypervisor maintenance */
 #define   SRR1_WAKEDEC		0x00180000 /* Decrementer interrupt */
 #define   SRR1_WAKETHERM	0x00100000 /* Thermal management interrupt */
+#define	  SRR1_WAKERESET	0x00100000 /* System reset */
+#define	  SRR1_WAKESTATE	0x00030000 /* Powersave exit mask [46:47] */
+#define	  SRR1_WS_DEEPEST	0x00030000 /* Some resources not maintained,
+					  * may not be recoverable */
+#define	  SRR1_WS_DEEPER	0x00020000 /* Some resources not maintained */
+#define	  SRR1_WS_DEEP		0x00010000 /* All resources maintained */
 #define   SRR1_PROGFPE		0x00100000 /* Floating Point Enabled */
 #define   SRR1_PROGPRIV		0x00040000 /* Privileged instruction */
 #define   SRR1_PROGTRAP		0x00020000 /* Trap */
 #define   SRR1_PROGADDR		0x00010000 /* SRR0 contains subsequent addr */
+
 #define SPRN_HSRR0	0x13A	/* Save/Restore Register 0 */
 #define SPRN_HSRR1	0x13B	/* Save/Restore Register 1 */
 
@@ -673,12 +723,15 @@
  * SPRG usage:
  *
  * All 64-bit:
- *	- SPRG1 stores PACA pointer
+ *	- SPRG1 stores PACA pointer except 64-bit server in
+ *        HV mode in which case it is HSPRG0
  *
  * 64-bit server:
  *	- SPRG0 unused (reserved for HV on Power4)
  *	- SPRG2 scratch for exception vectors
  *	- SPRG3 unused (user visible)
+ *      - HSPRG0 stores PACA in HV mode
+ *      - HSPRG1 scratch for "HV" exceptions
  *
  * 64-bit embedded
  *	- SPRG0 generic exception scratch
@@ -741,6 +794,41 @@
 
 #ifdef CONFIG_PPC_BOOK3S_64
 #define SPRN_SPRG_SCRATCH0	SPRN_SPRG2
+#define SPRN_SPRG_HPACA		SPRN_HSPRG0
+#define SPRN_SPRG_HSCRATCH0	SPRN_HSPRG1
+
+#define GET_PACA(rX)					\
+	BEGIN_FTR_SECTION_NESTED(66);			\
+	mfspr	rX,SPRN_SPRG_PACA;			\
+	FTR_SECTION_ELSE_NESTED(66);			\
+	mfspr	rX,SPRN_SPRG_HPACA;			\
+	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66)
+
+#define SET_PACA(rX)					\
+	BEGIN_FTR_SECTION_NESTED(66);			\
+	mtspr	SPRN_SPRG_PACA,rX;			\
+	FTR_SECTION_ELSE_NESTED(66);			\
+	mtspr	SPRN_SPRG_HPACA,rX;			\
+	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66)
+
+#define GET_SCRATCH0(rX)				\
+	BEGIN_FTR_SECTION_NESTED(66);			\
+	mfspr	rX,SPRN_SPRG_SCRATCH0;			\
+	FTR_SECTION_ELSE_NESTED(66);			\
+	mfspr	rX,SPRN_SPRG_HSCRATCH0;			\
+	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66)
+
+#define SET_SCRATCH0(rX)				\
+	BEGIN_FTR_SECTION_NESTED(66);			\
+	mtspr	SPRN_SPRG_SCRATCH0,rX;			\
+	FTR_SECTION_ELSE_NESTED(66);			\
+	mtspr	SPRN_SPRG_HSCRATCH0,rX;			\
+	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66)
+
+#else /* CONFIG_PPC_BOOK3S_64 */
+#define GET_SCRATCH0(rX)	mfspr	rX,SPRN_SPRG_SCRATCH0
+#define SET_SCRATCH0(rX)	mtspr	SPRN_SPRG_SCRATCH0,rX
+
 #endif
 
 #ifdef CONFIG_PPC_BOOK3E_64
@@ -750,6 +838,10 @@
 #define SPRN_SPRG_TLB_EXFRAME	SPRN_SPRG2
 #define SPRN_SPRG_TLB_SCRATCH	SPRN_SPRG6
 #define SPRN_SPRG_GEN_SCRATCH	SPRN_SPRG0
+
+#define SET_PACA(rX)	mtspr	SPRN_SPRG_PACA,rX
+#define GET_PACA(rX)	mfspr	rX,SPRN_SPRG_PACA
+
 #endif
 
 #ifdef CONFIG_PPC_BOOK3S_32
@@ -800,6 +892,8 @@
 #define SPRN_SPRG_SCRATCH1	SPRN_SPRG1
 #endif
 
+
+
 /*
  * An mtfsf instruction with the L bit set. On CPUs that support this a
  * full 64bits of FPSCR is restored and on other CPUs the L bit is ignored.
@@ -894,6 +988,8 @@
 #define PV_POWER5p	0x003B
 #define PV_POWER7	0x003F
 #define PV_970FX	0x003C
+#define PV_POWER6	0x003E
+#define PV_POWER7	0x003F
 #define PV_630		0x0040
 #define PV_630p	0x0041
 #define PV_970MP	0x0044
diff --git a/arch/powerpc/include/asm/reg_a2.h b/arch/powerpc/include/asm/reg_a2.h
new file mode 100644
index 00000000000..3d52a1132f3
--- /dev/null
+++ b/arch/powerpc/include/asm/reg_a2.h
@@ -0,0 +1,165 @@
+/*
+ *  Register definitions specific to the A2 core
+ *
+ *  Copyright (C) 2008 Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#ifndef __ASM_POWERPC_REG_A2_H__
+#define __ASM_POWERPC_REG_A2_H__
+
+#define SPRN_TENSR	0x1b5
+#define SPRN_TENS	0x1b6	/* Thread ENable Set */
+#define SPRN_TENC	0x1b7	/* Thread ENable Clear */
+
+#define SPRN_A2_CCR0	0x3f0	/* Core Configuration Register 0 */
+#define SPRN_A2_CCR1	0x3f1	/* Core Configuration Register 1 */
+#define SPRN_A2_CCR2	0x3f2	/* Core Configuration Register 2 */
+#define SPRN_MMUCR0	0x3fc	/* MMU Control Register 0 */
+#define SPRN_MMUCR1	0x3fd	/* MMU Control Register 1 */
+#define SPRN_MMUCR2	0x3fe	/* MMU Control Register 2 */
+#define SPRN_MMUCR3	0x3ff	/* MMU Control Register 3 */
+
+#define SPRN_IAR	0x372
+
+#define SPRN_IUCR0	0x3f3
+#define IUCR0_ICBI_ACK	0x1000
+
+#define SPRN_XUCR0	0x3f6	/* Execution Unit Config Register 0 */
+
+#define A2_IERAT_SIZE	16
+#define A2_DERAT_SIZE	32
+
+/* A2 MMUCR0 bits */
+#define MMUCR0_ECL	0x80000000	/* Extended Class for TLB fills */
+#define MMUCR0_TID_NZ	0x40000000	/* TID is non-zero */
+#define MMUCR0_TS	0x10000000	/* Translation space for TLB fills */
+#define MMUCR0_TGS	0x20000000	/* Guest space for TLB fills */
+#define MMUCR0_TLBSEL	0x0c000000	/* TLB or ERAT target for TLB fills */
+#define MMUCR0_TLBSEL_U	0x00000000	/*  TLBSEL = UTLB */
+#define MMUCR0_TLBSEL_I	0x08000000	/*  TLBSEL = I-ERAT */
+#define MMUCR0_TLBSEL_D	0x0c000000	/*  TLBSEL = D-ERAT */
+#define MMUCR0_LOCKSRSH	0x02000000	/* Use TLB lock on tlbsx. */
+#define MMUCR0_TID_MASK	0x000000ff	/* TID field */
+
+/* A2 MMUCR1 bits */
+#define MMUCR1_IRRE		0x80000000	/* I-ERAT round robin enable */
+#define MMUCR1_DRRE		0x40000000	/* D-ERAT round robin enable */
+#define MMUCR1_REE		0x20000000	/* Reference Exception Enable*/
+#define MMUCR1_CEE		0x10000000	/* Change exception enable */
+#define MMUCR1_CSINV_ALL	0x00000000	/* Inval ERAT on all CS evts */
+#define MMUCR1_CSINV_NISYNC	0x04000000	/* Inval ERAT on all ex isync*/
+#define MMUCR1_CSINV_NEVER	0x0c000000	/* Don't inval ERAT on CS */
+#define MMUCR1_ICTID		0x00080000	/* IERAT class field as TID */
+#define MMUCR1_ITTID		0x00040000	/* IERAT thdid field as TID */
+#define MMUCR1_DCTID		0x00020000	/* DERAT class field as TID */
+#define MMUCR1_DTTID		0x00010000	/* DERAT thdid field as TID */
+#define MMUCR1_DCCD		0x00008000	/* DERAT class ignore */
+#define MMUCR1_TLBWE_BINV	0x00004000	/* back invalidate on tlbwe */
+
+/* A2 MMUCR2 bits */
+#define MMUCR2_PSSEL_SHIFT	4
+
+/* A2 MMUCR3 bits */
+#define MMUCR3_THID		0x0000000f	/* Thread ID */
+
+/* *** ERAT TLB bits definitions */
+#define TLB0_EPN_MASK		ASM_CONST(0xfffffffffffff000)
+#define TLB0_CLASS_MASK		ASM_CONST(0x0000000000000c00)
+#define TLB0_CLASS_00		ASM_CONST(0x0000000000000000)
+#define TLB0_CLASS_01		ASM_CONST(0x0000000000000400)
+#define TLB0_CLASS_10		ASM_CONST(0x0000000000000800)
+#define TLB0_CLASS_11		ASM_CONST(0x0000000000000c00)
+#define TLB0_V			ASM_CONST(0x0000000000000200)
+#define TLB0_X			ASM_CONST(0x0000000000000100)
+#define TLB0_SIZE_MASK		ASM_CONST(0x00000000000000f0)
+#define TLB0_SIZE_4K		ASM_CONST(0x0000000000000010)
+#define TLB0_SIZE_64K		ASM_CONST(0x0000000000000030)
+#define TLB0_SIZE_1M		ASM_CONST(0x0000000000000050)
+#define TLB0_SIZE_16M		ASM_CONST(0x0000000000000070)
+#define TLB0_SIZE_1G		ASM_CONST(0x00000000000000a0)
+#define TLB0_THDID_MASK		ASM_CONST(0x000000000000000f)
+#define TLB0_THDID_0		ASM_CONST(0x0000000000000001)
+#define TLB0_THDID_1		ASM_CONST(0x0000000000000002)
+#define TLB0_THDID_2		ASM_CONST(0x0000000000000004)
+#define TLB0_THDID_3		ASM_CONST(0x0000000000000008)
+#define TLB0_THDID_ALL		ASM_CONST(0x000000000000000f)
+
+#define TLB1_RESVATTR		ASM_CONST(0x00f0000000000000)
+#define TLB1_U0			ASM_CONST(0x0008000000000000)
+#define TLB1_U1			ASM_CONST(0x0004000000000000)
+#define TLB1_U2			ASM_CONST(0x0002000000000000)
+#define TLB1_U3			ASM_CONST(0x0001000000000000)
+#define TLB1_R			ASM_CONST(0x0000800000000000)
+#define TLB1_C			ASM_CONST(0x0000400000000000)
+#define TLB1_RPN_MASK		ASM_CONST(0x000003fffffff000)
+#define TLB1_W			ASM_CONST(0x0000000000000800)
+#define TLB1_I			ASM_CONST(0x0000000000000400)
+#define TLB1_M			ASM_CONST(0x0000000000000200)
+#define TLB1_G			ASM_CONST(0x0000000000000100)
+#define TLB1_E			ASM_CONST(0x0000000000000080)
+#define TLB1_VF			ASM_CONST(0x0000000000000040)
+#define TLB1_UX			ASM_CONST(0x0000000000000020)
+#define TLB1_SX			ASM_CONST(0x0000000000000010)
+#define TLB1_UW			ASM_CONST(0x0000000000000008)
+#define TLB1_SW			ASM_CONST(0x0000000000000004)
+#define TLB1_UR			ASM_CONST(0x0000000000000002)
+#define TLB1_SR			ASM_CONST(0x0000000000000001)
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_WSP
+#define WSP_UART_PHYS	0xffc000c000
+/* This needs to be careful chosen to hit a !0 congruence class
+ * in the TLB since we bolt it in way 3, which is already occupied
+ * by our linear mapping primary bolted entry in CC 0.
+ */
+#define WSP_UART_VIRT	0xf000000000001000
+#endif
+
+/* A2 erativax attributes definitions */
+#define ERATIVAX_RS_IS_ALL		0x000
+#define ERATIVAX_RS_IS_TID		0x040
+#define ERATIVAX_RS_IS_CLASS		0x080
+#define ERATIVAX_RS_IS_FULLMATCH	0x0c0
+#define ERATIVAX_CLASS_00		0x000
+#define ERATIVAX_CLASS_01		0x010
+#define ERATIVAX_CLASS_10		0x020
+#define ERATIVAX_CLASS_11		0x030
+#define ERATIVAX_PSIZE_4K		(TLB_PSIZE_4K >> 1)
+#define ERATIVAX_PSIZE_64K		(TLB_PSIZE_64K >> 1)
+#define ERATIVAX_PSIZE_1M		(TLB_PSIZE_1M >> 1)
+#define ERATIVAX_PSIZE_16M		(TLB_PSIZE_16M >> 1)
+#define ERATIVAX_PSIZE_1G		(TLB_PSIZE_1G >> 1)
+
+/* A2 eratilx attributes definitions */
+#define ERATILX_T_ALL			0
+#define ERATILX_T_TID			1
+#define ERATILX_T_TGS			2
+#define ERATILX_T_FULLMATCH		3
+#define ERATILX_T_CLASS0		4
+#define ERATILX_T_CLASS1		5
+#define ERATILX_T_CLASS2		6
+#define ERATILX_T_CLASS3		7
+
+/* XUCR0 bits */
+#define XUCR0_TRACE_UM_T0		0x40000000	/* Thread 0 */
+#define XUCR0_TRACE_UM_T1		0x20000000	/* Thread 1 */
+#define XUCR0_TRACE_UM_T2		0x10000000	/* Thread 2 */
+#define XUCR0_TRACE_UM_T3		0x08000000	/* Thread 3 */
+
+/* A2 CCR0 register */
+#define A2_CCR0_PME_DISABLED		0x00000000
+#define A2_CCR0_PME_SLEEP		0x40000000
+#define A2_CCR0_PME_RVW			0x80000000
+#define A2_CCR0_PME_DISABLED2		0xc0000000
+
+/* A2 CCR2 register */
+#define A2_CCR2_ERAT_ONLY_MODE		0x00000001
+#define A2_CCR2_ENABLE_ICSWX		0x00000002
+#define A2_CCR2_ENABLE_PC		0x20000000
+#define A2_CCR2_ENABLE_TRACE		0x40000000
+
+#endif /* __ASM_POWERPC_REG_A2_H__ */
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index b316794aa2b..0f0ad9fa01c 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -27,10 +27,12 @@
 #define MSR_CM		(1<<31) /* Computation Mode (0=32-bit, 1=64-bit) */
 
 #if defined(CONFIG_PPC_BOOK3E_64)
+#define MSR_64BIT	MSR_CM
+
 #define MSR_		MSR_ME | MSR_CE
-#define MSR_KERNEL      MSR_ | MSR_CM
+#define MSR_KERNEL	MSR_ | MSR_64BIT
 #define MSR_USER32	MSR_ | MSR_PR | MSR_EE | MSR_DE
-#define MSR_USER64	MSR_USER32 | MSR_CM | MSR_DE
+#define MSR_USER64	MSR_USER32 | MSR_64BIT
 #elif defined (CONFIG_40x)
 #define MSR_KERNEL	(MSR_ME|MSR_RI|MSR_IR|MSR_DR|MSR_CE)
 #define MSR_USER	(MSR_KERNEL|MSR_PR|MSR_EE)
@@ -81,6 +83,10 @@
 #define SPRN_IVOR13	0x19D	/* Interrupt Vector Offset Register 13 */
 #define SPRN_IVOR14	0x19E	/* Interrupt Vector Offset Register 14 */
 #define SPRN_IVOR15	0x19F	/* Interrupt Vector Offset Register 15 */
+#define SPRN_IVOR38	0x1B0	/* Interrupt Vector Offset Register 38 */
+#define SPRN_IVOR39	0x1B1	/* Interrupt Vector Offset Register 39 */
+#define SPRN_IVOR40	0x1B2	/* Interrupt Vector Offset Register 40 */
+#define SPRN_IVOR41	0x1B3	/* Interrupt Vector Offset Register 41 */
 #define SPRN_SPEFSCR	0x200	/* SPE & Embedded FP Status & Control */
 #define SPRN_BBEAR	0x201	/* Branch Buffer Entry Address Register */
 #define SPRN_BBTAR	0x202	/* Branch Buffer Target Address Register */
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 9a1193e30f2..58625d1e780 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -158,7 +158,50 @@ struct rtas_error_log {
 	unsigned long target:4;			/* Target of failed operation */
 	unsigned long type:8;			/* General event or error*/
 	unsigned long extended_log_length:32;	/* length in bytes */
-	unsigned char buffer[1];
+	unsigned char buffer[1];		/* Start of extended log */
+						/* Variable length.      */
+};
+
+#define RTAS_V6EXT_LOG_FORMAT_EVENT_LOG	14
+
+#define RTAS_V6EXT_COMPANY_ID_IBM	(('I' << 24) | ('B' << 16) | ('M' << 8))
+
+/* RTAS general extended event log, Version 6. The extended log starts
+ * from "buffer" field of struct rtas_error_log defined above.
+ */
+struct rtas_ext_event_log_v6 {
+	/* Byte 0 */
+	uint32_t log_valid:1;		/* 1:Log valid */
+	uint32_t unrecoverable_error:1;	/* 1:Unrecoverable error */
+	uint32_t recoverable_error:1;	/* 1:recoverable (correctable	*/
+					/*   or successfully retried)	*/
+	uint32_t degraded_operation:1;	/* 1:Unrecoverable err, bypassed*/
+					/*   - degraded operation (e.g.	*/
+					/*   CPU or mem taken off-line)	*/
+	uint32_t predictive_error:1;
+	uint32_t new_log:1;		/* 1:"New" log (Always 1 for	*/
+					/*   data returned from RTAS	*/
+	uint32_t big_endian:1;		/* 1: Big endian */
+	uint32_t :1;			/* reserved */
+	/* Byte 1 */
+	uint32_t :8;			/* reserved */
+	/* Byte 2 */
+	uint32_t powerpc_format:1;	/* Set to 1 (indicating log is	*/
+					/* in PowerPC format		*/
+	uint32_t :3;			/* reserved */
+	uint32_t log_format:4;		/* Log format indicator. Define	*/
+					/* format used for byte 12-2047	*/
+	/* Byte 3 */
+	uint32_t :8;			/* reserved */
+	/* Byte 4-11 */
+	uint8_t reserved[8];		/* reserved */
+	/* Byte 12-15 */
+	uint32_t company_id;		/* Company ID of the company	*/
+					/* that defines the format for	*/
+					/* the vendor specific log type	*/
+	/* Byte 16-end of log */
+	uint8_t vendor_log[1];		/* Start of vendor specific log	*/
+					/* Variable length.		*/
 };
 
 /*
diff --git a/arch/powerpc/include/asm/scom.h b/arch/powerpc/include/asm/scom.h
new file mode 100644
index 00000000000..0cabfd7bc2d
--- /dev/null
+++ b/arch/powerpc/include/asm/scom.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright 2010 Benjamin Herrenschmidt, IBM Corp
+ *                <benh@kernel.crashing.org>
+ *     and        David Gibson, IBM Corporation.
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program;  if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _ASM_POWERPC_SCOM_H
+#define _ASM_POWERPC_SCOM_H
+
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_PPC_SCOM
+
+/*
+ * The SCOM bus is a sideband bus used for accessing various internal
+ * registers of the processor or the chipset. The implementation details
+ * differ between processors and platforms, and the access method as
+ * well.
+ *
+ * This API allows to "map" ranges of SCOM register numbers associated
+ * with a given SCOM controller. The later must be represented by a
+ * device node, though some implementations might support NULL if there
+ * is no possible ambiguity
+ *
+ * Then, scom_read/scom_write can be used to accesses registers inside
+ * that range. The argument passed is a register number relative to
+ * the beginning of the range mapped.
+ */
+
+typedef void *scom_map_t;
+
+/* Value for an invalid SCOM map */
+#define SCOM_MAP_INVALID	(NULL)
+
+/* The scom_controller data structure is what the platform passes
+ * to the core code in scom_init, it provides the actual implementation
+ * of all the SCOM functions
+ */
+struct scom_controller {
+	scom_map_t (*map)(struct device_node *ctrl_dev, u64 reg, u64 count);
+	void (*unmap)(scom_map_t map);
+
+	u64 (*read)(scom_map_t map, u32 reg);
+	void (*write)(scom_map_t map, u32 reg, u64 value);
+};
+
+extern const struct scom_controller *scom_controller;
+
+/**
+ * scom_init - Initialize the SCOM backend, called by the platform
+ * @controller: The platform SCOM controller
+ */
+static inline void scom_init(const struct scom_controller *controller)
+{
+	scom_controller = controller;
+}
+
+/**
+ * scom_map_ok - Test is a SCOM mapping is successful
+ * @map: The result of scom_map to test
+ */
+static inline int scom_map_ok(scom_map_t map)
+{
+	return map != SCOM_MAP_INVALID;
+}
+
+/**
+ * scom_map - Map a block of SCOM registers
+ * @ctrl_dev: Device node of the SCOM controller
+ *            some implementations allow NULL here
+ * @reg: first SCOM register to map
+ * @count: Number of SCOM registers to map
+ */
+
+static inline scom_map_t scom_map(struct device_node *ctrl_dev,
+				  u64 reg, u64 count)
+{
+	return scom_controller->map(ctrl_dev, reg, count);
+}
+
+/**
+ * scom_find_parent - Find the SCOM controller for a device
+ * @dev: OF node of the device
+ *
+ * This is not meant for general usage, but in combination with
+ * scom_map() allows to map registers not represented by the
+ * device own scom-reg property. Useful for applying HW workarounds
+ * on things not properly represented in the device-tree for example.
+ */
+struct device_node *scom_find_parent(struct device_node *dev);
+
+
+/**
+ * scom_map_device - Map a device's block of SCOM registers
+ * @dev: OF node of the device
+ * @index: Register bank index (index in "scom-reg" property)
+ *
+ * This function will use the device-tree binding for SCOM which
+ * is to follow "scom-parent" properties until it finds a node with
+ * a "scom-controller" property to find the controller. It will then
+ * use the "scom-reg" property which is made of reg/count pairs,
+ * each of them having a size defined by the controller's #scom-cells
+ * property
+ */
+extern scom_map_t scom_map_device(struct device_node *dev, int index);
+
+
+/**
+ * scom_unmap - Unmap a block of SCOM registers
+ * @map: Result of scom_map is to be unmapped
+ */
+static inline void scom_unmap(scom_map_t map)
+{
+	if (scom_map_ok(map))
+		scom_controller->unmap(map);
+}
+
+/**
+ * scom_read - Read a SCOM register
+ * @map: Result of scom_map
+ * @reg: Register index within that map
+ */
+static inline u64 scom_read(scom_map_t map, u32 reg)
+{
+	return scom_controller->read(map, reg);
+}
+
+/**
+ * scom_write - Write to a SCOM register
+ * @map: Result of scom_map
+ * @reg: Register index within that map
+ * @value: Value to write
+ */
+static inline void scom_write(scom_map_t map, u32 reg, u64 value)
+{
+	scom_controller->write(map, reg, value);
+}
+
+#endif /* CONFIG_PPC_SCOM */
+#endif /* __ASSEMBLY__ */
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_SCOM_H */
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index a902a0d3ae0..880b8c1e6e5 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -20,6 +20,7 @@
 #include <linux/threads.h>
 #include <linux/cpumask.h>
 #include <linux/kernel.h>
+#include <linux/irqreturn.h>
 
 #ifndef __ASSEMBLY__
 
@@ -29,14 +30,32 @@
 #include <asm/percpu.h>
 
 extern int boot_cpuid;
+extern int boot_cpu_count;
 
 extern void cpu_die(void);
 
 #ifdef CONFIG_SMP
 
-extern void smp_send_debugger_break(int cpu);
-extern void smp_message_recv(int);
+struct smp_ops_t {
+	void  (*message_pass)(int cpu, int msg);
+#ifdef CONFIG_PPC_SMP_MUXED_IPI
+	void  (*cause_ipi)(int cpu, unsigned long data);
+#endif
+	int   (*probe)(void);
+	int   (*kick_cpu)(int nr);
+	void  (*setup_cpu)(int nr);
+	void  (*bringup_done)(void);
+	void  (*take_timebase)(void);
+	void  (*give_timebase)(void);
+	int   (*cpu_disable)(void);
+	void  (*cpu_die)(unsigned int nr);
+	int   (*cpu_bootable)(unsigned int nr);
+};
+
+extern void smp_send_debugger_break(void);
 extern void start_secondary_resume(void);
+extern void __devinit smp_generic_give_timebase(void);
+extern void __devinit smp_generic_take_timebase(void);
 
 DECLARE_PER_CPU(unsigned int, cpu_pvr);
 
@@ -93,13 +112,16 @@ extern int cpu_to_core_id(int cpu);
 #define PPC_MSG_CALL_FUNC_SINGLE	2
 #define PPC_MSG_DEBUGGER_BREAK  3
 
-/*
- * irq controllers that have dedicated ipis per message and don't
- * need additional code in the action handler may use this
- */
+/* for irq controllers that have dedicated ipis per message (4) */
 extern int smp_request_message_ipi(int virq, int message);
 extern const char *smp_ipi_name[];
 
+/* for irq controllers with only a single ipi */
+extern void smp_muxed_ipi_set_data(int cpu, unsigned long data);
+extern void smp_muxed_ipi_message_pass(int cpu, int msg);
+extern void smp_muxed_ipi_resend(void);
+extern irqreturn_t smp_ipi_demux(void);
+
 void smp_init_iSeries(void);
 void smp_init_pSeries(void);
 void smp_init_cell(void);
@@ -149,7 +171,7 @@ extern int smt_enabled_at_boot;
 
 extern int smp_mpic_probe(void);
 extern void smp_mpic_setup_cpu(int cpu);
-extern void smp_generic_kick_cpu(int nr);
+extern int smp_generic_kick_cpu(int nr);
 
 extern void smp_generic_give_timebase(void);
 extern void smp_generic_take_timebase(void);
@@ -169,6 +191,8 @@ extern unsigned long __secondary_hold_spinloop;
 extern unsigned long __secondary_hold_acknowledge;
 extern char __secondary_hold;
 
+extern irqreturn_t debug_ipi_action(int irq, void *data);
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 60f64b132bd..8489d372077 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -352,3 +352,4 @@ SYSCALL_SPU(name_to_handle_at)
 COMPAT_SYS_SPU(open_by_handle_at)
 COMPAT_SYS_SPU(clock_adjtime)
 SYSCALL_SPU(syncfs)
+COMPAT_SYS_SPU(sendmmsg)
diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h
index 5e474ddd227..2dc595dda03 100644
--- a/arch/powerpc/include/asm/system.h
+++ b/arch/powerpc/include/asm/system.h
@@ -219,8 +219,6 @@ extern int mem_init_done;	/* set on boot once kmalloc can be called */
 extern int init_bootmem_done;	/* set once bootmem is available */
 extern phys_addr_t memory_limit;
 extern unsigned long klimit;
-
-extern void *alloc_maybe_bootmem(size_t size, gfp_t mask);
 extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
 
 extern int powersave_nap;	/* set if nap mode can be used in idle loop */
diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h
index d50a380b2b6..81143fcbd11 100644
--- a/arch/powerpc/include/asm/tlbflush.h
+++ b/arch/powerpc/include/asm/tlbflush.h
@@ -79,6 +79,8 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm)
 
 #elif defined(CONFIG_PPC_STD_MMU_64)
 
+#define MMU_NO_CONTEXT		0
+
 /*
  * TLB flushing for 64-bit hash-MMU CPUs
  */
diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h
index 11ae699135b..58580e94a2b 100644
--- a/arch/powerpc/include/asm/udbg.h
+++ b/arch/powerpc/include/asm/udbg.h
@@ -52,6 +52,7 @@ extern void __init udbg_init_44x_as1(void);
 extern void __init udbg_init_40x_realmode(void);
 extern void __init udbg_init_cpm(void);
 extern void __init udbg_init_usbgecko(void);
+extern void __init udbg_init_wsp(void);
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_UDBG_H */
diff --git a/arch/powerpc/include/asm/uninorth.h b/arch/powerpc/include/asm/uninorth.h
index ae9c899c8a6..d12b11d7641 100644
--- a/arch/powerpc/include/asm/uninorth.h
+++ b/arch/powerpc/include/asm/uninorth.h
@@ -60,7 +60,7 @@
  *
  * Obviously, the GART is not cache coherent and so any change to it
  * must be flushed to memory (or maybe just make the GART space non
- * cachable). AGP memory itself does't seem to be cache coherent neither.
+ * cachable). AGP memory itself doesn't seem to be cache coherent neither.
  *
  * In order to invalidate the GART (which is probably necessary to inval
  * the bridge internal TLBs), the following sequence has to be written,
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 3c215648ce6..6d23c8193ca 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -371,10 +371,11 @@
 #define __NR_open_by_handle_at	346
 #define __NR_clock_adjtime	347
 #define __NR_syncfs		348
+#define __NR_sendmmsg		349
 
 #ifdef __KERNEL__
 
-#define __NR_syscalls		349
+#define __NR_syscalls		350
 
 #define __NR__exit __NR_exit
 #define NR_syscalls	__NR_syscalls
diff --git a/arch/powerpc/include/asm/wsp.h b/arch/powerpc/include/asm/wsp.h
new file mode 100644
index 00000000000..c7dc83088a3
--- /dev/null
+++ b/arch/powerpc/include/asm/wsp.h
@@ -0,0 +1,14 @@
+/*
+ *  Copyright 2011 Michael Ellerman, IBM Corp.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+#ifndef __ASM_POWERPC_WSP_H
+#define __ASM_POWERPC_WSP_H
+
+extern int wsp_get_chip_id(struct device_node *dn);
+
+#endif /* __ASM_POWERPC_WSP_H */
diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
new file mode 100644
index 00000000000..b183a406201
--- /dev/null
+++ b/arch/powerpc/include/asm/xics.h
@@ -0,0 +1,142 @@
+/*
+ * Common definitions accross all variants of ICP and ICS interrupt
+ * controllers.
+ */
+
+#ifndef _XICS_H
+#define _XICS_H
+
+#include <linux/interrupt.h>
+
+#define XICS_IPI		2
+#define XICS_IRQ_SPURIOUS	0
+
+/* Want a priority other than 0.  Various HW issues require this. */
+#define	DEFAULT_PRIORITY	5
+
+/*
+ * Mark IPIs as higher priority so we can take them inside interrupts that
+ * arent marked IRQF_DISABLED
+ */
+#define IPI_PRIORITY		4
+
+/* The least favored priority */
+#define LOWEST_PRIORITY		0xFF
+
+/* The number of priorities defined above */
+#define MAX_NUM_PRIORITIES	3
+
+/* Native ICP */
+extern int icp_native_init(void);
+
+/* PAPR ICP */
+extern int icp_hv_init(void);
+
+/* ICP ops */
+struct icp_ops {
+	unsigned int (*get_irq)(void);
+	void (*eoi)(struct irq_data *d);
+	void (*set_priority)(unsigned char prio);
+	void (*teardown_cpu)(void);
+	void (*flush_ipi)(void);
+#ifdef CONFIG_SMP
+	void (*cause_ipi)(int cpu, unsigned long data);
+	irq_handler_t ipi_action;
+#endif
+};
+
+extern const struct icp_ops *icp_ops;
+
+/* Native ICS */
+extern int ics_native_init(void);
+
+/* RTAS ICS */
+extern int ics_rtas_init(void);
+
+/* ICS instance, hooked up to chip_data of an irq */
+struct ics {
+	struct list_head link;
+	int (*map)(struct ics *ics, unsigned int virq);
+	void (*mask_unknown)(struct ics *ics, unsigned long vec);
+	long (*get_server)(struct ics *ics, unsigned long vec);
+	int (*host_match)(struct ics *ics, struct device_node *node);
+	char data[];
+};
+
+/* Commons */
+extern unsigned int xics_default_server;
+extern unsigned int xics_default_distrib_server;
+extern unsigned int xics_interrupt_server_size;
+extern struct irq_host *xics_host;
+
+struct xics_cppr {
+	unsigned char stack[MAX_NUM_PRIORITIES];
+	int index;
+};
+
+DECLARE_PER_CPU(struct xics_cppr, xics_cppr);
+
+static inline void xics_push_cppr(unsigned int vec)
+{
+	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
+
+	if (WARN_ON(os_cppr->index >= MAX_NUM_PRIORITIES - 1))
+		return;
+
+	if (vec == XICS_IPI)
+		os_cppr->stack[++os_cppr->index] = IPI_PRIORITY;
+	else
+		os_cppr->stack[++os_cppr->index] = DEFAULT_PRIORITY;
+}
+
+static inline unsigned char xics_pop_cppr(void)
+{
+	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
+
+	if (WARN_ON(os_cppr->index < 1))
+		return LOWEST_PRIORITY;
+
+	return os_cppr->stack[--os_cppr->index];
+}
+
+static inline void xics_set_base_cppr(unsigned char cppr)
+{
+	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
+
+	/* we only really want to set the priority when there's
+	 * just one cppr value on the stack
+	 */
+	WARN_ON(os_cppr->index != 0);
+
+	os_cppr->stack[0] = cppr;
+}
+
+static inline unsigned char xics_cppr_top(void)
+{
+	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
+	
+	return os_cppr->stack[os_cppr->index];
+}
+
+DECLARE_PER_CPU_SHARED_ALIGNED(unsigned long, xics_ipi_message);
+
+extern void xics_init(void);
+extern void xics_setup_cpu(void);
+extern void xics_update_irq_servers(void);
+extern void xics_set_cpu_giq(unsigned int gserver, unsigned int join);
+extern void xics_mask_unknown_vec(unsigned int vec);
+extern irqreturn_t xics_ipi_dispatch(int cpu);
+extern int xics_smp_probe(void);
+extern void xics_register_ics(struct ics *ics);
+extern void xics_teardown_cpu(void);
+extern void xics_kexec_teardown_cpu(int secondary);
+extern void xics_migrate_irqs_away(void);
+#ifdef CONFIG_SMP
+extern int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask,
+			       unsigned int strict_check);
+#else
+#define xics_get_irq_server(virq, cpumask, strict_check) (xics_default_server)
+#endif
+
+
+#endif /* _XICS_H */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 3bb2a3e6a33..9aab3631257 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -38,11 +38,14 @@ obj-$(CONFIG_PPC64)		+= setup_64.o sys_ppc32.o \
 				   paca.o nvram_64.o firmware.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_ppc970.o cpu_setup_pa6t.o
+obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_power7.o
 obj64-$(CONFIG_RELOCATABLE)	+= reloc_64.o
 obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o idle_book3e.o
+obj-$(CONFIG_PPC_A2)		+= cpu_setup_a2.o
 obj-$(CONFIG_PPC64)		+= vdso64/
 obj-$(CONFIG_ALTIVEC)		+= vecemu.o
 obj-$(CONFIG_PPC_970_NAP)	+= idle_power4.o
+obj-$(CONFIG_PPC_P7_NAP)	+= idle_power7.o
 obj-$(CONFIG_PPC_OF)		+= of_platform.o prom_parse.o
 obj-$(CONFIG_PPC_CLOCK)		+= clock.o
 procfs-y			:= proc_powerpc.o
@@ -75,7 +78,6 @@ obj-$(CONFIG_PPC_FSL_BOOK3E)	+= cpu_setup_fsl_booke.o dbell.o
 obj-$(CONFIG_PPC_BOOK3E_64)	+= dbell.o
 
 extra-y				:= head_$(CONFIG_WORD_SIZE).o
-extra-$(CONFIG_PPC_BOOK3E_32)	:= head_new_booke.o
 extra-$(CONFIG_40x)		:= head_40x.o
 extra-$(CONFIG_44x)		:= head_44x.o
 extra-$(CONFIG_FSL_BOOKE)	:= head_fsl_booke.o
@@ -103,6 +105,8 @@ obj-$(CONFIG_KEXEC)		+= machine_kexec.o crash.o \
 obj-$(CONFIG_AUDIT)		+= audit.o
 obj64-$(CONFIG_AUDIT)		+= compat_audit.o
 
+obj-$(CONFIG_PPC_IO_WORKAROUNDS)	+= io-workarounds.o
+
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o
 obj-$(CONFIG_PERF_EVENTS)	+= perf_callchain.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 23e6a93145a..36e1c8a29be 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -74,6 +74,7 @@ int main(void)
 	DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context));
 	DEFINE(SIGSEGV, SIGSEGV);
 	DEFINE(NMI_MASK, NMI_MASK);
+	DEFINE(THREAD_DSCR, offsetof(struct thread_struct, dscr));
 #else
 	DEFINE(THREAD_INFO, offsetof(struct task_struct, stack));
 #endif /* CONFIG_PPC64 */
@@ -395,6 +396,7 @@ int main(void)
 	DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
 	DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
 	DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
+	DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave));
 	DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4));
 	DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5));
 	DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6));
diff --git a/arch/powerpc/kernel/cpu_setup_a2.S b/arch/powerpc/kernel/cpu_setup_a2.S
new file mode 100644
index 00000000000..7f818feaa7a
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_a2.S
@@ -0,0 +1,114 @@
+/*
+ *  A2 specific assembly support code
+ *
+ *  Copyright 2009 Ben Herrenschmidt, IBM Corp.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/asm-offsets.h>
+#include <asm/ppc_asm.h>
+#include <asm/ppc-opcode.h>
+#include <asm/processor.h>
+#include <asm/reg_a2.h>
+#include <asm/reg.h>
+#include <asm/thread_info.h>
+
+/*
+ * Disable thdid and class fields in ERATs to bump PID to full 14 bits capacity.
+ * This also prevents external LPID accesses but that isn't a problem when not a
+ * guest. Under PV, this setting will be ignored and MMUCR will return the right
+ * number of PID bits we can use.
+ */
+#define MMUCR1_EXTEND_PID \
+	(MMUCR1_ICTID | MMUCR1_ITTID | MMUCR1_DCTID | \
+	 MMUCR1_DTTID | MMUCR1_DCCD)
+
+/*
+ * Use extended PIDs if enabled.
+ * Don't clear the ERATs on context sync events and enable I & D LRU.
+ * Enable ERAT back invalidate when tlbwe overwrites an entry.
+ */
+#define INITIAL_MMUCR1 \
+	(MMUCR1_EXTEND_PID | MMUCR1_CSINV_NEVER | MMUCR1_IRRE | \
+	 MMUCR1_DRRE | MMUCR1_TLBWE_BINV)
+
+_GLOBAL(__setup_cpu_a2)
+	/* Some of these are actually thread local and some are
+	 * core local but doing it always won't hurt
+	 */
+
+#ifdef CONFIG_PPC_WSP_COPRO
+	/* Make sure ACOP starts out as zero */
+	li	r3,0
+	mtspr   SPRN_ACOP,r3
+
+	/* Enable icswx instruction */
+	mfspr   r3,SPRN_A2_CCR2
+	ori     r3,r3,A2_CCR2_ENABLE_ICSWX
+	mtspr   SPRN_A2_CCR2,r3
+
+	/* Unmask all CTs in HACOP */
+	li      r3,-1
+	mtspr   SPRN_HACOP,r3
+#endif /* CONFIG_PPC_WSP_COPRO */
+
+	/* Enable doorbell */
+	mfspr   r3,SPRN_A2_CCR2
+	oris     r3,r3,A2_CCR2_ENABLE_PC@h
+	mtspr   SPRN_A2_CCR2,r3
+	isync
+
+	/* Setup CCR0 to disable power saving for now as it's busted
+	 * in the current implementations. Setup CCR1 to wake on
+	 * interrupts normally (we write the default value but who
+	 * knows what FW may have clobbered...)
+	 */
+	li	r3,0
+	mtspr	SPRN_A2_CCR0, r3
+	LOAD_REG_IMMEDIATE(r3,0x0f0f0f0f)
+	mtspr	SPRN_A2_CCR1, r3
+
+	/* Initialise MMUCR1 */
+	lis	r3,INITIAL_MMUCR1@h
+	ori	r3,r3,INITIAL_MMUCR1@l
+	mtspr	SPRN_MMUCR1,r3
+
+	/* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */
+	LOAD_REG_IMMEDIATE(r3, 0x000a7531)
+	mtspr	SPRN_MMUCR2,r3
+
+	/* Set MMUCR3 to write all thids bit to the TLB */
+	LOAD_REG_IMMEDIATE(r3, 0x0000000f)
+	mtspr	SPRN_MMUCR3,r3
+
+	/* Don't do ERAT stuff if running guest mode */
+	mfmsr	r3
+	andis.	r0,r3,MSR_GS@h
+	bne	1f
+
+	/* Now set the I-ERAT watermark to 15 */
+	lis	r4,(MMUCR0_TLBSEL_I|MMUCR0_ECL)@h
+	mtspr	SPRN_MMUCR0, r4
+	li	r4,A2_IERAT_SIZE-1
+	PPC_ERATWE(r4,r4,3)
+
+	/* Now set the D-ERAT watermark to 31 */
+	lis	r4,(MMUCR0_TLBSEL_D|MMUCR0_ECL)@h
+	mtspr	SPRN_MMUCR0, r4
+	li	r4,A2_DERAT_SIZE-1
+	PPC_ERATWE(r4,r4,3)
+
+	/* And invalidate the beast just in case. That won't get rid of
+	 * a bolted entry though it will be in LRU and so will go away eventually
+	 * but let's not bother for now
+	 */
+	PPC_ERATILX(0,0,0)
+1:
+	blr
+
+_GLOBAL(__restore_cpu_a2)
+	b	__setup_cpu_a2
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index 913611105c1..8053db02b85 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -88,6 +88,9 @@ _GLOBAL(__setup_cpu_e5500)
 	bl	__e500_dcache_setup
 #ifdef CONFIG_PPC_BOOK3E_64
 	bl	.__setup_base_ivors
+	bl	.setup_perfmon_ivor
+	bl	.setup_doorbell_ivors
+	bl	.setup_ehv_ivors
 #else
 	bl	__setup_e500mc_ivors
 #endif
diff --git a/arch/powerpc/kernel/cpu_setup_power7.S b/arch/powerpc/kernel/cpu_setup_power7.S
new file mode 100644
index 00000000000..4f9a93fcfe0
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_power7.S
@@ -0,0 +1,91 @@
+/*
+ * This file contains low level CPU setup functions.
+ *    Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+
+/* Entry: r3 = crap, r4 = ptr to cputable entry
+ *
+ * Note that we can be called twice for pseudo-PVRs
+ */
+_GLOBAL(__setup_cpu_power7)
+	mflr	r11
+	bl	__init_hvmode_206
+	mtlr	r11
+	beqlr
+	li	r0,0
+	mtspr	SPRN_LPID,r0
+	bl	__init_LPCR
+	bl	__init_TLB
+	mtlr	r11
+	blr
+
+_GLOBAL(__restore_cpu_power7)
+	mflr	r11
+	mfmsr	r3
+	rldicl.	r0,r3,4,63
+	beqlr
+	li	r0,0
+	mtspr	SPRN_LPID,r0
+	bl	__init_LPCR
+	bl	__init_TLB
+	mtlr	r11
+	blr
+
+__init_hvmode_206:
+	/* Disable CPU_FTR_HVMODE_206 and exit if MSR:HV is not set */
+	mfmsr	r3
+	rldicl.	r0,r3,4,63
+	bnelr
+	ld	r5,CPU_SPEC_FEATURES(r4)
+	LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE_206)
+	xor	r5,r5,r6
+	std	r5,CPU_SPEC_FEATURES(r4)
+	blr
+
+__init_LPCR:
+	/* Setup a sane LPCR:
+	 *
+	 *   LPES = 0b01 (HSRR0/1 used for 0x500)
+	 *   PECE = 0b111
+	 *   DPFD = 4
+	 *
+	 * Other bits untouched for now
+	 */
+	mfspr	r3,SPRN_LPCR
+	ori	r3,r3,(LPCR_LPES0|LPCR_LPES1)
+	xori	r3,r3, LPCR_LPES0
+	ori	r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2)
+	li	r5,7
+	sldi	r5,r5,LPCR_DPFD_SH
+	andc	r3,r3,r5
+	li	r5,4
+	sldi	r5,r5,LPCR_DPFD_SH
+	or	r3,r3,r5
+	mtspr	SPRN_LPCR,r3
+	isync
+	blr
+
+__init_TLB:
+	/* Clear the TLB */
+	li	r6,128
+	mtctr	r6
+	li	r7,0xc00	/* IS field = 0b11 */
+	ptesync
+2:	tlbiel	r7
+	addi	r7,r7,0x1000
+	bdnz	2b
+	ptesync
+1:	blr
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index b9602ee06de..34d2722b945 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -62,10 +62,12 @@ extern void __setup_cpu_745x(unsigned long offset, struct cpu_spec* spec);
 extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec);
 extern void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec* spec);
 extern void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_a2(unsigned long offset, struct cpu_spec* spec);
 extern void __restore_cpu_pa6t(void);
 extern void __restore_cpu_ppc970(void);
 extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec);
 extern void __restore_cpu_power7(void);
+extern void __restore_cpu_a2(void);
 #endif /* CONFIG_PPC64 */
 #if defined(CONFIG_E500)
 extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec);
@@ -199,7 +201,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER4 (gp)",
 		.cpu_features		= CPU_FTRS_POWER4,
 		.cpu_user_features	= COMMON_USER_POWER4,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_POWER4,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 8,
@@ -214,7 +216,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER4+ (gq)",
 		.cpu_features		= CPU_FTRS_POWER4,
 		.cpu_user_features	= COMMON_USER_POWER4,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_POWER4,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 8,
@@ -230,7 +232,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_features		= CPU_FTRS_PPC970,
 		.cpu_user_features	= COMMON_USER_POWER4 |
 			PPC_FEATURE_HAS_ALTIVEC_COMP,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_PPC970,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 8,
@@ -248,7 +250,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_features		= CPU_FTRS_PPC970,
 		.cpu_user_features	= COMMON_USER_POWER4 |
 			PPC_FEATURE_HAS_ALTIVEC_COMP,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_PPC970,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 8,
@@ -284,7 +286,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_features		= CPU_FTRS_PPC970,
 		.cpu_user_features	= COMMON_USER_POWER4 |
 			PPC_FEATURE_HAS_ALTIVEC_COMP,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_PPC970,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 8,
@@ -302,7 +304,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_features		= CPU_FTRS_PPC970,
 		.cpu_user_features	= COMMON_USER_POWER4 |
 			PPC_FEATURE_HAS_ALTIVEC_COMP,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_PPC970,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 8,
@@ -318,7 +320,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER5 (gr)",
 		.cpu_features		= CPU_FTRS_POWER5,
 		.cpu_user_features	= COMMON_USER_POWER5,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_POWER5,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 6,
@@ -338,7 +340,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER5+ (gs)",
 		.cpu_features		= CPU_FTRS_POWER5,
 		.cpu_user_features	= COMMON_USER_POWER5_PLUS,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_POWER5,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 6,
@@ -354,7 +356,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER5+ (gs)",
 		.cpu_features		= CPU_FTRS_POWER5,
 		.cpu_user_features	= COMMON_USER_POWER5_PLUS,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_POWER5,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 6,
@@ -371,7 +373,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER5+",
 		.cpu_features		= CPU_FTRS_POWER5,
 		.cpu_user_features	= COMMON_USER_POWER5_PLUS,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_POWER5,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.oprofile_cpu_type	= "ppc64/ibm-compat-v1",
@@ -385,7 +387,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_features		= CPU_FTRS_POWER6,
 		.cpu_user_features	= COMMON_USER_POWER6 |
 			PPC_FEATURE_POWER6_EXT,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_POWER6,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 6,
@@ -404,7 +406,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER6 (architected)",
 		.cpu_features		= CPU_FTRS_POWER6,
 		.cpu_user_features	= COMMON_USER_POWER6,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_POWER6,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.oprofile_cpu_type	= "ppc64/ibm-compat-v1",
@@ -417,12 +419,13 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER7 (architected)",
 		.cpu_features		= CPU_FTRS_POWER7,
 		.cpu_user_features	= COMMON_USER_POWER7,
-		.mmu_features		= MMU_FTR_HPTE_TABLE |
-			MMU_FTR_TLBIE_206,
+		.mmu_features		= MMU_FTRS_POWER7,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.oprofile_type		= PPC_OPROFILE_POWER4,
 		.oprofile_cpu_type	= "ppc64/ibm-compat-v1",
+		.cpu_setup		= __setup_cpu_power7,
+		.cpu_restore		= __restore_cpu_power7,
 		.platform		= "power7",
 	},
 	{	/* Power7 */
@@ -431,14 +434,15 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER7 (raw)",
 		.cpu_features		= CPU_FTRS_POWER7,
 		.cpu_user_features	= COMMON_USER_POWER7,
-		.mmu_features		= MMU_FTR_HPTE_TABLE |
-			MMU_FTR_TLBIE_206,
+		.mmu_features		= MMU_FTRS_POWER7,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 6,
 		.pmc_type		= PPC_PMC_IBM,
 		.oprofile_cpu_type	= "ppc64/power7",
 		.oprofile_type		= PPC_OPROFILE_POWER4,
+		.cpu_setup		= __setup_cpu_power7,
+		.cpu_restore		= __restore_cpu_power7,
 		.platform		= "power7",
 	},
 	{	/* Power7+ */
@@ -447,14 +451,15 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER7+ (raw)",
 		.cpu_features		= CPU_FTRS_POWER7,
 		.cpu_user_features	= COMMON_USER_POWER7,
-		.mmu_features		= MMU_FTR_HPTE_TABLE |
-			MMU_FTR_TLBIE_206,
+		.mmu_features		= MMU_FTRS_POWER7,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 6,
 		.pmc_type		= PPC_PMC_IBM,
 		.oprofile_cpu_type	= "ppc64/power7",
 		.oprofile_type		= PPC_OPROFILE_POWER4,
+		.cpu_setup		= __setup_cpu_power7,
+		.cpu_restore		= __restore_cpu_power7,
 		.platform		= "power7+",
 	},
 	{	/* Cell Broadband Engine */
@@ -465,7 +470,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_user_features	= COMMON_USER_PPC64 |
 			PPC_FEATURE_CELL | PPC_FEATURE_HAS_ALTIVEC_COMP |
 			PPC_FEATURE_SMT,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_CELL,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 4,
@@ -480,7 +485,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "PA6T",
 		.cpu_features		= CPU_FTRS_PA6T,
 		.cpu_user_features	= COMMON_USER_PA6T,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_PA6T,
 		.icache_bsize		= 64,
 		.dcache_bsize		= 64,
 		.num_pmcs		= 6,
@@ -497,7 +502,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER4 (compatible)",
 		.cpu_features		= CPU_FTRS_COMPATIBLE,
 		.cpu_user_features	= COMMON_USER_PPC64,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_DEFAULT_HPTE_ARCH_V2,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 6,
@@ -2005,7 +2010,22 @@ static struct cpu_spec __initdata cpu_specs[] = {
 #endif /* CONFIG_PPC32 */
 #endif /* CONFIG_E500 */
 
-#ifdef CONFIG_PPC_BOOK3E_64
+#ifdef CONFIG_PPC_A2
+	{	/* Standard A2 (>= DD2) + FPU core */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00480000,
+		.cpu_name		= "A2 (>= DD2)",
+		.cpu_features		= CPU_FTRS_A2,
+		.cpu_user_features	= COMMON_USER_PPC64,
+		.mmu_features		= MMU_FTRS_A2,
+		.icache_bsize		= 64,
+		.dcache_bsize		= 64,
+		.num_pmcs		= 0,
+		.cpu_setup		= __setup_cpu_a2,
+		.cpu_restore		= __restore_cpu_a2,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppca2",
+	},
 	{	/* This is a default entry to get going, to be replaced by
 		 * a real one at some stage
 		 */
@@ -2026,7 +2046,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.machine_check		= machine_check_generic,
 		.platform		= "power6",
 	},
-#endif
+#endif /* CONFIG_PPC_A2 */
 };
 
 static struct cpu_spec the_cpu_spec;
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index 5b5e1f002a8..4e6ee944495 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -64,9 +64,9 @@ void crash_ipi_callback(struct pt_regs *regs)
 		return;
 
 	hard_irq_disable();
-	if (!cpu_isset(cpu, cpus_in_crash))
+	if (!cpumask_test_cpu(cpu, &cpus_in_crash))
 		crash_save_cpu(regs, cpu);
-	cpu_set(cpu, cpus_in_crash);
+	cpumask_set_cpu(cpu, &cpus_in_crash);
 
 	/*
 	 * Entered via soft-reset - could be the kdump
@@ -77,8 +77,8 @@ void crash_ipi_callback(struct pt_regs *regs)
 	 * Tell the kexec CPU that entered via soft-reset and ready
 	 * to go down.
 	 */
-	if (cpu_isset(cpu, cpus_in_sr)) {
-		cpu_clear(cpu, cpus_in_sr);
+	if (cpumask_test_cpu(cpu, &cpus_in_sr)) {
+		cpumask_clear_cpu(cpu, &cpus_in_sr);
 		atomic_inc(&enter_on_soft_reset);
 	}
 
@@ -87,7 +87,7 @@ void crash_ipi_callback(struct pt_regs *regs)
 	 * This barrier is needed to make sure that all CPUs are stopped.
 	 * If not, soft-reset will be invoked to bring other CPUs.
 	 */
-	while (!cpu_isset(crashing_cpu, cpus_in_crash))
+	while (!cpumask_test_cpu(crashing_cpu, &cpus_in_crash))
 		cpu_relax();
 
 	if (ppc_md.kexec_cpu_down)
@@ -109,7 +109,7 @@ static void crash_soft_reset_check(int cpu)
 {
 	unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
 
-	cpu_clear(cpu, cpus_in_sr);
+	cpumask_clear_cpu(cpu, &cpus_in_sr);
 	while (atomic_read(&enter_on_soft_reset) != ncpus)
 		cpu_relax();
 }
@@ -132,7 +132,7 @@ static void crash_kexec_prepare_cpus(int cpu)
 	 */
 	printk(KERN_EMERG "Sending IPI to other cpus...\n");
 	msecs = 10000;
-	while ((cpus_weight(cpus_in_crash) < ncpus) && (--msecs > 0)) {
+	while ((cpumask_weight(&cpus_in_crash) < ncpus) && (--msecs > 0)) {
 		cpu_relax();
 		mdelay(1);
 	}
@@ -144,52 +144,24 @@ static void crash_kexec_prepare_cpus(int cpu)
 	 * user to do soft reset such that we get all.
 	 * Soft-reset will be used until better mechanism is implemented.
 	 */
-	if (cpus_weight(cpus_in_crash) < ncpus) {
+	if (cpumask_weight(&cpus_in_crash) < ncpus) {
 		printk(KERN_EMERG "done waiting: %d cpu(s) not responding\n",
-			ncpus - cpus_weight(cpus_in_crash));
+			ncpus - cpumask_weight(&cpus_in_crash));
 		printk(KERN_EMERG "Activate soft-reset to stop other cpu(s)\n");
-		cpus_in_sr = CPU_MASK_NONE;
+		cpumask_clear(&cpus_in_sr);
 		atomic_set(&enter_on_soft_reset, 0);
-		while (cpus_weight(cpus_in_crash) < ncpus)
+		while (cpumask_weight(&cpus_in_crash) < ncpus)
 			cpu_relax();
 	}
 	/*
 	 * Make sure all CPUs are entered via soft-reset if the kdump is
 	 * invoked using soft-reset.
 	 */
-	if (cpu_isset(cpu, cpus_in_sr))
+	if (cpumask_test_cpu(cpu, &cpus_in_sr))
 		crash_soft_reset_check(cpu);
 	/* Leave the IPI callback set */
 }
 
-/* wait for all the CPUs to hit real mode but timeout if they don't come in */
-#ifdef CONFIG_PPC_STD_MMU_64
-static void crash_kexec_wait_realmode(int cpu)
-{
-	unsigned int msecs;
-	int i;
-
-	msecs = 10000;
-	for (i=0; i < NR_CPUS && msecs > 0; i++) {
-		if (i == cpu)
-			continue;
-
-		while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) {
-			barrier();
-			if (!cpu_possible(i)) {
-				break;
-			}
-			if (!cpu_online(i)) {
-				break;
-			}
-			msecs--;
-			mdelay(1);
-		}
-	}
-	mb();
-}
-#endif	/* CONFIG_PPC_STD_MMU_64 */
-
 /*
  * This function will be called by secondary cpus or by kexec cpu
  * if soft-reset is activated to stop some CPUs.
@@ -210,7 +182,7 @@ void crash_kexec_secondary(struct pt_regs *regs)
 			 * exited using 'x'(exit and recover) or
 			 * kexec_should_crash() failed for all running tasks.
 			 */
-			cpu_clear(cpu, cpus_in_sr);
+			cpumask_clear_cpu(cpu, &cpus_in_sr);
 			local_irq_restore(flags);
 			return;
 		}
@@ -224,7 +196,7 @@ void crash_kexec_secondary(struct pt_regs *regs)
 		 * then start kexec boot.
 		 */
 		crash_soft_reset_check(cpu);
-		cpu_set(crashing_cpu, cpus_in_crash);
+		cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
 		if (ppc_md.kexec_cpu_down)
 			ppc_md.kexec_cpu_down(1, 0);
 		machine_kexec(kexec_crash_image);
@@ -234,7 +206,6 @@ void crash_kexec_secondary(struct pt_regs *regs)
 }
 
 #else	/* ! CONFIG_SMP */
-static inline void crash_kexec_wait_realmode(int cpu) {}
 
 static void crash_kexec_prepare_cpus(int cpu)
 {
@@ -253,10 +224,40 @@ static void crash_kexec_prepare_cpus(int cpu)
 
 void crash_kexec_secondary(struct pt_regs *regs)
 {
-	cpus_in_sr = CPU_MASK_NONE;
+	cpumask_clear(&cpus_in_sr);
 }
 #endif	/* CONFIG_SMP */
 
+/* wait for all the CPUs to hit real mode but timeout if they don't come in */
+#if defined(CONFIG_SMP) && defined(CONFIG_PPC_STD_MMU_64)
+static void crash_kexec_wait_realmode(int cpu)
+{
+	unsigned int msecs;
+	int i;
+
+	msecs = 10000;
+	for (i=0; i < nr_cpu_ids && msecs > 0; i++) {
+		if (i == cpu)
+			continue;
+
+		while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) {
+			barrier();
+			if (!cpu_possible(i)) {
+				break;
+			}
+			if (!cpu_online(i)) {
+				break;
+			}
+			msecs--;
+			mdelay(1);
+		}
+	}
+	mb();
+}
+#else
+static inline void crash_kexec_wait_realmode(int cpu) {}
+#endif	/* CONFIG_SMP && CONFIG_PPC_STD_MMU_64 */
+
 /*
  * Register a function to be called on shutdown.  Only use this if you
  * can't reset your device in the second kernel.
@@ -345,7 +346,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
 	crashing_cpu = smp_processor_id();
 	crash_save_cpu(regs, crashing_cpu);
 	crash_kexec_prepare_cpus(crashing_cpu);
-	cpu_set(crashing_cpu, cpus_in_crash);
+	cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
 	crash_kexec_wait_realmode(crashing_cpu);
 
 	machine_kexec_mask_interrupts();
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index 3307a52d797..2cc451aaaca 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -13,84 +13,35 @@
 #include <linux/kernel.h>
 #include <linux/smp.h>
 #include <linux/threads.h>
-#include <linux/percpu.h>
+#include <linux/hardirq.h>
 
 #include <asm/dbell.h>
 #include <asm/irq_regs.h>
 
 #ifdef CONFIG_SMP
-struct doorbell_cpu_info {
-	unsigned long	messages;	/* current messages bits */
-	unsigned int	tag;		/* tag value */
-};
-
-static DEFINE_PER_CPU(struct doorbell_cpu_info, doorbell_cpu_info);
-
 void doorbell_setup_this_cpu(void)
 {
-	struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info);
+	unsigned long tag = mfspr(SPRN_PIR) & 0x3fff;
 
-	info->messages = 0;
-	info->tag = mfspr(SPRN_PIR) & 0x3fff;
+	smp_muxed_ipi_set_data(smp_processor_id(), tag);
 }
 
-void doorbell_message_pass(int target, int msg)
+void doorbell_cause_ipi(int cpu, unsigned long data)
 {
-	struct doorbell_cpu_info *info;
-	int i;
-
-	if (target < NR_CPUS) {
-		info = &per_cpu(doorbell_cpu_info, target);
-		set_bit(msg, &info->messages);
-		ppc_msgsnd(PPC_DBELL, 0, info->tag);
-	}
-	else if (target == MSG_ALL_BUT_SELF) {
-		for_each_online_cpu(i) {
-			if (i == smp_processor_id())
-				continue;
-			info = &per_cpu(doorbell_cpu_info, i);
-			set_bit(msg, &info->messages);
-			ppc_msgsnd(PPC_DBELL, 0, info->tag);
-		}
-	}
-	else { /* target == MSG_ALL */
-		for_each_online_cpu(i) {
-			info = &per_cpu(doorbell_cpu_info, i);
-			set_bit(msg, &info->messages);
-		}
-		ppc_msgsnd(PPC_DBELL, PPC_DBELL_MSG_BRDCAST, 0);
-	}
+	ppc_msgsnd(PPC_DBELL, 0, data);
 }
 
 void doorbell_exception(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
-	struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info);
-	int msg;
 
-	/* Warning: regs can be NULL when called from irq enable */
+	irq_enter();
 
-	if (!info->messages || (num_online_cpus() < 2))
-		goto out;
+	smp_ipi_demux();
 
-	for (msg = 0; msg < 4; msg++)
-		if (test_and_clear_bit(msg, &info->messages))
-			smp_message_recv(msg);
-
-out:
+	irq_exit();
 	set_irq_regs(old_regs);
 }
-
-void doorbell_check_self(void)
-{
-	struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info);
-
-	if (!info->messages)
-		return;
-
-	ppc_msgsnd(PPC_DBELL, 0, info->tag);
-}
-
 #else /* CONFIG_SMP */
 void doorbell_exception(struct pt_regs *regs)
 {
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index d82878c4daa..d834425186a 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -421,6 +421,12 @@ BEGIN_FTR_SECTION
 	std	r24,THREAD_VRSAVE(r3)
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 #endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_PPC64
+BEGIN_FTR_SECTION
+	mfspr	r25,SPRN_DSCR
+	std	r25,THREAD_DSCR(r3)
+END_FTR_SECTION_IFSET(CPU_FTR_DSCR)
+#endif
 	and.	r0,r0,r22
 	beq+	1f
 	andc	r22,r22,r0
@@ -462,10 +468,10 @@ BEGIN_FTR_SECTION
   FTR_SECTION_ELSE_NESTED(95)
 	clrrdi	r6,r8,40	/* get its 1T ESID */
 	clrrdi	r9,r1,40	/* get current sp 1T ESID */
-  ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_1T_SEGMENT, 95)
+  ALT_MMU_FTR_SECTION_END_NESTED_IFCLR(MMU_FTR_1T_SEGMENT, 95)
 FTR_SECTION_ELSE
 	b	2f
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_SLB)
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_SLB)
 	clrldi.	r0,r6,2		/* is new ESID c00000000? */
 	cmpd	cr1,r6,r9	/* or is new ESID the same as current ESID? */
 	cror	eq,4*cr1+eq,eq
@@ -479,7 +485,7 @@ BEGIN_FTR_SECTION
 	li	r9,MMU_SEGSIZE_1T	/* insert B field */
 	oris	r6,r6,(MMU_SEGSIZE_1T << SLBIE_SSIZE_SHIFT)@h
 	rldimi	r7,r9,SLB_VSID_SSIZE_SHIFT,0
-END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 
 	/* Update the last bolted SLB.  No write barriers are needed
 	 * here, provided we only update the current CPU's SLB shadow
@@ -491,7 +497,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
 	std	r7,SLBSHADOW_STACKVSID(r9)  /* Save VSID */
 	std	r0,SLBSHADOW_STACKESID(r9)  /* Save ESID */
 
-	/* No need to check for CPU_FTR_NO_SLBIE_B here, since when
+	/* No need to check for MMU_FTR_NO_SLBIE_B here, since when
 	 * we have 1TB segments, the only CPUs known to have the errata
 	 * only support less than 1TB of system memory and we'll never
 	 * actually hit this code path.
@@ -522,6 +528,15 @@ BEGIN_FTR_SECTION
 	mtspr	SPRN_VRSAVE,r0		/* if G4, restore VRSAVE reg */
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 #endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_PPC64
+BEGIN_FTR_SECTION
+	ld	r0,THREAD_DSCR(r4)
+	cmpd	r0,r25
+	beq	1f
+	mtspr	SPRN_DSCR,r0
+1:	
+END_FTR_SECTION_IFSET(CPU_FTR_DSCR)
+#endif
 
 	/* r3-r13 are destroyed -- Cort */
 	REST_8GPRS(14, r1)
@@ -838,7 +853,7 @@ _GLOBAL(enter_rtas)
 
 _STATIC(rtas_return_loc)
 	/* relocation is off at this point */
-	mfspr	r4,SPRN_SPRG_PACA	/* Get PACA */
+	GET_PACA(r4)
 	clrldi	r4,r4,2			/* convert to realmode address */
 
 	bcl	20,31,$+4
@@ -869,7 +884,7 @@ _STATIC(rtas_restore_regs)
 	REST_8GPRS(14, r1)		/* Restore the non-volatiles */
 	REST_10GPRS(22, r1)		/* ditto */
 
-	mfspr	r13,SPRN_SPRG_PACA
+	GET_PACA(r13)
 
 	ld	r4,_CCR(r1)
 	mtcr	r4
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 9651acc3504..d24d4400cc7 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -17,6 +17,7 @@
 #include <asm/cputable.h>
 #include <asm/setup.h>
 #include <asm/thread_info.h>
+#include <asm/reg_a2.h>
 #include <asm/exception-64e.h>
 #include <asm/bug.h>
 #include <asm/irqflags.h>
@@ -252,9 +253,6 @@ exception_marker:
 	.balign	0x1000
 	.globl interrupt_base_book3e
 interrupt_base_book3e:					/* fake trap */
-	/* Note: If real debug exceptions are supported by the HW, the vector
-	 * below will have to be patched up to point to an appropriate handler
-	 */
 	EXCEPTION_STUB(0x000, machine_check)		/* 0x0200 */
 	EXCEPTION_STUB(0x020, critical_input)		/* 0x0580 */
 	EXCEPTION_STUB(0x040, debug_crit)		/* 0x0d00 */
@@ -271,8 +269,13 @@ interrupt_base_book3e:					/* fake trap */
 	EXCEPTION_STUB(0x1a0, watchdog)			/* 0x09f0 */
 	EXCEPTION_STUB(0x1c0, data_tlb_miss)
 	EXCEPTION_STUB(0x1e0, instruction_tlb_miss)
+	EXCEPTION_STUB(0x260, perfmon)
 	EXCEPTION_STUB(0x280, doorbell)
 	EXCEPTION_STUB(0x2a0, doorbell_crit)
+	EXCEPTION_STUB(0x2c0, guest_doorbell)
+	EXCEPTION_STUB(0x2e0, guest_doorbell_crit)
+	EXCEPTION_STUB(0x300, hypercall)
+	EXCEPTION_STUB(0x320, ehpriv)
 
 	.globl interrupt_end_book3e
 interrupt_end_book3e:
@@ -454,6 +457,70 @@ interrupt_end_book3e:
 kernel_dbg_exc:
 	b	.	/* NYI */
 
+/* Debug exception as a debug interrupt*/
+	START_EXCEPTION(debug_debug);
+	DBG_EXCEPTION_PROLOG(0xd00, PROLOG_ADDITION_2REGS)
+
+	/*
+	 * If there is a single step or branch-taken exception in an
+	 * exception entry sequence, it was probably meant to apply to
+	 * the code where the exception occurred (since exception entry
+	 * doesn't turn off DE automatically).  We simulate the effect
+	 * of turning off DE on entry to an exception handler by turning
+	 * off DE in the DSRR1 value and clearing the debug status.
+	 */
+
+	mfspr	r14,SPRN_DBSR		/* check single-step/branch taken */
+	andis.	r15,r14,DBSR_IC@h
+	beq+	1f
+
+	LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
+	LOAD_REG_IMMEDIATE(r15,interrupt_end_book3e)
+	cmpld	cr0,r10,r14
+	cmpld	cr1,r10,r15
+	blt+	cr0,1f
+	bge+	cr1,1f
+
+	/* here it looks like we got an inappropriate debug exception. */
+	lis	r14,DBSR_IC@h		/* clear the IC event */
+	rlwinm	r11,r11,0,~MSR_DE	/* clear DE in the DSRR1 value */
+	mtspr	SPRN_DBSR,r14
+	mtspr	SPRN_DSRR1,r11
+	lwz	r10,PACA_EXDBG+EX_CR(r13)	/* restore registers */
+	ld	r1,PACA_EXDBG+EX_R1(r13)
+	ld	r14,PACA_EXDBG+EX_R14(r13)
+	ld	r15,PACA_EXDBG+EX_R15(r13)
+	mtcr	r10
+	ld	r10,PACA_EXDBG+EX_R10(r13)	/* restore registers */
+	ld	r11,PACA_EXDBG+EX_R11(r13)
+	mfspr	r13,SPRN_SPRG_DBG_SCRATCH
+	rfdi
+
+	/* Normal debug exception */
+	/* XXX We only handle coming from userspace for now since we can't
+	 *     quite save properly an interrupted kernel state yet
+	 */
+1:	andi.	r14,r11,MSR_PR;		/* check for userspace again */
+	beq	kernel_dbg_exc;		/* if from kernel mode */
+
+	/* Now we mash up things to make it look like we are coming on a
+	 * normal exception
+	 */
+	mfspr	r15,SPRN_SPRG_DBG_SCRATCH
+	mtspr	SPRN_SPRG_GEN_SCRATCH,r15
+	mfspr	r14,SPRN_DBSR
+	EXCEPTION_COMMON(0xd00, PACA_EXDBG, INTS_DISABLE_ALL)
+	std	r14,_DSISR(r1)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	mr	r4,r14
+	ld	r14,PACA_EXDBG+EX_R14(r13)
+	ld	r15,PACA_EXDBG+EX_R15(r13)
+	bl	.save_nvgprs
+	bl	.DebugException
+	b	.ret_from_except
+
+	MASKABLE_EXCEPTION(0x260, perfmon, .performance_monitor_exception, ACK_NONE)
+
 /* Doorbell interrupt */
 	MASKABLE_EXCEPTION(0x2070, doorbell, .doorbell_exception, ACK_NONE)
 
@@ -468,6 +535,11 @@ kernel_dbg_exc:
 //	b	ret_from_crit_except
 	b	.
 
+	MASKABLE_EXCEPTION(0x2c0, guest_doorbell, .unknown_exception, ACK_NONE)
+	MASKABLE_EXCEPTION(0x2e0, guest_doorbell_crit, .unknown_exception, ACK_NONE)
+	MASKABLE_EXCEPTION(0x310, hypercall, .unknown_exception, ACK_NONE)
+	MASKABLE_EXCEPTION(0x320, ehpriv, .unknown_exception, ACK_NONE)
+
 
 /*
  * An interrupt came in while soft-disabled; clear EE in SRR1,
@@ -587,7 +659,12 @@ fast_exception_return:
 BAD_STACK_TRAMPOLINE(0x000)
 BAD_STACK_TRAMPOLINE(0x100)
 BAD_STACK_TRAMPOLINE(0x200)
+BAD_STACK_TRAMPOLINE(0x260)
+BAD_STACK_TRAMPOLINE(0x2c0)
+BAD_STACK_TRAMPOLINE(0x2e0)
 BAD_STACK_TRAMPOLINE(0x300)
+BAD_STACK_TRAMPOLINE(0x310)
+BAD_STACK_TRAMPOLINE(0x320)
 BAD_STACK_TRAMPOLINE(0x400)
 BAD_STACK_TRAMPOLINE(0x500)
 BAD_STACK_TRAMPOLINE(0x600)
@@ -864,8 +941,23 @@ have_hes:
 	 * that will have to be made dependent on whether we are running under
 	 * a hypervisor I suppose.
 	 */
-	ori	r3,r3,MAS0_HES | MAS0_WQ_ALLWAYS
-	mtspr	SPRN_MAS0,r3
+
+	/* BEWARE, MAGIC
+	 * This code is called as an ordinary function on the boot CPU. But to
+	 * avoid duplication, this code is also used in SCOM bringup of
+	 * secondary CPUs. We read the code between the initial_tlb_code_start
+	 * and initial_tlb_code_end labels one instruction at a time and RAM it
+	 * into the new core via SCOM. That doesn't process branches, so there
+	 * must be none between those two labels. It also means if this code
+	 * ever takes any parameters, the SCOM code must also be updated to
+	 * provide them.
+	 */
+	.globl a2_tlbinit_code_start
+a2_tlbinit_code_start:
+
+	ori	r11,r3,MAS0_WQ_ALLWAYS
+	oris	r11,r11,MAS0_ESEL(3)@h /* Use way 3: workaround A2 erratum 376 */
+	mtspr	SPRN_MAS0,r11
 	lis	r3,(MAS1_VALID | MAS1_IPROT)@h
 	ori	r3,r3,BOOK3E_PAGESZ_1GB << MAS1_TSIZE_SHIFT
 	mtspr	SPRN_MAS1,r3
@@ -879,18 +971,86 @@ have_hes:
 	/* Write the TLB entry */
 	tlbwe
 
+	.globl a2_tlbinit_after_linear_map
+a2_tlbinit_after_linear_map:
+
 	/* Now we branch the new virtual address mapped by this entry */
 	LOAD_REG_IMMEDIATE(r3,1f)
 	mtctr	r3
 	bctr
 
 1:	/* We are now running at PAGE_OFFSET, clean the TLB of everything
-	 * else (XXX we should scan for bolted crap from the firmware too)
+	 * else (including IPROTed things left by firmware)
+	 * r4 = TLBnCFG
+	 * r3 = current address (more or less)
 	 */
+
+	li	r5,0
+	mtspr	SPRN_MAS6,r5
+	tlbsx	0,r3
+
+	rlwinm	r9,r4,0,TLBnCFG_N_ENTRY
+	rlwinm	r10,r4,8,0xff
+	addi	r10,r10,-1	/* Get inner loop mask */
+
+	li	r3,1
+
+	mfspr	r5,SPRN_MAS1
+	rlwinm	r5,r5,0,(~(MAS1_VALID|MAS1_IPROT))
+
+	mfspr	r6,SPRN_MAS2
+	rldicr	r6,r6,0,51		/* Extract EPN */
+
+	mfspr	r7,SPRN_MAS0
+	rlwinm	r7,r7,0,0xffff0fff	/* Clear HES and WQ */
+
+	rlwinm	r8,r7,16,0xfff		/* Extract ESEL */
+
+2:	add	r4,r3,r8
+	and	r4,r4,r10
+
+	rlwimi	r7,r4,16,MAS0_ESEL_MASK
+
+	mtspr	SPRN_MAS0,r7
+	mtspr	SPRN_MAS1,r5
+	mtspr	SPRN_MAS2,r6
+	tlbwe
+
+	addi	r3,r3,1
+	and.	r4,r3,r10
+
+	bne	3f
+	addis	r6,r6,(1<<30)@h
+3:
+	cmpw	r3,r9
+	blt	2b
+
+	.globl  a2_tlbinit_after_iprot_flush
+a2_tlbinit_after_iprot_flush:
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_WSP
+	/* Now establish early debug mappings if applicable */
+	/* Restore the MAS0 we used for linear mapping load */
+	mtspr	SPRN_MAS0,r11
+
+	lis	r3,(MAS1_VALID | MAS1_IPROT)@h
+	ori	r3,r3,(BOOK3E_PAGESZ_4K << MAS1_TSIZE_SHIFT)
+	mtspr	SPRN_MAS1,r3
+	LOAD_REG_IMMEDIATE(r3, WSP_UART_VIRT | MAS2_I | MAS2_G)
+	mtspr	SPRN_MAS2,r3
+	LOAD_REG_IMMEDIATE(r3, WSP_UART_PHYS | MAS3_SR | MAS3_SW)
+	mtspr	SPRN_MAS7_MAS3,r3
+	/* re-use the MAS8 value from the linear mapping */
+	tlbwe
+#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */
+
 	PPC_TLBILX(0,0,0)
 	sync
 	isync
 
+	.globl a2_tlbinit_code_end
+a2_tlbinit_code_end:
+
 	/* We translate LR and return */
 	mflr	r3
 	tovirt(r3,r3)
@@ -1040,3 +1200,33 @@ _GLOBAL(__setup_base_ivors)
 	sync
 
 	blr
+
+_GLOBAL(setup_perfmon_ivor)
+	SET_IVOR(35, 0x260) /* Performance Monitor */
+	blr
+
+_GLOBAL(setup_doorbell_ivors)
+	SET_IVOR(36, 0x280) /* Processor Doorbell */
+	SET_IVOR(37, 0x2a0) /* Processor Doorbell Crit */
+
+	/* Check MMUCFG[LPIDSIZE] to determine if we have category E.HV */
+	mfspr	r10,SPRN_MMUCFG
+	rlwinm.	r10,r10,0,MMUCFG_LPIDSIZE
+	beqlr
+
+	SET_IVOR(38, 0x2c0) /* Guest Processor Doorbell */
+	SET_IVOR(39, 0x2e0) /* Guest Processor Doorbell Crit/MC */
+	blr
+
+_GLOBAL(setup_ehv_ivors)
+	/*
+	 * We may be running as a guest and lack E.HV even on a chip
+	 * that normally has it.
+	 */
+	mfspr	r10,SPRN_MMUCFG
+	rlwinm.	r10,r10,0,MMUCFG_LPIDSIZE
+	beqlr
+
+	SET_IVOR(40, 0x300) /* Embedded Hypervisor System Call */
+	SET_IVOR(41, 0x320) /* Embedded Hypervisor Privilege */
+	blr
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index aeb739e1876..a85f4874cba 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -37,23 +37,51 @@
 	.globl __start_interrupts
 __start_interrupts:
 
-	STD_EXCEPTION_PSERIES(0x100, system_reset)
+	.globl system_reset_pSeries;
+system_reset_pSeries:
+	HMT_MEDIUM;
+	DO_KVM	0x100;
+	SET_SCRATCH0(r13)
+#ifdef CONFIG_PPC_P7_NAP
+BEGIN_FTR_SECTION
+	/* Running native on arch 2.06 or later, check if we are
+	 * waking up from nap. We only handle no state loss and
+	 * supervisor state loss. We do -not- handle hypervisor
+	 * state loss at this time.
+	 */
+	mfspr	r13,SPRN_SRR1
+	rlwinm	r13,r13,47-31,30,31
+	cmpwi	cr0,r13,1
+	bne	1f
+	b	.power7_wakeup_noloss
+1:	cmpwi	cr0,r13,2
+	bne	1f
+	b	.power7_wakeup_loss
+	/* Total loss of HV state is fatal, we could try to use the
+	 * PIR to locate a PACA, then use an emergency stack etc...
+	 * but for now, let's just stay stuck here
+	 */
+1:	cmpwi	cr0,r13,3
+	beq	.
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE_206)
+#endif /* CONFIG_PPC_P7_NAP */
+	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD)
 
 	. = 0x200
 _machine_check_pSeries:
 	HMT_MEDIUM
 	DO_KVM	0x200
-	mtspr	SPRN_SPRG_SCRATCH0,r13		/* save r13 */
-	EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
+	SET_SCRATCH0(r13)
+	EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, EXC_STD)
 
 	. = 0x300
 	.globl data_access_pSeries
 data_access_pSeries:
 	HMT_MEDIUM
 	DO_KVM	0x300
-	mtspr	SPRN_SPRG_SCRATCH0,r13
+	SET_SCRATCH0(r13)
 BEGIN_FTR_SECTION
-	mfspr	r13,SPRN_SPRG_PACA
+	GET_PACA(r13)
 	std	r9,PACA_EXSLB+EX_R9(r13)
 	std	r10,PACA_EXSLB+EX_R10(r13)
 	mfspr	r10,SPRN_DAR
@@ -67,22 +95,22 @@ BEGIN_FTR_SECTION
 	std	r11,PACA_EXGEN+EX_R11(r13)
 	ld	r11,PACA_EXSLB+EX_R9(r13)
 	std	r12,PACA_EXGEN+EX_R12(r13)
-	mfspr	r12,SPRN_SPRG_SCRATCH0
+	GET_SCRATCH0(r12)
 	std	r10,PACA_EXGEN+EX_R10(r13)
 	std	r11,PACA_EXGEN+EX_R9(r13)
 	std	r12,PACA_EXGEN+EX_R13(r13)
-	EXCEPTION_PROLOG_PSERIES_1(data_access_common)
+	EXCEPTION_PROLOG_PSERIES_1(data_access_common, EXC_STD)
 FTR_SECTION_ELSE
-	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common)
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_SLB)
+	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD)
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_SLB)
 
 	. = 0x380
 	.globl data_access_slb_pSeries
 data_access_slb_pSeries:
 	HMT_MEDIUM
 	DO_KVM	0x380
-	mtspr	SPRN_SPRG_SCRATCH0,r13
-	mfspr	r13,SPRN_SPRG_PACA		/* get paca address into r13 */
+	SET_SCRATCH0(r13)
+	GET_PACA(r13)
 	std	r3,PACA_EXSLB+EX_R3(r13)
 	mfspr	r3,SPRN_DAR
 	std	r9,PACA_EXSLB+EX_R9(r13)	/* save r9 - r12 */
@@ -95,7 +123,7 @@ data_access_slb_pSeries:
 	std	r10,PACA_EXSLB+EX_R10(r13)
 	std	r11,PACA_EXSLB+EX_R11(r13)
 	std	r12,PACA_EXSLB+EX_R12(r13)
-	mfspr	r10,SPRN_SPRG_SCRATCH0
+	GET_SCRATCH0(r10)
 	std	r10,PACA_EXSLB+EX_R13(r13)
 	mfspr	r12,SPRN_SRR1		/* and SRR1 */
 #ifndef CONFIG_RELOCATABLE
@@ -113,15 +141,15 @@ data_access_slb_pSeries:
 	bctr
 #endif
 
-	STD_EXCEPTION_PSERIES(0x400, instruction_access)
+	STD_EXCEPTION_PSERIES(0x400, 0x400, instruction_access)
 
 	. = 0x480
 	.globl instruction_access_slb_pSeries
 instruction_access_slb_pSeries:
 	HMT_MEDIUM
 	DO_KVM	0x480
-	mtspr	SPRN_SPRG_SCRATCH0,r13
-	mfspr	r13,SPRN_SPRG_PACA		/* get paca address into r13 */
+	SET_SCRATCH0(r13)
+	GET_PACA(r13)
 	std	r3,PACA_EXSLB+EX_R3(r13)
 	mfspr	r3,SPRN_SRR0		/* SRR0 is faulting address */
 	std	r9,PACA_EXSLB+EX_R9(r13)	/* save r9 - r12 */
@@ -134,7 +162,7 @@ instruction_access_slb_pSeries:
 	std	r10,PACA_EXSLB+EX_R10(r13)
 	std	r11,PACA_EXSLB+EX_R11(r13)
 	std	r12,PACA_EXSLB+EX_R12(r13)
-	mfspr	r10,SPRN_SPRG_SCRATCH0
+	GET_SCRATCH0(r10)
 	std	r10,PACA_EXSLB+EX_R13(r13)
 	mfspr	r12,SPRN_SRR1		/* and SRR1 */
 #ifndef CONFIG_RELOCATABLE
@@ -147,13 +175,29 @@ instruction_access_slb_pSeries:
 	bctr
 #endif
 
-	MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt)
-	STD_EXCEPTION_PSERIES(0x600, alignment)
-	STD_EXCEPTION_PSERIES(0x700, program_check)
-	STD_EXCEPTION_PSERIES(0x800, fp_unavailable)
-	MASKABLE_EXCEPTION_PSERIES(0x900, decrementer)
-	STD_EXCEPTION_PSERIES(0xa00, trap_0a)
-	STD_EXCEPTION_PSERIES(0xb00, trap_0b)
+	/* We open code these as we can't have a ". = x" (even with
+	 * x = "." within a feature section
+	 */
+	. = 0x500;
+	.globl hardware_interrupt_pSeries;
+	.globl hardware_interrupt_hv;
+hardware_interrupt_pSeries:
+hardware_interrupt_hv:
+	BEGIN_FTR_SECTION
+		_MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, EXC_STD)
+	FTR_SECTION_ELSE
+		_MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt, EXC_HV)
+	ALT_FTR_SECTION_END_IFCLR(CPU_FTR_HVMODE_206)
+
+	STD_EXCEPTION_PSERIES(0x600, 0x600, alignment)
+	STD_EXCEPTION_PSERIES(0x700, 0x700, program_check)
+	STD_EXCEPTION_PSERIES(0x800, 0x800, fp_unavailable)
+
+	MASKABLE_EXCEPTION_PSERIES(0x900, 0x900, decrementer)
+	MASKABLE_EXCEPTION_HV(0x980, 0x980, decrementer)
+
+	STD_EXCEPTION_PSERIES(0xa00, 0xa00, trap_0a)
+	STD_EXCEPTION_PSERIES(0xb00, 0xb00, trap_0b)
 
 	. = 0xc00
 	.globl	system_call_pSeries
@@ -165,13 +209,13 @@ BEGIN_FTR_SECTION
 	beq-	1f
 END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
 	mr	r9,r13
-	mfspr	r13,SPRN_SPRG_PACA
+	GET_PACA(r13)
 	mfspr	r11,SPRN_SRR0
-	ld	r12,PACAKBASE(r13)
-	ld	r10,PACAKMSR(r13)
-	LOAD_HANDLER(r12, system_call_entry)
-	mtspr	SPRN_SRR0,r12
 	mfspr	r12,SPRN_SRR1
+	ld	r10,PACAKBASE(r13)
+	LOAD_HANDLER(r10, system_call_entry)
+	mtspr	SPRN_SRR0,r10
+	ld	r10,PACAKMSR(r13)
 	mtspr	SPRN_SRR1,r10
 	rfid
 	b	.	/* prevent speculative execution */
@@ -183,8 +227,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
 	rfid		/* return to userspace */
 	b	.
 
-	STD_EXCEPTION_PSERIES(0xd00, single_step)
-	STD_EXCEPTION_PSERIES(0xe00, trap_0e)
+	STD_EXCEPTION_PSERIES(0xd00, 0xd00, single_step)
+
+	/* At 0xe??? we have a bunch of hypervisor exceptions, we branch
+	 * out of line to handle them
+	 */
+	. = 0xe00
+	b	h_data_storage_hv
+	. = 0xe20
+	b	h_instr_storage_hv
+	. = 0xe40
+	b	emulation_assist_hv
+	. = 0xe50
+	b	hmi_exception_hv
+	. = 0xe60
+	b	hmi_exception_hv
 
 	/* We need to deal with the Altivec unavailable exception
 	 * here which is at 0xf20, thus in the middle of the
@@ -193,39 +250,42 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
 	 */
 performance_monitor_pSeries_1:
 	. = 0xf00
-	DO_KVM	0xf00
 	b	performance_monitor_pSeries
 
 altivec_unavailable_pSeries_1:
 	. = 0xf20
-	DO_KVM	0xf20
 	b	altivec_unavailable_pSeries
 
 vsx_unavailable_pSeries_1:
 	. = 0xf40
-	DO_KVM	0xf40
 	b	vsx_unavailable_pSeries
 
 #ifdef CONFIG_CBE_RAS
-	HSTD_EXCEPTION_PSERIES(0x1200, cbe_system_error)
+	STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error)
 #endif /* CONFIG_CBE_RAS */
-	STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint)
+	STD_EXCEPTION_PSERIES(0x1300, 0x1300, instruction_breakpoint)
 #ifdef CONFIG_CBE_RAS
-	HSTD_EXCEPTION_PSERIES(0x1600, cbe_maintenance)
+	STD_EXCEPTION_HV(0x1600, 0x1602, cbe_maintenance)
 #endif /* CONFIG_CBE_RAS */
-	STD_EXCEPTION_PSERIES(0x1700, altivec_assist)
+	STD_EXCEPTION_PSERIES(0x1700, 0x1700, altivec_assist)
 #ifdef CONFIG_CBE_RAS
-	HSTD_EXCEPTION_PSERIES(0x1800, cbe_thermal)
+	STD_EXCEPTION_HV(0x1800, 0x1802, cbe_thermal)
 #endif /* CONFIG_CBE_RAS */
 
 	. = 0x3000
 
-/*** pSeries interrupt support ***/
+/*** Out of line interrupts support ***/
+
+	/* moved from 0xe00 */
+	STD_EXCEPTION_HV(., 0xe00, h_data_storage)
+	STD_EXCEPTION_HV(., 0xe20, h_instr_storage)
+	STD_EXCEPTION_HV(., 0xe40, emulation_assist)
+	STD_EXCEPTION_HV(., 0xe60, hmi_exception) /* need to flush cache ? */
 
 	/* moved from 0xf00 */
-	STD_EXCEPTION_PSERIES(., performance_monitor)
-	STD_EXCEPTION_PSERIES(., altivec_unavailable)
-	STD_EXCEPTION_PSERIES(., vsx_unavailable)
+	STD_EXCEPTION_PSERIES(., 0xf00, performance_monitor)
+	STD_EXCEPTION_PSERIES(., 0xf20, altivec_unavailable)
+	STD_EXCEPTION_PSERIES(., 0xf40, vsx_unavailable)
 
 /*
  * An interrupt came in while soft-disabled; clear EE in SRR1,
@@ -240,17 +300,30 @@ masked_interrupt:
 	rotldi	r10,r10,16
 	mtspr	SPRN_SRR1,r10
 	ld	r10,PACA_EXGEN+EX_R10(r13)
-	mfspr	r13,SPRN_SPRG_SCRATCH0
+	GET_SCRATCH0(r13)
 	rfid
 	b	.
 
+masked_Hinterrupt:
+	stb	r10,PACAHARDIRQEN(r13)
+	mtcrf	0x80,r9
+	ld	r9,PACA_EXGEN+EX_R9(r13)
+	mfspr	r10,SPRN_HSRR1
+	rldicl	r10,r10,48,1		/* clear MSR_EE */
+	rotldi	r10,r10,16
+	mtspr	SPRN_HSRR1,r10
+	ld	r10,PACA_EXGEN+EX_R10(r13)
+	GET_SCRATCH0(r13)
+	hrfid
+	b	.
+
 	.align	7
 do_stab_bolted_pSeries:
 	std	r11,PACA_EXSLB+EX_R11(r13)
 	std	r12,PACA_EXSLB+EX_R12(r13)
-	mfspr	r10,SPRN_SPRG_SCRATCH0
+	GET_SCRATCH0(r10)
 	std	r10,PACA_EXSLB+EX_R13(r13)
-	EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted)
+	EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD)
 
 #ifdef CONFIG_PPC_PSERIES
 /*
@@ -260,15 +333,15 @@ do_stab_bolted_pSeries:
       .align 7
 system_reset_fwnmi:
 	HMT_MEDIUM
-	mtspr	SPRN_SPRG_SCRATCH0,r13		/* save r13 */
-	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common)
+	SET_SCRATCH0(r13)		/* save r13 */
+	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD)
 
 	.globl machine_check_fwnmi
       .align 7
 machine_check_fwnmi:
 	HMT_MEDIUM
-	mtspr	SPRN_SPRG_SCRATCH0,r13		/* save r13 */
-	EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
+	SET_SCRATCH0(r13)		/* save r13 */
+	EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, EXC_STD)
 
 #endif /* CONFIG_PPC_PSERIES */
 
@@ -282,7 +355,7 @@ slb_miss_user_pseries:
 	std	r10,PACA_EXGEN+EX_R10(r13)
 	std	r11,PACA_EXGEN+EX_R11(r13)
 	std	r12,PACA_EXGEN+EX_R12(r13)
-	mfspr	r10,SPRG_SCRATCH0
+	GET_SCRATCH0(r10)
 	ld	r11,PACA_EXSLB+EX_R9(r13)
 	ld	r12,PACA_EXSLB+EX_R3(r13)
 	std	r10,PACA_EXGEN+EX_R13(r13)
@@ -342,6 +415,8 @@ machine_check_common:
 	STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception)
 	STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception)
 	STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception)
+        STD_EXCEPTION_COMMON(0xe40, emulation_assist, .program_check_exception)
+        STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception)
 	STD_EXCEPTION_COMMON_IDLE(0xf00, performance_monitor, .performance_monitor_exception)
 	STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception)
 #ifdef CONFIG_ALTIVEC
@@ -386,9 +461,24 @@ bad_stack:
 	std	r12,_XER(r1)
 	SAVE_GPR(0,r1)
 	SAVE_GPR(2,r1)
-	SAVE_4GPRS(3,r1)
-	SAVE_2GPRS(7,r1)
-	SAVE_10GPRS(12,r1)
+	ld	r10,EX_R3(r3)
+	std	r10,GPR3(r1)
+	SAVE_GPR(4,r1)
+	SAVE_4GPRS(5,r1)
+	ld	r9,EX_R9(r3)
+	ld	r10,EX_R10(r3)
+	SAVE_2GPRS(9,r1)
+	ld	r9,EX_R11(r3)
+	ld	r10,EX_R12(r3)
+	ld	r11,EX_R13(r3)
+	std	r9,GPR11(r1)
+	std	r10,GPR12(r1)
+	std	r11,GPR13(r1)
+BEGIN_FTR_SECTION
+	ld	r10,EX_CFAR(r3)
+	std	r10,ORIG_GPR3(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+	SAVE_8GPRS(14,r1)
 	SAVE_10GPRS(22,r1)
 	lhz	r12,PACA_TRAP_SAVE(r13)
 	std	r12,_TRAP(r1)
@@ -397,6 +487,9 @@ bad_stack:
 	li	r12,0
 	std	r12,0(r11)
 	ld	r2,PACATOC(r13)
+	ld	r11,exception_marker@toc(r2)
+	std	r12,RESULT(r1)
+	std	r11,STACK_FRAME_OVERHEAD-16(r1)
 1:	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	.kernel_bad_stack
 	b	1b
@@ -419,6 +512,19 @@ data_access_common:
 	li	r5,0x300
 	b	.do_hash_page	 	/* Try to handle as hpte fault */
 
+	.align  7
+        .globl  h_data_storage_common
+h_data_storage_common:
+        mfspr   r10,SPRN_HDAR
+        std     r10,PACA_EXGEN+EX_DAR(r13)
+        mfspr   r10,SPRN_HDSISR
+        stw     r10,PACA_EXGEN+EX_DSISR(r13)
+        EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN)
+        bl      .save_nvgprs
+        addi    r3,r1,STACK_FRAME_OVERHEAD
+        bl      .unknown_exception
+        b       .ret_from_except
+
 	.align	7
 	.globl instruction_access_common
 instruction_access_common:
@@ -428,6 +534,8 @@ instruction_access_common:
 	li	r5,0x400
 	b	.do_hash_page		/* Try to handle as hpte fault */
 
+        STD_EXCEPTION_COMMON(0xe20, h_instr_storage, .unknown_exception)
+
 /*
  * Here is the common SLB miss user that is used when going to virtual
  * mode for SLB misses, that is currently not used
@@ -750,7 +858,7 @@ _STATIC(do_hash_page)
 BEGIN_FTR_SECTION
 	andis.	r0,r4,0x0020		/* Is it a segment table fault? */
 	bne-	do_ste_alloc		/* If so handle it */
-END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB)
 
 	clrrdi	r11,r1,THREAD_SHIFT
 	lwz	r0,TI_PREEMPT(r11)	/* If we're in an "NMI" */
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index c5c24beb838..ba250d505e0 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -805,19 +805,6 @@ _ENTRY(copy_and_flush)
 	blr
 
 #ifdef CONFIG_SMP
-#ifdef CONFIG_GEMINI
-	.globl	__secondary_start_gemini
-__secondary_start_gemini:
-        mfspr   r4,SPRN_HID0
-        ori     r4,r4,HID0_ICFI
-        li      r3,0
-        ori     r3,r3,HID0_ICE
-        andc    r4,r4,r3
-        mtspr   SPRN_HID0,r4
-        sync
-        b       __secondary_start
-#endif /* CONFIG_GEMINI */
-
 	.globl __secondary_start_mpc86xx
 __secondary_start_mpc86xx:
 	mfspr	r3, SPRN_PIR
@@ -890,15 +877,6 @@ __secondary_start:
 	mtspr	SPRN_SRR1,r4
 	SYNC
 	RFI
-
-_GLOBAL(start_secondary_resume)
-	/* Reset stack */
-	rlwinm	r1,r1,0,0,(31-THREAD_SHIFT)	/* current_thread_info() */
-	addi	r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
-	li	r3,0
-	std	r3,0(r1)		/* Zero the stack frame pointer	*/
-	bl	start_secondary
-	b	.
 #endif /* CONFIG_SMP */
 
 #ifdef CONFIG_KVM_BOOK3S_HANDLER
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 3a319f9c9d3..ba504099844 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -147,6 +147,8 @@ __secondary_hold:
 	mtctr	r4
 	mr	r3,r24
 	li	r4,0
+	/* Make sure that patched code is visible */
+	isync
 	bctr
 #else
 	BUG_OPCODE
@@ -216,19 +218,25 @@ generic_secondary_common_init:
 	 */
 	LOAD_REG_ADDR(r13, paca)	/* Load paca pointer		 */
 	ld	r13,0(r13)		/* Get base vaddr of paca array	 */
+#ifndef CONFIG_SMP
+	addi	r13,r13,PACA_SIZE	/* know r13 if used accidentally */
+	b	.kexec_wait		/* wait for next kernel if !SMP	 */
+#else
+	LOAD_REG_ADDR(r7, nr_cpu_ids)	/* Load nr_cpu_ids address       */
+	lwz	r7,0(r7)		/* also the max paca allocated 	 */
 	li	r5,0			/* logical cpu id                */
 1:	lhz	r6,PACAHWCPUID(r13)	/* Load HW procid from paca      */
 	cmpw	r6,r24			/* Compare to our id             */
 	beq	2f
 	addi	r13,r13,PACA_SIZE	/* Loop to next PACA on miss     */
 	addi	r5,r5,1
-	cmpwi	r5,NR_CPUS
+	cmpw	r5,r7			/* Check if more pacas exist     */
 	blt	1b
 
 	mr	r3,r24			/* not found, copy phys to r3	 */
 	b	.kexec_wait		/* next kernel might do better	 */
 
-2:	mtspr	SPRN_SPRG_PACA,r13	/* Save vaddr of paca in an SPRG */
+2:	SET_PACA(r13)
 #ifdef CONFIG_PPC_BOOK3E
 	addi	r12,r13,PACA_EXTLB	/* and TLB exc frame in another  */
 	mtspr	SPRN_SPRG_TLB_EXFRAME,r12
@@ -236,34 +244,39 @@ generic_secondary_common_init:
 
 	/* From now on, r24 is expected to be logical cpuid */
 	mr	r24,r5
-3:	HMT_LOW
-	lbz	r23,PACAPROCSTART(r13)	/* Test if this processor should */
-					/* start.			 */
-
-#ifndef CONFIG_SMP
-	b	3b			/* Never go on non-SMP		 */
-#else
-	cmpwi	0,r23,0
-	beq	3b			/* Loop until told to go	 */
-
-	sync				/* order paca.run and cur_cpu_spec */
 
 	/* See if we need to call a cpu state restore handler */
 	LOAD_REG_ADDR(r23, cur_cpu_spec)
 	ld	r23,0(r23)
 	ld	r23,CPU_SPEC_RESTORE(r23)
 	cmpdi	0,r23,0
-	beq	4f
+	beq	3f
 	ld	r23,0(r23)
 	mtctr	r23
 	bctrl
 
-4:	/* Create a temp kernel stack for use before relocation is on.	*/
+3:	LOAD_REG_ADDR(r3, boot_cpu_count) /* Decrement boot_cpu_count */
+	lwarx	r4,0,r3
+	subi	r4,r4,1
+	stwcx.	r4,0,r3
+	bne	3b
+	isync
+
+4:	HMT_LOW
+	lbz	r23,PACAPROCSTART(r13)	/* Test if this processor should */
+					/* start.			 */
+	cmpwi	0,r23,0
+	beq	4b			/* Loop until told to go	 */
+
+	sync				/* order paca.run and cur_cpu_spec */
+	isync				/* In case code patching happened */
+
+	/* Create a temp kernel stack for use before relocation is on.	*/
 	ld	r1,PACAEMERGSP(r13)
 	subi	r1,r1,STACK_FRAME_OVERHEAD
 
 	b	__secondary_start
-#endif
+#endif /* SMP */
 
 /*
  * Turn the MMU off.
@@ -534,7 +547,7 @@ _GLOBAL(pmac_secondary_start)
 	ld	r4,0(r4)		/* Get base vaddr of paca array	*/
 	mulli	r13,r24,PACA_SIZE	/* Calculate vaddr of right paca */
 	add	r13,r13,r4		/* for this processor.		*/
-	mtspr	SPRN_SPRG_PACA,r13	/* Save vaddr of paca in an SPRG*/
+	SET_PACA(r13)			/* Save vaddr of paca in an SPRG*/
 
 	/* Mark interrupts soft and hard disabled (they might be enabled
 	 * in the PACA when doing hotplug)
@@ -645,7 +658,7 @@ _GLOBAL(enable_64b_mode)
 	oris	r11,r11,0x8000		/* CM bit set, we'll set ICM later */
 	mtmsr	r11
 #else /* CONFIG_PPC_BOOK3E */
-	li	r12,(MSR_SF | MSR_ISF)@highest
+	li	r12,(MSR_64BIT | MSR_ISF)@highest
 	sldi	r12,r12,48
 	or	r11,r11,r12
 	mtmsrd	r11
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
new file mode 100644
index 00000000000..f8f0bc7f1d4
--- /dev/null
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -0,0 +1,97 @@
+/*
+ *  This file contains the power_save function for 970-family CPUs.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/threads.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ppc-opcode.h>
+
+#undef DEBUG
+
+	.text
+
+_GLOBAL(power7_idle)
+	/* Now check if user or arch enabled NAP mode */
+	LOAD_REG_ADDRBASE(r3,powersave_nap)
+	lwz	r4,ADDROFF(powersave_nap)(r3)
+	cmpwi	0,r4,0
+	beqlr
+
+	/* NAP is a state loss, we create a regs frame on the
+	 * stack, fill it up with the state we care about and
+	 * stick a pointer to it in PACAR1. We really only
+	 * need to save PC, some CR bits and the NV GPRs,
+	 * but for now an interrupt frame will do.
+	 */
+	mflr	r0
+	std	r0,16(r1)
+	stdu	r1,-INT_FRAME_SIZE(r1)
+	std	r0,_LINK(r1)
+	std	r0,_NIP(r1)
+
+#ifndef CONFIG_SMP
+	/* Make sure FPU, VSX etc... are flushed as we may lose
+	 * state when going to nap mode
+	 */
+	bl	.discard_lazy_cpu_state
+#endif /* CONFIG_SMP */
+
+	/* Hard disable interrupts */
+	mfmsr	r9
+	rldicl	r9,r9,48,1
+	rotldi	r9,r9,16
+	mtmsrd	r9,1			/* hard-disable interrupts */
+	li	r0,0
+	stb	r0,PACASOFTIRQEN(r13)	/* we'll hard-enable shortly */
+	stb	r0,PACAHARDIRQEN(r13)
+
+	/* Continue saving state */
+	SAVE_GPR(2, r1)
+	SAVE_NVGPRS(r1)
+	mfcr	r3
+	std	r3,_CCR(r1)
+	std	r9,_MSR(r1)
+	std	r1,PACAR1(r13)
+
+	/* Magic NAP mode enter sequence */
+	std	r0,0(r1)
+	ptesync
+	ld	r0,0(r1)
+1:	cmp	cr0,r0,r0
+	bne	1b
+	PPC_NAP
+	b	.
+
+_GLOBAL(power7_wakeup_loss)
+	GET_PACA(r13)
+	ld	r1,PACAR1(r13)
+	REST_NVGPRS(r1)
+	REST_GPR(2, r1)
+	ld	r3,_CCR(r1)
+	ld	r4,_MSR(r1)
+	ld	r5,_NIP(r1)
+	addi	r1,r1,INT_FRAME_SIZE
+	mtcr	r3
+	mtspr	SPRN_SRR1,r4
+	mtspr	SPRN_SRR0,r5
+	rfid
+
+_GLOBAL(power7_wakeup_noloss)
+	GET_PACA(r13)
+	ld	r1,PACAR1(r13)
+	ld	r4,_MSR(r1)
+	ld	r5,_NIP(r1)
+	addi	r1,r1,INT_FRAME_SIZE
+	mtspr	SPRN_SRR1,r4
+	mtspr	SPRN_SRR0,r5
+	rfid
diff --git a/arch/powerpc/platforms/cell/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index 5c1118e3194..ffafaea3d26 100644
--- a/arch/powerpc/platforms/cell/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -17,8 +17,7 @@
 #include <asm/machdep.h>
 #include <asm/pgtable.h>
 #include <asm/ppc-pci.h>
-
-#include "io-workarounds.h"
+#include <asm/io-workarounds.h>
 
 #define IOWA_MAX_BUS	8
 
@@ -145,7 +144,19 @@ static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
 	return res;
 }
 
-/* Regist new bus to support workaround */
+/* Enable IO workaround */
+static void __devinit io_workaround_init(void)
+{
+	static int io_workaround_inited;
+
+	if (io_workaround_inited)
+		return;
+	ppc_pci_io = iowa_pci_io;
+	ppc_md.ioremap = iowa_ioremap;
+	io_workaround_inited = 1;
+}
+
+/* Register new bus to support workaround */
 void __devinit iowa_register_bus(struct pci_controller *phb,
 			struct ppc_pci_io *ops,
 			int (*initfunc)(struct iowa_bus *, void *), void *data)
@@ -153,6 +164,8 @@ void __devinit iowa_register_bus(struct pci_controller *phb,
 	struct iowa_bus *bus;
 	struct device_node *np = phb->dn;
 
+	io_workaround_init();
+
 	if (iowa_bus_count >= IOWA_MAX_BUS) {
 		pr_err("IOWA:Too many pci bridges, "
 		       "workarounds disabled for %s\n", np->full_name);
@@ -162,6 +175,7 @@ void __devinit iowa_register_bus(struct pci_controller *phb,
 	bus = &iowa_busses[iowa_bus_count];
 	bus->phb = phb;
 	bus->ops = ops;
+	bus->private = data;
 
 	if (initfunc)
 		if ((*initfunc)(bus, data))
@@ -172,14 +186,3 @@ void __devinit iowa_register_bus(struct pci_controller *phb,
 	pr_debug("IOWA:[%d]Add bus, %s.\n", iowa_bus_count-1, np->full_name);
 }
 
-/* enable IO workaround */
-void __devinit io_workaround_init(void)
-{
-	static int io_workaround_inited;
-
-	if (io_workaround_inited)
-		return;
-	ppc_pci_io = iowa_pci_io;
-	ppc_md.ioremap = iowa_ioremap;
-	io_workaround_inited = 1;
-}
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index f621b7d2d86..a24d37d4cf5 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -66,7 +66,6 @@
 #include <asm/ptrace.h>
 #include <asm/machdep.h>
 #include <asm/udbg.h>
-#include <asm/dbell.h>
 #include <asm/smp.h>
 
 #ifdef CONFIG_PPC64
@@ -160,7 +159,8 @@ notrace void arch_local_irq_restore(unsigned long en)
 
 #if defined(CONFIG_BOOKE) && defined(CONFIG_SMP)
 	/* Check for pending doorbell interrupts and resend to ourself */
-	doorbell_check_self();
+	if (cpu_has_feature(CPU_FTR_DBELL))
+		smp_muxed_ipi_resend();
 #endif
 
 	/*
@@ -397,24 +397,28 @@ struct thread_info *mcheckirq_ctx[NR_CPUS] __read_mostly;
 void exc_lvl_ctx_init(void)
 {
 	struct thread_info *tp;
-	int i, hw_cpu;
+	int i, cpu_nr;
 
 	for_each_possible_cpu(i) {
-		hw_cpu = get_hard_smp_processor_id(i);
-		memset((void *)critirq_ctx[hw_cpu], 0, THREAD_SIZE);
-		tp = critirq_ctx[hw_cpu];
-		tp->cpu = i;
+#ifdef CONFIG_PPC64
+		cpu_nr = i;
+#else
+		cpu_nr = get_hard_smp_processor_id(i);
+#endif
+		memset((void *)critirq_ctx[cpu_nr], 0, THREAD_SIZE);
+		tp = critirq_ctx[cpu_nr];
+		tp->cpu = cpu_nr;
 		tp->preempt_count = 0;
 
 #ifdef CONFIG_BOOKE
-		memset((void *)dbgirq_ctx[hw_cpu], 0, THREAD_SIZE);
-		tp = dbgirq_ctx[hw_cpu];
-		tp->cpu = i;
+		memset((void *)dbgirq_ctx[cpu_nr], 0, THREAD_SIZE);
+		tp = dbgirq_ctx[cpu_nr];
+		tp->cpu = cpu_nr;
 		tp->preempt_count = 0;
 
-		memset((void *)mcheckirq_ctx[hw_cpu], 0, THREAD_SIZE);
-		tp = mcheckirq_ctx[hw_cpu];
-		tp->cpu = i;
+		memset((void *)mcheckirq_ctx[cpu_nr], 0, THREAD_SIZE);
+		tp = mcheckirq_ctx[cpu_nr];
+		tp->cpu = cpu_nr;
 		tp->preempt_count = HARDIRQ_OFFSET;
 #endif
 	}
@@ -477,20 +481,41 @@ void do_softirq(void)
  * IRQ controller and virtual interrupts
  */
 
+/* The main irq map itself is an array of NR_IRQ entries containing the
+ * associate host and irq number. An entry with a host of NULL is free.
+ * An entry can be allocated if it's free, the allocator always then sets
+ * hwirq first to the host's invalid irq number and then fills ops.
+ */
+struct irq_map_entry {
+	irq_hw_number_t	hwirq;
+	struct irq_host	*host;
+};
+
 static LIST_HEAD(irq_hosts);
 static DEFINE_RAW_SPINLOCK(irq_big_lock);
-static unsigned int revmap_trees_allocated;
 static DEFINE_MUTEX(revmap_trees_mutex);
-struct irq_map_entry irq_map[NR_IRQS];
+static struct irq_map_entry irq_map[NR_IRQS];
 static unsigned int irq_virq_count = NR_IRQS;
 static struct irq_host *irq_default_host;
 
+irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
+{
+	return irq_map[d->irq].hwirq;
+}
+EXPORT_SYMBOL_GPL(irqd_to_hwirq);
+
 irq_hw_number_t virq_to_hw(unsigned int virq)
 {
 	return irq_map[virq].hwirq;
 }
 EXPORT_SYMBOL_GPL(virq_to_hw);
 
+bool virq_is_host(unsigned int virq, struct irq_host *host)
+{
+	return irq_map[virq].host == host;
+}
+EXPORT_SYMBOL_GPL(virq_is_host);
+
 static int default_irq_host_match(struct irq_host *h, struct device_node *np)
 {
 	return h->of_node != NULL && h->of_node == np;
@@ -511,7 +536,7 @@ struct irq_host *irq_alloc_host(struct device_node *of_node,
 	/* Allocate structure and revmap table if using linear mapping */
 	if (revmap_type == IRQ_HOST_MAP_LINEAR)
 		size += revmap_arg * sizeof(unsigned int);
-	host = zalloc_maybe_bootmem(size, GFP_KERNEL);
+	host = kzalloc(size, GFP_KERNEL);
 	if (host == NULL)
 		return NULL;
 
@@ -561,14 +586,14 @@ struct irq_host *irq_alloc_host(struct device_node *of_node,
 			irq_map[i].host = host;
 			smp_wmb();
 
-			/* Clear norequest flags */
-			irq_clear_status_flags(i, IRQ_NOREQUEST);
-
 			/* Legacy flags are left to default at this point,
 			 * one can then use irq_create_mapping() to
 			 * explicitly change them
 			 */
 			ops->map(host, i, i);
+
+			/* Clear norequest flags */
+			irq_clear_status_flags(i, IRQ_NOREQUEST);
 		}
 		break;
 	case IRQ_HOST_MAP_LINEAR:
@@ -579,6 +604,9 @@ struct irq_host *irq_alloc_host(struct device_node *of_node,
 		smp_wmb();
 		host->revmap_data.linear.revmap = rmap;
 		break;
+	case IRQ_HOST_MAP_TREE:
+		INIT_RADIX_TREE(&host->revmap_data.tree, GFP_KERNEL);
+		break;
 	default:
 		break;
 	}
@@ -636,8 +664,6 @@ static int irq_setup_virq(struct irq_host *host, unsigned int virq,
 		goto error;
 	}
 
-	irq_clear_status_flags(virq, IRQ_NOREQUEST);
-
 	/* map it */
 	smp_wmb();
 	irq_map[virq].hwirq = hwirq;
@@ -648,6 +674,8 @@ static int irq_setup_virq(struct irq_host *host, unsigned int virq,
 		goto errdesc;
 	}
 
+	irq_clear_status_flags(virq, IRQ_NOREQUEST);
+
 	return 0;
 
 errdesc:
@@ -704,8 +732,6 @@ unsigned int irq_create_mapping(struct irq_host *host,
 	 */
 	virq = irq_find_mapping(host, hwirq);
 	if (virq != NO_IRQ) {
-		if (host->ops->remap)
-			host->ops->remap(host, virq, hwirq);
 		pr_debug("irq: -> existing mapping on virq %d\n", virq);
 		return virq;
 	}
@@ -786,14 +812,15 @@ void irq_dispose_mapping(unsigned int virq)
 		return;
 
 	host = irq_map[virq].host;
-	WARN_ON (host == NULL);
-	if (host == NULL)
+	if (WARN_ON(host == NULL))
 		return;
 
 	/* Never unmap legacy interrupts */
 	if (host->revmap_type == IRQ_HOST_MAP_LEGACY)
 		return;
 
+	irq_set_status_flags(virq, IRQ_NOREQUEST);
+
 	/* remove chip and handler */
 	irq_set_chip_and_handler(virq, NULL, NULL);
 
@@ -813,13 +840,6 @@ void irq_dispose_mapping(unsigned int virq)
 			host->revmap_data.linear.revmap[hwirq] = NO_IRQ;
 		break;
 	case IRQ_HOST_MAP_TREE:
-		/*
-		 * Check if radix tree allocated yet, if not then nothing to
-		 * remove.
-		 */
-		smp_rmb();
-		if (revmap_trees_allocated < 1)
-			break;
 		mutex_lock(&revmap_trees_mutex);
 		radix_tree_delete(&host->revmap_data.tree, hwirq);
 		mutex_unlock(&revmap_trees_mutex);
@@ -830,8 +850,6 @@ void irq_dispose_mapping(unsigned int virq)
 	smp_mb();
 	irq_map[virq].hwirq = host->inval_irq;
 
-	irq_set_status_flags(virq, IRQ_NOREQUEST);
-
 	irq_free_descs(virq, 1);
 	/* Free it */
 	irq_free_virt(virq, 1);
@@ -877,16 +895,9 @@ unsigned int irq_radix_revmap_lookup(struct irq_host *host,
 	struct irq_map_entry *ptr;
 	unsigned int virq;
 
-	WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE);
-
-	/*
-	 * Check if the radix tree exists and has bee initialized.
-	 * If not, we fallback to slow mode
-	 */
-	if (revmap_trees_allocated < 2)
+	if (WARN_ON_ONCE(host->revmap_type != IRQ_HOST_MAP_TREE))
 		return irq_find_mapping(host, hwirq);
 
-	/* Now try to resolve */
 	/*
 	 * No rcu_read_lock(ing) needed, the ptr returned can't go under us
 	 * as it's referencing an entry in the static irq_map table.
@@ -909,16 +920,7 @@ unsigned int irq_radix_revmap_lookup(struct irq_host *host,
 void irq_radix_revmap_insert(struct irq_host *host, unsigned int virq,
 			     irq_hw_number_t hwirq)
 {
-
-	WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE);
-
-	/*
-	 * Check if the radix tree exists yet.
-	 * If not, then the irq will be inserted into the tree when it gets
-	 * initialized.
-	 */
-	smp_rmb();
-	if (revmap_trees_allocated < 1)
+	if (WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE))
 		return;
 
 	if (virq != NO_IRQ) {
@@ -934,7 +936,8 @@ unsigned int irq_linear_revmap(struct irq_host *host,
 {
 	unsigned int *revmap;
 
-	WARN_ON(host->revmap_type != IRQ_HOST_MAP_LINEAR);
+	if (WARN_ON_ONCE(host->revmap_type != IRQ_HOST_MAP_LINEAR))
+		return irq_find_mapping(host, hwirq);
 
 	/* Check revmap bounds */
 	if (unlikely(hwirq >= host->revmap_data.linear.size))
@@ -1028,53 +1031,6 @@ int arch_early_irq_init(void)
 	return 0;
 }
 
-/* We need to create the radix trees late */
-static int irq_late_init(void)
-{
-	struct irq_host *h;
-	unsigned int i;
-
-	/*
-	 * No mutual exclusion with respect to accessors of the tree is needed
-	 * here as the synchronization is done via the state variable
-	 * revmap_trees_allocated.
-	 */
-	list_for_each_entry(h, &irq_hosts, link) {
-		if (h->revmap_type == IRQ_HOST_MAP_TREE)
-			INIT_RADIX_TREE(&h->revmap_data.tree, GFP_KERNEL);
-	}
-
-	/*
-	 * Make sure the radix trees inits are visible before setting
-	 * the flag
-	 */
-	smp_wmb();
-	revmap_trees_allocated = 1;
-
-	/*
-	 * Insert the reverse mapping for those interrupts already present
-	 * in irq_map[].
-	 */
-	mutex_lock(&revmap_trees_mutex);
-	for (i = 0; i < irq_virq_count; i++) {
-		if (irq_map[i].host &&
-		    (irq_map[i].host->revmap_type == IRQ_HOST_MAP_TREE))
-			radix_tree_insert(&irq_map[i].host->revmap_data.tree,
-					  irq_map[i].hwirq, &irq_map[i]);
-	}
-	mutex_unlock(&revmap_trees_mutex);
-
-	/*
-	 * Make sure the radix trees insertions are visible before setting
-	 * the flag
-	 */
-	smp_wmb();
-	revmap_trees_allocated = 2;
-
-	return 0;
-}
-arch_initcall(irq_late_init);
-
 #ifdef CONFIG_VIRQ_DEBUG
 static int virq_debug_show(struct seq_file *m, void *private)
 {
@@ -1082,10 +1038,11 @@ static int virq_debug_show(struct seq_file *m, void *private)
 	struct irq_desc *desc;
 	const char *p;
 	static const char none[] = "none";
+	void *data;
 	int i;
 
-	seq_printf(m, "%-5s  %-7s  %-15s  %s\n", "virq", "hwirq",
-		      "chip name", "host name");
+	seq_printf(m, "%-5s  %-7s  %-15s  %-18s  %s\n", "virq", "hwirq",
+		      "chip name", "chip data", "host name");
 
 	for (i = 1; i < nr_irqs; i++) {
 		desc = irq_to_desc(i);
@@ -1098,7 +1055,7 @@ static int virq_debug_show(struct seq_file *m, void *private)
 			struct irq_chip *chip;
 
 			seq_printf(m, "%5d  ", i);
-			seq_printf(m, "0x%05lx  ", virq_to_hw(i));
+			seq_printf(m, "0x%05lx  ", irq_map[i].hwirq);
 
 			chip = irq_desc_get_chip(desc);
 			if (chip && chip->name)
@@ -1107,6 +1064,9 @@ static int virq_debug_show(struct seq_file *m, void *private)
 				p = none;
 			seq_printf(m, "%-15s  ", p);
 
+			data = irq_desc_get_chip_data(desc);
+			seq_printf(m, "0x%16p  ", data);
+
 			if (irq_map[i].host && irq_map[i].host->of_node)
 				p = irq_map[i].host->of_node->full_name;
 			else
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 9411747bbca..76a6e40a6f7 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -109,7 +109,7 @@ static int kgdb_call_nmi_hook(struct pt_regs *regs)
 #ifdef CONFIG_SMP
 void kgdb_roundup_cpus(unsigned long flags)
 {
-	smp_send_debugger_break(MSG_ALL_BUT_SELF);
+	smp_send_debugger_break();
 }
 #endif
 
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index 301db65f05a..84daabe2fcb 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -132,34 +132,6 @@ static int iseries_lparcfg_data(struct seq_file *m, void *v)
 /*
  * Methods used to fetch LPAR data when running on a pSeries platform.
  */
-/**
- * h_get_mpp
- * H_GET_MPP hcall returns info in 7 parms
- */
-int h_get_mpp(struct hvcall_mpp_data *mpp_data)
-{
-	int rc;
-	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
-
-	rc = plpar_hcall9(H_GET_MPP, retbuf);
-
-	mpp_data->entitled_mem = retbuf[0];
-	mpp_data->mapped_mem = retbuf[1];
-
-	mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
-	mpp_data->pool_num = retbuf[2] & 0xffff;
-
-	mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff;
-	mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff;
-	mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff;
-
-	mpp_data->pool_size = retbuf[4];
-	mpp_data->loan_request = retbuf[5];
-	mpp_data->backing_mem = retbuf[6];
-
-	return rc;
-}
-EXPORT_SYMBOL(h_get_mpp);
 
 struct hvcall_ppp_data {
 	u64	entitlement;
@@ -345,6 +317,30 @@ static void parse_mpp_data(struct seq_file *m)
 	seq_printf(m, "backing_memory=%ld bytes\n", mpp_data.backing_mem);
 }
 
+/**
+ * parse_mpp_x_data
+ * Parse out data returned from h_get_mpp_x
+ */
+static void parse_mpp_x_data(struct seq_file *m)
+{
+	struct hvcall_mpp_x_data mpp_x_data;
+
+	if (!firmware_has_feature(FW_FEATURE_XCMO))
+		return;
+	if (h_get_mpp_x(&mpp_x_data))
+		return;
+
+	seq_printf(m, "coalesced_bytes=%ld\n", mpp_x_data.coalesced_bytes);
+
+	if (mpp_x_data.pool_coalesced_bytes)
+		seq_printf(m, "pool_coalesced_bytes=%ld\n",
+			   mpp_x_data.pool_coalesced_bytes);
+	if (mpp_x_data.pool_purr_cycles)
+		seq_printf(m, "coalesce_pool_purr=%ld\n", mpp_x_data.pool_purr_cycles);
+	if (mpp_x_data.pool_spurr_cycles)
+		seq_printf(m, "coalesce_pool_spurr=%ld\n", mpp_x_data.pool_spurr_cycles);
+}
+
 #define SPLPAR_CHARACTERISTICS_TOKEN 20
 #define SPLPAR_MAXLENGTH 1026*(sizeof(char))
 
@@ -520,6 +516,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
 		parse_system_parameter_string(m);
 		parse_ppp_data(m);
 		parse_mpp_data(m);
+		parse_mpp_x_data(m);
 		pseries_cmo_data(m);
 		splpar_dispatch_data(m);
 
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 094bd9821ad..998a1002860 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -694,6 +694,17 @@ _GLOBAL(kernel_thread)
 	addi	r1,r1,16
 	blr
 
+#ifdef CONFIG_SMP
+_GLOBAL(start_secondary_resume)
+	/* Reset stack */
+	rlwinm	r1,r1,0,0,(31-THREAD_SHIFT)	/* current_thread_info() */
+	addi	r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+	li	r3,0
+	stw	r3,0(r1)		/* Zero the stack frame pointer	*/
+	bl	start_secondary
+	b	.
+#endif /* CONFIG_SMP */
+	
 /*
  * This routine is just here to keep GCC happy - sigh...
  */
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 206a321a71d..e89df59cdc5 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -462,7 +462,8 @@ _GLOBAL(disable_kernel_fp)
  * wait for the flag to change, indicating this kernel is going away but
  * the slave code for the next one is at addresses 0 to 100.
  *
- * This is used by all slaves.
+ * This is used by all slaves, even those that did not find a matching
+ * paca in the secondary startup code.
  *
  * Physical (hardware) cpu id should be in r3.
  */
@@ -471,10 +472,6 @@ _GLOBAL(kexec_wait)
 1:	mflr	r5
 	addi	r5,r5,kexec_flag-1b
 
-	li	r4,KEXEC_STATE_REAL_MODE
-	stb	r4,PACAKEXECSTATE(r13)
-	SYNC
-
 99:	HMT_LOW
 #ifdef CONFIG_KEXEC		/* use no memory without kexec */
 	lwz	r4,0(r5)
@@ -499,11 +496,17 @@ kexec_flag:
  *
  * get phys id from paca
  * switch to real mode
+ * mark the paca as no longer used
  * join other cpus in kexec_wait(phys_id)
  */
 _GLOBAL(kexec_smp_wait)
 	lhz	r3,PACAHWCPUID(r13)
 	bl	real_mode
+
+	li	r4,KEXEC_STATE_REAL_MODE
+	stb	r4,PACAKEXECSTATE(r13)
+	SYNC
+
 	b	.kexec_wait
 
 /*
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 10f0aadee95..efeb8818418 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -7,7 +7,7 @@
  *      2 of the License, or (at your option) any later version.
  */
 
-#include <linux/threads.h>
+#include <linux/smp.h>
 #include <linux/module.h>
 #include <linux/memblock.h>
 
@@ -156,18 +156,29 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
 /* Put the paca pointer into r13 and SPRG_PACA */
 void setup_paca(struct paca_struct *new_paca)
 {
+	/* Setup r13 */
 	local_paca = new_paca;
-	mtspr(SPRN_SPRG_PACA, local_paca);
+
 #ifdef CONFIG_PPC_BOOK3E
+	/* On Book3E, initialize the TLB miss exception frames */
 	mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb);
+#else
+	/* In HV mode, we setup both HPACA and PACA to avoid problems
+	 * if we do a GET_PACA() before the feature fixups have been
+	 * applied
+	 */
+	if (cpu_has_feature(CPU_FTR_HVMODE_206))
+		mtspr(SPRN_SPRG_HPACA, local_paca);
 #endif
+	mtspr(SPRN_SPRG_PACA, local_paca);
+
 }
 
 static int __initdata paca_size;
 
 void __init allocate_pacas(void)
 {
-	int nr_cpus, cpu, limit;
+	int cpu, limit;
 
 	/*
 	 * We can't take SLB misses on the paca, and we want to access them
@@ -179,23 +190,18 @@ void __init allocate_pacas(void)
 	if (firmware_has_feature(FW_FEATURE_ISERIES))
 		limit = min(limit, HvPagesToMap * HVPAGESIZE);
 
-	nr_cpus = NR_CPUS;
-	/* On iSeries we know we can never have more than 64 cpus */
-	if (firmware_has_feature(FW_FEATURE_ISERIES))
-		nr_cpus = min(64, nr_cpus);
-
-	paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpus);
+	paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids);
 
 	paca = __va(memblock_alloc_base(paca_size, PAGE_SIZE, limit));
 	memset(paca, 0, paca_size);
 
 	printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n",
-		paca_size, nr_cpus, paca);
+		paca_size, nr_cpu_ids, paca);
 
-	allocate_lppacas(nr_cpus, limit);
+	allocate_lppacas(nr_cpu_ids, limit);
 
 	/* Can't use for_each_*_cpu, as they aren't functional yet */
-	for (cpu = 0; cpu < nr_cpus; cpu++)
+	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
 		initialise_paca(&paca[cpu], cpu);
 }
 
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index d225d99fe39..6baabc13306 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -43,10 +43,9 @@ void * __devinit update_dn_pci_info(struct device_node *dn, void *data)
 	const u32 *regs;
 	struct pci_dn *pdn;
 
-	pdn = alloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL);
+	pdn = zalloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL);
 	if (pdn == NULL)
 		return NULL;
-	memset(pdn, 0, sizeof(*pdn));
 	dn->data = pdn;
 	pdn->node = dn;
 	pdn->phb = phb;
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index ef3ef566235..7d28f540200 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -54,7 +54,6 @@ extern void single_step_exception(struct pt_regs *regs);
 extern int sys_sigreturn(struct pt_regs *regs);
 
 EXPORT_SYMBOL(clear_pages);
-EXPORT_SYMBOL(copy_page);
 EXPORT_SYMBOL(ISA_DMA_THRESHOLD);
 EXPORT_SYMBOL(DMA_MODE_READ);
 EXPORT_SYMBOL(DMA_MODE_WRITE);
@@ -88,9 +87,7 @@ EXPORT_SYMBOL(__copy_tofrom_user);
 EXPORT_SYMBOL(__clear_user);
 EXPORT_SYMBOL(__strncpy_from_user);
 EXPORT_SYMBOL(__strnlen_user);
-#ifdef CONFIG_PPC64
-EXPORT_SYMBOL(copy_4K_page);
-#endif
+EXPORT_SYMBOL(copy_page);
 
 #if defined(CONFIG_PCI) && defined(CONFIG_PPC32)
 EXPORT_SYMBOL(isa_io_base);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index f74f355a961..095043d7994 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -702,6 +702,8 @@ void prepare_to_copy(struct task_struct *tsk)
 /*
  * Copy a thread..
  */
+extern unsigned long dscr_default; /* defined in arch/powerpc/kernel/sysfs.c */
+
 int copy_thread(unsigned long clone_flags, unsigned long usp,
 		unsigned long unused, struct task_struct *p,
 		struct pt_regs *regs)
@@ -755,11 +757,11 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 				_ALIGN_UP(sizeof(struct thread_info), 16);
 
 #ifdef CONFIG_PPC_STD_MMU_64
-	if (cpu_has_feature(CPU_FTR_SLB)) {
+	if (mmu_has_feature(MMU_FTR_SLB)) {
 		unsigned long sp_vsid;
 		unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
 
-		if (cpu_has_feature(CPU_FTR_1T_SEGMENT))
+		if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
 			sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_1T)
 				<< SLB_VSID_SHIFT_1T;
 		else
@@ -769,6 +771,20 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 		p->thread.ksp_vsid = sp_vsid;
 	}
 #endif /* CONFIG_PPC_STD_MMU_64 */
+#ifdef CONFIG_PPC64 
+	if (cpu_has_feature(CPU_FTR_DSCR)) {
+		if (current->thread.dscr_inherit) {
+			p->thread.dscr_inherit = 1;
+			p->thread.dscr = current->thread.dscr;
+		} else if (0 != dscr_default) {
+			p->thread.dscr_inherit = 1;
+			p->thread.dscr = dscr_default;
+		} else {
+			p->thread.dscr_inherit = 0;
+			p->thread.dscr = 0;
+		}
+	}
+#endif
 
 	/*
 	 * The PPC64 ABI makes use of a TOC to contain function 
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index e74fa12afc8..48aeb55faae 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -68,6 +68,7 @@ int __initdata iommu_force_on;
 unsigned long tce_alloc_start, tce_alloc_end;
 u64 ppc64_rma_size;
 #endif
+static phys_addr_t first_memblock_size;
 
 static int __init early_parse_mem(char *p)
 {
@@ -123,18 +124,19 @@ static void __init move_device_tree(void)
  */
 static struct ibm_pa_feature {
 	unsigned long	cpu_features;	/* CPU_FTR_xxx bit */
+	unsigned long	mmu_features;	/* MMU_FTR_xxx bit */
 	unsigned int	cpu_user_ftrs;	/* PPC_FEATURE_xxx bit */
 	unsigned char	pabyte;		/* byte number in ibm,pa-features */
 	unsigned char	pabit;		/* bit number (big-endian) */
 	unsigned char	invert;		/* if 1, pa bit set => clear feature */
 } ibm_pa_features[] __initdata = {
-	{0, PPC_FEATURE_HAS_MMU,	0, 0, 0},
-	{0, PPC_FEATURE_HAS_FPU,	0, 1, 0},
-	{CPU_FTR_SLB, 0,		0, 2, 0},
-	{CPU_FTR_CTRL, 0,		0, 3, 0},
-	{CPU_FTR_NOEXECUTE, 0,		0, 6, 0},
-	{CPU_FTR_NODSISRALIGN, 0,	1, 1, 1},
-	{CPU_FTR_CI_LARGE_PAGE, 0,	1, 2, 0},
+	{0, 0, PPC_FEATURE_HAS_MMU,	0, 0, 0},
+	{0, 0, PPC_FEATURE_HAS_FPU,	0, 1, 0},
+	{0, MMU_FTR_SLB, 0,		0, 2, 0},
+	{CPU_FTR_CTRL, 0, 0,		0, 3, 0},
+	{CPU_FTR_NOEXECUTE, 0, 0,	0, 6, 0},
+	{CPU_FTR_NODSISRALIGN, 0, 0,	1, 1, 1},
+	{0, MMU_FTR_CI_LARGE_PAGE, 0,	1, 2, 0},
 	{CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0},
 };
 
@@ -166,9 +168,11 @@ static void __init scan_features(unsigned long node, unsigned char *ftrs,
 		if (bit ^ fp->invert) {
 			cur_cpu_spec->cpu_features |= fp->cpu_features;
 			cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftrs;
+			cur_cpu_spec->mmu_features |= fp->mmu_features;
 		} else {
 			cur_cpu_spec->cpu_features &= ~fp->cpu_features;
 			cur_cpu_spec->cpu_user_features &= ~fp->cpu_user_ftrs;
+			cur_cpu_spec->mmu_features &= ~fp->mmu_features;
 		}
 	}
 }
@@ -268,13 +272,13 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 					  const char *uname, int depth,
 					  void *data)
 {
-	static int logical_cpuid = 0;
 	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
 	const u32 *prop;
 	const u32 *intserv;
 	int i, nthreads;
 	unsigned long len;
-	int found = 0;
+	int found = -1;
+	int found_thread = 0;
 
 	/* We are scanning "cpu" nodes only */
 	if (type == NULL || strcmp(type, "cpu") != 0)
@@ -298,11 +302,10 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 		 * version 2 of the kexec param format adds the phys cpuid of
 		 * booted proc.
 		 */
-		if (initial_boot_params && initial_boot_params->version >= 2) {
-			if (intserv[i] ==
-					initial_boot_params->boot_cpuid_phys) {
-				found = 1;
-				break;
+		if (initial_boot_params->version >= 2) {
+			if (intserv[i] == initial_boot_params->boot_cpuid_phys) {
+				found = boot_cpu_count;
+				found_thread = i;
 			}
 		} else {
 			/*
@@ -311,23 +314,20 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 			 * off secondary threads.
 			 */
 			if (of_get_flat_dt_prop(node,
-					"linux,boot-cpu", NULL) != NULL) {
-				found = 1;
-				break;
-			}
+					"linux,boot-cpu", NULL) != NULL)
+				found = boot_cpu_count;
 		}
-
 #ifdef CONFIG_SMP
 		/* logical cpu id is always 0 on UP kernels */
-		logical_cpuid++;
+		boot_cpu_count++;
 #endif
 	}
 
-	if (found) {
-		DBG("boot cpu: logical %d physical %d\n", logical_cpuid,
-			intserv[i]);
-		boot_cpuid = logical_cpuid;
-		set_hard_smp_processor_id(boot_cpuid, intserv[i]);
+	if (found >= 0) {
+		DBG("boot cpu: logical %d physical %d\n", found,
+			intserv[found_thread]);
+		boot_cpuid = found;
+		set_hard_smp_processor_id(found, intserv[found_thread]);
 
 		/*
 		 * PAPR defines "logical" PVR values for cpus that
@@ -509,11 +509,14 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 			size = 0x80000000ul - base;
 	}
 #endif
-
-	/* First MEMBLOCK added, do some special initializations */
-	if (memstart_addr == ~(phys_addr_t)0)
-		setup_initial_memory_limit(base, size);
-	memstart_addr = min((u64)memstart_addr, base);
+	/* Keep track of the beginning of memory -and- the size of
+	 * the very first block in the device-tree as it represents
+	 * the RMA on ppc64 server
+	 */
+	if (base < memstart_addr) {
+		memstart_addr = base;
+		first_memblock_size = size;
+	}
 
 	/* Add the chunk to the MEMBLOCK list */
 	memblock_add(base, size);
@@ -698,6 +701,7 @@ void __init early_init_devtree(void *params)
 
 	of_scan_flat_dt(early_init_dt_scan_root, NULL);
 	of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
+	setup_initial_memory_limit(memstart_addr, first_memblock_size);
 
 	/* Save command line for /proc/cmdline and then parse parameters */
 	strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE);
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 941ff4dbc56..c016033ba78 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -335,6 +335,7 @@ static void __init prom_printf(const char *format, ...)
 	const char *p, *q, *s;
 	va_list args;
 	unsigned long v;
+	long vs;
 	struct prom_t *_prom = &RELOC(prom);
 
 	va_start(args, format);
@@ -368,12 +369,35 @@ static void __init prom_printf(const char *format, ...)
 			v = va_arg(args, unsigned long);
 			prom_print_hex(v);
 			break;
+		case 'd':
+			++q;
+			vs = va_arg(args, int);
+			if (vs < 0) {
+				prom_print(RELOC("-"));
+				vs = -vs;
+			}
+			prom_print_dec(vs);
+			break;
 		case 'l':
 			++q;
-			if (*q == 'u') { /* '%lu' */
+			if (*q == 0)
+				break;
+			else if (*q == 'x') {
+				++q;
+				v = va_arg(args, unsigned long);
+				prom_print_hex(v);
+			} else if (*q == 'u') { /* '%lu' */
 				++q;
 				v = va_arg(args, unsigned long);
 				prom_print_dec(v);
+			} else if (*q == 'd') { /* %ld */
+				++q;
+				vs = va_arg(args, long);
+				if (vs < 0) {
+					prom_print(RELOC("-"));
+					vs = -vs;
+				}
+				prom_print_dec(vs);
 			}
 			break;
 		}
@@ -676,8 +700,10 @@ static void __init early_cmdline_parse(void)
 #endif /* CONFIG_PCI_MSI */
 #ifdef CONFIG_PPC_SMLPAR
 #define OV5_CMO			0x80	/* Cooperative Memory Overcommitment */
+#define OV5_XCMO			0x40	/* Page Coalescing */
 #else
 #define OV5_CMO			0x00
+#define OV5_XCMO			0x00
 #endif
 #define OV5_TYPE1_AFFINITY	0x80	/* Type 1 NUMA affinity */
 
@@ -732,7 +758,7 @@ static unsigned char ibm_architecture_vec[] = {
 	OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY |
 	OV5_DONATE_DEDICATE_CPU | OV5_MSI,
 	0,
-	OV5_CMO,
+	OV5_CMO | OV5_XCMO,
 	OV5_TYPE1_AFFINITY,
 	0,
 	0,
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 55613e33e26..a6ae1cfad86 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -933,12 +933,16 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 	if (data && !(data & DABR_TRANSLATION))
 		return -EIO;
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
+	if (ptrace_get_breakpoints(task) < 0)
+		return -ESRCH;
+
 	bp = thread->ptrace_bps[0];
 	if ((!data) || !(data & (DABR_DATA_WRITE | DABR_DATA_READ))) {
 		if (bp) {
 			unregister_hw_breakpoint(bp);
 			thread->ptrace_bps[0] = NULL;
 		}
+		ptrace_put_breakpoints(task);
 		return 0;
 	}
 	if (bp) {
@@ -948,9 +952,12 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 					(DABR_DATA_WRITE | DABR_DATA_READ),
 							&attr.bp_type);
 		ret =  modify_user_hw_breakpoint(bp, &attr);
-		if (ret)
+		if (ret) {
+			ptrace_put_breakpoints(task);
 			return ret;
+		}
 		thread->ptrace_bps[0] = bp;
+		ptrace_put_breakpoints(task);
 		thread->dabr = data;
 		return 0;
 	}
@@ -965,9 +972,12 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 							ptrace_triggered, task);
 	if (IS_ERR(bp)) {
 		thread->ptrace_bps[0] = NULL;
+		ptrace_put_breakpoints(task);
 		return PTR_ERR(bp);
 	}
 
+	ptrace_put_breakpoints(task);
+
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
 
 	/* Move contents to the DABR register */
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 2097f2b3cba..271ff6318ed 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -42,6 +42,7 @@
 #include <asm/time.h>
 #include <asm/mmu.h>
 #include <asm/topology.h>
+#include <asm/pSeries_reconfig.h>
 
 struct rtas_t rtas = {
 	.lock = __ARCH_SPIN_LOCK_UNLOCKED
@@ -494,7 +495,7 @@ unsigned int rtas_busy_delay(int status)
 
 	might_sleep();
 	ms = rtas_busy_delay_time(status);
-	if (ms)
+	if (ms && need_resched())
 		msleep(ms);
 
 	return ms;
@@ -731,6 +732,7 @@ static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_w
 
 	atomic_set(&data->error, rc);
 	start_topology_update();
+	pSeries_coalesce_init();
 
 	if (wake_when_done) {
 		atomic_set(&data->done, 1);
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 21f30cb6807..79fca2651b6 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -381,7 +381,7 @@ static void __init cpu_init_thread_core_maps(int tpc)
 	int i;
 
 	threads_per_core = tpc;
-	threads_core_mask = CPU_MASK_NONE;
+	cpumask_clear(&threads_core_mask);
 
 	/* This implementation only supports power of 2 number of threads
 	 * for simplicity and performance
@@ -390,7 +390,7 @@ static void __init cpu_init_thread_core_maps(int tpc)
 	BUG_ON(tpc != (1 << threads_shift));
 
 	for (i = 0; i < tpc; i++)
-		cpu_set(i, threads_core_mask);
+		cpumask_set_cpu(i, &threads_core_mask);
 
 	printk(KERN_INFO "CPU maps initialized for %d thread%s per core\n",
 	       tpc, tpc > 1 ? "s" : "");
@@ -404,7 +404,7 @@ static void __init cpu_init_thread_core_maps(int tpc)
  *                  cpu_present_mask
  *
  * Having the possible map set up early allows us to restrict allocations
- * of things like irqstacks to num_possible_cpus() rather than NR_CPUS.
+ * of things like irqstacks to nr_cpu_ids rather than NR_CPUS.
  *
  * We do not initialize the online map here; cpus set their own bits in
  * cpu_online_mask as they come up.
@@ -424,7 +424,7 @@ void __init smp_setup_cpu_maps(void)
 
 	DBG("smp_setup_cpu_maps()\n");
 
-	while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < NR_CPUS) {
+	while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < nr_cpu_ids) {
 		const int *intserv;
 		int j, len;
 
@@ -443,7 +443,7 @@ void __init smp_setup_cpu_maps(void)
 				intserv = &cpu;	/* assume logical == phys */
 		}
 
-		for (j = 0; j < nthreads && cpu < NR_CPUS; j++) {
+		for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) {
 			DBG("    thread %d -> cpu %d (hard id %d)\n",
 			    j, cpu, intserv[j]);
 			set_cpu_present(cpu, true);
@@ -483,12 +483,12 @@ void __init smp_setup_cpu_maps(void)
 		if (cpu_has_feature(CPU_FTR_SMT))
 			maxcpus *= nthreads;
 
-		if (maxcpus > NR_CPUS) {
+		if (maxcpus > nr_cpu_ids) {
 			printk(KERN_WARNING
 			       "Partition configured for %d cpus, "
 			       "operating system maximum is %d.\n",
-			       maxcpus, NR_CPUS);
-			maxcpus = NR_CPUS;
+			       maxcpus, nr_cpu_ids);
+			maxcpus = nr_cpu_ids;
 		} else
 			printk(KERN_INFO "Partition configured for %d cpus.\n",
 			       maxcpus);
@@ -510,7 +510,7 @@ void __init smp_setup_cpu_maps(void)
 	cpu_init_thread_core_maps(nthreads);
 
 	/* Now that possible cpus are set, set nr_cpu_ids for later use */
-	nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
+	setup_nr_cpu_ids();
 
 	free_unused_pacas();
 }
@@ -602,6 +602,10 @@ int check_legacy_ioport(unsigned long base_port)
 		 * name instead */
 		if (!np)
 			np = of_find_node_by_name(NULL, "8042");
+		if (np) {
+			of_i8042_kbd_irq = 1;
+			of_i8042_aux_irq = 12;
+		}
 		break;
 	case FDC_BASE: /* FDC1 */
 		np = of_find_node_by_type(NULL, "fdc");
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 1d2fbc90530..620d792b52e 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -48,6 +48,7 @@ extern void bootx_init(unsigned long r4, unsigned long phys);
 
 int boot_cpuid = -1;
 EXPORT_SYMBOL_GPL(boot_cpuid);
+int __initdata boot_cpu_count;
 int boot_cpuid_phys;
 
 int smp_hw_index[NR_CPUS];
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 5a0401fcaeb..a88bf2713d4 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -62,6 +62,7 @@
 #include <asm/udbg.h>
 #include <asm/kexec.h>
 #include <asm/mmu_context.h>
+#include <asm/code-patching.h>
 
 #include "setup.h"
 
@@ -72,6 +73,7 @@
 #endif
 
 int boot_cpuid = 0;
+int __initdata boot_cpu_count;
 u64 ppc64_pft_size;
 
 /* Pick defaults since we might want to patch instructions
@@ -233,6 +235,7 @@ void early_setup_secondary(void)
 void smp_release_cpus(void)
 {
 	unsigned long *ptr;
+	int i;
 
 	DBG(" -> smp_release_cpus()\n");
 
@@ -245,7 +248,16 @@ void smp_release_cpus(void)
 	ptr  = (unsigned long *)((unsigned long)&__secondary_hold_spinloop
 			- PHYSICAL_START);
 	*ptr = __pa(generic_secondary_smp_init);
-	mb();
+
+	/* And wait a bit for them to catch up */
+	for (i = 0; i < 100000; i++) {
+		mb();
+		HMT_low();
+		if (boot_cpu_count == 0)
+			break;
+		udelay(1);
+	}
+	DBG("boot_cpu_count = %d\n", boot_cpu_count);
 
 	DBG(" <- smp_release_cpus()\n");
 }
@@ -423,17 +435,30 @@ void __init setup_system(void)
 	DBG(" <- setup_system()\n");
 }
 
-static u64 slb0_limit(void)
+/* This returns the limit below which memory accesses to the linear
+ * mapping are guarnateed not to cause a TLB or SLB miss. This is
+ * used to allocate interrupt or emergency stacks for which our
+ * exception entry path doesn't deal with being interrupted.
+ */
+static u64 safe_stack_limit(void)
 {
-	if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) {
+#ifdef CONFIG_PPC_BOOK3E
+	/* Freescale BookE bolts the entire linear mapping */
+	if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
+		return linear_map_top;
+	/* Other BookE, we assume the first GB is bolted */
+	return 1ul << 30;
+#else
+	/* BookS, the first segment is bolted */
+	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
 		return 1UL << SID_SHIFT_1T;
-	}
 	return 1UL << SID_SHIFT;
+#endif
 }
 
 static void __init irqstack_early_init(void)
 {
-	u64 limit = slb0_limit();
+	u64 limit = safe_stack_limit();
 	unsigned int i;
 
 	/*
@@ -453,6 +478,9 @@ static void __init irqstack_early_init(void)
 #ifdef CONFIG_PPC_BOOK3E
 static void __init exc_lvl_early_init(void)
 {
+	extern unsigned int interrupt_base_book3e;
+	extern unsigned int exc_debug_debug_book3e;
+
 	unsigned int i;
 
 	for_each_possible_cpu(i) {
@@ -463,6 +491,10 @@ static void __init exc_lvl_early_init(void)
 		mcheckirq_ctx[i] = (struct thread_info *)
 			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
 	}
+
+	if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
+		patch_branch(&interrupt_base_book3e + (0x040 / 4) + 1,
+			     (unsigned long)&exc_debug_debug_book3e, 0);
 }
 #else
 #define exc_lvl_early_init()
@@ -486,7 +518,7 @@ static void __init emergency_stack_init(void)
 	 * bringup, we need to get at them in real mode. This means they
 	 * must also be within the RMO region.
 	 */
-	limit = min(slb0_limit(), ppc64_rma_size);
+	limit = min(safe_stack_limit(), ppc64_rma_size);
 
 	for_each_possible_cpu(i) {
 		unsigned long sp;
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 27c4a4584f8..da989fff19c 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -381,7 +381,7 @@ badframe:
 	       regs, uc, &uc->uc_mcontext);
 #endif
 	if (show_unhandled_signals && printk_ratelimit())
-		printk(regs->msr & MSR_SF ? fmt64 : fmt32,
+		printk(regs->msr & MSR_64BIT ? fmt64 : fmt32,
 			current->comm, current->pid, "rt_sigreturn",
 			(long)uc, regs->nip, regs->link);
 
@@ -469,7 +469,7 @@ badframe:
 	       regs, frame, newsp);
 #endif
 	if (show_unhandled_signals && printk_ratelimit())
-		printk(regs->msr & MSR_SF ? fmt64 : fmt32,
+		printk(regs->msr & MSR_64BIT ? fmt64 : fmt32,
 			current->comm, current->pid, "setup_rt_frame",
 			(long)frame, regs->nip, regs->link);
 
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index cbdbb14be4b..4a6f2ec7e76 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -95,7 +95,7 @@ int smt_enabled_at_boot = 1;
 static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL;
 
 #ifdef CONFIG_PPC64
-void __devinit smp_generic_kick_cpu(int nr)
+int __devinit smp_generic_kick_cpu(int nr)
 {
 	BUG_ON(nr < 0 || nr >= NR_CPUS);
 
@@ -106,37 +106,10 @@ void __devinit smp_generic_kick_cpu(int nr)
 	 */
 	paca[nr].cpu_start = 1;
 	smp_mb();
-}
-#endif
 
-void smp_message_recv(int msg)
-{
-	switch(msg) {
-	case PPC_MSG_CALL_FUNCTION:
-		generic_smp_call_function_interrupt();
-		break;
-	case PPC_MSG_RESCHEDULE:
-		/* we notice need_resched on exit */
-		break;
-	case PPC_MSG_CALL_FUNC_SINGLE:
-		generic_smp_call_function_single_interrupt();
-		break;
-	case PPC_MSG_DEBUGGER_BREAK:
-		if (crash_ipi_function_ptr) {
-			crash_ipi_function_ptr(get_irq_regs());
-			break;
-		}
-#ifdef CONFIG_DEBUGGER
-		debugger_ipi(get_irq_regs());
-		break;
-#endif /* CONFIG_DEBUGGER */
-		/* FALLTHROUGH */
-	default:
-		printk("SMP %d: smp_message_recv(): unknown msg %d\n",
-		       smp_processor_id(), msg);
-		break;
-	}
+	return 0;
 }
+#endif
 
 static irqreturn_t call_function_action(int irq, void *data)
 {
@@ -146,7 +119,7 @@ static irqreturn_t call_function_action(int irq, void *data)
 
 static irqreturn_t reschedule_action(int irq, void *data)
 {
-	/* we just need the return path side effect of checking need_resched */
+	scheduler_ipi();
 	return IRQ_HANDLED;
 }
 
@@ -156,9 +129,17 @@ static irqreturn_t call_function_single_action(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-static irqreturn_t debug_ipi_action(int irq, void *data)
+irqreturn_t debug_ipi_action(int irq, void *data)
 {
-	smp_message_recv(PPC_MSG_DEBUGGER_BREAK);
+	if (crash_ipi_function_ptr) {
+		crash_ipi_function_ptr(get_irq_regs());
+		return IRQ_HANDLED;
+	}
+
+#ifdef CONFIG_DEBUGGER
+	debugger_ipi(get_irq_regs());
+#endif /* CONFIG_DEBUGGER */
+
 	return IRQ_HANDLED;
 }
 
@@ -197,6 +178,66 @@ int smp_request_message_ipi(int virq, int msg)
 	return err;
 }
 
+#ifdef CONFIG_PPC_SMP_MUXED_IPI
+struct cpu_messages {
+	int messages;			/* current messages */
+	unsigned long data;		/* data for cause ipi */
+};
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);
+
+void smp_muxed_ipi_set_data(int cpu, unsigned long data)
+{
+	struct cpu_messages *info = &per_cpu(ipi_message, cpu);
+
+	info->data = data;
+}
+
+void smp_muxed_ipi_message_pass(int cpu, int msg)
+{
+	struct cpu_messages *info = &per_cpu(ipi_message, cpu);
+	char *message = (char *)&info->messages;
+
+	message[msg] = 1;
+	mb();
+	smp_ops->cause_ipi(cpu, info->data);
+}
+
+void smp_muxed_ipi_resend(void)
+{
+	struct cpu_messages *info = &__get_cpu_var(ipi_message);
+
+	if (info->messages)
+		smp_ops->cause_ipi(smp_processor_id(), info->data);
+}
+
+irqreturn_t smp_ipi_demux(void)
+{
+	struct cpu_messages *info = &__get_cpu_var(ipi_message);
+	unsigned int all;
+
+	mb();	/* order any irq clear */
+
+	do {
+		all = xchg_local(&info->messages, 0);
+
+#ifdef __BIG_ENDIAN
+		if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNCTION)))
+			generic_smp_call_function_interrupt();
+		if (all & (1 << (24 - 8 * PPC_MSG_RESCHEDULE)))
+			scheduler_ipi();
+		if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNC_SINGLE)))
+			generic_smp_call_function_single_interrupt();
+		if (all & (1 << (24 - 8 * PPC_MSG_DEBUGGER_BREAK)))
+			debug_ipi_action(0, NULL);
+#else
+#error Unsupported ENDIAN
+#endif
+	} while (info->messages);
+
+	return IRQ_HANDLED;
+}
+#endif /* CONFIG_PPC_SMP_MUXED_IPI */
+
 void smp_send_reschedule(int cpu)
 {
 	if (likely(smp_ops))
@@ -216,11 +257,18 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 		smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNCTION);
 }
 
-#ifdef CONFIG_DEBUGGER
-void smp_send_debugger_break(int cpu)
+#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
+void smp_send_debugger_break(void)
 {
-	if (likely(smp_ops))
-		smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK);
+	int cpu;
+	int me = raw_smp_processor_id();
+
+	if (unlikely(!smp_ops))
+		return;
+
+	for_each_online_cpu(cpu)
+		if (cpu != me)
+			smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK);
 }
 #endif
 
@@ -228,9 +276,9 @@ void smp_send_debugger_break(int cpu)
 void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
 {
 	crash_ipi_function_ptr = crash_ipi_callback;
-	if (crash_ipi_callback && smp_ops) {
+	if (crash_ipi_callback) {
 		mb();
-		smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_DEBUGGER_BREAK);
+		smp_send_debugger_break();
 	}
 }
 #endif
@@ -410,8 +458,6 @@ int __cpuinit __cpu_up(unsigned int cpu)
 {
 	int rc, c;
 
-	secondary_ti = current_set[cpu];
-
 	if (smp_ops == NULL ||
 	    (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu)))
 		return -EINVAL;
@@ -421,6 +467,8 @@ int __cpuinit __cpu_up(unsigned int cpu)
 	if (rc)
 		return rc;
 
+	secondary_ti = current_set[cpu];
+
 	/* Make sure callin-map entry is 0 (can be leftover a CPU
 	 * hotplug
 	 */
@@ -434,7 +482,11 @@ int __cpuinit __cpu_up(unsigned int cpu)
 
 	/* wake up cpus */
 	DBG("smp: kicking cpu %d\n", cpu);
-	smp_ops->kick_cpu(cpu);
+	rc = smp_ops->kick_cpu(cpu);
+	if (rc) {
+		pr_err("smp: failed starting cpu %d (rc %d)\n", cpu, rc);
+		return rc;
+	}
 
 	/*
 	 * wait to see if the cpu made a callin (is actually up).
@@ -507,7 +559,7 @@ int cpu_first_thread_of_core(int core)
 }
 EXPORT_SYMBOL_GPL(cpu_first_thread_of_core);
 
-/* Must be called when no change can occur to cpu_present_map,
+/* Must be called when no change can occur to cpu_present_mask,
  * i.e. during cpu online or offline.
  */
 static struct device_node *cpu_to_l2cache(int cpu)
@@ -608,7 +660,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
 	 * se we pin us down to CPU 0 for a short while
 	 */
 	alloc_cpumask_var(&old_mask, GFP_NOWAIT);
-	cpumask_copy(old_mask, &current->cpus_allowed);
+	cpumask_copy(old_mask, tsk_cpus_allowed(current));
 	set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid));
 	
 	if (smp_ops && smp_ops->setup_cpu)
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index c0d8c2006bf..f0f2199e64e 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -182,6 +182,41 @@ static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
 static SYSDEV_ATTR(spurr, 0600, show_spurr, NULL);
 static SYSDEV_ATTR(dscr, 0600, show_dscr, store_dscr);
 static SYSDEV_ATTR(purr, 0600, show_purr, store_purr);
+
+unsigned long dscr_default = 0;
+EXPORT_SYMBOL(dscr_default);
+
+static ssize_t show_dscr_default(struct sysdev_class *class,
+		struct sysdev_class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lx\n", dscr_default);
+}
+
+static ssize_t __used store_dscr_default(struct sysdev_class *class,
+		struct sysdev_class_attribute *attr, const char *buf,
+		size_t count)
+{
+	unsigned long val;
+	int ret = 0;
+	
+	ret = sscanf(buf, "%lx", &val);
+	if (ret != 1)
+		return -EINVAL;
+	dscr_default = val;
+
+	return count;
+}
+
+static SYSDEV_CLASS_ATTR(dscr_default, 0600,
+		show_dscr_default, store_dscr_default);
+
+static void sysfs_create_dscr_default(void)
+{
+	int err = 0;
+	if (cpu_has_feature(CPU_FTR_DSCR))
+		err = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+			&attr_dscr_default.attr);
+}
 #endif /* CONFIG_PPC64 */
 
 #ifdef HAS_PPC_PMC_PA6T
@@ -617,6 +652,9 @@ static int __init topology_init(void)
 		if (cpu_online(cpu))
 			register_cpu_online(cpu);
 	}
+#ifdef CONFIG_PPC64
+	sysfs_create_dscr_default();
+#endif /* CONFIG_PPC64 */
 
 	return 0;
 }
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 5ddb801bc15..b13306b0d92 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -143,7 +143,6 @@ int die(const char *str, struct pt_regs *regs, long err)
 #endif
 		printk("%s\n", ppc_md.name ? ppc_md.name : "");
 
-		sysfs_printk_last_file();
 		if (notify_die(DIE_OOPS, str, regs, err, 255,
 			       SIGSEGV) == NOTIFY_STOP)
 			return 1;
@@ -199,7 +198,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
 	} else if (show_unhandled_signals &&
 		    unhandled_signal(current, signr) &&
 		    printk_ratelimit()) {
-			printk(regs->msr & MSR_SF ? fmt64 : fmt32,
+			printk(regs->msr & MSR_64BIT ? fmt64 : fmt32,
 				current->comm, current->pid, signr,
 				addr, regs->nip, regs->link, code);
 		}
@@ -221,7 +220,7 @@ void system_reset_exception(struct pt_regs *regs)
 	}
 
 #ifdef CONFIG_KEXEC
-	cpu_set(smp_processor_id(), cpus_in_sr);
+	cpumask_set_cpu(smp_processor_id(), &cpus_in_sr);
 #endif
 
 	die("System Reset", regs, SIGABRT);
@@ -909,6 +908,26 @@ static int emulate_instruction(struct pt_regs *regs)
 		return emulate_isel(regs, instword);
 	}
 
+#ifdef CONFIG_PPC64
+	/* Emulate the mfspr rD, DSCR. */
+	if (((instword & PPC_INST_MFSPR_DSCR_MASK) == PPC_INST_MFSPR_DSCR) &&
+			cpu_has_feature(CPU_FTR_DSCR)) {
+		PPC_WARN_EMULATED(mfdscr, regs);
+		rd = (instword >> 21) & 0x1f;
+		regs->gpr[rd] = mfspr(SPRN_DSCR);
+		return 0;
+	}
+	/* Emulate the mtspr DSCR, rD. */
+	if (((instword & PPC_INST_MTSPR_DSCR_MASK) == PPC_INST_MTSPR_DSCR) &&
+			cpu_has_feature(CPU_FTR_DSCR)) {
+		PPC_WARN_EMULATED(mtdscr, regs);
+		rd = (instword >> 21) & 0x1f;
+		mtspr(SPRN_DSCR, regs->gpr[rd]);
+		current->thread.dscr_inherit = 1;
+		return 0;
+	}
+#endif
+
 	return -EINVAL;
 }
 
@@ -1506,6 +1525,10 @@ struct ppc_emulated ppc_emulated = {
 #ifdef CONFIG_VSX
 	WARN_EMULATED_SETUP(vsx),
 #endif
+#ifdef CONFIG_PPC64
+	WARN_EMULATED_SETUP(mfdscr),
+	WARN_EMULATED_SETUP(mtdscr),
+#endif
 };
 
 u32 ppc_warn_emulated;
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index e39cad83c88..23d65abbedc 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -62,6 +62,8 @@ void __init udbg_early_init(void)
 	udbg_init_cpm();
 #elif defined(CONFIG_PPC_EARLY_DEBUG_USBGECKO)
 	udbg_init_usbgecko();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_WSP)
+	udbg_init_wsp();
 #endif
 
 #ifdef CONFIG_PPC_EARLY_DEBUG
diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
index baa33a7517b..6837f839ab7 100644
--- a/arch/powerpc/kernel/udbg_16550.c
+++ b/arch/powerpc/kernel/udbg_16550.c
@@ -11,6 +11,7 @@
 #include <linux/types.h>
 #include <asm/udbg.h>
 #include <asm/io.h>
+#include <asm/reg_a2.h>
 
 extern u8 real_readb(volatile u8 __iomem  *addr);
 extern void real_writeb(u8 data, volatile u8 __iomem *addr);
@@ -298,3 +299,53 @@ void __init udbg_init_40x_realmode(void)
 	udbg_getc_poll = NULL;
 }
 #endif /* CONFIG_PPC_EARLY_DEBUG_40x */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_WSP
+static void udbg_wsp_flush(void)
+{
+	if (udbg_comport) {
+		while ((readb(&udbg_comport->lsr) & LSR_THRE) == 0)
+			/* wait for idle */;
+	}
+}
+
+static void udbg_wsp_putc(char c)
+{
+	if (udbg_comport) {
+		if (c == '\n')
+			udbg_wsp_putc('\r');
+		udbg_wsp_flush();
+		writeb(c, &udbg_comport->thr); eieio();
+	}
+}
+
+static int udbg_wsp_getc(void)
+{
+	if (udbg_comport) {
+		while ((readb(&udbg_comport->lsr) & LSR_DR) == 0)
+			; /* wait for char */
+		return readb(&udbg_comport->rbr);
+	}
+	return -1;
+}
+
+static int udbg_wsp_getc_poll(void)
+{
+	if (udbg_comport)
+		if (readb(&udbg_comport->lsr) & LSR_DR)
+			return readb(&udbg_comport->rbr);
+	return -1;
+}
+
+void __init udbg_init_wsp(void)
+{
+	udbg_comport = (struct NS16550 __iomem *)WSP_UART_VIRT;
+
+	udbg_init_uart(udbg_comport, 57600, 50000000);
+
+	udbg_putc = udbg_wsp_putc;
+	udbg_flush = udbg_wsp_flush;
+	udbg_getc = udbg_wsp_getc;
+	udbg_getc_poll = udbg_wsp_getc_poll;
+}
+#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 9de6f396cf8..4d5a3edff49 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -102,7 +102,7 @@ _GLOBAL(giveup_altivec)
 	MTMSRD(r5)			/* enable use of VMX now */
 	isync
 	PPC_LCMPI	0,r3,0
-	beqlr-				/* if no previous owner, done */
+	beqlr				/* if no previous owner, done */
 	addi	r3,r3,THREAD		/* want THREAD of task */
 	PPC_LL	r5,PT_REGS(r3)
 	PPC_LCMPI	0,r5,0
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 74d0e742114..da3a1225c0a 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -107,6 +107,16 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+	kvmppc_get_sregs_ivor(vcpu, sregs);
+}
+
+int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+	return kvmppc_set_sregs_ivor(vcpu, sregs);
+}
+
 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 	struct kvmppc_vcpu_44x *vcpu_44x;
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index 65ea083a5b2..549bb2c9a47 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -158,7 +158,6 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 		emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
 	}
 
-	kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
 	return emulated;
 }
 
@@ -179,7 +178,6 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 		emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
 	}
 
-	kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
 	return emulated;
 }
 
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index c961de40c67..0f95b5cce03 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -236,7 +236,7 @@ void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
 
 int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
 {
-	return test_bit(BOOK3S_INTERRUPT_DECREMENTER >> 7, &vcpu->arch.pending_exceptions);
+	return test_bit(BOOK3S_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
 }
 
 void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index 2b9c9088d00..1a1b34487e7 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -35,9 +35,7 @@
 
 #if defined(CONFIG_PPC_BOOK3S_64)
 
-#define LOAD_SHADOW_VCPU(reg)				\
-	mfspr	reg, SPRN_SPRG_PACA
-
+#define LOAD_SHADOW_VCPU(reg)	GET_PACA(reg)					
 #define SHADOW_VCPU_OFF		PACA_KVM_SVCPU
 #define MSR_NOIRQ		MSR_KERNEL & ~(MSR_IR | MSR_DR)
 #define FUNC(name) 		GLUE(.,name)
@@ -72,7 +70,7 @@
 .global kvmppc_trampoline_\intno
 kvmppc_trampoline_\intno:
 
-	mtspr	SPRN_SPRG_SCRATCH0, r13		/* Save r13 */
+	SET_SCRATCH0(r13)		/* Save r13 */
 
 	/*
 	 * First thing to do is to find out if we're coming
@@ -91,7 +89,7 @@ kvmppc_trampoline_\intno:
 	lwz	r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13)
 	mtcr	r12
 	PPC_LL	r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13)
-	mfspr	r13, SPRN_SPRG_SCRATCH0		/* r13 = original r13 */
+	GET_SCRATCH0(r13)			/* r13 = original r13 */
 	b	kvmppc_resume_\intno		/* Get back original handler */
 
 	/* Now we know we're handling a KVM guest */
@@ -114,6 +112,9 @@ INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_MACHINE_CHECK
 INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_DATA_STORAGE
 INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_INST_STORAGE
 INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_EXTERNAL
+#ifdef CONFIG_PPC_BOOK3S_64
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_EXTERNAL_HV
+#endif
 INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_ALIGNMENT
 INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_PROGRAM
 INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_FP_UNAVAIL
@@ -158,7 +159,7 @@ kvmppc_handler_skip_ins:
 	lwz	r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13)
 	mtcr	r12
 	PPC_LL	r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13)
-	mfspr	r13, SPRN_SPRG_SCRATCH0
+	GET_SCRATCH0(r13)
 
 	/* And get back into the code */
 	RFI
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index 7c52ed0b705..451264274b8 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -155,14 +155,20 @@ kvmppc_handler_trampoline_exit:
 	PPC_LL	r2, (SHADOW_VCPU_OFF + SVCPU_HOST_R2)(r13)
 
 	/* Save guest PC and MSR */
-	mfsrr0	r3
+	andi.	r0,r12,0x2
+	beq	1f
+	mfspr	r3,SPRN_HSRR0
+	mfspr	r4,SPRN_HSRR1
+	andi.	r12,r12,0x3ffd
+	b	2f
+1:	mfsrr0	r3
 	mfsrr1	r4
-
+2:
 	PPC_STL	r3, (SHADOW_VCPU_OFF + SVCPU_PC)(r13)
 	PPC_STL	r4, (SHADOW_VCPU_OFF + SVCPU_SHADOW_SRR1)(r13)
 
 	/* Get scratch'ed off registers */
-	mfspr	r9, SPRN_SPRG_SCRATCH0
+	GET_SCRATCH0(r9)
 	PPC_LL	r8, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13)
 	lwz	r7, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13)
 
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index ef76acb455c..8462b3a1c1c 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -569,6 +569,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	kvmppc_set_msr(vcpu, regs->msr);
 	vcpu->arch.shared->srr0 = regs->srr0;
 	vcpu->arch.shared->srr1 = regs->srr1;
+	kvmppc_set_pid(vcpu, regs->pid);
 	vcpu->arch.shared->sprg0 = regs->sprg0;
 	vcpu->arch.shared->sprg1 = regs->sprg1;
 	vcpu->arch.shared->sprg2 = regs->sprg2;
@@ -584,16 +585,165 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	return 0;
 }
 
+static void get_sregs_base(struct kvm_vcpu *vcpu,
+                           struct kvm_sregs *sregs)
+{
+	u64 tb = get_tb();
+
+	sregs->u.e.features |= KVM_SREGS_E_BASE;
+
+	sregs->u.e.csrr0 = vcpu->arch.csrr0;
+	sregs->u.e.csrr1 = vcpu->arch.csrr1;
+	sregs->u.e.mcsr = vcpu->arch.mcsr;
+	sregs->u.e.esr = vcpu->arch.esr;
+	sregs->u.e.dear = vcpu->arch.shared->dar;
+	sregs->u.e.tsr = vcpu->arch.tsr;
+	sregs->u.e.tcr = vcpu->arch.tcr;
+	sregs->u.e.dec = kvmppc_get_dec(vcpu, tb);
+	sregs->u.e.tb = tb;
+	sregs->u.e.vrsave = vcpu->arch.vrsave;
+}
+
+static int set_sregs_base(struct kvm_vcpu *vcpu,
+                          struct kvm_sregs *sregs)
+{
+	if (!(sregs->u.e.features & KVM_SREGS_E_BASE))
+		return 0;
+
+	vcpu->arch.csrr0 = sregs->u.e.csrr0;
+	vcpu->arch.csrr1 = sregs->u.e.csrr1;
+	vcpu->arch.mcsr = sregs->u.e.mcsr;
+	vcpu->arch.esr = sregs->u.e.esr;
+	vcpu->arch.shared->dar = sregs->u.e.dear;
+	vcpu->arch.vrsave = sregs->u.e.vrsave;
+	vcpu->arch.tcr = sregs->u.e.tcr;
+
+	if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC)
+		vcpu->arch.dec = sregs->u.e.dec;
+
+	kvmppc_emulate_dec(vcpu);
+
+	if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
+		/*
+		 * FIXME: existing KVM timer handling is incomplete.
+		 * TSR cannot be read by the guest, and its value in
+		 * vcpu->arch is always zero.  For now, just handle
+		 * the case where the caller is trying to inject a
+		 * decrementer interrupt.
+		 */
+
+		if ((sregs->u.e.tsr & TSR_DIS) &&
+		    (vcpu->arch.tcr & TCR_DIE))
+			kvmppc_core_queue_dec(vcpu);
+	}
+
+	return 0;
+}
+
+static void get_sregs_arch206(struct kvm_vcpu *vcpu,
+                              struct kvm_sregs *sregs)
+{
+	sregs->u.e.features |= KVM_SREGS_E_ARCH206;
+
+	sregs->u.e.pir = 0;
+	sregs->u.e.mcsrr0 = vcpu->arch.mcsrr0;
+	sregs->u.e.mcsrr1 = vcpu->arch.mcsrr1;
+	sregs->u.e.decar = vcpu->arch.decar;
+	sregs->u.e.ivpr = vcpu->arch.ivpr;
+}
+
+static int set_sregs_arch206(struct kvm_vcpu *vcpu,
+                             struct kvm_sregs *sregs)
+{
+	if (!(sregs->u.e.features & KVM_SREGS_E_ARCH206))
+		return 0;
+
+	if (sregs->u.e.pir != 0)
+		return -EINVAL;
+
+	vcpu->arch.mcsrr0 = sregs->u.e.mcsrr0;
+	vcpu->arch.mcsrr1 = sregs->u.e.mcsrr1;
+	vcpu->arch.decar = sregs->u.e.decar;
+	vcpu->arch.ivpr = sregs->u.e.ivpr;
+
+	return 0;
+}
+
+void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+	sregs->u.e.features |= KVM_SREGS_E_IVOR;
+
+	sregs->u.e.ivor_low[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
+	sregs->u.e.ivor_low[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
+	sregs->u.e.ivor_low[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
+	sregs->u.e.ivor_low[3] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
+	sregs->u.e.ivor_low[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
+	sregs->u.e.ivor_low[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
+	sregs->u.e.ivor_low[6] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
+	sregs->u.e.ivor_low[7] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
+	sregs->u.e.ivor_low[8] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
+	sregs->u.e.ivor_low[9] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
+	sregs->u.e.ivor_low[10] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
+	sregs->u.e.ivor_low[11] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
+	sregs->u.e.ivor_low[12] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
+	sregs->u.e.ivor_low[13] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
+	sregs->u.e.ivor_low[14] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
+	sregs->u.e.ivor_low[15] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
+}
+
+int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+	if (!(sregs->u.e.features & KVM_SREGS_E_IVOR))
+		return 0;
+
+	vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = sregs->u.e.ivor_low[0];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = sregs->u.e.ivor_low[1];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = sregs->u.e.ivor_low[2];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = sregs->u.e.ivor_low[3];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = sregs->u.e.ivor_low[4];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = sregs->u.e.ivor_low[5];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = sregs->u.e.ivor_low[6];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = sregs->u.e.ivor_low[7];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = sregs->u.e.ivor_low[8];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = sregs->u.e.ivor_low[9];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = sregs->u.e.ivor_low[10];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = sregs->u.e.ivor_low[11];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = sregs->u.e.ivor_low[12];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = sregs->u.e.ivor_low[13];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = sregs->u.e.ivor_low[14];
+	vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = sregs->u.e.ivor_low[15];
+
+	return 0;
+}
+
 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
                                   struct kvm_sregs *sregs)
 {
-	return -ENOTSUPP;
+	sregs->pvr = vcpu->arch.pvr;
+
+	get_sregs_base(vcpu, sregs);
+	get_sregs_arch206(vcpu, sregs);
+	kvmppc_core_get_sregs(vcpu, sregs);
+	return 0;
 }
 
 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
                                   struct kvm_sregs *sregs)
 {
-	return -ENOTSUPP;
+	int ret;
+
+	if (vcpu->arch.pvr != sregs->pvr)
+		return -EINVAL;
+
+	ret = set_sregs_base(vcpu, sregs);
+	if (ret < 0)
+		return ret;
+
+	ret = set_sregs_arch206(vcpu, sregs);
+	if (ret < 0)
+		return ret;
+
+	return kvmppc_core_set_sregs(vcpu, sregs);
 }
 
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 1cc471faac2..b58ccae9590 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -380,7 +380,6 @@ lightweight_exit:
 	 * because host interrupt handlers would get confused. */
 	lwz	r1, VCPU_GPR(r1)(r4)
 
-	/* XXX handle USPRG0 */
 	/* Host interrupt handlers may have clobbered these guest-readable
 	 * SPRGs, so we need to reload them here with the guest's values. */
 	lwz	r3, VCPU_SPRG4(r4)
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index e3768ee9b59..318dbc61ba4 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -63,6 +63,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
 
 	/* Registers init */
 	vcpu->arch.pvr = mfspr(SPRN_PVR);
+	vcpu_e500->svr = mfspr(SPRN_SVR);
 
 	/* Since booke kvm only support one core, update all vcpus' PIR to 0 */
 	vcpu->vcpu_id = 0;
@@ -96,6 +97,81 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+	sregs->u.e.features |= KVM_SREGS_E_ARCH206_MMU | KVM_SREGS_E_SPE |
+	                       KVM_SREGS_E_PM;
+	sregs->u.e.impl_id = KVM_SREGS_E_IMPL_FSL;
+
+	sregs->u.e.impl.fsl.features = 0;
+	sregs->u.e.impl.fsl.svr = vcpu_e500->svr;
+	sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0;
+	sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar;
+
+	sregs->u.e.mas0 = vcpu_e500->mas0;
+	sregs->u.e.mas1 = vcpu_e500->mas1;
+	sregs->u.e.mas2 = vcpu_e500->mas2;
+	sregs->u.e.mas7_3 = ((u64)vcpu_e500->mas7 << 32) | vcpu_e500->mas3;
+	sregs->u.e.mas4 = vcpu_e500->mas4;
+	sregs->u.e.mas6 = vcpu_e500->mas6;
+
+	sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG);
+	sregs->u.e.tlbcfg[0] = vcpu_e500->tlb0cfg;
+	sregs->u.e.tlbcfg[1] = vcpu_e500->tlb1cfg;
+	sregs->u.e.tlbcfg[2] = 0;
+	sregs->u.e.tlbcfg[3] = 0;
+
+	sregs->u.e.ivor_high[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
+	sregs->u.e.ivor_high[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA];
+	sregs->u.e.ivor_high[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
+	sregs->u.e.ivor_high[3] =
+		vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
+
+	kvmppc_get_sregs_ivor(vcpu, sregs);
+}
+
+int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+	if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
+		vcpu_e500->svr = sregs->u.e.impl.fsl.svr;
+		vcpu_e500->hid0 = sregs->u.e.impl.fsl.hid0;
+		vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar;
+	}
+
+	if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) {
+		vcpu_e500->mas0 = sregs->u.e.mas0;
+		vcpu_e500->mas1 = sregs->u.e.mas1;
+		vcpu_e500->mas2 = sregs->u.e.mas2;
+		vcpu_e500->mas7 = sregs->u.e.mas7_3 >> 32;
+		vcpu_e500->mas3 = (u32)sregs->u.e.mas7_3;
+		vcpu_e500->mas4 = sregs->u.e.mas4;
+		vcpu_e500->mas6 = sregs->u.e.mas6;
+	}
+
+	if (!(sregs->u.e.features & KVM_SREGS_E_IVOR))
+		return 0;
+
+	if (sregs->u.e.features & KVM_SREGS_E_SPE) {
+		vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] =
+			sregs->u.e.ivor_high[0];
+		vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] =
+			sregs->u.e.ivor_high[1];
+		vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] =
+			sregs->u.e.ivor_high[2];
+	}
+
+	if (sregs->u.e.features & KVM_SREGS_E_PM) {
+		vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] =
+			sregs->u.e.ivor_high[3];
+	}
+
+	return kvmppc_set_sregs_ivor(vcpu, sregs);
+}
+
 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500;
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index 8e3edfbc963..69cd665a0ca 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
  *
  * Author: Yu Liu, <yu.liu@freescale.com>
  *
@@ -78,8 +78,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 
 	switch (sprn) {
 	case SPRN_PID:
-		vcpu_e500->pid[0] = vcpu->arch.shadow_pid =
-			vcpu->arch.pid = spr_val;
+		kvmppc_set_pid(vcpu, spr_val);
 		break;
 	case SPRN_PID1:
 		vcpu_e500->pid[1] = spr_val; break;
@@ -175,6 +174,8 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 		kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break;
 	case SPRN_HID1:
 		kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break;
+	case SPRN_SVR:
+		kvmppc_set_gpr(vcpu, rt, vcpu_e500->svr); break;
 
 	case SPRN_MMUCSR0:
 		kvmppc_set_gpr(vcpu, rt, 0); break;
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index d6d6d47a75a..b18fe353397 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
  *
  * Author: Yu Liu, yu.liu@freescale.com
  *
@@ -24,6 +24,7 @@
 #include "../mm/mmu_decl.h"
 #include "e500_tlb.h"
 #include "trace.h"
+#include "timing.h"
 
 #define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1)
 
@@ -506,6 +507,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
 		vcpu_e500->mas7 = 0;
 	}
 
+	kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
 	return EMULATE_DONE;
 }
 
@@ -571,6 +573,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
 		write_host_tlbe(vcpu_e500, stlbsel, sesel);
 	}
 
+	kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
 	return EMULATE_DONE;
 }
 
@@ -672,6 +675,14 @@ int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
 	return -1;
 }
 
+void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+	vcpu_e500->pid[0] = vcpu->arch.shadow_pid =
+		vcpu->arch.pid = pid;
+}
+
 void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
 {
 	struct tlbe *tlbe;
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index c64fd2909bb..141dce3c681 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -114,6 +114,12 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
 	}
 }
 
+u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
+{
+	u64 jd = tb - vcpu->arch.dec_jiffies;
+	return vcpu->arch.dec - jd;
+}
+
 /* XXX to do:
  * lhax
  * lhaux
@@ -279,11 +285,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 
 			case SPRN_DEC:
 			{
-				u64 jd = get_tb() - vcpu->arch.dec_jiffies;
-				kvmppc_set_gpr(vcpu, rt, vcpu->arch.dec - jd);
-				pr_debug("mfDEC: %x - %llx = %lx\n",
-					 vcpu->arch.dec, jd,
-					 kvmppc_get_gpr(vcpu, rt));
+				kvmppc_set_gpr(vcpu, rt,
+					       kvmppc_get_dec(vcpu, get_tb()));
 				break;
 			}
 			default:
@@ -294,6 +297,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 				}
 				break;
 			}
+			kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
 			break;
 
 		case OP_31_XOP_STHX:
@@ -363,6 +367,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 					printk("mtspr: unknown spr %x\n", sprn);
 				break;
 			}
+			kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
 			break;
 
 		case OP_31_XOP_DCBI:
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 99758460efd..616dd516ca1 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -175,7 +175,11 @@ int kvm_dev_ioctl_check_extension(long ext)
 	int r;
 
 	switch (ext) {
+#ifdef CONFIG_BOOKE
+	case KVM_CAP_PPC_BOOKE_SREGS:
+#else
 	case KVM_CAP_PPC_SEGSTATE:
+#endif
 	case KVM_CAP_PPC_PAIRED_SINGLES:
 	case KVM_CAP_PPC_UNSET_IRQ:
 	case KVM_CAP_PPC_IRQ_LEVEL:
@@ -284,6 +288,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu);
 	vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
 
+#ifdef CONFIG_KVM_EXIT_TIMING
+	mutex_init(&vcpu->arch.exit_timing_lock);
+#endif
+
 	return 0;
 }
 
@@ -294,12 +302,25 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
+#ifdef CONFIG_BOOKE
+	/*
+	 * vrsave (formerly usprg0) isn't used by Linux, but may
+	 * be used by the guest.
+	 *
+	 * On non-booke this is associated with Altivec and
+	 * is handled by code in book3s.c.
+	 */
+	mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
+#endif
 	kvmppc_core_vcpu_load(vcpu, cpu);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	kvmppc_core_vcpu_put(vcpu);
+#ifdef CONFIG_BOOKE
+	vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
+#endif
 }
 
 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
index a021f5827a3..319177df958 100644
--- a/arch/powerpc/kvm/timing.c
+++ b/arch/powerpc/kvm/timing.c
@@ -34,8 +34,8 @@ void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu)
 {
 	int i;
 
-	/* pause guest execution to avoid concurrent updates */
-	mutex_lock(&vcpu->mutex);
+	/* Take a lock to avoid concurrent updates */
+	mutex_lock(&vcpu->arch.exit_timing_lock);
 
 	vcpu->arch.last_exit_type = 0xDEAD;
 	for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
@@ -49,7 +49,7 @@ void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu)
 	vcpu->arch.timing_exit.tv64 = 0;
 	vcpu->arch.timing_last_enter.tv64 = 0;
 
-	mutex_unlock(&vcpu->mutex);
+	mutex_unlock(&vcpu->arch.exit_timing_lock);
 }
 
 static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
@@ -65,6 +65,8 @@ static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
 		return;
 	}
 
+	mutex_lock(&vcpu->arch.exit_timing_lock);
+
 	vcpu->arch.timing_count_type[type]++;
 
 	/* sum */
@@ -93,6 +95,8 @@ static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
 		vcpu->arch.timing_min_duration[type] = duration;
 	if (unlikely(duration > vcpu->arch.timing_max_duration[type]))
 		vcpu->arch.timing_max_duration[type] = duration;
+
+	mutex_unlock(&vcpu->arch.exit_timing_lock);
 }
 
 void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu)
@@ -147,17 +151,30 @@ static int kvmppc_exit_timing_show(struct seq_file *m, void *private)
 {
 	struct kvm_vcpu *vcpu = m->private;
 	int i;
+	u64 min, max, sum, sum_quad;
 
 	seq_printf(m, "%s", "type	count	min	max	sum	sum_squared\n");
 
+
 	for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
+
+		min = vcpu->arch.timing_min_duration[i];
+		do_div(min, tb_ticks_per_usec);
+		max = vcpu->arch.timing_max_duration[i];
+		do_div(max, tb_ticks_per_usec);
+		sum = vcpu->arch.timing_sum_duration[i];
+		do_div(sum, tb_ticks_per_usec);
+		sum_quad = vcpu->arch.timing_sum_quad_duration[i];
+		do_div(sum_quad, tb_ticks_per_usec);
+
 		seq_printf(m, "%12s	%10d	%10lld	%10lld	%20lld	%20lld\n",
 			kvm_exit_names[i],
 			vcpu->arch.timing_count_type[i],
-			vcpu->arch.timing_min_duration[i],
-			vcpu->arch.timing_max_duration[i],
-			vcpu->arch.timing_sum_duration[i],
-			vcpu->arch.timing_sum_quad_duration[i]);
+			min,
+			max,
+			sum,
+			sum_quad);
+
 	}
 	return 0;
 }
diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c
index f53e09c7dac..13b676c20d1 100644
--- a/arch/powerpc/lib/alloc.c
+++ b/arch/powerpc/lib/alloc.c
@@ -6,14 +6,6 @@
 
 #include <asm/system.h>
 
-void * __init_refok alloc_maybe_bootmem(size_t size, gfp_t mask)
-{
-	if (mem_init_done)
-		return kmalloc(size, mask);
-	else
-		return alloc_bootmem(size);
-}
-
 void * __init_refok zalloc_maybe_bootmem(size_t size, gfp_t mask)
 {
 	void *p;
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index 4d4eeb90048..53dcb6b1b70 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -6,6 +6,7 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
+#include <asm/page.h>
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
@@ -15,9 +16,9 @@ PPC64_CACHES:
         .tc             ppc64_caches[TC],ppc64_caches
         .section        ".text"
 
-
-_GLOBAL(copy_4K_page)
-	li	r5,4096		/* 4K page size */
+_GLOBAL(copy_page)
+	lis	r5,PAGE_SIZE@h
+	ori	r5,r5,PAGE_SIZE@l
 BEGIN_FTR_SECTION
 	ld      r10,PPC64_CACHES@toc(r2)
 	lwz	r11,DCACHEL1LOGLINESIZE(r10)	/* log2 of cache line size */
diff --git a/arch/powerpc/lib/devres.c b/arch/powerpc/lib/devres.c
index deac4d30daf..e91615abae6 100644
--- a/arch/powerpc/lib/devres.c
+++ b/arch/powerpc/lib/devres.c
@@ -9,11 +9,11 @@
 
 #include <linux/device.h>	/* devres_*(), devm_ioremap_release() */
 #include <linux/gfp.h>
-#include <linux/io.h>		/* ioremap_flags() */
+#include <linux/io.h>		/* ioremap_prot() */
 #include <linux/module.h>	/* EXPORT_SYMBOL() */
 
 /**
- * devm_ioremap_prot - Managed ioremap_flags()
+ * devm_ioremap_prot - Managed ioremap_prot()
  * @dev: Generic device to remap IO address for
  * @offset: BUS offset to map
  * @size: Size of map
@@ -31,7 +31,7 @@ void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset,
 	if (!ptr)
 		return NULL;
 
-	addr = ioremap_flags(offset, size, flags);
+	addr = ioremap_prot(offset, size, flags);
 	if (addr) {
 		*ptr = addr;
 		devres_add(dev, ptr);
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index ae5189ab004..9a52349874e 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/kprobes.h>
 #include <linux/ptrace.h>
+#include <linux/prefetch.h>
 #include <asm/sstep.h>
 #include <asm/processor.h>
 #include <asm/uaccess.h>
@@ -45,6 +46,18 @@ extern int do_stxvd2x(int rn, unsigned long ea);
 #endif
 
 /*
+ * Emulate the truncation of 64 bit values in 32-bit mode.
+ */
+static unsigned long truncate_if_32bit(unsigned long msr, unsigned long val)
+{
+#ifdef __powerpc64__
+	if ((msr & MSR_64BIT) == 0)
+		val &= 0xffffffffUL;
+#endif
+	return val;
+}
+
+/*
  * Determine whether a conditional branch instruction would branch.
  */
 static int __kprobes branch_taken(unsigned int instr, struct pt_regs *regs)
@@ -90,11 +103,8 @@ static unsigned long __kprobes dform_ea(unsigned int instr, struct pt_regs *regs
 		if (instr & 0x04000000)		/* update forms */
 			regs->gpr[ra] = ea;
 	}
-#ifdef __powerpc64__
-	if (!(regs->msr & MSR_SF))
-		ea &= 0xffffffffUL;
-#endif
-	return ea;
+
+	return truncate_if_32bit(regs->msr, ea);
 }
 
 #ifdef __powerpc64__
@@ -113,9 +123,8 @@ static unsigned long __kprobes dsform_ea(unsigned int instr, struct pt_regs *reg
 		if ((instr & 3) == 1)		/* update forms */
 			regs->gpr[ra] = ea;
 	}
-	if (!(regs->msr & MSR_SF))
-		ea &= 0xffffffffUL;
-	return ea;
+
+	return truncate_if_32bit(regs->msr, ea);
 }
 #endif /* __powerpc64 */
 
@@ -136,11 +145,8 @@ static unsigned long __kprobes xform_ea(unsigned int instr, struct pt_regs *regs
 		if (do_update)		/* update forms */
 			regs->gpr[ra] = ea;
 	}
-#ifdef __powerpc64__
-	if (!(regs->msr & MSR_SF))
-		ea &= 0xffffffffUL;
-#endif
-	return ea;
+
+	return truncate_if_32bit(regs->msr, ea);
 }
 
 /*
@@ -466,7 +472,7 @@ static void __kprobes set_cr0(struct pt_regs *regs, int rd)
 
 	regs->ccr = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000);
 #ifdef __powerpc64__
-	if (!(regs->msr & MSR_SF))
+	if (!(regs->msr & MSR_64BIT))
 		val = (int) val;
 #endif
 	if (val < 0)
@@ -487,7 +493,7 @@ static void __kprobes add_with_carry(struct pt_regs *regs, int rd,
 		++val;
 	regs->gpr[rd] = val;
 #ifdef __powerpc64__
-	if (!(regs->msr & MSR_SF)) {
+	if (!(regs->msr & MSR_64BIT)) {
 		val = (unsigned int) val;
 		val1 = (unsigned int) val1;
 	}
@@ -570,8 +576,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		if ((instr & 2) == 0)
 			imm += regs->nip;
 		regs->nip += 4;
-		if ((regs->msr & MSR_SF) == 0)
-			regs->nip &= 0xffffffffUL;
+		regs->nip = truncate_if_32bit(regs->msr, regs->nip);
 		if (instr & 1)
 			regs->link = regs->nip;
 		if (branch_taken(instr, regs))
@@ -604,13 +609,9 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 			imm -= 0x04000000;
 		if ((instr & 2) == 0)
 			imm += regs->nip;
-		if (instr & 1) {
-			regs->link = regs->nip + 4;
-			if ((regs->msr & MSR_SF) == 0)
-				regs->link &= 0xffffffffUL;
-		}
-		if ((regs->msr & MSR_SF) == 0)
-			imm &= 0xffffffffUL;
+		if (instr & 1)
+			regs->link = truncate_if_32bit(regs->msr, regs->nip + 4);
+		imm = truncate_if_32bit(regs->msr, imm);
 		regs->nip = imm;
 		return 1;
 	case 19:
@@ -618,11 +619,8 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		case 16:	/* bclr */
 		case 528:	/* bcctr */
 			imm = (instr & 0x400)? regs->ctr: regs->link;
-			regs->nip += 4;
-			if ((regs->msr & MSR_SF) == 0) {
-				regs->nip &= 0xffffffffUL;
-				imm &= 0xffffffffUL;
-			}
+			regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4);
+			imm = truncate_if_32bit(regs->msr, imm);
 			if (instr & 1)
 				regs->link = regs->nip;
 			if (branch_taken(instr, regs))
@@ -1616,11 +1614,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		return 0;	/* invoke DSI if -EFAULT? */
 	}
  instr_done:
-	regs->nip += 4;
-#ifdef __powerpc64__
-	if ((regs->msr & MSR_SF) == 0)
-		regs->nip &= 0xffffffffUL;
-#endif
+	regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4);
 	return 1;
 
  logical_done:
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index 5b7dd4ea02b..a242b5d7cbe 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -118,7 +118,7 @@ _GLOBAL(__hash_page_4K)
 BEGIN_FTR_SECTION
 	cmpdi	r9,0			/* check segment size */
 	bne	3f
-END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 	/* Calc va and put it in r29 */
 	rldicr	r29,r5,28,63-28
 	rldicl	r3,r3,0,36
@@ -401,7 +401,7 @@ _GLOBAL(__hash_page_4K)
 BEGIN_FTR_SECTION
 	cmpdi	r9,0			/* check segment size */
 	bne	3f
-END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 	/* Calc va and put it in r29 */
 	rldicr	r29,r5,28,63-28		/* r29 = (vsid << 28) */
 	rldicl	r3,r3,0,36		/* r3 = (ea & 0x0fffffff) */
@@ -715,7 +715,7 @@ BEGIN_FTR_SECTION
 	andi.	r0,r31,_PAGE_NO_CACHE
 	/* If so, bail out and refault as a 4k page */
 	bne-	ht64_bail_ok
-END_FTR_SECTION_IFCLR(CPU_FTR_CI_LARGE_PAGE)
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_CI_LARGE_PAGE)
 	/* Prepare new PTE value (turn access RW into DIRTY, then
 	 * add BUSY and ACCESSED)
 	 */
@@ -736,7 +736,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CI_LARGE_PAGE)
 BEGIN_FTR_SECTION
 	cmpdi	r9,0			/* check segment size */
 	bne	3f
-END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 	/* Calc va and put it in r29 */
 	rldicr	r29,r5,28,63-28
 	rldicl	r3,r3,0,36
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 784a400e078..dfd764896db 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -50,9 +50,8 @@ static inline void __tlbie(unsigned long va, int psize, int ssize)
 	case MMU_PAGE_4K:
 		va &= ~0xffful;
 		va |= ssize << 8;
-		asm volatile(ASM_MMU_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0),
-					       %2)
-			     : : "r" (va), "r"(0), "i" (MMU_FTR_TLBIE_206)
+		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
+			     : : "r" (va), "r"(0), "i" (CPU_FTR_HVMODE_206)
 			     : "memory");
 		break;
 	default:
@@ -61,9 +60,8 @@ static inline void __tlbie(unsigned long va, int psize, int ssize)
 		va |= penc << 12;
 		va |= ssize << 8;
 		va |= 1; /* L */
-		asm volatile(ASM_MMU_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0),
-					       %2)
-			     : : "r" (va), "r"(0), "i" (MMU_FTR_TLBIE_206)
+		asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)
+			     : : "r" (va), "r"(0), "i" (CPU_FTR_HVMODE_206)
 			     : "memory");
 		break;
 	}
@@ -98,8 +96,8 @@ static inline void __tlbiel(unsigned long va, int psize, int ssize)
 
 static inline void tlbie(unsigned long va, int psize, int ssize, int local)
 {
-	unsigned int use_local = local && cpu_has_feature(CPU_FTR_TLBIEL);
-	int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
+	unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL);
+	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
 
 	if (use_local)
 		use_local = mmu_psize_defs[psize].tlbiel;
@@ -503,7 +501,7 @@ static void native_flush_hash_range(unsigned long number, int local)
 		} pte_iterate_hashed_end();
 	}
 
-	if (cpu_has_feature(CPU_FTR_TLBIEL) &&
+	if (mmu_has_feature(MMU_FTR_TLBIEL) &&
 	    mmu_psize_defs[psize].tlbiel && local) {
 		asm volatile("ptesync":::"memory");
 		for (i = 0; i < number; i++) {
@@ -517,7 +515,7 @@ static void native_flush_hash_range(unsigned long number, int local)
 		}
 		asm volatile("ptesync":::"memory");
 	} else {
-		int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
+		int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
 
 		if (lock_tlbie)
 			raw_spin_lock(&native_tlbie_lock);
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 58a022d0f46..26b2872b3d0 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -53,6 +53,7 @@
 #include <asm/sections.h>
 #include <asm/spu.h>
 #include <asm/udbg.h>
+#include <asm/code-patching.h>
 
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
@@ -258,11 +259,11 @@ static int __init htab_dt_scan_seg_sizes(unsigned long node,
 	for (; size >= 4; size -= 4, ++prop) {
 		if (prop[0] == 40) {
 			DBG("1T segment support detected\n");
-			cur_cpu_spec->cpu_features |= CPU_FTR_1T_SEGMENT;
+			cur_cpu_spec->mmu_features |= MMU_FTR_1T_SEGMENT;
 			return 1;
 		}
 	}
-	cur_cpu_spec->cpu_features &= ~CPU_FTR_NO_SLBIE_B;
+	cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B;
 	return 0;
 }
 
@@ -288,7 +289,7 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
 	if (prop != NULL) {
 		DBG("Page sizes from device-tree:\n");
 		size /= 4;
-		cur_cpu_spec->cpu_features &= ~(CPU_FTR_16M_PAGE);
+		cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE);
 		while(size > 0) {
 			unsigned int shift = prop[0];
 			unsigned int slbenc = prop[1];
@@ -316,7 +317,7 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
 				break;
 			case 0x18:
 				idx = MMU_PAGE_16M;
-				cur_cpu_spec->cpu_features |= CPU_FTR_16M_PAGE;
+				cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE;
 				break;
 			case 0x22:
 				idx = MMU_PAGE_16G;
@@ -411,7 +412,7 @@ static void __init htab_init_page_sizes(void)
 	 * Not in the device-tree, let's fallback on known size
 	 * list for 16M capable GP & GR
 	 */
-	if (cpu_has_feature(CPU_FTR_16M_PAGE))
+	if (mmu_has_feature(MMU_FTR_16M_PAGE))
 		memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
 		       sizeof(mmu_psize_defaults_gp));
  found:
@@ -441,7 +442,7 @@ static void __init htab_init_page_sizes(void)
 		mmu_vmalloc_psize = MMU_PAGE_64K;
 		if (mmu_linear_psize == MMU_PAGE_4K)
 			mmu_linear_psize = MMU_PAGE_64K;
-		if (cpu_has_feature(CPU_FTR_CI_LARGE_PAGE)) {
+		if (mmu_has_feature(MMU_FTR_CI_LARGE_PAGE)) {
 			/*
 			 * Don't use 64k pages for ioremap on pSeries, since
 			 * that would stop us accessing the HEA ethernet.
@@ -547,15 +548,7 @@ int remove_section_mapping(unsigned long start, unsigned long end)
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
-static inline void make_bl(unsigned int *insn_addr, void *func)
-{
-	unsigned long funcp = *((unsigned long *)func);
-	int offset = funcp - (unsigned long)insn_addr;
-
-	*insn_addr = (unsigned int)(0x48000001 | (offset & 0x03fffffc));
-	flush_icache_range((unsigned long)insn_addr, 4+
-			   (unsigned long)insn_addr);
-}
+#define FUNCTION_TEXT(A)	((*(unsigned long *)(A)))
 
 static void __init htab_finish_init(void)
 {
@@ -570,16 +563,33 @@ static void __init htab_finish_init(void)
 	extern unsigned int *ht64_call_hpte_remove;
 	extern unsigned int *ht64_call_hpte_updatepp;
 
-	make_bl(ht64_call_hpte_insert1, ppc_md.hpte_insert);
-	make_bl(ht64_call_hpte_insert2, ppc_md.hpte_insert);
-	make_bl(ht64_call_hpte_remove, ppc_md.hpte_remove);
-	make_bl(ht64_call_hpte_updatepp, ppc_md.hpte_updatepp);
+	patch_branch(ht64_call_hpte_insert1,
+		FUNCTION_TEXT(ppc_md.hpte_insert),
+		BRANCH_SET_LINK);
+	patch_branch(ht64_call_hpte_insert2,
+		FUNCTION_TEXT(ppc_md.hpte_insert),
+		BRANCH_SET_LINK);
+	patch_branch(ht64_call_hpte_remove,
+		FUNCTION_TEXT(ppc_md.hpte_remove),
+		BRANCH_SET_LINK);
+	patch_branch(ht64_call_hpte_updatepp,
+		FUNCTION_TEXT(ppc_md.hpte_updatepp),
+		BRANCH_SET_LINK);
+
 #endif /* CONFIG_PPC_HAS_HASH_64K */
 
-	make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert);
-	make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert);
-	make_bl(htab_call_hpte_remove, ppc_md.hpte_remove);
-	make_bl(htab_call_hpte_updatepp, ppc_md.hpte_updatepp);
+	patch_branch(htab_call_hpte_insert1,
+		FUNCTION_TEXT(ppc_md.hpte_insert),
+		BRANCH_SET_LINK);
+	patch_branch(htab_call_hpte_insert2,
+		FUNCTION_TEXT(ppc_md.hpte_insert),
+		BRANCH_SET_LINK);
+	patch_branch(htab_call_hpte_remove,
+		FUNCTION_TEXT(ppc_md.hpte_remove),
+		BRANCH_SET_LINK);
+	patch_branch(htab_call_hpte_updatepp,
+		FUNCTION_TEXT(ppc_md.hpte_updatepp),
+		BRANCH_SET_LINK);
 }
 
 static void __init htab_initialize(void)
@@ -598,7 +608,7 @@ static void __init htab_initialize(void)
 	/* Initialize page sizes */
 	htab_init_page_sizes();
 
-	if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) {
+	if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
 		mmu_kernel_ssize = MMU_SEGSIZE_1T;
 		mmu_highuser_ssize = MMU_SEGSIZE_1T;
 		printk(KERN_INFO "Using 1TB segments\n");
@@ -739,7 +749,7 @@ void __init early_init_mmu(void)
 
 	/* Initialize stab / SLB management except on iSeries
 	 */
-	if (cpu_has_feature(CPU_FTR_SLB))
+	if (mmu_has_feature(MMU_FTR_SLB))
 		slb_initialize();
 	else if (!firmware_has_feature(FW_FEATURE_ISERIES))
 		stab_initialize(get_paca()->stab_real);
@@ -756,7 +766,7 @@ void __cpuinit early_init_mmu_secondary(void)
 	 * in real mode on pSeries and we want a virtual address on
 	 * iSeries anyway
 	 */
-	if (cpu_has_feature(CPU_FTR_SLB))
+	if (mmu_has_feature(MMU_FTR_SLB))
 		slb_initialize();
 	else
 		stab_initialize(get_paca()->stab_addr);
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 9bb249c3046..0b9a5c1901b 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -529,7 +529,7 @@ static int __init hugetlbpage_init(void)
 {
 	int psize;
 
-	if (!cpu_has_feature(CPU_FTR_16M_PAGE))
+	if (!mmu_has_feature(MMU_FTR_16M_PAGE))
 		return -ENODEV;
 
 	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c
index 2535828aa84..3bafc3deca6 100644
--- a/arch/powerpc/mm/mmu_context_hash64.c
+++ b/arch/powerpc/mm/mmu_context_hash64.c
@@ -20,9 +20,205 @@
 #include <linux/idr.h>
 #include <linux/module.h>
 #include <linux/gfp.h>
+#include <linux/slab.h>
 
 #include <asm/mmu_context.h>
 
+#ifdef CONFIG_PPC_ICSWX
+/*
+ * The processor and its L2 cache cause the icswx instruction to
+ * generate a COP_REQ transaction on PowerBus. The transaction has
+ * no address, and the processor does not perform an MMU access
+ * to authenticate the transaction. The command portion of the
+ * PowerBus COP_REQ transaction includes the LPAR_ID (LPID) and
+ * the coprocessor Process ID (PID), which the coprocessor compares
+ * to the authorized LPID and PID held in the coprocessor, to determine
+ * if the process is authorized to generate the transaction.
+ * The data of the COP_REQ transaction is 128-byte or less and is
+ * placed in cacheable memory on a 128-byte cache line boundary.
+ *
+ * The task to use a coprocessor should use use_cop() to allocate
+ * a coprocessor PID before executing icswx instruction. use_cop()
+ * also enables the coprocessor context switching. Drop_cop() is
+ * used to free the coprocessor PID.
+ *
+ * Example:
+ * Host Fabric Interface (HFI) is a PowerPC network coprocessor.
+ * Each HFI have multiple windows. Each HFI window serves as a
+ * network device sending to and receiving from HFI network.
+ * HFI immediate send function uses icswx instruction. The immediate
+ * send function allows small (single cache-line) packets be sent
+ * without using the regular HFI send FIFO and doorbell, which are
+ * much slower than immediate send.
+ *
+ * For each task intending to use HFI immediate send, the HFI driver
+ * calls use_cop() to obtain a coprocessor PID for the task.
+ * The HFI driver then allocate a free HFI window and save the
+ * coprocessor PID to the HFI window to allow the task to use the
+ * HFI window.
+ *
+ * The HFI driver repeatedly creates immediate send packets and
+ * issues icswx instruction to send data through the HFI window.
+ * The HFI compares the coprocessor PID in the CPU PID register
+ * to the PID held in the HFI window to determine if the transaction
+ * is allowed.
+ *
+ * When the task to release the HFI window, the HFI driver calls
+ * drop_cop() to release the coprocessor PID.
+ */
+
+#define COP_PID_NONE 0
+#define COP_PID_MIN (COP_PID_NONE + 1)
+#define COP_PID_MAX (0xFFFF)
+
+static DEFINE_SPINLOCK(mmu_context_acop_lock);
+static DEFINE_IDA(cop_ida);
+
+void switch_cop(struct mm_struct *next)
+{
+	mtspr(SPRN_PID, next->context.cop_pid);
+	mtspr(SPRN_ACOP, next->context.acop);
+}
+
+static int new_cop_pid(struct ida *ida, int min_id, int max_id,
+		       spinlock_t *lock)
+{
+	int index;
+	int err;
+
+again:
+	if (!ida_pre_get(ida, GFP_KERNEL))
+		return -ENOMEM;
+
+	spin_lock(lock);
+	err = ida_get_new_above(ida, min_id, &index);
+	spin_unlock(lock);
+
+	if (err == -EAGAIN)
+		goto again;
+	else if (err)
+		return err;
+
+	if (index > max_id) {
+		spin_lock(lock);
+		ida_remove(ida, index);
+		spin_unlock(lock);
+		return -ENOMEM;
+	}
+
+	return index;
+}
+
+static void sync_cop(void *arg)
+{
+	struct mm_struct *mm = arg;
+
+	if (mm == current->active_mm)
+		switch_cop(current->active_mm);
+}
+
+/**
+ * Start using a coprocessor.
+ * @acop: mask of coprocessor to be used.
+ * @mm: The mm the coprocessor to associate with. Most likely current mm.
+ *
+ * Return a positive PID if successful. Negative errno otherwise.
+ * The returned PID will be fed to the coprocessor to determine if an
+ * icswx transaction is authenticated.
+ */
+int use_cop(unsigned long acop, struct mm_struct *mm)
+{
+	int ret;
+
+	if (!cpu_has_feature(CPU_FTR_ICSWX))
+		return -ENODEV;
+
+	if (!mm || !acop)
+		return -EINVAL;
+
+	/* We need to make sure mm_users doesn't change */
+	down_read(&mm->mmap_sem);
+	spin_lock(mm->context.cop_lockp);
+
+	if (mm->context.cop_pid == COP_PID_NONE) {
+		ret = new_cop_pid(&cop_ida, COP_PID_MIN, COP_PID_MAX,
+				  &mmu_context_acop_lock);
+		if (ret < 0)
+			goto out;
+
+		mm->context.cop_pid = ret;
+	}
+	mm->context.acop |= acop;
+
+	sync_cop(mm);
+
+	/*
+	 * If this is a threaded process then there might be other threads
+	 * running. We need to send an IPI to force them to pick up any
+	 * change in PID and ACOP.
+	 */
+	if (atomic_read(&mm->mm_users) > 1)
+		smp_call_function(sync_cop, mm, 1);
+
+	ret = mm->context.cop_pid;
+
+out:
+	spin_unlock(mm->context.cop_lockp);
+	up_read(&mm->mmap_sem);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(use_cop);
+
+/**
+ * Stop using a coprocessor.
+ * @acop: mask of coprocessor to be stopped.
+ * @mm: The mm the coprocessor associated with.
+ */
+void drop_cop(unsigned long acop, struct mm_struct *mm)
+{
+	int free_pid = COP_PID_NONE;
+
+	if (!cpu_has_feature(CPU_FTR_ICSWX))
+		return;
+
+	if (WARN_ON_ONCE(!mm))
+		return;
+
+	/* We need to make sure mm_users doesn't change */
+	down_read(&mm->mmap_sem);
+	spin_lock(mm->context.cop_lockp);
+
+	mm->context.acop &= ~acop;
+
+	if ((!mm->context.acop) && (mm->context.cop_pid != COP_PID_NONE)) {
+		free_pid = mm->context.cop_pid;
+		mm->context.cop_pid = COP_PID_NONE;
+	}
+
+	sync_cop(mm);
+
+	/*
+	 * If this is a threaded process then there might be other threads
+	 * running. We need to send an IPI to force them to pick up any
+	 * change in PID and ACOP.
+	 */
+	if (atomic_read(&mm->mm_users) > 1)
+		smp_call_function(sync_cop, mm, 1);
+
+	if (free_pid != COP_PID_NONE) {
+		spin_lock(&mmu_context_acop_lock);
+		ida_remove(&cop_ida, free_pid);
+		spin_unlock(&mmu_context_acop_lock);
+	}
+
+	spin_unlock(mm->context.cop_lockp);
+	up_read(&mm->mmap_sem);
+}
+EXPORT_SYMBOL_GPL(drop_cop);
+
+#endif /* CONFIG_PPC_ICSWX */
+
 static DEFINE_SPINLOCK(mmu_context_lock);
 static DEFINE_IDA(mmu_context_ida);
 
@@ -31,7 +227,6 @@ static DEFINE_IDA(mmu_context_ida);
  * Each segment contains 2^28 bytes.  Each context maps 2^44 bytes,
  * so we can support 2^19-1 contexts (19 == 35 + 28 - 44).
  */
-#define NO_CONTEXT	0
 #define MAX_CONTEXT	((1UL << 19) - 1)
 
 int __init_new_context(void)
@@ -79,6 +274,16 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 		slice_set_user_psize(mm, mmu_virtual_psize);
 	subpage_prot_init_new_context(mm);
 	mm->context.id = index;
+#ifdef CONFIG_PPC_ICSWX
+	mm->context.cop_lockp = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
+	if (!mm->context.cop_lockp) {
+		__destroy_context(index);
+		subpage_prot_free(mm);
+		mm->context.id = MMU_NO_CONTEXT;
+		return -ENOMEM;
+	}
+	spin_lock_init(mm->context.cop_lockp);
+#endif /* CONFIG_PPC_ICSWX */
 
 	return 0;
 }
@@ -93,7 +298,12 @@ EXPORT_SYMBOL_GPL(__destroy_context);
 
 void destroy_context(struct mm_struct *mm)
 {
+#ifdef CONFIG_PPC_ICSWX
+	drop_cop(mm->context.acop, mm);
+	kfree(mm->context.cop_lockp);
+	mm->context.cop_lockp = NULL;
+#endif /* CONFIG_PPC_ICSWX */
 	__destroy_context(mm->context.id);
 	subpage_prot_free(mm);
-	mm->context.id = NO_CONTEXT;
+	mm->context.id = MMU_NO_CONTEXT;
 }
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index c0aab52da3a..336807de550 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -338,12 +338,14 @@ static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self,
 		return NOTIFY_OK;
 
 	switch (action) {
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
 		pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu);
 		stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
 		break;
 #ifdef CONFIG_HOTPLUG_CPU
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
 		pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu);
@@ -407,7 +409,17 @@ void __init mmu_context_init(void)
 	} else if (mmu_has_feature(MMU_FTR_TYPE_47x)) {
 		first_context = 1;
 		last_context = 65535;
-	} else {
+	} else
+#ifdef CONFIG_PPC_BOOK3E_MMU
+	if (mmu_has_feature(MMU_FTR_TYPE_3E)) {
+		u32 mmucfg = mfspr(SPRN_MMUCFG);
+		u32 pid_bits = (mmucfg & MMUCFG_PIDSIZE_MASK)
+				>> MMUCFG_PIDSIZE_SHIFT;
+		first_context = 1;
+		last_context = (1UL << (pid_bits + 1)) - 1;
+	} else
+#endif
+	{
 		first_context = 1;
 		last_context = 255;
 	}
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 5ec1dad2a19..2164006fe17 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -311,14 +311,13 @@ EXPORT_SYMBOL_GPL(of_node_to_nid);
 static int __init find_min_common_depth(void)
 {
 	int depth;
-	struct device_node *rtas_root;
 	struct device_node *chosen;
+	struct device_node *root;
 	const char *vec5;
 
-	rtas_root = of_find_node_by_path("/rtas");
-
-	if (!rtas_root)
-		return -1;
+	root = of_find_node_by_path("/rtas");
+	if (!root)
+		root = of_find_node_by_path("/");
 
 	/*
 	 * This property is a set of 32-bit integers, each representing
@@ -332,7 +331,7 @@ static int __init find_min_common_depth(void)
 	 * NUMA boundary and the following are progressively less significant
 	 * boundaries. There can be more than one level of NUMA.
 	 */
-	distance_ref_points = of_get_property(rtas_root,
+	distance_ref_points = of_get_property(root,
 					"ibm,associativity-reference-points",
 					&distance_ref_points_depth);
 
@@ -376,11 +375,11 @@ static int __init find_min_common_depth(void)
 		distance_ref_points_depth = MAX_DISTANCE_REF_POINTS;
 	}
 
-	of_node_put(rtas_root);
+	of_node_put(root);
 	return depth;
 
 err:
-	of_node_put(rtas_root);
+	of_node_put(root);
 	return -1;
 }
 
@@ -1453,7 +1452,7 @@ int arch_update_cpu_topology(void)
 	unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0};
 	struct sys_device *sysdev;
 
-	for_each_cpu_mask(cpu, cpu_associativity_changes_mask) {
+	for_each_cpu(cpu,&cpu_associativity_changes_mask) {
 		vphn_get_associativity(cpu, associativity);
 		nid = associativity_to_nid(associativity);
 
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 8dc41c0157f..51f87956f8f 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -133,7 +133,15 @@ ioremap(phys_addr_t addr, unsigned long size)
 EXPORT_SYMBOL(ioremap);
 
 void __iomem *
-ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags)
+ioremap_wc(phys_addr_t addr, unsigned long size)
+{
+	return __ioremap_caller(addr, size, _PAGE_NO_CACHE,
+				__builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_wc);
+
+void __iomem *
+ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags)
 {
 	/* writeable implies dirty for kernel addresses */
 	if (flags & _PAGE_RW)
@@ -152,7 +160,7 @@ ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags)
 
 	return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
 }
-EXPORT_SYMBOL(ioremap_flags);
+EXPORT_SYMBOL(ioremap_prot);
 
 void __iomem *
 __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 88927a05cdc..6e595f6496d 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -255,7 +255,17 @@ void __iomem * ioremap(phys_addr_t addr, unsigned long size)
 	return __ioremap_caller(addr, size, flags, caller);
 }
 
-void __iomem * ioremap_flags(phys_addr_t addr, unsigned long size,
+void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size)
+{
+	unsigned long flags = _PAGE_NO_CACHE;
+	void *caller = __builtin_return_address(0);
+
+	if (ppc_md.ioremap)
+		return ppc_md.ioremap(addr, size, flags, caller);
+	return __ioremap_caller(addr, size, flags, caller);
+}
+
+void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
 			     unsigned long flags)
 {
 	void *caller = __builtin_return_address(0);
@@ -311,7 +321,8 @@ void iounmap(volatile void __iomem *token)
 }
 
 EXPORT_SYMBOL(ioremap);
-EXPORT_SYMBOL(ioremap_flags);
+EXPORT_SYMBOL(ioremap_wc);
+EXPORT_SYMBOL(ioremap_prot);
 EXPORT_SYMBOL(__ioremap);
 EXPORT_SYMBOL(__ioremap_at);
 EXPORT_SYMBOL(iounmap);
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 1d98ecc8eec..e22276cb67a 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -24,6 +24,7 @@
 #include <asm/firmware.h>
 #include <linux/compiler.h>
 #include <asm/udbg.h>
+#include <asm/code-patching.h>
 
 
 extern void slb_allocate_realmode(unsigned long ea);
@@ -166,7 +167,7 @@ static inline int esids_match(unsigned long addr1, unsigned long addr2)
 	int esid_1t_count;
 
 	/* System is not 1T segment size capable. */
-	if (!cpu_has_feature(CPU_FTR_1T_SEGMENT))
+	if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
 		return (GET_ESID(addr1) == GET_ESID(addr2));
 
 	esid_1t_count = (((addr1 >> SID_SHIFT_1T) != 0) +
@@ -201,7 +202,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 	 */
 	hard_irq_disable();
 	offset = get_paca()->slb_cache_ptr;
-	if (!cpu_has_feature(CPU_FTR_NO_SLBIE_B) &&
+	if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
 	    offset <= SLB_CACHE_ENTRIES) {
 		int i;
 		asm volatile("isync" : : : "memory");
@@ -249,9 +250,8 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 static inline void patch_slb_encoding(unsigned int *insn_addr,
 				      unsigned int immed)
 {
-	*insn_addr = (*insn_addr & 0xffff0000) | immed;
-	flush_icache_range((unsigned long)insn_addr, 4+
-			   (unsigned long)insn_addr);
+	int insn = (*insn_addr & 0xffff0000) | immed;
+	patch_instruction(insn_addr, insn);
 }
 
 void slb_set_size(u16 size)
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 95ce3558169..ef653dc95b6 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -58,7 +58,7 @@ _GLOBAL(slb_miss_kernel_load_linear)
 	li	r11,0
 BEGIN_FTR_SECTION
 	b	slb_finish_load
-END_FTR_SECTION_IFCLR(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
 	b	slb_finish_load_1T
 
 1:
@@ -87,7 +87,7 @@ _GLOBAL(slb_miss_kernel_load_vmemmap)
 6:
 BEGIN_FTR_SECTION
 	b	slb_finish_load
-END_FTR_SECTION_IFCLR(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
 	b	slb_finish_load_1T
 
 0:	/* user address: proto-VSID = context << 15 | ESID. First check
@@ -138,11 +138,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_1T_SEGMENT)
 	ld	r9,PACACONTEXTID(r13)
 BEGIN_FTR_SECTION
 	cmpldi	r10,0x1000
-END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 	rldimi	r10,r9,USER_ESID_BITS,0
 BEGIN_FTR_SECTION
 	bge	slb_finish_load_1T
-END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 	b	slb_finish_load
 
 8:	/* invalid EA */
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c
index 446a01842a7..41e31642a86 100644
--- a/arch/powerpc/mm/stab.c
+++ b/arch/powerpc/mm/stab.c
@@ -243,7 +243,7 @@ void __init stabs_alloc(void)
 {
 	int cpu;
 
-	if (cpu_has_feature(CPU_FTR_SLB))
+	if (mmu_has_feature(MMU_FTR_SLB))
 		return;
 
 	for_each_possible_cpu(cpu) {
diff --git a/arch/powerpc/platforms/44x/iss4xx.c b/arch/powerpc/platforms/44x/iss4xx.c
index aa46e9d1e77..19395f18b1d 100644
--- a/arch/powerpc/platforms/44x/iss4xx.c
+++ b/arch/powerpc/platforms/44x/iss4xx.c
@@ -87,7 +87,7 @@ static void __cpuinit smp_iss4xx_setup_cpu(int cpu)
 	mpic_setup_this_cpu();
 }
 
-static void __cpuinit smp_iss4xx_kick_cpu(int cpu)
+static int __cpuinit smp_iss4xx_kick_cpu(int cpu)
 {
 	struct device_node *cpunode = of_get_cpu_node(cpu, NULL);
 	const u64 *spin_table_addr_prop;
@@ -104,7 +104,7 @@ static void __cpuinit smp_iss4xx_kick_cpu(int cpu)
 					       NULL);
 	if (spin_table_addr_prop == NULL) {
 		pr_err("CPU%d: Can't start, missing cpu-release-addr !\n", cpu);
-		return;
+		return -ENOENT;
 	}
 
 	/* Assume it's mapped as part of the linear mapping. This is a bit
@@ -117,6 +117,8 @@ static void __cpuinit smp_iss4xx_kick_cpu(int cpu)
 	smp_wmb();
 	spin_table[1] = __pa(start_secondary_47x);
 	mb();
+
+	return 0;
 }
 
 static struct smp_ops_t iss_smp_ops = {
diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
index cfc4b200998..9f09319352c 100644
--- a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
+++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
@@ -61,7 +61,7 @@ irq_to_pic_bit(unsigned int irq)
 static void
 cpld_mask_irq(struct irq_data *d)
 {
-	unsigned int cpld_irq = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int cpld_irq = (unsigned int)irqd_to_hwirq(d);
 	void __iomem *pic_mask = irq_to_pic_mask(cpld_irq);
 
 	out_8(pic_mask,
@@ -71,7 +71,7 @@ cpld_mask_irq(struct irq_data *d)
 static void
 cpld_unmask_irq(struct irq_data *d)
 {
-	unsigned int cpld_irq = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int cpld_irq = (unsigned int)irqd_to_hwirq(d);
 	void __iomem *pic_mask = irq_to_pic_mask(cpld_irq);
 
 	out_8(pic_mask,
@@ -97,7 +97,7 @@ cpld_pic_get_irq(int offset, u8 ignore, u8 __iomem *statusp,
 	status |= (ignore | mask);
 
 	if (status == 0xff)
-		return NO_IRQ_IGNORE;
+		return NO_IRQ;
 
 	cpld_irq = ffz(status) + offset;
 
@@ -109,14 +109,14 @@ cpld_pic_cascade(unsigned int irq, struct irq_desc *desc)
 {
 	irq = cpld_pic_get_irq(0, PCI_IGNORE, &cpld_regs->pci_status,
 		&cpld_regs->pci_mask);
-	if (irq != NO_IRQ && irq != NO_IRQ_IGNORE) {
+	if (irq != NO_IRQ) {
 		generic_handle_irq(irq);
 		return;
 	}
 
 	irq = cpld_pic_get_irq(8, MISC_IGNORE, &cpld_regs->misc_status,
 		&cpld_regs->misc_mask);
-	if (irq != NO_IRQ && irq != NO_IRQ_IGNORE) {
+	if (irq != NO_IRQ) {
 		generic_handle_irq(irq);
 		return;
 	}
diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c
index 57a6a349e93..96f85e5e0cd 100644
--- a/arch/powerpc/platforms/52xx/media5200.c
+++ b/arch/powerpc/platforms/52xx/media5200.c
@@ -56,7 +56,7 @@ static void media5200_irq_unmask(struct irq_data *d)
 
 	spin_lock_irqsave(&media5200_irq.lock, flags);
 	val = in_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE);
-	val |= 1 << (MEDIA5200_IRQ_SHIFT + irq_map[d->irq].hwirq);
+	val |= 1 << (MEDIA5200_IRQ_SHIFT + irqd_to_hwirq(d));
 	out_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE, val);
 	spin_unlock_irqrestore(&media5200_irq.lock, flags);
 }
@@ -68,7 +68,7 @@ static void media5200_irq_mask(struct irq_data *d)
 
 	spin_lock_irqsave(&media5200_irq.lock, flags);
 	val = in_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE);
-	val &= ~(1 << (MEDIA5200_IRQ_SHIFT + irq_map[d->irq].hwirq));
+	val &= ~(1 << (MEDIA5200_IRQ_SHIFT + irqd_to_hwirq(d)));
 	out_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE, val);
 	spin_unlock_irqrestore(&media5200_irq.lock, flags);
 }
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c
index 1dd15400f6f..1a9a4957057 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_pic.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c
@@ -157,48 +157,30 @@ static inline void io_be_clrbit(u32 __iomem *addr, int bitno)
  */
 static void mpc52xx_extirq_mask(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	io_be_clrbit(&intr->ctrl, 11 - l2irq);
 }
 
 static void mpc52xx_extirq_unmask(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	io_be_setbit(&intr->ctrl, 11 - l2irq);
 }
 
 static void mpc52xx_extirq_ack(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	io_be_setbit(&intr->ctrl, 27-l2irq);
 }
 
 static int mpc52xx_extirq_set_type(struct irq_data *d, unsigned int flow_type)
 {
 	u32 ctrl_reg, type;
-	int irq;
-	int l2irq;
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	void *handler = handle_level_irq;
 
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
-	pr_debug("%s: irq=%x. l2=%d flow_type=%d\n", __func__, irq, l2irq, flow_type);
+	pr_debug("%s: irq=%x. l2=%d flow_type=%d\n", __func__,
+		(int) irqd_to_hwirq(d), l2irq, flow_type);
 
 	switch (flow_type) {
 	case IRQF_TRIGGER_HIGH: type = 0; break;
@@ -237,23 +219,13 @@ static int mpc52xx_null_set_type(struct irq_data *d, unsigned int flow_type)
 
 static void mpc52xx_main_mask(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	io_be_setbit(&intr->main_mask, 16 - l2irq);
 }
 
 static void mpc52xx_main_unmask(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	io_be_clrbit(&intr->main_mask, 16 - l2irq);
 }
 
@@ -270,23 +242,13 @@ static struct irq_chip mpc52xx_main_irqchip = {
  */
 static void mpc52xx_periph_mask(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	io_be_setbit(&intr->per_mask, 31 - l2irq);
 }
 
 static void mpc52xx_periph_unmask(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	io_be_clrbit(&intr->per_mask, 31 - l2irq);
 }
 
@@ -303,34 +265,19 @@ static struct irq_chip mpc52xx_periph_irqchip = {
  */
 static void mpc52xx_sdma_mask(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	io_be_setbit(&sdma->IntMask, l2irq);
 }
 
 static void mpc52xx_sdma_unmask(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	io_be_clrbit(&sdma->IntMask, l2irq);
 }
 
 static void mpc52xx_sdma_ack(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	out_be32(&sdma->IntPend, 1 << l2irq);
 }
 
@@ -539,7 +486,7 @@ void __init mpc52xx_init_irq(void)
 unsigned int mpc52xx_get_irq(void)
 {
 	u32 status;
-	int irq = NO_IRQ_IGNORE;
+	int irq;
 
 	status = in_be32(&intr->enc_status);
 	if (status & 0x00000400) {	/* critical */
@@ -562,6 +509,8 @@ unsigned int mpc52xx_get_irq(void)
 		} else {
 			irq |= (MPC52xx_IRQ_L1_PERP << MPC52xx_IRQ_L1_OFFSET);
 		}
+	} else {
+		return NO_IRQ;
 	}
 
 	return irq_linear_revmap(mpc52xx_irqhost, irq);
diff --git a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
index 4a4eb6ffa12..8ccf9ed62fe 100644
--- a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
+++ b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
@@ -42,7 +42,7 @@ struct pq2ads_pci_pic {
 static void pq2ads_pci_mask_irq(struct irq_data *d)
 {
 	struct pq2ads_pci_pic *priv = irq_data_get_irq_chip_data(d);
-	int irq = NUM_IRQS - virq_to_hw(d->irq) - 1;
+	int irq = NUM_IRQS - irqd_to_hwirq(d) - 1;
 
 	if (irq != -1) {
 		unsigned long flags;
@@ -58,7 +58,7 @@ static void pq2ads_pci_mask_irq(struct irq_data *d)
 static void pq2ads_pci_unmask_irq(struct irq_data *d)
 {
 	struct pq2ads_pci_pic *priv = irq_data_get_irq_chip_data(d);
-	int irq = NUM_IRQS - virq_to_hw(d->irq) - 1;
+	int irq = NUM_IRQS - irqd_to_hwirq(d) - 1;
 
 	if (irq != -1) {
 		unsigned long flags;
@@ -112,16 +112,8 @@ static int pci_pic_host_map(struct irq_host *h, unsigned int virq,
 	return 0;
 }
 
-static void pci_host_unmap(struct irq_host *h, unsigned int virq)
-{
-	/* remove chip and handler */
-	irq_set_chip_data(virq, NULL);
-	irq_set_chip(virq, NULL);
-}
-
 static struct irq_host_ops pci_pic_host_ops = {
 	.map = pci_pic_host_map,
-	.unmap = pci_host_unmap,
 };
 
 int __init pq2ads_pci_init_irq(void)
diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c
index 188272934cf..104faa8aa23 100644
--- a/arch/powerpc/platforms/83xx/suspend.c
+++ b/arch/powerpc/platforms/83xx/suspend.c
@@ -318,17 +318,20 @@ static const struct platform_suspend_ops mpc83xx_suspend_ops = {
 	.end = mpc83xx_suspend_end,
 };
 
+static struct of_device_id pmc_match[];
 static int pmc_probe(struct platform_device *ofdev)
 {
+	const struct of_device_id *match;
 	struct device_node *np = ofdev->dev.of_node;
 	struct resource res;
 	struct pmc_type *type;
 	int ret = 0;
 
-	if (!ofdev->dev.of_match)
+	match = of_match_device(pmc_match, &ofdev->dev);
+	if (!match)
 		return -EINVAL;
 
-	type = ofdev->dev.of_match->data;
+	type = match->data;
 
 	if (!of_device_is_available(np))
 		return -ENODEV;
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index 0d00ff9d05a..d6a93a10c0f 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -41,7 +41,7 @@ extern void __early_start(void);
 #define NUM_BOOT_ENTRY		8
 #define SIZE_BOOT_ENTRY		(NUM_BOOT_ENTRY * sizeof(u32))
 
-static void __init
+static int __init
 smp_85xx_kick_cpu(int nr)
 {
 	unsigned long flags;
@@ -60,7 +60,7 @@ smp_85xx_kick_cpu(int nr)
 
 	if (cpu_rel_addr == NULL) {
 		printk(KERN_ERR "No cpu-release-addr for cpu %d\n", nr);
-		return;
+		return -ENOENT;
 	}
 
 	/*
@@ -107,6 +107,8 @@ smp_85xx_kick_cpu(int nr)
 		iounmap(bptr_vaddr);
 
 	pr_debug("waited %d msecs for CPU #%d.\n", n, nr);
+
+	return 0;
 }
 
 static void __init
@@ -233,8 +235,10 @@ void __init mpc85xx_smp_init(void)
 		smp_85xx_ops.message_pass = smp_mpic_message_pass;
 	}
 
-	if (cpu_has_feature(CPU_FTR_DBELL))
-		smp_85xx_ops.message_pass = doorbell_message_pass;
+	if (cpu_has_feature(CPU_FTR_DBELL)) {
+		smp_85xx_ops.message_pass = smp_muxed_ipi_message_pass;
+		smp_85xx_ops.cause_ipi = doorbell_cause_ipi;
+	}
 
 	BUG_ON(!smp_85xx_ops.message_pass);
 
diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
index db864623b4a..12cb9bb2cc6 100644
--- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
+++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
@@ -48,8 +48,6 @@ static struct socrates_fpga_irq_info fpga_irqs[SOCRATES_FPGA_NUM_IRQS] = {
 	[8] = {0, IRQ_TYPE_LEVEL_HIGH},
 };
 
-#define socrates_fpga_irq_to_hw(virq)    ((unsigned int)irq_map[virq].hwirq)
-
 static DEFINE_RAW_SPINLOCK(socrates_fpga_pic_lock);
 
 static void __iomem *socrates_fpga_pic_iobase;
@@ -110,11 +108,9 @@ void socrates_fpga_pic_cascade(unsigned int irq, struct irq_desc *desc)
 static void socrates_fpga_pic_ack(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int hwirq, irq_line;
+	unsigned int irq_line, hwirq = irqd_to_hwirq(d);
 	uint32_t mask;
 
-	hwirq = socrates_fpga_irq_to_hw(d->irq);
-
 	irq_line = fpga_irqs[hwirq].irq_line;
 	raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags);
 	mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line))
@@ -127,12 +123,10 @@ static void socrates_fpga_pic_ack(struct irq_data *d)
 static void socrates_fpga_pic_mask(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int hwirq;
+	unsigned int hwirq = irqd_to_hwirq(d);
 	int irq_line;
 	u32 mask;
 
-	hwirq = socrates_fpga_irq_to_hw(d->irq);
-
 	irq_line = fpga_irqs[hwirq].irq_line;
 	raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags);
 	mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line))
@@ -145,12 +139,10 @@ static void socrates_fpga_pic_mask(struct irq_data *d)
 static void socrates_fpga_pic_mask_ack(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int hwirq;
+	unsigned int hwirq = irqd_to_hwirq(d);
 	int irq_line;
 	u32 mask;
 
-	hwirq = socrates_fpga_irq_to_hw(d->irq);
-
 	irq_line = fpga_irqs[hwirq].irq_line;
 	raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags);
 	mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line))
@@ -164,12 +156,10 @@ static void socrates_fpga_pic_mask_ack(struct irq_data *d)
 static void socrates_fpga_pic_unmask(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int hwirq;
+	unsigned int hwirq = irqd_to_hwirq(d);
 	int irq_line;
 	u32 mask;
 
-	hwirq = socrates_fpga_irq_to_hw(d->irq);
-
 	irq_line = fpga_irqs[hwirq].irq_line;
 	raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags);
 	mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line))
@@ -182,12 +172,10 @@ static void socrates_fpga_pic_unmask(struct irq_data *d)
 static void socrates_fpga_pic_eoi(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int hwirq;
+	unsigned int hwirq = irqd_to_hwirq(d);
 	int irq_line;
 	u32 mask;
 
-	hwirq = socrates_fpga_irq_to_hw(d->irq);
-
 	irq_line = fpga_irqs[hwirq].irq_line;
 	raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags);
 	mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line))
@@ -201,12 +189,10 @@ static int socrates_fpga_pic_set_type(struct irq_data *d,
 		unsigned int flow_type)
 {
 	unsigned long flags;
-	unsigned int hwirq;
+	unsigned int hwirq = irqd_to_hwirq(d);
 	int polarity;
 	u32 mask;
 
-	hwirq = socrates_fpga_irq_to_hw(d->irq);
-
 	if (fpga_irqs[hwirq].type != IRQ_TYPE_NONE)
 		return -EINVAL;
 
diff --git a/arch/powerpc/platforms/86xx/gef_pic.c b/arch/powerpc/platforms/86xx/gef_pic.c
index 0beec7d5566..94594e58594 100644
--- a/arch/powerpc/platforms/86xx/gef_pic.c
+++ b/arch/powerpc/platforms/86xx/gef_pic.c
@@ -46,8 +46,6 @@
 #define GEF_PIC_CPU0_MCP_MASK	GEF_PIC_MCP_MASK(0)
 #define GEF_PIC_CPU1_MCP_MASK	GEF_PIC_MCP_MASK(1)
 
-#define gef_irq_to_hw(virq)    ((unsigned int)irq_map[virq].hwirq)
-
 
 static DEFINE_RAW_SPINLOCK(gef_pic_lock);
 
@@ -113,11 +111,9 @@ void gef_pic_cascade(unsigned int irq, struct irq_desc *desc)
 static void gef_pic_mask(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int hwirq;
+	unsigned int hwirq = irqd_to_hwirq(d);
 	u32 mask;
 
-	hwirq = gef_irq_to_hw(d->irq);
-
 	raw_spin_lock_irqsave(&gef_pic_lock, flags);
 	mask = in_be32(gef_pic_irq_reg_base + GEF_PIC_INTR_MASK(0));
 	mask &= ~(1 << hwirq);
@@ -136,11 +132,9 @@ static void gef_pic_mask_ack(struct irq_data *d)
 static void gef_pic_unmask(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int hwirq;
+	unsigned int hwirq = irqd_to_hwirq(d);
 	u32 mask;
 
-	hwirq = gef_irq_to_hw(d->irq);
-
 	raw_spin_lock_irqsave(&gef_pic_lock, flags);
 	mask = in_be32(gef_pic_irq_reg_base + GEF_PIC_INTR_MASK(0));
 	mask |= (1 << hwirq);
diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
index 018cc67be42..a896511690c 100644
--- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
+++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
@@ -66,7 +66,7 @@ static void __init mpc8610_suspend_init(void)
 		return;
 	}
 
-	ret = request_irq(irq, mpc8610_sw9_irq, 0, "sw9/wakeup", NULL);
+	ret = request_irq(irq, mpc8610_sw9_irq, 0, "sw9:wakeup", NULL);
 	if (ret) {
 		pr_err("%s: can't request pixis event IRQ: %d\n",
 		       __func__, ret);
@@ -105,45 +105,77 @@ machine_device_initcall(mpc86xx_hpcd, mpc8610_declare_of_platform_devices);
 
 #if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
 
-static u32 get_busfreq(void)
-{
-	struct device_node *node;
-
-	u32 fs_busfreq = 0;
-	node = of_find_node_by_type(NULL, "cpu");
-	if (node) {
-		unsigned int size;
-		const unsigned int *prop =
-			of_get_property(node, "bus-frequency", &size);
-		if (prop)
-			fs_busfreq = *prop;
-		of_node_put(node);
-	};
-	return fs_busfreq;
-}
+/*
+ * DIU Area Descriptor
+ *
+ * The MPC8610 reference manual shows the bits of the AD register in
+ * little-endian order, which causes the BLUE_C field to be split into two
+ * parts. To simplify the definition of the MAKE_AD() macro, we define the
+ * fields in big-endian order and byte-swap the result.
+ *
+ * So even though the registers don't look like they're in the
+ * same bit positions as they are on the P1022, the same value is written to
+ * the AD register on the MPC8610 and on the P1022.
+ */
+#define AD_BYTE_F		0x10000000
+#define AD_ALPHA_C_MASK		0x0E000000
+#define AD_ALPHA_C_SHIFT	25
+#define AD_BLUE_C_MASK		0x01800000
+#define AD_BLUE_C_SHIFT		23
+#define AD_GREEN_C_MASK		0x00600000
+#define AD_GREEN_C_SHIFT	21
+#define AD_RED_C_MASK		0x00180000
+#define AD_RED_C_SHIFT		19
+#define AD_PALETTE		0x00040000
+#define AD_PIXEL_S_MASK		0x00030000
+#define AD_PIXEL_S_SHIFT	16
+#define AD_COMP_3_MASK		0x0000F000
+#define AD_COMP_3_SHIFT		12
+#define AD_COMP_2_MASK		0x00000F00
+#define AD_COMP_2_SHIFT		8
+#define AD_COMP_1_MASK		0x000000F0
+#define AD_COMP_1_SHIFT		4
+#define AD_COMP_0_MASK		0x0000000F
+#define AD_COMP_0_SHIFT		0
+
+#define MAKE_AD(alpha, red, blue, green, size, c0, c1, c2, c3) \
+	cpu_to_le32(AD_BYTE_F | (alpha << AD_ALPHA_C_SHIFT) | \
+	(blue << AD_BLUE_C_SHIFT) | (green << AD_GREEN_C_SHIFT) | \
+	(red << AD_RED_C_SHIFT) | (c3 << AD_COMP_3_SHIFT) | \
+	(c2 << AD_COMP_2_SHIFT) | (c1 << AD_COMP_1_SHIFT) | \
+	(c0 << AD_COMP_0_SHIFT) | (size << AD_PIXEL_S_SHIFT))
 
 unsigned int mpc8610hpcd_get_pixel_format(unsigned int bits_per_pixel,
 						int monitor_port)
 {
 	static const unsigned long pixelformat[][3] = {
-		{0x88882317, 0x88083218, 0x65052119},
-		{0x88883316, 0x88082219, 0x65053118},
+		{
+			MAKE_AD(3, 0, 2, 1, 3, 8, 8, 8, 8),
+			MAKE_AD(4, 2, 0, 1, 2, 8, 8, 8, 0),
+			MAKE_AD(4, 0, 2, 1, 1, 5, 6, 5, 0)
+		},
+		{
+			MAKE_AD(3, 2, 0, 1, 3, 8, 8, 8, 8),
+			MAKE_AD(4, 0, 2, 1, 2, 8, 8, 8, 0),
+			MAKE_AD(4, 2, 0, 1, 1, 5, 6, 5, 0)
+		},
 	};
-	unsigned int pix_fmt, arch_monitor;
+	unsigned int arch_monitor;
 
+	/* The DVI port is mis-wired on revision 1 of this board. */
 	arch_monitor = ((*pixis_arch == 0x01) && (monitor_port == 0))? 0 : 1;
-		/* DVI port for board version 0x01 */
-
-	if (bits_per_pixel == 32)
-		pix_fmt = pixelformat[arch_monitor][0];
-	else if (bits_per_pixel == 24)
-		pix_fmt = pixelformat[arch_monitor][1];
-	else if (bits_per_pixel == 16)
-		pix_fmt = pixelformat[arch_monitor][2];
-	else
-		pix_fmt = pixelformat[1][0];
-
-	return pix_fmt;
+
+	switch (bits_per_pixel) {
+	case 32:
+		return pixelformat[arch_monitor][0];
+	case 24:
+		return pixelformat[arch_monitor][1];
+	case 16:
+		return pixelformat[arch_monitor][2];
+	default:
+		pr_err("fsl-diu: unsupported pixel depth %u\n", bits_per_pixel);
+		return 0;
+	}
 }
 
 void mpc8610hpcd_set_gamma_table(int monitor_port, char *gamma_table_base)
@@ -190,8 +222,7 @@ void mpc8610hpcd_set_pixel_clock(unsigned int pixclock)
 	}
 
 	/* Pixel Clock configuration */
-	pr_debug("DIU: Bus Frequency = %d\n", get_busfreq());
-	speed_ccb = get_busfreq();
+	speed_ccb = fsl_get_sys_freq();
 
 	/* Calculate the pixel clock with the smallest error */
 	/* calculate the following in steps to avoid overflow */
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c
index eacea0e3fcc..af09baee22c 100644
--- a/arch/powerpc/platforms/86xx/mpc86xx_smp.c
+++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c
@@ -56,7 +56,7 @@ smp_86xx_release_core(int nr)
 }
 
 
-static void __init
+static int __init
 smp_86xx_kick_cpu(int nr)
 {
 	unsigned int save_vector;
@@ -65,7 +65,7 @@ smp_86xx_kick_cpu(int nr)
 	unsigned int *vector = (unsigned int *)(KERNELBASE + 0x100);
 
 	if (nr < 0 || nr >= NR_CPUS)
-		return;
+		return -ENOENT;
 
 	pr_debug("smp_86xx_kick_cpu: kick CPU #%d\n", nr);
 
@@ -92,6 +92,8 @@ smp_86xx_kick_cpu(int nr)
 	local_irq_restore(flags);
 
 	pr_debug("wait CPU #%d for %d msecs.\n", nr, n);
+
+	return 0;
 }
 
 
diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c
index 9ecce995dd4..1e121088826 100644
--- a/arch/powerpc/platforms/8xx/m8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -150,7 +150,7 @@ void __init mpc8xx_calibrate_decr(void)
 	 */
 	cpu = of_find_node_by_type(NULL, "cpu");
 	virq= irq_of_parse_and_map(cpu, 0);
-	irq = irq_map[virq].hwirq;
+	irq = virq_to_hw(virq);
 
 	sys_tmr2 = immr_map(im_sit);
 	out_be16(&sys_tmr2->sit_tbscr, ((1 << (7 - (irq/2))) << 8) |
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index f7b07720aa3..f970ca2b180 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -20,6 +20,7 @@ source "arch/powerpc/platforms/embedded6xx/Kconfig"
 source "arch/powerpc/platforms/44x/Kconfig"
 source "arch/powerpc/platforms/40x/Kconfig"
 source "arch/powerpc/platforms/amigaone/Kconfig"
+source "arch/powerpc/platforms/wsp/Kconfig"
 
 config KVM_GUEST
 	bool "KVM Guest support"
@@ -56,16 +57,19 @@ config UDBG_RTAS_CONSOLE
 	depends on PPC_RTAS
 	default n
 
+config PPC_SMP_MUXED_IPI
+	bool
+	help
+	  Select this opton if your platform supports SMP and your
+	  interrupt controller provides less than 4 interrupts to each
+	  cpu.	This will enable the generic code to multiplex the 4
+	  messages on to one ipi.
+
 config PPC_UDBG_BEAT
 	bool "BEAT based debug console"
 	depends on PPC_CELLEB
 	default n
 
-config XICS
-	depends on PPC_PSERIES
-	bool
-	default y
-
 config IPIC
 	bool
 	default n
@@ -147,14 +151,27 @@ config PPC_970_NAP
 	bool
 	default n
 
+config PPC_P7_NAP
+	bool
+	default n
+
 config PPC_INDIRECT_IO
 	bool
 	select GENERIC_IOMAP
-	default n
+
+config PPC_INDIRECT_PIO
+	bool
+	select PPC_INDIRECT_IO
+
+config PPC_INDIRECT_MMIO
+	bool
+	select PPC_INDIRECT_IO
+
+config PPC_IO_WORKAROUNDS
+	bool
 
 config GENERIC_IOMAP
 	bool
-	default n
 
 source "drivers/cpufreq/Kconfig"
 
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 111138c55f9..2165b65876f 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -73,6 +73,7 @@ config PPC_BOOK3S_64
 config PPC_BOOK3E_64
 	bool "Embedded processors"
 	select PPC_FPU # Make it a choice ?
+	select PPC_SMP_MUXED_IPI
 
 endchoice
 
@@ -107,6 +108,10 @@ config POWER4
 	depends on PPC64 && PPC_BOOK3S
 	def_bool y
 
+config PPC_A2
+	bool
+	depends on PPC_BOOK3E_64
+
 config TUNE_CELL
 	bool "Optimize for Cell Broadband Engine"
 	depends on PPC64 && PPC_BOOK3S
@@ -174,6 +179,7 @@ config FSL_BOOKE
 config PPC_FSL_BOOK3E
 	bool
 	select FSL_EMB_PERFMON
+	select PPC_SMP_MUXED_IPI
 	default y if FSL_BOOKE
 
 config PTE_64BIT
@@ -226,6 +232,24 @@ config VSX
 
 	  If in doubt, say Y here.
 
+config PPC_ICSWX
+	bool "Support for PowerPC icswx coprocessor instruction"
+	depends on POWER4
+	default n
+	---help---
+
+	  This option enables kernel support for the PowerPC Initiate
+	  Coprocessor Store Word (icswx) coprocessor instruction on POWER7
+	  or newer processors.
+
+	  This option is only useful if you have a processor that supports
+	  the icswx coprocessor instruction. It does not have any effect
+	  on processors without the icswx coprocessor instruction.
+
+	  This option slightly increases kernel memory usage.
+
+	  If in doubt, say N here.
+
 config SPE
 	bool "SPE Support"
 	depends on E200 || (E500 && !PPC_E500MC)
diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile
index fdb9f0b0d7a..73e2116cfee 100644
--- a/arch/powerpc/platforms/Makefile
+++ b/arch/powerpc/platforms/Makefile
@@ -22,3 +22,4 @@ obj-$(CONFIG_PPC_CELL)		+= cell/
 obj-$(CONFIG_PPC_PS3)		+= ps3/
 obj-$(CONFIG_EMBEDDED6xx)	+= embedded6xx/
 obj-$(CONFIG_AMIGAONE)		+= amigaone/
+obj-$(CONFIG_PPC_WSP)		+= wsp/
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index 81239ebed83..67d5009b4e8 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -6,7 +6,8 @@ config PPC_CELL_COMMON
 	bool
 	select PPC_CELL
 	select PPC_DCR_MMIO
-	select PPC_INDIRECT_IO
+	select PPC_INDIRECT_PIO
+	select PPC_INDIRECT_MMIO
 	select PPC_NATIVE
 	select PPC_RTAS
 	select IRQ_EDGE_EOI_HANDLER
@@ -15,6 +16,7 @@ config PPC_CELL_NATIVE
 	bool
 	select PPC_CELL_COMMON
 	select MPIC
+	select PPC_IO_WORKAROUNDS
 	select IBM_NEW_EMAC_EMAC4
 	select IBM_NEW_EMAC_RGMII
 	select IBM_NEW_EMAC_ZMII #test only
diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile
index 83fafe92264..a4a89350bcf 100644
--- a/arch/powerpc/platforms/cell/Makefile
+++ b/arch/powerpc/platforms/cell/Makefile
@@ -1,7 +1,7 @@
 obj-$(CONFIG_PPC_CELL_COMMON)		+= cbe_regs.o interrupt.o pervasive.o
 
 obj-$(CONFIG_PPC_CELL_NATIVE)		+= iommu.o setup.o spider-pic.o \
-					   pmu.o io-workarounds.o spider-pci.o
+					   pmu.o spider-pci.o
 obj-$(CONFIG_CBE_RAS)			+= ras.o
 
 obj-$(CONFIG_CBE_THERM)			+= cbe_thermal.o
@@ -39,11 +39,10 @@ obj-y					+= celleb_setup.o \
 					   celleb_pci.o celleb_scc_epci.o \
 					   celleb_scc_pciex.o \
 					   celleb_scc_uhc.o \
-					   io-workarounds.o spider-pci.o \
-					   beat.o beat_htab.o beat_hvCall.o \
-					   beat_interrupt.o beat_iommu.o
+					   spider-pci.o beat.o beat_htab.o \
+					   beat_hvCall.o beat_interrupt.o \
+					   beat_iommu.o
 
-obj-$(CONFIG_SMP)			+= beat_smp.o
 obj-$(CONFIG_PPC_UDBG_BEAT)		+= beat_udbg.o
 obj-$(CONFIG_SERIAL_TXX9)		+= celleb_scc_sio.o
 obj-$(CONFIG_SPU_BASE)			+= beat_spu_priv1.o
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index bb5ebf8fa80..ac06903e136 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -113,7 +113,7 @@ static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc)
 		pr_devel("axon_msi: woff %x roff %x msi %x\n",
 			  write_offset, msic->read_offset, msi);
 
-		if (msi < NR_IRQS && irq_map[msi].host == msic->irq_host) {
+		if (msi < NR_IRQS && irq_get_chip_data(msi) == msic) {
 			generic_handle_irq(msi);
 			msic->fifo_virt[idx] = cpu_to_le32(0xffffffff);
 		} else {
@@ -320,6 +320,7 @@ static struct irq_chip msic_irq_chip = {
 static int msic_host_map(struct irq_host *h, unsigned int virq,
 			 irq_hw_number_t hw)
 {
+	irq_set_chip_data(virq, h->host_data);
 	irq_set_chip_and_handler(virq, &msic_irq_chip, handle_simple_irq);
 
 	return 0;
diff --git a/arch/powerpc/platforms/cell/beat_interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c
index 4cb9e147c30..55015e1f693 100644
--- a/arch/powerpc/platforms/cell/beat_interrupt.c
+++ b/arch/powerpc/platforms/cell/beat_interrupt.c
@@ -148,16 +148,6 @@ static int beatic_pic_host_map(struct irq_host *h, unsigned int virq,
 }
 
 /*
- * Update binding hardware IRQ number (hw) and Virtuql
- * IRQ number (virq). This is called only once for a given mapping.
- */
-static void beatic_pic_host_remap(struct irq_host *h, unsigned int virq,
-			       irq_hw_number_t hw)
-{
-	beat_construct_and_connect_irq_plug(virq, hw);
-}
-
-/*
  * Translate device-tree interrupt spec to irq_hw_number_t style (ulong),
  * to pass away to irq_create_mapping().
  *
@@ -184,7 +174,6 @@ static int beatic_pic_host_match(struct irq_host *h, struct device_node *np)
 
 static struct irq_host_ops beatic_pic_host_ops = {
 	.map = beatic_pic_host_map,
-	.remap = beatic_pic_host_remap,
 	.unmap = beatic_pic_host_unmap,
 	.xlate = beatic_pic_host_xlate,
 	.match = beatic_pic_host_match,
@@ -257,22 +246,6 @@ void __init beatic_init_IRQ(void)
 	irq_set_default_host(beatic_host);
 }
 
-#ifdef CONFIG_SMP
-
-/* Nullified to compile with SMP mode */
-void beatic_setup_cpu(int cpu)
-{
-}
-
-void beatic_cause_IPI(int cpu, int mesg)
-{
-}
-
-void beatic_request_IPIs(void)
-{
-}
-#endif /* CONFIG_SMP */
-
 void beatic_deinit_IRQ(void)
 {
 	int	i;
diff --git a/arch/powerpc/platforms/cell/beat_interrupt.h b/arch/powerpc/platforms/cell/beat_interrupt.h
index b470fd0051f..a7e52f91a07 100644
--- a/arch/powerpc/platforms/cell/beat_interrupt.h
+++ b/arch/powerpc/platforms/cell/beat_interrupt.h
@@ -24,9 +24,6 @@
 
 extern void beatic_init_IRQ(void);
 extern unsigned int beatic_get_irq(void);
-extern void beatic_cause_IPI(int cpu, int mesg);
-extern void beatic_request_IPIs(void);
-extern void beatic_setup_cpu(int);
 extern void beatic_deinit_IRQ(void);
 
 #endif
diff --git a/arch/powerpc/platforms/cell/beat_smp.c b/arch/powerpc/platforms/cell/beat_smp.c
deleted file mode 100644
index 26efc204c47..00000000000
--- a/arch/powerpc/platforms/cell/beat_smp.c
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * SMP support for Celleb platform. (Incomplete)
- *
- * (C) Copyright 2006 TOSHIBA CORPORATION
- *
- * This code is based on arch/powerpc/platforms/cell/smp.c:
- * Dave Engebretsen, Peter Bergner, and
- * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
- * Plus various changes from other IBM teams...
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#undef DEBUG
-
-#include <linux/kernel.h>
-#include <linux/smp.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/threads.h>
-#include <linux/cpu.h>
-
-#include <asm/irq.h>
-#include <asm/smp.h>
-#include <asm/machdep.h>
-#include <asm/udbg.h>
-
-#include "beat_interrupt.h"
-
-#ifdef DEBUG
-#define DBG(fmt...) udbg_printf(fmt)
-#else
-#define DBG(fmt...)
-#endif
-
-/*
- * The primary thread of each non-boot processor is recorded here before
- * smp init.
- */
-/* static cpumask_t of_spin_map; */
-
-/**
- * smp_startup_cpu() - start the given cpu
- *
- * At boot time, there is nothing to do for primary threads which were
- * started from Open Firmware.  For anything else, call RTAS with the
- * appropriate start location.
- *
- * Returns:
- *	0	- failure
- *	1	- success
- */
-static inline int __devinit smp_startup_cpu(unsigned int lcpu)
-{
-	return 0;
-}
-
-static void smp_beatic_message_pass(int target, int msg)
-{
-	unsigned int i;
-
-	if (target < NR_CPUS) {
-		beatic_cause_IPI(target, msg);
-	} else {
-		for_each_online_cpu(i) {
-			if (target == MSG_ALL_BUT_SELF
-			    && i == smp_processor_id())
-				continue;
-			beatic_cause_IPI(i, msg);
-		}
-	}
-}
-
-static int __init smp_beatic_probe(void)
-{
-	return cpus_weight(cpu_possible_map);
-}
-
-static void __devinit smp_beatic_setup_cpu(int cpu)
-{
-	beatic_setup_cpu(cpu);
-}
-
-static void __devinit smp_celleb_kick_cpu(int nr)
-{
-	BUG_ON(nr < 0 || nr >= NR_CPUS);
-
-	if (!smp_startup_cpu(nr))
-		return;
-}
-
-static int smp_celleb_cpu_bootable(unsigned int nr)
-{
-	return 1;
-}
-static struct smp_ops_t bpa_beatic_smp_ops = {
-	.message_pass	= smp_beatic_message_pass,
-	.probe		= smp_beatic_probe,
-	.kick_cpu	= smp_celleb_kick_cpu,
-	.setup_cpu	= smp_beatic_setup_cpu,
-	.cpu_bootable	= smp_celleb_cpu_bootable,
-};
-
-/* This is called very early */
-void __init smp_init_celleb(void)
-{
-	DBG(" -> smp_init_celleb()\n");
-
-	smp_ops = &bpa_beatic_smp_ops;
-
-	DBG(" <- smp_init_celleb()\n");
-}
diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c
index dbc338f187a..f3917e7a5b4 100644
--- a/arch/powerpc/platforms/cell/cbe_regs.c
+++ b/arch/powerpc/platforms/cell/cbe_regs.c
@@ -45,8 +45,8 @@ static struct cbe_thread_map
 	unsigned int cbe_id;
 } cbe_thread_map[NR_CPUS];
 
-static cpumask_t cbe_local_mask[MAX_CBE] = { [0 ... MAX_CBE-1] = CPU_MASK_NONE };
-static cpumask_t cbe_first_online_cpu = CPU_MASK_NONE;
+static cpumask_t cbe_local_mask[MAX_CBE] = { [0 ... MAX_CBE-1] = {CPU_BITS_NONE} };
+static cpumask_t cbe_first_online_cpu = { CPU_BITS_NONE };
 
 static struct cbe_regs_map *cbe_find_map(struct device_node *np)
 {
@@ -159,7 +159,8 @@ EXPORT_SYMBOL_GPL(cbe_cpu_to_node);
 
 u32 cbe_node_to_cpu(int node)
 {
-	return find_first_bit( (unsigned long *) &cbe_local_mask[node], sizeof(cpumask_t));
+	return cpumask_first(&cbe_local_mask[node]);
+
 }
 EXPORT_SYMBOL_GPL(cbe_node_to_cpu);
 
@@ -268,9 +269,9 @@ void __init cbe_regs_init(void)
 				thread->regs = map;
 				thread->cbe_id = cbe_id;
 				map->be_node = thread->be_node;
-				cpu_set(i, cbe_local_mask[cbe_id]);
+				cpumask_set_cpu(i, &cbe_local_mask[cbe_id]);
 				if(thread->thread_id == 0)
-					cpu_set(i, cbe_first_online_cpu);
+					cpumask_set_cpu(i, &cbe_first_online_cpu);
 			}
 		}
 
diff --git a/arch/powerpc/platforms/cell/celleb_pci.c b/arch/powerpc/platforms/cell/celleb_pci.c
index 404d1fc04d5..5822141aa63 100644
--- a/arch/powerpc/platforms/cell/celleb_pci.c
+++ b/arch/powerpc/platforms/cell/celleb_pci.c
@@ -41,7 +41,6 @@
 #include <asm/pci-bridge.h>
 #include <asm/ppc-pci.h>
 
-#include "io-workarounds.h"
 #include "celleb_pci.h"
 
 #define MAX_PCI_DEVICES    32
@@ -320,7 +319,7 @@ static int __init celleb_setup_fake_pci_device(struct device_node *node,
 
 	size = 256;
 	config = &private->fake_config[devno][fn];
-	*config = alloc_maybe_bootmem(size, GFP_KERNEL);
+	*config = zalloc_maybe_bootmem(size, GFP_KERNEL);
 	if (*config == NULL) {
 		printk(KERN_ERR "PCI: "
 		       "not enough memory for fake configuration space\n");
@@ -331,7 +330,7 @@ static int __init celleb_setup_fake_pci_device(struct device_node *node,
 
 	size = sizeof(struct celleb_pci_resource);
 	res = &private->res[devno][fn];
-	*res = alloc_maybe_bootmem(size, GFP_KERNEL);
+	*res = zalloc_maybe_bootmem(size, GFP_KERNEL);
 	if (*res == NULL) {
 		printk(KERN_ERR
 		       "PCI: not enough memory for resource data space\n");
@@ -432,7 +431,7 @@ static int __init phb_set_bus_ranges(struct device_node *dev,
 static void __init celleb_alloc_private_mem(struct pci_controller *hose)
 {
 	hose->private_data =
-		alloc_maybe_bootmem(sizeof(struct celleb_pci_private),
+		zalloc_maybe_bootmem(sizeof(struct celleb_pci_private),
 			GFP_KERNEL);
 }
 
@@ -469,18 +468,6 @@ static struct of_device_id celleb_phb_match[] __initdata = {
 	},
 };
 
-static int __init celleb_io_workaround_init(struct pci_controller *phb,
-					    struct celleb_phb_spec *phb_spec)
-{
-	if (phb_spec->ops) {
-		iowa_register_bus(phb, phb_spec->ops, phb_spec->iowa_init,
-				  phb_spec->iowa_data);
-		io_workaround_init();
-	}
-
-	return 0;
-}
-
 int __init celleb_setup_phb(struct pci_controller *phb)
 {
 	struct device_node *dev = phb->dn;
@@ -500,7 +487,11 @@ int __init celleb_setup_phb(struct pci_controller *phb)
 	if (rc)
 		return 1;
 
-	return celleb_io_workaround_init(phb, phb_spec);
+	if (phb_spec->ops)
+		iowa_register_bus(phb, phb_spec->ops,
+				  phb_spec->iowa_init,
+				  phb_spec->iowa_data);
+	return 0;
 }
 
 int celleb_pci_probe_mode(struct pci_bus *bus)
diff --git a/arch/powerpc/platforms/cell/celleb_pci.h b/arch/powerpc/platforms/cell/celleb_pci.h
index 4cba1523ec5..a801fcc5f38 100644
--- a/arch/powerpc/platforms/cell/celleb_pci.h
+++ b/arch/powerpc/platforms/cell/celleb_pci.h
@@ -26,8 +26,9 @@
 #include <asm/pci-bridge.h>
 #include <asm/prom.h>
 #include <asm/ppc-pci.h>
+#include <asm/io-workarounds.h>
 
-#include "io-workarounds.h"
+struct iowa_bus;
 
 struct celleb_phb_spec {
 	int (*setup)(struct device_node *, struct pci_controller *);
diff --git a/arch/powerpc/platforms/cell/celleb_setup.c b/arch/powerpc/platforms/cell/celleb_setup.c
index e5384557977..d58d9bae4b9 100644
--- a/arch/powerpc/platforms/cell/celleb_setup.c
+++ b/arch/powerpc/platforms/cell/celleb_setup.c
@@ -128,10 +128,6 @@ static void __init celleb_setup_arch_beat(void)
 	spu_management_ops	= &spu_management_of_ops;
 #endif
 
-#ifdef CONFIG_SMP
-	smp_init_celleb();
-#endif
-
 	celleb_setup_arch_common();
 }
 
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 44cfd1bef89..449c08c1586 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -196,8 +196,20 @@ static irqreturn_t iic_ipi_action(int irq, void *dev_id)
 {
 	int ipi = (int)(long)dev_id;
 
-	smp_message_recv(ipi);
-
+	switch(ipi) {
+	case PPC_MSG_CALL_FUNCTION:
+		generic_smp_call_function_interrupt();
+		break;
+	case PPC_MSG_RESCHEDULE:
+		scheduler_ipi();
+		break;
+	case PPC_MSG_CALL_FUNC_SINGLE:
+		generic_smp_call_function_single_interrupt();
+		break;
+	case PPC_MSG_DEBUGGER_BREAK:
+		debug_ipi_action(0, NULL);
+		break;
+	}
 	return IRQ_HANDLED;
 }
 static void iic_request_ipi(int ipi, const char *name)
diff --git a/arch/powerpc/platforms/cell/qpace_setup.c b/arch/powerpc/platforms/cell/qpace_setup.c
index d31c594cfdf..51e290126bc 100644
--- a/arch/powerpc/platforms/cell/qpace_setup.c
+++ b/arch/powerpc/platforms/cell/qpace_setup.c
@@ -42,7 +42,6 @@
 #include "interrupt.h"
 #include "pervasive.h"
 #include "ras.h"
-#include "io-workarounds.h"
 
 static void qpace_show_cpuinfo(struct seq_file *m)
 {
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index fd57bfe00ed..c73cf4c43fc 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -51,11 +51,11 @@
 #include <asm/udbg.h>
 #include <asm/mpic.h>
 #include <asm/cell-regs.h>
+#include <asm/io-workarounds.h>
 
 #include "interrupt.h"
 #include "pervasive.h"
 #include "ras.h"
-#include "io-workarounds.h"
 
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
@@ -136,8 +136,6 @@ static int __devinit cell_setup_phb(struct pci_controller *phb)
 
 	iowa_register_bus(phb, &spiderpci_ops, &spiderpci_iowa_init,
 				  (void *)SPIDER_PCI_REG_BASE);
-	io_workaround_init();
-
 	return 0;
 }
 
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
index f774530075b..d176e6148e3 100644
--- a/arch/powerpc/platforms/cell/smp.c
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -77,7 +77,7 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu)
 	unsigned int pcpu;
 	int start_cpu;
 
-	if (cpu_isset(lcpu, of_spin_map))
+	if (cpumask_test_cpu(lcpu, &of_spin_map))
 		/* Already started by OF and sitting in spin loop */
 		return 1;
 
@@ -103,27 +103,11 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu)
 	return 1;
 }
 
-static void smp_iic_message_pass(int target, int msg)
-{
-	unsigned int i;
-
-	if (target < NR_CPUS) {
-		iic_cause_IPI(target, msg);
-	} else {
-		for_each_online_cpu(i) {
-			if (target == MSG_ALL_BUT_SELF
-			    && i == smp_processor_id())
-				continue;
-			iic_cause_IPI(i, msg);
-		}
-	}
-}
-
 static int __init smp_iic_probe(void)
 {
 	iic_request_IPIs();
 
-	return cpus_weight(cpu_possible_map);
+	return cpumask_weight(cpu_possible_mask);
 }
 
 static void __devinit smp_cell_setup_cpu(int cpu)
@@ -137,12 +121,12 @@ static void __devinit smp_cell_setup_cpu(int cpu)
 	mtspr(SPRN_DABRX, DABRX_KERNEL | DABRX_USER);
 }
 
-static void __devinit smp_cell_kick_cpu(int nr)
+static int __devinit smp_cell_kick_cpu(int nr)
 {
 	BUG_ON(nr < 0 || nr >= NR_CPUS);
 
 	if (!smp_startup_cpu(nr))
-		return;
+		return -ENOENT;
 
 	/*
 	 * The processor is currently spinning, waiting for the
@@ -150,6 +134,8 @@ static void __devinit smp_cell_kick_cpu(int nr)
 	 * the processor will continue on to secondary_start
 	 */
 	paca[nr].cpu_start = 1;
+
+	return 0;
 }
 
 static int smp_cell_cpu_bootable(unsigned int nr)
@@ -166,7 +152,7 @@ static int smp_cell_cpu_bootable(unsigned int nr)
 	return 1;
 }
 static struct smp_ops_t bpa_iic_smp_ops = {
-	.message_pass	= smp_iic_message_pass,
+	.message_pass	= iic_cause_IPI,
 	.probe		= smp_iic_probe,
 	.kick_cpu	= smp_cell_kick_cpu,
 	.setup_cpu	= smp_cell_setup_cpu,
@@ -186,13 +172,12 @@ void __init smp_init_cell(void)
 	if (cpu_has_feature(CPU_FTR_SMT)) {
 		for_each_present_cpu(i) {
 			if (cpu_thread_in_core(i) == 0)
-				cpu_set(i, of_spin_map);
+				cpumask_set_cpu(i, &of_spin_map);
 		}
-	} else {
-		of_spin_map = cpu_present_map;
-	}
+	} else
+		cpumask_copy(&of_spin_map, cpu_present_mask);
 
-	cpu_clear(boot_cpuid, of_spin_map);
+	cpumask_clear_cpu(boot_cpuid, &of_spin_map);
 
 	/* Non-lpar has additional take/give timebase */
 	if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) {
diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c
index ca7731c0b59..f1f7878893f 100644
--- a/arch/powerpc/platforms/cell/spider-pci.c
+++ b/arch/powerpc/platforms/cell/spider-pci.c
@@ -27,8 +27,7 @@
 
 #include <asm/ppc-pci.h>
 #include <asm/pci-bridge.h>
-
-#include "io-workarounds.h"
+#include <asm/io-workarounds.h>
 
 #define SPIDER_PCI_DISABLE_PREFETCH
 
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
index c5cf50e6b45..442c28c00f8 100644
--- a/arch/powerpc/platforms/cell/spider-pic.c
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -68,9 +68,9 @@ struct spider_pic {
 };
 static struct spider_pic spider_pics[SPIDER_CHIP_COUNT];
 
-static struct spider_pic *spider_virq_to_pic(unsigned int virq)
+static struct spider_pic *spider_irq_data_to_pic(struct irq_data *d)
 {
-	return irq_map[virq].host->host_data;
+	return irq_data_get_irq_chip_data(d);
 }
 
 static void __iomem *spider_get_irq_config(struct spider_pic *pic,
@@ -81,24 +81,24 @@ static void __iomem *spider_get_irq_config(struct spider_pic *pic,
 
 static void spider_unmask_irq(struct irq_data *d)
 {
-	struct spider_pic *pic = spider_virq_to_pic(d->irq);
-	void __iomem *cfg = spider_get_irq_config(pic, irq_map[d->irq].hwirq);
+	struct spider_pic *pic = spider_irq_data_to_pic(d);
+	void __iomem *cfg = spider_get_irq_config(pic, irqd_to_hwirq(d));
 
 	out_be32(cfg, in_be32(cfg) | 0x30000000u);
 }
 
 static void spider_mask_irq(struct irq_data *d)
 {
-	struct spider_pic *pic = spider_virq_to_pic(d->irq);
-	void __iomem *cfg = spider_get_irq_config(pic, irq_map[d->irq].hwirq);
+	struct spider_pic *pic = spider_irq_data_to_pic(d);
+	void __iomem *cfg = spider_get_irq_config(pic, irqd_to_hwirq(d));
 
 	out_be32(cfg, in_be32(cfg) & ~0x30000000u);
 }
 
 static void spider_ack_irq(struct irq_data *d)
 {
-	struct spider_pic *pic = spider_virq_to_pic(d->irq);
-	unsigned int src = irq_map[d->irq].hwirq;
+	struct spider_pic *pic = spider_irq_data_to_pic(d);
+	unsigned int src = irqd_to_hwirq(d);
 
 	/* Reset edge detection logic if necessary
 	 */
@@ -116,8 +116,8 @@ static void spider_ack_irq(struct irq_data *d)
 static int spider_set_irq_type(struct irq_data *d, unsigned int type)
 {
 	unsigned int sense = type & IRQ_TYPE_SENSE_MASK;
-	struct spider_pic *pic = spider_virq_to_pic(d->irq);
-	unsigned int hw = irq_map[d->irq].hwirq;
+	struct spider_pic *pic = spider_irq_data_to_pic(d);
+	unsigned int hw = irqd_to_hwirq(d);
 	void __iomem *cfg = spider_get_irq_config(pic, hw);
 	u32 old_mask;
 	u32 ic;
@@ -171,6 +171,7 @@ static struct irq_chip spider_pic = {
 static int spider_host_map(struct irq_host *h, unsigned int virq,
 			irq_hw_number_t hw)
 {
+	irq_set_chip_data(virq, h->host_data);
 	irq_set_chip_and_handler(virq, &spider_pic, handle_level_irq);
 
 	/* Set default irq type */
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index acfaccea5f4..3675da73623 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -32,6 +32,7 @@
 #include <linux/io.h>
 #include <linux/mutex.h>
 #include <linux/linux_logo.h>
+#include <linux/syscore_ops.h>
 #include <asm/spu.h>
 #include <asm/spu_priv1.h>
 #include <asm/spu_csa.h>
@@ -521,18 +522,8 @@ void spu_init_channels(struct spu *spu)
 }
 EXPORT_SYMBOL_GPL(spu_init_channels);
 
-static int spu_shutdown(struct sys_device *sysdev)
-{
-	struct spu *spu = container_of(sysdev, struct spu, sysdev);
-
-	spu_free_irqs(spu);
-	spu_destroy_spu(spu);
-	return 0;
-}
-
 static struct sysdev_class spu_sysdev_class = {
 	.name = "spu",
-	.shutdown = spu_shutdown,
 };
 
 int spu_add_sysdev_attr(struct sysdev_attribute *attr)
@@ -797,6 +788,22 @@ static inline void crash_register_spus(struct list_head *list)
 }
 #endif
 
+static void spu_shutdown(void)
+{
+	struct spu *spu;
+
+	mutex_lock(&spu_full_list_mutex);
+	list_for_each_entry(spu, &spu_full_list, full_list) {
+		spu_free_irqs(spu);
+		spu_destroy_spu(spu);
+	}
+	mutex_unlock(&spu_full_list_mutex);
+}
+
+static struct syscore_ops spu_syscore_ops = {
+	.shutdown = spu_shutdown,
+};
+
 static int __init init_spu_base(void)
 {
 	int i, ret = 0;
@@ -830,6 +837,7 @@ static int __init init_spu_base(void)
 	crash_register_spus(&spu_full_list);
 	mutex_unlock(&spu_full_list_mutex);
 	spu_add_sysdev_attr(&attr_stat);
+	register_syscore_ops(&spu_syscore_ops);
 
 	spu_init_affinity();
 
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 65203857b0c..32cb4e66d2c 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -141,7 +141,7 @@ void __spu_update_sched_info(struct spu_context *ctx)
 	 * runqueue. The context will be rescheduled on the proper node
 	 * if it is timesliced or preempted.
 	 */
-	ctx->cpus_allowed = current->cpus_allowed;
+	cpumask_copy(&ctx->cpus_allowed, tsk_cpus_allowed(current));
 
 	/* Save the current cpu id for spu interrupt routing. */
 	ctx->last_ran = raw_smp_processor_id();
diff --git a/arch/powerpc/platforms/chrp/smp.c b/arch/powerpc/platforms/chrp/smp.c
index 02cafecc90e..a800122e4dd 100644
--- a/arch/powerpc/platforms/chrp/smp.c
+++ b/arch/powerpc/platforms/chrp/smp.c
@@ -30,10 +30,12 @@
 #include <asm/mpic.h>
 #include <asm/rtas.h>
 
-static void __devinit smp_chrp_kick_cpu(int nr)
+static int __devinit smp_chrp_kick_cpu(int nr)
 {
 	*(unsigned long *)KERNELBASE = nr;
 	asm volatile("dcbf 0,%0"::"r"(KERNELBASE):"memory");
+
+	return 0;
 }
 
 static void __devinit smp_chrp_setup_cpu(int cpu_nr)
diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
index 12aa62b6f22..f61a2dd96b9 100644
--- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
@@ -48,7 +48,7 @@
 
 static void flipper_pic_mask_and_ack(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void __iomem *io_base = irq_data_get_irq_chip_data(d);
 	u32 mask = 1 << irq;
 
@@ -59,7 +59,7 @@ static void flipper_pic_mask_and_ack(struct irq_data *d)
 
 static void flipper_pic_ack(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void __iomem *io_base = irq_data_get_irq_chip_data(d);
 
 	/* this is at least needed for RSW */
@@ -68,7 +68,7 @@ static void flipper_pic_ack(struct irq_data *d)
 
 static void flipper_pic_mask(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void __iomem *io_base = irq_data_get_irq_chip_data(d);
 
 	clrbits32(io_base + FLIPPER_IMR, 1 << irq);
@@ -76,7 +76,7 @@ static void flipper_pic_mask(struct irq_data *d)
 
 static void flipper_pic_unmask(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void __iomem *io_base = irq_data_get_irq_chip_data(d);
 
 	setbits32(io_base + FLIPPER_IMR, 1 << irq);
@@ -107,12 +107,6 @@ static int flipper_pic_map(struct irq_host *h, unsigned int virq,
 	return 0;
 }
 
-static void flipper_pic_unmap(struct irq_host *h, unsigned int irq)
-{
-	irq_set_chip_data(irq, NULL);
-	irq_set_chip(irq, NULL);
-}
-
 static int flipper_pic_match(struct irq_host *h, struct device_node *np)
 {
 	return 1;
@@ -121,7 +115,6 @@ static int flipper_pic_match(struct irq_host *h, struct device_node *np)
 
 static struct irq_host_ops flipper_irq_host_ops = {
 	.map = flipper_pic_map,
-	.unmap = flipper_pic_unmap,
 	.match = flipper_pic_match,
 };
 
diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
index 2bdddfc9d52..e4919170c6b 100644
--- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
@@ -43,7 +43,7 @@
 
 static void hlwd_pic_mask_and_ack(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void __iomem *io_base = irq_data_get_irq_chip_data(d);
 	u32 mask = 1 << irq;
 
@@ -53,7 +53,7 @@ static void hlwd_pic_mask_and_ack(struct irq_data *d)
 
 static void hlwd_pic_ack(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void __iomem *io_base = irq_data_get_irq_chip_data(d);
 
 	out_be32(io_base + HW_BROADWAY_ICR, 1 << irq);
@@ -61,7 +61,7 @@ static void hlwd_pic_ack(struct irq_data *d)
 
 static void hlwd_pic_mask(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void __iomem *io_base = irq_data_get_irq_chip_data(d);
 
 	clrbits32(io_base + HW_BROADWAY_IMR, 1 << irq);
@@ -69,7 +69,7 @@ static void hlwd_pic_mask(struct irq_data *d)
 
 static void hlwd_pic_unmask(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void __iomem *io_base = irq_data_get_irq_chip_data(d);
 
 	setbits32(io_base + HW_BROADWAY_IMR, 1 << irq);
@@ -100,15 +100,8 @@ static int hlwd_pic_map(struct irq_host *h, unsigned int virq,
 	return 0;
 }
 
-static void hlwd_pic_unmap(struct irq_host *h, unsigned int irq)
-{
-	irq_set_chip_data(irq, NULL);
-	irq_set_chip(irq, NULL);
-}
-
 static struct irq_host_ops hlwd_irq_host_ops = {
 	.map = hlwd_pic_map,
-	.unmap = hlwd_pic_unmap,
 };
 
 static unsigned int __hlwd_pic_get_irq(struct irq_host *h)
diff --git a/arch/powerpc/platforms/iseries/Kconfig b/arch/powerpc/platforms/iseries/Kconfig
index e5bc9f75d47..b57cda3a081 100644
--- a/arch/powerpc/platforms/iseries/Kconfig
+++ b/arch/powerpc/platforms/iseries/Kconfig
@@ -1,7 +1,9 @@
 config PPC_ISERIES
 	bool "IBM Legacy iSeries"
 	depends on PPC64 && PPC_BOOK3S
-	select PPC_INDIRECT_IO
+	select PPC_SMP_MUXED_IPI
+	select PPC_INDIRECT_PIO
+	select PPC_INDIRECT_MMIO
 	select PPC_PCI_CHOICE if EXPERT
 
 menu "iSeries device drivers"
diff --git a/arch/powerpc/platforms/iseries/exception.S b/arch/powerpc/platforms/iseries/exception.S
index 32a56c6dfa7..29c02f36b32 100644
--- a/arch/powerpc/platforms/iseries/exception.S
+++ b/arch/powerpc/platforms/iseries/exception.S
@@ -31,6 +31,7 @@
 #include <asm/thread_info.h>
 #include <asm/ptrace.h>
 #include <asm/cputable.h>
+#include <asm/mmu.h>
 
 #include "exception.h"
 
@@ -60,29 +61,31 @@ system_reset_iSeries:
 /* Spin on __secondary_hold_spinloop until it is updated by the boot cpu. */
 /* In the UP case we'll yield() later, and we will not access the paca anyway */
 #ifdef CONFIG_SMP
-1:
+iSeries_secondary_wait_paca:
 	HMT_LOW
 	LOAD_REG_ADDR(r23, __secondary_hold_spinloop)
 	ld	r23,0(r23)
-	sync
-	LOAD_REG_ADDR(r3,current_set)
-	sldi	r28,r24,3		/* get current_set[cpu#] */
-	ldx	r3,r3,r28
-	addi	r1,r3,THREAD_SIZE
-	subi	r1,r1,STACK_FRAME_OVERHEAD
 
-	cmpwi	0,r23,0			/* Keep poking the Hypervisor until */
-	bne	2f			/* we're released */
-	/* Let the Hypervisor know we are alive */
+	cmpdi	0,r23,0
+	bne	2f			/* go on when the master is ready */
+
+	/* Keep poking the Hypervisor until we're released */
 	/* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */
 	lis	r3,0x8002
 	rldicr	r3,r3,32,15		/* r0 = (r3 << 32) & 0xffff000000000000 */
 	li	r0,-1			/* r0=-1 indicates a Hypervisor call */
 	sc				/* Invoke the hypervisor via a system call */
-	b	1b
-#endif
+	b	iSeries_secondary_wait_paca
 
 2:
+	HMT_MEDIUM
+	sync
+
+	LOAD_REG_ADDR(r3, nr_cpu_ids)	/* get number of pacas allocated */
+	lwz	r3,0(r3)		/* nr_cpus= or NR_CPUS can limit */
+	cmpld	0,r24,r3		/* is our cpu number allocated? */
+	bge	iSeries_secondary_yield	/* no, yield forever */
+
 	/* Load our paca now that it's been allocated */
 	LOAD_REG_ADDR(r13, paca)
 	ld	r13,0(r13)
@@ -93,10 +96,24 @@ system_reset_iSeries:
 	ori	r23,r23,MSR_RI
 	mtmsrd	r23			/* RI on */
 
-	HMT_LOW
-#ifdef CONFIG_SMP
+iSeries_secondary_smp_loop:
 	lbz	r23,PACAPROCSTART(r13)	/* Test if this processor
 					 * should start */
+	cmpwi	0,r23,0
+	bne	3f			/* go on when we are told */
+
+	HMT_LOW
+	/* Let the Hypervisor know we are alive */
+	/* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */
+	lis	r3,0x8002
+	rldicr	r3,r3,32,15		/* r0 = (r3 << 32) & 0xffff000000000000 */
+	li	r0,-1			/* r0=-1 indicates a Hypervisor call */
+	sc				/* Invoke the hypervisor via a system call */
+	mfspr	r13,SPRN_SPRG_PACA	/* Put r13 back ???? */
+	b	iSeries_secondary_smp_loop /* wait for signal to start */
+
+3:
+	HMT_MEDIUM
 	sync
 	LOAD_REG_ADDR(r3,current_set)
 	sldi	r28,r24,3		/* get current_set[cpu#] */
@@ -104,27 +121,22 @@ system_reset_iSeries:
 	addi	r1,r3,THREAD_SIZE
 	subi	r1,r1,STACK_FRAME_OVERHEAD
 
-	cmpwi	0,r23,0
-	beq	iSeries_secondary_smp_loop	/* Loop until told to go */
 	b	__secondary_start		/* Loop until told to go */
-iSeries_secondary_smp_loop:
-	/* Let the Hypervisor know we are alive */
-	/* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */
-	lis	r3,0x8002
-	rldicr	r3,r3,32,15		/* r0 = (r3 << 32) & 0xffff000000000000 */
-#else /* CONFIG_SMP */
+#endif /* CONFIG_SMP */
+
+iSeries_secondary_yield:
 	/* Yield the processor.  This is required for non-SMP kernels
 		which are running on multi-threaded machines. */
+	HMT_LOW
 	lis	r3,0x8000
 	rldicr	r3,r3,32,15		/* r3 = (r3 << 32) & 0xffff000000000000 */
 	addi	r3,r3,18		/* r3 = 0x8000000000000012 which is "yield" */
 	li	r4,0			/* "yield timed" */
 	li	r5,-1			/* "yield forever" */
-#endif /* CONFIG_SMP */
 	li	r0,-1			/* r0=-1 indicates a Hypervisor call */
 	sc				/* Invoke the hypervisor via a system call */
 	mfspr	r13,SPRN_SPRG_PACA	/* Put r13 back ???? */
-	b	2b			/* If SMP not configured, secondaries
+	b	iSeries_secondary_yield	/* If SMP not configured, secondaries
 					 * loop forever */
 
 /***  ISeries-LPAR interrupt handlers ***/
@@ -157,7 +169,7 @@ BEGIN_FTR_SECTION
 FTR_SECTION_ELSE
 	EXCEPTION_PROLOG_1(PACA_EXGEN)
 	EXCEPTION_PROLOG_ISERIES_1
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_SLB)
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_SLB)
 	b	data_access_common
 
 .do_stab_bolted_iSeries:
diff --git a/arch/powerpc/platforms/iseries/irq.c b/arch/powerpc/platforms/iseries/irq.c
index 52a6889832c..b2103453eb0 100644
--- a/arch/powerpc/platforms/iseries/irq.c
+++ b/arch/powerpc/platforms/iseries/irq.c
@@ -42,7 +42,6 @@
 #include "irq.h"
 #include "pci.h"
 #include "call_pci.h"
-#include "smp.h"
 
 #ifdef CONFIG_PCI
 
@@ -171,7 +170,7 @@ static void iseries_enable_IRQ(struct irq_data *d)
 {
 	u32 bus, dev_id, function, mask;
 	const u32 sub_bus = 0;
-	unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int rirq = (unsigned int)irqd_to_hwirq(d);
 
 	/* The IRQ has already been locked by the caller */
 	bus = REAL_IRQ_TO_BUS(rirq);
@@ -188,7 +187,7 @@ static unsigned int iseries_startup_IRQ(struct irq_data *d)
 {
 	u32 bus, dev_id, function, mask;
 	const u32 sub_bus = 0;
-	unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int rirq = (unsigned int)irqd_to_hwirq(d);
 
 	bus = REAL_IRQ_TO_BUS(rirq);
 	function = REAL_IRQ_TO_FUNC(rirq);
@@ -234,7 +233,7 @@ static void iseries_shutdown_IRQ(struct irq_data *d)
 {
 	u32 bus, dev_id, function, mask;
 	const u32 sub_bus = 0;
-	unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int rirq = (unsigned int)irqd_to_hwirq(d);
 
 	/* irq should be locked by the caller */
 	bus = REAL_IRQ_TO_BUS(rirq);
@@ -257,7 +256,7 @@ static void iseries_disable_IRQ(struct irq_data *d)
 {
 	u32 bus, dev_id, function, mask;
 	const u32 sub_bus = 0;
-	unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int rirq = (unsigned int)irqd_to_hwirq(d);
 
 	/* The IRQ has already been locked by the caller */
 	bus = REAL_IRQ_TO_BUS(rirq);
@@ -271,7 +270,7 @@ static void iseries_disable_IRQ(struct irq_data *d)
 
 static void iseries_end_IRQ(struct irq_data *d)
 {
-	unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int rirq = (unsigned int)irqd_to_hwirq(d);
 
 	HvCallPci_eoi(REAL_IRQ_TO_BUS(rirq), REAL_IRQ_TO_SUBBUS(rirq),
 		(REAL_IRQ_TO_IDSEL(rirq) << 4) + REAL_IRQ_TO_FUNC(rirq));
@@ -316,7 +315,7 @@ unsigned int iSeries_get_irq(void)
 #ifdef CONFIG_SMP
 	if (get_lppaca()->int_dword.fields.ipi_cnt) {
 		get_lppaca()->int_dword.fields.ipi_cnt = 0;
-		iSeries_smp_message_recv();
+		smp_ipi_demux();
 	}
 #endif /* CONFIG_SMP */
 	if (hvlpevent_is_pending())
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index 2946ae10fbf..c25a0815c26 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -249,7 +249,7 @@ static unsigned long iSeries_process_mainstore_vpd(struct MemoryBlock *mb_array,
 	unsigned long i;
 	unsigned long mem_blocks = 0;
 
-	if (cpu_has_feature(CPU_FTR_SLB))
+	if (mmu_has_feature(MMU_FTR_SLB))
 		mem_blocks = iSeries_process_Regatta_mainstore_vpd(mb_array,
 				max_entries);
 	else
@@ -634,7 +634,7 @@ static int __init iseries_probe(void)
 
 	hpte_init_iSeries();
 	/* iSeries does not support 16M pages */
-	cur_cpu_spec->cpu_features &= ~CPU_FTR_16M_PAGE;
+	cur_cpu_spec->mmu_features &= ~MMU_FTR_16M_PAGE;
 
 	return 1;
 }
@@ -685,6 +685,11 @@ void * __init iSeries_early_setup(void)
 	powerpc_firmware_features |= FW_FEATURE_ISERIES;
 	powerpc_firmware_features |= FW_FEATURE_LPAR;
 
+#ifdef CONFIG_SMP
+	/* On iSeries we know we can never have more than 64 cpus */
+	nr_cpu_ids = max(nr_cpu_ids, 64);
+#endif
+
 	iSeries_fixup_klimit();
 
 	/*
diff --git a/arch/powerpc/platforms/iseries/smp.c b/arch/powerpc/platforms/iseries/smp.c
index 6c6029914db..e3265adde5d 100644
--- a/arch/powerpc/platforms/iseries/smp.c
+++ b/arch/powerpc/platforms/iseries/smp.c
@@ -42,57 +42,23 @@
 #include <asm/cputable.h>
 #include <asm/system.h>
 
-#include "smp.h"
-
-static unsigned long iSeries_smp_message[NR_CPUS];
-
-void iSeries_smp_message_recv(void)
-{
-	int cpu = smp_processor_id();
-	int msg;
-
-	if (num_online_cpus() < 2)
-		return;
-
-	for (msg = 0; msg < 4; msg++)
-		if (test_and_clear_bit(msg, &iSeries_smp_message[cpu]))
-			smp_message_recv(msg);
-}
-
-static inline void smp_iSeries_do_message(int cpu, int msg)
+static void smp_iSeries_cause_ipi(int cpu, unsigned long data)
 {
-	set_bit(msg, &iSeries_smp_message[cpu]);
 	HvCall_sendIPI(&(paca[cpu]));
 }
 
-static void smp_iSeries_message_pass(int target, int msg)
-{
-	int i;
-
-	if (target < NR_CPUS)
-		smp_iSeries_do_message(target, msg);
-	else {
-		for_each_online_cpu(i) {
-			if ((target == MSG_ALL_BUT_SELF) &&
-					(i == smp_processor_id()))
-				continue;
-			smp_iSeries_do_message(i, msg);
-		}
-	}
-}
-
 static int smp_iSeries_probe(void)
 {
 	return cpumask_weight(cpu_possible_mask);
 }
 
-static void smp_iSeries_kick_cpu(int nr)
+static int smp_iSeries_kick_cpu(int nr)
 {
 	BUG_ON((nr < 0) || (nr >= NR_CPUS));
 
 	/* Verify that our partition has a processor nr */
 	if (lppaca_of(nr).dyn_proc_status >= 2)
-		return;
+		return -ENOENT;
 
 	/* The processor is currently spinning, waiting
 	 * for the cpu_start field to become non-zero
@@ -100,6 +66,8 @@ static void smp_iSeries_kick_cpu(int nr)
 	 * continue on to secondary_start in iSeries_head.S
 	 */
 	paca[nr].cpu_start = 1;
+
+	return 0;
 }
 
 static void __devinit smp_iSeries_setup_cpu(int nr)
@@ -107,7 +75,8 @@ static void __devinit smp_iSeries_setup_cpu(int nr)
 }
 
 static struct smp_ops_t iSeries_smp_ops = {
-	.message_pass = smp_iSeries_message_pass,
+	.message_pass = smp_muxed_ipi_message_pass,
+	.cause_ipi    = smp_iSeries_cause_ipi,
 	.probe        = smp_iSeries_probe,
 	.kick_cpu     = smp_iSeries_kick_cpu,
 	.setup_cpu    = smp_iSeries_setup_cpu,
diff --git a/arch/powerpc/platforms/iseries/smp.h b/arch/powerpc/platforms/iseries/smp.h
deleted file mode 100644
index d501f7de01e..00000000000
--- a/arch/powerpc/platforms/iseries/smp.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _PLATFORMS_ISERIES_SMP_H
-#define _PLATFORMS_ISERIES_SMP_H
-
-extern void iSeries_smp_message_recv(void);
-
-#endif	/* _PLATFORMS_ISERIES_SMP_H */
diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig
index 1e1a0873e1d..1afd10f6785 100644
--- a/arch/powerpc/platforms/powermac/Kconfig
+++ b/arch/powerpc/platforms/powermac/Kconfig
@@ -18,4 +18,13 @@ config PPC_PMAC64
 	select PPC_970_NAP
 	default y
 
-
+config PPC_PMAC32_PSURGE
+	bool "Support for powersurge upgrade cards" if EXPERT
+	depends on SMP && PPC32 && PPC_PMAC
+	select PPC_SMP_MUXED_IPI
+	default y
+	help
+	  The powersurge cpu boards can be used in the generation
+	  of powermacs that have a socket for an upgradeable cpu card,
+	  including the 7500, 8500, 9500, 9600.  Support exists for
+	  both dual and quad socket upgrade cards.
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index 023f24086a0..9089b042119 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -21,7 +21,7 @@
 #include <linux/signal.h>
 #include <linux/pci.h>
 #include <linux/interrupt.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/adb.h>
 #include <linux/pmu.h>
 #include <linux/module.h>
@@ -84,7 +84,7 @@ static void __pmac_retrigger(unsigned int irq_nr)
 
 static void pmac_mask_and_ack_irq(struct irq_data *d)
 {
-	unsigned int src = irq_map[d->irq].hwirq;
+	unsigned int src = irqd_to_hwirq(d);
         unsigned long bit = 1UL << (src & 0x1f);
         int i = src >> 5;
         unsigned long flags;
@@ -106,7 +106,7 @@ static void pmac_mask_and_ack_irq(struct irq_data *d)
 
 static void pmac_ack_irq(struct irq_data *d)
 {
-	unsigned int src = irq_map[d->irq].hwirq;
+	unsigned int src = irqd_to_hwirq(d);
         unsigned long bit = 1UL << (src & 0x1f);
         int i = src >> 5;
         unsigned long flags;
@@ -152,7 +152,7 @@ static void __pmac_set_irq_mask(unsigned int irq_nr, int nokicklost)
 static unsigned int pmac_startup_irq(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int src = irq_map[d->irq].hwirq;
+	unsigned int src = irqd_to_hwirq(d);
         unsigned long bit = 1UL << (src & 0x1f);
         int i = src >> 5;
 
@@ -169,7 +169,7 @@ static unsigned int pmac_startup_irq(struct irq_data *d)
 static void pmac_mask_irq(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int src = irq_map[d->irq].hwirq;
+	unsigned int src = irqd_to_hwirq(d);
 
 	raw_spin_lock_irqsave(&pmac_pic_lock, flags);
         __clear_bit(src, ppc_cached_irq_mask);
@@ -180,7 +180,7 @@ static void pmac_mask_irq(struct irq_data *d)
 static void pmac_unmask_irq(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int src = irq_map[d->irq].hwirq;
+	unsigned int src = irqd_to_hwirq(d);
 
 	raw_spin_lock_irqsave(&pmac_pic_lock, flags);
 	__set_bit(src, ppc_cached_irq_mask);
@@ -193,7 +193,7 @@ static int pmac_retrigger(struct irq_data *d)
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&pmac_pic_lock, flags);
-	__pmac_retrigger(irq_map[d->irq].hwirq);
+	__pmac_retrigger(irqd_to_hwirq(d));
 	raw_spin_unlock_irqrestore(&pmac_pic_lock, flags);
 	return 1;
 }
@@ -239,15 +239,12 @@ static unsigned int pmac_pic_get_irq(void)
 	unsigned long bits = 0;
 	unsigned long flags;
 
-#ifdef CONFIG_SMP
-	void psurge_smp_message_recv(void);
-
-       	/* IPI's are a hack on the powersurge -- Cort */
-       	if ( smp_processor_id() != 0 ) {
-		psurge_smp_message_recv();
-		return NO_IRQ_IGNORE;	/* ignore, already handled */
+#ifdef CONFIG_PPC_PMAC32_PSURGE
+	/* IPI's are a hack on the powersurge -- Cort */
+	if (smp_processor_id() != 0) {
+		return  psurge_secondary_virq;
         }
-#endif /* CONFIG_SMP */
+#endif /* CONFIG_PPC_PMAC32_PSURGE */
 	raw_spin_lock_irqsave(&pmac_pic_lock, flags);
 	for (irq = max_real_irqs; (irq -= 32) >= 0; ) {
 		int i = irq >> 5;
@@ -677,7 +674,7 @@ not_found:
 	return viaint;
 }
 
-static int pmacpic_suspend(struct sys_device *sysdev, pm_message_t state)
+static int pmacpic_suspend(void)
 {
 	int viaint = pmacpic_find_viaint();
 
@@ -698,7 +695,7 @@ static int pmacpic_suspend(struct sys_device *sysdev, pm_message_t state)
         return 0;
 }
 
-static int pmacpic_resume(struct sys_device *sysdev)
+static void pmacpic_resume(void)
 {
 	int i;
 
@@ -709,39 +706,19 @@ static int pmacpic_resume(struct sys_device *sysdev)
 	for (i = 0; i < max_real_irqs; ++i)
 		if (test_bit(i, sleep_save_mask))
 			pmac_unmask_irq(irq_get_irq_data(i));
-
-	return 0;
 }
 
-#endif /* CONFIG_PM && CONFIG_PPC32 */
-
-static struct sysdev_class pmacpic_sysclass = {
-	.name = "pmac_pic",
-};
-
-static struct sys_device device_pmacpic = {
-	.id		= 0,
-	.cls		= &pmacpic_sysclass,
-};
-
-static struct sysdev_driver driver_pmacpic = {
-#if defined(CONFIG_PM) && defined(CONFIG_PPC32)
-	.suspend	= &pmacpic_suspend,
-	.resume		= &pmacpic_resume,
-#endif /* CONFIG_PM && CONFIG_PPC32 */
+static struct syscore_ops pmacpic_syscore_ops = {
+	.suspend	= pmacpic_suspend,
+	.resume		= pmacpic_resume,
 };
 
-static int __init init_pmacpic_sysfs(void)
+static int __init init_pmacpic_syscore(void)
 {
-#ifdef CONFIG_PPC32
-	if (max_irqs == 0)
-		return -ENODEV;
-#endif
-	printk(KERN_DEBUG "Registering pmac pic with sysfs...\n");
-	sysdev_class_register(&pmacpic_sysclass);
-	sysdev_register(&device_pmacpic);
-	sysdev_driver_register(&pmacpic_sysclass, &driver_pmacpic);
+	register_syscore_ops(&pmacpic_syscore_ops);
 	return 0;
 }
-machine_subsys_initcall(powermac, init_pmacpic_sysfs);
 
+machine_subsys_initcall(powermac, init_pmacpic_syscore);
+
+#endif /* CONFIG_PM && CONFIG_PPC32 */
diff --git a/arch/powerpc/platforms/powermac/pic.h b/arch/powerpc/platforms/powermac/pic.h
deleted file mode 100644
index d622a8345aa..00000000000
--- a/arch/powerpc/platforms/powermac/pic.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef __PPC_PLATFORMS_PMAC_PIC_H
-#define __PPC_PLATFORMS_PMAC_PIC_H
-
-#include <linux/irq.h>
-
-extern struct irq_chip pmac_pic;
-
-extern void pmac_pic_init(void);
-extern int pmac_get_irq(void);
-
-#endif /* __PPC_PLATFORMS_PMAC_PIC_H */
diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h
index 20468f49aec..8327cce2bdb 100644
--- a/arch/powerpc/platforms/powermac/pmac.h
+++ b/arch/powerpc/platforms/powermac/pmac.h
@@ -33,6 +33,7 @@ extern void pmac_setup_pci_dma(void);
 extern void pmac_check_ht_link(void);
 
 extern void pmac_setup_smp(void);
+extern int psurge_secondary_virq;
 extern void low_cpu_die(void) __attribute__((noreturn));
 
 extern int pmac_nvram_init(void);
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index bc5f0dc6ae1..db092d7c4c5 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -70,7 +70,7 @@ static void (*pmac_tb_freeze)(int freeze);
 static u64 timebase;
 static int tb_req;
 
-#ifdef CONFIG_PPC32
+#ifdef CONFIG_PPC_PMAC32_PSURGE
 
 /*
  * Powersurge (old powermac SMP) support.
@@ -124,6 +124,10 @@ static volatile u32 __iomem *psurge_start;
 /* what sort of powersurge board we have */
 static int psurge_type = PSURGE_NONE;
 
+/* irq for secondary cpus to report */
+static struct irq_host *psurge_host;
+int psurge_secondary_virq;
+
 /*
  * Set and clear IPIs for powersurge.
  */
@@ -156,51 +160,52 @@ static inline void psurge_clr_ipi(int cpu)
 /*
  * On powersurge (old SMP powermac architecture) we don't have
  * separate IPIs for separate messages like openpic does.  Instead
- * we have a bitmap for each processor, where a 1 bit means that
- * the corresponding message is pending for that processor.
- * Ideally each cpu's entry would be in a different cache line.
+ * use the generic demux helpers
  *  -- paulus.
  */
-static unsigned long psurge_smp_message[NR_CPUS];
-
-void psurge_smp_message_recv(void)
+static irqreturn_t psurge_ipi_intr(int irq, void *d)
 {
-	int cpu = smp_processor_id();
-	int msg;
+	psurge_clr_ipi(smp_processor_id());
+	smp_ipi_demux();
 
-	/* clear interrupt */
-	psurge_clr_ipi(cpu);
-
-	if (num_online_cpus() < 2)
-		return;
+	return IRQ_HANDLED;
+}
 
-	/* make sure there is a message there */
-	for (msg = 0; msg < 4; msg++)
-		if (test_and_clear_bit(msg, &psurge_smp_message[cpu]))
-			smp_message_recv(msg);
+static void smp_psurge_cause_ipi(int cpu, unsigned long data)
+{
+	psurge_set_ipi(cpu);
 }
 
-irqreturn_t psurge_primary_intr(int irq, void *d)
+static int psurge_host_map(struct irq_host *h, unsigned int virq,
+			 irq_hw_number_t hw)
 {
-	psurge_smp_message_recv();
-	return IRQ_HANDLED;
+	irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_percpu_irq);
+
+	return 0;
 }
 
-static void smp_psurge_message_pass(int target, int msg)
+struct irq_host_ops psurge_host_ops = {
+	.map	= psurge_host_map,
+};
+
+static int psurge_secondary_ipi_init(void)
 {
-	int i;
+	int rc = -ENOMEM;
 
-	if (num_online_cpus() < 2)
-		return;
+	psurge_host = irq_alloc_host(NULL, IRQ_HOST_MAP_NOMAP, 0,
+		&psurge_host_ops, 0);
 
-	for_each_online_cpu(i) {
-		if (target == MSG_ALL
-		    || (target == MSG_ALL_BUT_SELF && i != smp_processor_id())
-		    || target == i) {
-			set_bit(msg, &psurge_smp_message[i]);
-			psurge_set_ipi(i);
-		}
-	}
+	if (psurge_host)
+		psurge_secondary_virq = irq_create_direct_mapping(psurge_host);
+
+	if (psurge_secondary_virq)
+		rc = request_irq(psurge_secondary_virq, psurge_ipi_intr,
+			IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL);
+
+	if (rc)
+		pr_err("Failed to setup secondary cpu IPI\n");
+
+	return rc;
 }
 
 /*
@@ -311,6 +316,9 @@ static int __init smp_psurge_probe(void)
 		ncpus = 2;
 	}
 
+	if (psurge_secondary_ipi_init())
+		return 1;
+
 	psurge_start = ioremap(PSURGE_START, 4);
 	psurge_pri_intr = ioremap(PSURGE_PRI_INTR, 4);
 
@@ -329,7 +337,7 @@ static int __init smp_psurge_probe(void)
 	return ncpus;
 }
 
-static void __init smp_psurge_kick_cpu(int nr)
+static int __init smp_psurge_kick_cpu(int nr)
 {
 	unsigned long start = __pa(__secondary_start_pmac_0) + nr * 8;
 	unsigned long a, flags;
@@ -394,11 +402,13 @@ static void __init smp_psurge_kick_cpu(int nr)
 		psurge_set_ipi(1);
 
 	if (ppc_md.progress) ppc_md.progress("smp_psurge_kick_cpu - done", 0x354);
+
+	return 0;
 }
 
 static struct irqaction psurge_irqaction = {
-	.handler = psurge_primary_intr,
-	.flags = IRQF_DISABLED,
+	.handler = psurge_ipi_intr,
+	.flags = IRQF_DISABLED|IRQF_PERCPU,
 	.name = "primary IPI",
 };
 
@@ -437,14 +447,15 @@ void __init smp_psurge_give_timebase(void)
 
 /* PowerSurge-style Macs */
 struct smp_ops_t psurge_smp_ops = {
-	.message_pass	= smp_psurge_message_pass,
+	.message_pass	= smp_muxed_ipi_message_pass,
+	.cause_ipi	= smp_psurge_cause_ipi,
 	.probe		= smp_psurge_probe,
 	.kick_cpu	= smp_psurge_kick_cpu,
 	.setup_cpu	= smp_psurge_setup_cpu,
 	.give_timebase	= smp_psurge_give_timebase,
 	.take_timebase	= smp_psurge_take_timebase,
 };
-#endif /* CONFIG_PPC32 - actually powersurge support */
+#endif /* CONFIG_PPC_PMAC32_PSURGE */
 
 /*
  * Core 99 and later support
@@ -791,14 +802,14 @@ static int __init smp_core99_probe(void)
 	return ncpus;
 }
 
-static void __devinit smp_core99_kick_cpu(int nr)
+static int __devinit smp_core99_kick_cpu(int nr)
 {
 	unsigned int save_vector;
 	unsigned long target, flags;
 	unsigned int *vector = (unsigned int *)(PAGE_OFFSET+0x100);
 
 	if (nr < 0 || nr > 3)
-		return;
+		return -ENOENT;
 
 	if (ppc_md.progress)
 		ppc_md.progress("smp_core99_kick_cpu", 0x346);
@@ -830,6 +841,8 @@ static void __devinit smp_core99_kick_cpu(int nr)
 
 	local_irq_restore(flags);
 	if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347);
+
+	return 0;
 }
 
 static void __devinit smp_core99_setup_cpu(int cpu_nr)
@@ -1002,7 +1015,7 @@ void __init pmac_setup_smp(void)
 		of_node_put(np);
 		smp_ops = &core99_smp_ops;
 	}
-#ifdef CONFIG_PPC32
+#ifdef CONFIG_PPC_PMAC32_PSURGE
 	else {
 		/* We have to set bits in cpu_possible_mask here since the
 		 * secondary CPU(s) aren't in the device tree. Various
@@ -1015,7 +1028,7 @@ void __init pmac_setup_smp(void)
 			set_cpu_possible(cpu, true);
 		smp_ops = &psurge_smp_ops;
 	}
-#endif /* CONFIG_PPC32 */
+#endif /* CONFIG_PPC_PMAC32_PSURGE */
 
 #ifdef CONFIG_HOTPLUG_CPU
 	ppc_md.cpu_die = pmac_cpu_die;
diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c
index f2f6413b81d..600ed2c0ed5 100644
--- a/arch/powerpc/platforms/ps3/interrupt.c
+++ b/arch/powerpc/platforms/ps3/interrupt.c
@@ -197,7 +197,7 @@ static int ps3_virq_setup(enum ps3_cpu_binding cpu, unsigned long outlet,
 	result = irq_set_chip_data(*virq, pd);
 
 	if (result) {
-		pr_debug("%s:%d: set_irq_chip_data failed\n",
+		pr_debug("%s:%d: irq_set_chip_data failed\n",
 			__func__, __LINE__);
 		goto fail_set;
 	}
@@ -659,11 +659,6 @@ static void __maybe_unused _dump_mask(struct ps3_private *pd,
 static void dump_bmp(struct ps3_private* pd) {};
 #endif /* defined(DEBUG) */
 
-static void ps3_host_unmap(struct irq_host *h, unsigned int virq)
-{
-	irq_set_chip_data(virq, NULL);
-}
-
 static int ps3_host_map(struct irq_host *h, unsigned int virq,
 	irq_hw_number_t hwirq)
 {
@@ -683,7 +678,6 @@ static int ps3_host_match(struct irq_host *h, struct device_node *np)
 
 static struct irq_host_ops ps3_host_ops = {
 	.map = ps3_host_map,
-	.unmap = ps3_host_unmap,
 	.match = ps3_host_match,
 };
 
diff --git a/arch/powerpc/platforms/ps3/smp.c b/arch/powerpc/platforms/ps3/smp.c
index 51ffde40af2..4c44794faac 100644
--- a/arch/powerpc/platforms/ps3/smp.c
+++ b/arch/powerpc/platforms/ps3/smp.c
@@ -39,7 +39,7 @@
 #define MSG_COUNT 4
 static DEFINE_PER_CPU(unsigned int [MSG_COUNT], ps3_ipi_virqs);
 
-static void do_message_pass(int target, int msg)
+static void ps3_smp_message_pass(int cpu, int msg)
 {
 	int result;
 	unsigned int virq;
@@ -49,28 +49,12 @@ static void do_message_pass(int target, int msg)
 		return;
 	}
 
-	virq = per_cpu(ps3_ipi_virqs, target)[msg];
+	virq = per_cpu(ps3_ipi_virqs, cpu)[msg];
 	result = ps3_send_event_locally(virq);
 
 	if (result)
 		DBG("%s:%d: ps3_send_event_locally(%d, %d) failed"
-			" (%d)\n", __func__, __LINE__, target, msg, result);
-}
-
-static void ps3_smp_message_pass(int target, int msg)
-{
-	int cpu;
-
-	if (target < NR_CPUS)
-		do_message_pass(target, msg);
-	else if (target == MSG_ALL_BUT_SELF) {
-		for_each_online_cpu(cpu)
-			if (cpu != smp_processor_id())
-				do_message_pass(cpu, msg);
-	} else {
-		for_each_online_cpu(cpu)
-			do_message_pass(cpu, msg);
-	}
+			" (%d)\n", __func__, __LINE__, cpu, msg, result);
 }
 
 static int ps3_smp_probe(void)
diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c
index 39a472e9e80..375a9f92158 100644
--- a/arch/powerpc/platforms/ps3/spu.c
+++ b/arch/powerpc/platforms/ps3/spu.c
@@ -197,7 +197,7 @@ static void spu_unmap(struct spu *spu)
  * The current HV requires the spu shadow regs to be mapped with the
  * PTE page protection bits set as read-only (PP=3).  This implementation
  * uses the low level __ioremap() to bypass the page protection settings
- * inforced by ioremap_flags() to get the needed PTE bits set for the
+ * inforced by ioremap_prot() to get the needed PTE bits set for the
  * shadow regs.
  */
 
@@ -214,7 +214,7 @@ static int __init setup_areas(struct spu *spu)
 		goto fail_ioremap;
 	}
 
-	spu->local_store = (__force void *)ioremap_flags(spu->local_store_phys,
+	spu->local_store = (__force void *)ioremap_prot(spu->local_store_phys,
 		LS_SIZE, _PAGE_NO_CACHE);
 
 	if (!spu->local_store) {
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 5b3da4b4ea7..71af4c5d6c0 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -3,7 +3,10 @@ config PPC_PSERIES
 	bool "IBM pSeries & new (POWER5-based) iSeries"
 	select MPIC
 	select PCI_MSI
-	select XICS
+	select PPC_XICS
+	select PPC_ICP_NATIVE
+	select PPC_ICP_HV
+	select PPC_ICS_RTAS
 	select PPC_I8259
 	select PPC_RTAS
 	select PPC_RTAS_DAEMON
@@ -47,6 +50,24 @@ config SCANLOG
 	tristate "Scanlog dump interface"
 	depends on RTAS_PROC && PPC_PSERIES
 
+config IO_EVENT_IRQ
+	bool "IO Event Interrupt support"
+	depends on PPC_PSERIES
+	default y
+	help
+	  Select this option, if you want to enable support for IO Event
+	  interrupts. IO event interrupt is a mechanism provided by RTAS
+	  to return information about hardware error and non-error events
+	  which may need OS attention. RTAS returns events for multiple
+	  event types and scopes. Device drivers can register their handlers
+	  to receive events.
+
+	  This option will only enable the IO event platform code. You
+	  will still need to enable or compile the actual drivers
+	  that use this infrastruture to handle IO event interrupts.
+
+	  Say Y if you are unsure.
+
 config LPARCFG
 	bool "LPAR Configuration Data"
 	depends on PPC_PSERIES || PPC_ISERIES
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index fc5237810ec..3556e402cbf 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -5,7 +5,6 @@ obj-y			:= lpar.o hvCall.o nvram.o reconfig.o \
 			   setup.o iommu.o event_sources.o ras.o \
 			   firmware.o power.o dlpar.o mobility.o
 obj-$(CONFIG_SMP)	+= smp.o
-obj-$(CONFIG_XICS)	+= xics.o
 obj-$(CONFIG_SCANLOG)	+= scanlog.o
 obj-$(CONFIG_EEH)	+= eeh.o eeh_cache.o eeh_driver.o eeh_event.o eeh_sysfs.o
 obj-$(CONFIG_KEXEC)	+= kexec.o
@@ -22,6 +21,7 @@ obj-$(CONFIG_HCALL_STATS)	+= hvCall_inst.o
 obj-$(CONFIG_PHYP_DUMP)		+= phyp_dump.o
 obj-$(CONFIG_CMM)		+= cmm.o
 obj-$(CONFIG_DTL)		+= dtl.o
+obj-$(CONFIG_IO_EVENT_IRQ)	+= io_event_irq.o
 
 ifeq ($(CONFIG_PPC_PSERIES),y)
 obj-$(CONFIG_SUSPEND)		+= suspend.o
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index c371bc06434..e9190073bb9 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -52,10 +52,10 @@ static u8 dtl_event_mask = 0x7;
 
 
 /*
- * Size of per-cpu log buffers. Default is just under 16 pages worth.
+ * Size of per-cpu log buffers. Firmware requires that the buffer does
+ * not cross a 4k boundary.
  */
-static int dtl_buf_entries = (16 * 85);
-
+static int dtl_buf_entries = N_DISPATCH_LOG;
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 struct dtl_ring {
@@ -151,7 +151,7 @@ static int dtl_start(struct dtl *dtl)
 
 	/* Register our dtl buffer with the hypervisor. The HV expects the
 	 * buffer size to be passed in the second word of the buffer */
-	((u32 *)dtl->buf)[1] = dtl->buf_entries * sizeof(struct dtl_entry);
+	((u32 *)dtl->buf)[1] = DISPATCH_LOG_BYTES;
 
 	hwcpu = get_hard_smp_processor_id(dtl->cpu);
 	addr = __pa(dtl->buf);
@@ -196,13 +196,15 @@ static int dtl_enable(struct dtl *dtl)
 	long int rc;
 	struct dtl_entry *buf = NULL;
 
+	if (!dtl_cache)
+		return -ENOMEM;
+
 	/* only allow one reader */
 	if (dtl->buf)
 		return -EBUSY;
 
 	n_entries = dtl_buf_entries;
-	buf = kmalloc_node(n_entries * sizeof(struct dtl_entry),
-			GFP_KERNEL, cpu_to_node(dtl->cpu));
+	buf = kmem_cache_alloc_node(dtl_cache, GFP_KERNEL, cpu_to_node(dtl->cpu));
 	if (!buf) {
 		printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n",
 				__func__, dtl->cpu);
@@ -223,7 +225,7 @@ static int dtl_enable(struct dtl *dtl)
 	spin_unlock(&dtl->lock);
 
 	if (rc)
-		kfree(buf);
+		kmem_cache_free(dtl_cache, buf);
 	return rc;
 }
 
@@ -231,7 +233,7 @@ static void dtl_disable(struct dtl *dtl)
 {
 	spin_lock(&dtl->lock);
 	dtl_stop(dtl);
-	kfree(dtl->buf);
+	kmem_cache_free(dtl_cache, dtl->buf);
 	dtl->buf = NULL;
 	dtl->buf_entries = 0;
 	spin_unlock(&dtl->lock);
@@ -365,7 +367,7 @@ static int dtl_init(void)
 
 	event_mask_file = debugfs_create_x8("dtl_event_mask", 0600,
 				dtl_dir, &dtl_event_mask);
-	buf_entries_file = debugfs_create_u32("dtl_buf_entries", 0600,
+	buf_entries_file = debugfs_create_u32("dtl_buf_entries", 0400,
 				dtl_dir, &dtl_buf_entries);
 
 	if (!event_mask_file || !buf_entries_file) {
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 89649173d3a..46b55cf563e 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -93,6 +93,7 @@ static int ibm_slot_error_detail;
 static int ibm_get_config_addr_info;
 static int ibm_get_config_addr_info2;
 static int ibm_configure_bridge;
+static int ibm_configure_pe;
 
 int eeh_subsystem_enabled;
 EXPORT_SYMBOL(eeh_subsystem_enabled);
@@ -261,6 +262,8 @@ void eeh_slot_error_detail(struct pci_dn *pdn, int severity)
 	pci_regs_buf[0] = 0;
 
 	rtas_pci_enable(pdn, EEH_THAW_MMIO);
+	rtas_configure_bridge(pdn);
+	eeh_restore_bars(pdn);
 	loglen = gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN);
 
 	rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen);
@@ -448,6 +451,39 @@ void eeh_clear_slot (struct device_node *dn, int mode_flag)
 	raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
 }
 
+void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset)
+{
+	struct device_node *dn;
+
+	for_each_child_of_node(parent, dn) {
+		if (PCI_DN(dn)) {
+
+			struct pci_dev *dev = PCI_DN(dn)->pcidev;
+
+			if (dev && dev->driver)
+				*freset |= dev->needs_freset;
+
+			__eeh_set_pe_freset(dn, freset);
+		}
+	}
+}
+
+void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset)
+{
+	struct pci_dev *dev;
+	dn = find_device_pe(dn);
+
+	/* Back up one, since config addrs might be shared */
+	if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
+		dn = dn->parent;
+
+	dev = PCI_DN(dn)->pcidev;
+	if (dev)
+		*freset |= dev->needs_freset;
+
+	__eeh_set_pe_freset(dn, freset);
+}
+
 /**
  * eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze
  * @dn device node
@@ -692,15 +728,24 @@ rtas_pci_slot_reset(struct pci_dn *pdn, int state)
 	if (pdn->eeh_pe_config_addr)
 		config_addr = pdn->eeh_pe_config_addr;
 
-	rc = rtas_call(ibm_set_slot_reset,4,1, NULL,
+	rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
 	               config_addr,
 	               BUID_HI(pdn->phb->buid),
 	               BUID_LO(pdn->phb->buid),
 	               state);
-	if (rc)
-		printk (KERN_WARNING "EEH: Unable to reset the failed slot,"
-		        " (%d) #RST=%d dn=%s\n",
-		        rc, state, pdn->node->full_name);
+
+	/* Fundamental-reset not supported on this PE, try hot-reset */
+	if (rc == -8 && state == 3) {
+		rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+			       config_addr,
+			       BUID_HI(pdn->phb->buid),
+			       BUID_LO(pdn->phb->buid), 1);
+		if (rc)
+			printk(KERN_WARNING
+				"EEH: Unable to reset the failed slot,"
+				" #RST=%d dn=%s\n",
+				rc, pdn->node->full_name);
+	}
 }
 
 /**
@@ -736,18 +781,21 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
 /**
  * rtas_set_slot_reset -- assert the pci #RST line for 1/4 second
  * @pdn: pci device node to be reset.
- *
- *  Return 0 if success, else a non-zero value.
  */
 
 static void __rtas_set_slot_reset(struct pci_dn *pdn)
 {
-	struct pci_dev *dev = pdn->pcidev;
+	unsigned int freset = 0;
 
-	/* Determine type of EEH reset required by device,
-	 * default hot reset or fundamental reset
-	 */
-	if (dev && dev->needs_freset)
+	/* Determine type of EEH reset required for
+	 * Partitionable Endpoint, a hot-reset (1)
+	 * or a fundamental reset (3).
+	 * A fundamental reset required by any device under
+	 * Partitionable Endpoint trumps hot-reset.
+  	 */
+	eeh_set_pe_freset(pdn->node, &freset);
+
+	if (freset)
 		rtas_pci_slot_reset(pdn, 3);
 	else
 		rtas_pci_slot_reset(pdn, 1);
@@ -895,13 +943,20 @@ rtas_configure_bridge(struct pci_dn *pdn)
 {
 	int config_addr;
 	int rc;
+	int token;
 
 	/* Use PE configuration address, if present */
 	config_addr = pdn->eeh_config_addr;
 	if (pdn->eeh_pe_config_addr)
 		config_addr = pdn->eeh_pe_config_addr;
 
-	rc = rtas_call(ibm_configure_bridge,3,1, NULL,
+	/* Use new configure-pe function, if supported */
+	if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE)
+		token = ibm_configure_pe;
+	else
+		token = ibm_configure_bridge;
+
+	rc = rtas_call(token, 3, 1, NULL,
 	               config_addr,
 	               BUID_HI(pdn->phb->buid),
 	               BUID_LO(pdn->phb->buid));
@@ -1077,6 +1132,7 @@ void __init eeh_init(void)
 	ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");
 	ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2");
 	ibm_configure_bridge = rtas_token ("ibm,configure-bridge");
+	ibm_configure_pe = rtas_token("ibm,configure-pe");
 
 	if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE)
 		return;
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index b8d70f5d9aa..1b6cb10589e 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -328,7 +328,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
 	struct pci_bus *frozen_bus;
 	int rc = 0;
 	enum pci_ers_result result = PCI_ERS_RESULT_NONE;
-	const char *location, *pci_str, *drv_str;
+	const char *location, *pci_str, *drv_str, *bus_pci_str, *bus_drv_str;
 
 	frozen_dn = find_device_pe(event->dn);
 	if (!frozen_dn) {
@@ -364,13 +364,8 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
 	frozen_pdn = PCI_DN(frozen_dn);
 	frozen_pdn->eeh_freeze_count++;
 
-	if (frozen_pdn->pcidev) {
-		pci_str = pci_name (frozen_pdn->pcidev);
-		drv_str = pcid_name (frozen_pdn->pcidev);
-	} else {
-		pci_str = eeh_pci_name(event->dev);
-		drv_str = pcid_name (event->dev);
-	}
+	pci_str = eeh_pci_name(event->dev);
+	drv_str = pcid_name(event->dev);
 	
 	if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES)
 		goto excess_failures;
@@ -378,8 +373,17 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
 	printk(KERN_WARNING
 	   "EEH: This PCI device has failed %d times in the last hour:\n",
 		frozen_pdn->eeh_freeze_count);
+
+	if (frozen_pdn->pcidev) {
+		bus_pci_str = pci_name(frozen_pdn->pcidev);
+		bus_drv_str = pcid_name(frozen_pdn->pcidev);
+		printk(KERN_WARNING
+			"EEH: Bus location=%s driver=%s pci addr=%s\n",
+			location, bus_drv_str, bus_pci_str);
+	}
+
 	printk(KERN_WARNING
-		"EEH: location=%s driver=%s pci addr=%s\n",
+		"EEH: Device location=%s driver=%s pci addr=%s\n",
 		location, drv_str, pci_str);
 
 	/* Walk the various device drivers attached to this slot through
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index ef8c45489e2..46f13a3c5d0 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -19,6 +19,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/cpu.h>
 #include <asm/system.h>
@@ -28,7 +29,7 @@
 #include <asm/machdep.h>
 #include <asm/vdso_datapage.h>
 #include <asm/pSeries_reconfig.h>
-#include "xics.h"
+#include <asm/xics.h>
 #include "plpar_wrappers.h"
 #include "offline_states.h"
 
@@ -280,7 +281,7 @@ static int pseries_add_processor(struct device_node *np)
 	}
 
 	for_each_cpu(cpu, tmp) {
-		BUG_ON(cpumask_test_cpu(cpu, cpu_present_mask));
+		BUG_ON(cpu_present(cpu));
 		set_cpu_present(cpu, true);
 		set_hard_smp_processor_id(cpu, *intserv++);
 	}
diff --git a/arch/powerpc/platforms/pseries/io_event_irq.c b/arch/powerpc/platforms/pseries/io_event_irq.c
new file mode 100644
index 00000000000..c829e6067d5
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/io_event_irq.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright 2010 2011 Mark Nelson and Tseng-Hui (Frank) Lin, IBM Corporation
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/list.h>
+#include <linux/notifier.h>
+
+#include <asm/machdep.h>
+#include <asm/rtas.h>
+#include <asm/irq.h>
+#include <asm/io_event_irq.h>
+
+#include "pseries.h"
+
+/*
+ * IO event interrupt is a mechanism provided by RTAS to return
+ * information about hardware error and non-error events. Device
+ * drivers can register their event handlers to receive events.
+ * Device drivers are expected to use atomic_notifier_chain_register()
+ * and atomic_notifier_chain_unregister() to register and unregister
+ * their event handlers. Since multiple IO event types and scopes
+ * share an IO event interrupt, the event handlers are called one
+ * by one until the IO event is claimed by one of the handlers.
+ * The event handlers are expected to return NOTIFY_OK if the
+ * event is handled by the event handler or NOTIFY_DONE if the
+ * event does not belong to the handler.
+ *
+ * Usage:
+ *
+ * Notifier function:
+ * #include <asm/io_event_irq.h>
+ * int event_handler(struct notifier_block *nb, unsigned long val, void *data) {
+ * 	p = (struct pseries_io_event_sect_data *) data;
+ * 	if (! is_my_event(p->scope, p->event_type)) return NOTIFY_DONE;
+ * 		:
+ * 		:
+ * 	return NOTIFY_OK;
+ * }
+ * struct notifier_block event_nb = {
+ * 	.notifier_call = event_handler,
+ * }
+ *
+ * Registration:
+ * atomic_notifier_chain_register(&pseries_ioei_notifier_list, &event_nb);
+ *
+ * Unregistration:
+ * atomic_notifier_chain_unregister(&pseries_ioei_notifier_list, &event_nb);
+ */
+
+ATOMIC_NOTIFIER_HEAD(pseries_ioei_notifier_list);
+EXPORT_SYMBOL_GPL(pseries_ioei_notifier_list);
+
+static int ioei_check_exception_token;
+
+/* pSeries event log format */
+
+/* Two bytes ASCII section IDs */
+#define PSERIES_ELOG_SECT_ID_PRIV_HDR		(('P' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_USER_HDR		(('U' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_PRIMARY_SRC	(('P' << 8) | 'S')
+#define PSERIES_ELOG_SECT_ID_EXTENDED_UH	(('E' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_FAILING_MTMS	(('M' << 8) | 'T')
+#define PSERIES_ELOG_SECT_ID_SECONDARY_SRC	(('S' << 8) | 'S')
+#define PSERIES_ELOG_SECT_ID_DUMP_LOCATOR	(('D' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_FW_ERROR		(('S' << 8) | 'W')
+#define PSERIES_ELOG_SECT_ID_IMPACT_PART_ID	(('L' << 8) | 'P')
+#define PSERIES_ELOG_SECT_ID_LOGIC_RESOURCE_ID	(('L' << 8) | 'R')
+#define PSERIES_ELOG_SECT_ID_HMC_ID		(('H' << 8) | 'M')
+#define PSERIES_ELOG_SECT_ID_EPOW		(('E' << 8) | 'P')
+#define PSERIES_ELOG_SECT_ID_IO_EVENT		(('I' << 8) | 'E')
+#define PSERIES_ELOG_SECT_ID_MANUFACT_INFO	(('M' << 8) | 'I')
+#define PSERIES_ELOG_SECT_ID_CALL_HOME		(('C' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_USER_DEF		(('U' << 8) | 'D')
+
+/* Vendor specific Platform Event Log Format, Version 6, section header */
+struct pseries_elog_section {
+	uint16_t id;			/* 0x00 2-byte ASCII section ID	*/
+	uint16_t length;		/* 0x02 Section length in bytes	*/
+	uint8_t version;		/* 0x04 Section version		*/
+	uint8_t subtype;		/* 0x05 Section subtype		*/
+	uint16_t creator_component;	/* 0x06 Creator component ID	*/
+	uint8_t data[];			/* 0x08 Start of section data	*/
+};
+
+static char ioei_rtas_buf[RTAS_DATA_BUF_SIZE] __cacheline_aligned;
+
+/**
+ * Find data portion of a specific section in RTAS extended event log.
+ * @elog: RTAS error/event log.
+ * @sect_id: secsion ID.
+ *
+ * Return:
+ *	pointer to the section data of the specified section
+ *	NULL if not found
+ */
+static struct pseries_elog_section *find_xelog_section(struct rtas_error_log *elog,
+						       uint16_t sect_id)
+{
+	struct rtas_ext_event_log_v6 *xelog =
+		(struct rtas_ext_event_log_v6 *) elog->buffer;
+	struct pseries_elog_section *sect;
+	unsigned char *p, *log_end;
+
+	/* Check that we understand the format */
+	if (elog->extended_log_length < sizeof(struct rtas_ext_event_log_v6) ||
+	    xelog->log_format != RTAS_V6EXT_LOG_FORMAT_EVENT_LOG ||
+	    xelog->company_id != RTAS_V6EXT_COMPANY_ID_IBM)
+		return NULL;
+
+	log_end = elog->buffer + elog->extended_log_length;
+	p = xelog->vendor_log;
+	while (p < log_end) {
+		sect = (struct pseries_elog_section *)p;
+		if (sect->id == sect_id)
+			return sect;
+		p += sect->length;
+	}
+	return NULL;
+}
+
+/**
+ * Find the data portion of an IO Event section from event log.
+ * @elog: RTAS error/event log.
+ *
+ * Return:
+ * 	pointer to a valid IO event section data. NULL if not found.
+ */
+static struct pseries_io_event * ioei_find_event(struct rtas_error_log *elog)
+{
+	struct pseries_elog_section *sect;
+
+	/* We should only ever get called for io-event interrupts, but if
+	 * we do get called for another type then something went wrong so
+	 * make some noise about it.
+	 * RTAS_TYPE_IO only exists in extended event log version 6 or later.
+	 * No need to check event log version.
+	 */
+	if (unlikely(elog->type != RTAS_TYPE_IO)) {
+		printk_once(KERN_WARNING "io_event_irq: Unexpected event type %d",
+			    elog->type);
+		return NULL;
+	}
+
+	sect = find_xelog_section(elog, PSERIES_ELOG_SECT_ID_IO_EVENT);
+	if (unlikely(!sect)) {
+		printk_once(KERN_WARNING "io_event_irq: RTAS extended event "
+			    "log does not contain an IO Event section. "
+			    "Could be a bug in system firmware!\n");
+		return NULL;
+	}
+	return (struct pseries_io_event *) &sect->data;
+}
+
+/*
+ * PAPR:
+ * - check-exception returns the first found error or event and clear that
+ *   error or event so it is reported once.
+ * - Each interrupt returns one event. If a plateform chooses to report
+ *   multiple events through a single interrupt, it must ensure that the
+ *   interrupt remains asserted until check-exception has been used to
+ *   process all out-standing events for that interrupt.
+ *
+ * Implementation notes:
+ * - Events must be processed in the order they are returned. Hence,
+ *   sequential in nature.
+ * - The owner of an event is determined by combinations of scope,
+ *   event type, and sub-type. There is no easy way to pre-sort clients
+ *   by scope or event type alone. For example, Torrent ISR route change
+ *   event is reported with scope 0x00 (Not Applicatable) rather than
+ *   0x3B (Torrent-hub). It is better to let the clients to identify
+ *   who owns the the event.
+ */
+
+static irqreturn_t ioei_interrupt(int irq, void *dev_id)
+{
+	struct pseries_io_event *event;
+	int rtas_rc;
+
+	for (;;) {
+		rtas_rc = rtas_call(ioei_check_exception_token, 6, 1, NULL,
+				    RTAS_VECTOR_EXTERNAL_INTERRUPT,
+				    virq_to_hw(irq),
+				    RTAS_IO_EVENTS, 1 /* Time Critical */,
+				    __pa(ioei_rtas_buf),
+				    RTAS_DATA_BUF_SIZE);
+		if (rtas_rc != 0)
+			break;
+
+		event = ioei_find_event((struct rtas_error_log *)ioei_rtas_buf);
+		if (!event)
+			continue;
+
+		atomic_notifier_call_chain(&pseries_ioei_notifier_list,
+					   0, event);
+	}
+	return IRQ_HANDLED;
+}
+
+static int __init ioei_init(void)
+{
+	struct device_node *np;
+
+	ioei_check_exception_token = rtas_token("check-exception");
+	if (ioei_check_exception_token == RTAS_UNKNOWN_SERVICE) {
+		pr_warning("IO Event IRQ not supported on this system !\n");
+		return -ENODEV;
+	}
+	np = of_find_node_by_path("/event-sources/ibm,io-events");
+	if (np) {
+		request_event_sources_irqs(np, ioei_interrupt, "IO_EVENT");
+		of_node_put(np);
+	} else {
+		pr_err("io_event_irq: No ibm,io-events on system! "
+		       "IO Event interrupt disabled.\n");
+		return -ENODEV;
+	}
+	return 0;
+}
+machine_subsys_initcall(pseries, ioei_init);
+
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 6d5412a18b2..01faab9456c 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -659,15 +659,18 @@ static void remove_ddw(struct device_node *np)
 {
 	struct dynamic_dma_window_prop *dwp;
 	struct property *win64;
-	const u32 *ddr_avail;
+	const u32 *ddw_avail;
 	u64 liobn;
 	int len, ret;
 
-	ddr_avail = of_get_property(np, "ibm,ddw-applicable", &len);
+	ddw_avail = of_get_property(np, "ibm,ddw-applicable", &len);
 	win64 = of_find_property(np, DIRECT64_PROPNAME, NULL);
-	if (!win64 || !ddr_avail || len < 3 * sizeof(u32))
+	if (!win64)
 		return;
 
+	if (!ddw_avail || len < 3 * sizeof(u32) || win64->length < sizeof(*dwp))
+		goto delprop;
+
 	dwp = win64->value;
 	liobn = (u64)be32_to_cpu(dwp->liobn);
 
@@ -681,28 +684,29 @@ static void remove_ddw(struct device_node *np)
 		pr_debug("%s successfully cleared tces in window.\n",
 			 np->full_name);
 
-	ret = rtas_call(ddr_avail[2], 1, 1, NULL, liobn);
+	ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn);
 	if (ret)
 		pr_warning("%s: failed to remove direct window: rtas returned "
 			"%d to ibm,remove-pe-dma-window(%x) %llx\n",
-			np->full_name, ret, ddr_avail[2], liobn);
+			np->full_name, ret, ddw_avail[2], liobn);
 	else
 		pr_debug("%s: successfully removed direct window: rtas returned "
 			"%d to ibm,remove-pe-dma-window(%x) %llx\n",
-			np->full_name, ret, ddr_avail[2], liobn);
-}
+			np->full_name, ret, ddw_avail[2], liobn);
 
+delprop:
+	ret = prom_remove_property(np, win64);
+	if (ret)
+		pr_warning("%s: failed to remove direct window property: %d\n",
+			np->full_name, ret);
+}
 
-static int dupe_ddw_if_already_created(struct pci_dev *dev, struct device_node *pdn)
+static u64 find_existing_ddw(struct device_node *pdn)
 {
-	struct device_node *dn;
-	struct pci_dn *pcidn;
 	struct direct_window *window;
 	const struct dynamic_dma_window_prop *direct64;
 	u64 dma_addr = 0;
 
-	dn = pci_device_to_OF_node(dev);
-	pcidn = PCI_DN(dn);
 	spin_lock(&direct_window_list_lock);
 	/* check if we already created a window and dupe that config if so */
 	list_for_each_entry(window, &direct_window_list, list) {
@@ -717,36 +721,40 @@ static int dupe_ddw_if_already_created(struct pci_dev *dev, struct device_node *
 	return dma_addr;
 }
 
-static u64 dupe_ddw_if_kexec(struct pci_dev *dev, struct device_node *pdn)
+static int find_existing_ddw_windows(void)
 {
-	struct device_node *dn;
-	struct pci_dn *pcidn;
 	int len;
+	struct device_node *pdn;
 	struct direct_window *window;
 	const struct dynamic_dma_window_prop *direct64;
-	u64 dma_addr = 0;
 
-	dn = pci_device_to_OF_node(dev);
-	pcidn = PCI_DN(dn);
-	direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len);
-	if (direct64) {
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		return 0;
+
+	for_each_node_with_property(pdn, DIRECT64_PROPNAME) {
+		direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len);
+		if (!direct64)
+			continue;
+
 		window = kzalloc(sizeof(*window), GFP_KERNEL);
-		if (!window) {
+		if (!window || len < sizeof(struct dynamic_dma_window_prop)) {
+			kfree(window);
 			remove_ddw(pdn);
-		} else {
-			window->device = pdn;
-			window->prop = direct64;
-			spin_lock(&direct_window_list_lock);
-			list_add(&window->list, &direct_window_list);
-			spin_unlock(&direct_window_list_lock);
-			dma_addr = direct64->dma_base;
+			continue;
 		}
+
+		window->device = pdn;
+		window->prop = direct64;
+		spin_lock(&direct_window_list_lock);
+		list_add(&window->list, &direct_window_list);
+		spin_unlock(&direct_window_list_lock);
 	}
 
-	return dma_addr;
+	return 0;
 }
+machine_arch_initcall(pseries, find_existing_ddw_windows);
 
-static int query_ddw(struct pci_dev *dev, const u32 *ddr_avail,
+static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
 			struct ddw_query_response *query)
 {
 	struct device_node *dn;
@@ -767,15 +775,15 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddr_avail,
 	if (pcidn->eeh_pe_config_addr)
 		cfg_addr = pcidn->eeh_pe_config_addr;
 	buid = pcidn->phb->buid;
-	ret = rtas_call(ddr_avail[0], 3, 5, (u32 *)query,
+	ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query,
 		  cfg_addr, BUID_HI(buid), BUID_LO(buid));
 	dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x"
-		" returned %d\n", ddr_avail[0], cfg_addr, BUID_HI(buid),
+		" returned %d\n", ddw_avail[0], cfg_addr, BUID_HI(buid),
 		BUID_LO(buid), ret);
 	return ret;
 }
 
-static int create_ddw(struct pci_dev *dev, const u32 *ddr_avail,
+static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail,
 			struct ddw_create_response *create, int page_shift,
 			int window_shift)
 {
@@ -800,12 +808,12 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddr_avail,
 
 	do {
 		/* extra outputs are LIOBN and dma-addr (hi, lo) */
-		ret = rtas_call(ddr_avail[1], 5, 4, (u32 *)create, cfg_addr,
+		ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create, cfg_addr,
 				BUID_HI(buid), BUID_LO(buid), page_shift, window_shift);
 	} while (rtas_busy_delay(ret));
 	dev_info(&dev->dev,
 		"ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d "
-		"(liobn = 0x%x starting addr = %x %x)\n", ddr_avail[1],
+		"(liobn = 0x%x starting addr = %x %x)\n", ddw_avail[1],
 		 cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift,
 		 window_shift, ret, create->liobn, create->addr_hi, create->addr_lo);
 
@@ -831,18 +839,14 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	int page_shift;
 	u64 dma_addr, max_addr;
 	struct device_node *dn;
-	const u32 *uninitialized_var(ddr_avail);
+	const u32 *uninitialized_var(ddw_avail);
 	struct direct_window *window;
-	struct property *uninitialized_var(win64);
+	struct property *win64;
 	struct dynamic_dma_window_prop *ddwprop;
 
 	mutex_lock(&direct_window_init_mutex);
 
-	dma_addr = dupe_ddw_if_already_created(dev, pdn);
-	if (dma_addr != 0)
-		goto out_unlock;
-
-	dma_addr = dupe_ddw_if_kexec(dev, pdn);
+	dma_addr = find_existing_ddw(pdn);
 	if (dma_addr != 0)
 		goto out_unlock;
 
@@ -854,8 +858,8 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	 * for the given node in that order.
 	 * the property is actually in the parent, not the PE
 	 */
-	ddr_avail = of_get_property(pdn, "ibm,ddw-applicable", &len);
-	if (!ddr_avail || len < 3 * sizeof(u32))
+	ddw_avail = of_get_property(pdn, "ibm,ddw-applicable", &len);
+	if (!ddw_avail || len < 3 * sizeof(u32))
 		goto out_unlock;
 
        /*
@@ -865,7 +869,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	 * of page sizes: supported and supported for migrate-dma.
 	 */
 	dn = pci_device_to_OF_node(dev);
-	ret = query_ddw(dev, ddr_avail, &query);
+	ret = query_ddw(dev, ddw_avail, &query);
 	if (ret != 0)
 		goto out_unlock;
 
@@ -907,13 +911,14 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	}
 	win64->name = kstrdup(DIRECT64_PROPNAME, GFP_KERNEL);
 	win64->value = ddwprop = kmalloc(sizeof(*ddwprop), GFP_KERNEL);
+	win64->length = sizeof(*ddwprop);
 	if (!win64->name || !win64->value) {
 		dev_info(&dev->dev,
 			"couldn't allocate property name and value\n");
 		goto out_free_prop;
 	}
 
-	ret = create_ddw(dev, ddr_avail, &create, page_shift, len);
+	ret = create_ddw(dev, ddw_avail, &create, page_shift, len);
 	if (ret != 0)
 		goto out_free_prop;
 
@@ -1021,13 +1026,16 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
 	const void *dma_window = NULL;
 	u64 dma_offset;
 
-	if (!dev->dma_mask || !dma_supported(dev, dma_mask))
+	if (!dev->dma_mask)
 		return -EIO;
 
+	if (!dev_is_pci(dev))
+		goto check_mask;
+
+	pdev = to_pci_dev(dev);
+
 	/* only attempt to use a new window if 64-bit DMA is requested */
 	if (!disable_ddw && dma_mask == DMA_BIT_MASK(64)) {
-		pdev = to_pci_dev(dev);
-
 		dn = pci_device_to_OF_node(pdev);
 		dev_dbg(dev, "node is %s\n", dn->full_name);
 
@@ -1054,12 +1062,17 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
 		}
 	}
 
-	/* fall-through to iommu ops */
-	if (!ddw_enabled) {
-		dev_info(dev, "Using 32-bit DMA via iommu\n");
+	/* fall back on iommu ops, restore table pointer with ops */
+	if (!ddw_enabled && get_dma_ops(dev) != &dma_iommu_ops) {
+		dev_info(dev, "Restoring 32-bit DMA via iommu\n");
 		set_dma_ops(dev, &dma_iommu_ops);
+		pci_dma_dev_setup_pSeriesLP(pdev);
 	}
 
+check_mask:
+	if (!dma_supported(dev, dma_mask))
+		return -EIO;
+
 	*dev->dma_mask = dma_mask;
 	return 0;
 }
diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c
index 77d38a5e2ff..54cf3a4aa16 100644
--- a/arch/powerpc/platforms/pseries/kexec.c
+++ b/arch/powerpc/platforms/pseries/kexec.c
@@ -7,15 +7,18 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+
 #include <asm/machdep.h>
 #include <asm/page.h>
 #include <asm/firmware.h>
 #include <asm/kexec.h>
 #include <asm/mpic.h>
+#include <asm/xics.h>
 #include <asm/smp.h>
 
 #include "pseries.h"
-#include "xics.h"
 #include "plpar_wrappers.h"
 
 static void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index ca5d5898d32..39e6e0a7b2f 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -329,6 +329,8 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 	/* Make pHyp happy */
 	if ((rflags & _PAGE_NO_CACHE) & !(rflags & _PAGE_WRITETHRU))
 		hpte_r &= ~_PAGE_COHERENT;
+	if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N))
+		flags |= H_COALESCE_CAND;
 
 	lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot);
 	if (unlikely(lpar_rc == H_PTEG_FULL)) {
@@ -573,7 +575,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
 	unsigned long i, pix, rc;
 	unsigned long flags = 0;
 	struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
-	int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
+	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
 	unsigned long param[9];
 	unsigned long va;
 	unsigned long hash, index, shift, hidx, slot;
@@ -771,3 +773,47 @@ out:
 	local_irq_restore(flags);
 }
 #endif
+
+/**
+ * h_get_mpp
+ * H_GET_MPP hcall returns info in 7 parms
+ */
+int h_get_mpp(struct hvcall_mpp_data *mpp_data)
+{
+	int rc;
+	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+
+	rc = plpar_hcall9(H_GET_MPP, retbuf);
+
+	mpp_data->entitled_mem = retbuf[0];
+	mpp_data->mapped_mem = retbuf[1];
+
+	mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
+	mpp_data->pool_num = retbuf[2] & 0xffff;
+
+	mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff;
+	mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff;
+	mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff;
+
+	mpp_data->pool_size = retbuf[4];
+	mpp_data->loan_request = retbuf[5];
+	mpp_data->backing_mem = retbuf[6];
+
+	return rc;
+}
+EXPORT_SYMBOL(h_get_mpp);
+
+int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data)
+{
+	int rc;
+	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = { 0 };
+
+	rc = plpar_hcall9(H_GET_MPP_X, retbuf);
+
+	mpp_x_data->coalesced_bytes = retbuf[0];
+	mpp_x_data->pool_coalesced_bytes = retbuf[1];
+	mpp_x_data->pool_purr_cycles = retbuf[2];
+	mpp_x_data->pool_spurr_cycles = retbuf[3];
+
+	return rc;
+}
diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h
index d9801117124..4bf21207d7d 100644
--- a/arch/powerpc/platforms/pseries/plpar_wrappers.h
+++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h
@@ -270,31 +270,4 @@ static inline long plpar_put_term_char(unsigned long termno, unsigned long len,
 			lbuf[1]);
 }
 
-static inline long plpar_eoi(unsigned long xirr)
-{
-	return plpar_hcall_norets(H_EOI, xirr);
-}
-
-static inline long plpar_cppr(unsigned long cppr)
-{
-	return plpar_hcall_norets(H_CPPR, cppr);
-}
-
-static inline long plpar_ipi(unsigned long servernum, unsigned long mfrr)
-{
-	return plpar_hcall_norets(H_IPI, servernum, mfrr);
-}
-
-static inline long plpar_xirr(unsigned long *xirr_ret, unsigned char cppr)
-{
-	long rc;
-	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
-	rc = plpar_hcall(H_XIRR, retbuf, cppr);
-
-	*xirr_ret = retbuf[0];
-
-	return rc;
-}
-
 #endif /* _PSERIES_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index c55d7ad9c64..086d2ae4e06 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -122,7 +122,7 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
 
 	status = rtas_call(ras_check_exception_token, 6, 1, NULL,
 			   RTAS_VECTOR_EXTERNAL_INTERRUPT,
-			   irq_map[irq].hwirq,
+			   virq_to_hw(irq),
 			   RTAS_EPOW_WARNING | RTAS_POWERMGM_EVENTS,
 			   critical, __pa(&ras_log_buf),
 				rtas_get_error_log_max());
@@ -157,7 +157,7 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
 
 	status = rtas_call(ras_check_exception_token, 6, 1, NULL,
 			   RTAS_VECTOR_EXTERNAL_INTERRUPT,
-			   irq_map[irq].hwirq,
+			   virq_to_hw(irq),
 			   RTAS_INTERNAL_ERROR, 1 /*Time Critical */,
 			   __pa(&ras_log_buf),
 				rtas_get_error_log_max());
@@ -227,7 +227,7 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
 	struct rtas_error_log *h, *errhdr = NULL;
 
 	if (!VALID_FWNMI_BUFFER(regs->gpr[3])) {
-		printk(KERN_ERR "FWNMI: corrupt r3\n");
+		printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
 		return NULL;
 	}
 
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 6c42cfde841..593acceeff9 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -53,9 +53,9 @@
 #include <asm/irq.h>
 #include <asm/time.h>
 #include <asm/nvram.h>
-#include "xics.h"
 #include <asm/pmc.h>
 #include <asm/mpic.h>
+#include <asm/xics.h>
 #include <asm/ppc-pci.h>
 #include <asm/i8259.h>
 #include <asm/udbg.h>
@@ -205,6 +205,9 @@ static void __init pseries_mpic_init_IRQ(void)
 		mpic_assign_isu(mpic, n, isuaddr);
 	}
 
+	/* Setup top-level get_irq */
+	ppc_md.get_irq = mpic_get_irq;
+
 	/* All ISUs are setup, complete initialization */
 	mpic_init(mpic);
 
@@ -214,7 +217,7 @@ static void __init pseries_mpic_init_IRQ(void)
 
 static void __init pseries_xics_init_IRQ(void)
 {
-	xics_init_IRQ();
+	xics_init();
 	pseries_setup_i8259_cascade();
 }
 
@@ -238,7 +241,6 @@ static void __init pseries_discover_pic(void)
 		if (strstr(typep, "open-pic")) {
 			pSeries_mpic_node = of_node_get(np);
 			ppc_md.init_IRQ       = pseries_mpic_init_IRQ;
-			ppc_md.get_irq        = mpic_get_irq;
 			setup_kexec_cpu_down_mpic();
 			smp_init_pseries_mpic();
 			return;
@@ -276,6 +278,8 @@ static struct notifier_block pci_dn_reconfig_nb = {
 	.notifier_call = pci_dn_reconfig_notifier,
 };
 
+struct kmem_cache *dtl_cache;
+
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 /*
  * Allocate space for the dispatch trace log for all possible cpus
@@ -287,18 +291,12 @@ static int alloc_dispatch_logs(void)
 	int cpu, ret;
 	struct paca_struct *pp;
 	struct dtl_entry *dtl;
-	struct kmem_cache *dtl_cache;
 
 	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
 		return 0;
 
-	dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES,
-						DISPATCH_LOG_BYTES, 0, NULL);
-	if (!dtl_cache) {
-		pr_warn("Failed to create dispatch trace log buffer cache\n");
-		pr_warn("Stolen time statistics will be unreliable\n");
+	if (!dtl_cache)
 		return 0;
-	}
 
 	for_each_possible_cpu(cpu) {
 		pp = &paca[cpu];
@@ -332,10 +330,27 @@ static int alloc_dispatch_logs(void)
 
 	return 0;
 }
-
-early_initcall(alloc_dispatch_logs);
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
+static inline int alloc_dispatch_logs(void)
+{
+	return 0;
+}
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 
+static int alloc_dispatch_log_kmem_cache(void)
+{
+	dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES,
+						DISPATCH_LOG_BYTES, 0, NULL);
+	if (!dtl_cache) {
+		pr_warn("Failed to create dispatch trace log buffer cache\n");
+		pr_warn("Stolen time statistics will be unreliable\n");
+		return 0;
+	}
+
+	return alloc_dispatch_logs();
+}
+early_initcall(alloc_dispatch_log_kmem_cache);
+
 static void __init pSeries_setup_arch(void)
 {
 	/* Discover PIC type and setup ppc_md accordingly */
@@ -403,6 +418,16 @@ static int pseries_set_xdabr(unsigned long dabr)
 #define CMO_CHARACTERISTICS_TOKEN 44
 #define CMO_MAXLENGTH 1026
 
+void pSeries_coalesce_init(void)
+{
+	struct hvcall_mpp_x_data mpp_x_data;
+
+	if (firmware_has_feature(FW_FEATURE_CMO) && !h_get_mpp_x(&mpp_x_data))
+		powerpc_firmware_features |= FW_FEATURE_XCMO;
+	else
+		powerpc_firmware_features &= ~FW_FEATURE_XCMO;
+}
+
 /**
  * fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions,
  * handle that here. (Stolen from parse_system_parameter_string)
@@ -472,6 +497,7 @@ void pSeries_cmo_feature_init(void)
 		pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP,
 		         CMO_SecPSP);
 		powerpc_firmware_features |= FW_FEATURE_CMO;
+		pSeries_coalesce_init();
 	} else
 		pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP,
 		         CMO_SecPSP);
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index a509c5292a6..fbffd7e47ab 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -44,10 +44,11 @@
 #include <asm/mpic.h>
 #include <asm/vdso_datapage.h>
 #include <asm/cputhreads.h>
+#include <asm/mpic.h>
+#include <asm/xics.h>
 
 #include "plpar_wrappers.h"
 #include "pseries.h"
-#include "xics.h"
 #include "offline_states.h"
 
 
@@ -136,7 +137,6 @@ out:
 	return 1;
 }
 
-#ifdef CONFIG_XICS
 static void __devinit smp_xics_setup_cpu(int cpu)
 {
 	if (cpu != boot_cpuid)
@@ -151,14 +151,13 @@ static void __devinit smp_xics_setup_cpu(int cpu)
 	set_default_offline_state(cpu);
 #endif
 }
-#endif /* CONFIG_XICS */
 
-static void __devinit smp_pSeries_kick_cpu(int nr)
+static int __devinit smp_pSeries_kick_cpu(int nr)
 {
 	BUG_ON(nr < 0 || nr >= NR_CPUS);
 
 	if (!smp_startup_cpu(nr))
-		return;
+		return -ENOENT;
 
 	/*
 	 * The processor is currently spinning, waiting for the
@@ -180,6 +179,8 @@ static void __devinit smp_pSeries_kick_cpu(int nr)
 						"Ret= %ld\n", nr, rc);
 	}
 #endif
+
+	return 0;
 }
 
 static int smp_pSeries_cpu_bootable(unsigned int nr)
@@ -197,23 +198,22 @@ static int smp_pSeries_cpu_bootable(unsigned int nr)
 
 	return 1;
 }
-#ifdef CONFIG_MPIC
+
 static struct smp_ops_t pSeries_mpic_smp_ops = {
 	.message_pass	= smp_mpic_message_pass,
 	.probe		= smp_mpic_probe,
 	.kick_cpu	= smp_pSeries_kick_cpu,
 	.setup_cpu	= smp_mpic_setup_cpu,
 };
-#endif
-#ifdef CONFIG_XICS
+
 static struct smp_ops_t pSeries_xics_smp_ops = {
-	.message_pass	= smp_xics_message_pass,
-	.probe		= smp_xics_probe,
+	.message_pass	= smp_muxed_ipi_message_pass,
+	.cause_ipi	= NULL,	/* Filled at runtime by xics_smp_probe() */
+	.probe		= xics_smp_probe,
 	.kick_cpu	= smp_pSeries_kick_cpu,
 	.setup_cpu	= smp_xics_setup_cpu,
 	.cpu_bootable	= smp_pSeries_cpu_bootable,
 };
-#endif
 
 /* This is called very early */
 static void __init smp_init_pseries(void)
@@ -245,14 +245,12 @@ static void __init smp_init_pseries(void)
 	pr_debug(" <- smp_init_pSeries()\n");
 }
 
-#ifdef CONFIG_MPIC
 void __init smp_init_pseries_mpic(void)
 {
 	smp_ops = &pSeries_mpic_smp_ops;
 
 	smp_init_pseries();
 }
-#endif
 
 void __init smp_init_pseries_xics(void)
 {
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
deleted file mode 100644
index d6901334d66..00000000000
--- a/arch/powerpc/platforms/pseries/xics.c
+++ /dev/null
@@ -1,949 +0,0 @@
-/*
- * arch/powerpc/platforms/pseries/xics.c
- *
- * Copyright 2000 IBM Corporation.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- */
-
-#include <linux/types.h>
-#include <linux/threads.h>
-#include <linux/kernel.h>
-#include <linux/irq.h>
-#include <linux/smp.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/radix-tree.h>
-#include <linux/cpu.h>
-#include <linux/msi.h>
-#include <linux/of.h>
-#include <linux/percpu.h>
-
-#include <asm/firmware.h>
-#include <asm/io.h>
-#include <asm/pgtable.h>
-#include <asm/smp.h>
-#include <asm/rtas.h>
-#include <asm/hvcall.h>
-#include <asm/machdep.h>
-
-#include "xics.h"
-#include "plpar_wrappers.h"
-
-static struct irq_host *xics_host;
-
-#define XICS_IPI		2
-#define XICS_IRQ_SPURIOUS	0
-
-/* Want a priority other than 0.  Various HW issues require this. */
-#define	DEFAULT_PRIORITY	5
-
-/*
- * Mark IPIs as higher priority so we can take them inside interrupts that
- * arent marked IRQF_DISABLED
- */
-#define IPI_PRIORITY		4
-
-/* The least favored priority */
-#define LOWEST_PRIORITY		0xFF
-
-/* The number of priorities defined above */
-#define MAX_NUM_PRIORITIES	3
-
-static unsigned int default_server = 0xFF;
-static unsigned int default_distrib_server = 0;
-static unsigned int interrupt_server_size = 8;
-
-/* RTAS service tokens */
-static int ibm_get_xive;
-static int ibm_set_xive;
-static int ibm_int_on;
-static int ibm_int_off;
-
-struct xics_cppr {
-	unsigned char stack[MAX_NUM_PRIORITIES];
-	int index;
-};
-
-static DEFINE_PER_CPU(struct xics_cppr, xics_cppr);
-
-/* Direct hardware low level accessors */
-
-/* The part of the interrupt presentation layer that we care about */
-struct xics_ipl {
-	union {
-		u32 word;
-		u8 bytes[4];
-	} xirr_poll;
-	union {
-		u32 word;
-		u8 bytes[4];
-	} xirr;
-	u32 dummy;
-	union {
-		u32 word;
-		u8 bytes[4];
-	} qirr;
-};
-
-static struct xics_ipl __iomem *xics_per_cpu[NR_CPUS];
-
-static inline unsigned int direct_xirr_info_get(void)
-{
-	int cpu = smp_processor_id();
-
-	return in_be32(&xics_per_cpu[cpu]->xirr.word);
-}
-
-static inline void direct_xirr_info_set(unsigned int value)
-{
-	int cpu = smp_processor_id();
-
-	out_be32(&xics_per_cpu[cpu]->xirr.word, value);
-}
-
-static inline void direct_cppr_info(u8 value)
-{
-	int cpu = smp_processor_id();
-
-	out_8(&xics_per_cpu[cpu]->xirr.bytes[0], value);
-}
-
-static inline void direct_qirr_info(int n_cpu, u8 value)
-{
-	out_8(&xics_per_cpu[n_cpu]->qirr.bytes[0], value);
-}
-
-
-/* LPAR low level accessors */
-
-static inline unsigned int lpar_xirr_info_get(unsigned char cppr)
-{
-	unsigned long lpar_rc;
-	unsigned long return_value;
-
-	lpar_rc = plpar_xirr(&return_value, cppr);
-	if (lpar_rc != H_SUCCESS)
-		panic(" bad return code xirr - rc = %lx\n", lpar_rc);
-	return (unsigned int)return_value;
-}
-
-static inline void lpar_xirr_info_set(unsigned int value)
-{
-	unsigned long lpar_rc;
-
-	lpar_rc = plpar_eoi(value);
-	if (lpar_rc != H_SUCCESS)
-		panic("bad return code EOI - rc = %ld, value=%x\n", lpar_rc,
-		      value);
-}
-
-static inline void lpar_cppr_info(u8 value)
-{
-	unsigned long lpar_rc;
-
-	lpar_rc = plpar_cppr(value);
-	if (lpar_rc != H_SUCCESS)
-		panic("bad return code cppr - rc = %lx\n", lpar_rc);
-}
-
-static inline void lpar_qirr_info(int n_cpu , u8 value)
-{
-	unsigned long lpar_rc;
-
-	lpar_rc = plpar_ipi(get_hard_smp_processor_id(n_cpu), value);
-	if (lpar_rc != H_SUCCESS)
-		panic("bad return code qirr - rc = %lx\n", lpar_rc);
-}
-
-
-/* Interface to generic irq subsystem */
-
-#ifdef CONFIG_SMP
-/*
- * For the moment we only implement delivery to all cpus or one cpu.
- *
- * If the requested affinity is cpu_all_mask, we set global affinity.
- * If not we set it to the first cpu in the mask, even if multiple cpus
- * are set. This is so things like irqbalance (which set core and package
- * wide affinities) do the right thing.
- */
-static int get_irq_server(unsigned int virq, const struct cpumask *cpumask,
-			  unsigned int strict_check)
-{
-
-	if (!distribute_irqs)
-		return default_server;
-
-	if (!cpumask_subset(cpu_possible_mask, cpumask)) {
-		int server = cpumask_first_and(cpu_online_mask, cpumask);
-
-		if (server < nr_cpu_ids)
-			return get_hard_smp_processor_id(server);
-
-		if (strict_check)
-			return -1;
-	}
-
-	/*
-	 * Workaround issue with some versions of JS20 firmware that
-	 * deliver interrupts to cpus which haven't been started. This
-	 * happens when using the maxcpus= boot option.
-	 */
-	if (cpumask_equal(cpu_online_mask, cpu_present_mask))
-		return default_distrib_server;
-
-	return default_server;
-}
-#else
-#define get_irq_server(virq, cpumask, strict_check) (default_server)
-#endif
-
-static void xics_unmask_irq(struct irq_data *d)
-{
-	unsigned int hwirq;
-	int call_status;
-	int server;
-
-	pr_devel("xics: unmask virq %d\n", d->irq);
-
-	hwirq = (unsigned int)irq_map[d->irq].hwirq;
-	pr_devel(" -> map to hwirq 0x%x\n", hwirq);
-	if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS)
-		return;
-
-	server = get_irq_server(d->irq, d->affinity, 0);
-
-	call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hwirq, server,
-				DEFAULT_PRIORITY);
-	if (call_status != 0) {
-		printk(KERN_ERR
-			"%s: ibm_set_xive irq %u server %x returned %d\n",
-			__func__, hwirq, server, call_status);
-		return;
-	}
-
-	/* Now unmask the interrupt (often a no-op) */
-	call_status = rtas_call(ibm_int_on, 1, 1, NULL, hwirq);
-	if (call_status != 0) {
-		printk(KERN_ERR "%s: ibm_int_on irq=%u returned %d\n",
-			__func__, hwirq, call_status);
-		return;
-	}
-}
-
-static unsigned int xics_startup(struct irq_data *d)
-{
-	/*
-	 * The generic MSI code returns with the interrupt disabled on the
-	 * card, using the MSI mask bits. Firmware doesn't appear to unmask
-	 * at that level, so we do it here by hand.
-	 */
-	if (d->msi_desc)
-		unmask_msi_irq(d);
-
-	/* unmask it */
-	xics_unmask_irq(d);
-	return 0;
-}
-
-static void xics_mask_real_irq(unsigned int hwirq)
-{
-	int call_status;
-
-	if (hwirq == XICS_IPI)
-		return;
-
-	call_status = rtas_call(ibm_int_off, 1, 1, NULL, hwirq);
-	if (call_status != 0) {
-		printk(KERN_ERR "%s: ibm_int_off irq=%u returned %d\n",
-			__func__, hwirq, call_status);
-		return;
-	}
-
-	/* Have to set XIVE to 0xff to be able to remove a slot */
-	call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hwirq,
-				default_server, 0xff);
-	if (call_status != 0) {
-		printk(KERN_ERR "%s: ibm_set_xive(0xff) irq=%u returned %d\n",
-			__func__, hwirq, call_status);
-		return;
-	}
-}
-
-static void xics_mask_irq(struct irq_data *d)
-{
-	unsigned int hwirq;
-
-	pr_devel("xics: mask virq %d\n", d->irq);
-
-	hwirq = (unsigned int)irq_map[d->irq].hwirq;
-	if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS)
-		return;
-	xics_mask_real_irq(hwirq);
-}
-
-static void xics_mask_unknown_vec(unsigned int vec)
-{
-	printk(KERN_ERR "Interrupt %u (real) is invalid, disabling it.\n", vec);
-	xics_mask_real_irq(vec);
-}
-
-static inline unsigned int xics_xirr_vector(unsigned int xirr)
-{
-	/*
-	 * The top byte is the old cppr, to be restored on EOI.
-	 * The remaining 24 bits are the vector.
-	 */
-	return xirr & 0x00ffffff;
-}
-
-static void push_cppr(unsigned int vec)
-{
-	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
-
-	if (WARN_ON(os_cppr->index >= MAX_NUM_PRIORITIES - 1))
-		return;
-
-	if (vec == XICS_IPI)
-		os_cppr->stack[++os_cppr->index] = IPI_PRIORITY;
-	else
-		os_cppr->stack[++os_cppr->index] = DEFAULT_PRIORITY;
-}
-
-static unsigned int xics_get_irq_direct(void)
-{
-	unsigned int xirr = direct_xirr_info_get();
-	unsigned int vec = xics_xirr_vector(xirr);
-	unsigned int irq;
-
-	if (vec == XICS_IRQ_SPURIOUS)
-		return NO_IRQ;
-
-	irq = irq_radix_revmap_lookup(xics_host, vec);
-	if (likely(irq != NO_IRQ)) {
-		push_cppr(vec);
-		return irq;
-	}
-
-	/* We don't have a linux mapping, so have rtas mask it. */
-	xics_mask_unknown_vec(vec);
-
-	/* We might learn about it later, so EOI it */
-	direct_xirr_info_set(xirr);
-	return NO_IRQ;
-}
-
-static unsigned int xics_get_irq_lpar(void)
-{
-	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
-	unsigned int xirr = lpar_xirr_info_get(os_cppr->stack[os_cppr->index]);
-	unsigned int vec = xics_xirr_vector(xirr);
-	unsigned int irq;
-
-	if (vec == XICS_IRQ_SPURIOUS)
-		return NO_IRQ;
-
-	irq = irq_radix_revmap_lookup(xics_host, vec);
-	if (likely(irq != NO_IRQ)) {
-		push_cppr(vec);
-		return irq;
-	}
-
-	/* We don't have a linux mapping, so have RTAS mask it. */
-	xics_mask_unknown_vec(vec);
-
-	/* We might learn about it later, so EOI it */
-	lpar_xirr_info_set(xirr);
-	return NO_IRQ;
-}
-
-static unsigned char pop_cppr(void)
-{
-	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
-
-	if (WARN_ON(os_cppr->index < 1))
-		return LOWEST_PRIORITY;
-
-	return os_cppr->stack[--os_cppr->index];
-}
-
-static void xics_eoi_direct(struct irq_data *d)
-{
-	unsigned int hwirq = (unsigned int)irq_map[d->irq].hwirq;
-
-	iosync();
-	direct_xirr_info_set((pop_cppr() << 24) | hwirq);
-}
-
-static void xics_eoi_lpar(struct irq_data *d)
-{
-	unsigned int hwirq = (unsigned int)irq_map[d->irq].hwirq;
-
-	iosync();
-	lpar_xirr_info_set((pop_cppr() << 24) | hwirq);
-}
-
-static int
-xics_set_affinity(struct irq_data *d, const struct cpumask *cpumask, bool force)
-{
-	unsigned int hwirq;
-	int status;
-	int xics_status[2];
-	int irq_server;
-
-	hwirq = (unsigned int)irq_map[d->irq].hwirq;
-	if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS)
-		return -1;
-
-	status = rtas_call(ibm_get_xive, 1, 3, xics_status, hwirq);
-
-	if (status) {
-		printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
-			__func__, hwirq, status);
-		return -1;
-	}
-
-	irq_server = get_irq_server(d->irq, cpumask, 1);
-	if (irq_server == -1) {
-		char cpulist[128];
-		cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask);
-		printk(KERN_WARNING
-			"%s: No online cpus in the mask %s for irq %d\n",
-			__func__, cpulist, d->irq);
-		return -1;
-	}
-
-	status = rtas_call(ibm_set_xive, 3, 1, NULL,
-				hwirq, irq_server, xics_status[1]);
-
-	if (status) {
-		printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n",
-			__func__, hwirq, status);
-		return -1;
-	}
-
-	return 0;
-}
-
-static struct irq_chip xics_pic_direct = {
-	.name = "XICS",
-	.irq_startup = xics_startup,
-	.irq_mask = xics_mask_irq,
-	.irq_unmask = xics_unmask_irq,
-	.irq_eoi = xics_eoi_direct,
-	.irq_set_affinity = xics_set_affinity
-};
-
-static struct irq_chip xics_pic_lpar = {
-	.name = "XICS",
-	.irq_startup = xics_startup,
-	.irq_mask = xics_mask_irq,
-	.irq_unmask = xics_unmask_irq,
-	.irq_eoi = xics_eoi_lpar,
-	.irq_set_affinity = xics_set_affinity
-};
-
-
-/* Interface to arch irq controller subsystem layer */
-
-/* Points to the irq_chip we're actually using */
-static struct irq_chip *xics_irq_chip;
-
-static int xics_host_match(struct irq_host *h, struct device_node *node)
-{
-	/* IBM machines have interrupt parents of various funky types for things
-	 * like vdevices, events, etc... The trick we use here is to match
-	 * everything here except the legacy 8259 which is compatible "chrp,iic"
-	 */
-	return !of_device_is_compatible(node, "chrp,iic");
-}
-
-static int xics_host_map(struct irq_host *h, unsigned int virq,
-			 irq_hw_number_t hw)
-{
-	pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw);
-
-	/* Insert the interrupt mapping into the radix tree for fast lookup */
-	irq_radix_revmap_insert(xics_host, virq, hw);
-
-	irq_set_status_flags(virq, IRQ_LEVEL);
-	irq_set_chip_and_handler(virq, xics_irq_chip, handle_fasteoi_irq);
-	return 0;
-}
-
-static int xics_host_xlate(struct irq_host *h, struct device_node *ct,
-			   const u32 *intspec, unsigned int intsize,
-			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
-
-{
-	/* Current xics implementation translates everything
-	 * to level. It is not technically right for MSIs but this
-	 * is irrelevant at this point. We might get smarter in the future
-	 */
-	*out_hwirq = intspec[0];
-	*out_flags = IRQ_TYPE_LEVEL_LOW;
-
-	return 0;
-}
-
-static struct irq_host_ops xics_host_ops = {
-	.match = xics_host_match,
-	.map = xics_host_map,
-	.xlate = xics_host_xlate,
-};
-
-static void __init xics_init_host(void)
-{
-	if (firmware_has_feature(FW_FEATURE_LPAR))
-		xics_irq_chip = &xics_pic_lpar;
-	else
-		xics_irq_chip = &xics_pic_direct;
-
-	xics_host = irq_alloc_host(NULL, IRQ_HOST_MAP_TREE, 0, &xics_host_ops,
-				   XICS_IRQ_SPURIOUS);
-	BUG_ON(xics_host == NULL);
-	irq_set_default_host(xics_host);
-}
-
-
-/* Inter-processor interrupt support */
-
-#ifdef CONFIG_SMP
-/*
- * XICS only has a single IPI, so encode the messages per CPU
- */
-static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, xics_ipi_message);
-
-static inline void smp_xics_do_message(int cpu, int msg)
-{
-	unsigned long *tgt = &per_cpu(xics_ipi_message, cpu);
-
-	set_bit(msg, tgt);
-	mb();
-	if (firmware_has_feature(FW_FEATURE_LPAR))
-		lpar_qirr_info(cpu, IPI_PRIORITY);
-	else
-		direct_qirr_info(cpu, IPI_PRIORITY);
-}
-
-void smp_xics_message_pass(int target, int msg)
-{
-	unsigned int i;
-
-	if (target < NR_CPUS) {
-		smp_xics_do_message(target, msg);
-	} else {
-		for_each_online_cpu(i) {
-			if (target == MSG_ALL_BUT_SELF
-			    && i == smp_processor_id())
-				continue;
-			smp_xics_do_message(i, msg);
-		}
-	}
-}
-
-static irqreturn_t xics_ipi_dispatch(int cpu)
-{
-	unsigned long *tgt = &per_cpu(xics_ipi_message, cpu);
-
-	mb();	/* order mmio clearing qirr */
-	while (*tgt) {
-		if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION, tgt)) {
-			smp_message_recv(PPC_MSG_CALL_FUNCTION);
-		}
-		if (test_and_clear_bit(PPC_MSG_RESCHEDULE, tgt)) {
-			smp_message_recv(PPC_MSG_RESCHEDULE);
-		}
-		if (test_and_clear_bit(PPC_MSG_CALL_FUNC_SINGLE, tgt)) {
-			smp_message_recv(PPC_MSG_CALL_FUNC_SINGLE);
-		}
-#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
-		if (test_and_clear_bit(PPC_MSG_DEBUGGER_BREAK, tgt)) {
-			smp_message_recv(PPC_MSG_DEBUGGER_BREAK);
-		}
-#endif
-	}
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t xics_ipi_action_direct(int irq, void *dev_id)
-{
-	int cpu = smp_processor_id();
-
-	direct_qirr_info(cpu, 0xff);
-
-	return xics_ipi_dispatch(cpu);
-}
-
-static irqreturn_t xics_ipi_action_lpar(int irq, void *dev_id)
-{
-	int cpu = smp_processor_id();
-
-	lpar_qirr_info(cpu, 0xff);
-
-	return xics_ipi_dispatch(cpu);
-}
-
-static void xics_request_ipi(void)
-{
-	unsigned int ipi;
-	int rc;
-
-	ipi = irq_create_mapping(xics_host, XICS_IPI);
-	BUG_ON(ipi == NO_IRQ);
-
-	/*
-	 * IPIs are marked IRQF_DISABLED as they must run with irqs
-	 * disabled
-	 */
-	irq_set_handler(ipi, handle_percpu_irq);
-	if (firmware_has_feature(FW_FEATURE_LPAR))
-		rc = request_irq(ipi, xics_ipi_action_lpar,
-				IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL);
-	else
-		rc = request_irq(ipi, xics_ipi_action_direct,
-				IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL);
-	BUG_ON(rc);
-}
-
-int __init smp_xics_probe(void)
-{
-	xics_request_ipi();
-
-	return cpumask_weight(cpu_possible_mask);
-}
-
-#endif /* CONFIG_SMP */
-
-
-/* Initialization */
-
-static void xics_update_irq_servers(void)
-{
-	int i, j;
-	struct device_node *np;
-	u32 ilen;
-	const u32 *ireg;
-	u32 hcpuid;
-
-	/* Find the server numbers for the boot cpu. */
-	np = of_get_cpu_node(boot_cpuid, NULL);
-	BUG_ON(!np);
-
-	ireg = of_get_property(np, "ibm,ppc-interrupt-gserver#s", &ilen);
-	if (!ireg) {
-		of_node_put(np);
-		return;
-	}
-
-	i = ilen / sizeof(int);
-	hcpuid = get_hard_smp_processor_id(boot_cpuid);
-
-	/* Global interrupt distribution server is specified in the last
-	 * entry of "ibm,ppc-interrupt-gserver#s" property. Get the last
-	 * entry fom this property for current boot cpu id and use it as
-	 * default distribution server
-	 */
-	for (j = 0; j < i; j += 2) {
-		if (ireg[j] == hcpuid) {
-			default_server = hcpuid;
-			default_distrib_server = ireg[j+1];
-		}
-	}
-
-	of_node_put(np);
-}
-
-static void __init xics_map_one_cpu(int hw_id, unsigned long addr,
-				     unsigned long size)
-{
-	int i;
-
-	/* This may look gross but it's good enough for now, we don't quite
-	 * have a hard -> linux processor id matching.
-	 */
-	for_each_possible_cpu(i) {
-		if (!cpu_present(i))
-			continue;
-		if (hw_id == get_hard_smp_processor_id(i)) {
-			xics_per_cpu[i] = ioremap(addr, size);
-			return;
-		}
-	}
-}
-
-static void __init xics_init_one_node(struct device_node *np,
-				      unsigned int *indx)
-{
-	unsigned int ilen;
-	const u32 *ireg;
-
-	/* This code does the theorically broken assumption that the interrupt
-	 * server numbers are the same as the hard CPU numbers.
-	 * This happens to be the case so far but we are playing with fire...
-	 * should be fixed one of these days. -BenH.
-	 */
-	ireg = of_get_property(np, "ibm,interrupt-server-ranges", NULL);
-
-	/* Do that ever happen ? we'll know soon enough... but even good'old
-	 * f80 does have that property ..
-	 */
-	WARN_ON(ireg == NULL);
-	if (ireg) {
-		/*
-		 * set node starting index for this node
-		 */
-		*indx = *ireg;
-	}
-	ireg = of_get_property(np, "reg", &ilen);
-	if (!ireg)
-		panic("xics_init_IRQ: can't find interrupt reg property");
-
-	while (ilen >= (4 * sizeof(u32))) {
-		unsigned long addr, size;
-
-		/* XXX Use proper OF parsing code here !!! */
-		addr = (unsigned long)*ireg++ << 32;
-		ilen -= sizeof(u32);
-		addr |= *ireg++;
-		ilen -= sizeof(u32);
-		size = (unsigned long)*ireg++ << 32;
-		ilen -= sizeof(u32);
-		size |= *ireg++;
-		ilen -= sizeof(u32);
-		xics_map_one_cpu(*indx, addr, size);
-		(*indx)++;
-	}
-}
-
-void __init xics_init_IRQ(void)
-{
-	struct device_node *np;
-	u32 indx = 0;
-	int found = 0;
-	const u32 *isize;
-
-	ppc64_boot_msg(0x20, "XICS Init");
-
-	ibm_get_xive = rtas_token("ibm,get-xive");
-	ibm_set_xive = rtas_token("ibm,set-xive");
-	ibm_int_on  = rtas_token("ibm,int-on");
-	ibm_int_off = rtas_token("ibm,int-off");
-
-	for_each_node_by_type(np, "PowerPC-External-Interrupt-Presentation") {
-		found = 1;
-		if (firmware_has_feature(FW_FEATURE_LPAR)) {
-			of_node_put(np);
-			break;
-			}
-		xics_init_one_node(np, &indx);
-	}
-	if (found == 0)
-		return;
-
-	/* get the bit size of server numbers */
-	found = 0;
-
-	for_each_compatible_node(np, NULL, "ibm,ppc-xics") {
-		isize = of_get_property(np, "ibm,interrupt-server#-size", NULL);
-
-		if (!isize)
-			continue;
-
-		if (!found) {
-			interrupt_server_size = *isize;
-			found = 1;
-		} else if (*isize != interrupt_server_size) {
-			printk(KERN_WARNING "XICS: "
-			       "mismatched ibm,interrupt-server#-size\n");
-			interrupt_server_size = max(*isize,
-						    interrupt_server_size);
-		}
-	}
-
-	xics_update_irq_servers();
-	xics_init_host();
-
-	if (firmware_has_feature(FW_FEATURE_LPAR))
-		ppc_md.get_irq = xics_get_irq_lpar;
-	else
-		ppc_md.get_irq = xics_get_irq_direct;
-
-	xics_setup_cpu();
-
-	ppc64_boot_msg(0x21, "XICS Done");
-}
-
-/* Cpu startup, shutdown, and hotplug */
-
-static void xics_set_cpu_priority(unsigned char cppr)
-{
-	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
-
-	/*
-	 * we only really want to set the priority when there's
-	 * just one cppr value on the stack
-	 */
-	WARN_ON(os_cppr->index != 0);
-
-	os_cppr->stack[0] = cppr;
-
-	if (firmware_has_feature(FW_FEATURE_LPAR))
-		lpar_cppr_info(cppr);
-	else
-		direct_cppr_info(cppr);
-	iosync();
-}
-
-/* Have the calling processor join or leave the specified global queue */
-static void xics_set_cpu_giq(unsigned int gserver, unsigned int join)
-{
-	int index;
-	int status;
-
-	if (!rtas_indicator_present(GLOBAL_INTERRUPT_QUEUE, NULL))
-		return;
-
-	index = (1UL << interrupt_server_size) - 1 - gserver;
-
-	status = rtas_set_indicator_fast(GLOBAL_INTERRUPT_QUEUE, index, join);
-
-	WARN(status < 0, "set-indicator(%d, %d, %u) returned %d\n",
-	     GLOBAL_INTERRUPT_QUEUE, index, join, status);
-}
-
-void xics_setup_cpu(void)
-{
-	xics_set_cpu_priority(LOWEST_PRIORITY);
-
-	xics_set_cpu_giq(default_distrib_server, 1);
-}
-
-void xics_teardown_cpu(void)
-{
-	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
-	int cpu = smp_processor_id();
-
-	/*
-	 * we have to reset the cppr index to 0 because we're
-	 * not going to return from the IPI
-	 */
-	os_cppr->index = 0;
-	xics_set_cpu_priority(0);
-
-	/* Clear any pending IPI request */
-	if (firmware_has_feature(FW_FEATURE_LPAR))
-		lpar_qirr_info(cpu, 0xff);
-	else
-		direct_qirr_info(cpu, 0xff);
-}
-
-void xics_kexec_teardown_cpu(int secondary)
-{
-	xics_teardown_cpu();
-
-	/*
-	 * we take the ipi irq but and never return so we
-	 * need to EOI the IPI, but want to leave our priority 0
-	 *
-	 * should we check all the other interrupts too?
-	 * should we be flagging idle loop instead?
-	 * or creating some task to be scheduled?
-	 */
-
-	if (firmware_has_feature(FW_FEATURE_LPAR))
-		lpar_xirr_info_set((0x00 << 24) | XICS_IPI);
-	else
-		direct_xirr_info_set((0x00 << 24) | XICS_IPI);
-
-	/*
-	 * Some machines need to have at least one cpu in the GIQ,
-	 * so leave the master cpu in the group.
-	 */
-	if (secondary)
-		xics_set_cpu_giq(default_distrib_server, 0);
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-/* Interrupts are disabled. */
-void xics_migrate_irqs_away(void)
-{
-	int cpu = smp_processor_id(), hw_cpu = hard_smp_processor_id();
-	int virq;
-
-	/* If we used to be the default server, move to the new "boot_cpuid" */
-	if (hw_cpu == default_server)
-		xics_update_irq_servers();
-
-	/* Reject any interrupt that was queued to us... */
-	xics_set_cpu_priority(0);
-
-	/* Remove ourselves from the global interrupt queue */
-	xics_set_cpu_giq(default_distrib_server, 0);
-
-	/* Allow IPIs again... */
-	xics_set_cpu_priority(DEFAULT_PRIORITY);
-
-	for_each_irq(virq) {
-		struct irq_desc *desc;
-		struct irq_chip *chip;
-		unsigned int hwirq;
-		int xics_status[2];
-		int status;
-		unsigned long flags;
-
-		/* We can't set affinity on ISA interrupts */
-		if (virq < NUM_ISA_INTERRUPTS)
-			continue;
-		if (irq_map[virq].host != xics_host)
-			continue;
-		hwirq = (unsigned int)irq_map[virq].hwirq;
-		/* We need to get IPIs still. */
-		if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS)
-			continue;
-
-		desc = irq_to_desc(virq);
-
-		/* We only need to migrate enabled IRQS */
-		if (desc == NULL || desc->action == NULL)
-			continue;
-
-		chip = irq_desc_get_chip(desc);
-		if (chip == NULL || chip->irq_set_affinity == NULL)
-			continue;
-
-		raw_spin_lock_irqsave(&desc->lock, flags);
-
-		status = rtas_call(ibm_get_xive, 1, 3, xics_status, hwirq);
-		if (status) {
-			printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
-					__func__, hwirq, status);
-			goto unlock;
-		}
-
-		/*
-		 * We only support delivery to all cpus or to one cpu.
-		 * The irq has to be migrated only in the single cpu
-		 * case.
-		 */
-		if (xics_status[0] != hw_cpu)
-			goto unlock;
-
-		/* This is expected during cpu offline. */
-		if (cpu_online(cpu))
-			printk(KERN_WARNING "IRQ %u affinity broken off cpu %u\n",
-			       virq, cpu);
-
-		/* Reset affinity to all cpus */
-		cpumask_setall(desc->irq_data.affinity);
-		chip->irq_set_affinity(&desc->irq_data, cpu_all_mask, true);
-unlock:
-		raw_spin_unlock_irqrestore(&desc->lock, flags);
-	}
-}
-#endif
diff --git a/arch/powerpc/platforms/pseries/xics.h b/arch/powerpc/platforms/pseries/xics.h
deleted file mode 100644
index d1d5a83039a..00000000000
--- a/arch/powerpc/platforms/pseries/xics.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * arch/powerpc/platforms/pseries/xics.h
- *
- * Copyright 2000 IBM Corporation.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- */
-
-#ifndef _POWERPC_KERNEL_XICS_H
-#define _POWERPC_KERNEL_XICS_H
-
-extern void xics_init_IRQ(void);
-extern void xics_setup_cpu(void);
-extern void xics_teardown_cpu(void);
-extern void xics_kexec_teardown_cpu(int secondary);
-extern void xics_migrate_irqs_away(void);
-extern int smp_xics_probe(void);
-extern void smp_xics_message_pass(int target, int msg);
-
-#endif /* _POWERPC_KERNEL_XICS_H */
diff --git a/arch/powerpc/platforms/wsp/Kconfig b/arch/powerpc/platforms/wsp/Kconfig
new file mode 100644
index 00000000000..c3c48eb62cc
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/Kconfig
@@ -0,0 +1,28 @@
+config PPC_WSP
+	bool
+	default n
+
+menu "WSP platform selection"
+	depends on PPC_BOOK3E_64
+
+config PPC_PSR2
+	bool "PSR-2 platform"
+	select PPC_A2
+	select GENERIC_TBSYNC
+	select PPC_SCOM
+	select EPAPR_BOOT
+	select PPC_WSP
+	select PPC_XICS
+	select PPC_ICP_NATIVE
+	default y
+
+endmenu
+
+config PPC_A2_DD2
+	bool "Support for DD2 based A2/WSP systems"
+	depends on PPC_A2
+
+config WORKAROUND_ERRATUM_463
+	depends on PPC_A2_DD2
+	bool "Workaround erratum 463"
+	default y
diff --git a/arch/powerpc/platforms/wsp/Makefile b/arch/powerpc/platforms/wsp/Makefile
new file mode 100644
index 00000000000..095be73d6cd
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/Makefile
@@ -0,0 +1,6 @@
+ccflags-y			+= -mno-minimal-toc
+
+obj-y				+= setup.o ics.o
+obj-$(CONFIG_PPC_PSR2)		+= psr2.o opb_pic.o
+obj-$(CONFIG_PPC_WSP)		+= scom_wsp.o
+obj-$(CONFIG_SMP)		+= smp.o scom_smp.o
diff --git a/arch/powerpc/platforms/wsp/ics.c b/arch/powerpc/platforms/wsp/ics.c
new file mode 100644
index 00000000000..e53bd9e7b12
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/ics.c
@@ -0,0 +1,712 @@
+/*
+ * Copyright 2008-2011 IBM Corporation.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/xics.h>
+
+#include "wsp.h"
+#include "ics.h"
+
+
+/* WSP ICS */
+
+struct wsp_ics {
+	struct ics ics;
+	struct device_node *dn;
+	void __iomem *regs;
+	spinlock_t lock;
+	unsigned long *bitmap;
+	u32 chip_id;
+	u32 lsi_base;
+	u32 lsi_count;
+	u64 hwirq_start;
+	u64 count;
+#ifdef CONFIG_SMP
+	int *hwirq_cpu_map;
+#endif
+};
+
+#define to_wsp_ics(ics)	container_of(ics, struct wsp_ics, ics)
+
+#define INT_SRC_LAYER_BUID_REG(base)	((base) + 0x00)
+#define IODA_TBL_ADDR_REG(base)		((base) + 0x18)
+#define IODA_TBL_DATA_REG(base)		((base) + 0x20)
+#define XIVE_UPDATE_REG(base)		((base) + 0x28)
+#define ICS_INT_CAPS_REG(base)		((base) + 0x30)
+
+#define TBL_AUTO_INCREMENT	((1UL << 63) | (1UL << 15))
+#define TBL_SELECT_XIST		(1UL << 48)
+#define TBL_SELECT_XIVT		(1UL << 49)
+
+#define IODA_IRQ(irq)		((irq) & (0x7FFULL))	/* HRM 5.1.3.4 */
+
+#define XIST_REQUIRED		0x8
+#define XIST_REJECTED		0x4
+#define XIST_PRESENTED		0x2
+#define XIST_PENDING		0x1
+
+#define XIVE_SERVER_SHIFT	42
+#define XIVE_SERVER_MASK	0xFFFFULL
+#define XIVE_PRIORITY_MASK	0xFFULL
+#define XIVE_PRIORITY_SHIFT	32
+#define XIVE_WRITE_ENABLE	(1ULL << 63)
+
+/*
+ * The docs refer to a 6 bit field called ChipID, which consists of a
+ * 3 bit NodeID and a 3 bit ChipID. On WSP the ChipID is always zero
+ * so we ignore it, and every where we use "chip id" in this code we
+ * mean the NodeID.
+ */
+#define WSP_ICS_CHIP_SHIFT		17
+
+
+static struct wsp_ics *ics_list;
+static int num_ics;
+
+/* ICS Source controller accessors */
+
+static u64 wsp_ics_get_xive(struct wsp_ics *ics, unsigned int irq)
+{
+	unsigned long flags;
+	u64 xive;
+
+	spin_lock_irqsave(&ics->lock, flags);
+	out_be64(IODA_TBL_ADDR_REG(ics->regs), TBL_SELECT_XIVT | IODA_IRQ(irq));
+	xive = in_be64(IODA_TBL_DATA_REG(ics->regs));
+	spin_unlock_irqrestore(&ics->lock, flags);
+
+	return xive;
+}
+
+static void wsp_ics_set_xive(struct wsp_ics *ics, unsigned int irq, u64 xive)
+{
+	xive &= ~XIVE_ADDR_MASK;
+	xive |= (irq & XIVE_ADDR_MASK);
+	xive |= XIVE_WRITE_ENABLE;
+
+	out_be64(XIVE_UPDATE_REG(ics->regs), xive);
+}
+
+static u64 xive_set_server(u64 xive, unsigned int server)
+{
+	u64 mask = ~(XIVE_SERVER_MASK << XIVE_SERVER_SHIFT);
+
+	xive &= mask;
+	xive |= (server & XIVE_SERVER_MASK) << XIVE_SERVER_SHIFT;
+
+	return xive;
+}
+
+static u64 xive_set_priority(u64 xive, unsigned int priority)
+{
+	u64 mask = ~(XIVE_PRIORITY_MASK << XIVE_PRIORITY_SHIFT);
+
+	xive &= mask;
+	xive |= (priority & XIVE_PRIORITY_MASK) << XIVE_PRIORITY_SHIFT;
+
+	return xive;
+}
+
+
+#ifdef CONFIG_SMP
+/* Find logical CPUs within mask on a given chip and store result in ret */
+void cpus_on_chip(int chip_id, cpumask_t *mask, cpumask_t *ret)
+{
+	int cpu, chip;
+	struct device_node *cpu_dn, *dn;
+	const u32 *prop;
+
+	cpumask_clear(ret);
+	for_each_cpu(cpu, mask) {
+		cpu_dn = of_get_cpu_node(cpu, NULL);
+		if (!cpu_dn)
+			continue;
+
+		prop = of_get_property(cpu_dn, "at-node", NULL);
+		if (!prop) {
+			of_node_put(cpu_dn);
+			continue;
+		}
+
+		dn = of_find_node_by_phandle(*prop);
+		of_node_put(cpu_dn);
+
+		chip = wsp_get_chip_id(dn);
+		if (chip == chip_id)
+			cpumask_set_cpu(cpu, ret);
+
+		of_node_put(dn);
+	}
+}
+
+/* Store a suitable CPU to handle a hwirq in the ics->hwirq_cpu_map cache */
+static int cache_hwirq_map(struct wsp_ics *ics, unsigned int hwirq,
+			   const cpumask_t *affinity)
+{
+	cpumask_var_t avail, newmask;
+	int ret = -ENOMEM, cpu, cpu_rover = 0, target;
+	int index = hwirq - ics->hwirq_start;
+	unsigned int nodeid;
+
+	BUG_ON(index < 0 || index >= ics->count);
+
+	if (!ics->hwirq_cpu_map)
+		return -ENOMEM;
+
+	if (!distribute_irqs) {
+		ics->hwirq_cpu_map[hwirq - ics->hwirq_start] = xics_default_server;
+		return 0;
+	}
+
+	/* Allocate needed CPU masks */
+	if (!alloc_cpumask_var(&avail, GFP_KERNEL))
+		goto ret;
+	if (!alloc_cpumask_var(&newmask, GFP_KERNEL))
+		goto freeavail;
+
+	/* Find PBus attached to the source of this IRQ */
+	nodeid = (hwirq >> WSP_ICS_CHIP_SHIFT) & 0x3; /* 12:14 */
+
+	/* Find CPUs that could handle this IRQ */
+	if (affinity)
+		cpumask_and(avail, cpu_online_mask, affinity);
+	else
+		cpumask_copy(avail, cpu_online_mask);
+
+	/* Narrow selection down to logical CPUs on the same chip */
+	cpus_on_chip(nodeid, avail, newmask);
+
+	/* Ensure we haven't narrowed it down to 0 */
+	if (unlikely(cpumask_empty(newmask))) {
+		if (unlikely(cpumask_empty(avail))) {
+			ret = -1;
+			goto out;
+		}
+		cpumask_copy(newmask, avail);
+	}
+
+	/* Choose a CPU out of those we narrowed it down to in round robin */
+	target = hwirq % cpumask_weight(newmask);
+	for_each_cpu(cpu, newmask) {
+		if (cpu_rover++ >= target) {
+			ics->hwirq_cpu_map[index] = get_hard_smp_processor_id(cpu);
+			ret = 0;
+			goto out;
+		}
+	}
+
+	/* Shouldn't happen */
+	WARN_ON(1);
+
+out:
+	free_cpumask_var(newmask);
+freeavail:
+	free_cpumask_var(avail);
+ret:
+	if (ret < 0) {
+		ics->hwirq_cpu_map[index] = cpumask_first(cpu_online_mask);
+		pr_warning("Error, falling hwirq 0x%x routing back to CPU %i\n",
+			   hwirq, ics->hwirq_cpu_map[index]);
+	}
+	return ret;
+}
+
+static void alloc_irq_map(struct wsp_ics *ics)
+{
+	int i;
+
+	ics->hwirq_cpu_map = kmalloc(sizeof(int) * ics->count, GFP_KERNEL);
+	if (!ics->hwirq_cpu_map) {
+		pr_warning("Allocate hwirq_cpu_map failed, "
+			   "IRQ balancing disabled\n");
+		return;
+	}
+
+	for (i=0; i < ics->count; i++)
+		ics->hwirq_cpu_map[i] = xics_default_server;
+}
+
+static int get_irq_server(struct wsp_ics *ics, unsigned int hwirq)
+{
+	int index = hwirq - ics->hwirq_start;
+
+	BUG_ON(index < 0 || index >= ics->count);
+
+	if (!ics->hwirq_cpu_map)
+		return xics_default_server;
+
+	return ics->hwirq_cpu_map[index];
+}
+#else /* !CONFIG_SMP */
+static int cache_hwirq_map(struct wsp_ics *ics, unsigned int hwirq,
+			   const cpumask_t *affinity)
+{
+	return 0;
+}
+
+static int get_irq_server(struct wsp_ics *ics, unsigned int hwirq)
+{
+	return xics_default_server;
+}
+
+static void alloc_irq_map(struct wsp_ics *ics) { }
+#endif
+
+static void wsp_chip_unmask_irq(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	struct wsp_ics *ics;
+	int server;
+	u64 xive;
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return;
+
+	ics = d->chip_data;
+	if (WARN_ON(!ics))
+		return;
+
+	server = get_irq_server(ics, hw_irq);
+
+	xive = wsp_ics_get_xive(ics, hw_irq);
+	xive = xive_set_server(xive, server);
+	xive = xive_set_priority(xive, DEFAULT_PRIORITY);
+	wsp_ics_set_xive(ics, hw_irq, xive);
+}
+
+static unsigned int wsp_chip_startup(struct irq_data *d)
+{
+	/* unmask it */
+	wsp_chip_unmask_irq(d);
+	return 0;
+}
+
+static void wsp_mask_real_irq(unsigned int hw_irq, struct wsp_ics *ics)
+{
+	u64 xive;
+
+	if (hw_irq == XICS_IPI)
+		return;
+
+	if (WARN_ON(!ics))
+		return;
+	xive = wsp_ics_get_xive(ics, hw_irq);
+	xive = xive_set_server(xive, xics_default_server);
+	xive = xive_set_priority(xive, LOWEST_PRIORITY);
+	wsp_ics_set_xive(ics, hw_irq, xive);
+}
+
+static void wsp_chip_mask_irq(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	struct wsp_ics *ics = d->chip_data;
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return;
+
+	wsp_mask_real_irq(hw_irq, ics);
+}
+
+static int wsp_chip_set_affinity(struct irq_data *d,
+				 const struct cpumask *cpumask, bool force)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	struct wsp_ics *ics;
+	int ret;
+	u64 xive;
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return -1;
+
+	ics = d->chip_data;
+	if (WARN_ON(!ics))
+		return -1;
+	xive = wsp_ics_get_xive(ics, hw_irq);
+
+	/*
+	 * For the moment only implement delivery to all cpus or one cpu.
+	 * Get current irq_server for the given irq
+	 */
+	ret = cache_hwirq_map(ics, d->irq, cpumask);
+	if (ret == -1) {
+		char cpulist[128];
+		cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask);
+		pr_warning("%s: No online cpus in the mask %s for irq %d\n",
+			   __func__, cpulist, d->irq);
+		return -1;
+	} else if (ret == -ENOMEM) {
+		pr_warning("%s: Out of memory\n", __func__);
+		return -1;
+	}
+
+	xive = xive_set_server(xive, get_irq_server(ics, hw_irq));
+	wsp_ics_set_xive(ics, hw_irq, xive);
+
+	return 0;
+}
+
+static struct irq_chip wsp_irq_chip = {
+	.name = "WSP ICS",
+	.irq_startup		= wsp_chip_startup,
+	.irq_mask		= wsp_chip_mask_irq,
+	.irq_unmask		= wsp_chip_unmask_irq,
+	.irq_set_affinity	= wsp_chip_set_affinity
+};
+
+static int wsp_ics_host_match(struct ics *ics, struct device_node *dn)
+{
+	/* All ICSs in the system implement a global irq number space,
+	 * so match against them all. */
+	return of_device_is_compatible(dn, "ibm,ppc-xics");
+}
+
+static int wsp_ics_match_hwirq(struct wsp_ics *wsp_ics, unsigned int hwirq)
+{
+	if (hwirq >= wsp_ics->hwirq_start &&
+	    hwirq <  wsp_ics->hwirq_start + wsp_ics->count)
+		return 1;
+
+	return 0;
+}
+
+static int wsp_ics_map(struct ics *ics, unsigned int virq)
+{
+	struct wsp_ics *wsp_ics = to_wsp_ics(ics);
+	unsigned int hw_irq = virq_to_hw(virq);
+	unsigned long flags;
+
+	if (!wsp_ics_match_hwirq(wsp_ics, hw_irq))
+		return -ENOENT;
+
+	irq_set_chip_and_handler(virq, &wsp_irq_chip, handle_fasteoi_irq);
+
+	irq_set_chip_data(virq, wsp_ics);
+
+	spin_lock_irqsave(&wsp_ics->lock, flags);
+	bitmap_allocate_region(wsp_ics->bitmap, hw_irq - wsp_ics->hwirq_start, 0);
+	spin_unlock_irqrestore(&wsp_ics->lock, flags);
+
+	return 0;
+}
+
+static void wsp_ics_mask_unknown(struct ics *ics, unsigned long hw_irq)
+{
+	struct wsp_ics *wsp_ics = to_wsp_ics(ics);
+
+	if (!wsp_ics_match_hwirq(wsp_ics, hw_irq))
+		return;
+
+	pr_err("%s: IRQ %lu (real) is invalid, disabling it.\n", __func__, hw_irq);
+	wsp_mask_real_irq(hw_irq, wsp_ics);
+}
+
+static long wsp_ics_get_server(struct ics *ics, unsigned long hw_irq)
+{
+	struct wsp_ics *wsp_ics = to_wsp_ics(ics);
+
+	if (!wsp_ics_match_hwirq(wsp_ics, hw_irq))
+		return -ENOENT;
+
+	return get_irq_server(wsp_ics, hw_irq);
+}
+
+/* HW Number allocation API */
+
+static struct wsp_ics *wsp_ics_find_dn_ics(struct device_node *dn)
+{
+	struct device_node *iparent;
+	int i;
+
+	iparent = of_irq_find_parent(dn);
+	if (!iparent) {
+		pr_err("wsp_ics: Failed to find interrupt parent!\n");
+		return NULL;
+	}
+
+	for(i = 0; i < num_ics; i++) {
+		if(ics_list[i].dn == iparent)
+			break;
+	}
+
+	if (i >= num_ics) {
+		pr_err("wsp_ics: Unable to find parent bitmap!\n");
+		return NULL;
+	}
+
+	return &ics_list[i];
+}
+
+int wsp_ics_alloc_irq(struct device_node *dn, int num)
+{
+	struct wsp_ics *ics;
+	int order, offset;
+
+	ics = wsp_ics_find_dn_ics(dn);
+	if (!ics)
+		return -ENODEV;
+
+	/* Fast, but overly strict if num isn't a power of two */
+	order = get_count_order(num);
+
+	spin_lock_irq(&ics->lock);
+	offset = bitmap_find_free_region(ics->bitmap, ics->count, order);
+	spin_unlock_irq(&ics->lock);
+
+	if (offset < 0)
+		return offset;
+
+	return offset + ics->hwirq_start;
+}
+
+void wsp_ics_free_irq(struct device_node *dn, unsigned int irq)
+{
+	struct wsp_ics *ics;
+
+	ics = wsp_ics_find_dn_ics(dn);
+	if (WARN_ON(!ics))
+		return;
+
+	spin_lock_irq(&ics->lock);
+	bitmap_release_region(ics->bitmap, irq, 0);
+	spin_unlock_irq(&ics->lock);
+}
+
+/* Initialisation */
+
+static int __init wsp_ics_bitmap_setup(struct wsp_ics *ics,
+				      struct device_node *dn)
+{
+	int len, i, j, size;
+	u32 start, count;
+	const u32 *p;
+
+	size = BITS_TO_LONGS(ics->count) * sizeof(long);
+	ics->bitmap = kzalloc(size, GFP_KERNEL);
+	if (!ics->bitmap) {
+		pr_err("wsp_ics: ENOMEM allocating IRQ bitmap!\n");
+		return -ENOMEM;
+	}
+
+	spin_lock_init(&ics->lock);
+
+	p = of_get_property(dn, "available-ranges", &len);
+	if (!p || !len) {
+		/* FIXME this should be a WARN() once mambo is updated */
+		pr_err("wsp_ics: No available-ranges defined for %s\n",
+			dn->full_name);
+		return 0;
+	}
+
+	if (len % (2 * sizeof(u32)) != 0) {
+		/* FIXME this should be a WARN() once mambo is updated */
+		pr_err("wsp_ics: Invalid available-ranges for %s\n",
+			dn->full_name);
+		return 0;
+	}
+
+	bitmap_fill(ics->bitmap, ics->count);
+
+	for (i = 0; i < len / sizeof(u32); i += 2) {
+		start = of_read_number(p + i, 1);
+		count = of_read_number(p + i + 1, 1);
+
+		pr_devel("%s: start: %d count: %d\n", __func__, start, count);
+
+		if ((start + count) > (ics->hwirq_start + ics->count) ||
+		     start < ics->hwirq_start) {
+			pr_err("wsp_ics: Invalid range! -> %d to %d\n",
+					start, start + count);
+			break;
+		}
+
+		for (j = 0; j < count; j++)
+			bitmap_release_region(ics->bitmap,
+				(start + j) - ics->hwirq_start, 0);
+	}
+
+	/* Ensure LSIs are not available for allocation */
+	bitmap_allocate_region(ics->bitmap, ics->lsi_base,
+			       get_count_order(ics->lsi_count));
+
+	return 0;
+}
+
+static int __init wsp_ics_setup(struct wsp_ics *ics, struct device_node *dn)
+{
+	u32 lsi_buid, msi_buid, msi_base, msi_count;
+	void __iomem *regs;
+	const u32 *p;
+	int rc, len, i;
+	u64 caps, buid;
+
+	p = of_get_property(dn, "interrupt-ranges", &len);
+	if (!p || len < (2 * sizeof(u32))) {
+		pr_err("wsp_ics: No/bad interrupt-ranges found on %s\n",
+			dn->full_name);
+		return -ENOENT;
+	}
+
+	if (len > (2 * sizeof(u32))) {
+		pr_err("wsp_ics: Multiple ics ranges not supported.\n");
+		return -EINVAL;
+	}
+
+	regs = of_iomap(dn, 0);
+	if (!regs) {
+		pr_err("wsp_ics: of_iomap(%s) failed\n", dn->full_name);
+		return -ENXIO;
+	}
+
+	ics->hwirq_start = of_read_number(p, 1);
+	ics->count = of_read_number(p + 1, 1);
+	ics->regs = regs;
+
+	ics->chip_id = wsp_get_chip_id(dn);
+	if (WARN_ON(ics->chip_id < 0))
+		ics->chip_id = 0;
+
+	/* Get some informations about the critter */
+	caps = in_be64(ICS_INT_CAPS_REG(ics->regs));
+	buid = in_be64(INT_SRC_LAYER_BUID_REG(ics->regs));
+	ics->lsi_count = caps >> 56;
+	msi_count = (caps >> 44) & 0x7ff;
+
+	/* Note: LSI BUID is 9 bits, but really only 3 are BUID and the
+	 * rest is mixed in the interrupt number. We store the whole
+	 * thing though
+	 */
+	lsi_buid = (buid >> 48) & 0x1ff;
+	ics->lsi_base = (ics->chip_id << WSP_ICS_CHIP_SHIFT) | lsi_buid << 5;
+	msi_buid = (buid >> 37) & 0x7;
+	msi_base = (ics->chip_id << WSP_ICS_CHIP_SHIFT) | msi_buid << 11;
+
+	pr_info("wsp_ics: Found %s\n", dn->full_name);
+	pr_info("wsp_ics:    irq range : 0x%06llx..0x%06llx\n",
+		ics->hwirq_start, ics->hwirq_start + ics->count - 1);
+	pr_info("wsp_ics:    %4d LSIs : 0x%06x..0x%06x\n",
+		ics->lsi_count, ics->lsi_base,
+		ics->lsi_base + ics->lsi_count - 1);
+	pr_info("wsp_ics:    %4d MSIs : 0x%06x..0x%06x\n",
+		msi_count, msi_base,
+		msi_base + msi_count - 1);
+
+	/* Let's check the HW config is sane */
+	if (ics->lsi_base < ics->hwirq_start ||
+	    (ics->lsi_base + ics->lsi_count) > (ics->hwirq_start + ics->count))
+		pr_warning("wsp_ics: WARNING ! LSIs out of interrupt-ranges !\n");
+	if (msi_base < ics->hwirq_start ||
+	    (msi_base + msi_count) > (ics->hwirq_start + ics->count))
+		pr_warning("wsp_ics: WARNING ! MSIs out of interrupt-ranges !\n");
+
+	/* We don't check for overlap between LSI and MSI, which will happen
+	 * if we use the same BUID, I'm not sure yet how legit that is.
+	 */
+
+	rc = wsp_ics_bitmap_setup(ics, dn);
+	if (rc) {
+		iounmap(regs);
+		return rc;
+	}
+
+	ics->dn = of_node_get(dn);
+	alloc_irq_map(ics);
+
+	for(i = 0; i < ics->count; i++)
+		wsp_mask_real_irq(ics->hwirq_start + i, ics);
+
+	ics->ics.map = wsp_ics_map;
+	ics->ics.mask_unknown = wsp_ics_mask_unknown;
+	ics->ics.get_server = wsp_ics_get_server;
+	ics->ics.host_match = wsp_ics_host_match;
+
+	xics_register_ics(&ics->ics);
+
+	return 0;
+}
+
+static void __init wsp_ics_set_default_server(void)
+{
+	struct device_node *np;
+	u32 hwid;
+
+	/* Find the server number for the boot cpu. */
+	np = of_get_cpu_node(boot_cpuid, NULL);
+	BUG_ON(!np);
+
+	hwid = get_hard_smp_processor_id(boot_cpuid);
+
+	pr_info("wsp_ics: default server is %#x, CPU %s\n", hwid, np->full_name);
+	xics_default_server = hwid;
+
+	of_node_put(np);
+}
+
+static int __init wsp_ics_init(void)
+{
+	struct device_node *dn;
+	struct wsp_ics *ics;
+	int rc, found;
+
+	wsp_ics_set_default_server();
+
+	found = 0;
+	for_each_compatible_node(dn, NULL, "ibm,ppc-xics")
+		found++;
+
+	if (found == 0) {
+		pr_err("wsp_ics: No ICS's found!\n");
+		return -ENODEV;
+	}
+
+	ics_list = kmalloc(sizeof(*ics) * found, GFP_KERNEL);
+	if (!ics_list) {
+		pr_err("wsp_ics: No memory for structs.\n");
+		return -ENOMEM;
+	}
+
+	num_ics = 0;
+	ics = ics_list;
+	for_each_compatible_node(dn, NULL, "ibm,wsp-xics") {
+		rc = wsp_ics_setup(ics, dn);
+		if (rc == 0) {
+			ics++;
+			num_ics++;
+		}
+	}
+
+	if (found != num_ics) {
+		pr_err("wsp_ics: Failed setting up %d ICS's\n",
+			found - num_ics);
+		return -1;
+	}
+
+	return 0;
+}
+
+void __init wsp_init_irq(void)
+{
+	wsp_ics_init();
+	xics_init();
+
+	/* We need to patch our irq chip's EOI to point to the right ICP */
+	wsp_irq_chip.irq_eoi = icp_ops->eoi;
+}
diff --git a/arch/powerpc/platforms/wsp/ics.h b/arch/powerpc/platforms/wsp/ics.h
new file mode 100644
index 00000000000..e34d5310264
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/ics.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2009 IBM Corporation.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#ifndef __ICS_H
+#define __ICS_H
+
+#define XIVE_ADDR_MASK		0x7FFULL
+
+extern void wsp_init_irq(void);
+
+extern int wsp_ics_alloc_irq(struct device_node *dn, int num);
+extern void wsp_ics_free_irq(struct device_node *dn, unsigned int irq);
+
+#endif /* __ICS_H */
diff --git a/arch/powerpc/platforms/wsp/opb_pic.c b/arch/powerpc/platforms/wsp/opb_pic.c
new file mode 100644
index 00000000000..be05631a3c1
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/opb_pic.c
@@ -0,0 +1,332 @@
+/*
+ * IBM Onboard Peripheral Bus Interrupt Controller
+ *
+ * Copyright 2010 Jack Miller, IBM Corporation.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+
+#include <asm/reg_a2.h>
+#include <asm/irq.h>
+
+#define OPB_NR_IRQS 32
+
+#define OPB_MLSASIER	0x04    /* MLS Accumulated Status IER */
+#define OPB_MLSIR	0x50	/* MLS Interrupt Register */
+#define OPB_MLSIER	0x54	/* MLS Interrupt Enable Register */
+#define OPB_MLSIPR	0x58	/* MLS Interrupt Polarity Register */
+#define OPB_MLSIIR	0x5c	/* MLS Interrupt Inputs Register */
+
+static int opb_index = 0;
+
+struct opb_pic {
+	struct irq_host *host;
+	void *regs;
+	int index;
+	spinlock_t lock;
+};
+
+static u32 opb_in(struct opb_pic *opb, int offset)
+{
+	return in_be32(opb->regs + offset);
+}
+
+static void opb_out(struct opb_pic *opb, int offset, u32 val)
+{
+	out_be32(opb->regs + offset, val);
+}
+
+static void opb_unmask_irq(struct irq_data *d)
+{
+	struct opb_pic *opb;
+	unsigned long flags;
+	u32 ier, bitset;
+
+	opb = d->chip_data;
+	bitset = (1 << (31 - irqd_to_hwirq(d)));
+
+	spin_lock_irqsave(&opb->lock, flags);
+
+	ier = opb_in(opb, OPB_MLSIER);
+	opb_out(opb, OPB_MLSIER, ier | bitset);
+	ier = opb_in(opb, OPB_MLSIER);
+
+	spin_unlock_irqrestore(&opb->lock, flags);
+}
+
+static void opb_mask_irq(struct irq_data *d)
+{
+	struct opb_pic *opb;
+	unsigned long flags;
+	u32 ier, mask;
+
+	opb = d->chip_data;
+	mask = ~(1 << (31 - irqd_to_hwirq(d)));
+
+	spin_lock_irqsave(&opb->lock, flags);
+
+	ier = opb_in(opb, OPB_MLSIER);
+	opb_out(opb, OPB_MLSIER, ier & mask);
+	ier = opb_in(opb, OPB_MLSIER); // Flush posted writes
+
+	spin_unlock_irqrestore(&opb->lock, flags);
+}
+
+static void opb_ack_irq(struct irq_data *d)
+{
+	struct opb_pic *opb;
+	unsigned long flags;
+	u32 bitset;
+
+	opb = d->chip_data;
+	bitset = (1 << (31 - irqd_to_hwirq(d)));
+
+	spin_lock_irqsave(&opb->lock, flags);
+
+	opb_out(opb, OPB_MLSIR, bitset);
+	opb_in(opb, OPB_MLSIR); // Flush posted writes
+
+	spin_unlock_irqrestore(&opb->lock, flags);
+}
+
+static void opb_mask_ack_irq(struct irq_data *d)
+{
+	struct opb_pic *opb;
+	unsigned long flags;
+	u32 bitset;
+	u32 ier, ir;
+
+	opb = d->chip_data;
+	bitset = (1 << (31 - irqd_to_hwirq(d)));
+
+	spin_lock_irqsave(&opb->lock, flags);
+
+	ier = opb_in(opb, OPB_MLSIER);
+	opb_out(opb, OPB_MLSIER, ier & ~bitset);
+	ier = opb_in(opb, OPB_MLSIER); // Flush posted writes
+
+	opb_out(opb, OPB_MLSIR, bitset);
+	ir = opb_in(opb, OPB_MLSIR); // Flush posted writes
+
+	spin_unlock_irqrestore(&opb->lock, flags);
+}
+
+static int opb_set_irq_type(struct irq_data *d, unsigned int flow)
+{
+	struct opb_pic *opb;
+	unsigned long flags;
+	int invert, ipr, mask, bit;
+
+	opb = d->chip_data;
+
+	/* The only information we're interested in in the type is whether it's
+	 * a high or low trigger. For high triggered interrupts, the polarity
+	 * set for it in the MLS Interrupt Polarity Register is 0, for low
+	 * interrupts it's 1 so that the proper input in the MLS Interrupt Input
+	 * Register is interrupted as asserting the interrupt. */
+
+	switch (flow) {
+		case IRQ_TYPE_NONE:
+			opb_mask_irq(d);
+			return 0;
+
+		case IRQ_TYPE_LEVEL_HIGH:
+			invert = 0;
+			break;
+
+		case IRQ_TYPE_LEVEL_LOW:
+			invert = 1;
+			break;
+
+		default:
+			return -EINVAL;
+	}
+
+	bit = (1 << (31 - irqd_to_hwirq(d)));
+	mask = ~bit;
+
+	spin_lock_irqsave(&opb->lock, flags);
+
+	ipr = opb_in(opb, OPB_MLSIPR);
+	ipr = (ipr & mask) | (invert ? bit : 0);
+	opb_out(opb, OPB_MLSIPR, ipr);
+	ipr = opb_in(opb, OPB_MLSIPR);  // Flush posted writes
+
+	spin_unlock_irqrestore(&opb->lock, flags);
+
+	/* Record the type in the interrupt descriptor */
+	irqd_set_trigger_type(d, flow);
+
+	return 0;
+}
+
+static struct irq_chip opb_irq_chip = {
+	.name		= "OPB",
+	.irq_mask	= opb_mask_irq,
+	.irq_unmask	= opb_unmask_irq,
+	.irq_mask_ack	= opb_mask_ack_irq,
+	.irq_ack	= opb_ack_irq,
+	.irq_set_type	= opb_set_irq_type
+};
+
+static int opb_host_map(struct irq_host *host, unsigned int virq,
+		irq_hw_number_t hwirq)
+{
+	struct opb_pic *opb;
+
+	opb = host->host_data;
+
+	/* Most of the important stuff is handled by the generic host code, like
+	 * the lookup, so just attach some info to the virtual irq */
+
+	irq_set_chip_data(virq, opb);
+	irq_set_chip_and_handler(virq, &opb_irq_chip, handle_level_irq);
+	irq_set_irq_type(virq, IRQ_TYPE_NONE);
+
+	return 0;
+}
+
+static int opb_host_xlate(struct irq_host *host, struct device_node *dn,
+		const u32 *intspec, unsigned int intsize,
+		irq_hw_number_t *out_hwirq, unsigned int *out_type)
+{
+	/* Interrupt size must == 2 */
+	BUG_ON(intsize != 2);
+	*out_hwirq = intspec[0];
+	*out_type = intspec[1];
+	return 0;
+}
+
+static struct irq_host_ops opb_host_ops = {
+	.map = opb_host_map,
+	.xlate = opb_host_xlate,
+};
+
+irqreturn_t opb_irq_handler(int irq, void *private)
+{
+	struct opb_pic *opb;
+	u32 ir, src, subvirq;
+
+	opb = (struct opb_pic *) private;
+
+	/* Read the OPB MLS Interrupt Register for
+	 * asserted interrupts */
+	ir = opb_in(opb, OPB_MLSIR);
+	if (!ir)
+		return IRQ_NONE;
+
+	do {
+		/* Get 1 - 32 source, *NOT* bit */
+		src = 32 - ffs(ir);
+
+		/* Translate from the OPB's conception of interrupt number to
+		 * Linux's virtual IRQ */
+
+		subvirq = irq_linear_revmap(opb->host, src);
+
+		generic_handle_irq(subvirq);
+	} while ((ir = opb_in(opb, OPB_MLSIR)));
+
+	return IRQ_HANDLED;
+}
+
+struct opb_pic *opb_pic_init_one(struct device_node *dn)
+{
+	struct opb_pic *opb;
+	struct resource res;
+
+	if (of_address_to_resource(dn, 0, &res)) {
+		printk(KERN_ERR "opb: Couldn't translate resource\n");
+		return  NULL;
+	}
+
+	opb = kzalloc(sizeof(struct opb_pic), GFP_KERNEL);
+	if (!opb) {
+		printk(KERN_ERR "opb: Failed to allocate opb struct!\n");
+		return NULL;
+	}
+
+	/* Get access to the OPB MMIO registers */
+	opb->regs = ioremap(res.start + 0x10000, 0x1000);
+	if (!opb->regs) {
+		printk(KERN_ERR "opb: Failed to allocate register space!\n");
+		goto free_opb;
+	}
+
+	/* Allocate an irq host so that Linux knows that despite only
+	 * having one interrupt to issue, we're the controller for multiple
+	 * hardware IRQs, so later we can lookup their virtual IRQs. */
+
+	opb->host = irq_alloc_host(dn, IRQ_HOST_MAP_LINEAR,
+			OPB_NR_IRQS, &opb_host_ops, -1);
+
+	if (!opb->host) {
+		printk(KERN_ERR "opb: Failed to allocate IRQ host!\n");
+		goto free_regs;
+	}
+
+	opb->index = opb_index++;
+	spin_lock_init(&opb->lock);
+	opb->host->host_data = opb;
+
+	/* Disable all interrupts by default */
+	opb_out(opb, OPB_MLSASIER, 0);
+	opb_out(opb, OPB_MLSIER, 0);
+
+	/* ACK any interrupts left by FW */
+	opb_out(opb, OPB_MLSIR, 0xFFFFFFFF);
+
+	return opb;
+
+free_regs:
+	iounmap(opb->regs);
+free_opb:
+	kfree(opb);
+	return NULL;
+}
+
+void __init opb_pic_init(void)
+{
+	struct device_node *dn;
+	struct opb_pic *opb;
+	int virq;
+	int rc;
+
+	/* Call init_one for each OPB device */
+	for_each_compatible_node(dn, NULL, "ibm,opb") {
+
+		/* Fill in an OPB struct */
+		opb = opb_pic_init_one(dn);
+		if (!opb) {
+			printk(KERN_WARNING "opb: Failed to init node, skipped!\n");
+			continue;
+		}
+
+		/* Map / get opb's hardware virtual irq */
+		virq = irq_of_parse_and_map(dn, 0);
+		if (virq <= 0) {
+			printk("opb: irq_op_parse_and_map failed!\n");
+			continue;
+		}
+
+		/* Attach opb interrupt handler to new virtual IRQ */
+		rc = request_irq(virq, opb_irq_handler, 0, "OPB LS Cascade", opb);
+		if (rc) {
+			printk("opb: request_irq failed: %d\n", rc);
+			continue;
+		}
+
+		printk("OPB%d init with %d IRQs at %p\n", opb->index,
+				OPB_NR_IRQS, opb->regs);
+	}
+}
diff --git a/arch/powerpc/platforms/wsp/psr2.c b/arch/powerpc/platforms/wsp/psr2.c
new file mode 100644
index 00000000000..40f28916ff6
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/psr2.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2008-2011, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/of.h>
+#include <linux/smp.h>
+
+#include <asm/machdep.h>
+#include <asm/system.h>
+#include <asm/time.h>
+#include <asm/udbg.h>
+
+#include "ics.h"
+#include "wsp.h"
+
+
+static void psr2_spin(void)
+{
+	hard_irq_disable();
+	for (;;) ;
+}
+
+static void psr2_restart(char *cmd)
+{
+	psr2_spin();
+}
+
+static int psr2_probe_devices(void)
+{
+	struct device_node *np;
+
+	/* Our RTC is a ds1500. It seems to be programatically compatible
+	 * with the ds1511 for which we have a driver so let's use that
+	 */
+	np = of_find_compatible_node(NULL, NULL, "dallas,ds1500");
+	if (np != NULL) {
+		struct resource res;
+		if (of_address_to_resource(np, 0, &res) == 0)
+			platform_device_register_simple("ds1511", 0, &res, 1);
+	}
+	return 0;
+}
+machine_arch_initcall(psr2_md, psr2_probe_devices);
+
+static void __init psr2_setup_arch(void)
+{
+	/* init to some ~sane value until calibrate_delay() runs */
+	loops_per_jiffy = 50000000;
+
+	scom_init_wsp();
+
+	/* Setup SMP callback */
+#ifdef CONFIG_SMP
+	a2_setup_smp();
+#endif
+}
+
+static int __init psr2_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	if (!of_flat_dt_is_compatible(root, "ibm,psr2"))
+		return 0;
+
+	return 1;
+}
+
+static void __init psr2_init_irq(void)
+{
+	wsp_init_irq();
+	opb_pic_init();
+}
+
+define_machine(psr2_md) {
+	.name			= "PSR2 A2",
+	.probe			= psr2_probe,
+	.setup_arch		= psr2_setup_arch,
+	.restart		= psr2_restart,
+	.power_off		= psr2_spin,
+	.halt			= psr2_spin,
+	.calibrate_decr		= generic_calibrate_decr,
+	.init_IRQ		= psr2_init_irq,
+	.progress		= udbg_progress,
+	.power_save		= book3e_idle,
+};
diff --git a/arch/powerpc/platforms/wsp/scom_smp.c b/arch/powerpc/platforms/wsp/scom_smp.c
new file mode 100644
index 00000000000..141e7803209
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/scom_smp.c
@@ -0,0 +1,427 @@
+/*
+ * SCOM support for A2 platforms
+ *
+ * Copyright 2007-2011 Benjamin Herrenschmidt, David Gibson,
+ *		       Michael Ellerman, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/cpumask.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include <asm/cputhreads.h>
+#include <asm/reg_a2.h>
+#include <asm/scom.h>
+#include <asm/udbg.h>
+
+#include "wsp.h"
+
+#define SCOM_RAMC		0x2a		/* Ram Command */
+#define SCOM_RAMC_TGT1_EXT	0x80000000
+#define SCOM_RAMC_SRC1_EXT	0x40000000
+#define SCOM_RAMC_SRC2_EXT	0x20000000
+#define SCOM_RAMC_SRC3_EXT	0x10000000
+#define SCOM_RAMC_ENABLE	0x00080000
+#define SCOM_RAMC_THREADSEL	0x00060000
+#define SCOM_RAMC_EXECUTE	0x00010000
+#define SCOM_RAMC_MSR_OVERRIDE	0x00008000
+#define SCOM_RAMC_MSR_PR	0x00004000
+#define SCOM_RAMC_MSR_GS	0x00002000
+#define SCOM_RAMC_FORCE		0x00001000
+#define SCOM_RAMC_FLUSH		0x00000800
+#define SCOM_RAMC_INTERRUPT	0x00000004
+#define SCOM_RAMC_ERROR		0x00000002
+#define SCOM_RAMC_DONE		0x00000001
+#define SCOM_RAMI		0x29		/* Ram Instruction */
+#define SCOM_RAMIC		0x28		/* Ram Instruction and Command */
+#define SCOM_RAMIC_INSN		0xffffffff00000000
+#define SCOM_RAMD		0x2d		/* Ram Data */
+#define SCOM_RAMDH		0x2e		/* Ram Data High */
+#define SCOM_RAMDL		0x2f		/* Ram Data Low */
+#define SCOM_PCCR0		0x33		/* PC Configuration Register 0 */
+#define SCOM_PCCR0_ENABLE_DEBUG	0x80000000
+#define SCOM_PCCR0_ENABLE_RAM	0x40000000
+#define SCOM_THRCTL		0x30		/* Thread Control and Status */
+#define SCOM_THRCTL_T0_STOP	0x80000000
+#define SCOM_THRCTL_T1_STOP	0x40000000
+#define SCOM_THRCTL_T2_STOP	0x20000000
+#define SCOM_THRCTL_T3_STOP	0x10000000
+#define SCOM_THRCTL_T0_STEP	0x08000000
+#define SCOM_THRCTL_T1_STEP	0x04000000
+#define SCOM_THRCTL_T2_STEP	0x02000000
+#define SCOM_THRCTL_T3_STEP	0x01000000
+#define SCOM_THRCTL_T0_RUN	0x00800000
+#define SCOM_THRCTL_T1_RUN	0x00400000
+#define SCOM_THRCTL_T2_RUN	0x00200000
+#define SCOM_THRCTL_T3_RUN	0x00100000
+#define SCOM_THRCTL_T0_PM	0x00080000
+#define SCOM_THRCTL_T1_PM	0x00040000
+#define SCOM_THRCTL_T2_PM	0x00020000
+#define SCOM_THRCTL_T3_PM	0x00010000
+#define SCOM_THRCTL_T0_UDE	0x00008000
+#define SCOM_THRCTL_T1_UDE	0x00004000
+#define SCOM_THRCTL_T2_UDE	0x00002000
+#define SCOM_THRCTL_T3_UDE	0x00001000
+#define SCOM_THRCTL_ASYNC_DIS	0x00000800
+#define SCOM_THRCTL_TB_DIS	0x00000400
+#define SCOM_THRCTL_DEC_DIS	0x00000200
+#define SCOM_THRCTL_AND		0x31		/* Thread Control and Status */
+#define SCOM_THRCTL_OR		0x32		/* Thread Control and Status */
+
+
+static DEFINE_PER_CPU(scom_map_t, scom_ptrs);
+
+static scom_map_t get_scom(int cpu, struct device_node *np, int *first_thread)
+{
+	scom_map_t scom = per_cpu(scom_ptrs, cpu);
+	int tcpu;
+
+	if (scom_map_ok(scom)) {
+		*first_thread = 0;
+		return scom;
+	}
+
+	*first_thread = 1;
+
+	scom = scom_map_device(np, 0);
+
+	for (tcpu = cpu_first_thread_sibling(cpu);
+	     tcpu <= cpu_last_thread_sibling(cpu); tcpu++)
+		per_cpu(scom_ptrs, tcpu) = scom;
+
+	/* Hack: for the boot core, this will actually get called on
+	 * the second thread up, not the first so our test above will
+	 * set first_thread incorrectly. */
+	if (cpu_first_thread_sibling(cpu) == 0)
+		*first_thread = 0;
+
+	return scom;
+}
+
+static int a2_scom_ram(scom_map_t scom, int thread, u32 insn, int extmask)
+{
+	u64 cmd, mask, val;
+	int n = 0;
+
+	cmd = ((u64)insn << 32) | (((u64)extmask & 0xf) << 28)
+		| ((u64)thread << 17) | SCOM_RAMC_ENABLE | SCOM_RAMC_EXECUTE;
+	mask = SCOM_RAMC_DONE | SCOM_RAMC_INTERRUPT | SCOM_RAMC_ERROR;
+
+	scom_write(scom, SCOM_RAMIC, cmd);
+
+	while (!((val = scom_read(scom, SCOM_RAMC)) & mask)) {
+		pr_devel("Waiting on RAMC = 0x%llx\n", val);
+		if (++n == 3) {
+			pr_err("RAMC timeout on instruction 0x%08x, thread %d\n",
+			       insn, thread);
+			return -1;
+		}
+	}
+
+	if (val & SCOM_RAMC_INTERRUPT) {
+		pr_err("RAMC interrupt on instruction 0x%08x, thread %d\n",
+		       insn, thread);
+		return -SCOM_RAMC_INTERRUPT;
+	}
+
+	if (val & SCOM_RAMC_ERROR) {
+		pr_err("RAMC error on instruction 0x%08x, thread %d\n",
+		       insn, thread);
+		return -SCOM_RAMC_ERROR;
+	}
+
+	return 0;
+}
+
+static int a2_scom_getgpr(scom_map_t scom, int thread, int gpr, int alt,
+			  u64 *out_gpr)
+{
+	int rc;
+
+	/* or rN, rN, rN */
+	u32 insn = 0x7c000378 | (gpr << 21) | (gpr << 16) | (gpr << 11);
+	rc = a2_scom_ram(scom, thread, insn, alt ? 0xf : 0x0);
+	if (rc)
+		return rc;
+
+	*out_gpr = scom_read(scom, SCOM_RAMD);
+
+	return 0;
+}
+
+static int a2_scom_getspr(scom_map_t scom, int thread, int spr, u64 *out_spr)
+{
+	int rc, sprhi, sprlo;
+	u32 insn;
+
+	sprhi = spr >> 5;
+	sprlo = spr & 0x1f;
+	insn = 0x7c2002a6 | (sprlo << 16) | (sprhi << 11); /* mfspr r1,spr */
+
+	if (spr == 0x0ff0)
+		insn = 0x7c2000a6; /* mfmsr r1 */
+
+	rc = a2_scom_ram(scom, thread, insn, 0xf);
+	if (rc)
+		return rc;
+	return a2_scom_getgpr(scom, thread, 1, 1, out_spr);
+}
+
+static int a2_scom_setgpr(scom_map_t scom, int thread, int gpr,
+			  int alt, u64 val)
+{
+	u32 lis = 0x3c000000 | (gpr << 21);
+	u32 li = 0x38000000 | (gpr << 21);
+	u32 oris = 0x64000000 | (gpr << 21) | (gpr << 16);
+	u32 ori = 0x60000000 | (gpr << 21) | (gpr << 16);
+	u32 rldicr32 = 0x780007c6 | (gpr << 21) | (gpr << 16);
+	u32 highest = val >> 48;
+	u32 higher = (val >> 32) & 0xffff;
+	u32 high = (val >> 16) & 0xffff;
+	u32 low = val & 0xffff;
+	int lext = alt ? 0x8 : 0x0;
+	int oext = alt ? 0xf : 0x0;
+	int rc = 0;
+
+	if (highest)
+		rc |= a2_scom_ram(scom, thread, lis | highest, lext);
+
+	if (higher) {
+		if (highest)
+			rc |= a2_scom_ram(scom, thread, oris | higher, oext);
+		else
+			rc |= a2_scom_ram(scom, thread, li | higher, lext);
+	}
+
+	if (highest || higher)
+		rc |= a2_scom_ram(scom, thread, rldicr32, oext);
+
+	if (high) {
+		if (highest || higher)
+			rc |= a2_scom_ram(scom, thread, oris | high, oext);
+		else
+			rc |= a2_scom_ram(scom, thread, lis | high, lext);
+	}
+
+	if (highest || higher || high)
+		rc |= a2_scom_ram(scom, thread, ori | low, oext);
+	else
+		rc |= a2_scom_ram(scom, thread, li | low, lext);
+
+	return rc;
+}
+
+static int a2_scom_setspr(scom_map_t scom, int thread, int spr, u64 val)
+{
+	int sprhi = spr >> 5;
+	int sprlo = spr & 0x1f;
+	/* mtspr spr, r1 */
+	u32 insn = 0x7c2003a6 | (sprlo << 16) | (sprhi << 11);
+
+	if (spr == 0x0ff0)
+		insn = 0x7c200124; /* mtmsr r1 */
+
+	if (a2_scom_setgpr(scom, thread, 1, 1, val))
+		return -1;
+
+	return a2_scom_ram(scom, thread, insn, 0xf);
+}
+
+static int a2_scom_initial_tlb(scom_map_t scom, int thread)
+{
+	extern u32 a2_tlbinit_code_start[], a2_tlbinit_code_end[];
+	extern u32 a2_tlbinit_after_iprot_flush[];
+	extern u32 a2_tlbinit_after_linear_map[];
+	u32 assoc, entries, i;
+	u64 epn, tlbcfg;
+	u32 *p;
+	int rc;
+
+	/* Invalidate all entries (including iprot) */
+
+	rc = a2_scom_getspr(scom, thread, SPRN_TLB0CFG, &tlbcfg);
+	if (rc)
+		goto scom_fail;
+	entries = tlbcfg & TLBnCFG_N_ENTRY;
+	assoc = (tlbcfg & TLBnCFG_ASSOC) >> 24;
+	epn = 0;
+
+	/* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */
+	a2_scom_setspr(scom, thread, SPRN_MMUCR2, 0x000a7531);
+	/* Set MMUCR3 to write all thids bit to the TLB */
+	a2_scom_setspr(scom, thread, SPRN_MMUCR3, 0x0000000f);
+
+	/* Set MAS1 for 1G page size, and MAS2 to our initial EPN */
+	a2_scom_setspr(scom, thread, SPRN_MAS1, MAS1_TSIZE(BOOK3E_PAGESZ_1GB));
+	a2_scom_setspr(scom, thread, SPRN_MAS2, epn);
+	for (i = 0; i < entries; i++) {
+
+		a2_scom_setspr(scom, thread, SPRN_MAS0, MAS0_ESEL(i % assoc));
+
+		/* tlbwe */
+		rc = a2_scom_ram(scom, thread, 0x7c0007a4, 0);
+		if (rc)
+			goto scom_fail;
+
+		/* Next entry is new address? */
+		if((i + 1) % assoc == 0) {
+			epn += (1 << 30);
+			a2_scom_setspr(scom, thread, SPRN_MAS2, epn);
+		}
+	}
+
+	/* Setup args for linear mapping */
+	rc = a2_scom_setgpr(scom, thread, 3, 0, MAS0_TLBSEL(0));
+	if (rc)
+		goto scom_fail;
+
+	/* Linear mapping */
+	for (p = a2_tlbinit_code_start; p < a2_tlbinit_after_linear_map; p++) {
+		rc = a2_scom_ram(scom, thread, *p, 0);
+		if (rc)
+			goto scom_fail;
+	}
+
+	/*
+	 * For the boot thread, between the linear mapping and the debug
+	 * mappings there is a loop to flush iprot mappings. Ramming doesn't do
+	 * branches, but the secondary threads don't need to be nearly as smart
+	 * (i.e. we don't need to worry about invalidating the mapping we're
+	 * standing on).
+	 */
+
+	/* Debug mappings. Expects r11 = MAS0 from linear map (set above) */
+	for (p = a2_tlbinit_after_iprot_flush; p < a2_tlbinit_code_end; p++) {
+		rc = a2_scom_ram(scom, thread, *p, 0);
+		if (rc)
+			goto scom_fail;
+	}
+
+scom_fail:
+	if (rc)
+		pr_err("Setting up initial TLB failed, err %d\n", rc);
+
+	if (rc == -SCOM_RAMC_INTERRUPT) {
+		/* Interrupt, dump some status */
+		int rc[10];
+		u64 iar, srr0, srr1, esr, mas0, mas1, mas2, mas7_3, mas8, ccr2;
+		rc[0] = a2_scom_getspr(scom, thread, SPRN_IAR, &iar);
+		rc[1] = a2_scom_getspr(scom, thread, SPRN_SRR0, &srr0);
+		rc[2] = a2_scom_getspr(scom, thread, SPRN_SRR1, &srr1);
+		rc[3] = a2_scom_getspr(scom, thread, SPRN_ESR, &esr);
+		rc[4] = a2_scom_getspr(scom, thread, SPRN_MAS0, &mas0);
+		rc[5] = a2_scom_getspr(scom, thread, SPRN_MAS1, &mas1);
+		rc[6] = a2_scom_getspr(scom, thread, SPRN_MAS2, &mas2);
+		rc[7] = a2_scom_getspr(scom, thread, SPRN_MAS7_MAS3, &mas7_3);
+		rc[8] = a2_scom_getspr(scom, thread, SPRN_MAS8, &mas8);
+		rc[9] = a2_scom_getspr(scom, thread, SPRN_A2_CCR2, &ccr2);
+		pr_err(" -> retreived IAR =0x%llx (err %d)\n", iar, rc[0]);
+		pr_err("    retreived SRR0=0x%llx (err %d)\n", srr0, rc[1]);
+		pr_err("    retreived SRR1=0x%llx (err %d)\n", srr1, rc[2]);
+		pr_err("    retreived ESR =0x%llx (err %d)\n", esr, rc[3]);
+		pr_err("    retreived MAS0=0x%llx (err %d)\n", mas0, rc[4]);
+		pr_err("    retreived MAS1=0x%llx (err %d)\n", mas1, rc[5]);
+		pr_err("    retreived MAS2=0x%llx (err %d)\n", mas2, rc[6]);
+		pr_err("    retreived MS73=0x%llx (err %d)\n", mas7_3, rc[7]);
+		pr_err("    retreived MAS8=0x%llx (err %d)\n", mas8, rc[8]);
+		pr_err("    retreived CCR2=0x%llx (err %d)\n", ccr2, rc[9]);
+	}
+
+	return rc;
+}
+
+int __devinit a2_scom_startup_cpu(unsigned int lcpu, int thr_idx,
+				  struct device_node *np)
+{
+	u64 init_iar, init_msr, init_ccr2;
+	unsigned long start_here;
+	int rc, core_setup;
+	scom_map_t scom;
+	u64 pccr0;
+
+	scom = get_scom(lcpu, np, &core_setup);
+	if (!scom) {
+		printk(KERN_ERR "Couldn't map SCOM for CPU%d\n", lcpu);
+		return -1;
+	}
+
+	pr_devel("Bringing up CPU%d using SCOM...\n", lcpu);
+
+	pccr0 = scom_read(scom, SCOM_PCCR0);
+	scom_write(scom, SCOM_PCCR0, pccr0 | SCOM_PCCR0_ENABLE_DEBUG |
+				     SCOM_PCCR0_ENABLE_RAM);
+
+	/* Stop the thead with THRCTL. If we are setting up the TLB we stop all
+	 * threads. We also disable asynchronous interrupts while RAMing.
+	 */
+	if (core_setup)
+		scom_write(scom, SCOM_THRCTL_OR,
+			      SCOM_THRCTL_T0_STOP |
+			      SCOM_THRCTL_T1_STOP |
+			      SCOM_THRCTL_T2_STOP |
+			      SCOM_THRCTL_T3_STOP |
+			      SCOM_THRCTL_ASYNC_DIS);
+	else
+		scom_write(scom, SCOM_THRCTL_OR, SCOM_THRCTL_T0_STOP >> thr_idx);
+
+	/* Flush its pipeline just in case */
+	scom_write(scom, SCOM_RAMC, ((u64)thr_idx << 17) |
+		      SCOM_RAMC_FLUSH | SCOM_RAMC_ENABLE);
+
+	a2_scom_getspr(scom, thr_idx, SPRN_IAR, &init_iar);
+	a2_scom_getspr(scom, thr_idx, 0x0ff0, &init_msr);
+	a2_scom_getspr(scom, thr_idx, SPRN_A2_CCR2, &init_ccr2);
+
+	/* Set MSR to MSR_CM (0x0ff0 is magic value for MSR_CM) */
+	rc = a2_scom_setspr(scom, thr_idx, 0x0ff0, MSR_CM);
+	if (rc) {
+		pr_err("Failed to set MSR ! err %d\n", rc);
+		return rc;
+	}
+
+	/* RAM in an sync/isync for the sake of it */
+	a2_scom_ram(scom, thr_idx, 0x7c0004ac, 0);
+	a2_scom_ram(scom, thr_idx, 0x4c00012c, 0);
+
+	if (core_setup) {
+		pr_devel("CPU%d is first thread in core, initializing TLB...\n",
+			 lcpu);
+		rc = a2_scom_initial_tlb(scom, thr_idx);
+		if (rc)
+			goto fail;
+	}
+
+	start_here = *(unsigned long *)(core_setup ? generic_secondary_smp_init
+					: generic_secondary_thread_init);
+	pr_devel("CPU%d entry point at 0x%lx...\n", lcpu, start_here);
+
+	rc |= a2_scom_setspr(scom, thr_idx, SPRN_IAR, start_here);
+	rc |= a2_scom_setgpr(scom, thr_idx, 3, 0,
+			     get_hard_smp_processor_id(lcpu));
+	/*
+	 * Tell book3e_secondary_core_init not to set up the TLB, we've
+	 * already done that.
+	 */
+	rc |= a2_scom_setgpr(scom, thr_idx, 4, 0, 1);
+
+	rc |= a2_scom_setspr(scom, thr_idx, SPRN_TENS, 0x1 << thr_idx);
+
+	scom_write(scom, SCOM_RAMC, 0);
+	scom_write(scom, SCOM_THRCTL_AND, ~(SCOM_THRCTL_T0_STOP >> thr_idx));
+	scom_write(scom, SCOM_PCCR0, pccr0);
+fail:
+	pr_devel("  SCOM initialization %s\n", rc ? "failed" : "succeeded");
+	if (rc) {
+		pr_err("Old IAR=0x%08llx MSR=0x%08llx CCR2=0x%08llx\n",
+		       init_iar, init_msr, init_ccr2);
+	}
+
+	return rc;
+}
diff --git a/arch/powerpc/platforms/wsp/scom_wsp.c b/arch/powerpc/platforms/wsp/scom_wsp.c
new file mode 100644
index 00000000000..4052e2259f3
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/scom_wsp.c
@@ -0,0 +1,77 @@
+/*
+ *  SCOM backend for WSP
+ *
+ *  Copyright 2010 Benjamin Herrenschmidt, IBM Corp.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/cpumask.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include <asm/cputhreads.h>
+#include <asm/reg_a2.h>
+#include <asm/scom.h>
+#include <asm/udbg.h>
+
+#include "wsp.h"
+
+
+static scom_map_t wsp_scom_map(struct device_node *dev, u64 reg, u64 count)
+{
+	struct resource r;
+	u64 xscom_addr;
+
+	if (!of_get_property(dev, "scom-controller", NULL)) {
+		pr_err("%s: device %s is not a SCOM controller\n",
+			__func__, dev->full_name);
+		return SCOM_MAP_INVALID;
+	}
+
+	if (of_address_to_resource(dev, 0, &r)) {
+		pr_debug("Failed to find SCOM controller address\n");
+		return 0;
+	}
+
+	/* Transform the SCOM address into an XSCOM offset */
+	xscom_addr = ((reg & 0x7f000000) >> 1) | ((reg & 0xfffff) << 3);
+
+	return (scom_map_t)ioremap(r.start + xscom_addr, count << 3);
+}
+
+static void wsp_scom_unmap(scom_map_t map)
+{
+	iounmap((void *)map);
+}
+
+static u64 wsp_scom_read(scom_map_t map, u32 reg)
+{
+	u64 __iomem *addr = (u64 __iomem *)map;
+
+	return in_be64(addr + reg);
+}
+
+static void wsp_scom_write(scom_map_t map, u32 reg, u64 value)
+{
+	u64 __iomem *addr = (u64 __iomem *)map;
+
+	return out_be64(addr + reg, value);
+}
+
+static const struct scom_controller wsp_scom_controller = {
+	.map	= wsp_scom_map,
+	.unmap	= wsp_scom_unmap,
+	.read	= wsp_scom_read,
+	.write	= wsp_scom_write
+};
+
+void scom_init_wsp(void)
+{
+	scom_init(&wsp_scom_controller);
+}
diff --git a/arch/powerpc/platforms/wsp/setup.c b/arch/powerpc/platforms/wsp/setup.c
new file mode 100644
index 00000000000..11ac2f05e01
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/setup.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2010 Michael Ellerman, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/of_platform.h>
+
+#include "wsp.h"
+
+/*
+ * Find chip-id by walking up device tree looking for ibm,wsp-chip-id property.
+ * Won't work for nodes that are not a descendant of a wsp node.
+ */
+int wsp_get_chip_id(struct device_node *dn)
+{
+	const u32 *p;
+	int rc;
+
+	/* Start looking at the specified node, not its parent */
+	dn = of_node_get(dn);
+	while (dn && !(p = of_get_property(dn, "ibm,wsp-chip-id", NULL)))
+		dn = of_get_next_parent(dn);
+
+	if (!dn)
+		return -1;
+
+	rc = *p;
+	of_node_put(dn);
+
+	return rc;
+}
diff --git a/arch/powerpc/platforms/wsp/smp.c b/arch/powerpc/platforms/wsp/smp.c
new file mode 100644
index 00000000000..9d20fa9d371
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/smp.c
@@ -0,0 +1,88 @@
+/*
+ *  SMP Support for A2 platforms
+ *
+ *  Copyright 2007 Benjamin Herrenschmidt, IBM Corp.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/smp.h>
+
+#include <asm/dbell.h>
+#include <asm/machdep.h>
+#include <asm/xics.h>
+
+#include "ics.h"
+#include "wsp.h"
+
+static void __devinit smp_a2_setup_cpu(int cpu)
+{
+	doorbell_setup_this_cpu();
+
+	if (cpu != boot_cpuid)
+		xics_setup_cpu();
+}
+
+int __devinit smp_a2_kick_cpu(int nr)
+{
+	const char *enable_method;
+	struct device_node *np;
+	int thr_idx;
+
+	if (nr < 0 || nr >= NR_CPUS)
+		return -ENOENT;
+
+	np = of_get_cpu_node(nr, &thr_idx);
+	if (!np)
+		return -ENODEV;
+
+	enable_method = of_get_property(np, "enable-method", NULL);
+	pr_devel("CPU%d has enable-method: \"%s\"\n", nr, enable_method);
+
+	if (!enable_method) {
+                printk(KERN_ERR "CPU%d has no enable-method\n", nr);
+		return -ENOENT;
+	} else if (strcmp(enable_method, "ibm,a2-scom") == 0) {
+		if (a2_scom_startup_cpu(nr, thr_idx, np))
+			return -1;
+	} else {
+		printk(KERN_ERR "CPU%d: Don't understand enable-method \"%s\"\n",
+                       nr, enable_method);
+		return -EINVAL;
+	}
+
+	/*
+	 * The processor is currently spinning, waiting for the
+	 * cpu_start field to become non-zero After we set cpu_start,
+	 * the processor will continue on to secondary_start
+	 */
+	paca[nr].cpu_start = 1;
+
+	return 0;
+}
+
+static int __init smp_a2_probe(void)
+{
+	return cpus_weight(cpu_possible_map);
+}
+
+static struct smp_ops_t a2_smp_ops = {
+	.message_pass	= smp_muxed_ipi_message_pass,
+	.cause_ipi	= doorbell_cause_ipi,
+	.probe		= smp_a2_probe,
+	.kick_cpu	= smp_a2_kick_cpu,
+	.setup_cpu	= smp_a2_setup_cpu,
+};
+
+void __init a2_setup_smp(void)
+{
+	smp_ops = &a2_smp_ops;
+}
diff --git a/arch/powerpc/platforms/wsp/wsp.h b/arch/powerpc/platforms/wsp/wsp.h
new file mode 100644
index 00000000000..7c3e087fd2f
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/wsp.h
@@ -0,0 +1,17 @@
+#ifndef __WSP_H
+#define __WSP_H
+
+#include <asm/wsp.h>
+
+extern void wsp_setup_pci(void);
+extern void scom_init_wsp(void);
+
+extern void a2_setup_smp(void);
+extern int a2_scom_startup_cpu(unsigned int lcpu, int thr_idx,
+			       struct device_node *np);
+int smp_a2_cpu_bootable(unsigned int nr);
+int __devinit smp_a2_kick_cpu(int nr);
+
+void opb_pic_init(void);
+
+#endif /*  __WSP_H */
diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig
index 396582835cb..d775fd148d1 100644
--- a/arch/powerpc/sysdev/Kconfig
+++ b/arch/powerpc/sysdev/Kconfig
@@ -12,3 +12,13 @@ config PPC_MSI_BITMAP
 	depends on PCI_MSI
 	default y if MPIC
 	default y if FSL_PCI
+
+source "arch/powerpc/sysdev/xics/Kconfig"
+
+config PPC_SCOM
+	bool
+
+config SCOM_DEBUGFS
+	bool "Expose SCOM controllers via debugfs"
+	depends on PPC_SCOM
+	default n
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index 1e0c933ef77..6076e0074a8 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -57,3 +57,9 @@ obj-$(CONFIG_PPC_MPC52xx)	+= mpc5xxx_clocks.o
 ifeq ($(CONFIG_SUSPEND),y)
 obj-$(CONFIG_6xx)		+= 6xx-suspend.o
 endif
+
+obj-$(CONFIG_PPC_SCOM)		+= scom.o
+
+subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+
+obj-$(CONFIG_PPC_XICS)		+= xics/
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index 1636dd89670..bd0d54060b9 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -216,7 +216,7 @@ static int axon_ram_probe(struct platform_device *device)
 			AXON_RAM_DEVICE_NAME, axon_ram_bank_id, bank->size >> 20);
 
 	bank->ph_addr = resource.start;
-	bank->io_addr = (unsigned long) ioremap_flags(
+	bank->io_addr = (unsigned long) ioremap_prot(
 			bank->ph_addr, bank->size, _PAGE_NO_CACHE);
 	if (bank->io_addr == 0) {
 		dev_err(&device->dev, "ioremap() failed\n");
diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c
index e0bc944eb23..350787c83e2 100644
--- a/arch/powerpc/sysdev/cpm1.c
+++ b/arch/powerpc/sysdev/cpm1.c
@@ -58,21 +58,21 @@ static struct irq_host *cpm_pic_host;
 
 static void cpm_mask_irq(struct irq_data *d)
 {
-	unsigned int cpm_vec = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d);
 
 	clrbits32(&cpic_reg->cpic_cimr, (1 << cpm_vec));
 }
 
 static void cpm_unmask_irq(struct irq_data *d)
 {
-	unsigned int cpm_vec = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d);
 
 	setbits32(&cpic_reg->cpic_cimr, (1 << cpm_vec));
 }
 
 static void cpm_end_irq(struct irq_data *d)
 {
-	unsigned int cpm_vec = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d);
 
 	out_be32(&cpic_reg->cpic_cisr, (1 << cpm_vec));
 }
@@ -157,7 +157,7 @@ unsigned int cpm_pic_init(void)
 		goto end;
 
 	/* Initialize the CPM interrupt controller. */
-	hwirq = (unsigned int)irq_map[sirq].hwirq;
+	hwirq = (unsigned int)virq_to_hw(sirq);
 	out_be32(&cpic_reg->cpic_cicr,
 	    (CICR_SCD_SCC4 | CICR_SCC_SCC3 | CICR_SCB_SCC2 | CICR_SCA_SCC1) |
 		((hwirq/2) << 13) | CICR_HP_MASK);
diff --git a/arch/powerpc/sysdev/cpm2_pic.c b/arch/powerpc/sysdev/cpm2_pic.c
index 5495c1be472..bcab50e2a9e 100644
--- a/arch/powerpc/sysdev/cpm2_pic.c
+++ b/arch/powerpc/sysdev/cpm2_pic.c
@@ -81,7 +81,7 @@ static const u_char irq_to_siubit[] = {
 static void cpm2_mask_irq(struct irq_data *d)
 {
 	int	bit, word;
-	unsigned int irq_nr = virq_to_hw(d->irq);
+	unsigned int irq_nr = irqd_to_hwirq(d);
 
 	bit = irq_to_siubit[irq_nr];
 	word = irq_to_siureg[irq_nr];
@@ -93,7 +93,7 @@ static void cpm2_mask_irq(struct irq_data *d)
 static void cpm2_unmask_irq(struct irq_data *d)
 {
 	int	bit, word;
-	unsigned int irq_nr = virq_to_hw(d->irq);
+	unsigned int irq_nr = irqd_to_hwirq(d);
 
 	bit = irq_to_siubit[irq_nr];
 	word = irq_to_siureg[irq_nr];
@@ -105,7 +105,7 @@ static void cpm2_unmask_irq(struct irq_data *d)
 static void cpm2_ack(struct irq_data *d)
 {
 	int	bit, word;
-	unsigned int irq_nr = virq_to_hw(d->irq);
+	unsigned int irq_nr = irqd_to_hwirq(d);
 
 	bit = irq_to_siubit[irq_nr];
 	word = irq_to_siureg[irq_nr];
@@ -116,7 +116,7 @@ static void cpm2_ack(struct irq_data *d)
 static void cpm2_end_irq(struct irq_data *d)
 {
 	int	bit, word;
-	unsigned int irq_nr = virq_to_hw(d->irq);
+	unsigned int irq_nr = irqd_to_hwirq(d);
 
 	bit = irq_to_siubit[irq_nr];
 	word = irq_to_siureg[irq_nr];
@@ -133,7 +133,7 @@ static void cpm2_end_irq(struct irq_data *d)
 
 static int cpm2_set_irq_type(struct irq_data *d, unsigned int flow_type)
 {
-	unsigned int src = virq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned int vold, vnew, edibit;
 
 	/* Port C interrupts are either IRQ_TYPE_EDGE_FALLING or
diff --git a/arch/powerpc/sysdev/fsl_85xx_cache_sram.c b/arch/powerpc/sysdev/fsl_85xx_cache_sram.c
index 54fb1922fe3..11641589917 100644
--- a/arch/powerpc/sysdev/fsl_85xx_cache_sram.c
+++ b/arch/powerpc/sysdev/fsl_85xx_cache_sram.c
@@ -106,10 +106,10 @@ int __init instantiate_cache_sram(struct platform_device *dev,
 		goto out_free;
 	}
 
-	cache_sram->base_virt = ioremap_flags(cache_sram->base_phys,
+	cache_sram->base_virt = ioremap_prot(cache_sram->base_phys,
 				cache_sram->size, _PAGE_COHERENT | PAGE_KERNEL);
 	if (!cache_sram->base_virt) {
-		dev_err(&dev->dev, "%s: ioremap_flags failed\n",
+		dev_err(&dev->dev, "%s: ioremap_prot failed\n",
 				dev->dev.of_node->full_name);
 		ret = -ENOMEM;
 		goto out_release;
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index d5679dc1e20..92e78333c47 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -110,7 +110,7 @@ static void fsl_teardown_msi_irqs(struct pci_dev *pdev)
 	list_for_each_entry(entry, &pdev->msi_list, list) {
 		if (entry->irq == NO_IRQ)
 			continue;
-		msi_data = irq_get_handler_data(entry->irq);
+		msi_data = irq_get_chip_data(entry->irq);
 		irq_set_msi_desc(entry->irq, NULL);
 		msi_bitmap_free_hwirqs(&msi_data->bitmap,
 				       virq_to_hw(entry->irq), 1);
@@ -168,7 +168,7 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 			rc = -ENOSPC;
 			goto out_free;
 		}
-		irq_set_handler_data(virq, msi_data);
+		/* chip_data is msi_data via host->hostdata in host->map() */
 		irq_set_msi_desc(virq, entry);
 
 		fsl_compose_msi_msg(pdev, hwirq, &msg, msi_data);
@@ -193,7 +193,7 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc)
 	u32 have_shift = 0;
 	struct fsl_msi_cascade_data *cascade_data;
 
-	cascade_data = (struct fsl_msi_cascade_data *)irq_get_handler_data(irq);
+	cascade_data = irq_get_handler_data(irq);
 	msi_data = cascade_data->msi_data;
 
 	raw_spin_lock(&desc->lock);
@@ -253,7 +253,7 @@ unlock:
 
 static int fsl_of_msi_remove(struct platform_device *ofdev)
 {
-	struct fsl_msi *msi = ofdev->dev.platform_data;
+	struct fsl_msi *msi = platform_get_drvdata(ofdev);
 	int virq, i;
 	struct fsl_msi_cascade_data *cascade_data;
 
@@ -304,8 +304,10 @@ static int __devinit fsl_msi_setup_hwirq(struct fsl_msi *msi,
 	return 0;
 }
 
+static const struct of_device_id fsl_of_msi_ids[];
 static int __devinit fsl_of_msi_probe(struct platform_device *dev)
 {
+	const struct of_device_id *match;
 	struct fsl_msi *msi;
 	struct resource res;
 	int err, i, j, irq_index, count;
@@ -316,9 +318,10 @@ static int __devinit fsl_of_msi_probe(struct platform_device *dev)
 	u32 offset;
 	static const u32 all_avail[] = { 0, NR_MSI_IRQS };
 
-	if (!dev->dev.of_match)
+	match = of_match_device(fsl_of_msi_ids, &dev->dev);
+	if (!match)
 		return -EINVAL;
-	features = dev->dev.of_match->data;
+	features = match->data;
 
 	printk(KERN_DEBUG "Setting up Freescale MSI support\n");
 
@@ -327,7 +330,7 @@ static int __devinit fsl_of_msi_probe(struct platform_device *dev)
 		dev_err(&dev->dev, "No memory for MSI structure\n");
 		return -ENOMEM;
 	}
-	dev->dev.platform_data = msi;
+	platform_set_drvdata(dev, msi);
 
 	msi->irqhost = irq_alloc_host(dev->dev.of_node, IRQ_HOST_MAP_LINEAR,
 				      NR_MSI_IRQS, &fsl_msi_host_ops, 0);
diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c
index 142770cb84b..d18bb27e4df 100644
--- a/arch/powerpc/sysdev/i8259.c
+++ b/arch/powerpc/sysdev/i8259.c
@@ -185,18 +185,6 @@ static int i8259_host_map(struct irq_host *h, unsigned int virq,
 	return 0;
 }
 
-static void i8259_host_unmap(struct irq_host *h, unsigned int virq)
-{
-	/* Make sure irq is masked in hardware */
-	i8259_mask_irq(irq_get_irq_data(virq));
-
-	/* remove chip and handler */
-	irq_set_chip_and_handler(virq, NULL, NULL);
-
-	/* Make sure it's completed */
-	synchronize_irq(virq);
-}
-
 static int i8259_host_xlate(struct irq_host *h, struct device_node *ct,
 			    const u32 *intspec, unsigned int intsize,
 			    irq_hw_number_t *out_hwirq, unsigned int *out_flags)
@@ -220,7 +208,6 @@ static int i8259_host_xlate(struct irq_host *h, struct device_node *ct,
 static struct irq_host_ops i8259_host_ops = {
 	.match = i8259_host_match,
 	.map = i8259_host_map,
-	.unmap = i8259_host_unmap,
 	.xlate = i8259_host_xlate,
 };
 
diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c
index fa438be962b..7367d17364c 100644
--- a/arch/powerpc/sysdev/ipic.c
+++ b/arch/powerpc/sysdev/ipic.c
@@ -18,7 +18,7 @@
 #include <linux/stddef.h>
 #include <linux/sched.h>
 #include <linux/signal.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/device.h>
 #include <linux/bootmem.h>
 #include <linux/spinlock.h>
@@ -521,12 +521,10 @@ static inline struct ipic * ipic_from_irq(unsigned int virq)
 	return primary_ipic;
 }
 
-#define ipic_irq_to_hw(virq)	((unsigned int)irq_map[virq].hwirq)
-
 static void ipic_unmask_irq(struct irq_data *d)
 {
 	struct ipic *ipic = ipic_from_irq(d->irq);
-	unsigned int src = ipic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	u32 temp;
 
@@ -542,7 +540,7 @@ static void ipic_unmask_irq(struct irq_data *d)
 static void ipic_mask_irq(struct irq_data *d)
 {
 	struct ipic *ipic = ipic_from_irq(d->irq);
-	unsigned int src = ipic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	u32 temp;
 
@@ -562,7 +560,7 @@ static void ipic_mask_irq(struct irq_data *d)
 static void ipic_ack_irq(struct irq_data *d)
 {
 	struct ipic *ipic = ipic_from_irq(d->irq);
-	unsigned int src = ipic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	u32 temp;
 
@@ -581,7 +579,7 @@ static void ipic_ack_irq(struct irq_data *d)
 static void ipic_mask_irq_and_ack(struct irq_data *d)
 {
 	struct ipic *ipic = ipic_from_irq(d->irq);
-	unsigned int src = ipic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	u32 temp;
 
@@ -604,7 +602,7 @@ static void ipic_mask_irq_and_ack(struct irq_data *d)
 static int ipic_set_irq_type(struct irq_data *d, unsigned int flow_type)
 {
 	struct ipic *ipic = ipic_from_irq(d->irq);
-	unsigned int src = ipic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned int vold, vnew, edibit;
 
 	if (flow_type == IRQ_TYPE_NONE)
@@ -793,7 +791,7 @@ struct ipic * __init ipic_init(struct device_node *node, unsigned int flags)
 int ipic_set_priority(unsigned int virq, unsigned int priority)
 {
 	struct ipic *ipic = ipic_from_irq(virq);
-	unsigned int src = ipic_irq_to_hw(virq);
+	unsigned int src = virq_to_hw(virq);
 	u32 temp;
 
 	if (priority > 7)
@@ -821,7 +819,7 @@ int ipic_set_priority(unsigned int virq, unsigned int priority)
 void ipic_set_highest_priority(unsigned int virq)
 {
 	struct ipic *ipic = ipic_from_irq(virq);
-	unsigned int src = ipic_irq_to_hw(virq);
+	unsigned int src = virq_to_hw(virq);
 	u32 temp;
 
 	temp = ipic_read(ipic->regs, IPIC_SICFR);
@@ -902,7 +900,7 @@ static struct {
 	u32 sercr;
 } ipic_saved_state;
 
-static int ipic_suspend(struct sys_device *sdev, pm_message_t state)
+static int ipic_suspend(void)
 {
 	struct ipic *ipic = primary_ipic;
 
@@ -933,7 +931,7 @@ static int ipic_suspend(struct sys_device *sdev, pm_message_t state)
 	return 0;
 }
 
-static int ipic_resume(struct sys_device *sdev)
+static void ipic_resume(void)
 {
 	struct ipic *ipic = primary_ipic;
 
@@ -949,44 +947,26 @@ static int ipic_resume(struct sys_device *sdev)
 	ipic_write(ipic->regs, IPIC_SECNR, ipic_saved_state.secnr);
 	ipic_write(ipic->regs, IPIC_SERMR, ipic_saved_state.sermr);
 	ipic_write(ipic->regs, IPIC_SERCR, ipic_saved_state.sercr);
-
-	return 0;
 }
 #else
 #define ipic_suspend NULL
 #define ipic_resume NULL
 #endif
 
-static struct sysdev_class ipic_sysclass = {
-	.name = "ipic",
+static struct syscore_ops ipic_syscore_ops = {
 	.suspend = ipic_suspend,
 	.resume = ipic_resume,
 };
 
-static struct sys_device device_ipic = {
-	.id		= 0,
-	.cls		= &ipic_sysclass,
-};
-
-static int __init init_ipic_sysfs(void)
+static int __init init_ipic_syscore(void)
 {
-	int rc;
-
 	if (!primary_ipic || !primary_ipic->regs)
 		return -ENODEV;
-	printk(KERN_DEBUG "Registering ipic with sysfs...\n");
 
-	rc = sysdev_class_register(&ipic_sysclass);
-	if (rc) {
-		printk(KERN_ERR "Failed registering ipic sys class\n");
-		return -ENODEV;
-	}
-	rc = sysdev_register(&device_ipic);
-	if (rc) {
-		printk(KERN_ERR "Failed registering ipic sys device\n");
-		return -ENODEV;
-	}
+	printk(KERN_DEBUG "Registering ipic system core operations\n");
+	register_syscore_ops(&ipic_syscore_ops);
+
 	return 0;
 }
 
-subsys_initcall(init_ipic_sysfs);
+subsys_initcall(init_ipic_syscore);
diff --git a/arch/powerpc/sysdev/mmio_nvram.c b/arch/powerpc/sysdev/mmio_nvram.c
index 20732420906..ddc877a3a23 100644
--- a/arch/powerpc/sysdev/mmio_nvram.c
+++ b/arch/powerpc/sysdev/mmio_nvram.c
@@ -115,6 +115,8 @@ int __init mmio_nvram_init(void)
 	int ret;
 
 	nvram_node = of_find_node_by_type(NULL, "nvram");
+	if (!nvram_node)
+		nvram_node = of_find_compatible_node(NULL, NULL, "nvram");
 	if (!nvram_node) {
 		printk(KERN_WARNING "nvram: no node found in device-tree\n");
 		return -ENODEV;
diff --git a/arch/powerpc/sysdev/mpc8xx_pic.c b/arch/powerpc/sysdev/mpc8xx_pic.c
index a88800ff4d0..20924f2246f 100644
--- a/arch/powerpc/sysdev/mpc8xx_pic.c
+++ b/arch/powerpc/sysdev/mpc8xx_pic.c
@@ -28,7 +28,7 @@ int cpm_get_irq(struct pt_regs *regs);
 static void mpc8xx_unmask_irq(struct irq_data *d)
 {
 	int	bit, word;
-	unsigned int irq_nr = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int irq_nr = (unsigned int)irqd_to_hwirq(d);
 
 	bit = irq_nr & 0x1f;
 	word = irq_nr >> 5;
@@ -40,7 +40,7 @@ static void mpc8xx_unmask_irq(struct irq_data *d)
 static void mpc8xx_mask_irq(struct irq_data *d)
 {
 	int	bit, word;
-	unsigned int irq_nr = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int irq_nr = (unsigned int)irqd_to_hwirq(d);
 
 	bit = irq_nr & 0x1f;
 	word = irq_nr >> 5;
@@ -52,7 +52,7 @@ static void mpc8xx_mask_irq(struct irq_data *d)
 static void mpc8xx_ack(struct irq_data *d)
 {
 	int	bit;
-	unsigned int irq_nr = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int irq_nr = (unsigned int)irqd_to_hwirq(d);
 
 	bit = irq_nr & 0x1f;
 	out_be32(&siu_reg->sc_sipend, 1 << (31-bit));
@@ -61,7 +61,7 @@ static void mpc8xx_ack(struct irq_data *d)
 static void mpc8xx_end_irq(struct irq_data *d)
 {
 	int bit, word;
-	unsigned int irq_nr = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int irq_nr = (unsigned int)irqd_to_hwirq(d);
 
 	bit = irq_nr & 0x1f;
 	word = irq_nr >> 5;
@@ -73,7 +73,7 @@ static void mpc8xx_end_irq(struct irq_data *d)
 static int mpc8xx_set_irq_type(struct irq_data *d, unsigned int flow_type)
 {
 	if (flow_type & IRQ_TYPE_EDGE_FALLING) {
-		irq_hw_number_t hw = (unsigned int)irq_map[d->irq].hwirq;
+		irq_hw_number_t hw = (unsigned int)irqd_to_hwirq(d);
 		unsigned int siel = in_be32(&siu_reg->sc_siel);
 
 		/* only external IRQ senses are programmable */
diff --git a/arch/powerpc/sysdev/mpc8xxx_gpio.c b/arch/powerpc/sysdev/mpc8xxx_gpio.c
index 0892a2841c2..fb4963abdf5 100644
--- a/arch/powerpc/sysdev/mpc8xxx_gpio.c
+++ b/arch/powerpc/sysdev/mpc8xxx_gpio.c
@@ -163,7 +163,7 @@ static void mpc8xxx_irq_unmask(struct irq_data *d)
 
 	spin_lock_irqsave(&mpc8xxx_gc->lock, flags);
 
-	setbits32(mm->regs + GPIO_IMR, mpc8xxx_gpio2mask(virq_to_hw(d->irq)));
+	setbits32(mm->regs + GPIO_IMR, mpc8xxx_gpio2mask(irqd_to_hwirq(d)));
 
 	spin_unlock_irqrestore(&mpc8xxx_gc->lock, flags);
 }
@@ -176,7 +176,7 @@ static void mpc8xxx_irq_mask(struct irq_data *d)
 
 	spin_lock_irqsave(&mpc8xxx_gc->lock, flags);
 
-	clrbits32(mm->regs + GPIO_IMR, mpc8xxx_gpio2mask(virq_to_hw(d->irq)));
+	clrbits32(mm->regs + GPIO_IMR, mpc8xxx_gpio2mask(irqd_to_hwirq(d)));
 
 	spin_unlock_irqrestore(&mpc8xxx_gc->lock, flags);
 }
@@ -186,7 +186,7 @@ static void mpc8xxx_irq_ack(struct irq_data *d)
 	struct mpc8xxx_gpio_chip *mpc8xxx_gc = irq_data_get_irq_chip_data(d);
 	struct of_mm_gpio_chip *mm = &mpc8xxx_gc->mm_gc;
 
-	out_be32(mm->regs + GPIO_IER, mpc8xxx_gpio2mask(virq_to_hw(d->irq)));
+	out_be32(mm->regs + GPIO_IER, mpc8xxx_gpio2mask(irqd_to_hwirq(d)));
 }
 
 static int mpc8xxx_irq_set_type(struct irq_data *d, unsigned int flow_type)
@@ -199,14 +199,14 @@ static int mpc8xxx_irq_set_type(struct irq_data *d, unsigned int flow_type)
 	case IRQ_TYPE_EDGE_FALLING:
 		spin_lock_irqsave(&mpc8xxx_gc->lock, flags);
 		setbits32(mm->regs + GPIO_ICR,
-			  mpc8xxx_gpio2mask(virq_to_hw(d->irq)));
+			  mpc8xxx_gpio2mask(irqd_to_hwirq(d)));
 		spin_unlock_irqrestore(&mpc8xxx_gc->lock, flags);
 		break;
 
 	case IRQ_TYPE_EDGE_BOTH:
 		spin_lock_irqsave(&mpc8xxx_gc->lock, flags);
 		clrbits32(mm->regs + GPIO_ICR,
-			  mpc8xxx_gpio2mask(virq_to_hw(d->irq)));
+			  mpc8xxx_gpio2mask(irqd_to_hwirq(d)));
 		spin_unlock_irqrestore(&mpc8xxx_gc->lock, flags);
 		break;
 
@@ -221,7 +221,7 @@ static int mpc512x_irq_set_type(struct irq_data *d, unsigned int flow_type)
 {
 	struct mpc8xxx_gpio_chip *mpc8xxx_gc = irq_data_get_irq_chip_data(d);
 	struct of_mm_gpio_chip *mm = &mpc8xxx_gc->mm_gc;
-	unsigned long gpio = virq_to_hw(d->irq);
+	unsigned long gpio = irqd_to_hwirq(d);
 	void __iomem *reg;
 	unsigned int shift;
 	unsigned long flags;
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index f91c065bed5..3a8de5bb628 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -6,6 +6,7 @@
  *  with various broken implementations of this HW.
  *
  *  Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
+ *  Copyright 2010-2011 Freescale Semiconductor, Inc.
  *
  *  This file is subject to the terms and conditions of the GNU General Public
  *  License.  See the file COPYING in the main directory of this archive
@@ -27,6 +28,7 @@
 #include <linux/spinlock.h>
 #include <linux/pci.h>
 #include <linux/slab.h>
+#include <linux/syscore_ops.h>
 
 #include <asm/ptrace.h>
 #include <asm/signal.h>
@@ -218,6 +220,28 @@ static inline void _mpic_ipi_write(struct mpic *mpic, unsigned int ipi, u32 valu
 	_mpic_write(mpic->reg_type, &mpic->gregs, offset, value);
 }
 
+static inline u32 _mpic_tm_read(struct mpic *mpic, unsigned int tm)
+{
+	unsigned int offset = MPIC_INFO(TIMER_VECTOR_PRI) +
+			      ((tm & 3) * MPIC_INFO(TIMER_STRIDE));
+
+	if (tm >= 4)
+		offset += 0x1000 / 4;
+
+	return _mpic_read(mpic->reg_type, &mpic->tmregs, offset);
+}
+
+static inline void _mpic_tm_write(struct mpic *mpic, unsigned int tm, u32 value)
+{
+	unsigned int offset = MPIC_INFO(TIMER_VECTOR_PRI) +
+			      ((tm & 3) * MPIC_INFO(TIMER_STRIDE));
+
+	if (tm >= 4)
+		offset += 0x1000 / 4;
+
+	_mpic_write(mpic->reg_type, &mpic->tmregs, offset, value);
+}
+
 static inline u32 _mpic_cpu_read(struct mpic *mpic, unsigned int reg)
 {
 	unsigned int cpu = mpic_processor_id(mpic);
@@ -268,6 +292,8 @@ static inline void _mpic_irq_write(struct mpic *mpic, unsigned int src_no,
 #define mpic_write(b,r,v)	_mpic_write(mpic->reg_type,&(b),(r),(v))
 #define mpic_ipi_read(i)	_mpic_ipi_read(mpic,(i))
 #define mpic_ipi_write(i,v)	_mpic_ipi_write(mpic,(i),(v))
+#define mpic_tm_read(i)		_mpic_tm_read(mpic,(i))
+#define mpic_tm_write(i,v)	_mpic_tm_write(mpic,(i),(v))
 #define mpic_cpu_read(i)	_mpic_cpu_read(mpic,(i))
 #define mpic_cpu_write(i,v)	_mpic_cpu_write(mpic,(i),(v))
 #define mpic_irq_read(s,r)	_mpic_irq_read(mpic,(s),(r))
@@ -607,8 +633,6 @@ static int irq_choose_cpu(const struct cpumask *mask)
 }
 #endif
 
-#define mpic_irq_to_hw(virq)	((unsigned int)irq_map[virq].hwirq)
-
 /* Find an mpic associated with a given linux interrupt */
 static struct mpic *mpic_find(unsigned int irq)
 {
@@ -621,11 +645,18 @@ static struct mpic *mpic_find(unsigned int irq)
 /* Determine if the linux irq is an IPI */
 static unsigned int mpic_is_ipi(struct mpic *mpic, unsigned int irq)
 {
-	unsigned int src = mpic_irq_to_hw(irq);
+	unsigned int src = virq_to_hw(irq);
 
 	return (src >= mpic->ipi_vecs[0] && src <= mpic->ipi_vecs[3]);
 }
 
+/* Determine if the linux irq is a timer */
+static unsigned int mpic_is_tm(struct mpic *mpic, unsigned int irq)
+{
+	unsigned int src = virq_to_hw(irq);
+
+	return (src >= mpic->timer_vecs[0] && src <= mpic->timer_vecs[7]);
+}
 
 /* Convert a cpu mask from logical to physical cpu numbers. */
 static inline u32 mpic_physmask(u32 cpumask)
@@ -633,7 +664,7 @@ static inline u32 mpic_physmask(u32 cpumask)
 	int i;
 	u32 mask = 0;
 
-	for (i = 0; i < NR_CPUS; ++i, cpumask >>= 1)
+	for (i = 0; i < min(32, NR_CPUS); ++i, cpumask >>= 1)
 		mask |= (cpumask & 1) << get_hard_smp_processor_id(i);
 	return mask;
 }
@@ -674,7 +705,7 @@ void mpic_unmask_irq(struct irq_data *d)
 {
 	unsigned int loops = 100000;
 	struct mpic *mpic = mpic_from_irq_data(d);
-	unsigned int src = mpic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 
 	DBG("%p: %s: enable_irq: %d (src %d)\n", mpic, mpic->name, d->irq, src);
 
@@ -695,7 +726,7 @@ void mpic_mask_irq(struct irq_data *d)
 {
 	unsigned int loops = 100000;
 	struct mpic *mpic = mpic_from_irq_data(d);
-	unsigned int src = mpic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 
 	DBG("%s: disable_irq: %d (src %d)\n", mpic->name, d->irq, src);
 
@@ -733,7 +764,7 @@ void mpic_end_irq(struct irq_data *d)
 static void mpic_unmask_ht_irq(struct irq_data *d)
 {
 	struct mpic *mpic = mpic_from_irq_data(d);
-	unsigned int src = mpic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 
 	mpic_unmask_irq(d);
 
@@ -744,7 +775,7 @@ static void mpic_unmask_ht_irq(struct irq_data *d)
 static unsigned int mpic_startup_ht_irq(struct irq_data *d)
 {
 	struct mpic *mpic = mpic_from_irq_data(d);
-	unsigned int src = mpic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 
 	mpic_unmask_irq(d);
 	mpic_startup_ht_interrupt(mpic, src, irqd_is_level_type(d));
@@ -755,7 +786,7 @@ static unsigned int mpic_startup_ht_irq(struct irq_data *d)
 static void mpic_shutdown_ht_irq(struct irq_data *d)
 {
 	struct mpic *mpic = mpic_from_irq_data(d);
-	unsigned int src = mpic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 
 	mpic_shutdown_ht_interrupt(mpic, src);
 	mpic_mask_irq(d);
@@ -764,7 +795,7 @@ static void mpic_shutdown_ht_irq(struct irq_data *d)
 static void mpic_end_ht_irq(struct irq_data *d)
 {
 	struct mpic *mpic = mpic_from_irq_data(d);
-	unsigned int src = mpic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 
 #ifdef DEBUG_IRQ
 	DBG("%s: end_irq: %d\n", mpic->name, d->irq);
@@ -785,7 +816,7 @@ static void mpic_end_ht_irq(struct irq_data *d)
 static void mpic_unmask_ipi(struct irq_data *d)
 {
 	struct mpic *mpic = mpic_from_ipi(d);
-	unsigned int src = mpic_irq_to_hw(d->irq) - mpic->ipi_vecs[0];
+	unsigned int src = virq_to_hw(d->irq) - mpic->ipi_vecs[0];
 
 	DBG("%s: enable_ipi: %d (ipi %d)\n", mpic->name, d->irq, src);
 	mpic_ipi_write(src, mpic_ipi_read(src) & ~MPIC_VECPRI_MASK);
@@ -812,27 +843,42 @@ static void mpic_end_ipi(struct irq_data *d)
 
 #endif /* CONFIG_SMP */
 
+static void mpic_unmask_tm(struct irq_data *d)
+{
+	struct mpic *mpic = mpic_from_irq_data(d);
+	unsigned int src = virq_to_hw(d->irq) - mpic->timer_vecs[0];
+
+	DBG("%s: enable_tm: %d (tm %d)\n", mpic->name, irq, src);
+	mpic_tm_write(src, mpic_tm_read(src) & ~MPIC_VECPRI_MASK);
+	mpic_tm_read(src);
+}
+
+static void mpic_mask_tm(struct irq_data *d)
+{
+	struct mpic *mpic = mpic_from_irq_data(d);
+	unsigned int src = virq_to_hw(d->irq) - mpic->timer_vecs[0];
+
+	mpic_tm_write(src, mpic_tm_read(src) | MPIC_VECPRI_MASK);
+	mpic_tm_read(src);
+}
+
 int mpic_set_affinity(struct irq_data *d, const struct cpumask *cpumask,
 		      bool force)
 {
 	struct mpic *mpic = mpic_from_irq_data(d);
-	unsigned int src = mpic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 
 	if (mpic->flags & MPIC_SINGLE_DEST_CPU) {
 		int cpuid = irq_choose_cpu(cpumask);
 
 		mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION), 1 << cpuid);
 	} else {
-		cpumask_var_t tmp;
-
-		alloc_cpumask_var(&tmp, GFP_KERNEL);
+		u32 mask = cpumask_bits(cpumask)[0];
 
-		cpumask_and(tmp, cpumask, cpu_online_mask);
+		mask &= cpumask_bits(cpu_online_mask)[0];
 
 		mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION),
-			       mpic_physmask(cpumask_bits(tmp)[0]));
-
-		free_cpumask_var(tmp);
+			       mpic_physmask(mask));
 	}
 
 	return 0;
@@ -862,7 +908,7 @@ static unsigned int mpic_type_to_vecpri(struct mpic *mpic, unsigned int type)
 int mpic_set_irq_type(struct irq_data *d, unsigned int flow_type)
 {
 	struct mpic *mpic = mpic_from_irq_data(d);
-	unsigned int src = mpic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned int vecpri, vold, vnew;
 
 	DBG("mpic: set_irq_type(mpic:@%p,virq:%d,src:0x%x,type:0x%x)\n",
@@ -898,7 +944,7 @@ int mpic_set_irq_type(struct irq_data *d, unsigned int flow_type)
 void mpic_set_vector(unsigned int virq, unsigned int vector)
 {
 	struct mpic *mpic = mpic_from_irq(virq);
-	unsigned int src = mpic_irq_to_hw(virq);
+	unsigned int src = virq_to_hw(virq);
 	unsigned int vecpri;
 
 	DBG("mpic: set_vector(mpic:@%p,virq:%d,src:%d,vector:0x%x)\n",
@@ -916,7 +962,7 @@ void mpic_set_vector(unsigned int virq, unsigned int vector)
 void mpic_set_destination(unsigned int virq, unsigned int cpuid)
 {
 	struct mpic *mpic = mpic_from_irq(virq);
-	unsigned int src = mpic_irq_to_hw(virq);
+	unsigned int src = virq_to_hw(virq);
 
 	DBG("mpic: set_destination(mpic:@%p,virq:%d,src:%d,cpuid:0x%x)\n",
 	    mpic, virq, src, cpuid);
@@ -942,6 +988,12 @@ static struct irq_chip mpic_ipi_chip = {
 };
 #endif /* CONFIG_SMP */
 
+static struct irq_chip mpic_tm_chip = {
+	.irq_mask	= mpic_mask_tm,
+	.irq_unmask	= mpic_unmask_tm,
+	.irq_eoi	= mpic_end_irq,
+};
+
 #ifdef CONFIG_MPIC_U3_HT_IRQS
 static struct irq_chip mpic_irq_ht_chip = {
 	.irq_startup	= mpic_startup_ht_irq,
@@ -985,6 +1037,16 @@ static int mpic_host_map(struct irq_host *h, unsigned int virq,
 	}
 #endif /* CONFIG_SMP */
 
+	if (hw >= mpic->timer_vecs[0] && hw <= mpic->timer_vecs[7]) {
+		WARN_ON(!(mpic->flags & MPIC_PRIMARY));
+
+		DBG("mpic: mapping as timer\n");
+		irq_set_chip_data(virq, mpic);
+		irq_set_chip_and_handler(virq, &mpic->hc_tm,
+					 handle_fasteoi_irq);
+		return 0;
+	}
+
 	if (hw >= mpic->irq_count)
 		return -EINVAL;
 
@@ -1025,6 +1087,7 @@ static int mpic_host_xlate(struct irq_host *h, struct device_node *ct,
 			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
 
 {
+	struct mpic *mpic = h->host_data;
 	static unsigned char map_mpic_senses[4] = {
 		IRQ_TYPE_EDGE_RISING,
 		IRQ_TYPE_LEVEL_LOW,
@@ -1033,7 +1096,38 @@ static int mpic_host_xlate(struct irq_host *h, struct device_node *ct,
 	};
 
 	*out_hwirq = intspec[0];
-	if (intsize > 1) {
+	if (intsize >= 4 && (mpic->flags & MPIC_FSL)) {
+		/*
+		 * Freescale MPIC with extended intspec:
+		 * First two cells are as usual.  Third specifies
+		 * an "interrupt type".  Fourth is type-specific data.
+		 *
+		 * See Documentation/devicetree/bindings/powerpc/fsl/mpic.txt
+		 */
+		switch (intspec[2]) {
+		case 0:
+		case 1: /* no EISR/EIMR support for now, treat as shared IRQ */
+			break;
+		case 2:
+			if (intspec[0] >= ARRAY_SIZE(mpic->ipi_vecs))
+				return -EINVAL;
+
+			*out_hwirq = mpic->ipi_vecs[intspec[0]];
+			break;
+		case 3:
+			if (intspec[0] >= ARRAY_SIZE(mpic->timer_vecs))
+				return -EINVAL;
+
+			*out_hwirq = mpic->timer_vecs[intspec[0]];
+			break;
+		default:
+			pr_debug("%s: unknown irq type %u\n",
+				 __func__, intspec[2]);
+			return -EINVAL;
+		}
+
+		*out_flags = map_mpic_senses[intspec[1] & 3];
+	} else if (intsize > 1) {
 		u32 mask = 0x3;
 
 		/* Apple invented a new race of encoding on machines with
@@ -1109,6 +1203,9 @@ struct mpic * __init mpic_alloc(struct device_node *node,
 	mpic->hc_ipi.name = name;
 #endif /* CONFIG_SMP */
 
+	mpic->hc_tm = mpic_tm_chip;
+	mpic->hc_tm.name = name;
+
 	mpic->flags = flags;
 	mpic->isu_size = isu_size;
 	mpic->irq_count = irq_count;
@@ -1119,10 +1216,14 @@ struct mpic * __init mpic_alloc(struct device_node *node,
 	else
 		intvec_top = 255;
 
-	mpic->timer_vecs[0] = intvec_top - 8;
-	mpic->timer_vecs[1] = intvec_top - 7;
-	mpic->timer_vecs[2] = intvec_top - 6;
-	mpic->timer_vecs[3] = intvec_top - 5;
+	mpic->timer_vecs[0] = intvec_top - 12;
+	mpic->timer_vecs[1] = intvec_top - 11;
+	mpic->timer_vecs[2] = intvec_top - 10;
+	mpic->timer_vecs[3] = intvec_top - 9;
+	mpic->timer_vecs[4] = intvec_top - 8;
+	mpic->timer_vecs[5] = intvec_top - 7;
+	mpic->timer_vecs[6] = intvec_top - 6;
+	mpic->timer_vecs[7] = intvec_top - 5;
 	mpic->ipi_vecs[0]   = intvec_top - 4;
 	mpic->ipi_vecs[1]   = intvec_top - 3;
 	mpic->ipi_vecs[2]   = intvec_top - 2;
@@ -1132,6 +1233,8 @@ struct mpic * __init mpic_alloc(struct device_node *node,
 	/* Check for "big-endian" in device-tree */
 	if (node && of_get_property(node, "big-endian", NULL) != NULL)
 		mpic->flags |= MPIC_BIG_ENDIAN;
+	if (node && of_device_is_compatible(node, "fsl,mpic"))
+		mpic->flags |= MPIC_FSL;
 
 	/* Look for protected sources */
 	if (node) {
@@ -1323,15 +1426,17 @@ void __init mpic_init(struct mpic *mpic)
 	/* Set current processor priority to max */
 	mpic_cpu_write(MPIC_INFO(CPU_CURRENT_TASK_PRI), 0xf);
 
-	/* Initialize timers: just disable them all */
+	/* Initialize timers to our reserved vectors and mask them for now */
 	for (i = 0; i < 4; i++) {
 		mpic_write(mpic->tmregs,
 			   i * MPIC_INFO(TIMER_STRIDE) +
-			   MPIC_INFO(TIMER_DESTINATION), 0);
+			   MPIC_INFO(TIMER_DESTINATION),
+			   1 << hard_smp_processor_id());
 		mpic_write(mpic->tmregs,
 			   i * MPIC_INFO(TIMER_STRIDE) +
 			   MPIC_INFO(TIMER_VECTOR_PRI),
 			   MPIC_VECPRI_MASK |
+			   (9 << MPIC_VECPRI_PRIORITY_SHIFT) |
 			   (mpic->timer_vecs[0] + i));
 	}
 
@@ -1427,7 +1532,7 @@ void __init mpic_set_serial_int(struct mpic *mpic, int enable)
 void mpic_irq_set_priority(unsigned int irq, unsigned int pri)
 {
 	struct mpic *mpic = mpic_find(irq);
-	unsigned int src = mpic_irq_to_hw(irq);
+	unsigned int src = virq_to_hw(irq);
 	unsigned long flags;
 	u32 reg;
 
@@ -1440,6 +1545,11 @@ void mpic_irq_set_priority(unsigned int irq, unsigned int pri)
 			~MPIC_VECPRI_PRIORITY_MASK;
 		mpic_ipi_write(src - mpic->ipi_vecs[0],
 			       reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT));
+	} else if (mpic_is_tm(mpic, irq)) {
+		reg = mpic_tm_read(src - mpic->timer_vecs[0]) &
+			~MPIC_VECPRI_PRIORITY_MASK;
+		mpic_tm_write(src - mpic->timer_vecs[0],
+			      reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT));
 	} else {
 		reg = mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI))
 			& ~MPIC_VECPRI_PRIORITY_MASK;
@@ -1619,46 +1729,28 @@ void mpic_request_ipis(void)
 	}
 }
 
-static void mpic_send_ipi(unsigned int ipi_no, const struct cpumask *cpu_mask)
+void smp_mpic_message_pass(int cpu, int msg)
 {
 	struct mpic *mpic = mpic_primary;
+	u32 physmask;
 
 	BUG_ON(mpic == NULL);
 
-#ifdef DEBUG_IPI
-	DBG("%s: send_ipi(ipi_no: %d)\n", mpic->name, ipi_no);
-#endif
-
-	mpic_cpu_write(MPIC_INFO(CPU_IPI_DISPATCH_0) +
-		       ipi_no * MPIC_INFO(CPU_IPI_DISPATCH_STRIDE),
-		       mpic_physmask(cpumask_bits(cpu_mask)[0]));
-}
-
-void smp_mpic_message_pass(int target, int msg)
-{
-	cpumask_var_t tmp;
-
 	/* make sure we're sending something that translates to an IPI */
 	if ((unsigned int)msg > 3) {
 		printk("SMP %d: smp_message_pass: unknown msg %d\n",
 		       smp_processor_id(), msg);
 		return;
 	}
-	switch (target) {
-	case MSG_ALL:
-		mpic_send_ipi(msg, cpu_online_mask);
-		break;
-	case MSG_ALL_BUT_SELF:
-		alloc_cpumask_var(&tmp, GFP_NOWAIT);
-		cpumask_andnot(tmp, cpu_online_mask,
-			       cpumask_of(smp_processor_id()));
-		mpic_send_ipi(msg, tmp);
-		free_cpumask_var(tmp);
-		break;
-	default:
-		mpic_send_ipi(msg, cpumask_of(target));
-		break;
-	}
+
+#ifdef DEBUG_IPI
+	DBG("%s: send_ipi(ipi_no: %d)\n", mpic->name, msg);
+#endif
+
+	physmask = 1 << get_hard_smp_processor_id(cpu);
+
+	mpic_cpu_write(MPIC_INFO(CPU_IPI_DISPATCH_0) +
+		       msg * MPIC_INFO(CPU_IPI_DISPATCH_STRIDE), physmask);
 }
 
 int __init smp_mpic_probe(void)
@@ -1702,9 +1794,8 @@ void mpic_reset_core(int cpu)
 #endif /* CONFIG_SMP */
 
 #ifdef CONFIG_PM
-static int mpic_suspend(struct sys_device *dev, pm_message_t state)
+static void mpic_suspend_one(struct mpic *mpic)
 {
-	struct mpic *mpic = container_of(dev, struct mpic, sysdev);
 	int i;
 
 	for (i = 0; i < mpic->num_sources; i++) {
@@ -1713,13 +1804,22 @@ static int mpic_suspend(struct sys_device *dev, pm_message_t state)
 		mpic->save_data[i].dest =
 			mpic_irq_read(i, MPIC_INFO(IRQ_DESTINATION));
 	}
+}
+
+static int mpic_suspend(void)
+{
+	struct mpic *mpic = mpics;
+
+	while (mpic) {
+		mpic_suspend_one(mpic);
+		mpic = mpic->next;
+	}
 
 	return 0;
 }
 
-static int mpic_resume(struct sys_device *dev)
+static void mpic_resume_one(struct mpic *mpic)
 {
-	struct mpic *mpic = container_of(dev, struct mpic, sysdev);
 	int i;
 
 	for (i = 0; i < mpic->num_sources; i++) {
@@ -1746,33 +1846,28 @@ static int mpic_resume(struct sys_device *dev)
 	}
 #endif
 	} /* end for loop */
+}
 
-	return 0;
+static void mpic_resume(void)
+{
+	struct mpic *mpic = mpics;
+
+	while (mpic) {
+		mpic_resume_one(mpic);
+		mpic = mpic->next;
+	}
 }
-#endif
 
-static struct sysdev_class mpic_sysclass = {
-#ifdef CONFIG_PM
+static struct syscore_ops mpic_syscore_ops = {
 	.resume = mpic_resume,
 	.suspend = mpic_suspend,
-#endif
-	.name = "mpic",
 };
 
 static int mpic_init_sys(void)
 {
-	struct mpic *mpic = mpics;
-	int error, id = 0;
-
-	error = sysdev_class_register(&mpic_sysclass);
-
-	while (mpic && !error) {
-		mpic->sysdev.cls = &mpic_sysclass;
-		mpic->sysdev.id = id++;
-		error = sysdev_register(&mpic->sysdev);
-		mpic = mpic->next;
-	}
-	return error;
+	register_syscore_ops(&mpic_syscore_ops);
+	return 0;
 }
 
 device_initcall(mpic_init_sys);
+#endif
diff --git a/arch/powerpc/sysdev/mv64x60_pic.c b/arch/powerpc/sysdev/mv64x60_pic.c
index e9c633c7c08..14d130268e7 100644
--- a/arch/powerpc/sysdev/mv64x60_pic.c
+++ b/arch/powerpc/sysdev/mv64x60_pic.c
@@ -78,7 +78,7 @@ static struct irq_host *mv64x60_irq_host;
 
 static void mv64x60_mask_low(struct irq_data *d)
 {
-	int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK;
+	int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
 	unsigned long flags;
 
 	spin_lock_irqsave(&mv64x60_lock, flags);
@@ -91,7 +91,7 @@ static void mv64x60_mask_low(struct irq_data *d)
 
 static void mv64x60_unmask_low(struct irq_data *d)
 {
-	int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK;
+	int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
 	unsigned long flags;
 
 	spin_lock_irqsave(&mv64x60_lock, flags);
@@ -115,7 +115,7 @@ static struct irq_chip mv64x60_chip_low = {
 
 static void mv64x60_mask_high(struct irq_data *d)
 {
-	int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK;
+	int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
 	unsigned long flags;
 
 	spin_lock_irqsave(&mv64x60_lock, flags);
@@ -128,7 +128,7 @@ static void mv64x60_mask_high(struct irq_data *d)
 
 static void mv64x60_unmask_high(struct irq_data *d)
 {
-	int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK;
+	int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
 	unsigned long flags;
 
 	spin_lock_irqsave(&mv64x60_lock, flags);
@@ -152,7 +152,7 @@ static struct irq_chip mv64x60_chip_high = {
 
 static void mv64x60_mask_gpp(struct irq_data *d)
 {
-	int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK;
+	int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
 	unsigned long flags;
 
 	spin_lock_irqsave(&mv64x60_lock, flags);
@@ -165,7 +165,7 @@ static void mv64x60_mask_gpp(struct irq_data *d)
 
 static void mv64x60_mask_ack_gpp(struct irq_data *d)
 {
-	int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK;
+	int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
 	unsigned long flags;
 
 	spin_lock_irqsave(&mv64x60_lock, flags);
@@ -180,7 +180,7 @@ static void mv64x60_mask_ack_gpp(struct irq_data *d)
 
 static void mv64x60_unmask_gpp(struct irq_data *d)
 {
-	int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK;
+	int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
 	unsigned long flags;
 
 	spin_lock_irqsave(&mv64x60_lock, flags);
diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.c b/arch/powerpc/sysdev/qe_lib/qe_ic.c
index 832d6924ad1..b2acda07220 100644
--- a/arch/powerpc/sysdev/qe_lib/qe_ic.c
+++ b/arch/powerpc/sysdev/qe_lib/qe_ic.c
@@ -197,12 +197,10 @@ static inline struct qe_ic *qe_ic_from_irq_data(struct irq_data *d)
 	return irq_data_get_irq_chip_data(d);
 }
 
-#define virq_to_hw(virq)	((unsigned int)irq_map[virq].hwirq)
-
 static void qe_ic_unmask_irq(struct irq_data *d)
 {
 	struct qe_ic *qe_ic = qe_ic_from_irq_data(d);
-	unsigned int src = virq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	u32 temp;
 
@@ -218,7 +216,7 @@ static void qe_ic_unmask_irq(struct irq_data *d)
 static void qe_ic_mask_irq(struct irq_data *d)
 {
 	struct qe_ic *qe_ic = qe_ic_from_irq_data(d);
-	unsigned int src = virq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	u32 temp;
 
diff --git a/arch/powerpc/sysdev/scom.c b/arch/powerpc/sysdev/scom.c
new file mode 100644
index 00000000000..b2593ce30c9
--- /dev/null
+++ b/arch/powerpc/sysdev/scom.c
@@ -0,0 +1,192 @@
+/*
+ * Copyright 2010 Benjamin Herrenschmidt, IBM Corp
+ *                <benh@kernel.crashing.org>
+ *     and        David Gibson, IBM Corporation.
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program;  if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+#include <asm/prom.h>
+#include <asm/scom.h>
+
+const struct scom_controller *scom_controller;
+EXPORT_SYMBOL_GPL(scom_controller);
+
+struct device_node *scom_find_parent(struct device_node *node)
+{
+	struct device_node *par, *tmp;
+	const u32 *p;
+
+	for (par = of_node_get(node); par;) {
+		if (of_get_property(par, "scom-controller", NULL))
+			break;
+		p = of_get_property(par, "scom-parent", NULL);
+		tmp = par;
+		if (p == NULL)
+			par = of_get_parent(par);
+		else
+			par = of_find_node_by_phandle(*p);
+		of_node_put(tmp);
+	}
+	return par;
+}
+EXPORT_SYMBOL_GPL(scom_find_parent);
+
+scom_map_t scom_map_device(struct device_node *dev, int index)
+{
+	struct device_node *parent;
+	unsigned int cells, size;
+	const u32 *prop;
+	u64 reg, cnt;
+	scom_map_t ret;
+
+	parent = scom_find_parent(dev);
+
+	if (parent == NULL)
+		return 0;
+
+	prop = of_get_property(parent, "#scom-cells", NULL);
+	cells = prop ? *prop : 1;
+
+	prop = of_get_property(dev, "scom-reg", &size);
+	if (!prop)
+		return 0;
+	size >>= 2;
+
+	if (index >= (size / (2*cells)))
+		return 0;
+
+	reg = of_read_number(&prop[index * cells * 2], cells);
+	cnt = of_read_number(&prop[index * cells * 2 + cells], cells);
+
+	ret = scom_map(parent, reg, cnt);
+	of_node_put(parent);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(scom_map_device);
+
+#ifdef CONFIG_SCOM_DEBUGFS
+struct scom_debug_entry {
+	struct device_node *dn;
+	unsigned long addr;
+	scom_map_t map;
+	spinlock_t lock;
+	char name[8];
+	struct debugfs_blob_wrapper blob;
+};
+
+static int scom_addr_set(void *data, u64 val)
+{
+	struct scom_debug_entry *ent = data;
+
+	ent->addr = 0;
+	scom_unmap(ent->map);
+
+	ent->map = scom_map(ent->dn, val, 1);
+	if (scom_map_ok(ent->map))
+		ent->addr = val;
+	else
+		return -EFAULT;
+
+	return 0;
+}
+
+static int scom_addr_get(void *data, u64 *val)
+{
+	struct scom_debug_entry *ent = data;
+	*val = ent->addr;
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(scom_addr_fops, scom_addr_get, scom_addr_set,
+			"0x%llx\n");
+
+static int scom_val_set(void *data, u64 val)
+{
+	struct scom_debug_entry *ent = data;
+
+	if (!scom_map_ok(ent->map))
+		return -EFAULT;
+
+	scom_write(ent->map, 0, val);
+
+	return 0;
+}
+
+static int scom_val_get(void *data, u64 *val)
+{
+	struct scom_debug_entry *ent = data;
+
+	if (!scom_map_ok(ent->map))
+		return -EFAULT;
+
+	*val = scom_read(ent->map, 0);
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(scom_val_fops, scom_val_get, scom_val_set,
+			"0x%llx\n");
+
+static int scom_debug_init_one(struct dentry *root, struct device_node *dn,
+			       int i)
+{
+	struct scom_debug_entry *ent;
+	struct dentry *dir;
+
+	ent = kzalloc(sizeof(*ent), GFP_KERNEL);
+	if (!ent)
+		return -ENOMEM;
+
+	ent->dn = of_node_get(dn);
+	ent->map = SCOM_MAP_INVALID;
+	spin_lock_init(&ent->lock);
+	snprintf(ent->name, 8, "scom%d", i);
+	ent->blob.data = dn->full_name;
+	ent->blob.size = strlen(dn->full_name);
+
+	dir = debugfs_create_dir(ent->name, root);
+	if (!dir) {
+		of_node_put(dn);
+		kfree(ent);
+		return -1;
+	}
+
+	debugfs_create_file("addr", 0600, dir, ent, &scom_addr_fops);
+	debugfs_create_file("value", 0600, dir, ent, &scom_val_fops);
+	debugfs_create_blob("path", 0400, dir, &ent->blob);
+
+	return 0;
+}
+
+static int scom_debug_init(void)
+{
+	struct device_node *dn;
+	struct dentry *root;
+	int i, rc;
+
+	root = debugfs_create_dir("scom", powerpc_debugfs_root);
+	if (!root)
+		return -1;
+
+	i = rc = 0;
+	for_each_node_with_property(dn, "scom-controller")
+		rc |= scom_debug_init_one(root, dn, i++);
+
+	return rc;
+}
+device_initcall(scom_debug_init);
+#endif /* CONFIG_SCOM_DEBUGFS */
diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c
index 5d913851662..984cd202915 100644
--- a/arch/powerpc/sysdev/uic.c
+++ b/arch/powerpc/sysdev/uic.c
@@ -41,8 +41,6 @@
 #define UIC_VR		0x7
 #define UIC_VCR		0x8
 
-#define uic_irq_to_hw(virq)	(irq_map[virq].hwirq)
-
 struct uic *primary_uic;
 
 struct uic {
@@ -58,7 +56,7 @@ struct uic {
 static void uic_unmask_irq(struct irq_data *d)
 {
 	struct uic *uic = irq_data_get_irq_chip_data(d);
-	unsigned int src = uic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	u32 er, sr;
 
@@ -76,7 +74,7 @@ static void uic_unmask_irq(struct irq_data *d)
 static void uic_mask_irq(struct irq_data *d)
 {
 	struct uic *uic = irq_data_get_irq_chip_data(d);
-	unsigned int src = uic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	u32 er;
 
@@ -90,7 +88,7 @@ static void uic_mask_irq(struct irq_data *d)
 static void uic_ack_irq(struct irq_data *d)
 {
 	struct uic *uic = irq_data_get_irq_chip_data(d);
-	unsigned int src = uic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 
 	spin_lock_irqsave(&uic->lock, flags);
@@ -101,7 +99,7 @@ static void uic_ack_irq(struct irq_data *d)
 static void uic_mask_ack_irq(struct irq_data *d)
 {
 	struct uic *uic = irq_data_get_irq_chip_data(d);
-	unsigned int src = uic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	u32 er, sr;
 
@@ -126,7 +124,7 @@ static void uic_mask_ack_irq(struct irq_data *d)
 static int uic_set_irq_type(struct irq_data *d, unsigned int flow_type)
 {
 	struct uic *uic = irq_data_get_irq_chip_data(d);
-	unsigned int src = uic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	int trigger, polarity;
 	u32 tr, pr, mask;
diff --git a/arch/powerpc/sysdev/xics/Kconfig b/arch/powerpc/sysdev/xics/Kconfig
new file mode 100644
index 00000000000..0031eda320c
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/Kconfig
@@ -0,0 +1,13 @@
+config PPC_XICS
+       def_bool n
+       select PPC_SMP_MUXED_IPI
+
+config PPC_ICP_NATIVE
+       def_bool n
+
+config PPC_ICP_HV
+       def_bool n
+
+config PPC_ICS_RTAS
+       def_bool n
+
diff --git a/arch/powerpc/sysdev/xics/Makefile b/arch/powerpc/sysdev/xics/Makefile
new file mode 100644
index 00000000000..b75a6059337
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/Makefile
@@ -0,0 +1,6 @@
+subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+
+obj-y				+= xics-common.o
+obj-$(CONFIG_PPC_ICP_NATIVE)	+= icp-native.o
+obj-$(CONFIG_PPC_ICP_HV)	+= icp-hv.o
+obj-$(CONFIG_PPC_ICS_RTAS)	+= ics-rtas.o
diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c
new file mode 100644
index 00000000000..9518d367a64
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/icp-hv.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright 2011 IBM Corporation.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+#include <asm/io.h>
+#include <asm/hvcall.h>
+
+static inline unsigned int icp_hv_get_xirr(unsigned char cppr)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long rc;
+
+	rc = plpar_hcall(H_XIRR, retbuf, cppr);
+	if (rc != H_SUCCESS)
+		panic(" bad return code xirr - rc = %lx\n", rc);
+	return (unsigned int)retbuf[0];
+}
+
+static inline void icp_hv_set_xirr(unsigned int value)
+{
+	long rc = plpar_hcall_norets(H_EOI, value);
+	if (rc != H_SUCCESS)
+		panic("bad return code EOI - rc = %ld, value=%x\n", rc, value);
+}
+
+static inline void icp_hv_set_cppr(u8 value)
+{
+	long rc = plpar_hcall_norets(H_CPPR, value);
+	if (rc != H_SUCCESS)
+		panic("bad return code cppr - rc = %lx\n", rc);
+}
+
+static inline void icp_hv_set_qirr(int n_cpu , u8 value)
+{
+	long rc = plpar_hcall_norets(H_IPI, get_hard_smp_processor_id(n_cpu),
+				     value);
+	if (rc != H_SUCCESS)
+		panic("bad return code qirr - rc = %lx\n", rc);
+}
+
+static void icp_hv_eoi(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+
+	iosync();
+	icp_hv_set_xirr((xics_pop_cppr() << 24) | hw_irq);
+}
+
+static void icp_hv_teardown_cpu(void)
+{
+	int cpu = smp_processor_id();
+
+	/* Clear any pending IPI */
+	icp_hv_set_qirr(cpu, 0xff);
+}
+
+static void icp_hv_flush_ipi(void)
+{
+	/* We take the ipi irq but and never return so we
+	 * need to EOI the IPI, but want to leave our priority 0
+	 *
+	 * should we check all the other interrupts too?
+	 * should we be flagging idle loop instead?
+	 * or creating some task to be scheduled?
+	 */
+
+	icp_hv_set_xirr((0x00 << 24) | XICS_IPI);
+}
+
+static unsigned int icp_hv_get_irq(void)
+{
+	unsigned int xirr = icp_hv_get_xirr(xics_cppr_top());
+	unsigned int vec = xirr & 0x00ffffff;
+	unsigned int irq;
+
+	if (vec == XICS_IRQ_SPURIOUS)
+		return NO_IRQ;
+
+	irq = irq_radix_revmap_lookup(xics_host, vec);
+	if (likely(irq != NO_IRQ)) {
+		xics_push_cppr(vec);
+		return irq;
+	}
+
+	/* We don't have a linux mapping, so have rtas mask it. */
+	xics_mask_unknown_vec(vec);
+
+	/* We might learn about it later, so EOI it */
+	icp_hv_set_xirr(xirr);
+
+	return NO_IRQ;
+}
+
+static void icp_hv_set_cpu_priority(unsigned char cppr)
+{
+	xics_set_base_cppr(cppr);
+	icp_hv_set_cppr(cppr);
+	iosync();
+}
+
+#ifdef CONFIG_SMP
+
+static void icp_hv_cause_ipi(int cpu, unsigned long data)
+{
+	icp_hv_set_qirr(cpu, IPI_PRIORITY);
+}
+
+static irqreturn_t icp_hv_ipi_action(int irq, void *dev_id)
+{
+	int cpu = smp_processor_id();
+
+	icp_hv_set_qirr(cpu, 0xff);
+
+	return smp_ipi_demux();
+}
+
+#endif /* CONFIG_SMP */
+
+static const struct icp_ops icp_hv_ops = {
+	.get_irq	= icp_hv_get_irq,
+	.eoi		= icp_hv_eoi,
+	.set_priority	= icp_hv_set_cpu_priority,
+	.teardown_cpu	= icp_hv_teardown_cpu,
+	.flush_ipi	= icp_hv_flush_ipi,
+#ifdef CONFIG_SMP
+	.ipi_action	= icp_hv_ipi_action,
+	.cause_ipi	= icp_hv_cause_ipi,
+#endif
+};
+
+int icp_hv_init(void)
+{
+	struct device_node *np;
+
+	np = of_find_compatible_node(NULL, NULL, "ibm,ppc-xicp");
+	if (!np)
+		np = of_find_node_by_type(NULL,
+				    "PowerPC-External-Interrupt-Presentation");
+	if (!np)
+		return -ENODEV;
+
+	icp_ops = &icp_hv_ops;
+
+	return 0;
+}
+
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
new file mode 100644
index 00000000000..1f15ad43614
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -0,0 +1,293 @@
+/*
+ * Copyright 2011 IBM Corporation.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/spinlock.h>
+
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+
+struct icp_ipl {
+	union {
+		u32 word;
+		u8 bytes[4];
+	} xirr_poll;
+	union {
+		u32 word;
+		u8 bytes[4];
+	} xirr;
+	u32 dummy;
+	union {
+		u32 word;
+		u8 bytes[4];
+	} qirr;
+	u32 link_a;
+	u32 link_b;
+	u32 link_c;
+};
+
+static struct icp_ipl __iomem *icp_native_regs[NR_CPUS];
+
+static inline unsigned int icp_native_get_xirr(void)
+{
+	int cpu = smp_processor_id();
+
+	return in_be32(&icp_native_regs[cpu]->xirr.word);
+}
+
+static inline void icp_native_set_xirr(unsigned int value)
+{
+	int cpu = smp_processor_id();
+
+	out_be32(&icp_native_regs[cpu]->xirr.word, value);
+}
+
+static inline void icp_native_set_cppr(u8 value)
+{
+	int cpu = smp_processor_id();
+
+	out_8(&icp_native_regs[cpu]->xirr.bytes[0], value);
+}
+
+static inline void icp_native_set_qirr(int n_cpu, u8 value)
+{
+	out_8(&icp_native_regs[n_cpu]->qirr.bytes[0], value);
+}
+
+static void icp_native_set_cpu_priority(unsigned char cppr)
+{
+	xics_set_base_cppr(cppr);
+	icp_native_set_cppr(cppr);
+	iosync();
+}
+
+static void icp_native_eoi(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+
+	iosync();
+	icp_native_set_xirr((xics_pop_cppr() << 24) | hw_irq);
+}
+
+static void icp_native_teardown_cpu(void)
+{
+	int cpu = smp_processor_id();
+
+	/* Clear any pending IPI */
+	icp_native_set_qirr(cpu, 0xff);
+}
+
+static void icp_native_flush_ipi(void)
+{
+	/* We take the ipi irq but and never return so we
+	 * need to EOI the IPI, but want to leave our priority 0
+	 *
+	 * should we check all the other interrupts too?
+	 * should we be flagging idle loop instead?
+	 * or creating some task to be scheduled?
+	 */
+
+	icp_native_set_xirr((0x00 << 24) | XICS_IPI);
+}
+
+static unsigned int icp_native_get_irq(void)
+{
+	unsigned int xirr = icp_native_get_xirr();
+	unsigned int vec = xirr & 0x00ffffff;
+	unsigned int irq;
+
+	if (vec == XICS_IRQ_SPURIOUS)
+		return NO_IRQ;
+
+	irq = irq_radix_revmap_lookup(xics_host, vec);
+	if (likely(irq != NO_IRQ)) {
+		xics_push_cppr(vec);
+		return irq;
+	}
+
+	/* We don't have a linux mapping, so have rtas mask it. */
+	xics_mask_unknown_vec(vec);
+
+	/* We might learn about it later, so EOI it */
+	icp_native_set_xirr(xirr);
+
+	return NO_IRQ;
+}
+
+#ifdef CONFIG_SMP
+
+static void icp_native_cause_ipi(int cpu, unsigned long data)
+{
+	icp_native_set_qirr(cpu, IPI_PRIORITY);
+}
+
+static irqreturn_t icp_native_ipi_action(int irq, void *dev_id)
+{
+	int cpu = smp_processor_id();
+
+	icp_native_set_qirr(cpu, 0xff);
+
+	return smp_ipi_demux();
+}
+
+#endif /* CONFIG_SMP */
+
+static int __init icp_native_map_one_cpu(int hw_id, unsigned long addr,
+					 unsigned long size)
+{
+	char *rname;
+	int i, cpu = -1;
+
+	/* This may look gross but it's good enough for now, we don't quite
+	 * have a hard -> linux processor id matching.
+	 */
+	for_each_possible_cpu(i) {
+		if (!cpu_present(i))
+			continue;
+		if (hw_id == get_hard_smp_processor_id(i)) {
+			cpu = i;
+			break;
+		}
+	}
+
+	/* Fail, skip that CPU. Don't print, it's normal, some XICS come up
+	 * with way more entries in there than you have CPUs
+	 */
+	if (cpu == -1)
+		return 0;
+
+	rname = kasprintf(GFP_KERNEL, "CPU %d [0x%x] Interrupt Presentation",
+			  cpu, hw_id);
+
+	if (!request_mem_region(addr, size, rname)) {
+		pr_warning("icp_native: Could not reserve ICP MMIO"
+			   " for CPU %d, interrupt server #0x%x\n",
+			   cpu, hw_id);
+		return -EBUSY;
+	}
+
+	icp_native_regs[cpu] = ioremap(addr, size);
+	if (!icp_native_regs[cpu]) {
+		pr_warning("icp_native: Failed ioremap for CPU %d, "
+			   "interrupt server #0x%x, addr %#lx\n",
+			   cpu, hw_id, addr);
+		release_mem_region(addr, size);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+static int __init icp_native_init_one_node(struct device_node *np,
+					   unsigned int *indx)
+{
+	unsigned int ilen;
+	const u32 *ireg;
+	int i;
+	int reg_tuple_size;
+	int num_servers = 0;
+
+	/* This code does the theorically broken assumption that the interrupt
+	 * server numbers are the same as the hard CPU numbers.
+	 * This happens to be the case so far but we are playing with fire...
+	 * should be fixed one of these days. -BenH.
+	 */
+	ireg = of_get_property(np, "ibm,interrupt-server-ranges", &ilen);
+
+	/* Do that ever happen ? we'll know soon enough... but even good'old
+	 * f80 does have that property ..
+	 */
+	WARN_ON((ireg == NULL) || (ilen != 2*sizeof(u32)));
+
+	if (ireg) {
+		*indx = of_read_number(ireg, 1);
+		if (ilen >= 2*sizeof(u32))
+			num_servers = of_read_number(ireg + 1, 1);
+	}
+
+	ireg = of_get_property(np, "reg", &ilen);
+	if (!ireg) {
+		pr_err("icp_native: Can't find interrupt reg property");
+		return -1;
+	}
+
+	reg_tuple_size = (of_n_addr_cells(np) + of_n_size_cells(np)) * 4;
+	if (((ilen % reg_tuple_size) != 0)
+	    || (num_servers && (num_servers != (ilen / reg_tuple_size)))) {
+		pr_err("icp_native: ICP reg len (%d) != num servers (%d)",
+		       ilen / reg_tuple_size, num_servers);
+		return -1;
+	}
+
+	for (i = 0; i < (ilen / reg_tuple_size); i++) {
+		struct resource r;
+		int err;
+
+		err = of_address_to_resource(np, i, &r);
+		if (err) {
+			pr_err("icp_native: Could not translate ICP MMIO"
+			       " for interrupt server 0x%x (%d)\n", *indx, err);
+			return -1;
+		}
+
+		if (icp_native_map_one_cpu(*indx, r.start, r.end - r.start))
+			return -1;
+
+		(*indx)++;
+	}
+	return 0;
+}
+
+static const struct icp_ops icp_native_ops = {
+	.get_irq	= icp_native_get_irq,
+	.eoi		= icp_native_eoi,
+	.set_priority	= icp_native_set_cpu_priority,
+	.teardown_cpu	= icp_native_teardown_cpu,
+	.flush_ipi	= icp_native_flush_ipi,
+#ifdef CONFIG_SMP
+	.ipi_action	= icp_native_ipi_action,
+	.cause_ipi	= icp_native_cause_ipi,
+#endif
+};
+
+int icp_native_init(void)
+{
+	struct device_node *np;
+	u32 indx = 0;
+	int found = 0;
+
+	for_each_compatible_node(np, NULL, "ibm,ppc-xicp")
+		if (icp_native_init_one_node(np, &indx) == 0)
+			found = 1;
+	if (!found) {
+		for_each_node_by_type(np,
+			"PowerPC-External-Interrupt-Presentation") {
+				if (icp_native_init_one_node(np, &indx) == 0)
+					found = 1;
+		}
+	}
+
+	if (found == 0)
+		return -ENODEV;
+
+	icp_ops = &icp_native_ops;
+
+	return 0;
+}
diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c
new file mode 100644
index 00000000000..c782f85cf7e
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/ics-rtas.c
@@ -0,0 +1,240 @@
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/spinlock.h>
+#include <linux/msi.h>
+
+#include <asm/prom.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+#include <asm/rtas.h>
+
+/* RTAS service tokens */
+static int ibm_get_xive;
+static int ibm_set_xive;
+static int ibm_int_on;
+static int ibm_int_off;
+
+static int ics_rtas_map(struct ics *ics, unsigned int virq);
+static void ics_rtas_mask_unknown(struct ics *ics, unsigned long vec);
+static long ics_rtas_get_server(struct ics *ics, unsigned long vec);
+static int ics_rtas_host_match(struct ics *ics, struct device_node *node);
+
+/* Only one global & state struct ics */
+static struct ics ics_rtas = {
+	.map		= ics_rtas_map,
+	.mask_unknown	= ics_rtas_mask_unknown,
+	.get_server	= ics_rtas_get_server,
+	.host_match	= ics_rtas_host_match,
+};
+
+static void ics_rtas_unmask_irq(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	int call_status;
+	int server;
+
+	pr_devel("xics: unmask virq %d [hw 0x%x]\n", d->irq, hw_irq);
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return;
+
+	server = xics_get_irq_server(d->irq, d->affinity, 0);
+
+	call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq, server,
+				DEFAULT_PRIORITY);
+	if (call_status != 0) {
+		printk(KERN_ERR
+			"%s: ibm_set_xive irq %u server %x returned %d\n",
+			__func__, hw_irq, server, call_status);
+		return;
+	}
+
+	/* Now unmask the interrupt (often a no-op) */
+	call_status = rtas_call(ibm_int_on, 1, 1, NULL, hw_irq);
+	if (call_status != 0) {
+		printk(KERN_ERR "%s: ibm_int_on irq=%u returned %d\n",
+			__func__, hw_irq, call_status);
+		return;
+	}
+}
+
+static unsigned int ics_rtas_startup(struct irq_data *d)
+{
+#ifdef CONFIG_PCI_MSI
+	/*
+	 * The generic MSI code returns with the interrupt disabled on the
+	 * card, using the MSI mask bits. Firmware doesn't appear to unmask
+	 * at that level, so we do it here by hand.
+	 */
+	if (d->msi_desc)
+		unmask_msi_irq(d);
+#endif
+	/* unmask it */
+	ics_rtas_unmask_irq(d);
+	return 0;
+}
+
+static void ics_rtas_mask_real_irq(unsigned int hw_irq)
+{
+	int call_status;
+
+	if (hw_irq == XICS_IPI)
+		return;
+
+	call_status = rtas_call(ibm_int_off, 1, 1, NULL, hw_irq);
+	if (call_status != 0) {
+		printk(KERN_ERR "%s: ibm_int_off irq=%u returned %d\n",
+			__func__, hw_irq, call_status);
+		return;
+	}
+
+	/* Have to set XIVE to 0xff to be able to remove a slot */
+	call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq,
+				xics_default_server, 0xff);
+	if (call_status != 0) {
+		printk(KERN_ERR "%s: ibm_set_xive(0xff) irq=%u returned %d\n",
+			__func__, hw_irq, call_status);
+		return;
+	}
+}
+
+static void ics_rtas_mask_irq(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+
+	pr_devel("xics: mask virq %d [hw 0x%x]\n", d->irq, hw_irq);
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return;
+	ics_rtas_mask_real_irq(hw_irq);
+}
+
+static int ics_rtas_set_affinity(struct irq_data *d,
+				 const struct cpumask *cpumask,
+				 bool force)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	int status;
+	int xics_status[2];
+	int irq_server;
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return -1;
+
+	status = rtas_call(ibm_get_xive, 1, 3, xics_status, hw_irq);
+
+	if (status) {
+		printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
+			__func__, hw_irq, status);
+		return -1;
+	}
+
+	irq_server = xics_get_irq_server(d->irq, cpumask, 1);
+	if (irq_server == -1) {
+		char cpulist[128];
+		cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask);
+		printk(KERN_WARNING
+			"%s: No online cpus in the mask %s for irq %d\n",
+			__func__, cpulist, d->irq);
+		return -1;
+	}
+
+	status = rtas_call(ibm_set_xive, 3, 1, NULL,
+			   hw_irq, irq_server, xics_status[1]);
+
+	if (status) {
+		printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n",
+			__func__, hw_irq, status);
+		return -1;
+	}
+
+	return IRQ_SET_MASK_OK;
+}
+
+static struct irq_chip ics_rtas_irq_chip = {
+	.name = "XICS",
+	.irq_startup = ics_rtas_startup,
+	.irq_mask = ics_rtas_mask_irq,
+	.irq_unmask = ics_rtas_unmask_irq,
+	.irq_eoi = NULL, /* Patched at init time */
+	.irq_set_affinity = ics_rtas_set_affinity
+};
+
+static int ics_rtas_map(struct ics *ics, unsigned int virq)
+{
+	unsigned int hw_irq = (unsigned int)virq_to_hw(virq);
+	int status[2];
+	int rc;
+
+	if (WARN_ON(hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS))
+		return -EINVAL;
+
+	/* Check if RTAS knows about this interrupt */
+	rc = rtas_call(ibm_get_xive, 1, 3, status, hw_irq);
+	if (rc)
+		return -ENXIO;
+
+	irq_set_chip_and_handler(virq, &ics_rtas_irq_chip, handle_fasteoi_irq);
+	irq_set_chip_data(virq, &ics_rtas);
+
+	return 0;
+}
+
+static void ics_rtas_mask_unknown(struct ics *ics, unsigned long vec)
+{
+	ics_rtas_mask_real_irq(vec);
+}
+
+static long ics_rtas_get_server(struct ics *ics, unsigned long vec)
+{
+	int rc, status[2];
+
+	rc = rtas_call(ibm_get_xive, 1, 3, status, vec);
+	if (rc)
+		return -1;
+	return status[0];
+}
+
+static int ics_rtas_host_match(struct ics *ics, struct device_node *node)
+{
+	/* IBM machines have interrupt parents of various funky types for things
+	 * like vdevices, events, etc... The trick we use here is to match
+	 * everything here except the legacy 8259 which is compatible "chrp,iic"
+	 */
+	return !of_device_is_compatible(node, "chrp,iic");
+}
+
+int ics_rtas_init(void)
+{
+	ibm_get_xive = rtas_token("ibm,get-xive");
+	ibm_set_xive = rtas_token("ibm,set-xive");
+	ibm_int_on  = rtas_token("ibm,int-on");
+	ibm_int_off = rtas_token("ibm,int-off");
+
+	/* We enable the RTAS "ICS" if RTAS is present with the
+	 * appropriate tokens
+	 */
+	if (ibm_get_xive == RTAS_UNKNOWN_SERVICE ||
+	    ibm_set_xive == RTAS_UNKNOWN_SERVICE)
+		return -ENODEV;
+
+	/* We need to patch our irq chip's EOI to point to the
+	 * right ICP
+	 */
+	ics_rtas_irq_chip.irq_eoi = icp_ops->eoi;
+
+	/* Register ourselves */
+	xics_register_ics(&ics_rtas);
+
+	return 0;
+}
+
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
new file mode 100644
index 00000000000..445c5a01b76
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -0,0 +1,443 @@
+/*
+ * Copyright 2011 IBM Corporation.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+#include <linux/types.h>
+#include <linux/threads.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/debugfs.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/rtas.h>
+#include <asm/xics.h>
+#include <asm/firmware.h>
+
+/* Globals common to all ICP/ICS implementations */
+const struct icp_ops	*icp_ops;
+
+unsigned int xics_default_server		= 0xff;
+unsigned int xics_default_distrib_server	= 0;
+unsigned int xics_interrupt_server_size		= 8;
+
+DEFINE_PER_CPU(struct xics_cppr, xics_cppr);
+
+struct irq_host *xics_host;
+
+static LIST_HEAD(ics_list);
+
+void xics_update_irq_servers(void)
+{
+	int i, j;
+	struct device_node *np;
+	u32 ilen;
+	const u32 *ireg;
+	u32 hcpuid;
+
+	/* Find the server numbers for the boot cpu. */
+	np = of_get_cpu_node(boot_cpuid, NULL);
+	BUG_ON(!np);
+
+	hcpuid = get_hard_smp_processor_id(boot_cpuid);
+	xics_default_server = xics_default_distrib_server = hcpuid;
+
+	pr_devel("xics: xics_default_server = 0x%x\n", xics_default_server);
+
+	ireg = of_get_property(np, "ibm,ppc-interrupt-gserver#s", &ilen);
+	if (!ireg) {
+		of_node_put(np);
+		return;
+	}
+
+	i = ilen / sizeof(int);
+
+	/* Global interrupt distribution server is specified in the last
+	 * entry of "ibm,ppc-interrupt-gserver#s" property. Get the last
+	 * entry fom this property for current boot cpu id and use it as
+	 * default distribution server
+	 */
+	for (j = 0; j < i; j += 2) {
+		if (ireg[j] == hcpuid) {
+			xics_default_distrib_server = ireg[j+1];
+			break;
+		}
+	}
+	pr_devel("xics: xics_default_distrib_server = 0x%x\n",
+		 xics_default_distrib_server);
+	of_node_put(np);
+}
+
+/* GIQ stuff, currently only supported on RTAS setups, will have
+ * to be sorted properly for bare metal
+ */
+void xics_set_cpu_giq(unsigned int gserver, unsigned int join)
+{
+#ifdef CONFIG_PPC_RTAS
+	int index;
+	int status;
+
+	if (!rtas_indicator_present(GLOBAL_INTERRUPT_QUEUE, NULL))
+		return;
+
+	index = (1UL << xics_interrupt_server_size) - 1 - gserver;
+
+	status = rtas_set_indicator_fast(GLOBAL_INTERRUPT_QUEUE, index, join);
+
+	WARN(status < 0, "set-indicator(%d, %d, %u) returned %d\n",
+	     GLOBAL_INTERRUPT_QUEUE, index, join, status);
+#endif
+}
+
+void xics_setup_cpu(void)
+{
+	icp_ops->set_priority(LOWEST_PRIORITY);
+
+	xics_set_cpu_giq(xics_default_distrib_server, 1);
+}
+
+void xics_mask_unknown_vec(unsigned int vec)
+{
+	struct ics *ics;
+
+	pr_err("Interrupt 0x%x (real) is invalid, disabling it.\n", vec);
+
+	list_for_each_entry(ics, &ics_list, link)
+		ics->mask_unknown(ics, vec);
+}
+
+
+#ifdef CONFIG_SMP
+
+static void xics_request_ipi(void)
+{
+	unsigned int ipi;
+
+	ipi = irq_create_mapping(xics_host, XICS_IPI);
+	BUG_ON(ipi == NO_IRQ);
+
+	/*
+	 * IPIs are marked IRQF_DISABLED as they must run with irqs
+	 * disabled, and PERCPU.  The handler was set in map.
+	 */
+	BUG_ON(request_irq(ipi, icp_ops->ipi_action,
+			   IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL));
+}
+
+int __init xics_smp_probe(void)
+{
+	/* Setup cause_ipi callback  based on which ICP is used */
+	smp_ops->cause_ipi = icp_ops->cause_ipi;
+
+	/* Register all the IPIs */
+	xics_request_ipi();
+
+	return cpumask_weight(cpu_possible_mask);
+}
+
+#endif /* CONFIG_SMP */
+
+void xics_teardown_cpu(void)
+{
+	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
+
+	/*
+	 * we have to reset the cppr index to 0 because we're
+	 * not going to return from the IPI
+	 */
+	os_cppr->index = 0;
+	icp_ops->set_priority(0);
+	icp_ops->teardown_cpu();
+}
+
+void xics_kexec_teardown_cpu(int secondary)
+{
+	xics_teardown_cpu();
+
+	icp_ops->flush_ipi();
+
+	/*
+	 * Some machines need to have at least one cpu in the GIQ,
+	 * so leave the master cpu in the group.
+	 */
+	if (secondary)
+		xics_set_cpu_giq(xics_default_distrib_server, 0);
+}
+
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/* Interrupts are disabled. */
+void xics_migrate_irqs_away(void)
+{
+	int cpu = smp_processor_id(), hw_cpu = hard_smp_processor_id();
+	unsigned int irq, virq;
+
+	/* If we used to be the default server, move to the new "boot_cpuid" */
+	if (hw_cpu == xics_default_server)
+		xics_update_irq_servers();
+
+	/* Reject any interrupt that was queued to us... */
+	icp_ops->set_priority(0);
+
+	/* Remove ourselves from the global interrupt queue */
+	xics_set_cpu_giq(xics_default_distrib_server, 0);
+
+	/* Allow IPIs again... */
+	icp_ops->set_priority(DEFAULT_PRIORITY);
+
+	for_each_irq(virq) {
+		struct irq_desc *desc;
+		struct irq_chip *chip;
+		long server;
+		unsigned long flags;
+		struct ics *ics;
+
+		/* We can't set affinity on ISA interrupts */
+		if (virq < NUM_ISA_INTERRUPTS)
+			continue;
+		if (!virq_is_host(virq, xics_host))
+			continue;
+		irq = (unsigned int)virq_to_hw(virq);
+		/* We need to get IPIs still. */
+		if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
+			continue;
+		desc = irq_to_desc(virq);
+		/* We only need to migrate enabled IRQS */
+		if (!desc || !desc->action)
+			continue;
+		chip = irq_desc_get_chip(desc);
+		if (!chip || !chip->irq_set_affinity)
+			continue;
+
+		raw_spin_lock_irqsave(&desc->lock, flags);
+
+		/* Locate interrupt server */
+		server = -1;
+		ics = irq_get_chip_data(virq);
+		if (ics)
+			server = ics->get_server(ics, irq);
+		if (server < 0) {
+			printk(KERN_ERR "%s: Can't find server for irq %d\n",
+			       __func__, irq);
+			goto unlock;
+		}
+
+		/* We only support delivery to all cpus or to one cpu.
+		 * The irq has to be migrated only in the single cpu
+		 * case.
+		 */
+		if (server != hw_cpu)
+			goto unlock;
+
+		/* This is expected during cpu offline. */
+		if (cpu_online(cpu))
+			pr_warning("IRQ %u affinity broken off cpu %u\n",
+			       virq, cpu);
+
+		/* Reset affinity to all cpus */
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
+		irq_set_affinity(virq, cpu_all_mask);
+		continue;
+unlock:
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
+	}
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+#ifdef CONFIG_SMP
+/*
+ * For the moment we only implement delivery to all cpus or one cpu.
+ *
+ * If the requested affinity is cpu_all_mask, we set global affinity.
+ * If not we set it to the first cpu in the mask, even if multiple cpus
+ * are set. This is so things like irqbalance (which set core and package
+ * wide affinities) do the right thing.
+ *
+ * We need to fix this to implement support for the links
+ */
+int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask,
+			unsigned int strict_check)
+{
+
+	if (!distribute_irqs)
+		return xics_default_server;
+
+	if (!cpumask_subset(cpu_possible_mask, cpumask)) {
+		int server = cpumask_first_and(cpu_online_mask, cpumask);
+
+		if (server < nr_cpu_ids)
+			return get_hard_smp_processor_id(server);
+
+		if (strict_check)
+			return -1;
+	}
+
+	/*
+	 * Workaround issue with some versions of JS20 firmware that
+	 * deliver interrupts to cpus which haven't been started. This
+	 * happens when using the maxcpus= boot option.
+	 */
+	if (cpumask_equal(cpu_online_mask, cpu_present_mask))
+		return xics_default_distrib_server;
+
+	return xics_default_server;
+}
+#endif /* CONFIG_SMP */
+
+static int xics_host_match(struct irq_host *h, struct device_node *node)
+{
+	struct ics *ics;
+
+	list_for_each_entry(ics, &ics_list, link)
+		if (ics->host_match(ics, node))
+			return 1;
+
+	return 0;
+}
+
+/* Dummies */
+static void xics_ipi_unmask(struct irq_data *d) { }
+static void xics_ipi_mask(struct irq_data *d) { }
+
+static struct irq_chip xics_ipi_chip = {
+	.name = "XICS",
+	.irq_eoi = NULL, /* Patched at init time */
+	.irq_mask = xics_ipi_mask,
+	.irq_unmask = xics_ipi_unmask,
+};
+
+static int xics_host_map(struct irq_host *h, unsigned int virq,
+			 irq_hw_number_t hw)
+{
+	struct ics *ics;
+
+	pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw);
+
+	/* Insert the interrupt mapping into the radix tree for fast lookup */
+	irq_radix_revmap_insert(xics_host, virq, hw);
+
+	/* They aren't all level sensitive but we just don't really know */
+	irq_set_status_flags(virq, IRQ_LEVEL);
+
+	/* Don't call into ICS for IPIs */
+	if (hw == XICS_IPI) {
+		irq_set_chip_and_handler(virq, &xics_ipi_chip,
+					 handle_percpu_irq);
+		return 0;
+	}
+
+	/* Let the ICS setup the chip data */
+	list_for_each_entry(ics, &ics_list, link)
+		if (ics->map(ics, virq) == 0)
+			return 0;
+
+	return -EINVAL;
+}
+
+static int xics_host_xlate(struct irq_host *h, struct device_node *ct,
+			   const u32 *intspec, unsigned int intsize,
+			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+
+{
+	/* Current xics implementation translates everything
+	 * to level. It is not technically right for MSIs but this
+	 * is irrelevant at this point. We might get smarter in the future
+	 */
+	*out_hwirq = intspec[0];
+	*out_flags = IRQ_TYPE_LEVEL_LOW;
+
+	return 0;
+}
+
+static struct irq_host_ops xics_host_ops = {
+	.match = xics_host_match,
+	.map = xics_host_map,
+	.xlate = xics_host_xlate,
+};
+
+static void __init xics_init_host(void)
+{
+	xics_host = irq_alloc_host(NULL, IRQ_HOST_MAP_TREE, 0, &xics_host_ops,
+				   XICS_IRQ_SPURIOUS);
+	BUG_ON(xics_host == NULL);
+	irq_set_default_host(xics_host);
+}
+
+void __init xics_register_ics(struct ics *ics)
+{
+	list_add(&ics->link, &ics_list);
+}
+
+static void __init xics_get_server_size(void)
+{
+	struct device_node *np;
+	const u32 *isize;
+
+	/* We fetch the interrupt server size from the first ICS node
+	 * we find if any
+	 */
+	np = of_find_compatible_node(NULL, NULL, "ibm,ppc-xics");
+	if (!np)
+		return;
+	isize = of_get_property(np, "ibm,interrupt-server#-size", NULL);
+	if (!isize)
+		return;
+	xics_interrupt_server_size = *isize;
+	of_node_put(np);
+}
+
+void __init xics_init(void)
+{
+	int rc = -1;
+
+	/* Fist locate ICP */
+#ifdef CONFIG_PPC_ICP_HV
+	if (firmware_has_feature(FW_FEATURE_LPAR))
+		rc = icp_hv_init();
+#endif
+#ifdef CONFIG_PPC_ICP_NATIVE
+	if (rc < 0)
+		rc = icp_native_init();
+#endif
+	if (rc < 0) {
+		pr_warning("XICS: Cannot find a Presentation Controller !\n");
+		return;
+	}
+
+	/* Copy get_irq callback over to ppc_md */
+	ppc_md.get_irq = icp_ops->get_irq;
+
+	/* Patch up IPI chip EOI */
+	xics_ipi_chip.irq_eoi = icp_ops->eoi;
+
+	/* Now locate ICS */
+#ifdef CONFIG_PPC_ICS_RTAS
+	rc = ics_rtas_init();
+#endif
+	if (rc < 0)
+		pr_warning("XICS: Cannot find a Source Controller !\n");
+
+	/* Initialize common bits */
+	xics_get_server_size();
+	xics_update_irq_servers();
+	xics_init_host();
+	xics_setup_cpu();
+}
diff --git a/arch/powerpc/sysdev/xilinx_intc.c b/arch/powerpc/sysdev/xilinx_intc.c
index 0a13fc19e28..6183799754a 100644
--- a/arch/powerpc/sysdev/xilinx_intc.c
+++ b/arch/powerpc/sysdev/xilinx_intc.c
@@ -71,7 +71,7 @@ static unsigned char xilinx_intc_map_senses[] = {
  */
 static void xilinx_intc_mask(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void * regs = irq_data_get_irq_chip_data(d);
 	pr_debug("mask: %d\n", irq);
 	out_be32(regs + XINTC_CIE, 1 << irq);
@@ -87,7 +87,7 @@ static int xilinx_intc_set_type(struct irq_data *d, unsigned int flow_type)
  */
 static void xilinx_intc_level_unmask(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void * regs = irq_data_get_irq_chip_data(d);
 	pr_debug("unmask: %d\n", irq);
 	out_be32(regs + XINTC_SIE, 1 << irq);
@@ -112,7 +112,7 @@ static struct irq_chip xilinx_intc_level_irqchip = {
  */
 static void xilinx_intc_edge_unmask(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void *regs = irq_data_get_irq_chip_data(d);
 	pr_debug("unmask: %d\n", irq);
 	out_be32(regs + XINTC_SIE, 1 << irq);
@@ -120,7 +120,7 @@ static void xilinx_intc_edge_unmask(struct irq_data *d)
 
 static void xilinx_intc_edge_ack(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void * regs = irq_data_get_irq_chip_data(d);
 	pr_debug("ack: %d\n", irq);
 	out_be32(regs + XINTC_IAR, 1 << irq);
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 33794c1d92c..42541bbcc7f 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -334,7 +334,7 @@ static void release_output_lock(void)
 
 int cpus_are_in_xmon(void)
 {
-	return !cpus_empty(cpus_in_xmon);
+	return !cpumask_empty(&cpus_in_xmon);
 }
 #endif
 
@@ -373,7 +373,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 
 #ifdef CONFIG_SMP
 	cpu = smp_processor_id();
-	if (cpu_isset(cpu, cpus_in_xmon)) {
+	if (cpumask_test_cpu(cpu, &cpus_in_xmon)) {
 		get_output_lock();
 		excprint(regs);
 		printf("cpu 0x%x: Exception %lx %s in xmon, "
@@ -396,10 +396,10 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 	}
 
 	xmon_fault_jmp[cpu] = recurse_jmp;
-	cpu_set(cpu, cpus_in_xmon);
+	cpumask_set_cpu(cpu, &cpus_in_xmon);
 
 	bp = NULL;
-	if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) == (MSR_IR|MSR_SF))
+	if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT))
 		bp = at_breakpoint(regs->nip);
 	if (bp || unrecoverable_excp(regs))
 		fromipi = 0;
@@ -437,10 +437,10 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 		xmon_owner = cpu;
 		mb();
 		if (ncpus > 1) {
-			smp_send_debugger_break(MSG_ALL_BUT_SELF);
+			smp_send_debugger_break();
 			/* wait for other cpus to come in */
 			for (timeout = 100000000; timeout != 0; --timeout) {
-				if (cpus_weight(cpus_in_xmon) >= ncpus)
+				if (cpumask_weight(&cpus_in_xmon) >= ncpus)
 					break;
 				barrier();
 			}
@@ -484,7 +484,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 		}
 	}
  leave:
-	cpu_clear(cpu, cpus_in_xmon);
+	cpumask_clear_cpu(cpu, &cpus_in_xmon);
 	xmon_fault_jmp[cpu] = NULL;
 #else
 	/* UP is simple... */
@@ -529,7 +529,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 		}
 	}
 #else
-	if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) == (MSR_IR|MSR_SF)) {
+	if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT)) {
 		bp = at_breakpoint(regs->nip);
 		if (bp != NULL) {
 			int stepped = emulate_step(regs, bp->instr[0]);
@@ -578,7 +578,7 @@ static int xmon_bpt(struct pt_regs *regs)
 	struct bpt *bp;
 	unsigned long offset;
 
-	if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) != (MSR_IR|MSR_SF))
+	if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT))
 		return 0;
 
 	/* Are we at the trap at bp->instr[1] for some bp? */
@@ -609,7 +609,7 @@ static int xmon_sstep(struct pt_regs *regs)
 
 static int xmon_dabr_match(struct pt_regs *regs)
 {
-	if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) != (MSR_IR|MSR_SF))
+	if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT))
 		return 0;
 	if (dabr.enabled == 0)
 		return 0;
@@ -619,7 +619,7 @@ static int xmon_dabr_match(struct pt_regs *regs)
 
 static int xmon_iabr_match(struct pt_regs *regs)
 {
-	if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) != (MSR_IR|MSR_SF))
+	if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT))
 		return 0;
 	if (iabr == NULL)
 		return 0;
@@ -630,7 +630,7 @@ static int xmon_iabr_match(struct pt_regs *regs)
 static int xmon_ipi(struct pt_regs *regs)
 {
 #ifdef CONFIG_SMP
-	if (in_xmon && !cpu_isset(smp_processor_id(), cpus_in_xmon))
+	if (in_xmon && !cpumask_test_cpu(smp_processor_id(), &cpus_in_xmon))
 		xmon_core(regs, 1);
 #endif
 	return 0;
@@ -644,7 +644,7 @@ static int xmon_fault_handler(struct pt_regs *regs)
 	if (in_xmon && catch_memory_errors)
 		handle_fault(regs);	/* doesn't return */
 
-	if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) == (MSR_IR|MSR_SF)) {
+	if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT)) {
 		bp = in_breakpoint_table(regs->nip, &offset);
 		if (bp != NULL) {
 			regs->nip = bp->address + offset;
@@ -929,7 +929,7 @@ static int do_step(struct pt_regs *regs)
 	int stepped;
 
 	/* check we are in 64-bit kernel mode, translation enabled */
-	if ((regs->msr & (MSR_SF|MSR_PR|MSR_IR)) == (MSR_SF|MSR_IR)) {
+	if ((regs->msr & (MSR_64BIT|MSR_PR|MSR_IR)) == (MSR_64BIT|MSR_IR)) {
 		if (mread(regs->nip, &instr, 4) == 4) {
 			stepped = emulate_step(regs, instr);
 			if (stepped < 0) {
@@ -976,7 +976,7 @@ static int cpu_cmd(void)
 		printf("cpus stopped:");
 		count = 0;
 		for (cpu = 0; cpu < NR_CPUS; ++cpu) {
-			if (cpu_isset(cpu, cpus_in_xmon)) {
+			if (cpumask_test_cpu(cpu, &cpus_in_xmon)) {
 				if (count == 0)
 					printf(" %x", cpu);
 				++count;
@@ -992,7 +992,7 @@ static int cpu_cmd(void)
 		return 0;
 	}
 	/* try to switch to cpu specified */
-	if (!cpu_isset(cpu, cpus_in_xmon)) {
+	if (!cpumask_test_cpu(cpu, &cpus_in_xmon)) {
 		printf("cpu 0x%x isn't in xmon\n", cpu);
 		return 0;
 	}
@@ -1497,6 +1497,10 @@ static void prregs(struct pt_regs *fp)
 #endif
 	printf("pc  = ");
 	xmon_print_symbol(fp->nip, " ", "\n");
+	if (TRAP(fp) != 0xc00 && cpu_has_feature(CPU_FTR_CFAR)) {
+		printf("cfar= ");
+		xmon_print_symbol(fp->orig_gpr3, " ", "\n");
+	}
 	printf("lr  = ");
 	xmon_print_symbol(fp->link, " ", "\n");
 	printf("msr = "REG"   cr  = %.8lx\n", fp->msr, fp->ccr);
@@ -2663,7 +2667,7 @@ static void dump_stab(void)
 
 void dump_segments(void)
 {
-	if (cpu_has_feature(CPU_FTR_SLB))
+	if (mmu_has_feature(MMU_FTR_SLB))
 		dump_slb();
 	else
 		dump_stab();
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 2508a6f3158..4a7f14079e0 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -88,6 +88,7 @@ config S390
 	select HAVE_KERNEL_XZ
 	select HAVE_GET_USER_PAGES_FAST
 	select HAVE_ARCH_MUTEX_CPU_RELAX
+	select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
 	select ARCH_INLINE_SPIN_TRYLOCK
 	select ARCH_INLINE_SPIN_TRYLOCK_BH
 	select ARCH_INLINE_SPIN_LOCK
diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile
index 1cf81d77c5a..7f0b7cda625 100644
--- a/arch/s390/crypto/Makefile
+++ b/arch/s390/crypto/Makefile
@@ -8,3 +8,4 @@ obj-$(CONFIG_CRYPTO_SHA512_S390) += sha512_s390.o sha_common.o
 obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o
 obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o
 obj-$(CONFIG_S390_PRNG) += prng.o
+obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 58f46734465..a9ce135893f 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -31,7 +31,8 @@
 #define AES_KEYLEN_192		2
 #define AES_KEYLEN_256		4
 
-static char keylen_flag = 0;
+static u8 *ctrblk;
+static char keylen_flag;
 
 struct s390_aes_ctx {
 	u8 iv[AES_BLOCK_SIZE];
@@ -45,6 +46,24 @@ struct s390_aes_ctx {
 	} fallback;
 };
 
+struct pcc_param {
+	u8 key[32];
+	u8 tweak[16];
+	u8 block[16];
+	u8 bit[16];
+	u8 xts[16];
+};
+
+struct s390_xts_ctx {
+	u8 key[32];
+	u8 xts_param[16];
+	struct pcc_param pcc;
+	long enc;
+	long dec;
+	int key_len;
+	struct crypto_blkcipher *fallback;
+};
+
 /*
  * Check if the key_len is supported by the HW.
  * Returns 0 if it is, a positive number if it is not and software fallback is
@@ -504,15 +523,337 @@ static struct crypto_alg cbc_aes_alg = {
 	}
 };
 
+static int xts_fallback_setkey(struct crypto_tfm *tfm, const u8 *key,
+				   unsigned int len)
+{
+	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+	unsigned int ret;
+
+	xts_ctx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
+	xts_ctx->fallback->base.crt_flags |= (tfm->crt_flags &
+			CRYPTO_TFM_REQ_MASK);
+
+	ret = crypto_blkcipher_setkey(xts_ctx->fallback, key, len);
+	if (ret) {
+		tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
+		tfm->crt_flags |= (xts_ctx->fallback->base.crt_flags &
+				CRYPTO_TFM_RES_MASK);
+	}
+	return ret;
+}
+
+static int xts_fallback_decrypt(struct blkcipher_desc *desc,
+		struct scatterlist *dst, struct scatterlist *src,
+		unsigned int nbytes)
+{
+	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_blkcipher *tfm;
+	unsigned int ret;
+
+	tfm = desc->tfm;
+	desc->tfm = xts_ctx->fallback;
+
+	ret = crypto_blkcipher_decrypt_iv(desc, dst, src, nbytes);
+
+	desc->tfm = tfm;
+	return ret;
+}
+
+static int xts_fallback_encrypt(struct blkcipher_desc *desc,
+		struct scatterlist *dst, struct scatterlist *src,
+		unsigned int nbytes)
+{
+	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_blkcipher *tfm;
+	unsigned int ret;
+
+	tfm = desc->tfm;
+	desc->tfm = xts_ctx->fallback;
+
+	ret = crypto_blkcipher_encrypt_iv(desc, dst, src, nbytes);
+
+	desc->tfm = tfm;
+	return ret;
+}
+
+static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+			   unsigned int key_len)
+{
+	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+	u32 *flags = &tfm->crt_flags;
+
+	switch (key_len) {
+	case 32:
+		xts_ctx->enc = KM_XTS_128_ENCRYPT;
+		xts_ctx->dec = KM_XTS_128_DECRYPT;
+		memcpy(xts_ctx->key + 16, in_key, 16);
+		memcpy(xts_ctx->pcc.key + 16, in_key + 16, 16);
+		break;
+	case 48:
+		xts_ctx->enc = 0;
+		xts_ctx->dec = 0;
+		xts_fallback_setkey(tfm, in_key, key_len);
+		break;
+	case 64:
+		xts_ctx->enc = KM_XTS_256_ENCRYPT;
+		xts_ctx->dec = KM_XTS_256_DECRYPT;
+		memcpy(xts_ctx->key, in_key, 32);
+		memcpy(xts_ctx->pcc.key, in_key + 32, 32);
+		break;
+	default:
+		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+	xts_ctx->key_len = key_len;
+	return 0;
+}
+
+static int xts_aes_crypt(struct blkcipher_desc *desc, long func,
+			 struct s390_xts_ctx *xts_ctx,
+			 struct blkcipher_walk *walk)
+{
+	unsigned int offset = (xts_ctx->key_len >> 1) & 0x10;
+	int ret = blkcipher_walk_virt(desc, walk);
+	unsigned int nbytes = walk->nbytes;
+	unsigned int n;
+	u8 *in, *out;
+	void *param;
+
+	if (!nbytes)
+		goto out;
+
+	memset(xts_ctx->pcc.block, 0, sizeof(xts_ctx->pcc.block));
+	memset(xts_ctx->pcc.bit, 0, sizeof(xts_ctx->pcc.bit));
+	memset(xts_ctx->pcc.xts, 0, sizeof(xts_ctx->pcc.xts));
+	memcpy(xts_ctx->pcc.tweak, walk->iv, sizeof(xts_ctx->pcc.tweak));
+	param = xts_ctx->pcc.key + offset;
+	ret = crypt_s390_pcc(func, param);
+	BUG_ON(ret < 0);
+
+	memcpy(xts_ctx->xts_param, xts_ctx->pcc.xts, 16);
+	param = xts_ctx->key + offset;
+	do {
+		/* only use complete blocks */
+		n = nbytes & ~(AES_BLOCK_SIZE - 1);
+		out = walk->dst.virt.addr;
+		in = walk->src.virt.addr;
+
+		ret = crypt_s390_km(func, param, out, in, n);
+		BUG_ON(ret < 0 || ret != n);
+
+		nbytes &= AES_BLOCK_SIZE - 1;
+		ret = blkcipher_walk_done(desc, walk, nbytes);
+	} while ((nbytes = walk->nbytes));
+out:
+	return ret;
+}
+
+static int xts_aes_encrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	if (unlikely(xts_ctx->key_len == 48))
+		return xts_fallback_encrypt(desc, dst, src, nbytes);
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return xts_aes_crypt(desc, xts_ctx->enc, xts_ctx, &walk);
+}
+
+static int xts_aes_decrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	if (unlikely(xts_ctx->key_len == 48))
+		return xts_fallback_decrypt(desc, dst, src, nbytes);
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return xts_aes_crypt(desc, xts_ctx->dec, xts_ctx, &walk);
+}
+
+static int xts_fallback_init(struct crypto_tfm *tfm)
+{
+	const char *name = tfm->__crt_alg->cra_name;
+	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+
+	xts_ctx->fallback = crypto_alloc_blkcipher(name, 0,
+			CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
+
+	if (IS_ERR(xts_ctx->fallback)) {
+		pr_err("Allocating XTS fallback algorithm %s failed\n",
+		       name);
+		return PTR_ERR(xts_ctx->fallback);
+	}
+	return 0;
+}
+
+static void xts_fallback_exit(struct crypto_tfm *tfm)
+{
+	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_blkcipher(xts_ctx->fallback);
+	xts_ctx->fallback = NULL;
+}
+
+static struct crypto_alg xts_aes_alg = {
+	.cra_name		=	"xts(aes)",
+	.cra_driver_name	=	"xts-aes-s390",
+	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER |
+					CRYPTO_ALG_NEED_FALLBACK,
+	.cra_blocksize		=	AES_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct s390_xts_ctx),
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(xts_aes_alg.cra_list),
+	.cra_init		=	xts_fallback_init,
+	.cra_exit		=	xts_fallback_exit,
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize		=	2 * AES_MIN_KEY_SIZE,
+			.max_keysize		=	2 * AES_MAX_KEY_SIZE,
+			.ivsize			=	AES_BLOCK_SIZE,
+			.setkey			=	xts_aes_set_key,
+			.encrypt		=	xts_aes_encrypt,
+			.decrypt		=	xts_aes_decrypt,
+		}
+	}
+};
+
+static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+			   unsigned int key_len)
+{
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+	switch (key_len) {
+	case 16:
+		sctx->enc = KMCTR_AES_128_ENCRYPT;
+		sctx->dec = KMCTR_AES_128_DECRYPT;
+		break;
+	case 24:
+		sctx->enc = KMCTR_AES_192_ENCRYPT;
+		sctx->dec = KMCTR_AES_192_DECRYPT;
+		break;
+	case 32:
+		sctx->enc = KMCTR_AES_256_ENCRYPT;
+		sctx->dec = KMCTR_AES_256_DECRYPT;
+		break;
+	}
+
+	return aes_set_key(tfm, in_key, key_len);
+}
+
+static int ctr_aes_crypt(struct blkcipher_desc *desc, long func,
+			 struct s390_aes_ctx *sctx, struct blkcipher_walk *walk)
+{
+	int ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE);
+	unsigned int i, n, nbytes;
+	u8 buf[AES_BLOCK_SIZE];
+	u8 *out, *in;
+
+	if (!walk->nbytes)
+		return ret;
+
+	memcpy(ctrblk, walk->iv, AES_BLOCK_SIZE);
+	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+		out = walk->dst.virt.addr;
+		in = walk->src.virt.addr;
+		while (nbytes >= AES_BLOCK_SIZE) {
+			/* only use complete blocks, max. PAGE_SIZE */
+			n = (nbytes > PAGE_SIZE) ? PAGE_SIZE :
+						 nbytes & ~(AES_BLOCK_SIZE - 1);
+			for (i = AES_BLOCK_SIZE; i < n; i += AES_BLOCK_SIZE) {
+				memcpy(ctrblk + i, ctrblk + i - AES_BLOCK_SIZE,
+				       AES_BLOCK_SIZE);
+				crypto_inc(ctrblk + i, AES_BLOCK_SIZE);
+			}
+			ret = crypt_s390_kmctr(func, sctx->key, out, in, n, ctrblk);
+			BUG_ON(ret < 0 || ret != n);
+			if (n > AES_BLOCK_SIZE)
+				memcpy(ctrblk, ctrblk + n - AES_BLOCK_SIZE,
+				       AES_BLOCK_SIZE);
+			crypto_inc(ctrblk, AES_BLOCK_SIZE);
+			out += n;
+			in += n;
+			nbytes -= n;
+		}
+		ret = blkcipher_walk_done(desc, walk, nbytes);
+	}
+	/*
+	 * final block may be < AES_BLOCK_SIZE, copy only nbytes
+	 */
+	if (nbytes) {
+		out = walk->dst.virt.addr;
+		in = walk->src.virt.addr;
+		ret = crypt_s390_kmctr(func, sctx->key, buf, in,
+				       AES_BLOCK_SIZE, ctrblk);
+		BUG_ON(ret < 0 || ret != AES_BLOCK_SIZE);
+		memcpy(out, buf, nbytes);
+		crypto_inc(ctrblk, AES_BLOCK_SIZE);
+		ret = blkcipher_walk_done(desc, walk, 0);
+	}
+	memcpy(walk->iv, ctrblk, AES_BLOCK_SIZE);
+	return ret;
+}
+
+static int ctr_aes_encrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ctr_aes_crypt(desc, sctx->enc, sctx, &walk);
+}
+
+static int ctr_aes_decrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ctr_aes_crypt(desc, sctx->dec, sctx, &walk);
+}
+
+static struct crypto_alg ctr_aes_alg = {
+	.cra_name		=	"ctr(aes)",
+	.cra_driver_name	=	"ctr-aes-s390",
+	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	1,
+	.cra_ctxsize		=	sizeof(struct s390_aes_ctx),
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(ctr_aes_alg.cra_list),
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize		=	AES_MIN_KEY_SIZE,
+			.max_keysize		=	AES_MAX_KEY_SIZE,
+			.ivsize			=	AES_BLOCK_SIZE,
+			.setkey			=	ctr_aes_set_key,
+			.encrypt		=	ctr_aes_encrypt,
+			.decrypt		=	ctr_aes_decrypt,
+		}
+	}
+};
+
 static int __init aes_s390_init(void)
 {
 	int ret;
 
-	if (crypt_s390_func_available(KM_AES_128_ENCRYPT))
+	if (crypt_s390_func_available(KM_AES_128_ENCRYPT, CRYPT_S390_MSA))
 		keylen_flag |= AES_KEYLEN_128;
-	if (crypt_s390_func_available(KM_AES_192_ENCRYPT))
+	if (crypt_s390_func_available(KM_AES_192_ENCRYPT, CRYPT_S390_MSA))
 		keylen_flag |= AES_KEYLEN_192;
-	if (crypt_s390_func_available(KM_AES_256_ENCRYPT))
+	if (crypt_s390_func_available(KM_AES_256_ENCRYPT, CRYPT_S390_MSA))
 		keylen_flag |= AES_KEYLEN_256;
 
 	if (!keylen_flag)
@@ -535,9 +876,40 @@ static int __init aes_s390_init(void)
 	if (ret)
 		goto cbc_aes_err;
 
+	if (crypt_s390_func_available(KM_XTS_128_ENCRYPT,
+			CRYPT_S390_MSA | CRYPT_S390_MSA4) &&
+	    crypt_s390_func_available(KM_XTS_256_ENCRYPT,
+			CRYPT_S390_MSA | CRYPT_S390_MSA4)) {
+		ret = crypto_register_alg(&xts_aes_alg);
+		if (ret)
+			goto xts_aes_err;
+	}
+
+	if (crypt_s390_func_available(KMCTR_AES_128_ENCRYPT,
+				CRYPT_S390_MSA | CRYPT_S390_MSA4) &&
+	    crypt_s390_func_available(KMCTR_AES_192_ENCRYPT,
+				CRYPT_S390_MSA | CRYPT_S390_MSA4) &&
+	    crypt_s390_func_available(KMCTR_AES_256_ENCRYPT,
+				CRYPT_S390_MSA | CRYPT_S390_MSA4)) {
+		ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
+		if (!ctrblk) {
+			ret = -ENOMEM;
+			goto ctr_aes_err;
+		}
+		ret = crypto_register_alg(&ctr_aes_alg);
+		if (ret) {
+			free_page((unsigned long) ctrblk);
+			goto ctr_aes_err;
+		}
+	}
+
 out:
 	return ret;
 
+ctr_aes_err:
+	crypto_unregister_alg(&xts_aes_alg);
+xts_aes_err:
+	crypto_unregister_alg(&cbc_aes_alg);
 cbc_aes_err:
 	crypto_unregister_alg(&ecb_aes_alg);
 ecb_aes_err:
@@ -548,6 +920,9 @@ aes_err:
 
 static void __exit aes_s390_fini(void)
 {
+	crypto_unregister_alg(&ctr_aes_alg);
+	free_page((unsigned long) ctrblk);
+	crypto_unregister_alg(&xts_aes_alg);
 	crypto_unregister_alg(&cbc_aes_alg);
 	crypto_unregister_alg(&ecb_aes_alg);
 	crypto_unregister_alg(&aes_alg);
diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h
index 7ee9a1b4ad9..49676771bd6 100644
--- a/arch/s390/crypto/crypt_s390.h
+++ b/arch/s390/crypto/crypt_s390.h
@@ -24,13 +24,18 @@
 #define CRYPT_S390_PRIORITY 300
 #define CRYPT_S390_COMPOSITE_PRIORITY 400
 
+#define CRYPT_S390_MSA	0x1
+#define CRYPT_S390_MSA3	0x2
+#define CRYPT_S390_MSA4	0x4
+
 /* s390 cryptographic operations */
 enum crypt_s390_operations {
 	CRYPT_S390_KM   = 0x0100,
 	CRYPT_S390_KMC  = 0x0200,
 	CRYPT_S390_KIMD = 0x0300,
 	CRYPT_S390_KLMD = 0x0400,
-	CRYPT_S390_KMAC = 0x0500
+	CRYPT_S390_KMAC = 0x0500,
+	CRYPT_S390_KMCTR = 0x0600
 };
 
 /*
@@ -51,6 +56,10 @@ enum crypt_s390_km_func {
 	KM_AES_192_DECRYPT  = CRYPT_S390_KM | 0x13 | 0x80,
 	KM_AES_256_ENCRYPT  = CRYPT_S390_KM | 0x14,
 	KM_AES_256_DECRYPT  = CRYPT_S390_KM | 0x14 | 0x80,
+	KM_XTS_128_ENCRYPT  = CRYPT_S390_KM | 0x32,
+	KM_XTS_128_DECRYPT  = CRYPT_S390_KM | 0x32 | 0x80,
+	KM_XTS_256_ENCRYPT  = CRYPT_S390_KM | 0x34,
+	KM_XTS_256_DECRYPT  = CRYPT_S390_KM | 0x34 | 0x80,
 };
 
 /*
@@ -75,6 +84,26 @@ enum crypt_s390_kmc_func {
 };
 
 /*
+ * function codes for KMCTR (CIPHER MESSAGE WITH COUNTER)
+ * instruction
+ */
+enum crypt_s390_kmctr_func {
+	KMCTR_QUERY            = CRYPT_S390_KMCTR | 0x0,
+	KMCTR_DEA_ENCRYPT      = CRYPT_S390_KMCTR | 0x1,
+	KMCTR_DEA_DECRYPT      = CRYPT_S390_KMCTR | 0x1 | 0x80,
+	KMCTR_TDEA_128_ENCRYPT = CRYPT_S390_KMCTR | 0x2,
+	KMCTR_TDEA_128_DECRYPT = CRYPT_S390_KMCTR | 0x2 | 0x80,
+	KMCTR_TDEA_192_ENCRYPT = CRYPT_S390_KMCTR | 0x3,
+	KMCTR_TDEA_192_DECRYPT = CRYPT_S390_KMCTR | 0x3 | 0x80,
+	KMCTR_AES_128_ENCRYPT  = CRYPT_S390_KMCTR | 0x12,
+	KMCTR_AES_128_DECRYPT  = CRYPT_S390_KMCTR | 0x12 | 0x80,
+	KMCTR_AES_192_ENCRYPT  = CRYPT_S390_KMCTR | 0x13,
+	KMCTR_AES_192_DECRYPT  = CRYPT_S390_KMCTR | 0x13 | 0x80,
+	KMCTR_AES_256_ENCRYPT  = CRYPT_S390_KMCTR | 0x14,
+	KMCTR_AES_256_DECRYPT  = CRYPT_S390_KMCTR | 0x14 | 0x80,
+};
+
+/*
  * function codes for KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST)
  * instruction
  */
@@ -83,6 +112,7 @@ enum crypt_s390_kimd_func {
 	KIMD_SHA_1   = CRYPT_S390_KIMD | 1,
 	KIMD_SHA_256 = CRYPT_S390_KIMD | 2,
 	KIMD_SHA_512 = CRYPT_S390_KIMD | 3,
+	KIMD_GHASH   = CRYPT_S390_KIMD | 65,
 };
 
 /*
@@ -284,6 +314,45 @@ static inline int crypt_s390_kmac(long func, void *param,
 }
 
 /**
+ * crypt_s390_kmctr:
+ * @func: the function code passed to KMCTR; see crypt_s390_kmctr_func
+ * @param: address of parameter block; see POP for details on each func
+ * @dest: address of destination memory area
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ * @counter: address of counter value
+ *
+ * Executes the KMCTR (CIPHER MESSAGE WITH COUNTER) operation of the CPU.
+ *
+ * Returns -1 for failure, 0 for the query func, number of processed
+ * bytes for encryption/decryption funcs
+ */
+static inline int crypt_s390_kmctr(long func, void *param, u8 *dest,
+				 const u8 *src, long src_len, u8 *counter)
+{
+	register long __func asm("0") = func & CRYPT_S390_FUNC_MASK;
+	register void *__param asm("1") = param;
+	register const u8 *__src asm("2") = src;
+	register long __src_len asm("3") = src_len;
+	register u8 *__dest asm("4") = dest;
+	register u8 *__ctr asm("6") = counter;
+	int ret = -1;
+
+	asm volatile(
+		"0:	.insn	rrf,0xb92d0000,%3,%1,%4,0 \n" /* KMCTR opcode */
+		"1:	brc	1,0b \n" /* handle partial completion */
+		"	la	%0,0\n"
+		"2:\n"
+		EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+		: "+d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest),
+		  "+a" (__ctr)
+		: "d" (__func), "a" (__param) : "cc", "memory");
+	if (ret < 0)
+		return ret;
+	return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len;
+}
+
+/**
  * crypt_s390_func_available:
  * @func: the function code of the specific function; 0 if op in general
  *
@@ -291,13 +360,17 @@ static inline int crypt_s390_kmac(long func, void *param,
  *
  * Returns 1 if func available; 0 if func or op in general not available
  */
-static inline int crypt_s390_func_available(int func)
+static inline int crypt_s390_func_available(int func,
+					    unsigned int facility_mask)
 {
 	unsigned char status[16];
 	int ret;
 
-	/* check if CPACF facility (bit 17) is available */
-	if (!test_facility(17))
+	if (facility_mask & CRYPT_S390_MSA && !test_facility(17))
+		return 0;
+	if (facility_mask & CRYPT_S390_MSA3 && !test_facility(76))
+		return 0;
+	if (facility_mask & CRYPT_S390_MSA4 && !test_facility(77))
 		return 0;
 
 	switch (func & CRYPT_S390_OP_MASK) {
@@ -316,6 +389,10 @@ static inline int crypt_s390_func_available(int func)
 	case CRYPT_S390_KMAC:
 		ret = crypt_s390_kmac(KMAC_QUERY, &status, NULL, 0);
 		break;
+	case CRYPT_S390_KMCTR:
+		ret = crypt_s390_kmctr(KMCTR_QUERY, &status, NULL, NULL, 0,
+				       NULL);
+		break;
 	default:
 		return 0;
 	}
@@ -326,4 +403,31 @@ static inline int crypt_s390_func_available(int func)
 	return (status[func >> 3] & (0x80 >> (func & 7))) != 0;
 }
 
+/**
+ * crypt_s390_pcc:
+ * @func: the function code passed to KM; see crypt_s390_km_func
+ * @param: address of parameter block; see POP for details on each func
+ *
+ * Executes the PCC (PERFORM CRYPTOGRAPHIC COMPUTATION) operation of the CPU.
+ *
+ * Returns -1 for failure, 0 for success.
+ */
+static inline int crypt_s390_pcc(long func, void *param)
+{
+	register long __func asm("0") = func & 0x7f; /* encrypt or decrypt */
+	register void *__param asm("1") = param;
+	int ret = -1;
+
+	asm volatile(
+		"0:	.insn	rre,0xb92c0000,0,0 \n" /* PCC opcode */
+		"1:	brc	1,0b \n" /* handle partial completion */
+		"	la	%0,0\n"
+		"2:\n"
+		EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+		: "+d" (ret)
+		: "d" (__func), "a" (__param) : "cc", "memory");
+	return ret;
+}
+
+
 #endif	/* _CRYPTO_ARCH_S390_CRYPT_S390_H */
diff --git a/arch/s390/crypto/des_check_key.c b/arch/s390/crypto/des_check_key.c
deleted file mode 100644
index 5706af26644..00000000000
--- a/arch/s390/crypto/des_check_key.c
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Cryptographic API.
- *
- * Function for checking keys for the DES and Tripple DES Encryption
- * algorithms.
- *
- * Originally released as descore by Dana L. How <how@isl.stanford.edu>.
- * Modified by Raimar Falke <rf13@inf.tu-dresden.de> for the Linux-Kernel.
- * Derived from Cryptoapi and Nettle implementations, adapted for in-place
- * scatterlist interface.  Changed LGPL to GPL per section 3 of the LGPL.
- *
- * s390 Version:
- *   Copyright IBM Corp. 2003
- *   Author(s): Thomas Spatzier
- *		Jan Glauber (jan.glauber@de.ibm.com)
- *
- * Derived from "crypto/des.c"
- *   Copyright (c) 1992 Dana L. How.
- *   Copyright (c) Raimar Falke <rf13@inf.tu-dresden.de>
- *   Copyright (c) Gisle Sflensminde <gisle@ii.uib.no>
- *   Copyright (C) 2001 Niels Mvller.
- *   Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- */
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/crypto.h>
-#include "crypto_des.h"
-
-#define ROR(d,c,o)	((d) = (d) >> (c) | (d) << (o))
-
-static const u8 parity[] = {
-	8,1,0,8,0,8,8,0,0,8,8,0,8,0,2,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,3,
-	0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,
-	0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,
-	8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,
-	0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,
-	8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,
-	8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,
-	4,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,5,0,8,0,8,8,0,0,8,8,0,8,0,6,8,
-};
-
-/*
- * RFC2451: Weak key checks SHOULD be performed.
- */
-int
-crypto_des_check_key(const u8 *key, unsigned int keylen, u32 *flags)
-{
-	u32 n, w;
-
-	n  = parity[key[0]]; n <<= 4;
-	n |= parity[key[1]]; n <<= 4;
-	n |= parity[key[2]]; n <<= 4;
-	n |= parity[key[3]]; n <<= 4;
-	n |= parity[key[4]]; n <<= 4;
-	n |= parity[key[5]]; n <<= 4;
-	n |= parity[key[6]]; n <<= 4;
-	n |= parity[key[7]];
-	w = 0x88888888L;
-
-	if ((*flags & CRYPTO_TFM_REQ_WEAK_KEY)
-	    && !((n - (w >> 3)) & w)) {  /* 1 in 10^10 keys passes this test */
-		if (n < 0x41415151) {
-			if (n < 0x31312121) {
-				if (n < 0x14141515) {
-					/* 01 01 01 01 01 01 01 01 */
-					if (n == 0x11111111) goto weak;
-					/* 01 1F 01 1F 01 0E 01 0E */
-					if (n == 0x13131212) goto weak;
-				} else {
-					/* 01 E0 01 E0 01 F1 01 F1 */
-					if (n == 0x14141515) goto weak;
-					/* 01 FE 01 FE 01 FE 01 FE */
-					if (n == 0x16161616) goto weak;
-				}
-			} else {
-				if (n < 0x34342525) {
-					/* 1F 01 1F 01 0E 01 0E 01 */
-					if (n == 0x31312121) goto weak;
-					/* 1F 1F 1F 1F 0E 0E 0E 0E (?) */
-					if (n == 0x33332222) goto weak;
-				} else {
-					/* 1F E0 1F E0 0E F1 0E F1 */
-					if (n == 0x34342525) goto weak;
-					/* 1F FE 1F FE 0E FE 0E FE */
-					if (n == 0x36362626) goto weak;
-				}
-			}
-		} else {
-			if (n < 0x61616161) {
-				if (n < 0x44445555) {
-					/* E0 01 E0 01 F1 01 F1 01 */
-					if (n == 0x41415151) goto weak;
-					/* E0 1F E0 1F F1 0E F1 0E */
-					if (n == 0x43435252) goto weak;
-				} else {
-					/* E0 E0 E0 E0 F1 F1 F1 F1 (?) */
-					if (n == 0x44445555) goto weak;
-					/* E0 FE E0 FE F1 FE F1 FE */
-					if (n == 0x46465656) goto weak;
-				}
-			} else {
-				if (n < 0x64646565) {
-					/* FE 01 FE 01 FE 01 FE 01 */
-					if (n == 0x61616161) goto weak;
-					/* FE 1F FE 1F FE 0E FE 0E */
-					if (n == 0x63636262) goto weak;
-				} else {
-					/* FE E0 FE E0 FE F1 FE F1 */
-					if (n == 0x64646565) goto weak;
-					/* FE FE FE FE FE FE FE FE */
-					if (n == 0x66666666) goto weak;
-				}
-			}
-		}
-	}
-	return 0;
-weak:
-	*flags |= CRYPTO_TFM_RES_WEAK_KEY;
-	return -EINVAL;
-}
-
-EXPORT_SYMBOL(crypto_des_check_key);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Key Check function for DES &  DES3 Cipher Algorithms");
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index cc542011839..a52bfd124d8 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -3,7 +3,7 @@
  *
  * s390 implementation of the DES Cipher Algorithm.
  *
- * Copyright IBM Corp. 2003,2007
+ * Copyright IBM Corp. 2003,2011
  * Author(s): Thomas Spatzier
  *	      Jan Glauber (jan.glauber@de.ibm.com)
  *
@@ -22,22 +22,19 @@
 
 #include "crypt_s390.h"
 
-#define DES3_192_KEY_SIZE	(3 * DES_KEY_SIZE)
+#define DES3_KEY_SIZE	(3 * DES_KEY_SIZE)
 
-struct crypt_s390_des_ctx {
-	u8 iv[DES_BLOCK_SIZE];
-	u8 key[DES_KEY_SIZE];
-};
+static u8 *ctrblk;
 
-struct crypt_s390_des3_192_ctx {
+struct s390_des_ctx {
 	u8 iv[DES_BLOCK_SIZE];
-	u8 key[DES3_192_KEY_SIZE];
+	u8 key[DES3_KEY_SIZE];
 };
 
 static int des_setkey(struct crypto_tfm *tfm, const u8 *key,
-		      unsigned int keylen)
+		      unsigned int key_len)
 {
-	struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm);
+	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 	u32 *flags = &tfm->crt_flags;
 	u32 tmp[DES_EXPKEY_WORDS];
 
@@ -47,22 +44,22 @@ static int des_setkey(struct crypto_tfm *tfm, const u8 *key,
 		return -EINVAL;
 	}
 
-	memcpy(dctx->key, key, keylen);
+	memcpy(ctx->key, key, key_len);
 	return 0;
 }
 
 static void des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
-	struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm);
+	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	crypt_s390_km(KM_DEA_ENCRYPT, dctx->key, out, in, DES_BLOCK_SIZE);
+	crypt_s390_km(KM_DEA_ENCRYPT, ctx->key, out, in, DES_BLOCK_SIZE);
 }
 
 static void des_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
-	struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm);
+	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	crypt_s390_km(KM_DEA_DECRYPT, dctx->key, out, in, DES_BLOCK_SIZE);
+	crypt_s390_km(KM_DEA_DECRYPT, ctx->key, out, in, DES_BLOCK_SIZE);
 }
 
 static struct crypto_alg des_alg = {
@@ -71,7 +68,7 @@ static struct crypto_alg des_alg = {
 	.cra_priority		=	CRYPT_S390_PRIORITY,
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	DES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct crypt_s390_des_ctx),
+	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
 	.cra_module		=	THIS_MODULE,
 	.cra_list		=	LIST_HEAD_INIT(des_alg.cra_list),
 	.cra_u			=	{
@@ -86,7 +83,7 @@ static struct crypto_alg des_alg = {
 };
 
 static int ecb_desall_crypt(struct blkcipher_desc *desc, long func,
-			    void *param, struct blkcipher_walk *walk)
+			    u8 *key, struct blkcipher_walk *walk)
 {
 	int ret = blkcipher_walk_virt(desc, walk);
 	unsigned int nbytes;
@@ -97,7 +94,7 @@ static int ecb_desall_crypt(struct blkcipher_desc *desc, long func,
 		u8 *out = walk->dst.virt.addr;
 		u8 *in = walk->src.virt.addr;
 
-		ret = crypt_s390_km(func, param, out, in, n);
+		ret = crypt_s390_km(func, key, out, in, n);
 		BUG_ON((ret < 0) || (ret != n));
 
 		nbytes &= DES_BLOCK_SIZE - 1;
@@ -108,7 +105,7 @@ static int ecb_desall_crypt(struct blkcipher_desc *desc, long func,
 }
 
 static int cbc_desall_crypt(struct blkcipher_desc *desc, long func,
-			    void *param, struct blkcipher_walk *walk)
+			    u8 *iv, struct blkcipher_walk *walk)
 {
 	int ret = blkcipher_walk_virt(desc, walk);
 	unsigned int nbytes = walk->nbytes;
@@ -116,20 +113,20 @@ static int cbc_desall_crypt(struct blkcipher_desc *desc, long func,
 	if (!nbytes)
 		goto out;
 
-	memcpy(param, walk->iv, DES_BLOCK_SIZE);
+	memcpy(iv, walk->iv, DES_BLOCK_SIZE);
 	do {
 		/* only use complete blocks */
 		unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1);
 		u8 *out = walk->dst.virt.addr;
 		u8 *in = walk->src.virt.addr;
 
-		ret = crypt_s390_kmc(func, param, out, in, n);
+		ret = crypt_s390_kmc(func, iv, out, in, n);
 		BUG_ON((ret < 0) || (ret != n));
 
 		nbytes &= DES_BLOCK_SIZE - 1;
 		ret = blkcipher_walk_done(desc, walk, nbytes);
 	} while ((nbytes = walk->nbytes));
-	memcpy(walk->iv, param, DES_BLOCK_SIZE);
+	memcpy(walk->iv, iv, DES_BLOCK_SIZE);
 
 out:
 	return ret;
@@ -139,22 +136,22 @@ static int ecb_des_encrypt(struct blkcipher_desc *desc,
 			   struct scatterlist *dst, struct scatterlist *src,
 			   unsigned int nbytes)
 {
-	struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, KM_DEA_ENCRYPT, sctx->key, &walk);
+	return ecb_desall_crypt(desc, KM_DEA_ENCRYPT, ctx->key, &walk);
 }
 
 static int ecb_des_decrypt(struct blkcipher_desc *desc,
 			   struct scatterlist *dst, struct scatterlist *src,
 			   unsigned int nbytes)
 {
-	struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, KM_DEA_DECRYPT, sctx->key, &walk);
+	return ecb_desall_crypt(desc, KM_DEA_DECRYPT, ctx->key, &walk);
 }
 
 static struct crypto_alg ecb_des_alg = {
@@ -163,7 +160,7 @@ static struct crypto_alg ecb_des_alg = {
 	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
 	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
 	.cra_blocksize		=	DES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct crypt_s390_des_ctx),
+	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
 	.cra_type		=	&crypto_blkcipher_type,
 	.cra_module		=	THIS_MODULE,
 	.cra_list		=	LIST_HEAD_INIT(ecb_des_alg.cra_list),
@@ -182,22 +179,22 @@ static int cbc_des_encrypt(struct blkcipher_desc *desc,
 			   struct scatterlist *dst, struct scatterlist *src,
 			   unsigned int nbytes)
 {
-	struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, sctx->iv, &walk);
+	return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, ctx->iv, &walk);
 }
 
 static int cbc_des_decrypt(struct blkcipher_desc *desc,
 			   struct scatterlist *dst, struct scatterlist *src,
 			   unsigned int nbytes)
 {
-	struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, sctx->iv, &walk);
+	return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, ctx->iv, &walk);
 }
 
 static struct crypto_alg cbc_des_alg = {
@@ -206,7 +203,7 @@ static struct crypto_alg cbc_des_alg = {
 	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
 	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
 	.cra_blocksize		=	DES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct crypt_s390_des_ctx),
+	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
 	.cra_type		=	&crypto_blkcipher_type,
 	.cra_module		=	THIS_MODULE,
 	.cra_list		=	LIST_HEAD_INIT(cbc_des_alg.cra_list),
@@ -235,10 +232,10 @@ static struct crypto_alg cbc_des_alg = {
  *   property.
  *
  */
-static int des3_192_setkey(struct crypto_tfm *tfm, const u8 *key,
-			   unsigned int keylen)
+static int des3_setkey(struct crypto_tfm *tfm, const u8 *key,
+		       unsigned int key_len)
 {
-	struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm);
+	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 	u32 *flags = &tfm->crt_flags;
 
 	if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) &&
@@ -248,141 +245,276 @@ static int des3_192_setkey(struct crypto_tfm *tfm, const u8 *key,
 		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
 		return -EINVAL;
 	}
-	memcpy(dctx->key, key, keylen);
+	memcpy(ctx->key, key, key_len);
 	return 0;
 }
 
-static void des3_192_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+static void des3_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm);
+	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	crypt_s390_km(KM_TDEA_192_ENCRYPT, dctx->key, dst, (void*)src,
-		      DES_BLOCK_SIZE);
+	crypt_s390_km(KM_TDEA_192_ENCRYPT, ctx->key, dst, src, DES_BLOCK_SIZE);
 }
 
-static void des3_192_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+static void des3_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm);
+	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	crypt_s390_km(KM_TDEA_192_DECRYPT, dctx->key, dst, (void*)src,
-		      DES_BLOCK_SIZE);
+	crypt_s390_km(KM_TDEA_192_DECRYPT, ctx->key, dst, src, DES_BLOCK_SIZE);
 }
 
-static struct crypto_alg des3_192_alg = {
+static struct crypto_alg des3_alg = {
 	.cra_name		=	"des3_ede",
 	.cra_driver_name	=	"des3_ede-s390",
 	.cra_priority		=	CRYPT_S390_PRIORITY,
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	DES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct crypt_s390_des3_192_ctx),
+	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(des3_192_alg.cra_list),
+	.cra_list		=	LIST_HEAD_INIT(des3_alg.cra_list),
 	.cra_u			=	{
 		.cipher = {
-			.cia_min_keysize	=	DES3_192_KEY_SIZE,
-			.cia_max_keysize	=	DES3_192_KEY_SIZE,
-			.cia_setkey		=	des3_192_setkey,
-			.cia_encrypt		=	des3_192_encrypt,
-			.cia_decrypt		=	des3_192_decrypt,
+			.cia_min_keysize	=	DES3_KEY_SIZE,
+			.cia_max_keysize	=	DES3_KEY_SIZE,
+			.cia_setkey		=	des3_setkey,
+			.cia_encrypt		=	des3_encrypt,
+			.cia_decrypt		=	des3_decrypt,
 		}
 	}
 };
 
-static int ecb_des3_192_encrypt(struct blkcipher_desc *desc,
-				struct scatterlist *dst,
-				struct scatterlist *src, unsigned int nbytes)
+static int ecb_des3_encrypt(struct blkcipher_desc *desc,
+			    struct scatterlist *dst, struct scatterlist *src,
+			    unsigned int nbytes)
 {
-	struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, KM_TDEA_192_ENCRYPT, sctx->key, &walk);
+	return ecb_desall_crypt(desc, KM_TDEA_192_ENCRYPT, ctx->key, &walk);
 }
 
-static int ecb_des3_192_decrypt(struct blkcipher_desc *desc,
-				struct scatterlist *dst,
-				struct scatterlist *src, unsigned int nbytes)
+static int ecb_des3_decrypt(struct blkcipher_desc *desc,
+			    struct scatterlist *dst, struct scatterlist *src,
+			    unsigned int nbytes)
 {
-	struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, KM_TDEA_192_DECRYPT, sctx->key, &walk);
+	return ecb_desall_crypt(desc, KM_TDEA_192_DECRYPT, ctx->key, &walk);
 }
 
-static struct crypto_alg ecb_des3_192_alg = {
+static struct crypto_alg ecb_des3_alg = {
 	.cra_name		=	"ecb(des3_ede)",
 	.cra_driver_name	=	"ecb-des3_ede-s390",
 	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
 	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
 	.cra_blocksize		=	DES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct crypt_s390_des3_192_ctx),
+	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
 	.cra_type		=	&crypto_blkcipher_type,
 	.cra_module		=	THIS_MODULE,
 	.cra_list		=	LIST_HEAD_INIT(
-						ecb_des3_192_alg.cra_list),
+						ecb_des3_alg.cra_list),
 	.cra_u			=	{
 		.blkcipher = {
-			.min_keysize		=	DES3_192_KEY_SIZE,
-			.max_keysize		=	DES3_192_KEY_SIZE,
-			.setkey			=	des3_192_setkey,
-			.encrypt		=	ecb_des3_192_encrypt,
-			.decrypt		=	ecb_des3_192_decrypt,
+			.min_keysize		=	DES3_KEY_SIZE,
+			.max_keysize		=	DES3_KEY_SIZE,
+			.setkey			=	des3_setkey,
+			.encrypt		=	ecb_des3_encrypt,
+			.decrypt		=	ecb_des3_decrypt,
 		}
 	}
 };
 
-static int cbc_des3_192_encrypt(struct blkcipher_desc *desc,
-				struct scatterlist *dst,
-				struct scatterlist *src, unsigned int nbytes)
+static int cbc_des3_encrypt(struct blkcipher_desc *desc,
+			    struct scatterlist *dst, struct scatterlist *src,
+			    unsigned int nbytes)
 {
-	struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, sctx->iv, &walk);
+	return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, ctx->iv, &walk);
 }
 
-static int cbc_des3_192_decrypt(struct blkcipher_desc *desc,
-				struct scatterlist *dst,
-				struct scatterlist *src, unsigned int nbytes)
+static int cbc_des3_decrypt(struct blkcipher_desc *desc,
+			    struct scatterlist *dst, struct scatterlist *src,
+			    unsigned int nbytes)
 {
-	struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, sctx->iv, &walk);
+	return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, ctx->iv, &walk);
 }
 
-static struct crypto_alg cbc_des3_192_alg = {
+static struct crypto_alg cbc_des3_alg = {
 	.cra_name		=	"cbc(des3_ede)",
 	.cra_driver_name	=	"cbc-des3_ede-s390",
 	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
 	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
 	.cra_blocksize		=	DES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct crypt_s390_des3_192_ctx),
+	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
 	.cra_type		=	&crypto_blkcipher_type,
 	.cra_module		=	THIS_MODULE,
 	.cra_list		=	LIST_HEAD_INIT(
-						cbc_des3_192_alg.cra_list),
+						cbc_des3_alg.cra_list),
 	.cra_u			=	{
 		.blkcipher = {
-			.min_keysize		=	DES3_192_KEY_SIZE,
-			.max_keysize		=	DES3_192_KEY_SIZE,
+			.min_keysize		=	DES3_KEY_SIZE,
+			.max_keysize		=	DES3_KEY_SIZE,
 			.ivsize			=	DES_BLOCK_SIZE,
-			.setkey			=	des3_192_setkey,
-			.encrypt		=	cbc_des3_192_encrypt,
-			.decrypt		=	cbc_des3_192_decrypt,
+			.setkey			=	des3_setkey,
+			.encrypt		=	cbc_des3_encrypt,
+			.decrypt		=	cbc_des3_decrypt,
 		}
 	}
 };
 
-static int des_s390_init(void)
+static int ctr_desall_crypt(struct blkcipher_desc *desc, long func,
+			    struct s390_des_ctx *ctx, struct blkcipher_walk *walk)
+{
+	int ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE);
+	unsigned int i, n, nbytes;
+	u8 buf[DES_BLOCK_SIZE];
+	u8 *out, *in;
+
+	memcpy(ctrblk, walk->iv, DES_BLOCK_SIZE);
+	while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
+		out = walk->dst.virt.addr;
+		in = walk->src.virt.addr;
+		while (nbytes >= DES_BLOCK_SIZE) {
+			/* align to block size, max. PAGE_SIZE */
+			n = (nbytes > PAGE_SIZE) ? PAGE_SIZE :
+				nbytes & ~(DES_BLOCK_SIZE - 1);
+			for (i = DES_BLOCK_SIZE; i < n; i += DES_BLOCK_SIZE) {
+				memcpy(ctrblk + i, ctrblk + i - DES_BLOCK_SIZE,
+				       DES_BLOCK_SIZE);
+				crypto_inc(ctrblk + i, DES_BLOCK_SIZE);
+			}
+			ret = crypt_s390_kmctr(func, ctx->key, out, in, n, ctrblk);
+			BUG_ON((ret < 0) || (ret != n));
+			if (n > DES_BLOCK_SIZE)
+				memcpy(ctrblk, ctrblk + n - DES_BLOCK_SIZE,
+				       DES_BLOCK_SIZE);
+			crypto_inc(ctrblk, DES_BLOCK_SIZE);
+			out += n;
+			in += n;
+			nbytes -= n;
+		}
+		ret = blkcipher_walk_done(desc, walk, nbytes);
+	}
+
+	/* final block may be < DES_BLOCK_SIZE, copy only nbytes */
+	if (nbytes) {
+		out = walk->dst.virt.addr;
+		in = walk->src.virt.addr;
+		ret = crypt_s390_kmctr(func, ctx->key, buf, in,
+				       DES_BLOCK_SIZE, ctrblk);
+		BUG_ON(ret < 0 || ret != DES_BLOCK_SIZE);
+		memcpy(out, buf, nbytes);
+		crypto_inc(ctrblk, DES_BLOCK_SIZE);
+		ret = blkcipher_walk_done(desc, walk, 0);
+	}
+	memcpy(walk->iv, ctrblk, DES_BLOCK_SIZE);
+	return ret;
+}
+
+static int ctr_des_encrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ctr_desall_crypt(desc, KMCTR_DEA_ENCRYPT, ctx, &walk);
+}
+
+static int ctr_des_decrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ctr_desall_crypt(desc, KMCTR_DEA_DECRYPT, ctx, &walk);
+}
+
+static struct crypto_alg ctr_des_alg = {
+	.cra_name		=	"ctr(des)",
+	.cra_driver_name	=	"ctr-des-s390",
+	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	1,
+	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(ctr_des_alg.cra_list),
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize		=	DES_KEY_SIZE,
+			.max_keysize		=	DES_KEY_SIZE,
+			.ivsize			=	DES_BLOCK_SIZE,
+			.setkey			=	des_setkey,
+			.encrypt		=	ctr_des_encrypt,
+			.decrypt		=	ctr_des_decrypt,
+		}
+	}
+};
+
+static int ctr_des3_encrypt(struct blkcipher_desc *desc,
+			    struct scatterlist *dst, struct scatterlist *src,
+			    unsigned int nbytes)
+{
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ctr_desall_crypt(desc, KMCTR_TDEA_192_ENCRYPT, ctx, &walk);
+}
+
+static int ctr_des3_decrypt(struct blkcipher_desc *desc,
+			    struct scatterlist *dst, struct scatterlist *src,
+			    unsigned int nbytes)
+{
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ctr_desall_crypt(desc, KMCTR_TDEA_192_DECRYPT, ctx, &walk);
+}
+
+static struct crypto_alg ctr_des3_alg = {
+	.cra_name		=	"ctr(des3_ede)",
+	.cra_driver_name	=	"ctr-des3_ede-s390",
+	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	1,
+	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(ctr_des3_alg.cra_list),
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize		=	DES3_KEY_SIZE,
+			.max_keysize		=	DES3_KEY_SIZE,
+			.ivsize			=	DES_BLOCK_SIZE,
+			.setkey			=	des3_setkey,
+			.encrypt		=	ctr_des3_encrypt,
+			.decrypt		=	ctr_des3_decrypt,
+		}
+	}
+};
+
+static int __init des_s390_init(void)
 {
 	int ret;
 
-	if (!crypt_s390_func_available(KM_DEA_ENCRYPT) ||
-	    !crypt_s390_func_available(KM_TDEA_192_ENCRYPT))
+	if (!crypt_s390_func_available(KM_DEA_ENCRYPT, CRYPT_S390_MSA) ||
+	    !crypt_s390_func_available(KM_TDEA_192_ENCRYPT, CRYPT_S390_MSA))
 		return -EOPNOTSUPP;
 
 	ret = crypto_register_alg(&des_alg);
@@ -394,23 +526,46 @@ static int des_s390_init(void)
 	ret = crypto_register_alg(&cbc_des_alg);
 	if (ret)
 		goto cbc_des_err;
-	ret = crypto_register_alg(&des3_192_alg);
+	ret = crypto_register_alg(&des3_alg);
 	if (ret)
-		goto des3_192_err;
-	ret = crypto_register_alg(&ecb_des3_192_alg);
+		goto des3_err;
+	ret = crypto_register_alg(&ecb_des3_alg);
 	if (ret)
-		goto ecb_des3_192_err;
-	ret = crypto_register_alg(&cbc_des3_192_alg);
+		goto ecb_des3_err;
+	ret = crypto_register_alg(&cbc_des3_alg);
 	if (ret)
-		goto cbc_des3_192_err;
+		goto cbc_des3_err;
+
+	if (crypt_s390_func_available(KMCTR_DEA_ENCRYPT,
+			CRYPT_S390_MSA | CRYPT_S390_MSA4) &&
+	    crypt_s390_func_available(KMCTR_TDEA_192_ENCRYPT,
+			CRYPT_S390_MSA | CRYPT_S390_MSA4)) {
+		ret = crypto_register_alg(&ctr_des_alg);
+		if (ret)
+			goto ctr_des_err;
+		ret = crypto_register_alg(&ctr_des3_alg);
+		if (ret)
+			goto ctr_des3_err;
+		ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
+		if (!ctrblk) {
+			ret = -ENOMEM;
+			goto ctr_mem_err;
+		}
+	}
 out:
 	return ret;
 
-cbc_des3_192_err:
-	crypto_unregister_alg(&ecb_des3_192_alg);
-ecb_des3_192_err:
-	crypto_unregister_alg(&des3_192_alg);
-des3_192_err:
+ctr_mem_err:
+	crypto_unregister_alg(&ctr_des3_alg);
+ctr_des3_err:
+	crypto_unregister_alg(&ctr_des_alg);
+ctr_des_err:
+	crypto_unregister_alg(&cbc_des3_alg);
+cbc_des3_err:
+	crypto_unregister_alg(&ecb_des3_alg);
+ecb_des3_err:
+	crypto_unregister_alg(&des3_alg);
+des3_err:
 	crypto_unregister_alg(&cbc_des_alg);
 cbc_des_err:
 	crypto_unregister_alg(&ecb_des_alg);
@@ -422,9 +577,14 @@ des_err:
 
 static void __exit des_s390_exit(void)
 {
-	crypto_unregister_alg(&cbc_des3_192_alg);
-	crypto_unregister_alg(&ecb_des3_192_alg);
-	crypto_unregister_alg(&des3_192_alg);
+	if (ctrblk) {
+		crypto_unregister_alg(&ctr_des_alg);
+		crypto_unregister_alg(&ctr_des3_alg);
+		free_page((unsigned long) ctrblk);
+	}
+	crypto_unregister_alg(&cbc_des3_alg);
+	crypto_unregister_alg(&ecb_des3_alg);
+	crypto_unregister_alg(&des3_alg);
 	crypto_unregister_alg(&cbc_des_alg);
 	crypto_unregister_alg(&ecb_des_alg);
 	crypto_unregister_alg(&des_alg);
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
new file mode 100644
index 00000000000..b1bd170f24b
--- /dev/null
+++ b/arch/s390/crypto/ghash_s390.c
@@ -0,0 +1,162 @@
+/*
+ * Cryptographic API.
+ *
+ * s390 implementation of the GHASH algorithm for GCM (Galois/Counter Mode).
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/module.h>
+
+#include "crypt_s390.h"
+
+#define GHASH_BLOCK_SIZE	16
+#define GHASH_DIGEST_SIZE	16
+
+struct ghash_ctx {
+	u8 icv[16];
+	u8 key[16];
+};
+
+struct ghash_desc_ctx {
+	u8 buffer[GHASH_BLOCK_SIZE];
+	u32 bytes;
+};
+
+static int ghash_init(struct shash_desc *desc)
+{
+	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	memset(dctx, 0, sizeof(*dctx));
+
+	return 0;
+}
+
+static int ghash_setkey(struct crypto_shash *tfm,
+			const u8 *key, unsigned int keylen)
+{
+	struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
+
+	if (keylen != GHASH_BLOCK_SIZE) {
+		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	memcpy(ctx->key, key, GHASH_BLOCK_SIZE);
+	memset(ctx->icv, 0, GHASH_BLOCK_SIZE);
+
+	return 0;
+}
+
+static int ghash_update(struct shash_desc *desc,
+			 const u8 *src, unsigned int srclen)
+{
+	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+	struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
+	unsigned int n;
+	u8 *buf = dctx->buffer;
+	int ret;
+
+	if (dctx->bytes) {
+		u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes);
+
+		n = min(srclen, dctx->bytes);
+		dctx->bytes -= n;
+		srclen -= n;
+
+		memcpy(pos, src, n);
+		src += n;
+
+		if (!dctx->bytes) {
+			ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf,
+					      GHASH_BLOCK_SIZE);
+			BUG_ON(ret != GHASH_BLOCK_SIZE);
+		}
+	}
+
+	n = srclen & ~(GHASH_BLOCK_SIZE - 1);
+	if (n) {
+		ret = crypt_s390_kimd(KIMD_GHASH, ctx, src, n);
+		BUG_ON(ret != n);
+		src += n;
+		srclen -= n;
+	}
+
+	if (srclen) {
+		dctx->bytes = GHASH_BLOCK_SIZE - srclen;
+		memcpy(buf, src, srclen);
+	}
+
+	return 0;
+}
+
+static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
+{
+	u8 *buf = dctx->buffer;
+	int ret;
+
+	if (dctx->bytes) {
+		u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes);
+
+		memset(pos, 0, dctx->bytes);
+
+		ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf, GHASH_BLOCK_SIZE);
+		BUG_ON(ret != GHASH_BLOCK_SIZE);
+	}
+
+	dctx->bytes = 0;
+}
+
+static int ghash_final(struct shash_desc *desc, u8 *dst)
+{
+	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+	struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
+
+	ghash_flush(ctx, dctx);
+	memcpy(dst, ctx->icv, GHASH_BLOCK_SIZE);
+
+	return 0;
+}
+
+static struct shash_alg ghash_alg = {
+	.digestsize	= GHASH_DIGEST_SIZE,
+	.init		= ghash_init,
+	.update		= ghash_update,
+	.final		= ghash_final,
+	.setkey		= ghash_setkey,
+	.descsize	= sizeof(struct ghash_desc_ctx),
+	.base		= {
+		.cra_name		= "ghash",
+		.cra_driver_name	= "ghash-s390",
+		.cra_priority		= CRYPT_S390_PRIORITY,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize		= GHASH_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct ghash_ctx),
+		.cra_module		= THIS_MODULE,
+		.cra_list		= LIST_HEAD_INIT(ghash_alg.base.cra_list),
+	},
+};
+
+static int __init ghash_mod_init(void)
+{
+	if (!crypt_s390_func_available(KIMD_GHASH,
+				       CRYPT_S390_MSA | CRYPT_S390_MSA4))
+		return -EOPNOTSUPP;
+
+	return crypto_register_shash(&ghash_alg);
+}
+
+static void __exit ghash_mod_exit(void)
+{
+	crypto_unregister_shash(&ghash_alg);
+}
+
+module_init(ghash_mod_init);
+module_exit(ghash_mod_exit);
+
+MODULE_ALIAS("ghash");
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("GHASH Message Digest Algorithm, s390 implementation");
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 975e3ab13cb..0808fbf0f7d 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -76,7 +76,7 @@ static void prng_seed(int nbytes)
 
 	/* Add the entropy */
 	while (nbytes >= 8) {
-		*((__u64 *)parm_block) ^= *((__u64 *)buf+i*8);
+		*((__u64 *)parm_block) ^= *((__u64 *)(buf+i));
 		prng_add_entropy();
 		i += 8;
 		nbytes -= 8;
@@ -166,7 +166,7 @@ static int __init prng_init(void)
 	int ret;
 
 	/* check if the CPU has a PRNG */
-	if (!crypt_s390_func_available(KMC_PRNG))
+	if (!crypt_s390_func_available(KMC_PRNG, CRYPT_S390_MSA))
 		return -EOPNOTSUPP;
 
 	if (prng_chunk_size < 8)
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index f6de7826c97..e9868c6e0a0 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -90,7 +90,7 @@ static struct shash_alg alg = {
 
 static int __init sha1_s390_init(void)
 {
-	if (!crypt_s390_func_available(KIMD_SHA_1))
+	if (!crypt_s390_func_available(KIMD_SHA_1, CRYPT_S390_MSA))
 		return -EOPNOTSUPP;
 	return crypto_register_shash(&alg);
 }
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
index 61a7db37212..5ed8d64fc2e 100644
--- a/arch/s390/crypto/sha256_s390.c
+++ b/arch/s390/crypto/sha256_s390.c
@@ -86,7 +86,7 @@ static struct shash_alg alg = {
 
 static int sha256_s390_init(void)
 {
-	if (!crypt_s390_func_available(KIMD_SHA_256))
+	if (!crypt_s390_func_available(KIMD_SHA_256, CRYPT_S390_MSA))
 		return -EOPNOTSUPP;
 
 	return crypto_register_shash(&alg);
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
index 4bf73d0dc52..32a81383b69 100644
--- a/arch/s390/crypto/sha512_s390.c
+++ b/arch/s390/crypto/sha512_s390.c
@@ -132,7 +132,7 @@ static int __init init(void)
 {
 	int ret;
 
-	if (!crypt_s390_func_available(KIMD_SHA_512))
+	if (!crypt_s390_func_available(KIMD_SHA_512, CRYPT_S390_MSA))
 		return -EOPNOTSUPP;
 	if ((ret = crypto_register_shash(&sha512_alg)) < 0)
 		goto out;
diff --git a/arch/s390/include/asm/cacheflush.h b/arch/s390/include/asm/cacheflush.h
index 43a5c78046d..3e20383d092 100644
--- a/arch/s390/include/asm/cacheflush.h
+++ b/arch/s390/include/asm/cacheflush.h
@@ -11,5 +11,6 @@ void kernel_map_pages(struct page *page, int numpages, int enable);
 int set_memory_ro(unsigned long addr, int numpages);
 int set_memory_rw(unsigned long addr, int numpages);
 int set_memory_nx(unsigned long addr, int numpages);
+int set_memory_x(unsigned long addr, int numpages);
 
 #endif /* _S390_CACHEFLUSH_H */
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
index 72b2e2f2d32..7e91c58072e 100644
--- a/arch/s390/include/asm/diag.h
+++ b/arch/s390/include/asm/diag.h
@@ -9,9 +9,22 @@
 #define _ASM_S390_DIAG_H
 
 /*
- * Diagnose 10: Release pages
+ * Diagnose 10: Release page range
  */
-extern void diag10(unsigned long addr);
+static inline void diag10_range(unsigned long start_pfn, unsigned long num_pfn)
+{
+	unsigned long start_addr, end_addr;
+
+	start_addr = start_pfn << PAGE_SHIFT;
+	end_addr = (start_pfn + num_pfn - 1) << PAGE_SHIFT;
+
+	asm volatile(
+		"0:	diag	%0,%1,0x10\n"
+		"1:\n"
+		EX_TABLE(0b, 1b)
+		EX_TABLE(1b, 1b)
+		: : "a" (start_addr), "a" (end_addr));
+}
 
 /*
  * Diagnose 14: Input spool file manipulation
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 3c29be4836e..b7931faaef6 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -11,15 +11,13 @@ struct dyn_arch_ftrace { };
 
 #ifdef CONFIG_64BIT
 #define MCOUNT_INSN_SIZE  12
-#define MCOUNT_OFFSET	   8
 #else
 #define MCOUNT_INSN_SIZE  20
-#define MCOUNT_OFFSET	   4
 #endif
 
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
-	return addr - MCOUNT_OFFSET;
+	return addr;
 }
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
new file mode 100644
index 00000000000..95a6cf2b5b6
--- /dev/null
+++ b/arch/s390/include/asm/jump_label.h
@@ -0,0 +1,37 @@
+#ifndef _ASM_S390_JUMP_LABEL_H
+#define _ASM_S390_JUMP_LABEL_H
+
+#include <linux/types.h>
+
+#define JUMP_LABEL_NOP_SIZE 6
+
+#ifdef CONFIG_64BIT
+#define ASM_PTR ".quad"
+#define ASM_ALIGN ".balign 8"
+#else
+#define ASM_PTR ".long"
+#define ASM_ALIGN ".balign 4"
+#endif
+
+static __always_inline bool arch_static_branch(struct jump_label_key *key)
+{
+	asm goto("0:	brcl 0,0\n"
+		".pushsection __jump_table, \"aw\"\n"
+		ASM_ALIGN "\n"
+		ASM_PTR " 0b, %l[label], %0\n"
+		".popsection\n"
+		: : "X" (key) : : label);
+	return false;
+label:
+	return true;
+}
+
+typedef unsigned long jump_label_t;
+
+struct jump_entry {
+	jump_label_t code;
+	jump_label_t target;
+	jump_label_t key;
+};
+
+#endif
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index a6f0e7cc9cd..8c277caa8d3 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -23,7 +23,7 @@ static inline int init_new_context(struct task_struct *tsk,
 #ifdef CONFIG_64BIT
 	mm->context.asce_bits |= _ASCE_TYPE_REGION3;
 #endif
-	if (current->mm->context.alloc_pgste) {
+	if (current->mm && current->mm->context.alloc_pgste) {
 		/*
 		 * alloc_pgste indicates, that any NEW context will be created
 		 * with extended page tables. The old context is unchanged. The
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 64230bc392f..5ff15dacb57 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -23,7 +23,7 @@ CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w
 obj-y	:=  bitmap.o traps.o time.o process.o base.o early.o setup.o \
 	    processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \
 	    s390_ext.o debug.o irq.o ipl.o dis.o diag.o mem_detect.o \
-	    vdso.o vtime.o sysinfo.o nmi.o sclp.o
+	    vdso.o vtime.o sysinfo.o nmi.o sclp.o jump_label.o
 
 obj-y	+= $(if $(CONFIG_64BIT),entry64.o,entry.o)
 obj-y	+= $(if $(CONFIG_64BIT),reipl64.o,reipl.o)
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index c032d11da8a..8237fc07ac7 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -9,27 +9,6 @@
 #include <asm/diag.h>
 
 /*
- * Diagnose 10: Release pages
- */
-void diag10(unsigned long addr)
-{
-	if (addr >= 0x7ff00000)
-		return;
-	asm volatile(
-#ifdef CONFIG_64BIT
-		"	sam31\n"
-		"	diag	%0,%0,0x10\n"
-		"0:	sam64\n"
-#else
-		"	diag	%0,%0,0x10\n"
-		"0:\n"
-#endif
-		EX_TABLE(0b, 0b)
-		: : "a" (addr));
-}
-EXPORT_SYMBOL(diag10);
-
-/*
  * Diagnose 14: Input spool file manipulation
  */
 int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index c83726c9fe0..3d4a78fc1ad 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -672,6 +672,7 @@ static struct insn opcode_b2[] = {
 	{ "rp", 0x77, INSTR_S_RD },
 	{ "stcke", 0x78, INSTR_S_RD },
 	{ "sacf", 0x79, INSTR_S_RD },
+	{ "spp", 0x80, INSTR_S_RD },
 	{ "stsi", 0x7d, INSTR_S_RD },
 	{ "srnm", 0x99, INSTR_S_RD },
 	{ "stfpc", 0x9c, INSTR_S_RD },
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 648f64239a9..1b67fc6ebdc 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -836,7 +836,7 @@ restart_base:
 	stosm	__SF_EMPTY(%r15),0x04	# now we can turn dat on
 	basr	%r14,0
 	l	%r14,restart_addr-.(%r14)
-	br	%r14			# branch to start_secondary
+	basr	%r14,%r14		# branch to start_secondary
 restart_addr:
 	.long	start_secondary
 	.align	8
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 9d3603d6c51..9fd86456349 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -841,7 +841,7 @@ restart_base:
 	mvc	__LC_SYSTEM_TIMER(8),__TI_system_timer(%r1)
 	xc	__LC_STEAL_TIMER(8),__LC_STEAL_TIMER
 	stosm	__SF_EMPTY(%r15),0x04	# now we can turn dat on
-	jg	start_secondary
+	brasl	%r14,start_secondary
 	.align	8
 restart_vtime:
 	.long	0x7fffffff,0xffffffff
diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c
new file mode 100644
index 00000000000..44cc06bedf7
--- /dev/null
+++ b/arch/s390/kernel/jump_label.c
@@ -0,0 +1,59 @@
+/*
+ * Jump label s390 support
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/stop_machine.h>
+#include <linux/jump_label.h>
+#include <asm/ipl.h>
+
+#ifdef HAVE_JUMP_LABEL
+
+struct insn {
+	u16 opcode;
+	s32 offset;
+} __packed;
+
+struct insn_args {
+	unsigned long *target;
+	struct insn *insn;
+	ssize_t size;
+};
+
+static int __arch_jump_label_transform(void *data)
+{
+	struct insn_args *args = data;
+	int rc;
+
+	rc = probe_kernel_write(args->target, args->insn, args->size);
+	WARN_ON_ONCE(rc < 0);
+	return 0;
+}
+
+void arch_jump_label_transform(struct jump_entry *entry,
+			       enum jump_label_type type)
+{
+	struct insn_args args;
+	struct insn insn;
+
+	if (type == JUMP_LABEL_ENABLE) {
+		/* brcl 15,offset */
+		insn.opcode = 0xc0f4;
+		insn.offset = (entry->target - entry->code) >> 1;
+	} else {
+		/* brcl 0,0 */
+		insn.opcode = 0xc004;
+		insn.offset = 0;
+	}
+
+	args.target = (void *) entry->code;
+	args.insn = &insn;
+	args.size = JUMP_LABEL_NOP_SIZE;
+
+	stop_machine(__arch_jump_label_transform, &args, NULL);
+}
+
+#endif
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 63a97db83f9..63c7d9ff220 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -165,12 +165,12 @@ static void do_ext_call_interrupt(unsigned int ext_int_code,
 	kstat_cpu(smp_processor_id()).irqs[EXTINT_IPI]++;
 	/*
 	 * handle bit signal external calls
-	 *
-	 * For the ec_schedule signal we have to do nothing. All the work
-	 * is done automatically when we return from the interrupt.
 	 */
 	bits = xchg(&S390_lowcore.ext_call_fast, 0);
 
+	if (test_bit(ec_schedule, &bits))
+		scheduler_ipi();
+
 	if (test_bit(ec_call_function, &bits))
 		generic_smp_call_function_interrupt();
 
diff --git a/arch/s390/kvm/sie64a.S b/arch/s390/kvm/sie64a.S
index 7e9d30d567b..ab0e041ac54 100644
--- a/arch/s390/kvm/sie64a.S
+++ b/arch/s390/kvm/sie64a.S
@@ -48,10 +48,10 @@ sie_irq_handler:
 	tm	__TI_flags+7(%r2),_TIF_EXIT_SIE
 	jz	0f
 	larl	%r2,sie_exit			# work pending, leave sie
-	stg	%r2,__LC_RETURN_PSW+8
+	stg	%r2,SPI_PSW+8(0,%r15)
 	br	%r14
 0:	larl	%r2,sie_reenter			# re-enter with guest id
-	stg	%r2,__LC_RETURN_PSW+8
+	stg	%r2,SPI_PSW+8(0,%r15)
 1:	br	%r14
 
 /*
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index c66ffd8dbbb..1f1dba9dcf5 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -91,7 +91,7 @@ static long cmm_alloc_pages(long nr, long *counter,
 			} else
 				free_page((unsigned long) npa);
 		}
-		diag10(addr);
+		diag10_range(addr >> PAGE_SHIFT, 1);
 		pa->pages[pa->index++] = addr;
 		(*counter)++;
 		spin_unlock(&cmm_lock);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 9217e332b11..ab988135e5c 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -543,7 +543,6 @@ static void pfault_interrupt(unsigned int ext_int_code,
 	struct task_struct *tsk;
 	__u16 subcode;
 
-	kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++;
 	/*
 	 * Get the external interruption subcode & pfault
 	 * initial/completion signal bit. VM stores this 
@@ -553,14 +552,15 @@ static void pfault_interrupt(unsigned int ext_int_code,
 	subcode = ext_int_code >> 16;
 	if ((subcode & 0xff00) != __SUBCODE_MASK)
 		return;
+	kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++;
 
 	/*
 	 * Get the token (= address of the task structure of the affected task).
 	 */
 #ifdef CONFIG_64BIT
-	tsk = *(struct task_struct **) param64;
+	tsk = (struct task_struct *) param64;
 #else
-	tsk = *(struct task_struct **) param32;
+	tsk = (struct task_struct *) param32;
 #endif
 
 	if (subcode & 0x0080) {
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 122ffbd08ce..f05edcc3bef 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -24,12 +24,13 @@ static void change_page_attr(unsigned long addr, int numpages,
 			WARN_ON_ONCE(1);
 			continue;
 		}
-		ptep = pte_offset_kernel(pmdp, addr + i * PAGE_SIZE);
+		ptep = pte_offset_kernel(pmdp, addr);
 
 		pte = *ptep;
 		pte = set(pte);
-		ptep_invalidate(&init_mm, addr + i * PAGE_SIZE, ptep);
+		ptep_invalidate(&init_mm, addr, ptep);
 		*ptep = pte;
+		addr += PAGE_SIZE;
 	}
 }
 
@@ -53,3 +54,8 @@ int set_memory_nx(unsigned long addr, int numpages)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(set_memory_nx);
+
+int set_memory_x(unsigned long addr, int numpages)
+{
+	return 0;
+}
diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c
index 4952872d6f0..33cbd373cce 100644
--- a/arch/s390/oprofile/hwsampler.c
+++ b/arch/s390/oprofile/hwsampler.c
@@ -1021,20 +1021,14 @@ deallocate_exit:
 	return rc;
 }
 
-long hwsampler_query_min_interval(void)
+unsigned long hwsampler_query_min_interval(void)
 {
-	if (min_sampler_rate)
-		return min_sampler_rate;
-	else
-		return -EINVAL;
+	return min_sampler_rate;
 }
 
-long hwsampler_query_max_interval(void)
+unsigned long hwsampler_query_max_interval(void)
 {
-	if (max_sampler_rate)
-		return max_sampler_rate;
-	else
-		return -EINVAL;
+	return max_sampler_rate;
 }
 
 unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu)
diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h
index 8c72b59316b..1912f3bb190 100644
--- a/arch/s390/oprofile/hwsampler.h
+++ b/arch/s390/oprofile/hwsampler.h
@@ -102,8 +102,8 @@ int hwsampler_setup(void);
 int hwsampler_shutdown(void);
 int hwsampler_allocate(unsigned long sdbt, unsigned long sdb);
 int hwsampler_deallocate(void);
-long hwsampler_query_min_interval(void);
-long hwsampler_query_max_interval(void);
+unsigned long hwsampler_query_min_interval(void);
+unsigned long hwsampler_query_max_interval(void);
 int hwsampler_start_all(unsigned long interval);
 int hwsampler_stop_all(void);
 int hwsampler_deactivate(unsigned int cpu);
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index c63d7e58352..5995e9bc72d 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -145,15 +145,11 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops)
 	 * create hwsampler files only if hwsampler_setup() succeeds.
 	 */
 	oprofile_min_interval = hwsampler_query_min_interval();
-	if (oprofile_min_interval < 0) {
-		oprofile_min_interval = 0;
+	if (oprofile_min_interval == 0)
 		return -ENODEV;
-	}
 	oprofile_max_interval = hwsampler_query_max_interval();
-	if (oprofile_max_interval < 0) {
-		oprofile_max_interval = 0;
+	if (oprofile_max_interval == 0)
 		return -ENODEV;
-	}
 
 	if (oprofile_timer_init(ops))
 		return -ENODEV;
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 4b89da248d1..bc439de48cd 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -24,7 +24,6 @@ config SUPERH
 	select RTC_LIB
 	select GENERIC_ATOMIC64
 	select GENERIC_IRQ_SHOW
-	select ARCH_NO_SYSDEV_OPS
 	help
 	  The SuperH is a RISC processor targeted for use in embedded systems
 	  and consumer electronics; it was also used in the Sega Dreamcast
diff --git a/arch/sh/configs/apsh4ad0a_defconfig b/arch/sh/configs/apsh4ad0a_defconfig
index e71a531f1e3..77ec0e7b8dd 100644
--- a/arch/sh/configs/apsh4ad0a_defconfig
+++ b/arch/sh/configs/apsh4ad0a_defconfig
@@ -48,7 +48,6 @@ CONFIG_PREEMPT=y
 CONFIG_BINFMT_MISC=y
 CONFIG_PM=y
 CONFIG_PM_DEBUG=y
-CONFIG_PM_VERBOSE=y
 CONFIG_PM_RUNTIME=y
 CONFIG_CPU_IDLE=y
 CONFIG_NET=y
diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig
index dc4a2eb6a61..c41650572d7 100644
--- a/arch/sh/configs/sdk7786_defconfig
+++ b/arch/sh/configs/sdk7786_defconfig
@@ -83,7 +83,6 @@ CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
 CONFIG_BINFMT_MISC=y
 CONFIG_PM=y
 CONFIG_PM_DEBUG=y
-CONFIG_PM_VERBOSE=y
 CONFIG_PM_RUNTIME=y
 CONFIG_CPU_IDLE=y
 CONFIG_NET=y
diff --git a/arch/sh/kernel/cpu/sh4/sq.c b/arch/sh/kernel/cpu/sh4/sq.c
index 14726eef1ce..f0907995b4c 100644
--- a/arch/sh/kernel/cpu/sh4/sq.c
+++ b/arch/sh/kernel/cpu/sh4/sq.c
@@ -20,6 +20,7 @@
 #include <linux/vmalloc.h>
 #include <linux/mm.h>
 #include <linux/io.h>
+#include <linux/prefetch.h>
 #include <asm/page.h>
 #include <asm/cacheflush.h>
 #include <cpu/sq.h>
diff --git a/arch/sh/kernel/cpu/shmobile/pm_runtime.c b/arch/sh/kernel/cpu/shmobile/pm_runtime.c
index 6dcb8166a64..22db127afa7 100644
--- a/arch/sh/kernel/cpu/shmobile/pm_runtime.c
+++ b/arch/sh/kernel/cpu/shmobile/pm_runtime.c
@@ -139,7 +139,7 @@ void platform_pm_runtime_suspend_idle(void)
 	queue_work(pm_wq, &hwblk_work);
 }
 
-int platform_pm_runtime_suspend(struct device *dev)
+static int default_platform_runtime_suspend(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct pdev_archdata *ad = &pdev->archdata;
@@ -147,7 +147,7 @@ int platform_pm_runtime_suspend(struct device *dev)
 	int hwblk = ad->hwblk_id;
 	int ret = 0;
 
-	dev_dbg(dev, "platform_pm_runtime_suspend() [%d]\n", hwblk);
+	dev_dbg(dev, "%s() [%d]\n", __func__, hwblk);
 
 	/* ignore off-chip platform devices */
 	if (!hwblk)
@@ -183,20 +183,20 @@ int platform_pm_runtime_suspend(struct device *dev)
 	mutex_unlock(&ad->mutex);
 
 out:
-	dev_dbg(dev, "platform_pm_runtime_suspend() [%d] returns %d\n",
-		hwblk, ret);
+	dev_dbg(dev, "%s() [%d] returns %d\n",
+		 __func__, hwblk, ret);
 
 	return ret;
 }
 
-int platform_pm_runtime_resume(struct device *dev)
+static int default_platform_runtime_resume(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct pdev_archdata *ad = &pdev->archdata;
 	int hwblk = ad->hwblk_id;
 	int ret = 0;
 
-	dev_dbg(dev, "platform_pm_runtime_resume() [%d]\n", hwblk);
+	dev_dbg(dev, "%s() [%d]\n", __func__, hwblk);
 
 	/* ignore off-chip platform devices */
 	if (!hwblk)
@@ -228,19 +228,19 @@ int platform_pm_runtime_resume(struct device *dev)
 	 */
 	mutex_unlock(&ad->mutex);
 out:
-	dev_dbg(dev, "platform_pm_runtime_resume() [%d] returns %d\n",
-		hwblk, ret);
+	dev_dbg(dev, "%s() [%d] returns %d\n",
+		__func__, hwblk, ret);
 
 	return ret;
 }
 
-int platform_pm_runtime_idle(struct device *dev)
+static int default_platform_runtime_idle(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	int hwblk = pdev->archdata.hwblk_id;
 	int ret = 0;
 
-	dev_dbg(dev, "platform_pm_runtime_idle() [%d]\n", hwblk);
+	dev_dbg(dev, "%s() [%d]\n", __func__, hwblk);
 
 	/* ignore off-chip platform devices */
 	if (!hwblk)
@@ -252,10 +252,19 @@ int platform_pm_runtime_idle(struct device *dev)
 	/* suspend synchronously to disable clocks immediately */
 	ret = pm_runtime_suspend(dev);
 out:
-	dev_dbg(dev, "platform_pm_runtime_idle() [%d] done!\n", hwblk);
+	dev_dbg(dev, "%s() [%d] done!\n", __func__, hwblk);
 	return ret;
 }
 
+static struct dev_power_domain default_power_domain = {
+	.ops = {
+		.runtime_suspend = default_platform_runtime_suspend,
+		.runtime_resume = default_platform_runtime_resume,
+		.runtime_idle = default_platform_runtime_idle,
+		USE_PLATFORM_PM_SLEEP_OPS
+	},
+};
+
 static int platform_bus_notify(struct notifier_block *nb,
 			       unsigned long action, void *data)
 {
@@ -276,6 +285,7 @@ static int platform_bus_notify(struct notifier_block *nb,
 		hwblk_disable(hwblk_info, hwblk);
 		/* make sure driver re-inits itself once */
 		__set_bit(PDEV_ARCHDATA_FLAG_INIT, &pdev->archdata.flags);
+		dev->pwr_domain = &default_power_domain;
 		break;
 	/* TODO: add BUS_NOTIFY_BIND_DRIVER and increase idle count */
 	case BUS_NOTIFY_BOUND_DRIVER:
@@ -289,6 +299,7 @@ static int platform_bus_notify(struct notifier_block *nb,
 		__set_bit(PDEV_ARCHDATA_FLAG_INIT, &pdev->archdata.flags);
 		break;
 	case BUS_NOTIFY_DEL_DEVICE:
+		dev->pwr_domain = NULL;
 		break;
 	}
 	return 0;
diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index 2130ca674e9..3d7b209b217 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -117,7 +117,11 @@ void user_enable_single_step(struct task_struct *child)
 
 	set_tsk_thread_flag(child, TIF_SINGLESTEP);
 
+	if (ptrace_get_breakpoints(child) < 0)
+		return;
+
 	set_single_step(child, pc);
+	ptrace_put_breakpoints(child);
 }
 
 void user_disable_single_step(struct task_struct *child)
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index 509b36b4511..6207561ea34 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -20,6 +20,7 @@
 #include <linux/module.h>
 #include <linux/cpu.h>
 #include <linux/interrupt.h>
+#include <linux/sched.h>
 #include <asm/atomic.h>
 #include <asm/processor.h>
 #include <asm/system.h>
@@ -323,6 +324,7 @@ void smp_message_recv(unsigned int msg)
 		generic_smp_call_function_interrupt();
 		break;
 	case SMP_MSG_RESCHEDULE:
+		scheduler_ipi();
 		break;
 	case SMP_MSG_FUNCTION_SINGLE:
 		generic_smp_call_function_single_interrupt();
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index 3484c2f65ab..b51a17104b5 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -87,7 +87,6 @@ void die(const char * str, struct pt_regs * regs, long err)
 	bust_spinlocks(1);
 
 	printk("%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
-	sysfs_printk_last_file();
 	print_modules();
 	show_regs(regs);
 
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index e560d102215..63a027c9ada 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -25,6 +25,10 @@ config SPARC
 	select HAVE_DMA_ATTRS
 	select HAVE_DMA_API_DEBUG
 	select HAVE_ARCH_JUMP_LABEL
+	select HAVE_GENERIC_HARDIRQS
+	select GENERIC_HARDIRQS_NO_DEPRECATED
+	select GENERIC_IRQ_SHOW
+	select USE_GENERIC_SMP_HELPERS if SMP
 
 config SPARC32
 	def_bool !64BIT
@@ -43,15 +47,12 @@ config SPARC64
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_SYSCALL_TRACEPOINTS
-	select USE_GENERIC_SMP_HELPERS if SMP
 	select RTC_DRV_CMOS
 	select RTC_DRV_BQ4802
 	select RTC_DRV_SUN4V
 	select RTC_DRV_STARFIRE
 	select HAVE_PERF_EVENTS
 	select PERF_USE_VMALLOC
-	select HAVE_GENERIC_HARDIRQS
-	select GENERIC_IRQ_SHOW
 	select IRQ_PREFLOW_FASTEOI
 
 config ARCH_DEFCONFIG
diff --git a/arch/sparc/include/asm/cpudata_32.h b/arch/sparc/include/asm/cpudata_32.h
index 31d48a0e32c..a4c5a938b93 100644
--- a/arch/sparc/include/asm/cpudata_32.h
+++ b/arch/sparc/include/asm/cpudata_32.h
@@ -16,6 +16,10 @@ typedef struct {
 	unsigned long clock_tick;
 	unsigned int multiplier;
 	unsigned int counter;
+#ifdef CONFIG_SMP
+	unsigned int irq_resched_count;
+	unsigned int irq_call_count;
+#endif
 	int prom_node;
 	int mid;
 	int next;
@@ -23,5 +27,6 @@ typedef struct {
 
 DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data);
 #define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu))
+#define local_cpu_data() __get_cpu_var(__cpu_data)
 
 #endif /* _SPARC_CPUDATA_H */
diff --git a/arch/sparc/include/asm/floppy_32.h b/arch/sparc/include/asm/floppy_32.h
index 86666f70322..482c79e2a41 100644
--- a/arch/sparc/include/asm/floppy_32.h
+++ b/arch/sparc/include/asm/floppy_32.h
@@ -281,28 +281,27 @@ static inline void sun_fd_enable_dma(void)
 	pdma_areasize = pdma_size;
 }
 
-/* Our low-level entry point in arch/sparc/kernel/entry.S */
-extern int sparc_floppy_request_irq(int irq, unsigned long flags,
-				    irq_handler_t irq_handler);
+extern int sparc_floppy_request_irq(unsigned int irq,
+                                    irq_handler_t irq_handler);
 
 static int sun_fd_request_irq(void)
 {
 	static int once = 0;
-	int error;
 
-	if(!once) {
+	if (!once) {
 		once = 1;
-		error = sparc_floppy_request_irq(FLOPPY_IRQ,
-						 IRQF_DISABLED,
-						 floppy_interrupt);
-		return ((error == 0) ? 0 : -1);
-	} else return 0;
+		return sparc_floppy_request_irq(FLOPPY_IRQ, floppy_interrupt);
+	} else {
+		return 0;
+	}
 }
 
 static struct linux_prom_registers fd_regs[2];
 
 static int sun_floppy_init(void)
 {
+	struct platform_device *op;
+	struct device_node *dp;
 	char state[128];
 	phandle tnode, fd_node;
 	int num_regs;
@@ -310,7 +309,6 @@ static int sun_floppy_init(void)
 
 	use_virtual_dma = 1;
 
-	FLOPPY_IRQ = 11;
 	/* Forget it if we aren't on a machine that could possibly
 	 * ever have a floppy drive.
 	 */
@@ -349,6 +347,26 @@ static int sun_floppy_init(void)
 	sun_fdc = (struct sun_flpy_controller *)
 	    of_ioremap(&r, 0, fd_regs[0].reg_size, "floppy");
 
+	/* Look up irq in platform_device.
+	 * We try "SUNW,fdtwo" and "fd"
+	 */
+	for_each_node_by_name(dp, "SUNW,fdtwo") {
+		op = of_find_device_by_node(dp);
+		if (op)
+			break;
+	}
+	if (!op) {
+		for_each_node_by_name(dp, "fd") {
+			op = of_find_device_by_node(dp);
+			if (op)
+				break;
+		}
+	}
+	if (!op)
+		goto no_sun_fdc;
+
+	FLOPPY_IRQ = op->archdata.irqs[0];
+
 	/* Last minute sanity check... */
 	if(sun_fdc->status_82072 == 0xff) {
 		sun_fdc = NULL;
diff --git a/arch/sparc/include/asm/io.h b/arch/sparc/include/asm/io.h
index a34b2994937..f6902cf3cbe 100644
--- a/arch/sparc/include/asm/io.h
+++ b/arch/sparc/include/asm/io.h
@@ -5,4 +5,17 @@
 #else
 #include <asm/io_32.h>
 #endif
+
+/*
+ * Defines used for both SPARC32 and SPARC64
+ */
+
+/* Big endian versions of memory read/write routines */
+#define readb_be(__addr)	__raw_readb(__addr)
+#define readw_be(__addr)	__raw_readw(__addr)
+#define readl_be(__addr)	__raw_readl(__addr)
+#define writeb_be(__b, __addr)	__raw_writeb(__b, __addr)
+#define writel_be(__w, __addr)	__raw_writel(__w, __addr)
+#define writew_be(__l, __addr)	__raw_writew(__l, __addr)
+
 #endif
diff --git a/arch/sparc/include/asm/irq_32.h b/arch/sparc/include/asm/irq_32.h
index eced3e3ebd3..2ae3acaeb1b 100644
--- a/arch/sparc/include/asm/irq_32.h
+++ b/arch/sparc/include/asm/irq_32.h
@@ -6,7 +6,11 @@
 #ifndef _SPARC_IRQ_H
 #define _SPARC_IRQ_H
 
-#define NR_IRQS    16
+/* Allocated number of logical irq numbers.
+ * sun4d boxes (ss2000e) should be OK with ~32.
+ * Be on the safe side and make room for 64
+ */
+#define NR_IRQS    64
 
 #include <linux/interrupt.h>
 
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
index 427d4684e0d..fc73a82366f 100644
--- a/arch/sparc/include/asm/jump_label.h
+++ b/arch/sparc/include/asm/jump_label.h
@@ -7,17 +7,20 @@
 
 #define JUMP_LABEL_NOP_SIZE 4
 
-#define JUMP_LABEL(key, label)					\
-	do {							\
-		asm goto("1:\n\t"				\
-			 "nop\n\t"				\
-			 "nop\n\t"				\
-			 ".pushsection __jump_table,  \"a\"\n\t"\
-			 ".align 4\n\t"				\
-			 ".word 1b, %l[" #label "], %c0\n\t"	\
-			 ".popsection \n\t"			\
-			 : :  "i" (key) :  : label);\
-	} while (0)
+static __always_inline bool arch_static_branch(struct jump_label_key *key)
+{
+		asm goto("1:\n\t"
+			 "nop\n\t"
+			 "nop\n\t"
+			 ".pushsection __jump_table,  \"aw\"\n\t"
+			 ".align 4\n\t"
+			 ".word 1b, %l[l_yes], %c0\n\t"
+			 ".popsection \n\t"
+			 : :  "i" (key) : : l_yes);
+	return false;
+l_yes:
+	return true;
+}
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/sparc/include/asm/leon.h b/arch/sparc/include/asm/leon.h
index c04f96fb753..6bdaf1e43d2 100644
--- a/arch/sparc/include/asm/leon.h
+++ b/arch/sparc/include/asm/leon.h
@@ -52,29 +52,6 @@
 #define LEON_DIAGF_VALID	0x2000
 #define LEON_DIAGF_VALID_SHIFT	13
 
-/*
- *  Interrupt Sources
- *
- *  The interrupt source numbers directly map to the trap type and to
- *  the bits used in the Interrupt Clear, Interrupt Force, Interrupt Mask,
- *  and the Interrupt Pending Registers.
- */
-#define LEON_INTERRUPT_CORRECTABLE_MEMORY_ERROR	1
-#define LEON_INTERRUPT_UART_1_RX_TX		2
-#define LEON_INTERRUPT_UART_0_RX_TX		3
-#define LEON_INTERRUPT_EXTERNAL_0		4
-#define LEON_INTERRUPT_EXTERNAL_1		5
-#define LEON_INTERRUPT_EXTERNAL_2		6
-#define LEON_INTERRUPT_EXTERNAL_3		7
-#define LEON_INTERRUPT_TIMER1			8
-#define LEON_INTERRUPT_TIMER2			9
-#define LEON_INTERRUPT_EMPTY1			10
-#define LEON_INTERRUPT_EMPTY2			11
-#define LEON_INTERRUPT_OPEN_ETH			12
-#define LEON_INTERRUPT_EMPTY4			13
-#define LEON_INTERRUPT_EMPTY5			14
-#define LEON_INTERRUPT_EMPTY6			15
-
 /* irq masks */
 #define LEON_HARD_INT(x)	(1 << (x))	/* irq 0-15 */
 #define LEON_IRQMASK_R		0x0000fffe	/* bit 15- 1 of lregs.irqmask */
@@ -183,7 +160,6 @@ static inline void leon_srmmu_enabletlb(void)
 /* macro access for leon_readnobuffer_reg() */
 #define LEON_BYPASSCACHE_LOAD_VA(x) leon_readnobuffer_reg((unsigned long)(x))
 
-extern void sparc_leon_eirq_register(int eirq);
 extern void leon_init(void);
 extern void leon_switch_mm(void);
 extern void leon_init_IRQ(void);
@@ -239,8 +215,8 @@ static inline int sparc_leon3_cpuid(void)
 #endif /*!__ASSEMBLY__*/
 
 #ifdef CONFIG_SMP
-# define LEON3_IRQ_RESCHEDULE		13
-# define LEON3_IRQ_TICKER		(leon_percpu_timer_dev[0].irq)
+# define LEON3_IRQ_IPI_DEFAULT		13
+# define LEON3_IRQ_TICKER		(leon3_ticker_irq)
 # define LEON3_IRQ_CROSS_CALL		15
 #endif
 
@@ -339,9 +315,9 @@ struct leon2_cacheregs {
 #include <linux/interrupt.h>
 
 struct device_node;
-extern int sparc_leon_eirq_get(int eirq, int cpu);
-extern irqreturn_t sparc_leon_eirq_isr(int dummy, void *dev_id);
-extern void sparc_leon_eirq_register(int eirq);
+extern unsigned int leon_build_device_irq(unsigned int real_irq,
+					   irq_flow_handler_t flow_handler,
+					   const char *name, int do_ack);
 extern void leon_clear_clock_irq(void);
 extern void leon_load_profile_irq(int cpu, unsigned int limit);
 extern void leon_init_timers(irq_handler_t counter_fn);
@@ -358,6 +334,7 @@ extern void leon3_getCacheRegs(struct leon3_cacheregs *regs);
 extern int leon_flush_needed(void);
 extern void leon_switch_mm(void);
 extern int srmmu_swprobe_trace;
+extern int leon3_ticker_irq;
 
 #ifdef CONFIG_SMP
 extern int leon_smp_nrcpus(void);
@@ -366,17 +343,19 @@ extern void leon_smp_done(void);
 extern void leon_boot_cpus(void);
 extern int leon_boot_one_cpu(int i);
 void leon_init_smp(void);
-extern void cpu_probe(void);
 extern void cpu_idle(void);
 extern void init_IRQ(void);
 extern void cpu_panic(void);
 extern int __leon_processor_id(void);
 void leon_enable_irq_cpu(unsigned int irq_nr, unsigned int cpu);
+extern irqreturn_t leon_percpu_timer_interrupt(int irq, void *unused);
 
-extern unsigned int real_irq_entry[], smpleon_ticker[];
+extern unsigned int real_irq_entry[];
+extern unsigned int smpleon_ipi[];
 extern unsigned int patchme_maybe_smp_msg[];
 extern unsigned int t_nmi[], linux_trap_ipi15_leon[];
 extern unsigned int linux_trap_ipi15_sun4m[];
+extern int leon_ipi_irq;
 
 #endif /* CONFIG_SMP */
 
diff --git a/arch/sparc/include/asm/pcic.h b/arch/sparc/include/asm/pcic.h
index f20ef562b26..7eb5d78f521 100644
--- a/arch/sparc/include/asm/pcic.h
+++ b/arch/sparc/include/asm/pcic.h
@@ -29,11 +29,17 @@ struct linux_pcic {
 	int			pcic_imdim;
 };
 
-extern int pcic_probe(void);
-/* Erm... MJ redefined pcibios_present() so that it does not work early. */
+#ifdef CONFIG_PCI
 extern int pcic_present(void);
+extern int pcic_probe(void);
+extern void pci_time_init(void);
 extern void sun4m_pci_init_IRQ(void);
-
+#else
+static inline int pcic_present(void) { return 0; }
+static inline int pcic_probe(void) { return 0; }
+static inline void pci_time_init(void) {}
+static inline void sun4m_pci_init_IRQ(void) {}
+#endif
 #endif
 
 /* Size of PCI I/O space which we relocate. */
diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index 303bd4dc829..5b31a8e8982 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -8,6 +8,8 @@
  *  Copyright (C) 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  */
 
+#include <linux/const.h>
+
 #ifndef __ASSEMBLY__
 #include <asm-generic/4level-fixup.h>
 
@@ -456,9 +458,9 @@ extern int io_remap_pfn_range(struct vm_area_struct *vma,
 
 #endif /* !(__ASSEMBLY__) */
 
-#define VMALLOC_START           0xfe600000
+#define VMALLOC_START           _AC(0xfe600000,UL)
 /* XXX Alter this when I get around to fixing sun4c - Anton */
-#define VMALLOC_END             0xffc00000
+#define VMALLOC_END             _AC(0xffc00000,UL)
 
 
 /* We provide our own get_unmapped_area to cope with VA holes for userland */
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index f8dddb7045b..b77128c8052 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -699,6 +699,9 @@ extern pmd_t swapper_low_pmd_dir[2048];
 extern void paging_init(void);
 extern unsigned long find_ecache_flush_span(unsigned long size);
 
+struct seq_file;
+extern void mmu_info(struct seq_file *);
+
 /* These do nothing with the way I have things setup. */
 #define mmu_lockarea(vaddr, len)		(vaddr)
 #define mmu_unlockarea(vaddr, len)		do { } while(0)
diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h
index 2643c62f4ac..64718ba2643 100644
--- a/arch/sparc/include/asm/setup.h
+++ b/arch/sparc/include/asm/setup.h
@@ -11,4 +11,16 @@
 # define COMMAND_LINE_SIZE 256
 #endif
 
+#ifdef __KERNEL__
+
+#ifdef CONFIG_SPARC32
+/* The CPU that was used for booting
+ * Only sun4d + leon may have boot_cpu_id != 0
+ */
+extern unsigned char boot_cpu_id;
+extern unsigned char boot_cpu_id4;
+#endif
+
+#endif /* __KERNEL__ */
+
 #endif /* _SPARC_SETUP_H */
diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h
index d82d7f4c0a7..093f10843ff 100644
--- a/arch/sparc/include/asm/smp_32.h
+++ b/arch/sparc/include/asm/smp_32.h
@@ -50,42 +50,38 @@ void smp_callin(void);
 void smp_boot_cpus(void);
 void smp_store_cpu_info(int);
 
+void smp_resched_interrupt(void);
+void smp_call_function_single_interrupt(void);
+void smp_call_function_interrupt(void);
+
 struct seq_file;
 void smp_bogo(struct seq_file *);
 void smp_info(struct seq_file *);
 
 BTFIXUPDEF_CALL(void, smp_cross_call, smpfunc_t, cpumask_t, unsigned long, unsigned long, unsigned long, unsigned long)
 BTFIXUPDEF_CALL(int, __hard_smp_processor_id, void)
+BTFIXUPDEF_CALL(void, smp_ipi_resched, int);
+BTFIXUPDEF_CALL(void, smp_ipi_single, int);
+BTFIXUPDEF_CALL(void, smp_ipi_mask_one, int);
 BTFIXUPDEF_BLACKBOX(hard_smp_processor_id)
 BTFIXUPDEF_BLACKBOX(load_current)
 
 #define smp_cross_call(func,mask,arg1,arg2,arg3,arg4) BTFIXUP_CALL(smp_cross_call)(func,mask,arg1,arg2,arg3,arg4)
 
-static inline void xc0(smpfunc_t func) { smp_cross_call(func, cpu_online_map, 0, 0, 0, 0); }
+static inline void xc0(smpfunc_t func) { smp_cross_call(func, *cpu_online_mask, 0, 0, 0, 0); }
 static inline void xc1(smpfunc_t func, unsigned long arg1)
-{ smp_cross_call(func, cpu_online_map, arg1, 0, 0, 0); }
+{ smp_cross_call(func, *cpu_online_mask, arg1, 0, 0, 0); }
 static inline void xc2(smpfunc_t func, unsigned long arg1, unsigned long arg2)
-{ smp_cross_call(func, cpu_online_map, arg1, arg2, 0, 0); }
+{ smp_cross_call(func, *cpu_online_mask, arg1, arg2, 0, 0); }
 static inline void xc3(smpfunc_t func, unsigned long arg1, unsigned long arg2,
 			   unsigned long arg3)
-{ smp_cross_call(func, cpu_online_map, arg1, arg2, arg3, 0); }
+{ smp_cross_call(func, *cpu_online_mask, arg1, arg2, arg3, 0); }
 static inline void xc4(smpfunc_t func, unsigned long arg1, unsigned long arg2,
 			   unsigned long arg3, unsigned long arg4)
-{ smp_cross_call(func, cpu_online_map, arg1, arg2, arg3, arg4); }
-
-static inline int smp_call_function(void (*func)(void *info), void *info, int wait)
-{
-	xc1((smpfunc_t)func, (unsigned long)info);
-	return 0;
-}
+{ smp_cross_call(func, *cpu_online_mask, arg1, arg2, arg3, arg4); }
 
-static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
-					   void *info, int wait)
-{
-	smp_cross_call((smpfunc_t)func, cpumask_of_cpu(cpuid),
-		       (unsigned long) info, 0, 0, 0);
-	return 0;
-}
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 
 static inline int cpu_logical_map(int cpu)
 {
@@ -135,6 +131,11 @@ static inline int hard_smp_processor_id(void)
 		__asm__ __volatile__("lda [%g0] ASI_M_VIKING_TMP1, %0\n\t"
 				     "nop; nop" :
 				     "=&r" (cpuid));
+		     - leon
+		__asm__ __volatile__(	"rd %asr17, %0\n\t"
+					"srl %0, 0x1c, %0\n\t"
+					"nop\n\t" :
+					"=&r" (cpuid));
 	   See btfixup.h and btfixupprep.c to understand how a blackbox works.
 	 */
 	__asm__ __volatile__("sethi %%hi(___b_hard_smp_processor_id), %0\n\t"
diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h
index f49e11cd4de..20bca895071 100644
--- a/arch/sparc/include/asm/smp_64.h
+++ b/arch/sparc/include/asm/smp_64.h
@@ -49,6 +49,10 @@ extern void cpu_play_dead(void);
 
 extern void smp_fetch_global_regs(void);
 
+struct seq_file;
+void smp_bogo(struct seq_file *);
+void smp_info(struct seq_file *);
+
 #ifdef CONFIG_HOTPLUG_CPU
 extern int __cpu_disable(void);
 extern void __cpu_die(unsigned int cpu);
diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h
index 7f9b9dba38a..5f5b8bf3f50 100644
--- a/arch/sparc/include/asm/spinlock_32.h
+++ b/arch/sparc/include/asm/spinlock_32.h
@@ -9,6 +9,7 @@
 #ifndef __ASSEMBLY__
 
 #include <asm/psr.h>
+#include <asm/processor.h> /* for cpu_relax */
 
 #define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0)
 
diff --git a/arch/sparc/include/asm/system_32.h b/arch/sparc/include/asm/system_32.h
index 890036b3689..47a7e862474 100644
--- a/arch/sparc/include/asm/system_32.h
+++ b/arch/sparc/include/asm/system_32.h
@@ -15,11 +15,6 @@
 
 #include <linux/irqflags.h>
 
-static inline unsigned int probe_irq_mask(unsigned long val)
-{
-	return 0;
-}
-
 /*
  * Sparc (general) CPU types
  */
diff --git a/arch/sparc/include/asm/system_64.h b/arch/sparc/include/asm/system_64.h
index e3b65d8cf41..3c96d3bb9f1 100644
--- a/arch/sparc/include/asm/system_64.h
+++ b/arch/sparc/include/asm/system_64.h
@@ -29,10 +29,6 @@ enum sparc_cpu {
 /* This cannot ever be a sun4c :) That's just history. */
 #define ARCH_SUN4C 0
 
-extern const char *sparc_cpu_type;
-extern const char *sparc_fpu_type;
-extern const char *sparc_pmu_type;
-
 extern char reboot_command[];
 
 /* These are here in an effort to more fully work around Spitfire Errata
diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h
index 1c79f32734a..8b9c556d630 100644
--- a/arch/sparc/include/asm/topology_64.h
+++ b/arch/sparc/include/asm/topology_64.h
@@ -65,6 +65,10 @@ static inline int pcibus_to_node(struct pci_bus *pbus)
 #define smt_capable()				(sparc64_multi_core)
 #endif /* CONFIG_SMP */
 
-#define cpu_coregroup_mask(cpu)			(&cpu_core_map[cpu])
+extern cpumask_t cpu_core_map[NR_CPUS];
+static inline const struct cpumask *cpu_coregroup_mask(int cpu)
+{
+        return &cpu_core_map[cpu];
+}
 
 #endif /* _ASM_SPARC64_TOPOLOGY_H */
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
index 9d897b6db98..c5387ed0add 100644
--- a/arch/sparc/include/asm/unistd.h
+++ b/arch/sparc/include/asm/unistd.h
@@ -404,8 +404,9 @@
 #define __NR_open_by_handle_at	333
 #define __NR_clock_adjtime	334
 #define __NR_syncfs		335
+#define __NR_sendmmsg		336
 
-#define NR_syscalls		336
+#define NR_syscalls		337
 
 #ifdef __32bit_syscall_numbers__
 /* Sparc 32-bit only has the "setresuid32", "getresuid32" variants,
diff --git a/arch/sparc/include/asm/winmacro.h b/arch/sparc/include/asm/winmacro.h
index 5b0a06dc3bc..a9be04b0d04 100644
--- a/arch/sparc/include/asm/winmacro.h
+++ b/arch/sparc/include/asm/winmacro.h
@@ -103,6 +103,7 @@
         st       %scratch, [%cur_reg + TI_W_SAVED];
 
 #ifdef CONFIG_SMP
+/* Results of LOAD_CURRENT() after BTFIXUP for SUN4M, SUN4D & LEON (comments) */
 #define LOAD_CURRENT4M(dest_reg, idreg) \
         rd       %tbr, %idreg; \
 	sethi    %hi(current_set), %dest_reg; \
@@ -118,6 +119,14 @@
 	or	%dest_reg, %lo(C_LABEL(current_set)), %dest_reg; \
 	ld	[%idreg + %dest_reg], %dest_reg;
 
+#define LOAD_CURRENT_LEON(dest_reg, idreg)			\
+	rd	%asr17, %idreg;					\
+	sethi	%hi(current_set), %dest_reg;			\
+	srl	%idreg, 0x1c, %idreg;				\
+	or	%dest_reg, %lo(current_set), %dest_reg;		\
+	sll	%idreg, 0x2, %idreg;				\
+	ld	[%idreg + %dest_reg], %dest_reg;
+
 /* Blackbox - take care with this... - check smp4m and smp4d before changing this. */
 #define LOAD_CURRENT(dest_reg, idreg) 					\
 	sethi	 %hi(___b_load_current), %idreg;			\
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile
index 99aa4db6e9c..9cff2709a96 100644
--- a/arch/sparc/kernel/Makefile
+++ b/arch/sparc/kernel/Makefile
@@ -71,10 +71,6 @@ obj-$(CONFIG_SPARC64)	+= pcr.o
 obj-$(CONFIG_SPARC64)	+= nmi.o
 obj-$(CONFIG_SPARC64_SMP) += cpumap.o
 
-# sparc32 do not use GENERIC_HARDIRQS but uses the generic devres implementation
-obj-$(CONFIG_SPARC32)     += devres.o
-devres-y                  := ../../../kernel/irq/devres.o
-
 obj-y                     += dma.o
 
 obj-$(CONFIG_SPARC32_PCI) += pcic.o
diff --git a/arch/sparc/kernel/apc.c b/arch/sparc/kernel/apc.c
index f679c57644d..1e34f29e58b 100644
--- a/arch/sparc/kernel/apc.c
+++ b/arch/sparc/kernel/apc.c
@@ -165,7 +165,7 @@ static int __devinit apc_probe(struct platform_device *op)
 	return 0;
 }
 
-static struct of_device_id __initdata apc_match[] = {
+static struct of_device_id apc_match[] = {
 	{
 		.name = APC_OBPNAME,
 	},
diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c
index 7925c54f413..138dbbc8dc8 100644
--- a/arch/sparc/kernel/cpu.c
+++ b/arch/sparc/kernel/cpu.c
@@ -4,6 +4,7 @@
  * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
  */
 
+#include <linux/seq_file.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -11,7 +12,9 @@
 #include <linux/threads.h>
 
 #include <asm/spitfire.h>
+#include <asm/pgtable.h>
 #include <asm/oplib.h>
+#include <asm/setup.h>
 #include <asm/page.h>
 #include <asm/head.h>
 #include <asm/psr.h>
@@ -23,6 +26,9 @@
 DEFINE_PER_CPU(cpuinfo_sparc, __cpu_data) = { 0 };
 EXPORT_PER_CPU_SYMBOL(__cpu_data);
 
+int ncpus_probed;
+unsigned int fsr_storage;
+
 struct cpu_info {
 	int psr_vers;
 	const char *name;
@@ -247,13 +253,12 @@ static const struct manufacturer_info __initconst manufacturer_info[] = {
  * machine type value into consideration too.  I will fix this.
  */
 
-const char *sparc_cpu_type;
-const char *sparc_fpu_type;
+static const char *sparc_cpu_type;
+static const char *sparc_fpu_type;
 const char *sparc_pmu_type;
 
-unsigned int fsr_storage;
 
-static void set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers)
+static void __init set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers)
 {
 	const struct manufacturer_info *manuf;
 	int i;
@@ -313,7 +318,123 @@ static void set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers)
 }
 
 #ifdef CONFIG_SPARC32
-void __cpuinit cpu_probe(void)
+static int show_cpuinfo(struct seq_file *m, void *__unused)
+{
+	seq_printf(m,
+		   "cpu\t\t: %s\n"
+		   "fpu\t\t: %s\n"
+		   "promlib\t\t: Version %d Revision %d\n"
+		   "prom\t\t: %d.%d\n"
+		   "type\t\t: %s\n"
+		   "ncpus probed\t: %d\n"
+		   "ncpus active\t: %d\n"
+#ifndef CONFIG_SMP
+		   "CPU0Bogo\t: %lu.%02lu\n"
+		   "CPU0ClkTck\t: %ld\n"
+#endif
+		   ,
+		   sparc_cpu_type,
+		   sparc_fpu_type ,
+		   romvec->pv_romvers,
+		   prom_rev,
+		   romvec->pv_printrev >> 16,
+		   romvec->pv_printrev & 0xffff,
+		   &cputypval[0],
+		   ncpus_probed,
+		   num_online_cpus()
+#ifndef CONFIG_SMP
+		   , cpu_data(0).udelay_val/(500000/HZ),
+		   (cpu_data(0).udelay_val/(5000/HZ)) % 100,
+		   cpu_data(0).clock_tick
+#endif
+		);
+
+#ifdef CONFIG_SMP
+	smp_bogo(m);
+#endif
+	mmu_info(m);
+#ifdef CONFIG_SMP
+	smp_info(m);
+#endif
+	return 0;
+}
+#endif /* CONFIG_SPARC32 */
+
+#ifdef CONFIG_SPARC64
+unsigned int dcache_parity_tl1_occurred;
+unsigned int icache_parity_tl1_occurred;
+
+
+static int show_cpuinfo(struct seq_file *m, void *__unused)
+{
+	seq_printf(m,
+		   "cpu\t\t: %s\n"
+		   "fpu\t\t: %s\n"
+		   "pmu\t\t: %s\n"
+		   "prom\t\t: %s\n"
+		   "type\t\t: %s\n"
+		   "ncpus probed\t: %d\n"
+		   "ncpus active\t: %d\n"
+		   "D$ parity tl1\t: %u\n"
+		   "I$ parity tl1\t: %u\n"
+#ifndef CONFIG_SMP
+		   "Cpu0ClkTck\t: %016lx\n"
+#endif
+		   ,
+		   sparc_cpu_type,
+		   sparc_fpu_type,
+		   sparc_pmu_type,
+		   prom_version,
+		   ((tlb_type == hypervisor) ?
+		    "sun4v" :
+		    "sun4u"),
+		   ncpus_probed,
+		   num_online_cpus(),
+		   dcache_parity_tl1_occurred,
+		   icache_parity_tl1_occurred
+#ifndef CONFIG_SMP
+		   , cpu_data(0).clock_tick
+#endif
+		);
+#ifdef CONFIG_SMP
+	smp_bogo(m);
+#endif
+	mmu_info(m);
+#ifdef CONFIG_SMP
+	smp_info(m);
+#endif
+	return 0;
+}
+#endif /* CONFIG_SPARC64 */
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+	/* The pointer we are returning is arbitrary,
+	 * it just has to be non-NULL and not IS_ERR
+	 * in the success case.
+	 */
+	return *pos == 0 ? &c_start : NULL;
+}
+
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return c_start(m, pos);
+}
+
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+const struct seq_operations cpuinfo_op = {
+	.start =c_start,
+	.next =	c_next,
+	.stop =	c_stop,
+	.show =	show_cpuinfo,
+};
+
+#ifdef CONFIG_SPARC32
+static int __init cpu_type_probe(void)
 {
 	int psr_impl, psr_vers, fpu_vers;
 	int psr;
@@ -332,8 +453,12 @@ void __cpuinit cpu_probe(void)
 	put_psr(psr);
 
 	set_cpu_and_fpu(psr_impl, psr_vers, fpu_vers);
+
+	return 0;
 }
-#else
+#endif /* CONFIG_SPARC32 */
+
+#ifdef CONFIG_SPARC64
 static void __init sun4v_cpu_probe(void)
 {
 	switch (sun4v_chip_type) {
@@ -374,6 +499,6 @@ static int __init cpu_type_probe(void)
 	}
 	return 0;
 }
+#endif /* CONFIG_SPARC64 */
 
 early_initcall(cpu_type_probe);
-#endif
diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c
index 8de64c8126b..d91fd782743 100644
--- a/arch/sparc/kernel/cpumap.c
+++ b/arch/sparc/kernel/cpumap.c
@@ -202,7 +202,7 @@ static struct cpuinfo_tree *build_cpuinfo_tree(void)
 	new_tree->total_nodes = n;
 	memcpy(&new_tree->level, tmp_level, sizeof(tmp_level));
 
-	prev_cpu = cpu = first_cpu(cpu_online_map);
+	prev_cpu = cpu = cpumask_first(cpu_online_mask);
 
 	/* Initialize all levels in the tree with the first CPU */
 	for (level = CPUINFO_LVL_PROC; level >= CPUINFO_LVL_ROOT; level--) {
@@ -381,7 +381,7 @@ static int simple_map_to_cpu(unsigned int index)
 	}
 
 	/* Impossible, since num_online_cpus() <= num_possible_cpus() */
-	return first_cpu(cpu_online_map);
+	return cpumask_first(cpu_online_mask);
 }
 
 static int _map_to_cpu(unsigned int index)
diff --git a/arch/sparc/kernel/devices.c b/arch/sparc/kernel/devices.c
index d2eddd6647c..113c052c304 100644
--- a/arch/sparc/kernel/devices.c
+++ b/arch/sparc/kernel/devices.c
@@ -20,7 +20,6 @@
 #include <asm/system.h>
 #include <asm/cpudata.h>
 
-extern void cpu_probe(void);
 extern void clock_stop_probe(void); /* tadpole.c */
 extern void sun4c_probe_memerr_reg(void);
 
@@ -115,7 +114,7 @@ int cpu_get_hwmid(phandle prom_node)
 
 void __init device_scan(void)
 {
-	prom_printf("Booting Linux...\n");
+	printk(KERN_NOTICE "Booting Linux...\n");
 
 #ifndef CONFIG_SMP
 	{
@@ -133,7 +132,6 @@ void __init device_scan(void)
 	}
 #endif /* !CONFIG_SMP */
 
-	cpu_probe();
 	{
 		extern void auxio_probe(void);
 		extern void auxio_power_probe(void);
diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c
index 3add4de8a1a..dd1342c0a3b 100644
--- a/arch/sparc/kernel/ds.c
+++ b/arch/sparc/kernel/ds.c
@@ -497,7 +497,7 @@ static void dr_cpu_init_response(struct ds_data *resp, u64 req_num,
 	tag->num_records = ncpus;
 
 	i = 0;
-	for_each_cpu_mask(cpu, *mask) {
+	for_each_cpu(cpu, mask) {
 		ent[i].cpu = cpu;
 		ent[i].result = DR_CPU_RES_OK;
 		ent[i].stat = default_stat;
@@ -534,7 +534,7 @@ static int __cpuinit dr_cpu_configure(struct ds_info *dp,
 	int resp_len, ncpus, cpu;
 	unsigned long flags;
 
-	ncpus = cpus_weight(*mask);
+	ncpus = cpumask_weight(mask);
 	resp_len = dr_cpu_size_response(ncpus);
 	resp = kzalloc(resp_len, GFP_KERNEL);
 	if (!resp)
@@ -547,7 +547,7 @@ static int __cpuinit dr_cpu_configure(struct ds_info *dp,
 	mdesc_populate_present_mask(mask);
 	mdesc_fill_in_cpu_data(mask);
 
-	for_each_cpu_mask(cpu, *mask) {
+	for_each_cpu(cpu, mask) {
 		int err;
 
 		printk(KERN_INFO "ds-%llu: Starting cpu %d...\n",
@@ -593,7 +593,7 @@ static int dr_cpu_unconfigure(struct ds_info *dp,
 	int resp_len, ncpus, cpu;
 	unsigned long flags;
 
-	ncpus = cpus_weight(*mask);
+	ncpus = cpumask_weight(mask);
 	resp_len = dr_cpu_size_response(ncpus);
 	resp = kzalloc(resp_len, GFP_KERNEL);
 	if (!resp)
@@ -603,7 +603,7 @@ static int dr_cpu_unconfigure(struct ds_info *dp,
 			     resp_len, ncpus, mask,
 			     DR_CPU_STAT_UNCONFIGURED);
 
-	for_each_cpu_mask(cpu, *mask) {
+	for_each_cpu(cpu, mask) {
 		int err;
 
 		printk(KERN_INFO "ds-%llu: Shutting down cpu %d...\n",
@@ -649,13 +649,13 @@ static void __cpuinit dr_cpu_data(struct ds_info *dp,
 
 	purge_dups(cpu_list, tag->num_records);
 
-	cpus_clear(mask);
+	cpumask_clear(&mask);
 	for (i = 0; i < tag->num_records; i++) {
 		if (cpu_list[i] == CPU_SENTINEL)
 			continue;
 
 		if (cpu_list[i] < nr_cpu_ids)
-			cpu_set(cpu_list[i], mask);
+			cpumask_set_cpu(cpu_list[i], &mask);
 	}
 
 	if (tag->type == DR_CPU_CONFIGURE)
diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
index 6da784a5612..8341963f4c8 100644
--- a/arch/sparc/kernel/entry.S
+++ b/arch/sparc/kernel/entry.S
@@ -269,19 +269,22 @@ smp4m_ticker:
 	/* Here is where we check for possible SMP IPI passed to us
 	 * on some level other than 15 which is the NMI and only used
 	 * for cross calls.  That has a separate entry point below.
+	 *
+	 * IPIs are sent on Level 12, 13 and 14. See IRQ_IPI_*.
 	 */
 maybe_smp4m_msg:
 	GET_PROCESSOR4M_ID(o3)
 	sethi	%hi(sun4m_irq_percpu), %l5
 	sll	%o3, 2, %o3
 	or	%l5, %lo(sun4m_irq_percpu), %o5
-	sethi	%hi(0x40000000), %o2
+	sethi	%hi(0x70000000), %o2	! Check all soft-IRQs
 	ld	[%o5 + %o3], %o1
 	ld	[%o1 + 0x00], %o3	! sun4m_irq_percpu[cpu]->pending
 	andcc	%o3, %o2, %g0
 	be,a	smp4m_ticker
 	 cmp	%l7, 14
-	st	%o2, [%o1 + 0x04]	! sun4m_irq_percpu[cpu]->clear=0x40000000
+	/* Soft-IRQ IPI */
+	st	%o2, [%o1 + 0x04]	! sun4m_irq_percpu[cpu]->clear=0x70000000
 	WRITE_PAUSE
 	ld	[%o1 + 0x00], %g0	! sun4m_irq_percpu[cpu]->pending
 	WRITE_PAUSE
@@ -290,9 +293,27 @@ maybe_smp4m_msg:
 	WRITE_PAUSE
 	wr	%l4, PSR_ET, %psr
 	WRITE_PAUSE
-	call	smp_reschedule_irq
+	sll	%o2, 28, %o2		! shift for simpler checks below
+maybe_smp4m_msg_check_single:
+	andcc	%o2, 0x1, %g0
+	beq,a	maybe_smp4m_msg_check_mask
+	 andcc	%o2, 0x2, %g0
+	call	smp_call_function_single_interrupt
 	 nop
-
+	andcc	%o2, 0x2, %g0
+maybe_smp4m_msg_check_mask:
+	beq,a	maybe_smp4m_msg_check_resched
+	 andcc	%o2, 0x4, %g0
+	call	smp_call_function_interrupt
+	 nop
+	andcc	%o2, 0x4, %g0
+maybe_smp4m_msg_check_resched:
+	/* rescheduling is done in RESTORE_ALL regardless, but incr stats */
+	beq,a	maybe_smp4m_msg_out
+	 nop
+	call	smp_resched_interrupt
+	 nop
+maybe_smp4m_msg_out:
 	RESTORE_ALL
 
 	.align	4
@@ -401,18 +422,18 @@ linux_trap_ipi15_sun4d:
 1:	b,a	1b
 
 #ifdef CONFIG_SPARC_LEON
-
-	.globl	smpleon_ticker
-	/* SMP per-cpu ticker interrupts are handled specially. */
-smpleon_ticker:
+	.globl	smpleon_ipi
+	.extern leon_ipi_interrupt
+	/* SMP per-cpu IPI interrupts are handled specially. */
+smpleon_ipi:
         SAVE_ALL
 	or	%l0, PSR_PIL, %g2
 	wr	%g2, 0x0, %psr
 	WRITE_PAUSE
 	wr	%g2, PSR_ET, %psr
 	WRITE_PAUSE
-	call	leon_percpu_timer_interrupt
-	 add	%sp, STACKFRAME_SZ, %o0
+	call	leonsmp_ipi_interrupt
+	 add	%sp, STACKFRAME_SZ, %o1 ! pt_regs
 	wr	%l0, PSR_ET, %psr
 	WRITE_PAUSE
 	RESTORE_ALL
diff --git a/arch/sparc/kernel/head_32.S b/arch/sparc/kernel/head_32.S
index 59423491cef..58778575983 100644
--- a/arch/sparc/kernel/head_32.S
+++ b/arch/sparc/kernel/head_32.S
@@ -810,31 +810,25 @@ found_version:
 got_prop:
 #ifdef CONFIG_SPARC_LEON
 	        /* no cpu-type check is needed, it is a SPARC-LEON */
-#ifdef CONFIG_SMP
-		ba leon_smp_init
-		 nop
 
-		.global leon_smp_init
-leon_smp_init:
-		sethi	%hi(boot_cpu_id), %g1    ! master always 0
-		stb	%g0, [%g1 + %lo(boot_cpu_id)]
-		sethi	%hi(boot_cpu_id4), %g1   ! master always 0
-		stb	%g0, [%g1 + %lo(boot_cpu_id4)]
+		sethi	%hi(boot_cpu_id), %g2	! boot-cpu index
 
-		rd     %asr17,%g1
-		srl    %g1,28,%g1
+#ifdef CONFIG_SMP
+		ldub	[%g2 + %lo(boot_cpu_id)], %g1
+		cmp	%g1, 0xff		! unset means first CPU
+		bne	leon_smp_cpu_startup	! continue only with master
+		 nop
+#endif
+		/* Get CPU-ID from most significant 4-bit of ASR17 */
+		rd     %asr17, %g1
+		srl    %g1, 28, %g1
 
-		cmp %g0,%g1
-		 beq sun4c_continue_boot         !continue with master
-		nop
+		/* Update boot_cpu_id only on boot cpu */
+		stub	%g1, [%g2 + %lo(boot_cpu_id)]
 
-		ba leon_smp_cpu_startup
-		 nop
-#else
 		ba sun4c_continue_boot
 		 nop
 #endif
-#endif
 		set	cputypval, %o2
 		ldub	[%o2 + 0x4], %l1
 
@@ -893,9 +887,6 @@ sun4d_init:
 	sta     %g4, [%g0] ASI_M_VIKING_TMP1
 	sethi	%hi(boot_cpu_id), %g5
 	stb	%g4, [%g5 + %lo(boot_cpu_id)]
-	sll	%g4, 2, %g4
-	sethi	%hi(boot_cpu_id4), %g5
-	stb	%g4, [%g5 + %lo(boot_cpu_id4)]
 #endif
 
 	/* Fall through to sun4m_init */
@@ -1024,14 +1015,28 @@ sun4c_continue_boot:
 		bl	1b
 		 add	%o0, 0x1, %o0
 
+		/* If boot_cpu_id has not been setup by machine specific
+		 * init-code above we default it to zero.
+		 */
+		sethi	%hi(boot_cpu_id), %g2
+		ldub	[%g2 + %lo(boot_cpu_id)], %g3
+		cmp	%g3, 0xff
+		bne	1f
+		 nop
+		mov	%g0, %g3
+		stub	%g3, [%g2 + %lo(boot_cpu_id)]
+
+1:		/* boot_cpu_id set. calculate boot_cpu_id4 = boot_cpu_id*4 */
+		sll	%g3, 2, %g3
+		sethi	%hi(boot_cpu_id4), %g2
+		stub	%g3, [%g2 + %lo(boot_cpu_id4)]
+
 		/* Initialize the uwinmask value for init task just in case.
 		 * But first make current_set[boot_cpu_id] point to something useful.
 		 */
 		set	init_thread_union, %g6
 		set	current_set, %g2
 #ifdef CONFIG_SMP
-		sethi	%hi(boot_cpu_id4), %g3
-		ldub	[%g3 + %lo(boot_cpu_id4)], %g3
 		st	%g6, [%g2]
 		add	%g2, %g3, %g2
 #endif
diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
index c6ce9a6a479..1c9c80a1a86 100644
--- a/arch/sparc/kernel/ioport.c
+++ b/arch/sparc/kernel/ioport.c
@@ -50,10 +50,15 @@
 #include <asm/io-unit.h>
 #include <asm/leon.h>
 
+/* This function must make sure that caches and memory are coherent after DMA
+ * On LEON systems without cache snooping it flushes the entire D-CACHE.
+ */
 #ifndef CONFIG_SPARC_LEON
-#define mmu_inval_dma_area(p, l)	/* Anton pulled it out for 2.4.0-xx */
+static inline void dma_make_coherent(unsigned long pa, unsigned long len)
+{
+}
 #else
-static inline void mmu_inval_dma_area(void *va, unsigned long len)
+static inline void dma_make_coherent(unsigned long pa, unsigned long len)
 {
 	if (!sparc_leon3_snooping_enabled())
 		leon_flush_dcache_all();
@@ -284,7 +289,6 @@ static void *sbus_alloc_coherent(struct device *dev, size_t len,
 		printk("sbus_alloc_consistent: cannot occupy 0x%lx", len_total);
 		goto err_nova;
 	}
-	mmu_inval_dma_area((void *)va, len_total);
 
 	// XXX The mmu_map_dma_area does this for us below, see comments.
 	// sparc_mapiorange(0, virt_to_phys(va), res->start, len_total);
@@ -336,7 +340,6 @@ static void sbus_free_coherent(struct device *dev, size_t n, void *p,
 	release_resource(res);
 	kfree(res);
 
-	/* mmu_inval_dma_area(va, n); */ /* it's consistent, isn't it */
 	pgv = virt_to_page(p);
 	mmu_unmap_dma_area(dev, ba, n);
 
@@ -463,7 +466,6 @@ static void *pci32_alloc_coherent(struct device *dev, size_t len,
 		printk("pci_alloc_consistent: cannot occupy 0x%lx", len_total);
 		goto err_nova;
 	}
-	mmu_inval_dma_area(va, len_total);
 	sparc_mapiorange(0, virt_to_phys(va), res->start, len_total);
 
 	*pba = virt_to_phys(va); /* equals virt_to_bus (R.I.P.) for us. */
@@ -489,7 +491,6 @@ static void pci32_free_coherent(struct device *dev, size_t n, void *p,
 				dma_addr_t ba)
 {
 	struct resource *res;
-	void *pgp;
 
 	if ((res = _sparc_find_resource(&_sparc_dvma,
 	    (unsigned long)p)) == NULL) {
@@ -509,14 +510,12 @@ static void pci32_free_coherent(struct device *dev, size_t n, void *p,
 		return;
 	}
 
-	pgp = phys_to_virt(ba);	/* bus_to_virt actually */
-	mmu_inval_dma_area(pgp, n);
+	dma_make_coherent(ba, n);
 	sparc_unmapiorange((unsigned long)p, n);
 
 	release_resource(res);
 	kfree(res);
-
-	free_pages((unsigned long)pgp, get_order(n));
+	free_pages((unsigned long)phys_to_virt(ba), get_order(n));
 }
 
 /*
@@ -535,7 +534,7 @@ static void pci32_unmap_page(struct device *dev, dma_addr_t ba, size_t size,
 			     enum dma_data_direction dir, struct dma_attrs *attrs)
 {
 	if (dir != PCI_DMA_TODEVICE)
-		mmu_inval_dma_area(phys_to_virt(ba), PAGE_ALIGN(size));
+		dma_make_coherent(ba, PAGE_ALIGN(size));
 }
 
 /* Map a set of buffers described by scatterlist in streaming
@@ -562,8 +561,7 @@ static int pci32_map_sg(struct device *device, struct scatterlist *sgl,
 
 	/* IIep is write-through, not flushing. */
 	for_each_sg(sgl, sg, nents, n) {
-		BUG_ON(page_address(sg_page(sg)) == NULL);
-		sg->dma_address = virt_to_phys(sg_virt(sg));
+		sg->dma_address = sg_phys(sg);
 		sg->dma_length = sg->length;
 	}
 	return nents;
@@ -582,9 +580,7 @@ static void pci32_unmap_sg(struct device *dev, struct scatterlist *sgl,
 
 	if (dir != PCI_DMA_TODEVICE) {
 		for_each_sg(sgl, sg, nents, n) {
-			BUG_ON(page_address(sg_page(sg)) == NULL);
-			mmu_inval_dma_area(page_address(sg_page(sg)),
-					   PAGE_ALIGN(sg->length));
+			dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length));
 		}
 	}
 }
@@ -603,8 +599,7 @@ static void pci32_sync_single_for_cpu(struct device *dev, dma_addr_t ba,
 				      size_t size, enum dma_data_direction dir)
 {
 	if (dir != PCI_DMA_TODEVICE) {
-		mmu_inval_dma_area(phys_to_virt(ba),
-				   PAGE_ALIGN(size));
+		dma_make_coherent(ba, PAGE_ALIGN(size));
 	}
 }
 
@@ -612,8 +607,7 @@ static void pci32_sync_single_for_device(struct device *dev, dma_addr_t ba,
 					 size_t size, enum dma_data_direction dir)
 {
 	if (dir != PCI_DMA_TODEVICE) {
-		mmu_inval_dma_area(phys_to_virt(ba),
-				   PAGE_ALIGN(size));
+		dma_make_coherent(ba, PAGE_ALIGN(size));
 	}
 }
 
@@ -631,9 +625,7 @@ static void pci32_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
 
 	if (dir != PCI_DMA_TODEVICE) {
 		for_each_sg(sgl, sg, nents, n) {
-			BUG_ON(page_address(sg_page(sg)) == NULL);
-			mmu_inval_dma_area(page_address(sg_page(sg)),
-					   PAGE_ALIGN(sg->length));
+			dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length));
 		}
 	}
 }
@@ -646,9 +638,7 @@ static void pci32_sync_sg_for_device(struct device *device, struct scatterlist *
 
 	if (dir != PCI_DMA_TODEVICE) {
 		for_each_sg(sgl, sg, nents, n) {
-			BUG_ON(page_address(sg_page(sg)) == NULL);
-			mmu_inval_dma_area(page_address(sg_page(sg)),
-					   PAGE_ALIGN(sg->length));
+			dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length));
 		}
 	}
 }
diff --git a/arch/sparc/kernel/irq.h b/arch/sparc/kernel/irq.h
index 008453b798e..100b9c204e7 100644
--- a/arch/sparc/kernel/irq.h
+++ b/arch/sparc/kernel/irq.h
@@ -2,6 +2,23 @@
 
 #include <asm/btfixup.h>
 
+struct irq_bucket {
+        struct irq_bucket *next;
+        unsigned int real_irq;
+        unsigned int irq;
+        unsigned int pil;
+};
+
+#define SUN4D_MAX_BOARD 10
+#define SUN4D_MAX_IRQ ((SUN4D_MAX_BOARD + 2) << 5)
+
+/* Map between the irq identifier used in hw to the
+ * irq_bucket. The map is sufficient large to hold
+ * the sun4d hw identifiers.
+ */
+extern struct irq_bucket *irq_map[SUN4D_MAX_IRQ];
+
+
 /* sun4m specific type definitions */
 
 /* This maps direct to CPU specific interrupt registers */
@@ -35,6 +52,10 @@ struct sparc_irq_config {
 };
 extern struct sparc_irq_config sparc_irq_config;
 
+unsigned int irq_alloc(unsigned int real_irq, unsigned int pil);
+void irq_link(unsigned int irq);
+void irq_unlink(unsigned int irq);
+void handler_irq(unsigned int pil, struct pt_regs *regs);
 
 /* Dave Redman (djhr@tadpole.co.uk)
  * changed these to function pointers.. it saves cycles and will allow
@@ -44,33 +65,9 @@ extern struct sparc_irq_config sparc_irq_config;
  * Changed these to btfixup entities... It saves cycles :)
  */
 
-BTFIXUPDEF_CALL(void, disable_irq, unsigned int)
-BTFIXUPDEF_CALL(void, enable_irq, unsigned int)
-BTFIXUPDEF_CALL(void, disable_pil_irq, unsigned int)
-BTFIXUPDEF_CALL(void, enable_pil_irq, unsigned int)
 BTFIXUPDEF_CALL(void, clear_clock_irq, void)
 BTFIXUPDEF_CALL(void, load_profile_irq, int, unsigned int)
 
-static inline void __disable_irq(unsigned int irq)
-{
-	BTFIXUP_CALL(disable_irq)(irq);
-}
-
-static inline void __enable_irq(unsigned int irq)
-{
-	BTFIXUP_CALL(enable_irq)(irq);
-}
-
-static inline void disable_pil_irq(unsigned int irq)
-{
-	BTFIXUP_CALL(disable_pil_irq)(irq);
-}
-
-static inline void enable_pil_irq(unsigned int irq)
-{
-	BTFIXUP_CALL(enable_pil_irq)(irq);
-}
-
 static inline void clear_clock_irq(void)
 {
 	BTFIXUP_CALL(clear_clock_irq)();
@@ -89,4 +86,10 @@ BTFIXUPDEF_CALL(void, set_irq_udt, int)
 #define set_cpu_int(cpu,level) BTFIXUP_CALL(set_cpu_int)(cpu,level)
 #define clear_cpu_int(cpu,level) BTFIXUP_CALL(clear_cpu_int)(cpu,level)
 #define set_irq_udt(cpu) BTFIXUP_CALL(set_irq_udt)(cpu)
+
+/* All SUN4D IPIs are sent on this IRQ, may be shared with hard IRQs */
+#define SUN4D_IPI_IRQ 14
+
+extern void sun4d_ipi_interrupt(void);
+
 #endif
diff --git a/arch/sparc/kernel/irq_32.c b/arch/sparc/kernel/irq_32.c
index 7c93df4099c..9b89d842913 100644
--- a/arch/sparc/kernel/irq_32.c
+++ b/arch/sparc/kernel/irq_32.c
@@ -15,6 +15,7 @@
 #include <linux/seq_file.h>
 
 #include <asm/cacheflush.h>
+#include <asm/cpudata.h>
 #include <asm/pcic.h>
 #include <asm/leon.h>
 
@@ -101,284 +102,173 @@ EXPORT_SYMBOL(arch_local_irq_restore);
  * directed CPU interrupts using the existing enable/disable irq code
  * with tweaks.
  *
+ * Sun4d complicates things even further.  IRQ numbers are arbitrary
+ * 32-bit values in that case.  Since this is similar to sparc64,
+ * we adopt a virtual IRQ numbering scheme as is done there.
+ * Virutal interrupt numbers are allocated by build_irq().  So NR_IRQS
+ * just becomes a limit of how many interrupt sources we can handle in
+ * a single system.  Even fully loaded SS2000 machines top off at
+ * about 32 interrupt sources or so, therefore a NR_IRQS value of 64
+ * is more than enough.
+  *
+ * We keep a map of per-PIL enable interrupts.  These get wired
+ * up via the irq_chip->startup() method which gets invoked by
+ * the generic IRQ layer during request_irq().
  */
 
 
+/* Table of allocated irqs. Unused entries has irq == 0 */
+static struct irq_bucket irq_table[NR_IRQS];
+/* Protect access to irq_table */
+static DEFINE_SPINLOCK(irq_table_lock);
 
-/*
- * Dave Redman (djhr@tadpole.co.uk)
- *
- * There used to be extern calls and hard coded values here.. very sucky!
- * instead, because some of the devices attach very early, I do something
- * equally sucky but at least we'll never try to free statically allocated
- * space or call kmalloc before kmalloc_init :(.
- *
- * In fact it's the timer10 that attaches first.. then timer14
- * then kmalloc_init is called.. then the tty interrupts attach.
- * hmmm....
- *
- */
-#define MAX_STATIC_ALLOC	4
-struct irqaction static_irqaction[MAX_STATIC_ALLOC];
-int static_irq_count;
-
-static struct {
-	struct irqaction *action;
-	int flags;
-} sparc_irq[NR_IRQS];
-#define SPARC_IRQ_INPROGRESS 1
-
-/* Used to protect the IRQ action lists */
-DEFINE_SPINLOCK(irq_action_lock);
+/* Map between the irq identifier used in hw to the irq_bucket. */
+struct irq_bucket *irq_map[SUN4D_MAX_IRQ];
+/* Protect access to irq_map */
+static DEFINE_SPINLOCK(irq_map_lock);
 
-int show_interrupts(struct seq_file *p, void *v)
+/* Allocate a new irq from the irq_table */
+unsigned int irq_alloc(unsigned int real_irq, unsigned int pil)
 {
-	int i = *(loff_t *)v;
-	struct irqaction *action;
 	unsigned long flags;
-#ifdef CONFIG_SMP
-	int j;
-#endif
+	unsigned int i;
+
+	spin_lock_irqsave(&irq_table_lock, flags);
+	for (i = 1; i < NR_IRQS; i++) {
+		if (irq_table[i].real_irq == real_irq && irq_table[i].pil == pil)
+			goto found;
+	}
 
-	if (sparc_cpu_model == sun4d)
-		return show_sun4d_interrupts(p, v);
+	for (i = 1; i < NR_IRQS; i++) {
+		if (!irq_table[i].irq)
+			break;
+	}
 
-	spin_lock_irqsave(&irq_action_lock, flags);
 	if (i < NR_IRQS) {
-		action = sparc_irq[i].action;
-		if (!action)
-			goto out_unlock;
-		seq_printf(p, "%3d: ", i);
-#ifndef CONFIG_SMP
-		seq_printf(p, "%10u ", kstat_irqs(i));
-#else
-		for_each_online_cpu(j) {
-			seq_printf(p, "%10u ",
-				    kstat_cpu(j).irqs[i]);
-		}
-#endif
-		seq_printf(p, " %c %s",
-			(action->flags & IRQF_DISABLED) ? '+' : ' ',
-			action->name);
-		for (action = action->next; action; action = action->next) {
-			seq_printf(p, ",%s %s",
-				(action->flags & IRQF_DISABLED) ? " +" : "",
-				action->name);
-		}
-		seq_putc(p, '\n');
+		irq_table[i].real_irq = real_irq;
+		irq_table[i].irq = i;
+		irq_table[i].pil = pil;
+	} else {
+		printk(KERN_ERR "IRQ: Out of virtual IRQs.\n");
+		i = 0;
 	}
-out_unlock:
-	spin_unlock_irqrestore(&irq_action_lock, flags);
-	return 0;
+found:
+	spin_unlock_irqrestore(&irq_table_lock, flags);
+
+	return i;
 }
 
-void free_irq(unsigned int irq, void *dev_id)
+/* Based on a single pil handler_irq may need to call several
+ * interrupt handlers. Use irq_map as entry to irq_table,
+ * and let each entry in irq_table point to the next entry.
+ */
+void irq_link(unsigned int irq)
 {
-	struct irqaction *action;
-	struct irqaction **actionp;
+	struct irq_bucket *p;
 	unsigned long flags;
-	unsigned int cpu_irq;
-
-	if (sparc_cpu_model == sun4d) {
-		sun4d_free_irq(irq, dev_id);
-		return;
-	}
-	cpu_irq = irq & (NR_IRQS - 1);
-	if (cpu_irq > 14) {  /* 14 irq levels on the sparc */
-		printk(KERN_ERR "Trying to free bogus IRQ %d\n", irq);
-		return;
-	}
+	unsigned int pil;
 
-	spin_lock_irqsave(&irq_action_lock, flags);
+	BUG_ON(irq >= NR_IRQS);
 
-	actionp = &sparc_irq[cpu_irq].action;
-	action = *actionp;
+	spin_lock_irqsave(&irq_map_lock, flags);
 
-	if (!action->handler) {
-		printk(KERN_ERR "Trying to free free IRQ%d\n", irq);
-		goto out_unlock;
-	}
-	if (dev_id) {
-		for (; action; action = action->next) {
-			if (action->dev_id == dev_id)
-				break;
-			actionp = &action->next;
-		}
-		if (!action) {
-			printk(KERN_ERR "Trying to free free shared IRQ%d\n",
-			       irq);
-			goto out_unlock;
-		}
-	} else if (action->flags & IRQF_SHARED) {
-		printk(KERN_ERR "Trying to free shared IRQ%d with NULL device ID\n",
-		       irq);
-		goto out_unlock;
-	}
-	if (action->flags & SA_STATIC_ALLOC) {
-		/*
-		 * This interrupt is marked as specially allocated
-		 * so it is a bad idea to free it.
-		 */
-		printk(KERN_ERR "Attempt to free statically allocated IRQ%d (%s)\n",
-		       irq, action->name);
-		goto out_unlock;
-	}
-
-	*actionp = action->next;
+	p = &irq_table[irq];
+	pil = p->pil;
+	BUG_ON(pil > SUN4D_MAX_IRQ);
+	p->next = irq_map[pil];
+	irq_map[pil] = p;
 
-	spin_unlock_irqrestore(&irq_action_lock, flags);
+	spin_unlock_irqrestore(&irq_map_lock, flags);
+}
 
-	synchronize_irq(irq);
+void irq_unlink(unsigned int irq)
+{
+	struct irq_bucket *p, **pnext;
+	unsigned long flags;
 
-	spin_lock_irqsave(&irq_action_lock, flags);
+	BUG_ON(irq >= NR_IRQS);
 
-	kfree(action);
+	spin_lock_irqsave(&irq_map_lock, flags);
 
-	if (!sparc_irq[cpu_irq].action)
-		__disable_irq(irq);
+	p = &irq_table[irq];
+	BUG_ON(p->pil > SUN4D_MAX_IRQ);
+	pnext = &irq_map[p->pil];
+	while (*pnext != p)
+		pnext = &(*pnext)->next;
+	*pnext = p->next;
 
-out_unlock:
-	spin_unlock_irqrestore(&irq_action_lock, flags);
+	spin_unlock_irqrestore(&irq_map_lock, flags);
 }
-EXPORT_SYMBOL(free_irq);
-
-/*
- * This is called when we want to synchronize with
- * interrupts. We may for example tell a device to
- * stop sending interrupts: but to make sure there
- * are no interrupts that are executing on another
- * CPU we need to call this function.
- */
-#ifdef CONFIG_SMP
-void synchronize_irq(unsigned int irq)
-{
-	unsigned int cpu_irq;
 
-	cpu_irq = irq & (NR_IRQS - 1);
-	while (sparc_irq[cpu_irq].flags & SPARC_IRQ_INPROGRESS)
-		cpu_relax();
-}
-EXPORT_SYMBOL(synchronize_irq);
-#endif /* SMP */
 
-void unexpected_irq(int irq, void *dev_id, struct pt_regs *regs)
+/* /proc/interrupts printing */
+int arch_show_interrupts(struct seq_file *p, int prec)
 {
-	int i;
-	struct irqaction *action;
-	unsigned int cpu_irq;
+	int j;
 
-	cpu_irq = irq & (NR_IRQS - 1);
-	action = sparc_irq[cpu_irq].action;
-
-	printk(KERN_ERR "IO device interrupt, irq = %d\n", irq);
-	printk(KERN_ERR "PC = %08lx NPC = %08lx FP=%08lx\n", regs->pc,
-		    regs->npc, regs->u_regs[14]);
-	if (action) {
-		printk(KERN_ERR "Expecting: ");
-		for (i = 0; i < 16; i++)
-			if (action->handler)
-				printk(KERN_CONT "[%s:%d:0x%x] ", action->name,
-				       i, (unsigned int)action->handler);
-	}
-	printk(KERN_ERR "AIEEE\n");
-	panic("bogus interrupt received");
+#ifdef CONFIG_SMP
+	seq_printf(p, "RES: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", cpu_data(j).irq_resched_count);
+	seq_printf(p, "     IPI rescheduling interrupts\n");
+	seq_printf(p, "CAL: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", cpu_data(j).irq_call_count);
+	seq_printf(p, "     IPI function call interrupts\n");
+#endif
+	seq_printf(p, "NMI: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", cpu_data(j).counter);
+	seq_printf(p, "     Non-maskable interrupts\n");
+	return 0;
 }
 
-void handler_irq(int pil, struct pt_regs *regs)
+void handler_irq(unsigned int pil, struct pt_regs *regs)
 {
 	struct pt_regs *old_regs;
-	struct irqaction *action;
-	int cpu = smp_processor_id();
+	struct irq_bucket *p;
 
+	BUG_ON(pil > 15);
 	old_regs = set_irq_regs(regs);
 	irq_enter();
-	disable_pil_irq(pil);
-#ifdef CONFIG_SMP
-	/* Only rotate on lower priority IRQs (scsi, ethernet, etc.). */
-	if ((sparc_cpu_model==sun4m) && (pil < 10))
-		smp4m_irq_rotate(cpu);
-#endif
-	action = sparc_irq[pil].action;
-	sparc_irq[pil].flags |= SPARC_IRQ_INPROGRESS;
-	kstat_cpu(cpu).irqs[pil]++;
-	do {
-		if (!action || !action->handler)
-			unexpected_irq(pil, NULL, regs);
-		action->handler(pil, action->dev_id);
-		action = action->next;
-	} while (action);
-	sparc_irq[pil].flags &= ~SPARC_IRQ_INPROGRESS;
-	enable_pil_irq(pil);
+
+	p = irq_map[pil];
+	while (p) {
+		struct irq_bucket *next = p->next;
+
+		generic_handle_irq(p->irq);
+		p = next;
+	}
 	irq_exit();
 	set_irq_regs(old_regs);
 }
 
 #if defined(CONFIG_BLK_DEV_FD) || defined(CONFIG_BLK_DEV_FD_MODULE)
+static unsigned int floppy_irq;
 
-/*
- * Fast IRQs on the Sparc can only have one routine attached to them,
- * thus no sharing possible.
- */
-static int request_fast_irq(unsigned int irq,
-			    void (*handler)(void),
-			    unsigned long irqflags, const char *devname)
+int sparc_floppy_request_irq(unsigned int irq, irq_handler_t irq_handler)
 {
-	struct irqaction *action;
-	unsigned long flags;
 	unsigned int cpu_irq;
-	int ret;
+	int err;
+
 #if defined CONFIG_SMP && !defined CONFIG_SPARC_LEON
 	struct tt_entry *trap_table;
 #endif
-	cpu_irq = irq & (NR_IRQS - 1);
-	if (cpu_irq > 14) {
-		ret = -EINVAL;
-		goto out;
-	}
-	if (!handler) {
-		ret = -EINVAL;
-		goto out;
-	}
 
-	spin_lock_irqsave(&irq_action_lock, flags);
+	err = request_irq(irq, irq_handler, 0, "floppy", NULL);
+	if (err)
+		return -1;
 
-	action = sparc_irq[cpu_irq].action;
-	if (action) {
-		if (action->flags & IRQF_SHARED)
-			panic("Trying to register fast irq when already shared.\n");
-		if (irqflags & IRQF_SHARED)
-			panic("Trying to register fast irq as shared.\n");
+	/* Save for later use in floppy interrupt handler */
+	floppy_irq = irq;
 
-		/* Anyway, someone already owns it so cannot be made fast. */
-		printk(KERN_ERR "request_fast_irq: Trying to register yet already owned.\n");
-		ret = -EBUSY;
-		goto out_unlock;
-	}
-
-	/*
-	 * If this is flagged as statically allocated then we use our
-	 * private struct which is never freed.
-	 */
-	if (irqflags & SA_STATIC_ALLOC) {
-		if (static_irq_count < MAX_STATIC_ALLOC)
-			action = &static_irqaction[static_irq_count++];
-		else
-			printk(KERN_ERR "Fast IRQ%d (%s) SA_STATIC_ALLOC failed using kmalloc\n",
-			       irq, devname);
-	}
-
-	if (action == NULL)
-		action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC);
-	if (!action) {
-		ret = -ENOMEM;
-		goto out_unlock;
-	}
+	cpu_irq = (irq & (NR_IRQS - 1));
 
 	/* Dork with trap table if we get this far. */
 #define INSTANTIATE(table) \
 	table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_one = SPARC_RD_PSR_L0; \
 	table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_two = \
-		SPARC_BRANCH((unsigned long) handler, \
+		SPARC_BRANCH((unsigned long) floppy_hardint, \
 			     (unsigned long) &table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_two);\
 	table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_three = SPARC_RD_WIM_L3; \
 	table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_four = SPARC_NOP;
@@ -399,22 +289,9 @@ static int request_fast_irq(unsigned int irq,
 	 * writing we have no CPU-neutral interface to fine-grained flushes.
 	 */
 	flush_cache_all();
-
-	action->flags = irqflags;
-	action->name = devname;
-	action->dev_id = NULL;
-	action->next = NULL;
-
-	sparc_irq[cpu_irq].action = action;
-
-	__enable_irq(irq);
-
-	ret = 0;
-out_unlock:
-	spin_unlock_irqrestore(&irq_action_lock, flags);
-out:
-	return ret;
+	return 0;
 }
+EXPORT_SYMBOL(sparc_floppy_request_irq);
 
 /*
  * These variables are used to access state from the assembler
@@ -440,154 +317,23 @@ EXPORT_SYMBOL(pdma_base);
 unsigned long pdma_areasize;
 EXPORT_SYMBOL(pdma_areasize);
 
-static irq_handler_t floppy_irq_handler;
-
+/* Use the generic irq support to call floppy_interrupt
+ * which was setup using request_irq() in sparc_floppy_request_irq().
+ * We only have one floppy interrupt so we do not need to check
+ * for additional handlers being wired up by irq_link()
+ */
 void sparc_floppy_irq(int irq, void *dev_id, struct pt_regs *regs)
 {
 	struct pt_regs *old_regs;
-	int cpu = smp_processor_id();
 
 	old_regs = set_irq_regs(regs);
-	disable_pil_irq(irq);
 	irq_enter();
-	kstat_cpu(cpu).irqs[irq]++;
-	floppy_irq_handler(irq, dev_id);
+	generic_handle_irq(floppy_irq);
 	irq_exit();
-	enable_pil_irq(irq);
 	set_irq_regs(old_regs);
-	/*
-	 * XXX Eek, it's totally changed with preempt_count() and such
-	 * if (softirq_pending(cpu))
-	 *	do_softirq();
-	 */
-}
-
-int sparc_floppy_request_irq(int irq, unsigned long flags,
-			     irq_handler_t irq_handler)
-{
-	floppy_irq_handler = irq_handler;
-	return request_fast_irq(irq, floppy_hardint, flags, "floppy");
 }
-EXPORT_SYMBOL(sparc_floppy_request_irq);
-
 #endif
 
-int request_irq(unsigned int irq,
-		irq_handler_t handler,
-		unsigned long irqflags, const char *devname, void *dev_id)
-{
-	struct irqaction *action, **actionp;
-	unsigned long flags;
-	unsigned int cpu_irq;
-	int ret;
-
-	if (sparc_cpu_model == sun4d)
-		return sun4d_request_irq(irq, handler, irqflags, devname, dev_id);
-
-	cpu_irq = irq & (NR_IRQS - 1);
-	if (cpu_irq > 14) {
-		ret = -EINVAL;
-		goto out;
-	}
-	if (!handler) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	spin_lock_irqsave(&irq_action_lock, flags);
-
-	actionp = &sparc_irq[cpu_irq].action;
-	action = *actionp;
-	if (action) {
-		if (!(action->flags & IRQF_SHARED) || !(irqflags & IRQF_SHARED)) {
-			ret = -EBUSY;
-			goto out_unlock;
-		}
-		if ((action->flags & IRQF_DISABLED) != (irqflags & IRQF_DISABLED)) {
-			printk(KERN_ERR "Attempt to mix fast and slow interrupts on IRQ%d denied\n",
-			       irq);
-			ret = -EBUSY;
-			goto out_unlock;
-		}
-		for ( ; action; action = *actionp)
-			actionp = &action->next;
-	}
-
-	/* If this is flagged as statically allocated then we use our
-	 * private struct which is never freed.
-	 */
-	if (irqflags & SA_STATIC_ALLOC) {
-		if (static_irq_count < MAX_STATIC_ALLOC)
-			action = &static_irqaction[static_irq_count++];
-		else
-			printk(KERN_ERR "Request for IRQ%d (%s) SA_STATIC_ALLOC failed using kmalloc\n",
-			       irq, devname);
-	}
-	if (action == NULL)
-		action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC);
-	if (!action) {
-		ret = -ENOMEM;
-		goto out_unlock;
-	}
-
-	action->handler = handler;
-	action->flags = irqflags;
-	action->name = devname;
-	action->next = NULL;
-	action->dev_id = dev_id;
-
-	*actionp = action;
-
-	__enable_irq(irq);
-
-	ret = 0;
-out_unlock:
-	spin_unlock_irqrestore(&irq_action_lock, flags);
-out:
-	return ret;
-}
-EXPORT_SYMBOL(request_irq);
-
-void disable_irq_nosync(unsigned int irq)
-{
-	__disable_irq(irq);
-}
-EXPORT_SYMBOL(disable_irq_nosync);
-
-void disable_irq(unsigned int irq)
-{
-	__disable_irq(irq);
-}
-EXPORT_SYMBOL(disable_irq);
-
-void enable_irq(unsigned int irq)
-{
-	__enable_irq(irq);
-}
-EXPORT_SYMBOL(enable_irq);
-
-/*
- * We really don't need these at all on the Sparc.  We only have
- * stubs here because they are exported to modules.
- */
-unsigned long probe_irq_on(void)
-{
-	return 0;
-}
-EXPORT_SYMBOL(probe_irq_on);
-
-int probe_irq_off(unsigned long mask)
-{
-	return 0;
-}
-EXPORT_SYMBOL(probe_irq_off);
-
-static unsigned int build_device_irq(struct platform_device *op,
-                                     unsigned int real_irq)
-{
-	return real_irq;
-}
-
 /* djhr
  * This could probably be made indirect too and assigned in the CPU
  * bits of the code. That would be much nicer I think and would also
@@ -598,8 +344,6 @@ static unsigned int build_device_irq(struct platform_device *op,
 
 void __init init_IRQ(void)
 {
-	sparc_irq_config.build_device_irq = build_device_irq;
-
 	switch (sparc_cpu_model) {
 	case sun4c:
 	case sun4:
@@ -607,14 +351,11 @@ void __init init_IRQ(void)
 		break;
 
 	case sun4m:
-#ifdef CONFIG_PCI
 		pcic_probe();
-		if (pcic_present()) {
+		if (pcic_present())
 			sun4m_pci_init_IRQ();
-			break;
-		}
-#endif
-		sun4m_init_IRQ();
+		else
+			sun4m_init_IRQ();
 		break;
 
 	case sun4d:
@@ -632,9 +373,3 @@ void __init init_IRQ(void)
 	btfixup();
 }
 
-#ifdef CONFIG_PROC_FS
-void init_irq_proc(void)
-{
-	/* For now, nothing... */
-}
-#endif /* CONFIG_PROC_FS */
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index b1d275ce343..4e78862d12f 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -224,13 +224,13 @@ static int irq_choose_cpu(unsigned int irq, const struct cpumask *affinity)
 	int cpuid;
 
 	cpumask_copy(&mask, affinity);
-	if (cpus_equal(mask, cpu_online_map)) {
+	if (cpumask_equal(&mask, cpu_online_mask)) {
 		cpuid = map_to_cpu(irq);
 	} else {
 		cpumask_t tmp;
 
-		cpus_and(tmp, cpu_online_map, mask);
-		cpuid = cpus_empty(tmp) ? map_to_cpu(irq) : first_cpu(tmp);
+		cpumask_and(&tmp, cpu_online_mask, &mask);
+		cpuid = cpumask_empty(&tmp) ? map_to_cpu(irq) : cpumask_first(&tmp);
 	}
 
 	return cpuid;
diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h
index 24ad449886b..6f6544cfa0e 100644
--- a/arch/sparc/kernel/kernel.h
+++ b/arch/sparc/kernel/kernel.h
@@ -6,11 +6,9 @@
 #include <asm/traps.h>
 
 /* cpu.c */
-extern const char *sparc_cpu_type;
 extern const char *sparc_pmu_type;
-extern const char *sparc_fpu_type;
-
 extern unsigned int fsr_storage;
+extern int ncpus_probed;
 
 #ifdef CONFIG_SPARC32
 /* cpu.c */
@@ -37,6 +35,7 @@ extern void sun4c_init_IRQ(void);
 extern unsigned int lvl14_resolution;
 
 extern void sun4m_init_IRQ(void);
+extern void sun4m_unmask_profile_irq(void);
 extern void sun4m_clear_profile_irq(int cpu);
 
 /* sun4d_irq.c */
diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c
index 2969f777fa1..2f538ac2e13 100644
--- a/arch/sparc/kernel/leon_kernel.c
+++ b/arch/sparc/kernel/leon_kernel.c
@@ -19,53 +19,70 @@
 #include <asm/leon_amba.h>
 #include <asm/traps.h>
 #include <asm/cacheflush.h>
+#include <asm/smp.h>
+#include <asm/setup.h>
 
 #include "prom.h"
 #include "irq.h"
 
 struct leon3_irqctrl_regs_map *leon3_irqctrl_regs; /* interrupt controller base address */
 struct leon3_gptimer_regs_map *leon3_gptimer_regs; /* timer controller base address */
-struct amba_apb_device leon_percpu_timer_dev[16];
 
 int leondebug_irq_disable;
 int leon_debug_irqout;
 static int dummy_master_l10_counter;
 unsigned long amba_system_id;
+static DEFINE_SPINLOCK(leon_irq_lock);
 
 unsigned long leon3_gptimer_irq; /* interrupt controller irq number */
 unsigned long leon3_gptimer_idx; /* Timer Index (0..6) within Timer Core */
+int leon3_ticker_irq; /* Timer ticker IRQ */
 unsigned int sparc_leon_eirq;
-#define LEON_IMASK ((&leon3_irqctrl_regs->mask[0]))
+#define LEON_IMASK(cpu) (&leon3_irqctrl_regs->mask[cpu])
+#define LEON_IACK (&leon3_irqctrl_regs->iclear)
+#define LEON_DO_ACK_HW 1
 
-/* Return the IRQ of the pending IRQ on the extended IRQ controller */
-int sparc_leon_eirq_get(int eirq, int cpu)
+/* Return the last ACKed IRQ by the Extended IRQ controller. It has already
+ * been (automatically) ACKed when the CPU takes the trap.
+ */
+static inline unsigned int leon_eirq_get(int cpu)
 {
 	return LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->intid[cpu]) & 0x1f;
 }
 
-irqreturn_t sparc_leon_eirq_isr(int dummy, void *dev_id)
+/* Handle one or multiple IRQs from the extended interrupt controller */
+static void leon_handle_ext_irq(unsigned int irq, struct irq_desc *desc)
 {
-	printk(KERN_ERR "sparc_leon_eirq_isr: ERROR EXTENDED IRQ\n");
-	return IRQ_HANDLED;
+	unsigned int eirq;
+	int cpu = sparc_leon3_cpuid();
+
+	eirq = leon_eirq_get(cpu);
+	if ((eirq & 0x10) && irq_map[eirq]->irq) /* bit4 tells if IRQ happened */
+		generic_handle_irq(irq_map[eirq]->irq);
 }
 
 /* The extended IRQ controller has been found, this function registers it */
-void sparc_leon_eirq_register(int eirq)
+void leon_eirq_setup(unsigned int eirq)
 {
-	int irq;
+	unsigned long mask, oldmask;
+	unsigned int veirq;
 
-	/* Register a "BAD" handler for this interrupt, it should never happen */
-	irq = request_irq(eirq, sparc_leon_eirq_isr,
-			  (IRQF_DISABLED | SA_STATIC_ALLOC), "extirq", NULL);
-
-	if (irq) {
-		printk(KERN_ERR
-		       "sparc_leon_eirq_register: unable to attach IRQ%d\n",
-		       eirq);
-	} else {
-		sparc_leon_eirq = eirq;
+	if (eirq < 1 || eirq > 0xf) {
+		printk(KERN_ERR "LEON EXT IRQ NUMBER BAD: %d\n", eirq);
+		return;
 	}
 
+	veirq = leon_build_device_irq(eirq, leon_handle_ext_irq, "extirq", 0);
+
+	/*
+	 * Unmask the Extended IRQ, the IRQs routed through the Ext-IRQ
+	 * controller have a mask-bit of their own, so this is safe.
+	 */
+	irq_link(veirq);
+	mask = 1 << eirq;
+	oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(boot_cpu_id));
+	LEON3_BYPASS_STORE_PA(LEON_IMASK(boot_cpu_id), (oldmask | mask));
+	sparc_leon_eirq = eirq;
 }
 
 static inline unsigned long get_irqmask(unsigned int irq)
@@ -83,35 +100,151 @@ static inline unsigned long get_irqmask(unsigned int irq)
 	return mask;
 }
 
-static void leon_enable_irq(unsigned int irq_nr)
+#ifdef CONFIG_SMP
+static int irq_choose_cpu(const struct cpumask *affinity)
 {
-	unsigned long mask, flags;
-	mask = get_irqmask(irq_nr);
-	local_irq_save(flags);
-	LEON3_BYPASS_STORE_PA(LEON_IMASK,
-			      (LEON3_BYPASS_LOAD_PA(LEON_IMASK) | (mask)));
-	local_irq_restore(flags);
+	cpumask_t mask;
+
+	cpus_and(mask, cpu_online_map, *affinity);
+	if (cpus_equal(mask, cpu_online_map) || cpus_empty(mask))
+		return boot_cpu_id;
+	else
+		return first_cpu(mask);
 }
+#else
+#define irq_choose_cpu(affinity) boot_cpu_id
+#endif
 
-static void leon_disable_irq(unsigned int irq_nr)
+static int leon_set_affinity(struct irq_data *data, const struct cpumask *dest,
+			     bool force)
 {
-	unsigned long mask, flags;
-	mask = get_irqmask(irq_nr);
-	local_irq_save(flags);
-	LEON3_BYPASS_STORE_PA(LEON_IMASK,
-			      (LEON3_BYPASS_LOAD_PA(LEON_IMASK) & ~(mask)));
-	local_irq_restore(flags);
+	unsigned long mask, oldmask, flags;
+	int oldcpu, newcpu;
+
+	mask = (unsigned long)data->chip_data;
+	oldcpu = irq_choose_cpu(data->affinity);
+	newcpu = irq_choose_cpu(dest);
+
+	if (oldcpu == newcpu)
+		goto out;
+
+	/* unmask on old CPU first before enabling on the selected CPU */
+	spin_lock_irqsave(&leon_irq_lock, flags);
+	oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(oldcpu));
+	LEON3_BYPASS_STORE_PA(LEON_IMASK(oldcpu), (oldmask & ~mask));
+	oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(newcpu));
+	LEON3_BYPASS_STORE_PA(LEON_IMASK(newcpu), (oldmask | mask));
+	spin_unlock_irqrestore(&leon_irq_lock, flags);
+out:
+	return IRQ_SET_MASK_OK;
+}
+
+static void leon_unmask_irq(struct irq_data *data)
+{
+	unsigned long mask, oldmask, flags;
+	int cpu;
+
+	mask = (unsigned long)data->chip_data;
+	cpu = irq_choose_cpu(data->affinity);
+	spin_lock_irqsave(&leon_irq_lock, flags);
+	oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(cpu));
+	LEON3_BYPASS_STORE_PA(LEON_IMASK(cpu), (oldmask | mask));
+	spin_unlock_irqrestore(&leon_irq_lock, flags);
+}
+
+static void leon_mask_irq(struct irq_data *data)
+{
+	unsigned long mask, oldmask, flags;
+	int cpu;
+
+	mask = (unsigned long)data->chip_data;
+	cpu = irq_choose_cpu(data->affinity);
+	spin_lock_irqsave(&leon_irq_lock, flags);
+	oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(cpu));
+	LEON3_BYPASS_STORE_PA(LEON_IMASK(cpu), (oldmask & ~mask));
+	spin_unlock_irqrestore(&leon_irq_lock, flags);
+}
+
+static unsigned int leon_startup_irq(struct irq_data *data)
+{
+	irq_link(data->irq);
+	leon_unmask_irq(data);
+	return 0;
+}
 
+static void leon_shutdown_irq(struct irq_data *data)
+{
+	leon_mask_irq(data);
+	irq_unlink(data->irq);
+}
+
+/* Used by external level sensitive IRQ handlers on the LEON: ACK IRQ ctrl */
+static void leon_eoi_irq(struct irq_data *data)
+{
+	unsigned long mask = (unsigned long)data->chip_data;
+
+	if (mask & LEON_DO_ACK_HW)
+		LEON3_BYPASS_STORE_PA(LEON_IACK, mask & ~LEON_DO_ACK_HW);
+}
+
+static struct irq_chip leon_irq = {
+	.name			= "leon",
+	.irq_startup		= leon_startup_irq,
+	.irq_shutdown		= leon_shutdown_irq,
+	.irq_mask		= leon_mask_irq,
+	.irq_unmask		= leon_unmask_irq,
+	.irq_eoi		= leon_eoi_irq,
+	.irq_set_affinity	= leon_set_affinity,
+};
+
+/*
+ * Build a LEON IRQ for the edge triggered LEON IRQ controller:
+ *  Edge (normal) IRQ           - handle_simple_irq, ack=DONT-CARE, never ack
+ *  Level IRQ (PCI|Level-GPIO)  - handle_fasteoi_irq, ack=1, ack after ISR
+ *  Per-CPU Edge                - handle_percpu_irq, ack=0
+ */
+unsigned int leon_build_device_irq(unsigned int real_irq,
+				    irq_flow_handler_t flow_handler,
+				    const char *name, int do_ack)
+{
+	unsigned int irq;
+	unsigned long mask;
+
+	irq = 0;
+	mask = get_irqmask(real_irq);
+	if (mask == 0)
+		goto out;
+
+	irq = irq_alloc(real_irq, real_irq);
+	if (irq == 0)
+		goto out;
+
+	if (do_ack)
+		mask |= LEON_DO_ACK_HW;
+
+	irq_set_chip_and_handler_name(irq, &leon_irq,
+				      flow_handler, name);
+	irq_set_chip_data(irq, (void *)mask);
+
+out:
+	return irq;
+}
+
+static unsigned int _leon_build_device_irq(struct platform_device *op,
+					   unsigned int real_irq)
+{
+	return leon_build_device_irq(real_irq, handle_simple_irq, "edge", 0);
 }
 
 void __init leon_init_timers(irq_handler_t counter_fn)
 {
-	int irq;
+	int irq, eirq;
 	struct device_node *rootnp, *np, *nnp;
 	struct property *pp;
 	int len;
-	int cpu, icsel;
+	int icsel;
 	int ampopts;
+	int err;
 
 	leondebug_irq_disable = 0;
 	leon_debug_irqout = 0;
@@ -173,98 +306,85 @@ void __init leon_init_timers(irq_handler_t counter_fn)
 			leon3_gptimer_irq = *(unsigned int *)pp->value;
 	} while (0);
 
-	if (leon3_gptimer_regs && leon3_irqctrl_regs && leon3_gptimer_irq) {
-		LEON3_BYPASS_STORE_PA(
-			&leon3_gptimer_regs->e[leon3_gptimer_idx].val, 0);
-		LEON3_BYPASS_STORE_PA(
-			&leon3_gptimer_regs->e[leon3_gptimer_idx].rld,
-			(((1000000 / HZ) - 1)));
-		LEON3_BYPASS_STORE_PA(
+	if (!(leon3_gptimer_regs && leon3_irqctrl_regs && leon3_gptimer_irq))
+		goto bad;
+
+	LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].val, 0);
+	LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].rld,
+				(((1000000 / HZ) - 1)));
+	LEON3_BYPASS_STORE_PA(
 			&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl, 0);
 
 #ifdef CONFIG_SMP
-		leon_percpu_timer_dev[0].start = (int)leon3_gptimer_regs;
-		leon_percpu_timer_dev[0].irq = leon3_gptimer_irq + 1 +
-					       leon3_gptimer_idx;
-
-		if (!(LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->config) &
-		      (1<<LEON3_GPTIMER_SEPIRQ))) {
-			prom_printf("irq timer not configured with separate irqs\n");
-			BUG();
-		}
+	leon3_ticker_irq = leon3_gptimer_irq + 1 + leon3_gptimer_idx;
 
-		LEON3_BYPASS_STORE_PA(
-			&leon3_gptimer_regs->e[leon3_gptimer_idx+1].val, 0);
-		LEON3_BYPASS_STORE_PA(
-			&leon3_gptimer_regs->e[leon3_gptimer_idx+1].rld,
-			(((1000000/HZ) - 1)));
-		LEON3_BYPASS_STORE_PA(
-			&leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl, 0);
-# endif
-
-		/*
-		 * The IRQ controller may (if implemented) consist of multiple
-		 * IRQ controllers, each mapped on a 4Kb boundary.
-		 * Each CPU may be routed to different IRQCTRLs, however
-		 * we assume that all CPUs (in SMP system) is routed to the
-		 * same IRQ Controller, and for non-SMP only one IRQCTRL is
-		 * accessed anyway.
-		 * In AMP systems, Linux must run on CPU0 for the time being.
-		 */
-		cpu = sparc_leon3_cpuid();
-		icsel = LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->icsel[cpu/8]);
-		icsel = (icsel >> ((7 - (cpu&0x7)) * 4)) & 0xf;
-		leon3_irqctrl_regs += icsel;
-	} else {
-		goto bad;
+	if (!(LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->config) &
+	      (1<<LEON3_GPTIMER_SEPIRQ))) {
+		printk(KERN_ERR "timer not configured with separate irqs\n");
+		BUG();
 	}
 
-	irq = request_irq(leon3_gptimer_irq+leon3_gptimer_idx,
-			  counter_fn,
-			  (IRQF_DISABLED | SA_STATIC_ALLOC), "timer", NULL);
+	LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].val,
+				0);
+	LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].rld,
+				(((1000000/HZ) - 1)));
+	LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl,
+				0);
+#endif
 
-	if (irq) {
-		printk(KERN_ERR "leon_time_init: unable to attach IRQ%d\n",
-		       LEON_INTERRUPT_TIMER1);
+	/*
+	 * The IRQ controller may (if implemented) consist of multiple
+	 * IRQ controllers, each mapped on a 4Kb boundary.
+	 * Each CPU may be routed to different IRQCTRLs, however
+	 * we assume that all CPUs (in SMP system) is routed to the
+	 * same IRQ Controller, and for non-SMP only one IRQCTRL is
+	 * accessed anyway.
+	 * In AMP systems, Linux must run on CPU0 for the time being.
+	 */
+	icsel = LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->icsel[boot_cpu_id/8]);
+	icsel = (icsel >> ((7 - (boot_cpu_id&0x7)) * 4)) & 0xf;
+	leon3_irqctrl_regs += icsel;
+
+	/* Mask all IRQs on boot-cpu IRQ controller */
+	LEON3_BYPASS_STORE_PA(&leon3_irqctrl_regs->mask[boot_cpu_id], 0);
+
+	/* Probe extended IRQ controller */
+	eirq = (LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->mpstatus)
+		>> 16) & 0xf;
+	if (eirq != 0)
+		leon_eirq_setup(eirq);
+
+	irq = _leon_build_device_irq(NULL, leon3_gptimer_irq+leon3_gptimer_idx);
+	err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL);
+	if (err) {
+		printk(KERN_ERR "unable to attach timer IRQ%d\n", irq);
 		prom_halt();
 	}
 
-# ifdef CONFIG_SMP
-	{
-		unsigned long flags;
-		struct tt_entry *trap_table = &sparc_ttable[SP_TRAP_IRQ1 + (leon_percpu_timer_dev[0].irq - 1)];
-
-		/* For SMP we use the level 14 ticker, however the bootup code
-		 * has copied the firmwares level 14 vector into boot cpu's
-		 * trap table, we must fix this now or we get squashed.
-		 */
-		local_irq_save(flags);
-
-		patchme_maybe_smp_msg[0] = 0x01000000; /* NOP out the branch */
-
-		/* Adjust so that we jump directly to smpleon_ticker */
-		trap_table->inst_three += smpleon_ticker - real_irq_entry;
+	LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl,
+			      LEON3_GPTIMER_EN |
+			      LEON3_GPTIMER_RL |
+			      LEON3_GPTIMER_LD |
+			      LEON3_GPTIMER_IRQEN);
 
-		local_flush_cache_all();
-		local_irq_restore(flags);
+#ifdef CONFIG_SMP
+	/* Install per-cpu IRQ handler for broadcasted ticker */
+	irq = leon_build_device_irq(leon3_ticker_irq, handle_percpu_irq,
+				    "per-cpu", 0);
+	err = request_irq(irq, leon_percpu_timer_interrupt,
+			  IRQF_PERCPU | IRQF_TIMER, "ticker",
+			  NULL);
+	if (err) {
+		printk(KERN_ERR "unable to attach ticker IRQ%d\n", irq);
+		prom_halt();
 	}
-# endif
-
-	if (leon3_gptimer_regs) {
-		LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl,
-				      LEON3_GPTIMER_EN |
-				      LEON3_GPTIMER_RL |
-				      LEON3_GPTIMER_LD | LEON3_GPTIMER_IRQEN);
 
-#ifdef CONFIG_SMP
-		LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl,
-				      LEON3_GPTIMER_EN |
-				      LEON3_GPTIMER_RL |
-				      LEON3_GPTIMER_LD |
-				      LEON3_GPTIMER_IRQEN);
+	LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl,
+			      LEON3_GPTIMER_EN |
+			      LEON3_GPTIMER_RL |
+			      LEON3_GPTIMER_LD |
+			      LEON3_GPTIMER_IRQEN);
 #endif
-
-	}
 	return;
 bad:
 	printk(KERN_ERR "No Timer/irqctrl found\n");
@@ -281,9 +401,6 @@ void leon_load_profile_irq(int cpu, unsigned int limit)
 	BUG();
 }
 
-
-
-
 void __init leon_trans_init(struct device_node *dp)
 {
 	if (strcmp(dp->type, "cpu") == 0 && strcmp(dp->name, "<NULL>") == 0) {
@@ -337,22 +454,18 @@ void leon_enable_irq_cpu(unsigned int irq_nr, unsigned int cpu)
 {
 	unsigned long mask, flags, *addr;
 	mask = get_irqmask(irq_nr);
-	local_irq_save(flags);
-	addr = (unsigned long *)&(leon3_irqctrl_regs->mask[cpu]);
-	LEON3_BYPASS_STORE_PA(addr, (LEON3_BYPASS_LOAD_PA(addr) | (mask)));
-	local_irq_restore(flags);
+	spin_lock_irqsave(&leon_irq_lock, flags);
+	addr = (unsigned long *)LEON_IMASK(cpu);
+	LEON3_BYPASS_STORE_PA(addr, (LEON3_BYPASS_LOAD_PA(addr) | mask));
+	spin_unlock_irqrestore(&leon_irq_lock, flags);
 }
 
 #endif
 
 void __init leon_init_IRQ(void)
 {
-	sparc_irq_config.init_timers = leon_init_timers;
-
-	BTFIXUPSET_CALL(enable_irq, leon_enable_irq, BTFIXUPCALL_NORM);
-	BTFIXUPSET_CALL(disable_irq, leon_disable_irq, BTFIXUPCALL_NORM);
-	BTFIXUPSET_CALL(enable_pil_irq, leon_enable_irq, BTFIXUPCALL_NORM);
-	BTFIXUPSET_CALL(disable_pil_irq, leon_disable_irq, BTFIXUPCALL_NORM);
+	sparc_irq_config.init_timers      = leon_init_timers;
+	sparc_irq_config.build_device_irq = _leon_build_device_irq;
 
 	BTFIXUPSET_CALL(clear_clock_irq, leon_clear_clock_irq,
 			BTFIXUPCALL_NORM);
diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c
index 8f5de4aa3c0..fe8fb44c609 100644
--- a/arch/sparc/kernel/leon_smp.c
+++ b/arch/sparc/kernel/leon_smp.c
@@ -14,6 +14,7 @@
 #include <linux/smp.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
+#include <linux/of.h>
 #include <linux/init.h>
 #include <linux/spinlock.h>
 #include <linux/mm.h>
@@ -29,6 +30,7 @@
 #include <asm/ptrace.h>
 #include <asm/atomic.h>
 #include <asm/irq_regs.h>
+#include <asm/traps.h>
 
 #include <asm/delay.h>
 #include <asm/irq.h>
@@ -50,9 +52,12 @@
 extern ctxd_t *srmmu_ctx_table_phys;
 static int smp_processors_ready;
 extern volatile unsigned long cpu_callin_map[NR_CPUS];
-extern unsigned char boot_cpu_id;
 extern cpumask_t smp_commenced_mask;
 void __init leon_configure_cache_smp(void);
+static void leon_ipi_init(void);
+
+/* IRQ number of LEON IPIs */
+int leon_ipi_irq = LEON3_IRQ_IPI_DEFAULT;
 
 static inline unsigned long do_swap(volatile unsigned long *ptr,
 				    unsigned long val)
@@ -94,8 +99,6 @@ void __cpuinit leon_callin(void)
 	local_flush_cache_all();
 	local_flush_tlb_all();
 
-	cpu_probe();
-
 	/* Fix idle thread fields. */
 	__asm__ __volatile__("ld [%0], %%g6\n\t" : : "r"(&current_set[cpuid])
 			     : "memory" /* paranoid */);
@@ -104,11 +107,11 @@ void __cpuinit leon_callin(void)
 	atomic_inc(&init_mm.mm_count);
 	current->active_mm = &init_mm;
 
-	while (!cpu_isset(cpuid, smp_commenced_mask))
+	while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
 		mb();
 
 	local_irq_enable();
-	cpu_set(cpuid, cpu_online_map);
+	set_cpu_online(cpuid, true);
 }
 
 /*
@@ -179,13 +182,16 @@ void __init leon_boot_cpus(void)
 	int nrcpu = leon_smp_nrcpus();
 	int me = smp_processor_id();
 
+	/* Setup IPI */
+	leon_ipi_init();
+
 	printk(KERN_INFO "%d:(%d:%d) cpus mpirq at 0x%x\n", (unsigned int)me,
 	       (unsigned int)nrcpu, (unsigned int)NR_CPUS,
 	       (unsigned int)&(leon3_irqctrl_regs->mpstatus));
 
 	leon_enable_irq_cpu(LEON3_IRQ_CROSS_CALL, me);
 	leon_enable_irq_cpu(LEON3_IRQ_TICKER, me);
-	leon_enable_irq_cpu(LEON3_IRQ_RESCHEDULE, me);
+	leon_enable_irq_cpu(leon_ipi_irq, me);
 
 	leon_smp_setbroadcast(1 << LEON3_IRQ_TICKER);
 
@@ -220,6 +226,10 @@ int __cpuinit leon_boot_one_cpu(int i)
 	       (unsigned int)&leon3_irqctrl_regs->mpstatus);
 	local_flush_cache_all();
 
+	/* Make sure all IRQs are of from the start for this new CPU */
+	LEON_BYPASS_STORE_PA(&leon3_irqctrl_regs->mask[i], 0);
+
+	/* Wake one CPU */
 	LEON_BYPASS_STORE_PA(&(leon3_irqctrl_regs->mpstatus), 1 << i);
 
 	/* wheee... it's going... */
@@ -236,7 +246,7 @@ int __cpuinit leon_boot_one_cpu(int i)
 	} else {
 		leon_enable_irq_cpu(LEON3_IRQ_CROSS_CALL, i);
 		leon_enable_irq_cpu(LEON3_IRQ_TICKER, i);
-		leon_enable_irq_cpu(LEON3_IRQ_RESCHEDULE, i);
+		leon_enable_irq_cpu(leon_ipi_irq, i);
 	}
 
 	local_flush_cache_all();
@@ -262,21 +272,21 @@ void __init leon_smp_done(void)
 	local_flush_cache_all();
 
 	/* Free unneeded trap tables */
-	if (!cpu_isset(1, cpu_present_map)) {
+	if (!cpu_present(1)) {
 		ClearPageReserved(virt_to_page(&trapbase_cpu1));
 		init_page_count(virt_to_page(&trapbase_cpu1));
 		free_page((unsigned long)&trapbase_cpu1);
 		totalram_pages++;
 		num_physpages++;
 	}
-	if (!cpu_isset(2, cpu_present_map)) {
+	if (!cpu_present(2)) {
 		ClearPageReserved(virt_to_page(&trapbase_cpu2));
 		init_page_count(virt_to_page(&trapbase_cpu2));
 		free_page((unsigned long)&trapbase_cpu2);
 		totalram_pages++;
 		num_physpages++;
 	}
-	if (!cpu_isset(3, cpu_present_map)) {
+	if (!cpu_present(3)) {
 		ClearPageReserved(virt_to_page(&trapbase_cpu3));
 		init_page_count(virt_to_page(&trapbase_cpu3));
 		free_page((unsigned long)&trapbase_cpu3);
@@ -292,6 +302,99 @@ void leon_irq_rotate(int cpu)
 {
 }
 
+struct leon_ipi_work {
+	int single;
+	int msk;
+	int resched;
+};
+
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct leon_ipi_work, leon_ipi_work);
+
+/* Initialize IPIs on the LEON, in order to save IRQ resources only one IRQ
+ * is used for all three types of IPIs.
+ */
+static void __init leon_ipi_init(void)
+{
+	int cpu, len;
+	struct leon_ipi_work *work;
+	struct property *pp;
+	struct device_node *rootnp;
+	struct tt_entry *trap_table;
+	unsigned long flags;
+
+	/* Find IPI IRQ or stick with default value */
+	rootnp = of_find_node_by_path("/ambapp0");
+	if (rootnp) {
+		pp = of_find_property(rootnp, "ipi_num", &len);
+		if (pp && (*(int *)pp->value))
+			leon_ipi_irq = *(int *)pp->value;
+	}
+	printk(KERN_INFO "leon: SMP IPIs at IRQ %d\n", leon_ipi_irq);
+
+	/* Adjust so that we jump directly to smpleon_ipi */
+	local_irq_save(flags);
+	trap_table = &sparc_ttable[SP_TRAP_IRQ1 + (leon_ipi_irq - 1)];
+	trap_table->inst_three += smpleon_ipi - real_irq_entry;
+	local_flush_cache_all();
+	local_irq_restore(flags);
+
+	for_each_possible_cpu(cpu) {
+		work = &per_cpu(leon_ipi_work, cpu);
+		work->single = work->msk = work->resched = 0;
+	}
+}
+
+static void leon_ipi_single(int cpu)
+{
+	struct leon_ipi_work *work = &per_cpu(leon_ipi_work, cpu);
+
+	/* Mark work */
+	work->single = 1;
+
+	/* Generate IRQ on the CPU */
+	set_cpu_int(cpu, leon_ipi_irq);
+}
+
+static void leon_ipi_mask_one(int cpu)
+{
+	struct leon_ipi_work *work = &per_cpu(leon_ipi_work, cpu);
+
+	/* Mark work */
+	work->msk = 1;
+
+	/* Generate IRQ on the CPU */
+	set_cpu_int(cpu, leon_ipi_irq);
+}
+
+static void leon_ipi_resched(int cpu)
+{
+	struct leon_ipi_work *work = &per_cpu(leon_ipi_work, cpu);
+
+	/* Mark work */
+	work->resched = 1;
+
+	/* Generate IRQ on the CPU (any IRQ will cause resched) */
+	set_cpu_int(cpu, leon_ipi_irq);
+}
+
+void leonsmp_ipi_interrupt(void)
+{
+	struct leon_ipi_work *work = &__get_cpu_var(leon_ipi_work);
+
+	if (work->single) {
+		work->single = 0;
+		smp_call_function_single_interrupt();
+	}
+	if (work->msk) {
+		work->msk = 0;
+		smp_call_function_interrupt();
+	}
+	if (work->resched) {
+		work->resched = 0;
+		smp_resched_interrupt();
+	}
+}
+
 static struct smp_funcall {
 	smpfunc_t func;
 	unsigned long arg1;
@@ -337,10 +440,10 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
 		{
 			register int i;
 
-			cpu_clear(smp_processor_id(), mask);
-			cpus_and(mask, cpu_online_map, mask);
+			cpumask_clear_cpu(smp_processor_id(), &mask);
+			cpumask_and(&mask, cpu_online_mask, &mask);
 			for (i = 0; i <= high; i++) {
-				if (cpu_isset(i, mask)) {
+				if (cpumask_test_cpu(i, &mask)) {
 					ccall_info.processors_in[i] = 0;
 					ccall_info.processors_out[i] = 0;
 					set_cpu_int(i, LEON3_IRQ_CROSS_CALL);
@@ -354,7 +457,7 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
 
 			i = 0;
 			do {
-				if (!cpu_isset(i, mask))
+				if (!cpumask_test_cpu(i, &mask))
 					continue;
 
 				while (!ccall_info.processors_in[i])
@@ -363,7 +466,7 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
 
 			i = 0;
 			do {
-				if (!cpu_isset(i, mask))
+				if (!cpumask_test_cpu(i, &mask))
 					continue;
 
 				while (!ccall_info.processors_out[i])
@@ -386,27 +489,23 @@ void leon_cross_call_irq(void)
 	ccall_info.processors_out[i] = 1;
 }
 
-void leon_percpu_timer_interrupt(struct pt_regs *regs)
+irqreturn_t leon_percpu_timer_interrupt(int irq, void *unused)
 {
-	struct pt_regs *old_regs;
 	int cpu = smp_processor_id();
 
-	old_regs = set_irq_regs(regs);
-
 	leon_clear_profile_irq(cpu);
 
 	profile_tick(CPU_PROFILING);
 
 	if (!--prof_counter(cpu)) {
-		int user = user_mode(regs);
+		int user = user_mode(get_irq_regs());
 
-		irq_enter();
 		update_process_times(user);
-		irq_exit();
 
 		prof_counter(cpu) = prof_multiplier(cpu);
 	}
-	set_irq_regs(old_regs);
+
+	return IRQ_HANDLED;
 }
 
 static void __init smp_setup_percpu_timer(void)
@@ -449,6 +548,9 @@ void __init leon_init_smp(void)
 	BTFIXUPSET_CALL(smp_cross_call, leon_cross_call, BTFIXUPCALL_NORM);
 	BTFIXUPSET_CALL(__hard_smp_processor_id, __leon_processor_id,
 			BTFIXUPCALL_NORM);
+	BTFIXUPSET_CALL(smp_ipi_resched, leon_ipi_resched, BTFIXUPCALL_NORM);
+	BTFIXUPSET_CALL(smp_ipi_single, leon_ipi_single, BTFIXUPCALL_NORM);
+	BTFIXUPSET_CALL(smp_ipi_mask_one, leon_ipi_mask_one, BTFIXUPCALL_NORM);
 }
 
 #endif /* CONFIG_SPARC_LEON */
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
index 56db06432ce..42f28c7420e 100644
--- a/arch/sparc/kernel/mdesc.c
+++ b/arch/sparc/kernel/mdesc.c
@@ -768,7 +768,7 @@ static void * __cpuinit mdesc_iterate_over_cpus(void *(*func)(struct mdesc_handl
 			       cpuid, NR_CPUS);
 			continue;
 		}
-		if (!cpu_isset(cpuid, *mask))
+		if (!cpumask_test_cpu(cpuid, mask))
 			continue;
 #endif
 
diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c
index 5c149689bb2..3bb2eace58c 100644
--- a/arch/sparc/kernel/of_device_64.c
+++ b/arch/sparc/kernel/of_device_64.c
@@ -622,8 +622,9 @@ static unsigned int __init build_one_device_irq(struct platform_device *op,
 out:
 	nid = of_node_to_nid(dp);
 	if (nid != -1) {
-		cpumask_t numa_mask = *cpumask_of_node(nid);
+		cpumask_t numa_mask;
 
+		cpumask_copy(&numa_mask, cpumask_of_node(nid));
 		irq_set_affinity(irq, &numa_mask);
 	}
 
diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c
index 30982e9ab62..580651af73f 100644
--- a/arch/sparc/kernel/pci_msi.c
+++ b/arch/sparc/kernel/pci_msi.c
@@ -284,8 +284,9 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm,
 
 	nid = pbm->numa_node;
 	if (nid != -1) {
-		cpumask_t numa_mask = *cpumask_of_node(nid);
+		cpumask_t numa_mask;
 
+		cpumask_copy(&numa_mask, cpumask_of_node(nid));
 		irq_set_affinity(irq, &numa_mask);
 	}
 	err = request_irq(irq, sparc64_msiq_interrupt, 0,
diff --git a/arch/sparc/kernel/pci_sabre.c b/arch/sparc/kernel/pci_sabre.c
index 948068a083f..d1840dbdaa2 100644
--- a/arch/sparc/kernel/pci_sabre.c
+++ b/arch/sparc/kernel/pci_sabre.c
@@ -452,8 +452,10 @@ static void __devinit sabre_pbm_init(struct pci_pbm_info *pbm,
 	sabre_scan_bus(pbm, &op->dev);
 }
 
+static const struct of_device_id sabre_match[];
 static int __devinit sabre_probe(struct platform_device *op)
 {
+	const struct of_device_id *match;
 	const struct linux_prom64_registers *pr_regs;
 	struct device_node *dp = op->dev.of_node;
 	struct pci_pbm_info *pbm;
@@ -463,7 +465,8 @@ static int __devinit sabre_probe(struct platform_device *op)
 	const u32 *vdma;
 	u64 clear_irq;
 
-	hummingbird_p = op->dev.of_match && (op->dev.of_match->data != NULL);
+	match = of_match_device(sabre_match, &op->dev);
+	hummingbird_p = match && (match->data != NULL);
 	if (!hummingbird_p) {
 		struct device_node *cpu_dp;
 
diff --git a/arch/sparc/kernel/pci_schizo.c b/arch/sparc/kernel/pci_schizo.c
index fecfcb2063c..283fbc329a4 100644
--- a/arch/sparc/kernel/pci_schizo.c
+++ b/arch/sparc/kernel/pci_schizo.c
@@ -1458,11 +1458,15 @@ out_err:
 	return err;
 }
 
+static const struct of_device_id schizo_match[];
 static int __devinit schizo_probe(struct platform_device *op)
 {
-	if (!op->dev.of_match)
+	const struct of_device_id *match;
+
+	match = of_match_device(schizo_match, &op->dev);
+	if (!match)
 		return -EINVAL;
-	return __schizo_init(op, (unsigned long) op->dev.of_match->data);
+	return __schizo_init(op, (unsigned long)match->data);
 }
 
 /* The ordering of this table is very important.  Some Tomatillo
diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c
index 2cdc131b50a..948601a066f 100644
--- a/arch/sparc/kernel/pcic.c
+++ b/arch/sparc/kernel/pcic.c
@@ -164,6 +164,9 @@ void __iomem *pcic_regs;
 volatile int pcic_speculative;
 volatile int pcic_trapped;
 
+/* forward */
+unsigned int pcic_build_device_irq(struct platform_device *op,
+                                   unsigned int real_irq);
 
 #define CONFIG_CMD(bus, device_fn, where) (0x80000000 | (((unsigned int)bus) << 16) | (((unsigned int)device_fn) << 8) | (where & ~3))
 
@@ -523,6 +526,7 @@ static void
 pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node)
 {
 	struct pcic_ca2irq *p;
+	unsigned int real_irq;
 	int i, ivec;
 	char namebuf[64];
 
@@ -551,26 +555,25 @@ pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node)
 	i = p->pin;
 	if (i >= 0 && i < 4) {
 		ivec = readw(pcic->pcic_regs+PCI_INT_SELECT_LO);
-		dev->irq = ivec >> (i << 2) & 0xF;
+		real_irq = ivec >> (i << 2) & 0xF;
 	} else if (i >= 4 && i < 8) {
 		ivec = readw(pcic->pcic_regs+PCI_INT_SELECT_HI);
-		dev->irq = ivec >> ((i-4) << 2) & 0xF;
+		real_irq = ivec >> ((i-4) << 2) & 0xF;
 	} else {					/* Corrupted map */
 		printk("PCIC: BAD PIN %d\n", i); for (;;) {}
 	}
 /* P3 */ /* printk("PCIC: device %s pin %d ivec 0x%x irq %x\n", namebuf, i, ivec, dev->irq); */
 
-	/*
-	 * dev->irq=0 means PROM did not bother to program the upper
+	/* real_irq means PROM did not bother to program the upper
 	 * half of PCIC. This happens on JS-E with PROM 3.11, for instance.
 	 */
-	if (dev->irq == 0 || p->force) {
+	if (real_irq == 0 || p->force) {
 		if (p->irq == 0 || p->irq >= 15) {	/* Corrupted map */
 			printk("PCIC: BAD IRQ %d\n", p->irq); for (;;) {}
 		}
 		printk("PCIC: setting irq %d at pin %d for device %02x:%02x\n",
 		    p->irq, p->pin, dev->bus->number, dev->devfn);
-		dev->irq = p->irq;
+		real_irq = p->irq;
 
 		i = p->pin;
 		if (i >= 4) {
@@ -584,7 +587,8 @@ pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node)
 			ivec |= p->irq << (i << 2);
 			writew(ivec, pcic->pcic_regs+PCI_INT_SELECT_LO);
 		}
- 	}
+	}
+	dev->irq = pcic_build_device_irq(NULL, real_irq);
 }
 
 /*
@@ -729,6 +733,7 @@ void __init pci_time_init(void)
 	struct linux_pcic *pcic = &pcic0;
 	unsigned long v;
 	int timer_irq, irq;
+	int err;
 
 	do_arch_gettimeoffset = pci_gettimeoffset;
 
@@ -740,9 +745,10 @@ void __init pci_time_init(void)
 	timer_irq = PCI_COUNTER_IRQ_SYS(v);
 	writel (PCI_COUNTER_IRQ_SET(timer_irq, 0),
 		pcic->pcic_regs+PCI_COUNTER_IRQ);
-	irq = request_irq(timer_irq, pcic_timer_handler,
-			  (IRQF_DISABLED | SA_STATIC_ALLOC), "timer", NULL);
-	if (irq) {
+	irq = pcic_build_device_irq(NULL, timer_irq);
+	err = request_irq(irq, pcic_timer_handler,
+			  IRQF_TIMER, "timer", NULL);
+	if (err) {
 		prom_printf("time_init: unable to attach IRQ%d\n", timer_irq);
 		prom_halt();
 	}
@@ -803,50 +809,73 @@ static inline unsigned long get_irqmask(int irq_nr)
 	return 1 << irq_nr;
 }
 
-static void pcic_disable_irq(unsigned int irq_nr)
+static void pcic_mask_irq(struct irq_data *data)
 {
 	unsigned long mask, flags;
 
-	mask = get_irqmask(irq_nr);
+	mask = (unsigned long)data->chip_data;
 	local_irq_save(flags);
 	writel(mask, pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_SET);
 	local_irq_restore(flags);
 }
 
-static void pcic_enable_irq(unsigned int irq_nr)
+static void pcic_unmask_irq(struct irq_data *data)
 {
 	unsigned long mask, flags;
 
-	mask = get_irqmask(irq_nr);
+	mask = (unsigned long)data->chip_data;
 	local_irq_save(flags);
 	writel(mask, pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_CLEAR);
 	local_irq_restore(flags);
 }
 
-static void pcic_load_profile_irq(int cpu, unsigned int limit)
+static unsigned int pcic_startup_irq(struct irq_data *data)
 {
-	printk("PCIC: unimplemented code: FILE=%s LINE=%d", __FILE__, __LINE__);
+	irq_link(data->irq);
+	pcic_unmask_irq(data);
+	return 0;
 }
 
-/* We assume the caller has disabled local interrupts when these are called,
- * or else very bizarre behavior will result.
- */
-static void pcic_disable_pil_irq(unsigned int pil)
+static struct irq_chip pcic_irq = {
+	.name		= "pcic",
+	.irq_startup	= pcic_startup_irq,
+	.irq_mask	= pcic_mask_irq,
+	.irq_unmask	= pcic_unmask_irq,
+};
+
+unsigned int pcic_build_device_irq(struct platform_device *op,
+                                   unsigned int real_irq)
 {
-	writel(get_irqmask(pil), pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_SET);
+	unsigned int irq;
+	unsigned long mask;
+
+	irq = 0;
+	mask = get_irqmask(real_irq);
+	if (mask == 0)
+		goto out;
+
+	irq = irq_alloc(real_irq, real_irq);
+	if (irq == 0)
+		goto out;
+
+	irq_set_chip_and_handler_name(irq, &pcic_irq,
+	                              handle_level_irq, "PCIC");
+	irq_set_chip_data(irq, (void *)mask);
+
+out:
+	return irq;
 }
 
-static void pcic_enable_pil_irq(unsigned int pil)
+
+static void pcic_load_profile_irq(int cpu, unsigned int limit)
 {
-	writel(get_irqmask(pil), pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_CLEAR);
+	printk("PCIC: unimplemented code: FILE=%s LINE=%d", __FILE__, __LINE__);
 }
 
 void __init sun4m_pci_init_IRQ(void)
 {
-	BTFIXUPSET_CALL(enable_irq, pcic_enable_irq, BTFIXUPCALL_NORM);
-	BTFIXUPSET_CALL(disable_irq, pcic_disable_irq, BTFIXUPCALL_NORM);
-	BTFIXUPSET_CALL(enable_pil_irq, pcic_enable_pil_irq, BTFIXUPCALL_NORM);
-	BTFIXUPSET_CALL(disable_pil_irq, pcic_disable_pil_irq, BTFIXUPCALL_NORM);
+	sparc_irq_config.build_device_irq = pcic_build_device_irq;
+
 	BTFIXUPSET_CALL(clear_clock_irq, pcic_clear_clock_irq, BTFIXUPCALL_NORM);
 	BTFIXUPSET_CALL(load_profile_irq, pcic_load_profile_irq, BTFIXUPCALL_NORM);
 }
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index ee8426ede7c..2cb0e1c001e 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -26,6 +26,7 @@
 #include <asm/nmi.h>
 #include <asm/pcr.h>
 
+#include "kernel.h"
 #include "kstack.h"
 
 /* Sparc64 chips have two performance counters, 32-bits each, with
diff --git a/arch/sparc/kernel/pmc.c b/arch/sparc/kernel/pmc.c
index 93d7b4465f8..6a585d39358 100644
--- a/arch/sparc/kernel/pmc.c
+++ b/arch/sparc/kernel/pmc.c
@@ -69,7 +69,7 @@ static int __devinit pmc_probe(struct platform_device *op)
 	return 0;
 }
 
-static struct of_device_id __initdata pmc_match[] = {
+static struct of_device_id pmc_match[] = {
 	{
 		.name = PMC_OBPNAME,
 	},
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index 17529298c50..c8cc461ff75 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -128,8 +128,16 @@ void cpu_idle(void)
         set_thread_flag(TIF_POLLING_NRFLAG);
 	/* endless idle loop with no priority at all */
 	while(1) {
-		while (!need_resched())
-			cpu_relax();
+#ifdef CONFIG_SPARC_LEON
+		if (pm_idle) {
+			while (!need_resched())
+				(*pm_idle)();
+		} else
+#endif
+		{
+			while (!need_resched())
+				cpu_relax();
+		}
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c
index 05fb2533058..5ce3d15a99b 100644
--- a/arch/sparc/kernel/prom_32.c
+++ b/arch/sparc/kernel/prom_32.c
@@ -326,7 +326,6 @@ void __init of_console_init(void)
 			of_console_options = NULL;
 	}
 
-	prom_printf(msg, of_console_path);
 	printk(msg, of_console_path);
 }
 
diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c
index 7b8b76c9557..3609bdee9ed 100644
--- a/arch/sparc/kernel/setup_32.c
+++ b/arch/sparc/kernel/setup_32.c
@@ -103,16 +103,20 @@ static unsigned int boot_flags __initdata = 0;
 /* Exported for mm/init.c:paging_init. */
 unsigned long cmdline_memory_size __initdata = 0;
 
+/* which CPU booted us (0xff = not set) */
+unsigned char boot_cpu_id = 0xff; /* 0xff will make it into DATA section... */
+unsigned char boot_cpu_id4; /* boot_cpu_id << 2 */
+
 static void
 prom_console_write(struct console *con, const char *s, unsigned n)
 {
 	prom_write(s, n);
 }
 
-static struct console prom_debug_console = {
-	.name =		"debug",
+static struct console prom_early_console = {
+	.name =		"earlyprom",
 	.write =	prom_console_write,
-	.flags =	CON_PRINTBUFFER,
+	.flags =	CON_PRINTBUFFER | CON_BOOT,
 	.index =	-1,
 };
 
@@ -133,8 +137,7 @@ static void __init process_switch(char c)
 		prom_halt();
 		break;
 	case 'p':
-		/* Use PROM debug console. */
-		register_console(&prom_debug_console);
+		/* Just ignore, this behavior is now the default.  */
 		break;
 	default:
 		printk("Unknown boot switch (-%c)\n", c);
@@ -215,6 +218,10 @@ void __init setup_arch(char **cmdline_p)
 	strcpy(boot_command_line, *cmdline_p);
 	parse_early_param();
 
+	boot_flags_init(*cmdline_p);
+
+	register_console(&prom_early_console);
+
 	/* Set sparc_cpu_model */
 	sparc_cpu_model = sun_unknown;
 	if (!strcmp(&cputypval[0], "sun4 "))
@@ -265,7 +272,6 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_DUMMY_CONSOLE
 	conswitchp = &dummy_con;
 #endif
-	boot_flags_init(*cmdline_p);
 
 	idprom_init();
 	if (ARCH_SUN4C)
@@ -311,75 +317,6 @@ void __init setup_arch(char **cmdline_p)
 	smp_setup_cpu_possible_map();
 }
 
-static int ncpus_probed;
-
-static int show_cpuinfo(struct seq_file *m, void *__unused)
-{
-	seq_printf(m,
-		   "cpu\t\t: %s\n"
-		   "fpu\t\t: %s\n"
-		   "promlib\t\t: Version %d Revision %d\n"
-		   "prom\t\t: %d.%d\n"
-		   "type\t\t: %s\n"
-		   "ncpus probed\t: %d\n"
-		   "ncpus active\t: %d\n"
-#ifndef CONFIG_SMP
-		   "CPU0Bogo\t: %lu.%02lu\n"
-		   "CPU0ClkTck\t: %ld\n"
-#endif
-		   ,
-		   sparc_cpu_type,
-		   sparc_fpu_type ,
-		   romvec->pv_romvers,
-		   prom_rev,
-		   romvec->pv_printrev >> 16,
-		   romvec->pv_printrev & 0xffff,
-		   &cputypval[0],
-		   ncpus_probed,
-		   num_online_cpus()
-#ifndef CONFIG_SMP
-		   , cpu_data(0).udelay_val/(500000/HZ),
-		   (cpu_data(0).udelay_val/(5000/HZ)) % 100,
-		   cpu_data(0).clock_tick
-#endif
-		);
-
-#ifdef CONFIG_SMP
-	smp_bogo(m);
-#endif
-	mmu_info(m);
-#ifdef CONFIG_SMP
-	smp_info(m);
-#endif
-	return 0;
-}
-
-static void *c_start(struct seq_file *m, loff_t *pos)
-{
-	/* The pointer we are returning is arbitrary,
-	 * it just has to be non-NULL and not IS_ERR
-	 * in the success case.
-	 */
-	return *pos == 0 ? &c_start : NULL;
-}
-
-static void *c_next(struct seq_file *m, void *v, loff_t *pos)
-{
-	++*pos;
-	return c_start(m, pos);
-}
-
-static void c_stop(struct seq_file *m, void *v)
-{
-}
-
-const struct seq_operations cpuinfo_op = {
-	.start =c_start,
-	.next =	c_next,
-	.stop =	c_stop,
-	.show =	show_cpuinfo,
-};
-
 extern int stop_a_enabled;
 
 void sun_do_break(void)
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index 29bafe051bb..f3b6850cc8d 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -339,84 +339,6 @@ void __init setup_arch(char **cmdline_p)
 	paging_init();
 }
 
-/* BUFFER is PAGE_SIZE bytes long. */
-
-extern void smp_info(struct seq_file *);
-extern void smp_bogo(struct seq_file *);
-extern void mmu_info(struct seq_file *);
-
-unsigned int dcache_parity_tl1_occurred;
-unsigned int icache_parity_tl1_occurred;
-
-int ncpus_probed;
-
-static int show_cpuinfo(struct seq_file *m, void *__unused)
-{
-	seq_printf(m, 
-		   "cpu\t\t: %s\n"
-		   "fpu\t\t: %s\n"
-		   "pmu\t\t: %s\n"
-		   "prom\t\t: %s\n"
-		   "type\t\t: %s\n"
-		   "ncpus probed\t: %d\n"
-		   "ncpus active\t: %d\n"
-		   "D$ parity tl1\t: %u\n"
-		   "I$ parity tl1\t: %u\n"
-#ifndef CONFIG_SMP
-		   "Cpu0ClkTck\t: %016lx\n"
-#endif
-		   ,
-		   sparc_cpu_type,
-		   sparc_fpu_type,
-		   sparc_pmu_type,
-		   prom_version,
-		   ((tlb_type == hypervisor) ?
-		    "sun4v" :
-		    "sun4u"),
-		   ncpus_probed,
-		   num_online_cpus(),
-		   dcache_parity_tl1_occurred,
-		   icache_parity_tl1_occurred
-#ifndef CONFIG_SMP
-		   , cpu_data(0).clock_tick
-#endif
-		);
-#ifdef CONFIG_SMP
-	smp_bogo(m);
-#endif
-	mmu_info(m);
-#ifdef CONFIG_SMP
-	smp_info(m);
-#endif
-	return 0;
-}
-
-static void *c_start(struct seq_file *m, loff_t *pos)
-{
-	/* The pointer we are returning is arbitrary,
-	 * it just has to be non-NULL and not IS_ERR
-	 * in the success case.
-	 */
-	return *pos == 0 ? &c_start : NULL;
-}
-
-static void *c_next(struct seq_file *m, void *v, loff_t *pos)
-{
-	++*pos;
-	return c_start(m, pos);
-}
-
-static void c_stop(struct seq_file *m, void *v)
-{
-}
-
-const struct seq_operations cpuinfo_op = {
-	.start =c_start,
-	.next =	c_next,
-	.stop =	c_stop,
-	.show =	show_cpuinfo,
-};
-
 extern int stop_a_enabled;
 
 void sun_do_break(void)
diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c
index 91c10fb7085..d5b3958be0b 100644
--- a/arch/sparc/kernel/smp_32.c
+++ b/arch/sparc/kernel/smp_32.c
@@ -37,8 +37,6 @@
 #include "irq.h"
 
 volatile unsigned long cpu_callin_map[NR_CPUS] __cpuinitdata = {0,};
-unsigned char boot_cpu_id = 0;
-unsigned char boot_cpu_id4 = 0; /* boot_cpu_id << 2 */
 
 cpumask_t smp_commenced_mask = CPU_MASK_NONE;
 
@@ -53,6 +51,7 @@ cpumask_t smp_commenced_mask = CPU_MASK_NONE;
 void __cpuinit smp_store_cpu_info(int id)
 {
 	int cpu_node;
+	int mid;
 
 	cpu_data(id).udelay_val = loops_per_jiffy;
 
@@ -60,10 +59,13 @@ void __cpuinit smp_store_cpu_info(int id)
 	cpu_data(id).clock_tick = prom_getintdefault(cpu_node,
 						     "clock-frequency", 0);
 	cpu_data(id).prom_node = cpu_node;
-	cpu_data(id).mid = cpu_get_hwmid(cpu_node);
+	mid = cpu_get_hwmid(cpu_node);
 
-	if (cpu_data(id).mid < 0)
-		panic("No MID found for CPU%d at node 0x%08d", id, cpu_node);
+	if (mid < 0) {
+		printk(KERN_NOTICE "No MID found for CPU%d at node 0x%08d", id, cpu_node);
+		mid = 0;
+	}
+	cpu_data(id).mid = mid;
 }
 
 void __init smp_cpus_done(unsigned int max_cpus)
@@ -125,13 +127,58 @@ struct linux_prom_registers smp_penguin_ctable __cpuinitdata = { 0 };
 
 void smp_send_reschedule(int cpu)
 {
-	/* See sparc64 */
+	/*
+	 * CPU model dependent way of implementing IPI generation targeting
+	 * a single CPU. The trap handler needs only to do trap entry/return
+	 * to call schedule.
+	 */
+	BTFIXUP_CALL(smp_ipi_resched)(cpu);
 }
 
 void smp_send_stop(void)
 {
 }
 
+void arch_send_call_function_single_ipi(int cpu)
+{
+	/* trigger one IPI single call on one CPU */
+	BTFIXUP_CALL(smp_ipi_single)(cpu);
+}
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+	int cpu;
+
+	/* trigger IPI mask call on each CPU */
+	for_each_cpu(cpu, mask)
+		BTFIXUP_CALL(smp_ipi_mask_one)(cpu);
+}
+
+void smp_resched_interrupt(void)
+{
+	irq_enter();
+	scheduler_ipi();
+	local_cpu_data().irq_resched_count++;
+	irq_exit();
+	/* re-schedule routine called by interrupt return code. */
+}
+
+void smp_call_function_single_interrupt(void)
+{
+	irq_enter();
+	generic_smp_call_function_single_interrupt();
+	local_cpu_data().irq_call_count++;
+	irq_exit();
+}
+
+void smp_call_function_interrupt(void)
+{
+	irq_enter();
+	generic_smp_call_function_interrupt();
+	local_cpu_data().irq_call_count++;
+	irq_exit();
+}
+
 void smp_flush_cache_all(void)
 {
 	xc0((smpfunc_t) BTFIXUP_CALL(local_flush_cache_all));
@@ -147,9 +194,10 @@ void smp_flush_tlb_all(void)
 void smp_flush_cache_mm(struct mm_struct *mm)
 {
 	if(mm->context != NO_CONTEXT) {
-		cpumask_t cpu_mask = *mm_cpumask(mm);
-		cpu_clear(smp_processor_id(), cpu_mask);
-		if (!cpus_empty(cpu_mask))
+		cpumask_t cpu_mask;
+		cpumask_copy(&cpu_mask, mm_cpumask(mm));
+		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+		if (!cpumask_empty(&cpu_mask))
 			xc1((smpfunc_t) BTFIXUP_CALL(local_flush_cache_mm), (unsigned long) mm);
 		local_flush_cache_mm(mm);
 	}
@@ -158,9 +206,10 @@ void smp_flush_cache_mm(struct mm_struct *mm)
 void smp_flush_tlb_mm(struct mm_struct *mm)
 {
 	if(mm->context != NO_CONTEXT) {
-		cpumask_t cpu_mask = *mm_cpumask(mm);
-		cpu_clear(smp_processor_id(), cpu_mask);
-		if (!cpus_empty(cpu_mask)) {
+		cpumask_t cpu_mask;
+		cpumask_copy(&cpu_mask, mm_cpumask(mm));
+		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+		if (!cpumask_empty(&cpu_mask)) {
 			xc1((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_mm), (unsigned long) mm);
 			if(atomic_read(&mm->mm_users) == 1 && current->active_mm == mm)
 				cpumask_copy(mm_cpumask(mm),
@@ -176,9 +225,10 @@ void smp_flush_cache_range(struct vm_area_struct *vma, unsigned long start,
 	struct mm_struct *mm = vma->vm_mm;
 
 	if (mm->context != NO_CONTEXT) {
-		cpumask_t cpu_mask = *mm_cpumask(mm);
-		cpu_clear(smp_processor_id(), cpu_mask);
-		if (!cpus_empty(cpu_mask))
+		cpumask_t cpu_mask;
+		cpumask_copy(&cpu_mask, mm_cpumask(mm));
+		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+		if (!cpumask_empty(&cpu_mask))
 			xc3((smpfunc_t) BTFIXUP_CALL(local_flush_cache_range), (unsigned long) vma, start, end);
 		local_flush_cache_range(vma, start, end);
 	}
@@ -190,9 +240,10 @@ void smp_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 	struct mm_struct *mm = vma->vm_mm;
 
 	if (mm->context != NO_CONTEXT) {
-		cpumask_t cpu_mask = *mm_cpumask(mm);
-		cpu_clear(smp_processor_id(), cpu_mask);
-		if (!cpus_empty(cpu_mask))
+		cpumask_t cpu_mask;
+		cpumask_copy(&cpu_mask, mm_cpumask(mm));
+		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+		if (!cpumask_empty(&cpu_mask))
 			xc3((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_range), (unsigned long) vma, start, end);
 		local_flush_tlb_range(vma, start, end);
 	}
@@ -203,9 +254,10 @@ void smp_flush_cache_page(struct vm_area_struct *vma, unsigned long page)
 	struct mm_struct *mm = vma->vm_mm;
 
 	if(mm->context != NO_CONTEXT) {
-		cpumask_t cpu_mask = *mm_cpumask(mm);
-		cpu_clear(smp_processor_id(), cpu_mask);
-		if (!cpus_empty(cpu_mask))
+		cpumask_t cpu_mask;
+		cpumask_copy(&cpu_mask, mm_cpumask(mm));
+		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+		if (!cpumask_empty(&cpu_mask))
 			xc2((smpfunc_t) BTFIXUP_CALL(local_flush_cache_page), (unsigned long) vma, page);
 		local_flush_cache_page(vma, page);
 	}
@@ -216,19 +268,15 @@ void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 	struct mm_struct *mm = vma->vm_mm;
 
 	if(mm->context != NO_CONTEXT) {
-		cpumask_t cpu_mask = *mm_cpumask(mm);
-		cpu_clear(smp_processor_id(), cpu_mask);
-		if (!cpus_empty(cpu_mask))
+		cpumask_t cpu_mask;
+		cpumask_copy(&cpu_mask, mm_cpumask(mm));
+		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+		if (!cpumask_empty(&cpu_mask))
 			xc2((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_page), (unsigned long) vma, page);
 		local_flush_tlb_page(vma, page);
 	}
 }
 
-void smp_reschedule_irq(void)
-{
-	set_need_resched();
-}
-
 void smp_flush_page_to_ram(unsigned long page)
 {
 	/* Current theory is that those who call this are the one's
@@ -245,9 +293,10 @@ void smp_flush_page_to_ram(unsigned long page)
 
 void smp_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr)
 {
-	cpumask_t cpu_mask = *mm_cpumask(mm);
-	cpu_clear(smp_processor_id(), cpu_mask);
-	if (!cpus_empty(cpu_mask))
+	cpumask_t cpu_mask;
+	cpumask_copy(&cpu_mask, mm_cpumask(mm));
+	cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+	if (!cpumask_empty(&cpu_mask))
 		xc2((smpfunc_t) BTFIXUP_CALL(local_flush_sig_insns), (unsigned long) mm, insn_addr);
 	local_flush_sig_insns(mm, insn_addr);
 }
@@ -401,7 +450,7 @@ int __cpuinit __cpu_up(unsigned int cpu)
 	};
 
 	if (!ret) {
-		cpu_set(cpu, smp_commenced_mask);
+		cpumask_set_cpu(cpu, &smp_commenced_mask);
 		while (!cpu_online(cpu))
 			mb();
 	}
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 3e94a8c2323..99cb17251bb 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -121,11 +121,11 @@ void __cpuinit smp_callin(void)
 	/* inform the notifiers about the new cpu */
 	notify_cpu_starting(cpuid);
 
-	while (!cpu_isset(cpuid, smp_commenced_mask))
+	while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
 		rmb();
 
 	ipi_call_lock_irq();
-	cpu_set(cpuid, cpu_online_map);
+	set_cpu_online(cpuid, true);
 	ipi_call_unlock_irq();
 
 	/* idle thread is expected to have preempt disabled */
@@ -785,7 +785,7 @@ static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask
 
 /* Send cross call to all processors mentioned in MASK_P
  * except self.  Really, there are only two cases currently,
- * "&cpu_online_map" and "&mm->cpu_vm_mask".
+ * "cpu_online_mask" and "mm_cpumask(mm)".
  */
 static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, const cpumask_t *mask)
 {
@@ -797,7 +797,7 @@ static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 d
 /* Send cross call to all processors except self. */
 static void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2)
 {
-	smp_cross_call_masked(func, ctx, data1, data2, &cpu_online_map);
+	smp_cross_call_masked(func, ctx, data1, data2, cpu_online_mask);
 }
 
 extern unsigned long xcall_sync_tick;
@@ -805,7 +805,7 @@ extern unsigned long xcall_sync_tick;
 static void smp_start_sync_tick_client(int cpu)
 {
 	xcall_deliver((u64) &xcall_sync_tick, 0, 0,
-		      &cpumask_of_cpu(cpu));
+		      cpumask_of(cpu));
 }
 
 extern unsigned long xcall_call_function;
@@ -820,7 +820,7 @@ extern unsigned long xcall_call_function_single;
 void arch_send_call_function_single_ipi(int cpu)
 {
 	xcall_deliver((u64) &xcall_call_function_single, 0, 0,
-		      &cpumask_of_cpu(cpu));
+		      cpumask_of(cpu));
 }
 
 void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs)
@@ -918,7 +918,7 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu)
 		}
 		if (data0) {
 			xcall_deliver(data0, __pa(pg_addr),
-				      (u64) pg_addr, &cpumask_of_cpu(cpu));
+				      (u64) pg_addr, cpumask_of(cpu));
 #ifdef CONFIG_DEBUG_DCFLUSH
 			atomic_inc(&dcpage_flushes_xcall);
 #endif
@@ -954,7 +954,7 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
 	}
 	if (data0) {
 		xcall_deliver(data0, __pa(pg_addr),
-			      (u64) pg_addr, &cpu_online_map);
+			      (u64) pg_addr, cpu_online_mask);
 #ifdef CONFIG_DEBUG_DCFLUSH
 		atomic_inc(&dcpage_flushes_xcall);
 #endif
@@ -1197,32 +1197,32 @@ void __devinit smp_fill_in_sib_core_maps(void)
 	for_each_present_cpu(i) {
 		unsigned int j;
 
-		cpus_clear(cpu_core_map[i]);
+		cpumask_clear(&cpu_core_map[i]);
 		if (cpu_data(i).core_id == 0) {
-			cpu_set(i, cpu_core_map[i]);
+			cpumask_set_cpu(i, &cpu_core_map[i]);
 			continue;
 		}
 
 		for_each_present_cpu(j) {
 			if (cpu_data(i).core_id ==
 			    cpu_data(j).core_id)
-				cpu_set(j, cpu_core_map[i]);
+				cpumask_set_cpu(j, &cpu_core_map[i]);
 		}
 	}
 
 	for_each_present_cpu(i) {
 		unsigned int j;
 
-		cpus_clear(per_cpu(cpu_sibling_map, i));
+		cpumask_clear(&per_cpu(cpu_sibling_map, i));
 		if (cpu_data(i).proc_id == -1) {
-			cpu_set(i, per_cpu(cpu_sibling_map, i));
+			cpumask_set_cpu(i, &per_cpu(cpu_sibling_map, i));
 			continue;
 		}
 
 		for_each_present_cpu(j) {
 			if (cpu_data(i).proc_id ==
 			    cpu_data(j).proc_id)
-				cpu_set(j, per_cpu(cpu_sibling_map, i));
+				cpumask_set_cpu(j, &per_cpu(cpu_sibling_map, i));
 		}
 	}
 }
@@ -1232,10 +1232,10 @@ int __cpuinit __cpu_up(unsigned int cpu)
 	int ret = smp_boot_one_cpu(cpu);
 
 	if (!ret) {
-		cpu_set(cpu, smp_commenced_mask);
-		while (!cpu_isset(cpu, cpu_online_map))
+		cpumask_set_cpu(cpu, &smp_commenced_mask);
+		while (!cpu_online(cpu))
 			mb();
-		if (!cpu_isset(cpu, cpu_online_map)) {
+		if (!cpu_online(cpu)) {
 			ret = -ENODEV;
 		} else {
 			/* On SUN4V, writes to %tick and %stick are
@@ -1269,7 +1269,7 @@ void cpu_play_dead(void)
 				tb->nonresum_mondo_pa, 0);
 	}
 
-	cpu_clear(cpu, smp_commenced_mask);
+	cpumask_clear_cpu(cpu, &smp_commenced_mask);
 	membar_safe("#Sync");
 
 	local_irq_disable();
@@ -1290,13 +1290,13 @@ int __cpu_disable(void)
 	cpuinfo_sparc *c;
 	int i;
 
-	for_each_cpu_mask(i, cpu_core_map[cpu])
-		cpu_clear(cpu, cpu_core_map[i]);
-	cpus_clear(cpu_core_map[cpu]);
+	for_each_cpu(i, &cpu_core_map[cpu])
+		cpumask_clear_cpu(cpu, &cpu_core_map[i]);
+	cpumask_clear(&cpu_core_map[cpu]);
 
-	for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu))
-		cpu_clear(cpu, per_cpu(cpu_sibling_map, i));
-	cpus_clear(per_cpu(cpu_sibling_map, cpu));
+	for_each_cpu(i, &per_cpu(cpu_sibling_map, cpu))
+		cpumask_clear_cpu(cpu, &per_cpu(cpu_sibling_map, i));
+	cpumask_clear(&per_cpu(cpu_sibling_map, cpu));
 
 	c = &cpu_data(cpu);
 
@@ -1313,7 +1313,7 @@ int __cpu_disable(void)
 	local_irq_disable();
 
 	ipi_call_lock();
-	cpu_clear(cpu, cpu_online_map);
+	set_cpu_online(cpu, false);
 	ipi_call_unlock();
 
 	cpu_map_rebuild();
@@ -1327,11 +1327,11 @@ void __cpu_die(unsigned int cpu)
 
 	for (i = 0; i < 100; i++) {
 		smp_rmb();
-		if (!cpu_isset(cpu, smp_commenced_mask))
+		if (!cpumask_test_cpu(cpu, &smp_commenced_mask))
 			break;
 		msleep(100);
 	}
-	if (cpu_isset(cpu, smp_commenced_mask)) {
+	if (cpumask_test_cpu(cpu, &smp_commenced_mask)) {
 		printk(KERN_ERR "CPU %u didn't die...\n", cpu);
 	} else {
 #if defined(CONFIG_SUN_LDOMS)
@@ -1341,7 +1341,7 @@ void __cpu_die(unsigned int cpu)
 		do {
 			hv_err = sun4v_cpu_stop(cpu);
 			if (hv_err == HV_EOK) {
-				cpu_clear(cpu, cpu_present_map);
+				set_cpu_present(cpu, false);
 				break;
 			}
 		} while (--limit > 0);
@@ -1362,12 +1362,13 @@ void __init smp_cpus_done(unsigned int max_cpus)
 void smp_send_reschedule(int cpu)
 {
 	xcall_deliver((u64) &xcall_receive_signal, 0, 0,
-		      &cpumask_of_cpu(cpu));
+		      cpumask_of(cpu));
 }
 
 void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs)
 {
 	clear_softint(1 << irq);
+	scheduler_ipi();
 }
 
 /* This is a nop because we capture all other cpus
diff --git a/arch/sparc/kernel/sun4c_irq.c b/arch/sparc/kernel/sun4c_irq.c
index 90eea38ad66..f6bf25a2ff8 100644
--- a/arch/sparc/kernel/sun4c_irq.c
+++ b/arch/sparc/kernel/sun4c_irq.c
@@ -65,62 +65,94 @@
  */
 unsigned char __iomem *interrupt_enable;
 
-static void sun4c_disable_irq(unsigned int irq_nr)
+static void sun4c_mask_irq(struct irq_data *data)
 {
-	unsigned long flags;
-	unsigned char current_mask, new_mask;
-
-	local_irq_save(flags);
-	irq_nr &= (NR_IRQS - 1);
-	current_mask = sbus_readb(interrupt_enable);
-	switch (irq_nr) {
-	case 1:
-		new_mask = ((current_mask) & (~(SUN4C_INT_E1)));
-		break;
-	case 8:
-		new_mask = ((current_mask) & (~(SUN4C_INT_E8)));
-		break;
-	case 10:
-		new_mask = ((current_mask) & (~(SUN4C_INT_E10)));
-		break;
-	case 14:
-		new_mask = ((current_mask) & (~(SUN4C_INT_E14)));
-		break;
-	default:
+	unsigned long mask = (unsigned long)data->chip_data;
+
+	if (mask) {
+		unsigned long flags;
+
+		local_irq_save(flags);
+		mask = sbus_readb(interrupt_enable) & ~mask;
+		sbus_writeb(mask, interrupt_enable);
 		local_irq_restore(flags);
-		return;
 	}
-	sbus_writeb(new_mask, interrupt_enable);
-	local_irq_restore(flags);
 }
 
-static void sun4c_enable_irq(unsigned int irq_nr)
+static void sun4c_unmask_irq(struct irq_data *data)
 {
-	unsigned long flags;
-	unsigned char current_mask, new_mask;
-
-	local_irq_save(flags);
-	irq_nr &= (NR_IRQS - 1);
-	current_mask = sbus_readb(interrupt_enable);
-	switch (irq_nr) {
-	case 1:
-		new_mask = ((current_mask) | SUN4C_INT_E1);
-		break;
-	case 8:
-		new_mask = ((current_mask) | SUN4C_INT_E8);
-		break;
-	case 10:
-		new_mask = ((current_mask) | SUN4C_INT_E10);
-		break;
-	case 14:
-		new_mask = ((current_mask) | SUN4C_INT_E14);
-		break;
-	default:
+	unsigned long mask = (unsigned long)data->chip_data;
+
+	if (mask) {
+		unsigned long flags;
+
+		local_irq_save(flags);
+		mask = sbus_readb(interrupt_enable) | mask;
+		sbus_writeb(mask, interrupt_enable);
 		local_irq_restore(flags);
-		return;
 	}
-	sbus_writeb(new_mask, interrupt_enable);
-	local_irq_restore(flags);
+}
+
+static unsigned int sun4c_startup_irq(struct irq_data *data)
+{
+	irq_link(data->irq);
+	sun4c_unmask_irq(data);
+
+	return 0;
+}
+
+static void sun4c_shutdown_irq(struct irq_data *data)
+{
+	sun4c_mask_irq(data);
+	irq_unlink(data->irq);
+}
+
+static struct irq_chip sun4c_irq = {
+	.name		= "sun4c",
+	.irq_startup	= sun4c_startup_irq,
+	.irq_shutdown	= sun4c_shutdown_irq,
+	.irq_mask	= sun4c_mask_irq,
+	.irq_unmask	= sun4c_unmask_irq,
+};
+
+static unsigned int sun4c_build_device_irq(struct platform_device *op,
+					   unsigned int real_irq)
+{
+	 unsigned int irq;
+
+	if (real_irq >= 16) {
+		prom_printf("Bogus sun4c IRQ %u\n", real_irq);
+		prom_halt();
+	}
+
+	irq = irq_alloc(real_irq, real_irq);
+	if (irq) {
+		unsigned long mask = 0UL;
+
+		switch (real_irq) {
+		case 1:
+			mask = SUN4C_INT_E1;
+			break;
+		case 8:
+			mask = SUN4C_INT_E8;
+			break;
+		case 10:
+			mask = SUN4C_INT_E10;
+			break;
+		case 14:
+			mask = SUN4C_INT_E14;
+			break;
+		default:
+			/* All the rest are either always enabled,
+			 * or are for signalling software interrupts.
+			 */
+			break;
+		}
+		irq_set_chip_and_handler_name(irq, &sun4c_irq,
+		                              handle_level_irq, "level");
+		irq_set_chip_data(irq, (void *)mask);
+	}
+	return irq;
 }
 
 struct sun4c_timer_info {
@@ -144,8 +176,9 @@ static void sun4c_load_profile_irq(int cpu, unsigned int limit)
 
 static void __init sun4c_init_timers(irq_handler_t counter_fn)
 {
-	const struct linux_prom_irqs *irq;
+	const struct linux_prom_irqs *prom_irqs;
 	struct device_node *dp;
+	unsigned int irq;
 	const u32 *addr;
 	int err;
 
@@ -163,9 +196,9 @@ static void __init sun4c_init_timers(irq_handler_t counter_fn)
 
 	sun4c_timers = (void __iomem *) (unsigned long) addr[0];
 
-	irq = of_get_property(dp, "intr", NULL);
+	prom_irqs = of_get_property(dp, "intr", NULL);
 	of_node_put(dp);
-	if (!irq) {
+	if (!prom_irqs) {
 		prom_printf("sun4c_init_timers: No intr property\n");
 		prom_halt();
 	}
@@ -178,15 +211,15 @@ static void __init sun4c_init_timers(irq_handler_t counter_fn)
 
 	master_l10_counter = &sun4c_timers->l10_count;
 
-	err = request_irq(irq[0].pri, counter_fn,
-			  (IRQF_DISABLED | SA_STATIC_ALLOC),
-			  "timer", NULL);
+	irq = sun4c_build_device_irq(NULL, prom_irqs[0].pri);
+	err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL);
 	if (err) {
 		prom_printf("sun4c_init_timers: request_irq() fails with %d\n", err);
 		prom_halt();
 	}
 
-	sun4c_disable_irq(irq[1].pri);
+	/* disable timer interrupt */
+	sun4c_mask_irq(irq_get_irq_data(irq));
 }
 
 #ifdef CONFIG_SMP
@@ -215,14 +248,11 @@ void __init sun4c_init_IRQ(void)
 
 	interrupt_enable = (void __iomem *) (unsigned long) addr[0];
 
-	BTFIXUPSET_CALL(enable_irq, sun4c_enable_irq, BTFIXUPCALL_NORM);
-	BTFIXUPSET_CALL(disable_irq, sun4c_disable_irq, BTFIXUPCALL_NORM);
-	BTFIXUPSET_CALL(enable_pil_irq, sun4c_enable_irq, BTFIXUPCALL_NORM);
-	BTFIXUPSET_CALL(disable_pil_irq, sun4c_disable_irq, BTFIXUPCALL_NORM);
 	BTFIXUPSET_CALL(clear_clock_irq, sun4c_clear_clock_irq, BTFIXUPCALL_NORM);
 	BTFIXUPSET_CALL(load_profile_irq, sun4c_load_profile_irq, BTFIXUPCALL_NOP);
 
-	sparc_irq_config.init_timers = sun4c_init_timers;
+	sparc_irq_config.init_timers      = sun4c_init_timers;
+	sparc_irq_config.build_device_irq = sun4c_build_device_irq;
 
 #ifdef CONFIG_SMP
 	BTFIXUPSET_CALL(set_cpu_int, sun4c_nop, BTFIXUPCALL_NOP);
diff --git a/arch/sparc/kernel/sun4d_irq.c b/arch/sparc/kernel/sun4d_irq.c
index 77b4a899271..a9ea60eb2c1 100644
--- a/arch/sparc/kernel/sun4d_irq.c
+++ b/arch/sparc/kernel/sun4d_irq.c
@@ -14,6 +14,7 @@
 #include <asm/io.h>
 #include <asm/sbi.h>
 #include <asm/cacheflush.h>
+#include <asm/setup.h>
 
 #include "kernel.h"
 #include "irq.h"
@@ -22,22 +23,20 @@
  * cpu local.  CPU local interrupts cover the timer interrupts
  * and whatnot, and we encode those as normal PILs between
  * 0 and 15.
- *
- * SBUS interrupts are encoded integers including the board number
- * (plus one), the SBUS level, and the SBUS slot number.  Sun4D
- * IRQ dispatch is done by:
- *
- * 1) Reading the BW local interrupt table in order to get the bus
- *    interrupt mask.
- *
- *    This table is indexed by SBUS interrupt level which can be
- *    derived from the PIL we got interrupted on.
- *
- * 2) For each bus showing interrupt pending from #1, read the
- *    SBI interrupt state register.  This will indicate which slots
- *    have interrupts pending for that SBUS interrupt level.
+ * SBUS interrupts are encodes as a combination of board, level and slot.
  */
 
+struct sun4d_handler_data {
+	unsigned int cpuid;    /* target cpu */
+	unsigned int real_irq; /* interrupt level */
+};
+
+
+static unsigned int sun4d_encode_irq(int board, int lvl, int slot)
+{
+	return (board + 1) << 5 | (lvl << 2) | slot;
+}
+
 struct sun4d_timer_regs {
 	u32	l10_timer_limit;
 	u32	l10_cur_countx;
@@ -48,17 +47,12 @@ struct sun4d_timer_regs {
 
 static struct sun4d_timer_regs __iomem *sun4d_timers;
 
-#define TIMER_IRQ	10
-
-#define MAX_STATIC_ALLOC	4
-static unsigned char sbus_tid[32];
-
-static struct irqaction *irq_action[NR_IRQS];
+#define SUN4D_TIMER_IRQ        10
 
-static struct sbus_action {
-	struct irqaction *action;
-	/* For SMP this needs to be extended */
-} *sbus_actions;
+/* Specify which cpu handle interrupts from which board.
+ * Index is board - value is cpu.
+ */
+static unsigned char board_to_cpu[32];
 
 static int pil_to_sbus[] = {
 	0,
@@ -79,152 +73,81 @@ static int pil_to_sbus[] = {
 	0,
 };
 
-static int sbus_to_pil[] = {
-	0,
-	2,
-	3,
-	5,
-	7,
-	9,
-	11,
-	13,
-};
-
-static int nsbi;
-
 /* Exported for sun4d_smp.c */
 DEFINE_SPINLOCK(sun4d_imsk_lock);
 
-int show_sun4d_interrupts(struct seq_file *p, void *v)
+/* SBUS interrupts are encoded integers including the board number
+ * (plus one), the SBUS level, and the SBUS slot number.  Sun4D
+ * IRQ dispatch is done by:
+ *
+ * 1) Reading the BW local interrupt table in order to get the bus
+ *    interrupt mask.
+ *
+ *    This table is indexed by SBUS interrupt level which can be
+ *    derived from the PIL we got interrupted on.
+ *
+ * 2) For each bus showing interrupt pending from #1, read the
+ *    SBI interrupt state register.  This will indicate which slots
+ *    have interrupts pending for that SBUS interrupt level.
+ *
+ * 3) Call the genreric IRQ support.
+ */
+static void sun4d_sbus_handler_irq(int sbusl)
 {
-	int i = *(loff_t *) v, j = 0, k = 0, sbusl;
-	struct irqaction *action;
-	unsigned long flags;
-#ifdef CONFIG_SMP
-	int x;
-#endif
-
-	spin_lock_irqsave(&irq_action_lock, flags);
-	if (i < NR_IRQS) {
-		sbusl = pil_to_sbus[i];
-		if (!sbusl) {
-			action = *(i + irq_action);
-			if (!action)
-				goto out_unlock;
-		} else {
-			for (j = 0; j < nsbi; j++) {
-				for (k = 0; k < 4; k++)
-					action = sbus_actions[(j << 5) + (sbusl << 2) + k].action;
-					if (action)
-						goto found_it;
-			}
-			goto out_unlock;
-		}
-found_it:	seq_printf(p, "%3d: ", i);
-#ifndef CONFIG_SMP
-		seq_printf(p, "%10u ", kstat_irqs(i));
-#else
-		for_each_online_cpu(x)
-			seq_printf(p, "%10u ",
-			       kstat_cpu(cpu_logical_map(x)).irqs[i]);
-#endif
-		seq_printf(p, "%c %s",
-			(action->flags & IRQF_DISABLED) ? '+' : ' ',
-			action->name);
-		action = action->next;
-		for (;;) {
-			for (; action; action = action->next) {
-				seq_printf(p, ",%s %s",
-					(action->flags & IRQF_DISABLED) ? " +" : "",
-					action->name);
-			}
-			if (!sbusl)
-				break;
-			k++;
-			if (k < 4) {
-				action = sbus_actions[(j << 5) + (sbusl << 2) + k].action;
-			} else {
-				j++;
-				if (j == nsbi)
-					break;
-				k = 0;
-				action = sbus_actions[(j << 5) + (sbusl << 2)].action;
+	unsigned int bus_mask;
+	unsigned int sbino, slot;
+	unsigned int sbil;
+
+	bus_mask = bw_get_intr_mask(sbusl) & 0x3ffff;
+	bw_clear_intr_mask(sbusl, bus_mask);
+
+	sbil = (sbusl << 2);
+	/* Loop for each pending SBI */
+	for (sbino = 0; bus_mask; sbino++) {
+		unsigned int idx, mask;
+
+		bus_mask >>= 1;
+		if (!(bus_mask & 1))
+			continue;
+		/* XXX This seems to ACK the irq twice.  acquire_sbi()
+		 * XXX uses swap, therefore this writes 0xf << sbil,
+		 * XXX then later release_sbi() will write the individual
+		 * XXX bits which were set again.
+		 */
+		mask = acquire_sbi(SBI2DEVID(sbino), 0xf << sbil);
+		mask &= (0xf << sbil);
+
+		/* Loop for each pending SBI slot */
+		idx = 0;
+		slot = (1 << sbil);
+		while (mask != 0) {
+			unsigned int pil;
+			struct irq_bucket *p;
+
+			idx++;
+			slot <<= 1;
+			if (!(mask & slot))
+				continue;
+
+			mask &= ~slot;
+			pil = sun4d_encode_irq(sbino, sbil, idx);
+
+			p = irq_map[pil];
+			while (p) {
+				struct irq_bucket *next;
+
+				next = p->next;
+				generic_handle_irq(p->irq);
+				p = next;
 			}
+			release_sbi(SBI2DEVID(sbino), slot);
 		}
-		seq_putc(p, '\n');
 	}
-out_unlock:
-	spin_unlock_irqrestore(&irq_action_lock, flags);
-	return 0;
-}
-
-void sun4d_free_irq(unsigned int irq, void *dev_id)
-{
-	struct irqaction *action, **actionp;
-	struct irqaction *tmp = NULL;
-	unsigned long flags;
-
-	spin_lock_irqsave(&irq_action_lock, flags);
-	if (irq < 15)
-		actionp = irq + irq_action;
-	else
-		actionp = &(sbus_actions[irq - (1 << 5)].action);
-	action = *actionp;
-	if (!action) {
-		printk(KERN_ERR "Trying to free free IRQ%d\n", irq);
-		goto out_unlock;
-	}
-	if (dev_id) {
-		for (; action; action = action->next) {
-			if (action->dev_id == dev_id)
-				break;
-			tmp = action;
-		}
-		if (!action) {
-			printk(KERN_ERR "Trying to free free shared IRQ%d\n",
-			       irq);
-			goto out_unlock;
-		}
-	} else if (action->flags & IRQF_SHARED) {
-		printk(KERN_ERR "Trying to free shared IRQ%d with NULL device ID\n",
-		       irq);
-		goto out_unlock;
-	}
-	if (action->flags & SA_STATIC_ALLOC) {
-		/*
-		 * This interrupt is marked as specially allocated
-		 * so it is a bad idea to free it.
-		 */
-		printk(KERN_ERR "Attempt to free statically allocated IRQ%d (%s)\n",
-		       irq, action->name);
-		goto out_unlock;
-	}
-
-	if (tmp)
-		tmp->next = action->next;
-	else
-		*actionp = action->next;
-
-	spin_unlock_irqrestore(&irq_action_lock, flags);
-
-	synchronize_irq(irq);
-
-	spin_lock_irqsave(&irq_action_lock, flags);
-
-	kfree(action);
-
-	if (!(*actionp))
-		__disable_irq(irq);
-
-out_unlock:
-	spin_unlock_irqrestore(&irq_action_lock, flags);
 }
 
 void sun4d_handler_irq(int pil, struct pt_regs *regs)
 {
 	struct pt_regs *old_regs;
-	struct irqaction *action;
-	int cpu = smp_processor_id();
 	/* SBUS IRQ level (1 - 7) */
 	int sbusl = pil_to_sbus[pil];
 
@@ -233,160 +156,96 @@ void sun4d_handler_irq(int pil, struct pt_regs *regs)
 
 	cc_set_iclr(1 << pil);
 
+#ifdef CONFIG_SMP
+	/*
+	 * Check IPI data structures after IRQ has been cleared. Hard and Soft
+	 * IRQ can happen at the same time, so both cases are always handled.
+	 */
+	if (pil == SUN4D_IPI_IRQ)
+		sun4d_ipi_interrupt();
+#endif
+
 	old_regs = set_irq_regs(regs);
 	irq_enter();
-	kstat_cpu(cpu).irqs[pil]++;
-	if (!sbusl) {
-		action = *(pil + irq_action);
-		if (!action)
-			unexpected_irq(pil, NULL, regs);
-		do {
-			action->handler(pil, action->dev_id);
-			action = action->next;
-		} while (action);
+	if (sbusl == 0) {
+		/* cpu interrupt */
+		struct irq_bucket *p;
+
+		p = irq_map[pil];
+		while (p) {
+			struct irq_bucket *next;
+
+			next = p->next;
+			generic_handle_irq(p->irq);
+			p = next;
+		}
 	} else {
-		int bus_mask = bw_get_intr_mask(sbusl) & 0x3ffff;
-		int sbino;
-		struct sbus_action *actionp;
-		unsigned mask, slot;
-		int sbil = (sbusl << 2);
-
-		bw_clear_intr_mask(sbusl, bus_mask);
-
-		/* Loop for each pending SBI */
-		for (sbino = 0; bus_mask; sbino++, bus_mask >>= 1)
-			if (bus_mask & 1) {
-				mask = acquire_sbi(SBI2DEVID(sbino), 0xf << sbil);
-				mask &= (0xf << sbil);
-				actionp = sbus_actions + (sbino << 5) + (sbil);
-				/* Loop for each pending SBI slot */
-				for (slot = (1 << sbil); mask; slot <<= 1, actionp++)
-					if (mask & slot) {
-						mask &= ~slot;
-						action = actionp->action;
-
-						if (!action)
-							unexpected_irq(pil, NULL, regs);
-						do {
-							action->handler(pil, action->dev_id);
-							action = action->next;
-						} while (action);
-						release_sbi(SBI2DEVID(sbino), slot);
-					}
-			}
+		/* SBUS interrupt */
+		sun4d_sbus_handler_irq(sbusl);
 	}
 	irq_exit();
 	set_irq_regs(old_regs);
 }
 
-int sun4d_request_irq(unsigned int irq,
-		irq_handler_t handler,
-		unsigned long irqflags, const char *devname, void *dev_id)
+
+static void sun4d_mask_irq(struct irq_data *data)
 {
-	struct irqaction *action, *tmp = NULL, **actionp;
+	struct sun4d_handler_data *handler_data = data->handler_data;
+	unsigned int real_irq;
+#ifdef CONFIG_SMP
+	int cpuid = handler_data->cpuid;
 	unsigned long flags;
-	int ret;
-
-	if (irq > 14 && irq < (1 << 5)) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	if (!handler) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	spin_lock_irqsave(&irq_action_lock, flags);
-
-	if (irq >= (1 << 5))
-		actionp = &(sbus_actions[irq - (1 << 5)].action);
-	else
-		actionp = irq + irq_action;
-	action = *actionp;
-
-	if (action) {
-		if ((action->flags & IRQF_SHARED) && (irqflags & IRQF_SHARED)) {
-			for (tmp = action; tmp->next; tmp = tmp->next)
-				/* find last entry - tmp used below */;
-		} else {
-			ret = -EBUSY;
-			goto out_unlock;
-		}
-		if ((action->flags & IRQF_DISABLED) ^ (irqflags & IRQF_DISABLED)) {
-			printk(KERN_ERR "Attempt to mix fast and slow interrupts on IRQ%d denied\n",
-			       irq);
-			ret = -EBUSY;
-			goto out_unlock;
-		}
-		action = NULL;		/* Or else! */
-	}
-
-	/* If this is flagged as statically allocated then we use our
-	 * private struct which is never freed.
-	 */
-	if (irqflags & SA_STATIC_ALLOC) {
-		if (static_irq_count < MAX_STATIC_ALLOC)
-			action = &static_irqaction[static_irq_count++];
-		else
-			printk(KERN_ERR "Request for IRQ%d (%s) SA_STATIC_ALLOC failed using kmalloc\n",
-			       irq, devname);
-	}
-
-	if (action == NULL)
-		action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC);
-
-	if (!action) {
-		ret = -ENOMEM;
-		goto out_unlock;
-	}
-
-	action->handler = handler;
-	action->flags = irqflags;
-	action->name = devname;
-	action->next = NULL;
-	action->dev_id = dev_id;
-
-	if (tmp)
-		tmp->next = action;
-	else
-		*actionp = action;
-
-	__enable_irq(irq);
-
-	ret = 0;
-out_unlock:
-	spin_unlock_irqrestore(&irq_action_lock, flags);
-out:
-	return ret;
+#endif
+	real_irq = handler_data->real_irq;
+#ifdef CONFIG_SMP
+	spin_lock_irqsave(&sun4d_imsk_lock, flags);
+	cc_set_imsk_other(cpuid, cc_get_imsk_other(cpuid) | (1 << real_irq));
+	spin_unlock_irqrestore(&sun4d_imsk_lock, flags);
+#else
+	cc_set_imsk(cc_get_imsk() | (1 << real_irq));
+#endif
 }
 
-static void sun4d_disable_irq(unsigned int irq)
+static void sun4d_unmask_irq(struct irq_data *data)
 {
-	int tid = sbus_tid[(irq >> 5) - 1];
+	struct sun4d_handler_data *handler_data = data->handler_data;
+	unsigned int real_irq;
+#ifdef CONFIG_SMP
+	int cpuid = handler_data->cpuid;
 	unsigned long flags;
+#endif
+	real_irq = handler_data->real_irq;
 
-	if (irq < NR_IRQS)
-		return;
-
+#ifdef CONFIG_SMP
 	spin_lock_irqsave(&sun4d_imsk_lock, flags);
-	cc_set_imsk_other(tid, cc_get_imsk_other(tid) | (1 << sbus_to_pil[(irq >> 2) & 7]));
+	cc_set_imsk_other(cpuid, cc_get_imsk_other(cpuid) | ~(1 << real_irq));
 	spin_unlock_irqrestore(&sun4d_imsk_lock, flags);
+#else
+	cc_set_imsk(cc_get_imsk() | ~(1 << real_irq));
+#endif
 }
 
-static void sun4d_enable_irq(unsigned int irq)
+static unsigned int sun4d_startup_irq(struct irq_data *data)
 {
-	int tid = sbus_tid[(irq >> 5) - 1];
-	unsigned long flags;
-
-	if (irq < NR_IRQS)
-		return;
+	irq_link(data->irq);
+	sun4d_unmask_irq(data);
+	return 0;
+}
 
-	spin_lock_irqsave(&sun4d_imsk_lock, flags);
-	cc_set_imsk_other(tid, cc_get_imsk_other(tid) & ~(1 << sbus_to_pil[(irq >> 2) & 7]));
-	spin_unlock_irqrestore(&sun4d_imsk_lock, flags);
+static void sun4d_shutdown_irq(struct irq_data *data)
+{
+	sun4d_mask_irq(data);
+	irq_unlink(data->irq);
 }
 
+struct irq_chip sun4d_irq = {
+	.name		= "sun4d",
+	.irq_startup	= sun4d_startup_irq,
+	.irq_shutdown	= sun4d_shutdown_irq,
+	.irq_unmask	= sun4d_unmask_irq,
+	.irq_mask	= sun4d_mask_irq,
+};
+
 #ifdef CONFIG_SMP
 static void sun4d_set_cpu_int(int cpu, int level)
 {
@@ -413,7 +272,7 @@ void __init sun4d_distribute_irqs(void)
 	for_each_node_by_name(dp, "sbi") {
 		int devid = of_getintprop_default(dp, "device-id", 0);
 		int board = of_getintprop_default(dp, "board#", 0);
-		sbus_tid[board] = cpuid;
+		board_to_cpu[board] = cpuid;
 		set_sbi_tid(devid, cpuid << 3);
 	}
 	printk(KERN_ERR "All sbus IRQs directed to CPU%d\n", cpuid);
@@ -443,15 +302,16 @@ static void __init sun4d_load_profile_irqs(void)
 unsigned int sun4d_build_device_irq(struct platform_device *op,
                                     unsigned int real_irq)
 {
-	static int pil_to_sbus[] = {
-		0, 0, 1, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0, 0,
-	};
 	struct device_node *dp = op->dev.of_node;
 	struct device_node *io_unit, *sbi = dp->parent;
 	const struct linux_prom_registers *regs;
+	struct sun4d_handler_data *handler_data;
+	unsigned int pil;
+	unsigned int irq;
 	int board, slot;
 	int sbusl;
 
+	irq = 0;
 	while (sbi) {
 		if (!strcmp(sbi->name, "sbi"))
 			break;
@@ -484,7 +344,28 @@ unsigned int sun4d_build_device_irq(struct platform_device *op,
 
 	sbusl = pil_to_sbus[real_irq];
 	if (sbusl)
-		return (((board + 1) << 5) + (sbusl << 2) + slot);
+		pil = sun4d_encode_irq(board, sbusl, slot);
+	else
+		pil = real_irq;
+
+	irq = irq_alloc(real_irq, pil);
+	if (irq == 0)
+		goto err_out;
+
+	handler_data = irq_get_handler_data(irq);
+	if (unlikely(handler_data))
+		goto err_out;
+
+	handler_data = kzalloc(sizeof(struct sun4d_handler_data), GFP_ATOMIC);
+	if (unlikely(!handler_data)) {
+		prom_printf("IRQ: kzalloc(sun4d_handler_data) failed.\n");
+		prom_halt();
+	}
+	handler_data->cpuid    = board_to_cpu[board];
+	handler_data->real_irq = real_irq;
+	irq_set_chip_and_handler_name(irq, &sun4d_irq,
+	                              handle_level_irq, "level");
+	irq_set_handler_data(irq, handler_data);
 
 err_out:
 	return real_irq;
@@ -518,6 +399,7 @@ static void __init sun4d_init_timers(irq_handler_t counter_fn)
 {
 	struct device_node *dp;
 	struct resource res;
+	unsigned int irq;
 	const u32 *reg;
 	int err;
 
@@ -552,9 +434,8 @@ static void __init sun4d_init_timers(irq_handler_t counter_fn)
 
 	master_l10_counter = &sun4d_timers->l10_cur_count;
 
-	err = request_irq(TIMER_IRQ, counter_fn,
-			  (IRQF_DISABLED | SA_STATIC_ALLOC),
-			  "timer", NULL);
+	irq = sun4d_build_device_irq(NULL, SUN4D_TIMER_IRQ);
+	err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL);
 	if (err) {
 		prom_printf("sun4d_init_timers: request_irq() failed with %d\n",
 		             err);
@@ -567,27 +448,16 @@ static void __init sun4d_init_timers(irq_handler_t counter_fn)
 void __init sun4d_init_sbi_irq(void)
 {
 	struct device_node *dp;
-	int target_cpu = 0;
+	int target_cpu;
 
-#ifdef CONFIG_SMP
 	target_cpu = boot_cpu_id;
-#endif
-
-	nsbi = 0;
-	for_each_node_by_name(dp, "sbi")
-		nsbi++;
-	sbus_actions = kzalloc(nsbi * 8 * 4 * sizeof(struct sbus_action), GFP_ATOMIC);
-	if (!sbus_actions) {
-		prom_printf("SUN4D: Cannot allocate sbus_actions, halting.\n");
-		prom_halt();
-	}
 	for_each_node_by_name(dp, "sbi") {
 		int devid = of_getintprop_default(dp, "device-id", 0);
 		int board = of_getintprop_default(dp, "board#", 0);
 		unsigned int mask;
 
 		set_sbi_tid(devid, target_cpu << 3);
-		sbus_tid[board] = target_cpu;
+		board_to_cpu[board] = target_cpu;
 
 		/* Get rid of pending irqs from PROM */
 		mask = acquire_sbi(devid, 0xffffffff);
@@ -603,12 +473,10 @@ void __init sun4d_init_IRQ(void)
 {
 	local_irq_disable();
 
-	BTFIXUPSET_CALL(enable_irq, sun4d_enable_irq, BTFIXUPCALL_NORM);
-	BTFIXUPSET_CALL(disable_irq, sun4d_disable_irq, BTFIXUPCALL_NORM);
 	BTFIXUPSET_CALL(clear_clock_irq, sun4d_clear_clock_irq, BTFIXUPCALL_NORM);
 	BTFIXUPSET_CALL(load_profile_irq, sun4d_load_profile_irq, BTFIXUPCALL_NORM);
 
-	sparc_irq_config.init_timers = sun4d_init_timers;
+	sparc_irq_config.init_timers      = sun4d_init_timers;
 	sparc_irq_config.build_device_irq = sun4d_build_device_irq;
 
 #ifdef CONFIG_SMP
diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c
index 475d50b96cd..133387980b5 100644
--- a/arch/sparc/kernel/sun4d_smp.c
+++ b/arch/sparc/kernel/sun4d_smp.c
@@ -32,6 +32,7 @@ static inline unsigned long sun4d_swap(volatile unsigned long *ptr, unsigned lon
 	return val;
 }
 
+static void smp4d_ipi_init(void);
 static void smp_setup_percpu_timer(void);
 
 static unsigned char cpu_leds[32];
@@ -80,8 +81,6 @@ void __cpuinit smp4d_callin(void)
 	local_flush_cache_all();
 	local_flush_tlb_all();
 
-	cpu_probe();
-
 	while ((unsigned long)current_set[cpuid] < PAGE_OFFSET)
 		barrier();
 
@@ -105,7 +104,7 @@ void __cpuinit smp4d_callin(void)
 
 	local_irq_enable();	/* We don't allow PIL 14 yet */
 
-	while (!cpu_isset(cpuid, smp_commenced_mask))
+	while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
 		barrier();
 
 	spin_lock_irqsave(&sun4d_imsk_lock, flags);
@@ -120,6 +119,7 @@ void __cpuinit smp4d_callin(void)
  */
 void __init smp4d_boot_cpus(void)
 {
+	smp4d_ipi_init();
 	if (boot_cpu_id)
 		current_set[0] = NULL;
 	smp_setup_percpu_timer();
@@ -191,6 +191,80 @@ void __init smp4d_smp_done(void)
 	sun4d_distribute_irqs();
 }
 
+/* Memory structure giving interrupt handler information about IPI generated */
+struct sun4d_ipi_work {
+	int single;
+	int msk;
+	int resched;
+};
+
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct sun4d_ipi_work, sun4d_ipi_work);
+
+/* Initialize IPIs on the SUN4D SMP machine */
+static void __init smp4d_ipi_init(void)
+{
+	int cpu;
+	struct sun4d_ipi_work *work;
+
+	printk(KERN_INFO "smp4d: setup IPI at IRQ %d\n", SUN4D_IPI_IRQ);
+
+	for_each_possible_cpu(cpu) {
+		work = &per_cpu(sun4d_ipi_work, cpu);
+		work->single = work->msk = work->resched = 0;
+	}
+}
+
+void sun4d_ipi_interrupt(void)
+{
+	struct sun4d_ipi_work *work = &__get_cpu_var(sun4d_ipi_work);
+
+	if (work->single) {
+		work->single = 0;
+		smp_call_function_single_interrupt();
+	}
+	if (work->msk) {
+		work->msk = 0;
+		smp_call_function_interrupt();
+	}
+	if (work->resched) {
+		work->resched = 0;
+		smp_resched_interrupt();
+	}
+}
+
+static void smp4d_ipi_single(int cpu)
+{
+	struct sun4d_ipi_work *work = &per_cpu(sun4d_ipi_work, cpu);
+
+	/* Mark work */
+	work->single = 1;
+
+	/* Generate IRQ on the CPU */
+	sun4d_send_ipi(cpu, SUN4D_IPI_IRQ);
+}
+
+static void smp4d_ipi_mask_one(int cpu)
+{
+	struct sun4d_ipi_work *work = &per_cpu(sun4d_ipi_work, cpu);
+
+	/* Mark work */
+	work->msk = 1;
+
+	/* Generate IRQ on the CPU */
+	sun4d_send_ipi(cpu, SUN4D_IPI_IRQ);
+}
+
+static void smp4d_ipi_resched(int cpu)
+{
+	struct sun4d_ipi_work *work = &per_cpu(sun4d_ipi_work, cpu);
+
+	/* Mark work */
+	work->resched = 1;
+
+	/* Generate IRQ on the CPU (any IRQ will cause resched) */
+	sun4d_send_ipi(cpu, SUN4D_IPI_IRQ);
+}
+
 static struct smp_funcall {
 	smpfunc_t func;
 	unsigned long arg1;
@@ -239,10 +313,10 @@ static void smp4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
 		{
 			register int i;
 
-			cpu_clear(smp_processor_id(), mask);
-			cpus_and(mask, cpu_online_map, mask);
+			cpumask_clear_cpu(smp_processor_id(), &mask);
+			cpumask_and(&mask, cpu_online_mask, &mask);
 			for (i = 0; i <= high; i++) {
-				if (cpu_isset(i, mask)) {
+				if (cpumask_test_cpu(i, &mask)) {
 					ccall_info.processors_in[i] = 0;
 					ccall_info.processors_out[i] = 0;
 					sun4d_send_ipi(i, IRQ_CROSS_CALL);
@@ -255,7 +329,7 @@ static void smp4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
 
 			i = 0;
 			do {
-				if (!cpu_isset(i, mask))
+				if (!cpumask_test_cpu(i, &mask))
 					continue;
 				while (!ccall_info.processors_in[i])
 					barrier();
@@ -263,7 +337,7 @@ static void smp4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
 
 			i = 0;
 			do {
-				if (!cpu_isset(i, mask))
+				if (!cpumask_test_cpu(i, &mask))
 					continue;
 				while (!ccall_info.processors_out[i])
 					barrier();
@@ -356,6 +430,9 @@ void __init sun4d_init_smp(void)
 	BTFIXUPSET_BLACKBOX(load_current, smp4d_blackbox_current);
 	BTFIXUPSET_CALL(smp_cross_call, smp4d_cross_call, BTFIXUPCALL_NORM);
 	BTFIXUPSET_CALL(__hard_smp_processor_id, __smp4d_processor_id, BTFIXUPCALL_NORM);
+	BTFIXUPSET_CALL(smp_ipi_resched, smp4d_ipi_resched, BTFIXUPCALL_NORM);
+	BTFIXUPSET_CALL(smp_ipi_single, smp4d_ipi_single, BTFIXUPCALL_NORM);
+	BTFIXUPSET_CALL(smp_ipi_mask_one, smp4d_ipi_mask_one, BTFIXUPCALL_NORM);
 
 	for (i = 0; i < NR_CPUS; i++) {
 		ccall_info.processors_in[i] = 1;
diff --git a/arch/sparc/kernel/sun4m_irq.c b/arch/sparc/kernel/sun4m_irq.c
index 69df6257a32..422c16dad1f 100644
--- a/arch/sparc/kernel/sun4m_irq.c
+++ b/arch/sparc/kernel/sun4m_irq.c
@@ -100,6 +100,11 @@
 struct sun4m_irq_percpu __iomem *sun4m_irq_percpu[SUN4M_NCPUS];
 struct sun4m_irq_global __iomem *sun4m_irq_global;
 
+struct sun4m_handler_data {
+	bool    percpu;
+	long    mask;
+};
+
 /* Dave Redman (djhr@tadpole.co.uk)
  * The sun4m interrupt registers.
  */
@@ -142,9 +147,9 @@ struct sun4m_irq_global __iomem *sun4m_irq_global;
 #define	OBP_INT_LEVEL_VME	0x40
 
 #define SUN4M_TIMER_IRQ         (OBP_INT_LEVEL_ONBOARD | 10)
-#define SUM4M_PROFILE_IRQ       (OBP_INT_LEVEL_ONBOARD | 14)
+#define SUN4M_PROFILE_IRQ       (OBP_INT_LEVEL_ONBOARD | 14)
 
-static unsigned long irq_mask[0x50] = {
+static unsigned long sun4m_imask[0x50] = {
 	/* 0x00 - SMP */
 	0,  SUN4M_SOFT_INT(1),
 	SUN4M_SOFT_INT(2),  SUN4M_SOFT_INT(3),
@@ -169,7 +174,7 @@ static unsigned long irq_mask[0x50] = {
 	SUN4M_INT_VIDEO, SUN4M_INT_MODULE,
 	SUN4M_INT_REALTIME, SUN4M_INT_FLOPPY,
 	(SUN4M_INT_SERIAL | SUN4M_INT_KBDMS),
-	SUN4M_INT_AUDIO, 0, SUN4M_INT_MODULE_ERR,
+	SUN4M_INT_AUDIO, SUN4M_INT_E14, SUN4M_INT_MODULE_ERR,
 	/* 0x30 - sbus */
 	0, 0, SUN4M_INT_SBUS(0), SUN4M_INT_SBUS(1),
 	0, SUN4M_INT_SBUS(2), 0, SUN4M_INT_SBUS(3),
@@ -182,105 +187,110 @@ static unsigned long irq_mask[0x50] = {
 	0, SUN4M_INT_VME(6), 0, 0
 };
 
-static unsigned long sun4m_get_irqmask(unsigned int irq)
+static void sun4m_mask_irq(struct irq_data *data)
 {
-	unsigned long mask;
-
-	if (irq < 0x50)
-		mask = irq_mask[irq];
-	else
-		mask = 0;
+	struct sun4m_handler_data *handler_data = data->handler_data;
+	int cpu = smp_processor_id();
 
-	if (!mask)
-		printk(KERN_ERR "sun4m_get_irqmask: IRQ%d has no valid mask!\n",
-		       irq);
+	if (handler_data->mask) {
+		unsigned long flags;
 
-	return mask;
+		local_irq_save(flags);
+		if (handler_data->percpu) {
+			sbus_writel(handler_data->mask, &sun4m_irq_percpu[cpu]->set);
+		} else {
+			sbus_writel(handler_data->mask, &sun4m_irq_global->mask_set);
+		}
+		local_irq_restore(flags);
+	}
 }
 
-static void sun4m_disable_irq(unsigned int irq_nr)
+static void sun4m_unmask_irq(struct irq_data *data)
 {
-	unsigned long mask, flags;
+	struct sun4m_handler_data *handler_data = data->handler_data;
 	int cpu = smp_processor_id();
 
-	mask = sun4m_get_irqmask(irq_nr);
-	local_irq_save(flags);
-	if (irq_nr > 15)
-		sbus_writel(mask, &sun4m_irq_global->mask_set);
-	else
-		sbus_writel(mask, &sun4m_irq_percpu[cpu]->set);
-	local_irq_restore(flags);
-}
-
-static void sun4m_enable_irq(unsigned int irq_nr)
-{
-	unsigned long mask, flags;
-	int cpu = smp_processor_id();
+	if (handler_data->mask) {
+		unsigned long flags;
 
-	/* Dreadful floppy hack. When we use 0x2b instead of
-	 * 0x0b the system blows (it starts to whistle!).
-	 * So we continue to use 0x0b. Fixme ASAP. --P3
-	 */
-	if (irq_nr != 0x0b) {
-		mask = sun4m_get_irqmask(irq_nr);
-		local_irq_save(flags);
-		if (irq_nr > 15)
-			sbus_writel(mask, &sun4m_irq_global->mask_clear);
-		else
-			sbus_writel(mask, &sun4m_irq_percpu[cpu]->clear);
-		local_irq_restore(flags);
-	} else {
 		local_irq_save(flags);
-		sbus_writel(SUN4M_INT_FLOPPY, &sun4m_irq_global->mask_clear);
+		if (handler_data->percpu) {
+			sbus_writel(handler_data->mask, &sun4m_irq_percpu[cpu]->clear);
+		} else {
+			sbus_writel(handler_data->mask, &sun4m_irq_global->mask_clear);
+		}
 		local_irq_restore(flags);
 	}
 }
 
-static unsigned long cpu_pil_to_imask[16] = {
-/*0*/	0x00000000,
-/*1*/	0x00000000,
-/*2*/	SUN4M_INT_SBUS(0) | SUN4M_INT_VME(0),
-/*3*/	SUN4M_INT_SBUS(1) | SUN4M_INT_VME(1),
-/*4*/	SUN4M_INT_SCSI,
-/*5*/	SUN4M_INT_SBUS(2) | SUN4M_INT_VME(2),
-/*6*/	SUN4M_INT_ETHERNET,
-/*7*/	SUN4M_INT_SBUS(3) | SUN4M_INT_VME(3),
-/*8*/	SUN4M_INT_VIDEO,
-/*9*/	SUN4M_INT_SBUS(4) | SUN4M_INT_VME(4) | SUN4M_INT_MODULE_ERR,
-/*10*/	SUN4M_INT_REALTIME,
-/*11*/	SUN4M_INT_SBUS(5) | SUN4M_INT_VME(5) | SUN4M_INT_FLOPPY,
-/*12*/	SUN4M_INT_SERIAL  | SUN4M_INT_KBDMS,
-/*13*/	SUN4M_INT_SBUS(6) | SUN4M_INT_VME(6) | SUN4M_INT_AUDIO,
-/*14*/	SUN4M_INT_E14,
-/*15*/	SUN4M_INT_ERROR,
-};
+static unsigned int sun4m_startup_irq(struct irq_data *data)
+{
+	irq_link(data->irq);
+	sun4m_unmask_irq(data);
+	return 0;
+}
 
-/* We assume the caller has disabled local interrupts when these are called,
- * or else very bizarre behavior will result.
- */
-static void sun4m_disable_pil_irq(unsigned int pil)
+static void sun4m_shutdown_irq(struct irq_data *data)
 {
-	sbus_writel(cpu_pil_to_imask[pil], &sun4m_irq_global->mask_set);
+	sun4m_mask_irq(data);
+	irq_unlink(data->irq);
 }
 
-static void sun4m_enable_pil_irq(unsigned int pil)
+static struct irq_chip sun4m_irq = {
+	.name		= "sun4m",
+	.irq_startup	= sun4m_startup_irq,
+	.irq_shutdown	= sun4m_shutdown_irq,
+	.irq_mask	= sun4m_mask_irq,
+	.irq_unmask	= sun4m_unmask_irq,
+};
+
+
+static unsigned int sun4m_build_device_irq(struct platform_device *op,
+					   unsigned int real_irq)
 {
-	sbus_writel(cpu_pil_to_imask[pil], &sun4m_irq_global->mask_clear);
+	struct sun4m_handler_data *handler_data;
+	unsigned int irq;
+	unsigned int pil;
+
+	if (real_irq >= OBP_INT_LEVEL_VME) {
+		prom_printf("Bogus sun4m IRQ %u\n", real_irq);
+		prom_halt();
+	}
+	pil = (real_irq & 0xf);
+	irq = irq_alloc(real_irq, pil);
+
+	if (irq == 0)
+		goto out;
+
+	handler_data = irq_get_handler_data(irq);
+	if (unlikely(handler_data))
+		goto out;
+
+	handler_data = kzalloc(sizeof(struct sun4m_handler_data), GFP_ATOMIC);
+	if (unlikely(!handler_data)) {
+		prom_printf("IRQ: kzalloc(sun4m_handler_data) failed.\n");
+		prom_halt();
+	}
+
+	handler_data->mask = sun4m_imask[real_irq];
+	handler_data->percpu = real_irq < OBP_INT_LEVEL_ONBOARD;
+	irq_set_chip_and_handler_name(irq, &sun4m_irq,
+	                              handle_level_irq, "level");
+	irq_set_handler_data(irq, handler_data);
+
+out:
+	return irq;
 }
 
 #ifdef CONFIG_SMP
 static void sun4m_send_ipi(int cpu, int level)
 {
-	unsigned long mask = sun4m_get_irqmask(level);
-
-	sbus_writel(mask, &sun4m_irq_percpu[cpu]->set);
+	sbus_writel(SUN4M_SOFT_INT(level), &sun4m_irq_percpu[cpu]->set);
 }
 
 static void sun4m_clear_ipi(int cpu, int level)
 {
-	unsigned long mask = sun4m_get_irqmask(level);
-
-	sbus_writel(mask, &sun4m_irq_percpu[cpu]->clear);
+	sbus_writel(SUN4M_SOFT_INT(level), &sun4m_irq_percpu[cpu]->clear);
 }
 
 static void sun4m_set_udt(int cpu)
@@ -343,7 +353,15 @@ void sun4m_nmi(struct pt_regs *regs)
 	prom_halt();
 }
 
-/* Exported for sun4m_smp.c */
+void sun4m_unmask_profile_irq(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	sbus_writel(sun4m_imask[SUN4M_PROFILE_IRQ], &sun4m_irq_global->mask_clear);
+	local_irq_restore(flags);
+}
+
 void sun4m_clear_profile_irq(int cpu)
 {
 	sbus_readl(&timers_percpu[cpu]->l14_limit);
@@ -358,6 +376,7 @@ static void __init sun4m_init_timers(irq_handler_t counter_fn)
 {
 	struct device_node *dp = of_find_node_by_name(NULL, "counter");
 	int i, err, len, num_cpu_timers;
+	unsigned int irq;
 	const u32 *addr;
 
 	if (!dp) {
@@ -384,8 +403,9 @@ static void __init sun4m_init_timers(irq_handler_t counter_fn)
 
 	master_l10_counter = &timers_global->l10_count;
 
-	err = request_irq(SUN4M_TIMER_IRQ, counter_fn,
-			  (IRQF_DISABLED | SA_STATIC_ALLOC), "timer", NULL);
+	irq = sun4m_build_device_irq(NULL, SUN4M_TIMER_IRQ);
+
+	err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL);
 	if (err) {
 		printk(KERN_ERR "sun4m_init_timers: Register IRQ error %d.\n",
 			err);
@@ -452,14 +472,11 @@ void __init sun4m_init_IRQ(void)
 	if (num_cpu_iregs == 4)
 		sbus_writel(0, &sun4m_irq_global->interrupt_target);
 
-	BTFIXUPSET_CALL(enable_irq, sun4m_enable_irq, BTFIXUPCALL_NORM);
-	BTFIXUPSET_CALL(disable_irq, sun4m_disable_irq, BTFIXUPCALL_NORM);
-	BTFIXUPSET_CALL(enable_pil_irq, sun4m_enable_pil_irq, BTFIXUPCALL_NORM);
-	BTFIXUPSET_CALL(disable_pil_irq, sun4m_disable_pil_irq, BTFIXUPCALL_NORM);
 	BTFIXUPSET_CALL(clear_clock_irq, sun4m_clear_clock_irq, BTFIXUPCALL_NORM);
 	BTFIXUPSET_CALL(load_profile_irq, sun4m_load_profile_irq, BTFIXUPCALL_NORM);
 
 	sparc_irq_config.init_timers = sun4m_init_timers;
+	sparc_irq_config.build_device_irq = sun4m_build_device_irq;
 
 #ifdef CONFIG_SMP
 	BTFIXUPSET_CALL(set_cpu_int, sun4m_send_ipi, BTFIXUPCALL_NORM);
diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c
index 5cc7dc51de3..59476868652 100644
--- a/arch/sparc/kernel/sun4m_smp.c
+++ b/arch/sparc/kernel/sun4m_smp.c
@@ -15,6 +15,9 @@
 #include "irq.h"
 #include "kernel.h"
 
+#define IRQ_IPI_SINGLE		12
+#define IRQ_IPI_MASK		13
+#define IRQ_IPI_RESCHED		14
 #define IRQ_CROSS_CALL		15
 
 static inline unsigned long
@@ -26,6 +29,7 @@ swap_ulong(volatile unsigned long *ptr, unsigned long val)
 	return val;
 }
 
+static void smp4m_ipi_init(void);
 static void smp_setup_percpu_timer(void);
 
 void __cpuinit smp4m_callin(void)
@@ -59,8 +63,6 @@ void __cpuinit smp4m_callin(void)
 	local_flush_cache_all();
 	local_flush_tlb_all();
 
-	cpu_probe();
-
 	/* Fix idle thread fields. */
 	__asm__ __volatile__("ld [%0], %%g6\n\t"
 			     : : "r" (&current_set[cpuid])
@@ -70,7 +72,7 @@ void __cpuinit smp4m_callin(void)
 	atomic_inc(&init_mm.mm_count);
 	current->active_mm = &init_mm;
 
-	while (!cpu_isset(cpuid, smp_commenced_mask))
+	while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
 		mb();
 
 	local_irq_enable();
@@ -83,6 +85,7 @@ void __cpuinit smp4m_callin(void)
  */
 void __init smp4m_boot_cpus(void)
 {
+	smp4m_ipi_init();
 	smp_setup_percpu_timer();
 	local_flush_cache_all();
 }
@@ -150,18 +153,25 @@ void __init smp4m_smp_done(void)
 	/* Ok, they are spinning and ready to go. */
 }
 
-/* At each hardware IRQ, we get this called to forward IRQ reception
- * to the next processor.  The caller must disable the IRQ level being
- * serviced globally so that there are no double interrupts received.
- *
- * XXX See sparc64 irq.c.
- */
-void smp4m_irq_rotate(int cpu)
+
+/* Initialize IPIs on the SUN4M SMP machine */
+static void __init smp4m_ipi_init(void)
+{
+}
+
+static void smp4m_ipi_resched(int cpu)
+{
+	set_cpu_int(cpu, IRQ_IPI_RESCHED);
+}
+
+static void smp4m_ipi_single(int cpu)
 {
-	int next = cpu_data(cpu).next;
+	set_cpu_int(cpu, IRQ_IPI_SINGLE);
+}
 
-	if (next != cpu)
-		set_irq_udt(next);
+static void smp4m_ipi_mask_one(int cpu)
+{
+	set_cpu_int(cpu, IRQ_IPI_MASK);
 }
 
 static struct smp_funcall {
@@ -199,10 +209,10 @@ static void smp4m_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
 		{
 			register int i;
 
-			cpu_clear(smp_processor_id(), mask);
-			cpus_and(mask, cpu_online_map, mask);
+			cpumask_clear_cpu(smp_processor_id(), &mask);
+			cpumask_and(&mask, cpu_online_mask, &mask);
 			for (i = 0; i < ncpus; i++) {
-				if (cpu_isset(i, mask)) {
+				if (cpumask_test_cpu(i, &mask)) {
 					ccall_info.processors_in[i] = 0;
 					ccall_info.processors_out[i] = 0;
 					set_cpu_int(i, IRQ_CROSS_CALL);
@@ -218,7 +228,7 @@ static void smp4m_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
 
 			i = 0;
 			do {
-				if (!cpu_isset(i, mask))
+				if (!cpumask_test_cpu(i, &mask))
 					continue;
 				while (!ccall_info.processors_in[i])
 					barrier();
@@ -226,7 +236,7 @@ static void smp4m_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
 
 			i = 0;
 			do {
-				if (!cpu_isset(i, mask))
+				if (!cpumask_test_cpu(i, &mask))
 					continue;
 				while (!ccall_info.processors_out[i])
 					barrier();
@@ -277,7 +287,7 @@ static void __cpuinit smp_setup_percpu_timer(void)
 	load_profile_irq(cpu, lvl14_resolution);
 
 	if (cpu == boot_cpu_id)
-		enable_pil_irq(14);
+		sun4m_unmask_profile_irq();
 }
 
 static void __init smp4m_blackbox_id(unsigned *addr)
@@ -306,4 +316,7 @@ void __init sun4m_init_smp(void)
 	BTFIXUPSET_BLACKBOX(load_current, smp4m_blackbox_current);
 	BTFIXUPSET_CALL(smp_cross_call, smp4m_cross_call, BTFIXUPCALL_NORM);
 	BTFIXUPSET_CALL(__hard_smp_processor_id, __smp4m_processor_id, BTFIXUPCALL_NORM);
+	BTFIXUPSET_CALL(smp_ipi_resched, smp4m_ipi_resched, BTFIXUPCALL_NORM);
+	BTFIXUPSET_CALL(smp_ipi_single, smp4m_ipi_single, BTFIXUPCALL_NORM);
+	BTFIXUPSET_CALL(smp_ipi_mask_one, smp4m_ipi_mask_one, BTFIXUPCALL_NORM);
 }
diff --git a/arch/sparc/kernel/sysfs.c b/arch/sparc/kernel/sysfs.c
index 1eb8b00aed7..7408201d7ef 100644
--- a/arch/sparc/kernel/sysfs.c
+++ b/arch/sparc/kernel/sysfs.c
@@ -103,9 +103,10 @@ static unsigned long run_on_cpu(unsigned long cpu,
 			        unsigned long (*func)(unsigned long),
 				unsigned long arg)
 {
-	cpumask_t old_affinity = current->cpus_allowed;
+	cpumask_t old_affinity;
 	unsigned long ret;
 
+	cpumask_copy(&old_affinity, tsk_cpus_allowed(current));
 	/* should return -EINVAL to userspace */
 	if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
 		return 0;
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index 47ac73c32e8..332c83ff770 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -84,4 +84,4 @@ sys_call_table:
 /*320*/	.long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
 /*325*/	.long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
 /*330*/	.long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
-/*335*/	.long sys_syncfs
+/*335*/	.long sys_syncfs, sys_sendmmsg
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 4f3170c1ef4..43887ca0be0 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -85,7 +85,7 @@ sys_call_table32:
 /*320*/	.word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv
 	.word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init
 /*330*/	.word sys32_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime
-	.word sys_syncfs
+	.word sys_syncfs, compat_sys_sendmmsg
 
 #endif /* CONFIG_COMPAT */
 
@@ -162,4 +162,4 @@ sys_call_table:
 /*320*/	.word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
 	.word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
 /*330*/	.word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
-	.word sys_syncfs
+	.word sys_syncfs, sys_sendmmsg
diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c
index 4e236391b63..1060e0672a4 100644
--- a/arch/sparc/kernel/time_32.c
+++ b/arch/sparc/kernel/time_32.c
@@ -168,7 +168,7 @@ static int __devinit clock_probe(struct platform_device *op)
 	return 0;
 }
 
-static struct of_device_id __initdata clock_match[] = {
+static struct of_device_id clock_match[] = {
 	{
 		.name = "eeprom",
 	},
@@ -228,14 +228,10 @@ static void __init sbus_time_init(void)
 
 void __init time_init(void)
 {
-#ifdef CONFIG_PCI
-	extern void pci_time_init(void);
-	if (pcic_present()) {
+	if (pcic_present())
 		pci_time_init();
-		return;
-	}
-#endif
-	sbus_time_init();
+	else
+		sbus_time_init();
 }
 
 
diff --git a/arch/sparc/kernel/us2e_cpufreq.c b/arch/sparc/kernel/us2e_cpufreq.c
index 8f982b76c71..531d54fc982 100644
--- a/arch/sparc/kernel/us2e_cpufreq.c
+++ b/arch/sparc/kernel/us2e_cpufreq.c
@@ -237,7 +237,7 @@ static unsigned int us2e_freq_get(unsigned int cpu)
 	if (!cpu_online(cpu))
 		return 0;
 
-	cpus_allowed = current->cpus_allowed;
+	cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current));
 	set_cpus_allowed_ptr(current, cpumask_of(cpu));
 
 	clock_tick = sparc64_get_clock_tick(cpu) / 1000;
@@ -258,7 +258,7 @@ static void us2e_set_cpu_divider_index(unsigned int cpu, unsigned int index)
 	if (!cpu_online(cpu))
 		return;
 
-	cpus_allowed = current->cpus_allowed;
+	cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current));
 	set_cpus_allowed_ptr(current, cpumask_of(cpu));
 
 	new_freq = clock_tick = sparc64_get_clock_tick(cpu) / 1000;
diff --git a/arch/sparc/kernel/us3_cpufreq.c b/arch/sparc/kernel/us3_cpufreq.c
index f35d1e79454..9a8ceb70083 100644
--- a/arch/sparc/kernel/us3_cpufreq.c
+++ b/arch/sparc/kernel/us3_cpufreq.c
@@ -85,7 +85,7 @@ static unsigned int us3_freq_get(unsigned int cpu)
 	if (!cpu_online(cpu))
 		return 0;
 
-	cpus_allowed = current->cpus_allowed;
+	cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current));
 	set_cpus_allowed_ptr(current, cpumask_of(cpu));
 
 	reg = read_safari_cfg();
@@ -105,7 +105,7 @@ static void us3_set_cpu_divider_index(unsigned int cpu, unsigned int index)
 	if (!cpu_online(cpu))
 		return;
 
-	cpus_allowed = current->cpus_allowed;
+	cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current));
 	set_cpus_allowed_ptr(current, cpumask_of(cpu));
 
 	new_freq = sparc64_get_clock_tick(cpu) / 1000;
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 846d1c4374e..7f01b8fce8b 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -15,7 +15,6 @@ lib-$(CONFIG_SPARC32) += divdi3.o udivdi3.o
 lib-$(CONFIG_SPARC32) += copy_user.o locks.o
 lib-y                 += atomic_$(BITS).o
 lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
-lib-$(CONFIG_SPARC32) += rwsem_32.o
 lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
diff --git a/arch/sparc/lib/checksum_32.S b/arch/sparc/lib/checksum_32.S
index 3632cb34e91..0084c3361e1 100644
--- a/arch/sparc/lib/checksum_32.S
+++ b/arch/sparc/lib/checksum_32.S
@@ -289,10 +289,16 @@ cc_end_cruft:
 
 	/* Also, handle the alignment code out of band. */
 cc_dword_align:
-	cmp	%g1, 6
-	bl,a	ccte
+	cmp	%g1, 16
+	bge	1f
+	 srl	%g1, 1, %o3
+2:	cmp	%o3, 0
+	be,a	ccte
 	 andcc	%g1, 0xf, %o3
-	andcc	%o0, 0x1, %g0
+	andcc	%o3, %o0, %g0	! Check %o0 only (%o1 has the same last 2 bits)
+	be,a	2b
+	 srl	%o3, 1, %o3
+1:	andcc	%o0, 0x1, %g0
 	bne	ccslow
 	 andcc	%o0, 0x2, %g0
 	be	1f
diff --git a/arch/sparc/lib/rwsem_32.S b/arch/sparc/lib/rwsem_32.S
deleted file mode 100644
index 9675268e7fd..00000000000
--- a/arch/sparc/lib/rwsem_32.S
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * Assembly part of rw semaphores.
- *
- * Copyright (C) 1999 Jakub Jelinek (jakub@redhat.com)
- */
-
-#include <asm/ptrace.h>
-#include <asm/psr.h>
-
-	.section .sched.text, "ax"
-	.align	4
-
-	.globl		___down_read
-___down_read:
-	rd		%psr, %g3
-	nop
-	nop
-	nop
-	or		%g3, PSR_PIL, %g7
-	wr		%g7, 0, %psr
-	nop
-	nop
-	nop
-#ifdef CONFIG_SMP
-1:	ldstub		[%g1 + 4], %g7
-	tst		%g7
-	bne		1b
-	 ld		[%g1], %g7
-	sub		%g7, 1, %g7
-	st		%g7, [%g1]
-	stb		%g0, [%g1 + 4]
-#else
-	ld		[%g1], %g7
-	sub		%g7, 1, %g7
-	st		%g7, [%g1]
-#endif
-	wr		%g3, 0, %psr
-	add		%g7, 1, %g7
-	nop
-	nop
-	subcc		%g7, 1, %g7
-	bneg		3f
-	 nop
-2:	jmpl		%o7, %g0
-	 mov		%g4, %o7
-3:	save		%sp, -64, %sp
-	mov		%g1, %l1
-	mov		%g4, %l4
-	bcs		4f
-	 mov		%g5, %l5
-	call		down_read_failed
-	 mov		%l1, %o0
-	mov		%l1, %g1
-	mov		%l4, %g4
-	ba		___down_read
-	 restore	%l5, %g0, %g5
-4:	call		down_read_failed_biased
-	 mov		%l1, %o0
-	mov		%l1, %g1
-	mov		%l4, %g4
-	ba		2b
-	 restore	%l5, %g0, %g5
-
-	.globl		___down_write
-___down_write:
-	rd		%psr, %g3
-	nop
-	nop
-	nop
-	or		%g3, PSR_PIL, %g7
-	wr		%g7, 0, %psr
-	sethi		%hi(0x01000000), %g2
-	nop
-	nop
-#ifdef CONFIG_SMP
-1:	ldstub		[%g1 + 4], %g7
-	tst		%g7
-	bne		1b
-	 ld		[%g1], %g7
-	sub		%g7, %g2, %g7
-	st		%g7, [%g1]
-	stb		%g0, [%g1 + 4]
-#else
-	ld		[%g1], %g7
-	sub		%g7, %g2, %g7
-	st		%g7, [%g1]
-#endif
-	wr		%g3, 0, %psr
-	add		%g7, %g2, %g7
-	nop
-	nop
-	subcc		%g7, %g2, %g7
-	bne		3f
-	 nop
-2:	jmpl		%o7, %g0
-	 mov		%g4, %o7
-3:	save		%sp, -64, %sp
-	mov		%g1, %l1
-	mov		%g4, %l4
-	bcs		4f
-	 mov		%g5, %l5
-	call		down_write_failed
-	 mov		%l1, %o0
-	mov		%l1, %g1
-	mov		%l4, %g4
-	ba		___down_write
-	 restore	%l5, %g0, %g5
-4:	call		down_write_failed_biased
-	 mov		%l1, %o0
-	mov		%l1, %g1
-	mov		%l4, %g4
-	ba		2b
-	 restore	%l5, %g0, %g5
-
-	.text
-	.globl		___up_read
-___up_read:
-	rd		%psr, %g3
-	nop
-	nop
-	nop
-	or		%g3, PSR_PIL, %g7
-	wr		%g7, 0, %psr
-	nop
-	nop
-	nop
-#ifdef CONFIG_SMP
-1:	ldstub		[%g1 + 4], %g7
-	tst		%g7
-	bne		1b
-	 ld		[%g1], %g7
-	add		%g7, 1, %g7
-	st		%g7, [%g1]
-	stb		%g0, [%g1 + 4]
-#else
-	ld		[%g1], %g7
-	add		%g7, 1, %g7
-	st		%g7, [%g1]
-#endif
-	wr		%g3, 0, %psr
-	nop
-	nop
-	nop
-	cmp		%g7, 0
-	be		3f
-	 nop
-2:	jmpl		%o7, %g0
-	 mov		%g4, %o7
-3:	save		%sp, -64, %sp
-	mov		%g1, %l1
-	mov		%g4, %l4
-	mov		%g5, %l5
-	clr		%o1
-	call		__rwsem_wake
-	 mov		%l1, %o0
-	mov		%l1, %g1
-	mov		%l4, %g4
-	ba		2b
-	 restore	%l5, %g0, %g5
-
-	.globl		___up_write
-___up_write:
-	rd		%psr, %g3
-	nop
-	nop
-	nop
-	or		%g3, PSR_PIL, %g7
-	wr		%g7, 0, %psr
-	sethi		%hi(0x01000000), %g2
-	nop
-	nop
-#ifdef CONFIG_SMP
-1:	ldstub		[%g1 + 4], %g7
-	tst		%g7
-	bne		1b
-	 ld		[%g1], %g7
-	add		%g7, %g2, %g7
-	st		%g7, [%g1]
-	stb		%g0, [%g1 + 4]
-#else
-	ld		[%g1], %g7
-	add		%g7, %g2, %g7
-	st		%g7, [%g1]
-#endif
-	wr		%g3, 0, %psr
-	sub		%g7, %g2, %g7
-	nop
-	nop
-	addcc		%g7, %g2, %g7
-	bcs		3f
-	 nop
-2:	jmpl		%o7, %g0
-	 mov		%g4, %o7
-3:	save		%sp, -64, %sp
-	mov		%g1, %l1
-	mov		%g4, %l4
-	mov		%g5, %l5
-	mov		%g7, %o1
-	call		__rwsem_wake
-	 mov		%l1, %o0
-	mov		%l1, %g1
-	mov		%l4, %g4
-	ba		2b
-	 restore	%l5, %g0, %g5
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 2f6ae1d1fb6..e10cd03fab8 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -862,7 +862,7 @@ static void init_node_masks_nonnuma(void)
 	for (i = 0; i < NR_CPUS; i++)
 		numa_cpu_lookup_table[i] = 0;
 
-	numa_cpumask_lookup_table[0] = CPU_MASK_ALL;
+	cpumask_setall(&numa_cpumask_lookup_table[0]);
 }
 
 #ifdef CONFIG_NEED_MULTIPLE_NODES
@@ -1080,7 +1080,7 @@ static void __init numa_parse_mdesc_group_cpus(struct mdesc_handle *md,
 {
 	u64 arc;
 
-	cpus_clear(*mask);
+	cpumask_clear(mask);
 
 	mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_BACK) {
 		u64 target = mdesc_arc_target(md, arc);
@@ -1091,7 +1091,7 @@ static void __init numa_parse_mdesc_group_cpus(struct mdesc_handle *md,
 			continue;
 		id = mdesc_get_property(md, target, "id", NULL);
 		if (*id < nr_cpu_ids)
-			cpu_set(*id, *mask);
+			cpumask_set_cpu(*id, mask);
 	}
 }
 
@@ -1153,13 +1153,13 @@ static int __init numa_parse_mdesc_group(struct mdesc_handle *md, u64 grp,
 
 	numa_parse_mdesc_group_cpus(md, grp, &mask);
 
-	for_each_cpu_mask(cpu, mask)
+	for_each_cpu(cpu, &mask)
 		numa_cpu_lookup_table[cpu] = index;
-	numa_cpumask_lookup_table[index] = mask;
+	cpumask_copy(&numa_cpumask_lookup_table[index], &mask);
 
 	if (numa_debug) {
 		printk(KERN_INFO "NUMA GROUP[%d]: cpus [ ", index);
-		for_each_cpu_mask(cpu, mask)
+		for_each_cpu(cpu, &mask)
 			printk("%d ", cpu);
 		printk("]\n");
 	}
@@ -1218,7 +1218,7 @@ static int __init numa_parse_jbus(void)
 	index = 0;
 	for_each_present_cpu(cpu) {
 		numa_cpu_lookup_table[cpu] = index;
-		numa_cpumask_lookup_table[index] = cpumask_of_cpu(cpu);
+		cpumask_copy(&numa_cpumask_lookup_table[index], cpumask_of(cpu));
 		node_masks[index].mask = ~((1UL << 36UL) - 1UL);
 		node_masks[index].val = cpu << 36UL;
 
diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c
index a4293102ef8..c52224d5ed4 100644
--- a/arch/tile/kernel/smp.c
+++ b/arch/tile/kernel/smp.c
@@ -189,12 +189,8 @@ void flush_icache_range(unsigned long start, unsigned long end)
 /* Called when smp_send_reschedule() triggers IRQ_RESCHEDULE. */
 static irqreturn_t handle_reschedule_ipi(int irq, void *token)
 {
-	/*
-	 * Nothing to do here; when we return from interrupt, the
-	 * rescheduling will occur there. But do bump the interrupt
-	 * profiler count in the meantime.
-	 */
 	__get_cpu_var(irq_stat).irq_resched_count++;
+	scheduler_ipi();
 
 	return IRQ_HANDLED;
 }
diff --git a/arch/um/Kconfig.um b/arch/um/Kconfig.um
index 90a438acbfa..b5e675e370c 100644
--- a/arch/um/Kconfig.um
+++ b/arch/um/Kconfig.um
@@ -47,7 +47,7 @@ config HOSTFS
 
 config HPPFS
 	tristate "HoneyPot ProcFS (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	depends on EXPERIMENTAL && PROC_FS
 	help
 	  hppfs (HoneyPot ProcFS) is a filesystem which allows UML /proc
 	  entries to be overridden, removed, or fabricated from the host.
diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h
index e2cf786bda0..5bd1bad33fa 100644
--- a/arch/um/include/asm/thread_info.h
+++ b/arch/um/include/asm/thread_info.h
@@ -49,7 +49,10 @@ static inline struct thread_info *current_thread_info(void)
 {
 	struct thread_info *ti;
 	unsigned long mask = THREAD_SIZE - 1;
-	ti = (struct thread_info *) (((unsigned long) &ti) & ~mask);
+	void *p;
+
+	asm volatile ("" : "=r" (p) : "0" (&ti));
+	ti = (struct thread_info *) (((unsigned long)p) & ~mask);
 	return ti;
 }
 
diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
index 106bf27e2a9..eefb107d2d7 100644
--- a/arch/um/kernel/smp.c
+++ b/arch/um/kernel/smp.c
@@ -173,7 +173,7 @@ void IPI_handler(int cpu)
 			break;
 
 		case 'R':
-			set_tsk_need_resched(current);
+			scheduler_ipi();
 			break;
 
 		case 'S':
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index 6ea77979531..42827cafa6a 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -5,6 +5,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <unistd.h>
 #include <errno.h>
 #include <signal.h>
 #include <string.h>
@@ -75,6 +76,26 @@ void setup_hostinfo(char *buf, int len)
 		 host.release, host.version, host.machine);
 }
 
+/*
+ * We cannot use glibc's abort(). It makes use of tgkill() which
+ * has no effect within UML's kernel threads.
+ * After that glibc would execute an invalid instruction to kill
+ * the calling process and UML crashes with SIGSEGV.
+ */
+static inline void __attribute__ ((noreturn)) uml_abort(void)
+{
+	sigset_t sig;
+
+	fflush(NULL);
+
+	if (!sigemptyset(&sig) && !sigaddset(&sig, SIGABRT))
+		sigprocmask(SIG_UNBLOCK, &sig, 0);
+
+	for (;;)
+		if (kill(getpid(), SIGABRT) < 0)
+			exit(127);
+}
+
 void os_dump_core(void)
 {
 	int pid;
@@ -116,5 +137,5 @@ void os_dump_core(void)
 	while ((pid = waitpid(-1, NULL, WNOHANG | __WALL)) > 0)
 		os_kill_ptraced_process(pid, 0);
 
-	abort();
+	uml_abort();
 }
diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile
index 804b28dd032..b1da91c1b20 100644
--- a/arch/um/sys-i386/Makefile
+++ b/arch/um/sys-i386/Makefile
@@ -4,7 +4,7 @@
 
 obj-y = bug.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \
 	ptrace_user.o setjmp.o signal.o stub.o stub_segv.o syscalls.o sysrq.o \
-	sys_call_table.o tls.o
+	sys_call_table.o tls.o atomic64_cx8_32.o
 
 obj-$(CONFIG_BINFMT_ELF) += elfcore.o
 
diff --git a/arch/um/sys-i386/atomic64_cx8_32.S b/arch/um/sys-i386/atomic64_cx8_32.S
new file mode 100644
index 00000000000..1e901d3d4a9
--- /dev/null
+++ b/arch/um/sys-i386/atomic64_cx8_32.S
@@ -0,0 +1,225 @@
+/*
+ * atomic64_t for 586+
+ *
+ * Copied from arch/x86/lib/atomic64_cx8_32.S
+ *
+ * Copyright © 2010  Luca Barbieri
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/alternative-asm.h>
+#include <asm/dwarf2.h>
+
+.macro SAVE reg
+	pushl_cfi %\reg
+	CFI_REL_OFFSET \reg, 0
+.endm
+
+.macro RESTORE reg
+	popl_cfi %\reg
+	CFI_RESTORE \reg
+.endm
+
+.macro read64 reg
+	movl %ebx, %eax
+	movl %ecx, %edx
+/* we need LOCK_PREFIX since otherwise cmpxchg8b always does the write */
+	LOCK_PREFIX
+	cmpxchg8b (\reg)
+.endm
+
+ENTRY(atomic64_read_cx8)
+	CFI_STARTPROC
+
+	read64 %ecx
+	ret
+	CFI_ENDPROC
+ENDPROC(atomic64_read_cx8)
+
+ENTRY(atomic64_set_cx8)
+	CFI_STARTPROC
+
+1:
+/* we don't need LOCK_PREFIX since aligned 64-bit writes
+ * are atomic on 586 and newer */
+	cmpxchg8b (%esi)
+	jne 1b
+
+	ret
+	CFI_ENDPROC
+ENDPROC(atomic64_set_cx8)
+
+ENTRY(atomic64_xchg_cx8)
+	CFI_STARTPROC
+
+	movl %ebx, %eax
+	movl %ecx, %edx
+1:
+	LOCK_PREFIX
+	cmpxchg8b (%esi)
+	jne 1b
+
+	ret
+	CFI_ENDPROC
+ENDPROC(atomic64_xchg_cx8)
+
+.macro addsub_return func ins insc
+ENTRY(atomic64_\func\()_return_cx8)
+	CFI_STARTPROC
+	SAVE ebp
+	SAVE ebx
+	SAVE esi
+	SAVE edi
+
+	movl %eax, %esi
+	movl %edx, %edi
+	movl %ecx, %ebp
+
+	read64 %ebp
+1:
+	movl %eax, %ebx
+	movl %edx, %ecx
+	\ins\()l %esi, %ebx
+	\insc\()l %edi, %ecx
+	LOCK_PREFIX
+	cmpxchg8b (%ebp)
+	jne 1b
+
+10:
+	movl %ebx, %eax
+	movl %ecx, %edx
+	RESTORE edi
+	RESTORE esi
+	RESTORE ebx
+	RESTORE ebp
+	ret
+	CFI_ENDPROC
+ENDPROC(atomic64_\func\()_return_cx8)
+.endm
+
+addsub_return add add adc
+addsub_return sub sub sbb
+
+.macro incdec_return func ins insc
+ENTRY(atomic64_\func\()_return_cx8)
+	CFI_STARTPROC
+	SAVE ebx
+
+	read64 %esi
+1:
+	movl %eax, %ebx
+	movl %edx, %ecx
+	\ins\()l $1, %ebx
+	\insc\()l $0, %ecx
+	LOCK_PREFIX
+	cmpxchg8b (%esi)
+	jne 1b
+
+10:
+	movl %ebx, %eax
+	movl %ecx, %edx
+	RESTORE ebx
+	ret
+	CFI_ENDPROC
+ENDPROC(atomic64_\func\()_return_cx8)
+.endm
+
+incdec_return inc add adc
+incdec_return dec sub sbb
+
+ENTRY(atomic64_dec_if_positive_cx8)
+	CFI_STARTPROC
+	SAVE ebx
+
+	read64 %esi
+1:
+	movl %eax, %ebx
+	movl %edx, %ecx
+	subl $1, %ebx
+	sbb $0, %ecx
+	js 2f
+	LOCK_PREFIX
+	cmpxchg8b (%esi)
+	jne 1b
+
+2:
+	movl %ebx, %eax
+	movl %ecx, %edx
+	RESTORE ebx
+	ret
+	CFI_ENDPROC
+ENDPROC(atomic64_dec_if_positive_cx8)
+
+ENTRY(atomic64_add_unless_cx8)
+	CFI_STARTPROC
+	SAVE ebp
+	SAVE ebx
+/* these just push these two parameters on the stack */
+	SAVE edi
+	SAVE esi
+
+	movl %ecx, %ebp
+	movl %eax, %esi
+	movl %edx, %edi
+
+	read64 %ebp
+1:
+	cmpl %eax, 0(%esp)
+	je 4f
+2:
+	movl %eax, %ebx
+	movl %edx, %ecx
+	addl %esi, %ebx
+	adcl %edi, %ecx
+	LOCK_PREFIX
+	cmpxchg8b (%ebp)
+	jne 1b
+
+	movl $1, %eax
+3:
+	addl $8, %esp
+	CFI_ADJUST_CFA_OFFSET -8
+	RESTORE ebx
+	RESTORE ebp
+	ret
+4:
+	cmpl %edx, 4(%esp)
+	jne 2b
+	xorl %eax, %eax
+	jmp 3b
+	CFI_ENDPROC
+ENDPROC(atomic64_add_unless_cx8)
+
+ENTRY(atomic64_inc_not_zero_cx8)
+	CFI_STARTPROC
+	SAVE ebx
+
+	read64 %esi
+1:
+	testl %eax, %eax
+	je 4f
+2:
+	movl %eax, %ebx
+	movl %edx, %ecx
+	addl $1, %ebx
+	adcl $0, %ecx
+	LOCK_PREFIX
+	cmpxchg8b (%esi)
+	jne 1b
+
+	movl $1, %eax
+3:
+	RESTORE ebx
+	ret
+4:
+	testl %edx, %edx
+	jne 2b
+	jmp 3b
+	CFI_ENDPROC
+ENDPROC(atomic64_inc_not_zero_cx8)
diff --git a/arch/unicore32/kernel/irq.c b/arch/unicore32/kernel/irq.c
index 2aa30a364bb..d4efa7d679f 100644
--- a/arch/unicore32/kernel/irq.c
+++ b/arch/unicore32/kernel/irq.c
@@ -23,7 +23,7 @@
 #include <linux/list.h>
 #include <linux/kallsyms.h>
 #include <linux/proc_fs.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/gpio.h>
 
 #include <asm/system.h>
@@ -237,7 +237,7 @@ static struct puv3_irq_state {
 	unsigned int	iccr;
 } puv3_irq_state;
 
-static int puv3_irq_suspend(struct sys_device *dev, pm_message_t state)
+static int puv3_irq_suspend(void)
 {
 	struct puv3_irq_state *st = &puv3_irq_state;
 
@@ -265,7 +265,7 @@ static int puv3_irq_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static int puv3_irq_resume(struct sys_device *dev)
+static void puv3_irq_resume(void)
 {
 	struct puv3_irq_state *st = &puv3_irq_state;
 
@@ -278,27 +278,20 @@ static int puv3_irq_resume(struct sys_device *dev)
 
 		writel(st->icmr, INTC_ICMR);
 	}
-	return 0;
 }
 
-static struct sysdev_class puv3_irq_sysclass = {
-	.name		= "pkunity-irq",
+static struct syscore_ops puv3_irq_syscore_ops = {
 	.suspend	= puv3_irq_suspend,
 	.resume		= puv3_irq_resume,
 };
 
-static struct sys_device puv3_irq_device = {
-	.id		= 0,
-	.cls		= &puv3_irq_sysclass,
-};
-
-static int __init puv3_irq_init_devicefs(void)
+static int __init puv3_irq_init_syscore(void)
 {
-	sysdev_class_register(&puv3_irq_sysclass);
-	return sysdev_register(&puv3_irq_device);
+	register_syscore_ops(&puv3_irq_syscore_ops);
+	return 0;
 }
 
-device_initcall(puv3_irq_init_devicefs);
+device_initcall(puv3_irq_init_syscore);
 
 void __init init_IRQ(void)
 {
diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c
index 254e36fa951..b9a26465e72 100644
--- a/arch/unicore32/kernel/traps.c
+++ b/arch/unicore32/kernel/traps.c
@@ -192,7 +192,6 @@ static int __die(const char *str, int err, struct thread_info *thread,
 
 	printk(KERN_EMERG "Internal error: %s: %x [#%d]\n",
 	       str, err, ++die_counter);
-	sysfs_printk_last_file();
 
 	/* trap and error numbers are mostly meaningless on UniCore */
 	ret = notify_die(DIE_OOPS, str, regs, err, tsk->thread.trap_no, \
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index 0e103236b75..0e9dec6cadd 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -15,3 +15,4 @@ obj-y += vdso/
 obj-$(CONFIG_IA32_EMULATION) += ia32/
 
 obj-y += platform/
+obj-y += net/
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cc6c53a95bf..880fcb6c86f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -8,6 +8,7 @@ config 64BIT
 
 config X86_32
 	def_bool !64BIT
+	select CLKSRC_I8253
 
 config X86_64
 	def_bool 64BIT
@@ -71,7 +72,7 @@ config X86
 	select GENERIC_IRQ_SHOW
 	select IRQ_FORCED_THREADING
 	select USE_GENERIC_SMP_HELPERS if SMP
-	select ARCH_NO_SYSDEV_OPS
+	select HAVE_BPF_JIT if (X86_64 && NET)
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS)
@@ -112,7 +113,14 @@ config MMU
 	def_bool y
 
 config ZONE_DMA
-	def_bool y
+	bool "DMA memory allocation support" if EXPERT
+	default y
+	help
+	  DMA memory allocation support allows devices with less than 32-bit
+	  addressing to allocate within the first 16MB of address space.
+	  Disable if no such devices will be used.
+
+	  If unsure, say Y.
 
 config SBUS
 	bool
@@ -365,17 +373,6 @@ config X86_UV
 # Following is an alphabetically sorted list of 32 bit extended platforms
 # Please maintain the alphabetic order if and when there are additions
 
-config X86_ELAN
-	bool "AMD Elan"
-	depends on X86_32
-	depends on X86_EXTENDED_PLATFORM
-	---help---
-	  Select this for an AMD Elan processor.
-
-	  Do not use this option for K6/Athlon/Opteron processors!
-
-	  If unsure, choose "PC-compatible" instead.
-
 config X86_INTEL_CE
 	bool "CE4100 TV platform"
 	depends on PCI
@@ -690,6 +687,7 @@ config AMD_IOMMU
 	bool "AMD IOMMU support"
 	select SWIOTLB
 	select PCI_MSI
+	select PCI_IOV
 	depends on X86_64 && PCI && ACPI
 	---help---
 	  With this option you can enable support for AMD IOMMU hardware in
@@ -1174,7 +1172,7 @@ comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
 config AMD_NUMA
 	def_bool y
 	prompt "Old style AMD Opteron NUMA detection"
-	depends on X86_64 && NUMA && PCI
+	depends on NUMA && PCI
 	---help---
 	  Enable AMD NUMA node topology detection.  You should say Y here if
 	  you have a multi processor AMD system. This uses an old method to
@@ -1201,7 +1199,7 @@ config NODES_SPAN_OTHER_NODES
 
 config NUMA_EMU
 	bool "NUMA emulation"
-	depends on X86_64 && NUMA
+	depends on NUMA
 	---help---
 	  Enable NUMA emulation. A flat machine will be split
 	  into virtual nodes when booted with "numa=fake=N", where N is the
@@ -1223,6 +1221,10 @@ config HAVE_ARCH_BOOTMEM
 	def_bool y
 	depends on X86_32 && NUMA
 
+config HAVE_ARCH_ALLOC_REMAP
+	def_bool y
+	depends on X86_32 && NUMA
+
 config ARCH_HAVE_MEMORY_PRESENT
 	def_bool y
 	depends on X86_32 && DISCONTIGMEM
@@ -1231,13 +1233,9 @@ config NEED_NODE_MEMMAP_SIZE
 	def_bool y
 	depends on X86_32 && (DISCONTIGMEM || SPARSEMEM)
 
-config HAVE_ARCH_ALLOC_REMAP
-	def_bool y
-	depends on X86_32 && NUMA
-
 config ARCH_FLATMEM_ENABLE
 	def_bool y
-	depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && !NUMA
+	depends on X86_32 && !NUMA
 
 config ARCH_DISCONTIGMEM_ENABLE
 	def_bool y
@@ -1247,20 +1245,16 @@ config ARCH_DISCONTIGMEM_DEFAULT
 	def_bool y
 	depends on NUMA && X86_32
 
-config ARCH_PROC_KCORE_TEXT
-	def_bool y
-	depends on X86_64 && PROC_KCORE
-
-config ARCH_SPARSEMEM_DEFAULT
-	def_bool y
-	depends on X86_64
-
 config ARCH_SPARSEMEM_ENABLE
 	def_bool y
 	depends on X86_64 || NUMA || (EXPERIMENTAL && X86_32) || X86_32_NON_STANDARD
 	select SPARSEMEM_STATIC if X86_32
 	select SPARSEMEM_VMEMMAP_ENABLE if X86_64
 
+config ARCH_SPARSEMEM_DEFAULT
+	def_bool y
+	depends on X86_64
+
 config ARCH_SELECT_MEMORY_MODEL
 	def_bool y
 	depends on ARCH_SPARSEMEM_ENABLE
@@ -1269,6 +1263,10 @@ config ARCH_MEMORY_PROBE
 	def_bool X86_64
 	depends on MEMORY_HOTPLUG
 
+config ARCH_PROC_KCORE_TEXT
+	def_bool y
+	depends on X86_64 && PROC_KCORE
+
 config ILLEGAL_POINTER_VALUE
        hex
        default 0 if X86_32
@@ -1703,10 +1701,6 @@ config ARCH_ENABLE_MEMORY_HOTREMOVE
 	def_bool y
 	depends on MEMORY_HOTPLUG
 
-config HAVE_ARCH_EARLY_PFN_TO_NID
-	def_bool X86_64
-	depends on NUMA
-
 config USE_PERCPU_NUMA_NODE_ID
 	def_bool y
 	depends on NUMA
@@ -1848,7 +1842,7 @@ config APM_ALLOW_INTS
 
 endif # APM
 
-source "arch/x86/kernel/cpu/cpufreq/Kconfig"
+source "drivers/cpufreq/Kconfig"
 
 source "drivers/cpuidle/Kconfig"
 
@@ -2076,7 +2070,7 @@ config OLPC
 	depends on !X86_PAE
 	select GPIOLIB
 	select OF
-	select OF_PROMTREE if PROC_DEVICETREE
+	select OF_PROMTREE
 	---help---
 	  Add support for detecting the unique features of the OLPC
 	  XO hardware.
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index d161e939df6..6a7cfdf8ff6 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -1,6 +1,4 @@
 # Put here option for CPU selection and depending optimization
-if !X86_ELAN
-
 choice
 	prompt "Processor family"
 	default M686 if X86_32
@@ -203,6 +201,14 @@ config MWINCHIP3D
 	  stores for this CPU, which can increase performance of some
 	  operations.
 
+config MELAN
+	bool "AMD Elan"
+	depends on X86_32
+	---help---
+	  Select this for an AMD Elan processor.
+
+	  Do not use this option for K6/Athlon/Opteron processors!
+
 config MGEODEGX1
 	bool "GeodeGX1"
 	depends on X86_32
@@ -292,8 +298,6 @@ config X86_GENERIC
 	  This is really intended for distributors who need more
 	  generic optimizations.
 
-endif
-
 #
 # Define implied options from the CPU selection here
 config X86_INTERNODE_CACHE_SHIFT
@@ -312,7 +316,7 @@ config X86_L1_CACHE_SHIFT
 	int
 	default "7" if MPENTIUM4 || MPSC
 	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
-	default "4" if X86_ELAN || M486 || M386 || MGEODEGX1
+	default "4" if MELAN || M486 || M386 || MGEODEGX1
 	default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
 
 config X86_XADD
@@ -358,7 +362,7 @@ config X86_POPAD_OK
 
 config X86_ALIGNMENT_16
 	def_bool y
-	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
+	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
 
 config X86_INTEL_USERCOPY
 	def_bool y
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
index f2ee1abb1df..86cee7b749e 100644
--- a/arch/x86/Makefile_32.cpu
+++ b/arch/x86/Makefile_32.cpu
@@ -37,7 +37,7 @@ cflags-$(CONFIG_MATOM)		+= $(call cc-option,-march=atom,$(call cc-option,-march=
 	$(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
 
 # AMD Elan support
-cflags-$(CONFIG_X86_ELAN)	+= -march=i486
+cflags-$(CONFIG_MELAN)		+= -march=i486
 
 # Geode GX1 support
 cflags-$(CONFIG_MGEODEGX1)	+= -march=pentium-mmx
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index cae3feb1035..db75d07c364 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -91,7 +91,7 @@ static int detect_memory_e801(void)
 	if (oreg.ax > 15*1024) {
 		return -1;	/* Bogus! */
 	} else if (oreg.ax == 15*1024) {
-		boot_params.alt_mem_k = (oreg.dx << 6) + oreg.ax;
+		boot_params.alt_mem_k = (oreg.bx << 6) + oreg.ax;
 	} else {
 		/*
 		 * This ignores memory above 16MB if we have a memory
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 1a58ad89fdf..c04f1b7a913 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -2,8 +2,6 @@
 # Arch-specific CryptoAPI modules.
 #
 
-obj-$(CONFIG_CRYPTO_FPU) += fpu.o
-
 obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
 obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
 obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
@@ -24,6 +22,6 @@ aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
 salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
 
-aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
+aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
 
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 2577613fb32..feee8ff1d05 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -94,6 +94,10 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
 			      const u8 *in, unsigned int len, u8 *iv);
 asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
 			      const u8 *in, unsigned int len, u8 *iv);
+
+int crypto_fpu_init(void);
+void crypto_fpu_exit(void);
+
 #ifdef CONFIG_X86_64
 asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
 			      const u8 *in, unsigned int len, u8 *iv);
@@ -1257,6 +1261,8 @@ static int __init aesni_init(void)
 		return -ENODEV;
 	}
 
+	if ((err = crypto_fpu_init()))
+		goto fpu_err;
 	if ((err = crypto_register_alg(&aesni_alg)))
 		goto aes_err;
 	if ((err = crypto_register_alg(&__aesni_alg)))
@@ -1334,6 +1340,7 @@ blk_ecb_err:
 __aes_err:
 	crypto_unregister_alg(&aesni_alg);
 aes_err:
+fpu_err:
 	return err;
 }
 
@@ -1363,6 +1370,8 @@ static void __exit aesni_exit(void)
 	crypto_unregister_alg(&blk_ecb_alg);
 	crypto_unregister_alg(&__aesni_alg);
 	crypto_unregister_alg(&aesni_alg);
+
+	crypto_fpu_exit();
 }
 
 module_init(aesni_init);
diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c
index 1a8f8649c03..98d7a188f46 100644
--- a/arch/x86/crypto/fpu.c
+++ b/arch/x86/crypto/fpu.c
@@ -150,18 +150,12 @@ static struct crypto_template crypto_fpu_tmpl = {
 	.module = THIS_MODULE,
 };
 
-static int __init crypto_fpu_module_init(void)
+int __init crypto_fpu_init(void)
 {
 	return crypto_register_template(&crypto_fpu_tmpl);
 }
 
-static void __exit crypto_fpu_module_exit(void)
+void __exit crypto_fpu_exit(void)
 {
 	crypto_unregister_template(&crypto_fpu_tmpl);
 }
-
-module_init(crypto_fpu_module_init);
-module_exit(crypto_fpu_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("FPU block cipher wrapper");
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 849a9d23c71..95f5826be45 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -848,4 +848,5 @@ ia32_sys_call_table:
 	.quad compat_sys_open_by_handle_at
 	.quad compat_sys_clock_adjtime
 	.quad sys_syncfs
+	.quad compat_sys_sendmmsg	/* 345 */
 ia32_syscall_end:
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 12e0e7dd869..416d865eae3 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -183,8 +183,6 @@ static inline void disable_acpi(void) { }
 
 #define ARCH_HAS_POWER_INIT	1
 
-struct bootnode;
-
 #ifdef CONFIG_ACPI_NUMA
 extern int acpi_numa;
 extern int x86_acpi_numa_init(void);
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index a63a68be1cc..94d420b360d 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -15,4 +15,13 @@
 	.endm
 #endif
 
+.macro altinstruction_entry orig alt feature orig_len alt_len
+	.align 8
+	.quad \orig
+	.quad \alt
+	.word \feature
+	.byte \orig_len
+	.byte \alt_len
+.endm
+
 #endif  /*  __ASSEMBLY__  */
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 13009d1af99..bf535f947e8 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -4,7 +4,6 @@
 #include <linux/types.h>
 #include <linux/stddef.h>
 #include <linux/stringify.h>
-#include <linux/jump_label.h>
 #include <asm/asm.h>
 
 /*
@@ -191,12 +190,4 @@ extern void *text_poke(void *addr, const void *opcode, size_t len);
 extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
 extern void text_poke_smp_batch(struct text_poke_param *params, int n);
 
-#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
-#define IDEAL_NOP_SIZE_5 5
-extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
-extern void arch_init_ideal_nop5(void);
-#else
-static inline void arch_init_ideal_nop5(void) {}
-#endif
-
 #endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h
index 916bc8111a0..55d95eb789b 100644
--- a/arch/x86/include/asm/amd_iommu_proto.h
+++ b/arch/x86/include/asm/amd_iommu_proto.h
@@ -19,13 +19,12 @@
 #ifndef _ASM_X86_AMD_IOMMU_PROTO_H
 #define _ASM_X86_AMD_IOMMU_PROTO_H
 
-struct amd_iommu;
+#include <asm/amd_iommu_types.h>
 
 extern int amd_iommu_init_dma_ops(void);
 extern int amd_iommu_init_passthrough(void);
+extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
 extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
-extern void amd_iommu_flush_all_domains(void);
-extern void amd_iommu_flush_all_devices(void);
 extern void amd_iommu_apply_erratum_63(u16 devid);
 extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu);
 extern int amd_iommu_init_devices(void);
@@ -44,4 +43,12 @@ static inline bool is_rd890_iommu(struct pci_dev *pdev)
 	       (pdev->device == PCI_DEVICE_ID_RD890_IOMMU);
 }
 
+static inline bool iommu_feature(struct amd_iommu *iommu, u64 f)
+{
+	if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
+		return false;
+
+	return !!(iommu->features & f);
+}
+
 #endif /* _ASM_X86_AMD_IOMMU_PROTO_H  */
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index e3509fc303b..4c998299541 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -68,12 +68,25 @@
 #define MMIO_CONTROL_OFFSET     0x0018
 #define MMIO_EXCL_BASE_OFFSET   0x0020
 #define MMIO_EXCL_LIMIT_OFFSET  0x0028
+#define MMIO_EXT_FEATURES	0x0030
 #define MMIO_CMD_HEAD_OFFSET	0x2000
 #define MMIO_CMD_TAIL_OFFSET	0x2008
 #define MMIO_EVT_HEAD_OFFSET	0x2010
 #define MMIO_EVT_TAIL_OFFSET	0x2018
 #define MMIO_STATUS_OFFSET	0x2020
 
+
+/* Extended Feature Bits */
+#define FEATURE_PREFETCH	(1ULL<<0)
+#define FEATURE_PPR		(1ULL<<1)
+#define FEATURE_X2APIC		(1ULL<<2)
+#define FEATURE_NX		(1ULL<<3)
+#define FEATURE_GT		(1ULL<<4)
+#define FEATURE_IA		(1ULL<<6)
+#define FEATURE_GA		(1ULL<<7)
+#define FEATURE_HE		(1ULL<<8)
+#define FEATURE_PC		(1ULL<<9)
+
 /* MMIO status bits */
 #define MMIO_STATUS_COM_WAIT_INT_MASK	0x04
 
@@ -113,7 +126,9 @@
 /* command specific defines */
 #define CMD_COMPL_WAIT          0x01
 #define CMD_INV_DEV_ENTRY       0x02
-#define CMD_INV_IOMMU_PAGES     0x03
+#define CMD_INV_IOMMU_PAGES	0x03
+#define CMD_INV_IOTLB_PAGES	0x04
+#define CMD_INV_ALL		0x08
 
 #define CMD_COMPL_WAIT_STORE_MASK	0x01
 #define CMD_COMPL_WAIT_INT_MASK		0x02
@@ -215,6 +230,8 @@
 #define IOMMU_PTE_IR (1ULL << 61)
 #define IOMMU_PTE_IW (1ULL << 62)
 
+#define DTE_FLAG_IOTLB	0x01
+
 #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
 #define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P)
 #define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK))
@@ -227,6 +244,7 @@
 /* IOMMU capabilities */
 #define IOMMU_CAP_IOTLB   24
 #define IOMMU_CAP_NPCACHE 26
+#define IOMMU_CAP_EFR     27
 
 #define MAX_DOMAIN_ID 65536
 
@@ -249,6 +267,8 @@ extern bool amd_iommu_dump;
 
 /* global flag if IOMMUs cache non-present entries */
 extern bool amd_iommu_np_cache;
+/* Only true if all IOMMUs support device IOTLBs */
+extern bool amd_iommu_iotlb_sup;
 
 /*
  * Make iterating over all IOMMUs easier
@@ -371,6 +391,9 @@ struct amd_iommu {
 	/* flags read from acpi table */
 	u8 acpi_flags;
 
+	/* Extended features */
+	u64 features;
+
 	/*
 	 * Capability pointer. There could be more than one IOMMU per PCI
 	 * device function if there are more than one AMD IOMMU capability
@@ -409,9 +432,6 @@ struct amd_iommu {
 	/* if one, we need to send a completion wait command */
 	bool need_sync;
 
-	/* becomes true if a command buffer reset is running */
-	bool reset_in_progress;
-
 	/* default dma_ops domain for that IOMMU */
 	struct dma_ops_domain *default_dom;
 
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 331682231bb..67f87f25761 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -11,7 +11,6 @@ struct amd_nb_bus_dev_range {
 
 extern const struct pci_device_id amd_nb_misc_ids[];
 extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[];
-struct bootnode;
 
 extern bool early_is_amd_nb(u32 value);
 extern int amd_cache_northbridges(void);
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 2b7d573be54..a0c46f06121 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -363,7 +363,12 @@ struct apic {
 	 */
 	int (*x86_32_early_logical_apicid)(int cpu);
 
-	/* determine CPU -> NUMA node mapping */
+	/*
+	 * Optional method called from setup_local_APIC() after logical
+	 * apicid is guaranteed to be known to initialize apicid -> node
+	 * mapping if NUMA initialization hasn't done so already.  Don't
+	 * add new users.
+	 */
 	int (*x86_32_numa_cpu_node)(int cpu);
 #endif
 };
@@ -537,8 +542,6 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
 	return cpuid_apic >> index_msb;
 }
 
-extern int default_x86_32_numa_cpu_node(int cpu);
-
 #endif
 
 static inline unsigned int
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index d87988bacf3..34595d5e103 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -78,6 +78,7 @@
 #define		APIC_DEST_LOGICAL	0x00800
 #define		APIC_DEST_PHYSICAL	0x00000
 #define		APIC_DM_FIXED		0x00000
+#define		APIC_DM_FIXED_MASK	0x00700
 #define		APIC_DM_LOWEST		0x00100
 #define		APIC_DM_SMI		0x00200
 #define		APIC_DM_REMRD		0x00300
diff --git a/arch/x86/include/asm/bios_ebda.h b/arch/x86/include/asm/bios_ebda.h
index 3c7521063d3..aa6a3170ab5 100644
--- a/arch/x86/include/asm/bios_ebda.h
+++ b/arch/x86/include/asm/bios_ebda.h
@@ -4,16 +4,40 @@
 #include <asm/io.h>
 
 /*
- * there is a real-mode segmented pointer pointing to the
- * 4K EBDA area at 0x40E.
+ * Returns physical address of EBDA.  Returns 0 if there is no EBDA.
  */
 static inline unsigned int get_bios_ebda(void)
 {
+	/*
+	 * There is a real-mode segmented pointer pointing to the
+	 * 4K EBDA area at 0x40E.
+	 */
 	unsigned int address = *(unsigned short *)phys_to_virt(0x40E);
 	address <<= 4;
 	return address;	/* 0 means none */
 }
 
+/*
+ * Return the sanitized length of the EBDA in bytes, if it exists.
+ */
+static inline unsigned int get_bios_ebda_length(void)
+{
+	unsigned int address;
+	unsigned int length;
+
+	address = get_bios_ebda();
+	if (!address)
+		return 0;
+
+	/* EBDA length is byte 0 of the EBDA (stored in KiB) */
+	length = *(unsigned char *)phys_to_virt(address);
+	length <<= 10;
+
+	/* Trim the length if it extends beyond 640KiB */
+	length = min_t(unsigned int, (640 * 1024) - address, length);
+	return length;
+}
+
 void reserve_ebda_region(void);
 
 #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 91f3e087cf2..5dc6acc98db 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -195,6 +195,8 @@
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
 #define X86_FEATURE_FSGSBASE	(9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
+#define X86_FEATURE_SMEP	(9*32+ 7) /* Supervisor Mode Execution Protection */
+#define X86_FEATURE_ERMS	(9*32+ 9) /* Enhanced REP MOVSB/STOSB */
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
 
@@ -207,8 +209,7 @@ extern const char * const x86_power_flags[32];
 #define test_cpu_cap(c, bit)						\
 	 test_bit(bit, (unsigned long *)((c)->x86_capability))
 
-#define cpu_has(c, bit)							\
-	(__builtin_constant_p(bit) &&					\
+#define REQUIRED_MASK_BIT_SET(bit)					\
 	 ( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0)) ||	\
 	   (((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1)) ||	\
 	   (((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2)) ||	\
@@ -218,10 +219,16 @@ extern const char * const x86_power_flags[32];
 	   (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) ||	\
 	   (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) ||	\
 	   (((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8)) ||	\
-	   (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) )	\
-	  ? 1 :								\
+	   (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) )
+
+#define cpu_has(c, bit)							\
+	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :	\
 	 test_cpu_cap(c, bit))
 
+#define this_cpu_has(bit)						\
+	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : 	\
+	 x86_this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability))
+
 #define boot_cpu_has(bit)	cpu_has(&boot_cpu_data, bit)
 
 #define set_cpu_cap(c, bit)	set_bit(bit, (unsigned long *)((c)->x86_capability))
diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h
index 057099e5fab..0bdb0c54d9a 100644
--- a/arch/x86/include/asm/dma.h
+++ b/arch/x86/include/asm/dma.h
@@ -69,22 +69,18 @@
 
 #define MAX_DMA_CHANNELS	8
 
-#ifdef CONFIG_X86_32
-
-/* The maximum address that we can perform a DMA transfer to on this platform */
-#define MAX_DMA_ADDRESS      (PAGE_OFFSET + 0x1000000)
-
-#else
-
 /* 16MB ISA DMA zone */
 #define MAX_DMA_PFN   ((16 * 1024 * 1024) >> PAGE_SHIFT)
 
 /* 4GB broken PCI/AGP hardware bus master zone */
 #define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT)
 
+#ifdef CONFIG_X86_32
+/* The maximum address that we can perform a DMA transfer to on this platform */
+#define MAX_DMA_ADDRESS      (PAGE_OFFSET + 0x1000000)
+#else
 /* Compat define for old dma zone */
 #define MAX_DMA_ADDRESS ((unsigned long)__va(MAX_DMA_PFN << PAGE_SHIFT))
-
 #endif
 
 /* 8237 DMA controllers */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 8e4a16508d4..7093e4a6a0b 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -90,6 +90,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
 #endif /* CONFIG_X86_32 */
 
 extern int add_efi_memmap;
+extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
 extern void efi_memblock_x86_reserve_range(void);
 extern void efi_call_phys_prelog(void);
 extern void efi_call_phys_epilog(void);
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index db24c2278be..268c783ab1c 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -38,11 +38,10 @@ extern void mcount(void);
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
 	/*
-	 * call mcount is "e8 <4 byte offset>"
-	 * The addr points to the 4 byte offset and the caller of this
-	 * function wants the pointer to e8. Simply subtract one.
+	 * addr is the address of the mcount call instruction.
+	 * recordmcount does the necessary offset calculation.
 	 */
-	return addr - 1;
+	return addr;
 }
 
 #ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/arch/x86/include/asm/i8253.h b/arch/x86/include/asm/i8253.h
index fc1f579fb96..65aaa91d585 100644
--- a/arch/x86/include/asm/i8253.h
+++ b/arch/x86/include/asm/i8253.h
@@ -6,6 +6,8 @@
 #define PIT_CH0			0x40
 #define PIT_CH2			0x42
 
+#define PIT_LATCH	LATCH
+
 extern raw_spinlock_t i8253_lock;
 
 extern struct clock_event_device *global_clock_event;
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index c4bd267dfc5..a97a240f67f 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -150,7 +150,7 @@ void setup_IO_APIC_irq_extra(u32 gsi);
 extern void ioapic_and_gsi_init(void);
 extern void ioapic_insert_resources(void);
 
-int io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr);
+int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr);
 
 extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
 extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries);
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 574dbc22893..a32b18ce6ea 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -5,20 +5,25 @@
 
 #include <linux/types.h>
 #include <asm/nops.h>
+#include <asm/asm.h>
 
 #define JUMP_LABEL_NOP_SIZE 5
 
-# define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
-
-# define JUMP_LABEL(key, label)					\
-	do {							\
-		asm goto("1:"					\
-			JUMP_LABEL_INITIAL_NOP			\
-			".pushsection __jump_table,  \"aw\" \n\t"\
-			_ASM_PTR "1b, %l[" #label "], %c0 \n\t" \
-			".popsection \n\t"			\
-			: :  "i" (key) :  : label);		\
-	} while (0)
+#define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
+
+static __always_inline bool arch_static_branch(struct jump_label_key *key)
+{
+	asm goto("1:"
+		JUMP_LABEL_INITIAL_NOP
+		".pushsection __jump_table,  \"aw\" \n\t"
+		_ASM_ALIGN "\n\t"
+		_ASM_PTR "1b, %l[l_yes], %c0 \n\t"
+		".popsection \n\t"
+		: :  "i" (key) : : l_yes);
+	return false;
+l_yes:
+	return true;
+}
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 0f521356432..0049211959c 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -14,6 +14,8 @@
 #include <asm/desc_defs.h>
 
 struct x86_emulate_ctxt;
+enum x86_intercept;
+enum x86_intercept_stage;
 
 struct x86_exception {
 	u8 vector;
@@ -24,6 +26,24 @@ struct x86_exception {
 };
 
 /*
+ * This struct is used to carry enough information from the instruction
+ * decoder to main KVM so that a decision can be made whether the
+ * instruction needs to be intercepted or not.
+ */
+struct x86_instruction_info {
+	u8  intercept;          /* which intercept                      */
+	u8  rep_prefix;         /* rep prefix?                          */
+	u8  modrm_mod;		/* mod part of modrm			*/
+	u8  modrm_reg;          /* index of register used               */
+	u8  modrm_rm;		/* rm part of modrm			*/
+	u64 src_val;            /* value of source operand              */
+	u8  src_bytes;          /* size of source operand               */
+	u8  dst_bytes;          /* size of destination operand          */
+	u8  ad_bytes;           /* size of src/dst address              */
+	u64 next_rip;           /* rip following the instruction        */
+};
+
+/*
  * x86_emulate_ops:
  *
  * These operations represent the instruction emulator's interface to memory.
@@ -62,6 +82,7 @@ struct x86_exception {
 #define X86EMUL_RETRY_INSTR     3 /* retry the instruction for some reason */
 #define X86EMUL_CMPXCHG_FAILED  4 /* cmpxchg did not see expected value */
 #define X86EMUL_IO_NEEDED       5 /* IO is needed to complete emulation */
+#define X86EMUL_INTERCEPTED     6 /* Intercepted by nested VMCB/VMCS */
 
 struct x86_emulate_ops {
 	/*
@@ -71,8 +92,9 @@ struct x86_emulate_ops {
 	 *  @val:   [OUT] Value read from memory, zero-extended to 'u_long'.
 	 *  @bytes: [IN ] Number of bytes to read from memory.
 	 */
-	int (*read_std)(unsigned long addr, void *val,
-			unsigned int bytes, struct kvm_vcpu *vcpu,
+	int (*read_std)(struct x86_emulate_ctxt *ctxt,
+			unsigned long addr, void *val,
+			unsigned int bytes,
 			struct x86_exception *fault);
 
 	/*
@@ -82,8 +104,8 @@ struct x86_emulate_ops {
 	 *  @val:   [OUT] Value write to memory, zero-extended to 'u_long'.
 	 *  @bytes: [IN ] Number of bytes to write to memory.
 	 */
-	int (*write_std)(unsigned long addr, void *val,
-			 unsigned int bytes, struct kvm_vcpu *vcpu,
+	int (*write_std)(struct x86_emulate_ctxt *ctxt,
+			 unsigned long addr, void *val, unsigned int bytes,
 			 struct x86_exception *fault);
 	/*
 	 * fetch: Read bytes of standard (non-emulated/special) memory.
@@ -92,8 +114,8 @@ struct x86_emulate_ops {
 	 *  @val:   [OUT] Value read from memory, zero-extended to 'u_long'.
 	 *  @bytes: [IN ] Number of bytes to read from memory.
 	 */
-	int (*fetch)(unsigned long addr, void *val,
-		     unsigned int bytes, struct kvm_vcpu *vcpu,
+	int (*fetch)(struct x86_emulate_ctxt *ctxt,
+		     unsigned long addr, void *val, unsigned int bytes,
 		     struct x86_exception *fault);
 
 	/*
@@ -102,11 +124,9 @@ struct x86_emulate_ops {
 	 *  @val:   [OUT] Value read from memory, zero-extended to 'u_long'.
 	 *  @bytes: [IN ] Number of bytes to read from memory.
 	 */
-	int (*read_emulated)(unsigned long addr,
-			     void *val,
-			     unsigned int bytes,
-			     struct x86_exception *fault,
-			     struct kvm_vcpu *vcpu);
+	int (*read_emulated)(struct x86_emulate_ctxt *ctxt,
+			     unsigned long addr, void *val, unsigned int bytes,
+			     struct x86_exception *fault);
 
 	/*
 	 * write_emulated: Write bytes to emulated/special memory area.
@@ -115,11 +135,10 @@ struct x86_emulate_ops {
 	 *                required).
 	 *  @bytes: [IN ] Number of bytes to write to memory.
 	 */
-	int (*write_emulated)(unsigned long addr,
-			      const void *val,
+	int (*write_emulated)(struct x86_emulate_ctxt *ctxt,
+			      unsigned long addr, const void *val,
 			      unsigned int bytes,
-			      struct x86_exception *fault,
-			      struct kvm_vcpu *vcpu);
+			      struct x86_exception *fault);
 
 	/*
 	 * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an
@@ -129,40 +148,54 @@ struct x86_emulate_ops {
 	 *  @new:   [IN ] Value to write to @addr.
 	 *  @bytes: [IN ] Number of bytes to access using CMPXCHG.
 	 */
-	int (*cmpxchg_emulated)(unsigned long addr,
+	int (*cmpxchg_emulated)(struct x86_emulate_ctxt *ctxt,
+				unsigned long addr,
 				const void *old,
 				const void *new,
 				unsigned int bytes,
-				struct x86_exception *fault,
-				struct kvm_vcpu *vcpu);
-
-	int (*pio_in_emulated)(int size, unsigned short port, void *val,
-			       unsigned int count, struct kvm_vcpu *vcpu);
-
-	int (*pio_out_emulated)(int size, unsigned short port, const void *val,
-				unsigned int count, struct kvm_vcpu *vcpu);
-
-	bool (*get_cached_descriptor)(struct desc_struct *desc, u32 *base3,
-				      int seg, struct kvm_vcpu *vcpu);
-	void (*set_cached_descriptor)(struct desc_struct *desc, u32 base3,
-				      int seg, struct kvm_vcpu *vcpu);
-	u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu);
-	void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu);
-	unsigned long (*get_cached_segment_base)(int seg, struct kvm_vcpu *vcpu);
-	void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu);
-	void (*get_idt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu);
-	ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu);
-	int (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu);
-	int (*cpl)(struct kvm_vcpu *vcpu);
-	int (*get_dr)(int dr, unsigned long *dest, struct kvm_vcpu *vcpu);
-	int (*set_dr)(int dr, unsigned long value, struct kvm_vcpu *vcpu);
-	int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
-	int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
+				struct x86_exception *fault);
+	void (*invlpg)(struct x86_emulate_ctxt *ctxt, ulong addr);
+
+	int (*pio_in_emulated)(struct x86_emulate_ctxt *ctxt,
+			       int size, unsigned short port, void *val,
+			       unsigned int count);
+
+	int (*pio_out_emulated)(struct x86_emulate_ctxt *ctxt,
+				int size, unsigned short port, const void *val,
+				unsigned int count);
+
+	bool (*get_segment)(struct x86_emulate_ctxt *ctxt, u16 *selector,
+			    struct desc_struct *desc, u32 *base3, int seg);
+	void (*set_segment)(struct x86_emulate_ctxt *ctxt, u16 selector,
+			    struct desc_struct *desc, u32 base3, int seg);
+	unsigned long (*get_cached_segment_base)(struct x86_emulate_ctxt *ctxt,
+						 int seg);
+	void (*get_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);
+	void (*get_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);
+	void (*set_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);
+	void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);
+	ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr);
+	int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val);
+	int (*cpl)(struct x86_emulate_ctxt *ctxt);
+	int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest);
+	int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value);
+	int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data);
+	int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata);
+	void (*halt)(struct x86_emulate_ctxt *ctxt);
+	void (*wbinvd)(struct x86_emulate_ctxt *ctxt);
+	int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt);
+	void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */
+	void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */
+	int (*intercept)(struct x86_emulate_ctxt *ctxt,
+			 struct x86_instruction_info *info,
+			 enum x86_intercept_stage stage);
 };
 
+typedef u32 __attribute__((vector_size(16))) sse128_t;
+
 /* Type, address-of, and value of an instruction's operand. */
 struct operand {
-	enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type;
+	enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_NONE } type;
 	unsigned int bytes;
 	union {
 		unsigned long orig_val;
@@ -174,11 +207,13 @@ struct operand {
 			ulong ea;
 			unsigned seg;
 		} mem;
+		unsigned xmm;
 	} addr;
 	union {
 		unsigned long val;
 		u64 val64;
 		char valptr[sizeof(unsigned long) + 2];
+		sse128_t vec_val;
 	};
 };
 
@@ -197,6 +232,7 @@ struct read_cache {
 struct decode_cache {
 	u8 twobyte;
 	u8 b;
+	u8 intercept;
 	u8 lock_prefix;
 	u8 rep_prefix;
 	u8 op_bytes;
@@ -209,6 +245,7 @@ struct decode_cache {
 	u8 seg_override;
 	unsigned int d;
 	int (*execute)(struct x86_emulate_ctxt *ctxt);
+	int (*check_perm)(struct x86_emulate_ctxt *ctxt);
 	unsigned long regs[NR_VCPU_REGS];
 	unsigned long eip;
 	/* modrm */
@@ -227,17 +264,15 @@ struct x86_emulate_ctxt {
 	struct x86_emulate_ops *ops;
 
 	/* Register state before/after emulation. */
-	struct kvm_vcpu *vcpu;
-
 	unsigned long eflags;
 	unsigned long eip; /* eip before instruction emulation */
 	/* Emulated execution mode, represented by an X86EMUL_MODE value. */
 	int mode;
-	u32 cs_base;
 
 	/* interruptibility state, as a result of execution of STI or MOV SS */
 	int interruptibility;
 
+	bool guest_mode; /* guest running a nested guest */
 	bool perm_ok; /* do not check permissions if true */
 	bool only_vendor_specific_insn;
 
@@ -249,8 +284,8 @@ struct x86_emulate_ctxt {
 };
 
 /* Repeat String Operation Prefix */
-#define REPE_PREFIX	1
-#define REPNE_PREFIX	2
+#define REPE_PREFIX	0xf3
+#define REPNE_PREFIX	0xf2
 
 /* Execution mode, passed to the emulator. */
 #define X86EMUL_MODE_REAL     0	/* Real mode.             */
@@ -259,6 +294,69 @@ struct x86_emulate_ctxt {
 #define X86EMUL_MODE_PROT32   4	/* 32-bit protected mode. */
 #define X86EMUL_MODE_PROT64   8	/* 64-bit (long) mode.    */
 
+/* any protected mode   */
+#define X86EMUL_MODE_PROT     (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \
+			       X86EMUL_MODE_PROT64)
+
+enum x86_intercept_stage {
+	X86_ICTP_NONE = 0,   /* Allow zero-init to not match anything */
+	X86_ICPT_PRE_EXCEPT,
+	X86_ICPT_POST_EXCEPT,
+	X86_ICPT_POST_MEMACCESS,
+};
+
+enum x86_intercept {
+	x86_intercept_none,
+	x86_intercept_cr_read,
+	x86_intercept_cr_write,
+	x86_intercept_clts,
+	x86_intercept_lmsw,
+	x86_intercept_smsw,
+	x86_intercept_dr_read,
+	x86_intercept_dr_write,
+	x86_intercept_lidt,
+	x86_intercept_sidt,
+	x86_intercept_lgdt,
+	x86_intercept_sgdt,
+	x86_intercept_lldt,
+	x86_intercept_sldt,
+	x86_intercept_ltr,
+	x86_intercept_str,
+	x86_intercept_rdtsc,
+	x86_intercept_rdpmc,
+	x86_intercept_pushf,
+	x86_intercept_popf,
+	x86_intercept_cpuid,
+	x86_intercept_rsm,
+	x86_intercept_iret,
+	x86_intercept_intn,
+	x86_intercept_invd,
+	x86_intercept_pause,
+	x86_intercept_hlt,
+	x86_intercept_invlpg,
+	x86_intercept_invlpga,
+	x86_intercept_vmrun,
+	x86_intercept_vmload,
+	x86_intercept_vmsave,
+	x86_intercept_vmmcall,
+	x86_intercept_stgi,
+	x86_intercept_clgi,
+	x86_intercept_skinit,
+	x86_intercept_rdtscp,
+	x86_intercept_icebp,
+	x86_intercept_wbinvd,
+	x86_intercept_monitor,
+	x86_intercept_mwait,
+	x86_intercept_rdmsr,
+	x86_intercept_wrmsr,
+	x86_intercept_in,
+	x86_intercept_ins,
+	x86_intercept_out,
+	x86_intercept_outs,
+
+	nr_x86_intercepts
+};
+
 /* Host execution mode. */
 #if defined(CONFIG_X86_32)
 #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32
@@ -270,6 +368,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len);
 #define EMULATION_FAILED -1
 #define EMULATION_OK 0
 #define EMULATION_RESTART 1
+#define EMULATION_INTERCEPTED 2
 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt);
 int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
 			 u16 tss_selector, int reason,
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c8af0991fdf..d2ac8e2ee89 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -30,14 +30,30 @@
 #define KVM_MEMORY_SLOTS 32
 /* memory slots that does not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 4
+#define KVM_MMIO_SIZE 16
 
 #define KVM_PIO_PAGE_OFFSET 1
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2
 
+#define CR0_RESERVED_BITS                                               \
+	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
+			  | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
+			  | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
+
 #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
 #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))
 #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS |	\
 				  0xFFFFFF0000000000ULL)
+#define CR4_RESERVED_BITS                                               \
+	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
+			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
+			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR  \
+			  | X86_CR4_OSXSAVE \
+			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
+
+#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
+
+
 
 #define INVALID_PAGE (~(hpa_t)0)
 #define VALID_PAGE(x) ((x) != INVALID_PAGE)
@@ -118,6 +134,9 @@ enum kvm_reg {
 enum kvm_reg_ex {
 	VCPU_EXREG_PDPTR = NR_VCPU_REGS,
 	VCPU_EXREG_CR3,
+	VCPU_EXREG_RFLAGS,
+	VCPU_EXREG_CPL,
+	VCPU_EXREG_SEGMENTS,
 };
 
 enum {
@@ -256,7 +275,7 @@ struct kvm_mmu {
 			 struct kvm_mmu_page *sp);
 	void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva);
 	void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
-			u64 *spte, const void *pte, unsigned long mmu_seq);
+			   u64 *spte, const void *pte);
 	hpa_t root_hpa;
 	int root_level;
 	int shadow_root_level;
@@ -340,7 +359,6 @@ struct kvm_vcpu_arch {
 	struct fpu guest_fpu;
 	u64 xcr0;
 
-	gva_t mmio_fault_cr2;
 	struct kvm_pio_request pio;
 	void *pio_data;
 
@@ -367,18 +385,22 @@ struct kvm_vcpu_arch {
 	/* emulate context */
 
 	struct x86_emulate_ctxt emulate_ctxt;
+	bool emulate_regs_need_sync_to_vcpu;
+	bool emulate_regs_need_sync_from_vcpu;
 
 	gpa_t time;
 	struct pvclock_vcpu_time_info hv_clock;
 	unsigned int hw_tsc_khz;
 	unsigned int time_offset;
 	struct page *time_page;
-	u64 last_host_tsc;
 	u64 last_guest_tsc;
 	u64 last_kernel_ns;
 	u64 last_tsc_nsec;
 	u64 last_tsc_write;
+	u32 virtual_tsc_khz;
 	bool tsc_catchup;
+	u32  tsc_catchup_mult;
+	s8   tsc_catchup_shift;
 
 	bool nmi_pending;
 	bool nmi_injected;
@@ -448,9 +470,6 @@ struct kvm_arch {
 	u64 last_tsc_nsec;
 	u64 last_tsc_offset;
 	u64 last_tsc_write;
-	u32 virtual_tsc_khz;
-	u32 virtual_tsc_mult;
-	s8 virtual_tsc_shift;
 
 	struct kvm_xen_hvm_config xen_hvm_config;
 
@@ -502,6 +521,8 @@ struct kvm_vcpu_stat {
 	u32 nmi_injections;
 };
 
+struct x86_instruction_info;
+
 struct kvm_x86_ops {
 	int (*cpu_has_kvm_support)(void);          /* __init */
 	int (*disabled_by_bios)(void);             /* __init */
@@ -586,9 +607,17 @@ struct kvm_x86_ops {
 
 	bool (*has_wbinvd_exit)(void);
 
+	void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz);
 	void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
 
+	u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc);
+
 	void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
+
+	int (*check_intercept)(struct kvm_vcpu *vcpu,
+			       struct x86_instruction_info *info,
+			       enum x86_intercept_stage stage);
+
 	const struct trace_print_flags *exit_reasons_str;
 };
 
@@ -627,6 +656,13 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
 
 extern bool tdp_enabled;
 
+/* control of guest tsc rate supported? */
+extern bool kvm_has_tsc_control;
+/* minimum supported tsc_khz for guests */
+extern u32  kvm_min_guest_tsc_khz;
+/* maximum supported tsc_khz for guests */
+extern u32  kvm_max_guest_tsc_khz;
+
 enum emulation_result {
 	EMULATE_DONE,       /* no further processing */
 	EMULATE_DO_MMIO,      /* kvm_run filled with mmio request */
@@ -645,9 +681,6 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu,
 	return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
 }
 
-void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
-void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
-
 void kvm_enable_efer_bits(u64);
 int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data);
 int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
@@ -657,8 +690,6 @@ struct x86_emulate_ctxt;
 int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port);
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
 int kvm_emulate_halt(struct kvm_vcpu *vcpu);
-int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address);
-int emulate_clts(struct kvm_vcpu *vcpu);
 int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
 
 void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
@@ -721,8 +752,6 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
 
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 
-int kvm_fix_hypercall(struct kvm_vcpu *vcpu);
-
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code,
 		       void *insn, int insn_len);
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index eb16e94ae04..021979a6e23 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -142,8 +142,6 @@ static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
 static inline void enable_p5_mce(void) {}
 #endif
 
-extern void (*x86_mce_decode_callback)(struct mce *m);
-
 void mce_setup(struct mce *m);
 void mce_log(struct mce *m);
 DECLARE_PER_CPU(struct sys_device, mce_dev);
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index 91df7c51806..5e83a416eca 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -13,31 +13,11 @@ extern struct pglist_data *node_data[];
 #define NODE_DATA(nid)	(node_data[nid])
 
 #include <asm/numaq.h>
-/* summit or generic arch */
-#include <asm/srat.h>
-
-extern int get_memcfg_numa_flat(void);
-/*
- * This allows any one NUMA architecture to be compiled
- * for, and still fall back to the flat function if it
- * fails.
- */
-static inline void get_memcfg_numa(void)
-{
-
-	if (get_memcfg_numaq())
-		return;
-	if (get_memcfg_from_srat())
-		return;
-	get_memcfg_numa_flat();
-}
 
 extern void resume_map_numa_kva(pgd_t *pgd);
 
 #else /* !CONFIG_NUMA */
 
-#define get_memcfg_numa get_memcfg_numa_flat
-
 static inline void resume_map_numa_kva(pgd_t *pgd) {}
 
 #endif /* CONFIG_NUMA */
diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h
index 288b96f815a..b3f88d7867c 100644
--- a/arch/x86/include/asm/mmzone_64.h
+++ b/arch/x86/include/asm/mmzone_64.h
@@ -4,36 +4,13 @@
 #ifndef _ASM_X86_MMZONE_64_H
 #define _ASM_X86_MMZONE_64_H
 
-
 #ifdef CONFIG_NUMA
 
 #include <linux/mmdebug.h>
-
 #include <asm/smp.h>
 
-/* Simple perfect hash to map physical addresses to node numbers */
-struct memnode {
-	int shift;
-	unsigned int mapsize;
-	s16 *map;
-	s16 embedded_map[64 - 8];
-} ____cacheline_aligned; /* total size = 128 bytes */
-extern struct memnode memnode;
-#define memnode_shift memnode.shift
-#define memnodemap memnode.map
-#define memnodemapsize memnode.mapsize
-
 extern struct pglist_data *node_data[];
 
-static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
-{
-	unsigned nid;
-	VIRTUAL_BUG_ON(!memnodemap);
-	nid = memnodemap[addr >> memnode_shift];
-	VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]);
-	return nid;
-}
-
 #define NODE_DATA(nid)		(node_data[nid])
 
 #define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index 67763c5d8b4..9eae7752ae9 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -35,7 +35,7 @@
 #define MODULE_PROC_FAMILY "K7 "
 #elif defined CONFIG_MK8
 #define MODULE_PROC_FAMILY "K8 "
-#elif defined CONFIG_X86_ELAN
+#elif defined CONFIG_MELAN
 #define MODULE_PROC_FAMILY "ELAN "
 #elif defined CONFIG_MCRUSOE
 #define MODULE_PROC_FAMILY "CRUSOE "
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 3cce71413d0..485b4f1f079 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -118,6 +118,7 @@
    complete list. */
 
 #define MSR_AMD64_PATCH_LEVEL		0x0000008b
+#define MSR_AMD64_TSC_RATIO		0xc0000104
 #define MSR_AMD64_NB_CFG		0xc001001f
 #define MSR_AMD64_PATCH_LOADER		0xc0010020
 #define MSR_AMD64_OSVW_ID_LENGTH	0xc0010140
diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h
index af788496020..405b4032a60 100644
--- a/arch/x86/include/asm/nops.h
+++ b/arch/x86/include/asm/nops.h
@@ -1,7 +1,13 @@
 #ifndef _ASM_X86_NOPS_H
 #define _ASM_X86_NOPS_H
 
-/* Define nops for use with alternative() */
+/*
+ * Define nops for use with alternative() and for tracing.
+ *
+ * *_NOP5_ATOMIC must be a single instruction.
+ */
+
+#define NOP_DS_PREFIX 0x3e
 
 /* generic versions from gas
    1: nop
@@ -13,14 +19,15 @@
    6: leal 0x00000000(%esi),%esi
    7: leal 0x00000000(,%esi,1),%esi
 */
-#define GENERIC_NOP1 ".byte 0x90\n"
-#define GENERIC_NOP2 ".byte 0x89,0xf6\n"
-#define GENERIC_NOP3 ".byte 0x8d,0x76,0x00\n"
-#define GENERIC_NOP4 ".byte 0x8d,0x74,0x26,0x00\n"
-#define GENERIC_NOP5 GENERIC_NOP1 GENERIC_NOP4
-#define GENERIC_NOP6 ".byte 0x8d,0xb6,0x00,0x00,0x00,0x00\n"
-#define GENERIC_NOP7 ".byte 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00\n"
-#define GENERIC_NOP8 GENERIC_NOP1 GENERIC_NOP7
+#define GENERIC_NOP1 0x90
+#define GENERIC_NOP2 0x89,0xf6
+#define GENERIC_NOP3 0x8d,0x76,0x00
+#define GENERIC_NOP4 0x8d,0x74,0x26,0x00
+#define GENERIC_NOP5 GENERIC_NOP1,GENERIC_NOP4
+#define GENERIC_NOP6 0x8d,0xb6,0x00,0x00,0x00,0x00
+#define GENERIC_NOP7 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00
+#define GENERIC_NOP8 GENERIC_NOP1,GENERIC_NOP7
+#define GENERIC_NOP5_ATOMIC NOP_DS_PREFIX,GENERIC_NOP4
 
 /* Opteron 64bit nops
    1: nop
@@ -29,13 +36,14 @@
    4: osp osp osp nop
 */
 #define K8_NOP1 GENERIC_NOP1
-#define K8_NOP2	".byte 0x66,0x90\n"
-#define K8_NOP3	".byte 0x66,0x66,0x90\n"
-#define K8_NOP4	".byte 0x66,0x66,0x66,0x90\n"
-#define K8_NOP5	K8_NOP3 K8_NOP2
-#define K8_NOP6	K8_NOP3 K8_NOP3
-#define K8_NOP7	K8_NOP4 K8_NOP3
-#define K8_NOP8	K8_NOP4 K8_NOP4
+#define K8_NOP2	0x66,K8_NOP1
+#define K8_NOP3	0x66,K8_NOP2
+#define K8_NOP4	0x66,K8_NOP3
+#define K8_NOP5	K8_NOP3,K8_NOP2
+#define K8_NOP6	K8_NOP3,K8_NOP3
+#define K8_NOP7	K8_NOP4,K8_NOP3
+#define K8_NOP8	K8_NOP4,K8_NOP4
+#define K8_NOP5_ATOMIC 0x66,K8_NOP4
 
 /* K7 nops
    uses eax dependencies (arbitrary choice)
@@ -47,13 +55,14 @@
    7: leal 0x00000000(,%eax,1),%eax
 */
 #define K7_NOP1	GENERIC_NOP1
-#define K7_NOP2	".byte 0x8b,0xc0\n"
-#define K7_NOP3	".byte 0x8d,0x04,0x20\n"
-#define K7_NOP4	".byte 0x8d,0x44,0x20,0x00\n"
-#define K7_NOP5	K7_NOP4 ASM_NOP1
-#define K7_NOP6	".byte 0x8d,0x80,0,0,0,0\n"
-#define K7_NOP7	".byte 0x8D,0x04,0x05,0,0,0,0\n"
-#define K7_NOP8	K7_NOP7 ASM_NOP1
+#define K7_NOP2	0x8b,0xc0
+#define K7_NOP3	0x8d,0x04,0x20
+#define K7_NOP4	0x8d,0x44,0x20,0x00
+#define K7_NOP5	K7_NOP4,K7_NOP1
+#define K7_NOP6	0x8d,0x80,0,0,0,0
+#define K7_NOP7	0x8D,0x04,0x05,0,0,0,0
+#define K7_NOP8	K7_NOP7,K7_NOP1
+#define K7_NOP5_ATOMIC NOP_DS_PREFIX,K7_NOP4
 
 /* P6 nops
    uses eax dependencies (Intel-recommended choice)
@@ -69,52 +78,65 @@
 	There is kernel code that depends on this.
 */
 #define P6_NOP1	GENERIC_NOP1
-#define P6_NOP2	".byte 0x66,0x90\n"
-#define P6_NOP3	".byte 0x0f,0x1f,0x00\n"
-#define P6_NOP4	".byte 0x0f,0x1f,0x40,0\n"
-#define P6_NOP5	".byte 0x0f,0x1f,0x44,0x00,0\n"
-#define P6_NOP6	".byte 0x66,0x0f,0x1f,0x44,0x00,0\n"
-#define P6_NOP7	".byte 0x0f,0x1f,0x80,0,0,0,0\n"
-#define P6_NOP8	".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n"
+#define P6_NOP2	0x66,0x90
+#define P6_NOP3	0x0f,0x1f,0x00
+#define P6_NOP4	0x0f,0x1f,0x40,0
+#define P6_NOP5	0x0f,0x1f,0x44,0x00,0
+#define P6_NOP6	0x66,0x0f,0x1f,0x44,0x00,0
+#define P6_NOP7	0x0f,0x1f,0x80,0,0,0,0
+#define P6_NOP8	0x0f,0x1f,0x84,0x00,0,0,0,0
+#define P6_NOP5_ATOMIC P6_NOP5
+
+#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n"
 
 #if defined(CONFIG_MK7)
-#define ASM_NOP1 K7_NOP1
-#define ASM_NOP2 K7_NOP2
-#define ASM_NOP3 K7_NOP3
-#define ASM_NOP4 K7_NOP4
-#define ASM_NOP5 K7_NOP5
-#define ASM_NOP6 K7_NOP6
-#define ASM_NOP7 K7_NOP7
-#define ASM_NOP8 K7_NOP8
+#define ASM_NOP1 _ASM_MK_NOP(K7_NOP1)
+#define ASM_NOP2 _ASM_MK_NOP(K7_NOP2)
+#define ASM_NOP3 _ASM_MK_NOP(K7_NOP3)
+#define ASM_NOP4 _ASM_MK_NOP(K7_NOP4)
+#define ASM_NOP5 _ASM_MK_NOP(K7_NOP5)
+#define ASM_NOP6 _ASM_MK_NOP(K7_NOP6)
+#define ASM_NOP7 _ASM_MK_NOP(K7_NOP7)
+#define ASM_NOP8 _ASM_MK_NOP(K7_NOP8)
+#define ASM_NOP5_ATOMIC _ASM_MK_NOP(K7_NOP5_ATOMIC)
 #elif defined(CONFIG_X86_P6_NOP)
-#define ASM_NOP1 P6_NOP1
-#define ASM_NOP2 P6_NOP2
-#define ASM_NOP3 P6_NOP3
-#define ASM_NOP4 P6_NOP4
-#define ASM_NOP5 P6_NOP5
-#define ASM_NOP6 P6_NOP6
-#define ASM_NOP7 P6_NOP7
-#define ASM_NOP8 P6_NOP8
+#define ASM_NOP1 _ASM_MK_NOP(P6_NOP1)
+#define ASM_NOP2 _ASM_MK_NOP(P6_NOP2)
+#define ASM_NOP3 _ASM_MK_NOP(P6_NOP3)
+#define ASM_NOP4 _ASM_MK_NOP(P6_NOP4)
+#define ASM_NOP5 _ASM_MK_NOP(P6_NOP5)
+#define ASM_NOP6 _ASM_MK_NOP(P6_NOP6)
+#define ASM_NOP7 _ASM_MK_NOP(P6_NOP7)
+#define ASM_NOP8 _ASM_MK_NOP(P6_NOP8)
+#define ASM_NOP5_ATOMIC _ASM_MK_NOP(P6_NOP5_ATOMIC)
 #elif defined(CONFIG_X86_64)
-#define ASM_NOP1 K8_NOP1
-#define ASM_NOP2 K8_NOP2
-#define ASM_NOP3 K8_NOP3
-#define ASM_NOP4 K8_NOP4
-#define ASM_NOP5 K8_NOP5
-#define ASM_NOP6 K8_NOP6
-#define ASM_NOP7 K8_NOP7
-#define ASM_NOP8 K8_NOP8
+#define ASM_NOP1 _ASM_MK_NOP(K8_NOP1)
+#define ASM_NOP2 _ASM_MK_NOP(K8_NOP2)
+#define ASM_NOP3 _ASM_MK_NOP(K8_NOP3)
+#define ASM_NOP4 _ASM_MK_NOP(K8_NOP4)
+#define ASM_NOP5 _ASM_MK_NOP(K8_NOP5)
+#define ASM_NOP6 _ASM_MK_NOP(K8_NOP6)
+#define ASM_NOP7 _ASM_MK_NOP(K8_NOP7)
+#define ASM_NOP8 _ASM_MK_NOP(K8_NOP8)
+#define ASM_NOP5_ATOMIC _ASM_MK_NOP(K8_NOP5_ATOMIC)
 #else
-#define ASM_NOP1 GENERIC_NOP1
-#define ASM_NOP2 GENERIC_NOP2
-#define ASM_NOP3 GENERIC_NOP3
-#define ASM_NOP4 GENERIC_NOP4
-#define ASM_NOP5 GENERIC_NOP5
-#define ASM_NOP6 GENERIC_NOP6
-#define ASM_NOP7 GENERIC_NOP7
-#define ASM_NOP8 GENERIC_NOP8
+#define ASM_NOP1 _ASM_MK_NOP(GENERIC_NOP1)
+#define ASM_NOP2 _ASM_MK_NOP(GENERIC_NOP2)
+#define ASM_NOP3 _ASM_MK_NOP(GENERIC_NOP3)
+#define ASM_NOP4 _ASM_MK_NOP(GENERIC_NOP4)
+#define ASM_NOP5 _ASM_MK_NOP(GENERIC_NOP5)
+#define ASM_NOP6 _ASM_MK_NOP(GENERIC_NOP6)
+#define ASM_NOP7 _ASM_MK_NOP(GENERIC_NOP7)
+#define ASM_NOP8 _ASM_MK_NOP(GENERIC_NOP8)
+#define ASM_NOP5_ATOMIC _ASM_MK_NOP(GENERIC_NOP5_ATOMIC)
 #endif
 
 #define ASM_NOP_MAX 8
+#define NOP_ATOMIC5 (ASM_NOP_MAX+1)	/* Entry for the 5-byte atomic NOP */
+
+#ifndef __ASSEMBLY__
+extern const unsigned char * const *ideal_nops;
+extern void arch_init_ideal_nops(void);
+#endif
 
 #endif /* _ASM_X86_NOPS_H */
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index a50fc9f493b..bfacd2ccf65 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -1,12 +1,24 @@
 #ifndef _ASM_X86_NUMA_H
 #define _ASM_X86_NUMA_H
 
+#include <linux/nodemask.h>
+
 #include <asm/topology.h>
 #include <asm/apicdef.h>
 
 #ifdef CONFIG_NUMA
 
 #define NR_NODE_MEMBLKS		(MAX_NUMNODES*2)
+#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
+
+/*
+ * Too small node sizes may confuse the VM badly. Usually they
+ * result from BIOS bugs. So dont recognize nodes as standalone
+ * NUMA entities that have less than this amount of RAM listed:
+ */
+#define NODE_MIN_SIZE (4*1024*1024)
+
+extern int numa_off;
 
 /*
  * __apicid_to_node[] stores the raw mapping between physical apicid and
@@ -17,15 +29,27 @@
  * numa_cpu_node().
  */
 extern s16 __apicid_to_node[MAX_LOCAL_APIC];
+extern nodemask_t numa_nodes_parsed __initdata;
+
+extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
+extern void __init numa_set_distance(int from, int to, int distance);
 
 static inline void set_apicid_to_node(int apicid, s16 node)
 {
 	__apicid_to_node[apicid] = node;
 }
+
+extern int __cpuinit numa_cpu_node(int cpu);
+
 #else	/* CONFIG_NUMA */
 static inline void set_apicid_to_node(int apicid, s16 node)
 {
 }
+
+static inline int numa_cpu_node(int cpu)
+{
+	return NUMA_NO_NODE;
+}
 #endif	/* CONFIG_NUMA */
 
 #ifdef CONFIG_X86_32
@@ -37,14 +61,12 @@ static inline void set_apicid_to_node(int apicid, s16 node)
 #ifdef CONFIG_NUMA
 extern void __cpuinit numa_set_node(int cpu, int node);
 extern void __cpuinit numa_clear_node(int cpu);
-extern void __init numa_init_array(void);
 extern void __init init_cpu_to_node(void);
 extern void __cpuinit numa_add_cpu(int cpu);
 extern void __cpuinit numa_remove_cpu(int cpu);
 #else	/* CONFIG_NUMA */
 static inline void numa_set_node(int cpu, int node)	{ }
 static inline void numa_clear_node(int cpu)		{ }
-static inline void numa_init_array(void)		{ }
 static inline void init_cpu_to_node(void)		{ }
 static inline void numa_add_cpu(int cpu)		{ }
 static inline void numa_remove_cpu(int cpu)		{ }
@@ -54,4 +76,10 @@ static inline void numa_remove_cpu(int cpu)		{ }
 void debug_cpumask_set_cpu(int cpu, int node, bool enable);
 #endif
 
+#ifdef CONFIG_NUMA_EMU
+#define FAKE_NODE_MIN_SIZE	((u64)32 << 20)
+#define FAKE_NODE_MIN_HASH_MASK	(~(FAKE_NODE_MIN_SIZE - 1UL))
+void numa_emu_cmdline(char *);
+#endif /* CONFIG_NUMA_EMU */
+
 #endif	/* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h
index c6beed1ef10..e7d6b825474 100644
--- a/arch/x86/include/asm/numa_32.h
+++ b/arch/x86/include/asm/numa_32.h
@@ -1,16 +1,6 @@
 #ifndef _ASM_X86_NUMA_32_H
 #define _ASM_X86_NUMA_32_H
 
-extern int numa_off;
-
-extern int pxm_to_nid(int pxm);
-
-#ifdef CONFIG_NUMA
-extern int __cpuinit numa_cpu_node(int cpu);
-#else	/* CONFIG_NUMA */
-static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
-#endif	/* CONFIG_NUMA */
-
 #ifdef CONFIG_HIGHMEM
 extern void set_highmem_pages_init(void);
 #else
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 344eb1790b4..0c05f7ae46e 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -1,42 +1,6 @@
 #ifndef _ASM_X86_NUMA_64_H
 #define _ASM_X86_NUMA_64_H
 
-#include <linux/nodemask.h>
-
-struct bootnode {
-	u64 start;
-	u64 end;
-};
-
-#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
-
-extern int numa_off;
-
 extern unsigned long numa_free_all_bootmem(void);
-extern void setup_node_bootmem(int nodeid, unsigned long start,
-			       unsigned long end);
-
-#ifdef CONFIG_NUMA
-/*
- * Too small node sizes may confuse the VM badly. Usually they
- * result from BIOS bugs. So dont recognize nodes as standalone
- * NUMA entities that have less than this amount of RAM listed:
- */
-#define NODE_MIN_SIZE (4*1024*1024)
-
-extern nodemask_t numa_nodes_parsed __initdata;
-
-extern int __cpuinit numa_cpu_node(int cpu);
-extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
-extern void __init numa_set_distance(int from, int to, int distance);
-
-#ifdef CONFIG_NUMA_EMU
-#define FAKE_NODE_MIN_SIZE	((u64)32 << 20)
-#define FAKE_NODE_MIN_HASH_MASK	(~(FAKE_NODE_MIN_SIZE - 1UL))
-void numa_emu_cmdline(char *);
-#endif /* CONFIG_NUMA_EMU */
-#else
-static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
-#endif
 
 #endif /* _ASM_X86_NUMA_64_H */
diff --git a/arch/x86/include/asm/numaq.h b/arch/x86/include/asm/numaq.h
index 37c516545ec..c3b3c322fd8 100644
--- a/arch/x86/include/asm/numaq.h
+++ b/arch/x86/include/asm/numaq.h
@@ -29,7 +29,7 @@
 #ifdef CONFIG_X86_NUMAQ
 
 extern int found_numaq;
-extern int get_memcfg_numaq(void);
+extern int numaq_numa_init(void);
 extern int pci_numaq_init(void);
 
 extern void *xquad_portio;
@@ -166,11 +166,6 @@ struct sys_cfg_data {
 
 void numaq_tsc_disable(void);
 
-#else
-static inline int get_memcfg_numaq(void)
-{
-	return 0;
-}
 #endif /* CONFIG_X86_NUMAQ */
 #endif /* _ASM_X86_NUMAQ_H */
 
diff --git a/arch/x86/include/asm/olpc_ofw.h b/arch/x86/include/asm/olpc_ofw.h
index c5d3a5abbb9..24487712e0b 100644
--- a/arch/x86/include/asm/olpc_ofw.h
+++ b/arch/x86/include/asm/olpc_ofw.h
@@ -26,15 +26,12 @@ extern void setup_olpc_ofw_pgd(void);
 /* check if OFW was detected during boot */
 extern bool olpc_ofw_present(void);
 
+extern void olpc_dt_build_devicetree(void);
+
 #else /* !CONFIG_OLPC */
 static inline void olpc_ofw_detect(void) { }
 static inline void setup_olpc_ofw_pgd(void) { }
-#endif /* !CONFIG_OLPC */
-
-#ifdef CONFIG_OF_PROMTREE
-extern void olpc_dt_build_devicetree(void);
-#else
 static inline void olpc_dt_build_devicetree(void) { }
-#endif
+#endif /* !CONFIG_OLPC */
 
 #endif /* _ASM_X86_OLPC_OFW_H */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index d475b4398d8..53278b0dfdf 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -517,7 +517,7 @@ do {									\
 	typeof(o2) __o2 = o2;						\
 	typeof(o2) __n2 = n2;						\
 	typeof(o2) __dummy;						\
-	alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4,	\
+	alternative_io("call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP4,	\
 		       "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t",	\
 		       X86_FEATURE_CX16,				\
 		       ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)),		\
@@ -542,6 +542,33 @@ do {									\
 	old__;								\
 })
 
+static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr,
+                        const unsigned long __percpu *addr)
+{
+	unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG;
+
+	return ((1UL << (nr % BITS_PER_LONG)) & percpu_read(*a)) != 0;
+}
+
+static inline int x86_this_cpu_variable_test_bit(int nr,
+                        const unsigned long __percpu *addr)
+{
+	int oldbit;
+
+	asm volatile("bt "__percpu_arg(2)",%1\n\t"
+			"sbb %0,%0"
+			: "=r" (oldbit)
+			: "m" (*(unsigned long *)addr), "Ir" (nr));
+
+	return oldbit;
+}
+
+#define x86_this_cpu_test_bit(nr, addr)			\
+	(__builtin_constant_p((nr))			\
+	 ? x86_this_cpu_constant_test_bit((nr), (addr))	\
+	 : x86_this_cpu_variable_test_bit((nr), (addr)))
+
+
 #include <asm-generic/percpu.h>
 
 /* We can use this directly for local CPU (faster). */
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 7db7723d1f3..d56187c6b83 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -299,6 +299,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 /* Install a pte for a particular vaddr in kernel space. */
 void set_pte_vaddr(unsigned long vaddr, pte_t pte);
 
+extern void native_pagetable_reserve(u64 start, u64 end);
 #ifdef CONFIG_X86_32
 extern void native_pagetable_setup_start(pgd_t *base);
 extern void native_pagetable_setup_done(pgd_t *base);
diff --git a/arch/x86/include/asm/probe_roms.h b/arch/x86/include/asm/probe_roms.h
new file mode 100644
index 00000000000..4950a0b1d09
--- /dev/null
+++ b/arch/x86/include/asm/probe_roms.h
@@ -0,0 +1,8 @@
+#ifndef _PROBE_ROMS_H_
+#define _PROBE_ROMS_H_
+struct pci_dev;
+
+extern void __iomem *pci_map_biosrom(struct pci_dev *pdev);
+extern void pci_unmap_biosrom(void __iomem *rom);
+extern size_t pci_biosrom_size(struct pci_dev *pdev);
+#endif
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index a898a2b6e10..59ab4dffa37 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -60,6 +60,7 @@
 #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */
 #define X86_CR4_VMXE	0x00002000 /* enable VMX virtualization */
 #define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */
+#define X86_CR4_SMEP	0x00100000 /* enable SMEP support */
 
 /*
  * x86-64 Task Priority Register, CR8
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index db8aa19a08a..9756551ec76 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -88,7 +88,7 @@ void *extend_brk(size_t size, size_t align);
  * executable.)
  */
 #define RESERVE_BRK(name,sz)						\
-	static void __section(.discard.text) __used			\
+	static void __section(.discard.text) __used notrace		\
 	__brk_reservation_fn_##name##__(void) {				\
 		asm volatile (						\
 			".pushsection .brk_reservation,\"aw\",@nobits;" \
@@ -104,10 +104,10 @@ void *extend_brk(size_t size, size_t align);
 	type *name;					\
 	RESERVE_BRK(name, sizeof(type) * entries)
 
+extern void probe_roms(void);
 #ifdef __i386__
 
 void __init i386_start_kernel(void);
-extern void probe_roms(void);
 
 #else
 void __init x86_64_start_kernel(char *real_mode);
diff --git a/arch/x86/include/asm/srat.h b/arch/x86/include/asm/srat.h
deleted file mode 100644
index b508d639d1a..00000000000
--- a/arch/x86/include/asm/srat.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Some of the code in this file has been gleaned from the 64 bit
- * discontigmem support code base.
- *
- * Copyright (C) 2002, IBM Corp.
- *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Send feedback to Pat Gaughen <gone@us.ibm.com>
- */
-
-#ifndef _ASM_X86_SRAT_H
-#define _ASM_X86_SRAT_H
-
-#ifdef CONFIG_ACPI_NUMA
-extern int get_memcfg_from_srat(void);
-#else
-static inline int get_memcfg_from_srat(void)
-{
-	return 0;
-}
-#endif
-
-#endif /* _ASM_X86_SRAT_H */
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index d7e89c83645..70bbe39043a 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -37,9 +37,6 @@ print_context_stack_bp(struct thread_info *tinfo,
 /* Generic stack tracer with callbacks */
 
 struct stacktrace_ops {
-	void (*warning)(void *data, char *msg);
-	/* msg must contain %s for the symbol */
-	void (*warning_symbol)(void *data, char *msg, unsigned long symbol);
 	void (*address)(void *data, unsigned long address, int reliable);
 	/* On negative return stop dumping */
 	int (*stack)(void *data, char *name);
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 12569e691ce..c2ff2a1d845 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -303,24 +303,81 @@ static inline void native_wbinvd(void)
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
-#define read_cr0()	(native_read_cr0())
-#define write_cr0(x)	(native_write_cr0(x))
-#define read_cr2()	(native_read_cr2())
-#define write_cr2(x)	(native_write_cr2(x))
-#define read_cr3()	(native_read_cr3())
-#define write_cr3(x)	(native_write_cr3(x))
-#define read_cr4()	(native_read_cr4())
-#define read_cr4_safe()	(native_read_cr4_safe())
-#define write_cr4(x)	(native_write_cr4(x))
-#define wbinvd()	(native_wbinvd())
+
+static inline unsigned long read_cr0(void)
+{
+	return native_read_cr0();
+}
+
+static inline void write_cr0(unsigned long x)
+{
+	native_write_cr0(x);
+}
+
+static inline unsigned long read_cr2(void)
+{
+	return native_read_cr2();
+}
+
+static inline void write_cr2(unsigned long x)
+{
+	native_write_cr2(x);
+}
+
+static inline unsigned long read_cr3(void)
+{
+	return native_read_cr3();
+}
+
+static inline void write_cr3(unsigned long x)
+{
+	native_write_cr3(x);
+}
+
+static inline unsigned long read_cr4(void)
+{
+	return native_read_cr4();
+}
+
+static inline unsigned long read_cr4_safe(void)
+{
+	return native_read_cr4_safe();
+}
+
+static inline void write_cr4(unsigned long x)
+{
+	native_write_cr4(x);
+}
+
+static inline void wbinvd(void)
+{
+	native_wbinvd();
+}
+
 #ifdef CONFIG_X86_64
-#define read_cr8()	(native_read_cr8())
-#define write_cr8(x)	(native_write_cr8(x))
-#define load_gs_index   native_load_gs_index
+
+static inline unsigned long read_cr8(void)
+{
+	return native_read_cr8();
+}
+
+static inline void write_cr8(unsigned long x)
+{
+	native_write_cr8(x);
+}
+
+static inline void load_gs_index(unsigned selector)
+{
+	native_load_gs_index(selector);
+}
+
 #endif
 
 /* Clear the 'TS' bit */
-#define clts()		(native_clts())
+static inline void clts(void)
+{
+	native_clts();
+}
 
 #endif/* CONFIG_PARAVIRT */
 
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 910a7084f7f..c00692476e9 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -93,19 +93,11 @@ extern void setup_node_to_cpumask_map(void);
 #define pcibus_to_node(bus) __pcibus_to_node(bus)
 
 #ifdef CONFIG_X86_32
-extern unsigned long node_start_pfn[];
-extern unsigned long node_end_pfn[];
-extern unsigned long node_remap_size[];
-#define node_has_online_mem(nid) (node_start_pfn[nid] != node_end_pfn[nid])
-
 # define SD_CACHE_NICE_TRIES	1
 # define SD_IDLE_IDX		1
-
 #else
-
 # define SD_CACHE_NICE_TRIES	2
 # define SD_IDLE_IDX		2
-
 #endif
 
 /* sched_domains SD_NODE_INIT for NUMA machines */
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index abd3e0ea762..99ddd148a76 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -6,7 +6,6 @@
 #include <linux/errno.h>
 #include <linux/compiler.h>
 #include <linux/thread_info.h>
-#include <linux/prefetch.h>
 #include <linux/string.h>
 #include <asm/asm.h>
 #include <asm/page.h>
@@ -42,7 +41,7 @@
  * Returns 0 if the range is valid, nonzero otherwise.
  *
  * This is equivalent to the following test:
- * (u33)addr + (u33)size >= (u33)current->addr_limit.seg (u65 for x86_64)
+ * (u33)addr + (u33)size > (u33)current->addr_limit.seg (u65 for x86_64)
  *
  * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry...
  */
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 088d09fb161..566e803cc60 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -6,7 +6,6 @@
  */
 #include <linux/errno.h>
 #include <linux/thread_info.h>
-#include <linux/prefetch.h>
 #include <linux/string.h>
 #include <asm/asm.h>
 #include <asm/page.h>
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 316708d5af9..1c66d30971a 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -6,7 +6,6 @@
  */
 #include <linux/compiler.h>
 #include <linux/errno.h>
-#include <linux/prefetch.h>
 #include <linux/lockdep.h>
 #include <asm/alternative.h>
 #include <asm/cpufeature.h>
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index a755ef5e597..fb6a625c99b 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -350,10 +350,11 @@
 #define __NR_open_by_handle_at  342
 #define __NR_clock_adjtime	343
 #define __NR_syncfs             344
+#define __NR_sendmmsg		345
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 345
+#define NR_syscalls 346
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 160fa76bd57..79f90eb15aa 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -677,6 +677,8 @@ __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
 __SYSCALL(__NR_clock_adjtime, sys_clock_adjtime)
 #define __NR_syncfs                             306
 __SYSCALL(__NR_syncfs, sys_syncfs)
+#define __NR_sendmmsg				307
+__SYSCALL(__NR_sendmmsg, sys_sendmmsg)
 
 #ifndef __NO_STUBS
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 3e094af443c..130f1eeee5f 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -94,6 +94,8 @@
 /* after this # consecutive successes, bump up the throttle if it was lowered */
 #define COMPLETE_THRESHOLD 5
 
+#define UV_LB_SUBNODEID 0x10
+
 /*
  * number of entries in the destination side payload queue
  */
@@ -124,7 +126,7 @@
  * The distribution specification (32 bytes) is interpreted as a 256-bit
  * distribution vector. Adjacent bits correspond to consecutive even numbered
  * nodeIDs. The result of adding the index of a given bit to the 15-bit
- * 'base_dest_nodeid' field of the header corresponds to the
+ * 'base_dest_nasid' field of the header corresponds to the
  * destination nodeID associated with that specified bit.
  */
 struct bau_target_uvhubmask {
@@ -176,7 +178,7 @@ struct bau_msg_payload {
 struct bau_msg_header {
 	unsigned int dest_subnodeid:6;	/* must be 0x10, for the LB */
 	/* bits 5:0 */
-	unsigned int base_dest_nodeid:15; /* nasid of the */
+	unsigned int base_dest_nasid:15; /* nasid of the */
 	/* bits 20:6 */			  /* first bit in uvhub map */
 	unsigned int command:8;	/* message type */
 	/* bits 28:21 */
@@ -378,6 +380,10 @@ struct ptc_stats {
 	unsigned long d_rcanceled; /* number of messages canceled by resets */
 };
 
+struct hub_and_pnode {
+	short uvhub;
+	short pnode;
+};
 /*
  * one per-cpu; to locate the software tables
  */
@@ -399,10 +405,12 @@ struct bau_control {
 	int baudisabled;
 	int set_bau_off;
 	short cpu;
+	short osnode;
 	short uvhub_cpu;
 	short uvhub;
 	short cpus_in_socket;
 	short cpus_in_uvhub;
+	short partition_base_pnode;
 	unsigned short message_number;
 	unsigned short uvhub_quiesce;
 	short socket_acknowledge_count[DEST_Q_SIZE];
@@ -422,15 +430,16 @@ struct bau_control {
 	int congested_period;
 	cycles_t period_time;
 	long period_requests;
+	struct hub_and_pnode *target_hub_and_pnode;
 };
 
 static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp)
 {
 	return constant_test_bit(uvhub, &dstp->bits[0]);
 }
-static inline void bau_uvhub_set(int uvhub, struct bau_target_uvhubmask *dstp)
+static inline void bau_uvhub_set(int pnode, struct bau_target_uvhubmask *dstp)
 {
-	__set_bit(uvhub, &dstp->bits[0]);
+	__set_bit(pnode, &dstp->bits[0]);
 }
 static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp,
 				    int nbits)
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index a501741c233..4298002d0c8 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -398,6 +398,8 @@ struct uv_blade_info {
 	unsigned short	nr_online_cpus;
 	unsigned short	pnode;
 	short		memory_nid;
+	spinlock_t	nmi_lock;
+	unsigned long	nmi_count;
 };
 extern struct uv_blade_info *uv_blade_info;
 extern short *uv_node_to_blade;
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index 20cafeac745..f5bb64a823d 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -5,7 +5,7 @@
  *
  * SGI UV MMR definitions
  *
- * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2007-2011 Silicon Graphics, Inc. All rights reserved.
  */
 
 #ifndef _ASM_X86_UV_UV_MMRS_H
@@ -1099,5 +1099,19 @@ union uvh_rtc1_int_config_u {
     } s;
 };
 
+/* ========================================================================= */
+/*                               UVH_SCRATCH5                                */
+/* ========================================================================= */
+#define UVH_SCRATCH5 0x2d0200UL
+#define UVH_SCRATCH5_32 0x00778
+
+#define UVH_SCRATCH5_SCRATCH5_SHFT 0
+#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
+union uvh_scratch5_u {
+    unsigned long	v;
+    struct uvh_scratch5_s {
+	unsigned long	scratch5 : 64;  /* RW, W1CS */
+    } s;
+};
 
 #endif /* __ASM_UV_MMRS_X86_H__ */
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 643ebf2e2ad..d3d859035af 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -68,6 +68,17 @@ struct x86_init_oem {
 };
 
 /**
+ * struct x86_init_mapping - platform specific initial kernel pagetable setup
+ * @pagetable_reserve:	reserve a range of addresses for kernel pagetable usage
+ *
+ * For more details on the purpose of this hook, look in
+ * init_memory_mapping and the commit that added it.
+ */
+struct x86_init_mapping {
+	void (*pagetable_reserve)(u64 start, u64 end);
+};
+
+/**
  * struct x86_init_paging - platform specific paging functions
  * @pagetable_setup_start:	platform specific pre paging_init() call
  * @pagetable_setup_done:	platform specific post paging_init() call
@@ -123,6 +134,7 @@ struct x86_init_ops {
 	struct x86_init_mpparse		mpparse;
 	struct x86_init_irqs		irqs;
 	struct x86_init_oem		oem;
+	struct x86_init_mapping		mapping;
 	struct x86_init_paging		paging;
 	struct x86_init_timers		timers;
 	struct x86_init_iommu		iommu;
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index c61934fbf22..64a619d47d3 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -47,8 +47,9 @@ extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
 extern unsigned long set_phys_range_identity(unsigned long pfn_s,
 					     unsigned long pfn_e);
 
-extern int m2p_add_override(unsigned long mfn, struct page *page);
-extern int m2p_remove_override(struct page *page);
+extern int m2p_add_override(unsigned long mfn, struct page *page,
+			    bool clear_pte);
+extern int m2p_remove_override(struct page *page, bool clear_pte);
 extern struct page *m2p_find_override(unsigned long mfn);
 extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
 
diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
index aa862098916..4fbda9a3f33 100644
--- a/arch/x86/include/asm/xen/pci.h
+++ b/arch/x86/include/asm/xen/pci.h
@@ -15,10 +15,26 @@ static inline int pci_xen_hvm_init(void)
 #endif
 #if defined(CONFIG_XEN_DOM0)
 void __init xen_setup_pirqs(void);
+int xen_find_device_domain_owner(struct pci_dev *dev);
+int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain);
+int xen_unregister_device_domain_owner(struct pci_dev *dev);
 #else
 static inline void __init xen_setup_pirqs(void)
 {
 }
+static inline int xen_find_device_domain_owner(struct pci_dev *dev)
+{
+	return -1;
+}
+static inline int xen_register_device_domain_owner(struct pci_dev *dev,
+						   uint16_t domain)
+{
+	return -1;
+}
+static inline int xen_unregister_device_domain_owner(struct pci_dev *dev)
+{
+	return -1;
+}
 #endif
 
 #if defined(CONFIG_PCI_MSI)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 7338ef2218b..250806472a7 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -36,7 +36,7 @@ obj-y			+= traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y			+= time.o ioport.o ldt.o dumpstack.o
 obj-y			+= setup.o x86_init.o i8259.o irqinit.o jump_label.o
 obj-$(CONFIG_IRQ_WORK)  += irq_work.o
-obj-$(CONFIG_X86_32)	+= probe_roms_32.o
+obj-y			+= probe_roms.o
 obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)	+= syscall_64.o vsyscall_64.o
@@ -117,7 +117,7 @@ obj-$(CONFIG_OF)			+= devicetree.o
 ifeq ($(CONFIG_X86_64),y)
 	obj-$(CONFIG_AUDIT)		+= audit_64.o
 
-	obj-$(CONFIG_GART_IOMMU)	+= pci-gart_64.o aperture_64.o
+	obj-$(CONFIG_GART_IOMMU)	+= amd_gart_64.o aperture_64.o
 	obj-$(CONFIG_CALGARY_IOMMU)	+= pci-calgary_64.o tce_64.o
 	obj-$(CONFIG_AMD_IOMMU)		+= amd_iommu_init.o amd_iommu.o
 
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index ff93bc1b09c..18a857ba7a2 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -112,11 +112,6 @@ static int __init acpi_sleep_setup(char *str)
 #ifdef CONFIG_HIBERNATION
 		if (strncmp(str, "s4_nohwsig", 10) == 0)
 			acpi_no_s4_hw_signature();
-		if (strncmp(str, "s4_nonvs", 8) == 0) {
-			pr_warning("ACPI: acpi_sleep=s4_nonvs is deprecated, "
-					"please use acpi_sleep=nonvs instead");
-			acpi_nvs_nosave();
-		}
 #endif
 		if (strncmp(str, "nonvs", 5) == 0)
 			acpi_nvs_nosave();
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 4a234677e21..a81f2d52f86 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -67,17 +67,30 @@ __setup("noreplace-paravirt", setup_noreplace_paravirt);
 #define DPRINTK(fmt, args...) if (debug_alternative) \
 	printk(KERN_DEBUG fmt, args)
 
+/*
+ * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes
+ * that correspond to that nop. Getting from one nop to the next, we
+ * add to the array the offset that is equal to the sum of all sizes of
+ * nops preceding the one we are after.
+ *
+ * Note: The GENERIC_NOP5_ATOMIC is at the end, as it breaks the
+ * nice symmetry of sizes of the previous nops.
+ */
 #if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64)
-/* Use inline assembly to define this because the nops are defined
-   as inline assembly strings in the include files and we cannot
-   get them easily into strings. */
-asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nintelnops: "
-	GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
-	GENERIC_NOP7 GENERIC_NOP8
-    "\t.previous");
-extern const unsigned char intelnops[];
-static const unsigned char *const __initconst_or_module
-intel_nops[ASM_NOP_MAX+1] = {
+static const unsigned char intelnops[] =
+{
+	GENERIC_NOP1,
+	GENERIC_NOP2,
+	GENERIC_NOP3,
+	GENERIC_NOP4,
+	GENERIC_NOP5,
+	GENERIC_NOP6,
+	GENERIC_NOP7,
+	GENERIC_NOP8,
+	GENERIC_NOP5_ATOMIC
+};
+static const unsigned char * const intel_nops[ASM_NOP_MAX+2] =
+{
 	NULL,
 	intelnops,
 	intelnops + 1,
@@ -87,17 +100,25 @@ intel_nops[ASM_NOP_MAX+1] = {
 	intelnops + 1 + 2 + 3 + 4 + 5,
 	intelnops + 1 + 2 + 3 + 4 + 5 + 6,
 	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
 };
 #endif
 
 #ifdef K8_NOP1
-asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk8nops: "
-	K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
-	K8_NOP7 K8_NOP8
-    "\t.previous");
-extern const unsigned char k8nops[];
-static const unsigned char *const __initconst_or_module
-k8_nops[ASM_NOP_MAX+1] = {
+static const unsigned char k8nops[] =
+{
+	K8_NOP1,
+	K8_NOP2,
+	K8_NOP3,
+	K8_NOP4,
+	K8_NOP5,
+	K8_NOP6,
+	K8_NOP7,
+	K8_NOP8,
+	K8_NOP5_ATOMIC
+};
+static const unsigned char * const k8_nops[ASM_NOP_MAX+2] =
+{
 	NULL,
 	k8nops,
 	k8nops + 1,
@@ -107,17 +128,25 @@ k8_nops[ASM_NOP_MAX+1] = {
 	k8nops + 1 + 2 + 3 + 4 + 5,
 	k8nops + 1 + 2 + 3 + 4 + 5 + 6,
 	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
 };
 #endif
 
 #if defined(K7_NOP1) && !defined(CONFIG_X86_64)
-asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk7nops: "
-	K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
-	K7_NOP7 K7_NOP8
-    "\t.previous");
-extern const unsigned char k7nops[];
-static const unsigned char *const __initconst_or_module
-k7_nops[ASM_NOP_MAX+1] = {
+static const unsigned char k7nops[] =
+{
+	K7_NOP1,
+	K7_NOP2,
+	K7_NOP3,
+	K7_NOP4,
+	K7_NOP5,
+	K7_NOP6,
+	K7_NOP7,
+	K7_NOP8,
+	K7_NOP5_ATOMIC
+};
+static const unsigned char * const k7_nops[ASM_NOP_MAX+2] =
+{
 	NULL,
 	k7nops,
 	k7nops + 1,
@@ -127,17 +156,25 @@ k7_nops[ASM_NOP_MAX+1] = {
 	k7nops + 1 + 2 + 3 + 4 + 5,
 	k7nops + 1 + 2 + 3 + 4 + 5 + 6,
 	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
 };
 #endif
 
 #ifdef P6_NOP1
-asm("\t" __stringify(__INITRODATA_OR_MODULE) "\np6nops: "
-	P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6
-	P6_NOP7 P6_NOP8
-    "\t.previous");
-extern const unsigned char p6nops[];
-static const unsigned char *const __initconst_or_module
-p6_nops[ASM_NOP_MAX+1] = {
+static const unsigned char  __initconst_or_module p6nops[] =
+{
+	P6_NOP1,
+	P6_NOP2,
+	P6_NOP3,
+	P6_NOP4,
+	P6_NOP5,
+	P6_NOP6,
+	P6_NOP7,
+	P6_NOP8,
+	P6_NOP5_ATOMIC
+};
+static const unsigned char * const p6_nops[ASM_NOP_MAX+2] =
+{
 	NULL,
 	p6nops,
 	p6nops + 1,
@@ -147,47 +184,65 @@ p6_nops[ASM_NOP_MAX+1] = {
 	p6nops + 1 + 2 + 3 + 4 + 5,
 	p6nops + 1 + 2 + 3 + 4 + 5 + 6,
 	p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+	p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
 };
 #endif
 
+/* Initialize these to a safe default */
 #ifdef CONFIG_X86_64
+const unsigned char * const *ideal_nops = p6_nops;
+#else
+const unsigned char * const *ideal_nops = intel_nops;
+#endif
 
-extern char __vsyscall_0;
-static const unsigned char *const *__init_or_module find_nop_table(void)
+void __init arch_init_ideal_nops(void)
 {
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
-	    boot_cpu_has(X86_FEATURE_NOPL))
-		return p6_nops;
-	else
-		return k8_nops;
-}
-
-#else /* CONFIG_X86_64 */
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_INTEL:
+		/*
+		 * Due to a decoder implementation quirk, some
+		 * specific Intel CPUs actually perform better with
+		 * the "k8_nops" than with the SDM-recommended NOPs.
+		 */
+		if (boot_cpu_data.x86 == 6 &&
+		    boot_cpu_data.x86_model >= 0x0f &&
+		    boot_cpu_data.x86_model != 0x1c &&
+		    boot_cpu_data.x86_model != 0x26 &&
+		    boot_cpu_data.x86_model != 0x27 &&
+		    boot_cpu_data.x86_model < 0x30) {
+			ideal_nops = k8_nops;
+		} else if (boot_cpu_has(X86_FEATURE_NOPL)) {
+			   ideal_nops = p6_nops;
+		} else {
+#ifdef CONFIG_X86_64
+			ideal_nops = k8_nops;
+#else
+			ideal_nops = intel_nops;
+#endif
+		}
 
-static const unsigned char *const *__init_or_module find_nop_table(void)
-{
-	if (boot_cpu_has(X86_FEATURE_K8))
-		return k8_nops;
-	else if (boot_cpu_has(X86_FEATURE_K7))
-		return k7_nops;
-	else if (boot_cpu_has(X86_FEATURE_NOPL))
-		return p6_nops;
-	else
-		return intel_nops;
+	default:
+#ifdef CONFIG_X86_64
+		ideal_nops = k8_nops;
+#else
+		if (boot_cpu_has(X86_FEATURE_K8))
+			ideal_nops = k8_nops;
+		else if (boot_cpu_has(X86_FEATURE_K7))
+			ideal_nops = k7_nops;
+		else
+			ideal_nops = intel_nops;
+#endif
+	}
 }
 
-#endif /* CONFIG_X86_64 */
-
 /* Use this to add nops to a buffer, then text_poke the whole buffer. */
 static void __init_or_module add_nops(void *insns, unsigned int len)
 {
-	const unsigned char *const *noptable = find_nop_table();
-
 	while (len > 0) {
 		unsigned int noplen = len;
 		if (noplen > ASM_NOP_MAX)
 			noplen = ASM_NOP_MAX;
-		memcpy(insns, noptable[noplen], noplen);
+		memcpy(insns, ideal_nops[noplen], noplen);
 		insns += noplen;
 		len -= noplen;
 	}
@@ -195,6 +250,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
 
 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
 extern s32 __smp_locks[], __smp_locks_end[];
+extern char __vsyscall_0;
 void *text_poke_early(void *addr, const void *opcode, size_t len);
 
 /* Replace instructions with better alternatives for this CPU type.
@@ -210,6 +266,15 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
 	u8 insnbuf[MAX_PATCH_LEN];
 
 	DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
+	/*
+	 * The scan order should be from start to end. A later scanned
+	 * alternative code can overwrite a previous scanned alternative code.
+	 * Some kernel functions (e.g. memcpy, memset, etc) use this order to
+	 * patch code.
+	 *
+	 * So be careful if you want to change the scan order to any other
+	 * order.
+	 */
 	for (a = start; a < end; a++) {
 		u8 *instr = a->instr;
 		BUG_ON(a->replacementlen > a->instrlen);
@@ -678,29 +743,3 @@ void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
 	wrote_text = 0;
 	__stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
 }
-
-#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
-
-#ifdef CONFIG_X86_64
-unsigned char ideal_nop5[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 };
-#else
-unsigned char ideal_nop5[5] = { 0x3e, 0x8d, 0x74, 0x26, 0x00 };
-#endif
-
-void __init arch_init_ideal_nop5(void)
-{
-	/*
-	 * There is no good nop for all x86 archs.  This selection
-	 * algorithm should be unified with the one in find_nop_table(),
-	 * but this should be good enough for now.
-	 *
-	 * For cases other than the ones below, use the safe (as in
-	 * always functional) defaults above.
-	 */
-#ifdef CONFIG_X86_64
-	/* Don't use these on 32 bits due to broken virtualizers */
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
-		memcpy(ideal_nop5, p6_nops[5], 5);
-#endif
-}
-#endif
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/amd_gart_64.c
index b117efd24f7..b117efd24f7 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 57ca7778722..cd8cbeb5fa3 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -18,6 +18,7 @@
  */
 
 #include <linux/pci.h>
+#include <linux/pci-ats.h>
 #include <linux/bitmap.h>
 #include <linux/slab.h>
 #include <linux/debugfs.h>
@@ -25,6 +26,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/iommu-helper.h>
 #include <linux/iommu.h>
+#include <linux/delay.h>
 #include <asm/proto.h>
 #include <asm/iommu.h>
 #include <asm/gart.h>
@@ -34,7 +36,7 @@
 
 #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
 
-#define EXIT_LOOP_COUNT 10000000
+#define LOOP_TIMEOUT	100000
 
 static DEFINE_RWLOCK(amd_iommu_devtable_lock);
 
@@ -57,7 +59,6 @@ struct iommu_cmd {
 	u32 data[4];
 };
 
-static void reset_iommu_command_buffer(struct amd_iommu *iommu);
 static void update_domain(struct protection_domain *domain);
 
 /****************************************************************************
@@ -322,8 +323,6 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
 		break;
 	case EVENT_TYPE_ILL_CMD:
 		printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
-		iommu->reset_in_progress = true;
-		reset_iommu_command_buffer(iommu);
 		dump_command(address);
 		break;
 	case EVENT_TYPE_CMD_HARD_ERR:
@@ -367,7 +366,7 @@ static void iommu_poll_events(struct amd_iommu *iommu)
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
-irqreturn_t amd_iommu_int_handler(int irq, void *data)
+irqreturn_t amd_iommu_int_thread(int irq, void *data)
 {
 	struct amd_iommu *iommu;
 
@@ -377,192 +376,300 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
+irqreturn_t amd_iommu_int_handler(int irq, void *data)
+{
+	return IRQ_WAKE_THREAD;
+}
+
 /****************************************************************************
  *
  * IOMMU command queuing functions
  *
  ****************************************************************************/
 
-/*
- * Writes the command to the IOMMUs command buffer and informs the
- * hardware about the new command. Must be called with iommu->lock held.
- */
-static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
+static int wait_on_sem(volatile u64 *sem)
+{
+	int i = 0;
+
+	while (*sem == 0 && i < LOOP_TIMEOUT) {
+		udelay(1);
+		i += 1;
+	}
+
+	if (i == LOOP_TIMEOUT) {
+		pr_alert("AMD-Vi: Completion-Wait loop timed out\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static void copy_cmd_to_buffer(struct amd_iommu *iommu,
+			       struct iommu_cmd *cmd,
+			       u32 tail)
 {
-	u32 tail, head;
 	u8 *target;
 
-	WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED);
-	tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
 	target = iommu->cmd_buf + tail;
-	memcpy_toio(target, cmd, sizeof(*cmd));
-	tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
-	head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
-	if (tail == head)
-		return -ENOMEM;
+	tail   = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
+
+	/* Copy command to buffer */
+	memcpy(target, cmd, sizeof(*cmd));
+
+	/* Tell the IOMMU about it */
 	writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+}
 
-	return 0;
+static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
+{
+	WARN_ON(address & 0x7ULL);
+
+	memset(cmd, 0, sizeof(*cmd));
+	cmd->data[0] = lower_32_bits(__pa(address)) | CMD_COMPL_WAIT_STORE_MASK;
+	cmd->data[1] = upper_32_bits(__pa(address));
+	cmd->data[2] = 1;
+	CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
+}
+
+static void build_inv_dte(struct iommu_cmd *cmd, u16 devid)
+{
+	memset(cmd, 0, sizeof(*cmd));
+	cmd->data[0] = devid;
+	CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY);
+}
+
+static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
+				  size_t size, u16 domid, int pde)
+{
+	u64 pages;
+	int s;
+
+	pages = iommu_num_pages(address, size, PAGE_SIZE);
+	s     = 0;
+
+	if (pages > 1) {
+		/*
+		 * If we have to flush more than one page, flush all
+		 * TLB entries for this domain
+		 */
+		address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
+		s = 1;
+	}
+
+	address &= PAGE_MASK;
+
+	memset(cmd, 0, sizeof(*cmd));
+	cmd->data[1] |= domid;
+	cmd->data[2]  = lower_32_bits(address);
+	cmd->data[3]  = upper_32_bits(address);
+	CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
+	if (s) /* size bit - we flush more than one 4kb page */
+		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
+	if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
+		cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
+}
+
+static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep,
+				  u64 address, size_t size)
+{
+	u64 pages;
+	int s;
+
+	pages = iommu_num_pages(address, size, PAGE_SIZE);
+	s     = 0;
+
+	if (pages > 1) {
+		/*
+		 * If we have to flush more than one page, flush all
+		 * TLB entries for this domain
+		 */
+		address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
+		s = 1;
+	}
+
+	address &= PAGE_MASK;
+
+	memset(cmd, 0, sizeof(*cmd));
+	cmd->data[0]  = devid;
+	cmd->data[0] |= (qdep & 0xff) << 24;
+	cmd->data[1]  = devid;
+	cmd->data[2]  = lower_32_bits(address);
+	cmd->data[3]  = upper_32_bits(address);
+	CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
+	if (s)
+		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
+}
+
+static void build_inv_all(struct iommu_cmd *cmd)
+{
+	memset(cmd, 0, sizeof(*cmd));
+	CMD_SET_TYPE(cmd, CMD_INV_ALL);
 }
 
 /*
- * General queuing function for commands. Takes iommu->lock and calls
- * __iommu_queue_command().
+ * Writes the command to the IOMMUs command buffer and informs the
+ * hardware about the new command.
  */
 static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
 {
+	u32 left, tail, head, next_tail;
 	unsigned long flags;
-	int ret;
 
+	WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED);
+
+again:
 	spin_lock_irqsave(&iommu->lock, flags);
-	ret = __iommu_queue_command(iommu, cmd);
-	if (!ret)
-		iommu->need_sync = true;
-	spin_unlock_irqrestore(&iommu->lock, flags);
 
-	return ret;
-}
+	head      = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
+	tail      = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+	next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
+	left      = (head - next_tail) % iommu->cmd_buf_size;
 
-/*
- * This function waits until an IOMMU has completed a completion
- * wait command
- */
-static void __iommu_wait_for_completion(struct amd_iommu *iommu)
-{
-	int ready = 0;
-	unsigned status = 0;
-	unsigned long i = 0;
+	if (left <= 2) {
+		struct iommu_cmd sync_cmd;
+		volatile u64 sem = 0;
+		int ret;
+
+		build_completion_wait(&sync_cmd, (u64)&sem);
+		copy_cmd_to_buffer(iommu, &sync_cmd, tail);
 
-	INC_STATS_COUNTER(compl_wait);
+		spin_unlock_irqrestore(&iommu->lock, flags);
+
+		if ((ret = wait_on_sem(&sem)) != 0)
+			return ret;
 
-	while (!ready && (i < EXIT_LOOP_COUNT)) {
-		++i;
-		/* wait for the bit to become one */
-		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
-		ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
+		goto again;
 	}
 
-	/* set bit back to zero */
-	status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
-	writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
+	copy_cmd_to_buffer(iommu, cmd, tail);
+
+	/* We need to sync now to make sure all commands are processed */
+	iommu->need_sync = true;
+
+	spin_unlock_irqrestore(&iommu->lock, flags);
 
-	if (unlikely(i == EXIT_LOOP_COUNT))
-		iommu->reset_in_progress = true;
+	return 0;
 }
 
 /*
  * This function queues a completion wait command into the command
  * buffer of an IOMMU
  */
-static int __iommu_completion_wait(struct amd_iommu *iommu)
+static int iommu_completion_wait(struct amd_iommu *iommu)
 {
 	struct iommu_cmd cmd;
+	volatile u64 sem = 0;
+	int ret;
+
+	if (!iommu->need_sync)
+		return 0;
 
-	 memset(&cmd, 0, sizeof(cmd));
-	 cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
-	 CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
+	build_completion_wait(&cmd, (u64)&sem);
 
-	 return __iommu_queue_command(iommu, &cmd);
+	ret = iommu_queue_command(iommu, &cmd);
+	if (ret)
+		return ret;
+
+	return wait_on_sem(&sem);
 }
 
-/*
- * This function is called whenever we need to ensure that the IOMMU has
- * completed execution of all commands we sent. It sends a
- * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs
- * us about that by writing a value to a physical address we pass with
- * the command.
- */
-static int iommu_completion_wait(struct amd_iommu *iommu)
+static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
 {
-	int ret = 0;
-	unsigned long flags;
-
-	spin_lock_irqsave(&iommu->lock, flags);
+	struct iommu_cmd cmd;
 
-	if (!iommu->need_sync)
-		goto out;
+	build_inv_dte(&cmd, devid);
 
-	ret = __iommu_completion_wait(iommu);
+	return iommu_queue_command(iommu, &cmd);
+}
 
-	iommu->need_sync = false;
+static void iommu_flush_dte_all(struct amd_iommu *iommu)
+{
+	u32 devid;
 
-	if (ret)
-		goto out;
+	for (devid = 0; devid <= 0xffff; ++devid)
+		iommu_flush_dte(iommu, devid);
 
-	__iommu_wait_for_completion(iommu);
+	iommu_completion_wait(iommu);
+}
 
-out:
-	spin_unlock_irqrestore(&iommu->lock, flags);
+/*
+ * This function uses heavy locking and may disable irqs for some time. But
+ * this is no issue because it is only called during resume.
+ */
+static void iommu_flush_tlb_all(struct amd_iommu *iommu)
+{
+	u32 dom_id;
 
-	if (iommu->reset_in_progress)
-		reset_iommu_command_buffer(iommu);
+	for (dom_id = 0; dom_id <= 0xffff; ++dom_id) {
+		struct iommu_cmd cmd;
+		build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
+				      dom_id, 1);
+		iommu_queue_command(iommu, &cmd);
+	}
 
-	return 0;
+	iommu_completion_wait(iommu);
 }
 
-static void iommu_flush_complete(struct protection_domain *domain)
+static void iommu_flush_all(struct amd_iommu *iommu)
 {
-	int i;
+	struct iommu_cmd cmd;
 
-	for (i = 0; i < amd_iommus_present; ++i) {
-		if (!domain->dev_iommu[i])
-			continue;
+	build_inv_all(&cmd);
 
-		/*
-		 * Devices of this domain are behind this IOMMU
-		 * We need to wait for completion of all commands.
-		 */
-		iommu_completion_wait(amd_iommus[i]);
+	iommu_queue_command(iommu, &cmd);
+	iommu_completion_wait(iommu);
+}
+
+void iommu_flush_all_caches(struct amd_iommu *iommu)
+{
+	if (iommu_feature(iommu, FEATURE_IA)) {
+		iommu_flush_all(iommu);
+	} else {
+		iommu_flush_dte_all(iommu);
+		iommu_flush_tlb_all(iommu);
 	}
 }
 
 /*
- * Command send function for invalidating a device table entry
+ * Command send function for flushing on-device TLB
  */
-static int iommu_flush_device(struct device *dev)
+static int device_flush_iotlb(struct device *dev, u64 address, size_t size)
 {
+	struct pci_dev *pdev = to_pci_dev(dev);
 	struct amd_iommu *iommu;
 	struct iommu_cmd cmd;
 	u16 devid;
+	int qdep;
 
+	qdep  = pci_ats_queue_depth(pdev);
 	devid = get_device_id(dev);
 	iommu = amd_iommu_rlookup_table[devid];
 
-	/* Build command */
-	memset(&cmd, 0, sizeof(cmd));
-	CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);
-	cmd.data[0] = devid;
+	build_inv_iotlb_pages(&cmd, devid, qdep, address, size);
 
 	return iommu_queue_command(iommu, &cmd);
 }
 
-static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
-					  u16 domid, int pde, int s)
-{
-	memset(cmd, 0, sizeof(*cmd));
-	address &= PAGE_MASK;
-	CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
-	cmd->data[1] |= domid;
-	cmd->data[2] = lower_32_bits(address);
-	cmd->data[3] = upper_32_bits(address);
-	if (s) /* size bit - we flush more than one 4kb page */
-		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
-	if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
-		cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
-}
-
 /*
- * Generic command send function for invalidaing TLB entries
+ * Command send function for invalidating a device table entry
  */
-static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
-		u64 address, u16 domid, int pde, int s)
+static int device_flush_dte(struct device *dev)
 {
-	struct iommu_cmd cmd;
+	struct amd_iommu *iommu;
+	struct pci_dev *pdev;
+	u16 devid;
 	int ret;
 
-	__iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s);
+	pdev  = to_pci_dev(dev);
+	devid = get_device_id(dev);
+	iommu = amd_iommu_rlookup_table[devid];
 
-	ret = iommu_queue_command(iommu, &cmd);
+	ret = iommu_flush_dte(iommu, devid);
+	if (ret)
+		return ret;
+
+	if (pci_ats_enabled(pdev))
+		ret = device_flush_iotlb(dev, 0, ~0UL);
 
 	return ret;
 }
@@ -572,23 +679,14 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
  * It invalidates a single PTE if the range to flush is within a single
  * page. Otherwise it flushes the whole TLB of the IOMMU.
  */
-static void __iommu_flush_pages(struct protection_domain *domain,
-				u64 address, size_t size, int pde)
+static void __domain_flush_pages(struct protection_domain *domain,
+				 u64 address, size_t size, int pde)
 {
-	int s = 0, i;
-	unsigned long pages = iommu_num_pages(address, size, PAGE_SIZE);
-
-	address &= PAGE_MASK;
-
-	if (pages > 1) {
-		/*
-		 * If we have to flush more than one page, flush all
-		 * TLB entries for this domain
-		 */
-		address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
-		s = 1;
-	}
+	struct iommu_dev_data *dev_data;
+	struct iommu_cmd cmd;
+	int ret = 0, i;
 
+	build_inv_iommu_pages(&cmd, address, size, domain->id, pde);
 
 	for (i = 0; i < amd_iommus_present; ++i) {
 		if (!domain->dev_iommu[i])
@@ -598,101 +696,70 @@ static void __iommu_flush_pages(struct protection_domain *domain,
 		 * Devices of this domain are behind this IOMMU
 		 * We need a TLB flush
 		 */
-		iommu_queue_inv_iommu_pages(amd_iommus[i], address,
-					    domain->id, pde, s);
+		ret |= iommu_queue_command(amd_iommus[i], &cmd);
+	}
+
+	list_for_each_entry(dev_data, &domain->dev_list, list) {
+		struct pci_dev *pdev = to_pci_dev(dev_data->dev);
+
+		if (!pci_ats_enabled(pdev))
+			continue;
+
+		ret |= device_flush_iotlb(dev_data->dev, address, size);
 	}
 
-	return;
+	WARN_ON(ret);
 }
 
-static void iommu_flush_pages(struct protection_domain *domain,
-			     u64 address, size_t size)
+static void domain_flush_pages(struct protection_domain *domain,
+			       u64 address, size_t size)
 {
-	__iommu_flush_pages(domain, address, size, 0);
+	__domain_flush_pages(domain, address, size, 0);
 }
 
 /* Flush the whole IO/TLB for a given protection domain */
-static void iommu_flush_tlb(struct protection_domain *domain)
+static void domain_flush_tlb(struct protection_domain *domain)
 {
-	__iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0);
+	__domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0);
 }
 
 /* Flush the whole IO/TLB for a given protection domain - including PDE */
-static void iommu_flush_tlb_pde(struct protection_domain *domain)
+static void domain_flush_tlb_pde(struct protection_domain *domain)
 {
-	__iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
-}
-
-
-/*
- * This function flushes the DTEs for all devices in domain
- */
-static void iommu_flush_domain_devices(struct protection_domain *domain)
-{
-	struct iommu_dev_data *dev_data;
-	unsigned long flags;
-
-	spin_lock_irqsave(&domain->lock, flags);
-
-	list_for_each_entry(dev_data, &domain->dev_list, list)
-		iommu_flush_device(dev_data->dev);
-
-	spin_unlock_irqrestore(&domain->lock, flags);
+	__domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
 }
 
-static void iommu_flush_all_domain_devices(void)
+static void domain_flush_complete(struct protection_domain *domain)
 {
-	struct protection_domain *domain;
-	unsigned long flags;
+	int i;
 
-	spin_lock_irqsave(&amd_iommu_pd_lock, flags);
+	for (i = 0; i < amd_iommus_present; ++i) {
+		if (!domain->dev_iommu[i])
+			continue;
 
-	list_for_each_entry(domain, &amd_iommu_pd_list, list) {
-		iommu_flush_domain_devices(domain);
-		iommu_flush_complete(domain);
+		/*
+		 * Devices of this domain are behind this IOMMU
+		 * We need to wait for completion of all commands.
+		 */
+		iommu_completion_wait(amd_iommus[i]);
 	}
-
-	spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
 }
 
-void amd_iommu_flush_all_devices(void)
-{
-	iommu_flush_all_domain_devices();
-}
 
 /*
- * This function uses heavy locking and may disable irqs for some time. But
- * this is no issue because it is only called during resume.
+ * This function flushes the DTEs for all devices in domain
  */
-void amd_iommu_flush_all_domains(void)
+static void domain_flush_devices(struct protection_domain *domain)
 {
-	struct protection_domain *domain;
+	struct iommu_dev_data *dev_data;
 	unsigned long flags;
 
-	spin_lock_irqsave(&amd_iommu_pd_lock, flags);
-
-	list_for_each_entry(domain, &amd_iommu_pd_list, list) {
-		spin_lock(&domain->lock);
-		iommu_flush_tlb_pde(domain);
-		iommu_flush_complete(domain);
-		spin_unlock(&domain->lock);
-	}
-
-	spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
-}
-
-static void reset_iommu_command_buffer(struct amd_iommu *iommu)
-{
-	pr_err("AMD-Vi: Resetting IOMMU command buffer\n");
-
-	if (iommu->reset_in_progress)
-		panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n");
+	spin_lock_irqsave(&domain->lock, flags);
 
-	amd_iommu_reset_cmd_buffer(iommu);
-	amd_iommu_flush_all_devices();
-	amd_iommu_flush_all_domains();
+	list_for_each_entry(dev_data, &domain->dev_list, list)
+		device_flush_dte(dev_data->dev);
 
-	iommu->reset_in_progress = false;
+	spin_unlock_irqrestore(&domain->lock, flags);
 }
 
 /****************************************************************************
@@ -1410,17 +1477,22 @@ static bool dma_ops_domain(struct protection_domain *domain)
 	return domain->flags & PD_DMA_OPS_MASK;
 }
 
-static void set_dte_entry(u16 devid, struct protection_domain *domain)
+static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
 {
 	u64 pte_root = virt_to_phys(domain->pt_root);
+	u32 flags = 0;
 
 	pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
 		    << DEV_ENTRY_MODE_SHIFT;
 	pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
 
-	amd_iommu_dev_table[devid].data[2] = domain->id;
-	amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
-	amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
+	if (ats)
+		flags |= DTE_FLAG_IOTLB;
+
+	amd_iommu_dev_table[devid].data[3] |= flags;
+	amd_iommu_dev_table[devid].data[2]  = domain->id;
+	amd_iommu_dev_table[devid].data[1]  = upper_32_bits(pte_root);
+	amd_iommu_dev_table[devid].data[0]  = lower_32_bits(pte_root);
 }
 
 static void clear_dte_entry(u16 devid)
@@ -1437,23 +1509,29 @@ static void do_attach(struct device *dev, struct protection_domain *domain)
 {
 	struct iommu_dev_data *dev_data;
 	struct amd_iommu *iommu;
+	struct pci_dev *pdev;
+	bool ats = false;
 	u16 devid;
 
 	devid    = get_device_id(dev);
 	iommu    = amd_iommu_rlookup_table[devid];
 	dev_data = get_dev_data(dev);
+	pdev     = to_pci_dev(dev);
+
+	if (amd_iommu_iotlb_sup)
+		ats = pci_ats_enabled(pdev);
 
 	/* Update data structures */
 	dev_data->domain = domain;
 	list_add(&dev_data->list, &domain->dev_list);
-	set_dte_entry(devid, domain);
+	set_dte_entry(devid, domain, ats);
 
 	/* Do reference counting */
 	domain->dev_iommu[iommu->index] += 1;
 	domain->dev_cnt                 += 1;
 
 	/* Flush the DTE entry */
-	iommu_flush_device(dev);
+	device_flush_dte(dev);
 }
 
 static void do_detach(struct device *dev)
@@ -1476,7 +1554,7 @@ static void do_detach(struct device *dev)
 	clear_dte_entry(devid);
 
 	/* Flush the DTE entry */
-	iommu_flush_device(dev);
+	device_flush_dte(dev);
 }
 
 /*
@@ -1539,9 +1617,13 @@ out_unlock:
 static int attach_device(struct device *dev,
 			 struct protection_domain *domain)
 {
+	struct pci_dev *pdev = to_pci_dev(dev);
 	unsigned long flags;
 	int ret;
 
+	if (amd_iommu_iotlb_sup)
+		pci_enable_ats(pdev, PAGE_SHIFT);
+
 	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
 	ret = __attach_device(dev, domain);
 	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
@@ -1551,7 +1633,7 @@ static int attach_device(struct device *dev,
 	 * left the caches in the IOMMU dirty. So we have to flush
 	 * here to evict all dirty stuff.
 	 */
-	iommu_flush_tlb_pde(domain);
+	domain_flush_tlb_pde(domain);
 
 	return ret;
 }
@@ -1598,12 +1680,16 @@ static void __detach_device(struct device *dev)
  */
 static void detach_device(struct device *dev)
 {
+	struct pci_dev *pdev = to_pci_dev(dev);
 	unsigned long flags;
 
 	/* lock device table */
 	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
 	__detach_device(dev);
 	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+
+	if (amd_iommu_iotlb_sup && pci_ats_enabled(pdev))
+		pci_disable_ats(pdev);
 }
 
 /*
@@ -1615,10 +1701,9 @@ static struct protection_domain *domain_for_device(struct device *dev)
 	struct protection_domain *dom;
 	struct iommu_dev_data *dev_data, *alias_data;
 	unsigned long flags;
-	u16 devid, alias;
+	u16 devid;
 
 	devid      = get_device_id(dev);
-	alias      = amd_iommu_alias_table[devid];
 	dev_data   = get_dev_data(dev);
 	alias_data = get_dev_data(dev_data->alias);
 	if (!alias_data)
@@ -1692,7 +1777,7 @@ static int device_change_notifier(struct notifier_block *nb,
 		goto out;
 	}
 
-	iommu_flush_device(dev);
+	device_flush_dte(dev);
 	iommu_completion_wait(iommu);
 
 out:
@@ -1753,8 +1838,9 @@ static void update_device_table(struct protection_domain *domain)
 	struct iommu_dev_data *dev_data;
 
 	list_for_each_entry(dev_data, &domain->dev_list, list) {
+		struct pci_dev *pdev = to_pci_dev(dev_data->dev);
 		u16 devid = get_device_id(dev_data->dev);
-		set_dte_entry(devid, domain);
+		set_dte_entry(devid, domain, pci_ats_enabled(pdev));
 	}
 }
 
@@ -1764,8 +1850,9 @@ static void update_domain(struct protection_domain *domain)
 		return;
 
 	update_device_table(domain);
-	iommu_flush_domain_devices(domain);
-	iommu_flush_tlb_pde(domain);
+
+	domain_flush_devices(domain);
+	domain_flush_tlb_pde(domain);
 
 	domain->updated = false;
 }
@@ -1924,10 +2011,10 @@ retry:
 	ADD_STATS_COUNTER(alloced_io_mem, size);
 
 	if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
-		iommu_flush_tlb(&dma_dom->domain);
+		domain_flush_tlb(&dma_dom->domain);
 		dma_dom->need_flush = false;
 	} else if (unlikely(amd_iommu_np_cache))
-		iommu_flush_pages(&dma_dom->domain, address, size);
+		domain_flush_pages(&dma_dom->domain, address, size);
 
 out:
 	return address;
@@ -1976,7 +2063,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
 	dma_ops_free_addresses(dma_dom, dma_addr, pages);
 
 	if (amd_iommu_unmap_flush || dma_dom->need_flush) {
-		iommu_flush_pages(&dma_dom->domain, flush_addr, size);
+		domain_flush_pages(&dma_dom->domain, flush_addr, size);
 		dma_dom->need_flush = false;
 	}
 }
@@ -2012,7 +2099,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page,
 	if (addr == DMA_ERROR_CODE)
 		goto out;
 
-	iommu_flush_complete(domain);
+	domain_flush_complete(domain);
 
 out:
 	spin_unlock_irqrestore(&domain->lock, flags);
@@ -2039,7 +2126,7 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
 
 	__unmap_single(domain->priv, dma_addr, size, dir);
 
-	iommu_flush_complete(domain);
+	domain_flush_complete(domain);
 
 	spin_unlock_irqrestore(&domain->lock, flags);
 }
@@ -2104,7 +2191,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
 			goto unmap;
 	}
 
-	iommu_flush_complete(domain);
+	domain_flush_complete(domain);
 
 out:
 	spin_unlock_irqrestore(&domain->lock, flags);
@@ -2150,7 +2237,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
 		s->dma_address = s->dma_length = 0;
 	}
 
-	iommu_flush_complete(domain);
+	domain_flush_complete(domain);
 
 	spin_unlock_irqrestore(&domain->lock, flags);
 }
@@ -2200,7 +2287,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
 		goto out_free;
 	}
 
-	iommu_flush_complete(domain);
+	domain_flush_complete(domain);
 
 	spin_unlock_irqrestore(&domain->lock, flags);
 
@@ -2232,7 +2319,7 @@ static void free_coherent(struct device *dev, size_t size,
 
 	__unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
 
-	iommu_flush_complete(domain);
+	domain_flush_complete(domain);
 
 	spin_unlock_irqrestore(&domain->lock, flags);
 
@@ -2476,7 +2563,7 @@ static void amd_iommu_detach_device(struct iommu_domain *dom,
 	if (!iommu)
 		return;
 
-	iommu_flush_device(dev);
+	device_flush_dte(dev);
 	iommu_completion_wait(iommu);
 }
 
@@ -2542,7 +2629,7 @@ static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
 	unmap_size = iommu_unmap_page(domain, iova, page_size);
 	mutex_unlock(&domain->api_lock);
 
-	iommu_flush_tlb_pde(domain);
+	domain_flush_tlb_pde(domain);
 
 	return get_order(unmap_size);
 }
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 246d727b65b..9179c21120a 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -137,6 +137,7 @@ int amd_iommus_present;
 
 /* IOMMUs have a non-present cache? */
 bool amd_iommu_np_cache __read_mostly;
+bool amd_iommu_iotlb_sup __read_mostly = true;
 
 /*
  * The ACPI table parsing functions set this variable on an error
@@ -180,6 +181,12 @@ static u32 dev_table_size;	/* size of the device table */
 static u32 alias_table_size;	/* size of the alias table */
 static u32 rlookup_table_size;	/* size if the rlookup table */
 
+/*
+ * This function flushes all internal caches of
+ * the IOMMU used by this driver.
+ */
+extern void iommu_flush_all_caches(struct amd_iommu *iommu);
+
 static inline void update_last_devid(u16 devid)
 {
 	if (devid > amd_iommu_last_bdf)
@@ -293,9 +300,23 @@ static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
 /* Function to enable the hardware */
 static void iommu_enable(struct amd_iommu *iommu)
 {
-	printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx\n",
+	static const char * const feat_str[] = {
+		"PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
+		"IA", "GA", "HE", "PC", NULL
+	};
+	int i;
+
+	printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx",
 	       dev_name(&iommu->dev->dev), iommu->cap_ptr);
 
+	if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
+		printk(KERN_CONT " extended features: ");
+		for (i = 0; feat_str[i]; ++i)
+			if (iommu_feature(iommu, (1ULL << i)))
+				printk(KERN_CONT " %s", feat_str[i]);
+	}
+	printk(KERN_CONT "\n");
+
 	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
 }
 
@@ -651,7 +672,7 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
 static void __init init_iommu_from_pci(struct amd_iommu *iommu)
 {
 	int cap_ptr = iommu->cap_ptr;
-	u32 range, misc;
+	u32 range, misc, low, high;
 	int i, j;
 
 	pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
@@ -667,6 +688,15 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
 					MMIO_GET_LD(range));
 	iommu->evt_msi_num = MMIO_MSI_NUM(misc);
 
+	if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
+		amd_iommu_iotlb_sup = false;
+
+	/* read extended feature bits */
+	low  = readl(iommu->mmio_base + MMIO_EXT_FEATURES);
+	high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4);
+
+	iommu->features = ((u64)high << 32) | low;
+
 	if (!is_rd890_iommu(iommu->dev))
 		return;
 
@@ -1004,10 +1034,11 @@ static int iommu_setup_msi(struct amd_iommu *iommu)
 	if (pci_enable_msi(iommu->dev))
 		return 1;
 
-	r = request_irq(iommu->dev->irq, amd_iommu_int_handler,
-			IRQF_SAMPLE_RANDOM,
-			"AMD-Vi",
-			NULL);
+	r = request_threaded_irq(iommu->dev->irq,
+				 amd_iommu_int_handler,
+				 amd_iommu_int_thread,
+				 0, "AMD-Vi",
+				 iommu->dev);
 
 	if (r) {
 		pci_disable_msi(iommu->dev);
@@ -1244,6 +1275,7 @@ static void enable_iommus(void)
 		iommu_set_exclusion_range(iommu);
 		iommu_init_msi(iommu);
 		iommu_enable(iommu);
+		iommu_flush_all_caches(iommu);
 	}
 }
 
@@ -1274,8 +1306,8 @@ static void amd_iommu_resume(void)
 	 * we have to flush after the IOMMUs are enabled because a
 	 * disabled IOMMU will never execute the commands we send
 	 */
-	amd_iommu_flush_all_devices();
-	amd_iommu_flush_all_domains();
+	for_each_iommu(iommu)
+		iommu_flush_all_caches(iommu);
 }
 
 static int amd_iommu_suspend(void)
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index cd1ffed4ee2..289e92862fd 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -177,7 +177,6 @@ static struct clocksource clocksource_apbt = {
 	.rating		= APBT_CLOCKSOURCE_RATING,
 	.read		= apbt_read_clocksource,
 	.mask		= APBT_MASK,
-	.shift		= APBT_SHIFT,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 	.resume		= apbt_restart_clocksource,
 };
@@ -543,14 +542,7 @@ static int apbt_clocksource_register(void)
 	if (t1 == apbt_read_clocksource(&clocksource_apbt))
 		panic("APBT counter not counting. APBT disabled\n");
 
-	/*
-	 * initialize and register APBT clocksource
-	 * convert that to ns/clock cycle
-	 * mult = (ns/c) * 2^APBT_SHIFT
-	 */
-	clocksource_apbt.mult = div_sc(MSEC_PER_SEC,
-				       (unsigned long) apbt_freq, APBT_SHIFT);
-	clocksource_register(&clocksource_apbt);
+	clocksource_register_khz(&clocksource_apbt, (u32)apbt_freq*1000);
 
 	return 0;
 }
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 73fb469908c..3d2661ca654 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -30,6 +30,22 @@
 #include <asm/amd_nb.h>
 #include <asm/x86_init.h>
 
+/*
+ * Using 512M as goal, in case kexec will load kernel_big
+ * that will do the on-position decompress, and could overlap with
+ * with the gart aperture that is used.
+ * Sequence:
+ * kernel_small
+ * ==> kexec (with kdump trigger path or gart still enabled)
+ * ==> kernel_small (gart area become e820_reserved)
+ * ==> kexec (with kdump trigger path or gart still enabled)
+ * ==> kerne_big (uncompressed size will be big than 64M or 128M)
+ * So don't use 512M below as gart iommu, leave the space for kernel
+ * code for safe.
+ */
+#define GART_MIN_ADDR	(512ULL << 20)
+#define GART_MAX_ADDR	(1ULL   << 32)
+
 int gart_iommu_aperture;
 int gart_iommu_aperture_disabled __initdata;
 int gart_iommu_aperture_allowed __initdata;
@@ -70,21 +86,9 @@ static u32 __init allocate_aperture(void)
 	 * memory. Unfortunately we cannot move it up because that would
 	 * make the IOMMU useless.
 	 */
-	/*
-	 * using 512M as goal, in case kexec will load kernel_big
-	 * that will do the on position decompress, and  could overlap with
-	 * that position with gart that is used.
-	 * sequende:
-	 * kernel_small
-	 * ==> kexec (with kdump trigger path or previous doesn't shutdown gart)
-	 * ==> kernel_small(gart area become e820_reserved)
-	 * ==> kexec (with kdump trigger path or previous doesn't shutdown gart)
-	 * ==> kerne_big (uncompressed size will be big than 64M or 128M)
-	 * so don't use 512M below as gart iommu, leave the space for kernel
-	 * code for safe
-	 */
-	addr = memblock_find_in_range(0, 1ULL<<32, aper_size, 512ULL<<20);
-	if (addr == MEMBLOCK_ERROR || addr + aper_size > 0xffffffff) {
+	addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR,
+				      aper_size, aper_size);
+	if (addr == MEMBLOCK_ERROR || addr + aper_size > GART_MAX_ADDR) {
 		printk(KERN_ERR
 			"Cannot allocate aperture memory hole (%lx,%uK)\n",
 				addr, aper_size>>10);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index fabf01eff77..f92a8e5d1e2 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -505,7 +505,7 @@ static void __cpuinit setup_APIC_timer(void)
 {
 	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
 
-	if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_ARAT)) {
+	if (this_cpu_has(X86_FEATURE_ARAT)) {
 		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
 		/* Make LAPIC timer preferrable over percpu HPET */
 		lapic_clockevent.rating = 150;
@@ -1237,6 +1237,17 @@ void __cpuinit setup_local_APIC(void)
 	/* always use the value from LDR */
 	early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
 		logical_smp_processor_id();
+
+	/*
+	 * Some NUMA implementations (NUMAQ) don't initialize apicid to
+	 * node mapping during NUMA init.  Now that logical apicid is
+	 * guaranteed to be known, give it another chance.  This is already
+	 * a bit too late - percpu allocation has already happened without
+	 * proper NUMA affinity.
+	 */
+	if (apic->x86_32_numa_cpu_node)
+		set_apicid_to_node(early_per_cpu(x86_cpu_to_apicid, cpu),
+				   apic->x86_32_numa_cpu_node(cpu));
 #endif
 
 	/*
@@ -1812,30 +1823,41 @@ void smp_spurious_interrupt(struct pt_regs *regs)
  */
 void smp_error_interrupt(struct pt_regs *regs)
 {
-	u32 v, v1;
+	u32 v0, v1;
+	u32 i = 0;
+	static const char * const error_interrupt_reason[] = {
+		"Send CS error",		/* APIC Error Bit 0 */
+		"Receive CS error",		/* APIC Error Bit 1 */
+		"Send accept error",		/* APIC Error Bit 2 */
+		"Receive accept error",		/* APIC Error Bit 3 */
+		"Redirectable IPI",		/* APIC Error Bit 4 */
+		"Send illegal vector",		/* APIC Error Bit 5 */
+		"Received illegal vector",	/* APIC Error Bit 6 */
+		"Illegal register address",	/* APIC Error Bit 7 */
+	};
 
 	exit_idle();
 	irq_enter();
 	/* First tickle the hardware, only then report what went on. -- REW */
-	v = apic_read(APIC_ESR);
+	v0 = apic_read(APIC_ESR);
 	apic_write(APIC_ESR, 0);
 	v1 = apic_read(APIC_ESR);
 	ack_APIC_irq();
 	atomic_inc(&irq_err_count);
 
-	/*
-	 * Here is what the APIC error bits mean:
-	 * 0: Send CS error
-	 * 1: Receive CS error
-	 * 2: Send accept error
-	 * 3: Receive accept error
-	 * 4: Reserved
-	 * 5: Send illegal vector
-	 * 6: Received illegal vector
-	 * 7: Illegal register address
-	 */
-	pr_debug("APIC error on CPU%d: %02x(%02x)\n",
-		smp_processor_id(), v , v1);
+	apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x(%02x)",
+		    smp_processor_id(), v0 , v1);
+
+	v1 = v1 & 0xff;
+	while (v1) {
+		if (v1 & 0x1)
+			apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
+		i++;
+		v1 >>= 1;
+	};
+
+	apic_printk(APIC_DEBUG, KERN_CONT "\n");
+
 	irq_exit();
 }
 
@@ -2003,21 +2025,6 @@ void default_init_apic_ldr(void)
 	apic_write(APIC_LDR, val);
 }
 
-#ifdef CONFIG_X86_32
-int default_x86_32_numa_cpu_node(int cpu)
-{
-#ifdef CONFIG_NUMA
-	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
-
-	if (apicid != BAD_APICID)
-		return __apicid_to_node[apicid];
-	return NUMA_NO_NODE;
-#else
-	return 0;
-#endif
-}
-#endif
-
 /*
  * Power management
  */
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index f1baa2dc087..775b82bc655 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -119,14 +119,6 @@ static void noop_apic_write(u32 reg, u32 v)
 	WARN_ON_ONCE(cpu_has_apic && !disable_apic);
 }
 
-#ifdef CONFIG_X86_32
-static int noop_x86_32_numa_cpu_node(int cpu)
-{
-	/* we're always on node 0 */
-	return 0;
-}
-#endif
-
 struct apic apic_noop = {
 	.name				= "noop",
 	.probe				= noop_probe,
@@ -195,6 +187,5 @@ struct apic apic_noop = {
 
 #ifdef CONFIG_X86_32
 	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
-	.x86_32_numa_cpu_node		= noop_x86_32_numa_cpu_node,
 #endif
 };
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 541a2e43165..d84ac5a584b 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -253,5 +253,4 @@ struct apic apic_bigsmp = {
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
 	.x86_32_early_logical_apicid	= bigsmp_early_logical_apicid,
-	.x86_32_numa_cpu_node		= default_x86_32_numa_cpu_node,
 };
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 3e9de4854c5..70533de5bd2 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -510,11 +510,6 @@ static void es7000_setup_apic_routing(void)
 		nr_ioapics, cpumask_bits(es7000_target_cpus())[0]);
 }
 
-static int es7000_numa_cpu_node(int cpu)
-{
-	return 0;
-}
-
 static int es7000_cpu_present_to_apicid(int mps_cpu)
 {
 	if (!mps_cpu)
@@ -688,7 +683,6 @@ struct apic __refdata apic_es7000_cluster = {
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
 	.x86_32_early_logical_apicid	= es7000_early_logical_apicid,
-	.x86_32_numa_cpu_node		= es7000_numa_cpu_node,
 };
 
 struct apic __refdata apic_es7000 = {
@@ -752,5 +746,4 @@ struct apic __refdata apic_es7000 = {
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
 	.x86_32_early_logical_apicid	= es7000_early_logical_apicid,
-	.x86_32_numa_cpu_node		= es7000_numa_cpu_node,
 };
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 68df09bba92..45fd33d1fd3 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -128,8 +128,8 @@ static int __init parse_noapic(char *str)
 }
 early_param("noapic", parse_noapic);
 
-static int io_apic_setup_irq_pin_once(unsigned int irq, int node,
-				      struct io_apic_irq_attr *attr);
+static int io_apic_setup_irq_pin(unsigned int irq, int node,
+				 struct io_apic_irq_attr *attr);
 
 /* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
 void mp_save_irq(struct mpc_intsrc *m)
@@ -3570,7 +3570,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 }
 #endif /* CONFIG_HT_IRQ */
 
-int
+static int
 io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
 {
 	struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node);
@@ -3585,8 +3585,8 @@ io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
 	return ret;
 }
 
-static int io_apic_setup_irq_pin_once(unsigned int irq, int node,
-				      struct io_apic_irq_attr *attr)
+int io_apic_setup_irq_pin_once(unsigned int irq, int node,
+			       struct io_apic_irq_attr *attr)
 {
 	unsigned int id = attr->ioapic, pin = attr->ioapic_pin;
 	int ret;
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 6273eee5134..30f13319e24 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -48,8 +48,6 @@
 #include <asm/e820.h>
 #include <asm/ipi.h>
 
-#define	MB_TO_PAGES(addr)		((addr) << (20 - PAGE_SHIFT))
-
 int found_numaq;
 
 /*
@@ -79,31 +77,20 @@ int					quad_local_to_mp_bus_id[NR_CPUS/4][4];
 static inline void numaq_register_node(int node, struct sys_cfg_data *scd)
 {
 	struct eachquadmem *eq = scd->eq + node;
+	u64 start = (u64)(eq->hi_shrd_mem_start - eq->priv_mem_size) << 20;
+	u64 end = (u64)(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size) << 20;
+	int ret;
 
-	node_set_online(node);
-
-	/* Convert to pages */
-	node_start_pfn[node] =
-		 MB_TO_PAGES(eq->hi_shrd_mem_start - eq->priv_mem_size);
-
-	node_end_pfn[node] =
-		 MB_TO_PAGES(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size);
-
-	memblock_x86_register_active_regions(node, node_start_pfn[node],
-						node_end_pfn[node]);
-
-	memory_present(node, node_start_pfn[node], node_end_pfn[node]);
-
-	node_remap_size[node] = node_memmap_size_bytes(node,
-					node_start_pfn[node],
-					node_end_pfn[node]);
+	node_set(node, numa_nodes_parsed);
+	ret = numa_add_memblk(node, start, end);
+	BUG_ON(ret < 0);
 }
 
 /*
  * Function: smp_dump_qct()
  *
  * Description: gets memory layout from the quad config table.  This
- * function also updates node_online_map with the nodes (quads) present.
+ * function also updates numa_nodes_parsed with the nodes (quads) present.
  */
 static void __init smp_dump_qct(void)
 {
@@ -112,7 +99,6 @@ static void __init smp_dump_qct(void)
 
 	scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR);
 
-	nodes_clear(node_online_map);
 	for_each_node(node) {
 		if (scd->quads_present31_0 & (1 << node))
 			numaq_register_node(node, scd);
@@ -282,14 +268,14 @@ static __init void early_check_numaq(void)
 	}
 }
 
-int __init get_memcfg_numaq(void)
+int __init numaq_numa_init(void)
 {
 	early_check_numaq();
 	if (!found_numaq)
-		return 0;
+		return -ENOENT;
 	smp_dump_qct();
 
-	return 1;
+	return 0;
 }
 
 #define NUMAQ_APIC_DFR_VALUE	(APIC_DFR_CLUSTER)
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index fc84c7b6110..6541e471fd9 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -172,7 +172,6 @@ struct apic apic_default = {
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
 	.x86_32_early_logical_apicid	= default_x86_32_early_logical_apicid,
-	.x86_32_numa_cpu_node		= default_x86_32_numa_cpu_node,
 };
 
 extern struct apic apic_numaq;
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index e4b8059b414..35bcd7d995a 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -551,5 +551,4 @@ struct apic apic_summit = {
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
 	.x86_32_early_logical_apicid	= summit_early_logical_apicid,
-	.x86_32_numa_cpu_node		= default_x86_32_numa_cpu_node,
 };
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 33b10a0fc09..7acd2d2ac96 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -37,6 +37,13 @@
 #include <asm/smp.h>
 #include <asm/x86_init.h>
 #include <asm/emergency-restart.h>
+#include <asm/nmi.h>
+
+/* BMC sets a bit this MMR non-zero before sending an NMI */
+#define UVH_NMI_MMR				UVH_SCRATCH5
+#define UVH_NMI_MMR_CLEAR			(UVH_NMI_MMR + 8)
+#define UV_NMI_PENDING_MASK			(1UL << 63)
+DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count);
 
 DEFINE_PER_CPU(int, x2apic_extra_bits);
 
@@ -642,18 +649,46 @@ void __cpuinit uv_cpu_init(void)
  */
 int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
 {
+	unsigned long real_uv_nmi;
+	int bid;
+
 	if (reason != DIE_NMIUNKNOWN)
 		return NOTIFY_OK;
 
 	if (in_crash_kexec)
 		/* do nothing if entering the crash kernel */
 		return NOTIFY_OK;
+
 	/*
-	 * Use a lock so only one cpu prints at a time
-	 * to prevent intermixed output.
+	 * Each blade has an MMR that indicates when an NMI has been sent
+	 * to cpus on the blade. If an NMI is detected, atomically
+	 * clear the MMR and update a per-blade NMI count used to
+	 * cause each cpu on the blade to notice a new NMI.
+	 */
+	bid = uv_numa_blade_id();
+	real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
+
+	if (unlikely(real_uv_nmi)) {
+		spin_lock(&uv_blade_info[bid].nmi_lock);
+		real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
+		if (real_uv_nmi) {
+			uv_blade_info[bid].nmi_count++;
+			uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK);
+		}
+		spin_unlock(&uv_blade_info[bid].nmi_lock);
+	}
+
+	if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count))
+		return NOTIFY_DONE;
+
+	__get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count;
+
+	/*
+	 * Use a lock so only one cpu prints at a time.
+	 * This prevents intermixed output.
 	 */
 	spin_lock(&uv_nmi_lock);
-	pr_info("NMI stack dump cpu %u:\n", smp_processor_id());
+	pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id());
 	dump_stack();
 	spin_unlock(&uv_nmi_lock);
 
@@ -661,7 +696,8 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
 }
 
 static struct notifier_block uv_dump_stack_nmi_nb = {
-	.notifier_call	= uv_handle_nmi
+	.notifier_call	= uv_handle_nmi,
+	.priority = NMI_LOCAL_LOW_PRIOR - 1,
 };
 
 void uv_register_nmi_notifier(void)
@@ -720,8 +756,9 @@ void __init uv_system_init(void)
 	printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
 
 	bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
-	uv_blade_info = kmalloc(bytes, GFP_KERNEL);
+	uv_blade_info = kzalloc(bytes, GFP_KERNEL);
 	BUG_ON(!uv_blade_info);
+
 	for (blade = 0; blade < uv_num_possible_blades(); blade++)
 		uv_blade_info[blade].memory_nid = -1;
 
@@ -747,6 +784,7 @@ void __init uv_system_init(void)
 			uv_blade_info[blade].pnode = pnode;
 			uv_blade_info[blade].nr_possible_cpus = 0;
 			uv_blade_info[blade].nr_online_cpus = 0;
+			spin_lock_init(&uv_blade_info[blade].nmi_lock);
 			max_pnode = max(pnode, max_pnode);
 			blade++;
 		}
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index adee12e0da1..3bfa0223596 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1238,7 +1238,6 @@ static int suspend(int vetoable)
 	dpm_suspend_noirq(PMSG_SUSPEND);
 
 	local_irq_disable();
-	sysdev_suspend(PMSG_SUSPEND);
 	syscore_suspend();
 
 	local_irq_enable();
@@ -1258,7 +1257,6 @@ static int suspend(int vetoable)
 	err = (err == APM_SUCCESS) ? 0 : -EIO;
 
 	syscore_resume();
-	sysdev_resume();
 	local_irq_enable();
 
 	dpm_resume_noirq(PMSG_RESUME);
@@ -1282,7 +1280,6 @@ static void standby(void)
 	dpm_suspend_noirq(PMSG_SUSPEND);
 
 	local_irq_disable();
-	sysdev_suspend(PMSG_SUSPEND);
 	syscore_suspend();
 	local_irq_enable();
 
@@ -1292,7 +1289,6 @@ static void standby(void)
 
 	local_irq_disable();
 	syscore_resume();
-	sysdev_resume();
 	local_irq_enable();
 
 	dpm_resume_noirq(PMSG_RESUME);
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 3f0ebe429a0..6042981d030 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -30,7 +30,6 @@ obj-$(CONFIG_PERF_EVENTS)		+= perf_event.o
 
 obj-$(CONFIG_X86_MCE)			+= mcheck/
 obj-$(CONFIG_MTRR)			+= mtrr/
-obj-$(CONFIG_CPU_FREQ)			+= cpufreq/
 
 obj-$(CONFIG_X86_LOCAL_APIC)		+= perfctr-watchdog.o
 
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 3532d3bf810..8f5cabb3c5b 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -613,7 +613,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 #endif
 
 	/* As a rule processors have APIC timer running in deep C states */
-	if (c->x86 >= 0xf && !cpu_has_amd_erratum(amd_erratum_400))
+	if (c->x86 > 0xf && !cpu_has_amd_erratum(amd_erratum_400))
 		set_cpu_cap(c, X86_FEATURE_ARAT);
 
 	/*
@@ -629,10 +629,13 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 		 * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012
 		 */
 		u64 mask;
+		int err;
 
-		rdmsrl(MSR_AMD64_MCx_MASK(4), mask);
-		mask |= (1 << 10);
-		wrmsrl(MSR_AMD64_MCx_MASK(4), mask);
+		err = rdmsrl_safe(MSR_AMD64_MCx_MASK(4), &mask);
+		if (err == 0) {
+			mask |= (1 << 10);
+			checking_wrmsrl(MSR_AMD64_MCx_MASK(4), mask);
+		}
 	}
 }
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index e2ced0074a4..c8b41623377 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -254,6 +254,25 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
 }
 #endif
 
+static int disable_smep __cpuinitdata;
+static __init int setup_disable_smep(char *arg)
+{
+	disable_smep = 1;
+	return 1;
+}
+__setup("nosmep", setup_disable_smep);
+
+static __cpuinit void setup_smep(struct cpuinfo_x86 *c)
+{
+	if (cpu_has(c, X86_FEATURE_SMEP)) {
+		if (unlikely(disable_smep)) {
+			setup_clear_cpu_cap(X86_FEATURE_SMEP);
+			clear_in_cr4(X86_CR4_SMEP);
+		} else
+			set_in_cr4(X86_CR4_SMEP);
+	}
+}
+
 /*
  * Some CPU features depend on higher CPUID levels, which may not always
  * be available due to CPUID level capping or broken virtualization
@@ -565,8 +584,7 @@ void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
 
 		cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
 
-		if (eax > 0)
-			c->x86_capability[9] = ebx;
+		c->x86_capability[9] = ebx;
 	}
 
 	/* AMD-defined flags: level 0x80000001 */
@@ -668,6 +686,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 	c->cpu_index = 0;
 #endif
 	filter_cpuid_features(c, false);
+
+	setup_smep(c);
 }
 
 void __init early_cpu_init(void)
@@ -753,6 +773,8 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
 #endif
 	}
 
+	setup_smep(c);
+
 	get_model_name(c); /* Default name */
 
 	detect_nopl(c);
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
deleted file mode 100644
index 870e6cc6ad2..00000000000
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ /dev/null
@@ -1,266 +0,0 @@
-#
-# CPU Frequency scaling
-#
-
-menu "CPU Frequency scaling"
-
-source "drivers/cpufreq/Kconfig"
-
-if CPU_FREQ
-
-comment "CPUFreq processor drivers"
-
-config X86_PCC_CPUFREQ
-	tristate "Processor Clocking Control interface driver"
-	depends on ACPI && ACPI_PROCESSOR
-	help
-	  This driver adds support for the PCC interface.
-
-	  For details, take a look at:
-	  <file:Documentation/cpu-freq/pcc-cpufreq.txt>.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called pcc-cpufreq.
-
-	  If in doubt, say N.
-
-config X86_ACPI_CPUFREQ
-	tristate "ACPI Processor P-States driver"
-	select CPU_FREQ_TABLE
-	depends on ACPI_PROCESSOR
-	help
-	  This driver adds a CPUFreq driver which utilizes the ACPI
-	  Processor Performance States.
-	  This driver also supports Intel Enhanced Speedstep.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called acpi-cpufreq.
-
-	  For details, take a look at <file:Documentation/cpu-freq/>.
-
-	  If in doubt, say N.
-
-config ELAN_CPUFREQ
-	tristate "AMD Elan SC400 and SC410"
-	select CPU_FREQ_TABLE
-	depends on X86_ELAN
-	---help---
-	  This adds the CPUFreq driver for AMD Elan SC400 and SC410
-	  processors.
-
-	  You need to specify the processor maximum speed as boot
-	  parameter: elanfreq=maxspeed (in kHz) or as module
-	  parameter "max_freq".
-
-	  For details, take a look at <file:Documentation/cpu-freq/>.
-
-	  If in doubt, say N.
-
-config SC520_CPUFREQ
-	tristate "AMD Elan SC520"
-	select CPU_FREQ_TABLE
-	depends on X86_ELAN
-	---help---
-	  This adds the CPUFreq driver for AMD Elan SC520 processor.
-
-	  For details, take a look at <file:Documentation/cpu-freq/>.
-
-	  If in doubt, say N.
-
-
-config X86_POWERNOW_K6
-	tristate "AMD Mobile K6-2/K6-3 PowerNow!"
-	select CPU_FREQ_TABLE
-	depends on X86_32
-	help
-	  This adds the CPUFreq driver for mobile AMD K6-2+ and mobile
-	  AMD K6-3+ processors.
-
-	  For details, take a look at <file:Documentation/cpu-freq/>.
-
-	  If in doubt, say N.
-
-config X86_POWERNOW_K7
-	tristate "AMD Mobile Athlon/Duron PowerNow!"
-	select CPU_FREQ_TABLE
-	depends on X86_32
-	help
-	  This adds the CPUFreq driver for mobile AMD K7 mobile processors.
-
-	  For details, take a look at <file:Documentation/cpu-freq/>.
-
-	  If in doubt, say N.
-
-config X86_POWERNOW_K7_ACPI
-	bool
-	depends on X86_POWERNOW_K7 && ACPI_PROCESSOR
-	depends on !(X86_POWERNOW_K7 = y && ACPI_PROCESSOR = m)
-	depends on X86_32
-	default y
-
-config X86_POWERNOW_K8
-	tristate "AMD Opteron/Athlon64 PowerNow!"
-	select CPU_FREQ_TABLE
-	depends on ACPI && ACPI_PROCESSOR
-	help
-	  This adds the CPUFreq driver for K8/K10 Opteron/Athlon64 processors.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called powernow-k8.
-
-	  For details, take a look at <file:Documentation/cpu-freq/>.
-
-config X86_GX_SUSPMOD
-	tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation"
-	depends on X86_32 && PCI
-	help
-	 This add the CPUFreq driver for NatSemi Geode processors which
-	 support suspend modulation.
-
-	 For details, take a look at <file:Documentation/cpu-freq/>.
-
-	 If in doubt, say N.
-
-config X86_SPEEDSTEP_CENTRINO
-	tristate "Intel Enhanced SpeedStep (deprecated)"
-	select CPU_FREQ_TABLE
-	select X86_SPEEDSTEP_CENTRINO_TABLE if X86_32
-	depends on X86_32 || (X86_64 && ACPI_PROCESSOR)
-	help
-	  This is deprecated and this functionality is now merged into
-	  acpi_cpufreq (X86_ACPI_CPUFREQ). Use that driver instead of
-	  speedstep_centrino.
-	  This adds the CPUFreq driver for Enhanced SpeedStep enabled
-	  mobile CPUs.  This means Intel Pentium M (Centrino) CPUs
-	  or 64bit enabled Intel Xeons.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called speedstep-centrino.
-
-	  For details, take a look at <file:Documentation/cpu-freq/>.
-
-	  If in doubt, say N.
-
-config X86_SPEEDSTEP_CENTRINO_TABLE
-	bool "Built-in tables for Banias CPUs"
-	depends on X86_32 && X86_SPEEDSTEP_CENTRINO
-	default y
-	help
-	  Use built-in tables for Banias CPUs if ACPI encoding
-	  is not available.
-
-	  If in doubt, say N.
-
-config X86_SPEEDSTEP_ICH
-	tristate "Intel Speedstep on ICH-M chipsets (ioport interface)"
-	select CPU_FREQ_TABLE
-	depends on X86_32
-	help
-	  This adds the CPUFreq driver for certain mobile Intel Pentium III
-	  (Coppermine), all mobile Intel Pentium III-M (Tualatin) and all
-	  mobile Intel Pentium 4 P4-M on systems which have an Intel ICH2,
-	  ICH3 or ICH4 southbridge.
-
-	  For details, take a look at <file:Documentation/cpu-freq/>.
-
-	  If in doubt, say N.
-
-config X86_SPEEDSTEP_SMI
-	tristate "Intel SpeedStep on 440BX/ZX/MX chipsets (SMI interface)"
-	select CPU_FREQ_TABLE
-	depends on X86_32 && EXPERIMENTAL
-	help
-	  This adds the CPUFreq driver for certain mobile Intel Pentium III
-	  (Coppermine), all mobile Intel Pentium III-M (Tualatin)
-	  on systems which have an Intel 440BX/ZX/MX southbridge.
-
-	  For details, take a look at <file:Documentation/cpu-freq/>.
-
-	  If in doubt, say N.
-
-config X86_P4_CLOCKMOD
-	tristate "Intel Pentium 4 clock modulation"
-	select CPU_FREQ_TABLE
-	help
-	  This adds the CPUFreq driver for Intel Pentium 4 / XEON
-	  processors.  When enabled it will lower CPU temperature by skipping
-	  clocks.
-
-	  This driver should be only used in exceptional
-	  circumstances when very low power is needed because it causes severe
-	  slowdowns and noticeable latencies.  Normally Speedstep should be used
-	  instead.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called p4-clockmod.
-
-	  For details, take a look at <file:Documentation/cpu-freq/>.
-
-	  Unless you are absolutely sure say N.
-
-config X86_CPUFREQ_NFORCE2
-	tristate "nVidia nForce2 FSB changing"
-	depends on X86_32 && EXPERIMENTAL
-	help
-	  This adds the CPUFreq driver for FSB changing on nVidia nForce2
-	  platforms.
-
-	  For details, take a look at <file:Documentation/cpu-freq/>.
-
-	  If in doubt, say N.
-
-config X86_LONGRUN
-	tristate "Transmeta LongRun"
-	depends on X86_32
-	help
-	  This adds the CPUFreq driver for Transmeta Crusoe and Efficeon processors
-	  which support LongRun.
-
-	  For details, take a look at <file:Documentation/cpu-freq/>.
-
-	  If in doubt, say N.
-
-config X86_LONGHAUL
-	tristate "VIA Cyrix III Longhaul"
-	select CPU_FREQ_TABLE
-	depends on X86_32 && ACPI_PROCESSOR
-	help
-	  This adds the CPUFreq driver for VIA Samuel/CyrixIII,
-	  VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T
-	  processors.
-
-	  For details, take a look at <file:Documentation/cpu-freq/>.
-
-	  If in doubt, say N.
-
-config X86_E_POWERSAVER
-	tristate "VIA C7 Enhanced PowerSaver (DANGEROUS)"
-	select CPU_FREQ_TABLE
-	depends on X86_32 && EXPERIMENTAL
-	help
-	  This adds the CPUFreq driver for VIA C7 processors.  However, this driver
-	  does not have any safeguards to prevent operating the CPU out of spec
-	  and is thus considered dangerous.  Please use the regular ACPI cpufreq
-	  driver, enabled by CONFIG_X86_ACPI_CPUFREQ.
-
-	  If in doubt, say N.
-
-comment "shared options"
-
-config X86_SPEEDSTEP_LIB
-	tristate
-	default (X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD)
-
-config X86_SPEEDSTEP_RELAXED_CAP_CHECK
-	bool "Relaxed speedstep capability checks"
-	depends on X86_32 && (X86_SPEEDSTEP_SMI || X86_SPEEDSTEP_ICH)
-	help
-	  Don't perform all checks for a speedstep capable system which would
-	  normally be done. Some ancient or strange systems, though speedstep
-	  capable, don't always indicate that they are speedstep capable. This
-	  option lets the probing code bypass some of those checks if the
-	  parameter "relaxed_check=1" is passed to the module.
-
-endif	# CPU_FREQ
-
-endmenu
diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile
deleted file mode 100644
index bd54bf67e6f..00000000000
--- a/arch/x86/kernel/cpu/cpufreq/Makefile
+++ /dev/null
@@ -1,21 +0,0 @@
-# Link order matters. K8 is preferred to ACPI because of firmware bugs in early
-# K8 systems. ACPI is preferred to all other hardware-specific drivers.
-# speedstep-* is preferred over p4-clockmod.
-
-obj-$(CONFIG_X86_POWERNOW_K8)		+= powernow-k8.o mperf.o
-obj-$(CONFIG_X86_ACPI_CPUFREQ)		+= acpi-cpufreq.o mperf.o
-obj-$(CONFIG_X86_PCC_CPUFREQ)		+= pcc-cpufreq.o
-obj-$(CONFIG_X86_POWERNOW_K6)		+= powernow-k6.o
-obj-$(CONFIG_X86_POWERNOW_K7)		+= powernow-k7.o
-obj-$(CONFIG_X86_LONGHAUL)		+= longhaul.o
-obj-$(CONFIG_X86_E_POWERSAVER)		+= e_powersaver.o
-obj-$(CONFIG_ELAN_CPUFREQ)		+= elanfreq.o
-obj-$(CONFIG_SC520_CPUFREQ)		+= sc520_freq.o
-obj-$(CONFIG_X86_LONGRUN)		+= longrun.o  
-obj-$(CONFIG_X86_GX_SUSPMOD)		+= gx-suspmod.o
-obj-$(CONFIG_X86_SPEEDSTEP_ICH)		+= speedstep-ich.o
-obj-$(CONFIG_X86_SPEEDSTEP_LIB)		+= speedstep-lib.o
-obj-$(CONFIG_X86_SPEEDSTEP_SMI)		+= speedstep-smi.o
-obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO)	+= speedstep-centrino.o
-obj-$(CONFIG_X86_P4_CLOCKMOD)		+= p4-clockmod.o
-obj-$(CONFIG_X86_CPUFREQ_NFORCE2)	+= cpufreq-nforce2.o
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
deleted file mode 100644
index a2baafb2fe6..00000000000
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ /dev/null
@@ -1,776 +0,0 @@
-/*
- * acpi-cpufreq.c - ACPI Processor P-States Driver
- *
- *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
- *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
- *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
- *  Copyright (C) 2006       Denis Sadykov <denis.m.sadykov@intel.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or (at
- *  your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/sched.h>
-#include <linux/cpufreq.h>
-#include <linux/compiler.h>
-#include <linux/dmi.h>
-#include <linux/slab.h>
-
-#include <linux/acpi.h>
-#include <linux/io.h>
-#include <linux/delay.h>
-#include <linux/uaccess.h>
-
-#include <acpi/processor.h>
-
-#include <asm/msr.h>
-#include <asm/processor.h>
-#include <asm/cpufeature.h>
-#include "mperf.h"
-
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"acpi-cpufreq", msg)
-
-MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
-MODULE_DESCRIPTION("ACPI Processor P-States Driver");
-MODULE_LICENSE("GPL");
-
-enum {
-	UNDEFINED_CAPABLE = 0,
-	SYSTEM_INTEL_MSR_CAPABLE,
-	SYSTEM_IO_CAPABLE,
-};
-
-#define INTEL_MSR_RANGE		(0xffff)
-
-struct acpi_cpufreq_data {
-	struct acpi_processor_performance *acpi_data;
-	struct cpufreq_frequency_table *freq_table;
-	unsigned int resume;
-	unsigned int cpu_feature;
-};
-
-static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data);
-
-/* acpi_perf_data is a pointer to percpu data. */
-static struct acpi_processor_performance __percpu *acpi_perf_data;
-
-static struct cpufreq_driver acpi_cpufreq_driver;
-
-static unsigned int acpi_pstate_strict;
-
-static int check_est_cpu(unsigned int cpuid)
-{
-	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
-
-	return cpu_has(cpu, X86_FEATURE_EST);
-}
-
-static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
-{
-	struct acpi_processor_performance *perf;
-	int i;
-
-	perf = data->acpi_data;
-
-	for (i = 0; i < perf->state_count; i++) {
-		if (value == perf->states[i].status)
-			return data->freq_table[i].frequency;
-	}
-	return 0;
-}
-
-static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
-{
-	int i;
-	struct acpi_processor_performance *perf;
-
-	msr &= INTEL_MSR_RANGE;
-	perf = data->acpi_data;
-
-	for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
-		if (msr == perf->states[data->freq_table[i].index].status)
-			return data->freq_table[i].frequency;
-	}
-	return data->freq_table[0].frequency;
-}
-
-static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
-{
-	switch (data->cpu_feature) {
-	case SYSTEM_INTEL_MSR_CAPABLE:
-		return extract_msr(val, data);
-	case SYSTEM_IO_CAPABLE:
-		return extract_io(val, data);
-	default:
-		return 0;
-	}
-}
-
-struct msr_addr {
-	u32 reg;
-};
-
-struct io_addr {
-	u16 port;
-	u8 bit_width;
-};
-
-struct drv_cmd {
-	unsigned int type;
-	const struct cpumask *mask;
-	union {
-		struct msr_addr msr;
-		struct io_addr io;
-	} addr;
-	u32 val;
-};
-
-/* Called via smp_call_function_single(), on the target CPU */
-static void do_drv_read(void *_cmd)
-{
-	struct drv_cmd *cmd = _cmd;
-	u32 h;
-
-	switch (cmd->type) {
-	case SYSTEM_INTEL_MSR_CAPABLE:
-		rdmsr(cmd->addr.msr.reg, cmd->val, h);
-		break;
-	case SYSTEM_IO_CAPABLE:
-		acpi_os_read_port((acpi_io_address)cmd->addr.io.port,
-				&cmd->val,
-				(u32)cmd->addr.io.bit_width);
-		break;
-	default:
-		break;
-	}
-}
-
-/* Called via smp_call_function_many(), on the target CPUs */
-static void do_drv_write(void *_cmd)
-{
-	struct drv_cmd *cmd = _cmd;
-	u32 lo, hi;
-
-	switch (cmd->type) {
-	case SYSTEM_INTEL_MSR_CAPABLE:
-		rdmsr(cmd->addr.msr.reg, lo, hi);
-		lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
-		wrmsr(cmd->addr.msr.reg, lo, hi);
-		break;
-	case SYSTEM_IO_CAPABLE:
-		acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
-				cmd->val,
-				(u32)cmd->addr.io.bit_width);
-		break;
-	default:
-		break;
-	}
-}
-
-static void drv_read(struct drv_cmd *cmd)
-{
-	int err;
-	cmd->val = 0;
-
-	err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1);
-	WARN_ON_ONCE(err);	/* smp_call_function_any() was buggy? */
-}
-
-static void drv_write(struct drv_cmd *cmd)
-{
-	int this_cpu;
-
-	this_cpu = get_cpu();
-	if (cpumask_test_cpu(this_cpu, cmd->mask))
-		do_drv_write(cmd);
-	smp_call_function_many(cmd->mask, do_drv_write, cmd, 1);
-	put_cpu();
-}
-
-static u32 get_cur_val(const struct cpumask *mask)
-{
-	struct acpi_processor_performance *perf;
-	struct drv_cmd cmd;
-
-	if (unlikely(cpumask_empty(mask)))
-		return 0;
-
-	switch (per_cpu(acfreq_data, cpumask_first(mask))->cpu_feature) {
-	case SYSTEM_INTEL_MSR_CAPABLE:
-		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
-		cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
-		break;
-	case SYSTEM_IO_CAPABLE:
-		cmd.type = SYSTEM_IO_CAPABLE;
-		perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data;
-		cmd.addr.io.port = perf->control_register.address;
-		cmd.addr.io.bit_width = perf->control_register.bit_width;
-		break;
-	default:
-		return 0;
-	}
-
-	cmd.mask = mask;
-	drv_read(&cmd);
-
-	dprintk("get_cur_val = %u\n", cmd.val);
-
-	return cmd.val;
-}
-
-static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
-{
-	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu);
-	unsigned int freq;
-	unsigned int cached_freq;
-
-	dprintk("get_cur_freq_on_cpu (%d)\n", cpu);
-
-	if (unlikely(data == NULL ||
-		     data->acpi_data == NULL || data->freq_table == NULL)) {
-		return 0;
-	}
-
-	cached_freq = data->freq_table[data->acpi_data->state].frequency;
-	freq = extract_freq(get_cur_val(cpumask_of(cpu)), data);
-	if (freq != cached_freq) {
-		/*
-		 * The dreaded BIOS frequency change behind our back.
-		 * Force set the frequency on next target call.
-		 */
-		data->resume = 1;
-	}
-
-	dprintk("cur freq = %u\n", freq);
-
-	return freq;
-}
-
-static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
-				struct acpi_cpufreq_data *data)
-{
-	unsigned int cur_freq;
-	unsigned int i;
-
-	for (i = 0; i < 100; i++) {
-		cur_freq = extract_freq(get_cur_val(mask), data);
-		if (cur_freq == freq)
-			return 1;
-		udelay(10);
-	}
-	return 0;
-}
-
-static int acpi_cpufreq_target(struct cpufreq_policy *policy,
-			       unsigned int target_freq, unsigned int relation)
-{
-	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
-	struct acpi_processor_performance *perf;
-	struct cpufreq_freqs freqs;
-	struct drv_cmd cmd;
-	unsigned int next_state = 0; /* Index into freq_table */
-	unsigned int next_perf_state = 0; /* Index into perf table */
-	unsigned int i;
-	int result = 0;
-
-	dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
-
-	if (unlikely(data == NULL ||
-	     data->acpi_data == NULL || data->freq_table == NULL)) {
-		return -ENODEV;
-	}
-
-	perf = data->acpi_data;
-	result = cpufreq_frequency_table_target(policy,
-						data->freq_table,
-						target_freq,
-						relation, &next_state);
-	if (unlikely(result)) {
-		result = -ENODEV;
-		goto out;
-	}
-
-	next_perf_state = data->freq_table[next_state].index;
-	if (perf->state == next_perf_state) {
-		if (unlikely(data->resume)) {
-			dprintk("Called after resume, resetting to P%d\n",
-				next_perf_state);
-			data->resume = 0;
-		} else {
-			dprintk("Already at target state (P%d)\n",
-				next_perf_state);
-			goto out;
-		}
-	}
-
-	switch (data->cpu_feature) {
-	case SYSTEM_INTEL_MSR_CAPABLE:
-		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
-		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
-		cmd.val = (u32) perf->states[next_perf_state].control;
-		break;
-	case SYSTEM_IO_CAPABLE:
-		cmd.type = SYSTEM_IO_CAPABLE;
-		cmd.addr.io.port = perf->control_register.address;
-		cmd.addr.io.bit_width = perf->control_register.bit_width;
-		cmd.val = (u32) perf->states[next_perf_state].control;
-		break;
-	default:
-		result = -ENODEV;
-		goto out;
-	}
-
-	/* cpufreq holds the hotplug lock, so we are safe from here on */
-	if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
-		cmd.mask = policy->cpus;
-	else
-		cmd.mask = cpumask_of(policy->cpu);
-
-	freqs.old = perf->states[perf->state].core_frequency * 1000;
-	freqs.new = data->freq_table[next_state].frequency;
-	for_each_cpu(i, policy->cpus) {
-		freqs.cpu = i;
-		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-	}
-
-	drv_write(&cmd);
-
-	if (acpi_pstate_strict) {
-		if (!check_freqs(cmd.mask, freqs.new, data)) {
-			dprintk("acpi_cpufreq_target failed (%d)\n",
-				policy->cpu);
-			result = -EAGAIN;
-			goto out;
-		}
-	}
-
-	for_each_cpu(i, policy->cpus) {
-		freqs.cpu = i;
-		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-	}
-	perf->state = next_perf_state;
-
-out:
-	return result;
-}
-
-static int acpi_cpufreq_verify(struct cpufreq_policy *policy)
-{
-	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
-
-	dprintk("acpi_cpufreq_verify\n");
-
-	return cpufreq_frequency_table_verify(policy, data->freq_table);
-}
-
-static unsigned long
-acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
-{
-	struct acpi_processor_performance *perf = data->acpi_data;
-
-	if (cpu_khz) {
-		/* search the closest match to cpu_khz */
-		unsigned int i;
-		unsigned long freq;
-		unsigned long freqn = perf->states[0].core_frequency * 1000;
-
-		for (i = 0; i < (perf->state_count-1); i++) {
-			freq = freqn;
-			freqn = perf->states[i+1].core_frequency * 1000;
-			if ((2 * cpu_khz) > (freqn + freq)) {
-				perf->state = i;
-				return freq;
-			}
-		}
-		perf->state = perf->state_count-1;
-		return freqn;
-	} else {
-		/* assume CPU is at P0... */
-		perf->state = 0;
-		return perf->states[0].core_frequency * 1000;
-	}
-}
-
-static void free_acpi_perf_data(void)
-{
-	unsigned int i;
-
-	/* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
-	for_each_possible_cpu(i)
-		free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
-				 ->shared_cpu_map);
-	free_percpu(acpi_perf_data);
-}
-
-/*
- * acpi_cpufreq_early_init - initialize ACPI P-States library
- *
- * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c)
- * in order to determine correct frequency and voltage pairings. We can
- * do _PDC and _PSD and find out the processor dependency for the
- * actual init that will happen later...
- */
-static int __init acpi_cpufreq_early_init(void)
-{
-	unsigned int i;
-	dprintk("acpi_cpufreq_early_init\n");
-
-	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
-	if (!acpi_perf_data) {
-		dprintk("Memory allocation error for acpi_perf_data.\n");
-		return -ENOMEM;
-	}
-	for_each_possible_cpu(i) {
-		if (!zalloc_cpumask_var_node(
-			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
-			GFP_KERNEL, cpu_to_node(i))) {
-
-			/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
-			free_acpi_perf_data();
-			return -ENOMEM;
-		}
-	}
-
-	/* Do initialization in ACPI core */
-	acpi_processor_preregister_performance(acpi_perf_data);
-	return 0;
-}
-
-#ifdef CONFIG_SMP
-/*
- * Some BIOSes do SW_ANY coordination internally, either set it up in hw
- * or do it in BIOS firmware and won't inform about it to OS. If not
- * detected, this has a side effect of making CPU run at a different speed
- * than OS intended it to run at. Detect it and handle it cleanly.
- */
-static int bios_with_sw_any_bug;
-
-static int sw_any_bug_found(const struct dmi_system_id *d)
-{
-	bios_with_sw_any_bug = 1;
-	return 0;
-}
-
-static const struct dmi_system_id sw_any_bug_dmi_table[] = {
-	{
-		.callback = sw_any_bug_found,
-		.ident = "Supermicro Server X6DLP",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
-			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
-			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
-		},
-	},
-	{ }
-};
-
-static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
-{
-	/* Intel Xeon Processor 7100 Series Specification Update
-	 * http://www.intel.com/Assets/PDF/specupdate/314554.pdf
-	 * AL30: A Machine Check Exception (MCE) Occurring during an
-	 * Enhanced Intel SpeedStep Technology Ratio Change May Cause
-	 * Both Processor Cores to Lock Up. */
-	if (c->x86_vendor == X86_VENDOR_INTEL) {
-		if ((c->x86 == 15) &&
-		    (c->x86_model == 6) &&
-		    (c->x86_mask == 8)) {
-			printk(KERN_INFO "acpi-cpufreq: Intel(R) "
-			    "Xeon(R) 7100 Errata AL30, processors may "
-			    "lock up on frequency changes: disabling "
-			    "acpi-cpufreq.\n");
-			return -ENODEV;
-		    }
-		}
-	return 0;
-}
-#endif
-
-static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
-{
-	unsigned int i;
-	unsigned int valid_states = 0;
-	unsigned int cpu = policy->cpu;
-	struct acpi_cpufreq_data *data;
-	unsigned int result = 0;
-	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
-	struct acpi_processor_performance *perf;
-#ifdef CONFIG_SMP
-	static int blacklisted;
-#endif
-
-	dprintk("acpi_cpufreq_cpu_init\n");
-
-#ifdef CONFIG_SMP
-	if (blacklisted)
-		return blacklisted;
-	blacklisted = acpi_cpufreq_blacklist(c);
-	if (blacklisted)
-		return blacklisted;
-#endif
-
-	data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
-
-	data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
-	per_cpu(acfreq_data, cpu) = data;
-
-	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
-		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
-
-	result = acpi_processor_register_performance(data->acpi_data, cpu);
-	if (result)
-		goto err_free;
-
-	perf = data->acpi_data;
-	policy->shared_type = perf->shared_type;
-
-	/*
-	 * Will let policy->cpus know about dependency only when software
-	 * coordination is required.
-	 */
-	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
-	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
-		cpumask_copy(policy->cpus, perf->shared_cpu_map);
-	}
-	cpumask_copy(policy->related_cpus, perf->shared_cpu_map);
-
-#ifdef CONFIG_SMP
-	dmi_check_system(sw_any_bug_dmi_table);
-	if (bios_with_sw_any_bug && cpumask_weight(policy->cpus) == 1) {
-		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
-		cpumask_copy(policy->cpus, cpu_core_mask(cpu));
-	}
-#endif
-
-	/* capability check */
-	if (perf->state_count <= 1) {
-		dprintk("No P-States\n");
-		result = -ENODEV;
-		goto err_unreg;
-	}
-
-	if (perf->control_register.space_id != perf->status_register.space_id) {
-		result = -ENODEV;
-		goto err_unreg;
-	}
-
-	switch (perf->control_register.space_id) {
-	case ACPI_ADR_SPACE_SYSTEM_IO:
-		dprintk("SYSTEM IO addr space\n");
-		data->cpu_feature = SYSTEM_IO_CAPABLE;
-		break;
-	case ACPI_ADR_SPACE_FIXED_HARDWARE:
-		dprintk("HARDWARE addr space\n");
-		if (!check_est_cpu(cpu)) {
-			result = -ENODEV;
-			goto err_unreg;
-		}
-		data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
-		break;
-	default:
-		dprintk("Unknown addr space %d\n",
-			(u32) (perf->control_register.space_id));
-		result = -ENODEV;
-		goto err_unreg;
-	}
-
-	data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) *
-		    (perf->state_count+1), GFP_KERNEL);
-	if (!data->freq_table) {
-		result = -ENOMEM;
-		goto err_unreg;
-	}
-
-	/* detect transition latency */
-	policy->cpuinfo.transition_latency = 0;
-	for (i = 0; i < perf->state_count; i++) {
-		if ((perf->states[i].transition_latency * 1000) >
-		    policy->cpuinfo.transition_latency)
-			policy->cpuinfo.transition_latency =
-			    perf->states[i].transition_latency * 1000;
-	}
-
-	/* Check for high latency (>20uS) from buggy BIOSes, like on T42 */
-	if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
-	    policy->cpuinfo.transition_latency > 20 * 1000) {
-		policy->cpuinfo.transition_latency = 20 * 1000;
-		printk_once(KERN_INFO
-			    "P-state transition latency capped at 20 uS\n");
-	}
-
-	/* table init */
-	for (i = 0; i < perf->state_count; i++) {
-		if (i > 0 && perf->states[i].core_frequency >=
-		    data->freq_table[valid_states-1].frequency / 1000)
-			continue;
-
-		data->freq_table[valid_states].index = i;
-		data->freq_table[valid_states].frequency =
-		    perf->states[i].core_frequency * 1000;
-		valid_states++;
-	}
-	data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
-	perf->state = 0;
-
-	result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table);
-	if (result)
-		goto err_freqfree;
-
-	if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
-		printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");
-
-	switch (perf->control_register.space_id) {
-	case ACPI_ADR_SPACE_SYSTEM_IO:
-		/* Current speed is unknown and not detectable by IO port */
-		policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
-		break;
-	case ACPI_ADR_SPACE_FIXED_HARDWARE:
-		acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
-		policy->cur = get_cur_freq_on_cpu(cpu);
-		break;
-	default:
-		break;
-	}
-
-	/* notify BIOS that we exist */
-	acpi_processor_notify_smm(THIS_MODULE);
-
-	/* Check for APERF/MPERF support in hardware */
-	if (cpu_has(c, X86_FEATURE_APERFMPERF))
-		acpi_cpufreq_driver.getavg = cpufreq_get_measured_perf;
-
-	dprintk("CPU%u - ACPI performance management activated.\n", cpu);
-	for (i = 0; i < perf->state_count; i++)
-		dprintk("     %cP%d: %d MHz, %d mW, %d uS\n",
-			(i == perf->state ? '*' : ' '), i,
-			(u32) perf->states[i].core_frequency,
-			(u32) perf->states[i].power,
-			(u32) perf->states[i].transition_latency);
-
-	cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu);
-
-	/*
-	 * the first call to ->target() should result in us actually
-	 * writing something to the appropriate registers.
-	 */
-	data->resume = 1;
-
-	return result;
-
-err_freqfree:
-	kfree(data->freq_table);
-err_unreg:
-	acpi_processor_unregister_performance(perf, cpu);
-err_free:
-	kfree(data);
-	per_cpu(acfreq_data, cpu) = NULL;
-
-	return result;
-}
-
-static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
-{
-	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
-
-	dprintk("acpi_cpufreq_cpu_exit\n");
-
-	if (data) {
-		cpufreq_frequency_table_put_attr(policy->cpu);
-		per_cpu(acfreq_data, policy->cpu) = NULL;
-		acpi_processor_unregister_performance(data->acpi_data,
-						      policy->cpu);
-		kfree(data->freq_table);
-		kfree(data);
-	}
-
-	return 0;
-}
-
-static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
-{
-	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
-
-	dprintk("acpi_cpufreq_resume\n");
-
-	data->resume = 1;
-
-	return 0;
-}
-
-static struct freq_attr *acpi_cpufreq_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-static struct cpufreq_driver acpi_cpufreq_driver = {
-	.verify		= acpi_cpufreq_verify,
-	.target		= acpi_cpufreq_target,
-	.bios_limit	= acpi_processor_get_bios_limit,
-	.init		= acpi_cpufreq_cpu_init,
-	.exit		= acpi_cpufreq_cpu_exit,
-	.resume		= acpi_cpufreq_resume,
-	.name		= "acpi-cpufreq",
-	.owner		= THIS_MODULE,
-	.attr		= acpi_cpufreq_attr,
-};
-
-static int __init acpi_cpufreq_init(void)
-{
-	int ret;
-
-	if (acpi_disabled)
-		return 0;
-
-	dprintk("acpi_cpufreq_init\n");
-
-	ret = acpi_cpufreq_early_init();
-	if (ret)
-		return ret;
-
-	ret = cpufreq_register_driver(&acpi_cpufreq_driver);
-	if (ret)
-		free_acpi_perf_data();
-
-	return ret;
-}
-
-static void __exit acpi_cpufreq_exit(void)
-{
-	dprintk("acpi_cpufreq_exit\n");
-
-	cpufreq_unregister_driver(&acpi_cpufreq_driver);
-
-	free_percpu(acpi_perf_data);
-}
-
-module_param(acpi_pstate_strict, uint, 0644);
-MODULE_PARM_DESC(acpi_pstate_strict,
-	"value 0 or non-zero. non-zero -> strict ACPI checks are "
-	"performed during frequency changes.");
-
-late_initcall(acpi_cpufreq_init);
-module_exit(acpi_cpufreq_exit);
-
-MODULE_ALIAS("acpi");
diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
deleted file mode 100644
index 141abebc451..00000000000
--- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
+++ /dev/null
@@ -1,446 +0,0 @@
-/*
- * (C) 2004-2006  Sebastian Witt <se.witt@gmx.net>
- *
- *  Licensed under the terms of the GNU GPL License version 2.
- *  Based upon reverse engineered information
- *
- *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-
-#define NFORCE2_XTAL 25
-#define NFORCE2_BOOTFSB 0x48
-#define NFORCE2_PLLENABLE 0xa8
-#define NFORCE2_PLLREG 0xa4
-#define NFORCE2_PLLADR 0xa0
-#define NFORCE2_PLL(mul, div) (0x100000 | (mul << 8) | div)
-
-#define NFORCE2_MIN_FSB 50
-#define NFORCE2_SAFE_DISTANCE 50
-
-/* Delay in ms between FSB changes */
-/* #define NFORCE2_DELAY 10 */
-
-/*
- * nforce2_chipset:
- * FSB is changed using the chipset
- */
-static struct pci_dev *nforce2_dev;
-
-/* fid:
- * multiplier * 10
- */
-static int fid;
-
-/* min_fsb, max_fsb:
- * minimum and maximum FSB (= FSB at boot time)
- */
-static int min_fsb;
-static int max_fsb;
-
-MODULE_AUTHOR("Sebastian Witt <se.witt@gmx.net>");
-MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver");
-MODULE_LICENSE("GPL");
-
-module_param(fid, int, 0444);
-module_param(min_fsb, int, 0444);
-
-MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)");
-MODULE_PARM_DESC(min_fsb,
-		"Minimum FSB to use, if not defined: current FSB - 50");
-
-#define PFX "cpufreq-nforce2: "
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"cpufreq-nforce2", msg)
-
-/**
- * nforce2_calc_fsb - calculate FSB
- * @pll: PLL value
- *
- *   Calculates FSB from PLL value
- */
-static int nforce2_calc_fsb(int pll)
-{
-	unsigned char mul, div;
-
-	mul = (pll >> 8) & 0xff;
-	div = pll & 0xff;
-
-	if (div > 0)
-		return NFORCE2_XTAL * mul / div;
-
-	return 0;
-}
-
-/**
- * nforce2_calc_pll - calculate PLL value
- * @fsb: FSB
- *
- *   Calculate PLL value for given FSB
- */
-static int nforce2_calc_pll(unsigned int fsb)
-{
-	unsigned char xmul, xdiv;
-	unsigned char mul = 0, div = 0;
-	int tried = 0;
-
-	/* Try to calculate multiplier and divider up to 4 times */
-	while (((mul == 0) || (div == 0)) && (tried <= 3)) {
-		for (xdiv = 2; xdiv <= 0x80; xdiv++)
-			for (xmul = 1; xmul <= 0xfe; xmul++)
-				if (nforce2_calc_fsb(NFORCE2_PLL(xmul, xdiv)) ==
-				    fsb + tried) {
-					mul = xmul;
-					div = xdiv;
-				}
-		tried++;
-	}
-
-	if ((mul == 0) || (div == 0))
-		return -1;
-
-	return NFORCE2_PLL(mul, div);
-}
-
-/**
- * nforce2_write_pll - write PLL value to chipset
- * @pll: PLL value
- *
- *   Writes new FSB PLL value to chipset
- */
-static void nforce2_write_pll(int pll)
-{
-	int temp;
-
-	/* Set the pll addr. to 0x00 */
-	pci_write_config_dword(nforce2_dev, NFORCE2_PLLADR, 0);
-
-	/* Now write the value in all 64 registers */
-	for (temp = 0; temp <= 0x3f; temp++)
-		pci_write_config_dword(nforce2_dev, NFORCE2_PLLREG, pll);
-
-	return;
-}
-
-/**
- * nforce2_fsb_read - Read FSB
- *
- *   Read FSB from chipset
- *   If bootfsb != 0, return FSB at boot-time
- */
-static unsigned int nforce2_fsb_read(int bootfsb)
-{
-	struct pci_dev *nforce2_sub5;
-	u32 fsb, temp = 0;
-
-	/* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */
-	nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, 0x01EF,
-				PCI_ANY_ID, PCI_ANY_ID, NULL);
-	if (!nforce2_sub5)
-		return 0;
-
-	pci_read_config_dword(nforce2_sub5, NFORCE2_BOOTFSB, &fsb);
-	fsb /= 1000000;
-
-	/* Check if PLL register is already set */
-	pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
-
-	if (bootfsb || !temp)
-		return fsb;
-
-	/* Use PLL register FSB value */
-	pci_read_config_dword(nforce2_dev, NFORCE2_PLLREG, &temp);
-	fsb = nforce2_calc_fsb(temp);
-
-	return fsb;
-}
-
-/**
- * nforce2_set_fsb - set new FSB
- * @fsb: New FSB
- *
- *   Sets new FSB
- */
-static int nforce2_set_fsb(unsigned int fsb)
-{
-	u32 temp = 0;
-	unsigned int tfsb;
-	int diff;
-	int pll = 0;
-
-	if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) {
-		printk(KERN_ERR PFX "FSB %d is out of range!\n", fsb);
-		return -EINVAL;
-	}
-
-	tfsb = nforce2_fsb_read(0);
-	if (!tfsb) {
-		printk(KERN_ERR PFX "Error while reading the FSB\n");
-		return -EINVAL;
-	}
-
-	/* First write? Then set actual value */
-	pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
-	if (!temp) {
-		pll = nforce2_calc_pll(tfsb);
-
-		if (pll < 0)
-			return -EINVAL;
-
-		nforce2_write_pll(pll);
-	}
-
-	/* Enable write access */
-	temp = 0x01;
-	pci_write_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8)temp);
-
-	diff = tfsb - fsb;
-
-	if (!diff)
-		return 0;
-
-	while ((tfsb != fsb) && (tfsb <= max_fsb) && (tfsb >= min_fsb)) {
-		if (diff < 0)
-			tfsb++;
-		else
-			tfsb--;
-
-		/* Calculate the PLL reg. value */
-		pll = nforce2_calc_pll(tfsb);
-		if (pll == -1)
-			return -EINVAL;
-
-		nforce2_write_pll(pll);
-#ifdef NFORCE2_DELAY
-		mdelay(NFORCE2_DELAY);
-#endif
-	}
-
-	temp = 0x40;
-	pci_write_config_byte(nforce2_dev, NFORCE2_PLLADR, (u8)temp);
-
-	return 0;
-}
-
-/**
- * nforce2_get - get the CPU frequency
- * @cpu: CPU number
- *
- * Returns the CPU frequency
- */
-static unsigned int nforce2_get(unsigned int cpu)
-{
-	if (cpu)
-		return 0;
-	return nforce2_fsb_read(0) * fid * 100;
-}
-
-/**
- * nforce2_target - set a new CPUFreq policy
- * @policy: new policy
- * @target_freq: the target frequency
- * @relation: how that frequency relates to achieved frequency
- *  (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
- *
- * Sets a new CPUFreq policy.
- */
-static int nforce2_target(struct cpufreq_policy *policy,
-			  unsigned int target_freq, unsigned int relation)
-{
-/*        unsigned long         flags; */
-	struct cpufreq_freqs freqs;
-	unsigned int target_fsb;
-
-	if ((target_freq > policy->max) || (target_freq < policy->min))
-		return -EINVAL;
-
-	target_fsb = target_freq / (fid * 100);
-
-	freqs.old = nforce2_get(policy->cpu);
-	freqs.new = target_fsb * fid * 100;
-	freqs.cpu = 0;		/* Only one CPU on nForce2 platforms */
-
-	if (freqs.old == freqs.new)
-		return 0;
-
-	dprintk("Old CPU frequency %d kHz, new %d kHz\n",
-	       freqs.old, freqs.new);
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-
-	/* Disable IRQs */
-	/* local_irq_save(flags); */
-
-	if (nforce2_set_fsb(target_fsb) < 0)
-		printk(KERN_ERR PFX "Changing FSB to %d failed\n",
-			target_fsb);
-	else
-		dprintk("Changed FSB successfully to %d\n",
-			target_fsb);
-
-	/* Enable IRQs */
-	/* local_irq_restore(flags); */
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-
-	return 0;
-}
-
-/**
- * nforce2_verify - verifies a new CPUFreq policy
- * @policy: new policy
- */
-static int nforce2_verify(struct cpufreq_policy *policy)
-{
-	unsigned int fsb_pol_max;
-
-	fsb_pol_max = policy->max / (fid * 100);
-
-	if (policy->min < (fsb_pol_max * fid * 100))
-		policy->max = (fsb_pol_max + 1) * fid * 100;
-
-	cpufreq_verify_within_limits(policy,
-				     policy->cpuinfo.min_freq,
-				     policy->cpuinfo.max_freq);
-	return 0;
-}
-
-static int nforce2_cpu_init(struct cpufreq_policy *policy)
-{
-	unsigned int fsb;
-	unsigned int rfid;
-
-	/* capability check */
-	if (policy->cpu != 0)
-		return -ENODEV;
-
-	/* Get current FSB */
-	fsb = nforce2_fsb_read(0);
-
-	if (!fsb)
-		return -EIO;
-
-	/* FIX: Get FID from CPU */
-	if (!fid) {
-		if (!cpu_khz) {
-			printk(KERN_WARNING PFX
-			"cpu_khz not set, can't calculate multiplier!\n");
-			return -ENODEV;
-		}
-
-		fid = cpu_khz / (fsb * 100);
-		rfid = fid % 5;
-
-		if (rfid) {
-			if (rfid > 2)
-				fid += 5 - rfid;
-			else
-				fid -= rfid;
-		}
-	}
-
-	printk(KERN_INFO PFX "FSB currently at %i MHz, FID %d.%d\n", fsb,
-	       fid / 10, fid % 10);
-
-	/* Set maximum FSB to FSB at boot time */
-	max_fsb = nforce2_fsb_read(1);
-
-	if (!max_fsb)
-		return -EIO;
-
-	if (!min_fsb)
-		min_fsb = max_fsb - NFORCE2_SAFE_DISTANCE;
-
-	if (min_fsb < NFORCE2_MIN_FSB)
-		min_fsb = NFORCE2_MIN_FSB;
-
-	/* cpuinfo and default policy values */
-	policy->cpuinfo.min_freq = min_fsb * fid * 100;
-	policy->cpuinfo.max_freq = max_fsb * fid * 100;
-	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
-	policy->cur = nforce2_get(policy->cpu);
-	policy->min = policy->cpuinfo.min_freq;
-	policy->max = policy->cpuinfo.max_freq;
-
-	return 0;
-}
-
-static int nforce2_cpu_exit(struct cpufreq_policy *policy)
-{
-	return 0;
-}
-
-static struct cpufreq_driver nforce2_driver = {
-	.name = "nforce2",
-	.verify = nforce2_verify,
-	.target = nforce2_target,
-	.get = nforce2_get,
-	.init = nforce2_cpu_init,
-	.exit = nforce2_cpu_exit,
-	.owner = THIS_MODULE,
-};
-
-/**
- * nforce2_detect_chipset - detect the Southbridge which contains FSB PLL logic
- *
- * Detects nForce2 A2 and C1 stepping
- *
- */
-static int nforce2_detect_chipset(void)
-{
-	nforce2_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
-					PCI_DEVICE_ID_NVIDIA_NFORCE2,
-					PCI_ANY_ID, PCI_ANY_ID, NULL);
-
-	if (nforce2_dev == NULL)
-		return -ENODEV;
-
-	printk(KERN_INFO PFX "Detected nForce2 chipset revision %X\n",
-	       nforce2_dev->revision);
-	printk(KERN_INFO PFX
-	       "FSB changing is maybe unstable and can lead to "
-	       "crashes and data loss.\n");
-
-	return 0;
-}
-
-/**
- * nforce2_init - initializes the nForce2 CPUFreq driver
- *
- * Initializes the nForce2 FSB support. Returns -ENODEV on unsupported
- * devices, -EINVAL on problems during initiatization, and zero on
- * success.
- */
-static int __init nforce2_init(void)
-{
-	/* TODO: do we need to detect the processor? */
-
-	/* detect chipset */
-	if (nforce2_detect_chipset()) {
-		printk(KERN_INFO PFX "No nForce2 chipset.\n");
-		return -ENODEV;
-	}
-
-	return cpufreq_register_driver(&nforce2_driver);
-}
-
-/**
- * nforce2_exit - unregisters cpufreq module
- *
- *   Unregisters nForce2 FSB change support.
- */
-static void __exit nforce2_exit(void)
-{
-	cpufreq_unregister_driver(&nforce2_driver);
-}
-
-module_init(nforce2_init);
-module_exit(nforce2_exit);
-
diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
deleted file mode 100644
index 35a257dd4bb..00000000000
--- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
+++ /dev/null
@@ -1,367 +0,0 @@
-/*
- *  Based on documentation provided by Dave Jones. Thanks!
- *
- *  Licensed under the terms of the GNU GPL License version 2.
- *
- *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/ioport.h>
-#include <linux/slab.h>
-#include <linux/timex.h>
-#include <linux/io.h>
-#include <linux/delay.h>
-
-#include <asm/msr.h>
-#include <asm/tsc.h>
-
-#define EPS_BRAND_C7M	0
-#define EPS_BRAND_C7	1
-#define EPS_BRAND_EDEN	2
-#define EPS_BRAND_C3	3
-#define EPS_BRAND_C7D	4
-
-struct eps_cpu_data {
-	u32 fsb;
-	struct cpufreq_frequency_table freq_table[];
-};
-
-static struct eps_cpu_data *eps_cpu[NR_CPUS];
-
-
-static unsigned int eps_get(unsigned int cpu)
-{
-	struct eps_cpu_data *centaur;
-	u32 lo, hi;
-
-	if (cpu)
-		return 0;
-	centaur = eps_cpu[cpu];
-	if (centaur == NULL)
-		return 0;
-
-	/* Return current frequency */
-	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
-	return centaur->fsb * ((lo >> 8) & 0xff);
-}
-
-static int eps_set_state(struct eps_cpu_data *centaur,
-			 unsigned int cpu,
-			 u32 dest_state)
-{
-	struct cpufreq_freqs freqs;
-	u32 lo, hi;
-	int err = 0;
-	int i;
-
-	freqs.old = eps_get(cpu);
-	freqs.new = centaur->fsb * ((dest_state >> 8) & 0xff);
-	freqs.cpu = cpu;
-	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-
-	/* Wait while CPU is busy */
-	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
-	i = 0;
-	while (lo & ((1 << 16) | (1 << 17))) {
-		udelay(16);
-		rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
-		i++;
-		if (unlikely(i > 64)) {
-			err = -ENODEV;
-			goto postchange;
-		}
-	}
-	/* Set new multiplier and voltage */
-	wrmsr(MSR_IA32_PERF_CTL, dest_state & 0xffff, 0);
-	/* Wait until transition end */
-	i = 0;
-	do {
-		udelay(16);
-		rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
-		i++;
-		if (unlikely(i > 64)) {
-			err = -ENODEV;
-			goto postchange;
-		}
-	} while (lo & ((1 << 16) | (1 << 17)));
-
-	/* Return current frequency */
-postchange:
-	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
-	freqs.new = centaur->fsb * ((lo >> 8) & 0xff);
-
-#ifdef DEBUG
-	{
-	u8 current_multiplier, current_voltage;
-
-	/* Print voltage and multiplier */
-	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
-	current_voltage = lo & 0xff;
-	printk(KERN_INFO "eps: Current voltage = %dmV\n",
-		current_voltage * 16 + 700);
-	current_multiplier = (lo >> 8) & 0xff;
-	printk(KERN_INFO "eps: Current multiplier = %d\n",
-		current_multiplier);
-	}
-#endif
-	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-	return err;
-}
-
-static int eps_target(struct cpufreq_policy *policy,
-			       unsigned int target_freq,
-			       unsigned int relation)
-{
-	struct eps_cpu_data *centaur;
-	unsigned int newstate = 0;
-	unsigned int cpu = policy->cpu;
-	unsigned int dest_state;
-	int ret;
-
-	if (unlikely(eps_cpu[cpu] == NULL))
-		return -ENODEV;
-	centaur = eps_cpu[cpu];
-
-	if (unlikely(cpufreq_frequency_table_target(policy,
-			&eps_cpu[cpu]->freq_table[0],
-			target_freq,
-			relation,
-			&newstate))) {
-		return -EINVAL;
-	}
-
-	/* Make frequency transition */
-	dest_state = centaur->freq_table[newstate].index & 0xffff;
-	ret = eps_set_state(centaur, cpu, dest_state);
-	if (ret)
-		printk(KERN_ERR "eps: Timeout!\n");
-	return ret;
-}
-
-static int eps_verify(struct cpufreq_policy *policy)
-{
-	return cpufreq_frequency_table_verify(policy,
-			&eps_cpu[policy->cpu]->freq_table[0]);
-}
-
-static int eps_cpu_init(struct cpufreq_policy *policy)
-{
-	unsigned int i;
-	u32 lo, hi;
-	u64 val;
-	u8 current_multiplier, current_voltage;
-	u8 max_multiplier, max_voltage;
-	u8 min_multiplier, min_voltage;
-	u8 brand = 0;
-	u32 fsb;
-	struct eps_cpu_data *centaur;
-	struct cpuinfo_x86 *c = &cpu_data(0);
-	struct cpufreq_frequency_table *f_table;
-	int k, step, voltage;
-	int ret;
-	int states;
-
-	if (policy->cpu != 0)
-		return -ENODEV;
-
-	/* Check brand */
-	printk(KERN_INFO "eps: Detected VIA ");
-
-	switch (c->x86_model) {
-	case 10:
-		rdmsr(0x1153, lo, hi);
-		brand = (((lo >> 2) ^ lo) >> 18) & 3;
-		printk(KERN_CONT "Model A ");
-		break;
-	case 13:
-		rdmsr(0x1154, lo, hi);
-		brand = (((lo >> 4) ^ (lo >> 2))) & 0x000000ff;
-		printk(KERN_CONT "Model D ");
-		break;
-	}
-
-	switch (brand) {
-	case EPS_BRAND_C7M:
-		printk(KERN_CONT "C7-M\n");
-		break;
-	case EPS_BRAND_C7:
-		printk(KERN_CONT "C7\n");
-		break;
-	case EPS_BRAND_EDEN:
-		printk(KERN_CONT "Eden\n");
-		break;
-	case EPS_BRAND_C7D:
-		printk(KERN_CONT "C7-D\n");
-		break;
-	case EPS_BRAND_C3:
-		printk(KERN_CONT "C3\n");
-		return -ENODEV;
-		break;
-	}
-	/* Enable Enhanced PowerSaver */
-	rdmsrl(MSR_IA32_MISC_ENABLE, val);
-	if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
-		val |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
-		wrmsrl(MSR_IA32_MISC_ENABLE, val);
-		/* Can be locked at 0 */
-		rdmsrl(MSR_IA32_MISC_ENABLE, val);
-		if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
-			printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n");
-			return -ENODEV;
-		}
-	}
-
-	/* Print voltage and multiplier */
-	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
-	current_voltage = lo & 0xff;
-	printk(KERN_INFO "eps: Current voltage = %dmV\n",
-			current_voltage * 16 + 700);
-	current_multiplier = (lo >> 8) & 0xff;
-	printk(KERN_INFO "eps: Current multiplier = %d\n", current_multiplier);
-
-	/* Print limits */
-	max_voltage = hi & 0xff;
-	printk(KERN_INFO "eps: Highest voltage = %dmV\n",
-			max_voltage * 16 + 700);
-	max_multiplier = (hi >> 8) & 0xff;
-	printk(KERN_INFO "eps: Highest multiplier = %d\n", max_multiplier);
-	min_voltage = (hi >> 16) & 0xff;
-	printk(KERN_INFO "eps: Lowest voltage = %dmV\n",
-			min_voltage * 16 + 700);
-	min_multiplier = (hi >> 24) & 0xff;
-	printk(KERN_INFO "eps: Lowest multiplier = %d\n", min_multiplier);
-
-	/* Sanity checks */
-	if (current_multiplier == 0 || max_multiplier == 0
-	    || min_multiplier == 0)
-		return -EINVAL;
-	if (current_multiplier > max_multiplier
-	    || max_multiplier <= min_multiplier)
-		return -EINVAL;
-	if (current_voltage > 0x1f || max_voltage > 0x1f)
-		return -EINVAL;
-	if (max_voltage < min_voltage)
-		return -EINVAL;
-
-	/* Calc FSB speed */
-	fsb = cpu_khz / current_multiplier;
-	/* Calc number of p-states supported */
-	if (brand == EPS_BRAND_C7M)
-		states = max_multiplier - min_multiplier + 1;
-	else
-		states = 2;
-
-	/* Allocate private data and frequency table for current cpu */
-	centaur = kzalloc(sizeof(struct eps_cpu_data)
-		    + (states + 1) * sizeof(struct cpufreq_frequency_table),
-		    GFP_KERNEL);
-	if (!centaur)
-		return -ENOMEM;
-	eps_cpu[0] = centaur;
-
-	/* Copy basic values */
-	centaur->fsb = fsb;
-
-	/* Fill frequency and MSR value table */
-	f_table = &centaur->freq_table[0];
-	if (brand != EPS_BRAND_C7M) {
-		f_table[0].frequency = fsb * min_multiplier;
-		f_table[0].index = (min_multiplier << 8) | min_voltage;
-		f_table[1].frequency = fsb * max_multiplier;
-		f_table[1].index = (max_multiplier << 8) | max_voltage;
-		f_table[2].frequency = CPUFREQ_TABLE_END;
-	} else {
-		k = 0;
-		step = ((max_voltage - min_voltage) * 256)
-			/ (max_multiplier - min_multiplier);
-		for (i = min_multiplier; i <= max_multiplier; i++) {
-			voltage = (k * step) / 256 + min_voltage;
-			f_table[k].frequency = fsb * i;
-			f_table[k].index = (i << 8) | voltage;
-			k++;
-		}
-		f_table[k].frequency = CPUFREQ_TABLE_END;
-	}
-
-	policy->cpuinfo.transition_latency = 140000; /* 844mV -> 700mV in ns */
-	policy->cur = fsb * current_multiplier;
-
-	ret = cpufreq_frequency_table_cpuinfo(policy, &centaur->freq_table[0]);
-	if (ret) {
-		kfree(centaur);
-		return ret;
-	}
-
-	cpufreq_frequency_table_get_attr(&centaur->freq_table[0], policy->cpu);
-	return 0;
-}
-
-static int eps_cpu_exit(struct cpufreq_policy *policy)
-{
-	unsigned int cpu = policy->cpu;
-	struct eps_cpu_data *centaur;
-	u32 lo, hi;
-
-	if (eps_cpu[cpu] == NULL)
-		return -ENODEV;
-	centaur = eps_cpu[cpu];
-
-	/* Get max frequency */
-	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
-	/* Set max frequency */
-	eps_set_state(centaur, cpu, hi & 0xffff);
-	/* Bye */
-	cpufreq_frequency_table_put_attr(policy->cpu);
-	kfree(eps_cpu[cpu]);
-	eps_cpu[cpu] = NULL;
-	return 0;
-}
-
-static struct freq_attr *eps_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-static struct cpufreq_driver eps_driver = {
-	.verify		= eps_verify,
-	.target		= eps_target,
-	.init		= eps_cpu_init,
-	.exit		= eps_cpu_exit,
-	.get		= eps_get,
-	.name		= "e_powersaver",
-	.owner		= THIS_MODULE,
-	.attr		= eps_attr,
-};
-
-static int __init eps_init(void)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-
-	/* This driver will work only on Centaur C7 processors with
-	 * Enhanced SpeedStep/PowerSaver registers */
-	if (c->x86_vendor != X86_VENDOR_CENTAUR
-	    || c->x86 != 6 || c->x86_model < 10)
-		return -ENODEV;
-	if (!cpu_has(c, X86_FEATURE_EST))
-		return -ENODEV;
-
-	if (cpufreq_register_driver(&eps_driver))
-		return -EINVAL;
-	return 0;
-}
-
-static void __exit eps_exit(void)
-{
-	cpufreq_unregister_driver(&eps_driver);
-}
-
-MODULE_AUTHOR("Rafal Bilski <rafalbilski@interia.pl>");
-MODULE_DESCRIPTION("Enhanced PowerSaver driver for VIA C7 CPU's.");
-MODULE_LICENSE("GPL");
-
-module_init(eps_init);
-module_exit(eps_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
deleted file mode 100644
index c587db472a7..00000000000
--- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ /dev/null
@@ -1,309 +0,0 @@
-/*
- *	elanfreq:	cpufreq driver for the AMD ELAN family
- *
- *	(c) Copyright 2002 Robert Schwebel <r.schwebel@pengutronix.de>
- *
- *	Parts of this code are (c) Sven Geggus <sven@geggus.net>
- *
- *      All Rights Reserved.
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
- *	2002-02-13: - initial revision for 2.4.18-pre9 by Robert Schwebel
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-
-#include <linux/delay.h>
-#include <linux/cpufreq.h>
-
-#include <asm/msr.h>
-#include <linux/timex.h>
-#include <linux/io.h>
-
-#define REG_CSCIR 0x22		/* Chip Setup and Control Index Register    */
-#define REG_CSCDR 0x23		/* Chip Setup and Control Data  Register    */
-
-/* Module parameter */
-static int max_freq;
-
-struct s_elan_multiplier {
-	int clock;		/* frequency in kHz                         */
-	int val40h;		/* PMU Force Mode register                  */
-	int val80h;		/* CPU Clock Speed Register                 */
-};
-
-/*
- * It is important that the frequencies
- * are listed in ascending order here!
- */
-static struct s_elan_multiplier elan_multiplier[] = {
-	{1000,	0x02,	0x18},
-	{2000,	0x02,	0x10},
-	{4000,	0x02,	0x08},
-	{8000,	0x00,	0x00},
-	{16000,	0x00,	0x02},
-	{33000,	0x00,	0x04},
-	{66000,	0x01,	0x04},
-	{99000,	0x01,	0x05}
-};
-
-static struct cpufreq_frequency_table elanfreq_table[] = {
-	{0,	1000},
-	{1,	2000},
-	{2,	4000},
-	{3,	8000},
-	{4,	16000},
-	{5,	33000},
-	{6,	66000},
-	{7,	99000},
-	{0,	CPUFREQ_TABLE_END},
-};
-
-
-/**
- *	elanfreq_get_cpu_frequency: determine current cpu speed
- *
- *	Finds out at which frequency the CPU of the Elan SOC runs
- *	at the moment. Frequencies from 1 to 33 MHz are generated
- *	the normal way, 66 and 99 MHz are called "Hyperspeed Mode"
- *	and have the rest of the chip running with 33 MHz.
- */
-
-static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu)
-{
-	u8 clockspeed_reg;    /* Clock Speed Register */
-
-	local_irq_disable();
-	outb_p(0x80, REG_CSCIR);
-	clockspeed_reg = inb_p(REG_CSCDR);
-	local_irq_enable();
-
-	if ((clockspeed_reg & 0xE0) == 0xE0)
-		return 0;
-
-	/* Are we in CPU clock multiplied mode (66/99 MHz)? */
-	if ((clockspeed_reg & 0xE0) == 0xC0) {
-		if ((clockspeed_reg & 0x01) == 0)
-			return 66000;
-		else
-			return 99000;
-	}
-
-	/* 33 MHz is not 32 MHz... */
-	if ((clockspeed_reg & 0xE0) == 0xA0)
-		return 33000;
-
-	return (1<<((clockspeed_reg & 0xE0) >> 5)) * 1000;
-}
-
-
-/**
- *	elanfreq_set_cpu_frequency: Change the CPU core frequency
- *	@cpu: cpu number
- *	@freq: frequency in kHz
- *
- *	This function takes a frequency value and changes the CPU frequency
- *	according to this. Note that the frequency has to be checked by
- *	elanfreq_validatespeed() for correctness!
- *
- *	There is no return value.
- */
-
-static void elanfreq_set_cpu_state(unsigned int state)
-{
-	struct cpufreq_freqs    freqs;
-
-	freqs.old = elanfreq_get_cpu_frequency(0);
-	freqs.new = elan_multiplier[state].clock;
-	freqs.cpu = 0; /* elanfreq.c is UP only driver */
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-
-	printk(KERN_INFO "elanfreq: attempting to set frequency to %i kHz\n",
-			elan_multiplier[state].clock);
-
-
-	/*
-	 * Access to the Elan's internal registers is indexed via
-	 * 0x22: Chip Setup & Control Register Index Register (CSCI)
-	 * 0x23: Chip Setup & Control Register Data  Register (CSCD)
-	 *
-	 */
-
-	/*
-	 * 0x40 is the Power Management Unit's Force Mode Register.
-	 * Bit 6 enables Hyperspeed Mode (66/100 MHz core frequency)
-	 */
-
-	local_irq_disable();
-	outb_p(0x40, REG_CSCIR);		/* Disable hyperspeed mode */
-	outb_p(0x00, REG_CSCDR);
-	local_irq_enable();		/* wait till internal pipelines and */
-	udelay(1000);			/* buffers have cleaned up          */
-
-	local_irq_disable();
-
-	/* now, set the CPU clock speed register (0x80) */
-	outb_p(0x80, REG_CSCIR);
-	outb_p(elan_multiplier[state].val80h, REG_CSCDR);
-
-	/* now, the hyperspeed bit in PMU Force Mode Register (0x40) */
-	outb_p(0x40, REG_CSCIR);
-	outb_p(elan_multiplier[state].val40h, REG_CSCDR);
-	udelay(10000);
-	local_irq_enable();
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-};
-
-
-/**
- *	elanfreq_validatespeed: test if frequency range is valid
- *	@policy: the policy to validate
- *
- *	This function checks if a given frequency range in kHz is valid
- *	for the hardware supported by the driver.
- */
-
-static int elanfreq_verify(struct cpufreq_policy *policy)
-{
-	return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]);
-}
-
-static int elanfreq_target(struct cpufreq_policy *policy,
-			    unsigned int target_freq,
-			    unsigned int relation)
-{
-	unsigned int newstate = 0;
-
-	if (cpufreq_frequency_table_target(policy, &elanfreq_table[0],
-				target_freq, relation, &newstate))
-		return -EINVAL;
-
-	elanfreq_set_cpu_state(newstate);
-
-	return 0;
-}
-
-
-/*
- *	Module init and exit code
- */
-
-static int elanfreq_cpu_init(struct cpufreq_policy *policy)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-	unsigned int i;
-	int result;
-
-	/* capability check */
-	if ((c->x86_vendor != X86_VENDOR_AMD) ||
-	    (c->x86 != 4) || (c->x86_model != 10))
-		return -ENODEV;
-
-	/* max freq */
-	if (!max_freq)
-		max_freq = elanfreq_get_cpu_frequency(0);
-
-	/* table init */
-	for (i = 0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) {
-		if (elanfreq_table[i].frequency > max_freq)
-			elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID;
-	}
-
-	/* cpuinfo and default policy values */
-	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
-	policy->cur = elanfreq_get_cpu_frequency(0);
-
-	result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table);
-	if (result)
-		return result;
-
-	cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu);
-	return 0;
-}
-
-
-static int elanfreq_cpu_exit(struct cpufreq_policy *policy)
-{
-	cpufreq_frequency_table_put_attr(policy->cpu);
-	return 0;
-}
-
-
-#ifndef MODULE
-/**
- * elanfreq_setup - elanfreq command line parameter parsing
- *
- * elanfreq command line parameter.  Use:
- *  elanfreq=66000
- * to set the maximum CPU frequency to 66 MHz. Note that in
- * case you do not give this boot parameter, the maximum
- * frequency will fall back to _current_ CPU frequency which
- * might be lower. If you build this as a module, use the
- * max_freq module parameter instead.
- */
-static int __init elanfreq_setup(char *str)
-{
-	max_freq = simple_strtoul(str, &str, 0);
-	printk(KERN_WARNING "You're using the deprecated elanfreq command line option. Use elanfreq.max_freq instead, please!\n");
-	return 1;
-}
-__setup("elanfreq=", elanfreq_setup);
-#endif
-
-
-static struct freq_attr *elanfreq_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-
-static struct cpufreq_driver elanfreq_driver = {
-	.get		= elanfreq_get_cpu_frequency,
-	.verify		= elanfreq_verify,
-	.target		= elanfreq_target,
-	.init		= elanfreq_cpu_init,
-	.exit		= elanfreq_cpu_exit,
-	.name		= "elanfreq",
-	.owner		= THIS_MODULE,
-	.attr		= elanfreq_attr,
-};
-
-
-static int __init elanfreq_init(void)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-
-	/* Test if we have the right hardware */
-	if ((c->x86_vendor != X86_VENDOR_AMD) ||
-		(c->x86 != 4) || (c->x86_model != 10)) {
-		printk(KERN_INFO "elanfreq: error: no Elan processor found!\n");
-		return -ENODEV;
-	}
-	return cpufreq_register_driver(&elanfreq_driver);
-}
-
-
-static void __exit elanfreq_exit(void)
-{
-	cpufreq_unregister_driver(&elanfreq_driver);
-}
-
-
-module_param(max_freq, int, 0444);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Robert Schwebel <r.schwebel@pengutronix.de>, "
-		"Sven Geggus <sven@geggus.net>");
-MODULE_DESCRIPTION("cpufreq driver for AMD's Elan CPUs");
-
-module_init(elanfreq_init);
-module_exit(elanfreq_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
deleted file mode 100644
index 32974cf8423..00000000000
--- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
+++ /dev/null
@@ -1,517 +0,0 @@
-/*
- *	Cyrix MediaGX and NatSemi Geode Suspend Modulation
- *	(C) 2002 Zwane Mwaikambo <zwane@commfireservices.com>
- *	(C) 2002 Hiroshi Miura   <miura@da-cha.org>
- *	All Rights Reserved
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      version 2 as published by the Free Software Foundation
- *
- *      The author(s) of this software shall not be held liable for damages
- *      of any nature resulting due to the use of this software. This
- *      software is provided AS-IS with no warranties.
- *
- * Theoretical note:
- *
- *	(see Geode(tm) CS5530 manual (rev.4.1) page.56)
- *
- *	CPU frequency control on NatSemi Geode GX1/GXLV processor and CS55x0
- *	are based on Suspend Modulation.
- *
- *	Suspend Modulation works by asserting and de-asserting the SUSP# pin
- *	to CPU(GX1/GXLV) for configurable durations. When asserting SUSP#
- *	the CPU enters an idle state. GX1 stops its core clock when SUSP# is
- *	asserted then power consumption is reduced.
- *
- *	Suspend Modulation's OFF/ON duration are configurable
- *	with 'Suspend Modulation OFF Count Register'
- *	and 'Suspend Modulation ON Count Register'.
- *	These registers are 8bit counters that represent the number of
- *	32us intervals which the SUSP# pin is asserted(ON)/de-asserted(OFF)
- *	to the processor.
- *
- *	These counters define a ratio which is the effective frequency
- *	of operation of the system.
- *
- *			       OFF Count
- *	F_eff = Fgx * ----------------------
- *	                OFF Count + ON Count
- *
- *	0 <= On Count, Off Count <= 255
- *
- *	From these limits, we can get register values
- *
- *	off_duration + on_duration <= MAX_DURATION
- *	on_duration = off_duration * (stock_freq - freq) / freq
- *
- *      off_duration  =  (freq * DURATION) / stock_freq
- *      on_duration = DURATION - off_duration
- *
- *
- *---------------------------------------------------------------------------
- *
- * ChangeLog:
- *	Dec. 12, 2003	Hiroshi Miura <miura@da-cha.org>
- *		- fix on/off register mistake
- *		- fix cpu_khz calc when it stops cpu modulation.
- *
- *	Dec. 11, 2002	Hiroshi Miura <miura@da-cha.org>
- *		- rewrite for Cyrix MediaGX Cx5510/5520 and
- *		  NatSemi Geode Cs5530(A).
- *
- *	Jul. ??, 2002  Zwane Mwaikambo <zwane@commfireservices.com>
- *		- cs5530_mod patch for 2.4.19-rc1.
- *
- *---------------------------------------------------------------------------
- *
- * Todo
- *	Test on machines with 5510, 5530, 5530A
- */
-
-/************************************************************************
- *			Suspend Modulation - Definitions		*
- ************************************************************************/
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/cpufreq.h>
-#include <linux/pci.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-
-#include <asm/processor-cyrix.h>
-
-/* PCI config registers, all at F0 */
-#define PCI_PMER1	0x80	/* power management enable register 1 */
-#define PCI_PMER2	0x81	/* power management enable register 2 */
-#define PCI_PMER3	0x82	/* power management enable register 3 */
-#define PCI_IRQTC	0x8c	/* irq speedup timer counter register:typical 2 to 4ms */
-#define PCI_VIDTC	0x8d	/* video speedup timer counter register: typical 50 to 100ms */
-#define PCI_MODOFF	0x94	/* suspend modulation OFF counter register, 1 = 32us */
-#define PCI_MODON	0x95	/* suspend modulation ON counter register */
-#define PCI_SUSCFG	0x96	/* suspend configuration register */
-
-/* PMER1 bits */
-#define GPM		(1<<0)	/* global power management */
-#define GIT		(1<<1)	/* globally enable PM device idle timers */
-#define GTR		(1<<2)	/* globally enable IO traps */
-#define IRQ_SPDUP	(1<<3)	/* disable clock throttle during interrupt handling */
-#define VID_SPDUP	(1<<4)	/* disable clock throttle during vga video handling */
-
-/* SUSCFG bits */
-#define SUSMOD		(1<<0)	/* enable/disable suspend modulation */
-/* the below is supported only with cs5530 (after rev.1.2)/cs5530A */
-#define SMISPDUP	(1<<1)	/* select how SMI re-enable suspend modulation: */
-				/* IRQTC timer or read SMI speedup disable reg.(F1BAR[08-09h]) */
-#define SUSCFG		(1<<2)	/* enable powering down a GXLV processor. "Special 3Volt Suspend" mode */
-/* the below is supported only with cs5530A */
-#define PWRSVE_ISA	(1<<3)	/* stop ISA clock  */
-#define PWRSVE		(1<<4)	/* active idle */
-
-struct gxfreq_params {
-	u8 on_duration;
-	u8 off_duration;
-	u8 pci_suscfg;
-	u8 pci_pmer1;
-	u8 pci_pmer2;
-	struct pci_dev *cs55x0;
-};
-
-static struct gxfreq_params *gx_params;
-static int stock_freq;
-
-/* PCI bus clock - defaults to 30.000 if cpu_khz is not available */
-static int pci_busclk;
-module_param(pci_busclk, int, 0444);
-
-/* maximum duration for which the cpu may be suspended
- * (32us * MAX_DURATION). If no parameter is given, this defaults
- * to 255.
- * Note that this leads to a maximum of 8 ms(!) where the CPU clock
- * is suspended -- processing power is just 0.39% of what it used to be,
- * though. 781.25 kHz(!) for a 200 MHz processor -- wow. */
-static int max_duration = 255;
-module_param(max_duration, int, 0444);
-
-/* For the default policy, we want at least some processing power
- * - let's say 5%. (min = maxfreq / POLICY_MIN_DIV)
- */
-#define POLICY_MIN_DIV 20
-
-
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"gx-suspmod", msg)
-
-/**
- * we can detect a core multipiler from dir0_lsb
- * from GX1 datasheet p.56,
- *	MULT[3:0]:
- *	0000 = SYSCLK multiplied by 4 (test only)
- *	0001 = SYSCLK multiplied by 10
- *	0010 = SYSCLK multiplied by 4
- *	0011 = SYSCLK multiplied by 6
- *	0100 = SYSCLK multiplied by 9
- *	0101 = SYSCLK multiplied by 5
- *	0110 = SYSCLK multiplied by 7
- *	0111 = SYSCLK multiplied by 8
- *              of 33.3MHz
- **/
-static int gx_freq_mult[16] = {
-		4, 10, 4, 6, 9, 5, 7, 8,
-		0, 0, 0, 0, 0, 0, 0, 0
-};
-
-
-/****************************************************************
- *	Low Level chipset interface				*
- ****************************************************************/
-static struct pci_device_id gx_chipset_tbl[] __initdata = {
-	{ PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY), },
-	{ PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5520), },
-	{ PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5510), },
-	{ 0, },
-};
-
-static void gx_write_byte(int reg, int value)
-{
-	pci_write_config_byte(gx_params->cs55x0, reg, value);
-}
-
-/**
- * gx_detect_chipset:
- *
- **/
-static __init struct pci_dev *gx_detect_chipset(void)
-{
-	struct pci_dev *gx_pci = NULL;
-
-	/* check if CPU is a MediaGX or a Geode. */
-	if ((boot_cpu_data.x86_vendor != X86_VENDOR_NSC) &&
-	    (boot_cpu_data.x86_vendor != X86_VENDOR_CYRIX)) {
-		dprintk("error: no MediaGX/Geode processor found!\n");
-		return NULL;
-	}
-
-	/* detect which companion chip is used */
-	for_each_pci_dev(gx_pci) {
-		if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL)
-			return gx_pci;
-	}
-
-	dprintk("error: no supported chipset found!\n");
-	return NULL;
-}
-
-/**
- * gx_get_cpuspeed:
- *
- * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi
- * Geode CPU runs.
- */
-static unsigned int gx_get_cpuspeed(unsigned int cpu)
-{
-	if ((gx_params->pci_suscfg & SUSMOD) == 0)
-		return stock_freq;
-
-	return (stock_freq * gx_params->off_duration)
-		/ (gx_params->on_duration + gx_params->off_duration);
-}
-
-/**
- *      gx_validate_speed:
- *      determine current cpu speed
- *
- **/
-
-static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration,
-		u8 *off_duration)
-{
-	unsigned int i;
-	u8 tmp_on, tmp_off;
-	int old_tmp_freq = stock_freq;
-	int tmp_freq;
-
-	*off_duration = 1;
-	*on_duration = 0;
-
-	for (i = max_duration; i > 0; i--) {
-		tmp_off = ((khz * i) / stock_freq) & 0xff;
-		tmp_on = i - tmp_off;
-		tmp_freq = (stock_freq * tmp_off) / i;
-		/* if this relation is closer to khz, use this. If it's equal,
-		 * prefer it, too - lower latency */
-		if (abs(tmp_freq - khz) <= abs(old_tmp_freq - khz)) {
-			*on_duration = tmp_on;
-			*off_duration = tmp_off;
-			old_tmp_freq = tmp_freq;
-		}
-	}
-
-	return old_tmp_freq;
-}
-
-
-/**
- * gx_set_cpuspeed:
- * set cpu speed in khz.
- **/
-
-static void gx_set_cpuspeed(unsigned int khz)
-{
-	u8 suscfg, pmer1;
-	unsigned int new_khz;
-	unsigned long flags;
-	struct cpufreq_freqs freqs;
-
-	freqs.cpu = 0;
-	freqs.old = gx_get_cpuspeed(0);
-
-	new_khz = gx_validate_speed(khz, &gx_params->on_duration,
-			&gx_params->off_duration);
-
-	freqs.new = new_khz;
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-	local_irq_save(flags);
-
-
-
-	if (new_khz != stock_freq) {
-		/* if new khz == 100% of CPU speed, it is special case */
-		switch (gx_params->cs55x0->device) {
-		case PCI_DEVICE_ID_CYRIX_5530_LEGACY:
-			pmer1 = gx_params->pci_pmer1 | IRQ_SPDUP | VID_SPDUP;
-			/* FIXME: need to test other values -- Zwane,Miura */
-			/* typical 2 to 4ms */
-			gx_write_byte(PCI_IRQTC, 4);
-			/* typical 50 to 100ms */
-			gx_write_byte(PCI_VIDTC, 100);
-			gx_write_byte(PCI_PMER1, pmer1);
-
-			if (gx_params->cs55x0->revision < 0x10) {
-				/* CS5530(rev 1.2, 1.3) */
-				suscfg = gx_params->pci_suscfg|SUSMOD;
-			} else {
-				/* CS5530A,B.. */
-				suscfg = gx_params->pci_suscfg|SUSMOD|PWRSVE;
-			}
-			break;
-		case PCI_DEVICE_ID_CYRIX_5520:
-		case PCI_DEVICE_ID_CYRIX_5510:
-			suscfg = gx_params->pci_suscfg | SUSMOD;
-			break;
-		default:
-			local_irq_restore(flags);
-			dprintk("fatal: try to set unknown chipset.\n");
-			return;
-		}
-	} else {
-		suscfg = gx_params->pci_suscfg & ~(SUSMOD);
-		gx_params->off_duration = 0;
-		gx_params->on_duration = 0;
-		dprintk("suspend modulation disabled: cpu runs 100%% speed.\n");
-	}
-
-	gx_write_byte(PCI_MODOFF, gx_params->off_duration);
-	gx_write_byte(PCI_MODON, gx_params->on_duration);
-
-	gx_write_byte(PCI_SUSCFG, suscfg);
-	pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg);
-
-	local_irq_restore(flags);
-
-	gx_params->pci_suscfg = suscfg;
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-
-	dprintk("suspend modulation w/ duration of ON:%d us, OFF:%d us\n",
-		gx_params->on_duration * 32, gx_params->off_duration * 32);
-	dprintk("suspend modulation w/ clock speed: %d kHz.\n", freqs.new);
-}
-
-/****************************************************************
- *             High level functions                             *
- ****************************************************************/
-
-/*
- *	cpufreq_gx_verify: test if frequency range is valid
- *
- *	This function checks if a given frequency range in kHz is valid
- *      for the hardware supported by the driver.
- */
-
-static int cpufreq_gx_verify(struct cpufreq_policy *policy)
-{
-	unsigned int tmp_freq = 0;
-	u8 tmp1, tmp2;
-
-	if (!stock_freq || !policy)
-		return -EINVAL;
-
-	policy->cpu = 0;
-	cpufreq_verify_within_limits(policy, (stock_freq / max_duration),
-			stock_freq);
-
-	/* it needs to be assured that at least one supported frequency is
-	 * within policy->min and policy->max. If it is not, policy->max
-	 * needs to be increased until one freuqency is supported.
-	 * policy->min may not be decreased, though. This way we guarantee a
-	 * specific processing capacity.
-	 */
-	tmp_freq = gx_validate_speed(policy->min, &tmp1, &tmp2);
-	if (tmp_freq < policy->min)
-		tmp_freq += stock_freq / max_duration;
-	policy->min = tmp_freq;
-	if (policy->min > policy->max)
-		policy->max = tmp_freq;
-	tmp_freq = gx_validate_speed(policy->max, &tmp1, &tmp2);
-	if (tmp_freq > policy->max)
-		tmp_freq -= stock_freq / max_duration;
-	policy->max = tmp_freq;
-	if (policy->max < policy->min)
-		policy->max = policy->min;
-	cpufreq_verify_within_limits(policy, (stock_freq / max_duration),
-			stock_freq);
-
-	return 0;
-}
-
-/*
- *      cpufreq_gx_target:
- *
- */
-static int cpufreq_gx_target(struct cpufreq_policy *policy,
-			     unsigned int target_freq,
-			     unsigned int relation)
-{
-	u8 tmp1, tmp2;
-	unsigned int tmp_freq;
-
-	if (!stock_freq || !policy)
-		return -EINVAL;
-
-	policy->cpu = 0;
-
-	tmp_freq = gx_validate_speed(target_freq, &tmp1, &tmp2);
-	while (tmp_freq < policy->min) {
-		tmp_freq += stock_freq / max_duration;
-		tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2);
-	}
-	while (tmp_freq > policy->max) {
-		tmp_freq -= stock_freq / max_duration;
-		tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2);
-	}
-
-	gx_set_cpuspeed(tmp_freq);
-
-	return 0;
-}
-
-static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy)
-{
-	unsigned int maxfreq, curfreq;
-
-	if (!policy || policy->cpu != 0)
-		return -ENODEV;
-
-	/* determine maximum frequency */
-	if (pci_busclk)
-		maxfreq = pci_busclk * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f];
-	else if (cpu_khz)
-		maxfreq = cpu_khz;
-	else
-		maxfreq = 30000 * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f];
-
-	stock_freq = maxfreq;
-	curfreq = gx_get_cpuspeed(0);
-
-	dprintk("cpu max frequency is %d.\n", maxfreq);
-	dprintk("cpu current frequency is %dkHz.\n", curfreq);
-
-	/* setup basic struct for cpufreq API */
-	policy->cpu = 0;
-
-	if (max_duration < POLICY_MIN_DIV)
-		policy->min = maxfreq / max_duration;
-	else
-		policy->min = maxfreq / POLICY_MIN_DIV;
-	policy->max = maxfreq;
-	policy->cur = curfreq;
-	policy->cpuinfo.min_freq = maxfreq / max_duration;
-	policy->cpuinfo.max_freq = maxfreq;
-	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
-
-	return 0;
-}
-
-/*
- * cpufreq_gx_init:
- *   MediaGX/Geode GX initialize cpufreq driver
- */
-static struct cpufreq_driver gx_suspmod_driver = {
-	.get		= gx_get_cpuspeed,
-	.verify		= cpufreq_gx_verify,
-	.target		= cpufreq_gx_target,
-	.init		= cpufreq_gx_cpu_init,
-	.name		= "gx-suspmod",
-	.owner		= THIS_MODULE,
-};
-
-static int __init cpufreq_gx_init(void)
-{
-	int ret;
-	struct gxfreq_params *params;
-	struct pci_dev *gx_pci;
-
-	/* Test if we have the right hardware */
-	gx_pci = gx_detect_chipset();
-	if (gx_pci == NULL)
-		return -ENODEV;
-
-	/* check whether module parameters are sane */
-	if (max_duration > 0xff)
-		max_duration = 0xff;
-
-	dprintk("geode suspend modulation available.\n");
-
-	params = kzalloc(sizeof(struct gxfreq_params), GFP_KERNEL);
-	if (params == NULL)
-		return -ENOMEM;
-
-	params->cs55x0 = gx_pci;
-	gx_params = params;
-
-	/* keep cs55x0 configurations */
-	pci_read_config_byte(params->cs55x0, PCI_SUSCFG, &(params->pci_suscfg));
-	pci_read_config_byte(params->cs55x0, PCI_PMER1, &(params->pci_pmer1));
-	pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2));
-	pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration));
-	pci_read_config_byte(params->cs55x0, PCI_MODOFF,
-			&(params->off_duration));
-
-	ret = cpufreq_register_driver(&gx_suspmod_driver);
-	if (ret) {
-		kfree(params);
-		return ret;                   /* register error! */
-	}
-
-	return 0;
-}
-
-static void __exit cpufreq_gx_exit(void)
-{
-	cpufreq_unregister_driver(&gx_suspmod_driver);
-	pci_dev_put(gx_params->cs55x0);
-	kfree(gx_params);
-}
-
-MODULE_AUTHOR("Hiroshi Miura <miura@da-cha.org>");
-MODULE_DESCRIPTION("Cpufreq driver for Cyrix MediaGX and NatSemi Geode");
-MODULE_LICENSE("GPL");
-
-module_init(cpufreq_gx_init);
-module_exit(cpufreq_gx_exit);
-
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
deleted file mode 100644
index cf48cdd6907..00000000000
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ /dev/null
@@ -1,1029 +0,0 @@
-/*
- *  (C) 2001-2004  Dave Jones. <davej@redhat.com>
- *  (C) 2002  Padraig Brady. <padraig@antefacto.com>
- *
- *  Licensed under the terms of the GNU GPL License version 2.
- *  Based upon datasheets & sample CPUs kindly provided by VIA.
- *
- *  VIA have currently 3 different versions of Longhaul.
- *  Version 1 (Longhaul) uses the BCR2 MSR at 0x1147.
- *   It is present only in Samuel 1 (C5A), Samuel 2 (C5B) stepping 0.
- *  Version 2 of longhaul is backward compatible with v1, but adds
- *   LONGHAUL MSR for purpose of both frequency and voltage scaling.
- *   Present in Samuel 2 (steppings 1-7 only) (C5B), and Ezra (C5C).
- *  Version 3 of longhaul got renamed to Powersaver and redesigned
- *   to use only the POWERSAVER MSR at 0x110a.
- *   It is present in Ezra-T (C5M), Nehemiah (C5X) and above.
- *   It's pretty much the same feature wise to longhaul v2, though
- *   there is provision for scaling FSB too, but this doesn't work
- *   too well in practice so we don't even try to use this.
- *
- *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/pci.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/delay.h>
-#include <linux/timex.h>
-#include <linux/io.h>
-#include <linux/acpi.h>
-
-#include <asm/msr.h>
-#include <acpi/processor.h>
-
-#include "longhaul.h"
-
-#define PFX "longhaul: "
-
-#define TYPE_LONGHAUL_V1	1
-#define TYPE_LONGHAUL_V2	2
-#define TYPE_POWERSAVER		3
-
-#define	CPU_SAMUEL	1
-#define	CPU_SAMUEL2	2
-#define	CPU_EZRA	3
-#define	CPU_EZRA_T	4
-#define	CPU_NEHEMIAH	5
-#define	CPU_NEHEMIAH_C	6
-
-/* Flags */
-#define USE_ACPI_C3		(1 << 1)
-#define USE_NORTHBRIDGE		(1 << 2)
-
-static int cpu_model;
-static unsigned int numscales = 16;
-static unsigned int fsb;
-
-static const struct mV_pos *vrm_mV_table;
-static const unsigned char *mV_vrm_table;
-
-static unsigned int highest_speed, lowest_speed; /* kHz */
-static unsigned int minmult, maxmult;
-static int can_scale_voltage;
-static struct acpi_processor *pr;
-static struct acpi_processor_cx *cx;
-static u32 acpi_regs_addr;
-static u8 longhaul_flags;
-static unsigned int longhaul_index;
-
-/* Module parameters */
-static int scale_voltage;
-static int disable_acpi_c3;
-static int revid_errata;
-
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"longhaul", msg)
-
-
-/* Clock ratios multiplied by 10 */
-static int mults[32];
-static int eblcr[32];
-static int longhaul_version;
-static struct cpufreq_frequency_table *longhaul_table;
-
-#ifdef CONFIG_CPU_FREQ_DEBUG
-static char speedbuffer[8];
-
-static char *print_speed(int speed)
-{
-	if (speed < 1000) {
-		snprintf(speedbuffer, sizeof(speedbuffer), "%dMHz", speed);
-		return speedbuffer;
-	}
-
-	if (speed%1000 == 0)
-		snprintf(speedbuffer, sizeof(speedbuffer),
-			"%dGHz", speed/1000);
-	else
-		snprintf(speedbuffer, sizeof(speedbuffer),
-			"%d.%dGHz", speed/1000, (speed%1000)/100);
-
-	return speedbuffer;
-}
-#endif
-
-
-static unsigned int calc_speed(int mult)
-{
-	int khz;
-	khz = (mult/10)*fsb;
-	if (mult%10)
-		khz += fsb/2;
-	khz *= 1000;
-	return khz;
-}
-
-
-static int longhaul_get_cpu_mult(void)
-{
-	unsigned long invalue = 0, lo, hi;
-
-	rdmsr(MSR_IA32_EBL_CR_POWERON, lo, hi);
-	invalue = (lo & (1<<22|1<<23|1<<24|1<<25))>>22;
-	if (longhaul_version == TYPE_LONGHAUL_V2 ||
-	    longhaul_version == TYPE_POWERSAVER) {
-		if (lo & (1<<27))
-			invalue += 16;
-	}
-	return eblcr[invalue];
-}
-
-/* For processor with BCR2 MSR */
-
-static void do_longhaul1(unsigned int mults_index)
-{
-	union msr_bcr2 bcr2;
-
-	rdmsrl(MSR_VIA_BCR2, bcr2.val);
-	/* Enable software clock multiplier */
-	bcr2.bits.ESOFTBF = 1;
-	bcr2.bits.CLOCKMUL = mults_index & 0xff;
-
-	/* Sync to timer tick */
-	safe_halt();
-	/* Change frequency on next halt or sleep */
-	wrmsrl(MSR_VIA_BCR2, bcr2.val);
-	/* Invoke transition */
-	ACPI_FLUSH_CPU_CACHE();
-	halt();
-
-	/* Disable software clock multiplier */
-	local_irq_disable();
-	rdmsrl(MSR_VIA_BCR2, bcr2.val);
-	bcr2.bits.ESOFTBF = 0;
-	wrmsrl(MSR_VIA_BCR2, bcr2.val);
-}
-
-/* For processor with Longhaul MSR */
-
-static void do_powersaver(int cx_address, unsigned int mults_index,
-			  unsigned int dir)
-{
-	union msr_longhaul longhaul;
-	u32 t;
-
-	rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
-	/* Setup new frequency */
-	if (!revid_errata)
-		longhaul.bits.RevisionKey = longhaul.bits.RevisionID;
-	else
-		longhaul.bits.RevisionKey = 0;
-	longhaul.bits.SoftBusRatio = mults_index & 0xf;
-	longhaul.bits.SoftBusRatio4 = (mults_index & 0x10) >> 4;
-	/* Setup new voltage */
-	if (can_scale_voltage)
-		longhaul.bits.SoftVID = (mults_index >> 8) & 0x1f;
-	/* Sync to timer tick */
-	safe_halt();
-	/* Raise voltage if necessary */
-	if (can_scale_voltage && dir) {
-		longhaul.bits.EnableSoftVID = 1;
-		wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
-		/* Change voltage */
-		if (!cx_address) {
-			ACPI_FLUSH_CPU_CACHE();
-			halt();
-		} else {
-			ACPI_FLUSH_CPU_CACHE();
-			/* Invoke C3 */
-			inb(cx_address);
-			/* Dummy op - must do something useless after P_LVL3
-			 * read */
-			t = inl(acpi_gbl_FADT.xpm_timer_block.address);
-		}
-		longhaul.bits.EnableSoftVID = 0;
-		wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
-	}
-
-	/* Change frequency on next halt or sleep */
-	longhaul.bits.EnableSoftBusRatio = 1;
-	wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
-	if (!cx_address) {
-		ACPI_FLUSH_CPU_CACHE();
-		halt();
-	} else {
-		ACPI_FLUSH_CPU_CACHE();
-		/* Invoke C3 */
-		inb(cx_address);
-		/* Dummy op - must do something useless after P_LVL3 read */
-		t = inl(acpi_gbl_FADT.xpm_timer_block.address);
-	}
-	/* Disable bus ratio bit */
-	longhaul.bits.EnableSoftBusRatio = 0;
-	wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
-
-	/* Reduce voltage if necessary */
-	if (can_scale_voltage && !dir) {
-		longhaul.bits.EnableSoftVID = 1;
-		wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
-		/* Change voltage */
-		if (!cx_address) {
-			ACPI_FLUSH_CPU_CACHE();
-			halt();
-		} else {
-			ACPI_FLUSH_CPU_CACHE();
-			/* Invoke C3 */
-			inb(cx_address);
-			/* Dummy op - must do something useless after P_LVL3
-			 * read */
-			t = inl(acpi_gbl_FADT.xpm_timer_block.address);
-		}
-		longhaul.bits.EnableSoftVID = 0;
-		wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
-	}
-}
-
-/**
- * longhaul_set_cpu_frequency()
- * @mults_index : bitpattern of the new multiplier.
- *
- * Sets a new clock ratio.
- */
-
-static void longhaul_setstate(unsigned int table_index)
-{
-	unsigned int mults_index;
-	int speed, mult;
-	struct cpufreq_freqs freqs;
-	unsigned long flags;
-	unsigned int pic1_mask, pic2_mask;
-	u16 bm_status = 0;
-	u32 bm_timeout = 1000;
-	unsigned int dir = 0;
-
-	mults_index = longhaul_table[table_index].index;
-	/* Safety precautions */
-	mult = mults[mults_index & 0x1f];
-	if (mult == -1)
-		return;
-	speed = calc_speed(mult);
-	if ((speed > highest_speed) || (speed < lowest_speed))
-		return;
-	/* Voltage transition before frequency transition? */
-	if (can_scale_voltage && longhaul_index < table_index)
-		dir = 1;
-
-	freqs.old = calc_speed(longhaul_get_cpu_mult());
-	freqs.new = speed;
-	freqs.cpu = 0; /* longhaul.c is UP only driver */
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-
-	dprintk("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n",
-			fsb, mult/10, mult%10, print_speed(speed/1000));
-retry_loop:
-	preempt_disable();
-	local_irq_save(flags);
-
-	pic2_mask = inb(0xA1);
-	pic1_mask = inb(0x21);	/* works on C3. save mask. */
-	outb(0xFF, 0xA1);	/* Overkill */
-	outb(0xFE, 0x21);	/* TMR0 only */
-
-	/* Wait while PCI bus is busy. */
-	if (acpi_regs_addr && (longhaul_flags & USE_NORTHBRIDGE
-	    || ((pr != NULL) && pr->flags.bm_control))) {
-		bm_status = inw(acpi_regs_addr);
-		bm_status &= 1 << 4;
-		while (bm_status && bm_timeout) {
-			outw(1 << 4, acpi_regs_addr);
-			bm_timeout--;
-			bm_status = inw(acpi_regs_addr);
-			bm_status &= 1 << 4;
-		}
-	}
-
-	if (longhaul_flags & USE_NORTHBRIDGE) {
-		/* Disable AGP and PCI arbiters */
-		outb(3, 0x22);
-	} else if ((pr != NULL) && pr->flags.bm_control) {
-		/* Disable bus master arbitration */
-		acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 1);
-	}
-	switch (longhaul_version) {
-
-	/*
-	 * Longhaul v1. (Samuel[C5A] and Samuel2 stepping 0[C5B])
-	 * Software controlled multipliers only.
-	 */
-	case TYPE_LONGHAUL_V1:
-		do_longhaul1(mults_index);
-		break;
-
-	/*
-	 * Longhaul v2 appears in Samuel2 Steppings 1->7 [C5B] and Ezra [C5C]
-	 *
-	 * Longhaul v3 (aka Powersaver). (Ezra-T [C5M] & Nehemiah [C5N])
-	 * Nehemiah can do FSB scaling too, but this has never been proven
-	 * to work in practice.
-	 */
-	case TYPE_LONGHAUL_V2:
-	case TYPE_POWERSAVER:
-		if (longhaul_flags & USE_ACPI_C3) {
-			/* Don't allow wakeup */
-			acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
-			do_powersaver(cx->address, mults_index, dir);
-		} else {
-			do_powersaver(0, mults_index, dir);
-		}
-		break;
-	}
-
-	if (longhaul_flags & USE_NORTHBRIDGE) {
-		/* Enable arbiters */
-		outb(0, 0x22);
-	} else if ((pr != NULL) && pr->flags.bm_control) {
-		/* Enable bus master arbitration */
-		acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 0);
-	}
-	outb(pic2_mask, 0xA1);	/* restore mask */
-	outb(pic1_mask, 0x21);
-
-	local_irq_restore(flags);
-	preempt_enable();
-
-	freqs.new = calc_speed(longhaul_get_cpu_mult());
-	/* Check if requested frequency is set. */
-	if (unlikely(freqs.new != speed)) {
-		printk(KERN_INFO PFX "Failed to set requested frequency!\n");
-		/* Revision ID = 1 but processor is expecting revision key
-		 * equal to 0. Jumpers at the bottom of processor will change
-		 * multiplier and FSB, but will not change bits in Longhaul
-		 * MSR nor enable voltage scaling. */
-		if (!revid_errata) {
-			printk(KERN_INFO PFX "Enabling \"Ignore Revision ID\" "
-						"option.\n");
-			revid_errata = 1;
-			msleep(200);
-			goto retry_loop;
-		}
-		/* Why ACPI C3 sometimes doesn't work is a mystery for me.
-		 * But it does happen. Processor is entering ACPI C3 state,
-		 * but it doesn't change frequency. I tried poking various
-		 * bits in northbridge registers, but without success. */
-		if (longhaul_flags & USE_ACPI_C3) {
-			printk(KERN_INFO PFX "Disabling ACPI C3 support.\n");
-			longhaul_flags &= ~USE_ACPI_C3;
-			if (revid_errata) {
-				printk(KERN_INFO PFX "Disabling \"Ignore "
-						"Revision ID\" option.\n");
-				revid_errata = 0;
-			}
-			msleep(200);
-			goto retry_loop;
-		}
-		/* This shouldn't happen. Longhaul ver. 2 was reported not
-		 * working on processors without voltage scaling, but with
-		 * RevID = 1. RevID errata will make things right. Just
-		 * to be 100% sure. */
-		if (longhaul_version == TYPE_LONGHAUL_V2) {
-			printk(KERN_INFO PFX "Switching to Longhaul ver. 1\n");
-			longhaul_version = TYPE_LONGHAUL_V1;
-			msleep(200);
-			goto retry_loop;
-		}
-	}
-	/* Report true CPU frequency */
-	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-
-	if (!bm_timeout)
-		printk(KERN_INFO PFX "Warning: Timeout while waiting for "
-				"idle PCI bus.\n");
-}
-
-/*
- * Centaur decided to make life a little more tricky.
- * Only longhaul v1 is allowed to read EBLCR BSEL[0:1].
- * Samuel2 and above have to try and guess what the FSB is.
- * We do this by assuming we booted at maximum multiplier, and interpolate
- * between that value multiplied by possible FSBs and cpu_mhz which
- * was calculated at boot time. Really ugly, but no other way to do this.
- */
-
-#define ROUNDING	0xf
-
-static int guess_fsb(int mult)
-{
-	int speed = cpu_khz / 1000;
-	int i;
-	int speeds[] = { 666, 1000, 1333, 2000 };
-	int f_max, f_min;
-
-	for (i = 0; i < 4; i++) {
-		f_max = ((speeds[i] * mult) + 50) / 100;
-		f_max += (ROUNDING / 2);
-		f_min = f_max - ROUNDING;
-		if ((speed <= f_max) && (speed >= f_min))
-			return speeds[i] / 10;
-	}
-	return 0;
-}
-
-
-static int __cpuinit longhaul_get_ranges(void)
-{
-	unsigned int i, j, k = 0;
-	unsigned int ratio;
-	int mult;
-
-	/* Get current frequency */
-	mult = longhaul_get_cpu_mult();
-	if (mult == -1) {
-		printk(KERN_INFO PFX "Invalid (reserved) multiplier!\n");
-		return -EINVAL;
-	}
-	fsb = guess_fsb(mult);
-	if (fsb == 0) {
-		printk(KERN_INFO PFX "Invalid (reserved) FSB!\n");
-		return -EINVAL;
-	}
-	/* Get max multiplier - as we always did.
-	 * Longhaul MSR is useful only when voltage scaling is enabled.
-	 * C3 is booting at max anyway. */
-	maxmult = mult;
-	/* Get min multiplier */
-	switch (cpu_model) {
-	case CPU_NEHEMIAH:
-		minmult = 50;
-		break;
-	case CPU_NEHEMIAH_C:
-		minmult = 40;
-		break;
-	default:
-		minmult = 30;
-		break;
-	}
-
-	dprintk("MinMult:%d.%dx MaxMult:%d.%dx\n",
-		 minmult/10, minmult%10, maxmult/10, maxmult%10);
-
-	highest_speed = calc_speed(maxmult);
-	lowest_speed = calc_speed(minmult);
-	dprintk("FSB:%dMHz  Lowest speed: %s   Highest speed:%s\n", fsb,
-		 print_speed(lowest_speed/1000),
-		 print_speed(highest_speed/1000));
-
-	if (lowest_speed == highest_speed) {
-		printk(KERN_INFO PFX "highestspeed == lowest, aborting.\n");
-		return -EINVAL;
-	}
-	if (lowest_speed > highest_speed) {
-		printk(KERN_INFO PFX "nonsense! lowest (%d > %d) !\n",
-			lowest_speed, highest_speed);
-		return -EINVAL;
-	}
-
-	longhaul_table = kmalloc((numscales + 1) * sizeof(*longhaul_table),
-			GFP_KERNEL);
-	if (!longhaul_table)
-		return -ENOMEM;
-
-	for (j = 0; j < numscales; j++) {
-		ratio = mults[j];
-		if (ratio == -1)
-			continue;
-		if (ratio > maxmult || ratio < minmult)
-			continue;
-		longhaul_table[k].frequency = calc_speed(ratio);
-		longhaul_table[k].index	= j;
-		k++;
-	}
-	if (k <= 1) {
-		kfree(longhaul_table);
-		return -ENODEV;
-	}
-	/* Sort */
-	for (j = 0; j < k - 1; j++) {
-		unsigned int min_f, min_i;
-		min_f = longhaul_table[j].frequency;
-		min_i = j;
-		for (i = j + 1; i < k; i++) {
-			if (longhaul_table[i].frequency < min_f) {
-				min_f = longhaul_table[i].frequency;
-				min_i = i;
-			}
-		}
-		if (min_i != j) {
-			swap(longhaul_table[j].frequency,
-			     longhaul_table[min_i].frequency);
-			swap(longhaul_table[j].index,
-			     longhaul_table[min_i].index);
-		}
-	}
-
-	longhaul_table[k].frequency = CPUFREQ_TABLE_END;
-
-	/* Find index we are running on */
-	for (j = 0; j < k; j++) {
-		if (mults[longhaul_table[j].index & 0x1f] == mult) {
-			longhaul_index = j;
-			break;
-		}
-	}
-	return 0;
-}
-
-
-static void __cpuinit longhaul_setup_voltagescaling(void)
-{
-	union msr_longhaul longhaul;
-	struct mV_pos minvid, maxvid, vid;
-	unsigned int j, speed, pos, kHz_step, numvscales;
-	int min_vid_speed;
-
-	rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
-	if (!(longhaul.bits.RevisionID & 1)) {
-		printk(KERN_INFO PFX "Voltage scaling not supported by CPU.\n");
-		return;
-	}
-
-	if (!longhaul.bits.VRMRev) {
-		printk(KERN_INFO PFX "VRM 8.5\n");
-		vrm_mV_table = &vrm85_mV[0];
-		mV_vrm_table = &mV_vrm85[0];
-	} else {
-		printk(KERN_INFO PFX "Mobile VRM\n");
-		if (cpu_model < CPU_NEHEMIAH)
-			return;
-		vrm_mV_table = &mobilevrm_mV[0];
-		mV_vrm_table = &mV_mobilevrm[0];
-	}
-
-	minvid = vrm_mV_table[longhaul.bits.MinimumVID];
-	maxvid = vrm_mV_table[longhaul.bits.MaximumVID];
-
-	if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) {
-		printk(KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. "
-					"Voltage scaling disabled.\n",
-					minvid.mV/1000, minvid.mV%1000,
-					maxvid.mV/1000, maxvid.mV%1000);
-		return;
-	}
-
-	if (minvid.mV == maxvid.mV) {
-		printk(KERN_INFO PFX "Claims to support voltage scaling but "
-				"min & max are both %d.%03d. "
-				"Voltage scaling disabled\n",
-				maxvid.mV/1000, maxvid.mV%1000);
-		return;
-	}
-
-	/* How many voltage steps*/
-	numvscales = maxvid.pos - minvid.pos + 1;
-	printk(KERN_INFO PFX
-		"Max VID=%d.%03d  "
-		"Min VID=%d.%03d, "
-		"%d possible voltage scales\n",
-		maxvid.mV/1000, maxvid.mV%1000,
-		minvid.mV/1000, minvid.mV%1000,
-		numvscales);
-
-	/* Calculate max frequency at min voltage */
-	j = longhaul.bits.MinMHzBR;
-	if (longhaul.bits.MinMHzBR4)
-		j += 16;
-	min_vid_speed = eblcr[j];
-	if (min_vid_speed == -1)
-		return;
-	switch (longhaul.bits.MinMHzFSB) {
-	case 0:
-		min_vid_speed *= 13333;
-		break;
-	case 1:
-		min_vid_speed *= 10000;
-		break;
-	case 3:
-		min_vid_speed *= 6666;
-		break;
-	default:
-		return;
-		break;
-	}
-	if (min_vid_speed >= highest_speed)
-		return;
-	/* Calculate kHz for one voltage step */
-	kHz_step = (highest_speed - min_vid_speed) / numvscales;
-
-	j = 0;
-	while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) {
-		speed = longhaul_table[j].frequency;
-		if (speed > min_vid_speed)
-			pos = (speed - min_vid_speed) / kHz_step + minvid.pos;
-		else
-			pos = minvid.pos;
-		longhaul_table[j].index |= mV_vrm_table[pos] << 8;
-		vid = vrm_mV_table[mV_vrm_table[pos]];
-		printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n",
-				speed, j, vid.mV);
-		j++;
-	}
-
-	can_scale_voltage = 1;
-	printk(KERN_INFO PFX "Voltage scaling enabled.\n");
-}
-
-
-static int longhaul_verify(struct cpufreq_policy *policy)
-{
-	return cpufreq_frequency_table_verify(policy, longhaul_table);
-}
-
-
-static int longhaul_target(struct cpufreq_policy *policy,
-			    unsigned int target_freq, unsigned int relation)
-{
-	unsigned int table_index = 0;
-	unsigned int i;
-	unsigned int dir = 0;
-	u8 vid, current_vid;
-
-	if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq,
-				relation, &table_index))
-		return -EINVAL;
-
-	/* Don't set same frequency again */
-	if (longhaul_index == table_index)
-		return 0;
-
-	if (!can_scale_voltage)
-		longhaul_setstate(table_index);
-	else {
-		/* On test system voltage transitions exceeding single
-		 * step up or down were turning motherboard off. Both
-		 * "ondemand" and "userspace" are unsafe. C7 is doing
-		 * this in hardware, C3 is old and we need to do this
-		 * in software. */
-		i = longhaul_index;
-		current_vid = (longhaul_table[longhaul_index].index >> 8);
-		current_vid &= 0x1f;
-		if (table_index > longhaul_index)
-			dir = 1;
-		while (i != table_index) {
-			vid = (longhaul_table[i].index >> 8) & 0x1f;
-			if (vid != current_vid) {
-				longhaul_setstate(i);
-				current_vid = vid;
-				msleep(200);
-			}
-			if (dir)
-				i++;
-			else
-				i--;
-		}
-		longhaul_setstate(table_index);
-	}
-	longhaul_index = table_index;
-	return 0;
-}
-
-
-static unsigned int longhaul_get(unsigned int cpu)
-{
-	if (cpu)
-		return 0;
-	return calc_speed(longhaul_get_cpu_mult());
-}
-
-static acpi_status longhaul_walk_callback(acpi_handle obj_handle,
-					  u32 nesting_level,
-					  void *context, void **return_value)
-{
-	struct acpi_device *d;
-
-	if (acpi_bus_get_device(obj_handle, &d))
-		return 0;
-
-	*return_value = acpi_driver_data(d);
-	return 1;
-}
-
-/* VIA don't support PM2 reg, but have something similar */
-static int enable_arbiter_disable(void)
-{
-	struct pci_dev *dev;
-	int status = 1;
-	int reg;
-	u8 pci_cmd;
-
-	/* Find PLE133 host bridge */
-	reg = 0x78;
-	dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8601_0,
-			     NULL);
-	/* Find PM133/VT8605 host bridge */
-	if (dev == NULL)
-		dev = pci_get_device(PCI_VENDOR_ID_VIA,
-				     PCI_DEVICE_ID_VIA_8605_0, NULL);
-	/* Find CLE266 host bridge */
-	if (dev == NULL) {
-		reg = 0x76;
-		dev = pci_get_device(PCI_VENDOR_ID_VIA,
-				     PCI_DEVICE_ID_VIA_862X_0, NULL);
-		/* Find CN400 V-Link host bridge */
-		if (dev == NULL)
-			dev = pci_get_device(PCI_VENDOR_ID_VIA, 0x7259, NULL);
-	}
-	if (dev != NULL) {
-		/* Enable access to port 0x22 */
-		pci_read_config_byte(dev, reg, &pci_cmd);
-		if (!(pci_cmd & 1<<7)) {
-			pci_cmd |= 1<<7;
-			pci_write_config_byte(dev, reg, pci_cmd);
-			pci_read_config_byte(dev, reg, &pci_cmd);
-			if (!(pci_cmd & 1<<7)) {
-				printk(KERN_ERR PFX
-					"Can't enable access to port 0x22.\n");
-				status = 0;
-			}
-		}
-		pci_dev_put(dev);
-		return status;
-	}
-	return 0;
-}
-
-static int longhaul_setup_southbridge(void)
-{
-	struct pci_dev *dev;
-	u8 pci_cmd;
-
-	/* Find VT8235 southbridge */
-	dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL);
-	if (dev == NULL)
-		/* Find VT8237 southbridge */
-		dev = pci_get_device(PCI_VENDOR_ID_VIA,
-				     PCI_DEVICE_ID_VIA_8237, NULL);
-	if (dev != NULL) {
-		/* Set transition time to max */
-		pci_read_config_byte(dev, 0xec, &pci_cmd);
-		pci_cmd &= ~(1 << 2);
-		pci_write_config_byte(dev, 0xec, pci_cmd);
-		pci_read_config_byte(dev, 0xe4, &pci_cmd);
-		pci_cmd &= ~(1 << 7);
-		pci_write_config_byte(dev, 0xe4, pci_cmd);
-		pci_read_config_byte(dev, 0xe5, &pci_cmd);
-		pci_cmd |= 1 << 7;
-		pci_write_config_byte(dev, 0xe5, pci_cmd);
-		/* Get address of ACPI registers block*/
-		pci_read_config_byte(dev, 0x81, &pci_cmd);
-		if (pci_cmd & 1 << 7) {
-			pci_read_config_dword(dev, 0x88, &acpi_regs_addr);
-			acpi_regs_addr &= 0xff00;
-			printk(KERN_INFO PFX "ACPI I/O at 0x%x\n",
-					acpi_regs_addr);
-		}
-
-		pci_dev_put(dev);
-		return 1;
-	}
-	return 0;
-}
-
-static int __cpuinit longhaul_cpu_init(struct cpufreq_policy *policy)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-	char *cpuname = NULL;
-	int ret;
-	u32 lo, hi;
-
-	/* Check what we have on this motherboard */
-	switch (c->x86_model) {
-	case 6:
-		cpu_model = CPU_SAMUEL;
-		cpuname = "C3 'Samuel' [C5A]";
-		longhaul_version = TYPE_LONGHAUL_V1;
-		memcpy(mults, samuel1_mults, sizeof(samuel1_mults));
-		memcpy(eblcr, samuel1_eblcr, sizeof(samuel1_eblcr));
-		break;
-
-	case 7:
-		switch (c->x86_mask) {
-		case 0:
-			longhaul_version = TYPE_LONGHAUL_V1;
-			cpu_model = CPU_SAMUEL2;
-			cpuname = "C3 'Samuel 2' [C5B]";
-			/* Note, this is not a typo, early Samuel2's had
-			 * Samuel1 ratios. */
-			memcpy(mults, samuel1_mults, sizeof(samuel1_mults));
-			memcpy(eblcr, samuel2_eblcr, sizeof(samuel2_eblcr));
-			break;
-		case 1 ... 15:
-			longhaul_version = TYPE_LONGHAUL_V2;
-			if (c->x86_mask < 8) {
-				cpu_model = CPU_SAMUEL2;
-				cpuname = "C3 'Samuel 2' [C5B]";
-			} else {
-				cpu_model = CPU_EZRA;
-				cpuname = "C3 'Ezra' [C5C]";
-			}
-			memcpy(mults, ezra_mults, sizeof(ezra_mults));
-			memcpy(eblcr, ezra_eblcr, sizeof(ezra_eblcr));
-			break;
-		}
-		break;
-
-	case 8:
-		cpu_model = CPU_EZRA_T;
-		cpuname = "C3 'Ezra-T' [C5M]";
-		longhaul_version = TYPE_POWERSAVER;
-		numscales = 32;
-		memcpy(mults, ezrat_mults, sizeof(ezrat_mults));
-		memcpy(eblcr, ezrat_eblcr, sizeof(ezrat_eblcr));
-		break;
-
-	case 9:
-		longhaul_version = TYPE_POWERSAVER;
-		numscales = 32;
-		memcpy(mults, nehemiah_mults, sizeof(nehemiah_mults));
-		memcpy(eblcr, nehemiah_eblcr, sizeof(nehemiah_eblcr));
-		switch (c->x86_mask) {
-		case 0 ... 1:
-			cpu_model = CPU_NEHEMIAH;
-			cpuname = "C3 'Nehemiah A' [C5XLOE]";
-			break;
-		case 2 ... 4:
-			cpu_model = CPU_NEHEMIAH;
-			cpuname = "C3 'Nehemiah B' [C5XLOH]";
-			break;
-		case 5 ... 15:
-			cpu_model = CPU_NEHEMIAH_C;
-			cpuname = "C3 'Nehemiah C' [C5P]";
-			break;
-		}
-		break;
-
-	default:
-		cpuname = "Unknown";
-		break;
-	}
-	/* Check Longhaul ver. 2 */
-	if (longhaul_version == TYPE_LONGHAUL_V2) {
-		rdmsr(MSR_VIA_LONGHAUL, lo, hi);
-		if (lo == 0 && hi == 0)
-			/* Looks like MSR isn't present */
-			longhaul_version = TYPE_LONGHAUL_V1;
-	}
-
-	printk(KERN_INFO PFX "VIA %s CPU detected.  ", cpuname);
-	switch (longhaul_version) {
-	case TYPE_LONGHAUL_V1:
-	case TYPE_LONGHAUL_V2:
-		printk(KERN_CONT "Longhaul v%d supported.\n", longhaul_version);
-		break;
-	case TYPE_POWERSAVER:
-		printk(KERN_CONT "Powersaver supported.\n");
-		break;
-	};
-
-	/* Doesn't hurt */
-	longhaul_setup_southbridge();
-
-	/* Find ACPI data for processor */
-	acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
-				ACPI_UINT32_MAX, &longhaul_walk_callback, NULL,
-				NULL, (void *)&pr);
-
-	/* Check ACPI support for C3 state */
-	if (pr != NULL && longhaul_version == TYPE_POWERSAVER) {
-		cx = &pr->power.states[ACPI_STATE_C3];
-		if (cx->address > 0 && cx->latency <= 1000)
-			longhaul_flags |= USE_ACPI_C3;
-	}
-	/* Disable if it isn't working */
-	if (disable_acpi_c3)
-		longhaul_flags &= ~USE_ACPI_C3;
-	/* Check if northbridge is friendly */
-	if (enable_arbiter_disable())
-		longhaul_flags |= USE_NORTHBRIDGE;
-
-	/* Check ACPI support for bus master arbiter disable */
-	if (!(longhaul_flags & USE_ACPI_C3
-	     || longhaul_flags & USE_NORTHBRIDGE)
-	    && ((pr == NULL) || !(pr->flags.bm_control))) {
-		printk(KERN_ERR PFX
-			"No ACPI support. Unsupported northbridge.\n");
-		return -ENODEV;
-	}
-
-	if (longhaul_flags & USE_NORTHBRIDGE)
-		printk(KERN_INFO PFX "Using northbridge support.\n");
-	if (longhaul_flags & USE_ACPI_C3)
-		printk(KERN_INFO PFX "Using ACPI support.\n");
-
-	ret = longhaul_get_ranges();
-	if (ret != 0)
-		return ret;
-
-	if ((longhaul_version != TYPE_LONGHAUL_V1) && (scale_voltage != 0))
-		longhaul_setup_voltagescaling();
-
-	policy->cpuinfo.transition_latency = 200000;	/* nsec */
-	policy->cur = calc_speed(longhaul_get_cpu_mult());
-
-	ret = cpufreq_frequency_table_cpuinfo(policy, longhaul_table);
-	if (ret)
-		return ret;
-
-	cpufreq_frequency_table_get_attr(longhaul_table, policy->cpu);
-
-	return 0;
-}
-
-static int __devexit longhaul_cpu_exit(struct cpufreq_policy *policy)
-{
-	cpufreq_frequency_table_put_attr(policy->cpu);
-	return 0;
-}
-
-static struct freq_attr *longhaul_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-static struct cpufreq_driver longhaul_driver = {
-	.verify	= longhaul_verify,
-	.target	= longhaul_target,
-	.get	= longhaul_get,
-	.init	= longhaul_cpu_init,
-	.exit	= __devexit_p(longhaul_cpu_exit),
-	.name	= "longhaul",
-	.owner	= THIS_MODULE,
-	.attr	= longhaul_attr,
-};
-
-
-static int __init longhaul_init(void)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-
-	if (c->x86_vendor != X86_VENDOR_CENTAUR || c->x86 != 6)
-		return -ENODEV;
-
-#ifdef CONFIG_SMP
-	if (num_online_cpus() > 1) {
-		printk(KERN_ERR PFX "More than 1 CPU detected, "
-				"longhaul disabled.\n");
-		return -ENODEV;
-	}
-#endif
-#ifdef CONFIG_X86_IO_APIC
-	if (cpu_has_apic) {
-		printk(KERN_ERR PFX "APIC detected. Longhaul is currently "
-				"broken in this configuration.\n");
-		return -ENODEV;
-	}
-#endif
-	switch (c->x86_model) {
-	case 6 ... 9:
-		return cpufreq_register_driver(&longhaul_driver);
-	case 10:
-		printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n");
-	default:
-		;
-	}
-
-	return -ENODEV;
-}
-
-
-static void __exit longhaul_exit(void)
-{
-	int i;
-
-	for (i = 0; i < numscales; i++) {
-		if (mults[i] == maxmult) {
-			longhaul_setstate(i);
-			break;
-		}
-	}
-
-	cpufreq_unregister_driver(&longhaul_driver);
-	kfree(longhaul_table);
-}
-
-/* Even if BIOS is exporting ACPI C3 state, and it is used
- * with success when CPU is idle, this state doesn't
- * trigger frequency transition in some cases. */
-module_param(disable_acpi_c3, int, 0644);
-MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support");
-/* Change CPU voltage with frequency. Very useful to save
- * power, but most VIA C3 processors aren't supporting it. */
-module_param(scale_voltage, int, 0644);
-MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
-/* Force revision key to 0 for processors which doesn't
- * support voltage scaling, but are introducing itself as
- * such. */
-module_param(revid_errata, int, 0644);
-MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID");
-
-MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
-MODULE_DESCRIPTION("Longhaul driver for VIA Cyrix processors.");
-MODULE_LICENSE("GPL");
-
-late_initcall(longhaul_init);
-module_exit(longhaul_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.h b/arch/x86/kernel/cpu/cpufreq/longhaul.h
deleted file mode 100644
index cbf48fbca88..00000000000
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.h
+++ /dev/null
@@ -1,353 +0,0 @@
-/*
- *  longhaul.h
- *  (C) 2003 Dave Jones.
- *
- *  Licensed under the terms of the GNU GPL License version 2.
- *
- *  VIA-specific information
- */
-
-union msr_bcr2 {
-	struct {
-		unsigned Reseved:19,	// 18:0
-		ESOFTBF:1,		// 19
-		Reserved2:3,		// 22:20
-		CLOCKMUL:4,		// 26:23
-		Reserved3:5;		// 31:27
-	} bits;
-	unsigned long val;
-};
-
-union msr_longhaul {
-	struct {
-		unsigned RevisionID:4,	// 3:0
-		RevisionKey:4,		// 7:4
-		EnableSoftBusRatio:1,	// 8
-		EnableSoftVID:1,	// 9
-		EnableSoftBSEL:1,	// 10
-		Reserved:3,		// 11:13
-		SoftBusRatio4:1,	// 14
-		VRMRev:1,		// 15
-		SoftBusRatio:4,		// 19:16
-		SoftVID:5,		// 24:20
-		Reserved2:3,		// 27:25
-		SoftBSEL:2,		// 29:28
-		Reserved3:2,		// 31:30
-		MaxMHzBR:4,		// 35:32
-		MaximumVID:5,		// 40:36
-		MaxMHzFSB:2,		// 42:41
-		MaxMHzBR4:1,		// 43
-		Reserved4:4,		// 47:44
-		MinMHzBR:4,		// 51:48
-		MinimumVID:5,		// 56:52
-		MinMHzFSB:2,		// 58:57
-		MinMHzBR4:1,		// 59
-		Reserved5:4;		// 63:60
-	} bits;
-	unsigned long long val;
-};
-
-/*
- * Clock ratio tables. Div/Mod by 10 to get ratio.
- * The eblcr values specify the ratio read from the CPU.
- * The mults values specify what to write to the CPU.
- */
-
-/*
- * VIA C3 Samuel 1  & Samuel 2 (stepping 0)
- */
-static const int __cpuinitdata samuel1_mults[16] = {
-	-1, /* 0000 -> RESERVED */
-	30, /* 0001 ->  3.0x */
-	40, /* 0010 ->  4.0x */
-	-1, /* 0011 -> RESERVED */
-	-1, /* 0100 -> RESERVED */
-	35, /* 0101 ->  3.5x */
-	45, /* 0110 ->  4.5x */
-	55, /* 0111 ->  5.5x */
-	60, /* 1000 ->  6.0x */
-	70, /* 1001 ->  7.0x */
-	80, /* 1010 ->  8.0x */
-	50, /* 1011 ->  5.0x */
-	65, /* 1100 ->  6.5x */
-	75, /* 1101 ->  7.5x */
-	-1, /* 1110 -> RESERVED */
-	-1, /* 1111 -> RESERVED */
-};
-
-static const int __cpuinitdata samuel1_eblcr[16] = {
-	50, /* 0000 -> RESERVED */
-	30, /* 0001 ->  3.0x */
-	40, /* 0010 ->  4.0x */
-	-1, /* 0011 -> RESERVED */
-	55, /* 0100 ->  5.5x */
-	35, /* 0101 ->  3.5x */
-	45, /* 0110 ->  4.5x */
-	-1, /* 0111 -> RESERVED */
-	-1, /* 1000 -> RESERVED */
-	70, /* 1001 ->  7.0x */
-	80, /* 1010 ->  8.0x */
-	60, /* 1011 ->  6.0x */
-	-1, /* 1100 -> RESERVED */
-	75, /* 1101 ->  7.5x */
-	-1, /* 1110 -> RESERVED */
-	65, /* 1111 ->  6.5x */
-};
-
-/*
- * VIA C3 Samuel2 Stepping 1->15
- */
-static const int __cpuinitdata samuel2_eblcr[16] = {
-	50,  /* 0000 ->  5.0x */
-	30,  /* 0001 ->  3.0x */
-	40,  /* 0010 ->  4.0x */
-	100, /* 0011 -> 10.0x */
-	55,  /* 0100 ->  5.5x */
-	35,  /* 0101 ->  3.5x */
-	45,  /* 0110 ->  4.5x */
-	110, /* 0111 -> 11.0x */
-	90,  /* 1000 ->  9.0x */
-	70,  /* 1001 ->  7.0x */
-	80,  /* 1010 ->  8.0x */
-	60,  /* 1011 ->  6.0x */
-	120, /* 1100 -> 12.0x */
-	75,  /* 1101 ->  7.5x */
-	130, /* 1110 -> 13.0x */
-	65,  /* 1111 ->  6.5x */
-};
-
-/*
- * VIA C3 Ezra
- */
-static const int __cpuinitdata ezra_mults[16] = {
-	100, /* 0000 -> 10.0x */
-	30,  /* 0001 ->  3.0x */
-	40,  /* 0010 ->  4.0x */
-	90,  /* 0011 ->  9.0x */
-	95,  /* 0100 ->  9.5x */
-	35,  /* 0101 ->  3.5x */
-	45,  /* 0110 ->  4.5x */
-	55,  /* 0111 ->  5.5x */
-	60,  /* 1000 ->  6.0x */
-	70,  /* 1001 ->  7.0x */
-	80,  /* 1010 ->  8.0x */
-	50,  /* 1011 ->  5.0x */
-	65,  /* 1100 ->  6.5x */
-	75,  /* 1101 ->  7.5x */
-	85,  /* 1110 ->  8.5x */
-	120, /* 1111 -> 12.0x */
-};
-
-static const int __cpuinitdata ezra_eblcr[16] = {
-	50,  /* 0000 ->  5.0x */
-	30,  /* 0001 ->  3.0x */
-	40,  /* 0010 ->  4.0x */
-	100, /* 0011 -> 10.0x */
-	55,  /* 0100 ->  5.5x */
-	35,  /* 0101 ->  3.5x */
-	45,  /* 0110 ->  4.5x */
-	95,  /* 0111 ->  9.5x */
-	90,  /* 1000 ->  9.0x */
-	70,  /* 1001 ->  7.0x */
-	80,  /* 1010 ->  8.0x */
-	60,  /* 1011 ->  6.0x */
-	120, /* 1100 -> 12.0x */
-	75,  /* 1101 ->  7.5x */
-	85,  /* 1110 ->  8.5x */
-	65,  /* 1111 ->  6.5x */
-};
-
-/*
- * VIA C3 (Ezra-T) [C5M].
- */
-static const int __cpuinitdata ezrat_mults[32] = {
-	100, /* 0000 -> 10.0x */
-	30,  /* 0001 ->  3.0x */
-	40,  /* 0010 ->  4.0x */
-	90,  /* 0011 ->  9.0x */
-	95,  /* 0100 ->  9.5x */
-	35,  /* 0101 ->  3.5x */
-	45,  /* 0110 ->  4.5x */
-	55,  /* 0111 ->  5.5x */
-	60,  /* 1000 ->  6.0x */
-	70,  /* 1001 ->  7.0x */
-	80,  /* 1010 ->  8.0x */
-	50,  /* 1011 ->  5.0x */
-	65,  /* 1100 ->  6.5x */
-	75,  /* 1101 ->  7.5x */
-	85,  /* 1110 ->  8.5x */
-	120, /* 1111 ->  12.0x */
-
-	-1,  /* 0000 -> RESERVED (10.0x) */
-	110, /* 0001 -> 11.0x */
-	-1, /* 0010 -> 12.0x */
-	-1,  /* 0011 -> RESERVED (9.0x)*/
-	105, /* 0100 -> 10.5x */
-	115, /* 0101 -> 11.5x */
-	125, /* 0110 -> 12.5x */
-	135, /* 0111 -> 13.5x */
-	140, /* 1000 -> 14.0x */
-	150, /* 1001 -> 15.0x */
-	160, /* 1010 -> 16.0x */
-	130, /* 1011 -> 13.0x */
-	145, /* 1100 -> 14.5x */
-	155, /* 1101 -> 15.5x */
-	-1,  /* 1110 -> RESERVED (13.0x) */
-	-1,  /* 1111 -> RESERVED (12.0x) */
-};
-
-static const int __cpuinitdata ezrat_eblcr[32] = {
-	50,  /* 0000 ->  5.0x */
-	30,  /* 0001 ->  3.0x */
-	40,  /* 0010 ->  4.0x */
-	100, /* 0011 -> 10.0x */
-	55,  /* 0100 ->  5.5x */
-	35,  /* 0101 ->  3.5x */
-	45,  /* 0110 ->  4.5x */
-	95,  /* 0111 ->  9.5x */
-	90,  /* 1000 ->  9.0x */
-	70,  /* 1001 ->  7.0x */
-	80,  /* 1010 ->  8.0x */
-	60,  /* 1011 ->  6.0x */
-	120, /* 1100 -> 12.0x */
-	75,  /* 1101 ->  7.5x */
-	85,  /* 1110 ->  8.5x */
-	65,  /* 1111 ->  6.5x */
-
-	-1,  /* 0000 -> RESERVED (9.0x) */
-	110, /* 0001 -> 11.0x */
-	120, /* 0010 -> 12.0x */
-	-1,  /* 0011 -> RESERVED (10.0x)*/
-	135, /* 0100 -> 13.5x */
-	115, /* 0101 -> 11.5x */
-	125, /* 0110 -> 12.5x */
-	105, /* 0111 -> 10.5x */
-	130, /* 1000 -> 13.0x */
-	150, /* 1001 -> 15.0x */
-	160, /* 1010 -> 16.0x */
-	140, /* 1011 -> 14.0x */
-	-1,  /* 1100 -> RESERVED (12.0x) */
-	155, /* 1101 -> 15.5x */
-	-1,  /* 1110 -> RESERVED (13.0x) */
-	145, /* 1111 -> 14.5x */
-};
-
-/*
- * VIA C3 Nehemiah */
-
-static const int __cpuinitdata nehemiah_mults[32] = {
-	100, /* 0000 -> 10.0x */
-	-1, /* 0001 -> 16.0x */
-	40,  /* 0010 ->  4.0x */
-	90,  /* 0011 ->  9.0x */
-	95,  /* 0100 ->  9.5x */
-	-1,  /* 0101 ->  RESERVED */
-	45,  /* 0110 ->  4.5x */
-	55,  /* 0111 ->  5.5x */
-	60,  /* 1000 ->  6.0x */
-	70,  /* 1001 ->  7.0x */
-	80,  /* 1010 ->  8.0x */
-	50,  /* 1011 ->  5.0x */
-	65,  /* 1100 ->  6.5x */
-	75,  /* 1101 ->  7.5x */
-	85,  /* 1110 ->  8.5x */
-	120, /* 1111 -> 12.0x */
-	-1, /* 0000 -> 10.0x */
-	110, /* 0001 -> 11.0x */
-	-1, /* 0010 -> 12.0x */
-	-1,  /* 0011 ->  9.0x */
-	105, /* 0100 -> 10.5x */
-	115, /* 0101 -> 11.5x */
-	125, /* 0110 -> 12.5x */
-	135, /* 0111 -> 13.5x */
-	140, /* 1000 -> 14.0x */
-	150, /* 1001 -> 15.0x */
-	160, /* 1010 -> 16.0x */
-	130, /* 1011 -> 13.0x */
-	145, /* 1100 -> 14.5x */
-	155, /* 1101 -> 15.5x */
-	-1,  /* 1110 -> RESERVED (13.0x) */
-	-1, /* 1111 -> 12.0x */
-};
-
-static const int __cpuinitdata nehemiah_eblcr[32] = {
-	50,  /* 0000 ->  5.0x */
-	160, /* 0001 -> 16.0x */
-	40,  /* 0010 ->  4.0x */
-	100, /* 0011 -> 10.0x */
-	55,  /* 0100 ->  5.5x */
-	-1,  /* 0101 ->  RESERVED */
-	45,  /* 0110 ->  4.5x */
-	95,  /* 0111 ->  9.5x */
-	90,  /* 1000 ->  9.0x */
-	70,  /* 1001 ->  7.0x */
-	80,  /* 1010 ->  8.0x */
-	60,  /* 1011 ->  6.0x */
-	120, /* 1100 -> 12.0x */
-	75,  /* 1101 ->  7.5x */
-	85,  /* 1110 ->  8.5x */
-	65,  /* 1111 ->  6.5x */
-	90,  /* 0000 ->  9.0x */
-	110, /* 0001 -> 11.0x */
-	120, /* 0010 -> 12.0x */
-	100, /* 0011 -> 10.0x */
-	135, /* 0100 -> 13.5x */
-	115, /* 0101 -> 11.5x */
-	125, /* 0110 -> 12.5x */
-	105, /* 0111 -> 10.5x */
-	130, /* 1000 -> 13.0x */
-	150, /* 1001 -> 15.0x */
-	160, /* 1010 -> 16.0x */
-	140, /* 1011 -> 14.0x */
-	120, /* 1100 -> 12.0x */
-	155, /* 1101 -> 15.5x */
-	-1,  /* 1110 -> RESERVED (13.0x) */
-	145 /* 1111 -> 14.5x */
-};
-
-/*
- * Voltage scales. Div/Mod by 1000 to get actual voltage.
- * Which scale to use depends on the VRM type in use.
- */
-
-struct mV_pos {
-	unsigned short mV;
-	unsigned short pos;
-};
-
-static const struct mV_pos __cpuinitdata vrm85_mV[32] = {
-	{1250, 8},	{1200, 6},	{1150, 4},	{1100, 2},
-	{1050, 0},	{1800, 30},	{1750, 28},	{1700, 26},
-	{1650, 24},	{1600, 22},	{1550, 20},	{1500, 18},
-	{1450, 16},	{1400, 14},	{1350, 12},	{1300, 10},
-	{1275, 9},	{1225, 7},	{1175, 5},	{1125, 3},
-	{1075, 1},	{1825, 31},	{1775, 29},	{1725, 27},
-	{1675, 25},	{1625, 23},	{1575, 21},	{1525, 19},
-	{1475, 17},	{1425, 15},	{1375, 13},	{1325, 11}
-};
-
-static const unsigned char __cpuinitdata mV_vrm85[32] = {
-	0x04,	0x14,	0x03,	0x13,	0x02,	0x12,	0x01,	0x11,
-	0x00,	0x10,	0x0f,	0x1f,	0x0e,	0x1e,	0x0d,	0x1d,
-	0x0c,	0x1c,	0x0b,	0x1b,	0x0a,	0x1a,	0x09,	0x19,
-	0x08,	0x18,	0x07,	0x17,	0x06,	0x16,	0x05,	0x15
-};
-
-static const struct mV_pos __cpuinitdata mobilevrm_mV[32] = {
-	{1750, 31},	{1700, 30},	{1650, 29},	{1600, 28},
-	{1550, 27},	{1500, 26},	{1450, 25},	{1400, 24},
-	{1350, 23},	{1300, 22},	{1250, 21},	{1200, 20},
-	{1150, 19},	{1100, 18},	{1050, 17},	{1000, 16},
-	{975, 15},	{950, 14},	{925, 13},	{900, 12},
-	{875, 11},	{850, 10},	{825, 9},	{800, 8},
-	{775, 7},	{750, 6},	{725, 5},	{700, 4},
-	{675, 3},	{650, 2},	{625, 1},	{600, 0}
-};
-
-static const unsigned char __cpuinitdata mV_mobilevrm[32] = {
-	0x1f,	0x1e,	0x1d,	0x1c,	0x1b,	0x1a,	0x19,	0x18,
-	0x17,	0x16,	0x15,	0x14,	0x13,	0x12,	0x11,	0x10,
-	0x0f,	0x0e,	0x0d,	0x0c,	0x0b,	0x0a,	0x09,	0x08,
-	0x07,	0x06,	0x05,	0x04,	0x03,	0x02,	0x01,	0x00
-};
-
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c
deleted file mode 100644
index d9f51367666..00000000000
--- a/arch/x86/kernel/cpu/cpufreq/longrun.c
+++ /dev/null
@@ -1,327 +0,0 @@
-/*
- * (C) 2002 - 2003  Dominik Brodowski <linux@brodo.de>
- *
- *  Licensed under the terms of the GNU GPL License version 2.
- *
- *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/timex.h>
-
-#include <asm/msr.h>
-#include <asm/processor.h>
-
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"longrun", msg)
-
-static struct cpufreq_driver	longrun_driver;
-
-/**
- * longrun_{low,high}_freq is needed for the conversion of cpufreq kHz
- * values into per cent values. In TMTA microcode, the following is valid:
- * performance_pctg = (current_freq - low_freq)/(high_freq - low_freq)
- */
-static unsigned int longrun_low_freq, longrun_high_freq;
-
-
-/**
- * longrun_get_policy - get the current LongRun policy
- * @policy: struct cpufreq_policy where current policy is written into
- *
- * Reads the current LongRun policy by access to MSR_TMTA_LONGRUN_FLAGS
- * and MSR_TMTA_LONGRUN_CTRL
- */
-static void __cpuinit longrun_get_policy(struct cpufreq_policy *policy)
-{
-	u32 msr_lo, msr_hi;
-
-	rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
-	dprintk("longrun flags are %x - %x\n", msr_lo, msr_hi);
-	if (msr_lo & 0x01)
-		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
-	else
-		policy->policy = CPUFREQ_POLICY_POWERSAVE;
-
-	rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
-	dprintk("longrun ctrl is %x - %x\n", msr_lo, msr_hi);
-	msr_lo &= 0x0000007F;
-	msr_hi &= 0x0000007F;
-
-	if (longrun_high_freq <= longrun_low_freq) {
-		/* Assume degenerate Longrun table */
-		policy->min = policy->max = longrun_high_freq;
-	} else {
-		policy->min = longrun_low_freq + msr_lo *
-			((longrun_high_freq - longrun_low_freq) / 100);
-		policy->max = longrun_low_freq + msr_hi *
-			((longrun_high_freq - longrun_low_freq) / 100);
-	}
-	policy->cpu = 0;
-}
-
-
-/**
- * longrun_set_policy - sets a new CPUFreq policy
- * @policy: new policy
- *
- * Sets a new CPUFreq policy on LongRun-capable processors. This function
- * has to be called with cpufreq_driver locked.
- */
-static int longrun_set_policy(struct cpufreq_policy *policy)
-{
-	u32 msr_lo, msr_hi;
-	u32 pctg_lo, pctg_hi;
-
-	if (!policy)
-		return -EINVAL;
-
-	if (longrun_high_freq <= longrun_low_freq) {
-		/* Assume degenerate Longrun table */
-		pctg_lo = pctg_hi = 100;
-	} else {
-		pctg_lo = (policy->min - longrun_low_freq) /
-			((longrun_high_freq - longrun_low_freq) / 100);
-		pctg_hi = (policy->max - longrun_low_freq) /
-			((longrun_high_freq - longrun_low_freq) / 100);
-	}
-
-	if (pctg_hi > 100)
-		pctg_hi = 100;
-	if (pctg_lo > pctg_hi)
-		pctg_lo = pctg_hi;
-
-	/* performance or economy mode */
-	rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
-	msr_lo &= 0xFFFFFFFE;
-	switch (policy->policy) {
-	case CPUFREQ_POLICY_PERFORMANCE:
-		msr_lo |= 0x00000001;
-		break;
-	case CPUFREQ_POLICY_POWERSAVE:
-		break;
-	}
-	wrmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
-
-	/* lower and upper boundary */
-	rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
-	msr_lo &= 0xFFFFFF80;
-	msr_hi &= 0xFFFFFF80;
-	msr_lo |= pctg_lo;
-	msr_hi |= pctg_hi;
-	wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
-
-	return 0;
-}
-
-
-/**
- * longrun_verify_poliy - verifies a new CPUFreq policy
- * @policy: the policy to verify
- *
- * Validates a new CPUFreq policy. This function has to be called with
- * cpufreq_driver locked.
- */
-static int longrun_verify_policy(struct cpufreq_policy *policy)
-{
-	if (!policy)
-		return -EINVAL;
-
-	policy->cpu = 0;
-	cpufreq_verify_within_limits(policy,
-		policy->cpuinfo.min_freq,
-		policy->cpuinfo.max_freq);
-
-	if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) &&
-	    (policy->policy != CPUFREQ_POLICY_PERFORMANCE))
-		return -EINVAL;
-
-	return 0;
-}
-
-static unsigned int longrun_get(unsigned int cpu)
-{
-	u32 eax, ebx, ecx, edx;
-
-	if (cpu)
-		return 0;
-
-	cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
-	dprintk("cpuid eax is %u\n", eax);
-
-	return eax * 1000;
-}
-
-/**
- * longrun_determine_freqs - determines the lowest and highest possible core frequency
- * @low_freq: an int to put the lowest frequency into
- * @high_freq: an int to put the highest frequency into
- *
- * Determines the lowest and highest possible core frequencies on this CPU.
- * This is necessary to calculate the performance percentage according to
- * TMTA rules:
- * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq)
- */
-static int __cpuinit longrun_determine_freqs(unsigned int *low_freq,
-						      unsigned int *high_freq)
-{
-	u32 msr_lo, msr_hi;
-	u32 save_lo, save_hi;
-	u32 eax, ebx, ecx, edx;
-	u32 try_hi;
-	struct cpuinfo_x86 *c = &cpu_data(0);
-
-	if (!low_freq || !high_freq)
-		return -EINVAL;
-
-	if (cpu_has(c, X86_FEATURE_LRTI)) {
-		/* if the LongRun Table Interface is present, the
-		 * detection is a bit easier:
-		 * For minimum frequency, read out the maximum
-		 * level (msr_hi), write that into "currently
-		 * selected level", and read out the frequency.
-		 * For maximum frequency, read out level zero.
-		 */
-		/* minimum */
-		rdmsr(MSR_TMTA_LRTI_READOUT, msr_lo, msr_hi);
-		wrmsr(MSR_TMTA_LRTI_READOUT, msr_hi, msr_hi);
-		rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi);
-		*low_freq = msr_lo * 1000; /* to kHz */
-
-		/* maximum */
-		wrmsr(MSR_TMTA_LRTI_READOUT, 0, msr_hi);
-		rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi);
-		*high_freq = msr_lo * 1000; /* to kHz */
-
-		dprintk("longrun table interface told %u - %u kHz\n",
-				*low_freq, *high_freq);
-
-		if (*low_freq > *high_freq)
-			*low_freq = *high_freq;
-		return 0;
-	}
-
-	/* set the upper border to the value determined during TSC init */
-	*high_freq = (cpu_khz / 1000);
-	*high_freq = *high_freq * 1000;
-	dprintk("high frequency is %u kHz\n", *high_freq);
-
-	/* get current borders */
-	rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
-	save_lo = msr_lo & 0x0000007F;
-	save_hi = msr_hi & 0x0000007F;
-
-	/* if current perf_pctg is larger than 90%, we need to decrease the
-	 * upper limit to make the calculation more accurate.
-	 */
-	cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
-	/* try decreasing in 10% steps, some processors react only
-	 * on some barrier values */
-	for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -= 10) {
-		/* set to 0 to try_hi perf_pctg */
-		msr_lo &= 0xFFFFFF80;
-		msr_hi &= 0xFFFFFF80;
-		msr_hi |= try_hi;
-		wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
-
-		/* read out current core MHz and current perf_pctg */
-		cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
-
-		/* restore values */
-		wrmsr(MSR_TMTA_LONGRUN_CTRL, save_lo, save_hi);
-	}
-	dprintk("percentage is %u %%, freq is %u MHz\n", ecx, eax);
-
-	/* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq)
-	 * eqals
-	 * low_freq * (1 - perf_pctg) = (cur_freq - high_freq * perf_pctg)
-	 *
-	 * high_freq * perf_pctg is stored tempoarily into "ebx".
-	 */
-	ebx = (((cpu_khz / 1000) * ecx) / 100); /* to MHz */
-
-	if ((ecx > 95) || (ecx == 0) || (eax < ebx))
-		return -EIO;
-
-	edx = ((eax - ebx) * 100) / (100 - ecx);
-	*low_freq = edx * 1000; /* back to kHz */
-
-	dprintk("low frequency is %u kHz\n", *low_freq);
-
-	if (*low_freq > *high_freq)
-		*low_freq = *high_freq;
-
-	return 0;
-}
-
-
-static int __cpuinit longrun_cpu_init(struct cpufreq_policy *policy)
-{
-	int result = 0;
-
-	/* capability check */
-	if (policy->cpu != 0)
-		return -ENODEV;
-
-	/* detect low and high frequency */
-	result = longrun_determine_freqs(&longrun_low_freq, &longrun_high_freq);
-	if (result)
-		return result;
-
-	/* cpuinfo and default policy values */
-	policy->cpuinfo.min_freq = longrun_low_freq;
-	policy->cpuinfo.max_freq = longrun_high_freq;
-	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
-	longrun_get_policy(policy);
-
-	return 0;
-}
-
-
-static struct cpufreq_driver longrun_driver = {
-	.flags		= CPUFREQ_CONST_LOOPS,
-	.verify		= longrun_verify_policy,
-	.setpolicy	= longrun_set_policy,
-	.get		= longrun_get,
-	.init		= longrun_cpu_init,
-	.name		= "longrun",
-	.owner		= THIS_MODULE,
-};
-
-
-/**
- * longrun_init - initializes the Transmeta Crusoe LongRun CPUFreq driver
- *
- * Initializes the LongRun support.
- */
-static int __init longrun_init(void)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-
-	if (c->x86_vendor != X86_VENDOR_TRANSMETA ||
-	    !cpu_has(c, X86_FEATURE_LONGRUN))
-		return -ENODEV;
-
-	return cpufreq_register_driver(&longrun_driver);
-}
-
-
-/**
- * longrun_exit - unregisters LongRun support
- */
-static void __exit longrun_exit(void)
-{
-	cpufreq_unregister_driver(&longrun_driver);
-}
-
-
-MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
-MODULE_DESCRIPTION("LongRun driver for Transmeta Crusoe and "
-		"Efficeon processors.");
-MODULE_LICENSE("GPL");
-
-module_init(longrun_init);
-module_exit(longrun_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/mperf.c b/arch/x86/kernel/cpu/cpufreq/mperf.c
deleted file mode 100644
index 911e193018a..00000000000
--- a/arch/x86/kernel/cpu/cpufreq/mperf.c
+++ /dev/null
@@ -1,51 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/smp.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/slab.h>
-
-#include "mperf.h"
-
-static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf);
-
-/* Called via smp_call_function_single(), on the target CPU */
-static void read_measured_perf_ctrs(void *_cur)
-{
-	struct aperfmperf *am = _cur;
-
-	get_aperfmperf(am);
-}
-
-/*
- * Return the measured active (C0) frequency on this CPU since last call
- * to this function.
- * Input: cpu number
- * Return: Average CPU frequency in terms of max frequency (zero on error)
- *
- * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance
- * over a period of time, while CPU is in C0 state.
- * IA32_MPERF counts at the rate of max advertised frequency
- * IA32_APERF counts at the rate of actual CPU frequency
- * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and
- * no meaning should be associated with absolute values of these MSRs.
- */
-unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy,
-					unsigned int cpu)
-{
-	struct aperfmperf perf;
-	unsigned long ratio;
-	unsigned int retval;
-
-	if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1))
-		return 0;
-
-	ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf);
-	per_cpu(acfreq_old_perf, cpu) = perf;
-
-	retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT;
-
-	return retval;
-}
-EXPORT_SYMBOL_GPL(cpufreq_get_measured_perf);
-MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/cpufreq/mperf.h b/arch/x86/kernel/cpu/cpufreq/mperf.h
deleted file mode 100644
index 5dbf2950dc2..00000000000
--- a/arch/x86/kernel/cpu/cpufreq/mperf.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- *  (c) 2010 Advanced Micro Devices, Inc.
- *  Your use of this code is subject to the terms and conditions of the
- *  GNU general public license version 2. See "COPYING" or
- *  http://www.gnu.org/licenses/gpl.html
- */
-
-unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy,
-					unsigned int cpu);
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
deleted file mode 100644
index 52c93648e49..00000000000
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ /dev/null
@@ -1,331 +0,0 @@
-/*
- *	Pentium 4/Xeon CPU on demand clock modulation/speed scaling
- *	(C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
- *	(C) 2002 Zwane Mwaikambo <zwane@commfireservices.com>
- *	(C) 2002 Arjan van de Ven <arjanv@redhat.com>
- *	(C) 2002 Tora T. Engstad
- *	All Rights Reserved
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- *
- *      The author(s) of this software shall not be held liable for damages
- *      of any nature resulting due to the use of this software. This
- *      software is provided AS-IS with no warranties.
- *
- *	Date		Errata			Description
- *	20020525	N44, O17	12.5% or 25% DC causes lockup
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/cpufreq.h>
-#include <linux/cpumask.h>
-#include <linux/timex.h>
-
-#include <asm/processor.h>
-#include <asm/msr.h>
-#include <asm/timer.h>
-
-#include "speedstep-lib.h"
-
-#define PFX	"p4-clockmod: "
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"p4-clockmod", msg)
-
-/*
- * Duty Cycle (3bits), note DC_DISABLE is not specified in
- * intel docs i just use it to mean disable
- */
-enum {
-	DC_RESV, DC_DFLT, DC_25PT, DC_38PT, DC_50PT,
-	DC_64PT, DC_75PT, DC_88PT, DC_DISABLE
-};
-
-#define DC_ENTRIES	8
-
-
-static int has_N44_O17_errata[NR_CPUS];
-static unsigned int stock_freq;
-static struct cpufreq_driver p4clockmod_driver;
-static unsigned int cpufreq_p4_get(unsigned int cpu);
-
-static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate)
-{
-	u32 l, h;
-
-	if (!cpu_online(cpu) ||
-	    (newstate > DC_DISABLE) || (newstate == DC_RESV))
-		return -EINVAL;
-
-	rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h);
-
-	if (l & 0x01)
-		dprintk("CPU#%d currently thermal throttled\n", cpu);
-
-	if (has_N44_O17_errata[cpu] &&
-	    (newstate == DC_25PT || newstate == DC_DFLT))
-		newstate = DC_38PT;
-
-	rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h);
-	if (newstate == DC_DISABLE) {
-		dprintk("CPU#%d disabling modulation\n", cpu);
-		wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l & ~(1<<4), h);
-	} else {
-		dprintk("CPU#%d setting duty cycle to %d%%\n",
-			cpu, ((125 * newstate) / 10));
-		/* bits 63 - 5	: reserved
-		 * bit  4	: enable/disable
-		 * bits 3-1	: duty cycle
-		 * bit  0	: reserved
-		 */
-		l = (l & ~14);
-		l = l | (1<<4) | ((newstate & 0x7)<<1);
-		wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l, h);
-	}
-
-	return 0;
-}
-
-
-static struct cpufreq_frequency_table p4clockmod_table[] = {
-	{DC_RESV, CPUFREQ_ENTRY_INVALID},
-	{DC_DFLT, 0},
-	{DC_25PT, 0},
-	{DC_38PT, 0},
-	{DC_50PT, 0},
-	{DC_64PT, 0},
-	{DC_75PT, 0},
-	{DC_88PT, 0},
-	{DC_DISABLE, 0},
-	{DC_RESV, CPUFREQ_TABLE_END},
-};
-
-
-static int cpufreq_p4_target(struct cpufreq_policy *policy,
-			     unsigned int target_freq,
-			     unsigned int relation)
-{
-	unsigned int    newstate = DC_RESV;
-	struct cpufreq_freqs freqs;
-	int i;
-
-	if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0],
-				target_freq, relation, &newstate))
-		return -EINVAL;
-
-	freqs.old = cpufreq_p4_get(policy->cpu);
-	freqs.new = stock_freq * p4clockmod_table[newstate].index / 8;
-
-	if (freqs.new == freqs.old)
-		return 0;
-
-	/* notifiers */
-	for_each_cpu(i, policy->cpus) {
-		freqs.cpu = i;
-		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-	}
-
-	/* run on each logical CPU,
-	 * see section 13.15.3 of IA32 Intel Architecture Software
-	 * Developer's Manual, Volume 3
-	 */
-	for_each_cpu(i, policy->cpus)
-		cpufreq_p4_setdc(i, p4clockmod_table[newstate].index);
-
-	/* notifiers */
-	for_each_cpu(i, policy->cpus) {
-		freqs.cpu = i;
-		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-	}
-
-	return 0;
-}
-
-
-static int cpufreq_p4_verify(struct cpufreq_policy *policy)
-{
-	return cpufreq_frequency_table_verify(policy, &p4clockmod_table[0]);
-}
-
-
-static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
-{
-	if (c->x86 == 0x06) {
-		if (cpu_has(c, X86_FEATURE_EST))
-			printk_once(KERN_WARNING PFX "Warning: EST-capable "
-			       "CPU detected. The acpi-cpufreq module offers "
-			       "voltage scaling in addition to frequency "
-			       "scaling. You should use that instead of "
-			       "p4-clockmod, if possible.\n");
-		switch (c->x86_model) {
-		case 0x0E: /* Core */
-		case 0x0F: /* Core Duo */
-		case 0x16: /* Celeron Core */
-		case 0x1C: /* Atom */
-			p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
-			return speedstep_get_frequency(SPEEDSTEP_CPU_PCORE);
-		case 0x0D: /* Pentium M (Dothan) */
-			p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
-			/* fall through */
-		case 0x09: /* Pentium M (Banias) */
-			return speedstep_get_frequency(SPEEDSTEP_CPU_PM);
-		}
-	}
-
-	if (c->x86 != 0xF)
-		return 0;
-
-	/* on P-4s, the TSC runs with constant frequency independent whether
-	 * throttling is active or not. */
-	p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
-
-	if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4M) {
-		printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. "
-		       "The speedstep-ich or acpi cpufreq modules offer "
-		       "voltage scaling in addition of frequency scaling. "
-		       "You should use either one instead of p4-clockmod, "
-		       "if possible.\n");
-		return speedstep_get_frequency(SPEEDSTEP_CPU_P4M);
-	}
-
-	return speedstep_get_frequency(SPEEDSTEP_CPU_P4D);
-}
-
-
-
-static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
-{
-	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
-	int cpuid = 0;
-	unsigned int i;
-
-#ifdef CONFIG_SMP
-	cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu));
-#endif
-
-	/* Errata workaround */
-	cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_mask;
-	switch (cpuid) {
-	case 0x0f07:
-	case 0x0f0a:
-	case 0x0f11:
-	case 0x0f12:
-		has_N44_O17_errata[policy->cpu] = 1;
-		dprintk("has errata -- disabling low frequencies\n");
-	}
-
-	if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4D &&
-	    c->x86_model < 2) {
-		/* switch to maximum frequency and measure result */
-		cpufreq_p4_setdc(policy->cpu, DC_DISABLE);
-		recalibrate_cpu_khz();
-	}
-	/* get max frequency */
-	stock_freq = cpufreq_p4_get_frequency(c);
-	if (!stock_freq)
-		return -EINVAL;
-
-	/* table init */
-	for (i = 1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) {
-		if ((i < 2) && (has_N44_O17_errata[policy->cpu]))
-			p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID;
-		else
-			p4clockmod_table[i].frequency = (stock_freq * i)/8;
-	}
-	cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu);
-
-	/* cpuinfo and default policy values */
-
-	/* the transition latency is set to be 1 higher than the maximum
-	 * transition latency of the ondemand governor */
-	policy->cpuinfo.transition_latency = 10000001;
-	policy->cur = stock_freq;
-
-	return cpufreq_frequency_table_cpuinfo(policy, &p4clockmod_table[0]);
-}
-
-
-static int cpufreq_p4_cpu_exit(struct cpufreq_policy *policy)
-{
-	cpufreq_frequency_table_put_attr(policy->cpu);
-	return 0;
-}
-
-static unsigned int cpufreq_p4_get(unsigned int cpu)
-{
-	u32 l, h;
-
-	rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h);
-
-	if (l & 0x10) {
-		l = l >> 1;
-		l &= 0x7;
-	} else
-		l = DC_DISABLE;
-
-	if (l != DC_DISABLE)
-		return stock_freq * l / 8;
-
-	return stock_freq;
-}
-
-static struct freq_attr *p4clockmod_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-static struct cpufreq_driver p4clockmod_driver = {
-	.verify		= cpufreq_p4_verify,
-	.target		= cpufreq_p4_target,
-	.init		= cpufreq_p4_cpu_init,
-	.exit		= cpufreq_p4_cpu_exit,
-	.get		= cpufreq_p4_get,
-	.name		= "p4-clockmod",
-	.owner		= THIS_MODULE,
-	.attr		= p4clockmod_attr,
-};
-
-
-static int __init cpufreq_p4_init(void)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-	int ret;
-
-	/*
-	 * THERM_CONTROL is architectural for IA32 now, so
-	 * we can rely on the capability checks
-	 */
-	if (c->x86_vendor != X86_VENDOR_INTEL)
-		return -ENODEV;
-
-	if (!test_cpu_cap(c, X86_FEATURE_ACPI) ||
-				!test_cpu_cap(c, X86_FEATURE_ACC))
-		return -ENODEV;
-
-	ret = cpufreq_register_driver(&p4clockmod_driver);
-	if (!ret)
-		printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock "
-				"Modulation available\n");
-
-	return ret;
-}
-
-
-static void __exit cpufreq_p4_exit(void)
-{
-	cpufreq_unregister_driver(&p4clockmod_driver);
-}
-
-
-MODULE_AUTHOR("Zwane Mwaikambo <zwane@commfireservices.com>");
-MODULE_DESCRIPTION("cpufreq driver for Pentium(TM) 4/Xeon(TM)");
-MODULE_LICENSE("GPL");
-
-late_initcall(cpufreq_p4_init);
-module_exit(cpufreq_p4_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
deleted file mode 100644
index 755a31e0f5b..00000000000
--- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
+++ /dev/null
@@ -1,624 +0,0 @@
-/*
- *  pcc-cpufreq.c - Processor Clocking Control firmware cpufreq interface
- *
- *  Copyright (C) 2009 Red Hat, Matthew Garrett <mjg@redhat.com>
- *  Copyright (C) 2009 Hewlett-Packard Development Company, L.P.
- *	Nagananda Chumbalkar <nagananda.chumbalkar@hp.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; version 2 of the License.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or NON
- *  INFRINGEMENT. See the GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/sched.h>
-#include <linux/cpufreq.h>
-#include <linux/compiler.h>
-#include <linux/slab.h>
-
-#include <linux/acpi.h>
-#include <linux/io.h>
-#include <linux/spinlock.h>
-#include <linux/uaccess.h>
-
-#include <acpi/processor.h>
-
-#define PCC_VERSION 	"1.00.00"
-#define POLL_LOOPS 	300
-
-#define CMD_COMPLETE 	0x1
-#define CMD_GET_FREQ 	0x0
-#define CMD_SET_FREQ 	0x1
-
-#define BUF_SZ		4
-
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER,	\
-					     "pcc-cpufreq", msg)
-
-struct pcc_register_resource {
-	u8 descriptor;
-	u16 length;
-	u8 space_id;
-	u8 bit_width;
-	u8 bit_offset;
-	u8 access_size;
-	u64 address;
-} __attribute__ ((packed));
-
-struct pcc_memory_resource {
-	u8 descriptor;
-	u16 length;
-	u8 space_id;
-	u8 resource_usage;
-	u8 type_specific;
-	u64 granularity;
-	u64 minimum;
-	u64 maximum;
-	u64 translation_offset;
-	u64 address_length;
-} __attribute__ ((packed));
-
-static struct cpufreq_driver pcc_cpufreq_driver;
-
-struct pcc_header {
-	u32 signature;
-	u16 length;
-	u8 major;
-	u8 minor;
-	u32 features;
-	u16 command;
-	u16 status;
-	u32 latency;
-	u32 minimum_time;
-	u32 maximum_time;
-	u32 nominal;
-	u32 throttled_frequency;
-	u32 minimum_frequency;
-};
-
-static void __iomem *pcch_virt_addr;
-static struct pcc_header __iomem *pcch_hdr;
-
-static DEFINE_SPINLOCK(pcc_lock);
-
-static struct acpi_generic_address doorbell;
-
-static u64 doorbell_preserve;
-static u64 doorbell_write;
-
-static u8 OSC_UUID[16] = {0x63, 0x9B, 0x2C, 0x9F, 0x70, 0x91, 0x49, 0x1f,
-			  0xBB, 0x4F, 0xA5, 0x98, 0x2F, 0xA1, 0xB5, 0x46};
-
-struct pcc_cpu {
-	u32 input_offset;
-	u32 output_offset;
-};
-
-static struct pcc_cpu __percpu *pcc_cpu_info;
-
-static int pcc_cpufreq_verify(struct cpufreq_policy *policy)
-{
-	cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
-				     policy->cpuinfo.max_freq);
-	return 0;
-}
-
-static inline void pcc_cmd(void)
-{
-	u64 doorbell_value;
-	int i;
-
-	acpi_read(&doorbell_value, &doorbell);
-	acpi_write((doorbell_value & doorbell_preserve) | doorbell_write,
-		   &doorbell);
-
-	for (i = 0; i < POLL_LOOPS; i++) {
-		if (ioread16(&pcch_hdr->status) & CMD_COMPLETE)
-			break;
-	}
-}
-
-static inline void pcc_clear_mapping(void)
-{
-	if (pcch_virt_addr)
-		iounmap(pcch_virt_addr);
-	pcch_virt_addr = NULL;
-}
-
-static unsigned int pcc_get_freq(unsigned int cpu)
-{
-	struct pcc_cpu *pcc_cpu_data;
-	unsigned int curr_freq;
-	unsigned int freq_limit;
-	u16 status;
-	u32 input_buffer;
-	u32 output_buffer;
-
-	spin_lock(&pcc_lock);
-
-	dprintk("get: get_freq for CPU %d\n", cpu);
-	pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
-
-	input_buffer = 0x1;
-	iowrite32(input_buffer,
-			(pcch_virt_addr + pcc_cpu_data->input_offset));
-	iowrite16(CMD_GET_FREQ, &pcch_hdr->command);
-
-	pcc_cmd();
-
-	output_buffer =
-		ioread32(pcch_virt_addr + pcc_cpu_data->output_offset);
-
-	/* Clear the input buffer - we are done with the current command */
-	memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
-
-	status = ioread16(&pcch_hdr->status);
-	if (status != CMD_COMPLETE) {
-		dprintk("get: FAILED: for CPU %d, status is %d\n",
-			cpu, status);
-		goto cmd_incomplete;
-	}
-	iowrite16(0, &pcch_hdr->status);
-	curr_freq = (((ioread32(&pcch_hdr->nominal) * (output_buffer & 0xff))
-			/ 100) * 1000);
-
-	dprintk("get: SUCCESS: (virtual) output_offset for cpu %d is "
-		"0x%x, contains a value of: 0x%x. Speed is: %d MHz\n",
-		cpu, (pcch_virt_addr + pcc_cpu_data->output_offset),
-		output_buffer, curr_freq);
-
-	freq_limit = (output_buffer >> 8) & 0xff;
-	if (freq_limit != 0xff) {
-		dprintk("get: frequency for cpu %d is being temporarily"
-			" capped at %d\n", cpu, curr_freq);
-	}
-
-	spin_unlock(&pcc_lock);
-	return curr_freq;
-
-cmd_incomplete:
-	iowrite16(0, &pcch_hdr->status);
-	spin_unlock(&pcc_lock);
-	return 0;
-}
-
-static int pcc_cpufreq_target(struct cpufreq_policy *policy,
-			      unsigned int target_freq,
-			      unsigned int relation)
-{
-	struct pcc_cpu *pcc_cpu_data;
-	struct cpufreq_freqs freqs;
-	u16 status;
-	u32 input_buffer;
-	int cpu;
-
-	spin_lock(&pcc_lock);
-	cpu = policy->cpu;
-	pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
-
-	dprintk("target: CPU %d should go to target freq: %d "
-		"(virtual) input_offset is 0x%x\n",
-		cpu, target_freq,
-		(pcch_virt_addr + pcc_cpu_data->input_offset));
-
-	freqs.new = target_freq;
-	freqs.cpu = cpu;
-	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-
-	input_buffer = 0x1 | (((target_freq * 100)
-			       / (ioread32(&pcch_hdr->nominal) * 1000)) << 8);
-	iowrite32(input_buffer,
-			(pcch_virt_addr + pcc_cpu_data->input_offset));
-	iowrite16(CMD_SET_FREQ, &pcch_hdr->command);
-
-	pcc_cmd();
-
-	/* Clear the input buffer - we are done with the current command */
-	memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
-
-	status = ioread16(&pcch_hdr->status);
-	if (status != CMD_COMPLETE) {
-		dprintk("target: FAILED for cpu %d, with status: 0x%x\n",
-			cpu, status);
-		goto cmd_incomplete;
-	}
-	iowrite16(0, &pcch_hdr->status);
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-	dprintk("target: was SUCCESSFUL for cpu %d\n", cpu);
-	spin_unlock(&pcc_lock);
-
-	return 0;
-
-cmd_incomplete:
-	iowrite16(0, &pcch_hdr->status);
-	spin_unlock(&pcc_lock);
-	return -EINVAL;
-}
-
-static int pcc_get_offset(int cpu)
-{
-	acpi_status status;
-	struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
-	union acpi_object *pccp, *offset;
-	struct pcc_cpu *pcc_cpu_data;
-	struct acpi_processor *pr;
-	int ret = 0;
-
-	pr = per_cpu(processors, cpu);
-	pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
-
-	status = acpi_evaluate_object(pr->handle, "PCCP", NULL, &buffer);
-	if (ACPI_FAILURE(status))
-		return -ENODEV;
-
-	pccp = buffer.pointer;
-	if (!pccp || pccp->type != ACPI_TYPE_PACKAGE) {
-		ret = -ENODEV;
-		goto out_free;
-	};
-
-	offset = &(pccp->package.elements[0]);
-	if (!offset || offset->type != ACPI_TYPE_INTEGER) {
-		ret = -ENODEV;
-		goto out_free;
-	}
-
-	pcc_cpu_data->input_offset = offset->integer.value;
-
-	offset = &(pccp->package.elements[1]);
-	if (!offset || offset->type != ACPI_TYPE_INTEGER) {
-		ret = -ENODEV;
-		goto out_free;
-	}
-
-	pcc_cpu_data->output_offset = offset->integer.value;
-
-	memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
-	memset_io((pcch_virt_addr + pcc_cpu_data->output_offset), 0, BUF_SZ);
-
-	dprintk("pcc_get_offset: for CPU %d: pcc_cpu_data "
-		"input_offset: 0x%x, pcc_cpu_data output_offset: 0x%x\n",
-		cpu, pcc_cpu_data->input_offset, pcc_cpu_data->output_offset);
-out_free:
-	kfree(buffer.pointer);
-	return ret;
-}
-
-static int __init pcc_cpufreq_do_osc(acpi_handle *handle)
-{
-	acpi_status status;
-	struct acpi_object_list input;
-	struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
-	union acpi_object in_params[4];
-	union acpi_object *out_obj;
-	u32 capabilities[2];
-	u32 errors;
-	u32 supported;
-	int ret = 0;
-
-	input.count = 4;
-	input.pointer = in_params;
-	in_params[0].type               = ACPI_TYPE_BUFFER;
-	in_params[0].buffer.length      = 16;
-	in_params[0].buffer.pointer     = OSC_UUID;
-	in_params[1].type               = ACPI_TYPE_INTEGER;
-	in_params[1].integer.value      = 1;
-	in_params[2].type               = ACPI_TYPE_INTEGER;
-	in_params[2].integer.value      = 2;
-	in_params[3].type               = ACPI_TYPE_BUFFER;
-	in_params[3].buffer.length      = 8;
-	in_params[3].buffer.pointer     = (u8 *)&capabilities;
-
-	capabilities[0] = OSC_QUERY_ENABLE;
-	capabilities[1] = 0x1;
-
-	status = acpi_evaluate_object(*handle, "_OSC", &input, &output);
-	if (ACPI_FAILURE(status))
-		return -ENODEV;
-
-	if (!output.length)
-		return -ENODEV;
-
-	out_obj = output.pointer;
-	if (out_obj->type != ACPI_TYPE_BUFFER) {
-		ret = -ENODEV;
-		goto out_free;
-	}
-
-	errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
-	if (errors) {
-		ret = -ENODEV;
-		goto out_free;
-	}
-
-	supported = *((u32 *)(out_obj->buffer.pointer + 4));
-	if (!(supported & 0x1)) {
-		ret = -ENODEV;
-		goto out_free;
-	}
-
-	kfree(output.pointer);
-	capabilities[0] = 0x0;
-	capabilities[1] = 0x1;
-
-	status = acpi_evaluate_object(*handle, "_OSC", &input, &output);
-	if (ACPI_FAILURE(status))
-		return -ENODEV;
-
-	if (!output.length)
-		return -ENODEV;
-
-	out_obj = output.pointer;
-	if (out_obj->type != ACPI_TYPE_BUFFER) {
-		ret = -ENODEV;
-		goto out_free;
-	}
-
-	errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
-	if (errors) {
-		ret = -ENODEV;
-		goto out_free;
-	}
-
-	supported = *((u32 *)(out_obj->buffer.pointer + 4));
-	if (!(supported & 0x1)) {
-		ret = -ENODEV;
-		goto out_free;
-	}
-
-out_free:
-	kfree(output.pointer);
-	return ret;
-}
-
-static int __init pcc_cpufreq_probe(void)
-{
-	acpi_status status;
-	struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
-	struct pcc_memory_resource *mem_resource;
-	struct pcc_register_resource *reg_resource;
-	union acpi_object *out_obj, *member;
-	acpi_handle handle, osc_handle, pcch_handle;
-	int ret = 0;
-
-	status = acpi_get_handle(NULL, "\\_SB", &handle);
-	if (ACPI_FAILURE(status))
-		return -ENODEV;
-
-	status = acpi_get_handle(handle, "PCCH", &pcch_handle);
-	if (ACPI_FAILURE(status))
-		return -ENODEV;
-
-	status = acpi_get_handle(handle, "_OSC", &osc_handle);
-	if (ACPI_SUCCESS(status)) {
-		ret = pcc_cpufreq_do_osc(&osc_handle);
-		if (ret)
-			dprintk("probe: _OSC evaluation did not succeed\n");
-		/* Firmware's use of _OSC is optional */
-		ret = 0;
-	}
-
-	status = acpi_evaluate_object(handle, "PCCH", NULL, &output);
-	if (ACPI_FAILURE(status))
-		return -ENODEV;
-
-	out_obj = output.pointer;
-	if (out_obj->type != ACPI_TYPE_PACKAGE) {
-		ret = -ENODEV;
-		goto out_free;
-	}
-
-	member = &out_obj->package.elements[0];
-	if (member->type != ACPI_TYPE_BUFFER) {
-		ret = -ENODEV;
-		goto out_free;
-	}
-
-	mem_resource = (struct pcc_memory_resource *)member->buffer.pointer;
-
-	dprintk("probe: mem_resource descriptor: 0x%x,"
-		" length: %d, space_id: %d, resource_usage: %d,"
-		" type_specific: %d, granularity: 0x%llx,"
-		" minimum: 0x%llx, maximum: 0x%llx,"
-		" translation_offset: 0x%llx, address_length: 0x%llx\n",
-		mem_resource->descriptor, mem_resource->length,
-		mem_resource->space_id, mem_resource->resource_usage,
-		mem_resource->type_specific, mem_resource->granularity,
-		mem_resource->minimum, mem_resource->maximum,
-		mem_resource->translation_offset,
-		mem_resource->address_length);
-
-	if (mem_resource->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) {
-		ret = -ENODEV;
-		goto out_free;
-	}
-
-	pcch_virt_addr = ioremap_nocache(mem_resource->minimum,
-					mem_resource->address_length);
-	if (pcch_virt_addr == NULL) {
-		dprintk("probe: could not map shared mem region\n");
-		goto out_free;
-	}
-	pcch_hdr = pcch_virt_addr;
-
-	dprintk("probe: PCCH header (virtual) addr: 0x%p\n", pcch_hdr);
-	dprintk("probe: PCCH header is at physical address: 0x%llx,"
-		" signature: 0x%x, length: %d bytes, major: %d, minor: %d,"
-		" supported features: 0x%x, command field: 0x%x,"
-		" status field: 0x%x, nominal latency: %d us\n",
-		mem_resource->minimum, ioread32(&pcch_hdr->signature),
-		ioread16(&pcch_hdr->length), ioread8(&pcch_hdr->major),
-		ioread8(&pcch_hdr->minor), ioread32(&pcch_hdr->features),
-		ioread16(&pcch_hdr->command), ioread16(&pcch_hdr->status),
-		ioread32(&pcch_hdr->latency));
-
-	dprintk("probe: min time between commands: %d us,"
-		" max time between commands: %d us,"
-		" nominal CPU frequency: %d MHz,"
-		" minimum CPU frequency: %d MHz,"
-		" minimum CPU frequency without throttling: %d MHz\n",
-		ioread32(&pcch_hdr->minimum_time),
-		ioread32(&pcch_hdr->maximum_time),
-		ioread32(&pcch_hdr->nominal),
-		ioread32(&pcch_hdr->throttled_frequency),
-		ioread32(&pcch_hdr->minimum_frequency));
-
-	member = &out_obj->package.elements[1];
-	if (member->type != ACPI_TYPE_BUFFER) {
-		ret = -ENODEV;
-		goto pcch_free;
-	}
-
-	reg_resource = (struct pcc_register_resource *)member->buffer.pointer;
-
-	doorbell.space_id = reg_resource->space_id;
-	doorbell.bit_width = reg_resource->bit_width;
-	doorbell.bit_offset = reg_resource->bit_offset;
-	doorbell.access_width = 64;
-	doorbell.address = reg_resource->address;
-
-	dprintk("probe: doorbell: space_id is %d, bit_width is %d, "
-		"bit_offset is %d, access_width is %d, address is 0x%llx\n",
-		doorbell.space_id, doorbell.bit_width, doorbell.bit_offset,
-		doorbell.access_width, reg_resource->address);
-
-	member = &out_obj->package.elements[2];
-	if (member->type != ACPI_TYPE_INTEGER) {
-		ret = -ENODEV;
-		goto pcch_free;
-	}
-
-	doorbell_preserve = member->integer.value;
-
-	member = &out_obj->package.elements[3];
-	if (member->type != ACPI_TYPE_INTEGER) {
-		ret = -ENODEV;
-		goto pcch_free;
-	}
-
-	doorbell_write = member->integer.value;
-
-	dprintk("probe: doorbell_preserve: 0x%llx,"
-		" doorbell_write: 0x%llx\n",
-		doorbell_preserve, doorbell_write);
-
-	pcc_cpu_info = alloc_percpu(struct pcc_cpu);
-	if (!pcc_cpu_info) {
-		ret = -ENOMEM;
-		goto pcch_free;
-	}
-
-	printk(KERN_DEBUG "pcc-cpufreq: (v%s) driver loaded with frequency"
-	       " limits: %d MHz, %d MHz\n", PCC_VERSION,
-	       ioread32(&pcch_hdr->minimum_frequency),
-	       ioread32(&pcch_hdr->nominal));
-	kfree(output.pointer);
-	return ret;
-pcch_free:
-	pcc_clear_mapping();
-out_free:
-	kfree(output.pointer);
-	return ret;
-}
-
-static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy)
-{
-	unsigned int cpu = policy->cpu;
-	unsigned int result = 0;
-
-	if (!pcch_virt_addr) {
-		result = -1;
-		goto out;
-	}
-
-	result = pcc_get_offset(cpu);
-	if (result) {
-		dprintk("init: PCCP evaluation failed\n");
-		goto out;
-	}
-
-	policy->max = policy->cpuinfo.max_freq =
-		ioread32(&pcch_hdr->nominal) * 1000;
-	policy->min = policy->cpuinfo.min_freq =
-		ioread32(&pcch_hdr->minimum_frequency) * 1000;
-	policy->cur = pcc_get_freq(cpu);
-
-	if (!policy->cur) {
-		dprintk("init: Unable to get current CPU frequency\n");
-		result = -EINVAL;
-		goto out;
-	}
-
-	dprintk("init: policy->max is %d, policy->min is %d\n",
-		policy->max, policy->min);
-out:
-	return result;
-}
-
-static int pcc_cpufreq_cpu_exit(struct cpufreq_policy *policy)
-{
-	return 0;
-}
-
-static struct cpufreq_driver pcc_cpufreq_driver = {
-	.flags = CPUFREQ_CONST_LOOPS,
-	.get = pcc_get_freq,
-	.verify = pcc_cpufreq_verify,
-	.target = pcc_cpufreq_target,
-	.init = pcc_cpufreq_cpu_init,
-	.exit = pcc_cpufreq_cpu_exit,
-	.name = "pcc-cpufreq",
-	.owner = THIS_MODULE,
-};
-
-static int __init pcc_cpufreq_init(void)
-{
-	int ret;
-
-	if (acpi_disabled)
-		return 0;
-
-	ret = pcc_cpufreq_probe();
-	if (ret) {
-		dprintk("pcc_cpufreq_init: PCCH evaluation failed\n");
-		return ret;
-	}
-
-	ret = cpufreq_register_driver(&pcc_cpufreq_driver);
-
-	return ret;
-}
-
-static void __exit pcc_cpufreq_exit(void)
-{
-	cpufreq_unregister_driver(&pcc_cpufreq_driver);
-
-	pcc_clear_mapping();
-
-	free_percpu(pcc_cpu_info);
-}
-
-MODULE_AUTHOR("Matthew Garrett, Naga Chumbalkar");
-MODULE_VERSION(PCC_VERSION);
-MODULE_DESCRIPTION("Processor Clocking Control interface driver");
-MODULE_LICENSE("GPL");
-
-late_initcall(pcc_cpufreq_init);
-module_exit(pcc_cpufreq_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
deleted file mode 100644
index b3379d6a5c5..00000000000
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
+++ /dev/null
@@ -1,261 +0,0 @@
-/*
- *  This file was based upon code in Powertweak Linux (http://powertweak.sf.net)
- *  (C) 2000-2003  Dave Jones, Arjan van de Ven, Janne Pänkälä,
- *                 Dominik Brodowski.
- *
- *  Licensed under the terms of the GNU GPL License version 2.
- *
- *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/ioport.h>
-#include <linux/timex.h>
-#include <linux/io.h>
-
-#include <asm/msr.h>
-
-#define POWERNOW_IOPORT 0xfff0          /* it doesn't matter where, as long
-					   as it is unused */
-
-#define PFX "powernow-k6: "
-static unsigned int                     busfreq;   /* FSB, in 10 kHz */
-static unsigned int                     max_multiplier;
-
-
-/* Clock ratio multiplied by 10 - see table 27 in AMD#23446 */
-static struct cpufreq_frequency_table clock_ratio[] = {
-	{45,  /* 000 -> 4.5x */ 0},
-	{50,  /* 001 -> 5.0x */ 0},
-	{40,  /* 010 -> 4.0x */ 0},
-	{55,  /* 011 -> 5.5x */ 0},
-	{20,  /* 100 -> 2.0x */ 0},
-	{30,  /* 101 -> 3.0x */ 0},
-	{60,  /* 110 -> 6.0x */ 0},
-	{35,  /* 111 -> 3.5x */ 0},
-	{0, CPUFREQ_TABLE_END}
-};
-
-
-/**
- * powernow_k6_get_cpu_multiplier - returns the current FSB multiplier
- *
- *   Returns the current setting of the frequency multiplier. Core clock
- * speed is frequency of the Front-Side Bus multiplied with this value.
- */
-static int powernow_k6_get_cpu_multiplier(void)
-{
-	u64 invalue = 0;
-	u32 msrval;
-
-	msrval = POWERNOW_IOPORT + 0x1;
-	wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */
-	invalue = inl(POWERNOW_IOPORT + 0x8);
-	msrval = POWERNOW_IOPORT + 0x0;
-	wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */
-
-	return clock_ratio[(invalue >> 5)&7].index;
-}
-
-
-/**
- * powernow_k6_set_state - set the PowerNow! multiplier
- * @best_i: clock_ratio[best_i] is the target multiplier
- *
- *   Tries to change the PowerNow! multiplier
- */
-static void powernow_k6_set_state(unsigned int best_i)
-{
-	unsigned long outvalue = 0, invalue = 0;
-	unsigned long msrval;
-	struct cpufreq_freqs freqs;
-
-	if (clock_ratio[best_i].index > max_multiplier) {
-		printk(KERN_ERR PFX "invalid target frequency\n");
-		return;
-	}
-
-	freqs.old = busfreq * powernow_k6_get_cpu_multiplier();
-	freqs.new = busfreq * clock_ratio[best_i].index;
-	freqs.cpu = 0; /* powernow-k6.c is UP only driver */
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-
-	/* we now need to transform best_i to the BVC format, see AMD#23446 */
-
-	outvalue = (1<<12) | (1<<10) | (1<<9) | (best_i<<5);
-
-	msrval = POWERNOW_IOPORT + 0x1;
-	wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */
-	invalue = inl(POWERNOW_IOPORT + 0x8);
-	invalue = invalue & 0xf;
-	outvalue = outvalue | invalue;
-	outl(outvalue , (POWERNOW_IOPORT + 0x8));
-	msrval = POWERNOW_IOPORT + 0x0;
-	wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-
-	return;
-}
-
-
-/**
- * powernow_k6_verify - verifies a new CPUfreq policy
- * @policy: new policy
- *
- * Policy must be within lowest and highest possible CPU Frequency,
- * and at least one possible state must be within min and max.
- */
-static int powernow_k6_verify(struct cpufreq_policy *policy)
-{
-	return cpufreq_frequency_table_verify(policy, &clock_ratio[0]);
-}
-
-
-/**
- * powernow_k6_setpolicy - sets a new CPUFreq policy
- * @policy: new policy
- * @target_freq: the target frequency
- * @relation: how that frequency relates to achieved frequency
- *  (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
- *
- * sets a new CPUFreq policy
- */
-static int powernow_k6_target(struct cpufreq_policy *policy,
-			       unsigned int target_freq,
-			       unsigned int relation)
-{
-	unsigned int newstate = 0;
-
-	if (cpufreq_frequency_table_target(policy, &clock_ratio[0],
-				target_freq, relation, &newstate))
-		return -EINVAL;
-
-	powernow_k6_set_state(newstate);
-
-	return 0;
-}
-
-
-static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
-{
-	unsigned int i, f;
-	int result;
-
-	if (policy->cpu != 0)
-		return -ENODEV;
-
-	/* get frequencies */
-	max_multiplier = powernow_k6_get_cpu_multiplier();
-	busfreq = cpu_khz / max_multiplier;
-
-	/* table init */
-	for (i = 0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) {
-		f = clock_ratio[i].index;
-		if (f > max_multiplier)
-			clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID;
-		else
-			clock_ratio[i].frequency = busfreq * f;
-	}
-
-	/* cpuinfo and default policy values */
-	policy->cpuinfo.transition_latency = 200000;
-	policy->cur = busfreq * max_multiplier;
-
-	result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio);
-	if (result)
-		return result;
-
-	cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu);
-
-	return 0;
-}
-
-
-static int powernow_k6_cpu_exit(struct cpufreq_policy *policy)
-{
-	unsigned int i;
-	for (i = 0; i < 8; i++) {
-		if (i == max_multiplier)
-			powernow_k6_set_state(i);
-	}
-	cpufreq_frequency_table_put_attr(policy->cpu);
-	return 0;
-}
-
-static unsigned int powernow_k6_get(unsigned int cpu)
-{
-	unsigned int ret;
-	ret = (busfreq * powernow_k6_get_cpu_multiplier());
-	return ret;
-}
-
-static struct freq_attr *powernow_k6_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-static struct cpufreq_driver powernow_k6_driver = {
-	.verify		= powernow_k6_verify,
-	.target		= powernow_k6_target,
-	.init		= powernow_k6_cpu_init,
-	.exit		= powernow_k6_cpu_exit,
-	.get		= powernow_k6_get,
-	.name		= "powernow-k6",
-	.owner		= THIS_MODULE,
-	.attr		= powernow_k6_attr,
-};
-
-
-/**
- * powernow_k6_init - initializes the k6 PowerNow! CPUFreq driver
- *
- *   Initializes the K6 PowerNow! support. Returns -ENODEV on unsupported
- * devices, -EINVAL or -ENOMEM on problems during initiatization, and zero
- * on success.
- */
-static int __init powernow_k6_init(void)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-
-	if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 5) ||
-		((c->x86_model != 12) && (c->x86_model != 13)))
-		return -ENODEV;
-
-	if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) {
-		printk(KERN_INFO PFX "PowerNow IOPORT region already used.\n");
-		return -EIO;
-	}
-
-	if (cpufreq_register_driver(&powernow_k6_driver)) {
-		release_region(POWERNOW_IOPORT, 16);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-
-/**
- * powernow_k6_exit - unregisters AMD K6-2+/3+ PowerNow! support
- *
- *   Unregisters AMD K6-2+ / K6-3+ PowerNow! support.
- */
-static void __exit powernow_k6_exit(void)
-{
-	cpufreq_unregister_driver(&powernow_k6_driver);
-	release_region(POWERNOW_IOPORT, 16);
-}
-
-
-MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, "
-		"Dominik Brodowski <linux@brodo.de>");
-MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
-MODULE_LICENSE("GPL");
-
-module_init(powernow_k6_init);
-module_exit(powernow_k6_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
deleted file mode 100644
index 4a45fd6e41b..00000000000
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ /dev/null
@@ -1,752 +0,0 @@
-/*
- *  AMD K7 Powernow driver.
- *  (C) 2003 Dave Jones on behalf of SuSE Labs.
- *  (C) 2003-2004 Dave Jones <davej@redhat.com>
- *
- *  Licensed under the terms of the GNU GPL License version 2.
- *  Based upon datasheets & sample CPUs kindly provided by AMD.
- *
- * Errata 5:
- *  CPU may fail to execute a FID/VID change in presence of interrupt.
- *  - We cli/sti on stepping A0 CPUs around the FID/VID transition.
- * Errata 15:
- *  CPU with half frequency multipliers may hang upon wakeup from disconnect.
- *  - We disable half multipliers if ACPI is used on A0 stepping CPUs.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/dmi.h>
-#include <linux/timex.h>
-#include <linux/io.h>
-
-#include <asm/timer.h>		/* Needed for recalibrate_cpu_khz() */
-#include <asm/msr.h>
-#include <asm/system.h>
-
-#ifdef CONFIG_X86_POWERNOW_K7_ACPI
-#include <linux/acpi.h>
-#include <acpi/processor.h>
-#endif
-
-#include "powernow-k7.h"
-
-#define PFX "powernow: "
-
-
-struct psb_s {
-	u8 signature[10];
-	u8 tableversion;
-	u8 flags;
-	u16 settlingtime;
-	u8 reserved1;
-	u8 numpst;
-};
-
-struct pst_s {
-	u32 cpuid;
-	u8 fsbspeed;
-	u8 maxfid;
-	u8 startvid;
-	u8 numpstates;
-};
-
-#ifdef CONFIG_X86_POWERNOW_K7_ACPI
-union powernow_acpi_control_t {
-	struct {
-		unsigned long fid:5,
-			vid:5,
-			sgtc:20,
-			res1:2;
-	} bits;
-	unsigned long val;
-};
-#endif
-
-#ifdef CONFIG_CPU_FREQ_DEBUG
-/* divide by 1000 to get VCore voltage in V. */
-static const int mobile_vid_table[32] = {
-    2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650,
-    1600, 1550, 1500, 1450, 1400, 1350, 1300, 0,
-    1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100,
-    1075, 1050, 1025, 1000, 975, 950, 925, 0,
-};
-#endif
-
-/* divide by 10 to get FID. */
-static const int fid_codes[32] = {
-    110, 115, 120, 125, 50, 55, 60, 65,
-    70, 75, 80, 85, 90, 95, 100, 105,
-    30, 190, 40, 200, 130, 135, 140, 210,
-    150, 225, 160, 165, 170, 180, -1, -1,
-};
-
-/* This parameter is used in order to force ACPI instead of legacy method for
- * configuration purpose.
- */
-
-static int acpi_force;
-
-static struct cpufreq_frequency_table *powernow_table;
-
-static unsigned int can_scale_bus;
-static unsigned int can_scale_vid;
-static unsigned int minimum_speed = -1;
-static unsigned int maximum_speed;
-static unsigned int number_scales;
-static unsigned int fsb;
-static unsigned int latency;
-static char have_a0;
-
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"powernow-k7", msg)
-
-static int check_fsb(unsigned int fsbspeed)
-{
-	int delta;
-	unsigned int f = fsb / 1000;
-
-	delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed;
-	return delta < 5;
-}
-
-static int check_powernow(void)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-	unsigned int maxei, eax, ebx, ecx, edx;
-
-	if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 6)) {
-#ifdef MODULE
-		printk(KERN_INFO PFX "This module only works with "
-				"AMD K7 CPUs\n");
-#endif
-		return 0;
-	}
-
-	/* Get maximum capabilities */
-	maxei = cpuid_eax(0x80000000);
-	if (maxei < 0x80000007) {	/* Any powernow info ? */
-#ifdef MODULE
-		printk(KERN_INFO PFX "No powernow capabilities detected\n");
-#endif
-		return 0;
-	}
-
-	if ((c->x86_model == 6) && (c->x86_mask == 0)) {
-		printk(KERN_INFO PFX "K7 660[A0] core detected, "
-				"enabling errata workarounds\n");
-		have_a0 = 1;
-	}
-
-	cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
-
-	/* Check we can actually do something before we say anything.*/
-	if (!(edx & (1 << 1 | 1 << 2)))
-		return 0;
-
-	printk(KERN_INFO PFX "PowerNOW! Technology present. Can scale: ");
-
-	if (edx & 1 << 1) {
-		printk("frequency");
-		can_scale_bus = 1;
-	}
-
-	if ((edx & (1 << 1 | 1 << 2)) == 0x6)
-		printk(" and ");
-
-	if (edx & 1 << 2) {
-		printk("voltage");
-		can_scale_vid = 1;
-	}
-
-	printk(".\n");
-	return 1;
-}
-
-#ifdef CONFIG_X86_POWERNOW_K7_ACPI
-static void invalidate_entry(unsigned int entry)
-{
-	powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
-}
-#endif
-
-static int get_ranges(unsigned char *pst)
-{
-	unsigned int j;
-	unsigned int speed;
-	u8 fid, vid;
-
-	powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) *
-				(number_scales + 1)), GFP_KERNEL);
-	if (!powernow_table)
-		return -ENOMEM;
-
-	for (j = 0 ; j < number_scales; j++) {
-		fid = *pst++;
-
-		powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10;
-		powernow_table[j].index = fid; /* lower 8 bits */
-
-		speed = powernow_table[j].frequency;
-
-		if ((fid_codes[fid] % 10) == 5) {
-#ifdef CONFIG_X86_POWERNOW_K7_ACPI
-			if (have_a0 == 1)
-				invalidate_entry(j);
-#endif
-		}
-
-		if (speed < minimum_speed)
-			minimum_speed = speed;
-		if (speed > maximum_speed)
-			maximum_speed = speed;
-
-		vid = *pst++;
-		powernow_table[j].index |= (vid << 8); /* upper 8 bits */
-
-		dprintk("   FID: 0x%x (%d.%dx [%dMHz])  "
-			 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
-			 fid_codes[fid] % 10, speed/1000, vid,
-			 mobile_vid_table[vid]/1000,
-			 mobile_vid_table[vid]%1000);
-	}
-	powernow_table[number_scales].frequency = CPUFREQ_TABLE_END;
-	powernow_table[number_scales].index = 0;
-
-	return 0;
-}
-
-
-static void change_FID(int fid)
-{
-	union msr_fidvidctl fidvidctl;
-
-	rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
-	if (fidvidctl.bits.FID != fid) {
-		fidvidctl.bits.SGTC = latency;
-		fidvidctl.bits.FID = fid;
-		fidvidctl.bits.VIDC = 0;
-		fidvidctl.bits.FIDC = 1;
-		wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
-	}
-}
-
-
-static void change_VID(int vid)
-{
-	union msr_fidvidctl fidvidctl;
-
-	rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
-	if (fidvidctl.bits.VID != vid) {
-		fidvidctl.bits.SGTC = latency;
-		fidvidctl.bits.VID = vid;
-		fidvidctl.bits.FIDC = 0;
-		fidvidctl.bits.VIDC = 1;
-		wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
-	}
-}
-
-
-static void change_speed(unsigned int index)
-{
-	u8 fid, vid;
-	struct cpufreq_freqs freqs;
-	union msr_fidvidstatus fidvidstatus;
-	int cfid;
-
-	/* fid are the lower 8 bits of the index we stored into
-	 * the cpufreq frequency table in powernow_decode_bios,
-	 * vid are the upper 8 bits.
-	 */
-
-	fid = powernow_table[index].index & 0xFF;
-	vid = (powernow_table[index].index & 0xFF00) >> 8;
-
-	freqs.cpu = 0;
-
-	rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
-	cfid = fidvidstatus.bits.CFID;
-	freqs.old = fsb * fid_codes[cfid] / 10;
-
-	freqs.new = powernow_table[index].frequency;
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-
-	/* Now do the magic poking into the MSRs.  */
-
-	if (have_a0 == 1)	/* A0 errata 5 */
-		local_irq_disable();
-
-	if (freqs.old > freqs.new) {
-		/* Going down, so change FID first */
-		change_FID(fid);
-		change_VID(vid);
-	} else {
-		/* Going up, so change VID first */
-		change_VID(vid);
-		change_FID(fid);
-	}
-
-
-	if (have_a0 == 1)
-		local_irq_enable();
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-}
-
-
-#ifdef CONFIG_X86_POWERNOW_K7_ACPI
-
-static struct acpi_processor_performance *acpi_processor_perf;
-
-static int powernow_acpi_init(void)
-{
-	int i;
-	int retval = 0;
-	union powernow_acpi_control_t pc;
-
-	if (acpi_processor_perf != NULL && powernow_table != NULL) {
-		retval = -EINVAL;
-		goto err0;
-	}
-
-	acpi_processor_perf = kzalloc(sizeof(struct acpi_processor_performance),
-				      GFP_KERNEL);
-	if (!acpi_processor_perf) {
-		retval = -ENOMEM;
-		goto err0;
-	}
-
-	if (!zalloc_cpumask_var(&acpi_processor_perf->shared_cpu_map,
-								GFP_KERNEL)) {
-		retval = -ENOMEM;
-		goto err05;
-	}
-
-	if (acpi_processor_register_performance(acpi_processor_perf, 0)) {
-		retval = -EIO;
-		goto err1;
-	}
-
-	if (acpi_processor_perf->control_register.space_id !=
-			ACPI_ADR_SPACE_FIXED_HARDWARE) {
-		retval = -ENODEV;
-		goto err2;
-	}
-
-	if (acpi_processor_perf->status_register.space_id !=
-			ACPI_ADR_SPACE_FIXED_HARDWARE) {
-		retval = -ENODEV;
-		goto err2;
-	}
-
-	number_scales = acpi_processor_perf->state_count;
-
-	if (number_scales < 2) {
-		retval = -ENODEV;
-		goto err2;
-	}
-
-	powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) *
-				(number_scales + 1)), GFP_KERNEL);
-	if (!powernow_table) {
-		retval = -ENOMEM;
-		goto err2;
-	}
-
-	pc.val = (unsigned long) acpi_processor_perf->states[0].control;
-	for (i = 0; i < number_scales; i++) {
-		u8 fid, vid;
-		struct acpi_processor_px *state =
-			&acpi_processor_perf->states[i];
-		unsigned int speed, speed_mhz;
-
-		pc.val = (unsigned long) state->control;
-		dprintk("acpi:  P%d: %d MHz %d mW %d uS control %08x SGTC %d\n",
-			 i,
-			 (u32) state->core_frequency,
-			 (u32) state->power,
-			 (u32) state->transition_latency,
-			 (u32) state->control,
-			 pc.bits.sgtc);
-
-		vid = pc.bits.vid;
-		fid = pc.bits.fid;
-
-		powernow_table[i].frequency = fsb * fid_codes[fid] / 10;
-		powernow_table[i].index = fid; /* lower 8 bits */
-		powernow_table[i].index |= (vid << 8); /* upper 8 bits */
-
-		speed = powernow_table[i].frequency;
-		speed_mhz = speed / 1000;
-
-		/* processor_perflib will multiply the MHz value by 1000 to
-		 * get a KHz value (e.g. 1266000). However, powernow-k7 works
-		 * with true KHz values (e.g. 1266768). To ensure that all
-		 * powernow frequencies are available, we must ensure that
-		 * ACPI doesn't restrict them, so we round up the MHz value
-		 * to ensure that perflib's computed KHz value is greater than
-		 * or equal to powernow's KHz value.
-		 */
-		if (speed % 1000 > 0)
-			speed_mhz++;
-
-		if ((fid_codes[fid] % 10) == 5) {
-			if (have_a0 == 1)
-				invalidate_entry(i);
-		}
-
-		dprintk("   FID: 0x%x (%d.%dx [%dMHz])  "
-			 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
-			 fid_codes[fid] % 10, speed_mhz, vid,
-			 mobile_vid_table[vid]/1000,
-			 mobile_vid_table[vid]%1000);
-
-		if (state->core_frequency != speed_mhz) {
-			state->core_frequency = speed_mhz;
-			dprintk("   Corrected ACPI frequency to %d\n",
-				speed_mhz);
-		}
-
-		if (latency < pc.bits.sgtc)
-			latency = pc.bits.sgtc;
-
-		if (speed < minimum_speed)
-			minimum_speed = speed;
-		if (speed > maximum_speed)
-			maximum_speed = speed;
-	}
-
-	powernow_table[i].frequency = CPUFREQ_TABLE_END;
-	powernow_table[i].index = 0;
-
-	/* notify BIOS that we exist */
-	acpi_processor_notify_smm(THIS_MODULE);
-
-	return 0;
-
-err2:
-	acpi_processor_unregister_performance(acpi_processor_perf, 0);
-err1:
-	free_cpumask_var(acpi_processor_perf->shared_cpu_map);
-err05:
-	kfree(acpi_processor_perf);
-err0:
-	printk(KERN_WARNING PFX "ACPI perflib can not be used on "
-			"this platform\n");
-	acpi_processor_perf = NULL;
-	return retval;
-}
-#else
-static int powernow_acpi_init(void)
-{
-	printk(KERN_INFO PFX "no support for ACPI processor found."
-	       "  Please recompile your kernel with ACPI processor\n");
-	return -EINVAL;
-}
-#endif
-
-static void print_pst_entry(struct pst_s *pst, unsigned int j)
-{
-	dprintk("PST:%d (@%p)\n", j, pst);
-	dprintk(" cpuid: 0x%x  fsb: %d  maxFID: 0x%x  startvid: 0x%x\n",
-		pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid);
-}
-
-static int powernow_decode_bios(int maxfid, int startvid)
-{
-	struct psb_s *psb;
-	struct pst_s *pst;
-	unsigned int i, j;
-	unsigned char *p;
-	unsigned int etuple;
-	unsigned int ret;
-
-	etuple = cpuid_eax(0x80000001);
-
-	for (i = 0xC0000; i < 0xffff0 ; i += 16) {
-
-		p = phys_to_virt(i);
-
-		if (memcmp(p, "AMDK7PNOW!",  10) == 0) {
-			dprintk("Found PSB header at %p\n", p);
-			psb = (struct psb_s *) p;
-			dprintk("Table version: 0x%x\n", psb->tableversion);
-			if (psb->tableversion != 0x12) {
-				printk(KERN_INFO PFX "Sorry, only v1.2 tables"
-						" supported right now\n");
-				return -ENODEV;
-			}
-
-			dprintk("Flags: 0x%x\n", psb->flags);
-			if ((psb->flags & 1) == 0)
-				dprintk("Mobile voltage regulator\n");
-			else
-				dprintk("Desktop voltage regulator\n");
-
-			latency = psb->settlingtime;
-			if (latency < 100) {
-				printk(KERN_INFO PFX "BIOS set settling time "
-						"to %d microseconds. "
-						"Should be at least 100. "
-						"Correcting.\n", latency);
-				latency = 100;
-			}
-			dprintk("Settling Time: %d microseconds.\n",
-					psb->settlingtime);
-			dprintk("Has %d PST tables. (Only dumping ones "
-					"relevant to this CPU).\n",
-					psb->numpst);
-
-			p += sizeof(struct psb_s);
-
-			pst = (struct pst_s *) p;
-
-			for (j = 0; j < psb->numpst; j++) {
-				pst = (struct pst_s *) p;
-				number_scales = pst->numpstates;
-
-				if ((etuple == pst->cpuid) &&
-				    check_fsb(pst->fsbspeed) &&
-				    (maxfid == pst->maxfid) &&
-				    (startvid == pst->startvid)) {
-					print_pst_entry(pst, j);
-					p = (char *)pst + sizeof(struct pst_s);
-					ret = get_ranges(p);
-					return ret;
-				} else {
-					unsigned int k;
-					p = (char *)pst + sizeof(struct pst_s);
-					for (k = 0; k < number_scales; k++)
-						p += 2;
-				}
-			}
-			printk(KERN_INFO PFX "No PST tables match this cpuid "
-					"(0x%x)\n", etuple);
-			printk(KERN_INFO PFX "This is indicative of a broken "
-					"BIOS.\n");
-
-			return -EINVAL;
-		}
-		p++;
-	}
-
-	return -ENODEV;
-}
-
-
-static int powernow_target(struct cpufreq_policy *policy,
-			    unsigned int target_freq,
-			    unsigned int relation)
-{
-	unsigned int newstate;
-
-	if (cpufreq_frequency_table_target(policy, powernow_table, target_freq,
-				relation, &newstate))
-		return -EINVAL;
-
-	change_speed(newstate);
-
-	return 0;
-}
-
-
-static int powernow_verify(struct cpufreq_policy *policy)
-{
-	return cpufreq_frequency_table_verify(policy, powernow_table);
-}
-
-/*
- * We use the fact that the bus frequency is somehow
- * a multiple of 100000/3 khz, then we compute sgtc according
- * to this multiple.
- * That way, we match more how AMD thinks all of that work.
- * We will then get the same kind of behaviour already tested under
- * the "well-known" other OS.
- */
-static int __cpuinit fixup_sgtc(void)
-{
-	unsigned int sgtc;
-	unsigned int m;
-
-	m = fsb / 3333;
-	if ((m % 10) >= 5)
-		m += 5;
-
-	m /= 10;
-
-	sgtc = 100 * m * latency;
-	sgtc = sgtc / 3;
-	if (sgtc > 0xfffff) {
-		printk(KERN_WARNING PFX "SGTC too large %d\n", sgtc);
-		sgtc = 0xfffff;
-	}
-	return sgtc;
-}
-
-static unsigned int powernow_get(unsigned int cpu)
-{
-	union msr_fidvidstatus fidvidstatus;
-	unsigned int cfid;
-
-	if (cpu)
-		return 0;
-	rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
-	cfid = fidvidstatus.bits.CFID;
-
-	return fsb * fid_codes[cfid] / 10;
-}
-
-
-static int __cpuinit acer_cpufreq_pst(const struct dmi_system_id *d)
-{
-	printk(KERN_WARNING PFX
-		"%s laptop with broken PST tables in BIOS detected.\n",
-		d->ident);
-	printk(KERN_WARNING PFX
-		"You need to downgrade to 3A21 (09/09/2002), or try a newer "
-		"BIOS than 3A71 (01/20/2003)\n");
-	printk(KERN_WARNING PFX
-		"cpufreq scaling has been disabled as a result of this.\n");
-	return 0;
-}
-
-/*
- * Some Athlon laptops have really fucked PST tables.
- * A BIOS update is all that can save them.
- * Mention this, and disable cpufreq.
- */
-static struct dmi_system_id __cpuinitdata powernow_dmi_table[] = {
-	{
-		.callback = acer_cpufreq_pst,
-		.ident = "Acer Aspire",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "Insyde Software"),
-			DMI_MATCH(DMI_BIOS_VERSION, "3A71"),
-		},
-	},
-	{ }
-};
-
-static int __cpuinit powernow_cpu_init(struct cpufreq_policy *policy)
-{
-	union msr_fidvidstatus fidvidstatus;
-	int result;
-
-	if (policy->cpu != 0)
-		return -ENODEV;
-
-	rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
-
-	recalibrate_cpu_khz();
-
-	fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID];
-	if (!fsb) {
-		printk(KERN_WARNING PFX "can not determine bus frequency\n");
-		return -EINVAL;
-	}
-	dprintk("FSB: %3dMHz\n", fsb/1000);
-
-	if (dmi_check_system(powernow_dmi_table) || acpi_force) {
-		printk(KERN_INFO PFX "PSB/PST known to be broken.  "
-				"Trying ACPI instead\n");
-		result = powernow_acpi_init();
-	} else {
-		result = powernow_decode_bios(fidvidstatus.bits.MFID,
-				fidvidstatus.bits.SVID);
-		if (result) {
-			printk(KERN_INFO PFX "Trying ACPI perflib\n");
-			maximum_speed = 0;
-			minimum_speed = -1;
-			latency = 0;
-			result = powernow_acpi_init();
-			if (result) {
-				printk(KERN_INFO PFX
-					"ACPI and legacy methods failed\n");
-			}
-		} else {
-			/* SGTC use the bus clock as timer */
-			latency = fixup_sgtc();
-			printk(KERN_INFO PFX "SGTC: %d\n", latency);
-		}
-	}
-
-	if (result)
-		return result;
-
-	printk(KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n",
-				minimum_speed/1000, maximum_speed/1000);
-
-	policy->cpuinfo.transition_latency =
-		cpufreq_scale(2000000UL, fsb, latency);
-
-	policy->cur = powernow_get(0);
-
-	cpufreq_frequency_table_get_attr(powernow_table, policy->cpu);
-
-	return cpufreq_frequency_table_cpuinfo(policy, powernow_table);
-}
-
-static int powernow_cpu_exit(struct cpufreq_policy *policy)
-{
-	cpufreq_frequency_table_put_attr(policy->cpu);
-
-#ifdef CONFIG_X86_POWERNOW_K7_ACPI
-	if (acpi_processor_perf) {
-		acpi_processor_unregister_performance(acpi_processor_perf, 0);
-		free_cpumask_var(acpi_processor_perf->shared_cpu_map);
-		kfree(acpi_processor_perf);
-	}
-#endif
-
-	kfree(powernow_table);
-	return 0;
-}
-
-static struct freq_attr *powernow_table_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-static struct cpufreq_driver powernow_driver = {
-	.verify		= powernow_verify,
-	.target		= powernow_target,
-	.get		= powernow_get,
-#ifdef CONFIG_X86_POWERNOW_K7_ACPI
-	.bios_limit	= acpi_processor_get_bios_limit,
-#endif
-	.init		= powernow_cpu_init,
-	.exit		= powernow_cpu_exit,
-	.name		= "powernow-k7",
-	.owner		= THIS_MODULE,
-	.attr		= powernow_table_attr,
-};
-
-static int __init powernow_init(void)
-{
-	if (check_powernow() == 0)
-		return -ENODEV;
-	return cpufreq_register_driver(&powernow_driver);
-}
-
-
-static void __exit powernow_exit(void)
-{
-	cpufreq_unregister_driver(&powernow_driver);
-}
-
-module_param(acpi_force,  int, 0444);
-MODULE_PARM_DESC(acpi_force, "Force ACPI to be used.");
-
-MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
-MODULE_DESCRIPTION("Powernow driver for AMD K7 processors.");
-MODULE_LICENSE("GPL");
-
-late_initcall(powernow_init);
-module_exit(powernow_exit);
-
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h
deleted file mode 100644
index 35fb4eaf6e1..00000000000
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- *  (C) 2003 Dave Jones.
- *
- *  Licensed under the terms of the GNU GPL License version 2.
- *
- *  AMD-specific information
- *
- */
-
-union msr_fidvidctl {
-	struct {
-		unsigned FID:5,			// 4:0
-		reserved1:3,	// 7:5
-		VID:5,			// 12:8
-		reserved2:3,	// 15:13
-		FIDC:1,			// 16
-		VIDC:1,			// 17
-		reserved3:2,	// 19:18
-		FIDCHGRATIO:1,	// 20
-		reserved4:11,	// 31-21
-		SGTC:20,		// 32:51
-		reserved5:12;	// 63:52
-	} bits;
-	unsigned long long val;
-};
-
-union msr_fidvidstatus {
-	struct {
-		unsigned CFID:5,			// 4:0
-		reserved1:3,	// 7:5
-		SFID:5,			// 12:8
-		reserved2:3,	// 15:13
-		MFID:5,			// 20:16
-		reserved3:11,	// 31:21
-		CVID:5,			// 36:32
-		reserved4:3,	// 39:37
-		SVID:5,			// 44:40
-		reserved5:3,	// 47:45
-		MVID:5,			// 52:48
-		reserved6:11;	// 63:53
-	} bits;
-	unsigned long long val;
-};
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
deleted file mode 100644
index 2368e38327b..00000000000
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ /dev/null
@@ -1,1607 +0,0 @@
-/*
- *   (c) 2003-2010 Advanced Micro Devices, Inc.
- *  Your use of this code is subject to the terms and conditions of the
- *  GNU general public license version 2. See "COPYING" or
- *  http://www.gnu.org/licenses/gpl.html
- *
- *  Support : mark.langsdorf@amd.com
- *
- *  Based on the powernow-k7.c module written by Dave Jones.
- *  (C) 2003 Dave Jones on behalf of SuSE Labs
- *  (C) 2004 Dominik Brodowski <linux@brodo.de>
- *  (C) 2004 Pavel Machek <pavel@ucw.cz>
- *  Licensed under the terms of the GNU GPL License version 2.
- *  Based upon datasheets & sample CPUs kindly provided by AMD.
- *
- *  Valuable input gratefully received from Dave Jones, Pavel Machek,
- *  Dominik Brodowski, Jacob Shin, and others.
- *  Originally developed by Paul Devriendt.
- *  Processor information obtained from Chapter 9 (Power and Thermal Management)
- *  of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD
- *  Opteron Processors" available for download from www.amd.com
- *
- *  Tables for specific CPUs can be inferred from
- *     http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf
- */
-
-#include <linux/kernel.h>
-#include <linux/smp.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/cpumask.h>
-#include <linux/sched.h>	/* for current / set_cpus_allowed() */
-#include <linux/io.h>
-#include <linux/delay.h>
-
-#include <asm/msr.h>
-
-#include <linux/acpi.h>
-#include <linux/mutex.h>
-#include <acpi/processor.h>
-
-#define PFX "powernow-k8: "
-#define VERSION "version 2.20.00"
-#include "powernow-k8.h"
-#include "mperf.h"
-
-/* serialize freq changes  */
-static DEFINE_MUTEX(fidvid_mutex);
-
-static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data);
-
-static int cpu_family = CPU_OPTERON;
-
-/* core performance boost */
-static bool cpb_capable, cpb_enabled;
-static struct msr __percpu *msrs;
-
-static struct cpufreq_driver cpufreq_amd64_driver;
-
-#ifndef CONFIG_SMP
-static inline const struct cpumask *cpu_core_mask(int cpu)
-{
-	return cpumask_of(0);
-}
-#endif
-
-/* Return a frequency in MHz, given an input fid */
-static u32 find_freq_from_fid(u32 fid)
-{
-	return 800 + (fid * 100);
-}
-
-/* Return a frequency in KHz, given an input fid */
-static u32 find_khz_freq_from_fid(u32 fid)
-{
-	return 1000 * find_freq_from_fid(fid);
-}
-
-static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data,
-		u32 pstate)
-{
-	return data[pstate].frequency;
-}
-
-/* Return the vco fid for an input fid
- *
- * Each "low" fid has corresponding "high" fid, and you can get to "low" fids
- * only from corresponding high fids. This returns "high" fid corresponding to
- * "low" one.
- */
-static u32 convert_fid_to_vco_fid(u32 fid)
-{
-	if (fid < HI_FID_TABLE_BOTTOM)
-		return 8 + (2 * fid);
-	else
-		return fid;
-}
-
-/*
- * Return 1 if the pending bit is set. Unless we just instructed the processor
- * to transition to a new state, seeing this bit set is really bad news.
- */
-static int pending_bit_stuck(void)
-{
-	u32 lo, hi;
-
-	if (cpu_family == CPU_HW_PSTATE)
-		return 0;
-
-	rdmsr(MSR_FIDVID_STATUS, lo, hi);
-	return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0;
-}
-
-/*
- * Update the global current fid / vid values from the status msr.
- * Returns 1 on error.
- */
-static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
-{
-	u32 lo, hi;
-	u32 i = 0;
-
-	if (cpu_family == CPU_HW_PSTATE) {
-		rdmsr(MSR_PSTATE_STATUS, lo, hi);
-		i = lo & HW_PSTATE_MASK;
-		data->currpstate = i;
-
-		/*
-		 * a workaround for family 11h erratum 311 might cause
-		 * an "out-of-range Pstate if the core is in Pstate-0
-		 */
-		if ((boot_cpu_data.x86 == 0x11) && (i >= data->numps))
-			data->currpstate = HW_PSTATE_0;
-
-		return 0;
-	}
-	do {
-		if (i++ > 10000) {
-			dprintk("detected change pending stuck\n");
-			return 1;
-		}
-		rdmsr(MSR_FIDVID_STATUS, lo, hi);
-	} while (lo & MSR_S_LO_CHANGE_PENDING);
-
-	data->currvid = hi & MSR_S_HI_CURRENT_VID;
-	data->currfid = lo & MSR_S_LO_CURRENT_FID;
-
-	return 0;
-}
-
-/* the isochronous relief time */
-static void count_off_irt(struct powernow_k8_data *data)
-{
-	udelay((1 << data->irt) * 10);
-	return;
-}
-
-/* the voltage stabilization time */
-static void count_off_vst(struct powernow_k8_data *data)
-{
-	udelay(data->vstable * VST_UNITS_20US);
-	return;
-}
-
-/* need to init the control msr to a safe value (for each cpu) */
-static void fidvid_msr_init(void)
-{
-	u32 lo, hi;
-	u8 fid, vid;
-
-	rdmsr(MSR_FIDVID_STATUS, lo, hi);
-	vid = hi & MSR_S_HI_CURRENT_VID;
-	fid = lo & MSR_S_LO_CURRENT_FID;
-	lo = fid | (vid << MSR_C_LO_VID_SHIFT);
-	hi = MSR_C_HI_STP_GNT_BENIGN;
-	dprintk("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi);
-	wrmsr(MSR_FIDVID_CTL, lo, hi);
-}
-
-/* write the new fid value along with the other control fields to the msr */
-static int write_new_fid(struct powernow_k8_data *data, u32 fid)
-{
-	u32 lo;
-	u32 savevid = data->currvid;
-	u32 i = 0;
-
-	if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) {
-		printk(KERN_ERR PFX "internal error - overflow on fid write\n");
-		return 1;
-	}
-
-	lo = fid;
-	lo |= (data->currvid << MSR_C_LO_VID_SHIFT);
-	lo |= MSR_C_LO_INIT_FID_VID;
-
-	dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n",
-		fid, lo, data->plllock * PLL_LOCK_CONVERSION);
-
-	do {
-		wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
-		if (i++ > 100) {
-			printk(KERN_ERR PFX
-				"Hardware error - pending bit very stuck - "
-				"no further pstate changes possible\n");
-			return 1;
-		}
-	} while (query_current_values_with_pending_wait(data));
-
-	count_off_irt(data);
-
-	if (savevid != data->currvid) {
-		printk(KERN_ERR PFX
-			"vid change on fid trans, old 0x%x, new 0x%x\n",
-			savevid, data->currvid);
-		return 1;
-	}
-
-	if (fid != data->currfid) {
-		printk(KERN_ERR PFX
-			"fid trans failed, fid 0x%x, curr 0x%x\n", fid,
-			data->currfid);
-		return 1;
-	}
-
-	return 0;
-}
-
-/* Write a new vid to the hardware */
-static int write_new_vid(struct powernow_k8_data *data, u32 vid)
-{
-	u32 lo;
-	u32 savefid = data->currfid;
-	int i = 0;
-
-	if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) {
-		printk(KERN_ERR PFX "internal error - overflow on vid write\n");
-		return 1;
-	}
-
-	lo = data->currfid;
-	lo |= (vid << MSR_C_LO_VID_SHIFT);
-	lo |= MSR_C_LO_INIT_FID_VID;
-
-	dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n",
-		vid, lo, STOP_GRANT_5NS);
-
-	do {
-		wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
-		if (i++ > 100) {
-			printk(KERN_ERR PFX "internal error - pending bit "
-					"very stuck - no further pstate "
-					"changes possible\n");
-			return 1;
-		}
-	} while (query_current_values_with_pending_wait(data));
-
-	if (savefid != data->currfid) {
-		printk(KERN_ERR PFX "fid changed on vid trans, old "
-			"0x%x new 0x%x\n",
-		       savefid, data->currfid);
-		return 1;
-	}
-
-	if (vid != data->currvid) {
-		printk(KERN_ERR PFX "vid trans failed, vid 0x%x, "
-				"curr 0x%x\n",
-				vid, data->currvid);
-		return 1;
-	}
-
-	return 0;
-}
-
-/*
- * Reduce the vid by the max of step or reqvid.
- * Decreasing vid codes represent increasing voltages:
- * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off.
- */
-static int decrease_vid_code_by_step(struct powernow_k8_data *data,
-		u32 reqvid, u32 step)
-{
-	if ((data->currvid - reqvid) > step)
-		reqvid = data->currvid - step;
-
-	if (write_new_vid(data, reqvid))
-		return 1;
-
-	count_off_vst(data);
-
-	return 0;
-}
-
-/* Change hardware pstate by single MSR write */
-static int transition_pstate(struct powernow_k8_data *data, u32 pstate)
-{
-	wrmsr(MSR_PSTATE_CTRL, pstate, 0);
-	data->currpstate = pstate;
-	return 0;
-}
-
-/* Change Opteron/Athlon64 fid and vid, by the 3 phases. */
-static int transition_fid_vid(struct powernow_k8_data *data,
-		u32 reqfid, u32 reqvid)
-{
-	if (core_voltage_pre_transition(data, reqvid, reqfid))
-		return 1;
-
-	if (core_frequency_transition(data, reqfid))
-		return 1;
-
-	if (core_voltage_post_transition(data, reqvid))
-		return 1;
-
-	if (query_current_values_with_pending_wait(data))
-		return 1;
-
-	if ((reqfid != data->currfid) || (reqvid != data->currvid)) {
-		printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, "
-				"curr 0x%x 0x%x\n",
-				smp_processor_id(),
-				reqfid, reqvid, data->currfid, data->currvid);
-		return 1;
-	}
-
-	dprintk("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n",
-		smp_processor_id(), data->currfid, data->currvid);
-
-	return 0;
-}
-
-/* Phase 1 - core voltage transition ... setup voltage */
-static int core_voltage_pre_transition(struct powernow_k8_data *data,
-		u32 reqvid, u32 reqfid)
-{
-	u32 rvosteps = data->rvo;
-	u32 savefid = data->currfid;
-	u32 maxvid, lo, rvomult = 1;
-
-	dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, "
-		"reqvid 0x%x, rvo 0x%x\n",
-		smp_processor_id(),
-		data->currfid, data->currvid, reqvid, data->rvo);
-
-	if ((savefid < LO_FID_TABLE_TOP) && (reqfid < LO_FID_TABLE_TOP))
-		rvomult = 2;
-	rvosteps *= rvomult;
-	rdmsr(MSR_FIDVID_STATUS, lo, maxvid);
-	maxvid = 0x1f & (maxvid >> 16);
-	dprintk("ph1 maxvid=0x%x\n", maxvid);
-	if (reqvid < maxvid) /* lower numbers are higher voltages */
-		reqvid = maxvid;
-
-	while (data->currvid > reqvid) {
-		dprintk("ph1: curr 0x%x, req vid 0x%x\n",
-			data->currvid, reqvid);
-		if (decrease_vid_code_by_step(data, reqvid, data->vidmvs))
-			return 1;
-	}
-
-	while ((rvosteps > 0) &&
-			((rvomult * data->rvo + data->currvid) > reqvid)) {
-		if (data->currvid == maxvid) {
-			rvosteps = 0;
-		} else {
-			dprintk("ph1: changing vid for rvo, req 0x%x\n",
-				data->currvid - 1);
-			if (decrease_vid_code_by_step(data, data->currvid-1, 1))
-				return 1;
-			rvosteps--;
-		}
-	}
-
-	if (query_current_values_with_pending_wait(data))
-		return 1;
-
-	if (savefid != data->currfid) {
-		printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n",
-				data->currfid);
-		return 1;
-	}
-
-	dprintk("ph1 complete, currfid 0x%x, currvid 0x%x\n",
-		data->currfid, data->currvid);
-
-	return 0;
-}
-
-/* Phase 2 - core frequency transition */
-static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
-{
-	u32 vcoreqfid, vcocurrfid, vcofiddiff;
-	u32 fid_interval, savevid = data->currvid;
-
-	if (data->currfid == reqfid) {
-		printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n",
-				data->currfid);
-		return 0;
-	}
-
-	dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, "
-		"reqfid 0x%x\n",
-		smp_processor_id(),
-		data->currfid, data->currvid, reqfid);
-
-	vcoreqfid = convert_fid_to_vco_fid(reqfid);
-	vcocurrfid = convert_fid_to_vco_fid(data->currfid);
-	vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
-	    : vcoreqfid - vcocurrfid;
-
-	if ((reqfid <= LO_FID_TABLE_TOP) && (data->currfid <= LO_FID_TABLE_TOP))
-		vcofiddiff = 0;
-
-	while (vcofiddiff > 2) {
-		(data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2);
-
-		if (reqfid > data->currfid) {
-			if (data->currfid > LO_FID_TABLE_TOP) {
-				if (write_new_fid(data,
-						data->currfid + fid_interval))
-					return 1;
-			} else {
-				if (write_new_fid
-				    (data,
-				     2 + convert_fid_to_vco_fid(data->currfid)))
-					return 1;
-			}
-		} else {
-			if (write_new_fid(data, data->currfid - fid_interval))
-				return 1;
-		}
-
-		vcocurrfid = convert_fid_to_vco_fid(data->currfid);
-		vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
-		    : vcoreqfid - vcocurrfid;
-	}
-
-	if (write_new_fid(data, reqfid))
-		return 1;
-
-	if (query_current_values_with_pending_wait(data))
-		return 1;
-
-	if (data->currfid != reqfid) {
-		printk(KERN_ERR PFX
-			"ph2: mismatch, failed fid transition, "
-			"curr 0x%x, req 0x%x\n",
-			data->currfid, reqfid);
-		return 1;
-	}
-
-	if (savevid != data->currvid) {
-		printk(KERN_ERR PFX "ph2: vid changed, save 0x%x, curr 0x%x\n",
-			savevid, data->currvid);
-		return 1;
-	}
-
-	dprintk("ph2 complete, currfid 0x%x, currvid 0x%x\n",
-		data->currfid, data->currvid);
-
-	return 0;
-}
-
-/* Phase 3 - core voltage transition flow ... jump to the final vid. */
-static int core_voltage_post_transition(struct powernow_k8_data *data,
-		u32 reqvid)
-{
-	u32 savefid = data->currfid;
-	u32 savereqvid = reqvid;
-
-	dprintk("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n",
-		smp_processor_id(),
-		data->currfid, data->currvid);
-
-	if (reqvid != data->currvid) {
-		if (write_new_vid(data, reqvid))
-			return 1;
-
-		if (savefid != data->currfid) {
-			printk(KERN_ERR PFX
-			       "ph3: bad fid change, save 0x%x, curr 0x%x\n",
-			       savefid, data->currfid);
-			return 1;
-		}
-
-		if (data->currvid != reqvid) {
-			printk(KERN_ERR PFX
-			       "ph3: failed vid transition\n, "
-			       "req 0x%x, curr 0x%x",
-			       reqvid, data->currvid);
-			return 1;
-		}
-	}
-
-	if (query_current_values_with_pending_wait(data))
-		return 1;
-
-	if (savereqvid != data->currvid) {
-		dprintk("ph3 failed, currvid 0x%x\n", data->currvid);
-		return 1;
-	}
-
-	if (savefid != data->currfid) {
-		dprintk("ph3 failed, currfid changed 0x%x\n",
-			data->currfid);
-		return 1;
-	}
-
-	dprintk("ph3 complete, currfid 0x%x, currvid 0x%x\n",
-		data->currfid, data->currvid);
-
-	return 0;
-}
-
-static void check_supported_cpu(void *_rc)
-{
-	u32 eax, ebx, ecx, edx;
-	int *rc = _rc;
-
-	*rc = -ENODEV;
-
-	if (__this_cpu_read(cpu_info.x86_vendor) != X86_VENDOR_AMD)
-		return;
-
-	eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
-	if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) &&
-	    ((eax & CPUID_XFAM) < CPUID_XFAM_10H))
-		return;
-
-	if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) {
-		if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
-		    ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) {
-			printk(KERN_INFO PFX
-				"Processor cpuid %x not supported\n", eax);
-			return;
-		}
-
-		eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES);
-		if (eax < CPUID_FREQ_VOLT_CAPABILITIES) {
-			printk(KERN_INFO PFX
-			       "No frequency change capabilities detected\n");
-			return;
-		}
-
-		cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
-		if ((edx & P_STATE_TRANSITION_CAPABLE)
-			!= P_STATE_TRANSITION_CAPABLE) {
-			printk(KERN_INFO PFX
-				"Power state transitions not supported\n");
-			return;
-		}
-	} else { /* must be a HW Pstate capable processor */
-		cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
-		if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE)
-			cpu_family = CPU_HW_PSTATE;
-		else
-			return;
-	}
-
-	*rc = 0;
-}
-
-static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst,
-		u8 maxvid)
-{
-	unsigned int j;
-	u8 lastfid = 0xff;
-
-	for (j = 0; j < data->numps; j++) {
-		if (pst[j].vid > LEAST_VID) {
-			printk(KERN_ERR FW_BUG PFX "vid %d invalid : 0x%x\n",
-			       j, pst[j].vid);
-			return -EINVAL;
-		}
-		if (pst[j].vid < data->rvo) {
-			/* vid + rvo >= 0 */
-			printk(KERN_ERR FW_BUG PFX "0 vid exceeded with pstate"
-			       " %d\n", j);
-			return -ENODEV;
-		}
-		if (pst[j].vid < maxvid + data->rvo) {
-			/* vid + rvo >= maxvid */
-			printk(KERN_ERR FW_BUG PFX "maxvid exceeded with pstate"
-			       " %d\n", j);
-			return -ENODEV;
-		}
-		if (pst[j].fid > MAX_FID) {
-			printk(KERN_ERR FW_BUG PFX "maxfid exceeded with pstate"
-			       " %d\n", j);
-			return -ENODEV;
-		}
-		if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) {
-			/* Only first fid is allowed to be in "low" range */
-			printk(KERN_ERR FW_BUG PFX "two low fids - %d : "
-			       "0x%x\n", j, pst[j].fid);
-			return -EINVAL;
-		}
-		if (pst[j].fid < lastfid)
-			lastfid = pst[j].fid;
-	}
-	if (lastfid & 1) {
-		printk(KERN_ERR FW_BUG PFX "lastfid invalid\n");
-		return -EINVAL;
-	}
-	if (lastfid > LO_FID_TABLE_TOP)
-		printk(KERN_INFO FW_BUG PFX
-			"first fid not from lo freq table\n");
-
-	return 0;
-}
-
-static void invalidate_entry(struct cpufreq_frequency_table *powernow_table,
-		unsigned int entry)
-{
-	powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
-}
-
-static void print_basics(struct powernow_k8_data *data)
-{
-	int j;
-	for (j = 0; j < data->numps; j++) {
-		if (data->powernow_table[j].frequency !=
-				CPUFREQ_ENTRY_INVALID) {
-			if (cpu_family == CPU_HW_PSTATE) {
-				printk(KERN_INFO PFX
-					"   %d : pstate %d (%d MHz)\n", j,
-					data->powernow_table[j].index,
-					data->powernow_table[j].frequency/1000);
-			} else {
-				printk(KERN_INFO PFX
-					"fid 0x%x (%d MHz), vid 0x%x\n",
-					data->powernow_table[j].index & 0xff,
-					data->powernow_table[j].frequency/1000,
-					data->powernow_table[j].index >> 8);
-			}
-		}
-	}
-	if (data->batps)
-		printk(KERN_INFO PFX "Only %d pstates on battery\n",
-				data->batps);
-}
-
-static u32 freq_from_fid_did(u32 fid, u32 did)
-{
-	u32 mhz = 0;
-
-	if (boot_cpu_data.x86 == 0x10)
-		mhz = (100 * (fid + 0x10)) >> did;
-	else if (boot_cpu_data.x86 == 0x11)
-		mhz = (100 * (fid + 8)) >> did;
-	else
-		BUG();
-
-	return mhz * 1000;
-}
-
-static int fill_powernow_table(struct powernow_k8_data *data,
-		struct pst_s *pst, u8 maxvid)
-{
-	struct cpufreq_frequency_table *powernow_table;
-	unsigned int j;
-
-	if (data->batps) {
-		/* use ACPI support to get full speed on mains power */
-		printk(KERN_WARNING PFX
-			"Only %d pstates usable (use ACPI driver for full "
-			"range\n", data->batps);
-		data->numps = data->batps;
-	}
-
-	for (j = 1; j < data->numps; j++) {
-		if (pst[j-1].fid >= pst[j].fid) {
-			printk(KERN_ERR PFX "PST out of sequence\n");
-			return -EINVAL;
-		}
-	}
-
-	if (data->numps < 2) {
-		printk(KERN_ERR PFX "no p states to transition\n");
-		return -ENODEV;
-	}
-
-	if (check_pst_table(data, pst, maxvid))
-		return -EINVAL;
-
-	powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
-		* (data->numps + 1)), GFP_KERNEL);
-	if (!powernow_table) {
-		printk(KERN_ERR PFX "powernow_table memory alloc failure\n");
-		return -ENOMEM;
-	}
-
-	for (j = 0; j < data->numps; j++) {
-		int freq;
-		powernow_table[j].index = pst[j].fid; /* lower 8 bits */
-		powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */
-		freq = find_khz_freq_from_fid(pst[j].fid);
-		powernow_table[j].frequency = freq;
-	}
-	powernow_table[data->numps].frequency = CPUFREQ_TABLE_END;
-	powernow_table[data->numps].index = 0;
-
-	if (query_current_values_with_pending_wait(data)) {
-		kfree(powernow_table);
-		return -EIO;
-	}
-
-	dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid);
-	data->powernow_table = powernow_table;
-	if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
-		print_basics(data);
-
-	for (j = 0; j < data->numps; j++)
-		if ((pst[j].fid == data->currfid) &&
-		    (pst[j].vid == data->currvid))
-			return 0;
-
-	dprintk("currfid/vid do not match PST, ignoring\n");
-	return 0;
-}
-
-/* Find and validate the PSB/PST table in BIOS. */
-static int find_psb_table(struct powernow_k8_data *data)
-{
-	struct psb_s *psb;
-	unsigned int i;
-	u32 mvs;
-	u8 maxvid;
-	u32 cpst = 0;
-	u32 thiscpuid;
-
-	for (i = 0xc0000; i < 0xffff0; i += 0x10) {
-		/* Scan BIOS looking for the signature. */
-		/* It can not be at ffff0 - it is too big. */
-
-		psb = phys_to_virt(i);
-		if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0)
-			continue;
-
-		dprintk("found PSB header at 0x%p\n", psb);
-
-		dprintk("table vers: 0x%x\n", psb->tableversion);
-		if (psb->tableversion != PSB_VERSION_1_4) {
-			printk(KERN_ERR FW_BUG PFX "PSB table is not v1.4\n");
-			return -ENODEV;
-		}
-
-		dprintk("flags: 0x%x\n", psb->flags1);
-		if (psb->flags1) {
-			printk(KERN_ERR FW_BUG PFX "unknown flags\n");
-			return -ENODEV;
-		}
-
-		data->vstable = psb->vstable;
-		dprintk("voltage stabilization time: %d(*20us)\n",
-				data->vstable);
-
-		dprintk("flags2: 0x%x\n", psb->flags2);
-		data->rvo = psb->flags2 & 3;
-		data->irt = ((psb->flags2) >> 2) & 3;
-		mvs = ((psb->flags2) >> 4) & 3;
-		data->vidmvs = 1 << mvs;
-		data->batps = ((psb->flags2) >> 6) & 3;
-
-		dprintk("ramp voltage offset: %d\n", data->rvo);
-		dprintk("isochronous relief time: %d\n", data->irt);
-		dprintk("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs);
-
-		dprintk("numpst: 0x%x\n", psb->num_tables);
-		cpst = psb->num_tables;
-		if ((psb->cpuid == 0x00000fc0) ||
-		    (psb->cpuid == 0x00000fe0)) {
-			thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
-			if ((thiscpuid == 0x00000fc0) ||
-			    (thiscpuid == 0x00000fe0))
-				cpst = 1;
-		}
-		if (cpst != 1) {
-			printk(KERN_ERR FW_BUG PFX "numpst must be 1\n");
-			return -ENODEV;
-		}
-
-		data->plllock = psb->plllocktime;
-		dprintk("plllocktime: 0x%x (units 1us)\n", psb->plllocktime);
-		dprintk("maxfid: 0x%x\n", psb->maxfid);
-		dprintk("maxvid: 0x%x\n", psb->maxvid);
-		maxvid = psb->maxvid;
-
-		data->numps = psb->numps;
-		dprintk("numpstates: 0x%x\n", data->numps);
-		return fill_powernow_table(data,
-				(struct pst_s *)(psb+1), maxvid);
-	}
-	/*
-	 * If you see this message, complain to BIOS manufacturer. If
-	 * he tells you "we do not support Linux" or some similar
-	 * nonsense, remember that Windows 2000 uses the same legacy
-	 * mechanism that the old Linux PSB driver uses. Tell them it
-	 * is broken with Windows 2000.
-	 *
-	 * The reference to the AMD documentation is chapter 9 in the
-	 * BIOS and Kernel Developer's Guide, which is available on
-	 * www.amd.com
-	 */
-	printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n");
-	printk(KERN_ERR PFX "Make sure that your BIOS is up to date"
-		" and Cool'N'Quiet support is enabled in BIOS setup\n");
-	return -ENODEV;
-}
-
-static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data,
-		unsigned int index)
-{
-	u64 control;
-
-	if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
-		return;
-
-	control = data->acpi_data.states[index].control;
-	data->irt = (control >> IRT_SHIFT) & IRT_MASK;
-	data->rvo = (control >> RVO_SHIFT) & RVO_MASK;
-	data->exttype = (control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
-	data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK;
-	data->vidmvs = 1 << ((control >> MVS_SHIFT) & MVS_MASK);
-	data->vstable = (control >> VST_SHIFT) & VST_MASK;
-}
-
-static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
-{
-	struct cpufreq_frequency_table *powernow_table;
-	int ret_val = -ENODEV;
-	u64 control, status;
-
-	if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
-		dprintk("register performance failed: bad ACPI data\n");
-		return -EIO;
-	}
-
-	/* verify the data contained in the ACPI structures */
-	if (data->acpi_data.state_count <= 1) {
-		dprintk("No ACPI P-States\n");
-		goto err_out;
-	}
-
-	control = data->acpi_data.control_register.space_id;
-	status = data->acpi_data.status_register.space_id;
-
-	if ((control != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
-	    (status != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
-		dprintk("Invalid control/status registers (%x - %x)\n",
-			control, status);
-		goto err_out;
-	}
-
-	/* fill in data->powernow_table */
-	powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
-		* (data->acpi_data.state_count + 1)), GFP_KERNEL);
-	if (!powernow_table) {
-		dprintk("powernow_table memory alloc failure\n");
-		goto err_out;
-	}
-
-	/* fill in data */
-	data->numps = data->acpi_data.state_count;
-	powernow_k8_acpi_pst_values(data, 0);
-
-	if (cpu_family == CPU_HW_PSTATE)
-		ret_val = fill_powernow_table_pstate(data, powernow_table);
-	else
-		ret_val = fill_powernow_table_fidvid(data, powernow_table);
-	if (ret_val)
-		goto err_out_mem;
-
-	powernow_table[data->acpi_data.state_count].frequency =
-		CPUFREQ_TABLE_END;
-	powernow_table[data->acpi_data.state_count].index = 0;
-	data->powernow_table = powernow_table;
-
-	if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
-		print_basics(data);
-
-	/* notify BIOS that we exist */
-	acpi_processor_notify_smm(THIS_MODULE);
-
-	if (!zalloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) {
-		printk(KERN_ERR PFX
-				"unable to alloc powernow_k8_data cpumask\n");
-		ret_val = -ENOMEM;
-		goto err_out_mem;
-	}
-
-	return 0;
-
-err_out_mem:
-	kfree(powernow_table);
-
-err_out:
-	acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
-
-	/* data->acpi_data.state_count informs us at ->exit()
-	 * whether ACPI was used */
-	data->acpi_data.state_count = 0;
-
-	return ret_val;
-}
-
-static int fill_powernow_table_pstate(struct powernow_k8_data *data,
-		struct cpufreq_frequency_table *powernow_table)
-{
-	int i;
-	u32 hi = 0, lo = 0;
-	rdmsr(MSR_PSTATE_CUR_LIMIT, lo, hi);
-	data->max_hw_pstate = (lo & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT;
-
-	for (i = 0; i < data->acpi_data.state_count; i++) {
-		u32 index;
-
-		index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
-		if (index > data->max_hw_pstate) {
-			printk(KERN_ERR PFX "invalid pstate %d - "
-					"bad value %d.\n", i, index);
-			printk(KERN_ERR PFX "Please report to BIOS "
-					"manufacturer\n");
-			invalidate_entry(powernow_table, i);
-			continue;
-		}
-		rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi);
-		if (!(hi & HW_PSTATE_VALID_MASK)) {
-			dprintk("invalid pstate %d, ignoring\n", index);
-			invalidate_entry(powernow_table, i);
-			continue;
-		}
-
-		powernow_table[i].index = index;
-
-		/* Frequency may be rounded for these */
-		if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10)
-				 || boot_cpu_data.x86 == 0x11) {
-			powernow_table[i].frequency =
-				freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7);
-		} else
-			powernow_table[i].frequency =
-				data->acpi_data.states[i].core_frequency * 1000;
-	}
-	return 0;
-}
-
-static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
-		struct cpufreq_frequency_table *powernow_table)
-{
-	int i;
-
-	for (i = 0; i < data->acpi_data.state_count; i++) {
-		u32 fid;
-		u32 vid;
-		u32 freq, index;
-		u64 status, control;
-
-		if (data->exttype) {
-			status =  data->acpi_data.states[i].status;
-			fid = status & EXT_FID_MASK;
-			vid = (status >> VID_SHIFT) & EXT_VID_MASK;
-		} else {
-			control =  data->acpi_data.states[i].control;
-			fid = control & FID_MASK;
-			vid = (control >> VID_SHIFT) & VID_MASK;
-		}
-
-		dprintk("   %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
-
-		index = fid | (vid<<8);
-		powernow_table[i].index = index;
-
-		freq = find_khz_freq_from_fid(fid);
-		powernow_table[i].frequency = freq;
-
-		/* verify frequency is OK */
-		if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) {
-			dprintk("invalid freq %u kHz, ignoring\n", freq);
-			invalidate_entry(powernow_table, i);
-			continue;
-		}
-
-		/* verify voltage is OK -
-		 * BIOSs are using "off" to indicate invalid */
-		if (vid == VID_OFF) {
-			dprintk("invalid vid %u, ignoring\n", vid);
-			invalidate_entry(powernow_table, i);
-			continue;
-		}
-
-		if (freq != (data->acpi_data.states[i].core_frequency * 1000)) {
-			printk(KERN_INFO PFX "invalid freq entries "
-				"%u kHz vs. %u kHz\n", freq,
-				(unsigned int)
-				(data->acpi_data.states[i].core_frequency
-				 * 1000));
-			invalidate_entry(powernow_table, i);
-			continue;
-		}
-	}
-	return 0;
-}
-
-static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
-{
-	if (data->acpi_data.state_count)
-		acpi_processor_unregister_performance(&data->acpi_data,
-				data->cpu);
-	free_cpumask_var(data->acpi_data.shared_cpu_map);
-}
-
-static int get_transition_latency(struct powernow_k8_data *data)
-{
-	int max_latency = 0;
-	int i;
-	for (i = 0; i < data->acpi_data.state_count; i++) {
-		int cur_latency = data->acpi_data.states[i].transition_latency
-			+ data->acpi_data.states[i].bus_master_latency;
-		if (cur_latency > max_latency)
-			max_latency = cur_latency;
-	}
-	if (max_latency == 0) {
-		/*
-		 * Fam 11h and later may return 0 as transition latency. This
-		 * is intended and means "very fast". While cpufreq core and
-		 * governors currently can handle that gracefully, better set it
-		 * to 1 to avoid problems in the future.
-		 */
-		if (boot_cpu_data.x86 < 0x11)
-			printk(KERN_ERR FW_WARN PFX "Invalid zero transition "
-				"latency\n");
-		max_latency = 1;
-	}
-	/* value in usecs, needs to be in nanoseconds */
-	return 1000 * max_latency;
-}
-
-/* Take a frequency, and issue the fid/vid transition command */
-static int transition_frequency_fidvid(struct powernow_k8_data *data,
-		unsigned int index)
-{
-	u32 fid = 0;
-	u32 vid = 0;
-	int res, i;
-	struct cpufreq_freqs freqs;
-
-	dprintk("cpu %d transition to index %u\n", smp_processor_id(), index);
-
-	/* fid/vid correctness check for k8 */
-	/* fid are the lower 8 bits of the index we stored into
-	 * the cpufreq frequency table in find_psb_table, vid
-	 * are the upper 8 bits.
-	 */
-	fid = data->powernow_table[index].index & 0xFF;
-	vid = (data->powernow_table[index].index & 0xFF00) >> 8;
-
-	dprintk("table matched fid 0x%x, giving vid 0x%x\n", fid, vid);
-
-	if (query_current_values_with_pending_wait(data))
-		return 1;
-
-	if ((data->currvid == vid) && (data->currfid == fid)) {
-		dprintk("target matches current values (fid 0x%x, vid 0x%x)\n",
-			fid, vid);
-		return 0;
-	}
-
-	dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n",
-		smp_processor_id(), fid, vid);
-	freqs.old = find_khz_freq_from_fid(data->currfid);
-	freqs.new = find_khz_freq_from_fid(fid);
-
-	for_each_cpu(i, data->available_cores) {
-		freqs.cpu = i;
-		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-	}
-
-	res = transition_fid_vid(data, fid, vid);
-	freqs.new = find_khz_freq_from_fid(data->currfid);
-
-	for_each_cpu(i, data->available_cores) {
-		freqs.cpu = i;
-		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-	}
-	return res;
-}
-
-/* Take a frequency, and issue the hardware pstate transition command */
-static int transition_frequency_pstate(struct powernow_k8_data *data,
-		unsigned int index)
-{
-	u32 pstate = 0;
-	int res, i;
-	struct cpufreq_freqs freqs;
-
-	dprintk("cpu %d transition to index %u\n", smp_processor_id(), index);
-
-	/* get MSR index for hardware pstate transition */
-	pstate = index & HW_PSTATE_MASK;
-	if (pstate > data->max_hw_pstate)
-		return 0;
-	freqs.old = find_khz_freq_from_pstate(data->powernow_table,
-			data->currpstate);
-	freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
-
-	for_each_cpu(i, data->available_cores) {
-		freqs.cpu = i;
-		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-	}
-
-	res = transition_pstate(data, pstate);
-	freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
-
-	for_each_cpu(i, data->available_cores) {
-		freqs.cpu = i;
-		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-	}
-	return res;
-}
-
-/* Driver entry point to switch to the target frequency */
-static int powernowk8_target(struct cpufreq_policy *pol,
-		unsigned targfreq, unsigned relation)
-{
-	cpumask_var_t oldmask;
-	struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
-	u32 checkfid;
-	u32 checkvid;
-	unsigned int newstate;
-	int ret = -EIO;
-
-	if (!data)
-		return -EINVAL;
-
-	checkfid = data->currfid;
-	checkvid = data->currvid;
-
-	/* only run on specific CPU from here on. */
-	/* This is poor form: use a workqueue or smp_call_function_single */
-	if (!alloc_cpumask_var(&oldmask, GFP_KERNEL))
-		return -ENOMEM;
-
-	cpumask_copy(oldmask, tsk_cpus_allowed(current));
-	set_cpus_allowed_ptr(current, cpumask_of(pol->cpu));
-
-	if (smp_processor_id() != pol->cpu) {
-		printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
-		goto err_out;
-	}
-
-	if (pending_bit_stuck()) {
-		printk(KERN_ERR PFX "failing targ, change pending bit set\n");
-		goto err_out;
-	}
-
-	dprintk("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n",
-		pol->cpu, targfreq, pol->min, pol->max, relation);
-
-	if (query_current_values_with_pending_wait(data))
-		goto err_out;
-
-	if (cpu_family != CPU_HW_PSTATE) {
-		dprintk("targ: curr fid 0x%x, vid 0x%x\n",
-		data->currfid, data->currvid);
-
-		if ((checkvid != data->currvid) ||
-		    (checkfid != data->currfid)) {
-			printk(KERN_INFO PFX
-				"error - out of sync, fix 0x%x 0x%x, "
-				"vid 0x%x 0x%x\n",
-				checkfid, data->currfid,
-				checkvid, data->currvid);
-		}
-	}
-
-	if (cpufreq_frequency_table_target(pol, data->powernow_table,
-				targfreq, relation, &newstate))
-		goto err_out;
-
-	mutex_lock(&fidvid_mutex);
-
-	powernow_k8_acpi_pst_values(data, newstate);
-
-	if (cpu_family == CPU_HW_PSTATE)
-		ret = transition_frequency_pstate(data, newstate);
-	else
-		ret = transition_frequency_fidvid(data, newstate);
-	if (ret) {
-		printk(KERN_ERR PFX "transition frequency failed\n");
-		ret = 1;
-		mutex_unlock(&fidvid_mutex);
-		goto err_out;
-	}
-	mutex_unlock(&fidvid_mutex);
-
-	if (cpu_family == CPU_HW_PSTATE)
-		pol->cur = find_khz_freq_from_pstate(data->powernow_table,
-				newstate);
-	else
-		pol->cur = find_khz_freq_from_fid(data->currfid);
-	ret = 0;
-
-err_out:
-	set_cpus_allowed_ptr(current, oldmask);
-	free_cpumask_var(oldmask);
-	return ret;
-}
-
-/* Driver entry point to verify the policy and range of frequencies */
-static int powernowk8_verify(struct cpufreq_policy *pol)
-{
-	struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
-
-	if (!data)
-		return -EINVAL;
-
-	return cpufreq_frequency_table_verify(pol, data->powernow_table);
-}
-
-struct init_on_cpu {
-	struct powernow_k8_data *data;
-	int rc;
-};
-
-static void __cpuinit powernowk8_cpu_init_on_cpu(void *_init_on_cpu)
-{
-	struct init_on_cpu *init_on_cpu = _init_on_cpu;
-
-	if (pending_bit_stuck()) {
-		printk(KERN_ERR PFX "failing init, change pending bit set\n");
-		init_on_cpu->rc = -ENODEV;
-		return;
-	}
-
-	if (query_current_values_with_pending_wait(init_on_cpu->data)) {
-		init_on_cpu->rc = -ENODEV;
-		return;
-	}
-
-	if (cpu_family == CPU_OPTERON)
-		fidvid_msr_init();
-
-	init_on_cpu->rc = 0;
-}
-
-/* per CPU init entry point to the driver */
-static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
-{
-	static const char ACPI_PSS_BIOS_BUG_MSG[] =
-		KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n"
-		FW_BUG PFX "Try again with latest BIOS.\n";
-	struct powernow_k8_data *data;
-	struct init_on_cpu init_on_cpu;
-	int rc;
-	struct cpuinfo_x86 *c = &cpu_data(pol->cpu);
-
-	if (!cpu_online(pol->cpu))
-		return -ENODEV;
-
-	smp_call_function_single(pol->cpu, check_supported_cpu, &rc, 1);
-	if (rc)
-		return -ENODEV;
-
-	data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL);
-	if (!data) {
-		printk(KERN_ERR PFX "unable to alloc powernow_k8_data");
-		return -ENOMEM;
-	}
-
-	data->cpu = pol->cpu;
-	data->currpstate = HW_PSTATE_INVALID;
-
-	if (powernow_k8_cpu_init_acpi(data)) {
-		/*
-		 * Use the PSB BIOS structure. This is only available on
-		 * an UP version, and is deprecated by AMD.
-		 */
-		if (num_online_cpus() != 1) {
-			printk_once(ACPI_PSS_BIOS_BUG_MSG);
-			goto err_out;
-		}
-		if (pol->cpu != 0) {
-			printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
-			       "CPU other than CPU0. Complain to your BIOS "
-			       "vendor.\n");
-			goto err_out;
-		}
-		rc = find_psb_table(data);
-		if (rc)
-			goto err_out;
-
-		/* Take a crude guess here.
-		 * That guess was in microseconds, so multiply with 1000 */
-		pol->cpuinfo.transition_latency = (
-			 ((data->rvo + 8) * data->vstable * VST_UNITS_20US) +
-			 ((1 << data->irt) * 30)) * 1000;
-	} else /* ACPI _PSS objects available */
-		pol->cpuinfo.transition_latency = get_transition_latency(data);
-
-	/* only run on specific CPU from here on */
-	init_on_cpu.data = data;
-	smp_call_function_single(data->cpu, powernowk8_cpu_init_on_cpu,
-				 &init_on_cpu, 1);
-	rc = init_on_cpu.rc;
-	if (rc != 0)
-		goto err_out_exit_acpi;
-
-	if (cpu_family == CPU_HW_PSTATE)
-		cpumask_copy(pol->cpus, cpumask_of(pol->cpu));
-	else
-		cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu));
-	data->available_cores = pol->cpus;
-
-	if (cpu_family == CPU_HW_PSTATE)
-		pol->cur = find_khz_freq_from_pstate(data->powernow_table,
-				data->currpstate);
-	else
-		pol->cur = find_khz_freq_from_fid(data->currfid);
-	dprintk("policy current frequency %d kHz\n", pol->cur);
-
-	/* min/max the cpu is capable of */
-	if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) {
-		printk(KERN_ERR FW_BUG PFX "invalid powernow_table\n");
-		powernow_k8_cpu_exit_acpi(data);
-		kfree(data->powernow_table);
-		kfree(data);
-		return -EINVAL;
-	}
-
-	/* Check for APERF/MPERF support in hardware */
-	if (cpu_has(c, X86_FEATURE_APERFMPERF))
-		cpufreq_amd64_driver.getavg = cpufreq_get_measured_perf;
-
-	cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);
-
-	if (cpu_family == CPU_HW_PSTATE)
-		dprintk("cpu_init done, current pstate 0x%x\n",
-				data->currpstate);
-	else
-		dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n",
-			data->currfid, data->currvid);
-
-	per_cpu(powernow_data, pol->cpu) = data;
-
-	return 0;
-
-err_out_exit_acpi:
-	powernow_k8_cpu_exit_acpi(data);
-
-err_out:
-	kfree(data);
-	return -ENODEV;
-}
-
-static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
-{
-	struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
-
-	if (!data)
-		return -EINVAL;
-
-	powernow_k8_cpu_exit_acpi(data);
-
-	cpufreq_frequency_table_put_attr(pol->cpu);
-
-	kfree(data->powernow_table);
-	kfree(data);
-	per_cpu(powernow_data, pol->cpu) = NULL;
-
-	return 0;
-}
-
-static void query_values_on_cpu(void *_err)
-{
-	int *err = _err;
-	struct powernow_k8_data *data = __this_cpu_read(powernow_data);
-
-	*err = query_current_values_with_pending_wait(data);
-}
-
-static unsigned int powernowk8_get(unsigned int cpu)
-{
-	struct powernow_k8_data *data = per_cpu(powernow_data, cpu);
-	unsigned int khz = 0;
-	int err;
-
-	if (!data)
-		return 0;
-
-	smp_call_function_single(cpu, query_values_on_cpu, &err, true);
-	if (err)
-		goto out;
-
-	if (cpu_family == CPU_HW_PSTATE)
-		khz = find_khz_freq_from_pstate(data->powernow_table,
-						data->currpstate);
-	else
-		khz = find_khz_freq_from_fid(data->currfid);
-
-
-out:
-	return khz;
-}
-
-static void _cpb_toggle_msrs(bool t)
-{
-	int cpu;
-
-	get_online_cpus();
-
-	rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
-
-	for_each_cpu(cpu, cpu_online_mask) {
-		struct msr *reg = per_cpu_ptr(msrs, cpu);
-		if (t)
-			reg->l &= ~BIT(25);
-		else
-			reg->l |= BIT(25);
-	}
-	wrmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
-
-	put_online_cpus();
-}
-
-/*
- * Switch on/off core performance boosting.
- *
- * 0=disable
- * 1=enable.
- */
-static void cpb_toggle(bool t)
-{
-	if (!cpb_capable)
-		return;
-
-	if (t && !cpb_enabled) {
-		cpb_enabled = true;
-		_cpb_toggle_msrs(t);
-		printk(KERN_INFO PFX "Core Boosting enabled.\n");
-	} else if (!t && cpb_enabled) {
-		cpb_enabled = false;
-		_cpb_toggle_msrs(t);
-		printk(KERN_INFO PFX "Core Boosting disabled.\n");
-	}
-}
-
-static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
-				 size_t count)
-{
-	int ret = -EINVAL;
-	unsigned long val = 0;
-
-	ret = strict_strtoul(buf, 10, &val);
-	if (!ret && (val == 0 || val == 1) && cpb_capable)
-		cpb_toggle(val);
-	else
-		return -EINVAL;
-
-	return count;
-}
-
-static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
-{
-	return sprintf(buf, "%u\n", cpb_enabled);
-}
-
-#define define_one_rw(_name) \
-static struct freq_attr _name = \
-__ATTR(_name, 0644, show_##_name, store_##_name)
-
-define_one_rw(cpb);
-
-static struct freq_attr *powernow_k8_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	&cpb,
-	NULL,
-};
-
-static struct cpufreq_driver cpufreq_amd64_driver = {
-	.verify		= powernowk8_verify,
-	.target		= powernowk8_target,
-	.bios_limit	= acpi_processor_get_bios_limit,
-	.init		= powernowk8_cpu_init,
-	.exit		= __devexit_p(powernowk8_cpu_exit),
-	.get		= powernowk8_get,
-	.name		= "powernow-k8",
-	.owner		= THIS_MODULE,
-	.attr		= powernow_k8_attr,
-};
-
-/*
- * Clear the boost-disable flag on the CPU_DOWN path so that this cpu
- * cannot block the remaining ones from boosting. On the CPU_UP path we
- * simply keep the boost-disable flag in sync with the current global
- * state.
- */
-static int cpb_notify(struct notifier_block *nb, unsigned long action,
-		      void *hcpu)
-{
-	unsigned cpu = (long)hcpu;
-	u32 lo, hi;
-
-	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-
-		if (!cpb_enabled) {
-			rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
-			lo |= BIT(25);
-			wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi);
-		}
-		break;
-
-	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
-		rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
-		lo &= ~BIT(25);
-		wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi);
-		break;
-
-	default:
-		break;
-	}
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block cpb_nb = {
-	.notifier_call		= cpb_notify,
-};
-
-/* driver entry point for init */
-static int __cpuinit powernowk8_init(void)
-{
-	unsigned int i, supported_cpus = 0, cpu;
-	int rv;
-
-	for_each_online_cpu(i) {
-		int rc;
-		smp_call_function_single(i, check_supported_cpu, &rc, 1);
-		if (rc == 0)
-			supported_cpus++;
-	}
-
-	if (supported_cpus != num_online_cpus())
-		return -ENODEV;
-
-	printk(KERN_INFO PFX "Found %d %s (%d cpu cores) (" VERSION ")\n",
-		num_online_nodes(), boot_cpu_data.x86_model_id, supported_cpus);
-
-	if (boot_cpu_has(X86_FEATURE_CPB)) {
-
-		cpb_capable = true;
-
-		msrs = msrs_alloc();
-		if (!msrs) {
-			printk(KERN_ERR "%s: Error allocating msrs!\n", __func__);
-			return -ENOMEM;
-		}
-
-		register_cpu_notifier(&cpb_nb);
-
-		rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
-
-		for_each_cpu(cpu, cpu_online_mask) {
-			struct msr *reg = per_cpu_ptr(msrs, cpu);
-			cpb_enabled |= !(!!(reg->l & BIT(25)));
-		}
-
-		printk(KERN_INFO PFX "Core Performance Boosting: %s.\n",
-			(cpb_enabled ? "on" : "off"));
-	}
-
-	rv = cpufreq_register_driver(&cpufreq_amd64_driver);
-	if (rv < 0 && boot_cpu_has(X86_FEATURE_CPB)) {
-		unregister_cpu_notifier(&cpb_nb);
-		msrs_free(msrs);
-		msrs = NULL;
-	}
-	return rv;
-}
-
-/* driver entry point for term */
-static void __exit powernowk8_exit(void)
-{
-	dprintk("exit\n");
-
-	if (boot_cpu_has(X86_FEATURE_CPB)) {
-		msrs_free(msrs);
-		msrs = NULL;
-
-		unregister_cpu_notifier(&cpb_nb);
-	}
-
-	cpufreq_unregister_driver(&cpufreq_amd64_driver);
-}
-
-MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and "
-		"Mark Langsdorf <mark.langsdorf@amd.com>");
-MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver.");
-MODULE_LICENSE("GPL");
-
-late_initcall(powernowk8_init);
-module_exit(powernowk8_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
deleted file mode 100644
index df3529b1c02..00000000000
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- *  (c) 2003-2006 Advanced Micro Devices, Inc.
- *  Your use of this code is subject to the terms and conditions of the
- *  GNU general public license version 2. See "COPYING" or
- *  http://www.gnu.org/licenses/gpl.html
- */
-
-enum pstate {
-	HW_PSTATE_INVALID = 0xff,
-	HW_PSTATE_0 = 0,
-	HW_PSTATE_1 = 1,
-	HW_PSTATE_2 = 2,
-	HW_PSTATE_3 = 3,
-	HW_PSTATE_4 = 4,
-	HW_PSTATE_5 = 5,
-	HW_PSTATE_6 = 6,
-	HW_PSTATE_7 = 7,
-};
-
-struct powernow_k8_data {
-	unsigned int cpu;
-
-	u32 numps;  /* number of p-states */
-	u32 batps;  /* number of p-states supported on battery */
-	u32 max_hw_pstate; /* maximum legal hardware pstate */
-
-	/* these values are constant when the PSB is used to determine
-	 * vid/fid pairings, but are modified during the ->target() call
-	 * when ACPI is used */
-	u32 rvo;     /* ramp voltage offset */
-	u32 irt;     /* isochronous relief time */
-	u32 vidmvs;  /* usable value calculated from mvs */
-	u32 vstable; /* voltage stabilization time, units 20 us */
-	u32 plllock; /* pll lock time, units 1 us */
-        u32 exttype; /* extended interface = 1 */
-
-	/* keep track of the current fid / vid or pstate */
-	u32 currvid;
-	u32 currfid;
-	enum pstate currpstate;
-
-	/* the powernow_table includes all frequency and vid/fid pairings:
-	 * fid are the lower 8 bits of the index, vid are the upper 8 bits.
-	 * frequency is in kHz */
-	struct cpufreq_frequency_table  *powernow_table;
-
-	/* the acpi table needs to be kept. it's only available if ACPI was
-	 * used to determine valid frequency/vid/fid states */
-	struct acpi_processor_performance acpi_data;
-
-	/* we need to keep track of associated cores, but let cpufreq
-	 * handle hotplug events - so just point at cpufreq pol->cpus
-	 * structure */
-	struct cpumask *available_cores;
-};
-
-/* processor's cpuid instruction support */
-#define CPUID_PROCESSOR_SIGNATURE	1	/* function 1 */
-#define CPUID_XFAM			0x0ff00000	/* extended family */
-#define CPUID_XFAM_K8			0
-#define CPUID_XMOD			0x000f0000	/* extended model */
-#define CPUID_XMOD_REV_MASK		0x000c0000
-#define CPUID_XFAM_10H			0x00100000	/* family 0x10 */
-#define CPUID_USE_XFAM_XMOD		0x00000f00
-#define CPUID_GET_MAX_CAPABILITIES	0x80000000
-#define CPUID_FREQ_VOLT_CAPABILITIES	0x80000007
-#define P_STATE_TRANSITION_CAPABLE	6
-
-/* Model Specific Registers for p-state transitions. MSRs are 64-bit. For     */
-/* writes (wrmsr - opcode 0f 30), the register number is placed in ecx, and   */
-/* the value to write is placed in edx:eax. For reads (rdmsr - opcode 0f 32), */
-/* the register number is placed in ecx, and the data is returned in edx:eax. */
-
-#define MSR_FIDVID_CTL      0xc0010041
-#define MSR_FIDVID_STATUS   0xc0010042
-
-/* Field definitions within the FID VID Low Control MSR : */
-#define MSR_C_LO_INIT_FID_VID     0x00010000
-#define MSR_C_LO_NEW_VID          0x00003f00
-#define MSR_C_LO_NEW_FID          0x0000003f
-#define MSR_C_LO_VID_SHIFT        8
-
-/* Field definitions within the FID VID High Control MSR : */
-#define MSR_C_HI_STP_GNT_TO	  0x000fffff
-
-/* Field definitions within the FID VID Low Status MSR : */
-#define MSR_S_LO_CHANGE_PENDING   0x80000000   /* cleared when completed */
-#define MSR_S_LO_MAX_RAMP_VID     0x3f000000
-#define MSR_S_LO_MAX_FID          0x003f0000
-#define MSR_S_LO_START_FID        0x00003f00
-#define MSR_S_LO_CURRENT_FID      0x0000003f
-
-/* Field definitions within the FID VID High Status MSR : */
-#define MSR_S_HI_MIN_WORKING_VID  0x3f000000
-#define MSR_S_HI_MAX_WORKING_VID  0x003f0000
-#define MSR_S_HI_START_VID        0x00003f00
-#define MSR_S_HI_CURRENT_VID      0x0000003f
-#define MSR_C_HI_STP_GNT_BENIGN	  0x00000001
-
-
-/* Hardware Pstate _PSS and MSR definitions */
-#define USE_HW_PSTATE		0x00000080
-#define HW_PSTATE_MASK 		0x00000007
-#define HW_PSTATE_VALID_MASK 	0x80000000
-#define HW_PSTATE_MAX_MASK	0x000000f0
-#define HW_PSTATE_MAX_SHIFT	4
-#define MSR_PSTATE_DEF_BASE 	0xc0010064 /* base of Pstate MSRs */
-#define MSR_PSTATE_STATUS 	0xc0010063 /* Pstate Status MSR */
-#define MSR_PSTATE_CTRL 	0xc0010062 /* Pstate control MSR */
-#define MSR_PSTATE_CUR_LIMIT	0xc0010061 /* pstate current limit MSR */
-
-/* define the two driver architectures */
-#define CPU_OPTERON 0
-#define CPU_HW_PSTATE 1
-
-
-/*
- * There are restrictions frequencies have to follow:
- * - only 1 entry in the low fid table ( <=1.4GHz )
- * - lowest entry in the high fid table must be >= 2 * the entry in the
- *   low fid table
- * - lowest entry in the high fid table must be a <= 200MHz + 2 * the entry
- *   in the low fid table
- * - the parts can only step at <= 200 MHz intervals, odd fid values are
- *   supported in revision G and later revisions.
- * - lowest frequency must be >= interprocessor hypertransport link speed
- *   (only applies to MP systems obviously)
- */
-
-/* fids (frequency identifiers) are arranged in 2 tables - lo and hi */
-#define LO_FID_TABLE_TOP     7	/* fid values marking the boundary    */
-#define HI_FID_TABLE_BOTTOM  8	/* between the low and high tables    */
-
-#define LO_VCOFREQ_TABLE_TOP    1400	/* corresponding vco frequency values */
-#define HI_VCOFREQ_TABLE_BOTTOM 1600
-
-#define MIN_FREQ_RESOLUTION  200 /* fids jump by 2 matching freq jumps by 200 */
-
-#define MAX_FID 0x2a	/* Spec only gives FID values as far as 5 GHz */
-#define LEAST_VID 0x3e	/* Lowest (numerically highest) useful vid value */
-
-#define MIN_FREQ 800	/* Min and max freqs, per spec */
-#define MAX_FREQ 5000
-
-#define INVALID_FID_MASK 0xffffffc0  /* not a valid fid if these bits are set */
-#define INVALID_VID_MASK 0xffffffc0  /* not a valid vid if these bits are set */
-
-#define VID_OFF 0x3f
-
-#define STOP_GRANT_5NS 1 /* min poss memory access latency for voltage change */
-
-#define PLL_LOCK_CONVERSION (1000/5) /* ms to ns, then divide by clock period */
-
-#define MAXIMUM_VID_STEPS 1  /* Current cpus only allow a single step of 25mV */
-#define VST_UNITS_20US 20   /* Voltage Stabilization Time is in units of 20us */
-
-/*
- * Most values of interest are encoded in a single field of the _PSS
- * entries: the "control" value.
- */
-
-#define IRT_SHIFT      30
-#define RVO_SHIFT      28
-#define EXT_TYPE_SHIFT 27
-#define PLL_L_SHIFT    20
-#define MVS_SHIFT      18
-#define VST_SHIFT      11
-#define VID_SHIFT       6
-#define IRT_MASK        3
-#define RVO_MASK        3
-#define EXT_TYPE_MASK   1
-#define PLL_L_MASK   0x7f
-#define MVS_MASK        3
-#define VST_MASK     0x7f
-#define VID_MASK     0x1f
-#define FID_MASK     0x1f
-#define EXT_VID_MASK 0x3f
-#define EXT_FID_MASK 0x3f
-
-
-/*
- * Version 1.4 of the PSB table. This table is constructed by BIOS and is
- * to tell the OS's power management driver which VIDs and FIDs are
- * supported by this particular processor.
- * If the data in the PSB / PST is wrong, then this driver will program the
- * wrong values into hardware, which is very likely to lead to a crash.
- */
-
-#define PSB_ID_STRING      "AMDK7PNOW!"
-#define PSB_ID_STRING_LEN  10
-
-#define PSB_VERSION_1_4  0x14
-
-struct psb_s {
-	u8 signature[10];
-	u8 tableversion;
-	u8 flags1;
-	u16 vstable;
-	u8 flags2;
-	u8 num_tables;
-	u32 cpuid;
-	u8 plllocktime;
-	u8 maxfid;
-	u8 maxvid;
-	u8 numps;
-};
-
-/* Pairs of fid/vid values are appended to the version 1.4 PSB table. */
-struct pst_s {
-	u8 fid;
-	u8 vid;
-};
-
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k8", msg)
-
-static int core_voltage_pre_transition(struct powernow_k8_data *data,
-	u32 reqvid, u32 regfid);
-static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid);
-static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid);
-
-static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index);
-
-static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
-static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
diff --git a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
deleted file mode 100644
index 435a996a613..00000000000
--- a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
- *	sc520_freq.c: cpufreq driver for the AMD Elan sc520
- *
- *	Copyright (C) 2005 Sean Young <sean@mess.org>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
- *	Based on elanfreq.c
- *
- *	2005-03-30: - initial revision
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-
-#include <linux/delay.h>
-#include <linux/cpufreq.h>
-#include <linux/timex.h>
-#include <linux/io.h>
-
-#include <asm/msr.h>
-
-#define MMCR_BASE	0xfffef000	/* The default base address */
-#define OFFS_CPUCTL	0x2   /* CPU Control Register */
-
-static __u8 __iomem *cpuctl;
-
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"sc520_freq", msg)
-#define PFX "sc520_freq: "
-
-static struct cpufreq_frequency_table sc520_freq_table[] = {
-	{0x01,	100000},
-	{0x02,	133000},
-	{0,	CPUFREQ_TABLE_END},
-};
-
-static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu)
-{
-	u8 clockspeed_reg = *cpuctl;
-
-	switch (clockspeed_reg & 0x03) {
-	default:
-		printk(KERN_ERR PFX "error: cpuctl register has unexpected "
-				"value %02x\n", clockspeed_reg);
-	case 0x01:
-		return 100000;
-	case 0x02:
-		return 133000;
-	}
-}
-
-static void sc520_freq_set_cpu_state(unsigned int state)
-{
-
-	struct cpufreq_freqs	freqs;
-	u8 clockspeed_reg;
-
-	freqs.old = sc520_freq_get_cpu_frequency(0);
-	freqs.new = sc520_freq_table[state].frequency;
-	freqs.cpu = 0; /* AMD Elan is UP */
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-
-	dprintk("attempting to set frequency to %i kHz\n",
-			sc520_freq_table[state].frequency);
-
-	local_irq_disable();
-
-	clockspeed_reg = *cpuctl & ~0x03;
-	*cpuctl = clockspeed_reg | sc520_freq_table[state].index;
-
-	local_irq_enable();
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-};
-
-static int sc520_freq_verify(struct cpufreq_policy *policy)
-{
-	return cpufreq_frequency_table_verify(policy, &sc520_freq_table[0]);
-}
-
-static int sc520_freq_target(struct cpufreq_policy *policy,
-			    unsigned int target_freq,
-			    unsigned int relation)
-{
-	unsigned int newstate = 0;
-
-	if (cpufreq_frequency_table_target(policy, sc520_freq_table,
-				target_freq, relation, &newstate))
-		return -EINVAL;
-
-	sc520_freq_set_cpu_state(newstate);
-
-	return 0;
-}
-
-
-/*
- *	Module init and exit code
- */
-
-static int sc520_freq_cpu_init(struct cpufreq_policy *policy)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-	int result;
-
-	/* capability check */
-	if (c->x86_vendor != X86_VENDOR_AMD ||
-	    c->x86 != 4 || c->x86_model != 9)
-		return -ENODEV;
-
-	/* cpuinfo and default policy values */
-	policy->cpuinfo.transition_latency = 1000000; /* 1ms */
-	policy->cur = sc520_freq_get_cpu_frequency(0);
-
-	result = cpufreq_frequency_table_cpuinfo(policy, sc520_freq_table);
-	if (result)
-		return result;
-
-	cpufreq_frequency_table_get_attr(sc520_freq_table, policy->cpu);
-
-	return 0;
-}
-
-
-static int sc520_freq_cpu_exit(struct cpufreq_policy *policy)
-{
-	cpufreq_frequency_table_put_attr(policy->cpu);
-	return 0;
-}
-
-
-static struct freq_attr *sc520_freq_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-
-static struct cpufreq_driver sc520_freq_driver = {
-	.get	= sc520_freq_get_cpu_frequency,
-	.verify	= sc520_freq_verify,
-	.target	= sc520_freq_target,
-	.init	= sc520_freq_cpu_init,
-	.exit	= sc520_freq_cpu_exit,
-	.name	= "sc520_freq",
-	.owner	= THIS_MODULE,
-	.attr	= sc520_freq_attr,
-};
-
-
-static int __init sc520_freq_init(void)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-	int err;
-
-	/* Test if we have the right hardware */
-	if (c->x86_vendor != X86_VENDOR_AMD ||
-	    c->x86 != 4 || c->x86_model != 9) {
-		dprintk("no Elan SC520 processor found!\n");
-		return -ENODEV;
-	}
-	cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1);
-	if (!cpuctl) {
-		printk(KERN_ERR "sc520_freq: error: failed to remap memory\n");
-		return -ENOMEM;
-	}
-
-	err = cpufreq_register_driver(&sc520_freq_driver);
-	if (err)
-		iounmap(cpuctl);
-
-	return err;
-}
-
-
-static void __exit sc520_freq_exit(void)
-{
-	cpufreq_unregister_driver(&sc520_freq_driver);
-	iounmap(cpuctl);
-}
-
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Sean Young <sean@mess.org>");
-MODULE_DESCRIPTION("cpufreq driver for AMD's Elan sc520 CPU");
-
-module_init(sc520_freq_init);
-module_exit(sc520_freq_exit);
-
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
deleted file mode 100644
index 9b1ff37de46..00000000000
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ /dev/null
@@ -1,636 +0,0 @@
-/*
- * cpufreq driver for Enhanced SpeedStep, as found in Intel's Pentium
- * M (part of the Centrino chipset).
- *
- * Since the original Pentium M, most new Intel CPUs support Enhanced
- * SpeedStep.
- *
- * Despite the "SpeedStep" in the name, this is almost entirely unlike
- * traditional SpeedStep.
- *
- * Modelled on speedstep.c
- *
- * Copyright (C) 2003 Jeremy Fitzhardinge <jeremy@goop.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/sched.h>	/* current */
-#include <linux/delay.h>
-#include <linux/compiler.h>
-#include <linux/gfp.h>
-
-#include <asm/msr.h>
-#include <asm/processor.h>
-#include <asm/cpufeature.h>
-
-#define PFX		"speedstep-centrino: "
-#define MAINTAINER	"cpufreq@vger.kernel.org"
-
-#define dprintk(msg...) \
-	cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg)
-
-#define INTEL_MSR_RANGE	(0xffff)
-
-struct cpu_id
-{
-	__u8	x86;            /* CPU family */
-	__u8	x86_model;	/* model */
-	__u8	x86_mask;	/* stepping */
-};
-
-enum {
-	CPU_BANIAS,
-	CPU_DOTHAN_A1,
-	CPU_DOTHAN_A2,
-	CPU_DOTHAN_B0,
-	CPU_MP4HT_D0,
-	CPU_MP4HT_E0,
-};
-
-static const struct cpu_id cpu_ids[] = {
-	[CPU_BANIAS]	= { 6,  9, 5 },
-	[CPU_DOTHAN_A1]	= { 6, 13, 1 },
-	[CPU_DOTHAN_A2]	= { 6, 13, 2 },
-	[CPU_DOTHAN_B0]	= { 6, 13, 6 },
-	[CPU_MP4HT_D0]	= {15,  3, 4 },
-	[CPU_MP4HT_E0]	= {15,  4, 1 },
-};
-#define N_IDS	ARRAY_SIZE(cpu_ids)
-
-struct cpu_model
-{
-	const struct cpu_id *cpu_id;
-	const char	*model_name;
-	unsigned	max_freq; /* max clock in kHz */
-
-	struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */
-};
-static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c,
-				  const struct cpu_id *x);
-
-/* Operating points for current CPU */
-static DEFINE_PER_CPU(struct cpu_model *, centrino_model);
-static DEFINE_PER_CPU(const struct cpu_id *, centrino_cpu);
-
-static struct cpufreq_driver centrino_driver;
-
-#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE
-
-/* Computes the correct form for IA32_PERF_CTL MSR for a particular
-   frequency/voltage operating point; frequency in MHz, volts in mV.
-   This is stored as "index" in the structure. */
-#define OP(mhz, mv)							\
-	{								\
-		.frequency = (mhz) * 1000,				\
-		.index = (((mhz)/100) << 8) | ((mv - 700) / 16)		\
-	}
-
-/*
- * These voltage tables were derived from the Intel Pentium M
- * datasheet, document 25261202.pdf, Table 5.  I have verified they
- * are consistent with my IBM ThinkPad X31, which has a 1.3GHz Pentium
- * M.
- */
-
-/* Ultra Low Voltage Intel Pentium M processor 900MHz (Banias) */
-static struct cpufreq_frequency_table banias_900[] =
-{
-	OP(600,  844),
-	OP(800,  988),
-	OP(900, 1004),
-	{ .frequency = CPUFREQ_TABLE_END }
-};
-
-/* Ultra Low Voltage Intel Pentium M processor 1000MHz (Banias) */
-static struct cpufreq_frequency_table banias_1000[] =
-{
-	OP(600,   844),
-	OP(800,   972),
-	OP(900,   988),
-	OP(1000, 1004),
-	{ .frequency = CPUFREQ_TABLE_END }
-};
-
-/* Low Voltage Intel Pentium M processor 1.10GHz (Banias) */
-static struct cpufreq_frequency_table banias_1100[] =
-{
-	OP( 600,  956),
-	OP( 800, 1020),
-	OP( 900, 1100),
-	OP(1000, 1164),
-	OP(1100, 1180),
-	{ .frequency = CPUFREQ_TABLE_END }
-};
-
-
-/* Low Voltage Intel Pentium M processor 1.20GHz (Banias) */
-static struct cpufreq_frequency_table banias_1200[] =
-{
-	OP( 600,  956),
-	OP( 800, 1004),
-	OP( 900, 1020),
-	OP(1000, 1100),
-	OP(1100, 1164),
-	OP(1200, 1180),
-	{ .frequency = CPUFREQ_TABLE_END }
-};
-
-/* Intel Pentium M processor 1.30GHz (Banias) */
-static struct cpufreq_frequency_table banias_1300[] =
-{
-	OP( 600,  956),
-	OP( 800, 1260),
-	OP(1000, 1292),
-	OP(1200, 1356),
-	OP(1300, 1388),
-	{ .frequency = CPUFREQ_TABLE_END }
-};
-
-/* Intel Pentium M processor 1.40GHz (Banias) */
-static struct cpufreq_frequency_table banias_1400[] =
-{
-	OP( 600,  956),
-	OP( 800, 1180),
-	OP(1000, 1308),
-	OP(1200, 1436),
-	OP(1400, 1484),
-	{ .frequency = CPUFREQ_TABLE_END }
-};
-
-/* Intel Pentium M processor 1.50GHz (Banias) */
-static struct cpufreq_frequency_table banias_1500[] =
-{
-	OP( 600,  956),
-	OP( 800, 1116),
-	OP(1000, 1228),
-	OP(1200, 1356),
-	OP(1400, 1452),
-	OP(1500, 1484),
-	{ .frequency = CPUFREQ_TABLE_END }
-};
-
-/* Intel Pentium M processor 1.60GHz (Banias) */
-static struct cpufreq_frequency_table banias_1600[] =
-{
-	OP( 600,  956),
-	OP( 800, 1036),
-	OP(1000, 1164),
-	OP(1200, 1276),
-	OP(1400, 1420),
-	OP(1600, 1484),
-	{ .frequency = CPUFREQ_TABLE_END }
-};
-
-/* Intel Pentium M processor 1.70GHz (Banias) */
-static struct cpufreq_frequency_table banias_1700[] =
-{
-	OP( 600,  956),
-	OP( 800, 1004),
-	OP(1000, 1116),
-	OP(1200, 1228),
-	OP(1400, 1308),
-	OP(1700, 1484),
-	{ .frequency = CPUFREQ_TABLE_END }
-};
-#undef OP
-
-#define _BANIAS(cpuid, max, name)	\
-{	.cpu_id		= cpuid,	\
-	.model_name	= "Intel(R) Pentium(R) M processor " name "MHz", \
-	.max_freq	= (max)*1000,	\
-	.op_points	= banias_##max,	\
-}
-#define BANIAS(max)	_BANIAS(&cpu_ids[CPU_BANIAS], max, #max)
-
-/* CPU models, their operating frequency range, and freq/voltage
-   operating points */
-static struct cpu_model models[] =
-{
-	_BANIAS(&cpu_ids[CPU_BANIAS], 900, " 900"),
-	BANIAS(1000),
-	BANIAS(1100),
-	BANIAS(1200),
-	BANIAS(1300),
-	BANIAS(1400),
-	BANIAS(1500),
-	BANIAS(1600),
-	BANIAS(1700),
-
-	/* NULL model_name is a wildcard */
-	{ &cpu_ids[CPU_DOTHAN_A1], NULL, 0, NULL },
-	{ &cpu_ids[CPU_DOTHAN_A2], NULL, 0, NULL },
-	{ &cpu_ids[CPU_DOTHAN_B0], NULL, 0, NULL },
-	{ &cpu_ids[CPU_MP4HT_D0], NULL, 0, NULL },
-	{ &cpu_ids[CPU_MP4HT_E0], NULL, 0, NULL },
-
-	{ NULL, }
-};
-#undef _BANIAS
-#undef BANIAS
-
-static int centrino_cpu_init_table(struct cpufreq_policy *policy)
-{
-	struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu);
-	struct cpu_model *model;
-
-	for(model = models; model->cpu_id != NULL; model++)
-		if (centrino_verify_cpu_id(cpu, model->cpu_id) &&
-		    (model->model_name == NULL ||
-		     strcmp(cpu->x86_model_id, model->model_name) == 0))
-			break;
-
-	if (model->cpu_id == NULL) {
-		/* No match at all */
-		dprintk("no support for CPU model \"%s\": "
-		       "send /proc/cpuinfo to " MAINTAINER "\n",
-		       cpu->x86_model_id);
-		return -ENOENT;
-	}
-
-	if (model->op_points == NULL) {
-		/* Matched a non-match */
-		dprintk("no table support for CPU model \"%s\"\n",
-		       cpu->x86_model_id);
-		dprintk("try using the acpi-cpufreq driver\n");
-		return -ENOENT;
-	}
-
-	per_cpu(centrino_model, policy->cpu) = model;
-
-	dprintk("found \"%s\": max frequency: %dkHz\n",
-	       model->model_name, model->max_freq);
-
-	return 0;
-}
-
-#else
-static inline int centrino_cpu_init_table(struct cpufreq_policy *policy)
-{
-	return -ENODEV;
-}
-#endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */
-
-static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c,
-				  const struct cpu_id *x)
-{
-	if ((c->x86 == x->x86) &&
-	    (c->x86_model == x->x86_model) &&
-	    (c->x86_mask == x->x86_mask))
-		return 1;
-	return 0;
-}
-
-/* To be called only after centrino_model is initialized */
-static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe)
-{
-	int i;
-
-	/*
-	 * Extract clock in kHz from PERF_CTL value
-	 * for centrino, as some DSDTs are buggy.
-	 * Ideally, this can be done using the acpi_data structure.
-	 */
-	if ((per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_BANIAS]) ||
-	    (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_A1]) ||
-	    (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_B0])) {
-		msr = (msr >> 8) & 0xff;
-		return msr * 100000;
-	}
-
-	if ((!per_cpu(centrino_model, cpu)) ||
-	    (!per_cpu(centrino_model, cpu)->op_points))
-		return 0;
-
-	msr &= 0xffff;
-	for (i = 0;
-		per_cpu(centrino_model, cpu)->op_points[i].frequency
-							!= CPUFREQ_TABLE_END;
-	     i++) {
-		if (msr == per_cpu(centrino_model, cpu)->op_points[i].index)
-			return per_cpu(centrino_model, cpu)->
-							op_points[i].frequency;
-	}
-	if (failsafe)
-		return per_cpu(centrino_model, cpu)->op_points[i-1].frequency;
-	else
-		return 0;
-}
-
-/* Return the current CPU frequency in kHz */
-static unsigned int get_cur_freq(unsigned int cpu)
-{
-	unsigned l, h;
-	unsigned clock_freq;
-
-	rdmsr_on_cpu(cpu, MSR_IA32_PERF_STATUS, &l, &h);
-	clock_freq = extract_clock(l, cpu, 0);
-
-	if (unlikely(clock_freq == 0)) {
-		/*
-		 * On some CPUs, we can see transient MSR values (which are
-		 * not present in _PSS), while CPU is doing some automatic
-		 * P-state transition (like TM2). Get the last freq set 
-		 * in PERF_CTL.
-		 */
-		rdmsr_on_cpu(cpu, MSR_IA32_PERF_CTL, &l, &h);
-		clock_freq = extract_clock(l, cpu, 1);
-	}
-	return clock_freq;
-}
-
-
-static int centrino_cpu_init(struct cpufreq_policy *policy)
-{
-	struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu);
-	unsigned freq;
-	unsigned l, h;
-	int ret;
-	int i;
-
-	/* Only Intel makes Enhanced Speedstep-capable CPUs */
-	if (cpu->x86_vendor != X86_VENDOR_INTEL ||
-	    !cpu_has(cpu, X86_FEATURE_EST))
-		return -ENODEV;
-
-	if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC))
-		centrino_driver.flags |= CPUFREQ_CONST_LOOPS;
-
-	if (policy->cpu != 0)
-		return -ENODEV;
-
-	for (i = 0; i < N_IDS; i++)
-		if (centrino_verify_cpu_id(cpu, &cpu_ids[i]))
-			break;
-
-	if (i != N_IDS)
-		per_cpu(centrino_cpu, policy->cpu) = &cpu_ids[i];
-
-	if (!per_cpu(centrino_cpu, policy->cpu)) {
-		dprintk("found unsupported CPU with "
-		"Enhanced SpeedStep: send /proc/cpuinfo to "
-		MAINTAINER "\n");
-		return -ENODEV;
-	}
-
-	if (centrino_cpu_init_table(policy)) {
-		return -ENODEV;
-	}
-
-	/* Check to see if Enhanced SpeedStep is enabled, and try to
-	   enable it if not. */
-	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-
-	if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
-		l |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
-		dprintk("trying to enable Enhanced SpeedStep (%x)\n", l);
-		wrmsr(MSR_IA32_MISC_ENABLE, l, h);
-
-		/* check to see if it stuck */
-		rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-		if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
-			printk(KERN_INFO PFX
-				"couldn't enable Enhanced SpeedStep\n");
-			return -ENODEV;
-		}
-	}
-
-	freq = get_cur_freq(policy->cpu);
-	policy->cpuinfo.transition_latency = 10000;
-						/* 10uS transition latency */
-	policy->cur = freq;
-
-	dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur);
-
-	ret = cpufreq_frequency_table_cpuinfo(policy,
-		per_cpu(centrino_model, policy->cpu)->op_points);
-	if (ret)
-		return (ret);
-
-	cpufreq_frequency_table_get_attr(
-		per_cpu(centrino_model, policy->cpu)->op_points, policy->cpu);
-
-	return 0;
-}
-
-static int centrino_cpu_exit(struct cpufreq_policy *policy)
-{
-	unsigned int cpu = policy->cpu;
-
-	if (!per_cpu(centrino_model, cpu))
-		return -ENODEV;
-
-	cpufreq_frequency_table_put_attr(cpu);
-
-	per_cpu(centrino_model, cpu) = NULL;
-
-	return 0;
-}
-
-/**
- * centrino_verify - verifies a new CPUFreq policy
- * @policy: new policy
- *
- * Limit must be within this model's frequency range at least one
- * border included.
- */
-static int centrino_verify (struct cpufreq_policy *policy)
-{
-	return cpufreq_frequency_table_verify(policy,
-			per_cpu(centrino_model, policy->cpu)->op_points);
-}
-
-/**
- * centrino_setpolicy - set a new CPUFreq policy
- * @policy: new policy
- * @target_freq: the target frequency
- * @relation: how that frequency relates to achieved frequency
- *	(CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
- *
- * Sets a new CPUFreq policy.
- */
-static int centrino_target (struct cpufreq_policy *policy,
-			    unsigned int target_freq,
-			    unsigned int relation)
-{
-	unsigned int    newstate = 0;
-	unsigned int	msr, oldmsr = 0, h = 0, cpu = policy->cpu;
-	struct cpufreq_freqs	freqs;
-	int			retval = 0;
-	unsigned int		j, k, first_cpu, tmp;
-	cpumask_var_t covered_cpus;
-
-	if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL)))
-		return -ENOMEM;
-
-	if (unlikely(per_cpu(centrino_model, cpu) == NULL)) {
-		retval = -ENODEV;
-		goto out;
-	}
-
-	if (unlikely(cpufreq_frequency_table_target(policy,
-			per_cpu(centrino_model, cpu)->op_points,
-			target_freq,
-			relation,
-			&newstate))) {
-		retval = -EINVAL;
-		goto out;
-	}
-
-	first_cpu = 1;
-	for_each_cpu(j, policy->cpus) {
-		int good_cpu;
-
-		/* cpufreq holds the hotplug lock, so we are safe here */
-		if (!cpu_online(j))
-			continue;
-
-		/*
-		 * Support for SMP systems.
-		 * Make sure we are running on CPU that wants to change freq
-		 */
-		if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
-			good_cpu = cpumask_any_and(policy->cpus,
-						   cpu_online_mask);
-		else
-			good_cpu = j;
-
-		if (good_cpu >= nr_cpu_ids) {
-			dprintk("couldn't limit to CPUs in this domain\n");
-			retval = -EAGAIN;
-			if (first_cpu) {
-				/* We haven't started the transition yet. */
-				goto out;
-			}
-			break;
-		}
-
-		msr = per_cpu(centrino_model, cpu)->op_points[newstate].index;
-
-		if (first_cpu) {
-			rdmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, &oldmsr, &h);
-			if (msr == (oldmsr & 0xffff)) {
-				dprintk("no change needed - msr was and needs "
-					"to be %x\n", oldmsr);
-				retval = 0;
-				goto out;
-			}
-
-			freqs.old = extract_clock(oldmsr, cpu, 0);
-			freqs.new = extract_clock(msr, cpu, 0);
-
-			dprintk("target=%dkHz old=%d new=%d msr=%04x\n",
-				target_freq, freqs.old, freqs.new, msr);
-
-			for_each_cpu(k, policy->cpus) {
-				if (!cpu_online(k))
-					continue;
-				freqs.cpu = k;
-				cpufreq_notify_transition(&freqs,
-					CPUFREQ_PRECHANGE);
-			}
-
-			first_cpu = 0;
-			/* all but 16 LSB are reserved, treat them with care */
-			oldmsr &= ~0xffff;
-			msr &= 0xffff;
-			oldmsr |= msr;
-		}
-
-		wrmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, oldmsr, h);
-		if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
-			break;
-
-		cpumask_set_cpu(j, covered_cpus);
-	}
-
-	for_each_cpu(k, policy->cpus) {
-		if (!cpu_online(k))
-			continue;
-		freqs.cpu = k;
-		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-	}
-
-	if (unlikely(retval)) {
-		/*
-		 * We have failed halfway through the frequency change.
-		 * We have sent callbacks to policy->cpus and
-		 * MSRs have already been written on coverd_cpus.
-		 * Best effort undo..
-		 */
-
-		for_each_cpu(j, covered_cpus)
-			wrmsr_on_cpu(j, MSR_IA32_PERF_CTL, oldmsr, h);
-
-		tmp = freqs.new;
-		freqs.new = freqs.old;
-		freqs.old = tmp;
-		for_each_cpu(j, policy->cpus) {
-			if (!cpu_online(j))
-				continue;
-			cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-			cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-		}
-	}
-	retval = 0;
-
-out:
-	free_cpumask_var(covered_cpus);
-	return retval;
-}
-
-static struct freq_attr* centrino_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-static struct cpufreq_driver centrino_driver = {
-	.name		= "centrino", /* should be speedstep-centrino,
-					 but there's a 16 char limit */
-	.init		= centrino_cpu_init,
-	.exit		= centrino_cpu_exit,
-	.verify		= centrino_verify,
-	.target		= centrino_target,
-	.get		= get_cur_freq,
-	.attr           = centrino_attr,
-	.owner		= THIS_MODULE,
-};
-
-
-/**
- * centrino_init - initializes the Enhanced SpeedStep CPUFreq driver
- *
- * Initializes the Enhanced SpeedStep support. Returns -ENODEV on
- * unsupported devices, -ENOENT if there's no voltage table for this
- * particular CPU model, -EINVAL on problems during initiatization,
- * and zero on success.
- *
- * This is quite picky.  Not only does the CPU have to advertise the
- * "est" flag in the cpuid capability flags, we look for a specific
- * CPU model and stepping, and we need to have the exact model name in
- * our voltage tables.  That is, be paranoid about not releasing
- * someone's valuable magic smoke.
- */
-static int __init centrino_init(void)
-{
-	struct cpuinfo_x86 *cpu = &cpu_data(0);
-
-	if (!cpu_has(cpu, X86_FEATURE_EST))
-		return -ENODEV;
-
-	return cpufreq_register_driver(&centrino_driver);
-}
-
-static void __exit centrino_exit(void)
-{
-	cpufreq_unregister_driver(&centrino_driver);
-}
-
-MODULE_AUTHOR ("Jeremy Fitzhardinge <jeremy@goop.org>");
-MODULE_DESCRIPTION ("Enhanced SpeedStep driver for Intel Pentium M processors.");
-MODULE_LICENSE ("GPL");
-
-late_initcall(centrino_init);
-module_exit(centrino_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
deleted file mode 100644
index 561758e9518..00000000000
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ /dev/null
@@ -1,452 +0,0 @@
-/*
- * (C) 2001  Dave Jones, Arjan van de ven.
- * (C) 2002 - 2003  Dominik Brodowski <linux@brodo.de>
- *
- *  Licensed under the terms of the GNU GPL License version 2.
- *  Based upon reverse engineered information, and on Intel documentation
- *  for chipsets ICH2-M and ICH3-M.
- *
- *  Many thanks to Ducrot Bruno for finding and fixing the last
- *  "missing link" for ICH2-M/ICH3-M support, and to Thomas Winkler
- *  for extensive testing.
- *
- *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
- */
-
-
-/*********************************************************************
- *                        SPEEDSTEP - DEFINITIONS                    *
- *********************************************************************/
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/pci.h>
-#include <linux/sched.h>
-
-#include "speedstep-lib.h"
-
-
-/* speedstep_chipset:
- *   It is necessary to know which chipset is used. As accesses to
- * this device occur at various places in this module, we need a
- * static struct pci_dev * pointing to that device.
- */
-static struct pci_dev *speedstep_chipset_dev;
-
-
-/* speedstep_processor
- */
-static enum speedstep_processor speedstep_processor;
-
-static u32 pmbase;
-
-/*
- *   There are only two frequency states for each processor. Values
- * are in kHz for the time being.
- */
-static struct cpufreq_frequency_table speedstep_freqs[] = {
-	{SPEEDSTEP_HIGH,	0},
-	{SPEEDSTEP_LOW,		0},
-	{0,			CPUFREQ_TABLE_END},
-};
-
-
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"speedstep-ich", msg)
-
-
-/**
- * speedstep_find_register - read the PMBASE address
- *
- * Returns: -ENODEV if no register could be found
- */
-static int speedstep_find_register(void)
-{
-	if (!speedstep_chipset_dev)
-		return -ENODEV;
-
-	/* get PMBASE */
-	pci_read_config_dword(speedstep_chipset_dev, 0x40, &pmbase);
-	if (!(pmbase & 0x01)) {
-		printk(KERN_ERR "speedstep-ich: could not find speedstep register\n");
-		return -ENODEV;
-	}
-
-	pmbase &= 0xFFFFFFFE;
-	if (!pmbase) {
-		printk(KERN_ERR "speedstep-ich: could not find speedstep register\n");
-		return -ENODEV;
-	}
-
-	dprintk("pmbase is 0x%x\n", pmbase);
-	return 0;
-}
-
-/**
- * speedstep_set_state - set the SpeedStep state
- * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
- *
- *   Tries to change the SpeedStep state.  Can be called from
- *   smp_call_function_single.
- */
-static void speedstep_set_state(unsigned int state)
-{
-	u8 pm2_blk;
-	u8 value;
-	unsigned long flags;
-
-	if (state > 0x1)
-		return;
-
-	/* Disable IRQs */
-	local_irq_save(flags);
-
-	/* read state */
-	value = inb(pmbase + 0x50);
-
-	dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
-
-	/* write new state */
-	value &= 0xFE;
-	value |= state;
-
-	dprintk("writing 0x%x to pmbase 0x%x + 0x50\n", value, pmbase);
-
-	/* Disable bus master arbitration */
-	pm2_blk = inb(pmbase + 0x20);
-	pm2_blk |= 0x01;
-	outb(pm2_blk, (pmbase + 0x20));
-
-	/* Actual transition */
-	outb(value, (pmbase + 0x50));
-
-	/* Restore bus master arbitration */
-	pm2_blk &= 0xfe;
-	outb(pm2_blk, (pmbase + 0x20));
-
-	/* check if transition was successful */
-	value = inb(pmbase + 0x50);
-
-	/* Enable IRQs */
-	local_irq_restore(flags);
-
-	dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
-
-	if (state == (value & 0x1))
-		dprintk("change to %u MHz succeeded\n",
-			speedstep_get_frequency(speedstep_processor) / 1000);
-	else
-		printk(KERN_ERR "cpufreq: change failed - I/O error\n");
-
-	return;
-}
-
-/* Wrapper for smp_call_function_single. */
-static void _speedstep_set_state(void *_state)
-{
-	speedstep_set_state(*(unsigned int *)_state);
-}
-
-/**
- * speedstep_activate - activate SpeedStep control in the chipset
- *
- *   Tries to activate the SpeedStep status and control registers.
- * Returns -EINVAL on an unsupported chipset, and zero on success.
- */
-static int speedstep_activate(void)
-{
-	u16 value = 0;
-
-	if (!speedstep_chipset_dev)
-		return -EINVAL;
-
-	pci_read_config_word(speedstep_chipset_dev, 0x00A0, &value);
-	if (!(value & 0x08)) {
-		value |= 0x08;
-		dprintk("activating SpeedStep (TM) registers\n");
-		pci_write_config_word(speedstep_chipset_dev, 0x00A0, value);
-	}
-
-	return 0;
-}
-
-
-/**
- * speedstep_detect_chipset - detect the Southbridge which contains SpeedStep logic
- *
- *   Detects ICH2-M, ICH3-M and ICH4-M so far. The pci_dev points to
- * the LPC bridge / PM module which contains all power-management
- * functions. Returns the SPEEDSTEP_CHIPSET_-number for the detected
- * chipset, or zero on failure.
- */
-static unsigned int speedstep_detect_chipset(void)
-{
-	speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
-			      PCI_DEVICE_ID_INTEL_82801DB_12,
-			      PCI_ANY_ID, PCI_ANY_ID,
-			      NULL);
-	if (speedstep_chipset_dev)
-		return 4; /* 4-M */
-
-	speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
-			      PCI_DEVICE_ID_INTEL_82801CA_12,
-			      PCI_ANY_ID, PCI_ANY_ID,
-			      NULL);
-	if (speedstep_chipset_dev)
-		return 3; /* 3-M */
-
-
-	speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
-			      PCI_DEVICE_ID_INTEL_82801BA_10,
-			      PCI_ANY_ID, PCI_ANY_ID,
-			      NULL);
-	if (speedstep_chipset_dev) {
-		/* speedstep.c causes lockups on Dell Inspirons 8000 and
-		 * 8100 which use a pretty old revision of the 82815
-		 * host brige. Abort on these systems.
-		 */
-		static struct pci_dev *hostbridge;
-
-		hostbridge  = pci_get_subsys(PCI_VENDOR_ID_INTEL,
-			      PCI_DEVICE_ID_INTEL_82815_MC,
-			      PCI_ANY_ID, PCI_ANY_ID,
-			      NULL);
-
-		if (!hostbridge)
-			return 2; /* 2-M */
-
-		if (hostbridge->revision < 5) {
-			dprintk("hostbridge does not support speedstep\n");
-			speedstep_chipset_dev = NULL;
-			pci_dev_put(hostbridge);
-			return 0;
-		}
-
-		pci_dev_put(hostbridge);
-		return 2; /* 2-M */
-	}
-
-	return 0;
-}
-
-static void get_freq_data(void *_speed)
-{
-	unsigned int *speed = _speed;
-
-	*speed = speedstep_get_frequency(speedstep_processor);
-}
-
-static unsigned int speedstep_get(unsigned int cpu)
-{
-	unsigned int speed;
-
-	/* You're supposed to ensure CPU is online. */
-	if (smp_call_function_single(cpu, get_freq_data, &speed, 1) != 0)
-		BUG();
-
-	dprintk("detected %u kHz as current frequency\n", speed);
-	return speed;
-}
-
-/**
- * speedstep_target - set a new CPUFreq policy
- * @policy: new policy
- * @target_freq: the target frequency
- * @relation: how that frequency relates to achieved frequency
- *	(CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
- *
- * Sets a new CPUFreq policy.
- */
-static int speedstep_target(struct cpufreq_policy *policy,
-			     unsigned int target_freq,
-			     unsigned int relation)
-{
-	unsigned int newstate = 0, policy_cpu;
-	struct cpufreq_freqs freqs;
-	int i;
-
-	if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0],
-				target_freq, relation, &newstate))
-		return -EINVAL;
-
-	policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask);
-	freqs.old = speedstep_get(policy_cpu);
-	freqs.new = speedstep_freqs[newstate].frequency;
-	freqs.cpu = policy->cpu;
-
-	dprintk("transiting from %u to %u kHz\n", freqs.old, freqs.new);
-
-	/* no transition necessary */
-	if (freqs.old == freqs.new)
-		return 0;
-
-	for_each_cpu(i, policy->cpus) {
-		freqs.cpu = i;
-		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-	}
-
-	smp_call_function_single(policy_cpu, _speedstep_set_state, &newstate,
-				 true);
-
-	for_each_cpu(i, policy->cpus) {
-		freqs.cpu = i;
-		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-	}
-
-	return 0;
-}
-
-
-/**
- * speedstep_verify - verifies a new CPUFreq policy
- * @policy: new policy
- *
- * Limit must be within speedstep_low_freq and speedstep_high_freq, with
- * at least one border included.
- */
-static int speedstep_verify(struct cpufreq_policy *policy)
-{
-	return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
-}
-
-struct get_freqs {
-	struct cpufreq_policy *policy;
-	int ret;
-};
-
-static void get_freqs_on_cpu(void *_get_freqs)
-{
-	struct get_freqs *get_freqs = _get_freqs;
-
-	get_freqs->ret =
-		speedstep_get_freqs(speedstep_processor,
-			    &speedstep_freqs[SPEEDSTEP_LOW].frequency,
-			    &speedstep_freqs[SPEEDSTEP_HIGH].frequency,
-			    &get_freqs->policy->cpuinfo.transition_latency,
-			    &speedstep_set_state);
-}
-
-static int speedstep_cpu_init(struct cpufreq_policy *policy)
-{
-	int result;
-	unsigned int policy_cpu, speed;
-	struct get_freqs gf;
-
-	/* only run on CPU to be set, or on its sibling */
-#ifdef CONFIG_SMP
-	cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu));
-#endif
-	policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask);
-
-	/* detect low and high frequency and transition latency */
-	gf.policy = policy;
-	smp_call_function_single(policy_cpu, get_freqs_on_cpu, &gf, 1);
-	if (gf.ret)
-		return gf.ret;
-
-	/* get current speed setting */
-	speed = speedstep_get(policy_cpu);
-	if (!speed)
-		return -EIO;
-
-	dprintk("currently at %s speed setting - %i MHz\n",
-		(speed == speedstep_freqs[SPEEDSTEP_LOW].frequency)
-		? "low" : "high",
-		(speed / 1000));
-
-	/* cpuinfo and default policy values */
-	policy->cur = speed;
-
-	result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
-	if (result)
-		return result;
-
-	cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
-
-	return 0;
-}
-
-
-static int speedstep_cpu_exit(struct cpufreq_policy *policy)
-{
-	cpufreq_frequency_table_put_attr(policy->cpu);
-	return 0;
-}
-
-static struct freq_attr *speedstep_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-
-static struct cpufreq_driver speedstep_driver = {
-	.name	= "speedstep-ich",
-	.verify	= speedstep_verify,
-	.target	= speedstep_target,
-	.init	= speedstep_cpu_init,
-	.exit	= speedstep_cpu_exit,
-	.get	= speedstep_get,
-	.owner	= THIS_MODULE,
-	.attr	= speedstep_attr,
-};
-
-
-/**
- * speedstep_init - initializes the SpeedStep CPUFreq driver
- *
- *   Initializes the SpeedStep support. Returns -ENODEV on unsupported
- * devices, -EINVAL on problems during initiatization, and zero on
- * success.
- */
-static int __init speedstep_init(void)
-{
-	/* detect processor */
-	speedstep_processor = speedstep_detect_processor();
-	if (!speedstep_processor) {
-		dprintk("Intel(R) SpeedStep(TM) capable processor "
-				"not found\n");
-		return -ENODEV;
-	}
-
-	/* detect chipset */
-	if (!speedstep_detect_chipset()) {
-		dprintk("Intel(R) SpeedStep(TM) for this chipset not "
-				"(yet) available.\n");
-		return -ENODEV;
-	}
-
-	/* activate speedstep support */
-	if (speedstep_activate()) {
-		pci_dev_put(speedstep_chipset_dev);
-		return -EINVAL;
-	}
-
-	if (speedstep_find_register())
-		return -ENODEV;
-
-	return cpufreq_register_driver(&speedstep_driver);
-}
-
-
-/**
- * speedstep_exit - unregisters SpeedStep support
- *
- *   Unregisters SpeedStep support.
- */
-static void __exit speedstep_exit(void)
-{
-	pci_dev_put(speedstep_chipset_dev);
-	cpufreq_unregister_driver(&speedstep_driver);
-}
-
-
-MODULE_AUTHOR("Dave Jones <davej@redhat.com>, "
-		"Dominik Brodowski <linux@brodo.de>");
-MODULE_DESCRIPTION("Speedstep driver for Intel mobile processors on chipsets "
-		"with ICH-M southbridges.");
-MODULE_LICENSE("GPL");
-
-module_init(speedstep_init);
-module_exit(speedstep_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
deleted file mode 100644
index a94ec6be69f..00000000000
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ /dev/null
@@ -1,481 +0,0 @@
-/*
- * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
- *
- *  Licensed under the terms of the GNU GPL License version 2.
- *
- *  Library for common functions for Intel SpeedStep v.1 and v.2 support
- *
- *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-
-#include <asm/msr.h>
-#include <asm/tsc.h>
-#include "speedstep-lib.h"
-
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"speedstep-lib", msg)
-
-#define PFX "speedstep-lib: "
-
-#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK
-static int relaxed_check;
-#else
-#define relaxed_check 0
-#endif
-
-/*********************************************************************
- *                   GET PROCESSOR CORE SPEED IN KHZ                 *
- *********************************************************************/
-
-static unsigned int pentium3_get_frequency(enum speedstep_processor processor)
-{
-	/* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */
-	struct {
-		unsigned int ratio;	/* Frequency Multiplier (x10) */
-		u8 bitmap;		/* power on configuration bits
-					[27, 25:22] (in MSR 0x2a) */
-	} msr_decode_mult[] = {
-		{ 30, 0x01 },
-		{ 35, 0x05 },
-		{ 40, 0x02 },
-		{ 45, 0x06 },
-		{ 50, 0x00 },
-		{ 55, 0x04 },
-		{ 60, 0x0b },
-		{ 65, 0x0f },
-		{ 70, 0x09 },
-		{ 75, 0x0d },
-		{ 80, 0x0a },
-		{ 85, 0x26 },
-		{ 90, 0x20 },
-		{ 100, 0x2b },
-		{ 0, 0xff }	/* error or unknown value */
-	};
-
-	/* PIII(-M) FSB settings: see table b1-b of 24547206.pdf */
-	struct {
-		unsigned int value;	/* Front Side Bus speed in MHz */
-		u8 bitmap;		/* power on configuration bits [18: 19]
-					(in MSR 0x2a) */
-	} msr_decode_fsb[] = {
-		{  66, 0x0 },
-		{ 100, 0x2 },
-		{ 133, 0x1 },
-		{   0, 0xff}
-	};
-
-	u32 msr_lo, msr_tmp;
-	int i = 0, j = 0;
-
-	/* read MSR 0x2a - we only need the low 32 bits */
-	rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
-	dprintk("P3 - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
-	msr_tmp = msr_lo;
-
-	/* decode the FSB */
-	msr_tmp &= 0x00c0000;
-	msr_tmp >>= 18;
-	while (msr_tmp != msr_decode_fsb[i].bitmap) {
-		if (msr_decode_fsb[i].bitmap == 0xff)
-			return 0;
-		i++;
-	}
-
-	/* decode the multiplier */
-	if (processor == SPEEDSTEP_CPU_PIII_C_EARLY) {
-		dprintk("workaround for early PIIIs\n");
-		msr_lo &= 0x03c00000;
-	} else
-		msr_lo &= 0x0bc00000;
-	msr_lo >>= 22;
-	while (msr_lo != msr_decode_mult[j].bitmap) {
-		if (msr_decode_mult[j].bitmap == 0xff)
-			return 0;
-		j++;
-	}
-
-	dprintk("speed is %u\n",
-		(msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100));
-
-	return msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100;
-}
-
-
-static unsigned int pentiumM_get_frequency(void)
-{
-	u32 msr_lo, msr_tmp;
-
-	rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
-	dprintk("PM - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
-
-	/* see table B-2 of 24547212.pdf */
-	if (msr_lo & 0x00040000) {
-		printk(KERN_DEBUG PFX "PM - invalid FSB: 0x%x 0x%x\n",
-				msr_lo, msr_tmp);
-		return 0;
-	}
-
-	msr_tmp = (msr_lo >> 22) & 0x1f;
-	dprintk("bits 22-26 are 0x%x, speed is %u\n",
-			msr_tmp, (msr_tmp * 100 * 1000));
-
-	return msr_tmp * 100 * 1000;
-}
-
-static unsigned int pentium_core_get_frequency(void)
-{
-	u32 fsb = 0;
-	u32 msr_lo, msr_tmp;
-	int ret;
-
-	rdmsr(MSR_FSB_FREQ, msr_lo, msr_tmp);
-	/* see table B-2 of 25366920.pdf */
-	switch (msr_lo & 0x07) {
-	case 5:
-		fsb = 100000;
-		break;
-	case 1:
-		fsb = 133333;
-		break;
-	case 3:
-		fsb = 166667;
-		break;
-	case 2:
-		fsb = 200000;
-		break;
-	case 0:
-		fsb = 266667;
-		break;
-	case 4:
-		fsb = 333333;
-		break;
-	default:
-		printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value");
-	}
-
-	rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
-	dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n",
-			msr_lo, msr_tmp);
-
-	msr_tmp = (msr_lo >> 22) & 0x1f;
-	dprintk("bits 22-26 are 0x%x, speed is %u\n",
-			msr_tmp, (msr_tmp * fsb));
-
-	ret = (msr_tmp * fsb);
-	return ret;
-}
-
-
-static unsigned int pentium4_get_frequency(void)
-{
-	struct cpuinfo_x86 *c = &boot_cpu_data;
-	u32 msr_lo, msr_hi, mult;
-	unsigned int fsb = 0;
-	unsigned int ret;
-	u8 fsb_code;
-
-	/* Pentium 4 Model 0 and 1 do not have the Core Clock Frequency
-	 * to System Bus Frequency Ratio Field in the Processor Frequency
-	 * Configuration Register of the MSR. Therefore the current
-	 * frequency cannot be calculated and has to be measured.
-	 */
-	if (c->x86_model < 2)
-		return cpu_khz;
-
-	rdmsr(0x2c, msr_lo, msr_hi);
-
-	dprintk("P4 - MSR_EBC_FREQUENCY_ID: 0x%x 0x%x\n", msr_lo, msr_hi);
-
-	/* decode the FSB: see IA-32 Intel (C) Architecture Software
-	 * Developer's Manual, Volume 3: System Prgramming Guide,
-	 * revision #12 in Table B-1: MSRs in the Pentium 4 and
-	 * Intel Xeon Processors, on page B-4 and B-5.
-	 */
-	fsb_code = (msr_lo >> 16) & 0x7;
-	switch (fsb_code) {
-	case 0:
-		fsb = 100 * 1000;
-		break;
-	case 1:
-		fsb = 13333 * 10;
-		break;
-	case 2:
-		fsb = 200 * 1000;
-		break;
-	}
-
-	if (!fsb)
-		printk(KERN_DEBUG PFX "couldn't detect FSB speed. "
-				"Please send an e-mail to <linux@brodo.de>\n");
-
-	/* Multiplier. */
-	mult = msr_lo >> 24;
-
-	dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n",
-			fsb, mult, (fsb * mult));
-
-	ret = (fsb * mult);
-	return ret;
-}
-
-
-/* Warning: may get called from smp_call_function_single. */
-unsigned int speedstep_get_frequency(enum speedstep_processor processor)
-{
-	switch (processor) {
-	case SPEEDSTEP_CPU_PCORE:
-		return pentium_core_get_frequency();
-	case SPEEDSTEP_CPU_PM:
-		return pentiumM_get_frequency();
-	case SPEEDSTEP_CPU_P4D:
-	case SPEEDSTEP_CPU_P4M:
-		return pentium4_get_frequency();
-	case SPEEDSTEP_CPU_PIII_T:
-	case SPEEDSTEP_CPU_PIII_C:
-	case SPEEDSTEP_CPU_PIII_C_EARLY:
-		return pentium3_get_frequency(processor);
-	default:
-		return 0;
-	};
-	return 0;
-}
-EXPORT_SYMBOL_GPL(speedstep_get_frequency);
-
-
-/*********************************************************************
- *                 DETECT SPEEDSTEP-CAPABLE PROCESSOR                *
- *********************************************************************/
-
-unsigned int speedstep_detect_processor(void)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-	u32 ebx, msr_lo, msr_hi;
-
-	dprintk("x86: %x, model: %x\n", c->x86, c->x86_model);
-
-	if ((c->x86_vendor != X86_VENDOR_INTEL) ||
-	    ((c->x86 != 6) && (c->x86 != 0xF)))
-		return 0;
-
-	if (c->x86 == 0xF) {
-		/* Intel Mobile Pentium 4-M
-		 * or Intel Mobile Pentium 4 with 533 MHz FSB */
-		if (c->x86_model != 2)
-			return 0;
-
-		ebx = cpuid_ebx(0x00000001);
-		ebx &= 0x000000FF;
-
-		dprintk("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask);
-
-		switch (c->x86_mask) {
-		case 4:
-			/*
-			 * B-stepping [M-P4-M]
-			 * sample has ebx = 0x0f, production has 0x0e.
-			 */
-			if ((ebx == 0x0e) || (ebx == 0x0f))
-				return SPEEDSTEP_CPU_P4M;
-			break;
-		case 7:
-			/*
-			 * C-stepping [M-P4-M]
-			 * needs to have ebx=0x0e, else it's a celeron:
-			 * cf. 25130917.pdf / page 7, footnote 5 even
-			 * though 25072120.pdf / page 7 doesn't say
-			 * samples are only of B-stepping...
-			 */
-			if (ebx == 0x0e)
-				return SPEEDSTEP_CPU_P4M;
-			break;
-		case 9:
-			/*
-			 * D-stepping [M-P4-M or M-P4/533]
-			 *
-			 * this is totally strange: CPUID 0x0F29 is
-			 * used by M-P4-M, M-P4/533 and(!) Celeron CPUs.
-			 * The latter need to be sorted out as they don't
-			 * support speedstep.
-			 * Celerons with CPUID 0x0F29 may have either
-			 * ebx=0x8 or 0xf -- 25130917.pdf doesn't say anything
-			 * specific.
-			 * M-P4-Ms may have either ebx=0xe or 0xf [see above]
-			 * M-P4/533 have either ebx=0xe or 0xf. [25317607.pdf]
-			 * also, M-P4M HTs have ebx=0x8, too
-			 * For now, they are distinguished by the model_id
-			 * string
-			 */
-			if ((ebx == 0x0e) ||
-				(strstr(c->x86_model_id,
-				    "Mobile Intel(R) Pentium(R) 4") != NULL))
-				return SPEEDSTEP_CPU_P4M;
-			break;
-		default:
-			break;
-		}
-		return 0;
-	}
-
-	switch (c->x86_model) {
-	case 0x0B: /* Intel PIII [Tualatin] */
-		/* cpuid_ebx(1) is 0x04 for desktop PIII,
-		 * 0x06 for mobile PIII-M */
-		ebx = cpuid_ebx(0x00000001);
-		dprintk("ebx is %x\n", ebx);
-
-		ebx &= 0x000000FF;
-
-		if (ebx != 0x06)
-			return 0;
-
-		/* So far all PIII-M processors support SpeedStep. See
-		 * Intel's 24540640.pdf of June 2003
-		 */
-		return SPEEDSTEP_CPU_PIII_T;
-
-	case 0x08: /* Intel PIII [Coppermine] */
-
-		/* all mobile PIII Coppermines have FSB 100 MHz
-		 * ==> sort out a few desktop PIIIs. */
-		rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi);
-		dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n",
-				msr_lo, msr_hi);
-		msr_lo &= 0x00c0000;
-		if (msr_lo != 0x0080000)
-			return 0;
-
-		/*
-		 * If the processor is a mobile version,
-		 * platform ID has bit 50 set
-		 * it has SpeedStep technology if either
-		 * bit 56 or 57 is set
-		 */
-		rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi);
-		dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n",
-				msr_lo, msr_hi);
-		if ((msr_hi & (1<<18)) &&
-		    (relaxed_check ? 1 : (msr_hi & (3<<24)))) {
-			if (c->x86_mask == 0x01) {
-				dprintk("early PIII version\n");
-				return SPEEDSTEP_CPU_PIII_C_EARLY;
-			} else
-				return SPEEDSTEP_CPU_PIII_C;
-		}
-
-	default:
-		return 0;
-	}
-}
-EXPORT_SYMBOL_GPL(speedstep_detect_processor);
-
-
-/*********************************************************************
- *                     DETECT SPEEDSTEP SPEEDS                       *
- *********************************************************************/
-
-unsigned int speedstep_get_freqs(enum speedstep_processor processor,
-				  unsigned int *low_speed,
-				  unsigned int *high_speed,
-				  unsigned int *transition_latency,
-				  void (*set_state) (unsigned int state))
-{
-	unsigned int prev_speed;
-	unsigned int ret = 0;
-	unsigned long flags;
-	struct timeval tv1, tv2;
-
-	if ((!processor) || (!low_speed) || (!high_speed) || (!set_state))
-		return -EINVAL;
-
-	dprintk("trying to determine both speeds\n");
-
-	/* get current speed */
-	prev_speed = speedstep_get_frequency(processor);
-	if (!prev_speed)
-		return -EIO;
-
-	dprintk("previous speed is %u\n", prev_speed);
-
-	local_irq_save(flags);
-
-	/* switch to low state */
-	set_state(SPEEDSTEP_LOW);
-	*low_speed = speedstep_get_frequency(processor);
-	if (!*low_speed) {
-		ret = -EIO;
-		goto out;
-	}
-
-	dprintk("low speed is %u\n", *low_speed);
-
-	/* start latency measurement */
-	if (transition_latency)
-		do_gettimeofday(&tv1);
-
-	/* switch to high state */
-	set_state(SPEEDSTEP_HIGH);
-
-	/* end latency measurement */
-	if (transition_latency)
-		do_gettimeofday(&tv2);
-
-	*high_speed = speedstep_get_frequency(processor);
-	if (!*high_speed) {
-		ret = -EIO;
-		goto out;
-	}
-
-	dprintk("high speed is %u\n", *high_speed);
-
-	if (*low_speed == *high_speed) {
-		ret = -ENODEV;
-		goto out;
-	}
-
-	/* switch to previous state, if necessary */
-	if (*high_speed != prev_speed)
-		set_state(SPEEDSTEP_LOW);
-
-	if (transition_latency) {
-		*transition_latency = (tv2.tv_sec - tv1.tv_sec) * USEC_PER_SEC +
-			tv2.tv_usec - tv1.tv_usec;
-		dprintk("transition latency is %u uSec\n", *transition_latency);
-
-		/* convert uSec to nSec and add 20% for safety reasons */
-		*transition_latency *= 1200;
-
-		/* check if the latency measurement is too high or too low
-		 * and set it to a safe value (500uSec) in that case
-		 */
-		if (*transition_latency > 10000000 ||
-		    *transition_latency < 50000) {
-			printk(KERN_WARNING PFX "frequency transition "
-					"measured seems out of range (%u "
-					"nSec), falling back to a safe one of"
-					"%u nSec.\n",
-					*transition_latency, 500000);
-			*transition_latency = 500000;
-		}
-	}
-
-out:
-	local_irq_restore(flags);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(speedstep_get_freqs);
-
-#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK
-module_param(relaxed_check, int, 0444);
-MODULE_PARM_DESC(relaxed_check,
-		"Don't do all checks for speedstep capability.");
-#endif
-
-MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
-MODULE_DESCRIPTION("Library for Intel SpeedStep 1 or 2 cpufreq drivers.");
-MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
deleted file mode 100644
index 70d9cea1219..00000000000
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
- *
- *  Licensed under the terms of the GNU GPL License version 2.
- *
- *  Library for common functions for Intel SpeedStep v.1 and v.2 support
- *
- *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
- */
-
-
-
-/* processors */
-enum speedstep_processor {
-	SPEEDSTEP_CPU_PIII_C_EARLY = 0x00000001,  /* Coppermine core */
-	SPEEDSTEP_CPU_PIII_C	   = 0x00000002,  /* Coppermine core */
-	SPEEDSTEP_CPU_PIII_T	   = 0x00000003,  /* Tualatin core */
-	SPEEDSTEP_CPU_P4M	   = 0x00000004,  /* P4-M  */
-/* the following processors are not speedstep-capable and are not auto-detected
- * in speedstep_detect_processor(). However, their speed can be detected using
- * the speedstep_get_frequency() call. */
-	SPEEDSTEP_CPU_PM	   = 0xFFFFFF03,  /* Pentium M  */
-	SPEEDSTEP_CPU_P4D	   = 0xFFFFFF04,  /* desktop P4  */
-	SPEEDSTEP_CPU_PCORE	   = 0xFFFFFF05,  /* Core */
-};
-
-/* speedstep states -- only two of them */
-
-#define SPEEDSTEP_HIGH	0x00000000
-#define SPEEDSTEP_LOW	0x00000001
-
-
-/* detect a speedstep-capable processor */
-extern enum speedstep_processor speedstep_detect_processor(void);
-
-/* detect the current speed (in khz) of the processor */
-extern unsigned int speedstep_get_frequency(enum speedstep_processor processor);
-
-
-/* detect the low and high speeds of the processor. The callback
- * set_state"'s first argument is either SPEEDSTEP_HIGH or
- * SPEEDSTEP_LOW; the second argument is zero so that no
- * cpufreq_notify_transition calls are initiated.
- */
-extern unsigned int speedstep_get_freqs(enum speedstep_processor processor,
-	unsigned int *low_speed,
-	unsigned int *high_speed,
-	unsigned int *transition_latency,
-	void (*set_state) (unsigned int state));
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
deleted file mode 100644
index 91bc25b67bc..00000000000
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
+++ /dev/null
@@ -1,467 +0,0 @@
-/*
- * Intel SpeedStep SMI driver.
- *
- * (C) 2003  Hiroshi Miura <miura@da-cha.org>
- *
- *  Licensed under the terms of the GNU GPL License version 2.
- *
- */
-
-
-/*********************************************************************
- *                        SPEEDSTEP - DEFINITIONS                    *
- *********************************************************************/
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/delay.h>
-#include <linux/io.h>
-#include <asm/ist.h>
-
-#include "speedstep-lib.h"
-
-/* speedstep system management interface port/command.
- *
- * These parameters are got from IST-SMI BIOS call.
- * If user gives it, these are used.
- *
- */
-static int smi_port;
-static int smi_cmd;
-static unsigned int smi_sig;
-
-/* info about the processor */
-static enum speedstep_processor speedstep_processor;
-
-/*
- * There are only two frequency states for each processor. Values
- * are in kHz for the time being.
- */
-static struct cpufreq_frequency_table speedstep_freqs[] = {
-	{SPEEDSTEP_HIGH,	0},
-	{SPEEDSTEP_LOW,		0},
-	{0,			CPUFREQ_TABLE_END},
-};
-
-#define GET_SPEEDSTEP_OWNER 0
-#define GET_SPEEDSTEP_STATE 1
-#define SET_SPEEDSTEP_STATE 2
-#define GET_SPEEDSTEP_FREQS 4
-
-/* how often shall the SMI call be tried if it failed, e.g. because
- * of DMA activity going on? */
-#define SMI_TRIES 5
-
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"speedstep-smi", msg)
-
-/**
- * speedstep_smi_ownership
- */
-static int speedstep_smi_ownership(void)
-{
-	u32 command, result, magic, dummy;
-	u32 function = GET_SPEEDSTEP_OWNER;
-	unsigned char magic_data[] = "Copyright (c) 1999 Intel Corporation";
-
-	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
-	magic = virt_to_phys(magic_data);
-
-	dprintk("trying to obtain ownership with command %x at port %x\n",
-			command, smi_port);
-
-	__asm__ __volatile__(
-		"push %%ebp\n"
-		"out %%al, (%%dx)\n"
-		"pop %%ebp\n"
-		: "=D" (result),
-		  "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy),
-		  "=S" (dummy)
-		: "a" (command), "b" (function), "c" (0), "d" (smi_port),
-		  "D" (0), "S" (magic)
-		: "memory"
-	);
-
-	dprintk("result is %x\n", result);
-
-	return result;
-}
-
-/**
- * speedstep_smi_get_freqs - get SpeedStep preferred & current freq.
- * @low: the low frequency value is placed here
- * @high: the high frequency value is placed here
- *
- * Only available on later SpeedStep-enabled systems, returns false results or
- * even hangs [cf. bugme.osdl.org # 1422] on earlier systems. Empirical testing
- * shows that the latter occurs if !(ist_info.event & 0xFFFF).
- */
-static int speedstep_smi_get_freqs(unsigned int *low, unsigned int *high)
-{
-	u32 command, result = 0, edi, high_mhz, low_mhz, dummy;
-	u32 state = 0;
-	u32 function = GET_SPEEDSTEP_FREQS;
-
-	if (!(ist_info.event & 0xFFFF)) {
-		dprintk("bug #1422 -- can't read freqs from BIOS\n");
-		return -ENODEV;
-	}
-
-	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
-
-	dprintk("trying to determine frequencies with command %x at port %x\n",
-			command, smi_port);
-
-	__asm__ __volatile__(
-		"push %%ebp\n"
-		"out %%al, (%%dx)\n"
-		"pop %%ebp"
-		: "=a" (result),
-		  "=b" (high_mhz),
-		  "=c" (low_mhz),
-		  "=d" (state), "=D" (edi), "=S" (dummy)
-		: "a" (command),
-		  "b" (function),
-		  "c" (state),
-		  "d" (smi_port), "S" (0), "D" (0)
-	);
-
-	dprintk("result %x, low_freq %u, high_freq %u\n",
-			result, low_mhz, high_mhz);
-
-	/* abort if results are obviously incorrect... */
-	if ((high_mhz + low_mhz) < 600)
-		return -EINVAL;
-
-	*high = high_mhz * 1000;
-	*low  = low_mhz  * 1000;
-
-	return result;
-}
-
-/**
- * speedstep_get_state - set the SpeedStep state
- * @state: processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
- *
- */
-static int speedstep_get_state(void)
-{
-	u32 function = GET_SPEEDSTEP_STATE;
-	u32 result, state, edi, command, dummy;
-
-	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
-
-	dprintk("trying to determine current setting with command %x "
-		"at port %x\n", command, smi_port);
-
-	__asm__ __volatile__(
-		"push %%ebp\n"
-		"out %%al, (%%dx)\n"
-		"pop %%ebp\n"
-		: "=a" (result),
-		  "=b" (state), "=D" (edi),
-		  "=c" (dummy), "=d" (dummy), "=S" (dummy)
-		: "a" (command), "b" (function), "c" (0),
-		  "d" (smi_port), "S" (0), "D" (0)
-	);
-
-	dprintk("state is %x, result is %x\n", state, result);
-
-	return state & 1;
-}
-
-
-/**
- * speedstep_set_state - set the SpeedStep state
- * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
- *
- */
-static void speedstep_set_state(unsigned int state)
-{
-	unsigned int result = 0, command, new_state, dummy;
-	unsigned long flags;
-	unsigned int function = SET_SPEEDSTEP_STATE;
-	unsigned int retry = 0;
-
-	if (state > 0x1)
-		return;
-
-	/* Disable IRQs */
-	local_irq_save(flags);
-
-	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
-
-	dprintk("trying to set frequency to state %u "
-		"with command %x at port %x\n",
-		state, command, smi_port);
-
-	do {
-		if (retry) {
-			dprintk("retry %u, previous result %u, waiting...\n",
-					retry, result);
-			mdelay(retry * 50);
-		}
-		retry++;
-		__asm__ __volatile__(
-			"push %%ebp\n"
-			"out %%al, (%%dx)\n"
-			"pop %%ebp"
-			: "=b" (new_state), "=D" (result),
-			  "=c" (dummy), "=a" (dummy),
-			  "=d" (dummy), "=S" (dummy)
-			: "a" (command), "b" (function), "c" (state),
-			  "d" (smi_port), "S" (0), "D" (0)
-			);
-	} while ((new_state != state) && (retry <= SMI_TRIES));
-
-	/* enable IRQs */
-	local_irq_restore(flags);
-
-	if (new_state == state)
-		dprintk("change to %u MHz succeeded after %u tries "
-			"with result %u\n",
-			(speedstep_freqs[new_state].frequency / 1000),
-			retry, result);
-	else
-		printk(KERN_ERR "cpufreq: change to state %u "
-			"failed with new_state %u and result %u\n",
-			state, new_state, result);
-
-	return;
-}
-
-
-/**
- * speedstep_target - set a new CPUFreq policy
- * @policy: new policy
- * @target_freq: new freq
- * @relation:
- *
- * Sets a new CPUFreq policy/freq.
- */
-static int speedstep_target(struct cpufreq_policy *policy,
-			unsigned int target_freq, unsigned int relation)
-{
-	unsigned int newstate = 0;
-	struct cpufreq_freqs freqs;
-
-	if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0],
-				target_freq, relation, &newstate))
-		return -EINVAL;
-
-	freqs.old = speedstep_freqs[speedstep_get_state()].frequency;
-	freqs.new = speedstep_freqs[newstate].frequency;
-	freqs.cpu = 0; /* speedstep.c is UP only driver */
-
-	if (freqs.old == freqs.new)
-		return 0;
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-	speedstep_set_state(newstate);
-	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-
-	return 0;
-}
-
-
-/**
- * speedstep_verify - verifies a new CPUFreq policy
- * @policy: new policy
- *
- * Limit must be within speedstep_low_freq and speedstep_high_freq, with
- * at least one border included.
- */
-static int speedstep_verify(struct cpufreq_policy *policy)
-{
-	return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
-}
-
-
-static int speedstep_cpu_init(struct cpufreq_policy *policy)
-{
-	int result;
-	unsigned int speed, state;
-	unsigned int *low, *high;
-
-	/* capability check */
-	if (policy->cpu != 0)
-		return -ENODEV;
-
-	result = speedstep_smi_ownership();
-	if (result) {
-		dprintk("fails in acquiring ownership of a SMI interface.\n");
-		return -EINVAL;
-	}
-
-	/* detect low and high frequency */
-	low = &speedstep_freqs[SPEEDSTEP_LOW].frequency;
-	high = &speedstep_freqs[SPEEDSTEP_HIGH].frequency;
-
-	result = speedstep_smi_get_freqs(low, high);
-	if (result) {
-		/* fall back to speedstep_lib.c dection mechanism:
-		 * try both states out */
-		dprintk("could not detect low and high frequencies "
-				"by SMI call.\n");
-		result = speedstep_get_freqs(speedstep_processor,
-				low, high,
-				NULL,
-				&speedstep_set_state);
-
-		if (result) {
-			dprintk("could not detect two different speeds"
-					" -- aborting.\n");
-			return result;
-		} else
-			dprintk("workaround worked.\n");
-	}
-
-	/* get current speed setting */
-	state = speedstep_get_state();
-	speed = speedstep_freqs[state].frequency;
-
-	dprintk("currently at %s speed setting - %i MHz\n",
-		(speed == speedstep_freqs[SPEEDSTEP_LOW].frequency)
-		? "low" : "high",
-		(speed / 1000));
-
-	/* cpuinfo and default policy values */
-	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
-	policy->cur = speed;
-
-	result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
-	if (result)
-		return result;
-
-	cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
-
-	return 0;
-}
-
-static int speedstep_cpu_exit(struct cpufreq_policy *policy)
-{
-	cpufreq_frequency_table_put_attr(policy->cpu);
-	return 0;
-}
-
-static unsigned int speedstep_get(unsigned int cpu)
-{
-	if (cpu)
-		return -ENODEV;
-	return speedstep_get_frequency(speedstep_processor);
-}
-
-
-static int speedstep_resume(struct cpufreq_policy *policy)
-{
-	int result = speedstep_smi_ownership();
-
-	if (result)
-		dprintk("fails in re-acquiring ownership of a SMI interface.\n");
-
-	return result;
-}
-
-static struct freq_attr *speedstep_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-static struct cpufreq_driver speedstep_driver = {
-	.name		= "speedstep-smi",
-	.verify		= speedstep_verify,
-	.target		= speedstep_target,
-	.init		= speedstep_cpu_init,
-	.exit		= speedstep_cpu_exit,
-	.get		= speedstep_get,
-	.resume		= speedstep_resume,
-	.owner		= THIS_MODULE,
-	.attr		= speedstep_attr,
-};
-
-/**
- * speedstep_init - initializes the SpeedStep CPUFreq driver
- *
- *   Initializes the SpeedStep support. Returns -ENODEV on unsupported
- * BIOS, -EINVAL on problems during initiatization, and zero on
- * success.
- */
-static int __init speedstep_init(void)
-{
-	speedstep_processor = speedstep_detect_processor();
-
-	switch (speedstep_processor) {
-	case SPEEDSTEP_CPU_PIII_T:
-	case SPEEDSTEP_CPU_PIII_C:
-	case SPEEDSTEP_CPU_PIII_C_EARLY:
-		break;
-	default:
-		speedstep_processor = 0;
-	}
-
-	if (!speedstep_processor) {
-		dprintk("No supported Intel CPU detected.\n");
-		return -ENODEV;
-	}
-
-	dprintk("signature:0x%.8lx, command:0x%.8lx, "
-		"event:0x%.8lx, perf_level:0x%.8lx.\n",
-		ist_info.signature, ist_info.command,
-		ist_info.event, ist_info.perf_level);
-
-	/* Error if no IST-SMI BIOS or no PARM
-		 sig= 'ISGE' aka 'Intel Speedstep Gate E' */
-	if ((ist_info.signature !=  0x47534943) && (
-	    (smi_port == 0) || (smi_cmd == 0)))
-		return -ENODEV;
-
-	if (smi_sig == 1)
-		smi_sig = 0x47534943;
-	else
-		smi_sig = ist_info.signature;
-
-	/* setup smi_port from MODLULE_PARM or BIOS */
-	if ((smi_port > 0xff) || (smi_port < 0))
-		return -EINVAL;
-	else if (smi_port == 0)
-		smi_port = ist_info.command & 0xff;
-
-	if ((smi_cmd > 0xff) || (smi_cmd < 0))
-		return -EINVAL;
-	else if (smi_cmd == 0)
-		smi_cmd = (ist_info.command >> 16) & 0xff;
-
-	return cpufreq_register_driver(&speedstep_driver);
-}
-
-
-/**
- * speedstep_exit - unregisters SpeedStep support
- *
- *   Unregisters SpeedStep support.
- */
-static void __exit speedstep_exit(void)
-{
-	cpufreq_unregister_driver(&speedstep_driver);
-}
-
-module_param(smi_port, int, 0444);
-module_param(smi_cmd,  int, 0444);
-module_param(smi_sig, uint, 0444);
-
-MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value "
-		"-- Intel's default setting is 0xb2");
-MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value "
-		"-- Intel's default setting is 0x82");
-MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the "
-		"SMI interface.");
-
-MODULE_AUTHOR("Hiroshi Miura");
-MODULE_DESCRIPTION("Speedstep driver for IST applet SMI interface.");
-MODULE_LICENSE("GPL");
-
-module_init(speedstep_init);
-module_exit(speedstep_exit);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index df86bc8c859..1edf5ba4fb2 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -29,10 +29,10 @@
 
 static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 {
+	u64 misc_enable;
+
 	/* Unmask CPUID levels if masked: */
 	if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
-		u64 misc_enable;
-
 		rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
 
 		if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) {
@@ -118,8 +118,6 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 	 * (model 2) with the same problem.
 	 */
 	if (c->x86 == 15) {
-		u64 misc_enable;
-
 		rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
 
 		if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) {
@@ -130,6 +128,19 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 		}
 	}
 #endif
+
+	/*
+	 * If fast string is not enabled in IA32_MISC_ENABLE for any reason,
+	 * clear the fast string and enhanced fast string CPU capabilities.
+	 */
+	if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
+		rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
+		if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) {
+			printk(KERN_INFO "Disabled fast string operations\n");
+			setup_clear_cpu_cap(X86_FEATURE_REP_GOOD);
+			setup_clear_cpu_cap(X86_FEATURE_ERMS);
+		}
+	}
 }
 
 #ifdef CONFIG_X86_32
@@ -400,12 +411,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 
 		switch (c->x86_model) {
 		case 5:
-			if (c->x86_mask == 0) {
-				if (l2 == 0)
-					p = "Celeron (Covington)";
-				else if (l2 == 256)
-					p = "Mobile Pentium II (Dixon)";
-			}
+			if (l2 == 0)
+				p = "Celeron (Covington)";
+			else if (l2 == 256)
+				p = "Mobile Pentium II (Dixon)";
 			break;
 
 		case 6:
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 1ce1af2899d..c105c533ed9 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -327,7 +327,6 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
 	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
 	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
 
-	l3->indices = (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1;
 	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
 }
 
@@ -454,27 +453,16 @@ int amd_set_l3_disable_slot(struct amd_l3_cache *l3, int cpu, unsigned slot,
 {
 	int ret = 0;
 
-#define SUBCACHE_MASK	(3UL << 20)
-#define SUBCACHE_INDEX	0xfff
-
-	/*
-	 * check whether this slot is already used or
-	 * the index is already disabled
-	 */
+	/*  check if @slot is already used or the index is already disabled */
 	ret = amd_get_l3_disable_slot(l3, slot);
 	if (ret >= 0)
 		return -EINVAL;
 
-	/*
-	 * check whether the other slot has disabled the
-	 * same index already
-	 */
-	if (index == amd_get_l3_disable_slot(l3, !slot))
+	if (index > l3->indices)
 		return -EINVAL;
 
-	/* do not allow writes outside of allowed bits */
-	if ((index & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) ||
-	    ((index & SUBCACHE_INDEX) > l3->indices))
+	/* check whether the other slot has disabled the same index already */
+	if (index == amd_get_l3_disable_slot(l3, !slot))
 		return -EINVAL;
 
 	amd_l3_disable_index(l3, cpu, slot, index);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3385ea26f68..ff1ae9b6464 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -105,20 +105,6 @@ static int			cpu_missing;
 ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
 EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);
 
-static int default_decode_mce(struct notifier_block *nb, unsigned long val,
-			       void *data)
-{
-	pr_emerg(HW_ERR "No human readable MCE decoding support on this CPU type.\n");
-	pr_emerg(HW_ERR "Run the message through 'mcelog --ascii' to decode.\n");
-
-	return NOTIFY_STOP;
-}
-
-static struct notifier_block mce_dec_nb = {
-	.notifier_call = default_decode_mce,
-	.priority      = -1,
-};
-
 /* MCA banks polled by the period polling timer for corrected events */
 DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
 	[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
@@ -212,6 +198,8 @@ void mce_log(struct mce *mce)
 
 static void print_mce(struct mce *m)
 {
+	int ret = 0;
+
 	pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n",
 	       m->extcpu, m->mcgstatus, m->bank, m->status);
 
@@ -239,7 +227,11 @@ static void print_mce(struct mce *m)
 	 * Print out human-readable details about the MCE error,
 	 * (if the CPU has an implementation for that)
 	 */
-	atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
+	ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
+	if (ret == NOTIFY_STOP)
+		return;
+
+	pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
 }
 
 #define PANIC_TIMEOUT 5 /* 5 seconds */
@@ -590,7 +582,6 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) {
 			mce_log(&m);
 			atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m);
-			add_taint(TAINT_MACHINE_CHECK);
 		}
 
 		/*
@@ -1722,8 +1713,6 @@ __setup("mce", mcheck_enable);
 
 int __init mcheck_init(void)
 {
-	atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb);
-
 	mcheck_intel_therm_init();
 
 	return 0;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 167f97b5596..bb0adad3514 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -509,6 +509,7 @@ recurse:
 out_free:
 	if (b) {
 		kobject_put(&b->kobj);
+		list_del(&b->miscj);
 		kfree(b);
 	}
 	return err;
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 6f8c5e9da97..27c625178bf 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -187,8 +187,6 @@ static int therm_throt_process(bool new_event, int event, int level)
 				this_cpu,
 				level == CORE_LEVEL ? "Core" : "Package",
 				state->count);
-
-		add_taint(TAINT_MACHINE_CHECK);
 		return 1;
 	}
 	if (old_event) {
@@ -355,7 +353,6 @@ static void notify_thresholds(__u64 msr_val)
 static void intel_thermal_interrupt(void)
 {
 	__u64 msr_val;
-	struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
 
 	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
 
@@ -367,19 +364,19 @@ static void intel_thermal_interrupt(void)
 				CORE_LEVEL) != 0)
 		mce_log_therm_throt_event(CORE_THROTTLED | msr_val);
 
-	if (cpu_has(c, X86_FEATURE_PLN))
+	if (this_cpu_has(X86_FEATURE_PLN))
 		if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
 					POWER_LIMIT_EVENT,
 					CORE_LEVEL) != 0)
 			mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val);
 
-	if (cpu_has(c, X86_FEATURE_PTS)) {
+	if (this_cpu_has(X86_FEATURE_PTS)) {
 		rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
 		if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
 					THERMAL_THROTTLING_EVENT,
 					PACKAGE_LEVEL) != 0)
 			mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val);
-		if (cpu_has(c, X86_FEATURE_PLN))
+		if (this_cpu_has(X86_FEATURE_PLN))
 			if (therm_throt_process(msr_val &
 					PACKAGE_THERM_STATUS_POWER_LIMIT,
 					POWER_LIMIT_EVENT,
@@ -393,7 +390,6 @@ static void unexpected_thermal_interrupt(void)
 {
 	printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n",
 			smp_processor_id());
-	add_taint(TAINT_MACHINE_CHECK);
 }
 
 static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
@@ -446,18 +442,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 	 */
 	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
 
+	h = lvtthmr_init;
 	/*
 	 * The initial value of thermal LVT entries on all APs always reads
 	 * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
 	 * sequence to them and LVT registers are reset to 0s except for
 	 * the mask bits which are set to 1s when APs receive INIT IPI.
-	 * Always restore the value that BIOS has programmed on AP based on
-	 * BSP's info we saved since BIOS is always setting the same value
-	 * for all threads/cores
+	 * If BIOS takes over the thermal interrupt and sets its interrupt
+	 * delivery mode to SMI (not fixed), it restores the value that the
+	 * BIOS has programmed on AP based on BSP's info we saved since BIOS
+	 * is always setting the same value for all threads/cores.
 	 */
-	apic_write(APIC_LVTTHMR, lvtthmr_init);
+	if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
+		apic_write(APIC_LVTTHMR, lvtthmr_init);
 
-	h = lvtthmr_init;
 
 	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
 		printk(KERN_DEBUG
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 632e5dc9c9c..3a0338b4b17 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -31,6 +31,7 @@
 #include <asm/nmi.h>
 #include <asm/compat.h>
 #include <asm/smp.h>
+#include <asm/alternative.h>
 
 #if 0
 #undef wrmsrl
@@ -363,12 +364,18 @@ again:
 	return new_raw_count;
 }
 
-/* using X86_FEATURE_PERFCTR_CORE to later implement ALTERNATIVE() here */
 static inline int x86_pmu_addr_offset(int index)
 {
-	if (boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
-		return index << 1;
-	return index;
+	int offset;
+
+	/* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */
+	alternative_io(ASM_NOP2,
+		       "shll $1, %%eax",
+		       X86_FEATURE_PERFCTR_CORE,
+		       "=a" (offset),
+		       "a"  (index));
+
+	return offset;
 }
 
 static inline unsigned int x86_pmu_config_addr(int index)
@@ -613,8 +620,8 @@ static int x86_setup_perfctr(struct perf_event *event)
 	/*
 	 * Branch tracing:
 	 */
-	if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
-	    (hwc->sample_period == 1)) {
+	if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
+	    !attr->freq && hwc->sample_period == 1) {
 		/* BTS is not supported by this architecture. */
 		if (!x86_pmu.bts_active)
 			return -EOPNOTSUPP;
@@ -1288,6 +1295,16 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
+	/*
+	 * Some chipsets need to unmask the LVTPC in a particular spot
+	 * inside the nmi handler.  As a result, the unmasking was pushed
+	 * into all the nmi handlers.
+	 *
+	 * This generic handler doesn't seem to have any issues where the
+	 * unmasking occurs so it was left at the top.
+	 */
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		if (!test_bit(idx, cpuc->active_mask)) {
 			/*
@@ -1374,8 +1391,6 @@ perf_event_nmi_handler(struct notifier_block *self,
 		return NOTIFY_DONE;
 	}
 
-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-
 	handled = x86_pmu.handle_irq(args->regs);
 	if (!handled)
 		return NOTIFY_DONE;
@@ -1758,17 +1773,6 @@ static struct pmu pmu = {
  * callchain support
  */
 
-static void
-backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
-{
-	/* Ignore warnings */
-}
-
-static void backtrace_warning(void *data, char *msg)
-{
-	/* Ignore warnings */
-}
-
 static int backtrace_stack(void *data, char *name)
 {
 	return 0;
@@ -1782,8 +1786,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
 }
 
 static const struct stacktrace_ops backtrace_ops = {
-	.warning		= backtrace_warning,
-	.warning_symbol		= backtrace_warning_symbol,
 	.stack			= backtrace_stack,
 	.address		= backtrace_address,
 	.walk_stack		= print_context_stack_bp,
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index cf4e369cea6..fe29c1d2219 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -96,12 +96,14 @@ static __initconst const u64 amd_hw_cache_event_ids
  */
 static const u64 amd_perfmon_event_map[] =
 {
-  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
-  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
-  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0080,
-  [PERF_COUNT_HW_CACHE_MISSES]		= 0x0081,
-  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c2,
-  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c3,
+  [PERF_COUNT_HW_CPU_CYCLES]			= 0x0076,
+  [PERF_COUNT_HW_INSTRUCTIONS]			= 0x00c0,
+  [PERF_COUNT_HW_CACHE_REFERENCES]		= 0x0080,
+  [PERF_COUNT_HW_CACHE_MISSES]			= 0x0081,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]		= 0x00c2,
+  [PERF_COUNT_HW_BRANCH_MISSES]			= 0x00c3,
+  [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= 0x00d0, /* "Decoder empty" event */
+  [PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= 0x00d1, /* "Dispatch stalls" event */
 };
 
 static u64 amd_pmu_event_map(int hw_event)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 43fa20b1381..41178c826c4 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -25,7 +25,7 @@ struct intel_percore {
 /*
  * Intel PerfMon, used on Core and later.
  */
-static const u64 intel_perfmon_event_map[] =
+static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
 {
   [PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
   [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
@@ -36,7 +36,7 @@ static const u64 intel_perfmon_event_map[] =
   [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
 };
 
-static struct event_constraint intel_core_event_constraints[] =
+static struct event_constraint intel_core_event_constraints[] __read_mostly =
 {
 	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
 	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
@@ -47,7 +47,7 @@ static struct event_constraint intel_core_event_constraints[] =
 	EVENT_CONSTRAINT_END
 };
 
-static struct event_constraint intel_core2_event_constraints[] =
+static struct event_constraint intel_core2_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -70,7 +70,7 @@ static struct event_constraint intel_core2_event_constraints[] =
 	EVENT_CONSTRAINT_END
 };
 
-static struct event_constraint intel_nehalem_event_constraints[] =
+static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -86,19 +86,19 @@ static struct event_constraint intel_nehalem_event_constraints[] =
 	EVENT_CONSTRAINT_END
 };
 
-static struct extra_reg intel_nehalem_extra_regs[] =
+static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
 {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
 	EVENT_EXTRA_END
 };
 
-static struct event_constraint intel_nehalem_percore_constraints[] =
+static struct event_constraint intel_nehalem_percore_constraints[] __read_mostly =
 {
 	INTEL_EVENT_CONSTRAINT(0xb7, 0),
 	EVENT_CONSTRAINT_END
 };
 
-static struct event_constraint intel_westmere_event_constraints[] =
+static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -110,7 +110,7 @@ static struct event_constraint intel_westmere_event_constraints[] =
 	EVENT_CONSTRAINT_END
 };
 
-static struct event_constraint intel_snb_event_constraints[] =
+static struct event_constraint intel_snb_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -123,21 +123,21 @@ static struct event_constraint intel_snb_event_constraints[] =
 	EVENT_CONSTRAINT_END
 };
 
-static struct extra_reg intel_westmere_extra_regs[] =
+static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
 {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
 	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff),
 	EVENT_EXTRA_END
 };
 
-static struct event_constraint intel_westmere_percore_constraints[] =
+static struct event_constraint intel_westmere_percore_constraints[] __read_mostly =
 {
 	INTEL_EVENT_CONSTRAINT(0xb7, 0),
 	INTEL_EVENT_CONSTRAINT(0xbb, 0),
 	EVENT_CONSTRAINT_END
 };
 
-static struct event_constraint intel_gen_event_constraints[] =
+static struct event_constraint intel_gen_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -184,26 +184,23 @@ static __initconst const u64 snb_hw_cache_event_ids
 	},
  },
  [ C(LL  ) ] = {
-	/*
-	 * TBD: Need Off-core Response Performance Monitoring support
-	 */
 	[ C(OP_READ) ] = {
-		/* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
-		[ C(RESULT_MISS)   ] = 0x01bb,
+		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
 	[ C(OP_WRITE) ] = {
-		/* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */
-		[ C(RESULT_MISS)   ] = 0x01bb,
+		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
 	[ C(OP_PREFETCH) ] = {
-		/* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
-		[ C(RESULT_MISS)   ] = 0x01bb,
+		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
  },
  [ C(DTLB) ] = {
@@ -285,26 +282,26 @@ static __initconst const u64 westmere_hw_cache_event_ids
  },
  [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
-		/* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
-		[ C(RESULT_MISS)   ] = 0x01bb,
+		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
 	/*
 	 * Use RFO, not WRITEBACK, because a write miss would typically occur
 	 * on RFO.
 	 */
 	[ C(OP_WRITE) ] = {
-		/* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */
-		[ C(RESULT_ACCESS) ] = 0x01bb,
-		/* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */
+		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
 		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
 	[ C(OP_PREFETCH) ] = {
-		/* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
-		[ C(RESULT_MISS)   ] = 0x01bb,
+		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
  },
  [ C(DTLB) ] = {
@@ -352,16 +349,36 @@ static __initconst const u64 westmere_hw_cache_event_ids
 };
 
 /*
- * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3
+ * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
+ * See IA32 SDM Vol 3B 30.6.1.3
  */
 
-#define DMND_DATA_RD     (1 << 0)
-#define DMND_RFO         (1 << 1)
-#define DMND_WB          (1 << 3)
-#define PF_DATA_RD       (1 << 4)
-#define PF_DATA_RFO      (1 << 5)
-#define RESP_UNCORE_HIT  (1 << 8)
-#define RESP_MISS        (0xf600) /* non uncore hit */
+#define NHM_DMND_DATA_RD	(1 << 0)
+#define NHM_DMND_RFO		(1 << 1)
+#define NHM_DMND_IFETCH		(1 << 2)
+#define NHM_DMND_WB		(1 << 3)
+#define NHM_PF_DATA_RD		(1 << 4)
+#define NHM_PF_DATA_RFO		(1 << 5)
+#define NHM_PF_IFETCH		(1 << 6)
+#define NHM_OFFCORE_OTHER	(1 << 7)
+#define NHM_UNCORE_HIT		(1 << 8)
+#define NHM_OTHER_CORE_HIT_SNP	(1 << 9)
+#define NHM_OTHER_CORE_HITM	(1 << 10)
+        			/* reserved */
+#define NHM_REMOTE_CACHE_FWD	(1 << 12)
+#define NHM_REMOTE_DRAM		(1 << 13)
+#define NHM_LOCAL_DRAM		(1 << 14)
+#define NHM_NON_DRAM		(1 << 15)
+
+#define NHM_ALL_DRAM		(NHM_REMOTE_DRAM|NHM_LOCAL_DRAM)
+
+#define NHM_DMND_READ		(NHM_DMND_DATA_RD)
+#define NHM_DMND_WRITE		(NHM_DMND_RFO|NHM_DMND_WB)
+#define NHM_DMND_PREFETCH	(NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
+
+#define NHM_L3_HIT	(NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
+#define NHM_L3_MISS	(NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD)
+#define NHM_L3_ACCESS	(NHM_L3_HIT|NHM_L3_MISS)
 
 static __initconst const u64 nehalem_hw_cache_extra_regs
 				[PERF_COUNT_HW_CACHE_MAX]
@@ -370,16 +387,16 @@ static __initconst const u64 nehalem_hw_cache_extra_regs
 {
  [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT,
-		[ C(RESULT_MISS)   ] = DMND_DATA_RD|RESP_MISS,
+		[ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
+		[ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_L3_MISS,
 	},
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT,
-		[ C(RESULT_MISS)   ] = DMND_RFO|DMND_WB|RESP_MISS,
+		[ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
+		[ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_L3_MISS,
 	},
 	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT,
-		[ C(RESULT_MISS)   ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS,
+		[ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
+		[ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
 	},
  }
 };
@@ -933,6 +950,16 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
+	/*
+	 * Some chipsets need to unmask the LVTPC in a particular spot
+	 * inside the nmi handler.  As a result, the unmasking was pushed
+	 * into all the nmi handlers.
+	 *
+	 * This handler doesn't seem to have any issues with the unmasking
+	 * so it was left at the top.
+	 */
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+
 	intel_pmu_disable_all();
 	handled = intel_pmu_drain_bts_buffer();
 	status = intel_pmu_get_status();
@@ -998,6 +1025,9 @@ intel_bts_constraints(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	unsigned int hw_event, bts_event;
 
+	if (event->attr.freq)
+		return NULL;
+
 	hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
 	bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
 
@@ -1305,7 +1335,7 @@ static void intel_clovertown_quirks(void)
 	 * AJ106 could possibly be worked around by not allowing LBR
 	 *       usage from PEBS, including the fixup.
 	 * AJ68  could possibly be worked around by always programming
-	 * 	 a pebs_event_reset[0] value and coping with the lost events.
+	 *	 a pebs_event_reset[0] value and coping with the lost events.
 	 *
 	 * But taken together it might just make sense to not enable PEBS on
 	 * these chips.
@@ -1409,6 +1439,23 @@ static __init int intel_pmu_init(void)
 		x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.extra_regs = intel_nehalem_extra_regs;
+
+		/* UOPS_ISSUED.STALLED_CYCLES */
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+		/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
+
+		if (ebx & 0x40) {
+			/*
+			 * Erratum AAJ80 detected, we work it around by using
+			 * the BR_MISP_EXEC.ANY event. This will over-count
+			 * branch-misses, but it's still much better than the
+			 * architectural event which is often completely bogus:
+			 */
+			intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
+
+			pr_cont("erratum AAJ80 worked around, ");
+		}
 		pr_cont("Nehalem events, ");
 		break;
 
@@ -1438,6 +1485,12 @@ static __init int intel_pmu_init(void)
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
 		x86_pmu.extra_regs = intel_westmere_extra_regs;
+
+		/* UOPS_ISSUED.STALLED_CYCLES */
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+		/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
+
 		pr_cont("Westmere events, ");
 		break;
 
@@ -1449,6 +1502,12 @@ static __init int intel_pmu_init(void)
 
 		x86_pmu.event_constraints = intel_snb_event_constraints;
 		x86_pmu.pebs_constraints = intel_snb_pebs_events;
+
+		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+		/* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x18001b1;
+
 		pr_cont("SandyBridge events, ");
 		break;
 
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index d1f77e2934a..ead584fb6a7 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -468,7 +468,7 @@ static struct p4_event_bind p4_event_bind_map[] = {
 		.opcode		= P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
 		.escr_msr	= { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
 		.escr_emask	=
-		P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
+			P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
 		.cntr		= { {12, 13, 16}, {14, 15, 17} },
 	},
 	[P4_EVENT_X87_ASSIST] = {
@@ -912,8 +912,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 	int idx, handled = 0;
 	u64 val;
 
-	data.addr = 0;
-	data.raw = NULL;
+	perf_sample_data_init(&data, 0);
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
@@ -950,11 +949,20 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 			x86_pmu_stop(event, 0);
 	}
 
-	if (handled) {
-		/* p4 quirk: unmask it again */
-		apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+	if (handled)
 		inc_irq_stat(apic_perf_irqs);
-	}
+
+	/*
+	 * When dealing with the unmasking of the LVTPC on P4 perf hw, it has
+	 * been observed that the OVF bit flag has to be cleared first _before_
+	 * the LVTPC can be unmasked.
+	 *
+	 * The reason is the NMI line will continue to be asserted while the OVF
+	 * bit is set.  This causes a second NMI to generate if the LVTPC is
+	 * unmasked before the OVF bit is cleared, leading to unknown NMI
+	 * messages.
+	 */
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
 
 	return handled;
 }
@@ -1188,7 +1196,7 @@ static __init int p4_pmu_init(void)
 {
 	unsigned int low, high;
 
-	/* If we get stripped -- indexig fails */
+	/* If we get stripped -- indexing fails */
 	BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC);
 
 	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 706a9fb46a5..e90f08458e6 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -391,7 +391,7 @@ static int ioapic_xlate(struct irq_domain *id, const u32 *intspec, u32 intsize,
 
 	set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity);
 
-	return io_apic_setup_irq_pin(*out_hwirq, cpu_to_node(0), &attr);
+	return io_apic_setup_irq_pin_once(*out_hwirq, cpu_to_node(0), &attr);
 }
 
 static void __init ioapic_add_ofnode(struct device_node *np)
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index e2a3f0606da..1aae78f775f 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -135,20 +135,6 @@ print_context_stack_bp(struct thread_info *tinfo,
 }
 EXPORT_SYMBOL_GPL(print_context_stack_bp);
 
-
-static void
-print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
-{
-	printk(data);
-	print_symbol(msg, symbol);
-	printk("\n");
-}
-
-static void print_trace_warning(void *data, char *msg)
-{
-	printk("%s%s\n", (char *)data, msg);
-}
-
 static int print_trace_stack(void *data, char *name)
 {
 	printk("%s <%s> ", (char *)data, name);
@@ -166,8 +152,6 @@ static void print_trace_address(void *data, unsigned long addr, int reliable)
 }
 
 static const struct stacktrace_ops print_trace_ops = {
-	.warning		= print_trace_warning,
-	.warning_symbol		= print_trace_warning_symbol,
 	.stack			= print_trace_stack,
 	.address		= print_trace_address,
 	.walk_stack		= print_context_stack,
@@ -279,7 +263,6 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
 	printk("DEBUG_PAGEALLOC");
 #endif
 	printk("\n");
-	sysfs_printk_last_file();
 	if (notify_die(DIE_OOPS, str, regs, err,
 			current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
 		return 1;
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index a93742a5746..0ba15a6cc57 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -260,9 +260,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
 	return mod_code_status;
 }
 
-static unsigned char *ftrace_nop_replace(void)
+static const unsigned char *ftrace_nop_replace(void)
 {
-	return ideal_nop5;
+	return ideal_nops[NOP_ATOMIC5];
 }
 
 static int
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index d6d6bb36193..3bb08509a7a 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -23,7 +23,6 @@
 static void __init i386_default_early_setup(void)
 {
 	/* Initialize 32bit specific setup functions */
-	x86_init.resources.probe_roms = probe_roms;
 	x86_init.resources.reserve_resources = i386_reserve_resources;
 	x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc;
 
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index bfe8f729e08..6781765b3a0 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -217,7 +217,7 @@ static void hpet_reserve_platform_timers(unsigned int id) { }
 /*
  * Common hpet info
  */
-static unsigned long hpet_period;
+static unsigned long hpet_freq;
 
 static void hpet_legacy_set_mode(enum clock_event_mode mode,
 			  struct clock_event_device *evt);
@@ -232,7 +232,6 @@ static struct clock_event_device hpet_clockevent = {
 	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.set_mode	= hpet_legacy_set_mode,
 	.set_next_event = hpet_legacy_next_event,
-	.shift		= 32,
 	.irq		= 0,
 	.rating		= 50,
 };
@@ -290,28 +289,12 @@ static void hpet_legacy_clockevent_register(void)
 	hpet_enable_legacy_int();
 
 	/*
-	 * The mult factor is defined as (include/linux/clockchips.h)
-	 *  mult/2^shift = cyc/ns (in contrast to ns/cyc in clocksource.h)
-	 * hpet_period is in units of femtoseconds (per cycle), so
-	 *  mult/2^shift = cyc/ns = 10^6/hpet_period
-	 *  mult = (10^6 * 2^shift)/hpet_period
-	 *  mult = (FSEC_PER_NSEC << hpet_clockevent.shift)/hpet_period
-	 */
-	hpet_clockevent.mult = div_sc((unsigned long) FSEC_PER_NSEC,
-				      hpet_period, hpet_clockevent.shift);
-	/* Calculate the min / max delta */
-	hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
-							   &hpet_clockevent);
-	/* Setup minimum reprogramming delta. */
-	hpet_clockevent.min_delta_ns = clockevent_delta2ns(HPET_MIN_PROG_DELTA,
-							   &hpet_clockevent);
-
-	/*
 	 * Start hpet with the boot cpu mask and make it
 	 * global after the IO_APIC has been initialized.
 	 */
 	hpet_clockevent.cpumask = cpumask_of(smp_processor_id());
-	clockevents_register_device(&hpet_clockevent);
+	clockevents_config_and_register(&hpet_clockevent, hpet_freq,
+					HPET_MIN_PROG_DELTA, 0x7FFFFFFF);
 	global_clock_event = &hpet_clockevent;
 	printk(KERN_DEBUG "hpet clockevent registered\n");
 }
@@ -549,7 +532,6 @@ static int hpet_setup_irq(struct hpet_dev *dev)
 static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
 {
 	struct clock_event_device *evt = &hdev->evt;
-	uint64_t hpet_freq;
 
 	WARN_ON(cpu != smp_processor_id());
 	if (!(hdev->flags & HPET_DEV_VALID))
@@ -571,24 +553,10 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
 
 	evt->set_mode = hpet_msi_set_mode;
 	evt->set_next_event = hpet_msi_next_event;
-	evt->shift = 32;
-
-	/*
-	 * The period is a femto seconds value. We need to calculate the
-	 * scaled math multiplication factor for nanosecond to hpet tick
-	 * conversion.
-	 */
-	hpet_freq = FSEC_PER_SEC;
-	do_div(hpet_freq, hpet_period);
-	evt->mult = div_sc((unsigned long) hpet_freq,
-				      NSEC_PER_SEC, evt->shift);
-	/* Calculate the max delta */
-	evt->max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, evt);
-	/* 5 usec minimum reprogramming delta. */
-	evt->min_delta_ns = 5000;
-
 	evt->cpumask = cpumask_of(hdev->cpu);
-	clockevents_register_device(evt);
+
+	clockevents_config_and_register(evt, hpet_freq, HPET_MIN_PROG_DELTA,
+					0x7FFFFFFF);
 }
 
 #ifdef CONFIG_HPET
@@ -792,7 +760,6 @@ static struct clocksource clocksource_hpet = {
 static int hpet_clocksource_register(void)
 {
 	u64 start, now;
-	u64 hpet_freq;
 	cycle_t t1;
 
 	/* Start the counter */
@@ -819,24 +786,7 @@ static int hpet_clocksource_register(void)
 		return -ENODEV;
 	}
 
-	/*
-	 * The definition of mult is (include/linux/clocksource.h)
-	 * mult/2^shift = ns/cyc and hpet_period is in units of fsec/cyc
-	 * so we first need to convert hpet_period to ns/cyc units:
-	 *  mult/2^shift = ns/cyc = hpet_period/10^6
-	 *  mult = (hpet_period * 2^shift)/10^6
-	 *  mult = (hpet_period << shift)/FSEC_PER_NSEC
-	 */
-
-	/* Need to convert hpet_period (fsec/cyc) to cyc/sec:
-	 *
-	 * cyc/sec = FSEC_PER_SEC/hpet_period(fsec/cyc)
-	 * cyc/sec = (FSEC_PER_NSEC * NSEC_PER_SEC)/hpet_period
-	 */
-	hpet_freq = FSEC_PER_SEC;
-	do_div(hpet_freq, hpet_period);
 	clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq);
-
 	return 0;
 }
 
@@ -845,7 +795,9 @@ static int hpet_clocksource_register(void)
  */
 int __init hpet_enable(void)
 {
+	unsigned long hpet_period;
 	unsigned int id;
+	u64 freq;
 	int i;
 
 	if (!is_hpet_capable())
@@ -884,6 +836,14 @@ int __init hpet_enable(void)
 		goto out_nohpet;
 
 	/*
+	 * The period is a femto seconds value. Convert it to a
+	 * frequency.
+	 */
+	freq = FSEC_PER_SEC;
+	do_div(freq, hpet_period);
+	hpet_freq = freq;
+
+	/*
 	 * Read the HPET ID register to retrieve the IRQ routing
 	 * information and the number of channels
 	 */
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 2dfd3159744..fb66dc9e36c 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -93,7 +93,6 @@ static struct clock_event_device pit_ce = {
 	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.set_mode	= init_pit_timer,
 	.set_next_event = pit_next_event,
-	.shift		= 32,
 	.irq		= 0,
 };
 
@@ -108,90 +107,12 @@ void __init setup_pit_timer(void)
 	 * IO_APIC has been initialized.
 	 */
 	pit_ce.cpumask = cpumask_of(smp_processor_id());
-	pit_ce.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, pit_ce.shift);
-	pit_ce.max_delta_ns = clockevent_delta2ns(0x7FFF, &pit_ce);
-	pit_ce.min_delta_ns = clockevent_delta2ns(0xF, &pit_ce);
 
-	clockevents_register_device(&pit_ce);
+	clockevents_config_and_register(&pit_ce, CLOCK_TICK_RATE, 0xF, 0x7FFF);
 	global_clock_event = &pit_ce;
 }
 
 #ifndef CONFIG_X86_64
-/*
- * Since the PIT overflows every tick, its not very useful
- * to just read by itself. So use jiffies to emulate a free
- * running counter:
- */
-static cycle_t pit_read(struct clocksource *cs)
-{
-	static int old_count;
-	static u32 old_jifs;
-	unsigned long flags;
-	int count;
-	u32 jifs;
-
-	raw_spin_lock_irqsave(&i8253_lock, flags);
-	/*
-	 * Although our caller may have the read side of xtime_lock,
-	 * this is now a seqlock, and we are cheating in this routine
-	 * by having side effects on state that we cannot undo if
-	 * there is a collision on the seqlock and our caller has to
-	 * retry.  (Namely, old_jifs and old_count.)  So we must treat
-	 * jiffies as volatile despite the lock.  We read jiffies
-	 * before latching the timer count to guarantee that although
-	 * the jiffies value might be older than the count (that is,
-	 * the counter may underflow between the last point where
-	 * jiffies was incremented and the point where we latch the
-	 * count), it cannot be newer.
-	 */
-	jifs = jiffies;
-	outb_pit(0x00, PIT_MODE);	/* latch the count ASAP */
-	count = inb_pit(PIT_CH0);	/* read the latched count */
-	count |= inb_pit(PIT_CH0) << 8;
-
-	/* VIA686a test code... reset the latch if count > max + 1 */
-	if (count > LATCH) {
-		outb_pit(0x34, PIT_MODE);
-		outb_pit(LATCH & 0xff, PIT_CH0);
-		outb_pit(LATCH >> 8, PIT_CH0);
-		count = LATCH - 1;
-	}
-
-	/*
-	 * It's possible for count to appear to go the wrong way for a
-	 * couple of reasons:
-	 *
-	 *  1. The timer counter underflows, but we haven't handled the
-	 *     resulting interrupt and incremented jiffies yet.
-	 *  2. Hardware problem with the timer, not giving us continuous time,
-	 *     the counter does small "jumps" upwards on some Pentium systems,
-	 *     (see c't 95/10 page 335 for Neptun bug.)
-	 *
-	 * Previous attempts to handle these cases intelligently were
-	 * buggy, so we just do the simple thing now.
-	 */
-	if (count > old_count && jifs == old_jifs)
-		count = old_count;
-
-	old_count = count;
-	old_jifs = jifs;
-
-	raw_spin_unlock_irqrestore(&i8253_lock, flags);
-
-	count = (LATCH - 1) - count;
-
-	return (cycle_t)(jifs * LATCH) + count;
-}
-
-static struct clocksource pit_cs = {
-	.name		= "pit",
-	.rating		= 110,
-	.read		= pit_read,
-	.mask		= CLOCKSOURCE_MASK(32),
-	.mult		= 0,
-	.shift		= 20,
-};
-
 static int __init init_pit_clocksource(void)
 {
 	 /*
@@ -205,10 +126,7 @@ static int __init init_pit_clocksource(void)
 	    pit_ce.mode != CLOCK_EVT_MODE_PERIODIC)
 		return 0;
 
-	pit_cs.mult = clocksource_hz2mult(CLOCK_TICK_RATE, pit_cs.shift);
-
-	return clocksource_register(&pit_cs);
+	return clocksource_i8253_init();
 }
 arch_initcall(init_pit_clocksource);
-
 #endif /* !CONFIG_X86_64 */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 1cb0b9fc78d..6c0802eb2f7 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -249,7 +249,7 @@ void fixup_irqs(void)
 
 		data = irq_desc_get_irq_data(desc);
 		affinity = data->affinity;
-		if (!irq_has_action(irq) ||
+		if (!irq_has_action(irq) || irqd_is_per_cpu(data) ||
 		    cpumask_subset(affinity, cpu_online_mask)) {
 			raw_spin_unlock(&desc->lock);
 			continue;
@@ -276,7 +276,8 @@ void fixup_irqs(void)
 		else if (!(warned++))
 			set_affinity = 0;
 
-		if (!irqd_can_move_in_process_context(data) && chip->irq_unmask)
+		if (!irqd_can_move_in_process_context(data) &&
+		    !irqd_irq_disabled(data) && chip->irq_unmask)
 			chip->irq_unmask(data);
 
 		raw_spin_unlock(&desc->lock);
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 961b6b30ba9..3fee346ef54 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -34,7 +34,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
 		code.offset = entry->target -
 				(entry->code + JUMP_LABEL_NOP_SIZE);
 	} else
-		memcpy(&code, ideal_nop5, JUMP_LABEL_NOP_SIZE);
+		memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE);
 	get_online_cpus();
 	mutex_lock(&text_mutex);
 	text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
@@ -44,7 +44,8 @@ void arch_jump_label_transform(struct jump_entry *entry,
 
 void arch_jump_label_text_poke_early(jump_label_t addr)
 {
-	text_poke_early((void *)addr, ideal_nop5, JUMP_LABEL_NOP_SIZE);
+	text_poke_early((void *)addr, ideal_nops[NOP_ATOMIC5],
+			JUMP_LABEL_NOP_SIZE);
 }
 
 #endif
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index c969fd9d156..f1a6244d7d9 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1183,12 +1183,13 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
 					 struct pt_regs *regs)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	unsigned long flags;
 
 	/* This is possible if op is under delayed unoptimizing */
 	if (kprobe_disabled(&op->kp))
 		return;
 
-	preempt_disable();
+	local_irq_save(flags);
 	if (kprobe_running()) {
 		kprobes_inc_nmissed_count(&op->kp);
 	} else {
@@ -1207,7 +1208,7 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
 		opt_pre_handler(&op->kp, regs);
 		__this_cpu_write(current_kprobe, NULL);
 	}
-	preempt_enable_no_resched();
+	local_irq_restore(flags);
 }
 
 static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index f98d3eafe07..6389a6bca11 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -26,8 +26,6 @@
 #include <asm/x86_init.h>
 #include <asm/reboot.h>
 
-#define KVM_SCALE 22
-
 static int kvmclock = 1;
 static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
 static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
@@ -120,8 +118,6 @@ static struct clocksource kvm_clock = {
 	.read = kvm_clock_get_cycles,
 	.rating = 400,
 	.mask = CLOCKSOURCE_MASK(64),
-	.mult = 1 << KVM_SCALE,
-	.shift = KVM_SCALE,
 	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -203,7 +199,7 @@ void __init kvmclock_init(void)
 	machine_ops.crash_shutdown  = kvm_crash_shutdown;
 #endif
 	kvm_get_preset_lpj();
-	clocksource_register(&kvm_clock);
+	clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
 	pv_info.paravirt_enabled = 1;
 	pv_info.name = "KVM";
 
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index ab23f1ad4bf..52f256f2cc8 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -24,6 +24,7 @@
 #include <linux/bug.h>
 #include <linux/mm.h>
 #include <linux/gfp.h>
+#include <linux/jump_label.h>
 
 #include <asm/system.h>
 #include <asm/page.h>
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 5a532ce646b..6f9bfffb272 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -715,17 +715,15 @@ static void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare)
 	}
 }
 
-static int
+static int __init
 check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count)
 {
-	int ret = 0;
-
 	if (!mpc_new_phys || count <= mpc_new_length) {
 		WARN(1, "update_mptable: No spare slots (length: %x)\n", count);
 		return -1;
 	}
 
-	return ret;
+	return 0;
 }
 #else /* CONFIG_X86_IO_APIC */
 static
diff --git a/arch/x86/kernel/pci-iommu_table.c b/arch/x86/kernel/pci-iommu_table.c
index 55d745ec118..35ccf75696e 100644
--- a/arch/x86/kernel/pci-iommu_table.c
+++ b/arch/x86/kernel/pci-iommu_table.c
@@ -50,20 +50,14 @@ void __init check_iommu_entries(struct iommu_table_entry *start,
 				struct iommu_table_entry *finish)
 {
 	struct iommu_table_entry *p, *q, *x;
-	char sym_p[KSYM_SYMBOL_LEN];
-	char sym_q[KSYM_SYMBOL_LEN];
 
 	/* Simple cyclic dependency checker. */
 	for (p = start; p < finish; p++) {
 		q = find_dependents_of(start, finish, p);
 		x = find_dependents_of(start, finish, q);
 		if (p == x) {
-			sprint_symbol(sym_p, (unsigned long)p->detect);
-			sprint_symbol(sym_q, (unsigned long)q->detect);
-
-			printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %s depends" \
-					" on %s and vice-versa. BREAKING IT.\n",
-					sym_p, sym_q);
+			printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %pS depends on %pS and vice-versa. BREAKING IT.\n",
+			       p->detect, q->detect);
 			/* Heavy handed way..*/
 			x->depend = 0;
 		}
@@ -72,12 +66,8 @@ void __init check_iommu_entries(struct iommu_table_entry *start,
 	for (p = start; p < finish; p++) {
 		q = find_dependents_of(p, finish, p);
 		if (q && q > p) {
-			sprint_symbol(sym_p, (unsigned long)p->detect);
-			sprint_symbol(sym_q, (unsigned long)q->detect);
-
-			printk(KERN_ERR "EXECUTION ORDER INVALID! %s "\
-					"should be called before %s!\n",
-					sym_p, sym_q);
+			printk(KERN_ERR "EXECUTION ORDER INVALID! %pS should be called before %pS!\n",
+			       p->detect, q->detect);
 		}
 	}
 }
diff --git a/arch/x86/kernel/probe_roms_32.c b/arch/x86/kernel/probe_roms.c
index 071e7fea42e..ba0a4cce53b 100644
--- a/arch/x86/kernel/probe_roms_32.c
+++ b/arch/x86/kernel/probe_roms.c
@@ -73,6 +73,107 @@ static struct resource video_rom_resource = {
 	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
 };
 
+/* does this oprom support the given pci device, or any of the devices
+ * that the driver supports?
+ */
+static bool match_id(struct pci_dev *pdev, unsigned short vendor, unsigned short device)
+{
+	struct pci_driver *drv = pdev->driver;
+	const struct pci_device_id *id;
+
+	if (pdev->vendor == vendor && pdev->device == device)
+		return true;
+
+	for (id = drv ? drv->id_table : NULL; id && id->vendor; id++)
+		if (id->vendor == vendor && id->device == device)
+			break;
+
+	return id && id->vendor;
+}
+
+static bool probe_list(struct pci_dev *pdev, unsigned short vendor,
+		       const unsigned char *rom_list)
+{
+	unsigned short device;
+
+	do {
+		if (probe_kernel_address(rom_list, device) != 0)
+			device = 0;
+
+		if (device && match_id(pdev, vendor, device))
+			break;
+
+		rom_list += 2;
+	} while (device);
+
+	return !!device;
+}
+
+static struct resource *find_oprom(struct pci_dev *pdev)
+{
+	struct resource *oprom = NULL;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(adapter_rom_resources); i++) {
+		struct resource *res = &adapter_rom_resources[i];
+		unsigned short offset, vendor, device, list, rev;
+		const unsigned char *rom;
+
+		if (res->end == 0)
+			break;
+
+		rom = isa_bus_to_virt(res->start);
+		if (probe_kernel_address(rom + 0x18, offset) != 0)
+			continue;
+
+		if (probe_kernel_address(rom + offset + 0x4, vendor) != 0)
+			continue;
+
+		if (probe_kernel_address(rom + offset + 0x6, device) != 0)
+			continue;
+
+		if (match_id(pdev, vendor, device)) {
+			oprom = res;
+			break;
+		}
+
+		if (probe_kernel_address(rom + offset + 0x8, list) == 0 &&
+		    probe_kernel_address(rom + offset + 0xc, rev) == 0 &&
+		    rev >= 3 && list &&
+		    probe_list(pdev, vendor, rom + offset + list)) {
+			oprom = res;
+			break;
+		}
+	}
+
+	return oprom;
+}
+
+void *pci_map_biosrom(struct pci_dev *pdev)
+{
+	struct resource *oprom = find_oprom(pdev);
+
+	if (!oprom)
+		return NULL;
+
+	return ioremap(oprom->start, resource_size(oprom));
+}
+EXPORT_SYMBOL(pci_map_biosrom);
+
+void pci_unmap_biosrom(void __iomem *image)
+{
+	iounmap(image);
+}
+EXPORT_SYMBOL(pci_unmap_biosrom);
+
+size_t pci_biosrom_size(struct pci_dev *pdev)
+{
+	struct resource *oprom = find_oprom(pdev);
+
+	return oprom ? resource_size(oprom) : 0;
+}
+EXPORT_SYMBOL(pci_biosrom_size);
+
 #define ROMSIGNATURE 0xaa55
 
 static int __init romsignature(const unsigned char *rom)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index d46cbe46b7a..88a90a977f8 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -449,7 +449,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
 void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
 {
 	if (!need_resched()) {
-		if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR))
+		if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
 			clflush((void *)&current_thread_info()->flags);
 
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
@@ -465,7 +465,7 @@ static void mwait_idle(void)
 	if (!need_resched()) {
 		trace_power_start(POWER_CSTATE, 1, smp_processor_id());
 		trace_cpu_idle(1, smp_processor_id());
-		if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR))
+		if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
 			clflush((void *)&current_thread_info()->flags);
 
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 45892dc4b72..f65e5b521db 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -608,6 +608,9 @@ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
 	unsigned len, type;
 	struct perf_event *bp;
 
+	if (ptrace_get_breakpoints(tsk) < 0)
+		return -ESRCH;
+
 	data &= ~DR_CONTROL_RESERVED;
 	old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
 restore:
@@ -655,6 +658,9 @@ restore:
 		}
 		goto restore;
 	}
+
+	ptrace_put_breakpoints(tsk);
+
 	return ((orig_ret < 0) ? orig_ret : rc);
 }
 
@@ -668,10 +674,17 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
 
 	if (n < HBP_NUM) {
 		struct perf_event *bp;
+
+		if (ptrace_get_breakpoints(tsk) < 0)
+			return -ESRCH;
+
 		bp = thread->ptrace_bps[n];
 		if (!bp)
-			return 0;
-		val = bp->hw.info.address;
+			val = 0;
+		else
+			val = bp->hw.info.address;
+
+		ptrace_put_breakpoints(tsk);
 	} else if (n == 6) {
 		val = thread->debugreg6;
 	 } else if (n == 7) {
@@ -686,6 +699,10 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
 	struct perf_event *bp;
 	struct thread_struct *t = &tsk->thread;
 	struct perf_event_attr attr;
+	int err = 0;
+
+	if (ptrace_get_breakpoints(tsk) < 0)
+		return -ESRCH;
 
 	if (!t->ptrace_bps[nr]) {
 		ptrace_breakpoint_init(&attr);
@@ -709,24 +726,23 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
 		 * writing for the user. And anyway this is the previous
 		 * behaviour.
 		 */
-		if (IS_ERR(bp))
-			return PTR_ERR(bp);
+		if (IS_ERR(bp)) {
+			err = PTR_ERR(bp);
+			goto put;
+		}
 
 		t->ptrace_bps[nr] = bp;
 	} else {
-		int err;
-
 		bp = t->ptrace_bps[nr];
 
 		attr = bp->attr;
 		attr.bp_addr = addr;
 		err = modify_user_hw_breakpoint(bp, &attr);
-		if (err)
-			return err;
 	}
 
-
-	return 0;
+put:
+	ptrace_put_breakpoints(tsk);
+	return err;
 }
 
 /*
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 08c44b08bf5..0c016f72769 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -36,7 +36,7 @@ EXPORT_SYMBOL(pm_power_off);
 
 static const struct desc_ptr no_idt = {};
 static int reboot_mode;
-enum reboot_type reboot_type = BOOT_KBD;
+enum reboot_type reboot_type = BOOT_ACPI;
 int reboot_force;
 
 #if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
@@ -478,9 +478,24 @@ void __attribute__((weak)) mach_reboot_fixups(void)
 {
 }
 
+/*
+ * Windows compatible x86 hardware expects the following on reboot:
+ *
+ * 1) If the FADT has the ACPI reboot register flag set, try it
+ * 2) If still alive, write to the keyboard controller
+ * 3) If still alive, write to the ACPI reboot register again
+ * 4) If still alive, write to the keyboard controller again
+ *
+ * If the machine is still alive at this stage, it gives up. We default to
+ * following the same pattern, except that if we're still alive after (4) we'll
+ * try to force a triple fault and then cycle between hitting the keyboard
+ * controller and doing that
+ */
 static void native_machine_emergency_restart(void)
 {
 	int i;
+	int attempt = 0;
+	int orig_reboot_type = reboot_type;
 
 	if (reboot_emergency)
 		emergency_vmx_disable_all();
@@ -502,6 +517,13 @@ static void native_machine_emergency_restart(void)
 				outb(0xfe, 0x64); /* pulse reset low */
 				udelay(50);
 			}
+			if (attempt == 0 && orig_reboot_type == BOOT_ACPI) {
+				attempt = 1;
+				reboot_type = BOOT_ACPI;
+			} else {
+				reboot_type = BOOT_TRIPLE;
+			}
+			break;
 
 		case BOOT_TRIPLE:
 			load_idt(&no_idt);
diff --git a/arch/x86/kernel/reboot_32.S b/arch/x86/kernel/reboot_32.S
index 29092b38d81..1d5c46df0d7 100644
--- a/arch/x86/kernel/reboot_32.S
+++ b/arch/x86/kernel/reboot_32.S
@@ -21,26 +21,26 @@ r_base = .
 	/* Get our own relocated address */
 	call	1f
 1:	popl	%ebx
-	subl	$1b, %ebx
+	subl	$(1b - r_base), %ebx
 
 	/* Compute the equivalent real-mode segment */
 	movl	%ebx, %ecx
 	shrl	$4, %ecx
 	
 	/* Patch post-real-mode segment jump */
-	movw	dispatch_table(%ebx,%eax,2),%ax
-	movw	%ax, 101f(%ebx)
-	movw	%cx, 102f(%ebx)
+	movw	(dispatch_table - r_base)(%ebx,%eax,2),%ax
+	movw	%ax, (101f - r_base)(%ebx)
+	movw	%cx, (102f - r_base)(%ebx)
 
 	/* Set up the IDT for real mode. */
-	lidtl	machine_real_restart_idt(%ebx)
+	lidtl	(machine_real_restart_idt - r_base)(%ebx)
 
 	/*
 	 * Set up a GDT from which we can load segment descriptors for real
 	 * mode.  The GDT is not used in real mode; it is just needed here to
 	 * prepare the descriptors.
 	 */
-	lgdtl	machine_real_restart_gdt(%ebx)
+	lgdtl	(machine_real_restart_gdt - r_base)(%ebx)
 
 	/*
 	 * Load the data segment registers with 16-bit compatible values
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 4be9b398470..c3050af9306 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -691,8 +691,6 @@ early_param("reservelow", parse_reservelow);
 
 void __init setup_arch(char **cmdline_p)
 {
-	unsigned long flags;
-
 #ifdef CONFIG_X86_32
 	memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
 	visws_early_detect();
@@ -1041,9 +1039,7 @@ void __init setup_arch(char **cmdline_p)
 
 	mcheck_init();
 
-	local_irq_save(flags);
-	arch_init_ideal_nop5();
-	local_irq_restore(flags);
+	arch_init_ideal_nops();
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 4fd173cd8e5..40a24932a8a 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -601,10 +601,7 @@ long sys_rt_sigreturn(struct pt_regs *regs)
 		goto badframe;
 
 	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+	set_current_blocked(&set);
 
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
 		goto badframe;
@@ -682,6 +679,7 @@ static int
 handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	      sigset_t *oldset, struct pt_regs *regs)
 {
+	sigset_t blocked;
 	int ret;
 
 	/* Are we from a system call? */
@@ -741,12 +739,10 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	 */
 	regs->flags &= ~X86_EFLAGS_TF;
 
-	spin_lock_irq(&current->sighand->siglock);
-	sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+	sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
 	if (!(ka->sa.sa_flags & SA_NODEFER))
-		sigaddset(&current->blocked, sig);
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+		sigaddset(&blocked, sig);
+	set_current_blocked(&blocked);
 
 	tracehook_signal_handler(sig, info, ka, regs,
 				 test_thread_flag(TIF_SINGLESTEP));
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 513deac7228..013e7eba83b 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -194,14 +194,13 @@ static void native_stop_other_cpus(int wait)
 }
 
 /*
- * Reschedule call back. Nothing to do,
- * all the work is done automatically when
- * we return from the interrupt.
+ * Reschedule call back.
  */
 void smp_reschedule_interrupt(struct pt_regs *regs)
 {
 	ack_APIC_irq();
 	inc_irq_stat(irq_resched_count);
+	scheduler_ipi();
 	/*
 	 * KVM uses this interrupt to force a cpu out of guest mode
 	 */
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index c2871d3c71b..a3c430bdfb6 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1332,9 +1332,9 @@ static inline void mwait_play_dead(void)
 	void *mwait_ptr;
 	struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info);
 
-	if (!(cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)))
+	if (!this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c))
 		return;
-	if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLSH))
+	if (!this_cpu_has(X86_FEATURE_CLFLSH))
 		return;
 	if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF)
 		return;
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 6515733a289..55d9bc03f69 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -9,15 +9,6 @@
 #include <linux/uaccess.h>
 #include <asm/stacktrace.h>
 
-static void save_stack_warning(void *data, char *msg)
-{
-}
-
-static void
-save_stack_warning_symbol(void *data, char *msg, unsigned long symbol)
-{
-}
-
 static int save_stack_stack(void *data, char *name)
 {
 	return 0;
@@ -53,16 +44,12 @@ save_stack_address_nosched(void *data, unsigned long addr, int reliable)
 }
 
 static const struct stacktrace_ops save_stack_ops = {
-	.warning	= save_stack_warning,
-	.warning_symbol	= save_stack_warning_symbol,
 	.stack		= save_stack_stack,
 	.address	= save_stack_address,
 	.walk_stack	= print_context_stack,
 };
 
 static const struct stacktrace_ops save_stack_ops_nosched = {
-	.warning	= save_stack_warning,
-	.warning_symbol	= save_stack_warning_symbol,
 	.stack		= save_stack_stack,
 	.address	= save_stack_address_nosched,
 	.walk_stack	= print_context_stack,
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index abce34d5c79..32cbffb0c49 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -344,3 +344,4 @@ ENTRY(sys_call_table)
 	.long sys_open_by_handle_at
 	.long sys_clock_adjtime
 	.long sys_syncfs
+	.long sys_sendmmsg		/* 345 */
diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c
index 787a5e499dd..3f92ce07e52 100644
--- a/arch/x86/kernel/test_nx.c
+++ b/arch/x86/kernel/test_nx.c
@@ -161,7 +161,7 @@ static int test_NX(void)
 	}
 
 #endif
-	return 0;
+	return ret;
 }
 
 static void test_exit(void)
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index c11514e9128..6f164bd5e14 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -35,7 +35,7 @@ void iommu_shutdown_noop(void) { }
 struct x86_init_ops x86_init __initdata = {
 
 	.resources = {
-		.probe_roms		= x86_init_noop,
+		.probe_roms		= probe_roms,
 		.reserve_resources	= reserve_standard_io_resources,
 		.memory_setup		= default_machine_specific_memory_setup,
 	},
@@ -61,6 +61,10 @@ struct x86_init_ops x86_init __initdata = {
 		.banner			= default_banner,
 	},
 
+	.mapping = {
+		.pagetable_reserve		= native_pagetable_reserve,
+	},
+
 	.paging = {
 		.pagetable_setup_start	= native_pagetable_setup_start,
 		.pagetable_setup_done	= native_pagetable_setup_done,
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 0ad47b819a8..d6e2477feb1 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -73,9 +73,14 @@
 #define MemAbs      (1<<11)      /* Memory operand is absolute displacement */
 #define String      (1<<12)     /* String instruction (rep capable) */
 #define Stack       (1<<13)     /* Stack instruction (push/pop) */
+#define GroupMask   (7<<14)     /* Opcode uses one of the group mechanisms */
 #define Group       (1<<14)     /* Bits 3:5 of modrm byte extend opcode */
-#define GroupDual   (1<<15)     /* Alternate decoding of mod == 3 */
+#define GroupDual   (2<<14)     /* Alternate decoding of mod == 3 */
+#define Prefix      (3<<14)     /* Instruction varies with 66/f2/f3 prefix */
+#define RMExt       (4<<14)     /* Opcode extension in ModRM r/m if mod == 3 */
+#define Sse         (1<<17)     /* SSE Vector instruction */
 /* Misc flags */
+#define Prot        (1<<21) /* instruction generates #UD if not in prot-mode */
 #define VendorSpecific (1<<22) /* Vendor specific instruction */
 #define NoAccess    (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
 #define Op3264      (1<<24) /* Operand is 64b in long mode, 32b otherwise */
@@ -102,11 +107,14 @@
 
 struct opcode {
 	u32 flags;
+	u8 intercept;
 	union {
 		int (*execute)(struct x86_emulate_ctxt *ctxt);
 		struct opcode *group;
 		struct group_dual *gdual;
+		struct gprefix *gprefix;
 	} u;
+	int (*check_perm)(struct x86_emulate_ctxt *ctxt);
 };
 
 struct group_dual {
@@ -114,6 +122,13 @@ struct group_dual {
 	struct opcode mod3[8];
 };
 
+struct gprefix {
+	struct opcode pfx_no;
+	struct opcode pfx_66;
+	struct opcode pfx_f2;
+	struct opcode pfx_f3;
+};
+
 /* EFLAGS bit definitions. */
 #define EFLG_ID (1<<21)
 #define EFLG_VIP (1<<20)
@@ -248,42 +263,42 @@ struct group_dual {
 			     "w", "r", _LO32, "r", "", "r")
 
 /* Instruction has three operands and one operand is stored in ECX register */
-#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) 	\
-	do {									\
-		unsigned long _tmp;						\
-		_type _clv  = (_cl).val;  					\
-		_type _srcv = (_src).val;    					\
-		_type _dstv = (_dst).val;					\
-										\
-		__asm__ __volatile__ (						\
-			_PRE_EFLAGS("0", "5", "2")				\
-			_op _suffix " %4,%1 \n"					\
-			_POST_EFLAGS("0", "5", "2")				\
-			: "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp)		\
-			: "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK)		\
-			); 							\
-										\
-		(_cl).val  = (unsigned long) _clv;				\
-		(_src).val = (unsigned long) _srcv;				\
-		(_dst).val = (unsigned long) _dstv;				\
+#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type)	\
+	do {								\
+		unsigned long _tmp;					\
+		_type _clv  = (_cl).val;				\
+		_type _srcv = (_src).val;				\
+		_type _dstv = (_dst).val;				\
+									\
+		__asm__ __volatile__ (					\
+			_PRE_EFLAGS("0", "5", "2")			\
+			_op _suffix " %4,%1 \n"				\
+			_POST_EFLAGS("0", "5", "2")			\
+			: "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp)	\
+			: "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK)	\
+			);						\
+									\
+		(_cl).val  = (unsigned long) _clv;			\
+		(_src).val = (unsigned long) _srcv;			\
+		(_dst).val = (unsigned long) _dstv;			\
 	} while (0)
 
-#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags)				\
-	do {									\
-		switch ((_dst).bytes) {						\
-		case 2:								\
-			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,  	\
-						"w", unsigned short);         	\
-			break;							\
-		case 4: 							\
-			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,  	\
-						"l", unsigned int);           	\
-			break;							\
-		case 8:								\
-			ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,	\
-						"q", unsigned long));  		\
-			break;							\
-		}								\
+#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags)			\
+	do {								\
+		switch ((_dst).bytes) {					\
+		case 2:							\
+			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,	\
+					 "w", unsigned short);         	\
+			break;						\
+		case 4:							\
+			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,	\
+					 "l", unsigned int);           	\
+			break;						\
+		case 8:							\
+			ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
+					      "q", unsigned long));	\
+			break;						\
+		}							\
 	} while (0)
 
 #define __emulate_1op(_op, _dst, _eflags, _suffix)			\
@@ -346,13 +361,25 @@ struct group_dual {
 	} while (0)
 
 /* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */
-#define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags)			\
-	do {									\
-		switch((_src).bytes) {						\
-		case 1: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "b"); break; \
-		case 2: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx,  _eflags, "w"); break; \
-		case 4: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "l"); break; \
-		case 8: ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "q")); break; \
+#define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags)		\
+	do {								\
+		switch((_src).bytes) {					\
+		case 1:							\
+			__emulate_1op_rax_rdx(_op, _src, _rax, _rdx,	\
+					      _eflags, "b");		\
+			break;						\
+		case 2:							\
+			__emulate_1op_rax_rdx(_op, _src, _rax, _rdx,	\
+					      _eflags, "w");		\
+			break;						\
+		case 4:							\
+			__emulate_1op_rax_rdx(_op, _src, _rax, _rdx,	\
+					      _eflags, "l");		\
+			break;						\
+		case 8:							\
+			ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \
+						   _eflags, "q"));	\
+			break;						\
 		}							\
 	} while (0)
 
@@ -388,13 +415,33 @@ struct group_dual {
 	(_type)_x;							\
 })
 
-#define insn_fetch_arr(_arr, _size, _eip)                                \
+#define insn_fetch_arr(_arr, _size, _eip)				\
 ({	rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size));		\
 	if (rc != X86EMUL_CONTINUE)					\
 		goto done;						\
 	(_eip) += (_size);						\
 })
 
+static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
+				    enum x86_intercept intercept,
+				    enum x86_intercept_stage stage)
+{
+	struct x86_instruction_info info = {
+		.intercept  = intercept,
+		.rep_prefix = ctxt->decode.rep_prefix,
+		.modrm_mod  = ctxt->decode.modrm_mod,
+		.modrm_reg  = ctxt->decode.modrm_reg,
+		.modrm_rm   = ctxt->decode.modrm_rm,
+		.src_val    = ctxt->decode.src.val64,
+		.src_bytes  = ctxt->decode.src.bytes,
+		.dst_bytes  = ctxt->decode.dst.bytes,
+		.ad_bytes   = ctxt->decode.ad_bytes,
+		.next_rip   = ctxt->eip,
+	};
+
+	return ctxt->ops->intercept(ctxt, &info, stage);
+}
+
 static inline unsigned long ad_mask(struct decode_cache *c)
 {
 	return (1UL << (c->ad_bytes << 3)) - 1;
@@ -430,6 +477,13 @@ static inline void jmp_rel(struct decode_cache *c, int rel)
 	register_address_increment(c, &c->eip, rel);
 }
 
+static u32 desc_limit_scaled(struct desc_struct *desc)
+{
+	u32 limit = get_desc_limit(desc);
+
+	return desc->g ? (limit << 12) | 0xfff : limit;
+}
+
 static void set_seg_override(struct decode_cache *c, int seg)
 {
 	c->has_seg_override = true;
@@ -442,11 +496,10 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt,
 	if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
 		return 0;
 
-	return ops->get_cached_segment_base(seg, ctxt->vcpu);
+	return ops->get_cached_segment_base(ctxt, seg);
 }
 
 static unsigned seg_override(struct x86_emulate_ctxt *ctxt,
-			     struct x86_emulate_ops *ops,
 			     struct decode_cache *c)
 {
 	if (!c->has_seg_override)
@@ -455,18 +508,6 @@ static unsigned seg_override(struct x86_emulate_ctxt *ctxt,
 	return c->seg_override;
 }
 
-static ulong linear(struct x86_emulate_ctxt *ctxt,
-		    struct segmented_address addr)
-{
-	struct decode_cache *c = &ctxt->decode;
-	ulong la;
-
-	la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea;
-	if (c->ad_bytes != 8)
-		la &= (u32)-1;
-	return la;
-}
-
 static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
 			     u32 error, bool valid)
 {
@@ -476,11 +517,21 @@ static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
 	return X86EMUL_PROPAGATE_FAULT;
 }
 
+static int emulate_db(struct x86_emulate_ctxt *ctxt)
+{
+	return emulate_exception(ctxt, DB_VECTOR, 0, false);
+}
+
 static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
 {
 	return emulate_exception(ctxt, GP_VECTOR, err, true);
 }
 
+static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
+{
+	return emulate_exception(ctxt, SS_VECTOR, err, true);
+}
+
 static int emulate_ud(struct x86_emulate_ctxt *ctxt)
 {
 	return emulate_exception(ctxt, UD_VECTOR, 0, false);
@@ -496,6 +547,128 @@ static int emulate_de(struct x86_emulate_ctxt *ctxt)
 	return emulate_exception(ctxt, DE_VECTOR, 0, false);
 }
 
+static int emulate_nm(struct x86_emulate_ctxt *ctxt)
+{
+	return emulate_exception(ctxt, NM_VECTOR, 0, false);
+}
+
+static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
+{
+	u16 selector;
+	struct desc_struct desc;
+
+	ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
+	return selector;
+}
+
+static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
+				 unsigned seg)
+{
+	u16 dummy;
+	u32 base3;
+	struct desc_struct desc;
+
+	ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
+	ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
+}
+
+static int __linearize(struct x86_emulate_ctxt *ctxt,
+		     struct segmented_address addr,
+		     unsigned size, bool write, bool fetch,
+		     ulong *linear)
+{
+	struct decode_cache *c = &ctxt->decode;
+	struct desc_struct desc;
+	bool usable;
+	ulong la;
+	u32 lim;
+	u16 sel;
+	unsigned cpl, rpl;
+
+	la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea;
+	switch (ctxt->mode) {
+	case X86EMUL_MODE_REAL:
+		break;
+	case X86EMUL_MODE_PROT64:
+		if (((signed long)la << 16) >> 16 != la)
+			return emulate_gp(ctxt, 0);
+		break;
+	default:
+		usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
+						addr.seg);
+		if (!usable)
+			goto bad;
+		/* code segment or read-only data segment */
+		if (((desc.type & 8) || !(desc.type & 2)) && write)
+			goto bad;
+		/* unreadable code segment */
+		if (!fetch && (desc.type & 8) && !(desc.type & 2))
+			goto bad;
+		lim = desc_limit_scaled(&desc);
+		if ((desc.type & 8) || !(desc.type & 4)) {
+			/* expand-up segment */
+			if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim)
+				goto bad;
+		} else {
+			/* exapand-down segment */
+			if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim)
+				goto bad;
+			lim = desc.d ? 0xffffffff : 0xffff;
+			if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim)
+				goto bad;
+		}
+		cpl = ctxt->ops->cpl(ctxt);
+		rpl = sel & 3;
+		cpl = max(cpl, rpl);
+		if (!(desc.type & 8)) {
+			/* data segment */
+			if (cpl > desc.dpl)
+				goto bad;
+		} else if ((desc.type & 8) && !(desc.type & 4)) {
+			/* nonconforming code segment */
+			if (cpl != desc.dpl)
+				goto bad;
+		} else if ((desc.type & 8) && (desc.type & 4)) {
+			/* conforming code segment */
+			if (cpl < desc.dpl)
+				goto bad;
+		}
+		break;
+	}
+	if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : c->ad_bytes != 8)
+		la &= (u32)-1;
+	*linear = la;
+	return X86EMUL_CONTINUE;
+bad:
+	if (addr.seg == VCPU_SREG_SS)
+		return emulate_ss(ctxt, addr.seg);
+	else
+		return emulate_gp(ctxt, addr.seg);
+}
+
+static int linearize(struct x86_emulate_ctxt *ctxt,
+		     struct segmented_address addr,
+		     unsigned size, bool write,
+		     ulong *linear)
+{
+	return __linearize(ctxt, addr, size, write, false, linear);
+}
+
+
+static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
+			      struct segmented_address addr,
+			      void *data,
+			      unsigned size)
+{
+	int rc;
+	ulong linear;
+
+	rc = linearize(ctxt, addr, size, false, &linear);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception);
+}
+
 static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
 			      struct x86_emulate_ops *ops,
 			      unsigned long eip, u8 *dest)
@@ -505,10 +678,15 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
 	int size, cur_size;
 
 	if (eip == fc->end) {
+		unsigned long linear;
+		struct segmented_address addr = { .seg=VCPU_SREG_CS, .ea=eip};
 		cur_size = fc->end - fc->start;
 		size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip));
-		rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size,
-				size, ctxt->vcpu, &ctxt->exception);
+		rc = __linearize(ctxt, addr, size, false, true, &linear);
+		if (rc != X86EMUL_CONTINUE)
+			return rc;
+		rc = ops->fetch(ctxt, linear, fc->data + cur_size,
+				size, &ctxt->exception);
 		if (rc != X86EMUL_CONTINUE)
 			return rc;
 		fc->end += size;
@@ -551,7 +729,6 @@ static void *decode_register(u8 modrm_reg, unsigned long *regs,
 }
 
 static int read_descriptor(struct x86_emulate_ctxt *ctxt,
-			   struct x86_emulate_ops *ops,
 			   struct segmented_address addr,
 			   u16 *size, unsigned long *address, int op_bytes)
 {
@@ -560,13 +737,11 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt,
 	if (op_bytes == 2)
 		op_bytes = 3;
 	*address = 0;
-	rc = ops->read_std(linear(ctxt, addr), (unsigned long *)size, 2,
-			   ctxt->vcpu, &ctxt->exception);
+	rc = segmented_read_std(ctxt, addr, size, 2);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	addr.ea += 2;
-	rc = ops->read_std(linear(ctxt, addr), address, op_bytes,
-			   ctxt->vcpu, &ctxt->exception);
+	rc = segmented_read_std(ctxt, addr, address, op_bytes);
 	return rc;
 }
 
@@ -623,7 +798,63 @@ static void fetch_register_operand(struct operand *op)
 	}
 }
 
-static void decode_register_operand(struct operand *op,
+static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
+{
+	ctxt->ops->get_fpu(ctxt);
+	switch (reg) {
+	case 0: asm("movdqu %%xmm0, %0" : "=m"(*data)); break;
+	case 1: asm("movdqu %%xmm1, %0" : "=m"(*data)); break;
+	case 2: asm("movdqu %%xmm2, %0" : "=m"(*data)); break;
+	case 3: asm("movdqu %%xmm3, %0" : "=m"(*data)); break;
+	case 4: asm("movdqu %%xmm4, %0" : "=m"(*data)); break;
+	case 5: asm("movdqu %%xmm5, %0" : "=m"(*data)); break;
+	case 6: asm("movdqu %%xmm6, %0" : "=m"(*data)); break;
+	case 7: asm("movdqu %%xmm7, %0" : "=m"(*data)); break;
+#ifdef CONFIG_X86_64
+	case 8: asm("movdqu %%xmm8, %0" : "=m"(*data)); break;
+	case 9: asm("movdqu %%xmm9, %0" : "=m"(*data)); break;
+	case 10: asm("movdqu %%xmm10, %0" : "=m"(*data)); break;
+	case 11: asm("movdqu %%xmm11, %0" : "=m"(*data)); break;
+	case 12: asm("movdqu %%xmm12, %0" : "=m"(*data)); break;
+	case 13: asm("movdqu %%xmm13, %0" : "=m"(*data)); break;
+	case 14: asm("movdqu %%xmm14, %0" : "=m"(*data)); break;
+	case 15: asm("movdqu %%xmm15, %0" : "=m"(*data)); break;
+#endif
+	default: BUG();
+	}
+	ctxt->ops->put_fpu(ctxt);
+}
+
+static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
+			  int reg)
+{
+	ctxt->ops->get_fpu(ctxt);
+	switch (reg) {
+	case 0: asm("movdqu %0, %%xmm0" : : "m"(*data)); break;
+	case 1: asm("movdqu %0, %%xmm1" : : "m"(*data)); break;
+	case 2: asm("movdqu %0, %%xmm2" : : "m"(*data)); break;
+	case 3: asm("movdqu %0, %%xmm3" : : "m"(*data)); break;
+	case 4: asm("movdqu %0, %%xmm4" : : "m"(*data)); break;
+	case 5: asm("movdqu %0, %%xmm5" : : "m"(*data)); break;
+	case 6: asm("movdqu %0, %%xmm6" : : "m"(*data)); break;
+	case 7: asm("movdqu %0, %%xmm7" : : "m"(*data)); break;
+#ifdef CONFIG_X86_64
+	case 8: asm("movdqu %0, %%xmm8" : : "m"(*data)); break;
+	case 9: asm("movdqu %0, %%xmm9" : : "m"(*data)); break;
+	case 10: asm("movdqu %0, %%xmm10" : : "m"(*data)); break;
+	case 11: asm("movdqu %0, %%xmm11" : : "m"(*data)); break;
+	case 12: asm("movdqu %0, %%xmm12" : : "m"(*data)); break;
+	case 13: asm("movdqu %0, %%xmm13" : : "m"(*data)); break;
+	case 14: asm("movdqu %0, %%xmm14" : : "m"(*data)); break;
+	case 15: asm("movdqu %0, %%xmm15" : : "m"(*data)); break;
+#endif
+	default: BUG();
+	}
+	ctxt->ops->put_fpu(ctxt);
+}
+
+static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
+				    struct operand *op,
 				    struct decode_cache *c,
 				    int inhibit_bytereg)
 {
@@ -632,6 +863,15 @@ static void decode_register_operand(struct operand *op,
 
 	if (!(c->d & ModRM))
 		reg = (c->b & 7) | ((c->rex_prefix & 1) << 3);
+
+	if (c->d & Sse) {
+		op->type = OP_XMM;
+		op->bytes = 16;
+		op->addr.xmm = reg;
+		read_sse_reg(ctxt, &op->vec_val, reg);
+		return;
+	}
+
 	op->type = OP_REG;
 	if ((c->d & ByteOp) && !inhibit_bytereg) {
 		op->addr.reg = decode_register(reg, c->regs, highbyte_regs);
@@ -671,6 +911,13 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 		op->bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
 		op->addr.reg = decode_register(c->modrm_rm,
 					       c->regs, c->d & ByteOp);
+		if (c->d & Sse) {
+			op->type = OP_XMM;
+			op->bytes = 16;
+			op->addr.xmm = c->modrm_rm;
+			read_sse_reg(ctxt, &op->vec_val, c->modrm_rm);
+			return rc;
+		}
 		fetch_register_operand(op);
 		return rc;
 	}
@@ -819,8 +1066,8 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt,
 		if (mc->pos < mc->end)
 			goto read_cached;
 
-		rc = ops->read_emulated(addr, mc->data + mc->end, n,
-					&ctxt->exception, ctxt->vcpu);
+		rc = ops->read_emulated(ctxt, addr, mc->data + mc->end, n,
+					&ctxt->exception);
 		if (rc != X86EMUL_CONTINUE)
 			return rc;
 		mc->end += n;
@@ -834,6 +1081,50 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt,
 	return X86EMUL_CONTINUE;
 }
 
+static int segmented_read(struct x86_emulate_ctxt *ctxt,
+			  struct segmented_address addr,
+			  void *data,
+			  unsigned size)
+{
+	int rc;
+	ulong linear;
+
+	rc = linearize(ctxt, addr, size, false, &linear);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+	return read_emulated(ctxt, ctxt->ops, linear, data, size);
+}
+
+static int segmented_write(struct x86_emulate_ctxt *ctxt,
+			   struct segmented_address addr,
+			   const void *data,
+			   unsigned size)
+{
+	int rc;
+	ulong linear;
+
+	rc = linearize(ctxt, addr, size, true, &linear);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+	return ctxt->ops->write_emulated(ctxt, linear, data, size,
+					 &ctxt->exception);
+}
+
+static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt,
+			     struct segmented_address addr,
+			     const void *orig_data, const void *data,
+			     unsigned size)
+{
+	int rc;
+	ulong linear;
+
+	rc = linearize(ctxt, addr, size, true, &linear);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+	return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data,
+					   size, &ctxt->exception);
+}
+
 static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
 			   struct x86_emulate_ops *ops,
 			   unsigned int size, unsigned short port,
@@ -854,7 +1145,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
 		if (n == 0)
 			n = 1;
 		rc->pos = rc->end = 0;
-		if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu))
+		if (!ops->pio_in_emulated(ctxt, size, port, rc->data, n))
 			return 0;
 		rc->end = n * size;
 	}
@@ -864,28 +1155,22 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
 	return 1;
 }
 
-static u32 desc_limit_scaled(struct desc_struct *desc)
-{
-	u32 limit = get_desc_limit(desc);
-
-	return desc->g ? (limit << 12) | 0xfff : limit;
-}
-
 static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
 				     struct x86_emulate_ops *ops,
 				     u16 selector, struct desc_ptr *dt)
 {
 	if (selector & 1 << 2) {
 		struct desc_struct desc;
+		u16 sel;
+
 		memset (dt, 0, sizeof *dt);
-		if (!ops->get_cached_descriptor(&desc, NULL, VCPU_SREG_LDTR,
-						ctxt->vcpu))
+		if (!ops->get_segment(ctxt, &sel, &desc, NULL, VCPU_SREG_LDTR))
 			return;
 
 		dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
 		dt->address = get_desc_base(&desc);
 	} else
-		ops->get_gdt(dt, ctxt->vcpu);
+		ops->get_gdt(ctxt, dt);
 }
 
 /* allowed just for 8 bytes segments */
@@ -903,8 +1188,7 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 	if (dt.size < index * 8 + 7)
 		return emulate_gp(ctxt, selector & 0xfffc);
 	addr = dt.address + index * 8;
-	ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu,
-			    &ctxt->exception);
+	ret = ops->read_std(ctxt, addr, desc, sizeof *desc, &ctxt->exception);
 
        return ret;
 }
@@ -925,8 +1209,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 		return emulate_gp(ctxt, selector & 0xfffc);
 
 	addr = dt.address + index * 8;
-	ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu,
-			     &ctxt->exception);
+	ret = ops->write_std(ctxt, addr, desc, sizeof *desc, &ctxt->exception);
 
 	return ret;
 }
@@ -986,7 +1269,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 
 	rpl = selector & 3;
 	dpl = seg_desc.dpl;
-	cpl = ops->cpl(ctxt->vcpu);
+	cpl = ops->cpl(ctxt);
 
 	switch (seg) {
 	case VCPU_SREG_SS:
@@ -1042,8 +1325,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 			return ret;
 	}
 load:
-	ops->set_segment_selector(selector, seg, ctxt->vcpu);
-	ops->set_cached_descriptor(&seg_desc, 0, seg, ctxt->vcpu);
+	ops->set_segment(ctxt, selector, &seg_desc, 0, seg);
 	return X86EMUL_CONTINUE;
 exception:
 	emulate_exception(ctxt, err_vec, err_code, true);
@@ -1069,8 +1351,7 @@ static void write_register_operand(struct operand *op)
 	}
 }
 
-static inline int writeback(struct x86_emulate_ctxt *ctxt,
-			    struct x86_emulate_ops *ops)
+static int writeback(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
 	struct decode_cache *c = &ctxt->decode;
@@ -1081,23 +1362,22 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
 		break;
 	case OP_MEM:
 		if (c->lock_prefix)
-			rc = ops->cmpxchg_emulated(
-					linear(ctxt, c->dst.addr.mem),
-					&c->dst.orig_val,
-					&c->dst.val,
-					c->dst.bytes,
-					&ctxt->exception,
-					ctxt->vcpu);
+			rc = segmented_cmpxchg(ctxt,
+					       c->dst.addr.mem,
+					       &c->dst.orig_val,
+					       &c->dst.val,
+					       c->dst.bytes);
 		else
-			rc = ops->write_emulated(
-					linear(ctxt, c->dst.addr.mem),
-					&c->dst.val,
-					c->dst.bytes,
-					&ctxt->exception,
-					ctxt->vcpu);
+			rc = segmented_write(ctxt,
+					     c->dst.addr.mem,
+					     &c->dst.val,
+					     c->dst.bytes);
 		if (rc != X86EMUL_CONTINUE)
 			return rc;
 		break;
+	case OP_XMM:
+		write_sse_reg(ctxt, &c->dst.vec_val, c->dst.addr.xmm);
+		break;
 	case OP_NONE:
 		/* no writeback */
 		break;
@@ -1107,21 +1387,21 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
 	return X86EMUL_CONTINUE;
 }
 
-static inline void emulate_push(struct x86_emulate_ctxt *ctxt,
-				struct x86_emulate_ops *ops)
+static int em_push(struct x86_emulate_ctxt *ctxt)
 {
 	struct decode_cache *c = &ctxt->decode;
+	struct segmented_address addr;
 
-	c->dst.type  = OP_MEM;
-	c->dst.bytes = c->op_bytes;
-	c->dst.val = c->src.val;
 	register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes);
-	c->dst.addr.mem.ea = register_address(c, c->regs[VCPU_REGS_RSP]);
-	c->dst.addr.mem.seg = VCPU_SREG_SS;
+	addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]);
+	addr.seg = VCPU_SREG_SS;
+
+	/* Disable writeback. */
+	c->dst.type = OP_NONE;
+	return segmented_write(ctxt, addr, &c->src.val, c->op_bytes);
 }
 
 static int emulate_pop(struct x86_emulate_ctxt *ctxt,
-		       struct x86_emulate_ops *ops,
 		       void *dest, int len)
 {
 	struct decode_cache *c = &ctxt->decode;
@@ -1130,7 +1410,7 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt,
 
 	addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]);
 	addr.seg = VCPU_SREG_SS;
-	rc = read_emulated(ctxt, ops, linear(ctxt, addr), dest, len);
+	rc = segmented_read(ctxt, addr, dest, len);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
@@ -1138,6 +1418,13 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt,
 	return rc;
 }
 
+static int em_pop(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+
+	return emulate_pop(ctxt, &c->dst.val, c->op_bytes);
+}
+
 static int emulate_popf(struct x86_emulate_ctxt *ctxt,
 		       struct x86_emulate_ops *ops,
 		       void *dest, int len)
@@ -1145,9 +1432,9 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt,
 	int rc;
 	unsigned long val, change_mask;
 	int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
-	int cpl = ops->cpl(ctxt->vcpu);
+	int cpl = ops->cpl(ctxt);
 
-	rc = emulate_pop(ctxt, ops, &val, len);
+	rc = emulate_pop(ctxt, &val, len);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
@@ -1179,14 +1466,24 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt,
 	return rc;
 }
 
-static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt,
-			      struct x86_emulate_ops *ops, int seg)
+static int em_popf(struct x86_emulate_ctxt *ctxt)
 {
 	struct decode_cache *c = &ctxt->decode;
 
-	c->src.val = ops->get_segment_selector(seg, ctxt->vcpu);
+	c->dst.type = OP_REG;
+	c->dst.addr.reg = &ctxt->eflags;
+	c->dst.bytes = c->op_bytes;
+	return emulate_popf(ctxt, ctxt->ops, &c->dst.val, c->op_bytes);
+}
 
-	emulate_push(ctxt, ops);
+static int emulate_push_sreg(struct x86_emulate_ctxt *ctxt,
+			     struct x86_emulate_ops *ops, int seg)
+{
+	struct decode_cache *c = &ctxt->decode;
+
+	c->src.val = get_segment_selector(ctxt, seg);
+
+	return em_push(ctxt);
 }
 
 static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
@@ -1196,7 +1493,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
 	unsigned long selector;
 	int rc;
 
-	rc = emulate_pop(ctxt, ops, &selector, c->op_bytes);
+	rc = emulate_pop(ctxt, &selector, c->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
@@ -1204,8 +1501,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
 	return rc;
 }
 
-static int emulate_pusha(struct x86_emulate_ctxt *ctxt,
-			  struct x86_emulate_ops *ops)
+static int em_pusha(struct x86_emulate_ctxt *ctxt)
 {
 	struct decode_cache *c = &ctxt->decode;
 	unsigned long old_esp = c->regs[VCPU_REGS_RSP];
@@ -1216,23 +1512,25 @@ static int emulate_pusha(struct x86_emulate_ctxt *ctxt,
 		(reg == VCPU_REGS_RSP) ?
 		(c->src.val = old_esp) : (c->src.val = c->regs[reg]);
 
-		emulate_push(ctxt, ops);
-
-		rc = writeback(ctxt, ops);
+		rc = em_push(ctxt);
 		if (rc != X86EMUL_CONTINUE)
 			return rc;
 
 		++reg;
 	}
 
-	/* Disable writeback. */
-	c->dst.type = OP_NONE;
-
 	return rc;
 }
 
-static int emulate_popa(struct x86_emulate_ctxt *ctxt,
-			struct x86_emulate_ops *ops)
+static int em_pushf(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+
+	c->src.val =  (unsigned long)ctxt->eflags;
+	return em_push(ctxt);
+}
+
+static int em_popa(struct x86_emulate_ctxt *ctxt)
 {
 	struct decode_cache *c = &ctxt->decode;
 	int rc = X86EMUL_CONTINUE;
@@ -1245,7 +1543,7 @@ static int emulate_popa(struct x86_emulate_ctxt *ctxt,
 			--reg;
 		}
 
-		rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes);
+		rc = emulate_pop(ctxt, &c->regs[reg], c->op_bytes);
 		if (rc != X86EMUL_CONTINUE)
 			break;
 		--reg;
@@ -1265,37 +1563,32 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt,
 
 	/* TODO: Add limit checks */
 	c->src.val = ctxt->eflags;
-	emulate_push(ctxt, ops);
-	rc = writeback(ctxt, ops);
+	rc = em_push(ctxt);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
 	ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC);
 
-	c->src.val = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
-	emulate_push(ctxt, ops);
-	rc = writeback(ctxt, ops);
+	c->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
+	rc = em_push(ctxt);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
 	c->src.val = c->eip;
-	emulate_push(ctxt, ops);
-	rc = writeback(ctxt, ops);
+	rc = em_push(ctxt);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	c->dst.type = OP_NONE;
-
-	ops->get_idt(&dt, ctxt->vcpu);
+	ops->get_idt(ctxt, &dt);
 
 	eip_addr = dt.address + (irq << 2);
 	cs_addr = dt.address + (irq << 2) + 2;
 
-	rc = ops->read_std(cs_addr, &cs, 2, ctxt->vcpu, &ctxt->exception);
+	rc = ops->read_std(ctxt, cs_addr, &cs, 2, &ctxt->exception);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	rc = ops->read_std(eip_addr, &eip, 2, ctxt->vcpu, &ctxt->exception);
+	rc = ops->read_std(ctxt, eip_addr, &eip, 2, &ctxt->exception);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
@@ -1339,7 +1632,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt,
 
 	/* TODO: Add stack limit check */
 
-	rc = emulate_pop(ctxt, ops, &temp_eip, c->op_bytes);
+	rc = emulate_pop(ctxt, &temp_eip, c->op_bytes);
 
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
@@ -1347,12 +1640,12 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt,
 	if (temp_eip & ~0xffff)
 		return emulate_gp(ctxt, 0);
 
-	rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
+	rc = emulate_pop(ctxt, &cs, c->op_bytes);
 
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	rc = emulate_pop(ctxt, ops, &temp_eflags, c->op_bytes);
+	rc = emulate_pop(ctxt, &temp_eflags, c->op_bytes);
 
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
@@ -1394,15 +1687,31 @@ static inline int emulate_iret(struct x86_emulate_ctxt *ctxt,
 	}
 }
 
-static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
-				struct x86_emulate_ops *ops)
+static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+	int rc;
+	unsigned short sel;
+
+	memcpy(&sel, c->src.valptr + c->op_bytes, 2);
+
+	rc = load_segment_descriptor(ctxt, ctxt->ops, sel, VCPU_SREG_CS);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+
+	c->eip = 0;
+	memcpy(&c->eip, c->src.valptr, c->op_bytes);
+	return X86EMUL_CONTINUE;
+}
+
+static int em_grp1a(struct x86_emulate_ctxt *ctxt)
 {
 	struct decode_cache *c = &ctxt->decode;
 
-	return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes);
+	return emulate_pop(ctxt, &c->dst.val, c->dst.bytes);
 }
 
-static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt)
+static int em_grp2(struct x86_emulate_ctxt *ctxt)
 {
 	struct decode_cache *c = &ctxt->decode;
 	switch (c->modrm_reg) {
@@ -1429,10 +1738,10 @@ static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt)
 		emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags);
 		break;
 	}
+	return X86EMUL_CONTINUE;
 }
 
-static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
-			       struct x86_emulate_ops *ops)
+static int em_grp3(struct x86_emulate_ctxt *ctxt)
 {
 	struct decode_cache *c = &ctxt->decode;
 	unsigned long *rax = &c->regs[VCPU_REGS_RAX];
@@ -1471,10 +1780,10 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
 	return X86EMUL_CONTINUE;
 }
 
-static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
-			       struct x86_emulate_ops *ops)
+static int em_grp45(struct x86_emulate_ctxt *ctxt)
 {
 	struct decode_cache *c = &ctxt->decode;
+	int rc = X86EMUL_CONTINUE;
 
 	switch (c->modrm_reg) {
 	case 0:	/* inc */
@@ -1488,21 +1797,23 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
 		old_eip = c->eip;
 		c->eip = c->src.val;
 		c->src.val = old_eip;
-		emulate_push(ctxt, ops);
+		rc = em_push(ctxt);
 		break;
 	}
 	case 4: /* jmp abs */
 		c->eip = c->src.val;
 		break;
+	case 5: /* jmp far */
+		rc = em_jmp_far(ctxt);
+		break;
 	case 6:	/* push */
-		emulate_push(ctxt, ops);
+		rc = em_push(ctxt);
 		break;
 	}
-	return X86EMUL_CONTINUE;
+	return rc;
 }
 
-static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
-			       struct x86_emulate_ops *ops)
+static int em_grp9(struct x86_emulate_ctxt *ctxt)
 {
 	struct decode_cache *c = &ctxt->decode;
 	u64 old = c->dst.orig_val64;
@@ -1528,12 +1839,12 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
 	int rc;
 	unsigned long cs;
 
-	rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes);
+	rc = emulate_pop(ctxt, &c->eip, c->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	if (c->op_bytes == 4)
 		c->eip = (u32)c->eip;
-	rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
+	rc = emulate_pop(ctxt, &cs, c->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS);
@@ -1562,8 +1873,10 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
 			struct x86_emulate_ops *ops, struct desc_struct *cs,
 			struct desc_struct *ss)
 {
+	u16 selector;
+
 	memset(cs, 0, sizeof(struct desc_struct));
-	ops->get_cached_descriptor(cs, NULL, VCPU_SREG_CS, ctxt->vcpu);
+	ops->get_segment(ctxt, &selector, cs, NULL, VCPU_SREG_CS);
 	memset(ss, 0, sizeof(struct desc_struct));
 
 	cs->l = 0;		/* will be adjusted later */
@@ -1593,44 +1906,44 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 	struct desc_struct cs, ss;
 	u64 msr_data;
 	u16 cs_sel, ss_sel;
+	u64 efer = 0;
 
 	/* syscall is not available in real mode */
 	if (ctxt->mode == X86EMUL_MODE_REAL ||
 	    ctxt->mode == X86EMUL_MODE_VM86)
 		return emulate_ud(ctxt);
 
+	ops->get_msr(ctxt, MSR_EFER, &efer);
 	setup_syscalls_segments(ctxt, ops, &cs, &ss);
 
-	ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
+	ops->get_msr(ctxt, MSR_STAR, &msr_data);
 	msr_data >>= 32;
 	cs_sel = (u16)(msr_data & 0xfffc);
 	ss_sel = (u16)(msr_data + 8);
 
-	if (is_long_mode(ctxt->vcpu)) {
+	if (efer & EFER_LMA) {
 		cs.d = 0;
 		cs.l = 1;
 	}
-	ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu);
-	ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu);
-	ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu);
-	ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu);
+	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
+	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
 
 	c->regs[VCPU_REGS_RCX] = c->eip;
-	if (is_long_mode(ctxt->vcpu)) {
+	if (efer & EFER_LMA) {
 #ifdef CONFIG_X86_64
 		c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;
 
-		ops->get_msr(ctxt->vcpu,
+		ops->get_msr(ctxt,
 			     ctxt->mode == X86EMUL_MODE_PROT64 ?
 			     MSR_LSTAR : MSR_CSTAR, &msr_data);
 		c->eip = msr_data;
 
-		ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data);
+		ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
 		ctxt->eflags &= ~(msr_data | EFLG_RF);
 #endif
 	} else {
 		/* legacy mode */
-		ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
+		ops->get_msr(ctxt, MSR_STAR, &msr_data);
 		c->eip = (u32)msr_data;
 
 		ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
@@ -1646,7 +1959,9 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 	struct desc_struct cs, ss;
 	u64 msr_data;
 	u16 cs_sel, ss_sel;
+	u64 efer = 0;
 
+	ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
 	/* inject #GP if in real mode */
 	if (ctxt->mode == X86EMUL_MODE_REAL)
 		return emulate_gp(ctxt, 0);
@@ -1659,7 +1974,7 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 
 	setup_syscalls_segments(ctxt, ops, &cs, &ss);
 
-	ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
+	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
 	switch (ctxt->mode) {
 	case X86EMUL_MODE_PROT32:
 		if ((msr_data & 0xfffc) == 0x0)
@@ -1676,21 +1991,18 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 	cs_sel &= ~SELECTOR_RPL_MASK;
 	ss_sel = cs_sel + 8;
 	ss_sel &= ~SELECTOR_RPL_MASK;
-	if (ctxt->mode == X86EMUL_MODE_PROT64
-		|| is_long_mode(ctxt->vcpu)) {
+	if (ctxt->mode == X86EMUL_MODE_PROT64 || (efer & EFER_LMA)) {
 		cs.d = 0;
 		cs.l = 1;
 	}
 
-	ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu);
-	ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu);
-	ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu);
-	ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu);
+	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
+	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
 
-	ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data);
+	ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
 	c->eip = msr_data;
 
-	ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
+	ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
 	c->regs[VCPU_REGS_RSP] = msr_data;
 
 	return X86EMUL_CONTINUE;
@@ -1719,7 +2031,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 
 	cs.dpl = 3;
 	ss.dpl = 3;
-	ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
+	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
 	switch (usermode) {
 	case X86EMUL_MODE_PROT32:
 		cs_sel = (u16)(msr_data + 16);
@@ -1739,10 +2051,8 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 	cs_sel |= SELECTOR_RPL_MASK;
 	ss_sel |= SELECTOR_RPL_MASK;
 
-	ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu);
-	ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu);
-	ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu);
-	ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu);
+	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
+	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
 
 	c->eip = c->regs[VCPU_REGS_RDX];
 	c->regs[VCPU_REGS_RSP] = c->regs[VCPU_REGS_RCX];
@@ -1759,7 +2069,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt,
 	if (ctxt->mode == X86EMUL_MODE_VM86)
 		return true;
 	iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
-	return ops->cpl(ctxt->vcpu) > iopl;
+	return ops->cpl(ctxt) > iopl;
 }
 
 static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
@@ -1769,11 +2079,11 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
 	struct desc_struct tr_seg;
 	u32 base3;
 	int r;
-	u16 io_bitmap_ptr, perm, bit_idx = port & 0x7;
+	u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
 	unsigned mask = (1 << len) - 1;
 	unsigned long base;
 
-	ops->get_cached_descriptor(&tr_seg, &base3, VCPU_SREG_TR, ctxt->vcpu);
+	ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
 	if (!tr_seg.p)
 		return false;
 	if (desc_limit_scaled(&tr_seg) < 103)
@@ -1782,13 +2092,12 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
 #ifdef CONFIG_X86_64
 	base |= ((u64)base3) << 32;
 #endif
-	r = ops->read_std(base + 102, &io_bitmap_ptr, 2, ctxt->vcpu, NULL);
+	r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL);
 	if (r != X86EMUL_CONTINUE)
 		return false;
 	if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
 		return false;
-	r = ops->read_std(base + io_bitmap_ptr + port/8, &perm, 2, ctxt->vcpu,
-			  NULL);
+	r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL);
 	if (r != X86EMUL_CONTINUE)
 		return false;
 	if ((perm >> bit_idx) & mask)
@@ -1829,11 +2138,11 @@ static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
 	tss->si = c->regs[VCPU_REGS_RSI];
 	tss->di = c->regs[VCPU_REGS_RDI];
 
-	tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
-	tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
-	tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
-	tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
-	tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
+	tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
+	tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
+	tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
+	tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
+	tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR);
 }
 
 static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
@@ -1858,11 +2167,11 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
 	 * SDM says that segment selectors are loaded before segment
 	 * descriptors
 	 */
-	ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu);
-	ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
-	ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
-	ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
-	ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);
+	set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR);
+	set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
+	set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
+	set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
+	set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
 
 	/*
 	 * Now load segment descriptors. If fault happenes at this stage
@@ -1896,7 +2205,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
 	int ret;
 	u32 new_tss_base = get_desc_base(new_desc);
 
-	ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
+	ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
 			    &ctxt->exception);
 	if (ret != X86EMUL_CONTINUE)
 		/* FIXME: need to provide precise fault address */
@@ -1904,13 +2213,13 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
 
 	save_state_to_tss16(ctxt, ops, &tss_seg);
 
-	ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
+	ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
 			     &ctxt->exception);
 	if (ret != X86EMUL_CONTINUE)
 		/* FIXME: need to provide precise fault address */
 		return ret;
 
-	ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
+	ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
 			    &ctxt->exception);
 	if (ret != X86EMUL_CONTINUE)
 		/* FIXME: need to provide precise fault address */
@@ -1919,10 +2228,10 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
 	if (old_tss_sel != 0xffff) {
 		tss_seg.prev_task_link = old_tss_sel;
 
-		ret = ops->write_std(new_tss_base,
+		ret = ops->write_std(ctxt, new_tss_base,
 				     &tss_seg.prev_task_link,
 				     sizeof tss_seg.prev_task_link,
-				     ctxt->vcpu, &ctxt->exception);
+				     &ctxt->exception);
 		if (ret != X86EMUL_CONTINUE)
 			/* FIXME: need to provide precise fault address */
 			return ret;
@@ -1937,7 +2246,7 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
 {
 	struct decode_cache *c = &ctxt->decode;
 
-	tss->cr3 = ops->get_cr(3, ctxt->vcpu);
+	tss->cr3 = ops->get_cr(ctxt, 3);
 	tss->eip = c->eip;
 	tss->eflags = ctxt->eflags;
 	tss->eax = c->regs[VCPU_REGS_RAX];
@@ -1949,13 +2258,13 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
 	tss->esi = c->regs[VCPU_REGS_RSI];
 	tss->edi = c->regs[VCPU_REGS_RDI];
 
-	tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
-	tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
-	tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
-	tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
-	tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu);
-	tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu);
-	tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
+	tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
+	tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
+	tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
+	tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
+	tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
+	tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
+	tss->ldt_selector = get_segment_selector(ctxt, VCPU_SREG_LDTR);
 }
 
 static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
@@ -1965,7 +2274,7 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
 	struct decode_cache *c = &ctxt->decode;
 	int ret;
 
-	if (ops->set_cr(3, tss->cr3, ctxt->vcpu))
+	if (ops->set_cr(ctxt, 3, tss->cr3))
 		return emulate_gp(ctxt, 0);
 	c->eip = tss->eip;
 	ctxt->eflags = tss->eflags | 2;
@@ -1982,13 +2291,13 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
 	 * SDM says that segment selectors are loaded before segment
 	 * descriptors
 	 */
-	ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu);
-	ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
-	ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
-	ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
-	ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);
-	ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu);
-	ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu);
+	set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
+	set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
+	set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
+	set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
+	set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
+	set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
+	set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
 
 	/*
 	 * Now load segment descriptors. If fault happenes at this stage
@@ -2028,7 +2337,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
 	int ret;
 	u32 new_tss_base = get_desc_base(new_desc);
 
-	ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
+	ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
 			    &ctxt->exception);
 	if (ret != X86EMUL_CONTINUE)
 		/* FIXME: need to provide precise fault address */
@@ -2036,13 +2345,13 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
 
 	save_state_to_tss32(ctxt, ops, &tss_seg);
 
-	ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
+	ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
 			     &ctxt->exception);
 	if (ret != X86EMUL_CONTINUE)
 		/* FIXME: need to provide precise fault address */
 		return ret;
 
-	ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
+	ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
 			    &ctxt->exception);
 	if (ret != X86EMUL_CONTINUE)
 		/* FIXME: need to provide precise fault address */
@@ -2051,10 +2360,10 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
 	if (old_tss_sel != 0xffff) {
 		tss_seg.prev_task_link = old_tss_sel;
 
-		ret = ops->write_std(new_tss_base,
+		ret = ops->write_std(ctxt, new_tss_base,
 				     &tss_seg.prev_task_link,
 				     sizeof tss_seg.prev_task_link,
-				     ctxt->vcpu, &ctxt->exception);
+				     &ctxt->exception);
 		if (ret != X86EMUL_CONTINUE)
 			/* FIXME: need to provide precise fault address */
 			return ret;
@@ -2070,9 +2379,9 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
 {
 	struct desc_struct curr_tss_desc, next_tss_desc;
 	int ret;
-	u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu);
+	u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
 	ulong old_tss_base =
-		ops->get_cached_segment_base(VCPU_SREG_TR, ctxt->vcpu);
+		ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
 	u32 desc_limit;
 
 	/* FIXME: old_tss_base == ~0 ? */
@@ -2088,7 +2397,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
 
 	if (reason != TASK_SWITCH_IRET) {
 		if ((tss_selector & 3) > next_tss_desc.dpl ||
-		    ops->cpl(ctxt->vcpu) > next_tss_desc.dpl)
+		    ops->cpl(ctxt) > next_tss_desc.dpl)
 			return emulate_gp(ctxt, 0);
 	}
 
@@ -2132,9 +2441,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
 					 &next_tss_desc);
 	}
 
-	ops->set_cr(0,  ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu);
-	ops->set_cached_descriptor(&next_tss_desc, 0, VCPU_SREG_TR, ctxt->vcpu);
-	ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu);
+	ops->set_cr(ctxt, 0,  ops->get_cr(ctxt, 0) | X86_CR0_TS);
+	ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);
 
 	if (has_error_code) {
 		struct decode_cache *c = &ctxt->decode;
@@ -2142,7 +2450,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
 		c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
 		c->lock_prefix = 0;
 		c->src.val = (unsigned long) error_code;
-		emulate_push(ctxt, ops);
+		ret = em_push(ctxt);
 	}
 
 	return ret;
@@ -2162,13 +2470,10 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
 	rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason,
 				     has_error_code, error_code);
 
-	if (rc == X86EMUL_CONTINUE) {
-		rc = writeback(ctxt, ops);
-		if (rc == X86EMUL_CONTINUE)
-			ctxt->eip = c->eip;
-	}
+	if (rc == X86EMUL_CONTINUE)
+		ctxt->eip = c->eip;
 
-	return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
+	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
 }
 
 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg,
@@ -2182,12 +2487,6 @@ static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg,
 	op->addr.mem.seg = seg;
 }
 
-static int em_push(struct x86_emulate_ctxt *ctxt)
-{
-	emulate_push(ctxt, ctxt->ops);
-	return X86EMUL_CONTINUE;
-}
-
 static int em_das(struct x86_emulate_ctxt *ctxt)
 {
 	struct decode_cache *c = &ctxt->decode;
@@ -2234,7 +2533,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
 	ulong old_eip;
 	int rc;
 
-	old_cs = ctxt->ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
+	old_cs = get_segment_selector(ctxt, VCPU_SREG_CS);
 	old_eip = c->eip;
 
 	memcpy(&sel, c->src.valptr + c->op_bytes, 2);
@@ -2245,20 +2544,12 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
 	memcpy(&c->eip, c->src.valptr, c->op_bytes);
 
 	c->src.val = old_cs;
-	emulate_push(ctxt, ctxt->ops);
-	rc = writeback(ctxt, ctxt->ops);
+	rc = em_push(ctxt);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
 	c->src.val = old_eip;
-	emulate_push(ctxt, ctxt->ops);
-	rc = writeback(ctxt, ctxt->ops);
-	if (rc != X86EMUL_CONTINUE)
-		return rc;
-
-	c->dst.type = OP_NONE;
-
-	return X86EMUL_CONTINUE;
+	return em_push(ctxt);
 }
 
 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
@@ -2269,13 +2560,79 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
 	c->dst.type = OP_REG;
 	c->dst.addr.reg = &c->eip;
 	c->dst.bytes = c->op_bytes;
-	rc = emulate_pop(ctxt, ctxt->ops, &c->dst.val, c->op_bytes);
+	rc = emulate_pop(ctxt, &c->dst.val, c->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.val);
 	return X86EMUL_CONTINUE;
 }
 
+static int em_add(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+
+	emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
+	return X86EMUL_CONTINUE;
+}
+
+static int em_or(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+
+	emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
+	return X86EMUL_CONTINUE;
+}
+
+static int em_adc(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+
+	emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags);
+	return X86EMUL_CONTINUE;
+}
+
+static int em_sbb(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+
+	emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
+	return X86EMUL_CONTINUE;
+}
+
+static int em_and(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+
+	emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
+	return X86EMUL_CONTINUE;
+}
+
+static int em_sub(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+
+	emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags);
+	return X86EMUL_CONTINUE;
+}
+
+static int em_xor(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+
+	emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags);
+	return X86EMUL_CONTINUE;
+}
+
+static int em_cmp(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+
+	emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
+	/* Disable writeback. */
+	c->dst.type = OP_NONE;
+	return X86EMUL_CONTINUE;
+}
+
 static int em_imul(struct x86_emulate_ctxt *ctxt)
 {
 	struct decode_cache *c = &ctxt->decode;
@@ -2306,13 +2663,10 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt)
 
 static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
 {
-	unsigned cpl = ctxt->ops->cpl(ctxt->vcpu);
 	struct decode_cache *c = &ctxt->decode;
 	u64 tsc = 0;
 
-	if (cpl > 0 && (ctxt->ops->get_cr(4, ctxt->vcpu) & X86_CR4_TSD))
-		return emulate_gp(ctxt, 0);
-	ctxt->ops->get_msr(ctxt->vcpu, MSR_IA32_TSC, &tsc);
+	ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
 	c->regs[VCPU_REGS_RAX] = (u32)tsc;
 	c->regs[VCPU_REGS_RDX] = tsc >> 32;
 	return X86EMUL_CONTINUE;
@@ -2325,22 +2679,375 @@ static int em_mov(struct x86_emulate_ctxt *ctxt)
 	return X86EMUL_CONTINUE;
 }
 
+static int em_movdqu(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+	memcpy(&c->dst.vec_val, &c->src.vec_val, c->op_bytes);
+	return X86EMUL_CONTINUE;
+}
+
+static int em_invlpg(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+	int rc;
+	ulong linear;
+
+	rc = linearize(ctxt, c->src.addr.mem, 1, false, &linear);
+	if (rc == X86EMUL_CONTINUE)
+		ctxt->ops->invlpg(ctxt, linear);
+	/* Disable writeback. */
+	c->dst.type = OP_NONE;
+	return X86EMUL_CONTINUE;
+}
+
+static int em_clts(struct x86_emulate_ctxt *ctxt)
+{
+	ulong cr0;
+
+	cr0 = ctxt->ops->get_cr(ctxt, 0);
+	cr0 &= ~X86_CR0_TS;
+	ctxt->ops->set_cr(ctxt, 0, cr0);
+	return X86EMUL_CONTINUE;
+}
+
+static int em_vmcall(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+	int rc;
+
+	if (c->modrm_mod != 3 || c->modrm_rm != 1)
+		return X86EMUL_UNHANDLEABLE;
+
+	rc = ctxt->ops->fix_hypercall(ctxt);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+
+	/* Let the processor re-execute the fixed hypercall */
+	c->eip = ctxt->eip;
+	/* Disable writeback. */
+	c->dst.type = OP_NONE;
+	return X86EMUL_CONTINUE;
+}
+
+static int em_lgdt(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+	struct desc_ptr desc_ptr;
+	int rc;
+
+	rc = read_descriptor(ctxt, c->src.addr.mem,
+			     &desc_ptr.size, &desc_ptr.address,
+			     c->op_bytes);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+	ctxt->ops->set_gdt(ctxt, &desc_ptr);
+	/* Disable writeback. */
+	c->dst.type = OP_NONE;
+	return X86EMUL_CONTINUE;
+}
+
+static int em_vmmcall(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+	int rc;
+
+	rc = ctxt->ops->fix_hypercall(ctxt);
+
+	/* Disable writeback. */
+	c->dst.type = OP_NONE;
+	return rc;
+}
+
+static int em_lidt(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+	struct desc_ptr desc_ptr;
+	int rc;
+
+	rc = read_descriptor(ctxt, c->src.addr.mem,
+			     &desc_ptr.size, &desc_ptr.address,
+			     c->op_bytes);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+	ctxt->ops->set_idt(ctxt, &desc_ptr);
+	/* Disable writeback. */
+	c->dst.type = OP_NONE;
+	return X86EMUL_CONTINUE;
+}
+
+static int em_smsw(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+
+	c->dst.bytes = 2;
+	c->dst.val = ctxt->ops->get_cr(ctxt, 0);
+	return X86EMUL_CONTINUE;
+}
+
+static int em_lmsw(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+	ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
+			  | (c->src.val & 0x0f));
+	c->dst.type = OP_NONE;
+	return X86EMUL_CONTINUE;
+}
+
+static bool valid_cr(int nr)
+{
+	switch (nr) {
+	case 0:
+	case 2 ... 4:
+	case 8:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static int check_cr_read(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+
+	if (!valid_cr(c->modrm_reg))
+		return emulate_ud(ctxt);
+
+	return X86EMUL_CONTINUE;
+}
+
+static int check_cr_write(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+	u64 new_val = c->src.val64;
+	int cr = c->modrm_reg;
+	u64 efer = 0;
+
+	static u64 cr_reserved_bits[] = {
+		0xffffffff00000000ULL,
+		0, 0, 0, /* CR3 checked later */
+		CR4_RESERVED_BITS,
+		0, 0, 0,
+		CR8_RESERVED_BITS,
+	};
+
+	if (!valid_cr(cr))
+		return emulate_ud(ctxt);
+
+	if (new_val & cr_reserved_bits[cr])
+		return emulate_gp(ctxt, 0);
+
+	switch (cr) {
+	case 0: {
+		u64 cr4;
+		if (((new_val & X86_CR0_PG) && !(new_val & X86_CR0_PE)) ||
+		    ((new_val & X86_CR0_NW) && !(new_val & X86_CR0_CD)))
+			return emulate_gp(ctxt, 0);
+
+		cr4 = ctxt->ops->get_cr(ctxt, 4);
+		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+
+		if ((new_val & X86_CR0_PG) && (efer & EFER_LME) &&
+		    !(cr4 & X86_CR4_PAE))
+			return emulate_gp(ctxt, 0);
+
+		break;
+		}
+	case 3: {
+		u64 rsvd = 0;
+
+		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+		if (efer & EFER_LMA)
+			rsvd = CR3_L_MODE_RESERVED_BITS;
+		else if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_PAE)
+			rsvd = CR3_PAE_RESERVED_BITS;
+		else if (ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PG)
+			rsvd = CR3_NONPAE_RESERVED_BITS;
+
+		if (new_val & rsvd)
+			return emulate_gp(ctxt, 0);
+
+		break;
+		}
+	case 4: {
+		u64 cr4;
+
+		cr4 = ctxt->ops->get_cr(ctxt, 4);
+		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+
+		if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE))
+			return emulate_gp(ctxt, 0);
+
+		break;
+		}
+	}
+
+	return X86EMUL_CONTINUE;
+}
+
+static int check_dr7_gd(struct x86_emulate_ctxt *ctxt)
+{
+	unsigned long dr7;
+
+	ctxt->ops->get_dr(ctxt, 7, &dr7);
+
+	/* Check if DR7.Global_Enable is set */
+	return dr7 & (1 << 13);
+}
+
+static int check_dr_read(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+	int dr = c->modrm_reg;
+	u64 cr4;
+
+	if (dr > 7)
+		return emulate_ud(ctxt);
+
+	cr4 = ctxt->ops->get_cr(ctxt, 4);
+	if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
+		return emulate_ud(ctxt);
+
+	if (check_dr7_gd(ctxt))
+		return emulate_db(ctxt);
+
+	return X86EMUL_CONTINUE;
+}
+
+static int check_dr_write(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+	u64 new_val = c->src.val64;
+	int dr = c->modrm_reg;
+
+	if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
+		return emulate_gp(ctxt, 0);
+
+	return check_dr_read(ctxt);
+}
+
+static int check_svme(struct x86_emulate_ctxt *ctxt)
+{
+	u64 efer;
+
+	ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+
+	if (!(efer & EFER_SVME))
+		return emulate_ud(ctxt);
+
+	return X86EMUL_CONTINUE;
+}
+
+static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
+{
+	u64 rax = ctxt->decode.regs[VCPU_REGS_RAX];
+
+	/* Valid physical address? */
+	if (rax & 0xffff000000000000ULL)
+		return emulate_gp(ctxt, 0);
+
+	return check_svme(ctxt);
+}
+
+static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
+{
+	u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
+
+	if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
+		return emulate_ud(ctxt);
+
+	return X86EMUL_CONTINUE;
+}
+
+static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
+{
+	u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
+	u64 rcx = ctxt->decode.regs[VCPU_REGS_RCX];
+
+	if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
+	    (rcx > 3))
+		return emulate_gp(ctxt, 0);
+
+	return X86EMUL_CONTINUE;
+}
+
+static int check_perm_in(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+
+	c->dst.bytes = min(c->dst.bytes, 4u);
+	if (!emulator_io_permited(ctxt, ctxt->ops, c->src.val, c->dst.bytes))
+		return emulate_gp(ctxt, 0);
+
+	return X86EMUL_CONTINUE;
+}
+
+static int check_perm_out(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+
+	c->src.bytes = min(c->src.bytes, 4u);
+	if (!emulator_io_permited(ctxt, ctxt->ops, c->dst.val, c->src.bytes))
+		return emulate_gp(ctxt, 0);
+
+	return X86EMUL_CONTINUE;
+}
+
 #define D(_y) { .flags = (_y) }
+#define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i }
+#define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \
+		      .check_perm = (_p) }
 #define N    D(0)
+#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
 #define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) }
-#define GD(_f, _g) { .flags = ((_f) | Group | GroupDual), .u.gdual = (_g) }
+#define GD(_f, _g) { .flags = ((_f) | GroupDual), .u.gdual = (_g) }
 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
+#define II(_f, _e, _i) \
+	{ .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i }
+#define IIP(_f, _e, _i, _p) \
+	{ .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i, \
+	  .check_perm = (_p) }
+#define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
 
 #define D2bv(_f)      D((_f) | ByteOp), D(_f)
+#define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
 #define I2bv(_f, _e)  I((_f) | ByteOp, _e), I(_f, _e)
 
-#define D6ALU(_f) D2bv((_f) | DstMem | SrcReg | ModRM),			\
-		D2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock),		\
-		D2bv(((_f) & ~Lock) | DstAcc | SrcImm)
+#define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e),		\
+		I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e),	\
+		I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
 
+static struct opcode group7_rm1[] = {
+	DI(SrcNone | ModRM | Priv, monitor),
+	DI(SrcNone | ModRM | Priv, mwait),
+	N, N, N, N, N, N,
+};
+
+static struct opcode group7_rm3[] = {
+	DIP(SrcNone | ModRM | Prot | Priv, vmrun,   check_svme_pa),
+	II(SrcNone | ModRM | Prot | VendorSpecific, em_vmmcall, vmmcall),
+	DIP(SrcNone | ModRM | Prot | Priv, vmload,  check_svme_pa),
+	DIP(SrcNone | ModRM | Prot | Priv, vmsave,  check_svme_pa),
+	DIP(SrcNone | ModRM | Prot | Priv, stgi,    check_svme),
+	DIP(SrcNone | ModRM | Prot | Priv, clgi,    check_svme),
+	DIP(SrcNone | ModRM | Prot | Priv, skinit,  check_svme),
+	DIP(SrcNone | ModRM | Prot | Priv, invlpga, check_svme),
+};
+
+static struct opcode group7_rm7[] = {
+	N,
+	DIP(SrcNone | ModRM, rdtscp, check_rdtsc),
+	N, N, N, N, N, N,
+};
 
 static struct opcode group1[] = {
-	X7(D(Lock)), N
+	I(Lock, em_add),
+	I(Lock, em_or),
+	I(Lock, em_adc),
+	I(Lock, em_sbb),
+	I(Lock, em_and),
+	I(Lock, em_sub),
+	I(Lock, em_xor),
+	I(0, em_cmp),
 };
 
 static struct opcode group1A[] = {
@@ -2366,16 +3073,28 @@ static struct opcode group5[] = {
 	D(SrcMem | ModRM | Stack), N,
 };
 
+static struct opcode group6[] = {
+	DI(ModRM | Prot,        sldt),
+	DI(ModRM | Prot,        str),
+	DI(ModRM | Prot | Priv, lldt),
+	DI(ModRM | Prot | Priv, ltr),
+	N, N, N, N,
+};
+
 static struct group_dual group7 = { {
-	N, N, D(ModRM | SrcMem | Priv), D(ModRM | SrcMem | Priv),
-	D(SrcNone | ModRM | DstMem | Mov), N,
-	D(SrcMem16 | ModRM | Mov | Priv),
-	D(SrcMem | ModRM | ByteOp | Priv | NoAccess),
+	DI(ModRM | Mov | DstMem | Priv, sgdt),
+	DI(ModRM | Mov | DstMem | Priv, sidt),
+	II(ModRM | SrcMem | Priv, em_lgdt, lgdt),
+	II(ModRM | SrcMem | Priv, em_lidt, lidt),
+	II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N,
+	II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw),
+	II(SrcMem | ModRM | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
 }, {
-	D(SrcNone | ModRM | Priv | VendorSpecific), N,
-	N, D(SrcNone | ModRM | Priv | VendorSpecific),
-	D(SrcNone | ModRM | DstMem | Mov), N,
-	D(SrcMem16 | ModRM | Mov | Priv), N,
+	I(SrcNone | ModRM | Priv | VendorSpecific, em_vmcall),
+	EXT(0, group7_rm1),
+	N, EXT(0, group7_rm3),
+	II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N,
+	II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw), EXT(0, group7_rm7),
 } };
 
 static struct opcode group8[] = {
@@ -2394,35 +3113,40 @@ static struct opcode group11[] = {
 	I(DstMem | SrcImm | ModRM | Mov, em_mov), X7(D(Undefined)),
 };
 
+static struct gprefix pfx_0f_6f_0f_7f = {
+	N, N, N, I(Sse, em_movdqu),
+};
+
 static struct opcode opcode_table[256] = {
 	/* 0x00 - 0x07 */
-	D6ALU(Lock),
+	I6ALU(Lock, em_add),
 	D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
 	/* 0x08 - 0x0F */
-	D6ALU(Lock),
+	I6ALU(Lock, em_or),
 	D(ImplicitOps | Stack | No64), N,
 	/* 0x10 - 0x17 */
-	D6ALU(Lock),
+	I6ALU(Lock, em_adc),
 	D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
 	/* 0x18 - 0x1F */
-	D6ALU(Lock),
+	I6ALU(Lock, em_sbb),
 	D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
 	/* 0x20 - 0x27 */
-	D6ALU(Lock), N, N,
+	I6ALU(Lock, em_and), N, N,
 	/* 0x28 - 0x2F */
-	D6ALU(Lock), N, I(ByteOp | DstAcc | No64, em_das),
+	I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
 	/* 0x30 - 0x37 */
-	D6ALU(Lock), N, N,
+	I6ALU(Lock, em_xor), N, N,
 	/* 0x38 - 0x3F */
-	D6ALU(0), N, N,
+	I6ALU(0, em_cmp), N, N,
 	/* 0x40 - 0x4F */
 	X16(D(DstReg)),
 	/* 0x50 - 0x57 */
 	X8(I(SrcReg | Stack, em_push)),
 	/* 0x58 - 0x5F */
-	X8(D(DstReg | Stack)),
+	X8(I(DstReg | Stack, em_pop)),
 	/* 0x60 - 0x67 */
-	D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
+	I(ImplicitOps | Stack | No64, em_pusha),
+	I(ImplicitOps | Stack | No64, em_popa),
 	N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ ,
 	N, N, N, N,
 	/* 0x68 - 0x6F */
@@ -2430,8 +3154,8 @@ static struct opcode opcode_table[256] = {
 	I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
 	I(SrcImmByte | Mov | Stack, em_push),
 	I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
-	D2bv(DstDI | Mov | String), /* insb, insw/insd */
-	D2bv(SrcSI | ImplicitOps | String), /* outsb, outsw/outsd */
+	D2bvIP(DstDI | Mov | String, ins, check_perm_in), /* insb, insw/insd */
+	D2bvIP(SrcSI | ImplicitOps | String, outs, check_perm_out), /* outsb, outsw/outsd */
 	/* 0x70 - 0x7F */
 	X16(D(SrcImmByte)),
 	/* 0x80 - 0x87 */
@@ -2446,21 +3170,22 @@ static struct opcode opcode_table[256] = {
 	D(DstMem | SrcNone | ModRM | Mov), D(ModRM | SrcMem | NoAccess | DstReg),
 	D(ImplicitOps | SrcMem16 | ModRM), G(0, group1A),
 	/* 0x90 - 0x97 */
-	X8(D(SrcAcc | DstReg)),
+	DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
 	/* 0x98 - 0x9F */
 	D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
 	I(SrcImmFAddr | No64, em_call_far), N,
-	D(ImplicitOps | Stack), D(ImplicitOps | Stack), N, N,
+	II(ImplicitOps | Stack, em_pushf, pushf),
+	II(ImplicitOps | Stack, em_popf, popf), N, N,
 	/* 0xA0 - 0xA7 */
 	I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
 	I2bv(DstMem | SrcAcc | Mov | MemAbs, em_mov),
 	I2bv(SrcSI | DstDI | Mov | String, em_mov),
-	D2bv(SrcSI | DstDI | String),
+	I2bv(SrcSI | DstDI | String, em_cmp),
 	/* 0xA8 - 0xAF */
 	D2bv(DstAcc | SrcImm),
 	I2bv(SrcAcc | DstDI | Mov | String, em_mov),
 	I2bv(SrcSI | DstAcc | Mov | String, em_mov),
-	D2bv(SrcAcc | DstDI | String),
+	I2bv(SrcAcc | DstDI | String, em_cmp),
 	/* 0xB0 - 0xB7 */
 	X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
 	/* 0xB8 - 0xBF */
@@ -2473,7 +3198,8 @@ static struct opcode opcode_table[256] = {
 	G(ByteOp, group11), G(0, group11),
 	/* 0xC8 - 0xCF */
 	N, N, N, D(ImplicitOps | Stack),
-	D(ImplicitOps), D(SrcImmByte), D(ImplicitOps | No64), D(ImplicitOps),
+	D(ImplicitOps), DI(SrcImmByte, intn),
+	D(ImplicitOps | No64), DI(ImplicitOps, iret),
 	/* 0xD0 - 0xD7 */
 	D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM),
 	N, N, N, N,
@@ -2481,14 +3207,17 @@ static struct opcode opcode_table[256] = {
 	N, N, N, N, N, N, N, N,
 	/* 0xE0 - 0xE7 */
 	X4(D(SrcImmByte)),
-	D2bv(SrcImmUByte | DstAcc), D2bv(SrcAcc | DstImmUByte),
+	D2bvIP(SrcImmUByte | DstAcc, in,  check_perm_in),
+	D2bvIP(SrcAcc | DstImmUByte, out, check_perm_out),
 	/* 0xE8 - 0xEF */
 	D(SrcImm | Stack), D(SrcImm | ImplicitOps),
 	D(SrcImmFAddr | No64), D(SrcImmByte | ImplicitOps),
-	D2bv(SrcNone | DstAcc),	D2bv(SrcAcc | ImplicitOps),
+	D2bvIP(SrcNone | DstAcc,     in,  check_perm_in),
+	D2bvIP(SrcAcc | ImplicitOps, out, check_perm_out),
 	/* 0xF0 - 0xF7 */
-	N, N, N, N,
-	D(ImplicitOps | Priv), D(ImplicitOps), G(ByteOp, group3), G(0, group3),
+	N, DI(ImplicitOps, icebp), N, N,
+	DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
+	G(ByteOp, group3), G(0, group3),
 	/* 0xF8 - 0xFF */
 	D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), D(ImplicitOps),
 	D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
@@ -2496,20 +3225,24 @@ static struct opcode opcode_table[256] = {
 
 static struct opcode twobyte_table[256] = {
 	/* 0x00 - 0x0F */
-	N, GD(0, &group7), N, N,
-	N, D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv), N,
-	D(ImplicitOps | Priv), D(ImplicitOps | Priv), N, N,
+	G(0, group6), GD(0, &group7), N, N,
+	N, D(ImplicitOps | VendorSpecific), DI(ImplicitOps | Priv, clts), N,
+	DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
 	N, D(ImplicitOps | ModRM), N, N,
 	/* 0x10 - 0x1F */
 	N, N, N, N, N, N, N, N, D(ImplicitOps | ModRM), N, N, N, N, N, N, N,
 	/* 0x20 - 0x2F */
-	D(ModRM | DstMem | Priv | Op3264), D(ModRM | DstMem | Priv | Op3264),
-	D(ModRM | SrcMem | Priv | Op3264), D(ModRM | SrcMem | Priv | Op3264),
+	DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read),
+	DIP(ModRM | DstMem | Priv | Op3264, dr_read, check_dr_read),
+	DIP(ModRM | SrcMem | Priv | Op3264, cr_write, check_cr_write),
+	DIP(ModRM | SrcMem | Priv | Op3264, dr_write, check_dr_write),
 	N, N, N, N,
 	N, N, N, N, N, N, N, N,
 	/* 0x30 - 0x3F */
-	D(ImplicitOps | Priv), I(ImplicitOps, em_rdtsc),
-	D(ImplicitOps | Priv), N,
+	DI(ImplicitOps | Priv, wrmsr),
+	IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
+	DI(ImplicitOps | Priv, rdmsr),
+	DIP(ImplicitOps | Priv, rdpmc, check_rdpmc),
 	D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv | VendorSpecific),
 	N, N,
 	N, N, N, N, N, N, N, N,
@@ -2518,21 +3251,27 @@ static struct opcode twobyte_table[256] = {
 	/* 0x50 - 0x5F */
 	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
 	/* 0x60 - 0x6F */
-	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
+	N, N, N, N,
+	N, N, N, N,
+	N, N, N, N,
+	N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
 	/* 0x70 - 0x7F */
-	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
+	N, N, N, N,
+	N, N, N, N,
+	N, N, N, N,
+	N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
 	/* 0x80 - 0x8F */
 	X16(D(SrcImm)),
 	/* 0x90 - 0x9F */
 	X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
 	/* 0xA0 - 0xA7 */
 	D(ImplicitOps | Stack), D(ImplicitOps | Stack),
-	N, D(DstMem | SrcReg | ModRM | BitOp),
+	DI(ImplicitOps, cpuid), D(DstMem | SrcReg | ModRM | BitOp),
 	D(DstMem | SrcReg | Src2ImmByte | ModRM),
 	D(DstMem | SrcReg | Src2CL | ModRM), N, N,
 	/* 0xA8 - 0xAF */
 	D(ImplicitOps | Stack), D(ImplicitOps | Stack),
-	N, D(DstMem | SrcReg | ModRM | BitOp | Lock),
+	DI(ImplicitOps, rsm), D(DstMem | SrcReg | ModRM | BitOp | Lock),
 	D(DstMem | SrcReg | Src2ImmByte | ModRM),
 	D(DstMem | SrcReg | Src2CL | ModRM),
 	D(ModRM), I(DstReg | SrcMem | ModRM, em_imul),
@@ -2564,10 +3303,13 @@ static struct opcode twobyte_table[256] = {
 #undef G
 #undef GD
 #undef I
+#undef GP
+#undef EXT
 
 #undef D2bv
+#undef D2bvIP
 #undef I2bv
-#undef D6ALU
+#undef I6ALU
 
 static unsigned imm_size(struct decode_cache *c)
 {
@@ -2625,8 +3367,9 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
 	struct decode_cache *c = &ctxt->decode;
 	int rc = X86EMUL_CONTINUE;
 	int mode = ctxt->mode;
-	int def_op_bytes, def_ad_bytes, dual, goffset;
-	struct opcode opcode, *g_mod012, *g_mod3;
+	int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
+	bool op_prefix = false;
+	struct opcode opcode;
 	struct operand memop = { .type = OP_NONE };
 
 	c->eip = ctxt->eip;
@@ -2634,7 +3377,6 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
 	c->fetch.end = c->fetch.start + insn_len;
 	if (insn_len > 0)
 		memcpy(c->fetch.data, insn, insn_len);
-	ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS);
 
 	switch (mode) {
 	case X86EMUL_MODE_REAL:
@@ -2662,6 +3404,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
 	for (;;) {
 		switch (c->b = insn_fetch(u8, 1, c->eip)) {
 		case 0x66:	/* operand-size override */
+			op_prefix = true;
 			/* switch between 2/4 bytes */
 			c->op_bytes = def_op_bytes ^ 6;
 			break;
@@ -2692,10 +3435,8 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
 			c->lock_prefix = 1;
 			break;
 		case 0xf2:	/* REPNE/REPNZ */
-			c->rep_prefix = REPNE_PREFIX;
-			break;
 		case 0xf3:	/* REP/REPE/REPZ */
-			c->rep_prefix = REPE_PREFIX;
+			c->rep_prefix = c->b;
 			break;
 		default:
 			goto done_prefixes;
@@ -2722,29 +3463,49 @@ done_prefixes:
 	}
 	c->d = opcode.flags;
 
-	if (c->d & Group) {
-		dual = c->d & GroupDual;
-		c->modrm = insn_fetch(u8, 1, c->eip);
-		--c->eip;
-
-		if (c->d & GroupDual) {
-			g_mod012 = opcode.u.gdual->mod012;
-			g_mod3 = opcode.u.gdual->mod3;
-		} else
-			g_mod012 = g_mod3 = opcode.u.group;
-
-		c->d &= ~(Group | GroupDual);
-
-		goffset = (c->modrm >> 3) & 7;
+	while (c->d & GroupMask) {
+		switch (c->d & GroupMask) {
+		case Group:
+			c->modrm = insn_fetch(u8, 1, c->eip);
+			--c->eip;
+			goffset = (c->modrm >> 3) & 7;
+			opcode = opcode.u.group[goffset];
+			break;
+		case GroupDual:
+			c->modrm = insn_fetch(u8, 1, c->eip);
+			--c->eip;
+			goffset = (c->modrm >> 3) & 7;
+			if ((c->modrm >> 6) == 3)
+				opcode = opcode.u.gdual->mod3[goffset];
+			else
+				opcode = opcode.u.gdual->mod012[goffset];
+			break;
+		case RMExt:
+			goffset = c->modrm & 7;
+			opcode = opcode.u.group[goffset];
+			break;
+		case Prefix:
+			if (c->rep_prefix && op_prefix)
+				return X86EMUL_UNHANDLEABLE;
+			simd_prefix = op_prefix ? 0x66 : c->rep_prefix;
+			switch (simd_prefix) {
+			case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
+			case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
+			case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
+			case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
+			}
+			break;
+		default:
+			return X86EMUL_UNHANDLEABLE;
+		}
 
-		if ((c->modrm >> 6) == 3)
-			opcode = g_mod3[goffset];
-		else
-			opcode = g_mod012[goffset];
+		c->d &= ~GroupMask;
 		c->d |= opcode.flags;
 	}
 
 	c->execute = opcode.u.execute;
+	c->check_perm = opcode.check_perm;
+	c->intercept = opcode.intercept;
 
 	/* Unrecognised? */
 	if (c->d == 0 || (c->d & Undefined))
@@ -2763,6 +3524,9 @@ done_prefixes:
 			c->op_bytes = 4;
 	}
 
+	if (c->d & Sse)
+		c->op_bytes = 16;
+
 	/* ModRM and SIB bytes. */
 	if (c->d & ModRM) {
 		rc = decode_modrm(ctxt, ops, &memop);
@@ -2776,7 +3540,7 @@ done_prefixes:
 	if (!c->has_seg_override)
 		set_seg_override(c, VCPU_SREG_DS);
 
-	memop.addr.mem.seg = seg_override(ctxt, ops, c);
+	memop.addr.mem.seg = seg_override(ctxt, c);
 
 	if (memop.type == OP_MEM && c->ad_bytes != 8)
 		memop.addr.mem.ea = (u32)memop.addr.mem.ea;
@@ -2792,7 +3556,7 @@ done_prefixes:
 	case SrcNone:
 		break;
 	case SrcReg:
-		decode_register_operand(&c->src, c, 0);
+		decode_register_operand(ctxt, &c->src, c, 0);
 		break;
 	case SrcMem16:
 		memop.bytes = 2;
@@ -2836,7 +3600,7 @@ done_prefixes:
 		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
 		c->src.addr.mem.ea =
 			register_address(c, c->regs[VCPU_REGS_RSI]);
-		c->src.addr.mem.seg = seg_override(ctxt, ops, c),
+		c->src.addr.mem.seg = seg_override(ctxt, c);
 		c->src.val = 0;
 		break;
 	case SrcImmFAddr:
@@ -2883,7 +3647,7 @@ done_prefixes:
 	/* Decode and fetch the destination operand: register or memory. */
 	switch (c->d & DstMask) {
 	case DstReg:
-		decode_register_operand(&c->dst, c,
+		decode_register_operand(ctxt, &c->dst, c,
 			 c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
 		break;
 	case DstImmUByte:
@@ -2926,7 +3690,7 @@ done_prefixes:
 	}
 
 done:
-	return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
+	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
 }
 
 static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
@@ -2979,12 +3743,51 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 		goto done;
 	}
 
+	if ((c->d & Sse)
+	    && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)
+		|| !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
+		rc = emulate_ud(ctxt);
+		goto done;
+	}
+
+	if ((c->d & Sse) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
+		rc = emulate_nm(ctxt);
+		goto done;
+	}
+
+	if (unlikely(ctxt->guest_mode) && c->intercept) {
+		rc = emulator_check_intercept(ctxt, c->intercept,
+					      X86_ICPT_PRE_EXCEPT);
+		if (rc != X86EMUL_CONTINUE)
+			goto done;
+	}
+
 	/* Privileged instruction can be executed only in CPL=0 */
-	if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) {
+	if ((c->d & Priv) && ops->cpl(ctxt)) {
 		rc = emulate_gp(ctxt, 0);
 		goto done;
 	}
 
+	/* Instruction can only be executed in protected mode */
+	if ((c->d & Prot) && !(ctxt->mode & X86EMUL_MODE_PROT)) {
+		rc = emulate_ud(ctxt);
+		goto done;
+	}
+
+	/* Do instruction specific permission checks */
+	if (c->check_perm) {
+		rc = c->check_perm(ctxt);
+		if (rc != X86EMUL_CONTINUE)
+			goto done;
+	}
+
+	if (unlikely(ctxt->guest_mode) && c->intercept) {
+		rc = emulator_check_intercept(ctxt, c->intercept,
+					      X86_ICPT_POST_EXCEPT);
+		if (rc != X86EMUL_CONTINUE)
+			goto done;
+	}
+
 	if (c->rep_prefix && (c->d & String)) {
 		/* All REP prefixes have the same first termination condition */
 		if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) {
@@ -2994,16 +3797,16 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 	}
 
 	if ((c->src.type == OP_MEM) && !(c->d & NoAccess)) {
-		rc = read_emulated(ctxt, ops, linear(ctxt, c->src.addr.mem),
-					c->src.valptr, c->src.bytes);
+		rc = segmented_read(ctxt, c->src.addr.mem,
+				    c->src.valptr, c->src.bytes);
 		if (rc != X86EMUL_CONTINUE)
 			goto done;
 		c->src.orig_val64 = c->src.val64;
 	}
 
 	if (c->src2.type == OP_MEM) {
-		rc = read_emulated(ctxt, ops, linear(ctxt, c->src2.addr.mem),
-					&c->src2.val, c->src2.bytes);
+		rc = segmented_read(ctxt, c->src2.addr.mem,
+				    &c->src2.val, c->src2.bytes);
 		if (rc != X86EMUL_CONTINUE)
 			goto done;
 	}
@@ -3014,7 +3817,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 
 	if ((c->dst.type == OP_MEM) && !(c->d & Mov)) {
 		/* optimisation - avoid slow emulated read if Mov */
-		rc = read_emulated(ctxt, ops, linear(ctxt, c->dst.addr.mem),
+		rc = segmented_read(ctxt, c->dst.addr.mem,
 				   &c->dst.val, c->dst.bytes);
 		if (rc != X86EMUL_CONTINUE)
 			goto done;
@@ -3023,6 +3826,13 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 
 special_insn:
 
+	if (unlikely(ctxt->guest_mode) && c->intercept) {
+		rc = emulator_check_intercept(ctxt, c->intercept,
+					      X86_ICPT_POST_MEMACCESS);
+		if (rc != X86EMUL_CONTINUE)
+			goto done;
+	}
+
 	if (c->execute) {
 		rc = c->execute(ctxt);
 		if (rc != X86EMUL_CONTINUE)
@@ -3034,75 +3844,33 @@ special_insn:
 		goto twobyte_insn;
 
 	switch (c->b) {
-	case 0x00 ... 0x05:
-	      add:		/* add */
-		emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
-		break;
 	case 0x06:		/* push es */
-		emulate_push_sreg(ctxt, ops, VCPU_SREG_ES);
+		rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_ES);
 		break;
 	case 0x07:		/* pop es */
 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES);
 		break;
-	case 0x08 ... 0x0d:
-	      or:		/* or */
-		emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
-		break;
 	case 0x0e:		/* push cs */
-		emulate_push_sreg(ctxt, ops, VCPU_SREG_CS);
-		break;
-	case 0x10 ... 0x15:
-	      adc:		/* adc */
-		emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags);
+		rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_CS);
 		break;
 	case 0x16:		/* push ss */
-		emulate_push_sreg(ctxt, ops, VCPU_SREG_SS);
+		rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_SS);
 		break;
 	case 0x17:		/* pop ss */
 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS);
 		break;
-	case 0x18 ... 0x1d:
-	      sbb:		/* sbb */
-		emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
-		break;
 	case 0x1e:		/* push ds */
-		emulate_push_sreg(ctxt, ops, VCPU_SREG_DS);
+		rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_DS);
 		break;
 	case 0x1f:		/* pop ds */
 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS);
 		break;
-	case 0x20 ... 0x25:
-	      and:		/* and */
-		emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
-		break;
-	case 0x28 ... 0x2d:
-	      sub:		/* sub */
-		emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags);
-		break;
-	case 0x30 ... 0x35:
-	      xor:		/* xor */
-		emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags);
-		break;
-	case 0x38 ... 0x3d:
-	      cmp:		/* cmp */
-		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
-		break;
 	case 0x40 ... 0x47: /* inc r16/r32 */
 		emulate_1op("inc", c->dst, ctxt->eflags);
 		break;
 	case 0x48 ... 0x4f: /* dec r16/r32 */
 		emulate_1op("dec", c->dst, ctxt->eflags);
 		break;
-	case 0x58 ... 0x5f: /* pop reg */
-	pop_instruction:
-		rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes);
-		break;
-	case 0x60:	/* pusha */
-		rc = emulate_pusha(ctxt, ops);
-		break;
-	case 0x61:	/* popa */
-		rc = emulate_popa(ctxt, ops);
-		break;
 	case 0x63:		/* movsxd */
 		if (ctxt->mode != X86EMUL_MODE_PROT64)
 			goto cannot_emulate;
@@ -3121,26 +3889,6 @@ special_insn:
 		if (test_cc(c->b, ctxt->eflags))
 			jmp_rel(c, c->src.val);
 		break;
-	case 0x80 ... 0x83:	/* Grp1 */
-		switch (c->modrm_reg) {
-		case 0:
-			goto add;
-		case 1:
-			goto or;
-		case 2:
-			goto adc;
-		case 3:
-			goto sbb;
-		case 4:
-			goto and;
-		case 5:
-			goto sub;
-		case 6:
-			goto xor;
-		case 7:
-			goto cmp;
-		}
-		break;
 	case 0x84 ... 0x85:
 	test:
 		emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
@@ -3162,7 +3910,7 @@ special_insn:
 			rc = emulate_ud(ctxt);
 			goto done;
 		}
-		c->dst.val = ops->get_segment_selector(c->modrm_reg, ctxt->vcpu);
+		c->dst.val = get_segment_selector(ctxt, c->modrm_reg);
 		break;
 	case 0x8d: /* lea r16/r32, m */
 		c->dst.val = c->src.addr.mem.ea;
@@ -3187,7 +3935,7 @@ special_insn:
 		break;
 	}
 	case 0x8f:		/* pop (sole member of Grp1a) */
-		rc = emulate_grp1a(ctxt, ops);
+		rc = em_grp1a(ctxt);
 		break;
 	case 0x90 ... 0x97: /* nop / xchg reg, rax */
 		if (c->dst.addr.reg == &c->regs[VCPU_REGS_RAX])
@@ -3200,31 +3948,17 @@ special_insn:
 		case 8: c->dst.val = (s32)c->dst.val; break;
 		}
 		break;
-	case 0x9c: /* pushf */
-		c->src.val =  (unsigned long) ctxt->eflags;
-		emulate_push(ctxt, ops);
-		break;
-	case 0x9d: /* popf */
-		c->dst.type = OP_REG;
-		c->dst.addr.reg = &ctxt->eflags;
-		c->dst.bytes = c->op_bytes;
-		rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes);
-		break;
-	case 0xa6 ... 0xa7:	/* cmps */
-		c->dst.type = OP_NONE; /* Disable writeback. */
-		goto cmp;
 	case 0xa8 ... 0xa9:	/* test ax, imm */
 		goto test;
-	case 0xae ... 0xaf:	/* scas */
-		goto cmp;
 	case 0xc0 ... 0xc1:
-		emulate_grp2(ctxt);
+		rc = em_grp2(ctxt);
 		break;
 	case 0xc3: /* ret */
 		c->dst.type = OP_REG;
 		c->dst.addr.reg = &c->eip;
 		c->dst.bytes = c->op_bytes;
-		goto pop_instruction;
+		rc = em_pop(ctxt);
+		break;
 	case 0xc4:		/* les */
 		rc = emulate_load_segment(ctxt, ops, VCPU_SREG_ES);
 		break;
@@ -3252,11 +3986,11 @@ special_insn:
 		rc = emulate_iret(ctxt, ops);
 		break;
 	case 0xd0 ... 0xd1:	/* Grp2 */
-		emulate_grp2(ctxt);
+		rc = em_grp2(ctxt);
 		break;
 	case 0xd2 ... 0xd3:	/* Grp2 */
 		c->src.val = c->regs[VCPU_REGS_RCX];
-		emulate_grp2(ctxt);
+		rc = em_grp2(ctxt);
 		break;
 	case 0xe0 ... 0xe2:	/* loop/loopz/loopnz */
 		register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1);
@@ -3278,23 +4012,14 @@ special_insn:
 		long int rel = c->src.val;
 		c->src.val = (unsigned long) c->eip;
 		jmp_rel(c, rel);
-		emulate_push(ctxt, ops);
+		rc = em_push(ctxt);
 		break;
 	}
 	case 0xe9: /* jmp rel */
 		goto jmp;
-	case 0xea: { /* jmp far */
-		unsigned short sel;
-	jump_far:
-		memcpy(&sel, c->src.valptr + c->op_bytes, 2);
-
-		if (load_segment_descriptor(ctxt, ops, sel, VCPU_SREG_CS))
-			goto done;
-
-		c->eip = 0;
-		memcpy(&c->eip, c->src.valptr, c->op_bytes);
+	case 0xea: /* jmp far */
+		rc = em_jmp_far(ctxt);
 		break;
-	}
 	case 0xeb:
 	      jmp:		/* jmp rel short */
 		jmp_rel(c, c->src.val);
@@ -3304,11 +4029,6 @@ special_insn:
 	case 0xed: /* in (e/r)ax,dx */
 		c->src.val = c->regs[VCPU_REGS_RDX];
 	do_io_in:
-		c->dst.bytes = min(c->dst.bytes, 4u);
-		if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) {
-			rc = emulate_gp(ctxt, 0);
-			goto done;
-		}
 		if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val,
 				     &c->dst.val))
 			goto done; /* IO is needed */
@@ -3317,25 +4037,19 @@ special_insn:
 	case 0xef: /* out dx,(e/r)ax */
 		c->dst.val = c->regs[VCPU_REGS_RDX];
 	do_io_out:
-		c->src.bytes = min(c->src.bytes, 4u);
-		if (!emulator_io_permited(ctxt, ops, c->dst.val,
-					  c->src.bytes)) {
-			rc = emulate_gp(ctxt, 0);
-			goto done;
-		}
-		ops->pio_out_emulated(c->src.bytes, c->dst.val,
-				      &c->src.val, 1, ctxt->vcpu);
+		ops->pio_out_emulated(ctxt, c->src.bytes, c->dst.val,
+				      &c->src.val, 1);
 		c->dst.type = OP_NONE;	/* Disable writeback. */
 		break;
 	case 0xf4:              /* hlt */
-		ctxt->vcpu->arch.halt_request = 1;
+		ctxt->ops->halt(ctxt);
 		break;
 	case 0xf5:	/* cmc */
 		/* complement carry flag from eflags reg */
 		ctxt->eflags ^= EFLG_CF;
 		break;
 	case 0xf6 ... 0xf7:	/* Grp3 */
-		rc = emulate_grp3(ctxt, ops);
+		rc = em_grp3(ctxt);
 		break;
 	case 0xf8: /* clc */
 		ctxt->eflags &= ~EFLG_CF;
@@ -3366,13 +4080,11 @@ special_insn:
 		ctxt->eflags |= EFLG_DF;
 		break;
 	case 0xfe: /* Grp4 */
-	grp45:
-		rc = emulate_grp45(ctxt, ops);
+		rc = em_grp45(ctxt);
 		break;
 	case 0xff: /* Grp5 */
-		if (c->modrm_reg == 5)
-			goto jump_far;
-		goto grp45;
+		rc = em_grp45(ctxt);
+		break;
 	default:
 		goto cannot_emulate;
 	}
@@ -3381,7 +4093,7 @@ special_insn:
 		goto done;
 
 writeback:
-	rc = writeback(ctxt, ops);
+	rc = writeback(ctxt);
 	if (rc != X86EMUL_CONTINUE)
 		goto done;
 
@@ -3392,7 +4104,7 @@ writeback:
 	c->dst.type = saved_dst_type;
 
 	if ((c->d & SrcMask) == SrcSI)
-		string_addr_inc(ctxt, seg_override(ctxt, ops, c),
+		string_addr_inc(ctxt, seg_override(ctxt, c),
 				VCPU_REGS_RSI, &c->src);
 
 	if ((c->d & DstMask) == DstDI)
@@ -3427,115 +4139,34 @@ writeback:
 done:
 	if (rc == X86EMUL_PROPAGATE_FAULT)
 		ctxt->have_exception = true;
+	if (rc == X86EMUL_INTERCEPTED)
+		return EMULATION_INTERCEPTED;
+
 	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
 
 twobyte_insn:
 	switch (c->b) {
-	case 0x01: /* lgdt, lidt, lmsw */
-		switch (c->modrm_reg) {
-			u16 size;
-			unsigned long address;
-
-		case 0: /* vmcall */
-			if (c->modrm_mod != 3 || c->modrm_rm != 1)
-				goto cannot_emulate;
-
-			rc = kvm_fix_hypercall(ctxt->vcpu);
-			if (rc != X86EMUL_CONTINUE)
-				goto done;
-
-			/* Let the processor re-execute the fixed hypercall */
-			c->eip = ctxt->eip;
-			/* Disable writeback. */
-			c->dst.type = OP_NONE;
-			break;
-		case 2: /* lgdt */
-			rc = read_descriptor(ctxt, ops, c->src.addr.mem,
-					     &size, &address, c->op_bytes);
-			if (rc != X86EMUL_CONTINUE)
-				goto done;
-			realmode_lgdt(ctxt->vcpu, size, address);
-			/* Disable writeback. */
-			c->dst.type = OP_NONE;
-			break;
-		case 3: /* lidt/vmmcall */
-			if (c->modrm_mod == 3) {
-				switch (c->modrm_rm) {
-				case 1:
-					rc = kvm_fix_hypercall(ctxt->vcpu);
-					break;
-				default:
-					goto cannot_emulate;
-				}
-			} else {
-				rc = read_descriptor(ctxt, ops, c->src.addr.mem,
-						     &size, &address,
-						     c->op_bytes);
-				if (rc != X86EMUL_CONTINUE)
-					goto done;
-				realmode_lidt(ctxt->vcpu, size, address);
-			}
-			/* Disable writeback. */
-			c->dst.type = OP_NONE;
-			break;
-		case 4: /* smsw */
-			c->dst.bytes = 2;
-			c->dst.val = ops->get_cr(0, ctxt->vcpu);
-			break;
-		case 6: /* lmsw */
-			ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0eul) |
-				    (c->src.val & 0x0f), ctxt->vcpu);
-			c->dst.type = OP_NONE;
-			break;
-		case 5: /* not defined */
-			emulate_ud(ctxt);
-			rc = X86EMUL_PROPAGATE_FAULT;
-			goto done;
-		case 7: /* invlpg*/
-			emulate_invlpg(ctxt->vcpu,
-				       linear(ctxt, c->src.addr.mem));
-			/* Disable writeback. */
-			c->dst.type = OP_NONE;
-			break;
-		default:
-			goto cannot_emulate;
-		}
-		break;
 	case 0x05: 		/* syscall */
 		rc = emulate_syscall(ctxt, ops);
 		break;
 	case 0x06:
-		emulate_clts(ctxt->vcpu);
+		rc = em_clts(ctxt);
 		break;
 	case 0x09:		/* wbinvd */
-		kvm_emulate_wbinvd(ctxt->vcpu);
+		(ctxt->ops->wbinvd)(ctxt);
 		break;
 	case 0x08:		/* invd */
 	case 0x0d:		/* GrpP (prefetch) */
 	case 0x18:		/* Grp16 (prefetch/nop) */
 		break;
 	case 0x20: /* mov cr, reg */
-		switch (c->modrm_reg) {
-		case 1:
-		case 5 ... 7:
-		case 9 ... 15:
-			emulate_ud(ctxt);
-			rc = X86EMUL_PROPAGATE_FAULT;
-			goto done;
-		}
-		c->dst.val = ops->get_cr(c->modrm_reg, ctxt->vcpu);
+		c->dst.val = ops->get_cr(ctxt, c->modrm_reg);
 		break;
 	case 0x21: /* mov from dr to reg */
-		if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
-		    (c->modrm_reg == 4 || c->modrm_reg == 5)) {
-			emulate_ud(ctxt);
-			rc = X86EMUL_PROPAGATE_FAULT;
-			goto done;
-		}
-		ops->get_dr(c->modrm_reg, &c->dst.val, ctxt->vcpu);
+		ops->get_dr(ctxt, c->modrm_reg, &c->dst.val);
 		break;
 	case 0x22: /* mov reg, cr */
-		if (ops->set_cr(c->modrm_reg, c->src.val, ctxt->vcpu)) {
+		if (ops->set_cr(ctxt, c->modrm_reg, c->src.val)) {
 			emulate_gp(ctxt, 0);
 			rc = X86EMUL_PROPAGATE_FAULT;
 			goto done;
@@ -3543,16 +4174,9 @@ twobyte_insn:
 		c->dst.type = OP_NONE;
 		break;
 	case 0x23: /* mov from reg to dr */
-		if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
-		    (c->modrm_reg == 4 || c->modrm_reg == 5)) {
-			emulate_ud(ctxt);
-			rc = X86EMUL_PROPAGATE_FAULT;
-			goto done;
-		}
-
-		if (ops->set_dr(c->modrm_reg, c->src.val &
+		if (ops->set_dr(ctxt, c->modrm_reg, c->src.val &
 				((ctxt->mode == X86EMUL_MODE_PROT64) ?
-				 ~0ULL : ~0U), ctxt->vcpu) < 0) {
+				 ~0ULL : ~0U)) < 0) {
 			/* #UD condition is already handled by the code above */
 			emulate_gp(ctxt, 0);
 			rc = X86EMUL_PROPAGATE_FAULT;
@@ -3565,7 +4189,7 @@ twobyte_insn:
 		/* wrmsr */
 		msr_data = (u32)c->regs[VCPU_REGS_RAX]
 			| ((u64)c->regs[VCPU_REGS_RDX] << 32);
-		if (ops->set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) {
+		if (ops->set_msr(ctxt, c->regs[VCPU_REGS_RCX], msr_data)) {
 			emulate_gp(ctxt, 0);
 			rc = X86EMUL_PROPAGATE_FAULT;
 			goto done;
@@ -3574,7 +4198,7 @@ twobyte_insn:
 		break;
 	case 0x32:
 		/* rdmsr */
-		if (ops->get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) {
+		if (ops->get_msr(ctxt, c->regs[VCPU_REGS_RCX], &msr_data)) {
 			emulate_gp(ctxt, 0);
 			rc = X86EMUL_PROPAGATE_FAULT;
 			goto done;
@@ -3603,7 +4227,7 @@ twobyte_insn:
 		c->dst.val = test_cc(c->b, ctxt->eflags);
 		break;
 	case 0xa0:	  /* push fs */
-		emulate_push_sreg(ctxt, ops, VCPU_SREG_FS);
+		rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_FS);
 		break;
 	case 0xa1:	 /* pop fs */
 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS);
@@ -3620,7 +4244,7 @@ twobyte_insn:
 		emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
 		break;
 	case 0xa8:	/* push gs */
-		emulate_push_sreg(ctxt, ops, VCPU_SREG_GS);
+		rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_GS);
 		break;
 	case 0xa9:	/* pop gs */
 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS);
@@ -3727,7 +4351,7 @@ twobyte_insn:
 							(u64) c->src.val;
 		break;
 	case 0xc7:		/* Grp9 (cmpxchg8b) */
-		rc = emulate_grp9(ctxt, ops);
+		rc = em_grp9(ctxt);
 		break;
 	default:
 		goto cannot_emulate;
@@ -3739,5 +4363,5 @@ twobyte_insn:
 	goto writeback;
 
 cannot_emulate:
-	return -1;
+	return EMULATION_FAILED;
 }
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 46d08ca0b48..51a97426e79 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -33,7 +33,6 @@ struct kvm_kpit_state {
 };
 
 struct kvm_pit {
-	unsigned long base_addresss;
 	struct kvm_io_device dev;
 	struct kvm_io_device speaker_dev;
 	struct kvm *kvm;
@@ -51,7 +50,6 @@ struct kvm_pit {
 #define KVM_MAX_PIT_INTR_INTERVAL   HZ / 100
 #define KVM_PIT_CHANNEL_MASK	    0x3
 
-void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start);
 struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags);
 void kvm_free_pit(struct kvm *kvm);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index ba910d14941..53e2d084bff 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -75,7 +75,6 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm);
 void kvm_destroy_pic(struct kvm *kvm);
 int kvm_pic_read_irq(struct kvm *kvm);
 void kvm_pic_update_irq(struct kvm_pic *s);
-void kvm_pic_clear_isr_ack(struct kvm *kvm);
 
 static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
 {
@@ -100,7 +99,6 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
 void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
 void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
 
-int pit_has_pending_timer(struct kvm_vcpu *vcpu);
 int apic_has_pending_timer(struct kvm_vcpu *vcpu);
 
 #endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 22fae7593ee..28418054b88 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1206,7 +1206,7 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
 
 static void nonpaging_update_pte(struct kvm_vcpu *vcpu,
 				 struct kvm_mmu_page *sp, u64 *spte,
-				 const void *pte, unsigned long mmu_seq)
+				 const void *pte)
 {
 	WARN_ON(1);
 }
@@ -3163,9 +3163,8 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
 }
 
 static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
-				  struct kvm_mmu_page *sp,
-				  u64 *spte,
-				  const void *new, unsigned long mmu_seq)
+				  struct kvm_mmu_page *sp, u64 *spte,
+				  const void *new)
 {
 	if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
 		++vcpu->kvm->stat.mmu_pde_zapped;
@@ -3173,7 +3172,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
         }
 
 	++vcpu->kvm->stat.mmu_pte_updated;
-	vcpu->arch.mmu.update_pte(vcpu, sp, spte, new, mmu_seq);
+	vcpu->arch.mmu.update_pte(vcpu, sp, spte, new);
 }
 
 static bool need_remote_flush(u64 old, u64 new)
@@ -3229,7 +3228,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	struct kvm_mmu_page *sp;
 	struct hlist_node *node;
 	LIST_HEAD(invalid_list);
-	unsigned long mmu_seq;
 	u64 entry, gentry, *spte;
 	unsigned pte_size, page_offset, misaligned, quadrant, offset;
 	int level, npte, invlpg_counter, r, flooded = 0;
@@ -3271,9 +3269,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		break;
 	}
 
-	mmu_seq = vcpu->kvm->mmu_notifier_seq;
-	smp_rmb();
-
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
 		gentry = 0;
@@ -3345,8 +3340,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 			if (gentry &&
 			      !((sp->role.word ^ vcpu->arch.mmu.base_role.word)
 			      & mask.word))
-				mmu_pte_write_new_pte(vcpu, sp, spte, &gentry,
-						      mmu_seq);
+				mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
 			if (!remote_flush && need_remote_flush(entry, *spte))
 				remote_flush = true;
 			++spte;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index c6397795d86..6c4dc010c4c 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -78,15 +78,19 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
 	return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT;
 }
 
-static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
-			 gfn_t table_gfn, unsigned index,
-			 pt_element_t orig_pte, pt_element_t new_pte)
+static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+			       pt_element_t __user *ptep_user, unsigned index,
+			       pt_element_t orig_pte, pt_element_t new_pte)
 {
+	int npages;
 	pt_element_t ret;
 	pt_element_t *table;
 	struct page *page;
 
-	page = gfn_to_page(kvm, table_gfn);
+	npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1, &page);
+	/* Check if the user is doing something meaningless. */
+	if (unlikely(npages != 1))
+		return -EFAULT;
 
 	table = kmap_atomic(page, KM_USER0);
 	ret = CMPXCHG(&table[index], orig_pte, new_pte);
@@ -117,6 +121,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 				    gva_t addr, u32 access)
 {
 	pt_element_t pte;
+	pt_element_t __user *ptep_user;
 	gfn_t table_gfn;
 	unsigned index, pt_access, uninitialized_var(pte_access);
 	gpa_t pte_gpa;
@@ -152,6 +157,9 @@ walk:
 	pt_access = ACC_ALL;
 
 	for (;;) {
+		gfn_t real_gfn;
+		unsigned long host_addr;
+
 		index = PT_INDEX(addr, walker->level);
 
 		table_gfn = gpte_to_gfn(pte);
@@ -160,43 +168,64 @@ walk:
 		walker->table_gfn[walker->level - 1] = table_gfn;
 		walker->pte_gpa[walker->level - 1] = pte_gpa;
 
-		if (kvm_read_guest_page_mmu(vcpu, mmu, table_gfn, &pte,
-					    offset, sizeof(pte),
-					    PFERR_USER_MASK|PFERR_WRITE_MASK)) {
+		real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
+					      PFERR_USER_MASK|PFERR_WRITE_MASK);
+		if (unlikely(real_gfn == UNMAPPED_GVA)) {
+			present = false;
+			break;
+		}
+		real_gfn = gpa_to_gfn(real_gfn);
+
+		host_addr = gfn_to_hva(vcpu->kvm, real_gfn);
+		if (unlikely(kvm_is_error_hva(host_addr))) {
+			present = false;
+			break;
+		}
+
+		ptep_user = (pt_element_t __user *)((void *)host_addr + offset);
+		if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte)))) {
 			present = false;
 			break;
 		}
 
 		trace_kvm_mmu_paging_element(pte, walker->level);
 
-		if (!is_present_gpte(pte)) {
+		if (unlikely(!is_present_gpte(pte))) {
 			present = false;
 			break;
 		}
 
-		if (is_rsvd_bits_set(&vcpu->arch.mmu, pte, walker->level)) {
+		if (unlikely(is_rsvd_bits_set(&vcpu->arch.mmu, pte,
+					      walker->level))) {
 			rsvd_fault = true;
 			break;
 		}
 
-		if (write_fault && !is_writable_pte(pte))
-			if (user_fault || is_write_protection(vcpu))
-				eperm = true;
+		if (unlikely(write_fault && !is_writable_pte(pte)
+			     && (user_fault || is_write_protection(vcpu))))
+			eperm = true;
 
-		if (user_fault && !(pte & PT_USER_MASK))
+		if (unlikely(user_fault && !(pte & PT_USER_MASK)))
 			eperm = true;
 
 #if PTTYPE == 64
-		if (fetch_fault && (pte & PT64_NX_MASK))
+		if (unlikely(fetch_fault && (pte & PT64_NX_MASK)))
 			eperm = true;
 #endif
 
-		if (!eperm && !rsvd_fault && !(pte & PT_ACCESSED_MASK)) {
+		if (!eperm && !rsvd_fault
+		    && unlikely(!(pte & PT_ACCESSED_MASK))) {
+			int ret;
 			trace_kvm_mmu_set_accessed_bit(table_gfn, index,
 						       sizeof(pte));
-			if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn,
-			    index, pte, pte|PT_ACCESSED_MASK))
+			ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
+						  pte, pte|PT_ACCESSED_MASK);
+			if (unlikely(ret < 0)) {
+				present = false;
+				break;
+			} else if (ret)
 				goto walk;
+
 			mark_page_dirty(vcpu->kvm, table_gfn);
 			pte |= PT_ACCESSED_MASK;
 		}
@@ -241,17 +270,21 @@ walk:
 		--walker->level;
 	}
 
-	if (!present || eperm || rsvd_fault)
+	if (unlikely(!present || eperm || rsvd_fault))
 		goto error;
 
-	if (write_fault && !is_dirty_gpte(pte)) {
-		bool ret;
+	if (write_fault && unlikely(!is_dirty_gpte(pte))) {
+		int ret;
 
 		trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
-		ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte,
-			    pte|PT_DIRTY_MASK);
-		if (ret)
+		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
+					  pte, pte|PT_DIRTY_MASK);
+		if (unlikely(ret < 0)) {
+			present = false;
+			goto error;
+		} else if (ret)
 			goto walk;
+
 		mark_page_dirty(vcpu->kvm, table_gfn);
 		pte |= PT_DIRTY_MASK;
 		walker->ptes[walker->level - 1] = pte;
@@ -325,7 +358,7 @@ no_present:
 }
 
 static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
-			      u64 *spte, const void *pte, unsigned long mmu_seq)
+			      u64 *spte, const void *pte)
 {
 	pt_element_t gpte;
 	unsigned pte_access;
@@ -342,8 +375,6 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 		kvm_release_pfn_clean(pfn);
 		return;
 	}
-	if (mmu_notifier_retry(vcpu, mmu_seq))
-		return;
 
 	/*
 	 * we call mmu_set_spte() with host_writable = true because that
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 6bb15d583e4..506e4fe23ad 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -63,6 +63,10 @@ MODULE_LICENSE("GPL");
 
 #define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
 
+#define TSC_RATIO_RSVD          0xffffff0000000000ULL
+#define TSC_RATIO_MIN		0x0000000000000001ULL
+#define TSC_RATIO_MAX		0x000000ffffffffffULL
+
 static bool erratum_383_found __read_mostly;
 
 static const u32 host_save_user_msrs[] = {
@@ -93,14 +97,6 @@ struct nested_state {
 	/* A VMEXIT is required but not yet emulated */
 	bool exit_required;
 
-	/*
-	 * If we vmexit during an instruction emulation we need this to restore
-	 * the l1 guest rip after the emulation
-	 */
-	unsigned long vmexit_rip;
-	unsigned long vmexit_rsp;
-	unsigned long vmexit_rax;
-
 	/* cache for intercepts of the guest */
 	u32 intercept_cr;
 	u32 intercept_dr;
@@ -144,8 +140,13 @@ struct vcpu_svm {
 	unsigned int3_injected;
 	unsigned long int3_rip;
 	u32 apf_reason;
+
+	u64  tsc_ratio;
 };
 
+static DEFINE_PER_CPU(u64, current_tsc_ratio);
+#define TSC_RATIO_DEFAULT	0x0100000000ULL
+
 #define MSR_INVALID			0xffffffffU
 
 static struct svm_direct_access_msrs {
@@ -190,6 +191,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm);
 static int nested_svm_vmexit(struct vcpu_svm *svm);
 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
 				      bool has_error_code, u32 error_code);
+static u64 __scale_tsc(u64 ratio, u64 tsc);
 
 enum {
 	VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
@@ -376,7 +378,6 @@ struct svm_cpu_data {
 };
 
 static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
-static uint32_t svm_features;
 
 struct svm_init_data {
 	int cpu;
@@ -569,6 +570,10 @@ static int has_svm(void)
 
 static void svm_hardware_disable(void *garbage)
 {
+	/* Make sure we clean up behind us */
+	if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
+		wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
+
 	cpu_svm_disable();
 }
 
@@ -610,6 +615,11 @@ static int svm_hardware_enable(void *garbage)
 
 	wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
 
+	if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
+		wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
+		__get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT;
+	}
+
 	svm_init_erratum_383();
 
 	return 0;
@@ -791,6 +801,23 @@ static __init int svm_hardware_setup(void)
 	if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
 		kvm_enable_efer_bits(EFER_FFXSR);
 
+	if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
+		u64 max;
+
+		kvm_has_tsc_control = true;
+
+		/*
+		 * Make sure the user can only configure tsc_khz values that
+		 * fit into a signed integer.
+		 * A min value is not calculated needed because it will always
+		 * be 1 on all machines and a value of 0 is used to disable
+		 * tsc-scaling for the vcpu.
+		 */
+		max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX));
+
+		kvm_max_guest_tsc_khz = max;
+	}
+
 	if (nested) {
 		printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
 		kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
@@ -802,8 +829,6 @@ static __init int svm_hardware_setup(void)
 			goto err;
 	}
 
-	svm_features = cpuid_edx(SVM_CPUID_FUNC);
-
 	if (!boot_cpu_has(X86_FEATURE_NPT))
 		npt_enabled = false;
 
@@ -854,6 +879,64 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
 	seg->base = 0;
 }
 
+static u64 __scale_tsc(u64 ratio, u64 tsc)
+{
+	u64 mult, frac, _tsc;
+
+	mult  = ratio >> 32;
+	frac  = ratio & ((1ULL << 32) - 1);
+
+	_tsc  = tsc;
+	_tsc *= mult;
+	_tsc += (tsc >> 32) * frac;
+	_tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32;
+
+	return _tsc;
+}
+
+static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	u64 _tsc = tsc;
+
+	if (svm->tsc_ratio != TSC_RATIO_DEFAULT)
+		_tsc = __scale_tsc(svm->tsc_ratio, tsc);
+
+	return _tsc;
+}
+
+static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	u64 ratio;
+	u64 khz;
+
+	/* TSC scaling supported? */
+	if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR))
+		return;
+
+	/* TSC-Scaling disabled or guest TSC same frequency as host TSC? */
+	if (user_tsc_khz == 0) {
+		vcpu->arch.virtual_tsc_khz = 0;
+		svm->tsc_ratio = TSC_RATIO_DEFAULT;
+		return;
+	}
+
+	khz = user_tsc_khz;
+
+	/* TSC scaling required  - calculate ratio */
+	ratio = khz << 32;
+	do_div(ratio, tsc_khz);
+
+	if (ratio == 0 || ratio & TSC_RATIO_RSVD) {
+		WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n",
+				user_tsc_khz);
+		return;
+	}
+	vcpu->arch.virtual_tsc_khz = user_tsc_khz;
+	svm->tsc_ratio             = ratio;
+}
+
 static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -880,6 +963,15 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
 	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
 }
 
+static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
+{
+	u64 tsc;
+
+	tsc = svm_scale_tsc(vcpu, native_read_tsc());
+
+	return target_tsc - tsc;
+}
+
 static void init_vmcb(struct vcpu_svm *svm)
 {
 	struct vmcb_control_area *control = &svm->vmcb->control;
@@ -975,7 +1067,7 @@ static void init_vmcb(struct vcpu_svm *svm)
 	svm_set_efer(&svm->vcpu, 0);
 	save->dr6 = 0xffff0ff0;
 	save->dr7 = 0x400;
-	save->rflags = 2;
+	kvm_set_rflags(&svm->vcpu, 2);
 	save->rip = 0x0000fff0;
 	svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
 
@@ -1048,6 +1140,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 		goto out;
 	}
 
+	svm->tsc_ratio = TSC_RATIO_DEFAULT;
+
 	err = kvm_vcpu_init(&svm->vcpu, kvm, id);
 	if (err)
 		goto free_svm;
@@ -1141,6 +1235,12 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
 		rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
+
+	if (static_cpu_has(X86_FEATURE_TSCRATEMSR) &&
+	    svm->tsc_ratio != __get_cpu_var(current_tsc_ratio)) {
+		__get_cpu_var(current_tsc_ratio) = svm->tsc_ratio;
+		wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio);
+	}
 }
 
 static void svm_vcpu_put(struct kvm_vcpu *vcpu)
@@ -1365,31 +1465,6 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	if (is_guest_mode(vcpu)) {
-		/*
-		 * We are here because we run in nested mode, the host kvm
-		 * intercepts cr0 writes but the l1 hypervisor does not.
-		 * But the L1 hypervisor may intercept selective cr0 writes.
-		 * This needs to be checked here.
-		 */
-		unsigned long old, new;
-
-		/* Remove bits that would trigger a real cr0 write intercept */
-		old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK;
-		new = cr0 & SVM_CR0_SELECTIVE_MASK;
-
-		if (old == new) {
-			/* cr0 write with ts and mp unchanged */
-			svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
-			if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) {
-				svm->nested.vmexit_rip = kvm_rip_read(vcpu);
-				svm->nested.vmexit_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
-				svm->nested.vmexit_rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
-				return;
-			}
-		}
-	}
-
 #ifdef CONFIG_X86_64
 	if (vcpu->arch.efer & EFER_LME) {
 		if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
@@ -2127,7 +2202,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
 	nested_vmcb->save.cr3    = kvm_read_cr3(&svm->vcpu);
 	nested_vmcb->save.cr2    = vmcb->save.cr2;
 	nested_vmcb->save.cr4    = svm->vcpu.arch.cr4;
-	nested_vmcb->save.rflags = vmcb->save.rflags;
+	nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
 	nested_vmcb->save.rip    = vmcb->save.rip;
 	nested_vmcb->save.rsp    = vmcb->save.rsp;
 	nested_vmcb->save.rax    = vmcb->save.rax;
@@ -2184,7 +2259,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
 	svm->vmcb->save.ds = hsave->save.ds;
 	svm->vmcb->save.gdtr = hsave->save.gdtr;
 	svm->vmcb->save.idtr = hsave->save.idtr;
-	svm->vmcb->save.rflags = hsave->save.rflags;
+	kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
 	svm_set_efer(&svm->vcpu, hsave->save.efer);
 	svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
 	svm_set_cr4(&svm->vcpu, hsave->save.cr4);
@@ -2312,7 +2387,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
 	hsave->save.efer   = svm->vcpu.arch.efer;
 	hsave->save.cr0    = kvm_read_cr0(&svm->vcpu);
 	hsave->save.cr4    = svm->vcpu.arch.cr4;
-	hsave->save.rflags = vmcb->save.rflags;
+	hsave->save.rflags = kvm_get_rflags(&svm->vcpu);
 	hsave->save.rip    = kvm_rip_read(&svm->vcpu);
 	hsave->save.rsp    = vmcb->save.rsp;
 	hsave->save.rax    = vmcb->save.rax;
@@ -2323,7 +2398,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
 
 	copy_vmcb_control_area(hsave, vmcb);
 
-	if (svm->vmcb->save.rflags & X86_EFLAGS_IF)
+	if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
 		svm->vcpu.arch.hflags |= HF_HIF_MASK;
 	else
 		svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
@@ -2341,7 +2416,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
 	svm->vmcb->save.ds = nested_vmcb->save.ds;
 	svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
 	svm->vmcb->save.idtr = nested_vmcb->save.idtr;
-	svm->vmcb->save.rflags = nested_vmcb->save.rflags;
+	kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags);
 	svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
 	svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
 	svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
@@ -2443,13 +2518,13 @@ static int vmload_interception(struct vcpu_svm *svm)
 	if (nested_svm_check_permissions(svm))
 		return 1;
 
-	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-	skip_emulated_instruction(&svm->vcpu);
-
 	nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
 	if (!nested_vmcb)
 		return 1;
 
+	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+	skip_emulated_instruction(&svm->vcpu);
+
 	nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
 	nested_svm_unmap(page);
 
@@ -2464,13 +2539,13 @@ static int vmsave_interception(struct vcpu_svm *svm)
 	if (nested_svm_check_permissions(svm))
 		return 1;
 
-	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-	skip_emulated_instruction(&svm->vcpu);
-
 	nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
 	if (!nested_vmcb)
 		return 1;
 
+	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+	skip_emulated_instruction(&svm->vcpu);
+
 	nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
 	nested_svm_unmap(page);
 
@@ -2676,6 +2751,29 @@ static int emulate_on_interception(struct vcpu_svm *svm)
 	return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
 }
 
+bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val)
+{
+	unsigned long cr0 = svm->vcpu.arch.cr0;
+	bool ret = false;
+	u64 intercept;
+
+	intercept = svm->nested.intercept;
+
+	if (!is_guest_mode(&svm->vcpu) ||
+	    (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))))
+		return false;
+
+	cr0 &= ~SVM_CR0_SELECTIVE_MASK;
+	val &= ~SVM_CR0_SELECTIVE_MASK;
+
+	if (cr0 ^ val) {
+		svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
+		ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE);
+	}
+
+	return ret;
+}
+
 #define CR_VALID (1ULL << 63)
 
 static int cr_interception(struct vcpu_svm *svm)
@@ -2699,7 +2797,11 @@ static int cr_interception(struct vcpu_svm *svm)
 		val = kvm_register_read(&svm->vcpu, reg);
 		switch (cr) {
 		case 0:
-			err = kvm_set_cr0(&svm->vcpu, val);
+			if (!check_selective_cr0_intercepted(svm, val))
+				err = kvm_set_cr0(&svm->vcpu, val);
+			else
+				return 1;
+
 			break;
 		case 3:
 			err = kvm_set_cr3(&svm->vcpu, val);
@@ -2744,23 +2846,6 @@ static int cr_interception(struct vcpu_svm *svm)
 	return 1;
 }
 
-static int cr0_write_interception(struct vcpu_svm *svm)
-{
-	struct kvm_vcpu *vcpu = &svm->vcpu;
-	int r;
-
-	r = cr_interception(svm);
-
-	if (svm->nested.vmexit_rip) {
-		kvm_register_write(vcpu, VCPU_REGS_RIP, svm->nested.vmexit_rip);
-		kvm_register_write(vcpu, VCPU_REGS_RSP, svm->nested.vmexit_rsp);
-		kvm_register_write(vcpu, VCPU_REGS_RAX, svm->nested.vmexit_rax);
-		svm->nested.vmexit_rip = 0;
-	}
-
-	return r;
-}
-
 static int dr_interception(struct vcpu_svm *svm)
 {
 	int reg, dr;
@@ -2813,7 +2898,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 	case MSR_IA32_TSC: {
 		struct vmcb *vmcb = get_host_vmcb(svm);
 
-		*data = vmcb->control.tsc_offset + native_read_tsc();
+		*data = vmcb->control.tsc_offset +
+			svm_scale_tsc(vcpu, native_read_tsc());
+
 		break;
 	}
 	case MSR_STAR:
@@ -3048,7 +3135,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
 	[SVM_EXIT_READ_CR4]			= cr_interception,
 	[SVM_EXIT_READ_CR8]			= cr_interception,
 	[SVM_EXIT_CR0_SEL_WRITE]		= emulate_on_interception,
-	[SVM_EXIT_WRITE_CR0]			= cr0_write_interception,
+	[SVM_EXIT_WRITE_CR0]			= cr_interception,
 	[SVM_EXIT_WRITE_CR3]			= cr_interception,
 	[SVM_EXIT_WRITE_CR4]			= cr_interception,
 	[SVM_EXIT_WRITE_CR8]			= cr8_write_interception,
@@ -3104,97 +3191,109 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
 	[SVM_EXIT_NPF]				= pf_interception,
 };
 
-void dump_vmcb(struct kvm_vcpu *vcpu)
+static void dump_vmcb(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct vmcb_control_area *control = &svm->vmcb->control;
 	struct vmcb_save_area *save = &svm->vmcb->save;
 
 	pr_err("VMCB Control Area:\n");
-	pr_err("cr_read:            %04x\n", control->intercept_cr & 0xffff);
-	pr_err("cr_write:           %04x\n", control->intercept_cr >> 16);
-	pr_err("dr_read:            %04x\n", control->intercept_dr & 0xffff);
-	pr_err("dr_write:           %04x\n", control->intercept_dr >> 16);
-	pr_err("exceptions:         %08x\n", control->intercept_exceptions);
-	pr_err("intercepts:         %016llx\n", control->intercept);
-	pr_err("pause filter count: %d\n", control->pause_filter_count);
-	pr_err("iopm_base_pa:       %016llx\n", control->iopm_base_pa);
-	pr_err("msrpm_base_pa:      %016llx\n", control->msrpm_base_pa);
-	pr_err("tsc_offset:         %016llx\n", control->tsc_offset);
-	pr_err("asid:               %d\n", control->asid);
-	pr_err("tlb_ctl:            %d\n", control->tlb_ctl);
-	pr_err("int_ctl:            %08x\n", control->int_ctl);
-	pr_err("int_vector:         %08x\n", control->int_vector);
-	pr_err("int_state:          %08x\n", control->int_state);
-	pr_err("exit_code:          %08x\n", control->exit_code);
-	pr_err("exit_info1:         %016llx\n", control->exit_info_1);
-	pr_err("exit_info2:         %016llx\n", control->exit_info_2);
-	pr_err("exit_int_info:      %08x\n", control->exit_int_info);
-	pr_err("exit_int_info_err:  %08x\n", control->exit_int_info_err);
-	pr_err("nested_ctl:         %lld\n", control->nested_ctl);
-	pr_err("nested_cr3:         %016llx\n", control->nested_cr3);
-	pr_err("event_inj:          %08x\n", control->event_inj);
-	pr_err("event_inj_err:      %08x\n", control->event_inj_err);
-	pr_err("lbr_ctl:            %lld\n", control->lbr_ctl);
-	pr_err("next_rip:           %016llx\n", control->next_rip);
+	pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff);
+	pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16);
+	pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff);
+	pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16);
+	pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions);
+	pr_err("%-20s%016llx\n", "intercepts:", control->intercept);
+	pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count);
+	pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa);
+	pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa);
+	pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset);
+	pr_err("%-20s%d\n", "asid:", control->asid);
+	pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl);
+	pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl);
+	pr_err("%-20s%08x\n", "int_vector:", control->int_vector);
+	pr_err("%-20s%08x\n", "int_state:", control->int_state);
+	pr_err("%-20s%08x\n", "exit_code:", control->exit_code);
+	pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1);
+	pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2);
+	pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info);
+	pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err);
+	pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
+	pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
+	pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
+	pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
+	pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl);
+	pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
 	pr_err("VMCB State Save Area:\n");
-	pr_err("es:   s: %04x a: %04x l: %08x b: %016llx\n",
-		save->es.selector, save->es.attrib,
-		save->es.limit, save->es.base);
-	pr_err("cs:   s: %04x a: %04x l: %08x b: %016llx\n",
-		save->cs.selector, save->cs.attrib,
-		save->cs.limit, save->cs.base);
-	pr_err("ss:   s: %04x a: %04x l: %08x b: %016llx\n",
-		save->ss.selector, save->ss.attrib,
-		save->ss.limit, save->ss.base);
-	pr_err("ds:   s: %04x a: %04x l: %08x b: %016llx\n",
-		save->ds.selector, save->ds.attrib,
-		save->ds.limit, save->ds.base);
-	pr_err("fs:   s: %04x a: %04x l: %08x b: %016llx\n",
-		save->fs.selector, save->fs.attrib,
-		save->fs.limit, save->fs.base);
-	pr_err("gs:   s: %04x a: %04x l: %08x b: %016llx\n",
-		save->gs.selector, save->gs.attrib,
-		save->gs.limit, save->gs.base);
-	pr_err("gdtr: s: %04x a: %04x l: %08x b: %016llx\n",
-		save->gdtr.selector, save->gdtr.attrib,
-		save->gdtr.limit, save->gdtr.base);
-	pr_err("ldtr: s: %04x a: %04x l: %08x b: %016llx\n",
-		save->ldtr.selector, save->ldtr.attrib,
-		save->ldtr.limit, save->ldtr.base);
-	pr_err("idtr: s: %04x a: %04x l: %08x b: %016llx\n",
-		save->idtr.selector, save->idtr.attrib,
-		save->idtr.limit, save->idtr.base);
-	pr_err("tr:   s: %04x a: %04x l: %08x b: %016llx\n",
-		save->tr.selector, save->tr.attrib,
-		save->tr.limit, save->tr.base);
+	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
+	       "es:",
+	       save->es.selector, save->es.attrib,
+	       save->es.limit, save->es.base);
+	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
+	       "cs:",
+	       save->cs.selector, save->cs.attrib,
+	       save->cs.limit, save->cs.base);
+	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
+	       "ss:",
+	       save->ss.selector, save->ss.attrib,
+	       save->ss.limit, save->ss.base);
+	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
+	       "ds:",
+	       save->ds.selector, save->ds.attrib,
+	       save->ds.limit, save->ds.base);
+	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
+	       "fs:",
+	       save->fs.selector, save->fs.attrib,
+	       save->fs.limit, save->fs.base);
+	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
+	       "gs:",
+	       save->gs.selector, save->gs.attrib,
+	       save->gs.limit, save->gs.base);
+	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
+	       "gdtr:",
+	       save->gdtr.selector, save->gdtr.attrib,
+	       save->gdtr.limit, save->gdtr.base);
+	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
+	       "ldtr:",
+	       save->ldtr.selector, save->ldtr.attrib,
+	       save->ldtr.limit, save->ldtr.base);
+	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
+	       "idtr:",
+	       save->idtr.selector, save->idtr.attrib,
+	       save->idtr.limit, save->idtr.base);
+	pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
+	       "tr:",
+	       save->tr.selector, save->tr.attrib,
+	       save->tr.limit, save->tr.base);
 	pr_err("cpl:            %d                efer:         %016llx\n",
 		save->cpl, save->efer);
-	pr_err("cr0:            %016llx cr2:          %016llx\n",
-		save->cr0, save->cr2);
-	pr_err("cr3:            %016llx cr4:          %016llx\n",
-		save->cr3, save->cr4);
-	pr_err("dr6:            %016llx dr7:          %016llx\n",
-		save->dr6, save->dr7);
-	pr_err("rip:            %016llx rflags:       %016llx\n",
-		save->rip, save->rflags);
-	pr_err("rsp:            %016llx rax:          %016llx\n",
-		save->rsp, save->rax);
-	pr_err("star:           %016llx lstar:        %016llx\n",
-		save->star, save->lstar);
-	pr_err("cstar:          %016llx sfmask:       %016llx\n",
-		save->cstar, save->sfmask);
-	pr_err("kernel_gs_base: %016llx sysenter_cs:  %016llx\n",
-		save->kernel_gs_base, save->sysenter_cs);
-	pr_err("sysenter_esp:   %016llx sysenter_eip: %016llx\n",
-		save->sysenter_esp, save->sysenter_eip);
-	pr_err("gpat:           %016llx dbgctl:       %016llx\n",
-		save->g_pat, save->dbgctl);
-	pr_err("br_from:        %016llx br_to:        %016llx\n",
-		save->br_from, save->br_to);
-	pr_err("excp_from:      %016llx excp_to:      %016llx\n",
-		save->last_excp_from, save->last_excp_to);
-
+	pr_err("%-15s %016llx %-13s %016llx\n",
+	       "cr0:", save->cr0, "cr2:", save->cr2);
+	pr_err("%-15s %016llx %-13s %016llx\n",
+	       "cr3:", save->cr3, "cr4:", save->cr4);
+	pr_err("%-15s %016llx %-13s %016llx\n",
+	       "dr6:", save->dr6, "dr7:", save->dr7);
+	pr_err("%-15s %016llx %-13s %016llx\n",
+	       "rip:", save->rip, "rflags:", save->rflags);
+	pr_err("%-15s %016llx %-13s %016llx\n",
+	       "rsp:", save->rsp, "rax:", save->rax);
+	pr_err("%-15s %016llx %-13s %016llx\n",
+	       "star:", save->star, "lstar:", save->lstar);
+	pr_err("%-15s %016llx %-13s %016llx\n",
+	       "cstar:", save->cstar, "sfmask:", save->sfmask);
+	pr_err("%-15s %016llx %-13s %016llx\n",
+	       "kernel_gs_base:", save->kernel_gs_base,
+	       "sysenter_cs:", save->sysenter_cs);
+	pr_err("%-15s %016llx %-13s %016llx\n",
+	       "sysenter_esp:", save->sysenter_esp,
+	       "sysenter_eip:", save->sysenter_eip);
+	pr_err("%-15s %016llx %-13s %016llx\n",
+	       "gpat:", save->g_pat, "dbgctl:", save->dbgctl);
+	pr_err("%-15s %016llx %-13s %016llx\n",
+	       "br_from:", save->br_from, "br_to:", save->br_to);
+	pr_err("%-15s %016llx %-13s %016llx\n",
+	       "excp_from:", save->last_excp_from,
+	       "excp_to:", save->last_excp_to);
 }
 
 static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
@@ -3384,7 +3483,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
 	     (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
 		return 0;
 
-	ret = !!(vmcb->save.rflags & X86_EFLAGS_IF);
+	ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF);
 
 	if (is_guest_mode(vcpu))
 		return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
@@ -3871,6 +3970,186 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
 	update_cr0_intercept(svm);
 }
 
+#define PRE_EX(exit)  { .exit_code = (exit), \
+			.stage = X86_ICPT_PRE_EXCEPT, }
+#define POST_EX(exit) { .exit_code = (exit), \
+			.stage = X86_ICPT_POST_EXCEPT, }
+#define POST_MEM(exit) { .exit_code = (exit), \
+			.stage = X86_ICPT_POST_MEMACCESS, }
+
+static struct __x86_intercept {
+	u32 exit_code;
+	enum x86_intercept_stage stage;
+} x86_intercept_map[] = {
+	[x86_intercept_cr_read]		= POST_EX(SVM_EXIT_READ_CR0),
+	[x86_intercept_cr_write]	= POST_EX(SVM_EXIT_WRITE_CR0),
+	[x86_intercept_clts]		= POST_EX(SVM_EXIT_WRITE_CR0),
+	[x86_intercept_lmsw]		= POST_EX(SVM_EXIT_WRITE_CR0),
+	[x86_intercept_smsw]		= POST_EX(SVM_EXIT_READ_CR0),
+	[x86_intercept_dr_read]		= POST_EX(SVM_EXIT_READ_DR0),
+	[x86_intercept_dr_write]	= POST_EX(SVM_EXIT_WRITE_DR0),
+	[x86_intercept_sldt]		= POST_EX(SVM_EXIT_LDTR_READ),
+	[x86_intercept_str]		= POST_EX(SVM_EXIT_TR_READ),
+	[x86_intercept_lldt]		= POST_EX(SVM_EXIT_LDTR_WRITE),
+	[x86_intercept_ltr]		= POST_EX(SVM_EXIT_TR_WRITE),
+	[x86_intercept_sgdt]		= POST_EX(SVM_EXIT_GDTR_READ),
+	[x86_intercept_sidt]		= POST_EX(SVM_EXIT_IDTR_READ),
+	[x86_intercept_lgdt]		= POST_EX(SVM_EXIT_GDTR_WRITE),
+	[x86_intercept_lidt]		= POST_EX(SVM_EXIT_IDTR_WRITE),
+	[x86_intercept_vmrun]		= POST_EX(SVM_EXIT_VMRUN),
+	[x86_intercept_vmmcall]		= POST_EX(SVM_EXIT_VMMCALL),
+	[x86_intercept_vmload]		= POST_EX(SVM_EXIT_VMLOAD),
+	[x86_intercept_vmsave]		= POST_EX(SVM_EXIT_VMSAVE),
+	[x86_intercept_stgi]		= POST_EX(SVM_EXIT_STGI),
+	[x86_intercept_clgi]		= POST_EX(SVM_EXIT_CLGI),
+	[x86_intercept_skinit]		= POST_EX(SVM_EXIT_SKINIT),
+	[x86_intercept_invlpga]		= POST_EX(SVM_EXIT_INVLPGA),
+	[x86_intercept_rdtscp]		= POST_EX(SVM_EXIT_RDTSCP),
+	[x86_intercept_monitor]		= POST_MEM(SVM_EXIT_MONITOR),
+	[x86_intercept_mwait]		= POST_EX(SVM_EXIT_MWAIT),
+	[x86_intercept_invlpg]		= POST_EX(SVM_EXIT_INVLPG),
+	[x86_intercept_invd]		= POST_EX(SVM_EXIT_INVD),
+	[x86_intercept_wbinvd]		= POST_EX(SVM_EXIT_WBINVD),
+	[x86_intercept_wrmsr]		= POST_EX(SVM_EXIT_MSR),
+	[x86_intercept_rdtsc]		= POST_EX(SVM_EXIT_RDTSC),
+	[x86_intercept_rdmsr]		= POST_EX(SVM_EXIT_MSR),
+	[x86_intercept_rdpmc]		= POST_EX(SVM_EXIT_RDPMC),
+	[x86_intercept_cpuid]		= PRE_EX(SVM_EXIT_CPUID),
+	[x86_intercept_rsm]		= PRE_EX(SVM_EXIT_RSM),
+	[x86_intercept_pause]		= PRE_EX(SVM_EXIT_PAUSE),
+	[x86_intercept_pushf]		= PRE_EX(SVM_EXIT_PUSHF),
+	[x86_intercept_popf]		= PRE_EX(SVM_EXIT_POPF),
+	[x86_intercept_intn]		= PRE_EX(SVM_EXIT_SWINT),
+	[x86_intercept_iret]		= PRE_EX(SVM_EXIT_IRET),
+	[x86_intercept_icebp]		= PRE_EX(SVM_EXIT_ICEBP),
+	[x86_intercept_hlt]		= POST_EX(SVM_EXIT_HLT),
+	[x86_intercept_in]		= POST_EX(SVM_EXIT_IOIO),
+	[x86_intercept_ins]		= POST_EX(SVM_EXIT_IOIO),
+	[x86_intercept_out]		= POST_EX(SVM_EXIT_IOIO),
+	[x86_intercept_outs]		= POST_EX(SVM_EXIT_IOIO),
+};
+
+#undef PRE_EX
+#undef POST_EX
+#undef POST_MEM
+
+static int svm_check_intercept(struct kvm_vcpu *vcpu,
+			       struct x86_instruction_info *info,
+			       enum x86_intercept_stage stage)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	int vmexit, ret = X86EMUL_CONTINUE;
+	struct __x86_intercept icpt_info;
+	struct vmcb *vmcb = svm->vmcb;
+
+	if (info->intercept >= ARRAY_SIZE(x86_intercept_map))
+		goto out;
+
+	icpt_info = x86_intercept_map[info->intercept];
+
+	if (stage != icpt_info.stage)
+		goto out;
+
+	switch (icpt_info.exit_code) {
+	case SVM_EXIT_READ_CR0:
+		if (info->intercept == x86_intercept_cr_read)
+			icpt_info.exit_code += info->modrm_reg;
+		break;
+	case SVM_EXIT_WRITE_CR0: {
+		unsigned long cr0, val;
+		u64 intercept;
+
+		if (info->intercept == x86_intercept_cr_write)
+			icpt_info.exit_code += info->modrm_reg;
+
+		if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0)
+			break;
+
+		intercept = svm->nested.intercept;
+
+		if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))
+			break;
+
+		cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
+		val = info->src_val  & ~SVM_CR0_SELECTIVE_MASK;
+
+		if (info->intercept == x86_intercept_lmsw) {
+			cr0 &= 0xfUL;
+			val &= 0xfUL;
+			/* lmsw can't clear PE - catch this here */
+			if (cr0 & X86_CR0_PE)
+				val |= X86_CR0_PE;
+		}
+
+		if (cr0 ^ val)
+			icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE;
+
+		break;
+	}
+	case SVM_EXIT_READ_DR0:
+	case SVM_EXIT_WRITE_DR0:
+		icpt_info.exit_code += info->modrm_reg;
+		break;
+	case SVM_EXIT_MSR:
+		if (info->intercept == x86_intercept_wrmsr)
+			vmcb->control.exit_info_1 = 1;
+		else
+			vmcb->control.exit_info_1 = 0;
+		break;
+	case SVM_EXIT_PAUSE:
+		/*
+		 * We get this for NOP only, but pause
+		 * is rep not, check this here
+		 */
+		if (info->rep_prefix != REPE_PREFIX)
+			goto out;
+	case SVM_EXIT_IOIO: {
+		u64 exit_info;
+		u32 bytes;
+
+		exit_info = (vcpu->arch.regs[VCPU_REGS_RDX] & 0xffff) << 16;
+
+		if (info->intercept == x86_intercept_in ||
+		    info->intercept == x86_intercept_ins) {
+			exit_info |= SVM_IOIO_TYPE_MASK;
+			bytes = info->src_bytes;
+		} else {
+			bytes = info->dst_bytes;
+		}
+
+		if (info->intercept == x86_intercept_outs ||
+		    info->intercept == x86_intercept_ins)
+			exit_info |= SVM_IOIO_STR_MASK;
+
+		if (info->rep_prefix)
+			exit_info |= SVM_IOIO_REP_MASK;
+
+		bytes = min(bytes, 4u);
+
+		exit_info |= bytes << SVM_IOIO_SIZE_SHIFT;
+
+		exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1);
+
+		vmcb->control.exit_info_1 = exit_info;
+		vmcb->control.exit_info_2 = info->next_rip;
+
+		break;
+	}
+	default:
+		break;
+	}
+
+	vmcb->control.next_rip  = info->next_rip;
+	vmcb->control.exit_code = icpt_info.exit_code;
+	vmexit = nested_svm_exit_handled(svm);
+
+	ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
+					   : X86EMUL_CONTINUE;
+
+out:
+	return ret;
+}
+
 static struct kvm_x86_ops svm_x86_ops = {
 	.cpu_has_kvm_support = has_svm,
 	.disabled_by_bios = is_disabled,
@@ -3952,10 +4231,14 @@ static struct kvm_x86_ops svm_x86_ops = {
 
 	.has_wbinvd_exit = svm_has_wbinvd_exit,
 
+	.set_tsc_khz = svm_set_tsc_khz,
 	.write_tsc_offset = svm_write_tsc_offset,
 	.adjust_tsc_offset = svm_adjust_tsc_offset,
+	.compute_tsc_offset = svm_compute_tsc_offset,
 
 	.set_tdp_cr3 = set_tdp_cr3,
+
+	.check_intercept = svm_check_intercept,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5b4cdcbd154..4c3fa0f6746 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -128,8 +128,11 @@ struct vcpu_vmx {
 	unsigned long         host_rsp;
 	int                   launched;
 	u8                    fail;
+	u8                    cpl;
+	bool                  nmi_known_unmasked;
 	u32                   exit_intr_info;
 	u32                   idt_vectoring_info;
+	ulong                 rflags;
 	struct shared_msr_entry *guest_msrs;
 	int                   nmsrs;
 	int                   save_nmsrs;
@@ -159,6 +162,10 @@ struct vcpu_vmx {
 			u32 ar;
 		} tr, es, ds, fs, gs;
 	} rmode;
+	struct {
+		u32 bitmask; /* 4 bits per segment (1 bit per field) */
+		struct kvm_save_segment seg[8];
+	} segment_cache;
 	int vpid;
 	bool emulation_required;
 
@@ -171,6 +178,15 @@ struct vcpu_vmx {
 	bool rdtscp_enabled;
 };
 
+enum segment_cache_field {
+	SEG_FIELD_SEL = 0,
+	SEG_FIELD_BASE = 1,
+	SEG_FIELD_LIMIT = 2,
+	SEG_FIELD_AR = 3,
+
+	SEG_FIELD_NR = 4
+};
+
 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
 {
 	return container_of(vcpu, struct vcpu_vmx, vcpu);
@@ -643,6 +659,62 @@ static void vmcs_set_bits(unsigned long field, u32 mask)
 	vmcs_writel(field, vmcs_readl(field) | mask);
 }
 
+static void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
+{
+	vmx->segment_cache.bitmask = 0;
+}
+
+static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg,
+				       unsigned field)
+{
+	bool ret;
+	u32 mask = 1 << (seg * SEG_FIELD_NR + field);
+
+	if (!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) {
+		vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS);
+		vmx->segment_cache.bitmask = 0;
+	}
+	ret = vmx->segment_cache.bitmask & mask;
+	vmx->segment_cache.bitmask |= mask;
+	return ret;
+}
+
+static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg)
+{
+	u16 *p = &vmx->segment_cache.seg[seg].selector;
+
+	if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL))
+		*p = vmcs_read16(kvm_vmx_segment_fields[seg].selector);
+	return *p;
+}
+
+static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg)
+{
+	ulong *p = &vmx->segment_cache.seg[seg].base;
+
+	if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE))
+		*p = vmcs_readl(kvm_vmx_segment_fields[seg].base);
+	return *p;
+}
+
+static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg)
+{
+	u32 *p = &vmx->segment_cache.seg[seg].limit;
+
+	if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT))
+		*p = vmcs_read32(kvm_vmx_segment_fields[seg].limit);
+	return *p;
+}
+
+static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg)
+{
+	u32 *p = &vmx->segment_cache.seg[seg].ar;
+
+	if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR))
+		*p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes);
+	return *p;
+}
+
 static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 {
 	u32 eb;
@@ -970,17 +1042,24 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
 {
 	unsigned long rflags, save_rflags;
 
-	rflags = vmcs_readl(GUEST_RFLAGS);
-	if (to_vmx(vcpu)->rmode.vm86_active) {
-		rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
-		save_rflags = to_vmx(vcpu)->rmode.save_rflags;
-		rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
+	if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) {
+		__set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
+		rflags = vmcs_readl(GUEST_RFLAGS);
+		if (to_vmx(vcpu)->rmode.vm86_active) {
+			rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
+			save_rflags = to_vmx(vcpu)->rmode.save_rflags;
+			rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
+		}
+		to_vmx(vcpu)->rflags = rflags;
 	}
-	return rflags;
+	return to_vmx(vcpu)->rflags;
 }
 
 static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 {
+	__set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
+	__clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
+	to_vmx(vcpu)->rflags = rflags;
 	if (to_vmx(vcpu)->rmode.vm86_active) {
 		to_vmx(vcpu)->rmode.save_rflags = rflags;
 		rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
@@ -1053,7 +1132,10 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 	}
 
 	if (vmx->rmode.vm86_active) {
-		if (kvm_inject_realmode_interrupt(vcpu, nr) != EMULATE_DONE)
+		int inc_eip = 0;
+		if (kvm_exception_is_soft(nr))
+			inc_eip = vcpu->arch.event_exit_inst_len;
+		if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE)
 			kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
 		return;
 	}
@@ -1151,6 +1233,16 @@ static u64 guest_read_tsc(void)
 }
 
 /*
+ * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ
+ * ioctl. In this case the call-back should update internal vmx state to make
+ * the changes effective.
+ */
+static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
+{
+	/* Nothing to do here */
+}
+
+/*
  * writes 'offset' into guest's timestamp counter offset register
  */
 static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
@@ -1164,6 +1256,11 @@ static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
 	vmcs_write64(TSC_OFFSET, offset + adjustment);
 }
 
+static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
+{
+	return target_tsc - native_read_tsc();
+}
+
 /*
  * Reads an msr value (of 'msr_index') into 'pdata'.
  * Returns 0 on success, non-0 otherwise.
@@ -1243,9 +1340,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 		break;
 #ifdef CONFIG_X86_64
 	case MSR_FS_BASE:
+		vmx_segment_cache_clear(vmx);
 		vmcs_writel(GUEST_FS_BASE, data);
 		break;
 	case MSR_GS_BASE:
+		vmx_segment_cache_clear(vmx);
 		vmcs_writel(GUEST_GS_BASE, data);
 		break;
 	case MSR_KERNEL_GS_BASE:
@@ -1689,6 +1788,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 	vmx->emulation_required = 1;
 	vmx->rmode.vm86_active = 0;
 
+	vmx_segment_cache_clear(vmx);
+
 	vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector);
 	vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base);
 	vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit);
@@ -1712,6 +1813,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 	fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs);
 	fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs);
 
+	vmx_segment_cache_clear(vmx);
+
 	vmcs_write16(GUEST_SS_SELECTOR, 0);
 	vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
 
@@ -1775,6 +1878,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 	}
 
+	vmx_segment_cache_clear(vmx);
+
 	vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR);
 	vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
 	vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
@@ -1851,6 +1956,8 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
 {
 	u32 guest_tr_ar;
 
+	vmx_segment_cache_clear(to_vmx(vcpu));
+
 	guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
 	if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) {
 		printk(KERN_DEBUG "%s: tss fixup for long mode. \n",
@@ -1998,6 +2105,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	vmcs_writel(CR0_READ_SHADOW, cr0);
 	vmcs_writel(GUEST_CR0, hw_cr0);
 	vcpu->arch.cr0 = cr0;
+	__clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
 }
 
 static u64 construct_eptp(unsigned long root_hpa)
@@ -2053,7 +2161,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 	struct kvm_save_segment *save;
 	u32 ar;
 
@@ -2075,13 +2182,13 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
 		var->limit = save->limit;
 		ar = save->ar;
 		if (seg == VCPU_SREG_TR
-		    || var->selector == vmcs_read16(sf->selector))
+		    || var->selector == vmx_read_guest_seg_selector(vmx, seg))
 			goto use_saved_rmode_seg;
 	}
-	var->base = vmcs_readl(sf->base);
-	var->limit = vmcs_read32(sf->limit);
-	var->selector = vmcs_read16(sf->selector);
-	ar = vmcs_read32(sf->ar_bytes);
+	var->base = vmx_read_guest_seg_base(vmx, seg);
+	var->limit = vmx_read_guest_seg_limit(vmx, seg);
+	var->selector = vmx_read_guest_seg_selector(vmx, seg);
+	ar = vmx_read_guest_seg_ar(vmx, seg);
 use_saved_rmode_seg:
 	if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state)
 		ar = 0;
@@ -2098,27 +2205,37 @@ use_saved_rmode_seg:
 
 static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
 {
-	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 	struct kvm_segment s;
 
 	if (to_vmx(vcpu)->rmode.vm86_active) {
 		vmx_get_segment(vcpu, &s, seg);
 		return s.base;
 	}
-	return vmcs_readl(sf->base);
+	return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
 }
 
-static int vmx_get_cpl(struct kvm_vcpu *vcpu)
+static int __vmx_get_cpl(struct kvm_vcpu *vcpu)
 {
 	if (!is_protmode(vcpu))
 		return 0;
 
-	if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */
+	if (!is_long_mode(vcpu)
+	    && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */
 		return 3;
 
-	return vmcs_read16(GUEST_CS_SELECTOR) & 3;
+	return vmx_read_guest_seg_selector(to_vmx(vcpu), VCPU_SREG_CS) & 3;
 }
 
+static int vmx_get_cpl(struct kvm_vcpu *vcpu)
+{
+	if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) {
+		__set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
+		to_vmx(vcpu)->cpl = __vmx_get_cpl(vcpu);
+	}
+	return to_vmx(vcpu)->cpl;
+}
+
+
 static u32 vmx_segment_access_rights(struct kvm_segment *var)
 {
 	u32 ar;
@@ -2148,6 +2265,8 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 	u32 ar;
 
+	vmx_segment_cache_clear(vmx);
+
 	if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) {
 		vmcs_write16(sf->selector, var->selector);
 		vmx->rmode.tr.selector = var->selector;
@@ -2184,11 +2303,12 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 		ar |= 0x1; /* Accessed */
 
 	vmcs_write32(sf->ar_bytes, ar);
+	__clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
 }
 
 static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
 {
-	u32 ar = vmcs_read32(GUEST_CS_AR_BYTES);
+	u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS);
 
 	*db = (ar >> 14) & 1;
 	*l = (ar >> 13) & 1;
@@ -2775,6 +2895,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 	if (ret != 0)
 		goto out;
 
+	vmx_segment_cache_clear(vmx);
+
 	seg_setup(VCPU_SREG_CS);
 	/*
 	 * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
@@ -2904,7 +3026,10 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
 
 	++vcpu->stat.irq_injections;
 	if (vmx->rmode.vm86_active) {
-		if (kvm_inject_realmode_interrupt(vcpu, irq) != EMULATE_DONE)
+		int inc_eip = 0;
+		if (vcpu->arch.interrupt.soft)
+			inc_eip = vcpu->arch.event_exit_inst_len;
+		if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE)
 			kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
 		return;
 	}
@@ -2937,8 +3062,9 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 	}
 
 	++vcpu->stat.nmi_injections;
+	vmx->nmi_known_unmasked = false;
 	if (vmx->rmode.vm86_active) {
-		if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR) != EMULATE_DONE)
+		if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE)
 			kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
 		return;
 	}
@@ -2961,6 +3087,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
 {
 	if (!cpu_has_virtual_nmis())
 		return to_vmx(vcpu)->soft_vnmi_blocked;
+	if (to_vmx(vcpu)->nmi_known_unmasked)
+		return false;
 	return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)	& GUEST_INTR_STATE_NMI;
 }
 
@@ -2974,6 +3102,7 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
 			vmx->vnmi_blocked_time = 0;
 		}
 	} else {
+		vmx->nmi_known_unmasked = !masked;
 		if (masked)
 			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
 				      GUEST_INTR_STATE_NMI);
@@ -3091,7 +3220,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 	enum emulation_result er;
 
 	vect_info = vmx->idt_vectoring_info;
-	intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+	intr_info = vmx->exit_intr_info;
 
 	if (is_machine_check(intr_info))
 		return handle_machine_check(vcpu);
@@ -3122,7 +3251,6 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 	}
 
 	error_code = 0;
-	rip = kvm_rip_read(vcpu);
 	if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
 		error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
 	if (is_page_fault(intr_info)) {
@@ -3169,6 +3297,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 		vmx->vcpu.arch.event_exit_inst_len =
 			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
 		kvm_run->exit_reason = KVM_EXIT_DEBUG;
+		rip = kvm_rip_read(vcpu);
 		kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
 		kvm_run->debug.arch.exception = ex_no;
 		break;
@@ -3505,9 +3634,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
 		switch (type) {
 		case INTR_TYPE_NMI_INTR:
 			vcpu->arch.nmi_injected = false;
-			if (cpu_has_virtual_nmis())
-				vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
-					      GUEST_INTR_STATE_NMI);
+			vmx_set_nmi_mask(vcpu, true);
 			break;
 		case INTR_TYPE_EXT_INTR:
 		case INTR_TYPE_SOFT_INTR:
@@ -3867,12 +3994,17 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 
 static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 {
-	u32 exit_intr_info = vmx->exit_intr_info;
+	u32 exit_intr_info;
+
+	if (!(vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
+	      || vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI))
+		return;
+
+	vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+	exit_intr_info = vmx->exit_intr_info;
 
 	/* Handle machine checks before interrupts are enabled */
-	if ((vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
-	    || (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI
-		&& is_machine_check(exit_intr_info)))
+	if (is_machine_check(exit_intr_info))
 		kvm_machine_check();
 
 	/* We need to handle NMIs before interrupts are enabled */
@@ -3886,7 +4018,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 
 static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
 {
-	u32 exit_intr_info = vmx->exit_intr_info;
+	u32 exit_intr_info;
 	bool unblock_nmi;
 	u8 vector;
 	bool idtv_info_valid;
@@ -3894,6 +4026,13 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
 	idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
 
 	if (cpu_has_virtual_nmis()) {
+		if (vmx->nmi_known_unmasked)
+			return;
+		/*
+		 * Can't use vmx->exit_intr_info since we're not sure what
+		 * the exit reason is.
+		 */
+		exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 		unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
 		vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
 		/*
@@ -3910,6 +4049,10 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
 		    vector != DF_VECTOR && !idtv_info_valid)
 			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
 				      GUEST_INTR_STATE_NMI);
+		else
+			vmx->nmi_known_unmasked =
+				!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
+				  & GUEST_INTR_STATE_NMI);
 	} else if (unlikely(vmx->soft_vnmi_blocked))
 		vmx->vnmi_blocked_time +=
 			ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
@@ -3946,8 +4089,7 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx,
 		 * Clear bit "block by NMI" before VM entry if a NMI
 		 * delivery faulted.
 		 */
-		vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
-				GUEST_INTR_STATE_NMI);
+		vmx_set_nmi_mask(&vmx->vcpu, false);
 		break;
 	case INTR_TYPE_SOFT_EXCEPTION:
 		vmx->vcpu.arch.event_exit_inst_len =
@@ -4124,7 +4266,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	      );
 
 	vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
+				  | (1 << VCPU_EXREG_RFLAGS)
+				  | (1 << VCPU_EXREG_CPL)
 				  | (1 << VCPU_EXREG_PDPTR)
+				  | (1 << VCPU_EXREG_SEGMENTS)
 				  | (1 << VCPU_EXREG_CR3));
 	vcpu->arch.regs_dirty = 0;
 
@@ -4134,7 +4279,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	vmx->launched = 1;
 
 	vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
-	vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 
 	vmx_complete_atomic_exit(vmx);
 	vmx_recover_nmi_blocking(vmx);
@@ -4195,8 +4339,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 		goto free_vcpu;
 
 	vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	err = -ENOMEM;
 	if (!vmx->guest_msrs) {
-		err = -ENOMEM;
 		goto uninit_vcpu;
 	}
 
@@ -4215,7 +4359,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	if (err)
 		goto free_vmcs;
 	if (vm_need_virtualize_apic_accesses(kvm))
-		if (alloc_apic_access_page(kvm) != 0)
+		err = alloc_apic_access_page(kvm);
+		if (err)
 			goto free_vmcs;
 
 	if (enable_ept) {
@@ -4368,6 +4513,13 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
 {
 }
 
+static int vmx_check_intercept(struct kvm_vcpu *vcpu,
+			       struct x86_instruction_info *info,
+			       enum x86_intercept_stage stage)
+{
+	return X86EMUL_CONTINUE;
+}
+
 static struct kvm_x86_ops vmx_x86_ops = {
 	.cpu_has_kvm_support = cpu_has_kvm_support,
 	.disabled_by_bios = vmx_disabled_by_bios,
@@ -4449,10 +4601,14 @@ static struct kvm_x86_ops vmx_x86_ops = {
 
 	.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
 
+	.set_tsc_khz = vmx_set_tsc_khz,
 	.write_tsc_offset = vmx_write_tsc_offset,
 	.adjust_tsc_offset = vmx_adjust_tsc_offset,
+	.compute_tsc_offset = vmx_compute_tsc_offset,
 
 	.set_tdp_cr3 = vmx_set_cr3,
+
+	.check_intercept = vmx_check_intercept,
 };
 
 static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 934b4c6b0bf..77c9d8673dc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -60,22 +60,12 @@
 #include <asm/div64.h>
 
 #define MAX_IO_MSRS 256
-#define CR0_RESERVED_BITS						\
-	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
-			  | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
-			  | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
-#define CR4_RESERVED_BITS						\
-	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
-			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE	\
-			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR	\
-			  | X86_CR4_OSXSAVE \
-			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
-
-#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
-
 #define KVM_MAX_MCE_BANKS 32
 #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
 
+#define emul_to_vcpu(ctxt) \
+	container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)
+
 /* EFER defaults:
  * - enable syscall per default because its emulated by KVM
  * - enable LME and LMA per default on 64 bit KVM
@@ -100,6 +90,11 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
 int ignore_msrs = 0;
 module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
 
+bool kvm_has_tsc_control;
+EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
+u32  kvm_max_guest_tsc_khz;
+EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
+
 #define KVM_NR_SHARED_MSRS 16
 
 struct kvm_shared_msrs_global {
@@ -157,6 +152,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 u64 __read_mostly host_xcr0;
 
+int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
+
 static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
 {
 	int i;
@@ -361,8 +358,8 @@ void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
 
 void kvm_inject_nmi(struct kvm_vcpu *vcpu)
 {
-	kvm_make_request(KVM_REQ_NMI, vcpu);
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
+	vcpu->arch.nmi_pending = 1;
 }
 EXPORT_SYMBOL_GPL(kvm_inject_nmi);
 
@@ -982,7 +979,15 @@ static inline int kvm_tsc_changes_freq(void)
 	return ret;
 }
 
-static inline u64 nsec_to_cycles(u64 nsec)
+static u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.virtual_tsc_khz)
+		return vcpu->arch.virtual_tsc_khz;
+	else
+		return __this_cpu_read(cpu_tsc_khz);
+}
+
+static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
 {
 	u64 ret;
 
@@ -990,25 +995,24 @@ static inline u64 nsec_to_cycles(u64 nsec)
 	if (kvm_tsc_changes_freq())
 		printk_once(KERN_WARNING
 		 "kvm: unreliable cycle conversion on adjustable rate TSC\n");
-	ret = nsec * __this_cpu_read(cpu_tsc_khz);
+	ret = nsec * vcpu_tsc_khz(vcpu);
 	do_div(ret, USEC_PER_SEC);
 	return ret;
 }
 
-static void kvm_arch_set_tsc_khz(struct kvm *kvm, u32 this_tsc_khz)
+static void kvm_init_tsc_catchup(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
 {
 	/* Compute a scale to convert nanoseconds in TSC cycles */
 	kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
-			   &kvm->arch.virtual_tsc_shift,
-			   &kvm->arch.virtual_tsc_mult);
-	kvm->arch.virtual_tsc_khz = this_tsc_khz;
+			   &vcpu->arch.tsc_catchup_shift,
+			   &vcpu->arch.tsc_catchup_mult);
 }
 
 static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
 {
 	u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec,
-				      vcpu->kvm->arch.virtual_tsc_mult,
-				      vcpu->kvm->arch.virtual_tsc_shift);
+				      vcpu->arch.tsc_catchup_mult,
+				      vcpu->arch.tsc_catchup_shift);
 	tsc += vcpu->arch.last_tsc_write;
 	return tsc;
 }
@@ -1021,7 +1025,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
 	s64 sdiff;
 
 	raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
-	offset = data - native_read_tsc();
+	offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
 	ns = get_kernel_ns();
 	elapsed = ns - kvm->arch.last_tsc_nsec;
 	sdiff = data - kvm->arch.last_tsc_write;
@@ -1037,13 +1041,13 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
 	 * In that case, for a reliable TSC, we can match TSC offsets,
 	 * or make a best guest using elapsed value.
 	 */
-	if (sdiff < nsec_to_cycles(5ULL * NSEC_PER_SEC) &&
+	if (sdiff < nsec_to_cycles(vcpu, 5ULL * NSEC_PER_SEC) &&
 	    elapsed < 5ULL * NSEC_PER_SEC) {
 		if (!check_tsc_unstable()) {
 			offset = kvm->arch.last_tsc_offset;
 			pr_debug("kvm: matched tsc offset for %llu\n", data);
 		} else {
-			u64 delta = nsec_to_cycles(elapsed);
+			u64 delta = nsec_to_cycles(vcpu, elapsed);
 			offset += delta;
 			pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
 		}
@@ -1075,8 +1079,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 	local_irq_save(flags);
 	kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp);
 	kernel_ns = get_kernel_ns();
-	this_tsc_khz = __this_cpu_read(cpu_tsc_khz);
-
+	this_tsc_khz = vcpu_tsc_khz(v);
 	if (unlikely(this_tsc_khz == 0)) {
 		local_irq_restore(flags);
 		kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
@@ -1993,6 +1996,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_X86_ROBUST_SINGLESTEP:
 	case KVM_CAP_XSAVE:
 	case KVM_CAP_ASYNC_PF:
+	case KVM_CAP_GET_TSC_KHZ:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -2019,6 +2023,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_XCRS:
 		r = cpu_has_xsave;
 		break;
+	case KVM_CAP_TSC_CONTROL:
+		r = kvm_has_tsc_control;
+		break;
 	default:
 		r = 0;
 		break;
@@ -2120,8 +2127,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	kvm_x86_ops->vcpu_load(vcpu, cpu);
 	if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
 		/* Make sure TSC doesn't go backwards */
-		s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
-				native_read_tsc() - vcpu->arch.last_host_tsc;
+		s64 tsc_delta;
+		u64 tsc;
+
+		kvm_get_msr(vcpu, MSR_IA32_TSC, &tsc);
+		tsc_delta = !vcpu->arch.last_guest_tsc ? 0 :
+			     tsc - vcpu->arch.last_guest_tsc;
+
 		if (tsc_delta < 0)
 			mark_tsc_unstable("KVM discovered backwards TSC");
 		if (check_tsc_unstable()) {
@@ -2139,7 +2151,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	kvm_x86_ops->vcpu_put(vcpu);
 	kvm_put_guest_fpu(vcpu);
-	vcpu->arch.last_host_tsc = native_read_tsc();
+	kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc);
 }
 
 static int is_efer_nx(void)
@@ -2324,6 +2336,12 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) |
 		0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
 
+	/* cpuid 0xC0000001.edx */
+	const u32 kvm_supported_word5_x86_features =
+		F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
+		F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
+		F(PMM) | F(PMM_EN);
+
 	/* all calls to cpuid_count() should be made on the same cpu */
 	get_cpu();
 	do_cpuid_1_ent(entry, function, index);
@@ -2418,6 +2436,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) |
 			     (1 << KVM_FEATURE_NOP_IO_DELAY) |
 			     (1 << KVM_FEATURE_CLOCKSOURCE2) |
+			     (1 << KVM_FEATURE_ASYNC_PF) |
 			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
 		entry->ebx = 0;
 		entry->ecx = 0;
@@ -2432,6 +2451,20 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		entry->ecx &= kvm_supported_word6_x86_features;
 		cpuid_mask(&entry->ecx, 6);
 		break;
+	/*Add support for Centaur's CPUID instruction*/
+	case 0xC0000000:
+		/*Just support up to 0xC0000004 now*/
+		entry->eax = min(entry->eax, 0xC0000004);
+		break;
+	case 0xC0000001:
+		entry->edx &= kvm_supported_word5_x86_features;
+		cpuid_mask(&entry->edx, 5);
+		break;
+	case 0xC0000002:
+	case 0xC0000003:
+	case 0xC0000004:
+		/*Now nothing to do, reserved for the future*/
+		break;
 	}
 
 	kvm_x86_ops->set_supported_cpuid(function, entry);
@@ -2478,6 +2511,26 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 	if (nent >= cpuid->nent)
 		goto out_free;
 
+	/* Add support for Centaur's CPUID instruction. */
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR) {
+		do_cpuid_ent(&cpuid_entries[nent], 0xC0000000, 0,
+				&nent, cpuid->nent);
+
+		r = -E2BIG;
+		if (nent >= cpuid->nent)
+			goto out_free;
+
+		limit = cpuid_entries[nent - 1].eax;
+		for (func = 0xC0000001;
+			func <= limit && nent < cpuid->nent; ++func)
+			do_cpuid_ent(&cpuid_entries[nent], func, 0,
+					&nent, cpuid->nent);
+
+		r = -E2BIG;
+		if (nent >= cpuid->nent)
+			goto out_free;
+	}
+
 	do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent,
 		     cpuid->nent);
 
@@ -3046,6 +3099,32 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
 		break;
 	}
+	case KVM_SET_TSC_KHZ: {
+		u32 user_tsc_khz;
+
+		r = -EINVAL;
+		if (!kvm_has_tsc_control)
+			break;
+
+		user_tsc_khz = (u32)arg;
+
+		if (user_tsc_khz >= kvm_max_guest_tsc_khz)
+			goto out;
+
+		kvm_x86_ops->set_tsc_khz(vcpu, user_tsc_khz);
+
+		r = 0;
+		goto out;
+	}
+	case KVM_GET_TSC_KHZ: {
+		r = -EIO;
+		if (check_tsc_unstable())
+			goto out;
+
+		r = vcpu_tsc_khz(vcpu);
+
+		goto out;
+	}
 	default:
 		r = -EINVAL;
 	}
@@ -3595,20 +3674,43 @@ static void kvm_init_msr_list(void)
 static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
 			   const void *v)
 {
-	if (vcpu->arch.apic &&
-	    !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
-		return 0;
+	int handled = 0;
+	int n;
 
-	return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
+	do {
+		n = min(len, 8);
+		if (!(vcpu->arch.apic &&
+		      !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v))
+		    && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
+			break;
+		handled += n;
+		addr += n;
+		len -= n;
+		v += n;
+	} while (len);
+
+	return handled;
 }
 
 static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
 {
-	if (vcpu->arch.apic &&
-	    !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v))
-		return 0;
+	int handled = 0;
+	int n;
+
+	do {
+		n = min(len, 8);
+		if (!(vcpu->arch.apic &&
+		      !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v))
+		    && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
+			break;
+		trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
+		handled += n;
+		addr += n;
+		len -= n;
+		v += n;
+	} while (len);
 
-	return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
+	return handled;
 }
 
 static void kvm_set_segment(struct kvm_vcpu *vcpu,
@@ -3703,37 +3805,43 @@ out:
 }
 
 /* used for instruction fetching */
-static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes,
-				struct kvm_vcpu *vcpu,
+static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
+				gva_t addr, void *val, unsigned int bytes,
 				struct x86_exception *exception)
 {
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+
 	return kvm_read_guest_virt_helper(addr, val, bytes, vcpu,
 					  access | PFERR_FETCH_MASK,
 					  exception);
 }
 
-static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
-			       struct kvm_vcpu *vcpu,
+static int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
+			       gva_t addr, void *val, unsigned int bytes,
 			       struct x86_exception *exception)
 {
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+
 	return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
 					  exception);
 }
 
-static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes,
-				      struct kvm_vcpu *vcpu,
+static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
+				      gva_t addr, void *val, unsigned int bytes,
 				      struct x86_exception *exception)
 {
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
 }
 
-static int kvm_write_guest_virt_system(gva_t addr, void *val,
+static int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
+				       gva_t addr, void *val,
 				       unsigned int bytes,
-				       struct kvm_vcpu *vcpu,
 				       struct x86_exception *exception)
 {
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	void *data = val;
 	int r = X86EMUL_CONTINUE;
 
@@ -3761,13 +3869,15 @@ out:
 	return r;
 }
 
-static int emulator_read_emulated(unsigned long addr,
+static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
+				  unsigned long addr,
 				  void *val,
 				  unsigned int bytes,
-				  struct x86_exception *exception,
-				  struct kvm_vcpu *vcpu)
+				  struct x86_exception *exception)
 {
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	gpa_t                 gpa;
+	int handled;
 
 	if (vcpu->mmio_read_completed) {
 		memcpy(val, vcpu->mmio_data, bytes);
@@ -3786,7 +3896,7 @@ static int emulator_read_emulated(unsigned long addr,
 	if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
 		goto mmio;
 
-	if (kvm_read_guest_virt(addr, val, bytes, vcpu, exception)
+	if (kvm_read_guest_virt(ctxt, addr, val, bytes, exception)
 	    == X86EMUL_CONTINUE)
 		return X86EMUL_CONTINUE;
 
@@ -3794,18 +3904,24 @@ mmio:
 	/*
 	 * Is this MMIO handled locally?
 	 */
-	if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) {
-		trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val);
+	handled = vcpu_mmio_read(vcpu, gpa, bytes, val);
+
+	if (handled == bytes)
 		return X86EMUL_CONTINUE;
-	}
+
+	gpa += handled;
+	bytes -= handled;
+	val += handled;
 
 	trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
 
 	vcpu->mmio_needed = 1;
 	vcpu->run->exit_reason = KVM_EXIT_MMIO;
 	vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
-	vcpu->run->mmio.len = vcpu->mmio_size = bytes;
+	vcpu->mmio_size = bytes;
+	vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
 	vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0;
+	vcpu->mmio_index = 0;
 
 	return X86EMUL_IO_NEEDED;
 }
@@ -3829,6 +3945,7 @@ static int emulator_write_emulated_onepage(unsigned long addr,
 					   struct kvm_vcpu *vcpu)
 {
 	gpa_t                 gpa;
+	int handled;
 
 	gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception);
 
@@ -3847,25 +3964,35 @@ mmio:
 	/*
 	 * Is this MMIO handled locally?
 	 */
-	if (!vcpu_mmio_write(vcpu, gpa, bytes, val))
+	handled = vcpu_mmio_write(vcpu, gpa, bytes, val);
+	if (handled == bytes)
 		return X86EMUL_CONTINUE;
 
+	gpa += handled;
+	bytes -= handled;
+	val += handled;
+
 	vcpu->mmio_needed = 1;
+	memcpy(vcpu->mmio_data, val, bytes);
 	vcpu->run->exit_reason = KVM_EXIT_MMIO;
 	vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
-	vcpu->run->mmio.len = vcpu->mmio_size = bytes;
+	vcpu->mmio_size = bytes;
+	vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
 	vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1;
-	memcpy(vcpu->run->mmio.data, val, bytes);
+	memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8);
+	vcpu->mmio_index = 0;
 
 	return X86EMUL_CONTINUE;
 }
 
-int emulator_write_emulated(unsigned long addr,
+int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
+			    unsigned long addr,
 			    const void *val,
 			    unsigned int bytes,
-			    struct x86_exception *exception,
-			    struct kvm_vcpu *vcpu)
+			    struct x86_exception *exception)
 {
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+
 	/* Crossing a page boundary? */
 	if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
 		int rc, now;
@@ -3893,13 +4020,14 @@ int emulator_write_emulated(unsigned long addr,
 	(cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
 #endif
 
-static int emulator_cmpxchg_emulated(unsigned long addr,
+static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
+				     unsigned long addr,
 				     const void *old,
 				     const void *new,
 				     unsigned int bytes,
-				     struct x86_exception *exception,
-				     struct kvm_vcpu *vcpu)
+				     struct x86_exception *exception)
 {
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	gpa_t gpa;
 	struct page *page;
 	char *kaddr;
@@ -3955,7 +4083,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
 emul_write:
 	printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
 
-	return emulator_write_emulated(addr, new, bytes, exception, vcpu);
+	return emulator_write_emulated(ctxt, addr, new, bytes, exception);
 }
 
 static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
@@ -3974,9 +4102,12 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
 }
 
 
-static int emulator_pio_in_emulated(int size, unsigned short port, void *val,
-			     unsigned int count, struct kvm_vcpu *vcpu)
+static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
+				    int size, unsigned short port, void *val,
+				    unsigned int count)
 {
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+
 	if (vcpu->arch.pio.count)
 		goto data_avail;
 
@@ -4004,10 +4135,12 @@ static int emulator_pio_in_emulated(int size, unsigned short port, void *val,
 	return 0;
 }
 
-static int emulator_pio_out_emulated(int size, unsigned short port,
-			      const void *val, unsigned int count,
-			      struct kvm_vcpu *vcpu)
+static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
+				     int size, unsigned short port,
+				     const void *val, unsigned int count)
 {
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+
 	trace_kvm_pio(1, port, size, count);
 
 	vcpu->arch.pio.port = port;
@@ -4037,10 +4170,9 @@ static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
 	return kvm_x86_ops->get_segment_base(vcpu, seg);
 }
 
-int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
+static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
 {
-	kvm_mmu_invlpg(vcpu, address);
-	return X86EMUL_CONTINUE;
+	kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
 }
 
 int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
@@ -4062,22 +4194,20 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
 
-int emulate_clts(struct kvm_vcpu *vcpu)
+static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
 {
-	kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
-	kvm_x86_ops->fpu_activate(vcpu);
-	return X86EMUL_CONTINUE;
+	kvm_emulate_wbinvd(emul_to_vcpu(ctxt));
 }
 
-int emulator_get_dr(int dr, unsigned long *dest, struct kvm_vcpu *vcpu)
+int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
 {
-	return _kvm_get_dr(vcpu, dr, dest);
+	return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
 }
 
-int emulator_set_dr(int dr, unsigned long value, struct kvm_vcpu *vcpu)
+int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
 {
 
-	return __kvm_set_dr(vcpu, dr, value);
+	return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
 }
 
 static u64 mk_cr_64(u64 curr_cr, u32 new_val)
@@ -4085,8 +4215,9 @@ static u64 mk_cr_64(u64 curr_cr, u32 new_val)
 	return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
 }
 
-static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu)
+static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
 {
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	unsigned long value;
 
 	switch (cr) {
@@ -4113,8 +4244,9 @@ static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu)
 	return value;
 }
 
-static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu)
+static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
 {
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	int res = 0;
 
 	switch (cr) {
@@ -4141,33 +4273,45 @@ static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu)
 	return res;
 }
 
-static int emulator_get_cpl(struct kvm_vcpu *vcpu)
+static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
+{
+	return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt));
+}
+
+static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
+{
+	kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt);
+}
+
+static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
 {
-	return kvm_x86_ops->get_cpl(vcpu);
+	kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt);
 }
 
-static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu)
+static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
 {
-	kvm_x86_ops->get_gdt(vcpu, dt);
+	kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt);
 }
 
-static void emulator_get_idt(struct desc_ptr *dt, struct kvm_vcpu *vcpu)
+static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
 {
-	kvm_x86_ops->get_idt(vcpu, dt);
+	kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt);
 }
 
-static unsigned long emulator_get_cached_segment_base(int seg,
-						      struct kvm_vcpu *vcpu)
+static unsigned long emulator_get_cached_segment_base(
+	struct x86_emulate_ctxt *ctxt, int seg)
 {
-	return get_segment_base(vcpu, seg);
+	return get_segment_base(emul_to_vcpu(ctxt), seg);
 }
 
-static bool emulator_get_cached_descriptor(struct desc_struct *desc, u32 *base3,
-					   int seg, struct kvm_vcpu *vcpu)
+static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
+				 struct desc_struct *desc, u32 *base3,
+				 int seg)
 {
 	struct kvm_segment var;
 
-	kvm_get_segment(vcpu, &var, seg);
+	kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
+	*selector = var.selector;
 
 	if (var.unusable)
 		return false;
@@ -4192,14 +4336,14 @@ static bool emulator_get_cached_descriptor(struct desc_struct *desc, u32 *base3,
 	return true;
 }
 
-static void emulator_set_cached_descriptor(struct desc_struct *desc, u32 base3,
-					   int seg, struct kvm_vcpu *vcpu)
+static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
+				 struct desc_struct *desc, u32 base3,
+				 int seg)
 {
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	struct kvm_segment var;
 
-	/* needed to preserve selector */
-	kvm_get_segment(vcpu, &var, seg);
-
+	var.selector = selector;
 	var.base = get_desc_base(desc);
 #ifdef CONFIG_X86_64
 	var.base |= ((u64)base3) << 32;
@@ -4223,22 +4367,44 @@ static void emulator_set_cached_descriptor(struct desc_struct *desc, u32 base3,
 	return;
 }
 
-static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu)
+static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
+			    u32 msr_index, u64 *pdata)
 {
-	struct kvm_segment kvm_seg;
+	return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
+}
 
-	kvm_get_segment(vcpu, &kvm_seg, seg);
-	return kvm_seg.selector;
+static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
+			    u32 msr_index, u64 data)
+{
+	return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
 }
 
-static void emulator_set_segment_selector(u16 sel, int seg,
-					  struct kvm_vcpu *vcpu)
+static void emulator_halt(struct x86_emulate_ctxt *ctxt)
 {
-	struct kvm_segment kvm_seg;
+	emul_to_vcpu(ctxt)->arch.halt_request = 1;
+}
 
-	kvm_get_segment(vcpu, &kvm_seg, seg);
-	kvm_seg.selector = sel;
-	kvm_set_segment(vcpu, &kvm_seg, seg);
+static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
+{
+	preempt_disable();
+	kvm_load_guest_fpu(emul_to_vcpu(ctxt));
+	/*
+	 * CR0.TS may reference the host fpu state, not the guest fpu state,
+	 * so it may be clear at this point.
+	 */
+	clts();
+}
+
+static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
+{
+	preempt_enable();
+}
+
+static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
+			      struct x86_instruction_info *info,
+			      enum x86_intercept_stage stage)
+{
+	return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
 }
 
 static struct x86_emulate_ops emulate_ops = {
@@ -4248,22 +4414,29 @@ static struct x86_emulate_ops emulate_ops = {
 	.read_emulated       = emulator_read_emulated,
 	.write_emulated      = emulator_write_emulated,
 	.cmpxchg_emulated    = emulator_cmpxchg_emulated,
+	.invlpg              = emulator_invlpg,
 	.pio_in_emulated     = emulator_pio_in_emulated,
 	.pio_out_emulated    = emulator_pio_out_emulated,
-	.get_cached_descriptor = emulator_get_cached_descriptor,
-	.set_cached_descriptor = emulator_set_cached_descriptor,
-	.get_segment_selector = emulator_get_segment_selector,
-	.set_segment_selector = emulator_set_segment_selector,
+	.get_segment         = emulator_get_segment,
+	.set_segment         = emulator_set_segment,
 	.get_cached_segment_base = emulator_get_cached_segment_base,
 	.get_gdt             = emulator_get_gdt,
 	.get_idt	     = emulator_get_idt,
+	.set_gdt             = emulator_set_gdt,
+	.set_idt	     = emulator_set_idt,
 	.get_cr              = emulator_get_cr,
 	.set_cr              = emulator_set_cr,
 	.cpl                 = emulator_get_cpl,
 	.get_dr              = emulator_get_dr,
 	.set_dr              = emulator_set_dr,
-	.set_msr             = kvm_set_msr,
-	.get_msr             = kvm_get_msr,
+	.set_msr             = emulator_set_msr,
+	.get_msr             = emulator_get_msr,
+	.halt                = emulator_halt,
+	.wbinvd              = emulator_wbinvd,
+	.fix_hypercall       = emulator_fix_hypercall,
+	.get_fpu             = emulator_get_fpu,
+	.put_fpu             = emulator_put_fpu,
+	.intercept           = emulator_intercept,
 };
 
 static void cache_all_regs(struct kvm_vcpu *vcpu)
@@ -4305,12 +4478,17 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
 	struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
 	int cs_db, cs_l;
 
+	/*
+	 * TODO: fix emulate.c to use guest_read/write_register
+	 * instead of direct ->regs accesses, can save hundred cycles
+	 * on Intel for instructions that don't read/change RSP, for
+	 * for example.
+	 */
 	cache_all_regs(vcpu);
 
 	kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 
-	vcpu->arch.emulate_ctxt.vcpu = vcpu;
-	vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
+	vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu);
 	vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu);
 	vcpu->arch.emulate_ctxt.mode =
 		(!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
@@ -4318,11 +4496,13 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
 		? X86EMUL_MODE_VM86 : cs_l
 		? X86EMUL_MODE_PROT64 :	cs_db
 		? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
+	vcpu->arch.emulate_ctxt.guest_mode = is_guest_mode(vcpu);
 	memset(c, 0, sizeof(struct decode_cache));
 	memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
+	vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
 }
 
-int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq)
+int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
 {
 	struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
 	int ret;
@@ -4331,7 +4511,8 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq)
 
 	vcpu->arch.emulate_ctxt.decode.op_bytes = 2;
 	vcpu->arch.emulate_ctxt.decode.ad_bytes = 2;
-	vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip;
+	vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip +
+								 inc_eip;
 	ret = emulate_int_real(&vcpu->arch.emulate_ctxt, &emulate_ops, irq);
 
 	if (ret != X86EMUL_CONTINUE)
@@ -4340,7 +4521,7 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq)
 	vcpu->arch.emulate_ctxt.eip = c->eip;
 	memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
 	kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
-	kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
+	kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
 
 	if (irq == NMI_VECTOR)
 		vcpu->arch.nmi_pending = false;
@@ -4402,16 +4583,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 {
 	int r;
 	struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
+	bool writeback = true;
 
 	kvm_clear_exception_queue(vcpu);
-	vcpu->arch.mmio_fault_cr2 = cr2;
-	/*
-	 * TODO: fix emulate.c to use guest_read/write_register
-	 * instead of direct ->regs accesses, can save hundred cycles
-	 * on Intel for instructions that don't read/change RSP, for
-	 * for example.
-	 */
-	cache_all_regs(vcpu);
 
 	if (!(emulation_type & EMULTYPE_NO_DECODE)) {
 		init_emulate_ctxt(vcpu);
@@ -4442,13 +4616,19 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 		return EMULATE_DONE;
 	}
 
-	/* this is needed for vmware backdor interface to work since it
+	/* this is needed for vmware backdoor interface to work since it
 	   changes registers values  during IO operation */
-	memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
+	if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
+		vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
+		memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
+	}
 
 restart:
 	r = x86_emulate_insn(&vcpu->arch.emulate_ctxt);
 
+	if (r == EMULATION_INTERCEPTED)
+		return EMULATE_DONE;
+
 	if (r == EMULATION_FAILED) {
 		if (reexecute_instruction(vcpu, cr2))
 			return EMULATE_DONE;
@@ -4462,21 +4642,28 @@ restart:
 	} else if (vcpu->arch.pio.count) {
 		if (!vcpu->arch.pio.in)
 			vcpu->arch.pio.count = 0;
+		else
+			writeback = false;
 		r = EMULATE_DO_MMIO;
 	} else if (vcpu->mmio_needed) {
-		if (vcpu->mmio_is_write)
-			vcpu->mmio_needed = 0;
+		if (!vcpu->mmio_is_write)
+			writeback = false;
 		r = EMULATE_DO_MMIO;
 	} else if (r == EMULATION_RESTART)
 		goto restart;
 	else
 		r = EMULATE_DONE;
 
-	toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility);
-	kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
-	kvm_make_request(KVM_REQ_EVENT, vcpu);
-	memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
-	kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
+	if (writeback) {
+		toggle_interruptibility(vcpu,
+				vcpu->arch.emulate_ctxt.interruptibility);
+		kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
+		kvm_make_request(KVM_REQ_EVENT, vcpu);
+		memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
+		vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
+		kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
+	} else
+		vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
 
 	return r;
 }
@@ -4485,7 +4672,8 @@ EXPORT_SYMBOL_GPL(x86_emulate_instruction);
 int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
 {
 	unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
-	int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu);
+	int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
+					    size, port, &val, 1);
 	/* do not return to emulator after return from userspace */
 	vcpu->arch.pio.count = 0;
 	return ret;
@@ -4879,8 +5067,9 @@ out:
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
 
-int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
+int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
 {
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	char instruction[3];
 	unsigned long rip = kvm_rip_read(vcpu);
 
@@ -4893,21 +5082,8 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
 
 	kvm_x86_ops->patch_hypercall(vcpu, instruction);
 
-	return emulator_write_emulated(rip, instruction, 3, NULL, vcpu);
-}
-
-void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
-{
-	struct desc_ptr dt = { limit, base };
-
-	kvm_x86_ops->set_gdt(vcpu, &dt);
-}
-
-void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
-{
-	struct desc_ptr dt = { limit, base };
-
-	kvm_x86_ops->set_idt(vcpu, &dt);
+	return emulator_write_emulated(&vcpu->arch.emulate_ctxt,
+				       rip, instruction, 3, NULL);
 }
 
 static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
@@ -5170,6 +5346,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 {
 	int r;
+	bool nmi_pending;
 	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
 		vcpu->run->request_interrupt_window;
 
@@ -5207,19 +5384,25 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			r = 1;
 			goto out;
 		}
-		if (kvm_check_request(KVM_REQ_NMI, vcpu))
-			vcpu->arch.nmi_pending = true;
 	}
 
 	r = kvm_mmu_reload(vcpu);
 	if (unlikely(r))
 		goto out;
 
+	/*
+	 * An NMI can be injected between local nmi_pending read and
+	 * vcpu->arch.nmi_pending read inside inject_pending_event().
+	 * But in that case, KVM_REQ_EVENT will be set, which makes
+	 * the race described above benign.
+	 */
+	nmi_pending = ACCESS_ONCE(vcpu->arch.nmi_pending);
+
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
 		inject_pending_event(vcpu);
 
 		/* enable NMI/IRQ window open exits if needed */
-		if (vcpu->arch.nmi_pending)
+		if (nmi_pending)
 			kvm_x86_ops->enable_nmi_window(vcpu);
 		else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
 			kvm_x86_ops->enable_irq_window(vcpu);
@@ -5399,6 +5582,41 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 	return r;
 }
 
+static int complete_mmio(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	int r;
+
+	if (!(vcpu->arch.pio.count || vcpu->mmio_needed))
+		return 1;
+
+	if (vcpu->mmio_needed) {
+		vcpu->mmio_needed = 0;
+		if (!vcpu->mmio_is_write)
+			memcpy(vcpu->mmio_data + vcpu->mmio_index,
+			       run->mmio.data, 8);
+		vcpu->mmio_index += 8;
+		if (vcpu->mmio_index < vcpu->mmio_size) {
+			run->exit_reason = KVM_EXIT_MMIO;
+			run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index;
+			memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8);
+			run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8);
+			run->mmio.is_write = vcpu->mmio_is_write;
+			vcpu->mmio_needed = 1;
+			return 0;
+		}
+		if (vcpu->mmio_is_write)
+			return 1;
+		vcpu->mmio_read_completed = 1;
+	}
+	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+	r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
+	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+	if (r != EMULATE_DONE)
+		return 0;
+	return 1;
+}
+
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	int r;
@@ -5425,20 +5643,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		}
 	}
 
-	if (vcpu->arch.pio.count || vcpu->mmio_needed) {
-		if (vcpu->mmio_needed) {
-			memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
-			vcpu->mmio_read_completed = 1;
-			vcpu->mmio_needed = 0;
-		}
-		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-		r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
-		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
-		if (r != EMULATE_DONE) {
-			r = 0;
-			goto out;
-		}
-	}
+	r = complete_mmio(vcpu);
+	if (r <= 0)
+		goto out;
+
 	if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
 		kvm_register_write(vcpu, VCPU_REGS_RAX,
 				     kvm_run->hypercall.ret);
@@ -5455,6 +5663,18 @@ out:
 
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
+	if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
+		/*
+		 * We are here if userspace calls get_regs() in the middle of
+		 * instruction emulation. Registers state needs to be copied
+		 * back from emulation context to vcpu. Usrapace shouldn't do
+		 * that usually, but some bad designed PV devices (vmware
+		 * backdoor interface) need this to work
+		 */
+		struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
+		memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
+		vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
+	}
 	regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
 	regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
 	regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
@@ -5482,6 +5702,9 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 
 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
+	vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
+	vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
+
 	kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
 	kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
 	kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
@@ -5592,7 +5815,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
 
 	memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
 	kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
-	kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
+	kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 	return EMULATE_DONE;
 }
@@ -5974,8 +6197,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	}
 	vcpu->arch.pio_data = page_address(page);
 
-	if (!kvm->arch.virtual_tsc_khz)
-		kvm_arch_set_tsc_khz(kvm, max_tsc_khz);
+	kvm_init_tsc_catchup(vcpu, max_tsc_khz);
 
 	r = kvm_mmu_create(vcpu);
 	if (r < 0)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index c600da830ce..e407ed3df81 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -77,7 +77,7 @@ static inline u32 bit(int bitno)
 
 void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
 void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
-int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq);
+int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
 
 void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data);
 
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 1cd608973ce..e191c096ab9 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -7,7 +7,7 @@
  * kernel and insert a module (lg.ko) which allows us to run other Linux
  * kernels the same way we'd run processes.  We call the first kernel the Host,
  * and the others the Guests.  The program which sets up and configures Guests
- * (such as the example in Documentation/lguest/lguest.c) is called the
+ * (such as the example in Documentation/virtual/lguest/lguest.c) is called the
  * Launcher.
  *
  * Secondly, we only run specially modified Guests, not normal kernels: setting
@@ -913,8 +913,6 @@ static struct clocksource lguest_clock = {
 	.rating		= 200,
 	.read		= lguest_clock_read,
 	.mask		= CLOCKSOURCE_MASK(64),
-	.mult		= 1 << 22,
-	.shift		= 22,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -997,7 +995,7 @@ static void lguest_time_init(void)
 	/* Set up the timer interrupt (0) to go to our simple timer routine */
 	irq_set_handler(0, lguest_time_irq);
 
-	clocksource_register(&lguest_clock);
+	clocksource_register_hz(&lguest_clock, NSEC_PER_SEC);
 
 	/* We can't set cpumask in the initializer: damn C limitations!  Set it
 	 * here and register our timer device. */
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index aa4326bfb24..f2145cfa12a 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -1,5 +1,6 @@
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/alternative-asm.h>
 
 /*
  * Zero a page. 	
@@ -14,6 +15,15 @@ ENTRY(clear_page_c)
 	CFI_ENDPROC
 ENDPROC(clear_page_c)
 
+ENTRY(clear_page_c_e)
+	CFI_STARTPROC
+	movl $4096,%ecx
+	xorl %eax,%eax
+	rep stosb
+	ret
+	CFI_ENDPROC
+ENDPROC(clear_page_c_e)
+
 ENTRY(clear_page)
 	CFI_STARTPROC
 	xorl   %eax,%eax
@@ -38,21 +48,26 @@ ENTRY(clear_page)
 .Lclear_page_end:
 ENDPROC(clear_page)
 
-	/* Some CPUs run faster using the string instructions.
-	   It is also a lot simpler. Use this when possible */
+	/*
+	 * Some CPUs support enhanced REP MOVSB/STOSB instructions.
+	 * It is recommended to use this when possible.
+	 * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
+	 * Otherwise, use original function.
+	 *
+	 */
 
 #include <asm/cpufeature.h>
 
 	.section .altinstr_replacement,"ax"
 1:	.byte 0xeb					/* jmp <disp8> */
 	.byte (clear_page_c - clear_page) - (2f - 1b)	/* offset */
-2:
+2:	.byte 0xeb					/* jmp <disp8> */
+	.byte (clear_page_c_e - clear_page) - (3f - 2b)	/* offset */
+3:
 	.previous
 	.section .altinstructions,"a"
-	.align 8
-	.quad clear_page
-	.quad 1b
-	.word X86_FEATURE_REP_GOOD
-	.byte .Lclear_page_end - clear_page
-	.byte 2b - 1b
+	altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\
+			     .Lclear_page_end-clear_page, 2b-1b
+	altinstruction_entry clear_page,2b,X86_FEATURE_ERMS,   \
+			     .Lclear_page_end-clear_page,3b-2b
 	.previous
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 99e48261519..024840266ba 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -15,23 +15,30 @@
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 #include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
 
-	.macro ALTERNATIVE_JUMP feature,orig,alt
+/*
+ * By placing feature2 after feature1 in altinstructions section, we logically
+ * implement:
+ * If CPU has feature2, jmp to alt2 is used
+ * else if CPU has feature1, jmp to alt1 is used
+ * else jmp to orig is used.
+ */
+	.macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
 0:
 	.byte 0xe9	/* 32bit jump */
 	.long \orig-1f	/* by default jump to orig */
 1:
 	.section .altinstr_replacement,"ax"
 2:	.byte 0xe9			/* near jump with 32bit immediate */
-	.long \alt-1b /* offset */   /* or alternatively to alt */
+	.long \alt1-1b /* offset */   /* or alternatively to alt1 */
+3:	.byte 0xe9			/* near jump with 32bit immediate */
+	.long \alt2-1b /* offset */   /* or alternatively to alt2 */
 	.previous
+
 	.section .altinstructions,"a"
-	.align 8
-	.quad  0b
-	.quad  2b
-	.word  \feature			/* when feature is set */
-	.byte  5
-	.byte  5
+	altinstruction_entry 0b,2b,\feature1,5,5
+	altinstruction_entry 0b,3b,\feature2,5,5
 	.previous
 	.endm
 
@@ -72,8 +79,10 @@ ENTRY(_copy_to_user)
 	addq %rdx,%rcx
 	jc bad_to_user
 	cmpq TI_addr_limit(%rax),%rcx
-	jae bad_to_user
-	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+	ja bad_to_user
+	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\
+		copy_user_generic_unrolled,copy_user_generic_string,	\
+		copy_user_enhanced_fast_string
 	CFI_ENDPROC
 ENDPROC(_copy_to_user)
 
@@ -85,8 +94,10 @@ ENTRY(_copy_from_user)
 	addq %rdx,%rcx
 	jc bad_from_user
 	cmpq TI_addr_limit(%rax),%rcx
-	jae bad_from_user
-	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+	ja bad_from_user
+	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\
+		copy_user_generic_unrolled,copy_user_generic_string,	\
+		copy_user_enhanced_fast_string
 	CFI_ENDPROC
 ENDPROC(_copy_from_user)
 
@@ -255,3 +266,37 @@ ENTRY(copy_user_generic_string)
 	.previous
 	CFI_ENDPROC
 ENDPROC(copy_user_generic_string)
+
+/*
+ * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
+ * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
+ *
+ * Input:
+ * rdi destination
+ * rsi source
+ * rdx count
+ *
+ * Output:
+ * eax uncopied bytes or 0 if successful.
+ */
+ENTRY(copy_user_enhanced_fast_string)
+	CFI_STARTPROC
+	andl %edx,%edx
+	jz 2f
+	movl %edx,%ecx
+1:	rep
+	movsb
+2:	xorl %eax,%eax
+	ret
+
+	.section .fixup,"ax"
+12:	movl %ecx,%edx		/* ecx is zerorest also */
+	jmp copy_user_handle_tail
+	.previous
+
+	.section __ex_table,"a"
+	.align 8
+	.quad 1b,12b
+	.previous
+	CFI_ENDPROC
+ENDPROC(copy_user_enhanced_fast_string)
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 75ef61e35e3..efbf2a0ecde 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -4,6 +4,7 @@
 
 #include <asm/cpufeature.h>
 #include <asm/dwarf2.h>
+#include <asm/alternative-asm.h>
 
 /*
  * memcpy - Copy a memory block.
@@ -37,6 +38,23 @@
 .Lmemcpy_e:
 	.previous
 
+/*
+ * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than
+ * memcpy_c. Use memcpy_c_e when possible.
+ *
+ * This gets patched over the unrolled variant (below) via the
+ * alternative instructions framework:
+ */
+	.section .altinstr_replacement, "ax", @progbits
+.Lmemcpy_c_e:
+	movq %rdi, %rax
+
+	movl %edx, %ecx
+	rep movsb
+	ret
+.Lmemcpy_e_e:
+	.previous
+
 ENTRY(__memcpy)
 ENTRY(memcpy)
 	CFI_STARTPROC
@@ -49,7 +67,7 @@ ENTRY(memcpy)
 	jb .Lhandle_tail
 
 	/*
-	 * We check whether memory false dependece could occur,
+	 * We check whether memory false dependence could occur,
 	 * then jump to corresponding copy mode.
 	 */
 	cmp  %dil, %sil
@@ -171,21 +189,22 @@ ENDPROC(memcpy)
 ENDPROC(__memcpy)
 
 	/*
-	 * Some CPUs run faster using the string copy instructions.
-	 * It is also a lot simpler. Use this when possible:
-	 */
-
-	.section .altinstructions, "a"
-	.align 8
-	.quad memcpy
-	.quad .Lmemcpy_c
-	.word X86_FEATURE_REP_GOOD
-
-	/*
+	 * Some CPUs are adding enhanced REP MOVSB/STOSB feature
+	 * If the feature is supported, memcpy_c_e() is the first choice.
+	 * If enhanced rep movsb copy is not available, use fast string copy
+	 * memcpy_c() when possible. This is faster and code is simpler than
+	 * original memcpy().
+	 * Otherwise, original memcpy() is used.
+	 * In .altinstructions section, ERMS feature is placed after REG_GOOD
+         * feature to implement the right patch order.
+	 *
 	 * Replace only beginning, memcpy is used to apply alternatives,
 	 * so it is silly to overwrite itself with nops - reboot is the
 	 * only outcome...
 	 */
-	.byte .Lmemcpy_e - .Lmemcpy_c
-	.byte .Lmemcpy_e - .Lmemcpy_c
+	.section .altinstructions, "a"
+	altinstruction_entry memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
+			     .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c
+	altinstruction_entry memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
+			     .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e
 	.previous
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index 0ecb8433e5a..d0ec9c2936d 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -8,6 +8,7 @@
 #define _STRING_C
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
 
 #undef memmove
 
@@ -24,6 +25,7 @@
  */
 ENTRY(memmove)
 	CFI_STARTPROC
+
 	/* Handle more 32bytes in loop */
 	mov %rdi, %rax
 	cmp $0x20, %rdx
@@ -31,8 +33,13 @@ ENTRY(memmove)
 
 	/* Decide forward/backward copy mode */
 	cmp %rdi, %rsi
-	jb	2f
+	jge .Lmemmove_begin_forward
+	mov %rsi, %r8
+	add %rdx, %r8
+	cmp %rdi, %r8
+	jg 2f
 
+.Lmemmove_begin_forward:
 	/*
 	 * movsq instruction have many startup latency
 	 * so we handle small size by general register.
@@ -78,6 +85,8 @@ ENTRY(memmove)
 	rep movsq
 	movq %r11, (%r10)
 	jmp 13f
+.Lmemmove_end_forward:
+
 	/*
 	 * Handle data backward by movsq.
 	 */
@@ -194,4 +203,22 @@ ENTRY(memmove)
 13:
 	retq
 	CFI_ENDPROC
+
+	.section .altinstr_replacement,"ax"
+.Lmemmove_begin_forward_efs:
+	/* Forward moving data. */
+	movq %rdx, %rcx
+	rep movsb
+	retq
+.Lmemmove_end_forward_efs:
+	.previous
+
+	.section .altinstructions,"a"
+	.align 8
+	.quad .Lmemmove_begin_forward
+	.quad .Lmemmove_begin_forward_efs
+	.word X86_FEATURE_ERMS
+	.byte .Lmemmove_end_forward-.Lmemmove_begin_forward
+	.byte .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
+	.previous
 ENDPROC(memmove)
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 09d34426965..79bd454b78a 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -2,9 +2,13 @@
 
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
 
 /*
- * ISO C memset - set a memory block to a byte value.
+ * ISO C memset - set a memory block to a byte value. This function uses fast
+ * string to get better performance than the original function. The code is
+ * simpler and shorter than the orignal function as well.
  *	
  * rdi   destination
  * rsi   value (char) 
@@ -31,6 +35,28 @@
 .Lmemset_e:
 	.previous
 
+/*
+ * ISO C memset - set a memory block to a byte value. This function uses
+ * enhanced rep stosb to override the fast string function.
+ * The code is simpler and shorter than the fast string function as well.
+ *
+ * rdi   destination
+ * rsi   value (char)
+ * rdx   count (bytes)
+ *
+ * rax   original destination
+ */
+	.section .altinstr_replacement, "ax", @progbits
+.Lmemset_c_e:
+	movq %rdi,%r9
+	movb %sil,%al
+	movl %edx,%ecx
+	rep stosb
+	movq %r9,%rax
+	ret
+.Lmemset_e_e:
+	.previous
+
 ENTRY(memset)
 ENTRY(__memset)
 	CFI_STARTPROC
@@ -112,16 +138,20 @@ ENTRY(__memset)
 ENDPROC(memset)
 ENDPROC(__memset)
 
-	/* Some CPUs run faster using the string instructions.
-	   It is also a lot simpler. Use this when possible */
-
-#include <asm/cpufeature.h>
-
+	/* Some CPUs support enhanced REP MOVSB/STOSB feature.
+	 * It is recommended to use this when possible.
+	 *
+	 * If enhanced REP MOVSB/STOSB feature is not available, use fast string
+	 * instructions.
+	 *
+	 * Otherwise, use original memset function.
+	 *
+	 * In .altinstructions section, ERMS feature is placed after REG_GOOD
+         * feature to implement the right patch order.
+	 */
 	.section .altinstructions,"a"
-	.align 8
-	.quad memset
-	.quad .Lmemset_c
-	.word X86_FEATURE_REP_GOOD
-	.byte .Lfinal - memset
-	.byte .Lmemset_e - .Lmemset_c
+	altinstruction_entry memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
+			     .Lfinal-memset,.Lmemset_e-.Lmemset_c
+	altinstruction_entry memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
+			     .Lfinal-memset,.Lmemset_e_e-.Lmemset_c_e
 	.previous
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 3e608edf995..3d11327c9ab 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -23,8 +23,8 @@ mmiotrace-y			:= kmmio.o pf_in.o mmio-mod.o
 obj-$(CONFIG_MMIOTRACE_TEST)	+= testmmiotrace.o
 
 obj-$(CONFIG_NUMA)		+= numa.o numa_$(BITS).o
-obj-$(CONFIG_AMD_NUMA)		+= amdtopology_64.o
-obj-$(CONFIG_ACPI_NUMA)		+= srat_$(BITS).o
+obj-$(CONFIG_AMD_NUMA)		+= amdtopology.o
+obj-$(CONFIG_ACPI_NUMA)		+= srat.o
 obj-$(CONFIG_NUMA_EMU)		+= numa_emulation.o
 
 obj-$(CONFIG_HAVE_MEMBLOCK)		+= memblock.o
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology.c
index 0919c26820d..5247d01329c 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology.c
@@ -12,6 +12,7 @@
 #include <linux/module.h>
 #include <linux/nodemask.h>
 #include <linux/memblock.h>
+#include <linux/bootmem.h>
 
 #include <asm/io.h>
 #include <linux/pci_ids.h>
@@ -69,10 +70,10 @@ static __init void early_get_boot_cpu_id(void)
 
 int __init amd_numa_init(void)
 {
-	unsigned long start = PFN_PHYS(0);
-	unsigned long end = PFN_PHYS(max_pfn);
+	u64 start = PFN_PHYS(0);
+	u64 end = PFN_PHYS(max_pfn);
 	unsigned numnodes;
-	unsigned long prevbase;
+	u64 prevbase;
 	int i, j, nb;
 	u32 nodeid, reg;
 	unsigned int bits, cores, apicid_base;
@@ -95,7 +96,7 @@ int __init amd_numa_init(void)
 
 	prevbase = 0;
 	for (i = 0; i < 8; i++) {
-		unsigned long base, limit;
+		u64 base, limit;
 
 		base = read_pci_config(0, nb, 1, 0x40 + i*8);
 		limit = read_pci_config(0, nb, 1, 0x44 + i*8);
@@ -107,18 +108,18 @@ int __init amd_numa_init(void)
 			continue;
 		}
 		if (nodeid >= numnodes) {
-			pr_info("Ignoring excess node %d (%lx:%lx)\n", nodeid,
+			pr_info("Ignoring excess node %d (%Lx:%Lx)\n", nodeid,
 				base, limit);
 			continue;
 		}
 
 		if (!limit) {
-			pr_info("Skipping node entry %d (base %lx)\n",
+			pr_info("Skipping node entry %d (base %Lx)\n",
 				i, base);
 			continue;
 		}
 		if ((base >> 8) & 3 || (limit >> 8) & 3) {
-			pr_err("Node %d using interleaving mode %lx/%lx\n",
+			pr_err("Node %d using interleaving mode %Lx/%Lx\n",
 			       nodeid, (base >> 8) & 3, (limit >> 8) & 3);
 			return -EINVAL;
 		}
@@ -150,19 +151,19 @@ int __init amd_numa_init(void)
 			continue;
 		}
 		if (limit < base) {
-			pr_err("Node %d bogus settings %lx-%lx.\n",
+			pr_err("Node %d bogus settings %Lx-%Lx.\n",
 			       nodeid, base, limit);
 			continue;
 		}
 
 		/* Could sort here, but pun for now. Should not happen anyroads. */
 		if (prevbase > base) {
-			pr_err("Node map not sorted %lx,%lx\n",
+			pr_err("Node map not sorted %Lx,%Lx\n",
 			       prevbase, base);
 			return -EINVAL;
 		}
 
-		pr_info("Node %d MemBase %016lx Limit %016lx\n",
+		pr_info("Node %d MemBase %016Lx Limit %016Lx\n",
 			nodeid, base, limit);
 
 		prevbase = base;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 20e3f8702d1..bcb394dfbb3 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -12,6 +12,7 @@
 #include <linux/mmiotrace.h>		/* kmmio_handler, ...		*/
 #include <linux/perf_event.h>		/* perf_sw_event		*/
 #include <linux/hugetlb.h>		/* hstate_index_to_shift	*/
+#include <linux/prefetch.h>		/* prefetchw			*/
 
 #include <asm/traps.h>			/* dotraplinkage, ...		*/
 #include <asm/pgalloc.h>		/* pgd_*(), ...			*/
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 286d289b039..37b8b0fe832 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -81,6 +81,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
 		end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT);
 }
 
+void __init native_pagetable_reserve(u64 start, u64 end)
+{
+	memblock_x86_reserve_range(start, end, "PGTABLE");
+}
+
 struct map_range {
 	unsigned long start;
 	unsigned long end;
@@ -272,9 +277,24 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
 
 	__flush_tlb_all();
 
+	/*
+	 * Reserve the kernel pagetable pages we used (pgt_buf_start -
+	 * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top)
+	 * so that they can be reused for other purposes.
+	 *
+	 * On native it just means calling memblock_x86_reserve_range, on Xen it
+	 * also means marking RW the pagetable pages that we allocated before
+	 * but that haven't been used.
+	 *
+	 * In fact on xen we mark RO the whole range pgt_buf_start -
+	 * pgt_buf_top, because we have to make sure that when
+	 * init_memory_mapping reaches the pagetable pages area, it maps
+	 * RO all the pagetable pages, including the ones that are beyond
+	 * pgt_buf_end at that time.
+	 */
 	if (!after_bootmem && pgt_buf_end > pgt_buf_start)
-		memblock_x86_reserve_range(pgt_buf_start << PAGE_SHIFT,
-				 pgt_buf_end << PAGE_SHIFT, "PGTABLE");
+		x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start),
+				PFN_PHYS(pgt_buf_end));
 
 	if (!after_bootmem)
 		early_memtest(start, end);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 80088f99419..29f7c6d9817 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -678,8 +678,10 @@ static void __init zone_sizes_init(void)
 {
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+#ifdef CONFIG_ZONE_DMA
 	max_zone_pfns[ZONE_DMA] =
 		virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+#endif
 	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
 #ifdef CONFIG_HIGHMEM
 	max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
@@ -716,6 +718,7 @@ void __init paging_init(void)
 	 * NOTE: at this point the bootmem allocator is fully available.
 	 */
 	olpc_dt_build_devicetree();
+	sparse_memory_present_with_active_regions(MAX_NUMNODES);
 	sparse_init();
 	zone_sizes_init();
 }
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 79423358728..d865c4aeec5 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -616,7 +616,9 @@ void __init paging_init(void)
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
 
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+#ifdef CONFIG_ZONE_DMA
 	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
+#endif
 	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
 	max_zone_pfns[ZONE_NORMAL] = max_pfn;
 
@@ -679,14 +681,6 @@ int arch_add_memory(int nid, u64 start, u64 size)
 }
 EXPORT_SYMBOL_GPL(arch_add_memory);
 
-#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
-int memory_add_physaddr_to_nid(u64 start)
-{
-	return 0;
-}
-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
-#endif
-
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 static struct kcore_list kcore_vsyscall;
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 0369843511d..be1ef574ce9 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -91,13 +91,6 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
 		return (__force void __iomem *)phys_to_virt(phys_addr);
 
 	/*
-	 * Check if the request spans more than any BAR in the iomem resource
-	 * tree.
-	 */
-	WARN_ONCE(iomem_map_sanity_check(phys_addr, size),
-		  KERN_INFO "Info: mapping multiple BARs. Your kernel is fine.");
-
-	/*
 	 * Don't allow anybody to remap normal RAM that we're using..
 	 */
 	last_pfn = last_addr >> PAGE_SHIFT;
@@ -170,6 +163,13 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
 	ret_addr = (void __iomem *) (vaddr + offset);
 	mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
 
+	/*
+	 * Check if the request spans more than any BAR in the iomem resource
+	 * tree.
+	 */
+	WARN_ONCE(iomem_map_sanity_check(unaligned_phys_addr, unaligned_size),
+		  KERN_INFO "Info: mapping multiple BARs. Your kernel is fine.");
+
 	return ret_addr;
 err_free_area:
 	free_vm_area(area);
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 745258dfc4d..f5510d889a2 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -1,11 +1,39 @@
 /* Common code for 32 and 64-bit NUMA */
-#include <linux/topology.h>
-#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/init.h>
 #include <linux/bootmem.h>
-#include <asm/numa.h>
+#include <linux/memblock.h>
+#include <linux/mmzone.h>
+#include <linux/ctype.h>
+#include <linux/module.h>
+#include <linux/nodemask.h>
+#include <linux/sched.h>
+#include <linux/topology.h>
+
+#include <asm/e820.h>
+#include <asm/proto.h>
+#include <asm/dma.h>
 #include <asm/acpi.h>
+#include <asm/amd_nb.h>
+
+#include "numa_internal.h"
 
 int __initdata numa_off;
+nodemask_t numa_nodes_parsed __initdata;
+
+struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
+EXPORT_SYMBOL(node_data);
+
+static struct numa_meminfo numa_meminfo
+#ifndef CONFIG_MEMORY_HOTPLUG
+__initdata
+#endif
+;
+
+static int numa_distance_cnt;
+static u8 *numa_distance;
 
 static __init int numa_setup(char *opt)
 {
@@ -32,6 +60,15 @@ s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
 	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
 };
 
+int __cpuinit numa_cpu_node(int cpu)
+{
+	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+
+	if (apicid != BAD_APICID)
+		return __apicid_to_node[apicid];
+	return NUMA_NO_NODE;
+}
+
 cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
 EXPORT_SYMBOL(node_to_cpumask_map);
 
@@ -95,6 +132,407 @@ void __init setup_node_to_cpumask_map(void)
 	pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
 }
 
+static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
+				     struct numa_meminfo *mi)
+{
+	/* ignore zero length blks */
+	if (start == end)
+		return 0;
+
+	/* whine about and ignore invalid blks */
+	if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
+		pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n",
+			   nid, start, end);
+		return 0;
+	}
+
+	if (mi->nr_blks >= NR_NODE_MEMBLKS) {
+		pr_err("NUMA: too many memblk ranges\n");
+		return -EINVAL;
+	}
+
+	mi->blk[mi->nr_blks].start = start;
+	mi->blk[mi->nr_blks].end = end;
+	mi->blk[mi->nr_blks].nid = nid;
+	mi->nr_blks++;
+	return 0;
+}
+
+/**
+ * numa_remove_memblk_from - Remove one numa_memblk from a numa_meminfo
+ * @idx: Index of memblk to remove
+ * @mi: numa_meminfo to remove memblk from
+ *
+ * Remove @idx'th numa_memblk from @mi by shifting @mi->blk[] and
+ * decrementing @mi->nr_blks.
+ */
+void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
+{
+	mi->nr_blks--;
+	memmove(&mi->blk[idx], &mi->blk[idx + 1],
+		(mi->nr_blks - idx) * sizeof(mi->blk[0]));
+}
+
+/**
+ * numa_add_memblk - Add one numa_memblk to numa_meminfo
+ * @nid: NUMA node ID of the new memblk
+ * @start: Start address of the new memblk
+ * @end: End address of the new memblk
+ *
+ * Add a new memblk to the default numa_meminfo.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int __init numa_add_memblk(int nid, u64 start, u64 end)
+{
+	return numa_add_memblk_to(nid, start, end, &numa_meminfo);
+}
+
+/* Initialize NODE_DATA for a node on the local memory */
+static void __init setup_node_data(int nid, u64 start, u64 end)
+{
+	const u64 nd_low = PFN_PHYS(MAX_DMA_PFN);
+	const u64 nd_high = PFN_PHYS(max_pfn_mapped);
+	const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
+	bool remapped = false;
+	u64 nd_pa;
+	void *nd;
+	int tnid;
+
+	/*
+	 * Don't confuse VM with a node that doesn't have the
+	 * minimum amount of memory:
+	 */
+	if (end && (end - start) < NODE_MIN_SIZE)
+		return;
+
+	/* initialize remap allocator before aligning to ZONE_ALIGN */
+	init_alloc_remap(nid, start, end);
+
+	start = roundup(start, ZONE_ALIGN);
+
+	printk(KERN_INFO "Initmem setup node %d %016Lx-%016Lx\n",
+	       nid, start, end);
+
+	/*
+	 * Allocate node data.  Try remap allocator first, node-local
+	 * memory and then any node.  Never allocate in DMA zone.
+	 */
+	nd = alloc_remap(nid, nd_size);
+	if (nd) {
+		nd_pa = __pa(nd);
+		remapped = true;
+	} else {
+		nd_pa = memblock_x86_find_in_range_node(nid, nd_low, nd_high,
+						nd_size, SMP_CACHE_BYTES);
+		if (nd_pa == MEMBLOCK_ERROR)
+			nd_pa = memblock_find_in_range(nd_low, nd_high,
+						nd_size, SMP_CACHE_BYTES);
+		if (nd_pa == MEMBLOCK_ERROR) {
+			pr_err("Cannot find %zu bytes in node %d\n",
+			       nd_size, nid);
+			return;
+		}
+		memblock_x86_reserve_range(nd_pa, nd_pa + nd_size, "NODE_DATA");
+		nd = __va(nd_pa);
+	}
+
+	/* report and initialize */
+	printk(KERN_INFO "  NODE_DATA [%016Lx - %016Lx]%s\n",
+	       nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : "");
+	tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
+	if (!remapped && tnid != nid)
+		printk(KERN_INFO "    NODE_DATA(%d) on node %d\n", nid, tnid);
+
+	node_data[nid] = nd;
+	memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
+	NODE_DATA(nid)->node_id = nid;
+	NODE_DATA(nid)->node_start_pfn = start >> PAGE_SHIFT;
+	NODE_DATA(nid)->node_spanned_pages = (end - start) >> PAGE_SHIFT;
+
+	node_set_online(nid);
+}
+
+/**
+ * numa_cleanup_meminfo - Cleanup a numa_meminfo
+ * @mi: numa_meminfo to clean up
+ *
+ * Sanitize @mi by merging and removing unncessary memblks.  Also check for
+ * conflicts and clear unused memblks.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
+{
+	const u64 low = 0;
+	const u64 high = PFN_PHYS(max_pfn);
+	int i, j, k;
+
+	/* first, trim all entries */
+	for (i = 0; i < mi->nr_blks; i++) {
+		struct numa_memblk *bi = &mi->blk[i];
+
+		/* make sure all blocks are inside the limits */
+		bi->start = max(bi->start, low);
+		bi->end = min(bi->end, high);
+
+		/* and there's no empty block */
+		if (bi->start >= bi->end)
+			numa_remove_memblk_from(i--, mi);
+	}
+
+	/* merge neighboring / overlapping entries */
+	for (i = 0; i < mi->nr_blks; i++) {
+		struct numa_memblk *bi = &mi->blk[i];
+
+		for (j = i + 1; j < mi->nr_blks; j++) {
+			struct numa_memblk *bj = &mi->blk[j];
+			u64 start, end;
+
+			/*
+			 * See whether there are overlapping blocks.  Whine
+			 * about but allow overlaps of the same nid.  They
+			 * will be merged below.
+			 */
+			if (bi->end > bj->start && bi->start < bj->end) {
+				if (bi->nid != bj->nid) {
+					pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n",
+					       bi->nid, bi->start, bi->end,
+					       bj->nid, bj->start, bj->end);
+					return -EINVAL;
+				}
+				pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n",
+					   bi->nid, bi->start, bi->end,
+					   bj->start, bj->end);
+			}
+
+			/*
+			 * Join together blocks on the same node, holes
+			 * between which don't overlap with memory on other
+			 * nodes.
+			 */
+			if (bi->nid != bj->nid)
+				continue;
+			start = min(bi->start, bj->start);
+			end = max(bi->end, bj->end);
+			for (k = 0; k < mi->nr_blks; k++) {
+				struct numa_memblk *bk = &mi->blk[k];
+
+				if (bi->nid == bk->nid)
+					continue;
+				if (start < bk->end && end > bk->start)
+					break;
+			}
+			if (k < mi->nr_blks)
+				continue;
+			printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%Lx,%Lx)\n",
+			       bi->nid, bi->start, bi->end, bj->start, bj->end,
+			       start, end);
+			bi->start = start;
+			bi->end = end;
+			numa_remove_memblk_from(j--, mi);
+		}
+	}
+
+	/* clear unused ones */
+	for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) {
+		mi->blk[i].start = mi->blk[i].end = 0;
+		mi->blk[i].nid = NUMA_NO_NODE;
+	}
+
+	return 0;
+}
+
+/*
+ * Set nodes, which have memory in @mi, in *@nodemask.
+ */
+static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask,
+					      const struct numa_meminfo *mi)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(mi->blk); i++)
+		if (mi->blk[i].start != mi->blk[i].end &&
+		    mi->blk[i].nid != NUMA_NO_NODE)
+			node_set(mi->blk[i].nid, *nodemask);
+}
+
+/**
+ * numa_reset_distance - Reset NUMA distance table
+ *
+ * The current table is freed.  The next numa_set_distance() call will
+ * create a new one.
+ */
+void __init numa_reset_distance(void)
+{
+	size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]);
+
+	/* numa_distance could be 1LU marking allocation failure, test cnt */
+	if (numa_distance_cnt)
+		memblock_x86_free_range(__pa(numa_distance),
+					__pa(numa_distance) + size);
+	numa_distance_cnt = 0;
+	numa_distance = NULL;	/* enable table creation */
+}
+
+static int __init numa_alloc_distance(void)
+{
+	nodemask_t nodes_parsed;
+	size_t size;
+	int i, j, cnt = 0;
+	u64 phys;
+
+	/* size the new table and allocate it */
+	nodes_parsed = numa_nodes_parsed;
+	numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo);
+
+	for_each_node_mask(i, nodes_parsed)
+		cnt = i;
+	cnt++;
+	size = cnt * cnt * sizeof(numa_distance[0]);
+
+	phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
+				      size, PAGE_SIZE);
+	if (phys == MEMBLOCK_ERROR) {
+		pr_warning("NUMA: Warning: can't allocate distance table!\n");
+		/* don't retry until explicitly reset */
+		numa_distance = (void *)1LU;
+		return -ENOMEM;
+	}
+	memblock_x86_reserve_range(phys, phys + size, "NUMA DIST");
+
+	numa_distance = __va(phys);
+	numa_distance_cnt = cnt;
+
+	/* fill with the default distances */
+	for (i = 0; i < cnt; i++)
+		for (j = 0; j < cnt; j++)
+			numa_distance[i * cnt + j] = i == j ?
+				LOCAL_DISTANCE : REMOTE_DISTANCE;
+	printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt);
+
+	return 0;
+}
+
+/**
+ * numa_set_distance - Set NUMA distance from one NUMA to another
+ * @from: the 'from' node to set distance
+ * @to: the 'to'  node to set distance
+ * @distance: NUMA distance
+ *
+ * Set the distance from node @from to @to to @distance.  If distance table
+ * doesn't exist, one which is large enough to accommodate all the currently
+ * known nodes will be created.
+ *
+ * If such table cannot be allocated, a warning is printed and further
+ * calls are ignored until the distance table is reset with
+ * numa_reset_distance().
+ *
+ * If @from or @to is higher than the highest known node at the time of
+ * table creation or @distance doesn't make sense, the call is ignored.
+ * This is to allow simplification of specific NUMA config implementations.
+ */
+void __init numa_set_distance(int from, int to, int distance)
+{
+	if (!numa_distance && numa_alloc_distance() < 0)
+		return;
+
+	if (from >= numa_distance_cnt || to >= numa_distance_cnt) {
+		printk_once(KERN_DEBUG "NUMA: Debug: distance out of bound, from=%d to=%d distance=%d\n",
+			    from, to, distance);
+		return;
+	}
+
+	if ((u8)distance != distance ||
+	    (from == to && distance != LOCAL_DISTANCE)) {
+		pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
+			     from, to, distance);
+		return;
+	}
+
+	numa_distance[from * numa_distance_cnt + to] = distance;
+}
+
+int __node_distance(int from, int to)
+{
+	if (from >= numa_distance_cnt || to >= numa_distance_cnt)
+		return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
+	return numa_distance[from * numa_distance_cnt + to];
+}
+EXPORT_SYMBOL(__node_distance);
+
+/*
+ * Sanity check to catch more bad NUMA configurations (they are amazingly
+ * common).  Make sure the nodes cover all memory.
+ */
+static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
+{
+	u64 numaram, e820ram;
+	int i;
+
+	numaram = 0;
+	for (i = 0; i < mi->nr_blks; i++) {
+		u64 s = mi->blk[i].start >> PAGE_SHIFT;
+		u64 e = mi->blk[i].end >> PAGE_SHIFT;
+		numaram += e - s;
+		numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
+		if ((s64)numaram < 0)
+			numaram = 0;
+	}
+
+	e820ram = max_pfn - (memblock_x86_hole_size(0,
+					PFN_PHYS(max_pfn)) >> PAGE_SHIFT);
+	/* We seem to lose 3 pages somewhere. Allow 1M of slack. */
+	if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
+		printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n",
+		       (numaram << PAGE_SHIFT) >> 20,
+		       (e820ram << PAGE_SHIFT) >> 20);
+		return false;
+	}
+	return true;
+}
+
+static int __init numa_register_memblks(struct numa_meminfo *mi)
+{
+	int i, nid;
+
+	/* Account for nodes with cpus and no memory */
+	node_possible_map = numa_nodes_parsed;
+	numa_nodemask_from_meminfo(&node_possible_map, mi);
+	if (WARN_ON(nodes_empty(node_possible_map)))
+		return -EINVAL;
+
+	for (i = 0; i < mi->nr_blks; i++)
+		memblock_x86_register_active_regions(mi->blk[i].nid,
+					mi->blk[i].start >> PAGE_SHIFT,
+					mi->blk[i].end >> PAGE_SHIFT);
+
+	/* for out of order entries */
+	sort_node_map();
+	if (!numa_meminfo_cover_memory(mi))
+		return -EINVAL;
+
+	/* Finally register nodes. */
+	for_each_node_mask(nid, node_possible_map) {
+		u64 start = PFN_PHYS(max_pfn);
+		u64 end = 0;
+
+		for (i = 0; i < mi->nr_blks; i++) {
+			if (nid != mi->blk[i].nid)
+				continue;
+			start = min(mi->blk[i].start, start);
+			end = max(mi->blk[i].end, end);
+		}
+
+		if (start < end)
+			setup_node_data(nid, start, end);
+	}
+
+	return 0;
+}
+
 /*
  * There are unfortunately some poorly designed mainboards around that
  * only connect memory to a single CPU. This breaks the 1:1 cpu->node
@@ -102,7 +540,7 @@ void __init setup_node_to_cpumask_map(void)
  * as the number of CPUs is not known yet. We round robin the existing
  * nodes.
  */
-void __init numa_init_array(void)
+static void __init numa_init_array(void)
 {
 	int rr, i;
 
@@ -117,6 +555,95 @@ void __init numa_init_array(void)
 	}
 }
 
+static int __init numa_init(int (*init_func)(void))
+{
+	int i;
+	int ret;
+
+	for (i = 0; i < MAX_LOCAL_APIC; i++)
+		set_apicid_to_node(i, NUMA_NO_NODE);
+
+	nodes_clear(numa_nodes_parsed);
+	nodes_clear(node_possible_map);
+	nodes_clear(node_online_map);
+	memset(&numa_meminfo, 0, sizeof(numa_meminfo));
+	remove_all_active_ranges();
+	numa_reset_distance();
+
+	ret = init_func();
+	if (ret < 0)
+		return ret;
+	ret = numa_cleanup_meminfo(&numa_meminfo);
+	if (ret < 0)
+		return ret;
+
+	numa_emulation(&numa_meminfo, numa_distance_cnt);
+
+	ret = numa_register_memblks(&numa_meminfo);
+	if (ret < 0)
+		return ret;
+
+	for (i = 0; i < nr_cpu_ids; i++) {
+		int nid = early_cpu_to_node(i);
+
+		if (nid == NUMA_NO_NODE)
+			continue;
+		if (!node_online(nid))
+			numa_clear_node(i);
+	}
+	numa_init_array();
+	return 0;
+}
+
+/**
+ * dummy_numa_init - Fallback dummy NUMA init
+ *
+ * Used if there's no underlying NUMA architecture, NUMA initialization
+ * fails, or NUMA is disabled on the command line.
+ *
+ * Must online at least one node and add memory blocks that cover all
+ * allowed memory.  This function must not fail.
+ */
+static int __init dummy_numa_init(void)
+{
+	printk(KERN_INFO "%s\n",
+	       numa_off ? "NUMA turned off" : "No NUMA configuration found");
+	printk(KERN_INFO "Faking a node at %016Lx-%016Lx\n",
+	       0LLU, PFN_PHYS(max_pfn));
+
+	node_set(0, numa_nodes_parsed);
+	numa_add_memblk(0, 0, PFN_PHYS(max_pfn));
+
+	return 0;
+}
+
+/**
+ * x86_numa_init - Initialize NUMA
+ *
+ * Try each configured NUMA initialization method until one succeeds.  The
+ * last fallback is dummy single node config encomapssing whole memory and
+ * never fails.
+ */
+void __init x86_numa_init(void)
+{
+	if (!numa_off) {
+#ifdef CONFIG_X86_NUMAQ
+		if (!numa_init(numaq_numa_init))
+			return;
+#endif
+#ifdef CONFIG_ACPI_NUMA
+		if (!numa_init(x86_acpi_numa_init))
+			return;
+#endif
+#ifdef CONFIG_AMD_NUMA
+		if (!numa_init(amd_numa_init))
+			return;
+#endif
+	}
+
+	numa_init(dummy_numa_init);
+}
+
 static __init int find_near_online_node(int node)
 {
 	int n, val;
@@ -282,3 +809,18 @@ const struct cpumask *cpumask_of_node(int node)
 EXPORT_SYMBOL(cpumask_of_node);
 
 #endif	/* !CONFIG_DEBUG_PER_CPU_MAPS */
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+int memory_add_physaddr_to_nid(u64 start)
+{
+	struct numa_meminfo *mi = &numa_meminfo;
+	int nid = mi->blk[0].nid;
+	int i;
+
+	for (i = 0; i < mi->nr_blks; i++)
+		if (mi->blk[i].start <= start && mi->blk[i].end > start)
+			nid = mi->blk[i].nid;
+	return nid;
+}
+EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
+#endif
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index bde3906420d..849a975d3fa 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -22,39 +22,11 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#include <linux/mm.h>
 #include <linux/bootmem.h>
 #include <linux/memblock.h>
-#include <linux/mmzone.h>
-#include <linux/highmem.h>
-#include <linux/initrd.h>
-#include <linux/nodemask.h>
 #include <linux/module.h>
-#include <linux/kexec.h>
-#include <linux/pfn.h>
-#include <linux/swap.h>
-#include <linux/acpi.h>
-
-#include <asm/e820.h>
-#include <asm/setup.h>
-#include <asm/mmzone.h>
-#include <asm/bios_ebda.h>
-#include <asm/proto.h>
-
-struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
-EXPORT_SYMBOL(node_data);
-
-/*
- * numa interface - we expect the numa architecture specific code to have
- *                  populated the following initialisation.
- *
- * 1) node_online_map  - the map of all nodes configured (online) in the system
- * 2) node_start_pfn   - the starting page frame number for a node
- * 3) node_end_pfn     - the ending page fram number for a node
- */
-unsigned long node_start_pfn[MAX_NUMNODES] __read_mostly;
-unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly;
 
+#include "numa_internal.h"
 
 #ifdef CONFIG_DISCONTIGMEM
 /*
@@ -99,108 +71,46 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
 }
 #endif
 
-extern unsigned long find_max_low_pfn(void);
 extern unsigned long highend_pfn, highstart_pfn;
 
 #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)
 
-unsigned long node_remap_size[MAX_NUMNODES];
 static void *node_remap_start_vaddr[MAX_NUMNODES];
 void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
-static unsigned long kva_start_pfn;
-static unsigned long kva_pages;
-
-int __cpuinit numa_cpu_node(int cpu)
-{
-	return apic->x86_32_numa_cpu_node(cpu);
-}
-
-/*
- * FLAT - support for basic PC memory model with discontig enabled, essentially
- *        a single node with all available processors in it with a flat
- *        memory map.
- */
-int __init get_memcfg_numa_flat(void)
-{
-	printk(KERN_DEBUG "NUMA - single node, flat memory mode\n");
-
-	node_start_pfn[0] = 0;
-	node_end_pfn[0] = max_pfn;
-	memblock_x86_register_active_regions(0, 0, max_pfn);
-	memory_present(0, 0, max_pfn);
-	node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn);
-
-        /* Indicate there is one node available. */
-	nodes_clear(node_online_map);
-	node_set_online(0);
-	return 1;
-}
-
-/*
- * Find the highest page frame number we have available for the node
- */
-static void __init propagate_e820_map_node(int nid)
-{
-	if (node_end_pfn[nid] > max_pfn)
-		node_end_pfn[nid] = max_pfn;
-	/*
-	 * if a user has given mem=XXXX, then we need to make sure 
-	 * that the node _starts_ before that, too, not just ends
-	 */
-	if (node_start_pfn[nid] > max_pfn)
-		node_start_pfn[nid] = max_pfn;
-	BUG_ON(node_start_pfn[nid] > node_end_pfn[nid]);
-}
-
-/* 
- * Allocate memory for the pg_data_t for this node via a crude pre-bootmem
- * method.  For node zero take this from the bottom of memory, for
- * subsequent nodes place them at node_remap_start_vaddr which contains
- * node local data in physically node local memory.  See setup_memory()
- * for details.
- */
-static void __init allocate_pgdat(int nid)
-{
-	char buf[16];
-
-	if (node_has_online_mem(nid) && node_remap_start_vaddr[nid])
-		NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid];
-	else {
-		unsigned long pgdat_phys;
-		pgdat_phys = memblock_find_in_range(min_low_pfn<<PAGE_SHIFT,
-				 max_pfn_mapped<<PAGE_SHIFT,
-				 sizeof(pg_data_t),
-				 PAGE_SIZE);
-		NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(pgdat_phys>>PAGE_SHIFT));
-		memset(buf, 0, sizeof(buf));
-		sprintf(buf, "NODE_DATA %d",  nid);
-		memblock_x86_reserve_range(pgdat_phys, pgdat_phys + sizeof(pg_data_t), buf);
-	}
-	printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n",
-		nid, (unsigned long)NODE_DATA(nid));
-}
-
 /*
- * In the DISCONTIGMEM and SPARSEMEM memory model, a portion of the kernel
- * virtual address space (KVA) is reserved and portions of nodes are mapped
- * using it. This is to allow node-local memory to be allocated for
- * structures that would normally require ZONE_NORMAL. The memory is
- * allocated with alloc_remap() and callers should be prepared to allocate
- * from the bootmem allocator instead.
+ * Remap memory allocator
  */
 static unsigned long node_remap_start_pfn[MAX_NUMNODES];
 static void *node_remap_end_vaddr[MAX_NUMNODES];
 static void *node_remap_alloc_vaddr[MAX_NUMNODES];
-static unsigned long node_remap_offset[MAX_NUMNODES];
 
+/**
+ * alloc_remap - Allocate remapped memory
+ * @nid: NUMA node to allocate memory from
+ * @size: The size of allocation
+ *
+ * Allocate @size bytes from the remap area of NUMA node @nid.  The
+ * size of the remap area is predetermined by init_alloc_remap() and
+ * only the callers considered there should call this function.  For
+ * more info, please read the comment on top of init_alloc_remap().
+ *
+ * The caller must be ready to handle allocation failure from this
+ * function and fall back to regular memory allocator in such cases.
+ *
+ * CONTEXT:
+ * Single CPU early boot context.
+ *
+ * RETURNS:
+ * Pointer to the allocated memory on success, %NULL on failure.
+ */
 void *alloc_remap(int nid, unsigned long size)
 {
 	void *allocation = node_remap_alloc_vaddr[nid];
 
 	size = ALIGN(size, L1_CACHE_BYTES);
 
-	if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid])
+	if (!allocation || (allocation + size) > node_remap_end_vaddr[nid])
 		return NULL;
 
 	node_remap_alloc_vaddr[nid] += size;
@@ -209,26 +119,6 @@ void *alloc_remap(int nid, unsigned long size)
 	return allocation;
 }
 
-static void __init remap_numa_kva(void)
-{
-	void *vaddr;
-	unsigned long pfn;
-	int node;
-
-	for_each_online_node(node) {
-		printk(KERN_DEBUG "remap_numa_kva: node %d\n", node);
-		for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) {
-			vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT);
-			printk(KERN_DEBUG "remap_numa_kva: %08lx to pfn %08lx\n",
-				(unsigned long)vaddr,
-				node_remap_start_pfn[node] + pfn);
-			set_pmd_pfn((ulong) vaddr, 
-				node_remap_start_pfn[node] + pfn, 
-				PAGE_KERNEL_LARGE);
-		}
-	}
-}
-
 #ifdef CONFIG_HIBERNATION
 /**
  * resume_map_numa_kva - add KVA mapping to the temporary page tables created
@@ -240,15 +130,16 @@ void resume_map_numa_kva(pgd_t *pgd_base)
 	int node;
 
 	for_each_online_node(node) {
-		unsigned long start_va, start_pfn, size, pfn;
+		unsigned long start_va, start_pfn, nr_pages, pfn;
 
 		start_va = (unsigned long)node_remap_start_vaddr[node];
 		start_pfn = node_remap_start_pfn[node];
-		size = node_remap_size[node];
+		nr_pages = (node_remap_end_vaddr[node] -
+			    node_remap_start_vaddr[node]) >> PAGE_SHIFT;
 
 		printk(KERN_DEBUG "%s: node %d\n", __func__, node);
 
-		for (pfn = 0; pfn < size; pfn += PTRS_PER_PTE) {
+		for (pfn = 0; pfn < nr_pages; pfn += PTRS_PER_PTE) {
 			unsigned long vaddr = start_va + (pfn << PAGE_SHIFT);
 			pgd_t *pgd = pgd_base + pgd_index(vaddr);
 			pud_t *pud = pud_offset(pgd, vaddr);
@@ -264,132 +155,89 @@ void resume_map_numa_kva(pgd_t *pgd_base)
 }
 #endif
 
-static __init unsigned long calculate_numa_remap_pages(void)
+/**
+ * init_alloc_remap - Initialize remap allocator for a NUMA node
+ * @nid: NUMA node to initizlie remap allocator for
+ *
+ * NUMA nodes may end up without any lowmem.  As allocating pgdat and
+ * memmap on a different node with lowmem is inefficient, a special
+ * remap allocator is implemented which can be used by alloc_remap().
+ *
+ * For each node, the amount of memory which will be necessary for
+ * pgdat and memmap is calculated and two memory areas of the size are
+ * allocated - one in the node and the other in lowmem; then, the area
+ * in the node is remapped to the lowmem area.
+ *
+ * As pgdat and memmap must be allocated in lowmem anyway, this
+ * doesn't waste lowmem address space; however, the actual lowmem
+ * which gets remapped over is wasted.  The amount shouldn't be
+ * problematic on machines this feature will be used.
+ *
+ * Initialization failure isn't fatal.  alloc_remap() is used
+ * opportunistically and the callers will fall back to other memory
+ * allocation mechanisms on failure.
+ */
+void __init init_alloc_remap(int nid, u64 start, u64 end)
 {
-	int nid;
-	unsigned long size, reserve_pages = 0;
-
-	for_each_online_node(nid) {
-		u64 node_kva_target;
-		u64 node_kva_final;
-
-		/*
-		 * The acpi/srat node info can show hot-add memroy zones
-		 * where memory could be added but not currently present.
-		 */
-		printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n",
-			nid, node_start_pfn[nid], node_end_pfn[nid]);
-		if (node_start_pfn[nid] > max_pfn)
-			continue;
-		if (!node_end_pfn[nid])
-			continue;
-		if (node_end_pfn[nid] > max_pfn)
-			node_end_pfn[nid] = max_pfn;
-
-		/* ensure the remap includes space for the pgdat. */
-		size = node_remap_size[nid] + sizeof(pg_data_t);
-
-		/* convert size to large (pmd size) pages, rounding up */
-		size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES;
-		/* now the roundup is correct, convert to PAGE_SIZE pages */
-		size = size * PTRS_PER_PTE;
-
-		node_kva_target = round_down(node_end_pfn[nid] - size,
-						 PTRS_PER_PTE);
-		node_kva_target <<= PAGE_SHIFT;
-		do {
-			node_kva_final = memblock_find_in_range(node_kva_target,
-					((u64)node_end_pfn[nid])<<PAGE_SHIFT,
-						((u64)size)<<PAGE_SHIFT,
-						LARGE_PAGE_BYTES);
-			node_kva_target -= LARGE_PAGE_BYTES;
-		} while (node_kva_final == MEMBLOCK_ERROR &&
-			 (node_kva_target>>PAGE_SHIFT) > (node_start_pfn[nid]));
-
-		if (node_kva_final == MEMBLOCK_ERROR)
-			panic("Can not get kva ram\n");
-
-		node_remap_size[nid] = size;
-		node_remap_offset[nid] = reserve_pages;
-		reserve_pages += size;
-		printk(KERN_DEBUG "Reserving %ld pages of KVA for lmem_map of"
-				  " node %d at %llx\n",
-				size, nid, node_kva_final>>PAGE_SHIFT);
-
-		/*
-		 *  prevent kva address below max_low_pfn want it on system
-		 *  with less memory later.
-		 *  layout will be: KVA address , KVA RAM
-		 *
-		 *  we are supposed to only record the one less then max_low_pfn
-		 *  but we could have some hole in high memory, and it will only
-		 *  check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide
-		 *  to use it as free.
-		 *  So memblock_x86_reserve_range here, hope we don't run out of that array
-		 */
-		memblock_x86_reserve_range(node_kva_final,
-			      node_kva_final+(((u64)size)<<PAGE_SHIFT),
-			      "KVA RAM");
-
-		node_remap_start_pfn[nid] = node_kva_final>>PAGE_SHIFT;
-	}
-	printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n",
-			reserve_pages);
-	return reserve_pages;
-}
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long end_pfn = end >> PAGE_SHIFT;
+	unsigned long size, pfn;
+	u64 node_pa, remap_pa;
+	void *remap_va;
 
-static void init_remap_allocator(int nid)
-{
-	node_remap_start_vaddr[nid] = pfn_to_kaddr(
-			kva_start_pfn + node_remap_offset[nid]);
-	node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] +
-		(node_remap_size[nid] * PAGE_SIZE);
-	node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] +
-		ALIGN(sizeof(pg_data_t), PAGE_SIZE);
-
-	printk(KERN_DEBUG "node %d will remap to vaddr %08lx - %08lx\n", nid,
-		(ulong) node_remap_start_vaddr[nid],
-		(ulong) node_remap_end_vaddr[nid]);
+	/*
+	 * The acpi/srat node info can show hot-add memroy zones where
+	 * memory could be added but not currently present.
+	 */
+	printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n",
+	       nid, start_pfn, end_pfn);
+
+	/* calculate the necessary space aligned to large page size */
+	size = node_memmap_size_bytes(nid, start_pfn, end_pfn);
+	size += ALIGN(sizeof(pg_data_t), PAGE_SIZE);
+	size = ALIGN(size, LARGE_PAGE_BYTES);
+
+	/* allocate node memory and the lowmem remap area */
+	node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES);
+	if (node_pa == MEMBLOCK_ERROR) {
+		pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n",
+			   size, nid);
+		return;
+	}
+	memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM");
+
+	remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT,
+					  max_low_pfn << PAGE_SHIFT,
+					  size, LARGE_PAGE_BYTES);
+	if (remap_pa == MEMBLOCK_ERROR) {
+		pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n",
+			   size, nid);
+		memblock_x86_free_range(node_pa, node_pa + size);
+		return;
+	}
+	memblock_x86_reserve_range(remap_pa, remap_pa + size, "KVA PG");
+	remap_va = phys_to_virt(remap_pa);
+
+	/* perform actual remap */
+	for (pfn = 0; pfn < size >> PAGE_SHIFT; pfn += PTRS_PER_PTE)
+		set_pmd_pfn((unsigned long)remap_va + (pfn << PAGE_SHIFT),
+			    (node_pa >> PAGE_SHIFT) + pfn,
+			    PAGE_KERNEL_LARGE);
+
+	/* initialize remap allocator parameters */
+	node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT;
+	node_remap_start_vaddr[nid] = remap_va;
+	node_remap_end_vaddr[nid] = remap_va + size;
+	node_remap_alloc_vaddr[nid] = remap_va;
+
+	printk(KERN_DEBUG "remap_alloc: node %d [%08llx-%08llx) -> [%p-%p)\n",
+	       nid, node_pa, node_pa + size, remap_va, remap_va + size);
 }
 
 void __init initmem_init(void)
 {
-	int nid;
-	long kva_target_pfn;
-
-	/*
-	 * When mapping a NUMA machine we allocate the node_mem_map arrays
-	 * from node local memory.  They are then mapped directly into KVA
-	 * between zone normal and vmalloc space.  Calculate the size of
-	 * this space and use it to adjust the boundary between ZONE_NORMAL
-	 * and ZONE_HIGHMEM.
-	 */
-
-	get_memcfg_numa();
-	numa_init_array();
-
-	kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE);
+	x86_numa_init();
 
-	kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE);
-	do {
-		kva_start_pfn = memblock_find_in_range(kva_target_pfn<<PAGE_SHIFT,
-					max_low_pfn<<PAGE_SHIFT,
-					kva_pages<<PAGE_SHIFT,
-					PTRS_PER_PTE<<PAGE_SHIFT) >> PAGE_SHIFT;
-		kva_target_pfn -= PTRS_PER_PTE;
-	} while (kva_start_pfn == MEMBLOCK_ERROR && kva_target_pfn > min_low_pfn);
-
-	if (kva_start_pfn == MEMBLOCK_ERROR)
-		panic("Can not get kva space\n");
-
-	printk(KERN_INFO "kva_start_pfn ~ %lx max_low_pfn ~ %lx\n",
-		kva_start_pfn, max_low_pfn);
-	printk(KERN_INFO "max_pfn = %lx\n", max_pfn);
-
-	/* avoid clash with initrd */
-	memblock_x86_reserve_range(kva_start_pfn<<PAGE_SHIFT,
-		      (kva_start_pfn + kva_pages)<<PAGE_SHIFT,
-		     "KVA PG");
 #ifdef CONFIG_HIGHMEM
 	highstart_pfn = highend_pfn = max_pfn;
 	if (max_pfn > max_low_pfn)
@@ -409,51 +257,9 @@ void __init initmem_init(void)
 
 	printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(max_low_pfn));
-	for_each_online_node(nid) {
-		init_remap_allocator(nid);
-
-		allocate_pgdat(nid);
-	}
-	remap_numa_kva();
 
 	printk(KERN_DEBUG "High memory starts at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(highstart_pfn));
-	for_each_online_node(nid)
-		propagate_e820_map_node(nid);
-
-	for_each_online_node(nid) {
-		memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
-		NODE_DATA(nid)->node_id = nid;
-	}
 
 	setup_bootmem_allocator();
 }
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-static int paddr_to_nid(u64 addr)
-{
-	int nid;
-	unsigned long pfn = PFN_DOWN(addr);
-
-	for_each_node(nid)
-		if (node_start_pfn[nid] <= pfn &&
-		    pfn < node_end_pfn[nid])
-			return nid;
-
-	return -1;
-}
-
-/*
- * This function is used to ask node id BEFORE memmap and mem_section's
- * initialization (pfn_to_nid() can't be used yet).
- * If _PXM is not defined on ACPI's DSDT, node id must be found by this.
- */
-int memory_add_physaddr_to_nid(u64 addr)
-{
-	int nid = paddr_to_nid(addr);
-	return (nid >= 0) ? nid : 0;
-}
-
-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
-#endif
-
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index e8c00cc7203..dd27f401f0a 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -2,646 +2,13 @@
  * Generic VM initialization for x86-64 NUMA setups.
  * Copyright 2002,2003 Andi Kleen, SuSE Labs.
  */
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/init.h>
 #include <linux/bootmem.h>
-#include <linux/memblock.h>
-#include <linux/mmzone.h>
-#include <linux/ctype.h>
-#include <linux/module.h>
-#include <linux/nodemask.h>
-#include <linux/sched.h>
-#include <linux/acpi.h>
-
-#include <asm/e820.h>
-#include <asm/proto.h>
-#include <asm/dma.h>
-#include <asm/acpi.h>
-#include <asm/amd_nb.h>
 
 #include "numa_internal.h"
 
-struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
-EXPORT_SYMBOL(node_data);
-
-nodemask_t numa_nodes_parsed __initdata;
-
-struct memnode memnode;
-
-static unsigned long __initdata nodemap_addr;
-static unsigned long __initdata nodemap_size;
-
-static struct numa_meminfo numa_meminfo __initdata;
-
-static int numa_distance_cnt;
-static u8 *numa_distance;
-
-/*
- * Given a shift value, try to populate memnodemap[]
- * Returns :
- * 1 if OK
- * 0 if memnodmap[] too small (of shift too small)
- * -1 if node overlap or lost ram (shift too big)
- */
-static int __init populate_memnodemap(const struct numa_meminfo *mi, int shift)
-{
-	unsigned long addr, end;
-	int i, res = -1;
-
-	memset(memnodemap, 0xff, sizeof(s16)*memnodemapsize);
-	for (i = 0; i < mi->nr_blks; i++) {
-		addr = mi->blk[i].start;
-		end = mi->blk[i].end;
-		if (addr >= end)
-			continue;
-		if ((end >> shift) >= memnodemapsize)
-			return 0;
-		do {
-			if (memnodemap[addr >> shift] != NUMA_NO_NODE)
-				return -1;
-			memnodemap[addr >> shift] = mi->blk[i].nid;
-			addr += (1UL << shift);
-		} while (addr < end);
-		res = 1;
-	}
-	return res;
-}
-
-static int __init allocate_cachealigned_memnodemap(void)
-{
-	unsigned long addr;
-
-	memnodemap = memnode.embedded_map;
-	if (memnodemapsize <= ARRAY_SIZE(memnode.embedded_map))
-		return 0;
-
-	addr = 0x8000;
-	nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
-	nodemap_addr = memblock_find_in_range(addr, get_max_mapped(),
-				      nodemap_size, L1_CACHE_BYTES);
-	if (nodemap_addr == MEMBLOCK_ERROR) {
-		printk(KERN_ERR
-		       "NUMA: Unable to allocate Memory to Node hash map\n");
-		nodemap_addr = nodemap_size = 0;
-		return -1;
-	}
-	memnodemap = phys_to_virt(nodemap_addr);
-	memblock_x86_reserve_range(nodemap_addr, nodemap_addr + nodemap_size, "MEMNODEMAP");
-
-	printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
-	       nodemap_addr, nodemap_addr + nodemap_size);
-	return 0;
-}
-
-/*
- * The LSB of all start and end addresses in the node map is the value of the
- * maximum possible shift.
- */
-static int __init extract_lsb_from_nodes(const struct numa_meminfo *mi)
-{
-	int i, nodes_used = 0;
-	unsigned long start, end;
-	unsigned long bitfield = 0, memtop = 0;
-
-	for (i = 0; i < mi->nr_blks; i++) {
-		start = mi->blk[i].start;
-		end = mi->blk[i].end;
-		if (start >= end)
-			continue;
-		bitfield |= start;
-		nodes_used++;
-		if (end > memtop)
-			memtop = end;
-	}
-	if (nodes_used <= 1)
-		i = 63;
-	else
-		i = find_first_bit(&bitfield, sizeof(unsigned long)*8);
-	memnodemapsize = (memtop >> i)+1;
-	return i;
-}
-
-static int __init compute_hash_shift(const struct numa_meminfo *mi)
-{
-	int shift;
-
-	shift = extract_lsb_from_nodes(mi);
-	if (allocate_cachealigned_memnodemap())
-		return -1;
-	printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n",
-		shift);
-
-	if (populate_memnodemap(mi, shift) != 1) {
-		printk(KERN_INFO "Your memory is not aligned you need to "
-		       "rebuild your kernel with a bigger NODEMAPSIZE "
-		       "shift=%d\n", shift);
-		return -1;
-	}
-	return shift;
-}
-
-int __meminit  __early_pfn_to_nid(unsigned long pfn)
-{
-	return phys_to_nid(pfn << PAGE_SHIFT);
-}
-
-static void * __init early_node_mem(int nodeid, unsigned long start,
-				    unsigned long end, unsigned long size,
-				    unsigned long align)
-{
-	unsigned long mem;
-
-	/*
-	 * put it on high as possible
-	 * something will go with NODE_DATA
-	 */
-	if (start < (MAX_DMA_PFN<<PAGE_SHIFT))
-		start = MAX_DMA_PFN<<PAGE_SHIFT;
-	if (start < (MAX_DMA32_PFN<<PAGE_SHIFT) &&
-	    end > (MAX_DMA32_PFN<<PAGE_SHIFT))
-		start = MAX_DMA32_PFN<<PAGE_SHIFT;
-	mem = memblock_x86_find_in_range_node(nodeid, start, end, size, align);
-	if (mem != MEMBLOCK_ERROR)
-		return __va(mem);
-
-	/* extend the search scope */
-	end = max_pfn_mapped << PAGE_SHIFT;
-	start = MAX_DMA_PFN << PAGE_SHIFT;
-	mem = memblock_find_in_range(start, end, size, align);
-	if (mem != MEMBLOCK_ERROR)
-		return __va(mem);
-
-	printk(KERN_ERR "Cannot find %lu bytes in node %d\n",
-		       size, nodeid);
-
-	return NULL;
-}
-
-static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
-				     struct numa_meminfo *mi)
-{
-	/* ignore zero length blks */
-	if (start == end)
-		return 0;
-
-	/* whine about and ignore invalid blks */
-	if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
-		pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n",
-			   nid, start, end);
-		return 0;
-	}
-
-	if (mi->nr_blks >= NR_NODE_MEMBLKS) {
-		pr_err("NUMA: too many memblk ranges\n");
-		return -EINVAL;
-	}
-
-	mi->blk[mi->nr_blks].start = start;
-	mi->blk[mi->nr_blks].end = end;
-	mi->blk[mi->nr_blks].nid = nid;
-	mi->nr_blks++;
-	return 0;
-}
-
-/**
- * numa_remove_memblk_from - Remove one numa_memblk from a numa_meminfo
- * @idx: Index of memblk to remove
- * @mi: numa_meminfo to remove memblk from
- *
- * Remove @idx'th numa_memblk from @mi by shifting @mi->blk[] and
- * decrementing @mi->nr_blks.
- */
-void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
-{
-	mi->nr_blks--;
-	memmove(&mi->blk[idx], &mi->blk[idx + 1],
-		(mi->nr_blks - idx) * sizeof(mi->blk[0]));
-}
-
-/**
- * numa_add_memblk - Add one numa_memblk to numa_meminfo
- * @nid: NUMA node ID of the new memblk
- * @start: Start address of the new memblk
- * @end: End address of the new memblk
- *
- * Add a new memblk to the default numa_meminfo.
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int __init numa_add_memblk(int nid, u64 start, u64 end)
-{
-	return numa_add_memblk_to(nid, start, end, &numa_meminfo);
-}
-
-/* Initialize bootmem allocator for a node */
-void __init
-setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
-{
-	unsigned long start_pfn, last_pfn, nodedata_phys;
-	const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
-	int nid;
-
-	if (!end)
-		return;
-
-	/*
-	 * Don't confuse VM with a node that doesn't have the
-	 * minimum amount of memory:
-	 */
-	if (end && (end - start) < NODE_MIN_SIZE)
-		return;
-
-	start = roundup(start, ZONE_ALIGN);
-
-	printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n", nodeid,
-	       start, end);
-
-	start_pfn = start >> PAGE_SHIFT;
-	last_pfn = end >> PAGE_SHIFT;
-
-	node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size,
-					   SMP_CACHE_BYTES);
-	if (node_data[nodeid] == NULL)
-		return;
-	nodedata_phys = __pa(node_data[nodeid]);
-	memblock_x86_reserve_range(nodedata_phys, nodedata_phys + pgdat_size, "NODE_DATA");
-	printk(KERN_INFO "  NODE_DATA [%016lx - %016lx]\n", nodedata_phys,
-		nodedata_phys + pgdat_size - 1);
-	nid = phys_to_nid(nodedata_phys);
-	if (nid != nodeid)
-		printk(KERN_INFO "    NODE_DATA(%d) on node %d\n", nodeid, nid);
-
-	memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
-	NODE_DATA(nodeid)->node_id = nodeid;
-	NODE_DATA(nodeid)->node_start_pfn = start_pfn;
-	NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn;
-
-	node_set_online(nodeid);
-}
-
-/**
- * numa_cleanup_meminfo - Cleanup a numa_meminfo
- * @mi: numa_meminfo to clean up
- *
- * Sanitize @mi by merging and removing unncessary memblks.  Also check for
- * conflicts and clear unused memblks.
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
-{
-	const u64 low = 0;
-	const u64 high = (u64)max_pfn << PAGE_SHIFT;
-	int i, j, k;
-
-	for (i = 0; i < mi->nr_blks; i++) {
-		struct numa_memblk *bi = &mi->blk[i];
-
-		/* make sure all blocks are inside the limits */
-		bi->start = max(bi->start, low);
-		bi->end = min(bi->end, high);
-
-		/* and there's no empty block */
-		if (bi->start == bi->end) {
-			numa_remove_memblk_from(i--, mi);
-			continue;
-		}
-
-		for (j = i + 1; j < mi->nr_blks; j++) {
-			struct numa_memblk *bj = &mi->blk[j];
-			unsigned long start, end;
-
-			/*
-			 * See whether there are overlapping blocks.  Whine
-			 * about but allow overlaps of the same nid.  They
-			 * will be merged below.
-			 */
-			if (bi->end > bj->start && bi->start < bj->end) {
-				if (bi->nid != bj->nid) {
-					pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n",
-					       bi->nid, bi->start, bi->end,
-					       bj->nid, bj->start, bj->end);
-					return -EINVAL;
-				}
-				pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n",
-					   bi->nid, bi->start, bi->end,
-					   bj->start, bj->end);
-			}
-
-			/*
-			 * Join together blocks on the same node, holes
-			 * between which don't overlap with memory on other
-			 * nodes.
-			 */
-			if (bi->nid != bj->nid)
-				continue;
-			start = max(min(bi->start, bj->start), low);
-			end = min(max(bi->end, bj->end), high);
-			for (k = 0; k < mi->nr_blks; k++) {
-				struct numa_memblk *bk = &mi->blk[k];
-
-				if (bi->nid == bk->nid)
-					continue;
-				if (start < bk->end && end > bk->start)
-					break;
-			}
-			if (k < mi->nr_blks)
-				continue;
-			printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n",
-			       bi->nid, bi->start, bi->end, bj->start, bj->end,
-			       start, end);
-			bi->start = start;
-			bi->end = end;
-			numa_remove_memblk_from(j--, mi);
-		}
-	}
-
-	for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) {
-		mi->blk[i].start = mi->blk[i].end = 0;
-		mi->blk[i].nid = NUMA_NO_NODE;
-	}
-
-	return 0;
-}
-
-/*
- * Set nodes, which have memory in @mi, in *@nodemask.
- */
-static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask,
-					      const struct numa_meminfo *mi)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(mi->blk); i++)
-		if (mi->blk[i].start != mi->blk[i].end &&
-		    mi->blk[i].nid != NUMA_NO_NODE)
-			node_set(mi->blk[i].nid, *nodemask);
-}
-
-/**
- * numa_reset_distance - Reset NUMA distance table
- *
- * The current table is freed.  The next numa_set_distance() call will
- * create a new one.
- */
-void __init numa_reset_distance(void)
-{
-	size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]);
-
-	/* numa_distance could be 1LU marking allocation failure, test cnt */
-	if (numa_distance_cnt)
-		memblock_x86_free_range(__pa(numa_distance),
-					__pa(numa_distance) + size);
-	numa_distance_cnt = 0;
-	numa_distance = NULL;	/* enable table creation */
-}
-
-static int __init numa_alloc_distance(void)
-{
-	nodemask_t nodes_parsed;
-	size_t size;
-	int i, j, cnt = 0;
-	u64 phys;
-
-	/* size the new table and allocate it */
-	nodes_parsed = numa_nodes_parsed;
-	numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo);
-
-	for_each_node_mask(i, nodes_parsed)
-		cnt = i;
-	cnt++;
-	size = cnt * cnt * sizeof(numa_distance[0]);
-
-	phys = memblock_find_in_range(0, (u64)max_pfn_mapped << PAGE_SHIFT,
-				      size, PAGE_SIZE);
-	if (phys == MEMBLOCK_ERROR) {
-		pr_warning("NUMA: Warning: can't allocate distance table!\n");
-		/* don't retry until explicitly reset */
-		numa_distance = (void *)1LU;
-		return -ENOMEM;
-	}
-	memblock_x86_reserve_range(phys, phys + size, "NUMA DIST");
-
-	numa_distance = __va(phys);
-	numa_distance_cnt = cnt;
-
-	/* fill with the default distances */
-	for (i = 0; i < cnt; i++)
-		for (j = 0; j < cnt; j++)
-			numa_distance[i * cnt + j] = i == j ?
-				LOCAL_DISTANCE : REMOTE_DISTANCE;
-	printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt);
-
-	return 0;
-}
-
-/**
- * numa_set_distance - Set NUMA distance from one NUMA to another
- * @from: the 'from' node to set distance
- * @to: the 'to'  node to set distance
- * @distance: NUMA distance
- *
- * Set the distance from node @from to @to to @distance.  If distance table
- * doesn't exist, one which is large enough to accommodate all the currently
- * known nodes will be created.
- *
- * If such table cannot be allocated, a warning is printed and further
- * calls are ignored until the distance table is reset with
- * numa_reset_distance().
- *
- * If @from or @to is higher than the highest known node at the time of
- * table creation or @distance doesn't make sense, the call is ignored.
- * This is to allow simplification of specific NUMA config implementations.
- */
-void __init numa_set_distance(int from, int to, int distance)
-{
-	if (!numa_distance && numa_alloc_distance() < 0)
-		return;
-
-	if (from >= numa_distance_cnt || to >= numa_distance_cnt) {
-		printk_once(KERN_DEBUG "NUMA: Debug: distance out of bound, from=%d to=%d distance=%d\n",
-			    from, to, distance);
-		return;
-	}
-
-	if ((u8)distance != distance ||
-	    (from == to && distance != LOCAL_DISTANCE)) {
-		pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
-			     from, to, distance);
-		return;
-	}
-
-	numa_distance[from * numa_distance_cnt + to] = distance;
-}
-
-int __node_distance(int from, int to)
-{
-	if (from >= numa_distance_cnt || to >= numa_distance_cnt)
-		return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
-	return numa_distance[from * numa_distance_cnt + to];
-}
-EXPORT_SYMBOL(__node_distance);
-
-/*
- * Sanity check to catch more bad NUMA configurations (they are amazingly
- * common).  Make sure the nodes cover all memory.
- */
-static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
-{
-	unsigned long numaram, e820ram;
-	int i;
-
-	numaram = 0;
-	for (i = 0; i < mi->nr_blks; i++) {
-		unsigned long s = mi->blk[i].start >> PAGE_SHIFT;
-		unsigned long e = mi->blk[i].end >> PAGE_SHIFT;
-		numaram += e - s;
-		numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
-		if ((long)numaram < 0)
-			numaram = 0;
-	}
-
-	e820ram = max_pfn - (memblock_x86_hole_size(0,
-					max_pfn << PAGE_SHIFT) >> PAGE_SHIFT);
-	/* We seem to lose 3 pages somewhere. Allow 1M of slack. */
-	if ((long)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
-		printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n",
-		       (numaram << PAGE_SHIFT) >> 20,
-		       (e820ram << PAGE_SHIFT) >> 20);
-		return false;
-	}
-	return true;
-}
-
-static int __init numa_register_memblks(struct numa_meminfo *mi)
-{
-	int i, nid;
-
-	/* Account for nodes with cpus and no memory */
-	node_possible_map = numa_nodes_parsed;
-	numa_nodemask_from_meminfo(&node_possible_map, mi);
-	if (WARN_ON(nodes_empty(node_possible_map)))
-		return -EINVAL;
-
-	memnode_shift = compute_hash_shift(mi);
-	if (memnode_shift < 0) {
-		printk(KERN_ERR "NUMA: No NUMA node hash function found. Contact maintainer\n");
-		return -EINVAL;
-	}
-
-	for (i = 0; i < mi->nr_blks; i++)
-		memblock_x86_register_active_regions(mi->blk[i].nid,
-					mi->blk[i].start >> PAGE_SHIFT,
-					mi->blk[i].end >> PAGE_SHIFT);
-
-	/* for out of order entries */
-	sort_node_map();
-	if (!numa_meminfo_cover_memory(mi))
-		return -EINVAL;
-
-	/* Finally register nodes. */
-	for_each_node_mask(nid, node_possible_map) {
-		u64 start = (u64)max_pfn << PAGE_SHIFT;
-		u64 end = 0;
-
-		for (i = 0; i < mi->nr_blks; i++) {
-			if (nid != mi->blk[i].nid)
-				continue;
-			start = min(mi->blk[i].start, start);
-			end = max(mi->blk[i].end, end);
-		}
-
-		if (start < end)
-			setup_node_bootmem(nid, start, end);
-	}
-
-	return 0;
-}
-
-/**
- * dummy_numma_init - Fallback dummy NUMA init
- *
- * Used if there's no underlying NUMA architecture, NUMA initialization
- * fails, or NUMA is disabled on the command line.
- *
- * Must online at least one node and add memory blocks that cover all
- * allowed memory.  This function must not fail.
- */
-static int __init dummy_numa_init(void)
-{
-	printk(KERN_INFO "%s\n",
-	       numa_off ? "NUMA turned off" : "No NUMA configuration found");
-	printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
-	       0LU, max_pfn << PAGE_SHIFT);
-
-	node_set(0, numa_nodes_parsed);
-	numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT);
-
-	return 0;
-}
-
-static int __init numa_init(int (*init_func)(void))
-{
-	int i;
-	int ret;
-
-	for (i = 0; i < MAX_LOCAL_APIC; i++)
-		set_apicid_to_node(i, NUMA_NO_NODE);
-
-	nodes_clear(numa_nodes_parsed);
-	nodes_clear(node_possible_map);
-	nodes_clear(node_online_map);
-	memset(&numa_meminfo, 0, sizeof(numa_meminfo));
-	remove_all_active_ranges();
-	numa_reset_distance();
-
-	ret = init_func();
-	if (ret < 0)
-		return ret;
-	ret = numa_cleanup_meminfo(&numa_meminfo);
-	if (ret < 0)
-		return ret;
-
-	numa_emulation(&numa_meminfo, numa_distance_cnt);
-
-	ret = numa_register_memblks(&numa_meminfo);
-	if (ret < 0)
-		return ret;
-
-	for (i = 0; i < nr_cpu_ids; i++) {
-		int nid = early_cpu_to_node(i);
-
-		if (nid == NUMA_NO_NODE)
-			continue;
-		if (!node_online(nid))
-			numa_clear_node(i);
-	}
-	numa_init_array();
-	return 0;
-}
-
 void __init initmem_init(void)
 {
-	int ret;
-
-	if (!numa_off) {
-#ifdef CONFIG_ACPI_NUMA
-		ret = numa_init(x86_acpi_numa_init);
-		if (!ret)
-			return;
-#endif
-#ifdef CONFIG_AMD_NUMA
-		ret = numa_init(amd_numa_init);
-		if (!ret)
-			return;
-#endif
-	}
-
-	numa_init(dummy_numa_init);
+	x86_numa_init();
 }
 
 unsigned long __init numa_free_all_bootmem(void)
@@ -656,12 +23,3 @@ unsigned long __init numa_free_all_bootmem(void)
 
 	return pages;
 }
-
-int __cpuinit numa_cpu_node(int cpu)
-{
-	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
-
-	if (apicid != BAD_APICID)
-		return __apicid_to_node[apicid];
-	return NUMA_NO_NODE;
-}
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index de84cc14037..d0ed086b624 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -5,6 +5,7 @@
 #include <linux/errno.h>
 #include <linux/topology.h>
 #include <linux/memblock.h>
+#include <linux/bootmem.h>
 #include <asm/dma.h>
 
 #include "numa_internal.h"
@@ -84,7 +85,13 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
 		nr_nodes = MAX_NUMNODES;
 	}
 
-	size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / nr_nodes;
+	/*
+	 * Calculate target node size.  x86_32 freaks on __udivdi3() so do
+	 * the division in ulong number of pages and convert back.
+	 */
+	size = max_addr - addr - memblock_x86_hole_size(addr, max_addr);
+	size = PFN_PHYS((unsigned long)(size >> PAGE_SHIFT) / nr_nodes);
+
 	/*
 	 * Calculate the number of big nodes that can be allocated as a result
 	 * of consolidating the remainder.
@@ -226,7 +233,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
 	 */
 	while (nodes_weight(physnode_mask)) {
 		for_each_node_mask(i, physnode_mask) {
-			u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT;
+			u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
 			u64 start, limit, end;
 			int phys_blk;
 
@@ -298,7 +305,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
 {
 	static struct numa_meminfo ei __initdata;
 	static struct numa_meminfo pi __initdata;
-	const u64 max_addr = max_pfn << PAGE_SHIFT;
+	const u64 max_addr = PFN_PHYS(max_pfn);
 	u8 *phys_dist = NULL;
 	size_t phys_size = numa_dist_cnt * numa_dist_cnt * sizeof(phys_dist[0]);
 	int max_emu_nid, dfl_phys_nid;
@@ -342,8 +349,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
 	if (numa_dist_cnt) {
 		u64 phys;
 
-		phys = memblock_find_in_range(0,
-					      (u64)max_pfn_mapped << PAGE_SHIFT,
+		phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
 					      phys_size, PAGE_SIZE);
 		if (phys == MEMBLOCK_ERROR) {
 			pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
diff --git a/arch/x86/mm/numa_internal.h b/arch/x86/mm/numa_internal.h
index ef2d97377d7..7178c3afe05 100644
--- a/arch/x86/mm/numa_internal.h
+++ b/arch/x86/mm/numa_internal.h
@@ -19,6 +19,14 @@ void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi);
 int __init numa_cleanup_meminfo(struct numa_meminfo *mi);
 void __init numa_reset_distance(void);
 
+void __init x86_numa_init(void);
+
+#ifdef CONFIG_X86_64
+static inline void init_alloc_remap(int nid, u64 start, u64 end)	{ }
+#else
+void __init init_alloc_remap(int nid, u64 start, u64 end);
+#endif
+
 #ifdef CONFIG_NUMA_EMU
 void __init numa_emulation(struct numa_meminfo *numa_meminfo,
 			   int numa_dist_cnt);
diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c
index 38e6d174c49..9f0614daea8 100644
--- a/arch/x86/mm/pf_in.c
+++ b/arch/x86/mm/pf_in.c
@@ -414,22 +414,17 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
 	unsigned char *p;
 	struct prefix_bits prf;
 	int i;
-	unsigned long rv;
 
 	p = (unsigned char *)ins_addr;
 	p += skip_prefix(p, &prf);
 	p += get_opcode(p, &opcode);
 	for (i = 0; i < ARRAY_SIZE(reg_rop); i++)
-		if (reg_rop[i] == opcode) {
-			rv = REG_READ;
+		if (reg_rop[i] == opcode)
 			goto do_work;
-		}
 
 	for (i = 0; i < ARRAY_SIZE(reg_wop); i++)
-		if (reg_wop[i] == opcode) {
-			rv = REG_WRITE;
+		if (reg_wop[i] == opcode)
 			goto do_work;
-		}
 
 	printk(KERN_ERR "mmiotrace: Not a register instruction, opcode "
 							"0x%02x\n", opcode);
@@ -474,16 +469,13 @@ unsigned long get_ins_imm_val(unsigned long ins_addr)
 	unsigned char *p;
 	struct prefix_bits prf;
 	int i;
-	unsigned long rv;
 
 	p = (unsigned char *)ins_addr;
 	p += skip_prefix(p, &prf);
 	p += get_opcode(p, &opcode);
 	for (i = 0; i < ARRAY_SIZE(imm_wop); i++)
-		if (imm_wop[i] == opcode) {
-			rv = IMM_WRITE;
+		if (imm_wop[i] == opcode)
 			goto do_work;
-		}
 
 	printk(KERN_ERR "mmiotrace: Not an immediate instruction, opcode "
 							"0x%02x\n", opcode);
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat.c
index 8e9d3394f6d..81dbfdeb080 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat.c
@@ -26,8 +26,6 @@
 
 int acpi_numa __initdata;
 
-static struct bootnode nodes_add[MAX_NUMNODES];
-
 static __init int setup_node(int pxm)
 {
 	return acpi_map_pxm_to_node(pxm);
@@ -37,7 +35,6 @@ static __init void bad_srat(void)
 {
 	printk(KERN_ERR "SRAT: SRAT not used.\n");
 	acpi_numa = -1;
-	memset(nodes_add, 0, sizeof(nodes_add));
 }
 
 static __init inline int srat_disabled(void)
@@ -131,73 +128,17 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 	       pxm, apic_id, node);
 }
 
-#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+#ifdef CONFIG_MEMORY_HOTPLUG
 static inline int save_add_info(void) {return 1;}
 #else
 static inline int save_add_info(void) {return 0;}
 #endif
-/*
- * Update nodes_add[]
- * This code supports one contiguous hot add area per node
- */
-static void __init
-update_nodes_add(int node, unsigned long start, unsigned long end)
-{
-	unsigned long s_pfn = start >> PAGE_SHIFT;
-	unsigned long e_pfn = end >> PAGE_SHIFT;
-	int changed = 0;
-	struct bootnode *nd = &nodes_add[node];
-
-	/* I had some trouble with strange memory hotadd regions breaking
-	   the boot. Be very strict here and reject anything unexpected.
-	   If you want working memory hotadd write correct SRATs.
-
-	   The node size check is a basic sanity check to guard against
-	   mistakes */
-	if ((signed long)(end - start) < NODE_MIN_SIZE) {
-		printk(KERN_ERR "SRAT: Hotplug area too small\n");
-		return;
-	}
-
-	/* This check might be a bit too strict, but I'm keeping it for now. */
-	if (absent_pages_in_range(s_pfn, e_pfn) != e_pfn - s_pfn) {
-		printk(KERN_ERR
-			"SRAT: Hotplug area %lu -> %lu has existing memory\n",
-			s_pfn, e_pfn);
-		return;
-	}
-
-	/* Looks good */
-
-	if (nd->start == nd->end) {
-		nd->start = start;
-		nd->end = end;
-		changed = 1;
-	} else {
-		if (nd->start == end) {
-			nd->start = start;
-			changed = 1;
-		}
-		if (nd->end == start) {
-			nd->end = end;
-			changed = 1;
-		}
-		if (!changed)
-			printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
-	}
-
-	if (changed) {
-		node_set(node, numa_nodes_parsed);
-		printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
-				 nd->start, nd->end);
-	}
-}
 
 /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
 void __init
 acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 {
-	unsigned long start, end;
+	u64 start, end;
 	int node, pxm;
 
 	if (srat_disabled())
@@ -226,11 +167,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 		return;
 	}
 
-	printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
+	printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm,
 	       start, end);
-
-	if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)
-		update_nodes_add(node, start, end);
 }
 
 void __init acpi_numa_arch_fixup(void) {}
@@ -244,17 +182,3 @@ int __init x86_acpi_numa_init(void)
 		return ret;
 	return srat_disabled() ? -EINVAL : 0;
 }
-
-#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY)
-int memory_add_physaddr_to_nid(u64 start)
-{
-	int i, ret = 0;
-
-	for_each_node(i)
-		if (nodes_add[i].start <= start && nodes_add[i].end > start)
-			ret = i;
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
-#endif
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
deleted file mode 100644
index 364f36bdfad..00000000000
--- a/arch/x86/mm/srat_32.c
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * Some of the code in this file has been gleaned from the 64 bit 
- * discontigmem support code base.
- *
- * Copyright (C) 2002, IBM Corp.
- *
- * All rights reserved.          
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Send feedback to Pat Gaughen <gone@us.ibm.com>
- */
-#include <linux/mm.h>
-#include <linux/bootmem.h>
-#include <linux/memblock.h>
-#include <linux/mmzone.h>
-#include <linux/acpi.h>
-#include <linux/nodemask.h>
-#include <asm/srat.h>
-#include <asm/topology.h>
-#include <asm/smp.h>
-#include <asm/e820.h>
-
-/*
- * proximity macros and definitions
- */
-#define NODE_ARRAY_INDEX(x)	((x) / 8)	/* 8 bits/char */
-#define NODE_ARRAY_OFFSET(x)	((x) % 8)	/* 8 bits/char */
-#define BMAP_SET(bmap, bit)	((bmap)[NODE_ARRAY_INDEX(bit)] |= 1 << NODE_ARRAY_OFFSET(bit))
-#define BMAP_TEST(bmap, bit)	((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit)))
-/* bitmap length; _PXM is at most 255 */
-#define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8) 
-static u8 __initdata pxm_bitmap[PXM_BITMAP_LEN];	/* bitmap of proximity domains */
-
-#define MAX_CHUNKS_PER_NODE	3
-#define MAXCHUNKS		(MAX_CHUNKS_PER_NODE * MAX_NUMNODES)
-struct node_memory_chunk_s {
-	unsigned long	start_pfn;
-	unsigned long	end_pfn;
-	u8	pxm;		// proximity domain of node
-	u8	nid;		// which cnode contains this chunk?
-	u8	bank;		// which mem bank on this node
-};
-static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS];
-
-static int __initdata num_memory_chunks; /* total number of memory chunks */
-static u8 __initdata apicid_to_pxm[MAX_LOCAL_APIC];
-
-int acpi_numa __initdata;
-
-static __init void bad_srat(void)
-{
-        printk(KERN_ERR "SRAT: SRAT not used.\n");
-        acpi_numa = -1;
-	num_memory_chunks = 0;
-}
-
-static __init inline int srat_disabled(void)
-{
-	return numa_off || acpi_numa < 0;
-}
-
-/* Identify CPU proximity domains */
-void __init
-acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity)
-{
-	if (srat_disabled())
-		return;
-	if (cpu_affinity->header.length !=
-	     sizeof(struct acpi_srat_cpu_affinity)) {
-		bad_srat();
-		return;
-	}
-
-	if ((cpu_affinity->flags & ACPI_SRAT_CPU_ENABLED) == 0)
-		return;		/* empty entry */
-
-	/* mark this node as "seen" in node bitmap */
-	BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain_lo);
-
-	/* don't need to check apic_id here, because it is always 8 bits */
-	apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo;
-
-	printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n",
-		cpu_affinity->apic_id, cpu_affinity->proximity_domain_lo);
-}
-
-/*
- * Identify memory proximity domains and hot-remove capabilities.
- * Fill node memory chunk list structure.
- */
-void __init
-acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *memory_affinity)
-{
-	unsigned long long paddr, size;
-	unsigned long start_pfn, end_pfn;
-	u8 pxm;
-	struct node_memory_chunk_s *p, *q, *pend;
-
-	if (srat_disabled())
-		return;
-	if (memory_affinity->header.length !=
-	     sizeof(struct acpi_srat_mem_affinity)) {
-		bad_srat();
-		return;
-	}
-
-	if ((memory_affinity->flags & ACPI_SRAT_MEM_ENABLED) == 0)
-		return;		/* empty entry */
-
-	pxm = memory_affinity->proximity_domain & 0xff;
-
-	/* mark this node as "seen" in node bitmap */
-	BMAP_SET(pxm_bitmap, pxm);
-
-	/* calculate info for memory chunk structure */
-	paddr = memory_affinity->base_address;
-	size = memory_affinity->length;
-
-	start_pfn = paddr >> PAGE_SHIFT;
-	end_pfn = (paddr + size) >> PAGE_SHIFT;
-
-
-	if (num_memory_chunks >= MAXCHUNKS) {
-		printk(KERN_WARNING "Too many mem chunks in SRAT."
-			" Ignoring %lld MBytes at %llx\n",
-			size/(1024*1024), paddr);
-		return;
-	}
-
-	/* Insertion sort based on base address */
-	pend = &node_memory_chunk[num_memory_chunks];
-	for (p = &node_memory_chunk[0]; p < pend; p++) {
-		if (start_pfn < p->start_pfn)
-			break;
-	}
-	if (p < pend) {
-		for (q = pend; q >= p; q--)
-			*(q + 1) = *q;
-	}
-	p->start_pfn = start_pfn;
-	p->end_pfn = end_pfn;
-	p->pxm = pxm;
-
-	num_memory_chunks++;
-
-	printk(KERN_DEBUG "Memory range %08lx to %08lx"
-			  " in proximity domain %02x %s\n",
-		start_pfn, end_pfn,
-		pxm,
-		((memory_affinity->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ?
-		 "enabled and removable" : "enabled" ) );
-}
-
-/* Callback for SLIT parsing */
-void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
-{
-}
-
-void acpi_numa_arch_fixup(void)
-{
-}
-/*
- * The SRAT table always lists ascending addresses, so can always
- * assume that the first "start" address that you see is the real
- * start of the node, and that the current "end" address is after
- * the previous one.
- */
-static __init int node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk)
-{
-	/*
-	 * Only add present memory as told by the e820.
-	 * There is no guarantee from the SRAT that the memory it
-	 * enumerates is present at boot time because it represents
-	 * *possible* memory hotplug areas the same as normal RAM.
-	 */
-	if (memory_chunk->start_pfn >= max_pfn) {
-		printk(KERN_INFO "Ignoring SRAT pfns: %08lx - %08lx\n",
-			memory_chunk->start_pfn, memory_chunk->end_pfn);
-		return -1;
-	}
-	if (memory_chunk->nid != nid)
-		return -1;
-
-	if (!node_has_online_mem(nid))
-		node_start_pfn[nid] = memory_chunk->start_pfn;
-
-	if (node_start_pfn[nid] > memory_chunk->start_pfn)
-		node_start_pfn[nid] = memory_chunk->start_pfn;
-
-	if (node_end_pfn[nid] < memory_chunk->end_pfn)
-		node_end_pfn[nid] = memory_chunk->end_pfn;
-
-	return 0;
-}
-
-int __init get_memcfg_from_srat(void)
-{
-	int i, j, nid;
-
-	if (srat_disabled())
-		goto out_fail;
-
-	if (acpi_numa_init() < 0)
-		goto out_fail;
-
-	if (num_memory_chunks == 0) {
-		printk(KERN_DEBUG
-			 "could not find any ACPI SRAT memory areas.\n");
-		goto out_fail;
-	}
-
-	/* Calculate total number of nodes in system from PXM bitmap and create
-	 * a set of sequential node IDs starting at zero.  (ACPI doesn't seem
-	 * to specify the range of _PXM values.)
-	 */
-	/*
-	 * MCD - we no longer HAVE to number nodes sequentially.  PXM domain
-	 * numbers could go as high as 256, and MAX_NUMNODES for i386 is typically
-	 * 32, so we will continue numbering them in this manner until MAX_NUMNODES
-	 * approaches MAX_PXM_DOMAINS for i386.
-	 */
-	nodes_clear(node_online_map);
-	for (i = 0; i < MAX_PXM_DOMAINS; i++) {
-		if (BMAP_TEST(pxm_bitmap, i)) {
-			int nid = acpi_map_pxm_to_node(i);
-			node_set_online(nid);
-		}
-	}
-	BUG_ON(num_online_nodes() == 0);
-
-	/* set cnode id in memory chunk structure */
-	for (i = 0; i < num_memory_chunks; i++)
-		node_memory_chunk[i].nid = pxm_to_node(node_memory_chunk[i].pxm);
-
-	printk(KERN_DEBUG "pxm bitmap: ");
-	for (i = 0; i < sizeof(pxm_bitmap); i++) {
-		printk(KERN_CONT "%02x ", pxm_bitmap[i]);
-	}
-	printk(KERN_CONT "\n");
-	printk(KERN_DEBUG "Number of logical nodes in system = %d\n",
-			 num_online_nodes());
-	printk(KERN_DEBUG "Number of memory chunks in system = %d\n",
-			 num_memory_chunks);
-
-	for (i = 0; i < MAX_LOCAL_APIC; i++)
-		set_apicid_to_node(i, pxm_to_node(apicid_to_pxm[i]));
-
-	for (j = 0; j < num_memory_chunks; j++){
-		struct node_memory_chunk_s * chunk = &node_memory_chunk[j];
-		printk(KERN_DEBUG
-			"chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
-		       j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
-		if (node_read_chunk(chunk->nid, chunk))
-			continue;
-
-		memblock_x86_register_active_regions(chunk->nid, chunk->start_pfn,
-					     min(chunk->end_pfn, max_pfn));
-	}
-	/* for out of order entries in SRAT */
-	sort_node_map();
-
-	for_each_online_node(nid) {
-		unsigned long start = node_start_pfn[nid];
-		unsigned long end = min(node_end_pfn[nid], max_pfn);
-
-		memory_present(nid, start, end);
-		node_remap_size[nid] = node_memmap_size_bytes(nid, start, end);
-	}
-	return 1;
-out_fail:
-	printk(KERN_DEBUG "failed to get NUMA memory information from SRAT"
-			" table\n");
-	return 0;
-}
diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile
new file mode 100644
index 00000000000..90568c33ddb
--- /dev/null
+++ b/arch/x86/net/Makefile
@@ -0,0 +1,4 @@
+#
+# Arch-specific network modules
+#
+obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
new file mode 100644
index 00000000000..66870223f8c
--- /dev/null
+++ b/arch/x86/net/bpf_jit.S
@@ -0,0 +1,140 @@
+/* bpf_jit.S : BPF JIT helper functions
+ *
+ * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
+/*
+ * Calling convention :
+ * rdi : skb pointer
+ * esi : offset of byte(s) to fetch in skb (can be scratched)
+ * r8  : copy of skb->data
+ * r9d : hlen = skb->len - skb->data_len
+ */
+#define SKBDATA	%r8
+
+sk_load_word_ind:
+	.globl	sk_load_word_ind
+
+	add	%ebx,%esi	/* offset += X */
+#	test    %esi,%esi	/* if (offset < 0) goto bpf_error; */
+	js	bpf_error
+
+sk_load_word:
+	.globl	sk_load_word
+
+	mov	%r9d,%eax		# hlen
+	sub	%esi,%eax		# hlen - offset
+	cmp	$3,%eax
+	jle	bpf_slow_path_word
+	mov     (SKBDATA,%rsi),%eax
+	bswap   %eax  			/* ntohl() */
+	ret
+
+
+sk_load_half_ind:
+	.globl sk_load_half_ind
+
+	add	%ebx,%esi	/* offset += X */
+	js	bpf_error
+
+sk_load_half:
+	.globl	sk_load_half
+
+	mov	%r9d,%eax
+	sub	%esi,%eax		#	hlen - offset
+	cmp	$1,%eax
+	jle	bpf_slow_path_half
+	movzwl	(SKBDATA,%rsi),%eax
+	rol	$8,%ax			# ntohs()
+	ret
+
+sk_load_byte_ind:
+	.globl sk_load_byte_ind
+	add	%ebx,%esi	/* offset += X */
+	js	bpf_error
+
+sk_load_byte:
+	.globl	sk_load_byte
+
+	cmp	%esi,%r9d   /* if (offset >= hlen) goto bpf_slow_path_byte */
+	jle	bpf_slow_path_byte
+	movzbl	(SKBDATA,%rsi),%eax
+	ret
+
+/**
+ * sk_load_byte_msh - BPF_S_LDX_B_MSH helper
+ *
+ * Implements BPF_S_LDX_B_MSH : ldxb  4*([offset]&0xf)
+ * Must preserve A accumulator (%eax)
+ * Inputs : %esi is the offset value, already known positive
+ */
+ENTRY(sk_load_byte_msh)
+	CFI_STARTPROC
+	cmp	%esi,%r9d      /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
+	jle	bpf_slow_path_byte_msh
+	movzbl	(SKBDATA,%rsi),%ebx
+	and	$15,%bl
+	shl	$2,%bl
+	ret
+	CFI_ENDPROC
+ENDPROC(sk_load_byte_msh)
+
+bpf_error:
+# force a return 0 from jit handler
+	xor		%eax,%eax
+	mov		-8(%rbp),%rbx
+	leaveq
+	ret
+
+/* rsi contains offset and can be scratched */
+#define bpf_slow_path_common(LEN)		\
+	push	%rdi;    /* save skb */		\
+	push	%r9;				\
+	push	SKBDATA;			\
+/* rsi already has offset */			\
+	mov	$LEN,%ecx;	/* len */	\
+	lea	-12(%rbp),%rdx;			\
+	call	skb_copy_bits;			\
+	test    %eax,%eax;			\
+	pop	SKBDATA;			\
+	pop	%r9;				\
+	pop	%rdi
+
+
+bpf_slow_path_word:
+	bpf_slow_path_common(4)
+	js	bpf_error
+	mov	-12(%rbp),%eax
+	bswap	%eax
+	ret
+
+bpf_slow_path_half:
+	bpf_slow_path_common(2)
+	js	bpf_error
+	mov	-12(%rbp),%ax
+	rol	$8,%ax
+	movzwl	%ax,%eax
+	ret
+
+bpf_slow_path_byte:
+	bpf_slow_path_common(1)
+	js	bpf_error
+	movzbl	-12(%rbp),%eax
+	ret
+
+bpf_slow_path_byte_msh:
+	xchg	%eax,%ebx /* dont lose A , X is about to be scratched */
+	bpf_slow_path_common(1)
+	js	bpf_error
+	movzbl	-12(%rbp),%eax
+	and	$15,%al
+	shl	$2,%al
+	xchg	%eax,%ebx
+	ret
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
new file mode 100644
index 00000000000..bfab3fa10ed
--- /dev/null
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -0,0 +1,654 @@
+/* bpf_jit_comp.c : BPF JIT compiler
+ *
+ * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#include <linux/moduleloader.h>
+#include <asm/cacheflush.h>
+#include <linux/netdevice.h>
+#include <linux/filter.h>
+
+/*
+ * Conventions :
+ *  EAX : BPF A accumulator
+ *  EBX : BPF X accumulator
+ *  RDI : pointer to skb   (first argument given to JIT function)
+ *  RBP : frame pointer (even if CONFIG_FRAME_POINTER=n)
+ *  ECX,EDX,ESI : scratch registers
+ *  r9d : skb->len - skb->data_len (headlen)
+ *  r8  : skb->data
+ * -8(RBP) : saved RBX value
+ * -16(RBP)..-80(RBP) : BPF_MEMWORDS values
+ */
+int bpf_jit_enable __read_mostly;
+
+/*
+ * assembly code in arch/x86/net/bpf_jit.S
+ */
+extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
+extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[];
+
+static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
+{
+	if (len == 1)
+		*ptr = bytes;
+	else if (len == 2)
+		*(u16 *)ptr = bytes;
+	else {
+		*(u32 *)ptr = bytes;
+		barrier();
+	}
+	return ptr + len;
+}
+
+#define EMIT(bytes, len)	do { prog = emit_code(prog, bytes, len); } while (0)
+
+#define EMIT1(b1)		EMIT(b1, 1)
+#define EMIT2(b1, b2)		EMIT((b1) + ((b2) << 8), 2)
+#define EMIT3(b1, b2, b3)	EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
+#define EMIT4(b1, b2, b3, b4)   EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
+#define EMIT1_off32(b1, off)	do { EMIT1(b1); EMIT(off, 4);} while (0)
+
+#define CLEAR_A() EMIT2(0x31, 0xc0) /* xor %eax,%eax */
+#define CLEAR_X() EMIT2(0x31, 0xdb) /* xor %ebx,%ebx */
+
+static inline bool is_imm8(int value)
+{
+	return value <= 127 && value >= -128;
+}
+
+static inline bool is_near(int offset)
+{
+	return offset <= 127 && offset >= -128;
+}
+
+#define EMIT_JMP(offset)						\
+do {									\
+	if (offset) {							\
+		if (is_near(offset))					\
+			EMIT2(0xeb, offset); /* jmp .+off8 */		\
+		else							\
+			EMIT1_off32(0xe9, offset); /* jmp .+off32 */	\
+	}								\
+} while (0)
+
+/* list of x86 cond jumps opcodes (. + s8)
+ * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
+ */
+#define X86_JB  0x72
+#define X86_JAE 0x73
+#define X86_JE  0x74
+#define X86_JNE 0x75
+#define X86_JBE 0x76
+#define X86_JA  0x77
+
+#define EMIT_COND_JMP(op, offset)				\
+do {								\
+	if (is_near(offset))					\
+		EMIT2(op, offset); /* jxx .+off8 */		\
+	else {							\
+		EMIT2(0x0f, op + 0x10);				\
+		EMIT(offset, 4); /* jxx .+off32 */		\
+	}							\
+} while (0)
+
+#define COND_SEL(CODE, TOP, FOP)	\
+	case CODE:			\
+		t_op = TOP;		\
+		f_op = FOP;		\
+		goto cond_branch
+
+
+#define SEEN_DATAREF 1 /* might call external helpers */
+#define SEEN_XREG    2 /* ebx is used */
+#define SEEN_MEM     4 /* use mem[] for temporary storage */
+
+static inline void bpf_flush_icache(void *start, void *end)
+{
+	mm_segment_t old_fs = get_fs();
+
+	set_fs(KERNEL_DS);
+	smp_wmb();
+	flush_icache_range((unsigned long)start, (unsigned long)end);
+	set_fs(old_fs);
+}
+
+
+void bpf_jit_compile(struct sk_filter *fp)
+{
+	u8 temp[64];
+	u8 *prog;
+	unsigned int proglen, oldproglen = 0;
+	int ilen, i;
+	int t_offset, f_offset;
+	u8 t_op, f_op, seen = 0, pass;
+	u8 *image = NULL;
+	u8 *func;
+	int pc_ret0 = -1; /* bpf index of first RET #0 instruction (if any) */
+	unsigned int cleanup_addr; /* epilogue code offset */
+	unsigned int *addrs;
+	const struct sock_filter *filter = fp->insns;
+	int flen = fp->len;
+
+	if (!bpf_jit_enable)
+		return;
+
+	addrs = kmalloc(flen * sizeof(*addrs), GFP_KERNEL);
+	if (addrs == NULL)
+		return;
+
+	/* Before first pass, make a rough estimation of addrs[]
+	 * each bpf instruction is translated to less than 64 bytes
+	 */
+	for (proglen = 0, i = 0; i < flen; i++) {
+		proglen += 64;
+		addrs[i] = proglen;
+	}
+	cleanup_addr = proglen; /* epilogue address */
+
+	for (pass = 0; pass < 10; pass++) {
+		/* no prologue/epilogue for trivial filters (RET something) */
+		proglen = 0;
+		prog = temp;
+
+		if (seen) {
+			EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */
+			EMIT4(0x48, 0x83, 0xec, 96);	/* subq  $96,%rsp	*/
+			/* note : must save %rbx in case bpf_error is hit */
+			if (seen & (SEEN_XREG | SEEN_DATAREF))
+				EMIT4(0x48, 0x89, 0x5d, 0xf8); /* mov %rbx, -8(%rbp) */
+			if (seen & SEEN_XREG)
+				CLEAR_X(); /* make sure we dont leek kernel memory */
+
+			/*
+			 * If this filter needs to access skb data,
+			 * loads r9 and r8 with :
+			 *  r9 = skb->len - skb->data_len
+			 *  r8 = skb->data
+			 */
+			if (seen & SEEN_DATAREF) {
+				if (offsetof(struct sk_buff, len) <= 127)
+					/* mov    off8(%rdi),%r9d */
+					EMIT4(0x44, 0x8b, 0x4f, offsetof(struct sk_buff, len));
+				else {
+					/* mov    off32(%rdi),%r9d */
+					EMIT3(0x44, 0x8b, 0x8f);
+					EMIT(offsetof(struct sk_buff, len), 4);
+				}
+				if (is_imm8(offsetof(struct sk_buff, data_len)))
+					/* sub    off8(%rdi),%r9d */
+					EMIT4(0x44, 0x2b, 0x4f, offsetof(struct sk_buff, data_len));
+				else {
+					EMIT3(0x44, 0x2b, 0x8f);
+					EMIT(offsetof(struct sk_buff, data_len), 4);
+				}
+
+				if (is_imm8(offsetof(struct sk_buff, data)))
+					/* mov off8(%rdi),%r8 */
+					EMIT4(0x4c, 0x8b, 0x47, offsetof(struct sk_buff, data));
+				else {
+					/* mov off32(%rdi),%r8 */
+					EMIT3(0x4c, 0x8b, 0x87);
+					EMIT(offsetof(struct sk_buff, data), 4);
+				}
+			}
+		}
+
+		switch (filter[0].code) {
+		case BPF_S_RET_K:
+		case BPF_S_LD_W_LEN:
+		case BPF_S_ANC_PROTOCOL:
+		case BPF_S_ANC_IFINDEX:
+		case BPF_S_ANC_MARK:
+		case BPF_S_ANC_RXHASH:
+		case BPF_S_ANC_CPU:
+		case BPF_S_ANC_QUEUE:
+		case BPF_S_LD_W_ABS:
+		case BPF_S_LD_H_ABS:
+		case BPF_S_LD_B_ABS:
+			/* first instruction sets A register (or is RET 'constant') */
+			break;
+		default:
+			/* make sure we dont leak kernel information to user */
+			CLEAR_A(); /* A = 0 */
+		}
+
+		for (i = 0; i < flen; i++) {
+			unsigned int K = filter[i].k;
+
+			switch (filter[i].code) {
+			case BPF_S_ALU_ADD_X: /* A += X; */
+				seen |= SEEN_XREG;
+				EMIT2(0x01, 0xd8);		/* add %ebx,%eax */
+				break;
+			case BPF_S_ALU_ADD_K: /* A += K; */
+				if (!K)
+					break;
+				if (is_imm8(K))
+					EMIT3(0x83, 0xc0, K);	/* add imm8,%eax */
+				else
+					EMIT1_off32(0x05, K);	/* add imm32,%eax */
+				break;
+			case BPF_S_ALU_SUB_X: /* A -= X; */
+				seen |= SEEN_XREG;
+				EMIT2(0x29, 0xd8);		/* sub    %ebx,%eax */
+				break;
+			case BPF_S_ALU_SUB_K: /* A -= K */
+				if (!K)
+					break;
+				if (is_imm8(K))
+					EMIT3(0x83, 0xe8, K); /* sub imm8,%eax */
+				else
+					EMIT1_off32(0x2d, K); /* sub imm32,%eax */
+				break;
+			case BPF_S_ALU_MUL_X: /* A *= X; */
+				seen |= SEEN_XREG;
+				EMIT3(0x0f, 0xaf, 0xc3);	/* imul %ebx,%eax */
+				break;
+			case BPF_S_ALU_MUL_K: /* A *= K */
+				if (is_imm8(K))
+					EMIT3(0x6b, 0xc0, K); /* imul imm8,%eax,%eax */
+				else {
+					EMIT2(0x69, 0xc0);		/* imul imm32,%eax */
+					EMIT(K, 4);
+				}
+				break;
+			case BPF_S_ALU_DIV_X: /* A /= X; */
+				seen |= SEEN_XREG;
+				EMIT2(0x85, 0xdb);	/* test %ebx,%ebx */
+				if (pc_ret0 != -1)
+					EMIT_COND_JMP(X86_JE, addrs[pc_ret0] - (addrs[i] - 4));
+				else {
+					EMIT_COND_JMP(X86_JNE, 2 + 5);
+					CLEAR_A();
+					EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 4)); /* jmp .+off32 */
+				}
+				EMIT4(0x31, 0xd2, 0xf7, 0xf3); /* xor %edx,%edx; div %ebx */
+				break;
+			case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */
+				EMIT3(0x48, 0x69, 0xc0); /* imul imm32,%rax,%rax */
+				EMIT(K, 4);
+				EMIT4(0x48, 0xc1, 0xe8, 0x20); /* shr $0x20,%rax */
+				break;
+			case BPF_S_ALU_AND_X:
+				seen |= SEEN_XREG;
+				EMIT2(0x21, 0xd8);		/* and %ebx,%eax */
+				break;
+			case BPF_S_ALU_AND_K:
+				if (K >= 0xFFFFFF00) {
+					EMIT2(0x24, K & 0xFF); /* and imm8,%al */
+				} else if (K >= 0xFFFF0000) {
+					EMIT2(0x66, 0x25);	/* and imm16,%ax */
+					EMIT2(K, 2);
+				} else {
+					EMIT1_off32(0x25, K);	/* and imm32,%eax */
+				}
+				break;
+			case BPF_S_ALU_OR_X:
+				seen |= SEEN_XREG;
+				EMIT2(0x09, 0xd8);		/* or %ebx,%eax */
+				break;
+			case BPF_S_ALU_OR_K:
+				if (is_imm8(K))
+					EMIT3(0x83, 0xc8, K); /* or imm8,%eax */
+				else
+					EMIT1_off32(0x0d, K);	/* or imm32,%eax */
+				break;
+			case BPF_S_ALU_LSH_X: /* A <<= X; */
+				seen |= SEEN_XREG;
+				EMIT4(0x89, 0xd9, 0xd3, 0xe0);	/* mov %ebx,%ecx; shl %cl,%eax */
+				break;
+			case BPF_S_ALU_LSH_K:
+				if (K == 0)
+					break;
+				else if (K == 1)
+					EMIT2(0xd1, 0xe0); /* shl %eax */
+				else
+					EMIT3(0xc1, 0xe0, K);
+				break;
+			case BPF_S_ALU_RSH_X: /* A >>= X; */
+				seen |= SEEN_XREG;
+				EMIT4(0x89, 0xd9, 0xd3, 0xe8);	/* mov %ebx,%ecx; shr %cl,%eax */
+				break;
+			case BPF_S_ALU_RSH_K: /* A >>= K; */
+				if (K == 0)
+					break;
+				else if (K == 1)
+					EMIT2(0xd1, 0xe8); /* shr %eax */
+				else
+					EMIT3(0xc1, 0xe8, K);
+				break;
+			case BPF_S_ALU_NEG:
+				EMIT2(0xf7, 0xd8);		/* neg %eax */
+				break;
+			case BPF_S_RET_K:
+				if (!K) {
+					if (pc_ret0 == -1)
+						pc_ret0 = i;
+					CLEAR_A();
+				} else {
+					EMIT1_off32(0xb8, K);	/* mov $imm32,%eax */
+				}
+				/* fallinto */
+			case BPF_S_RET_A:
+				if (seen) {
+					if (i != flen - 1) {
+						EMIT_JMP(cleanup_addr - addrs[i]);
+						break;
+					}
+					if (seen & SEEN_XREG)
+						EMIT4(0x48, 0x8b, 0x5d, 0xf8);  /* mov  -8(%rbp),%rbx */
+					EMIT1(0xc9);		/* leaveq */
+				}
+				EMIT1(0xc3);		/* ret */
+				break;
+			case BPF_S_MISC_TAX: /* X = A */
+				seen |= SEEN_XREG;
+				EMIT2(0x89, 0xc3);	/* mov    %eax,%ebx */
+				break;
+			case BPF_S_MISC_TXA: /* A = X */
+				seen |= SEEN_XREG;
+				EMIT2(0x89, 0xd8);	/* mov    %ebx,%eax */
+				break;
+			case BPF_S_LD_IMM: /* A = K */
+				if (!K)
+					CLEAR_A();
+				else
+					EMIT1_off32(0xb8, K); /* mov $imm32,%eax */
+				break;
+			case BPF_S_LDX_IMM: /* X = K */
+				seen |= SEEN_XREG;
+				if (!K)
+					CLEAR_X();
+				else
+					EMIT1_off32(0xbb, K); /* mov $imm32,%ebx */
+				break;
+			case BPF_S_LD_MEM: /* A = mem[K] : mov off8(%rbp),%eax */
+				seen |= SEEN_MEM;
+				EMIT3(0x8b, 0x45, 0xf0 - K*4);
+				break;
+			case BPF_S_LDX_MEM: /* X = mem[K] : mov off8(%rbp),%ebx */
+				seen |= SEEN_XREG | SEEN_MEM;
+				EMIT3(0x8b, 0x5d, 0xf0 - K*4);
+				break;
+			case BPF_S_ST: /* mem[K] = A : mov %eax,off8(%rbp) */
+				seen |= SEEN_MEM;
+				EMIT3(0x89, 0x45, 0xf0 - K*4);
+				break;
+			case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */
+				seen |= SEEN_XREG | SEEN_MEM;
+				EMIT3(0x89, 0x5d, 0xf0 - K*4);
+				break;
+			case BPF_S_LD_W_LEN: /*	A = skb->len; */
+				BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
+				if (is_imm8(offsetof(struct sk_buff, len)))
+					/* mov    off8(%rdi),%eax */
+					EMIT3(0x8b, 0x47, offsetof(struct sk_buff, len));
+				else {
+					EMIT2(0x8b, 0x87);
+					EMIT(offsetof(struct sk_buff, len), 4);
+				}
+				break;
+			case BPF_S_LDX_W_LEN: /* X = skb->len; */
+				seen |= SEEN_XREG;
+				if (is_imm8(offsetof(struct sk_buff, len)))
+					/* mov off8(%rdi),%ebx */
+					EMIT3(0x8b, 0x5f, offsetof(struct sk_buff, len));
+				else {
+					EMIT2(0x8b, 0x9f);
+					EMIT(offsetof(struct sk_buff, len), 4);
+				}
+				break;
+			case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */
+				BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
+				if (is_imm8(offsetof(struct sk_buff, protocol))) {
+					/* movzwl off8(%rdi),%eax */
+					EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, protocol));
+				} else {
+					EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */
+					EMIT(offsetof(struct sk_buff, protocol), 4);
+				}
+				EMIT2(0x86, 0xc4); /* ntohs() : xchg   %al,%ah */
+				break;
+			case BPF_S_ANC_IFINDEX:
+				if (is_imm8(offsetof(struct sk_buff, dev))) {
+					/* movq off8(%rdi),%rax */
+					EMIT4(0x48, 0x8b, 0x47, offsetof(struct sk_buff, dev));
+				} else {
+					EMIT3(0x48, 0x8b, 0x87); /* movq off32(%rdi),%rax */
+					EMIT(offsetof(struct sk_buff, dev), 4);
+				}
+				EMIT3(0x48, 0x85, 0xc0);	/* test %rax,%rax */
+				EMIT_COND_JMP(X86_JE, cleanup_addr - (addrs[i] - 6));
+				BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
+				EMIT2(0x8b, 0x80);	/* mov off32(%rax),%eax */
+				EMIT(offsetof(struct net_device, ifindex), 4);
+				break;
+			case BPF_S_ANC_MARK:
+				BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
+				if (is_imm8(offsetof(struct sk_buff, mark))) {
+					/* mov off8(%rdi),%eax */
+					EMIT3(0x8b, 0x47, offsetof(struct sk_buff, mark));
+				} else {
+					EMIT2(0x8b, 0x87);
+					EMIT(offsetof(struct sk_buff, mark), 4);
+				}
+				break;
+			case BPF_S_ANC_RXHASH:
+				BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4);
+				if (is_imm8(offsetof(struct sk_buff, rxhash))) {
+					/* mov off8(%rdi),%eax */
+					EMIT3(0x8b, 0x47, offsetof(struct sk_buff, rxhash));
+				} else {
+					EMIT2(0x8b, 0x87);
+					EMIT(offsetof(struct sk_buff, rxhash), 4);
+				}
+				break;
+			case BPF_S_ANC_QUEUE:
+				BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
+				if (is_imm8(offsetof(struct sk_buff, queue_mapping))) {
+					/* movzwl off8(%rdi),%eax */
+					EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, queue_mapping));
+				} else {
+					EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */
+					EMIT(offsetof(struct sk_buff, queue_mapping), 4);
+				}
+				break;
+			case BPF_S_ANC_CPU:
+#ifdef CONFIG_SMP
+				EMIT4(0x65, 0x8b, 0x04, 0x25); /* mov %gs:off32,%eax */
+				EMIT((u32)(unsigned long)&cpu_number, 4); /* A = smp_processor_id(); */
+#else
+				CLEAR_A();
+#endif
+				break;
+			case BPF_S_LD_W_ABS:
+				func = sk_load_word;
+common_load:			seen |= SEEN_DATAREF;
+				if ((int)K < 0)
+					goto out;
+				t_offset = func - (image + addrs[i]);
+				EMIT1_off32(0xbe, K); /* mov imm32,%esi */
+				EMIT1_off32(0xe8, t_offset); /* call */
+				break;
+			case BPF_S_LD_H_ABS:
+				func = sk_load_half;
+				goto common_load;
+			case BPF_S_LD_B_ABS:
+				func = sk_load_byte;
+				goto common_load;
+			case BPF_S_LDX_B_MSH:
+				if ((int)K < 0) {
+					if (pc_ret0 != -1) {
+						EMIT_JMP(addrs[pc_ret0] - addrs[i]);
+						break;
+					}
+					CLEAR_A();
+					EMIT_JMP(cleanup_addr - addrs[i]);
+					break;
+				}
+				seen |= SEEN_DATAREF | SEEN_XREG;
+				t_offset = sk_load_byte_msh - (image + addrs[i]);
+				EMIT1_off32(0xbe, K);	/* mov imm32,%esi */
+				EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */
+				break;
+			case BPF_S_LD_W_IND:
+				func = sk_load_word_ind;
+common_load_ind:		seen |= SEEN_DATAREF | SEEN_XREG;
+				t_offset = func - (image + addrs[i]);
+				EMIT1_off32(0xbe, K);	/* mov imm32,%esi   */
+				EMIT1_off32(0xe8, t_offset);	/* call sk_load_xxx_ind */
+				break;
+			case BPF_S_LD_H_IND:
+				func = sk_load_half_ind;
+				goto common_load_ind;
+			case BPF_S_LD_B_IND:
+				func = sk_load_byte_ind;
+				goto common_load_ind;
+			case BPF_S_JMP_JA:
+				t_offset = addrs[i + K] - addrs[i];
+				EMIT_JMP(t_offset);
+				break;
+			COND_SEL(BPF_S_JMP_JGT_K, X86_JA, X86_JBE);
+			COND_SEL(BPF_S_JMP_JGE_K, X86_JAE, X86_JB);
+			COND_SEL(BPF_S_JMP_JEQ_K, X86_JE, X86_JNE);
+			COND_SEL(BPF_S_JMP_JSET_K,X86_JNE, X86_JE);
+			COND_SEL(BPF_S_JMP_JGT_X, X86_JA, X86_JBE);
+			COND_SEL(BPF_S_JMP_JGE_X, X86_JAE, X86_JB);
+			COND_SEL(BPF_S_JMP_JEQ_X, X86_JE, X86_JNE);
+			COND_SEL(BPF_S_JMP_JSET_X,X86_JNE, X86_JE);
+
+cond_branch:			f_offset = addrs[i + filter[i].jf] - addrs[i];
+				t_offset = addrs[i + filter[i].jt] - addrs[i];
+
+				/* same targets, can avoid doing the test :) */
+				if (filter[i].jt == filter[i].jf) {
+					EMIT_JMP(t_offset);
+					break;
+				}
+
+				switch (filter[i].code) {
+				case BPF_S_JMP_JGT_X:
+				case BPF_S_JMP_JGE_X:
+				case BPF_S_JMP_JEQ_X:
+					seen |= SEEN_XREG;
+					EMIT2(0x39, 0xd8); /* cmp %ebx,%eax */
+					break;
+				case BPF_S_JMP_JSET_X:
+					seen |= SEEN_XREG;
+					EMIT2(0x85, 0xd8); /* test %ebx,%eax */
+					break;
+				case BPF_S_JMP_JEQ_K:
+					if (K == 0) {
+						EMIT2(0x85, 0xc0); /* test   %eax,%eax */
+						break;
+					}
+				case BPF_S_JMP_JGT_K:
+				case BPF_S_JMP_JGE_K:
+					if (K <= 127)
+						EMIT3(0x83, 0xf8, K); /* cmp imm8,%eax */
+					else
+						EMIT1_off32(0x3d, K); /* cmp imm32,%eax */
+					break;
+				case BPF_S_JMP_JSET_K:
+					if (K <= 0xFF)
+						EMIT2(0xa8, K); /* test imm8,%al */
+					else if (!(K & 0xFFFF00FF))
+						EMIT3(0xf6, 0xc4, K >> 8); /* test imm8,%ah */
+					else if (K <= 0xFFFF) {
+						EMIT2(0x66, 0xa9); /* test imm16,%ax */
+						EMIT(K, 2);
+					} else {
+						EMIT1_off32(0xa9, K); /* test imm32,%eax */
+					}
+					break;
+				}
+				if (filter[i].jt != 0) {
+					if (filter[i].jf)
+						t_offset += is_near(f_offset) ? 2 : 6;
+					EMIT_COND_JMP(t_op, t_offset);
+					if (filter[i].jf)
+						EMIT_JMP(f_offset);
+					break;
+				}
+				EMIT_COND_JMP(f_op, f_offset);
+				break;
+			default:
+				/* hmm, too complex filter, give up with jit compiler */
+				goto out;
+			}
+			ilen = prog - temp;
+			if (image) {
+				if (unlikely(proglen + ilen > oldproglen)) {
+					pr_err("bpb_jit_compile fatal error\n");
+					kfree(addrs);
+					module_free(NULL, image);
+					return;
+				}
+				memcpy(image + proglen, temp, ilen);
+			}
+			proglen += ilen;
+			addrs[i] = proglen;
+			prog = temp;
+		}
+		/* last bpf instruction is always a RET :
+		 * use it to give the cleanup instruction(s) addr
+		 */
+		cleanup_addr = proglen - 1; /* ret */
+		if (seen)
+			cleanup_addr -= 1; /* leaveq */
+		if (seen & SEEN_XREG)
+			cleanup_addr -= 4; /* mov  -8(%rbp),%rbx */
+
+		if (image) {
+			WARN_ON(proglen != oldproglen);
+			break;
+		}
+		if (proglen == oldproglen) {
+			image = module_alloc(max_t(unsigned int,
+						   proglen,
+						   sizeof(struct work_struct)));
+			if (!image)
+				goto out;
+		}
+		oldproglen = proglen;
+	}
+	if (bpf_jit_enable > 1)
+		pr_err("flen=%d proglen=%u pass=%d image=%p\n",
+		       flen, proglen, pass, image);
+
+	if (image) {
+		if (bpf_jit_enable > 1)
+			print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_ADDRESS,
+				       16, 1, image, proglen, false);
+
+		bpf_flush_icache(image, image + proglen);
+
+		fp->bpf_func = (void *)image;
+	}
+out:
+	kfree(addrs);
+	return;
+}
+
+static void jit_free_defer(struct work_struct *arg)
+{
+	module_free(NULL, arg);
+}
+
+/* run from softirq, we must use a work_struct to call
+ * module_free() from process context
+ */
+void bpf_jit_free(struct sk_filter *fp)
+{
+	if (fp->bpf_func != sk_run_filter) {
+		struct work_struct *work = (struct work_struct *)fp->bpf_func;
+
+		INIT_WORK(work, jit_free_defer);
+		schedule_work(work);
+	}
+}
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index 2d49d4e19a3..a5b64ab4cd6 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -16,17 +16,6 @@
 #include <asm/stacktrace.h>
 #include <linux/compat.h>
 
-static void backtrace_warning_symbol(void *data, char *msg,
-				     unsigned long symbol)
-{
-	/* Ignore warnings */
-}
-
-static void backtrace_warning(void *data, char *msg)
-{
-	/* Ignore warnings */
-}
-
 static int backtrace_stack(void *data, char *name)
 {
 	/* Yes, we want all stacks */
@@ -42,8 +31,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
 }
 
 static struct stacktrace_ops backtrace_ops = {
-	.warning	= backtrace_warning,
-	.warning_symbol	= backtrace_warning_symbol,
 	.stack		= backtrace_stack,
 	.address	= backtrace_address,
 	.walk_stack	= print_context_stack,
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index e37b407a0ee..8214724ce54 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -108,7 +108,8 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 		}
 		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0,
 					       (type == PCI_CAP_ID_MSIX) ?
-					       "msi-x" : "msi");
+					       "msi-x" : "msi",
+					       DOMID_SELF);
 		if (irq < 0)
 			goto error;
 		dev_dbg(&dev->dev,
@@ -148,7 +149,8 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0,
 					       (type == PCI_CAP_ID_MSIX) ?
 					       "pcifront-msi-x" :
-					       "pcifront-msi");
+					       "pcifront-msi",
+						DOMID_SELF);
 		if (irq < 0)
 			goto free;
 		i++;
@@ -190,9 +192,16 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 
 	list_for_each_entry(msidesc, &dev->msi_list, list) {
 		struct physdev_map_pirq map_irq;
+		domid_t domid;
+
+		domid = ret = xen_find_device_domain_owner(dev);
+		/* N.B. Casting int's -ENODEV to uint16_t results in 0xFFED,
+		 * hence check ret value for < 0. */
+		if (ret < 0)
+			domid = DOMID_SELF;
 
 		memset(&map_irq, 0, sizeof(map_irq));
-		map_irq.domid = DOMID_SELF;
+		map_irq.domid = domid;
 		map_irq.type = MAP_PIRQ_TYPE_MSI;
 		map_irq.index = -1;
 		map_irq.pirq = -1;
@@ -215,14 +224,16 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 
 		ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
 		if (ret) {
-			dev_warn(&dev->dev, "xen map irq failed %d\n", ret);
+			dev_warn(&dev->dev, "xen map irq failed %d for %d domain\n",
+				 ret, domid);
 			goto out;
 		}
 
 		ret = xen_bind_pirq_msi_to_irq(dev, msidesc,
 					       map_irq.pirq, map_irq.index,
 					       (type == PCI_CAP_ID_MSIX) ?
-					       "msi-x" : "msi");
+					       "msi-x" : "msi",
+						domid);
 		if (ret < 0)
 			goto out;
 	}
@@ -461,3 +472,78 @@ void __init xen_setup_pirqs(void)
 	}
 }
 #endif
+
+#ifdef CONFIG_XEN_DOM0
+struct xen_device_domain_owner {
+	domid_t domain;
+	struct pci_dev *dev;
+	struct list_head list;
+};
+
+static DEFINE_SPINLOCK(dev_domain_list_spinlock);
+static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list);
+
+static struct xen_device_domain_owner *find_device(struct pci_dev *dev)
+{
+	struct xen_device_domain_owner *owner;
+
+	list_for_each_entry(owner, &dev_domain_list, list) {
+		if (owner->dev == dev)
+			return owner;
+	}
+	return NULL;
+}
+
+int xen_find_device_domain_owner(struct pci_dev *dev)
+{
+	struct xen_device_domain_owner *owner;
+	int domain = -ENODEV;
+
+	spin_lock(&dev_domain_list_spinlock);
+	owner = find_device(dev);
+	if (owner)
+		domain = owner->domain;
+	spin_unlock(&dev_domain_list_spinlock);
+	return domain;
+}
+EXPORT_SYMBOL_GPL(xen_find_device_domain_owner);
+
+int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain)
+{
+	struct xen_device_domain_owner *owner;
+
+	owner = kzalloc(sizeof(struct xen_device_domain_owner), GFP_KERNEL);
+	if (!owner)
+		return -ENODEV;
+
+	spin_lock(&dev_domain_list_spinlock);
+	if (find_device(dev)) {
+		spin_unlock(&dev_domain_list_spinlock);
+		kfree(owner);
+		return -EEXIST;
+	}
+	owner->domain = domain;
+	owner->dev = dev;
+	list_add_tail(&owner->list, &dev_domain_list);
+	spin_unlock(&dev_domain_list_spinlock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xen_register_device_domain_owner);
+
+int xen_unregister_device_domain_owner(struct pci_dev *dev)
+{
+	struct xen_device_domain_owner *owner;
+
+	spin_lock(&dev_domain_list_spinlock);
+	owner = find_device(dev);
+	if (!owner) {
+		spin_unlock(&dev_domain_list_spinlock);
+		return -ENODEV;
+	}
+	list_del(&owner->list);
+	spin_unlock(&dev_domain_list_spinlock);
+	kfree(owner);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner);
+#endif
diff --git a/arch/x86/platform/ce4100/falconfalls.dts b/arch/x86/platform/ce4100/falconfalls.dts
index 2d6d226f2b1..e70be38ce03 100644
--- a/arch/x86/platform/ce4100/falconfalls.dts
+++ b/arch/x86/platform/ce4100/falconfalls.dts
@@ -347,7 +347,7 @@
 						   "pciclass0c03";
 
 					reg = <0x16800 0x0 0x0 0x0 0x0>;
-					interrupts = <22 3>;
+					interrupts = <22 1>;
 				};
 
 				usb@d,1 {
@@ -357,7 +357,7 @@
 						   "pciclass0c03";
 
 					reg = <0x16900 0x0 0x0 0x0 0x0>;
-					interrupts = <22 3>;
+					interrupts = <22 1>;
 				};
 
 				sata@e,0 {
@@ -367,7 +367,7 @@
 						   "pciclass0106";
 
 					reg = <0x17000 0x0 0x0 0x0 0x0>;
-					interrupts = <23 3>;
+					interrupts = <23 1>;
 				};
 
 				flash@f,0 {
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 0fe27d7c625..b30aa26a8df 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -145,17 +145,6 @@ static void virt_efi_reset_system(int reset_type,
 		       data_size, data);
 }
 
-static efi_status_t virt_efi_set_virtual_address_map(
-	unsigned long memory_map_size,
-	unsigned long descriptor_size,
-	u32 descriptor_version,
-	efi_memory_desc_t *virtual_map)
-{
-	return efi_call_virt4(set_virtual_address_map,
-			      memory_map_size, descriptor_size,
-			      descriptor_version, virtual_map);
-}
-
 static efi_status_t __init phys_efi_set_virtual_address_map(
 	unsigned long memory_map_size,
 	unsigned long descriptor_size,
@@ -468,11 +457,25 @@ void __init efi_init(void)
 #endif
 }
 
+void __init efi_set_executable(efi_memory_desc_t *md, bool executable)
+{
+	u64 addr, npages;
+
+	addr = md->virt_addr;
+	npages = md->num_pages;
+
+	memrange_efi_to_native(&addr, &npages);
+
+	if (executable)
+		set_memory_x(addr, npages);
+	else
+		set_memory_nx(addr, npages);
+}
+
 static void __init runtime_code_page_mkexec(void)
 {
 	efi_memory_desc_t *md;
 	void *p;
-	u64 addr, npages;
 
 	/* Make EFI runtime service code area executable */
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
@@ -481,10 +484,7 @@ static void __init runtime_code_page_mkexec(void)
 		if (md->type != EFI_RUNTIME_SERVICES_CODE)
 			continue;
 
-		addr = md->virt_addr;
-		npages = md->num_pages;
-		memrange_efi_to_native(&addr, &npages);
-		set_memory_x(addr, npages);
+		efi_set_executable(md, true);
 	}
 }
 
@@ -498,13 +498,42 @@ static void __init runtime_code_page_mkexec(void)
  */
 void __init efi_enter_virtual_mode(void)
 {
-	efi_memory_desc_t *md;
+	efi_memory_desc_t *md, *prev_md = NULL;
 	efi_status_t status;
 	unsigned long size;
 	u64 end, systab, addr, npages, end_pfn;
-	void *p, *va;
+	void *p, *va, *new_memmap = NULL;
+	int count = 0;
 
 	efi.systab = NULL;
+
+	/* Merge contiguous regions of the same type and attribute */
+	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+		u64 prev_size;
+		md = p;
+
+		if (!prev_md) {
+			prev_md = md;
+			continue;
+		}
+
+		if (prev_md->type != md->type ||
+		    prev_md->attribute != md->attribute) {
+			prev_md = md;
+			continue;
+		}
+
+		prev_size = prev_md->num_pages << EFI_PAGE_SHIFT;
+
+		if (md->phys_addr == (prev_md->phys_addr + prev_size)) {
+			prev_md->num_pages += md->num_pages;
+			md->type = EFI_RESERVED_TYPE;
+			md->attribute = 0;
+			continue;
+		}
+		prev_md = md;
+	}
+
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
 		md = p;
 		if (!(md->attribute & EFI_MEMORY_RUNTIME))
@@ -541,15 +570,21 @@ void __init efi_enter_virtual_mode(void)
 			systab += md->virt_addr - md->phys_addr;
 			efi.systab = (efi_system_table_t *) (unsigned long) systab;
 		}
+		new_memmap = krealloc(new_memmap,
+				      (count + 1) * memmap.desc_size,
+				      GFP_KERNEL);
+		memcpy(new_memmap + (count * memmap.desc_size), md,
+		       memmap.desc_size);
+		count++;
 	}
 
 	BUG_ON(!efi.systab);
 
 	status = phys_efi_set_virtual_address_map(
-		memmap.desc_size * memmap.nr_map,
+		memmap.desc_size * count,
 		memmap.desc_size,
 		memmap.desc_version,
-		memmap.phys_map);
+		(efi_memory_desc_t *)__pa(new_memmap));
 
 	if (status != EFI_SUCCESS) {
 		printk(KERN_ALERT "Unable to switch EFI into virtual mode "
@@ -572,11 +607,12 @@ void __init efi_enter_virtual_mode(void)
 	efi.set_variable = virt_efi_set_variable;
 	efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
 	efi.reset_system = virt_efi_reset_system;
-	efi.set_virtual_address_map = virt_efi_set_virtual_address_map;
+	efi.set_virtual_address_map = NULL;
 	if (__supported_pte_mask & _PAGE_NX)
 		runtime_code_page_mkexec();
 	early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
 	memmap.map = NULL;
+	kfree(new_memmap);
 }
 
 /*
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index ac0621a7ac3..2649426a790 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -41,22 +41,7 @@
 static pgd_t save_pgd __initdata;
 static unsigned long efi_flags __initdata;
 
-static void __init early_mapping_set_exec(unsigned long start,
-					  unsigned long end,
-					  int executable)
-{
-	unsigned long num_pages;
-
-	start &= PMD_MASK;
-	end = (end + PMD_SIZE - 1) & PMD_MASK;
-	num_pages = (end - start) >> PAGE_SHIFT;
-	if (executable)
-		set_memory_x((unsigned long)__va(start), num_pages);
-	else
-		set_memory_nx((unsigned long)__va(start), num_pages);
-}
-
-static void __init early_runtime_code_mapping_set_exec(int executable)
+static void __init early_code_mapping_set_exec(int executable)
 {
 	efi_memory_desc_t *md;
 	void *p;
@@ -67,11 +52,8 @@ static void __init early_runtime_code_mapping_set_exec(int executable)
 	/* Make EFI runtime service code area executable */
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
 		md = p;
-		if (md->type == EFI_RUNTIME_SERVICES_CODE) {
-			unsigned long end;
-			end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
-			early_mapping_set_exec(md->phys_addr, end, executable);
-		}
+		if (md->type == EFI_RUNTIME_SERVICES_CODE)
+			efi_set_executable(md, executable);
 	}
 }
 
@@ -79,7 +61,7 @@ void __init efi_call_phys_prelog(void)
 {
 	unsigned long vaddress;
 
-	early_runtime_code_mapping_set_exec(1);
+	early_code_mapping_set_exec(1);
 	local_irq_save(efi_flags);
 	vaddress = (unsigned long)__va(0x0UL);
 	save_pgd = *pgd_offset_k(0x0UL);
@@ -95,7 +77,7 @@ void __init efi_call_phys_epilog(void)
 	set_pgd(pgd_offset_k(0x0UL), save_pgd);
 	__flush_tlb_all();
 	local_irq_restore(efi_flags);
-	early_runtime_code_mapping_set_exec(0);
+	early_code_mapping_set_exec(0);
 }
 
 void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
@@ -107,8 +89,10 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
 		return ioremap(phys_addr, size);
 
 	last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
-	if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size)
-		return NULL;
+	if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) {
+		unsigned long top = last_map_pfn << PAGE_SHIFT;
+		efi_ioremap(top, size - (top - phys_addr), type);
+	}
 
 	return (void __iomem *)__va(phys_addr);
 }
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index 275dbc19e2c..7000e74b308 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -194,7 +194,7 @@ static unsigned long __init mrst_calibrate_tsc(void)
 	return 0;
 }
 
-void __init mrst_time_init(void)
+static void __init mrst_time_init(void)
 {
 	sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
 	switch (mrst_timer_options) {
@@ -216,7 +216,7 @@ void __init mrst_time_init(void)
 	apbt_time_init();
 }
 
-void __cpuinit mrst_arch_setup(void)
+static void __cpuinit mrst_arch_setup(void)
 {
 	if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27)
 		__mrst_cpu_chip = MRST_CPU_CHIP_PENWELL;
diff --git a/arch/x86/platform/olpc/Makefile b/arch/x86/platform/olpc/Makefile
index c2a8cab65e5..81c5e2165c2 100644
--- a/arch/x86/platform/olpc/Makefile
+++ b/arch/x86/platform/olpc/Makefile
@@ -1,4 +1,2 @@
-obj-$(CONFIG_OLPC)		+= olpc.o
+obj-$(CONFIG_OLPC)		+= olpc.o olpc_ofw.o olpc_dt.o
 obj-$(CONFIG_OLPC_XO1)		+= olpc-xo1.o
-obj-$(CONFIG_OLPC)		+= olpc_ofw.o
-obj-$(CONFIG_OF_PROMTREE)	+= olpc_dt.o
diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c
index edaf3fe8dc5..0060fd59ea0 100644
--- a/arch/x86/platform/olpc/olpc.c
+++ b/arch/x86/platform/olpc/olpc.c
@@ -18,6 +18,7 @@
 #include <linux/io.h>
 #include <linux/string.h>
 #include <linux/platform_device.h>
+#include <linux/of.h>
 
 #include <asm/geode.h>
 #include <asm/setup.h>
@@ -187,41 +188,43 @@ err:
 }
 EXPORT_SYMBOL_GPL(olpc_ec_cmd);
 
-static bool __init check_ofw_architecture(void)
+static bool __init check_ofw_architecture(struct device_node *root)
 {
-	size_t propsize;
-	char olpc_arch[5];
-	const void *args[] = { NULL, "architecture", olpc_arch, (void *)5 };
-	void *res[] = { &propsize };
+	const char *olpc_arch;
+	int propsize;
 
-	if (olpc_ofw("getprop", args, res)) {
-		printk(KERN_ERR "ofw: getprop call failed!\n");
-		return false;
-	}
+	olpc_arch = of_get_property(root, "architecture", &propsize);
 	return propsize == 5 && strncmp("OLPC", olpc_arch, 5) == 0;
 }
 
-static u32 __init get_board_revision(void)
+static u32 __init get_board_revision(struct device_node *root)
 {
-	size_t propsize;
-	__be32 rev;
-	const void *args[] = { NULL, "board-revision-int", &rev, (void *)4 };
-	void *res[] = { &propsize };
-
-	if (olpc_ofw("getprop", args, res) || propsize != 4) {
-		printk(KERN_ERR "ofw: getprop call failed!\n");
-		return cpu_to_be32(0);
-	}
-	return be32_to_cpu(rev);
+	int propsize;
+	const __be32 *rev;
+
+	rev = of_get_property(root, "board-revision-int", &propsize);
+	if (propsize != 4)
+		return 0;
+
+	return be32_to_cpu(*rev);
 }
 
 static bool __init platform_detect(void)
 {
-	if (!check_ofw_architecture())
+	struct device_node *root = of_find_node_by_path("/");
+	bool success;
+
+	if (!root)
 		return false;
-	olpc_platform_info.flags |= OLPC_F_PRESENT;
-	olpc_platform_info.boardrev = get_board_revision();
-	return true;
+
+	success = check_ofw_architecture(root);
+	if (success) {
+		olpc_platform_info.boardrev = get_board_revision(root);
+		olpc_platform_info.flags |= OLPC_F_PRESENT;
+	}
+
+	of_node_put(root);
+	return success;
 }
 
 static int __init add_xo1_platform_devices(void)
diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c
index 044bda5b317..d39f63d017d 100644
--- a/arch/x86/platform/olpc/olpc_dt.c
+++ b/arch/x86/platform/olpc/olpc_dt.c
@@ -19,7 +19,9 @@
 #include <linux/kernel.h>
 #include <linux/bootmem.h>
 #include <linux/of.h>
+#include <linux/of_platform.h>
 #include <linux/of_pdt.h>
+#include <asm/olpc.h>
 #include <asm/olpc_ofw.h>
 
 static phandle __init olpc_dt_getsibling(phandle node)
@@ -180,3 +182,20 @@ void __init olpc_dt_build_devicetree(void)
 	pr_info("PROM DT: Built device tree with %u bytes of memory.\n",
 			prom_early_allocated);
 }
+
+/* A list of DT node/bus matches that we want to expose as platform devices */
+static struct of_device_id __initdata of_ids[] = {
+	{ .compatible = "olpc,xo1-battery" },
+	{ .compatible = "olpc,xo1-dcon" },
+	{ .compatible = "olpc,xo1-rtc" },
+	{},
+};
+
+static int __init olpc_create_platform_devices(void)
+{
+	if (machine_is_olpc())
+		return of_platform_bus_probe(NULL, of_ids, NULL);
+	else
+		return 0;
+}
+device_initcall(olpc_create_platform_devices);
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 7cb6424317f..c58e0ea39ef 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -699,16 +699,17 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 					  struct mm_struct *mm,
 					  unsigned long va, unsigned int cpu)
 {
-	int tcpu;
-	int uvhub;
 	int locals = 0;
 	int remotes = 0;
 	int hubs = 0;
+	int tcpu;
+	int tpnode;
 	struct bau_desc *bau_desc;
 	struct cpumask *flush_mask;
 	struct ptc_stats *stat;
 	struct bau_control *bcp;
 	struct bau_control *tbcp;
+	struct hub_and_pnode *hpp;
 
 	/* kernel was booted 'nobau' */
 	if (nobau)
@@ -750,11 +751,18 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 	bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu;
 	bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
 
-	/* cpu statistics */
 	for_each_cpu(tcpu, flush_mask) {
-		uvhub = uv_cpu_to_blade_id(tcpu);
-		bau_uvhub_set(uvhub, &bau_desc->distribution);
-		if (uvhub == bcp->uvhub)
+		/*
+		 * The distribution vector is a bit map of pnodes, relative
+		 * to the partition base pnode (and the partition base nasid
+		 * in the header).
+		 * Translate cpu to pnode and hub using an array stored
+		 * in local memory.
+		 */
+		hpp = &bcp->socket_master->target_hub_and_pnode[tcpu];
+		tpnode = hpp->pnode - bcp->partition_base_pnode;
+		bau_uvhub_set(tpnode, &bau_desc->distribution);
+		if (hpp->uvhub == bcp->uvhub)
 			locals++;
 		else
 			remotes++;
@@ -855,7 +863,7 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
  * an interrupt, but causes an error message to be returned to
  * the sender.
  */
-static void uv_enable_timeouts(void)
+static void __init uv_enable_timeouts(void)
 {
 	int uvhub;
 	int nuvhubs;
@@ -1326,10 +1334,10 @@ static int __init uv_ptc_init(void)
 }
 
 /*
- * initialize the sending side's sending buffers
+ * Initialize the sending side's sending buffers.
  */
 static void
-uv_activation_descriptor_init(int node, int pnode)
+uv_activation_descriptor_init(int node, int pnode, int base_pnode)
 {
 	int i;
 	int cpu;
@@ -1352,11 +1360,11 @@ uv_activation_descriptor_init(int node, int pnode)
 	n = pa >> uv_nshift;
 	m = pa & uv_mmask;
 
+	/* the 14-bit pnode */
 	uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE,
 			      (n << UV_DESC_BASE_PNODE_SHIFT | m));
-
 	/*
-	 * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
+	 * Initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
 	 * cpu even though we only use the first one; one descriptor can
 	 * describe a broadcast to 256 uv hubs.
 	 */
@@ -1365,12 +1373,13 @@ uv_activation_descriptor_init(int node, int pnode)
 		memset(bd2, 0, sizeof(struct bau_desc));
 		bd2->header.sw_ack_flag = 1;
 		/*
-		 * base_dest_nodeid is the nasid of the first uvhub
-		 * in the partition. The bit map will indicate uvhub numbers,
-		 * which are 0-N in a partition. Pnodes are unique system-wide.
+		 * The base_dest_nasid set in the message header is the nasid
+		 * of the first uvhub in the partition. The bit map will
+		 * indicate destination pnode numbers relative to that base.
+		 * They may not be consecutive if nasid striding is being used.
 		 */
-		bd2->header.base_dest_nodeid = UV_PNODE_TO_NASID(uv_partition_base_pnode);
-		bd2->header.dest_subnodeid = 0x10; /* the LB */
+		bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode);
+		bd2->header.dest_subnodeid = UV_LB_SUBNODEID;
 		bd2->header.command = UV_NET_ENDPOINT_INTD;
 		bd2->header.int_both = 1;
 		/*
@@ -1442,7 +1451,7 @@ uv_payload_queue_init(int node, int pnode)
 /*
  * Initialization of each UV hub's structures
  */
-static void __init uv_init_uvhub(int uvhub, int vector)
+static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode)
 {
 	int node;
 	int pnode;
@@ -1450,11 +1459,11 @@ static void __init uv_init_uvhub(int uvhub, int vector)
 
 	node = uvhub_to_first_node(uvhub);
 	pnode = uv_blade_to_pnode(uvhub);
-	uv_activation_descriptor_init(node, pnode);
+	uv_activation_descriptor_init(node, pnode, base_pnode);
 	uv_payload_queue_init(node, pnode);
 	/*
-	 * the below initialization can't be in firmware because the
-	 * messaging IRQ will be determined by the OS
+	 * The below initialization can't be in firmware because the
+	 * messaging IRQ will be determined by the OS.
 	 */
 	apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits;
 	uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
@@ -1491,10 +1500,11 @@ calculate_destination_timeout(void)
 /*
  * initialize the bau_control structure for each cpu
  */
-static int __init uv_init_per_cpu(int nuvhubs)
+static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode)
 {
 	int i;
 	int cpu;
+	int tcpu;
 	int pnode;
 	int uvhub;
 	int have_hmaster;
@@ -1528,6 +1538,15 @@ static int __init uv_init_per_cpu(int nuvhubs)
 		bcp = &per_cpu(bau_control, cpu);
 		memset(bcp, 0, sizeof(struct bau_control));
 		pnode = uv_cpu_hub_info(cpu)->pnode;
+		if ((pnode - base_part_pnode) >= UV_DISTRIBUTION_SIZE) {
+			printk(KERN_EMERG
+				"cpu %d pnode %d-%d beyond %d; BAU disabled\n",
+				cpu, pnode, base_part_pnode,
+				UV_DISTRIBUTION_SIZE);
+			return 1;
+		}
+		bcp->osnode = cpu_to_node(cpu);
+		bcp->partition_base_pnode = uv_partition_base_pnode;
 		uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
 		*(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8));
 		bdp = &uvhub_descs[uvhub];
@@ -1536,7 +1555,7 @@ static int __init uv_init_per_cpu(int nuvhubs)
 		bdp->pnode = pnode;
 		/* kludge: 'assuming' one node per socket, and assuming that
 		   disabling a socket just leaves a gap in node numbers */
-		socket = (cpu_to_node(cpu) & 1);
+		socket = bcp->osnode & 1;
 		bdp->socket_mask |= (1 << socket);
 		sdp = &bdp->socket[socket];
 		sdp->cpu_number[sdp->num_cpus] = cpu;
@@ -1585,6 +1604,20 @@ static int __init uv_init_per_cpu(int nuvhubs)
 nextsocket:
 			socket++;
 			socket_mask = (socket_mask >> 1);
+			/* each socket gets a local array of pnodes/hubs */
+			bcp = smaster;
+			bcp->target_hub_and_pnode = kmalloc_node(
+				sizeof(struct hub_and_pnode) *
+				num_possible_cpus(), GFP_KERNEL, bcp->osnode);
+			memset(bcp->target_hub_and_pnode, 0,
+				sizeof(struct hub_and_pnode) *
+				num_possible_cpus());
+			for_each_present_cpu(tcpu) {
+				bcp->target_hub_and_pnode[tcpu].pnode =
+					uv_cpu_hub_info(tcpu)->pnode;
+				bcp->target_hub_and_pnode[tcpu].uvhub =
+					uv_cpu_hub_info(tcpu)->numa_blade_id;
+			}
 		}
 	}
 	kfree(uvhub_descs);
@@ -1637,21 +1670,22 @@ static int __init uv_bau_init(void)
 	spin_lock_init(&disable_lock);
 	congested_cycles = microsec_2_cycles(congested_response_us);
 
-	if (uv_init_per_cpu(nuvhubs)) {
-		nobau = 1;
-		return 0;
-	}
-
 	uv_partition_base_pnode = 0x7fffffff;
-	for (uvhub = 0; uvhub < nuvhubs; uvhub++)
+	for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
 		if (uv_blade_nr_possible_cpus(uvhub) &&
 			(uv_blade_to_pnode(uvhub) < uv_partition_base_pnode))
 			uv_partition_base_pnode = uv_blade_to_pnode(uvhub);
+	}
+
+	if (uv_init_per_cpu(nuvhubs, uv_partition_base_pnode)) {
+		nobau = 1;
+		return 0;
+	}
 
 	vector = UV_BAU_MESSAGE;
 	for_each_possible_blade(uvhub)
 		if (uv_blade_nr_possible_cpus(uvhub))
-			uv_init_uvhub(uvhub, vector);
+			uv_init_uvhub(uvhub, vector, uv_partition_base_pnode);
 
 	uv_enable_timeouts();
 	alloc_intr_gate(vector, uv_bau_message_intr1);
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index 9daf5d1af9f..0eb90184515 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -40,7 +40,6 @@ static struct clocksource clocksource_uv = {
 	.rating		= 400,
 	.read		= uv_read_rtc,
 	.mask		= (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK,
-	.shift		= 10,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -372,14 +371,11 @@ static __init int uv_rtc_setup_clock(void)
 	if (!is_uv_system())
 		return -ENODEV;
 
-	clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second,
-				clocksource_uv.shift);
-
 	/* If single blade, prefer tsc */
 	if (uv_num_possible_blades() == 1)
 		clocksource_uv.rating = 250;
 
-	rc = clocksource_register(&clocksource_uv);
+	rc = clocksource_register_hz(&clocksource_uv, sn_rtc_cycles_per_second);
 	if (rc)
 		printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc);
 	else
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index e3c6a06cf72..dd7b88f2ec7 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -235,7 +235,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 	*dx &= maskedx;
 }
 
-static __init void xen_init_cpuid_mask(void)
+static void __init xen_init_cpuid_mask(void)
 {
 	unsigned int ax, bx, cx, dx;
 	unsigned int xsave_mask;
@@ -400,7 +400,7 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
 /*
  * load_gdt for early boot, when the gdt is only mapped once
  */
-static __init void xen_load_gdt_boot(const struct desc_ptr *dtr)
+static void __init xen_load_gdt_boot(const struct desc_ptr *dtr)
 {
 	unsigned long va = dtr->address;
 	unsigned int size = dtr->size + 1;
@@ -662,7 +662,7 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
  * Version of write_gdt_entry for use at early boot-time needed to
  * update an entry as simply as possible.
  */
-static __init void xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
+static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
 					    const void *desc, int type)
 {
 	switch (type) {
@@ -933,18 +933,18 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
 	return ret;
 }
 
-static const struct pv_info xen_info __initdata = {
+static const struct pv_info xen_info __initconst = {
 	.paravirt_enabled = 1,
 	.shared_kernel_pmd = 0,
 
 	.name = "Xen",
 };
 
-static const struct pv_init_ops xen_init_ops __initdata = {
+static const struct pv_init_ops xen_init_ops __initconst = {
 	.patch = xen_patch,
 };
 
-static const struct pv_cpu_ops xen_cpu_ops __initdata = {
+static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 	.cpuid = xen_cpuid,
 
 	.set_debugreg = xen_set_debugreg,
@@ -1004,7 +1004,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.end_context_switch = xen_end_context_switch,
 };
 
-static const struct pv_apic_ops xen_apic_ops __initdata = {
+static const struct pv_apic_ops xen_apic_ops __initconst = {
 #ifdef CONFIG_X86_LOCAL_APIC
 	.startup_ipi_hook = paravirt_nop,
 #endif
@@ -1055,7 +1055,7 @@ int xen_panic_handler_init(void)
 	return 0;
 }
 
-static const struct machine_ops __initdata xen_machine_ops = {
+static const struct machine_ops xen_machine_ops __initconst = {
 	.restart = xen_restart,
 	.halt = xen_machine_halt,
 	.power_off = xen_machine_halt,
@@ -1332,7 +1332,7 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __cpuinitdata xen_hvm_cpu_notifier = {
+static struct notifier_block xen_hvm_cpu_notifier __cpuinitdata = {
 	.notifier_call	= xen_hvm_cpu_notify,
 };
 
@@ -1381,7 +1381,7 @@ bool xen_hvm_need_lapic(void)
 }
 EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
 
-const __refconst struct hypervisor_x86 x86_hyper_xen_hvm = {
+const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = {
 	.name			= "Xen HVM",
 	.detect			= xen_hvm_platform,
 	.init_platform		= xen_hvm_guest_init,
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 6a6fe893964..8bbb465b6f0 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -113,7 +113,7 @@ static void xen_halt(void)
 		xen_safe_halt();
 }
 
-static const struct pv_irq_ops xen_irq_ops __initdata = {
+static const struct pv_irq_ops xen_irq_ops __initconst = {
 	.save_fl = PV_CALLEE_SAVE(xen_save_fl),
 	.restore_fl = PV_CALLEE_SAVE(xen_restore_fl),
 	.irq_disable = PV_CALLEE_SAVE(xen_irq_disable),
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index aef7af92b28..02d75246037 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1054,7 +1054,7 @@ void xen_mm_pin_all(void)
  * that's before we have page structures to store the bits.  So do all
  * the book-keeping now.
  */
-static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page,
+static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
 				  enum pt_level level)
 {
 	SetPagePinned(page);
@@ -1187,7 +1187,7 @@ static void drop_other_mm_ref(void *info)
 
 	active_mm = percpu_read(cpu_tlbstate.active_mm);
 
-	if (active_mm == mm)
+	if (active_mm == mm && percpu_read(cpu_tlbstate.state) != TLBSTATE_OK)
 		leave_mm(smp_processor_id());
 
 	/* If this cpu still has a stale cr3 reference, then make sure
@@ -1271,13 +1271,27 @@ void xen_exit_mmap(struct mm_struct *mm)
 	spin_unlock(&mm->page_table_lock);
 }
 
-static __init void xen_pagetable_setup_start(pgd_t *base)
+static void __init xen_pagetable_setup_start(pgd_t *base)
 {
 }
 
+static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
+{
+	/* reserve the range used */
+	native_pagetable_reserve(start, end);
+
+	/* set as RW the rest */
+	printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end,
+			PFN_PHYS(pgt_buf_top));
+	while (end < PFN_PHYS(pgt_buf_top)) {
+		make_lowmem_page_readwrite(__va(end));
+		end += PAGE_SIZE;
+	}
+}
+
 static void xen_post_allocator_init(void);
 
-static __init void xen_pagetable_setup_done(pgd_t *base)
+static void __init xen_pagetable_setup_done(pgd_t *base)
 {
 	xen_setup_shared_info();
 	xen_post_allocator_init();
@@ -1474,7 +1488,7 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
 }
 
 #ifdef CONFIG_X86_32
-static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
+static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
 {
 	/* If there's an existing pte, then don't allow _PAGE_RW to be set */
 	if (pte_val_ma(*ptep) & _PAGE_PRESENT)
@@ -1484,7 +1498,7 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
 	return pte;
 }
 #else /* CONFIG_X86_64 */
-static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
+static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
 {
 	unsigned long pfn = pte_pfn(pte);
 
@@ -1495,7 +1509,7 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
 	 * it is RO.
 	 */
 	if (((!is_early_ioremap_ptep(ptep) &&
-			pfn >= pgt_buf_start && pfn < pgt_buf_end)) ||
+			pfn >= pgt_buf_start && pfn < pgt_buf_top)) ||
 			(is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1)))
 		pte = pte_wrprotect(pte);
 
@@ -1505,7 +1519,7 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
 
 /* Init-time set_pte while constructing initial pagetables, which
    doesn't allow RO pagetable pages to be remapped RW */
-static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
+static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
 {
 	pte = mask_rw_pte(ptep, pte);
 
@@ -1523,7 +1537,7 @@ static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
 
 /* Early in boot, while setting up the initial pagetable, assume
    everything is pinned. */
-static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
+static void __init xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
 {
 #ifdef CONFIG_FLATMEM
 	BUG_ON(mem_map);	/* should only be used early */
@@ -1533,7 +1547,7 @@ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
 }
 
 /* Used for pmd and pud */
-static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
+static void __init xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
 {
 #ifdef CONFIG_FLATMEM
 	BUG_ON(mem_map);	/* should only be used early */
@@ -1543,13 +1557,13 @@ static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
 
 /* Early release_pte assumes that all pts are pinned, since there's
    only init_mm and anything attached to that is pinned. */
-static __init void xen_release_pte_init(unsigned long pfn)
+static void __init xen_release_pte_init(unsigned long pfn)
 {
 	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
 	make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
 }
 
-static __init void xen_release_pmd_init(unsigned long pfn)
+static void __init xen_release_pmd_init(unsigned long pfn)
 {
 	make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
 }
@@ -1675,7 +1689,7 @@ static void set_page_prot(void *addr, pgprot_t prot)
 		BUG();
 }
 
-static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
+static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
 {
 	unsigned pmdidx, pteidx;
 	unsigned ident_pte;
@@ -1758,7 +1772,7 @@ static void convert_pfn_mfn(void *v)
  * of the physical mapping once some sort of allocator has been set
  * up.
  */
-__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
+pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 					 unsigned long max_pfn)
 {
 	pud_t *l3;
@@ -1829,7 +1843,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
 static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
 static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD);
 
-static __init void xen_write_cr3_init(unsigned long cr3)
+static void __init xen_write_cr3_init(unsigned long cr3)
 {
 	unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir));
 
@@ -1866,7 +1880,7 @@ static __init void xen_write_cr3_init(unsigned long cr3)
 	pv_mmu_ops.write_cr3 = &xen_write_cr3;
 }
 
-__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
+pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 					 unsigned long max_pfn)
 {
 	pmd_t *kernel_pmd;
@@ -1972,7 +1986,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 #endif
 }
 
-__init void xen_ident_map_ISA(void)
+void __init xen_ident_map_ISA(void)
 {
 	unsigned long pa;
 
@@ -1995,7 +2009,7 @@ __init void xen_ident_map_ISA(void)
 	xen_flush_tlb();
 }
 
-static __init void xen_post_allocator_init(void)
+static void __init xen_post_allocator_init(void)
 {
 #ifdef CONFIG_XEN_DEBUG
 	pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
@@ -2032,7 +2046,7 @@ static void xen_leave_lazy_mmu(void)
 	preempt_enable();
 }
 
-static const struct pv_mmu_ops xen_mmu_ops __initdata = {
+static const struct pv_mmu_ops xen_mmu_ops __initconst = {
 	.read_cr2 = xen_read_cr2,
 	.write_cr2 = xen_write_cr2,
 
@@ -2105,6 +2119,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 
 void __init xen_init_mmu_ops(void)
 {
+	x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
 	x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
 	x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
 	pv_mmu_ops = xen_mmu_ops;
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 141eb0de8b0..58efeb9d544 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -522,11 +522,20 @@ static bool __init __early_alloc_p2m(unsigned long pfn)
 	/* Boundary cross-over for the edges: */
 	if (idx) {
 		unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
+		unsigned long *mid_mfn_p;
 
 		p2m_init(p2m);
 
 		p2m_top[topidx][mididx] = p2m;
 
+		/* For save/restore we need to MFN of the P2M saved */
+		
+		mid_mfn_p = p2m_top_mfn_p[topidx];
+		WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing),
+			"P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n",
+			topidx, mididx);
+		mid_mfn_p[mididx] = virt_to_mfn(p2m);
+
 	}
 	return idx != 0;
 }
@@ -549,12 +558,29 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s,
 		pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE)
 	{
 		unsigned topidx = p2m_top_index(pfn);
-		if (p2m_top[topidx] == p2m_mid_missing) {
-			unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
+		unsigned long *mid_mfn_p;
+		unsigned long **mid;
+
+		mid = p2m_top[topidx];
+		mid_mfn_p = p2m_top_mfn_p[topidx];
+		if (mid == p2m_mid_missing) {
+			mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
 
 			p2m_mid_init(mid);
 
 			p2m_top[topidx] = mid;
+
+			BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
+		}
+		/* And the save/restore P2M tables.. */
+		if (mid_mfn_p == p2m_mid_missing_mfn) {
+			mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
+			p2m_mid_mfn_init(mid_mfn_p);
+
+			p2m_top_mfn_p[topidx] = mid_mfn_p;
+			p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
+			/* Note: we don't set mid_mfn_p[midix] here,
+		 	 * look in __early_alloc_p2m */
 		}
 	}
 
@@ -650,7 +676,7 @@ static unsigned long mfn_hash(unsigned long mfn)
 }
 
 /* Add an MFN override for a particular page */
-int m2p_add_override(unsigned long mfn, struct page *page)
+int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
 {
 	unsigned long flags;
 	unsigned long pfn;
@@ -662,7 +688,6 @@ int m2p_add_override(unsigned long mfn, struct page *page)
 	if (!PageHighMem(page)) {
 		address = (unsigned long)__va(pfn << PAGE_SHIFT);
 		ptep = lookup_address(address, &level);
-
 		if (WARN(ptep == NULL || level != PG_LEVEL_4K,
 					"m2p_add_override: pfn %lx not mapped", pfn))
 			return -EINVAL;
@@ -674,18 +699,17 @@ int m2p_add_override(unsigned long mfn, struct page *page)
 	if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
 		return -ENOMEM;
 
-	if (!PageHighMem(page))
+	if (clear_pte && !PageHighMem(page))
 		/* Just zap old mapping for now */
 		pte_clear(&init_mm, address, ptep);
-
 	spin_lock_irqsave(&m2p_override_lock, flags);
 	list_add(&page->lru,  &m2p_overrides[mfn_hash(mfn)]);
 	spin_unlock_irqrestore(&m2p_override_lock, flags);
 
 	return 0;
 }
-
-int m2p_remove_override(struct page *page)
+EXPORT_SYMBOL_GPL(m2p_add_override);
+int m2p_remove_override(struct page *page, bool clear_pte)
 {
 	unsigned long flags;
 	unsigned long mfn;
@@ -713,7 +737,7 @@ int m2p_remove_override(struct page *page)
 	spin_unlock_irqrestore(&m2p_override_lock, flags);
 	set_phys_to_machine(pfn, page->index);
 
-	if (!PageHighMem(page))
+	if (clear_pte && !PageHighMem(page))
 		set_pte_at(&init_mm, address, ptep,
 				pfn_pte(pfn, PAGE_KERNEL));
 		/* No tlb flush necessary because the caller already
@@ -721,6 +745,7 @@ int m2p_remove_override(struct page *page)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(m2p_remove_override);
 
 struct page *m2p_find_override(unsigned long mfn)
 {
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 90bac0aac3a..be1a464f6d6 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -50,7 +50,7 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
  */
 #define EXTRA_MEM_RATIO		(10)
 
-static __init void xen_add_extra_mem(unsigned long pages)
+static void __init xen_add_extra_mem(unsigned long pages)
 {
 	unsigned long pfn;
 
@@ -166,7 +166,7 @@ static unsigned long __init xen_set_identity(const struct e820entry *list,
 		if (last > end)
 			continue;
 
-		if (entry->type == E820_RAM) {
+		if ((entry->type == E820_RAM) || (entry->type == E820_UNUSABLE)) {
 			if (start > start_pci)
 				identity += set_phys_range_identity(
 						PFN_UP(start_pci), PFN_DOWN(start));
@@ -227,7 +227,11 @@ char * __init xen_memory_setup(void)
 
 	memcpy(map_raw, map, sizeof(map));
 	e820.nr_map = 0;
+#ifdef CONFIG_X86_32
+	xen_extra_mem_start = mem_end;
+#else
 	xen_extra_mem_start = max((1ULL << 32), mem_end);
+#endif
 	for (i = 0; i < memmap.nr_entries; i++) {
 		unsigned long long end;
 
@@ -336,7 +340,7 @@ static void __init fiddle_vdso(void)
 #endif
 }
 
-static __cpuinit int register_callback(unsigned type, const void *func)
+static int __cpuinit register_callback(unsigned type, const void *func)
 {
 	struct callback_register callback = {
 		.type = type,
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 30612441ed9..41038c01de4 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -46,18 +46,17 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
 static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
 
 /*
- * Reschedule call back. Nothing to do,
- * all the work is done automatically when
- * we return from the interrupt.
+ * Reschedule call back.
  */
 static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
 {
 	inc_irq_stat(irq_resched_count);
+	scheduler_ipi();
 
 	return IRQ_HANDLED;
 }
 
-static __cpuinit void cpu_bringup(void)
+static void __cpuinit cpu_bringup(void)
 {
 	int cpu = smp_processor_id();
 
@@ -85,7 +84,7 @@ static __cpuinit void cpu_bringup(void)
 	wmb();			/* make sure everything is out */
 }
 
-static __cpuinit void cpu_bringup_and_idle(void)
+static void __cpuinit cpu_bringup_and_idle(void)
 {
 	cpu_bringup();
 	cpu_idle();
@@ -242,7 +241,7 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
 	}
 }
 
-static __cpuinit int
+static int __cpuinit
 cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 {
 	struct vcpu_guest_context *ctxt;
@@ -486,7 +485,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static const struct smp_ops xen_smp_ops __initdata = {
+static const struct smp_ops xen_smp_ops __initconst = {
 	.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
 	.smp_prepare_cpus = xen_smp_prepare_cpus,
 	.smp_cpus_done = xen_smp_cpus_done,
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 2e2d370a47b..5158c505bef 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -26,8 +26,6 @@
 
 #include "xen-ops.h"
 
-#define XEN_SHIFT 22
-
 /* Xen may fire a timer up to this many ns early */
 #define TIMER_SLOP	100000
 #define NS_PER_TICK	(1000000000LL / HZ)
@@ -211,8 +209,6 @@ static struct clocksource xen_clocksource __read_mostly = {
 	.rating = 400,
 	.read = xen_clocksource_get_cycles,
 	.mask = ~0,
-	.mult = 1<<XEN_SHIFT,		/* time directly in nanoseconds */
-	.shift = XEN_SHIFT,
 	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -439,16 +435,16 @@ void xen_timer_resume(void)
 	}
 }
 
-static const struct pv_time_ops xen_time_ops __initdata = {
+static const struct pv_time_ops xen_time_ops __initconst = {
 	.sched_clock = xen_clocksource_read,
 };
 
-static __init void xen_time_init(void)
+static void __init xen_time_init(void)
 {
 	int cpu = smp_processor_id();
 	struct timespec tp;
 
-	clocksource_register(&xen_clocksource);
+	clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC);
 
 	if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
 		/* Successfully turned off 100Hz tick, so we have the
@@ -468,7 +464,7 @@ static __init void xen_time_init(void)
 	xen_setup_cpu_clockevents();
 }
 
-__init void xen_init_time_ops(void)
+void __init xen_init_time_ops(void)
 {
 	pv_time_ops = xen_time_ops;
 
@@ -490,7 +486,7 @@ static void xen_hvm_setup_cpu_clockevents(void)
 	xen_setup_cpu_clockevents();
 }
 
-__init void xen_hvm_init_time_ops(void)
+void __init xen_hvm_init_time_ops(void)
 {
 	/* vector callback is needed otherwise we cannot receive interrupts
 	 * on cpu > 0 and at this point we don't know how many cpus are
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 3112f55638c..97dfdc8757b 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -74,7 +74,7 @@ static inline void xen_hvm_smp_init(void) {}
 
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
 void __init xen_init_spinlocks(void);
-__cpuinit void xen_init_lock_cpu(int cpu);
+void __cpuinit xen_init_lock_cpu(int cpu);
 void xen_uninit_lock_cpu(int cpu);
 #else
 static inline void xen_init_spinlocks(void)