From 75da01e127f7db3b23effa6118336d303e7572a7 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 31 Jan 2013 11:25:52 +0000
Subject: ARM: KVM: vgic: force EOIed LRs to the empty state

The VGIC doesn't guarantee that an EOIed LR that has been configured
to generate a maintenance interrupt will appear as empty.

While the code recovers from this situation, it is better to clean
the LR and flag it as empty so it can be quickly recycled.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/kvm/vgic.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
index c9a17316e9f..76ea1aa5e7d 100644
--- a/arch/arm/kvm/vgic.c
+++ b/arch/arm/kvm/vgic.c
@@ -883,8 +883,7 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 			  lr, irq, vgic_cpu->vgic_lr[lr]);
 		BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
 		vgic_cpu->vgic_lr[lr] |= GICH_LR_PENDING_BIT;
-
-		goto out;
+		return true;
 	}
 
 	/* Try to use another LR for this interrupt */
@@ -898,7 +897,6 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 	vgic_cpu->vgic_irq_lr_map[irq] = lr;
 	set_bit(lr, vgic_cpu->lr_used);
 
-out:
 	if (!vgic_irq_is_edge(vcpu, irq))
 		vgic_cpu->vgic_lr[lr] |= GICH_LR_EOI;
 
@@ -1054,6 +1052,13 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 			} else {
 				vgic_cpu_irq_clear(vcpu, irq);
 			}
+
+			/*
+			 * Despite being EOIed, the LR may not have
+			 * been marked as empty.
+			 */
+			set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr);
+			vgic_cpu->vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT;
 		}
 	}
 
-- 
cgit v1.2.3


From 33c83cb3c1d84b76c8270abe5487e77f83a81b22 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 1 Feb 2013 18:28:30 +0000
Subject: ARM: KVM: vgic: take distributor lock on sync_hwstate path

Now that the maintenance interrupt handling is actually out of the
handler itself, the code becomes quite racy as we can get preempted
while we process the state.

Wrapping this code around the distributor lock ensures that we're not
preempted and relatively race-free.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/kvm/vgic.c | 24 ++++++------------------
 1 file changed, 6 insertions(+), 18 deletions(-)

diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
index 76ea1aa5e7d..0e4cfe123b3 100644
--- a/arch/arm/kvm/vgic.c
+++ b/arch/arm/kvm/vgic.c
@@ -1016,21 +1016,6 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 
 	kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr);
 
-	/*
-	 * We do not need to take the distributor lock here, since the only
-	 * action we perform is clearing the irq_active_bit for an EOIed
-	 * level interrupt.  There is a potential race with
-	 * the queuing of an interrupt in __kvm_vgic_flush_hwstate(), where we
-	 * check if the interrupt is already active. Two possibilities:
-	 *
-	 * - The queuing is occurring on the same vcpu: cannot happen,
-	 *   as we're already in the context of this vcpu, and
-	 *   executing the handler
-	 * - The interrupt has been migrated to another vcpu, and we
-	 *   ignore this interrupt for this run. Big deal. It is still
-	 *   pending though, and will get considered when this vcpu
-	 *   exits.
-	 */
 	if (vgic_cpu->vgic_misr & GICH_MISR_EOI) {
 		/*
 		 * Some level interrupts have been EOIed. Clear their
@@ -1069,9 +1054,8 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 }
 
 /*
- * Sync back the VGIC state after a guest run. We do not really touch
- * the distributor here (the irq_pending_on_cpu bit is safe to set),
- * so there is no need for taking its lock.
+ * Sync back the VGIC state after a guest run. The distributor lock is
+ * needed so we don't get preempted in the middle of the state processing.
  */
 static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 {
@@ -1117,10 +1101,14 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 
 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 {
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
 	if (!irqchip_in_kernel(vcpu->kvm))
 		return;
 
+	spin_lock(&dist->lock);
 	__kvm_vgic_sync_hwstate(vcpu);
+	spin_unlock(&dist->lock);
 }
 
 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
-- 
cgit v1.2.3


From e651eab0af88aa7a281fe9e8c36c0846552aa7fc Mon Sep 17 00:00:00 2001
From: Sricharan R <r.sricharan@ti.com>
Date: Mon, 18 Mar 2013 12:24:04 +0100
Subject: ARM: 7677/1: LPAE: Fix mapping in alloc_init_section for unaligned
 addresses

With LPAE enabled, alloc_init_section() does not map the entire
address space for unaligned addresses.

The issue also reproduced with CMA + LPAE. CMA tries to map 16MB
with page granularity mappings during boot. alloc_init_pte()
is called and out of 16MB, only 2MB gets mapped and rest remains
unaccessible.

Because of this OMAP5 boot is broken with CMA + LPAE enabled.
Fix the issue by ensuring that the entire addresses are
mapped.

Signed-off-by: R Sricharan <r.sricharan@ti.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christoffer Dall <chris@cloudcar.com>
Cc: Santosh Shilimkar <santosh.shilimkar@ti.com>
Tested-by: Laura Abbott <lauraa@codeaurora.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Christoffer Dall <chris@cloudcar.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/mmu.c | 73 +++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 47 insertions(+), 26 deletions(-)

diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index e95a996ab78..78978945492 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -598,39 +598,60 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 }
 
-static void __init alloc_init_section(pud_t *pud, unsigned long addr,
-				      unsigned long end, phys_addr_t phys,
-				      const struct mem_type *type)
+static void __init map_init_section(pmd_t *pmd, unsigned long addr,
+			unsigned long end, phys_addr_t phys,
+			const struct mem_type *type)
 {
-	pmd_t *pmd = pmd_offset(pud, addr);
-
+#ifndef CONFIG_ARM_LPAE
 	/*
-	 * Try a section mapping - end, addr and phys must all be aligned
-	 * to a section boundary.  Note that PMDs refer to the individual
-	 * L1 entries, whereas PGDs refer to a group of L1 entries making
-	 * up one logical pointer to an L2 table.
+	 * In classic MMU format, puds and pmds are folded in to
+	 * the pgds. pmd_offset gives the PGD entry. PGDs refer to a
+	 * group of L1 entries making up one logical pointer to
+	 * an L2 table (2MB), where as PMDs refer to the individual
+	 * L1 entries (1MB). Hence increment to get the correct
+	 * offset for odd 1MB sections.
+	 * (See arch/arm/include/asm/pgtable-2level.h)
 	 */
-	if (type->prot_sect && ((addr | end | phys) & ~SECTION_MASK) == 0) {
-		pmd_t *p = pmd;
-
-#ifndef CONFIG_ARM_LPAE
-		if (addr & SECTION_SIZE)
-			pmd++;
+	if (addr & SECTION_SIZE)
+		pmd++;
 #endif
+	do {
+		*pmd = __pmd(phys | type->prot_sect);
+		phys += SECTION_SIZE;
+	} while (pmd++, addr += SECTION_SIZE, addr != end);
 
-		do {
-			*pmd = __pmd(phys | type->prot_sect);
-			phys += SECTION_SIZE;
-		} while (pmd++, addr += SECTION_SIZE, addr != end);
+	flush_pmd_entry(pmd);
+}
 
-		flush_pmd_entry(p);
-	} else {
+static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
+				      unsigned long end, phys_addr_t phys,
+				      const struct mem_type *type)
+{
+	pmd_t *pmd = pmd_offset(pud, addr);
+	unsigned long next;
+
+	do {
 		/*
-		 * No need to loop; pte's aren't interested in the
-		 * individual L1 entries.
+		 * With LPAE, we must loop over to map
+		 * all the pmds for the given range.
 		 */
-		alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type);
-	}
+		next = pmd_addr_end(addr, end);
+
+		/*
+		 * Try a section mapping - addr, next and phys must all be
+		 * aligned to a section boundary.
+		 */
+		if (type->prot_sect &&
+				((addr | next | phys) & ~SECTION_MASK) == 0) {
+			map_init_section(pmd, addr, next, phys, type);
+		} else {
+			alloc_init_pte(pmd, addr, next,
+						__phys_to_pfn(phys), type);
+		}
+
+		phys += next - addr;
+
+	} while (pmd++, addr = next, addr != end);
 }
 
 static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
@@ -641,7 +662,7 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
 
 	do {
 		next = pud_addr_end(addr, end);
-		alloc_init_section(pud, addr, next, phys, type);
+		alloc_init_pmd(pud, addr, next, phys, type);
 		phys += next - addr;
 	} while (pud++, addr = next, addr != end);
 }
-- 
cgit v1.2.3


From c40e3641670eb6ebfdb71d4b0c775416ef95f4f0 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Mon, 18 Mar 2013 19:44:14 +0100
Subject: ARM: 7679/1: Clear IDIVT hwcap if CONFIG_ARM_THUMB=n

Don't advertise support for the SDIV/UDIV thumb instructions if
the kernel is not compiled with support for thumb userspace. This
is in line with how we remove the THUMB hwcap in these
configurations.

Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Stepan Moskovchenko <stepanm@codeaurora.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 3f6cbb2e3ed..e2c8bbffb0b 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -484,7 +484,7 @@ static void __init setup_processor(void)
 		 list->elf_name, ENDIANNESS);
 	elf_hwcap = list->elf_hwcap;
 #ifndef CONFIG_ARM_THUMB
-	elf_hwcap &= ~HWCAP_THUMB;
+	elf_hwcap &= ~(HWCAP_THUMB | HWCAP_IDIVT);
 #endif
 
 	feat_v6_fixup();
-- 
cgit v1.2.3


From 8164f7af88d9ad3a757bd14f634b23997ee77f6b Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Mon, 18 Mar 2013 19:44:15 +0100
Subject: ARM: 7680/1: Detect support for SDIV/UDIV from ISAR0 register

The ISAR0 register indicates support for the SDIV and UDIV
instructions in both the Thumb and ARM instruction set. Read the
register to detect the supported instructions and update the
elf_hwcap mask as appropriate. This is better than adding more
and more cpuid checks in proc-v7.S for each new cpu variant that
supports these instructions.

Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Stepan Moskovchenko <stepanm@codeaurora.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/setup.c | 20 ++++++++++++++++++++
 arch/arm/mm/proc-v7.S   |  4 ++--
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index e2c8bbffb0b..f3ac13f69b7 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -353,6 +353,23 @@ void __init early_print(const char *str, ...)
 	printk("%s", buf);
 }
 
+static void __init cpuid_init_hwcaps(void)
+{
+	unsigned int divide_instrs;
+
+	if (cpu_architecture() < CPU_ARCH_ARMv7)
+		return;
+
+	divide_instrs = (read_cpuid_ext(CPUID_EXT_ISAR0) & 0x0f000000) >> 24;
+
+	switch (divide_instrs) {
+	case 2:
+		elf_hwcap |= HWCAP_IDIVA;
+	case 1:
+		elf_hwcap |= HWCAP_IDIVT;
+	}
+}
+
 static void __init feat_v6_fixup(void)
 {
 	int id = read_cpuid_id();
@@ -483,6 +500,9 @@ static void __init setup_processor(void)
 	snprintf(elf_platform, ELF_PLATFORM_SIZE, "%s%c",
 		 list->elf_name, ENDIANNESS);
 	elf_hwcap = list->elf_hwcap;
+
+	cpuid_init_hwcaps();
+
 #ifndef CONFIG_ARM_THUMB
 	elf_hwcap &= ~(HWCAP_THUMB | HWCAP_IDIVT);
 #endif
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 3a3c015f8d5..bcd3d48922f 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -420,7 +420,7 @@ __v7_pj4b_proc_info:
 __v7_ca7mp_proc_info:
 	.long	0x410fc070
 	.long	0xff0ffff0
-	__v7_proc __v7_ca7mp_setup, hwcaps = HWCAP_IDIV
+	__v7_proc __v7_ca7mp_setup
 	.size	__v7_ca7mp_proc_info, . - __v7_ca7mp_proc_info
 
 	/*
@@ -430,7 +430,7 @@ __v7_ca7mp_proc_info:
 __v7_ca15mp_proc_info:
 	.long	0x410fc0f0
 	.long	0xff0ffff0
-	__v7_proc __v7_ca15mp_setup, hwcaps = HWCAP_IDIV
+	__v7_proc __v7_ca15mp_setup
 	.size	__v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info
 
 	/*
-- 
cgit v1.2.3


From 120ecfafabec382c4feb79ff159ef42a39b6d33b Mon Sep 17 00:00:00 2001
From: Stepan Moskovchenko <stepanm@codeaurora.org>
Date: Mon, 18 Mar 2013 19:44:16 +0100
Subject: ARM: 7678/1: Work around faulty ISAR0 register in some Krait CPUs

Some early versions of the Krait CPU design incorrectly indicate
that they only support the UDIV and SDIV instructions in Thumb
mode when they actually support them in ARM and Thumb mode. It
seems that these CPUs follow the DDI0406B ARM ARM which has two
possible values for the divide instructions field, instead of the
DDI0406C document which has three possible values.

Work around this problem by checking the MIDR against Krait CPUs
with this faulty ISAR0 register and force the hwcaps to indicate
support in both modes.

[sboyd: Rewrote commit text to reflect real reasoning now that
	we autodetect udiv/sdiv]

Signed-off-by: Stepan Moskovchenko <stepanm@codeaurora.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/proc-v7.S | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index bcd3d48922f..f584d3f5b37 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -433,6 +433,21 @@ __v7_ca15mp_proc_info:
 	__v7_proc __v7_ca15mp_setup
 	.size	__v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info
 
+	/*
+	 * Qualcomm Inc. Krait processors.
+	 */
+	.type	__krait_proc_info, #object
+__krait_proc_info:
+	.long	0x510f0400		@ Required ID value
+	.long	0xff0ffc00		@ Mask for ID
+	/*
+	 * Some Krait processors don't indicate support for SDIV and UDIV
+	 * instructions in the ARM instruction set, even though they actually
+	 * do support them.
+	 */
+	__v7_proc __v7_setup, hwcaps = HWCAP_IDIV
+	.size	__krait_proc_info, . - __krait_proc_info
+
 	/*
 	 * Match any ARMv7 processor core.
 	 */
-- 
cgit v1.2.3


From 68a154fc53ddd3f7b33e482847a411bf54a50855 Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Wed, 20 Mar 2013 17:30:30 +0100
Subject: ARM: 7681/1: hw_breakpoint: use warn_once to avoid spam from
 reset_ctrl_regs()

CPU debug features like hardware break, watchpoints can be used only
when the debug mode is enabled and available. Unfortunately on OMAP4
based devices, after a CPU power cycle, the debug feature gets disabled
which leads to a flood of messages coming from reset_ctrl_regs() which
gets called on every CPU_PM_EXIT with CPUidle enabled.

So make use of warn_once() so that system is usable.

Thanks to Will for pointers and Lokesh for the analysis of the issue.

Tested-by: Lokesh Vutla <lokeshvutla@ti.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/hw_breakpoint.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 96093b75ab9..5dc1aa6f0f7 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -966,7 +966,7 @@ static void reset_ctrl_regs(void *unused)
 	}
 
 	if (err) {
-		pr_warning("CPU %d debug is powered down!\n", cpu);
+		pr_warn_once("CPU %d debug is powered down!\n", cpu);
 		cpumask_or(&debug_err_mask, &debug_err_mask, cpumask_of(cpu));
 		return;
 	}
@@ -987,7 +987,7 @@ clear_vcr:
 	isb();
 
 	if (cpumask_intersects(&debug_err_mask, cpumask_of(cpu))) {
-		pr_warning("CPU %d failed to disable vector catch\n", cpu);
+		pr_warn_once("CPU %d failed to disable vector catch\n", cpu);
 		return;
 	}
 
@@ -1007,7 +1007,7 @@ clear_vcr:
 	}
 
 	if (cpumask_intersects(&debug_err_mask, cpumask_of(cpu))) {
-		pr_warning("CPU %d failed to clear debug register pairs\n", cpu);
+		pr_warn_once("CPU %d failed to clear debug register pairs\n", cpu);
 		return;
 	}
 
-- 
cgit v1.2.3


From 698613b63817f2f6ca79831cd1c37aae67025323 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Wed, 3 Apr 2013 16:33:26 +0100
Subject: ARM: iWMMXt: always enable iWMMXt support with PJ4 CPUs

Jason Cooper reports these build errors:
arch/arm/kernel/built-in.o: In function `iwmmxt_do':
/.../arch/arm/kernel/pj4-cp0.c:36: undefined reference to `iwmmxt_task_release'
/.../arch/arm/kernel/pj4-cp0.c:40: undefined reference to `iwmmxt_task_switch'
make: *** [vmlinux] Error 1

This is caused because the PJ4 code explicitly references the iWMMXt
code, but doesn't require it to be built.  Fix this by ensuring that
iWMMXt is always enabled with PJ4.

Reported-by: Jason Cooper <jason@lakedaemon.net>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 13b739469c5..12ea3b3d49a 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1183,9 +1183,9 @@ config ARM_NR_BANKS
 	default 8
 
 config IWMMXT
-	bool "Enable iWMMXt support"
+	bool "Enable iWMMXt support" if !CPU_PJ4
 	depends on CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_PJ4
-	default y if PXA27x || PXA3xx || ARCH_MMP
+	default y if PXA27x || PXA3xx || ARCH_MMP || CPU_PJ4
 	help
 	  Enable support for iWMMXt context switching at run time if
 	  running on a CPU that supports it.
-- 
cgit v1.2.3


From 6e7aceeb7c70b9ebad79bcfe91fcf738826e8e6d Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Mon, 25 Mar 2013 17:02:48 +0100
Subject: ARM: 7682/1: cache-l2x0: fix masking of RTL revision numbering and
 set_debug init

Commit b8db6b8 (ARM: 7547/4: cache-l2x0: add support for Aurora L2 cache
ctrl) moved the masking of the part ID which caused the RTL version to be
lost. Commit 6248d06 (ARM: 7545/1: cache-l2x0: make outer_cache_fns a
field of l2x0_of_data) changed how .set_debug is initialized. Both commits
break commit 74ddcdb (ARM: 7608/1: l2x0: Only set .set_debug
on PL310 r3p0 and earlier) which uses the RTL version to conditionally set
.set_debug function pointer. Commit b8db6b8 also caused the printed cache
ID to be missing the version information.

Fix this by reverting how the part number is masked so the RTL version
info is maintained. The cache-id-part DT property does not set the RTL
bits so masking them should have no effect. Also, re-arrange the order
of the function pointer init so the .set_debug function can be overridden.

Reported-by: Paolo Pisati <paolo.pisati@canonical.com>
Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Cc: Gregory CLEMENT <gregory.clement@free-electrons.com>
Cc: Yehuda Yitschak <yehuday@marvell.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/cache-l2x0.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index c2f37390308..c465faca51b 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -299,7 +299,7 @@ static void l2x0_unlock(u32 cache_id)
 	int lockregs;
 	int i;
 
-	switch (cache_id) {
+	switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
 	case L2X0_CACHE_ID_PART_L310:
 		lockregs = 8;
 		break;
@@ -333,15 +333,14 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
 	if (cache_id_part_number_from_dt)
 		cache_id = cache_id_part_number_from_dt;
 	else
-		cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID)
-			& L2X0_CACHE_ID_PART_MASK;
+		cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);
 	aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
 
 	aux &= aux_mask;
 	aux |= aux_val;
 
 	/* Determine the number of ways */
-	switch (cache_id) {
+	switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
 	case L2X0_CACHE_ID_PART_L310:
 		if (aux & (1 << 16))
 			ways = 16;
@@ -725,7 +724,6 @@ static const struct l2x0_of_data pl310_data = {
 		.flush_all   = l2x0_flush_all,
 		.inv_all     = l2x0_inv_all,
 		.disable     = l2x0_disable,
-		.set_debug   = pl310_set_debug,
 	},
 };
 
@@ -814,9 +812,8 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
 		data->save();
 
 	of_init = true;
-	l2x0_init(l2x0_base, aux_val, aux_mask);
-
 	memcpy(&outer_cache, &data->outer_cache, sizeof(outer_cache));
+	l2x0_init(l2x0_base, aux_val, aux_mask);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 93dc68876b608da041fe40ed39424b0fcd5aa2fb Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Tue, 26 Mar 2013 23:35:04 +0100
Subject: ARM: 7684/1: errata: Workaround for Cortex-A15 erratum 798181
 (TLBI/DSB operations)

On Cortex-A15 (r0p0..r3p2) the TLBI/DSB are not adequately shooting down
all use of the old entries. This patch implements the erratum workaround
which consists of:

1. Dummy TLBIMVAIS and DSB on the CPU doing the TLBI operation.
2. Send IPI to the CPUs that are running the same mm (and ASID) as the
   one being invalidated (or all the online CPUs for global pages).
3. CPU receiving the IPI executes a DMB and CLREX (part of the exception
   return code already).

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/Kconfig                   | 10 ++++++
 arch/arm/include/asm/highmem.h     |  7 ++++
 arch/arm/include/asm/mmu_context.h |  2 ++
 arch/arm/include/asm/tlbflush.h    | 15 +++++++++
 arch/arm/kernel/smp_tlb.c          | 66 ++++++++++++++++++++++++++++++++++++++
 arch/arm/mm/context.c              |  3 +-
 6 files changed, 102 insertions(+), 1 deletion(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 12ea3b3d49a..1cacda426a0 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1439,6 +1439,16 @@ config ARM_ERRATA_775420
 	 to deadlock. This workaround puts DSB before executing ISB if
 	 an abort may occur on cache maintenance.
 
+config ARM_ERRATA_798181
+	bool "ARM errata: TLBI/DSB failure on Cortex-A15"
+	depends on CPU_V7 && SMP
+	help
+	  On Cortex-A15 (r0p0..r3p2) the TLBI*IS/DSB operations are not
+	  adequately shooting down all use of the old entries. This
+	  option enables the Linux kernel workaround for this erratum
+	  which sends an IPI to the CPUs that are running the same ASID
+	  as the one being invalidated.
+
 endmenu
 
 source "arch/arm/common/Kconfig"
diff --git a/arch/arm/include/asm/highmem.h b/arch/arm/include/asm/highmem.h
index 8c5e828f484..91b99abe7a9 100644
--- a/arch/arm/include/asm/highmem.h
+++ b/arch/arm/include/asm/highmem.h
@@ -41,6 +41,13 @@ extern void kunmap_high(struct page *page);
 #endif
 #endif
 
+/*
+ * Needed to be able to broadcast the TLB invalidation for kmap.
+ */
+#ifdef CONFIG_ARM_ERRATA_798181
+#undef ARCH_NEEDS_KMAP_HIGH_GET
+#endif
+
 #ifdef ARCH_NEEDS_KMAP_HIGH_GET
 extern void *kmap_high_get(struct page *page);
 #else
diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h
index 863a6611323..a7b85e0d0cc 100644
--- a/arch/arm/include/asm/mmu_context.h
+++ b/arch/arm/include/asm/mmu_context.h
@@ -27,6 +27,8 @@ void __check_vmalloc_seq(struct mm_struct *mm);
 void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk);
 #define init_new_context(tsk,mm)	({ atomic64_set(&mm->context.id, 0); 0; })
 
+DECLARE_PER_CPU(atomic64_t, active_asids);
+
 #else	/* !CONFIG_CPU_HAS_ASID */
 
 #ifdef CONFIG_MMU
diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h
index 4db8c8820f0..9e9c041358c 100644
--- a/arch/arm/include/asm/tlbflush.h
+++ b/arch/arm/include/asm/tlbflush.h
@@ -450,6 +450,21 @@ static inline void local_flush_bp_all(void)
 		isb();
 }
 
+#ifdef CONFIG_ARM_ERRATA_798181
+static inline void dummy_flush_tlb_a15_erratum(void)
+{
+	/*
+	 * Dummy TLBIMVAIS. Using the unmapped address 0 and ASID 0.
+	 */
+	asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (0));
+	dsb();
+}
+#else
+static inline void dummy_flush_tlb_a15_erratum(void)
+{
+}
+#endif
+
 /*
  *	flush_pmd_entry
  *
diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c
index bd030053139..e82e1d24877 100644
--- a/arch/arm/kernel/smp_tlb.c
+++ b/arch/arm/kernel/smp_tlb.c
@@ -12,6 +12,7 @@
 
 #include <asm/smp_plat.h>
 #include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
 
 /**********************************************************************/
 
@@ -69,12 +70,72 @@ static inline void ipi_flush_bp_all(void *ignored)
 	local_flush_bp_all();
 }
 
+#ifdef CONFIG_ARM_ERRATA_798181
+static int erratum_a15_798181(void)
+{
+	unsigned int midr = read_cpuid_id();
+
+	/* Cortex-A15 r0p0..r3p2 affected */
+	if ((midr & 0xff0ffff0) != 0x410fc0f0 || midr > 0x413fc0f2)
+		return 0;
+	return 1;
+}
+#else
+static int erratum_a15_798181(void)
+{
+	return 0;
+}
+#endif
+
+static void ipi_flush_tlb_a15_erratum(void *arg)
+{
+	dmb();
+}
+
+static void broadcast_tlb_a15_erratum(void)
+{
+	if (!erratum_a15_798181())
+		return;
+
+	dummy_flush_tlb_a15_erratum();
+	smp_call_function_many(cpu_online_mask, ipi_flush_tlb_a15_erratum,
+			       NULL, 1);
+}
+
+static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm)
+{
+	int cpu;
+	cpumask_t mask = { CPU_BITS_NONE };
+
+	if (!erratum_a15_798181())
+		return;
+
+	dummy_flush_tlb_a15_erratum();
+	for_each_online_cpu(cpu) {
+		if (cpu == smp_processor_id())
+			continue;
+		/*
+		 * We only need to send an IPI if the other CPUs are running
+		 * the same ASID as the one being invalidated. There is no
+		 * need for locking around the active_asids check since the
+		 * switch_mm() function has at least one dmb() (as required by
+		 * this workaround) in case a context switch happens on
+		 * another CPU after the condition below.
+		 */
+		if (atomic64_read(&mm->context.id) ==
+		    atomic64_read(&per_cpu(active_asids, cpu)))
+			cpumask_set_cpu(cpu, &mask);
+	}
+	smp_call_function_many(&mask, ipi_flush_tlb_a15_erratum, NULL, 1);
+}
+
 void flush_tlb_all(void)
 {
 	if (tlb_ops_need_broadcast())
 		on_each_cpu(ipi_flush_tlb_all, NULL, 1);
 	else
 		local_flush_tlb_all();
+	broadcast_tlb_a15_erratum();
 }
 
 void flush_tlb_mm(struct mm_struct *mm)
@@ -83,6 +144,7 @@ void flush_tlb_mm(struct mm_struct *mm)
 		on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm, mm, 1);
 	else
 		local_flush_tlb_mm(mm);
+	broadcast_tlb_mm_a15_erratum(mm);
 }
 
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
@@ -95,6 +157,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
 					&ta, 1);
 	} else
 		local_flush_tlb_page(vma, uaddr);
+	broadcast_tlb_mm_a15_erratum(vma->vm_mm);
 }
 
 void flush_tlb_kernel_page(unsigned long kaddr)
@@ -105,6 +168,7 @@ void flush_tlb_kernel_page(unsigned long kaddr)
 		on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1);
 	} else
 		local_flush_tlb_kernel_page(kaddr);
+	broadcast_tlb_a15_erratum();
 }
 
 void flush_tlb_range(struct vm_area_struct *vma,
@@ -119,6 +183,7 @@ void flush_tlb_range(struct vm_area_struct *vma,
 					&ta, 1);
 	} else
 		local_flush_tlb_range(vma, start, end);
+	broadcast_tlb_mm_a15_erratum(vma->vm_mm);
 }
 
 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
@@ -130,6 +195,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 		on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1);
 	} else
 		local_flush_tlb_kernel_range(start, end);
+	broadcast_tlb_a15_erratum();
 }
 
 void flush_bp_all(void)
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index a5a4b2bc42b..2ac37372ef5 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -48,7 +48,7 @@ static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
 static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION);
 static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS);
 
-static DEFINE_PER_CPU(atomic64_t, active_asids);
+DEFINE_PER_CPU(atomic64_t, active_asids);
 static DEFINE_PER_CPU(u64, reserved_asids);
 static cpumask_t tlb_flush_pending;
 
@@ -215,6 +215,7 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk)
 	if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) {
 		local_flush_bp_all();
 		local_flush_tlb_all();
+		dummy_flush_tlb_a15_erratum();
 	}
 
 	atomic64_set(&per_cpu(active_asids, cpu), asid);
-- 
cgit v1.2.3


From 6f3d90e55660ba42301b5e9c7eed332cc9f70fd7 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 28 Mar 2013 11:17:55 +0100
Subject: ARM: 7685/1: delay: use private ticks_per_jiffy field for timer-based
 delay ops

Commit 70264367a243 ("ARM: 7653/2: do not scale loops_per_jiffy when
using a constant delay clock") fixed a problem with our timer-based
delay loop, where loops_per_jiffy is scaled by cpufreq yet used directly
by the timer delay ops.

This patch fixes the problem in a more elegant way by keeping a private
ticks_per_jiffy field in the delay ops, independent of loops_per_jiffy
and therefore not subject to scaling. The loop-based delay continues to
use loops_per_jiffy directly, as it should.

Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/delay.h | 2 +-
 arch/arm/kernel/smp.c        | 3 ---
 arch/arm/lib/delay.c         | 8 +++++---
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/arch/arm/include/asm/delay.h b/arch/arm/include/asm/delay.h
index 720799fd3a8..dff714d886d 100644
--- a/arch/arm/include/asm/delay.h
+++ b/arch/arm/include/asm/delay.h
@@ -24,7 +24,7 @@ extern struct arm_delay_ops {
 	void (*delay)(unsigned long);
 	void (*const_udelay)(unsigned long);
 	void (*udelay)(unsigned long);
-	bool const_clock;
+	unsigned long ticks_per_jiffy;
 } arm_delay_ops;
 
 #define __delay(n)		arm_delay_ops.delay(n)
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 79078edbb9b..1f2ccccaf00 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -673,9 +673,6 @@ static int cpufreq_callback(struct notifier_block *nb,
 	if (freq->flags & CPUFREQ_CONST_LOOPS)
 		return NOTIFY_OK;
 
-	if (arm_delay_ops.const_clock)
-		return NOTIFY_OK;
-
 	if (!per_cpu(l_p_j_ref, cpu)) {
 		per_cpu(l_p_j_ref, cpu) =
 			per_cpu(cpu_data, cpu).loops_per_jiffy;
diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c
index 6b93f6a1a3c..64dbfa57204 100644
--- a/arch/arm/lib/delay.c
+++ b/arch/arm/lib/delay.c
@@ -58,7 +58,7 @@ static void __timer_delay(unsigned long cycles)
 static void __timer_const_udelay(unsigned long xloops)
 {
 	unsigned long long loops = xloops;
-	loops *= loops_per_jiffy;
+	loops *= arm_delay_ops.ticks_per_jiffy;
 	__timer_delay(loops >> UDELAY_SHIFT);
 }
 
@@ -73,11 +73,13 @@ void __init register_current_timer_delay(const struct delay_timer *timer)
 		pr_info("Switching to timer-based delay loop\n");
 		delay_timer			= timer;
 		lpj_fine			= timer->freq / HZ;
-		loops_per_jiffy			= lpj_fine;
+
+		/* cpufreq may scale loops_per_jiffy, so keep a private copy */
+		arm_delay_ops.ticks_per_jiffy	= lpj_fine;
 		arm_delay_ops.delay		= __timer_delay;
 		arm_delay_ops.const_udelay	= __timer_const_udelay;
 		arm_delay_ops.udelay		= __timer_udelay;
-		arm_delay_ops.const_clock	= true;
+
 		delay_calibrated		= true;
 	} else {
 		pr_info("Ignoring duplicate/late registration of read_current_timer delay\n");
-- 
cgit v1.2.3


From b21e023ba4003fe4b5c32540e4aee5991c019e92 Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabin@rab.in>
Date: Tue, 2 Apr 2013 22:11:46 +0100
Subject: ARM: 7689/1: add unwind annotations to ftrace asm

Add unwind annotations to the ftrace assembly code so that the function
tracer's stacktracing options (func_stack_trace, etc.) work when
CONFIG_ARM_UNWIND is enabled.

Signed-off-by: Rabin Vincent <rabin@rab.in>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/entry-common.S | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 3248cde504e..fefd7f97143 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -276,7 +276,13 @@ ENDPROC(ftrace_graph_caller_old)
  */
 
 .macro mcount_enter
+/*
+ * This pad compensates for the push {lr} at the call site.  Note that we are
+ * unable to unwind through a function which does not otherwise save its lr.
+ */
+ UNWIND(.pad	#4)
 	stmdb	sp!, {r0-r3, lr}
+ UNWIND(.save	{r0-r3, lr})
 .endm
 
 .macro mcount_get_lr reg
@@ -289,6 +295,7 @@ ENDPROC(ftrace_graph_caller_old)
 .endm
 
 ENTRY(__gnu_mcount_nc)
+UNWIND(.fnstart)
 #ifdef CONFIG_DYNAMIC_FTRACE
 	mov	ip, lr
 	ldmia	sp!, {lr}
@@ -296,17 +303,22 @@ ENTRY(__gnu_mcount_nc)
 #else
 	__mcount
 #endif
+UNWIND(.fnend)
 ENDPROC(__gnu_mcount_nc)
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 ENTRY(ftrace_caller)
+UNWIND(.fnstart)
 	__ftrace_caller
+UNWIND(.fnend)
 ENDPROC(ftrace_caller)
 #endif
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 ENTRY(ftrace_graph_caller)
+UNWIND(.fnstart)
 	__ftrace_graph_caller
+UNWIND(.fnend)
 ENDPROC(ftrace_graph_caller)
 #endif
 
-- 
cgit v1.2.3


From 4e1db26a0b42e2b6e27c05d68adcc01709c2eed2 Mon Sep 17 00:00:00 2001
From: Paul Bolle <pebolle@tiscali.nl>
Date: Wed, 3 Apr 2013 12:24:45 +0100
Subject: ARM: 7690/1: mm: fix CONFIG_LPAE typos

CONFIG_LPAE doesn't exist: the correct option is CONFIG_ARM_LPAE, so fix
up the two typos under arch/arm/.

The fix to head.S is slightly scary, but this is just for setting up
an early io-mapping for the serial port when running on a big-endian,
LPAE system. Since these systems don't exist in the wild (at least, I
have no access to one outside of kvmtool, which doesn't provide a serial
port suitable for earlyprintk), then we can revisit the code later if it
causes any problems.

Signed-off-by: Paul Bolle <pebolle@tiscali.nl>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/head.S  | 2 +-
 arch/arm/kernel/setup.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index e0eb9a1cae7..8bac553fe21 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -267,7 +267,7 @@ __create_page_tables:
 	addne	r6, r6, #1 << SECTION_SHIFT
 	strne	r6, [r3]
 
-#if defined(CONFIG_LPAE) && defined(CONFIG_CPU_ENDIAN_BE8)
+#if defined(CONFIG_ARM_LPAE) && defined(CONFIG_CPU_ENDIAN_BE8)
 	sub	r4, r4, #4			@ Fixup page table pointer
 						@ for 64-bit descriptors
 #endif
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index f3ac13f69b7..d343a6c3a6d 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -544,7 +544,7 @@ int __init arm_add_memory(phys_addr_t start, phys_addr_t size)
 	size -= start & ~PAGE_MASK;
 	bank->start = PAGE_ALIGN(start);
 
-#ifndef CONFIG_LPAE
+#ifndef CONFIG_ARM_LPAE
 	if (bank->start + size < bank->start) {
 		printk(KERN_CRIT "Truncating memory at 0x%08llx to fit in "
 			"32-bit physical address space\n", (long long)start);
-- 
cgit v1.2.3