From 30650239adc9e4e9439256d6988e521518dccbb3 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Mon, 29 Apr 2013 18:07:47 +0000 Subject: powerpc: Add an in memory udbg console This patch adds a new udbg early debug console which utilises statically defined input and output buffers stored within the kernel BSS. It is primarily designed to assist with bring up of new hardware which may not have a working console but which has a method of reading/writing kernel memory. This version incorporates comments made by Ben H (thanks!). Changes from v1: - Add memory barriers. - Ensure updating of read/write positions is atomic. Signed-off-by: Alistair Popple Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/Kconfig.debug | 23 ++++++++ arch/powerpc/include/asm/udbg.h | 1 + arch/powerpc/kernel/udbg.c | 3 ++ arch/powerpc/sysdev/Makefile | 2 + arch/powerpc/sysdev/udbg_memcons.c | 105 +++++++++++++++++++++++++++++++++++++ 5 files changed, 134 insertions(+) create mode 100644 arch/powerpc/sysdev/udbg_memcons.c diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 5416e28a753..863d877e0b5 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -262,8 +262,31 @@ config PPC_EARLY_DEBUG_OPAL_HVSI Select this to enable early debugging for the PowerNV platform using an "hvsi" console +config PPC_EARLY_DEBUG_MEMCONS + bool "In memory console" + help + Select this to enable early debugging using an in memory console. + This console provides input and output buffers stored within the + kernel BSS and should be safe to select on any system. A debugger + can then be used to read kernel output or send input to the console. endchoice +config PPC_MEMCONS_OUTPUT_SIZE + int "In memory console output buffer size" + depends on PPC_EARLY_DEBUG_MEMCONS + default 4096 + help + Selects the size of the output buffer (in bytes) of the in memory + console. + +config PPC_MEMCONS_INPUT_SIZE + int "In memory console input buffer size" + depends on PPC_EARLY_DEBUG_MEMCONS + default 128 + help + Selects the size of the input buffer (in bytes) of the in memory + console. + config PPC_EARLY_DEBUG_OPAL def_bool y depends on PPC_EARLY_DEBUG_OPAL_RAW || PPC_EARLY_DEBUG_OPAL_HVSI diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h index 5a7510e9d09..dc590919f8e 100644 --- a/arch/powerpc/include/asm/udbg.h +++ b/arch/powerpc/include/asm/udbg.h @@ -52,6 +52,7 @@ extern void __init udbg_init_40x_realmode(void); extern void __init udbg_init_cpm(void); extern void __init udbg_init_usbgecko(void); extern void __init udbg_init_wsp(void); +extern void __init udbg_init_memcons(void); extern void __init udbg_init_ehv_bc(void); extern void __init udbg_init_ps3gelic(void); extern void __init udbg_init_debug_opal_raw(void); diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index 13b86709349..9d3fdcd6629 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -64,6 +64,9 @@ void __init udbg_early_init(void) udbg_init_usbgecko(); #elif defined(CONFIG_PPC_EARLY_DEBUG_WSP) udbg_init_wsp(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_MEMCONS) + /* In memory console */ + udbg_init_memcons(); #elif defined(CONFIG_PPC_EARLY_DEBUG_EHV_BC) udbg_init_ehv_bc(); #elif defined(CONFIG_PPC_EARLY_DEBUG_PS3GELIC) diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index b0a518e9759..99464a7bdb3 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -64,6 +64,8 @@ endif obj-$(CONFIG_PPC_SCOM) += scom.o +obj-$(CONFIG_PPC_EARLY_DEBUG_MEMCONS) += udbg_memcons.o + subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror obj-$(CONFIG_PPC_XICS) += xics/ diff --git a/arch/powerpc/sysdev/udbg_memcons.c b/arch/powerpc/sysdev/udbg_memcons.c new file mode 100644 index 00000000000..ce5a7b489e4 --- /dev/null +++ b/arch/powerpc/sysdev/udbg_memcons.c @@ -0,0 +1,105 @@ +/* + * A udbg backend which logs messages and reads input from in memory + * buffers. + * + * The console output can be read from memcons_output which is a + * circular buffer whose next write position is stored in memcons.output_pos. + * + * Input may be passed by writing into the memcons_input buffer when it is + * empty. The input buffer is empty when both input_pos == input_start and + * *input_start == '\0'. + * + * Copyright (C) 2003-2005 Anton Blanchard and Milton Miller, IBM Corp + * Copyright (C) 2013 Alistair Popple, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +struct memcons { + char *output_start; + char *output_pos; + char *output_end; + char *input_start; + char *input_pos; + char *input_end; +}; + +static char memcons_output[CONFIG_PPC_MEMCONS_OUTPUT_SIZE]; +static char memcons_input[CONFIG_PPC_MEMCONS_INPUT_SIZE]; + +struct memcons memcons = { + .output_start = memcons_output, + .output_pos = memcons_output, + .output_end = &memcons_output[CONFIG_PPC_MEMCONS_OUTPUT_SIZE], + .input_start = memcons_input, + .input_pos = memcons_input, + .input_end = &memcons_input[CONFIG_PPC_MEMCONS_INPUT_SIZE], +}; + +void memcons_putc(char c) +{ + char *new_output_pos; + + *memcons.output_pos = c; + wmb(); + new_output_pos = memcons.output_pos + 1; + if (new_output_pos >= memcons.output_end) + new_output_pos = memcons.output_start; + + memcons.output_pos = new_output_pos; +} + +int memcons_getc_poll(void) +{ + char c; + char *new_input_pos; + + if (*memcons.input_pos) { + c = *memcons.input_pos; + + new_input_pos = memcons.input_pos + 1; + if (new_input_pos >= memcons.input_end) + new_input_pos = memcons.input_start; + else if (*new_input_pos == '\0') + new_input_pos = memcons.input_start; + + *memcons.input_pos = '\0'; + wmb(); + memcons.input_pos = new_input_pos; + return c; + } + + return -1; +} + +int memcons_getc(void) +{ + int c; + + while (1) { + c = memcons_getc_poll(); + if (c == -1) + cpu_relax(); + else + break; + } + + return c; +} + +void udbg_init_memcons(void) +{ + udbg_putc = memcons_putc; + udbg_getc = memcons_getc; + udbg_getc_poll = memcons_getc_poll; +} -- cgit v1.2.3 From 1de1455f33709a8afd8d41d26d09739a1148105b Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 8 May 2013 14:14:26 +1000 Subject: powerpc/powernv: Properly drop characters if console is closed If the firmware returns an error such as "closed" (or hardware error), we should drop characters. Currently we only do that when a firmware compatible with OPAL v2 APIs is detected, in the code that calls opal_console_write_buffer_space(), which didn't exist with OPAL v1 (or didn't work). However, when enabling early debug consoles, the flag indicating that v2 is supported isn't set yet, causing us, in case of errors or closed console, to spin forever. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/powernv/opal.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index ade4463226c..12d9846aa87 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -144,6 +144,13 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len) rc == OPAL_BUSY_EVENT || rc == OPAL_SUCCESS)) { len = total_len; rc = opal_console_write(vtermno, &len, data); + + /* Closed or other error drop */ + if (rc != OPAL_SUCCESS && rc != OPAL_BUSY && + rc != OPAL_BUSY_EVENT) { + written = total_len; + break; + } if (rc == OPAL_SUCCESS) { total_len -= len; data += len; -- cgit v1.2.3 From 73ed148aea9dc0508be7e30e7a447f55c1b2f378 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 10 May 2013 16:59:18 +1000 Subject: powerpc/powernv: Improve kexec reliability We add a machine_shutdown hook that frees the OPAL interrupts (so they get masked at the source and don't fire while kexec'ing) and which triggers an IODA reset on all the PCIe host bridges which will have the effect of blocking all DMAs and subsequent PCIs interrupts. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/opal.h | 2 ++ arch/powerpc/platforms/powernv/opal.c | 17 +++++++++++++++++ arch/powerpc/platforms/powernv/pci-ioda.c | 9 +++++++++ arch/powerpc/platforms/powernv/pci.c | 12 ++++++++++++ arch/powerpc/platforms/powernv/pci.h | 2 ++ arch/powerpc/platforms/powernv/powernv.h | 2 ++ arch/powerpc/platforms/powernv/setup.c | 12 ++++++++++++ 7 files changed, 56 insertions(+) diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index b6c8b58b1d7..b2906adb89d 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -563,6 +563,8 @@ extern void opal_nvram_init(void); extern int opal_machine_check(struct pt_regs *regs); +extern void opal_shutdown(void); + #endif /* __ASSEMBLY__ */ #endif /* __OPAL_H */ diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 12d9846aa87..27907cb18b8 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -28,6 +29,8 @@ struct opal { static struct device_node *opal_node; static DEFINE_SPINLOCK(opal_write_lock); extern u64 opal_mc_secondary_handler[]; +static unsigned int *opal_irqs; +static unsigned int opal_irq_count; int __init early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data) @@ -323,6 +326,8 @@ static int __init opal_init(void) irqs = of_get_property(opal_node, "opal-interrupts", &irqlen); pr_debug("opal: Found %d interrupts reserved for OPAL\n", irqs ? (irqlen / 4) : 0); + opal_irq_count = irqlen / 4; + opal_irqs = kzalloc(opal_irq_count * sizeof(unsigned int), GFP_KERNEL); for (i = 0; irqs && i < (irqlen / 4); i++, irqs++) { unsigned int hwirq = be32_to_cpup(irqs); unsigned int irq = irq_create_mapping(NULL, hwirq); @@ -334,7 +339,19 @@ static int __init opal_init(void) if (rc) pr_warning("opal: Error %d requesting irq %d" " (0x%x)\n", rc, irq, hwirq); + opal_irqs[i] = irq; } return 0; } subsys_initcall(opal_init); + +void opal_shutdown(void) +{ + unsigned int i; + + for (i = 0; i < opal_irq_count; i++) { + if (opal_irqs[i]) + free_irq(opal_irqs[i], 0); + opal_irqs[i] = 0; + } +} diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 1da578b7c1b..3937aaae5bc 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1048,6 +1048,12 @@ static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus, return phb->ioda.pe_rmap[(bus->number << 8) | devfn]; } +static void pnv_pci_ioda_shutdown(struct pnv_phb *phb) +{ + opal_pci_reset(phb->opal_id, OPAL_PCI_IODA_TABLE_RESET, + OPAL_ASSERT_RESET); +} + void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type) { struct pci_controller *hose; @@ -1178,6 +1184,9 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type) /* Setup TCEs */ phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; + /* Setup shutdown function for kexec */ + phb->shutdown = pnv_pci_ioda_shutdown; + /* Setup MSI support */ pnv_pci_init_ioda_msis(phb); diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 55dfca844dd..163bd7422f1 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -450,6 +450,18 @@ static void pnv_pci_dma_dev_setup(struct pci_dev *pdev) pnv_pci_dma_fallback_setup(hose, pdev); } +void pnv_pci_shutdown(void) +{ + struct pci_controller *hose; + + list_for_each_entry(hose, &hose_list, list_node) { + struct pnv_phb *phb = hose->private_data; + + if (phb && phb->shutdown) + phb->shutdown(phb); + } +} + /* Fixup wrong class code in p7ioc and p8 root complex */ static void pnv_p7ioc_rc_quirk(struct pci_dev *dev) { diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 48dc4bb856a..25d76c4df50 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -86,6 +86,7 @@ struct pnv_phb { void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev); void (*fixup_phb)(struct pci_controller *hose); u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn); + void (*shutdown)(struct pnv_phb *phb); union { struct { @@ -158,4 +159,5 @@ extern void pnv_pci_init_ioda_hub(struct device_node *np); extern void pnv_pci_init_ioda2_phb(struct device_node *np); extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, u64 *startp, u64 *endp); + #endif /* __POWERNV_PCI_H */ diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index 8a9df7f9667..a1c6f83fc39 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -9,8 +9,10 @@ static inline void pnv_smp_init(void) { } #ifdef CONFIG_PCI extern void pnv_pci_init(void); +extern void pnv_pci_shutdown(void); #else static inline void pnv_pci_init(void) { } +static inline void pnv_pci_shutdown(void) { } #endif #endif /* _POWERNV_H */ diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index db1ad1c8f68..c20381caaa3 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -126,6 +126,17 @@ static void pnv_progress(char *s, unsigned short hex) { } +static void pnv_shutdown(void) +{ + /* Let the PCI code clear up IODA tables */ + pnv_pci_shutdown(); + + /* And unregister all OPAL interrupts so they don't fire + * up while we kexec + */ + opal_shutdown(); +} + #ifdef CONFIG_KEXEC static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) { @@ -187,6 +198,7 @@ define_machine(powernv) { .init_IRQ = pnv_init_IRQ, .show_cpuinfo = pnv_show_cpuinfo, .progress = pnv_progress, + .machine_shutdown = pnv_shutdown, .power_save = power7_idle, .calibrate_decr = generic_calibrate_decr, #ifdef CONFIG_KEXEC -- cgit v1.2.3 From 4a3b8d0b833707811011ad14dd12dabf31fa690a Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 9 May 2013 22:09:41 -0500 Subject: powerpc: hard_irq_disable(): Call trace_hardirqs_off after disabling lockdep.c has this: /* * So we're supposed to get called after you mask local IRQs, * but for some reason the hardware doesn't quite think you did * a proper job. */ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) return; Since irqs_disabled() is based on soft_enabled(), that (not just the hard EE bit) needs to be 0 before we call trace_hardirqs_off. Signed-off-by: Scott Wood --- arch/powerpc/include/asm/hw_irq.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index d615b28dda8..ba713f166fa 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -96,11 +96,12 @@ static inline bool arch_irqs_disabled(void) #endif #define hard_irq_disable() do { \ + u8 _was_enabled = get_paca()->soft_enabled; \ __hard_irq_disable(); \ - if (local_paca->soft_enabled) \ - trace_hardirqs_off(); \ get_paca()->soft_enabled = 0; \ get_paca()->irq_happened |= PACA_IRQ_HARD_DIS; \ + if (_was_enabled) \ + trace_hardirqs_off(); \ } while(0) static inline bool lazy_irq_pending(void) -- cgit v1.2.3 From 613e60a66181acc5c8b63268e939622162da2393 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sat, 11 May 2013 22:33:19 +0000 Subject: powerpc/mm: Use the correct mask value when looking at pgtable address Our pgtable are 2*sizeof(pte_t)*PTRS_PER_PTE which is PTE_FRAG_SIZE. Instead of depending on frag size, mask with PMD_MASKED_BITS. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/pgalloc-64.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h index 91acb12bac9..b66ae722a8e 100644 --- a/arch/powerpc/include/asm/pgalloc-64.h +++ b/arch/powerpc/include/asm/pgalloc-64.h @@ -186,7 +186,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, static inline pgtable_t pmd_pgtable(pmd_t pmd) { - return (pgtable_t)(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE); + return (pgtable_t)(pmd_val(pmd) & ~PMD_MASKED_BITS); } static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, -- cgit v1.2.3 From 83d5e64b7efa7f39b10ff5e92792e807a720289c Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 6 May 2013 10:51:00 +0000 Subject: powerpc: Fix build errors STRICT_MM_TYPECHECKS Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/pte-hash64-64k.h | 2 +- arch/powerpc/kernel/pci-common.c | 5 ++--- arch/powerpc/mm/init_64.c | 3 ++- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h index 3e13e23e4fd..d836d945068 100644 --- a/arch/powerpc/include/asm/pte-hash64-64k.h +++ b/arch/powerpc/include/asm/pte-hash64-64k.h @@ -47,7 +47,7 @@ * generic accessors and iterators here */ #define __real_pte(e,p) ((real_pte_t) { \ - (e), ((e) & _PAGE_COMBO) ? \ + (e), (pte_val(e) & _PAGE_COMBO) ? \ (pte_val(*((p) + PTRS_PER_PTE))) : 0 }) #define __rpte_to_hidx(r,index) ((pte_val((r).pte) & _PAGE_COMBO) ? \ (((r).hidx >> ((index)<<2)) & 0xf) : ((pte_val((r).pte) >> 12) & 0xf)) diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index f5c5c90799a..6053f037ef0 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -359,7 +359,6 @@ static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp, enum pci_mmap_state mmap_state, int write_combine) { - unsigned long prot = pgprot_val(protection); /* Write combine is always 0 on non-memory space mappings. On * memory space, if the user didn't pass 1, we check for a @@ -376,9 +375,9 @@ static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp, /* XXX would be nice to have a way to ask for write-through */ if (write_combine) - return pgprot_noncached_wc(prot); + return pgprot_noncached_wc(protection); else - return pgprot_noncached(prot); + return pgprot_noncached(protection); } /* diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index c2787bf779c..a90b9c45899 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -215,7 +215,8 @@ static void __meminit vmemmap_create_mapping(unsigned long start, unsigned long phys) { int mapped = htab_bolt_mapping(start, start + page_size, phys, - PAGE_KERNEL, mmu_vmemmap_psize, + pgprot_val(PAGE_KERNEL), + mmu_vmemmap_psize, mmu_kernel_ssize); BUG_ON(mapped < 0); } -- cgit v1.2.3 From 79c66ce8f6448a3295a32efeac88c9debd7f7094 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 12 May 2013 15:04:53 +0000 Subject: powerpc/kexec: Fix kexec when using VMX optimised memcpy commit b3f271e86e5a (powerpc: POWER7 optimised memcpy using VMX and enhanced prefetch) uses VMX when it is safe to do so (ie not in interrupt). It also looks at the task struct to decide if we have to save the current tasks' VMX state. kexec calls memcpy() at a point where the task struct may have been overwritten by the new kexec segments. If it has been overwritten then when memcpy -> enable_altivec looks up current->thread.regs->msr we get a cryptic oops or lockup. I also notice we aren't initialising thread_info->cpu, which means smp_processor_id is broken. Fix that too. Signed-off-by: Anton Blanchard Cc: # 3.6+ Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/machine_kexec_64.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 466a2908bb6..611acdf3009 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -335,10 +336,13 @@ void default_machine_kexec(struct kimage *image) pr_debug("kexec: Starting switchover sequence.\n"); /* switch to a staticly allocated stack. Based on irq stack code. + * We setup preempt_count to avoid using VMX in memcpy. * XXX: the task struct will likely be invalid once we do the copy! */ kexec_stack.thread_info.task = current_thread_info()->task; kexec_stack.thread_info.flags = 0; + kexec_stack.thread_info.preempt_count = HARDIRQ_OFFSET; + kexec_stack.thread_info.cpu = current_thread_info()->cpu; /* We need a static PACA, too; copy this CPU's PACA over and switch to * it. Also poison per_cpu_offset to catch anyone using non-static -- cgit v1.2.3 From a94a14720eaf55c5f06d6ca7ecbe3f87f6864fc6 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Tue, 7 May 2013 16:54:47 +0000 Subject: powerpc/rtas_flash: Fix validate_flash buffer overflow issue ibm,validate-flash-image RTAS call output buffer contains 150 - 200 bytes of data on latest system. Presently we have output buffer size as 64 bytes and we use sprintf to copy data from RTAS buffer to local buffer. This causes kernel oops (see below call trace). This patch increases local buffer size to 256 and also uses snprintf instead of sprintf to copy data from RTAS buffer. Kernel call trace : ------------------- Oops: Kernel access of bad area, sig: 11 [#1] SMP NR_CPUS=1024 NUMA pSeries Modules linked in: nfs fscache lockd auth_rpcgss nfs_acl sunrpc fuse loop dm_mod ipv6 ipv6_lib usb_storage ehea(X) sr_mod qlge ses cdrom enclosure st be2net sg ext3 jbd mbcache usbhid hid ohci_hcd ehci_hcd usbcore qla2xxx usb_common sd_mod crc_t10dif scsi_dh_hp_sw scsi_dh_rdac scsi_dh_alua scsi_dh_emc scsi_dh lpfc scsi_transport_fc scsi_tgt ipr(X) libata scsi_mod Supported: Yes NIP: 4520323031333130 LR: 4520323031333130 CTR: 0000000000000000 REGS: c0000001b91779b0 TRAP: 0400 Tainted: G X (3.0.13-0.27-ppc64) MSR: 8000000040009032 CR: 44022488 XER: 20000018 TASK = c0000001bca1aba0[4736] 'cat' THREAD: c0000001b9174000 CPU: 36 GPR00: 4520323031333130 c0000001b9177c30 c000000000f87c98 000000000000009b GPR04: c0000001b9177c4a 000000000000000b 3520323031333130 2032303133313031 GPR08: 3133313031350a4d 000000000000009b 0000000000000000 c0000000003664a4 GPR12: 0000000022022448 c000000003ee6c00 0000000000000002 00000000100e8a90 GPR16: 00000000100cb9d8 0000000010093370 000000001001d310 0000000000000000 GPR20: 0000000000008000 00000000100fae60 000000000000005e 0000000000000000 GPR24: 0000000010129350 46573738302e3030 2046573738302e30 300a4d4720323031 GPR28: 333130313520554e 4b4e4f574e0a4d47 2032303133313031 3520323031333130 NIP [4520323031333130] 0x4520323031333130 LR [4520323031333130] 0x4520323031333130 Call Trace: [c0000001b9177c30] [4520323031333130] 0x4520323031333130 (unreliable) Instruction dump: XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX Signed-off-by: Vasant Hegde Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/rtas_flash.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 5b302247012..2f3cdb01506 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -89,6 +89,7 @@ /* Array sizes */ #define VALIDATE_BUF_SIZE 4096 +#define VALIDATE_MSG_LEN 256 #define RTAS_MSG_MAXLEN 64 /* Quirk - RTAS requires 4k list length and block size */ @@ -466,7 +467,7 @@ static void validate_flash(struct rtas_validate_flash_t *args_buf) } static int get_validate_flash_msg(struct rtas_validate_flash_t *args_buf, - char *msg) + char *msg, int msglen) { int n; @@ -474,7 +475,8 @@ static int get_validate_flash_msg(struct rtas_validate_flash_t *args_buf, n = sprintf(msg, "%d\n", args_buf->update_results); if ((args_buf->update_results >= VALIDATE_CUR_UNKNOWN) || (args_buf->update_results == VALIDATE_TMP_UPDATE)) - n += sprintf(msg + n, "%s\n", args_buf->buf); + n += snprintf(msg + n, msglen - n, "%s\n", + args_buf->buf); } else { n = sprintf(msg, "%d\n", args_buf->status); } @@ -486,11 +488,11 @@ static ssize_t validate_flash_read(struct file *file, char __user *buf, { struct rtas_validate_flash_t *const args_buf = &rtas_validate_flash_data; - char msg[RTAS_MSG_MAXLEN]; + char msg[VALIDATE_MSG_LEN]; int msglen; mutex_lock(&rtas_validate_flash_mutex); - msglen = get_validate_flash_msg(args_buf, msg); + msglen = get_validate_flash_msg(args_buf, msg, VALIDATE_MSG_LEN); mutex_unlock(&rtas_validate_flash_mutex); return simple_read_from_buffer(buf, count, ppos, msg, msglen); -- cgit v1.2.3 From 120496ac2d2d60aee68d3123a68169502a85f4b5 Mon Sep 17 00:00:00 2001 From: Robert Jennings Date: Tue, 7 May 2013 04:34:11 +0000 Subject: powerpc: Bring all threads online prior to migration/hibernation This patch brings online all threads which are present but not online prior to migration/hibernation. After migration/hibernation those threads are taken back offline. During migration/hibernation all online CPUs must call H_JOIN, this is required by the hypervisor. Without this patch, threads that are offline (H_CEDE'd) will not be woken to make the H_JOIN call and the OS will be deadlocked (all threads either JOIN'd or CEDE'd). Cc: Signed-off-by: Robert Jennings Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/rtas.h | 2 + arch/powerpc/kernel/rtas.c | 113 +++++++++++++++++++++++++++++++ arch/powerpc/platforms/pseries/suspend.c | 22 ++++++ 3 files changed, 137 insertions(+) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index a8bc2bb4adc..34fd70488d8 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -264,6 +264,8 @@ extern void rtas_progress(char *s, unsigned short hex); extern void rtas_initialize(void); extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data); extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data); +extern int rtas_online_cpus_mask(cpumask_var_t cpus); +extern int rtas_offline_cpus_mask(cpumask_var_t cpus); extern int rtas_ibm_suspend_me(struct rtas_args *); struct rtc_time; diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 1fd6e7b2f39..52add6f3e20 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -807,6 +808,95 @@ static void rtas_percpu_suspend_me(void *info) __rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1); } +enum rtas_cpu_state { + DOWN, + UP, +}; + +#ifndef CONFIG_SMP +static int rtas_cpu_state_change_mask(enum rtas_cpu_state state, + cpumask_var_t cpus) +{ + if (!cpumask_empty(cpus)) { + cpumask_clear(cpus); + return -EINVAL; + } else + return 0; +} +#else +/* On return cpumask will be altered to indicate CPUs changed. + * CPUs with states changed will be set in the mask, + * CPUs with status unchanged will be unset in the mask. */ +static int rtas_cpu_state_change_mask(enum rtas_cpu_state state, + cpumask_var_t cpus) +{ + int cpu; + int cpuret = 0; + int ret = 0; + + if (cpumask_empty(cpus)) + return 0; + + for_each_cpu(cpu, cpus) { + switch (state) { + case DOWN: + cpuret = cpu_down(cpu); + break; + case UP: + cpuret = cpu_up(cpu); + break; + } + if (cpuret) { + pr_debug("%s: cpu_%s for cpu#%d returned %d.\n", + __func__, + ((state == UP) ? "up" : "down"), + cpu, cpuret); + if (!ret) + ret = cpuret; + if (state == UP) { + /* clear bits for unchanged cpus, return */ + cpumask_shift_right(cpus, cpus, cpu); + cpumask_shift_left(cpus, cpus, cpu); + break; + } else { + /* clear bit for unchanged cpu, continue */ + cpumask_clear_cpu(cpu, cpus); + } + } + } + + return ret; +} +#endif + +int rtas_online_cpus_mask(cpumask_var_t cpus) +{ + int ret; + + ret = rtas_cpu_state_change_mask(UP, cpus); + + if (ret) { + cpumask_var_t tmp_mask; + + if (!alloc_cpumask_var(&tmp_mask, GFP_TEMPORARY)) + return ret; + + /* Use tmp_mask to preserve cpus mask from first failure */ + cpumask_copy(tmp_mask, cpus); + rtas_offline_cpus_mask(tmp_mask); + free_cpumask_var(tmp_mask); + } + + return ret; +} +EXPORT_SYMBOL(rtas_online_cpus_mask); + +int rtas_offline_cpus_mask(cpumask_var_t cpus) +{ + return rtas_cpu_state_change_mask(DOWN, cpus); +} +EXPORT_SYMBOL(rtas_offline_cpus_mask); + int rtas_ibm_suspend_me(struct rtas_args *args) { long state; @@ -814,6 +904,8 @@ int rtas_ibm_suspend_me(struct rtas_args *args) unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; struct rtas_suspend_me_data data; DECLARE_COMPLETION_ONSTACK(done); + cpumask_var_t offline_mask; + int cpuret; if (!rtas_service_present("ibm,suspend-me")) return -ENOSYS; @@ -837,11 +929,24 @@ int rtas_ibm_suspend_me(struct rtas_args *args) return 0; } + if (!alloc_cpumask_var(&offline_mask, GFP_TEMPORARY)) + return -ENOMEM; + atomic_set(&data.working, 0); atomic_set(&data.done, 0); atomic_set(&data.error, 0); data.token = rtas_token("ibm,suspend-me"); data.complete = &done; + + /* All present CPUs must be online */ + cpumask_andnot(offline_mask, cpu_present_mask, cpu_online_mask); + cpuret = rtas_online_cpus_mask(offline_mask); + if (cpuret) { + pr_err("%s: Could not bring present CPUs online.\n", __func__); + atomic_set(&data.error, cpuret); + goto out; + } + stop_topology_update(); /* Call function on all CPUs. One of us will make the @@ -857,6 +962,14 @@ int rtas_ibm_suspend_me(struct rtas_args *args) start_topology_update(); + /* Take down CPUs not online prior to suspend */ + cpuret = rtas_offline_cpus_mask(offline_mask); + if (cpuret) + pr_warn("%s: Could not restore CPUs to offline state.\n", + __func__); + +out: + free_cpumask_var(offline_mask); return atomic_read(&data.error); } #else /* CONFIG_PPC_PSERIES */ diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 47226e04126..5f997e79d57 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -16,6 +16,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include #include #include #include @@ -126,11 +127,15 @@ static ssize_t store_hibernate(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { + cpumask_var_t offline_mask; int rc; if (!capable(CAP_SYS_ADMIN)) return -EPERM; + if (!alloc_cpumask_var(&offline_mask, GFP_TEMPORARY)) + return -ENOMEM; + stream_id = simple_strtoul(buf, NULL, 16); do { @@ -140,15 +145,32 @@ static ssize_t store_hibernate(struct device *dev, } while (rc == -EAGAIN); if (!rc) { + /* All present CPUs must be online */ + cpumask_andnot(offline_mask, cpu_present_mask, + cpu_online_mask); + rc = rtas_online_cpus_mask(offline_mask); + if (rc) { + pr_err("%s: Could not bring present CPUs online.\n", + __func__); + goto out; + } + stop_topology_update(); rc = pm_suspend(PM_SUSPEND_MEM); start_topology_update(); + + /* Take down CPUs not online prior to suspend */ + if (!rtas_offline_cpus_mask(offline_mask)) + pr_warn("%s: Could not restore CPUs to offline " + "state.\n", __func__); } stream_id = 0; if (!rc) rc = count; +out: + free_cpumask_var(offline_mask); return rc; } -- cgit v1.2.3 From b80ec3dc8e100f6c8b27c02cbb6079d401c3a054 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 6 May 2013 18:43:39 +0000 Subject: powerpc: Make CONFIG_RTAS_PROC depend on CONFIG_PROC_FS We are getting build errors with CONFIG_PROC_FS=n: arch/powerpc/kernel/rtas_flash.c In function 'rtas_flash_init': 745:33: error: unused variable 'f' [-Werror=unused-variable] But rtas_flash.c should not be built when CONFIG_PROC_FS=n, beacause all it does is provide a /proc interface to the RTAS flash routines. CONFIG_RTAS_FLASH already depends on CONFIG_RTAS_PROC, to indicate that it depends on the RTAS proc support, but CONFIG_RTAS_PROC does not depend on CONFIG_PROC_FS. So fix that. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 34d224be93b..632444f1555 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -128,7 +128,7 @@ config PPC_RTAS_DAEMON config RTAS_PROC bool "Proc interface to RTAS" - depends on PPC_RTAS + depends on PPC_RTAS && PROC_FS default y config RTAS_FLASH -- cgit v1.2.3 From af945cf4bfc6586f0739b8c7f67af75576dec25e Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Mon, 6 May 2013 22:44:41 +0000 Subject: powerpc: Fix MAX_STACK_TRACE_ENTRIES too low warning again Saw this warning again, and this time from the ret_from_fork path. It seems we could clear the back chain earlier in copy_thread(), which could cover both path, and also fix potential lockdep usage in schedule_tail(), or exception occurred before we clear the back chain. Signed-off-by: Li Zhong Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/entry_32.S | 2 -- arch/powerpc/kernel/entry_64.S | 2 -- arch/powerpc/kernel/process.c | 1 + 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index e514de57a12..d22e73e4618 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -439,8 +439,6 @@ ret_from_fork: ret_from_kernel_thread: REST_NVGPRS(r1) bl schedule_tail - li r3,0 - stw r3,0(r1) mtlr r14 mr r3,r15 PPC440EP_ERR42 diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 3fe5259e2fe..48e8a86e7cb 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -376,8 +376,6 @@ _GLOBAL(ret_from_fork) _GLOBAL(ret_from_kernel_thread) bl .schedule_tail REST_NVGPRS(r1) - li r3,0 - std r3,0(r1) ld r14, 0(r14) mtlr r14 mr r3,r15 diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index ceb4e7b62cf..21981a89e39 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -971,6 +971,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, * do some house keeping and then return from the fork or clone * system call, using the stack frame created above. */ + ((unsigned long *)sp)[0] = 0; sp -= sizeof(struct pt_regs); kregs = (struct pt_regs *) sp; sp -= STACK_FRAME_OVERHEAD; -- cgit v1.2.3 From 75b93da43aa1132bf23dafbb4badb028ccf78129 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 14 May 2013 15:10:02 +1000 Subject: powerpc/powernv: Detect OPAL v3 API version Future firmwares will support that new version Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/firmware.h | 4 +++- arch/powerpc/include/asm/opal.h | 3 ++- arch/powerpc/platforms/powernv/opal.c | 6 +++++- arch/powerpc/platforms/powernv/setup.c | 4 +++- 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 0df54646f96..681bc0314b6 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -52,6 +52,7 @@ #define FW_FEATURE_BEST_ENERGY ASM_CONST(0x0000000080000000) #define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000) #define FW_FEATURE_PRRN ASM_CONST(0x0000000200000000) +#define FW_FEATURE_OPALv3 ASM_CONST(0x0000000400000000) #ifndef __ASSEMBLY__ @@ -69,7 +70,8 @@ enum { FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY | FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN, FW_FEATURE_PSERIES_ALWAYS = 0, - FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_OPALv2, + FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_OPALv2 | + FW_FEATURE_OPALv3, FW_FEATURE_POWERNV_ALWAYS = 0, FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1, FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1, diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index b2906adb89d..cbb9305ab15 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -243,7 +243,8 @@ enum OpalMCE_TlbErrorType { enum OpalThreadStatus { OPAL_THREAD_INACTIVE = 0x0, - OPAL_THREAD_STARTED = 0x1 + OPAL_THREAD_STARTED = 0x1, + OPAL_THREAD_UNAVAILABLE = 0x2 /* opal-v3 */ }; enum OpalPciBusCompare { diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 27907cb18b8..628c564cead 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -56,7 +56,11 @@ int __init early_init_dt_scan_opal(unsigned long node, opal.entry, entryp, entrysz); powerpc_firmware_features |= FW_FEATURE_OPAL; - if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) { + if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) { + powerpc_firmware_features |= FW_FEATURE_OPALv2; + powerpc_firmware_features |= FW_FEATURE_OPALv3; + printk("OPAL V3 detected !\n"); + } else if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) { powerpc_firmware_features |= FW_FEATURE_OPALv2; printk("OPAL V2 detected !\n"); } else { diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index c20381caaa3..d4459bfc92f 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -78,7 +78,9 @@ static void pnv_show_cpuinfo(struct seq_file *m) if (root) model = of_get_property(root, "model", NULL); seq_printf(m, "machine\t\t: PowerNV %s\n", model); - if (firmware_has_feature(FW_FEATURE_OPALv2)) + if (firmware_has_feature(FW_FEATURE_OPALv3)) + seq_printf(m, "firmware\t: OPAL v3\n"); + else if (firmware_has_feature(FW_FEATURE_OPALv2)) seq_printf(m, "firmware\t: OPAL v2\n"); else if (firmware_has_feature(FW_FEATURE_OPAL)) seq_printf(m, "firmware\t: OPAL v1\n"); -- cgit v1.2.3 From b2b48584dfc69a5ebc70ee7976524d7122e5df5f Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 14 May 2013 15:12:31 +1000 Subject: powerpc/powernv: Fix starting of secondary CPUs on OPALv2 and v3 The current code fails to handle kexec on OPALv2. This fixes it and adds code to improve the situation on OPALv3 where we can query the CPU status from the firmware and decide what to do based on that. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/powernv/smp.c | 62 ++++++++++++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 6a3ecca5b72..88c9459c3e0 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -71,18 +71,68 @@ int pnv_smp_kick_cpu(int nr) BUG_ON(nr < 0 || nr >= NR_CPUS); - /* On OPAL v2 the CPU are still spinning inside OPAL itself, - * get them back now + /* + * If we already started or OPALv2 is not supported, we just + * kick the CPU via the PACA */ - if (!paca[nr].cpu_start && firmware_has_feature(FW_FEATURE_OPALv2)) { - pr_devel("OPAL: Starting CPU %d (HW 0x%x)...\n", nr, pcpu); - rc = opal_start_cpu(pcpu, start_here); + if (paca[nr].cpu_start || !firmware_has_feature(FW_FEATURE_OPALv2)) + goto kick; + + /* + * At this point, the CPU can either be spinning on the way in + * from kexec or be inside OPAL waiting to be started for the + * first time. OPAL v3 allows us to query OPAL to know if it + * has the CPUs, so we do that + */ + if (firmware_has_feature(FW_FEATURE_OPALv3)) { + uint8_t status; + + rc = opal_query_cpu_status(pcpu, &status); if (rc != OPAL_SUCCESS) { - pr_warn("OPAL Error %ld starting CPU %d\n", + pr_warn("OPAL Error %ld querying CPU %d state\n", rc, nr); return -ENODEV; } + + /* + * Already started, just kick it, probably coming from + * kexec and spinning + */ + if (status == OPAL_THREAD_STARTED) + goto kick; + + /* + * Available/inactive, let's kick it + */ + if (status == OPAL_THREAD_INACTIVE) { + pr_devel("OPAL: Starting CPU %d (HW 0x%x)...\n", + nr, pcpu); + rc = opal_start_cpu(pcpu, start_here); + if (rc != OPAL_SUCCESS) { + pr_warn("OPAL Error %ld starting CPU %d\n", + rc, nr); + return -ENODEV; + } + } else { + /* + * An unavailable CPU (or any other unknown status) + * shouldn't be started. It should also + * not be in the possible map but currently it can + * happen + */ + pr_devel("OPAL: CPU %d (HW 0x%x) is unavailable" + " (status %d)...\n", nr, pcpu, status); + return -ENODEV; + } + } else { + /* + * On OPAL v2, we just kick it and hope for the best, + * we must not test the error from opal_start_cpu() or + * we would fail to get CPUs from kexec. + */ + opal_start_cpu(pcpu, start_here); } + kick: return smp_generic_kick_cpu(nr); } -- cgit v1.2.3 From ca9d7aea5952c5e90bfe28ed738a7e962c0adad2 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 13 May 2013 00:23:38 +0000 Subject: powerpc: Provide __bswapdi2 Some versions of GCC apparently expect this to be provided by libgcc. Updates from Mikey to fix 32 bit version and adding "r" to registers. Signed-off-by: David Woodhouse Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/misc_32.S | 11 +++++++++++ arch/powerpc/kernel/misc_64.S | 11 +++++++++++ arch/powerpc/kernel/ppc_ksyms.c | 3 ++- 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 19e096bd0e7..e469f30e6ee 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -657,6 +657,17 @@ _GLOBAL(__ucmpdi2) li r3,2 blr +_GLOBAL(__bswapdi2) + rotlwi r9,r4,8 + rotlwi r10,r3,8 + rlwimi r9,r4,24,0,7 + rlwimi r10,r3,24,0,7 + rlwimi r9,r4,24,16,23 + rlwimi r10,r3,24,16,23 + mr r3,r9 + mr r4,r10 + blr + _GLOBAL(abs) srawi r4,r3,31 xor r3,r3,r4 diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 5cfa8008693..6820e45f557 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -234,6 +234,17 @@ _GLOBAL(__flush_dcache_icache) isync blr +_GLOBAL(__bswapdi2) + srdi r8,r3,32 + rlwinm r7,r3,8,0xffffffff + rlwimi r7,r3,24,0,7 + rlwinm r9,r8,8,0xffffffff + rlwimi r7,r3,24,16,23 + rlwimi r9,r8,24,0,7 + rlwimi r9,r8,24,16,23 + sldi r7,r7,32 + or r3,r7,r9 + blr #if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) /* diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 78b8766fd79..c2966658699 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -143,7 +143,8 @@ EXPORT_SYMBOL(__lshrdi3); int __ucmpdi2(unsigned long long, unsigned long long); EXPORT_SYMBOL(__ucmpdi2); #endif - +long long __bswapdi2(long long); +EXPORT_SYMBOL(__bswapdi2); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memmove); -- cgit v1.2.3 From dcb615aef988b57deef3d9b4557ff20f681a82b0 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Mon, 13 May 2013 00:57:49 +0000 Subject: powerpc: Fix irq_set_affinity() return values Signed-off-by: Alexander Gordeev Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/wsp/ics.c | 2 +- arch/powerpc/sysdev/ehv_pic.c | 2 +- arch/powerpc/sysdev/mpic.c | 2 +- arch/powerpc/sysdev/xics/ics-opal.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/wsp/ics.c b/arch/powerpc/platforms/wsp/ics.c index 97fe82ee863..2d3b1dd9571 100644 --- a/arch/powerpc/platforms/wsp/ics.c +++ b/arch/powerpc/platforms/wsp/ics.c @@ -361,7 +361,7 @@ static int wsp_chip_set_affinity(struct irq_data *d, xive = xive_set_server(xive, get_irq_server(ics, hw_irq)); wsp_ics_set_xive(ics, hw_irq, xive); - return 0; + return IRQ_SET_MASK_OK; } static struct irq_chip wsp_irq_chip = { diff --git a/arch/powerpc/sysdev/ehv_pic.c b/arch/powerpc/sysdev/ehv_pic.c index 6e0e1005227..9cd0e60716f 100644 --- a/arch/powerpc/sysdev/ehv_pic.c +++ b/arch/powerpc/sysdev/ehv_pic.c @@ -81,7 +81,7 @@ int ehv_pic_set_affinity(struct irq_data *d, const struct cpumask *dest, ev_int_set_config(src, config, prio, cpuid); spin_unlock_irqrestore(&ehv_pic_lock, flags); - return 0; + return IRQ_SET_MASK_OK; } static unsigned int ehv_pic_type_to_vecpri(unsigned int type) diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index ee21b5e71ae..0a13ecb270c 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -836,7 +836,7 @@ int mpic_set_affinity(struct irq_data *d, const struct cpumask *cpumask, mpic_physmask(mask)); } - return 0; + return IRQ_SET_MASK_OK; } static unsigned int mpic_type_to_vecpri(struct mpic *mpic, unsigned int type) diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c index f7e8609df0d..39d72212655 100644 --- a/arch/powerpc/sysdev/xics/ics-opal.c +++ b/arch/powerpc/sysdev/xics/ics-opal.c @@ -148,7 +148,7 @@ static int ics_opal_set_affinity(struct irq_data *d, __func__, d->irq, hw_irq, server, rc); return -1; } - return 0; + return IRQ_SET_MASK_OK; } static struct irq_chip ics_opal_irq_chip = { -- cgit v1.2.3 From 6cecf76b47ba6bea3c81d170afc2e0b244e5849c Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Mon, 13 May 2013 14:14:53 +0000 Subject: powerpc/booke64: Fix kernel hangs at kernel_dbg_exc MSR_DE is not cleared on entry to the kernel, and we don't clear it explicitly outside of debug code. If we have MSR_DE set in prime_debug_regs(), and the new thread has events enabled in DBCR0 (e.g. ICMP is set in thread->dbsr0, even though it was cleared in the real DBCR0 when the thread got scheduled out), we'll end up taking a debug exception in the kernel when DBCR0 is loaded. DSRR0 will not point to an exception vector, and the kernel ends up hanging at kernel_dbg_exc. Fix this by always clearing MSR_DE when we load new debug state. Another observed source of kernel_dbg_exc hangs is with the branch taken event. If this event is active, but we take a non-debug trap (e.g. a TLB miss or an asynchronous interrupt) before the next branch. We end up taking a branch-taken debug exception on the initial branch instruction of the exception vector, but because the debug exception is DBSR_BT rather than DBSR_IC we branch to kernel_dbg_exc before even checking the DSRR0 address. Fix this by checking for DBSR_BT as well as DBSR_IC, which is what 32-bit does and what the comments suggest was intended in the 64-bit code as well. Signed-off-by: Scott Wood Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/exceptions-64e.S | 8 ++++---- arch/powerpc/kernel/process.c | 7 +++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 42a756eec9f..645170a07ad 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -489,7 +489,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) */ mfspr r14,SPRN_DBSR /* check single-step/branch taken */ - andis. r15,r14,DBSR_IC@h + andis. r15,r14,(DBSR_IC|DBSR_BT)@h beq+ 1f LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e) @@ -500,7 +500,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) bge+ cr1,1f /* here it looks like we got an inappropriate debug exception. */ - lis r14,DBSR_IC@h /* clear the IC event */ + lis r14,(DBSR_IC|DBSR_BT)@h /* clear the event */ rlwinm r11,r11,0,~MSR_DE /* clear DE in the CSRR1 value */ mtspr SPRN_DBSR,r14 mtspr SPRN_CSRR1,r11 @@ -555,7 +555,7 @@ kernel_dbg_exc: */ mfspr r14,SPRN_DBSR /* check single-step/branch taken */ - andis. r15,r14,DBSR_IC@h + andis. r15,r14,(DBSR_IC|DBSR_BT)@h beq+ 1f LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e) @@ -566,7 +566,7 @@ kernel_dbg_exc: bge+ cr1,1f /* here it looks like we got an inappropriate debug exception. */ - lis r14,DBSR_IC@h /* clear the IC event */ + lis r14,(DBSR_IC|DBSR_BT)@h /* clear the event */ rlwinm r11,r11,0,~MSR_DE /* clear DE in the DSRR1 value */ mtspr SPRN_DBSR,r14 mtspr SPRN_DSRR1,r11 diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 21981a89e39..a902723fdc6 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -339,6 +339,13 @@ static void set_debug_reg_defaults(struct thread_struct *thread) static void prime_debug_regs(struct thread_struct *thread) { + /* + * We could have inherited MSR_DE from userspace, since + * it doesn't get cleared on exception entry. Make sure + * MSR_DE is clear before we enable any debug events. + */ + mtmsr(mfmsr() & ~MSR_DE); + mtspr(SPRN_IAC1, thread->iac1); mtspr(SPRN_IAC2, thread->iac2); #if CONFIG_PPC_ADV_DEBUG_IACS > 2 -- cgit v1.2.3 From 22ecbe8dcef4f6bf80ed4358600e8f6e03105c5c Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Mon, 13 May 2013 16:16:40 +0000 Subject: powerpc: Syscall hooks for context tracking subsystem This is the syscall slow path hooks for context tracking subsystem, corresponding to [PATCH] x86: Syscall hooks for userspace RCU extended QS commit bf5a3c13b939813d28ce26c01425054c740d6731 TIF_MEMDIE is moved to the second 16-bits (with value 17), as it seems there is no asm code using it. TIF_NOHZ is added to _TIF_SYCALL_T_OR_A, so it is better for it to be in the same 16 bits with others in the group, so in the asm code, andi. with this group could work. Signed-off-by: Li Zhong Acked-by: Frederic Weisbecker Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/thread_info.h | 7 +++++-- arch/powerpc/kernel/ptrace.c | 5 +++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 8ceea14d6fe..ba7b1973866 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -97,7 +97,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_PERFMON_CTXSW 6 /* perfmon needs ctxsw calls */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SINGLESTEP 8 /* singlestepping active */ -#define TIF_MEMDIE 9 /* is terminating due to OOM killer */ +#define TIF_NOHZ 9 /* in adaptive nohz mode */ #define TIF_SECCOMP 10 /* secure computing */ #define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */ #define TIF_NOERROR 12 /* Force successful syscall return */ @@ -106,6 +106,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SYSCALL_TRACEPOINT 15 /* syscall tracepoint instrumentation */ #define TIF_EMULATE_STACK_STORE 16 /* Is an instruction emulation for stack store? */ +#define TIF_MEMDIE 17 /* is terminating due to OOM killer */ /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1< #include #include +#include #include #include @@ -1788,6 +1789,8 @@ long do_syscall_trace_enter(struct pt_regs *regs) { long ret = 0; + user_exit(); + secure_computing_strict(regs->gpr[0]); if (test_thread_flag(TIF_SYSCALL_TRACE) && @@ -1832,4 +1835,6 @@ void do_syscall_trace_leave(struct pt_regs *regs) step = test_thread_flag(TIF_SINGLESTEP); if (step || test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, step); + + user_enter(); } -- cgit v1.2.3 From ba12eedee321eeb5baecaada285daeb3462c35f5 Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Mon, 13 May 2013 16:16:41 +0000 Subject: powerpc: Exception hooks for context tracking subsystem This is the exception hooks for context tracking subsystem, including data access, program check, single step, instruction breakpoint, machine check, alignment, fp unavailable, altivec assist, unknown exception, whose handlers might use RCU. This patch corresponds to [PATCH] x86: Exception hooks for userspace RCU extended QS commit 6ba3c97a38803883c2eee489505796cb0a727122 But after the exception handling moved to generic code, and some changes in following two commits: 56dd9470d7c8734f055da2a6bac553caf4a468eb context_tracking: Move exception handling to generic code 6c1e0256fad84a843d915414e4b5973b7443d48d context_tracking: Restore correct previous context state on exception exit it is able for exception hooks to use the generic code above instead of a redundant arch implementation. Signed-off-by: Li Zhong Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/traps.c | 80 +++++++++++++++++++++++++++++------------ arch/powerpc/mm/fault.c | 41 +++++++++++++-------- arch/powerpc/mm/hash_utils_64.c | 36 ++++++++++++++----- 3 files changed, 112 insertions(+), 45 deletions(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 83efa2f7d92..a7a648f6b75 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -667,6 +668,7 @@ int machine_check_generic(struct pt_regs *regs) void machine_check_exception(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); int recover = 0; __get_cpu_var(irq_stat).mce_exceptions++; @@ -683,7 +685,7 @@ void machine_check_exception(struct pt_regs *regs) recover = cur_cpu_spec->machine_check(regs); if (recover > 0) - return; + goto bail; #if defined(CONFIG_8xx) && defined(CONFIG_PCI) /* the qspan pci read routines can cause machine checks -- Cort @@ -693,20 +695,23 @@ void machine_check_exception(struct pt_regs *regs) * -- BenH */ bad_page_fault(regs, regs->dar, SIGBUS); - return; + goto bail; #endif if (debugger_fault_handler(regs)) - return; + goto bail; if (check_io_access(regs)) - return; + goto bail; die("Machine check", regs, SIGBUS); /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) panic("Unrecoverable Machine check"); + +bail: + exception_exit(prev_state); } void SMIException(struct pt_regs *regs) @@ -716,20 +721,29 @@ void SMIException(struct pt_regs *regs) void unknown_exception(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); + printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", regs->nip, regs->msr, regs->trap); _exception(SIGTRAP, regs, 0, 0); + + exception_exit(prev_state); } void instruction_breakpoint_exception(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); + if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) - return; + goto bail; if (debugger_iabr_match(regs)) - return; + goto bail; _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); + +bail: + exception_exit(prev_state); } void RunModeException(struct pt_regs *regs) @@ -739,15 +753,20 @@ void RunModeException(struct pt_regs *regs) void __kprobes single_step_exception(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); + clear_single_step(regs); if (notify_die(DIE_SSTEP, "single_step", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) - return; + goto bail; if (debugger_sstep(regs)) - return; + goto bail; _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); + +bail: + exception_exit(prev_state); } /* @@ -1005,6 +1024,7 @@ int is_valid_bugaddr(unsigned long addr) void __kprobes program_check_exception(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); unsigned int reason = get_reason(regs); extern int do_mathemu(struct pt_regs *regs); @@ -1014,26 +1034,26 @@ void __kprobes program_check_exception(struct pt_regs *regs) if (reason & REASON_FP) { /* IEEE FP exception */ parse_fpe(regs); - return; + goto bail; } if (reason & REASON_TRAP) { /* Debugger is first in line to stop recursive faults in * rcu_lock, notify_die, or atomic_notifier_call_chain */ if (debugger_bpt(regs)) - return; + goto bail; /* trap exception */ if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) - return; + goto bail; if (!(regs->msr & MSR_PR) && /* not user-mode */ report_bug(regs->nip, regs) == BUG_TRAP_TYPE_WARN) { regs->nip += 4; - return; + goto bail; } _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); - return; + goto bail; } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (reason & REASON_TM) { @@ -1049,7 +1069,7 @@ void __kprobes program_check_exception(struct pt_regs *regs) if (!user_mode(regs) && report_bug(regs->nip, regs) == BUG_TRAP_TYPE_WARN) { regs->nip += 4; - return; + goto bail; } /* If usermode caused this, it's done something illegal and * gets a SIGILL slap on the wrist. We call it an illegal @@ -1059,7 +1079,7 @@ void __kprobes program_check_exception(struct pt_regs *regs) */ if (user_mode(regs)) { _exception(SIGILL, regs, ILL_ILLOPN, regs->nip); - return; + goto bail; } else { printk(KERN_EMERG "Unexpected TM Bad Thing exception " "at %lx (msr 0x%x)\n", regs->nip, reason); @@ -1083,16 +1103,16 @@ void __kprobes program_check_exception(struct pt_regs *regs) switch (do_mathemu(regs)) { case 0: emulate_single_step(regs); - return; + goto bail; case 1: { int code = 0; code = __parse_fpscr(current->thread.fpscr.val); _exception(SIGFPE, regs, code, regs->nip); - return; + goto bail; } case -EFAULT: _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); - return; + goto bail; } /* fall through on any other errors */ #endif /* CONFIG_MATH_EMULATION */ @@ -1103,10 +1123,10 @@ void __kprobes program_check_exception(struct pt_regs *regs) case 0: regs->nip += 4; emulate_single_step(regs); - return; + goto bail; case -EFAULT: _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); - return; + goto bail; } } @@ -1114,10 +1134,14 @@ void __kprobes program_check_exception(struct pt_regs *regs) _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); else _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + +bail: + exception_exit(prev_state); } void alignment_exception(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); int sig, code, fixed = 0; /* We restore the interrupt state now */ @@ -1131,7 +1155,7 @@ void alignment_exception(struct pt_regs *regs) if (fixed == 1) { regs->nip += 4; /* skip over emulated instruction */ emulate_single_step(regs); - return; + goto bail; } /* Operand address was bad */ @@ -1146,6 +1170,9 @@ void alignment_exception(struct pt_regs *regs) _exception(sig, regs, code, regs->dar); else bad_page_fault(regs, regs->dar, sig); + +bail: + exception_exit(prev_state); } void StackOverflow(struct pt_regs *regs) @@ -1174,23 +1201,32 @@ void trace_syscall(struct pt_regs *regs) void kernel_fp_unavailable_exception(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); + printk(KERN_EMERG "Unrecoverable FP Unavailable Exception " "%lx at %lx\n", regs->trap, regs->nip); die("Unrecoverable FP Unavailable Exception", regs, SIGABRT); + + exception_exit(prev_state); } void altivec_unavailable_exception(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); + if (user_mode(regs)) { /* A user program has executed an altivec instruction, but this kernel doesn't support altivec. */ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - return; + goto bail; } printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception " "%lx at %lx\n", regs->trap, regs->nip); die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT); + +bail: + exception_exit(prev_state); } void vsx_unavailable_exception(struct pt_regs *regs) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 229951ffc35..8726779e140 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -196,6 +197,7 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, unsigned long error_code) { + enum ctx_state prev_state = exception_enter(); struct vm_area_struct * vma; struct mm_struct *mm = current->mm; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; @@ -204,6 +206,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, int trap = TRAP(regs); int is_exec = trap == 0x400; int fault; + int rc = 0; #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) /* @@ -230,28 +233,30 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, * look at it */ if (error_code & ICSWX_DSI_UCT) { - int rc = acop_handle_fault(regs, address, error_code); + rc = acop_handle_fault(regs, address, error_code); if (rc) - return rc; + goto bail; } #endif /* CONFIG_PPC_ICSWX */ if (notify_page_fault(regs)) - return 0; + goto bail; if (unlikely(debugger_fault_handler(regs))) - return 0; + goto bail; /* On a kernel SLB miss we can only check for a valid exception entry */ - if (!user_mode(regs) && (address >= TASK_SIZE)) - return SIGSEGV; + if (!user_mode(regs) && (address >= TASK_SIZE)) { + rc = SIGSEGV; + goto bail; + } #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE) || \ defined(CONFIG_PPC_BOOK3S_64)) if (error_code & DSISR_DABRMATCH) { /* breakpoint match */ do_break(regs, address, error_code); - return 0; + goto bail; } #endif @@ -260,8 +265,10 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, local_irq_enable(); if (in_atomic() || mm == NULL) { - if (!user_mode(regs)) - return SIGSEGV; + if (!user_mode(regs)) { + rc = SIGSEGV; + goto bail; + } /* in_atomic() in user mode is really bad, as is current->mm == NULL. */ printk(KERN_EMERG "Page fault in user mode with " @@ -417,9 +424,11 @@ good_area: */ fault = handle_mm_fault(mm, vma, address, flags); if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) { - int rc = mm_fault_error(regs, address, fault); + rc = mm_fault_error(regs, address, fault); if (rc >= MM_FAULT_RETURN) - return rc; + goto bail; + else + rc = 0; } /* @@ -454,7 +463,7 @@ good_area: } up_read(&mm->mmap_sem); - return 0; + goto bail; bad_area: up_read(&mm->mmap_sem); @@ -463,7 +472,7 @@ bad_area_nosemaphore: /* User mode accesses cause a SIGSEGV */ if (user_mode(regs)) { _exception(SIGSEGV, regs, code, address); - return 0; + goto bail; } if (is_exec && (error_code & DSISR_PROTFAULT)) @@ -471,7 +480,11 @@ bad_area_nosemaphore: " page (%lx) - exploit attempt? (uid: %d)\n", address, from_kuid(&init_user_ns, current_uid())); - return SIGSEGV; + rc = SIGSEGV; + +bail: + exception_exit(prev_state); + return rc; } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 88ac0eeaadd..e303a6d74e3 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -954,6 +955,7 @@ void hash_failure_debug(unsigned long ea, unsigned long access, */ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) { + enum ctx_state prev_state = exception_enter(); pgd_t *pgdir; unsigned long vsid; struct mm_struct *mm; @@ -973,7 +975,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) mm = current->mm; if (! mm) { DBG_LOW(" user region with no mm !\n"); - return 1; + rc = 1; + goto bail; } psize = get_slice_psize(mm, ea); ssize = user_segment_size(ea); @@ -992,19 +995,23 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) /* Not a valid range * Send the problem up to do_page_fault */ - return 1; + rc = 1; + goto bail; } DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid); /* Bad address. */ if (!vsid) { DBG_LOW("Bad address!\n"); - return 1; + rc = 1; + goto bail; } /* Get pgdir */ pgdir = mm->pgd; - if (pgdir == NULL) - return 1; + if (pgdir == NULL) { + rc = 1; + goto bail; + } /* Check CPU locality */ tmp = cpumask_of(smp_processor_id()); @@ -1027,7 +1034,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugeshift); if (ptep == NULL || !pte_present(*ptep)) { DBG_LOW(" no PTE !\n"); - return 1; + rc = 1; + goto bail; } /* Add _PAGE_PRESENT to the required access perm */ @@ -1038,13 +1046,16 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) */ if (access & ~pte_val(*ptep)) { DBG_LOW(" no access !\n"); - return 1; + rc = 1; + goto bail; } #ifdef CONFIG_HUGETLB_PAGE - if (hugeshift) - return __hash_page_huge(ea, access, vsid, ptep, trap, local, + if (hugeshift) { + rc = __hash_page_huge(ea, access, vsid, ptep, trap, local, ssize, hugeshift, psize); + goto bail; + } #endif /* CONFIG_HUGETLB_PAGE */ #ifndef CONFIG_PPC_64K_PAGES @@ -1124,6 +1135,9 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) pte_val(*(ptep + PTRS_PER_PTE))); #endif DBG_LOW(" -> rc=%d\n", rc); + +bail: + exception_exit(prev_state); return rc; } EXPORT_SYMBOL_GPL(hash_page); @@ -1259,6 +1273,8 @@ void flush_hash_range(unsigned long number, int local) */ void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc) { + enum ctx_state prev_state = exception_enter(); + if (user_mode(regs)) { #ifdef CONFIG_PPC_SUBPAGE_PROT if (rc == -2) @@ -1268,6 +1284,8 @@ void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc) _exception(SIGBUS, regs, BUS_ADRERR, address); } else bad_page_fault(regs, address, SIGBUS); + + exception_exit(prev_state); } long hpte_insert_repeating(unsigned long hash, unsigned long vpn, -- cgit v1.2.3 From 106ed886abc46723c290e82e5f161d9b40820203 Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Mon, 13 May 2013 16:16:42 +0000 Subject: powerpc: Exit user context on notify resume This patch allows RCU usage in do_notify_resume, e.g. signal handling. It corresponds to [PATCH] x86: Exit RCU extended QS on notify resume commit edf55fda35c7dc7f2d9241c3abaddaf759b457c6 Signed-off-by: Li Zhong Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/signal.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index cf12eae02de..d63b5024c56 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -159,6 +160,8 @@ static int do_signal(struct pt_regs *regs) void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) { + user_exit(); + if (thread_info_flags & _TIF_UPROBE) uprobe_notify_resume(regs); @@ -169,4 +172,6 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); } + + user_enter(); } -- cgit v1.2.3 From 5d1c57451154047ab31aadfbc712bb8aa174bfb3 Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Mon, 13 May 2013 16:16:43 +0000 Subject: powerpc: Use the new schedule_user API on userspace preemption This patch corresponds to [PATCH] x86: Use the new schedule_user API on userspace preemption commit 0430499ce9d78691f3985962021b16bf8f8a8048 Signed-off-by: Li Zhong Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/context_tracking.h | 10 ++++++++++ arch/powerpc/kernel/entry_64.S | 3 ++- 2 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/include/asm/context_tracking.h diff --git a/arch/powerpc/include/asm/context_tracking.h b/arch/powerpc/include/asm/context_tracking.h new file mode 100644 index 00000000000..b6f5a33b8ee --- /dev/null +++ b/arch/powerpc/include/asm/context_tracking.h @@ -0,0 +1,10 @@ +#ifndef _ASM_POWERPC_CONTEXT_TRACKING_H +#define _ASM_POWERPC_CONTEXT_TRACKING_H + +#ifdef CONFIG_CONTEXT_TRACKING +#define SCHEDULE_USER bl .schedule_user +#else +#define SCHEDULE_USER bl .schedule +#endif + +#endif diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 48e8a86e7cb..794889b588c 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -33,6 +33,7 @@ #include #include #include +#include /* * System calls. @@ -632,7 +633,7 @@ _GLOBAL(ret_from_except_lite) andi. r0,r4,_TIF_NEED_RESCHED beq 1f bl .restore_interrupts - bl .schedule + SCHEDULE_USER b .ret_from_except_lite 1: bl .save_nvgprs -- cgit v1.2.3 From a1797b2fd2051515689fd54a9db8fd20ebd5e5f7 Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Mon, 13 May 2013 16:16:44 +0000 Subject: powerpc: select HAVE_CONTEXT_TRACKING for pSeries Start context tracking support from pSeries. Signed-off-by: Li Zhong Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 9a0941bc4d3..023b288f895 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -18,6 +18,7 @@ config PPC_PSERIES select PPC_PCI_CHOICE if EXPERT select ZLIB_DEFLATE select PPC_DOORBELL + select HAVE_CONTEXT_TRACKING default y config PPC_SPLPAR -- cgit v1.2.3 From d52f2dc40b52201700001e868093c5ec827a8f33 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 13 May 2013 18:44:56 +0000 Subject: powerpc/perf: Move BHRB code into CONFIG_PPC64 region The new Branch History Rolling buffer (BHRB) code is only useful on 64bit processors, so move it into the #ifdef CONFIG_PPC64 region. This avoids code bloat on 32bit systems. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/perf/core-book3s.c | 248 ++++++++++++++++++++-------------------- 1 file changed, 127 insertions(+), 121 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index c627843c5b2..843bb8be838 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -100,6 +100,10 @@ static inline int siar_valid(struct pt_regs *regs) return 1; } +static inline void power_pmu_bhrb_enable(struct perf_event *event) {} +static inline void power_pmu_bhrb_disable(struct perf_event *event) {} +void power_pmu_flush_branch_stack(void) {} +static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {} #endif /* CONFIG_PPC32 */ static bool regs_use_siar(struct pt_regs *regs) @@ -308,6 +312,129 @@ static inline int siar_valid(struct pt_regs *regs) return 1; } + +/* Reset all possible BHRB entries */ +static void power_pmu_bhrb_reset(void) +{ + asm volatile(PPC_CLRBHRB); +} + +static void power_pmu_bhrb_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + if (!ppmu->bhrb_nr) + return; + + /* Clear BHRB if we changed task context to avoid data leaks */ + if (event->ctx->task && cpuhw->bhrb_context != event->ctx) { + power_pmu_bhrb_reset(); + cpuhw->bhrb_context = event->ctx; + } + cpuhw->bhrb_users++; +} + +static void power_pmu_bhrb_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + if (!ppmu->bhrb_nr) + return; + + cpuhw->bhrb_users--; + WARN_ON_ONCE(cpuhw->bhrb_users < 0); + + if (!cpuhw->disabled && !cpuhw->bhrb_users) { + /* BHRB cannot be turned off when other + * events are active on the PMU. + */ + + /* avoid stale pointer */ + cpuhw->bhrb_context = NULL; + } +} + +/* Called from ctxsw to prevent one process's branch entries to + * mingle with the other process's entries during context switch. + */ +void power_pmu_flush_branch_stack(void) +{ + if (ppmu->bhrb_nr) + power_pmu_bhrb_reset(); +} + + +/* Processing BHRB entries */ +static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) +{ + u64 val; + u64 addr; + int r_index, u_index, target, pred; + + r_index = 0; + u_index = 0; + while (r_index < ppmu->bhrb_nr) { + /* Assembly read function */ + val = read_bhrb(r_index); + + /* Terminal marker: End of valid BHRB entries */ + if (val == 0) { + break; + } else { + /* BHRB field break up */ + addr = val & BHRB_EA; + pred = val & BHRB_PREDICTION; + target = val & BHRB_TARGET; + + /* Probable Missed entry: Not applicable for POWER8 */ + if ((addr == 0) && (target == 0) && (pred == 1)) { + r_index++; + continue; + } + + /* Real Missed entry: Power8 based missed entry */ + if ((addr == 0) && (target == 1) && (pred == 1)) { + r_index++; + continue; + } + + /* Reserved condition: Not a valid entry */ + if ((addr == 0) && (target == 1) && (pred == 0)) { + r_index++; + continue; + } + + /* Is a target address */ + if (val & BHRB_TARGET) { + /* First address cannot be a target address */ + if (r_index == 0) { + r_index++; + continue; + } + + /* Update target address for the previous entry */ + cpuhw->bhrb_entries[u_index - 1].to = addr; + cpuhw->bhrb_entries[u_index - 1].mispred = pred; + cpuhw->bhrb_entries[u_index - 1].predicted = ~pred; + + /* Dont increment u_index */ + r_index++; + } else { + /* Update address, flags for current entry */ + cpuhw->bhrb_entries[u_index].from = addr; + cpuhw->bhrb_entries[u_index].mispred = pred; + cpuhw->bhrb_entries[u_index].predicted = ~pred; + + /* Successfully popullated one entry */ + u_index++; + r_index++; + } + } + } + cpuhw->bhrb_stack.nr = u_index; + return; +} + #endif /* CONFIG_PPC64 */ static void perf_event_interrupt(struct pt_regs *regs); @@ -904,47 +1031,6 @@ static int collect_events(struct perf_event *group, int max_count, return n; } -/* Reset all possible BHRB entries */ -static void power_pmu_bhrb_reset(void) -{ - asm volatile(PPC_CLRBHRB); -} - -void power_pmu_bhrb_enable(struct perf_event *event) -{ - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); - - if (!ppmu->bhrb_nr) - return; - - /* Clear BHRB if we changed task context to avoid data leaks */ - if (event->ctx->task && cpuhw->bhrb_context != event->ctx) { - power_pmu_bhrb_reset(); - cpuhw->bhrb_context = event->ctx; - } - cpuhw->bhrb_users++; -} - -void power_pmu_bhrb_disable(struct perf_event *event) -{ - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); - - if (!ppmu->bhrb_nr) - return; - - cpuhw->bhrb_users--; - WARN_ON_ONCE(cpuhw->bhrb_users < 0); - - if (!cpuhw->disabled && !cpuhw->bhrb_users) { - /* BHRB cannot be turned off when other - * events are active on the PMU. - */ - - /* avoid stale pointer */ - cpuhw->bhrb_context = NULL; - } -} - /* * Add a event to the PMU. * If all events are not already frozen, then we disable and @@ -1180,15 +1266,6 @@ int power_pmu_commit_txn(struct pmu *pmu) return 0; } -/* Called from ctxsw to prevent one process's branch entries to - * mingle with the other process's entries during context switch. - */ -void power_pmu_flush_branch_stack(void) -{ - if (ppmu->bhrb_nr) - power_pmu_bhrb_reset(); -} - /* * Return 1 if we might be able to put event on a limited PMC, * or 0 if not. @@ -1458,77 +1535,6 @@ struct pmu power_pmu = { .flush_branch_stack = power_pmu_flush_branch_stack, }; -/* Processing BHRB entries */ -void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) -{ - u64 val; - u64 addr; - int r_index, u_index, target, pred; - - r_index = 0; - u_index = 0; - while (r_index < ppmu->bhrb_nr) { - /* Assembly read function */ - val = read_bhrb(r_index); - - /* Terminal marker: End of valid BHRB entries */ - if (val == 0) { - break; - } else { - /* BHRB field break up */ - addr = val & BHRB_EA; - pred = val & BHRB_PREDICTION; - target = val & BHRB_TARGET; - - /* Probable Missed entry: Not applicable for POWER8 */ - if ((addr == 0) && (target == 0) && (pred == 1)) { - r_index++; - continue; - } - - /* Real Missed entry: Power8 based missed entry */ - if ((addr == 0) && (target == 1) && (pred == 1)) { - r_index++; - continue; - } - - /* Reserved condition: Not a valid entry */ - if ((addr == 0) && (target == 1) && (pred == 0)) { - r_index++; - continue; - } - - /* Is a target address */ - if (val & BHRB_TARGET) { - /* First address cannot be a target address */ - if (r_index == 0) { - r_index++; - continue; - } - - /* Update target address for the previous entry */ - cpuhw->bhrb_entries[u_index - 1].to = addr; - cpuhw->bhrb_entries[u_index - 1].mispred = pred; - cpuhw->bhrb_entries[u_index - 1].predicted = ~pred; - - /* Dont increment u_index */ - r_index++; - } else { - /* Update address, flags for current entry */ - cpuhw->bhrb_entries[u_index].from = addr; - cpuhw->bhrb_entries[u_index].mispred = pred; - cpuhw->bhrb_entries[u_index].predicted = ~pred; - - /* Successfully popullated one entry */ - u_index++; - r_index++; - } - } - } - cpuhw->bhrb_stack.nr = u_index; - return; -} - /* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled -- cgit v1.2.3 From 506e70d13236dfdb0bc15e0914298ab0a7b7f4df Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 13 May 2013 18:44:57 +0000 Subject: powerpc/pmu: Fix order of interpreting BHRB target entries The current Branch History Rolling Buffer (BHRB) code misinterprets the order of entries in the hardware buffer. It assumes that a branch target address will be read _after_ its corresponding branch. In reality the branch target comes before (lower mfbhrb entry) it's corresponding branch. This is a rewrite of the code to take this into account. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/perf/core-book3s.c | 89 +++++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 43 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 843bb8be838..3fdfe4575b8 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -363,72 +363,75 @@ void power_pmu_flush_branch_stack(void) power_pmu_bhrb_reset(); } - /* Processing BHRB entries */ -static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) +void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) { u64 val; u64 addr; - int r_index, u_index, target, pred; + int r_index, u_index, pred; r_index = 0; u_index = 0; while (r_index < ppmu->bhrb_nr) { /* Assembly read function */ - val = read_bhrb(r_index); - - /* Terminal marker: End of valid BHRB entries */ - if (val == 0) { + val = read_bhrb(r_index++); + if (!val) + /* Terminal marker: End of valid BHRB entries */ break; - } else { - /* BHRB field break up */ + else { addr = val & BHRB_EA; pred = val & BHRB_PREDICTION; - target = val & BHRB_TARGET; - /* Probable Missed entry: Not applicable for POWER8 */ - if ((addr == 0) && (target == 0) && (pred == 1)) { - r_index++; + if (!addr) + /* invalid entry */ continue; - } - /* Real Missed entry: Power8 based missed entry */ - if ((addr == 0) && (target == 1) && (pred == 1)) { - r_index++; - continue; - } - - /* Reserved condition: Not a valid entry */ - if ((addr == 0) && (target == 1) && (pred == 0)) { - r_index++; - continue; - } + /* Branches are read most recent first (ie. mfbhrb 0 is + * the most recent branch). + * There are two types of valid entries: + * 1) a target entry which is the to address of a + * computed goto like a blr,bctr,btar. The next + * entry read from the bhrb will be branch + * corresponding to this target (ie. the actual + * blr/bctr/btar instruction). + * 2) a from address which is an actual branch. If a + * target entry proceeds this, then this is the + * matching branch for that target. If this is not + * following a target entry, then this is a branch + * where the target is given as an immediate field + * in the instruction (ie. an i or b form branch). + * In this case we need to read the instruction from + * memory to determine the target/to address. + */ - /* Is a target address */ if (val & BHRB_TARGET) { - /* First address cannot be a target address */ - if (r_index == 0) { - r_index++; - continue; - } - - /* Update target address for the previous entry */ - cpuhw->bhrb_entries[u_index - 1].to = addr; - cpuhw->bhrb_entries[u_index - 1].mispred = pred; - cpuhw->bhrb_entries[u_index - 1].predicted = ~pred; + /* Target branches use two entries + * (ie. computed gotos/XL form) + */ + cpuhw->bhrb_entries[u_index].to = addr; + cpuhw->bhrb_entries[u_index].mispred = pred; + cpuhw->bhrb_entries[u_index].predicted = ~pred; - /* Dont increment u_index */ - r_index++; + /* Get from address in next entry */ + val = read_bhrb(r_index++); + addr = val & BHRB_EA; + if (val & BHRB_TARGET) { + /* Shouldn't have two targets in a + row.. Reset index and try again */ + r_index--; + addr = 0; + } + cpuhw->bhrb_entries[u_index].from = addr; } else { - /* Update address, flags for current entry */ + /* Branches to immediate field + (ie I or B form) */ cpuhw->bhrb_entries[u_index].from = addr; + cpuhw->bhrb_entries[u_index].to = 0; cpuhw->bhrb_entries[u_index].mispred = pred; cpuhw->bhrb_entries[u_index].predicted = ~pred; - - /* Successfully popullated one entry */ - u_index++; - r_index++; } + u_index++; + } } cpuhw->bhrb_stack.nr = u_index; -- cgit v1.2.3 From 691231846cebfe1fbbcf898c8af17a569dbb5463 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 13 May 2013 18:44:58 +0000 Subject: powerpc/perf: Fix setting of "to" addresses for BHRB Currently we only set the "to" address in the branch stack when the CPU explicitly gives us a value. Unfortunately it only does this for XL form branches (eg blr, bctr, bctar) and not I and B form branches (eg b, bc). Fortunately if we read the instruction from memory we can extract the offset of a branch and calculate the target address. This adds a function power_pmu_bhrb_to() to calculate the target/to address of the corresponding I and B form branches. It handles branches in both user and kernel spaces. It also plumbs this into the perf brhb reading code. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/perf/core-book3s.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 3fdfe4575b8..426180b8497 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -13,11 +13,13 @@ #include #include #include +#include #include #include #include #include #include +#include #define BHRB_MAX_ENTRIES 32 #define BHRB_TARGET 0x0000000000000002 @@ -362,6 +364,32 @@ void power_pmu_flush_branch_stack(void) if (ppmu->bhrb_nr) power_pmu_bhrb_reset(); } +/* Calculate the to address for a branch */ +static __u64 power_pmu_bhrb_to(u64 addr) +{ + unsigned int instr; + int ret; + __u64 target; + + if (is_kernel_addr(addr)) + return branch_target((unsigned int *)addr); + + /* Userspace: need copy instruction here then translate it */ + pagefault_disable(); + ret = __get_user_inatomic(instr, (unsigned int __user *)addr); + if (ret) { + pagefault_enable(); + return 0; + } + pagefault_enable(); + + target = branch_target(&instr); + if ((!target) || (instr & BRANCH_ABSOLUTE)) + return target; + + /* Translate relative branch target from kernel to user address */ + return target - (unsigned long)&instr + addr; +} /* Processing BHRB entries */ void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) @@ -426,7 +454,8 @@ void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) /* Branches to immediate field (ie I or B form) */ cpuhw->bhrb_entries[u_index].from = addr; - cpuhw->bhrb_entries[u_index].to = 0; + cpuhw->bhrb_entries[u_index].to = + power_pmu_bhrb_to(addr); cpuhw->bhrb_entries[u_index].mispred = pred; cpuhw->bhrb_entries[u_index].predicted = ~pred; } -- cgit v1.2.3 From e34166ad63eac4d0fa98b4c4ed7a98202a18faef Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 14 May 2013 17:02:11 +1000 Subject: powerpc: Set show_unhandled_signals to 1 by default Just like other architectures Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index d63b5024c56..577a8aa69c6 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -25,7 +25,7 @@ * through debug.exception-trace sysctl. */ -int show_unhandled_signals = 0; +int show_unhandled_signals = 1; /* * Allocate space for the signal frame -- cgit v1.2.3