From 99bcc0590806c4f7a4ecf1a11add335b56cde963 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 27 Jun 2005 13:59:43 +0100 Subject: [PATCH] ARM: Add missed AAEC2000 file My scripts missed committing this file. Signed-off-by: Russell King --- arch/arm/mach-aaec2000/Makefile.boot | 1 + 1 file changed, 1 insertion(+) create mode 100644 arch/arm/mach-aaec2000/Makefile.boot (limited to 'arch') diff --git a/arch/arm/mach-aaec2000/Makefile.boot b/arch/arm/mach-aaec2000/Makefile.boot new file mode 100644 index 00000000000..8f5a8b7c53c --- /dev/null +++ b/arch/arm/mach-aaec2000/Makefile.boot @@ -0,0 +1 @@ + zreladdr-y := 0xf0008000 -- cgit v1.2.3 From 2ea83398b75309d8fdc999c4bb252e72d7e4fd9d Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 27 Jun 2005 14:04:05 +0100 Subject: [PATCH] ARM: Add VST idle loop call This call allows the dynamic tick support to reprogram the timer immediately before the CPU idles. Signed-off-by: Russell King --- arch/arm/kernel/process.c | 5 ++++- arch/arm/kernel/time.c | 10 +++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 8f146a4b475..bbea636ff68 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -32,6 +32,7 @@ #include #include #include +#include extern const char *processor_modes[]; extern void setup_mm_for_reboot(char mode); @@ -85,8 +86,10 @@ EXPORT_SYMBOL(pm_power_off); void default_idle(void) { local_irq_disable(); - if (!need_resched() && !hlt_counter) + if (!need_resched() && !hlt_counter) { + timer_dyn_reprogram(); arch_idle(); + } local_irq_enable(); } diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index 06054c9ba07..1b7fcd50c3e 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -424,15 +424,19 @@ static int timer_dyn_tick_disable(void) return ret; } +/* + * Reprogram the system timer for at least the calculated time interval. 
+ * This function should be called from the idle thread with IRQs disabled, + * immediately before sleeping. + */ void timer_dyn_reprogram(void) { struct dyn_tick_timer *dyn_tick = system_timer->dyn_tick; - unsigned long flags; - write_seqlock_irqsave(&xtime_lock, flags); + write_seqlock(&xtime_lock); if (dyn_tick->state & DYN_TICK_ENABLED) dyn_tick->reprogram(next_timer_interrupt() - jiffies); - write_sequnlock_irqrestore(&xtime_lock, flags); + write_sequnlock(&xtime_lock); } static ssize_t timer_show_dyn_tick(struct sys_device *dev, char *buf) -- cgit v1.2.3 From a343e6075a396e07eeff52c0da5629c8fd396be2 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 27 Jun 2005 14:08:56 +0100 Subject: [PATCH] ARM: Move PGD kernel page table initialisation It doesn't make sense to have the PGD kernel pointers initialisation separate from the PGD user pointers, especially when we clean the data cache over the whole range. Signed-off-by: Russell King --- arch/arm/mm/mm-armv.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/mm-armv.c b/arch/arm/mm/mm-armv.c index 2c2b93d77d4..d79864a0dfa 100644 --- a/arch/arm/mm/mm-armv.c +++ b/arch/arm/mm/mm-armv.c @@ -169,7 +169,14 @@ pgd_t *get_pgd_slow(struct mm_struct *mm) memzero(new_pgd, FIRST_KERNEL_PGD_NR * sizeof(pgd_t)); + /* + * Copy over the kernel and IO PGD entries + */ init_pgd = pgd_offset_k(0); + memcpy(new_pgd + FIRST_KERNEL_PGD_NR, init_pgd + FIRST_KERNEL_PGD_NR, + (PTRS_PER_PGD - FIRST_KERNEL_PGD_NR) * sizeof(pgd_t)); + + clean_dcache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t)); if (!vectors_high()) { /* @@ -198,14 +205,6 @@ pgd_t *get_pgd_slow(struct mm_struct *mm) spin_unlock(&mm->page_table_lock); } - /* - * Copy over the kernel and IO PGD entries - */ - memcpy(new_pgd + FIRST_KERNEL_PGD_NR, init_pgd + FIRST_KERNEL_PGD_NR, - (PTRS_PER_PGD - FIRST_KERNEL_PGD_NR) * sizeof(pgd_t)); - - clean_dcache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t)); - return 
new_pgd; no_pte: -- cgit v1.2.3 From a013053d4965d9a45300938e713a4b512e0257d8 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 27 Jun 2005 14:16:47 +0100 Subject: [PATCH] ARM: Move memmap freeing into init.c It doesn't make sense for this to be in mm-armv.c now that 26-bit ARM support is no longer integrated into arch/arm. Signed-off-by: Russell King --- arch/arm/mm/init.c | 71 +++++++++++++++++++++++++++++++++++++++++++++----- arch/arm/mm/mm-armv.c | 72 --------------------------------------------------- 2 files changed, 65 insertions(+), 78 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index c08710b1ff0..6dcb23d64bf 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -522,6 +522,69 @@ static inline void free_area(unsigned long addr, unsigned long end, char *s) printk(KERN_INFO "Freeing %s memory: %dK\n", s, size); } +static inline void +free_memmap(int node, unsigned long start_pfn, unsigned long end_pfn) +{ + struct page *start_pg, *end_pg; + unsigned long pg, pgend; + + /* + * Convert start_pfn/end_pfn to a struct page pointer. + */ + start_pg = pfn_to_page(start_pfn); + end_pg = pfn_to_page(end_pfn); + + /* + * Convert to physical addresses, and + * round start upwards and end downwards. + */ + pg = PAGE_ALIGN(__pa(start_pg)); + pgend = __pa(end_pg) & PAGE_MASK; + + /* + * If there are free pages between these, + * free the section of the memmap array. + */ + if (pg < pgend) + free_bootmem_node(NODE_DATA(node), pg, pgend - pg); +} + +/* + * The mem_map array can get very big. Free the unused area of the memory map. + */ +static void __init free_unused_memmap_node(int node, struct meminfo *mi) +{ + unsigned long bank_start, prev_bank_end = 0; + unsigned int i; + + /* + * [FIXME] This relies on each bank being in address order. This + * may not be the case, especially if the user has provided the + * information on the command line. 
+ */ + for (i = 0; i < mi->nr_banks; i++) { + if (mi->bank[i].size == 0 || mi->bank[i].node != node) + continue; + + bank_start = mi->bank[i].start >> PAGE_SHIFT; + if (bank_start < prev_bank_end) { + printk(KERN_ERR "MEM: unordered memory banks. " + "Not freeing memmap.\n"); + break; + } + + /* + * If we had a previous bank, and there is a space + * between the current bank and the previous, free it. + */ + if (prev_bank_end && prev_bank_end != bank_start) + free_memmap(node, prev_bank_end, bank_start); + + prev_bank_end = PAGE_ALIGN(mi->bank[i].start + + mi->bank[i].size) >> PAGE_SHIFT; + } +} + /* * mem_init() marks the free areas in the mem_map and tells us how much * memory is free. This is done after various parts of the system have @@ -540,16 +603,12 @@ void __init mem_init(void) max_mapnr = virt_to_page(high_memory) - mem_map; #endif - /* - * We may have non-contiguous memory. - */ - if (meminfo.nr_banks != 1) - create_memmap_holes(&meminfo); - /* this will put all unused low memory onto the freelists */ for_each_online_node(node) { pg_data_t *pgdat = NODE_DATA(node); + free_unused_memmap_node(node, &meminfo); + if (pgdat->node_spanned_pages != 0) totalram_pages += free_all_bootmem_node(pgdat); } diff --git a/arch/arm/mm/mm-armv.c b/arch/arm/mm/mm-armv.c index d79864a0dfa..052ab443ec4 100644 --- a/arch/arm/mm/mm-armv.c +++ b/arch/arm/mm/mm-armv.c @@ -697,75 +697,3 @@ void __init iotable_init(struct map_desc *io_desc, int nr) for (i = 0; i < nr; i++) create_mapping(io_desc + i); } - -static inline void -free_memmap(int node, unsigned long start_pfn, unsigned long end_pfn) -{ - struct page *start_pg, *end_pg; - unsigned long pg, pgend; - - /* - * Convert start_pfn/end_pfn to a struct page pointer. - */ - start_pg = pfn_to_page(start_pfn); - end_pg = pfn_to_page(end_pfn); - - /* - * Convert to physical addresses, and - * round start upwards and end downwards. 
- */ - pg = PAGE_ALIGN(__pa(start_pg)); - pgend = __pa(end_pg) & PAGE_MASK; - - /* - * If there are free pages between these, - * free the section of the memmap array. - */ - if (pg < pgend) - free_bootmem_node(NODE_DATA(node), pg, pgend - pg); -} - -static inline void free_unused_memmap_node(int node, struct meminfo *mi) -{ - unsigned long bank_start, prev_bank_end = 0; - unsigned int i; - - /* - * [FIXME] This relies on each bank being in address order. This - * may not be the case, especially if the user has provided the - * information on the command line. - */ - for (i = 0; i < mi->nr_banks; i++) { - if (mi->bank[i].size == 0 || mi->bank[i].node != node) - continue; - - bank_start = mi->bank[i].start >> PAGE_SHIFT; - if (bank_start < prev_bank_end) { - printk(KERN_ERR "MEM: unordered memory banks. " - "Not freeing memmap.\n"); - break; - } - - /* - * If we had a previous bank, and there is a space - * between the current bank and the previous, free it. - */ - if (prev_bank_end && prev_bank_end != bank_start) - free_memmap(node, prev_bank_end, bank_start); - - prev_bank_end = PAGE_ALIGN(mi->bank[i].start + - mi->bank[i].size) >> PAGE_SHIFT; - } -} - -/* - * The mem_map array can get very big. Free - * the unused area of the memory map. 
- */ -void __init create_memmap_holes(struct meminfo *mi) -{ - int node; - - for_each_online_node(node) - free_unused_memmap_node(node, mi); -} -- cgit v1.2.3 From f3bb742640338eca0d8c3fa8071df89168efbf0a Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 27 Jun 2005 14:49:10 +0100 Subject: [PATCH] ARM: Update mach-types Signed-off-by: Russell King --- arch/arm/tools/mach-types | 69 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 66 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/tools/mach-types b/arch/arm/tools/mach-types index 30c1dfbb052..6d3a79e5fef 100644 --- a/arch/arm/tools/mach-types +++ b/arch/arm/tools/mach-types @@ -6,7 +6,7 @@ # To add an entry into this database, please see Documentation/arm/README, # or contact rmk@arm.linux.org.uk # -# Last update: Thu Mar 24 14:34:50 2005 +# Last update: Thu Jun 23 20:19:33 2005 # # machine_is_xxx CONFIG_xxxx MACH_TYPE_xxx number # @@ -243,7 +243,7 @@ yoho ARCH_YOHO YOHO 231 jasper ARCH_JASPER JASPER 232 dsc25 ARCH_DSC25 DSC25 233 omap_innovator MACH_OMAP_INNOVATOR OMAP_INNOVATOR 234 -ramses ARCH_RAMSES RAMSES 235 +mnci ARCH_RAMSES RAMSES 235 s28x ARCH_S28X S28X 236 mport3 ARCH_MPORT3 MPORT3 237 pxa_eagle250 ARCH_PXA_EAGLE250 PXA_EAGLE250 238 @@ -323,7 +323,7 @@ nimbra29x ARCH_NIMBRA29X NIMBRA29X 311 nimbra210 ARCH_NIMBRA210 NIMBRA210 312 hhp_d95xx ARCH_HHP_D95XX HHP_D95XX 313 labarm ARCH_LABARM LABARM 314 -m825xx ARCH_M825XX M825XX 315 +comcerto ARCH_M825XX M825XX 315 m7100 SA1100_M7100 M7100 316 nipc2 ARCH_NIPC2 NIPC2 317 fu7202 ARCH_FU7202 FU7202 318 @@ -724,3 +724,66 @@ lpc22xx MACH_LPC22XX LPC22XX 715 omap_comet3 MACH_COMET3 COMET3 716 omap_comet4 MACH_COMET4 COMET4 717 csb625 MACH_CSB625 CSB625 718 +fortunet2 MACH_FORTUNET2 FORTUNET2 719 +s5h2200 MACH_S5H2200 S5H2200 720 +optorm920 MACH_OPTORM920 OPTORM920 721 +adsbitsyxb MACH_ADSBITSYXB ADSBITSYXB 722 +adssphere MACH_ADSSPHERE ADSSPHERE 723 +adsportal MACH_ADSPORTAL ADSPORTAL 724 +ln2410sbc MACH_LN2410SBC LN2410SBC 
725 +cb3rufc MACH_CB3RUFC CB3RUFC 726 +mp2usb MACH_MP2USB MP2USB 727 +ntnp425c MACH_NTNP425C NTNP425C 728 +colibri MACH_COLIBRI COLIBRI 729 +pcm7220 MACH_PCM7220 PCM7220 730 +gateway7001 MACH_GATEWAY7001 GATEWAY7001 731 +pcm027 MACH_PCM027 PCM027 732 +cmpxa MACH_CMPXA CMPXA 733 +anubis MACH_ANUBIS ANUBIS 734 +ite8152 MACH_ITE8152 ITE8152 735 +lpc3xxx MACH_LPC3XXX LPC3XXX 736 +puppeteer MACH_PUPPETEER PUPPETEER 737 +vt001 MACH_MACH_VADATECH MACH_VADATECH 738 +e570 MACH_E570 E570 739 +x50 MACH_X50 X50 740 +recon MACH_RECON RECON 741 +xboardgp8 MACH_XBOARDGP8 XBOARDGP8 742 +fpic2 MACH_FPIC2 FPIC2 743 +akita MACH_AKITA AKITA 744 +a81 MACH_A81 A81 745 +svm_sc25x MACH_SVM_SC25X SVM_SC25X 746 +vt020 MACH_VADATECH020 VADATECH020 747 +tli MACH_TLI TLI 748 +edb9315lc MACH_EDB9315LC EDB9315LC 749 +passec MACH_PASSEC PASSEC 750 +ds_tiger MACH_DS_TIGER DS_TIGER 751 +e310 MACH_E310 E310 752 +e330 MACH_E330 E330 753 +rt3000 MACH_RT3000 RT3000 754 +nokia770 MACH_NOKIA770 NOKIA770 755 +pnx0106 MACH_PNX0106 PNX0106 756 +hx21xx MACH_HX21XX HX21XX 757 +faraday MACH_FARADAY FARADAY 758 +sbc9312 MACH_SBC9312 SBC9312 759 +batman MACH_BATMAN BATMAN 760 +jpd201 MACH_JPD201 JPD201 761 +mipsa MACH_MIPSA MIPSA 762 +kacom MACH_KACOM KACOM 763 +swarcocpu MACH_SWARCOCPU SWARCOCPU 764 +swarcodsl MACH_SWARCODSL SWARCODSL 765 +blueangel MACH_BLUEANGEL BLUEANGEL 766 +hairygrama MACH_HAIRYGRAMA HAIRYGRAMA 767 +banff MACH_BANFF BANFF 768 +carmeva MACH_CARMEVA CARMEVA 769 +sam255 MACH_SAM255 SAM255 770 +ppm10 MACH_PPM10 PPM10 771 +edb9315a MACH_EDB9315A EDB9315A 772 +sunset MACH_SUNSET SUNSET 773 +stargate2 MACH_STARGATE2 STARGATE2 774 +intelmote2 MACH_INTELMOTE2 INTELMOTE2 775 +trizeps4 MACH_TRIZEPS4 TRIZEPS4 776 +mainstone2 MACH_MAINSTONE2 MAINSTONE2 777 +ez_ixp42x MACH_EZ_IXP42X EZ_IXP42X 778 +tapwave_zodiac MACH_TAPWAVE_ZODIAC TAPWAVE_ZODIAC 779 +universalmeter MACH_UNIVERSALMETER UNIVERSALMETER 780 +hicoarm9 MACH_HICOARM9 HICOARM9 781 -- cgit v1.2.3 From 22e2c507c301c3dbbcf91b4948b88f78842ee6c9 
Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 27 Jun 2005 10:55:12 +0200 Subject: [PATCH] Update cfq io scheduler to time sliced design This updates the CFQ io scheduler to the new time sliced design (cfq v3). It provides full process fairness, while giving excellent aggregate system throughput even for many competing processes. It supports io priorities, either inherited from the cpu nice value or set directly with the ioprio_get/set syscalls. The latter closely mimic set/getpriority. This import is based on my latest from -mm. Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- arch/i386/kernel/syscall_table.S | 2 ++ arch/ia64/kernel/entry.S | 4 ++-- arch/ppc/kernel/misc.S | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index 442a6e937b1..3db9a04aec6 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -289,3 +289,5 @@ ENTRY(sys_call_table) .long sys_add_key .long sys_request_key .long sys_keyctl + .long sys_ioprio_set + .long sys_ioprio_get /* 290 */ diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index b1d5d3d5276..785a51b0ad8 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1577,8 +1577,8 @@ sys_call_table: data8 sys_add_key data8 sys_request_key data8 sys_keyctl - data8 sys_ni_syscall - data8 sys_ni_syscall // 1275 + data8 sys_ioprio_set + data8 sys_ioprio_get // 1275 data8 sys_set_zone_reclaim data8 sys_ni_syscall data8 sys_ni_syscall diff --git a/arch/ppc/kernel/misc.S b/arch/ppc/kernel/misc.S index b6a63a49a23..191a8def3bd 100644 --- a/arch/ppc/kernel/misc.S +++ b/arch/ppc/kernel/misc.S @@ -1449,3 +1449,5 @@ _GLOBAL(sys_call_table) .long sys_request_key /* 270 */ .long sys_keyctl .long sys_waitid + .long sys_ioprio_set + .long sys_ioprio_get -- cgit v1.2.3 From 313980c92724cf42877a7bdafdef439ee9d68ccb Mon Sep 17 00:00:00 2001 From: David Brownell Date: Mon, 11 
Apr 2005 15:38:25 -0700 Subject: [PATCH] USB: omap_udc updates (mostly cleanups) Various USB patches, mostly for portability: - Fifo mode 1 didn't work previously (oopsed), so now it's fixed and (why not) defines even more endpoints for composite devices. - OMAP 1710 doesn't have an internal transceiver. - Small PM update: if the USB link is suspended, don't disconnect on entry to deep sleep. - Be more correct about handling zero length control reads. OMAP seems to mis-handle that protocol peculiarity though; best avoided. - Platform device resources (for UDC and OTG controllers) now use physical addresses, so /proc/iomem is more consistent. - Minor cleanups, notably (by volume) for "sparse" NULL warnings. Signed-off-by: David Brownell --- arch/arm/mach-omap/usb.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap/usb.c b/arch/arm/mach-omap/usb.c index 6e805d451d0..7f37857b1a2 100644 --- a/arch/arm/mach-omap/usb.c +++ b/arch/arm/mach-omap/usb.c @@ -288,8 +288,8 @@ static void usb_release(struct device *dev) static struct resource udc_resources[] = { /* order is significant! */ { /* registers */ - .start = IO_ADDRESS(UDC_BASE), - .end = IO_ADDRESS(UDC_BASE + 0xff), + .start = UDC_BASE, + .end = UDC_BASE + 0xff, .flags = IORESOURCE_MEM, }, { /* general IRQ */ .start = IH2_BASE + 20, @@ -355,8 +355,8 @@ static struct platform_device ohci_device = { static struct resource otg_resources[] = { /* order is significant! */ { - .start = IO_ADDRESS(OTG_BASE), - .end = IO_ADDRESS(OTG_BASE + 0xff), + .start = OTG_BASE, + .end = OTG_BASE + 0xff, .flags = IORESOURCE_MEM, }, { .start = IH2_BASE + 8, -- cgit v1.2.3 From 9c4142a133f7efee08238722b157656c3da7ca97 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Mon, 27 Jun 2005 14:36:16 -0700 Subject: [PATCH] ppc32: Fix compiling of sandpoint platform Lost a curly brace in translation. Everything is better now. 
Signed-off-by: Matt McClintock Signed-off-by: Kumar Gala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc/platforms/sandpoint.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/ppc/platforms/sandpoint.c b/arch/ppc/platforms/sandpoint.c index 70e58f43f2b..8b149c2fc54 100644 --- a/arch/ppc/platforms/sandpoint.c +++ b/arch/ppc/platforms/sandpoint.c @@ -324,6 +324,7 @@ sandpoint_setup_arch(void) pdata[1].irq = 0; pdata[1].mapbase = 0; } + } printk(KERN_INFO "Motorola SPS Sandpoint Test Platform\n"); printk(KERN_INFO "Port by MontaVista Software, Inc. (source@mvista.com)\n"); -- cgit v1.2.3 From d4b3a80e399c989028acd5185c792fab82eda035 Mon Sep 17 00:00:00 2001 From: Yoichi Yuasa Date: Mon, 27 Jun 2005 14:36:30 -0700 Subject: [PATCH] mips: fixed try_to_freeze build error arch/mips/kernel/signal.c: In function 'do_signal': arch/mips/kernel/signal.c:460: error: too many arguments to function 'try_to_freeze' Signed-off-by: Yoichi Yuasa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/kernel/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index 508026ae584..65ee15396ff 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c @@ -457,7 +457,7 @@ static int do_signal(sigset_t *oldset, struct pt_regs *regs) if (!user_mode(regs)) return 1; - if (try_to_freeze(0)) + if (try_to_freeze()) goto no_signal; if (!oldset) -- cgit v1.2.3 From bb1657468152c5e5232c7bf35cf0e9c41b5d9910 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 27 Jun 2005 13:09:00 -0300 Subject: [PATCH] 8xx: avoid "dcbst" misbehaviour with unpopulated TLB The proposed _tlbie call at update_mmu_cache() is safe because: Addresses for which update_mmu_cache() gets invocated are never inside the static kernel virtual mapping, meaning that there is no risk for the _tlbie() here to be thrashing the pinned entry, as Dan suspected. 
The intermediate TLB state in which this bug can be triggered is not visible by userspace or any other contexts, except the page fault handling path. So there is no need to worry about userspace dcbxxx users. The other solution to this is to avoid dcbst misbehaviour in the first place, which involves changing in-kernel "dcbst" callers to use 8xx specific SPR's. Summary: On 8xx, cache control instructions (particularly "dcbst" from flush_dcache_icache) fault as write operation if there is an unpopulated TLB entry for the address in question. To workaround that, we invalidate the TLB here, thus avoiding dcbst misbehaviour. Signed-off-by: Marcelo Tosatti Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc/mm/init.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/ppc/mm/init.c b/arch/ppc/mm/init.c index 334ef4150d9..6164a2b3473 100644 --- a/arch/ppc/mm/init.c +++ b/arch/ppc/mm/init.c @@ -606,9 +606,19 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, struct page *page = pfn_to_page(pfn); if (!PageReserved(page) && !test_bit(PG_arch_1, &page->flags)) { - if (vma->vm_mm == current->active_mm) + if (vma->vm_mm == current->active_mm) { +#ifdef CONFIG_8xx + /* On 8xx, cache control instructions (particularly + * "dcbst" from flush_dcache_icache) fault as write + * operation if there is an unpopulated TLB entry + * for the address in question. To workaround that, + * we invalidate the TLB here, thus avoiding dcbst + * misbehaviour. 
+ */ + _tlbie(address); +#endif __flush_dcache_icache((void *) address); - else + } else flush_dcache_icache_page(page); set_bit(PG_arch_1, &page->flags); } -- cgit v1.2.3 From e4ee69c8c1e7ff9790fbce29c7be50db57323a6f Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 27 Jun 2005 14:36:32 -0700 Subject: [PATCH] ppc32: Bump PMU interrupt priority The Power Management Unit on PowerMacs is very sensitive to timeouts during async message exchanges. It uses a rather crude protocol based on a shift register with an interrupt and is almost continuously exchanging messages with the host CPU on laptops. This patch adds a routine to the open_pic driver to be able to select a PMU driver so that it bumps its interrupt priority to above the normal level. This will allow PMU interrupts to occur while another interrupt is pending, and thus reduce the risk of the machine being abruptly shut down by the PMU due to a timeout in PMU communication caused by excessive interrupt latency. The problem is very rare, and usually just doesn't happen, but it is still useful to make things even more robust. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc/syslib/open_pic.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/ppc/syslib/open_pic.c b/arch/ppc/syslib/open_pic.c index b45d8268bf9..ad39b86ca92 100644 --- a/arch/ppc/syslib/open_pic.c +++ b/arch/ppc/syslib/open_pic.c @@ -370,8 +370,9 @@ void __init openpic_init(int offset) /* Initialize IPI interrupts */ if ( ppc_md.progress ) ppc_md.progress("openpic: ipi",0x3bb); for (i = 0; i < OPENPIC_NUM_IPI; i++) { - /* Disabled, Priority 10..13 */ - openpic_initipi(i, 10+i, OPENPIC_VEC_IPI+i+offset); + /* Disabled, increased priorities 10..13 */ + openpic_initipi(i, OPENPIC_PRIORITY_IPI_BASE+i, + OPENPIC_VEC_IPI+i+offset); /* IPIs are per-CPU */ irq_desc[OPENPIC_VEC_IPI+i+offset].status |= IRQ_PER_CPU; irq_desc[OPENPIC_VEC_IPI+i+offset].handler = &open_pic_ipi; @@ -399,8 +400,9 @@ void __init openpic_init(int offset) if (sense & IRQ_SENSE_MASK) irq_desc[i+offset].status = IRQ_LEVEL; - /* Enabled, Priority 8 */ - openpic_initirq(i, 8, i+offset, (sense & IRQ_POLARITY_MASK), + /* Enabled, Default priority */ + openpic_initirq(i, OPENPIC_PRIORITY_DEFAULT, i+offset, + (sense & IRQ_POLARITY_MASK), (sense & IRQ_SENSE_MASK)); /* Processor 0 */ openpic_mapirq(i, CPU_MASK_CPU0, CPU_MASK_NONE); @@ -655,6 +657,18 @@ static void __init openpic_maptimer(u_int timer, cpumask_t cpumask) cpus_addr(phys)[0]); } +/* + * Change the priority of an interrupt + */ +void __init +openpic_set_irq_priority(u_int irq, u_int pri) +{ + check_arg_irq(irq); + openpic_safe_writefield(&ISR[irq - open_pic_irq_offset]->Vector_Priority, + OPENPIC_PRIORITY_MASK, + pri << OPENPIC_PRIORITY_SHIFT); +} + /* * Initalize the interrupt source which will generate an NMI. * This raises the interrupt's priority from 8 to 9. 
@@ -665,9 +679,7 @@ void __init openpic_init_nmi_irq(u_int irq) { check_arg_irq(irq); - openpic_safe_writefield(&ISR[irq - open_pic_irq_offset]->Vector_Priority, - OPENPIC_PRIORITY_MASK, - 9 << OPENPIC_PRIORITY_SHIFT); + openpic_set_irq_priority(irq, OPENPIC_PRIORITY_NMI); } /* -- cgit v1.2.3 From 8c8709334cec803368a432a33e0f2e116d48fe07 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 27 Jun 2005 14:36:34 -0700 Subject: [PATCH] ppc32: Remove CONFIG_PMAC_PBOOK This patch removes CONFIG_PMAC_PBOOK (PowerBook support). This is now split into CONFIG_PMAC_MEDIABAY for the actual hotswap bay that some powerbooks have, CONFIG_PM for power management related code, and just left out of any CONFIG_* option for some generally useful stuff that can be used on non-laptops as well. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc/platforms/pmac_sleep.S | 4 ++-- arch/ppc/platforms/pmac_time.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/ppc/platforms/pmac_sleep.S b/arch/ppc/platforms/pmac_sleep.S index f459ade1bd6..016a7464915 100644 --- a/arch/ppc/platforms/pmac_sleep.S +++ b/arch/ppc/platforms/pmac_sleep.S @@ -46,7 +46,7 @@ .section .text .align 5 -#if defined(CONFIG_PMAC_PBOOK) || defined(CONFIG_CPU_FREQ_PMAC) +#if defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ_PMAC) /* This gets called by via-pmu.c late during the sleep process. 
* The PMU was already send the sleep command and will shut us down @@ -382,7 +382,7 @@ turn_on_mmu: isync rfi -#endif /* defined(CONFIG_PMAC_PBOOK) || defined(CONFIG_CPU_FREQ) */ +#endif /* defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ) */ .section .data .balign L1_CACHE_LINE_SIZE diff --git a/arch/ppc/platforms/pmac_time.c b/arch/ppc/platforms/pmac_time.c index de60ccc7db9..778ce4fec36 100644 --- a/arch/ppc/platforms/pmac_time.c +++ b/arch/ppc/platforms/pmac_time.c @@ -206,7 +206,7 @@ via_calibrate_decr(void) return 1; } -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PM /* * Reset the time after a sleep. */ @@ -238,7 +238,7 @@ time_sleep_notify(struct pmu_sleep_notifier *self, int when) static struct pmu_sleep_notifier time_sleep_notifier __pmacdata = { time_sleep_notify, SLEEP_LEVEL_MISC, }; -#endif /* CONFIG_PMAC_PBOOK */ +#endif /* CONFIG_PM */ /* * Query the OF and get the decr frequency. @@ -251,9 +251,9 @@ pmac_calibrate_decr(void) struct device_node *cpu; unsigned int freq, *fp; -#ifdef CONFIG_PMAC_PBOOK +#ifdef CONFIG_PM pmu_register_sleep_notifier(&time_sleep_notifier); -#endif /* CONFIG_PMAC_PBOOK */ +#endif /* CONFIG_PM */ /* We assume MacRISC2 machines have correct device-tree * calibration. That's better since the VIA itself seems -- cgit v1.2.3 From 6ae3db110e62b0846aae1b5c6e661484ee3a5ed1 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 27 Jun 2005 14:36:35 -0700 Subject: [PATCH] ppc64: Add missing exports This patch adds a couple of missing symbol exports. flush_dcache_range is used by the AGP driver and rtc_lock by the RTC driver. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/ppc_ksyms.c | 1 + arch/ppc64/kernel/time.c | 1 + 2 files changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/ppc64/kernel/ppc_ksyms.c b/arch/ppc64/kernel/ppc_ksyms.c index b230a63fe4c..705742f4eec 100644 --- a/arch/ppc64/kernel/ppc_ksyms.c +++ b/arch/ppc64/kernel/ppc_ksyms.c @@ -75,6 +75,7 @@ EXPORT_SYMBOL(giveup_fpu); EXPORT_SYMBOL(giveup_altivec); #endif EXPORT_SYMBOL(__flush_icache_range); +EXPORT_SYMBOL(flush_dcache_range); #ifdef CONFIG_SMP #ifdef CONFIG_PPC_ISERIES diff --git a/arch/ppc64/kernel/time.c b/arch/ppc64/kernel/time.c index 2348a75e050..2a532db9138 100644 --- a/arch/ppc64/kernel/time.c +++ b/arch/ppc64/kernel/time.c @@ -91,6 +91,7 @@ unsigned long tb_to_xs; unsigned tb_to_us; unsigned long processor_freq; DEFINE_SPINLOCK(rtc_lock); +EXPORT_SYMBOL_GPL(rtc_lock); unsigned long tb_to_ns_scale; unsigned long tb_to_ns_shift; -- cgit v1.2.3 From ffaa8bd6c904d1ab79b677905067349a5ff51d84 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Mon, 27 Jun 2005 14:36:36 -0700 Subject: [PATCH] seccomp: tsc disable I believe at least for seccomp it's worth to turn off the tsc, not just for HT but for the L2 cache too. So it's up to you, either you turn it off completely (which isn't very nice IMHO) or I recommend to apply this below patch. This has been tested successfully on x86-64 against current cogito repository (i686 compiles so I didn't bother testing ;). People selling the cpu through cpushare may appreciate this bit for a peace of mind. There's no way to get any timing info anymore with this applied (gettimeofday is forbidden of course). 
The seccomp environment is completely deterministic so it can't be allowed to get timing info, it has to be deterministic so in the future I can enable a computing mode that does a parallel computing for each task with server side transparent checkpointing and verification that the output is the same from all the 2/3 seller computers for each task, without the buyer even noticing (for now the verification is left to the buyer client side and there's no checkpointing, since that would require more kernel changes to track the dirty bits but it'll be easy to extend once the basic mode is finished). Eliminating a cold-cache read of the cr4 global variable will save one cacheline during the tlb flush while making the code per-cpu-safe at the same time. Thanks to Mikael Pettersson for noticing the tlb flush wasn't per-cpu-safe. The global tlb flush can run from irq (IPI calling do_flush_tlb_all) but it'll be transparent to the switch_to code since the IPI won't make any change to the cr4 contents from the point of view of the interrupted code and since it's now all per-cpu stuff, it will not race. So no need to disable irqs in switch_to slow path. Signed-off-by: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/process.c | 29 +++++++++++++++++++++++++++++ arch/x86_64/kernel/process.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) (limited to 'arch') diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 5f8cfa6b794..ba243a4cc11 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -616,6 +616,33 @@ handle_io_bitmap(struct thread_struct *next, struct tss_struct *tss) tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; } +/* + * This function selects if the context switch from prev to next + * has to tweak the TSC disable bit in the cr4. 
+ */ +static inline void disable_tsc(struct task_struct *prev_p, + struct task_struct *next_p) +{ + struct thread_info *prev, *next; + + /* + * gcc should eliminate the ->thread_info dereference if + * has_secure_computing returns 0 at compile time (SECCOMP=n). + */ + prev = prev_p->thread_info; + next = next_p->thread_info; + + if (has_secure_computing(prev) || has_secure_computing(next)) { + /* slow path here */ + if (has_secure_computing(prev) && + !has_secure_computing(next)) { + write_cr4(read_cr4() & ~X86_CR4_TSD); + } else if (!has_secure_computing(prev) && + has_secure_computing(next)) + write_cr4(read_cr4() | X86_CR4_TSD); + } +} + /* * switch_to(x,yn) should switch tasks from x to y. * @@ -695,6 +722,8 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) handle_io_bitmap(next, tss); + disable_tsc(prev_p, next_p); + return prev_p; } diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 1d91271796e..7577f9d7a75 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -481,6 +481,33 @@ out: return err; } +/* + * This function selects if the context switch from prev to next + * has to tweak the TSC disable bit in the cr4. + */ +static inline void disable_tsc(struct task_struct *prev_p, + struct task_struct *next_p) +{ + struct thread_info *prev, *next; + + /* + * gcc should eliminate the ->thread_info dereference if + * has_secure_computing returns 0 at compile time (SECCOMP=n). 
+ */ + prev = prev_p->thread_info; + next = next_p->thread_info; + + if (has_secure_computing(prev) || has_secure_computing(next)) { + /* slow path here */ + if (has_secure_computing(prev) && + !has_secure_computing(next)) { + write_cr4(read_cr4() & ~X86_CR4_TSD); + } else if (!has_secure_computing(prev) && + has_secure_computing(next)) + write_cr4(read_cr4() | X86_CR4_TSD); + } +} + /* * This special macro can be used to load a debugging register */ @@ -599,6 +626,8 @@ struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct * } } + disable_tsc(prev_p, next_p); + return prev_p; } -- cgit v1.2.3 From 9ec4b1f356b3bad928ae8e2aa9caebfa737d52df Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Mon, 27 Jun 2005 15:17:01 -0700 Subject: [PATCH] kprobes: fix single-step out of line - take2 Now that PPC64 has no-execute support, here is a second try to fix the single step out of line during kprobe execution. Kprobes on x86_64 already solved this problem by allocating an executable page and using it as the scratch area for stepping out of line. Reuse that. 
Signed-off-by: Ananth N Mavinakayanahalli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/kprobes.c | 26 ++++++++-- arch/x86_64/kernel/kprobes.c | 113 +------------------------------------------ 2 files changed, 23 insertions(+), 116 deletions(-) (limited to 'arch') diff --git a/arch/ppc64/kernel/kprobes.c b/arch/ppc64/kernel/kprobes.c index 782ce3efa2c..86cc5496db9 100644 --- a/arch/ppc64/kernel/kprobes.c +++ b/arch/ppc64/kernel/kprobes.c @@ -36,6 +36,8 @@ #include #include +static DECLARE_MUTEX(kprobe_mutex); + static struct kprobe *current_kprobe; static unsigned long kprobe_status, kprobe_saved_msr; static struct kprobe *kprobe_prev; @@ -54,6 +56,15 @@ int arch_prepare_kprobe(struct kprobe *p) printk("Cannot register a kprobe on rfid or mtmsrd\n"); ret = -EINVAL; } + + /* insn must be on a special executable page on ppc64 */ + if (!ret) { + up(&kprobe_mutex); + p->ainsn.insn = get_insn_slot(); + down(&kprobe_mutex); + if (!p->ainsn.insn) + ret = -ENOMEM; + } return ret; } @@ -79,16 +90,22 @@ void arch_disarm_kprobe(struct kprobe *p) void arch_remove_kprobe(struct kprobe *p) { + up(&kprobe_mutex); + free_insn_slot(p->ainsn.insn); + down(&kprobe_mutex); } static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) { + kprobe_opcode_t insn = *p->ainsn.insn; + regs->msr |= MSR_SE; - /*single step inline if it a breakpoint instruction*/ - if (p->opcode == BREAKPOINT_INSTRUCTION) + + /* single step inline if it is a trap variant */ + if (IS_TW(insn) || IS_TD(insn) || IS_TWI(insn) || IS_TDI(insn)) regs->nip = (unsigned long)p->addr; else - regs->nip = (unsigned long)&p->ainsn.insn; + regs->nip = (unsigned long)p->ainsn.insn; } static inline void save_previous_kprobe(void) @@ -205,9 +222,10 @@ no_kprobe: static void resume_execution(struct kprobe *p, struct pt_regs *regs) { int ret; + unsigned int insn = *p->ainsn.insn; regs->nip = (unsigned long)p->addr; - ret = emulate_step(regs, p->ainsn.insn[0]); + ret = 
emulate_step(regs, insn); if (ret == 0) regs->nip = (unsigned long)p->addr + 4; } diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index 4e680f87a75..6a1c88376be 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -38,7 +38,7 @@ #include #include #include -#include + #include #include #include @@ -51,8 +51,6 @@ static struct kprobe *kprobe_prev; static unsigned long kprobe_status_prev, kprobe_old_rflags_prev, kprobe_saved_rflags_prev; static struct pt_regs jprobe_saved_regs; static long *jprobe_saved_rsp; -static kprobe_opcode_t *get_insn_slot(void); -static void free_insn_slot(kprobe_opcode_t *slot); void jprobe_return_end(void); /* copy of the kernel stack at the probe fire time */ @@ -681,112 +679,3 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) } return 0; } - -/* - * kprobe->ainsn.insn points to the copy of the instruction to be single-stepped. - * By default on x86_64, pages we get from kmalloc or vmalloc are not - * executable. Single-stepping an instruction on such a page yields an - * oops. So instead of storing the instruction copies in their respective - * kprobe objects, we allocate a page, map it executable, and store all the - * instruction copies there. (We can allocate additional pages if somebody - * inserts a huge number of probes.) Each page can hold up to INSNS_PER_PAGE - * instruction slots, each of which is MAX_INSN_SIZE*sizeof(kprobe_opcode_t) - * bytes. - */ -#define INSNS_PER_PAGE (PAGE_SIZE/(MAX_INSN_SIZE*sizeof(kprobe_opcode_t))) -struct kprobe_insn_page { - struct hlist_node hlist; - kprobe_opcode_t *insns; /* page of instruction slots */ - char slot_used[INSNS_PER_PAGE]; - int nused; -}; - -static struct hlist_head kprobe_insn_pages; - -/** - * get_insn_slot() - Find a slot on an executable page for an instruction. - * We allocate an executable page if there's no room on existing ones. 
- */ -static kprobe_opcode_t *get_insn_slot(void) -{ - struct kprobe_insn_page *kip; - struct hlist_node *pos; - - hlist_for_each(pos, &kprobe_insn_pages) { - kip = hlist_entry(pos, struct kprobe_insn_page, hlist); - if (kip->nused < INSNS_PER_PAGE) { - int i; - for (i = 0; i < INSNS_PER_PAGE; i++) { - if (!kip->slot_used[i]) { - kip->slot_used[i] = 1; - kip->nused++; - return kip->insns + (i*MAX_INSN_SIZE); - } - } - /* Surprise! No unused slots. Fix kip->nused. */ - kip->nused = INSNS_PER_PAGE; - } - } - - /* All out of space. Need to allocate a new page. Use slot 0.*/ - kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL); - if (!kip) { - return NULL; - } - - /* - * For the %rip-relative displacement fixups to be doable, we - * need our instruction copy to be within +/- 2GB of any data it - * might access via %rip. That is, within 2GB of where the - * kernel image and loaded module images reside. So we allocate - * a page in the module loading area. - */ - kip->insns = module_alloc(PAGE_SIZE); - if (!kip->insns) { - kfree(kip); - return NULL; - } - INIT_HLIST_NODE(&kip->hlist); - hlist_add_head(&kip->hlist, &kprobe_insn_pages); - memset(kip->slot_used, 0, INSNS_PER_PAGE); - kip->slot_used[0] = 1; - kip->nused = 1; - return kip->insns; -} - -/** - * free_insn_slot() - Free instruction slot obtained from get_insn_slot(). - */ -static void free_insn_slot(kprobe_opcode_t *slot) -{ - struct kprobe_insn_page *kip; - struct hlist_node *pos; - - hlist_for_each(pos, &kprobe_insn_pages) { - kip = hlist_entry(pos, struct kprobe_insn_page, hlist); - if (kip->insns <= slot - && slot < kip->insns+(INSNS_PER_PAGE*MAX_INSN_SIZE)) { - int i = (slot - kip->insns) / MAX_INSN_SIZE; - kip->slot_used[i] = 0; - kip->nused--; - if (kip->nused == 0) { - /* - * Page is no longer in use. Free it unless - * it's the last one. We keep the last one - * so as not to have to set it up again the - * next time somebody inserts a probe. 
- */ - hlist_del(&kip->hlist); - if (hlist_empty(&kprobe_insn_pages)) { - INIT_HLIST_NODE(&kip->hlist); - hlist_add_head(&kip->hlist, - &kprobe_insn_pages); - } else { - module_free(NULL, kip->insns); - kfree(kip); - } - } - return; - } - } -} -- cgit v1.2.3 From 4bdbd37f6d01abc4c002bb8de90ea2c3bc7abe7e Mon Sep 17 00:00:00 2001 From: Rusty Lynch Date: Mon, 27 Jun 2005 15:17:09 -0700 Subject: [PATCH] Return probe redesign: i386 specific changes The following patch contains the i386 specific changes for the new return probe design. Changes include: * Removing the architecture specific functions for querying a return probe instance off a stack address * Complete rework of arch_prepare_kretprobe() and trampoline_probe_handler() * Removing trampoline_post_handler() * Adding arch_init() so that now we handle registering the return probe trampoline instead of kernel/kprobes.c doing it Signed-off-by: Rusty Lynch Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/kprobes.c | 133 ++++++++++++++++++++++++--------------------- 1 file changed, 70 insertions(+), 63 deletions(-) (limited to 'arch') diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 3762f6b35ab..fc8b1752176 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -127,48 +127,23 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->eip = (unsigned long)&p->ainsn.insn; } -struct task_struct *arch_get_kprobe_task(void *ptr) -{ - return ((struct thread_info *) (((unsigned long) ptr) & - (~(THREAD_SIZE -1))))->task; -} - void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) { unsigned long *sara = (unsigned long *)&regs->esp; - struct kretprobe_instance *ri; - static void *orig_ret_addr; + struct kretprobe_instance *ri; + + if ((ri = get_free_rp_inst(rp)) != NULL) { + ri->rp = rp; + ri->task = current; + ri->ret_addr = (kprobe_opcode_t *) *sara; - /* - * Save the return address when the return probe
hits - * the first time, and use it to populate the (krprobe - * instance)->ret_addr for subsequent return probes at - * the same addrress since stack address would have - * the kretprobe_trampoline by then. - */ - if (((void*) *sara) != kretprobe_trampoline) - orig_ret_addr = (void*) *sara; - - if ((ri = get_free_rp_inst(rp)) != NULL) { - ri->rp = rp; - ri->stack_addr = sara; - ri->ret_addr = orig_ret_addr; - add_rp_inst(ri); /* Replace the return addr with trampoline addr */ *sara = (unsigned long) &kretprobe_trampoline; - } else { - rp->nmissed++; - } -} -void arch_kprobe_flush_task(struct task_struct *tk) -{ - struct kretprobe_instance *ri; - while ((ri = get_rp_inst_tsk(tk)) != NULL) { - *((unsigned long *)(ri->stack_addr)) = - (unsigned long) ri->ret_addr; - recycle_rp_inst(ri); - } + add_rp_inst(ri); + } else { + rp->nmissed++; + } } /* @@ -286,36 +261,59 @@ no_kprobe: */ int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { - struct task_struct *tsk; - struct kretprobe_instance *ri; - struct hlist_head *head; - struct hlist_node *node; - unsigned long *sara = ((unsigned long *) &regs->esp) - 1; - - tsk = arch_get_kprobe_task(sara); - head = kretprobe_inst_table_head(tsk); - - hlist_for_each_entry(ri, node, head, hlist) { - if (ri->stack_addr == sara && ri->rp) { - if (ri->rp->handler) - ri->rp->handler(ri, regs); - } - } - return 0; -} + struct kretprobe_instance *ri = NULL; + struct hlist_head *head; + struct hlist_node *node, *tmp; + unsigned long orig_ret_address = 0; + unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; -void trampoline_post_handler(struct kprobe *p, struct pt_regs *regs, - unsigned long flags) -{ - struct kretprobe_instance *ri; - /* RA already popped */ - unsigned long *sara = ((unsigned long *)&regs->esp) - 1; + head = kretprobe_inst_table_head(current); - while ((ri = get_rp_inst(sara))) { - regs->eip = (unsigned long)ri->ret_addr; + /* + * It is possible to have multiple instances associated with a
given + * task either because an multiple functions in the call path + * have a return probe installed on them, and/or more then one return + * return probe was registered for a target function. + * + * We can handle this because: + * - instances are always inserted at the head of the list + * - when multiple return probes are registered for the same + * function, the first instance's ret_addr will point to the + * real return address, and all the rest will point to + * kretprobe_trampoline + */ + hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + if (ri->rp && ri->rp->handler) + ri->rp->handler(ri, regs); + + orig_ret_address = (unsigned long)ri->ret_addr; recycle_rp_inst(ri); + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; } - regs->eflags &= ~TF_MASK; + + BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address)); + regs->eip = orig_ret_address; + + unlock_kprobes(); + preempt_enable_no_resched(); + + /* + * By returning a non-zero value, we are telling + * kprobe_handler() that we have handled unlocking + * and re-enabling preemption. + */ + return 1; } /* @@ -403,8 +401,7 @@ static inline int post_kprobe_handler(struct pt_regs *regs) current_kprobe->post_handler(current_kprobe, regs, 0); } - if (current_kprobe->post_handler != trampoline_post_handler) - resume_execution(current_kprobe, regs); + resume_execution(current_kprobe, regs); regs->eflags |= kprobe_saved_eflags; /*Restore back the original saved kprobes variables and continue. 
*/ @@ -534,3 +531,13 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) } return 0; } + +static struct kprobe trampoline_p = { + .addr = (kprobe_opcode_t *) &kretprobe_trampoline, + .pre_handler = trampoline_probe_handler +}; + +int __init arch_init(void) +{ + return register_kprobe(&trampoline_p); +} -- cgit v1.2.3 From ba8af12f432c4f00ddb0bc1068b57b20aac93ecf Mon Sep 17 00:00:00 2001 From: Rusty Lynch Date: Mon, 27 Jun 2005 15:17:10 -0700 Subject: [PATCH] Return probe redesign: x86_64 specific changes The following patch contains the x86_64 specific changes for the new return probe design. Changes include: * Removing the architecture specific functions for querying a return probe instance off a stack address * Complete rework of arch_prepare_kretprobe() and trampoline_probe_handler() * Removing trampoline_post_handler() * Adding arch_init() so that now we handle registering the return probe trampoline instead of kernel/kprobes.c doing it NOTE: Note that with this new design, the dependency on calculating a pointer to the task off the stack pointer no longer exists (resolving the problem of interruption stacks as pointed out in the original feedback to this port.)
Signed-off-by: Rusty Lynch Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/kprobes.c | 133 +++++++++++++++++++++++-------------------- 1 file changed, 70 insertions(+), 63 deletions(-) (limited to 'arch') diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index 6a1c88376be..acd2a778ebe 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -272,48 +272,23 @@ static void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->rip = (unsigned long)p->ainsn.insn; } -struct task_struct *arch_get_kprobe_task(void *ptr) -{ - return ((struct thread_info *) (((unsigned long) ptr) & - (~(THREAD_SIZE -1))))->task; -} - void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) { unsigned long *sara = (unsigned long *)regs->rsp; - struct kretprobe_instance *ri; - static void *orig_ret_addr; + struct kretprobe_instance *ri; + + if ((ri = get_free_rp_inst(rp)) != NULL) { + ri->rp = rp; + ri->task = current; + ri->ret_addr = (kprobe_opcode_t *) *sara; - /* - * Save the return address when the return probe hits - * the first time, and use it to populate the (krprobe - * instance)->ret_addr for subsequent return probes at - * the same addrress since stack address would have - * the kretprobe_trampoline by then. 
- */ - if (((void*) *sara) != kretprobe_trampoline) - orig_ret_addr = (void*) *sara; - - if ((ri = get_free_rp_inst(rp)) != NULL) { - ri->rp = rp; - ri->stack_addr = sara; - ri->ret_addr = orig_ret_addr; - add_rp_inst(ri); /* Replace the return addr with trampoline addr */ *sara = (unsigned long) &kretprobe_trampoline; - } else { - rp->nmissed++; - } -} -void arch_kprobe_flush_task(struct task_struct *tk) -{ - struct kretprobe_instance *ri; - while ((ri = get_rp_inst_tsk(tk)) != NULL) { - *((unsigned long *)(ri->stack_addr)) = - (unsigned long) ri->ret_addr; - recycle_rp_inst(ri); - } + add_rp_inst(ri); + } else { + rp->nmissed++; + } } /* @@ -426,36 +401,59 @@ no_kprobe: */ int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { - struct task_struct *tsk; - struct kretprobe_instance *ri; - struct hlist_head *head; - struct hlist_node *node; - unsigned long *sara = (unsigned long *)regs->rsp - 1; - - tsk = arch_get_kprobe_task(sara); - head = kretprobe_inst_table_head(tsk); - - hlist_for_each_entry(ri, node, head, hlist) { - if (ri->stack_addr == sara && ri->rp) { - if (ri->rp->handler) - ri->rp->handler(ri, regs); - } - } - return 0; -} + struct kretprobe_instance *ri = NULL; + struct hlist_head *head; + struct hlist_node *node, *tmp; + unsigned long orig_ret_address = 0; + unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; -void trampoline_post_handler(struct kprobe *p, struct pt_regs *regs, - unsigned long flags) -{ - struct kretprobe_instance *ri; - /* RA already popped */ - unsigned long *sara = ((unsigned long *)regs->rsp) - 1; + head = kretprobe_inst_table_head(current); - while ((ri = get_rp_inst(sara))) { - regs->rip = (unsigned long)ri->ret_addr; + /* + * It is possible to have multiple instances associated with a given + * task either because an multiple functions in the call path + * have a return probe installed on them, and/or more then one return + * return probe was registered for a target function. 
+ * + * We can handle this because: + * - instances are always inserted at the head of the list + * - when multiple return probes are registered for the same + * function, the first instance's ret_addr will point to the + * real return address, and all the rest will point to + * kretprobe_trampoline + */ + hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + if (ri->rp && ri->rp->handler) + ri->rp->handler(ri, regs); + + orig_ret_address = (unsigned long)ri->ret_addr; recycle_rp_inst(ri); + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; } - regs->eflags &= ~TF_MASK; + + BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address)); + regs->rip = orig_ret_address; + + unlock_kprobes(); + preempt_enable_no_resched(); + + /* + * By returning a non-zero value, we are telling + * kprobe_handler() that we have handled unlocking + * and re-enabling preemption. + */ + return 1; } /* @@ -548,8 +546,7 @@ int post_kprobe_handler(struct pt_regs *regs) current_kprobe->post_handler(current_kprobe, regs, 0); } - if (current_kprobe->post_handler != trampoline_post_handler) - resume_execution(current_kprobe, regs); + resume_execution(current_kprobe, regs); regs->eflags |= kprobe_saved_rflags; /* Restore the original saved kprobes variables and continue. 
*/ @@ -679,3 +676,13 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) } return 0; } + +static struct kprobe trampoline_p = { + .addr = (kprobe_opcode_t *) &kretprobe_trampoline, + .pre_handler = trampoline_probe_handler +}; + +int __init arch_init(void) +{ + return register_kprobe(&trampoline_p); +} -- cgit v1.2.3 From 9508dbfe39112813612085c00d55bacd398eddc6 Mon Sep 17 00:00:00 2001 From: Rusty Lynch Date: Mon, 27 Jun 2005 15:17:12 -0700 Subject: [PATCH] Return probe redesign: ia64 specific implementation The following patch implements function return probes for ia64 using the revised design. With this new design we no longer need to do some of the odd hacks previously required on the last ia64 return probe port that I sent out for comments. Note that this new implementation still does not resolve the problem noted by Keith Owens where backtrace data is lost after a return probe is hit. Changes include: * Addition of kretprobe_trampoline to act as a dummy function for instrumented functions to return to, and for the return probe infrastructure to place a kprobe on, gaining control so that the return probe handler can be called, and so that the instruction pointer can be moved back to the original return address. * Addition of arch_init(), allowing a kprobe to be registered on kretprobe_trampoline * Addition of trampoline_probe_handler() which is used as the pre_handler for the kprobe inserted on kretprobe_trampoline. This is the function that handles the details for calling the return probe handler function and returning control back at the original return address * Addition of arch_prepare_kretprobe() which is set up as the pre_handler for a kprobe registered at the beginning of the target function by kernel/kprobes.c so that a return probe instance can be set up when a caller enters the target function. (A return probe instance contains all the needed information for trampoline_probe_handler to do its job.)
* Hooks added to the exit path of a task so that we can cleanup any left-over return probe instances (i.e. if a task dies while inside a targeted function then the return probe instance was reserved at the beginning of the function but the function never returns so we need to mark the instance as unused.) Signed-off-by: Rusty Lynch Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/kprobes.c | 103 ++++++++++++++++++++++++++++++++++++++++++++- arch/ia64/kernel/process.c | 16 +++++++ 2 files changed, 117 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 5978823d5c6..c97e18e634c 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -290,6 +290,94 @@ static inline void set_current_kprobe(struct kprobe *p) current_kprobe = p; } +static void kretprobe_trampoline(void) +{ +} + +/* + * At this point the target function has been tricked into + * returning into our trampoline. Lookup the associated instance + * and then: + * - call the handler function + * - cleanup by marking the instance as unused + * - long jump back to the original return address + */ +int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct kretprobe_instance *ri = NULL; + struct hlist_head *head; + struct hlist_node *node, *tmp; + unsigned long orig_ret_address = 0; + unsigned long trampoline_address = + ((struct fnptr *)kretprobe_trampoline)->ip; + + head = kretprobe_inst_table_head(current); + + /* + * It is possible to have multiple instances associated with a given + * task either because an multiple functions in the call path + * have a return probe installed on them, and/or more then one return + * return probe was registered for a target function. 
+ * + * We can handle this because: + * - instances are always inserted at the head of the list + * - when multiple return probes are registered for the same + * function, the first instance's ret_addr will point to the + * real return address, and all the rest will point to + * kretprobe_trampoline + */ + hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + if (ri->rp && ri->rp->handler) + ri->rp->handler(ri, regs); + + orig_ret_address = (unsigned long)ri->ret_addr; + recycle_rp_inst(ri); + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address)); + regs->cr_iip = orig_ret_address; + + unlock_kprobes(); + preempt_enable_no_resched(); + + /* + * By returning a non-zero value, we are telling + * kprobe_handler() that we have handled unlocking + * and re-enabling preemption. + */ + return 1; +} + +void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) +{ + struct kretprobe_instance *ri; + + if ((ri = get_free_rp_inst(rp)) != NULL) { + ri->rp = rp; + ri->task = current; + ri->ret_addr = (kprobe_opcode_t *)regs->b0; + + /* Replace the return addr with trampoline addr */ + regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip; + + add_rp_inst(ri); + } else { + rp->nmissed++; + } +} + int arch_prepare_kprobe(struct kprobe *p) { unsigned long addr = (unsigned long) p->addr; @@ -492,8 +580,8 @@ static int pre_kprobes_handler(struct die_args *args) if (p->pre_handler && p->pre_handler(p, regs)) /* * Our pre-handler is specifically requesting that we just - * do a return. This is handling the case where the - * pre-handler is really our special jprobe pre-handler. + * do a return. 
This is used for both the jprobe pre-handler + * and the kretprobe trampoline */ return 1; @@ -599,3 +687,14 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) *regs = jprobe_saved_regs; return 1; } + +static struct kprobe trampoline_p = { + .pre_handler = trampoline_probe_handler +}; + +int __init arch_init(void) +{ + trampoline_p.addr = + (kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip; + return register_kprobe(&trampoline_p); +} diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index ebb71f3d6d1..6e35bff05d5 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -707,6 +708,13 @@ kernel_thread_helper (int (*fn)(void *), void *arg) void flush_thread (void) { + /* + * Remove function-return probe instances associated with this task + * and put them back on the free list. Do not insert an exit probe for + * this function, it will be disabled by kprobe_flush_task if you do. + */ + kprobe_flush_task(current); + /* drop floating-point and debug-register state if it exists: */ current->thread.flags &= ~(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID); ia64_drop_fpu(current); @@ -721,6 +729,14 @@ flush_thread (void) void exit_thread (void) { + + /* + * Remove function-return probe instances associated with this task + * and put them back on the free list. Do not insert an exit probe for + * this function, it will be disabled by kprobe_flush_task if you do. 
+ */ + kprobe_flush_task(current); + + ia64_drop_fpu(current); #ifdef CONFIG_PERFMON /* if needed, stop monitoring and flush state to perfmon context */ -- cgit v1.2.3 From 97f7943d70ff0e1e92ea627c44cfacfdae65dbc4 Mon Sep 17 00:00:00 2001 From: Rusty Lynch Date: Mon, 27 Jun 2005 15:17:15 -0700 Subject: [PATCH] Return probe redesign: ppc64 specific implementation The following is a patch provided by Ananth Mavinakayanahalli that implements the new PPC64 specific parts of the new function return probe design. NOTE: Since getting Ananth's patch, I changed trampoline_probe_handler() to consume each of the outstanding return probe instances (feedback on my original RFC after Ananth cut a patch), and also added the arch_init() function (adding arch specific initialization.) I have cross compiled but have not tested this on a PPC64 machine. Changes include: * Addition of kretprobe_trampoline to act as a dummy function for instrumented functions to return to, and for the return probe infrastructure to place a kprobe on, gaining control so that the return probe handler can be called, and so that the instruction pointer can be moved back to the original return address. * Addition of arch_init(), allowing a kprobe to be registered on kretprobe_trampoline * Addition of trampoline_probe_handler() which is used as the pre_handler for the kprobe inserted on kretprobe_trampoline. This is the function that handles the details for calling the return probe handler function and returning control back at the original return address * Addition of arch_prepare_kretprobe() which is set up as the pre_handler for a kprobe registered at the beginning of the target function by kernel/kprobes.c so that a return probe instance can be set up when a caller enters the target function. (A return probe instance contains all the needed information for trampoline_probe_handler to do its job.)
* Hooks added to the exit path of a task so that we can cleanup any left-over return probe instances (i.e. if a task dies while inside a targeted function then the return probe instance was reserved at the beginning of the function but the function never returns so we need to mark the instance as unused.) Signed-off-by: Rusty Lynch Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/kprobes.c | 99 +++++++++++++++++++++++++++++++++++++++++++++ arch/ppc64/kernel/process.c | 4 ++ 2 files changed, 103 insertions(+) (limited to 'arch') diff --git a/arch/ppc64/kernel/kprobes.c b/arch/ppc64/kernel/kprobes.c index 86cc5496db9..1d2ff6d6b0b 100644 --- a/arch/ppc64/kernel/kprobes.c +++ b/arch/ppc64/kernel/kprobes.c @@ -122,6 +122,23 @@ static inline void restore_previous_kprobe(void) kprobe_saved_msr = kprobe_saved_msr_prev; } +void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) +{ + struct kretprobe_instance *ri; + + if ((ri = get_free_rp_inst(rp)) != NULL) { + ri->rp = rp; + ri->task = current; + ri->ret_addr = (kprobe_opcode_t *)regs->link; + + /* Replace the return addr with trampoline addr */ + regs->link = (unsigned long)kretprobe_trampoline; + add_rp_inst(ri); + } else { + rp->nmissed++; + } +} + static inline int kprobe_handler(struct pt_regs *regs) { struct kprobe *p; @@ -211,6 +228,78 @@ no_kprobe: return ret; } +/* + * Function return probe trampoline: + * - init_kprobes() establishes a probepoint here + * - When the probed function returns, this probe + * causes the handlers to fire + */ +void kretprobe_trampoline_holder(void) +{ + asm volatile(".global kretprobe_trampoline\n" + "kretprobe_trampoline:\n" + "nop\n"); +} + +/* + * Called when the probe at kretprobe trampoline is hit + */ +int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct kretprobe_instance *ri = NULL; + struct hlist_head *head; + struct hlist_node *node, *tmp; + unsigned long orig_ret_address = 0; + unsigned long 
trampoline_address =(unsigned long)&kretprobe_trampoline; + + head = kretprobe_inst_table_head(current); + + /* + * It is possible to have multiple instances associated with a given + * task either because an multiple functions in the call path + * have a return probe installed on them, and/or more then one return + * return probe was registered for a target function. + * + * We can handle this because: + * - instances are always inserted at the head of the list + * - when multiple return probes are registered for the same + * function, the first instance's ret_addr will point to the + * real return address, and all the rest will point to + * kretprobe_trampoline + */ + hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + if (ri->rp && ri->rp->handler) + ri->rp->handler(ri, regs); + + orig_ret_address = (unsigned long)ri->ret_addr; + recycle_rp_inst(ri); + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address)); + regs->nip = orig_ret_address; + + unlock_kprobes(); + + /* + * By returning a non-zero value, we are telling + * kprobe_handler() that we have handled unlocking + * and re-enabling preemption. + */ + return 1; +} + /* * Called after single-stepping. 
p->addr is the address of the * instruction whose first byte has been replaced by the "breakpoint" @@ -349,3 +438,13 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) memcpy(regs, &jprobe_saved_regs, sizeof(struct pt_regs)); return 1; } + +static struct kprobe trampoline_p = { + .addr = (kprobe_opcode_t *) &kretprobe_trampoline, + .pre_handler = trampoline_probe_handler +}; + +int __init arch_init(void) +{ + return register_kprobe(&trampoline_p); +} diff --git a/arch/ppc64/kernel/process.c b/arch/ppc64/kernel/process.c index aba89554d89..f7cae05e40f 100644 --- a/arch/ppc64/kernel/process.c +++ b/arch/ppc64/kernel/process.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -307,6 +308,8 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { + kprobe_flush_task(current); + #ifndef CONFIG_SMP if (last_task_used_math == current) last_task_used_math = NULL; @@ -321,6 +324,7 @@ void flush_thread(void) { struct thread_info *t = current_thread_info(); + kprobe_flush_task(current); if (t->flags & _TIF_ABI_PENDING) t->flags ^= (_TIF_ABI_PENDING | _TIF_32BIT); -- cgit v1.2.3 From a528e21c235862cc1ae50e7809eb9116dc40ea0c Mon Sep 17 00:00:00 2001 From: Rusty Lynch Date: Mon, 27 Jun 2005 15:17:15 -0700 Subject: [PATCH] kprobes/ia64: refuse inserting kprobe on slot 1 Without the ability to atomically write 16 bytes, we can not update the middle slot of a bundle, slot 1, unless we stop the machine first. This patch will ensure the ability to robustly insert and remove a kprobe by refusing to insert a kprobe on slot 1 until a mechanism is in place to safely handle this case. 
Signed-off-by: Rusty Lynch Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/kprobes.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index c97e18e634c..ec2ceade12b 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -270,6 +270,13 @@ static int valid_kprobe_addr(int template, int slot, unsigned long addr) addr); return -EINVAL; } + + if (slot == 1 && bundle_encoding[template][1] != L) { + printk(KERN_WARNING "Inserting kprobes on slot #1 " + "is not supported\n"); + return -EINVAL; + } + return 0; } -- cgit v1.2.3 From c7b645f934e52a54af58142d91fb51f881f8ce26 Mon Sep 17 00:00:00 2001 From: Keshavamurthy Anil S Date: Mon, 27 Jun 2005 15:17:16 -0700 Subject: [PATCH] kprobes/ia64: refuse kprobe on ivt code Not safe to insert kprobes on IVT code. This patch checks to see if the address on which Kprobes is being inserted is in ivt code and if it is in ivt code then refuse to register kprobe. 
Signed-off-by: Anil S Keshavamurthy Acked-by: David Mosberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/kprobes.c | 18 ++++++++++++++++-- arch/ia64/kernel/vmlinux.lds.S | 7 ++++++- 2 files changed, 22 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index ec2ceade12b..3aa3167edbe 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -34,6 +34,7 @@ #include #include +#include extern void jprobe_inst_return(void); @@ -263,14 +264,27 @@ static inline void get_kprobe_inst(bundle_t *bundle, uint slot, } } +/* Returns non-zero if the addr is in the Interrupt Vector Table */ +static inline int in_ivt_functions(unsigned long addr) +{ + return (addr >= (unsigned long)__start_ivt_text + && addr < (unsigned long)__end_ivt_text); +} + static int valid_kprobe_addr(int template, int slot, unsigned long addr) { if ((slot > 2) || ((bundle_encoding[template][1] == L) && slot > 1)) { - printk(KERN_WARNING "Attempting to insert unaligned kprobe at 0x%lx\n", - addr); + printk(KERN_WARNING "Attempting to insert unaligned kprobe " + "at 0x%lx\n", addr); return -EINVAL; } + if (in_ivt_functions(addr)) { + printk(KERN_WARNING "Kprobes can't be inserted inside " + "IVT functions at 0x%lx\n", addr); + return -EINVAL; + } + if (slot == 1 && bundle_encoding[template][1] != L) { printk(KERN_WARNING "Inserting kprobes on slot #1 " "is not supported\n"); diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index b9f0db4c1b0..a676e79e068 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -8,6 +8,11 @@ #define LOAD_OFFSET (KERNEL_START - KERNEL_TR_PAGE_SIZE) #include +#define IVT_TEXT \ + VMLINUX_SYMBOL(__start_ivt_text) = .; \ + *(.text.ivt) \ + VMLINUX_SYMBOL(__end_ivt_text) = .; + OUTPUT_FORMAT("elf64-ia64-little") OUTPUT_ARCH(ia64) ENTRY(phys_start) @@ -39,7 +44,7 @@ SECTIONS .text : AT(ADDR(.text) - 
LOAD_OFFSET) { - *(.text.ivt) + IVT_TEXT *(.text) SCHED_TEXT LOCK_TEXT -- cgit v1.2.3 From b445e26cbf784cdba10f2b6c3e2cd3ee7bab360a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 27 Jun 2005 15:42:04 -0700 Subject: [SPARC64]: Avoid membar instructions in delay slots. In particular, avoid membar instructions in the delay slot of a jmpl instruction. UltraSPARC-I, II, IIi, and IIe have a bug, documented in the UltraSPARC-IIi User's Manual, Appendix K, Erratum 51 The long and short of it is that if the IMU unit misses on a branch or jmpl, and there is a store buffer synchronizing membar in the delay slot, the chip can stop fetching instructions. If interrupts are enabled or some other trap is enabled, the chip will unwedge itself, but performance will suffer. We already had a workaround for this bug in a few spots, but it's better to have the entire tree sanitized for this rule. Signed-off-by: David S. Miller --- arch/sparc64/kernel/entry.S | 6 ++- arch/sparc64/kernel/semaphore.c | 12 +++-- arch/sparc64/kernel/trampoline.S | 3 +- arch/sparc64/lib/U1memcpy.S | 103 ++++++++++++++++++++------------------- arch/sparc64/lib/VISsave.S | 15 +++++- arch/sparc64/lib/atomic.S | 42 ++++++++++------ arch/sparc64/lib/bitops.S | 31 +++++++----- arch/sparc64/lib/debuglocks.c | 6 ++- arch/sparc64/lib/dec_and_lock.S | 6 ++- arch/sparc64/lib/rwsem.S | 15 ++++-- arch/sparc64/mm/init.c | 6 ++- arch/sparc64/mm/ultra.S | 3 +- 12 files changed, 150 insertions(+), 98 deletions(-) (limited to 'arch') diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index a47f2d0b1a2..ffe717ab7f8 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -271,8 +271,9 @@ cplus_fptrap_insn_1: fmuld %f0, %f2, %f26 faddd %f0, %f2, %f28 fmuld %f0, %f2, %f30 + membar #Sync b,pt %xcc, fpdis_exit - membar #Sync + nop 2: andcc %g5, FPRS_DU, %g0 bne,pt %icc, 3f fzero %f32 @@ -301,8 +302,9 @@ cplus_fptrap_insn_2: fmuld %f32, %f34, %f58 faddd %f32, %f34, %f60 fmuld 
%f32, %f34, %f62 + membar #Sync ba,pt %xcc, fpdis_exit - membar #Sync + nop 3: mov SECONDARY_CONTEXT, %g3 add %g6, TI_FPREGS, %g1 ldxa [%g3] ASI_DMMU, %g5 diff --git a/arch/sparc64/kernel/semaphore.c b/arch/sparc64/kernel/semaphore.c index 63496c43fe1..a809e63f03e 100644 --- a/arch/sparc64/kernel/semaphore.c +++ b/arch/sparc64/kernel/semaphore.c @@ -32,8 +32,9 @@ static __inline__ int __sem_update_count(struct semaphore *sem, int incr) " add %1, %4, %1\n" " cas [%3], %0, %1\n" " cmp %0, %1\n" +" membar #StoreLoad | #StoreStore\n" " bne,pn %%icc, 1b\n" -" membar #StoreLoad | #StoreStore\n" +" nop\n" : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count) : "r" (&sem->count), "r" (incr), "m" (sem->count) : "cc"); @@ -71,8 +72,9 @@ void up(struct semaphore *sem) " cmp %%g1, %%g7\n" " bne,pn %%icc, 1b\n" " addcc %%g7, 1, %%g0\n" +" membar #StoreLoad | #StoreStore\n" " ble,pn %%icc, 3f\n" -" membar #StoreLoad | #StoreStore\n" +" nop\n" "2:\n" " .subsection 2\n" "3: mov %0, %%g1\n" @@ -128,8 +130,9 @@ void __sched down(struct semaphore *sem) " cmp %%g1, %%g7\n" " bne,pn %%icc, 1b\n" " cmp %%g7, 1\n" +" membar #StoreLoad | #StoreStore\n" " bl,pn %%icc, 3f\n" -" membar #StoreLoad | #StoreStore\n" +" nop\n" "2:\n" " .subsection 2\n" "3: mov %0, %%g1\n" @@ -233,8 +236,9 @@ int __sched down_interruptible(struct semaphore *sem) " cmp %%g1, %%g7\n" " bne,pn %%icc, 1b\n" " cmp %%g7, 1\n" +" membar #StoreLoad | #StoreStore\n" " bl,pn %%icc, 3f\n" -" membar #StoreLoad | #StoreStore\n" +" nop\n" "2:\n" " .subsection 2\n" "3: mov %2, %%g1\n" diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S index 2c8f9344b4e..3a145fc39cf 100644 --- a/arch/sparc64/kernel/trampoline.S +++ b/arch/sparc64/kernel/trampoline.S @@ -98,8 +98,9 @@ startup_continue: sethi %hi(prom_entry_lock), %g2 1: ldstub [%g2 + %lo(prom_entry_lock)], %g1 + membar #StoreLoad | #StoreStore brnz,pn %g1, 1b - membar #StoreLoad | #StoreStore + nop sethi %hi(p1275buf), %g2 or %g2, %lo(p1275buf), %g2 
diff --git a/arch/sparc64/lib/U1memcpy.S b/arch/sparc64/lib/U1memcpy.S index da9b520c718..bafd2fc07ac 100644 --- a/arch/sparc64/lib/U1memcpy.S +++ b/arch/sparc64/lib/U1memcpy.S @@ -87,14 +87,17 @@ #define LOOP_CHUNK3(src, dest, len, branch_dest) \ MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) +#define DO_SYNC membar #Sync; #define STORE_SYNC(dest, fsrc) \ EX_ST(STORE_BLK(%fsrc, %dest)); \ - add %dest, 0x40, %dest; + add %dest, 0x40, %dest; \ + DO_SYNC #define STORE_JUMP(dest, fsrc, target) \ EX_ST(STORE_BLK(%fsrc, %dest)); \ add %dest, 0x40, %dest; \ - ba,pt %xcc, target; + ba,pt %xcc, target; \ + nop; #define FINISH_VISCHUNK(dest, f0, f1, left) \ subcc %left, 8, %left;\ @@ -239,17 +242,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %xcc, 1b+4 faligndata %f0, %f2, %f48 1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) - STORE_JUMP(o0, f48, 40f) membar #Sync + STORE_JUMP(o0, f48, 40f) 2: FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) - STORE_JUMP(o0, f48, 48f) membar #Sync + STORE_JUMP(o0, f48, 48f) 3: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) - STORE_JUMP(o0, f48, 56f) membar #Sync + STORE_JUMP(o0, f48, 56f) 1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) @@ -260,17 +263,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %xcc, 1b+4 faligndata %f2, %f4, %f48 1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) - STORE_JUMP(o0, f48, 41f) membar #Sync + STORE_JUMP(o0, f48, 41f) 2: FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f2, f4, 
f6, f8, f10,f12,f14,f16,f18) - STORE_JUMP(o0, f48, 49f) membar #Sync + STORE_JUMP(o0, f48, 49f) 3: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) - STORE_JUMP(o0, f48, 57f) membar #Sync + STORE_JUMP(o0, f48, 57f) 1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) @@ -281,17 +284,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %xcc, 1b+4 faligndata %f4, %f6, %f48 1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) - STORE_JUMP(o0, f48, 42f) membar #Sync + STORE_JUMP(o0, f48, 42f) 2: FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) - STORE_JUMP(o0, f48, 50f) membar #Sync + STORE_JUMP(o0, f48, 50f) 3: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) - STORE_JUMP(o0, f48, 58f) membar #Sync + STORE_JUMP(o0, f48, 58f) 1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) @@ -302,17 +305,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %xcc, 1b+4 faligndata %f6, %f8, %f48 1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) - STORE_JUMP(o0, f48, 43f) membar #Sync + STORE_JUMP(o0, f48, 43f) 2: FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) - STORE_JUMP(o0, f48, 51f) membar #Sync + STORE_JUMP(o0, f48, 51f) 3: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) - STORE_JUMP(o0, f48, 59f) membar 
#Sync + STORE_JUMP(o0, f48, 59f) 1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) @@ -323,17 +326,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %xcc, 1b+4 faligndata %f8, %f10, %f48 1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) - STORE_JUMP(o0, f48, 44f) membar #Sync + STORE_JUMP(o0, f48, 44f) 2: FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) - STORE_JUMP(o0, f48, 52f) membar #Sync + STORE_JUMP(o0, f48, 52f) 3: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) - STORE_JUMP(o0, f48, 60f) membar #Sync + STORE_JUMP(o0, f48, 60f) 1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) @@ -344,17 +347,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %xcc, 1b+4 faligndata %f10, %f12, %f48 1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) - STORE_JUMP(o0, f48, 45f) membar #Sync + STORE_JUMP(o0, f48, 45f) 2: FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) - STORE_JUMP(o0, f48, 53f) membar #Sync + STORE_JUMP(o0, f48, 53f) 3: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) - STORE_JUMP(o0, f48, 61f) membar #Sync + STORE_JUMP(o0, f48, 61f) 1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) @@ -365,17 +368,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %xcc, 1b+4 faligndata %f12, %f14, %f48 1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) 
- STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) - STORE_JUMP(o0, f48, 46f) membar #Sync + STORE_JUMP(o0, f48, 46f) 2: FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) - STORE_JUMP(o0, f48, 54f) membar #Sync + STORE_JUMP(o0, f48, 54f) 3: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) - STORE_JUMP(o0, f48, 62f) membar #Sync + STORE_JUMP(o0, f48, 62f) 1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) @@ -386,17 +389,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %xcc, 1b+4 faligndata %f14, %f16, %f48 1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) - STORE_JUMP(o0, f48, 47f) membar #Sync + STORE_JUMP(o0, f48, 47f) 2: FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) - STORE_JUMP(o0, f48, 55f) membar #Sync + STORE_JUMP(o0, f48, 55f) 3: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) - STORE_SYNC(o0, f48) membar #Sync + STORE_SYNC(o0, f48) FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) - STORE_JUMP(o0, f48, 63f) membar #Sync + STORE_JUMP(o0, f48, 63f) 40: FINISH_VISCHUNK(o0, f0, f2, g3) 41: FINISH_VISCHUNK(o0, f2, f4, g3) diff --git a/arch/sparc64/lib/VISsave.S b/arch/sparc64/lib/VISsave.S index 65e328d600a..4e18989bd60 100644 --- a/arch/sparc64/lib/VISsave.S +++ b/arch/sparc64/lib/VISsave.S @@ -72,7 +72,11 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3 stda %f48, [%g3 + %g1] ASI_BLK_P 5: membar #Sync - jmpl %g7 + %g0, %g0 + ba,pt %xcc, 80f + nop + + .align 32 +80: jmpl %g7 + %g0, %g0 nop 6: ldub [%g3 + TI_FPSAVED], %o5 @@ -87,8 +91,11 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3 
stda %f32, [%g2 + %g1] ASI_BLK_P stda %f48, [%g3 + %g1] ASI_BLK_P membar #Sync - jmpl %g7 + %g0, %g0 + ba,pt %xcc, 80f + nop + .align 32 +80: jmpl %g7 + %g0, %g0 nop .align 32 @@ -126,6 +133,10 @@ VISenterhalf: stda %f0, [%g2 + %g1] ASI_BLK_P stda %f16, [%g3 + %g1] ASI_BLK_P membar #Sync + ba,pt %xcc, 4f + nop + + .align 32 4: and %o5, FPRS_DU, %o5 jmpl %g7 + %g0, %g0 wr %o5, FPRS_FEF, %fprs diff --git a/arch/sparc64/lib/atomic.S b/arch/sparc64/lib/atomic.S index e528b8d1a3e..faf87c31598 100644 --- a/arch/sparc64/lib/atomic.S +++ b/arch/sparc64/lib/atomic.S @@ -7,18 +7,6 @@ #include #include - /* On SMP we need to use memory barriers to ensure - * correct memory operation ordering, nop these out - * for uniprocessor. - */ -#ifdef CONFIG_SMP -#define ATOMIC_PRE_BARRIER membar #StoreLoad | #LoadLoad -#define ATOMIC_POST_BARRIER membar #StoreLoad | #StoreStore -#else -#define ATOMIC_PRE_BARRIER nop -#define ATOMIC_POST_BARRIER nop -#endif - .text /* Two versions of the atomic routines, one that @@ -52,6 +40,24 @@ atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */ nop .size atomic_sub, .-atomic_sub + /* On SMP we need to use memory barriers to ensure + * correct memory operation ordering, nop these out + * for uniprocessor. 
+ */ +#ifdef CONFIG_SMP + +#define ATOMIC_PRE_BARRIER membar #StoreLoad | #LoadLoad; +#define ATOMIC_POST_BARRIER \ + ba,pt %xcc, 80b; \ + membar #StoreLoad | #StoreStore + +80: retl + nop +#else +#define ATOMIC_PRE_BARRIER +#define ATOMIC_POST_BARRIER +#endif + .globl atomic_add_ret .type atomic_add_ret,#function atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ @@ -62,9 +68,10 @@ atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ cmp %g1, %g7 bne,pn %icc, 1b add %g7, %o0, %g7 + sra %g7, 0, %o0 ATOMIC_POST_BARRIER retl - sra %g7, 0, %o0 + nop .size atomic_add_ret, .-atomic_add_ret .globl atomic_sub_ret @@ -77,9 +84,10 @@ atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ cmp %g1, %g7 bne,pn %icc, 1b sub %g7, %o0, %g7 + sra %g7, 0, %o0 ATOMIC_POST_BARRIER retl - sra %g7, 0, %o0 + nop .size atomic_sub_ret, .-atomic_sub_ret .globl atomic64_add @@ -118,9 +126,10 @@ atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ cmp %g1, %g7 bne,pn %xcc, 1b add %g7, %o0, %g7 + mov %g7, %o0 ATOMIC_POST_BARRIER retl - mov %g7, %o0 + nop .size atomic64_add_ret, .-atomic64_add_ret .globl atomic64_sub_ret @@ -133,7 +142,8 @@ atomic64_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ cmp %g1, %g7 bne,pn %xcc, 1b sub %g7, %o0, %g7 + mov %g7, %o0 ATOMIC_POST_BARRIER retl - mov %g7, %o0 + nop .size atomic64_sub_ret, .-atomic64_sub_ret diff --git a/arch/sparc64/lib/bitops.S b/arch/sparc64/lib/bitops.S index 886dcd2b376..31afbfe6c1e 100644 --- a/arch/sparc64/lib/bitops.S +++ b/arch/sparc64/lib/bitops.S @@ -7,20 +7,26 @@ #include #include + .text + /* On SMP we need to use memory barriers to ensure * correct memory operation ordering, nop these out * for uniprocessor. 
*/ + #ifdef CONFIG_SMP #define BITOP_PRE_BARRIER membar #StoreLoad | #LoadLoad -#define BITOP_POST_BARRIER membar #StoreLoad | #StoreStore +#define BITOP_POST_BARRIER \ + ba,pt %xcc, 80b; \ + membar #StoreLoad | #StoreStore + +80: retl + nop #else -#define BITOP_PRE_BARRIER nop -#define BITOP_POST_BARRIER nop +#define BITOP_PRE_BARRIER +#define BITOP_POST_BARRIER #endif - .text - .globl test_and_set_bit .type test_and_set_bit,#function test_and_set_bit: /* %o0=nr, %o1=addr */ @@ -37,10 +43,11 @@ test_and_set_bit: /* %o0=nr, %o1=addr */ cmp %g7, %g1 bne,pn %xcc, 1b and %g7, %o2, %g2 - BITOP_POST_BARRIER clr %o0 + movrne %g2, 1, %o0 + BITOP_POST_BARRIER retl - movrne %g2, 1, %o0 + nop .size test_and_set_bit, .-test_and_set_bit .globl test_and_clear_bit @@ -59,10 +66,11 @@ test_and_clear_bit: /* %o0=nr, %o1=addr */ cmp %g7, %g1 bne,pn %xcc, 1b and %g7, %o2, %g2 - BITOP_POST_BARRIER clr %o0 + movrne %g2, 1, %o0 + BITOP_POST_BARRIER retl - movrne %g2, 1, %o0 + nop .size test_and_clear_bit, .-test_and_clear_bit .globl test_and_change_bit @@ -81,10 +89,11 @@ test_and_change_bit: /* %o0=nr, %o1=addr */ cmp %g7, %g1 bne,pn %xcc, 1b and %g7, %o2, %g2 - BITOP_POST_BARRIER clr %o0 + movrne %g2, 1, %o0 + BITOP_POST_BARRIER retl - movrne %g2, 1, %o0 + nop .size test_and_change_bit, .-test_and_change_bit .globl set_bit diff --git a/arch/sparc64/lib/debuglocks.c b/arch/sparc64/lib/debuglocks.c index c421e0c6532..f03344cf784 100644 --- a/arch/sparc64/lib/debuglocks.c +++ b/arch/sparc64/lib/debuglocks.c @@ -252,8 +252,9 @@ wlock_again: " andn %%g1, %%g3, %%g7\n" " casx [%0], %%g1, %%g7\n" " cmp %%g1, %%g7\n" +" membar #StoreLoad | #StoreStore\n" " bne,pn %%xcc, 1b\n" -" membar #StoreLoad | #StoreStore" +" nop" : /* no outputs */ : "r" (&(rw->lock)) : "g3", "g1", "g7", "cc", "memory"); @@ -351,8 +352,9 @@ int _do_write_trylock (rwlock_t *rw, char *str) " andn %%g1, %%g3, %%g7\n" " casx [%0], %%g1, %%g7\n" " cmp %%g1, %%g7\n" +" membar #StoreLoad | #StoreStore\n" " bne,pn %%xcc, 1b\n" 
-" membar #StoreLoad | #StoreStore" +" nop" : /* no outputs */ : "r" (&(rw->lock)) : "g3", "g1", "g7", "cc", "memory"); diff --git a/arch/sparc64/lib/dec_and_lock.S b/arch/sparc64/lib/dec_and_lock.S index 7e6fdaebedb..8ee288dd0af 100644 --- a/arch/sparc64/lib/dec_and_lock.S +++ b/arch/sparc64/lib/dec_and_lock.S @@ -48,8 +48,9 @@ start_to_zero: #endif to_zero: ldstub [%o1], %g3 + membar #StoreLoad | #StoreStore brnz,pn %g3, spin_on_lock - membar #StoreLoad | #StoreStore + nop loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */ cmp %g2, %g7 @@ -71,8 +72,9 @@ loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */ nop spin_on_lock: ldub [%o1], %g3 + membar #LoadLoad brnz,pt %g3, spin_on_lock - membar #LoadLoad + nop ba,pt %xcc, to_zero nop nop diff --git a/arch/sparc64/lib/rwsem.S b/arch/sparc64/lib/rwsem.S index 174ff7b9164..75f0e6b951d 100644 --- a/arch/sparc64/lib/rwsem.S +++ b/arch/sparc64/lib/rwsem.S @@ -17,8 +17,9 @@ __down_read: bne,pn %icc, 1b add %g7, 1, %g7 cmp %g7, 0 + membar #StoreLoad | #StoreStore bl,pn %icc, 3f - membar #StoreLoad | #StoreStore + nop 2: retl nop @@ -57,8 +58,9 @@ __down_write: cmp %g3, %g7 bne,pn %icc, 1b cmp %g7, 0 + membar #StoreLoad | #StoreStore bne,pn %icc, 3f - membar #StoreLoad | #StoreStore + nop 2: retl nop 3: @@ -97,8 +99,9 @@ __up_read: cmp %g1, %g7 bne,pn %icc, 1b cmp %g7, 0 + membar #StoreLoad | #StoreStore bl,pn %icc, 3f - membar #StoreLoad | #StoreStore + nop 2: retl nop 3: sethi %hi(RWSEM_ACTIVE_MASK), %g1 @@ -126,8 +129,9 @@ __up_write: bne,pn %icc, 1b sub %g7, %g1, %g7 cmp %g7, 0 + membar #StoreLoad | #StoreStore bl,pn %icc, 3f - membar #StoreLoad | #StoreStore + nop 2: retl nop @@ -151,8 +155,9 @@ __downgrade_write: bne,pn %icc, 1b sub %g7, %g1, %g7 cmp %g7, 0 + membar #StoreLoad | #StoreStore bl,pn %icc, 3f - membar #StoreLoad | #StoreStore + nop 2: retl nop diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 9c5222075da..8fc413cb6ac 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -136,8 
+136,9 @@ static __inline__ void set_dcache_dirty(struct page *page, int this_cpu) "or %%g1, %0, %%g1\n\t" "casx [%2], %%g7, %%g1\n\t" "cmp %%g7, %%g1\n\t" + "membar #StoreLoad | #StoreStore\n\t" "bne,pn %%xcc, 1b\n\t" - " membar #StoreLoad | #StoreStore" + " nop" : /* no outputs */ : "r" (mask), "r" (non_cpu_bits), "r" (&page->flags) : "g1", "g7"); @@ -157,8 +158,9 @@ static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long c " andn %%g7, %1, %%g1\n\t" "casx [%2], %%g7, %%g1\n\t" "cmp %%g7, %%g1\n\t" + "membar #StoreLoad | #StoreStore\n\t" "bne,pn %%xcc, 1b\n\t" - " membar #StoreLoad | #StoreStore\n" + " nop\n" "2:" : /* no outputs */ : "r" (cpu), "r" (mask), "r" (&page->flags), diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S index 7a093432101..7a2431d3abc 100644 --- a/arch/sparc64/mm/ultra.S +++ b/arch/sparc64/mm/ultra.S @@ -266,8 +266,9 @@ __cheetah_flush_tlb_pending: /* 22 insns */ andn %o3, 1, %o3 stxa %g0, [%o3] ASI_IMMU_DEMAP 2: stxa %g0, [%o3] ASI_DMMU_DEMAP + membar #Sync brnz,pt %o1, 1b - membar #Sync + nop stxa %g2, [%o4] ASI_DMMU flush %g6 wrpr %g0, 0, %tl -- cgit v1.2.3 From 63b614522cba5a015923c0e8f284be6e01c13f1a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 27 Jun 2005 17:04:45 -0700 Subject: [SPARC64]: Get rid of fast IRQ feature. The only real user was the assembler floppy interrupt handler, which does not need to be in assembly. This makes it so that there are fewer pieces of code which know about the internal layout of ivector_table[] and friends. Signed-off-by: David S. 
Miller --- arch/sparc64/kernel/auxio.c | 2 +- arch/sparc64/kernel/entry.S | 110 ----------------------- arch/sparc64/kernel/irq.c | 171 +++++++++++------------------------- arch/sparc64/kernel/sparc64_ksyms.c | 1 - 4 files changed, 51 insertions(+), 233 deletions(-) (limited to 'arch') diff --git a/arch/sparc64/kernel/auxio.c b/arch/sparc64/kernel/auxio.c index a0716ccc2f4..8852c20c8d9 100644 --- a/arch/sparc64/kernel/auxio.c +++ b/arch/sparc64/kernel/auxio.c @@ -16,7 +16,7 @@ #include #include -/* This cannot be static, as it is referenced in entry.S */ +/* This cannot be static, as it is referenced in irq.c */ void __iomem *auxio_register = NULL; enum auxio_type { diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index ffe717ab7f8..eee516a71c1 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -701,116 +701,6 @@ utrap_ill: ba,pt %xcc, rtrap clr %l6 -#ifdef CONFIG_BLK_DEV_FD - .globl floppy_hardint -floppy_hardint: - wr %g0, (1 << 11), %clear_softint - sethi %hi(doing_pdma), %g1 - ld [%g1 + %lo(doing_pdma)], %g2 - brz,pn %g2, floppy_dosoftint - sethi %hi(fdc_status), %g3 - ldx [%g3 + %lo(fdc_status)], %g3 - sethi %hi(pdma_vaddr), %g5 - ldx [%g5 + %lo(pdma_vaddr)], %g4 - sethi %hi(pdma_size), %g5 - ldx [%g5 + %lo(pdma_size)], %g5 - -next_byte: - lduba [%g3] ASI_PHYS_BYPASS_EC_E, %g7 - andcc %g7, 0x80, %g0 - be,pn %icc, floppy_fifo_emptied - andcc %g7, 0x20, %g0 - be,pn %icc, floppy_overrun - andcc %g7, 0x40, %g0 - be,pn %icc, floppy_write - sub %g5, 1, %g5 - - inc %g3 - lduba [%g3] ASI_PHYS_BYPASS_EC_E, %g7 - dec %g3 - orcc %g0, %g5, %g0 - stb %g7, [%g4] - bne,pn %xcc, next_byte - add %g4, 1, %g4 - - b,pt %xcc, floppy_tdone - nop - -floppy_write: - ldub [%g4], %g7 - orcc %g0, %g5, %g0 - inc %g3 - stba %g7, [%g3] ASI_PHYS_BYPASS_EC_E - dec %g3 - bne,pn %xcc, next_byte - add %g4, 1, %g4 - -floppy_tdone: - sethi %hi(pdma_vaddr), %g1 - stx %g4, [%g1 + %lo(pdma_vaddr)] - sethi %hi(pdma_size), %g1 - stx %g5, [%g1 + 
%lo(pdma_size)] - sethi %hi(auxio_register), %g1 - ldx [%g1 + %lo(auxio_register)], %g7 - lduba [%g7] ASI_PHYS_BYPASS_EC_E, %g5 - or %g5, AUXIO_AUX1_FTCNT, %g5 -/* andn %g5, AUXIO_AUX1_MASK, %g5 */ - stba %g5, [%g7] ASI_PHYS_BYPASS_EC_E - andn %g5, AUXIO_AUX1_FTCNT, %g5 -/* andn %g5, AUXIO_AUX1_MASK, %g5 */ - - nop; nop; nop; nop; nop; nop; - nop; nop; nop; nop; nop; nop; - - stba %g5, [%g7] ASI_PHYS_BYPASS_EC_E - sethi %hi(doing_pdma), %g1 - b,pt %xcc, floppy_dosoftint - st %g0, [%g1 + %lo(doing_pdma)] - -floppy_fifo_emptied: - sethi %hi(pdma_vaddr), %g1 - stx %g4, [%g1 + %lo(pdma_vaddr)] - sethi %hi(pdma_size), %g1 - stx %g5, [%g1 + %lo(pdma_size)] - sethi %hi(irq_action), %g1 - or %g1, %lo(irq_action), %g1 - ldx [%g1 + (11 << 3)], %g3 ! irqaction[floppy_irq] - ldx [%g3 + 0x08], %g4 ! action->flags>>48==ino - sethi %hi(ivector_table), %g3 - srlx %g4, 48, %g4 - or %g3, %lo(ivector_table), %g3 - sllx %g4, 5, %g4 - ldx [%g3 + %g4], %g4 ! &ivector_table[ino] - ldx [%g4 + 0x10], %g4 ! bucket->iclr - stwa %g0, [%g4] ASI_PHYS_BYPASS_EC_E ! ICLR_IDLE - membar #Sync ! probably not needed... - retry - -floppy_overrun: - sethi %hi(pdma_vaddr), %g1 - stx %g4, [%g1 + %lo(pdma_vaddr)] - sethi %hi(pdma_size), %g1 - stx %g5, [%g1 + %lo(pdma_size)] - sethi %hi(doing_pdma), %g1 - st %g0, [%g1 + %lo(doing_pdma)] - -floppy_dosoftint: - rdpr %pil, %g2 - wrpr %g0, 15, %pil - sethi %hi(109f), %g7 - b,pt %xcc, etrap_irq -109: or %g7, %lo(109b), %g7 - - mov 11, %o0 - mov 0, %o1 - call sparc_floppy_irq - add %sp, PTREGS_OFF, %o2 - - b,pt %xcc, rtrap_irq - nop - -#endif /* CONFIG_BLK_DEV_FD */ - /* XXX Here is stuff we still need to write... 
-DaveM XXX */ .globl netbsd_syscall netbsd_syscall: diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c index 4dcb8af9409..42471257730 100644 --- a/arch/sparc64/kernel/irq.c +++ b/arch/sparc64/kernel/irq.c @@ -37,6 +37,7 @@ #include #include #include +#include #ifdef CONFIG_SMP static void distribute_irqs(void); @@ -834,137 +835,65 @@ void handler_irq(int irq, struct pt_regs *regs) } #ifdef CONFIG_BLK_DEV_FD -extern void floppy_interrupt(int irq, void *dev_cookie, struct pt_regs *regs); +extern irqreturn_t floppy_interrupt(int, void *, struct pt_regs *); -void sparc_floppy_irq(int irq, void *dev_cookie, struct pt_regs *regs) -{ - struct irqaction *action = *(irq + irq_action); - struct ino_bucket *bucket; - int cpu = smp_processor_id(); - - irq_enter(); - kstat_this_cpu.irqs[irq]++; - - *(irq_work(cpu, irq)) = 0; - bucket = get_ino_in_irqaction(action) + ivector_table; - - bucket->flags |= IBF_INPROGRESS; - - floppy_interrupt(irq, dev_cookie, regs); - upa_writel(ICLR_IDLE, bucket->iclr); - - bucket->flags &= ~IBF_INPROGRESS; - - irq_exit(); -} -#endif - -/* The following assumes that the branch lies before the place we - * are branching to. This is the case for a trap vector... - * You have been warned. 
- */ -#define SPARC_BRANCH(dest_addr, inst_addr) \ - (0x10800000 | ((((dest_addr)-(inst_addr))>>2)&0x3fffff)) - -#define SPARC_NOP (0x01000000) +/* XXX No easy way to include asm/floppy.h XXX */ +extern unsigned char *pdma_vaddr; +extern unsigned long pdma_size; +extern volatile int doing_pdma; +extern unsigned long fdc_status; -static void install_fast_irq(unsigned int cpu_irq, - irqreturn_t (*handler)(int, void *, struct pt_regs *)) +irqreturn_t sparc_floppy_irq(int irq, void *dev_cookie, struct pt_regs *regs) { - extern unsigned long sparc64_ttable_tl0; - unsigned long ttent = (unsigned long) &sparc64_ttable_tl0; - unsigned int *insns; - - ttent += 0x820; - ttent += (cpu_irq - 1) << 5; - insns = (unsigned int *) ttent; - insns[0] = SPARC_BRANCH(((unsigned long) handler), - ((unsigned long)&insns[0])); - insns[1] = SPARC_NOP; - __asm__ __volatile__("membar #StoreStore; flush %0" : : "r" (ttent)); -} - -int request_fast_irq(unsigned int irq, - irqreturn_t (*handler)(int, void *, struct pt_regs *), - unsigned long irqflags, const char *name, void *dev_id) -{ - struct irqaction *action; - struct ino_bucket *bucket = __bucket(irq); - unsigned long flags; - - /* No pil0 dummy buckets allowed here. 
*/ - if (bucket < &ivector_table[0] || - bucket >= &ivector_table[NUM_IVECS]) { - unsigned int *caller; - - __asm__ __volatile__("mov %%i7, %0" : "=r" (caller)); - printk(KERN_CRIT "request_fast_irq: Old style IRQ registry attempt " - "from %p, irq %08x.\n", caller, irq); - return -EINVAL; - } - - if (!handler) - return -EINVAL; + if (likely(doing_pdma)) { + void __iomem *stat = (void __iomem *) fdc_status; + unsigned char *vaddr = pdma_vaddr; + unsigned long size = pdma_size; + u8 val; + + while (size) { + val = readb(stat); + if (unlikely(!(val & 0x80))) { + pdma_vaddr = vaddr; + pdma_size = size; + return IRQ_HANDLED; + } + if (unlikely(!(val & 0x20))) { + pdma_vaddr = vaddr; + pdma_size = size; + doing_pdma = 0; + goto main_interrupt; + } + if (val & 0x40) { + /* read */ + *vaddr++ = readb(stat + 1); + } else { + unsigned char data = *vaddr++; - if ((bucket->pil == 0) || (bucket->pil == 14)) { - printk("request_fast_irq: Trying to register shared IRQ 0 or 14.\n"); - return -EBUSY; - } + /* write */ + writeb(data, stat + 1); + } + size--; + } - spin_lock_irqsave(&irq_action_lock, flags); + pdma_vaddr = vaddr; + pdma_size = size; - action = *(bucket->pil + irq_action); - if (action) { - if (action->flags & SA_SHIRQ) - panic("Trying to register fast irq when already shared.\n"); - if (irqflags & SA_SHIRQ) - panic("Trying to register fast irq as shared.\n"); - printk("request_fast_irq: Trying to register yet already owned.\n"); - spin_unlock_irqrestore(&irq_action_lock, flags); - return -EBUSY; - } + /* Send Terminal Count pulse to floppy controller: set FTCNT, write it, then clear FTCNT (leaving the other auxio bits untouched) and write again. */ + val = readb(auxio_register); + val |= AUXIO_AUX1_FTCNT; + writeb(val, auxio_register); + val &= ~AUXIO_AUX1_FTCNT; + writeb(val, auxio_register); - /* - * We do not check for SA_SAMPLE_RANDOM in this path. Neither do we - * support smp intr affinity in this path. 
- */ - if (irqflags & SA_STATIC_ALLOC) { - if (static_irq_count < MAX_STATIC_ALLOC) - action = &static_irqaction[static_irq_count++]; - else - printk("Request for IRQ%d (%s) SA_STATIC_ALLOC failed " - "using kmalloc\n", bucket->pil, name); - } - if (action == NULL) - action = (struct irqaction *)kmalloc(sizeof(struct irqaction), - GFP_ATOMIC); - if (!action) { - spin_unlock_irqrestore(&irq_action_lock, flags); - return -ENOMEM; + doing_pdma = 0; } - install_fast_irq(bucket->pil, handler); - bucket->irq_info = action; - bucket->flags |= IBF_ACTIVE; - - action->handler = handler; - action->flags = irqflags; - action->dev_id = NULL; - action->name = name; - action->next = NULL; - put_ino_in_irqaction(action, irq); - put_smpaff_in_irqaction(action, CPU_MASK_NONE); - - *(bucket->pil + irq_action) = action; - enable_irq(irq); - - spin_unlock_irqrestore(&irq_action_lock, flags); - -#ifdef CONFIG_SMP - distribute_irqs(); -#endif - return 0; +main_interrupt: + return floppy_interrupt(irq, dev_cookie, regs); } +EXPORT_SYMBOL(sparc_floppy_irq); +#endif /* We really don't need these at all on the Sparc. We only have * stubs here because they are exported to modules. diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index e78cc53594f..56cd96f4a5c 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -227,7 +227,6 @@ EXPORT_SYMBOL(__flush_dcache_range); EXPORT_SYMBOL(mostek_lock); EXPORT_SYMBOL(mstk48t02_regs); -EXPORT_SYMBOL(request_fast_irq); #ifdef CONFIG_SUN_AUXIO EXPORT_SYMBOL(auxio_set_led); EXPORT_SYMBOL(auxio_set_lte); -- cgit v1.2.3