From 9959c888a38b0f25b0e81a480f537d6489348442 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 28 Dec 2009 21:08:29 -0800 Subject: x86: Increase NR_IRQS and nr_irqs I have a system with lots of igb and ixgbe, when iov/vf are enabled for them, we hit the limit of 3064. when system has 20 pcie installed, and one card has 2 functions, and one function needs 64 msi-x, may need 20 * 2 * 64 = 2560 for msi-x but if iov and vf are enabled may need 20 * 2 * 64 * 3 = 7680 for msi-x assume system with 5 ioapic, nr_irqs_gsi will be 120. NR_CPUS = 512, and nr_cpu_ids = 128 will have NR_IRQS = 256 + 512 * 64 = 33024 will have nr_irqs = 120 + 8 * 128 + 120 * 64 = 8824 When SPARSE_IRQ is not set, there is no increase with kernel data size. when NR_CPUS=128, and SPARSE_IRQ is set: text data bss dec hex filename 21837444 4216564 12480736 38534744 24bfe58 vmlinux.before 21837442 4216580 12480736 38534758 24bfe66 vmlinux.after when NR_CPUS=4096, and SPARSE_IRQ is set text data bss dec hex filename 21878619 5610244 13415392 40904255 270263f vmlinux.before 21878617 5610244 13415392 40904253 270263d vmlinux.after Signed-off-by: Yinghai Lu Cc: Linus Torvalds Cc: Andrew Morton LKML-Reference: <4B398ECD.1080506@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 12 ++++++------ arch/x86/kernel/apic/io_apic.c | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 4611f085cd4..3ab43df089c 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -154,21 +154,21 @@ static inline int invalid_vm86_irq(int irq) #define NR_IRQS_LEGACY 16 -#define CPU_VECTOR_LIMIT ( 8 * NR_CPUS ) #define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS ) #ifdef CONFIG_X86_IO_APIC # ifdef CONFIG_SPARSE_IRQ +# define CPU_VECTOR_LIMIT (64 * NR_CPUS) # define NR_IRQS \ (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \ (NR_VECTORS + CPU_VECTOR_LIMIT) : \ (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) # else -# if NR_CPUS < MAX_IO_APICS -# define NR_IRQS (NR_VECTORS + 4*CPU_VECTOR_LIMIT) -# else -# define NR_IRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT) -# endif +# define CPU_VECTOR_LIMIT (32 * NR_CPUS) +# define NR_IRQS \ + (CPU_VECTOR_LIMIT < IO_APIC_VECTOR_LIMIT ? \ + (NR_VECTORS + CPU_VECTOR_LIMIT) : \ + (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) # endif #else /* !CONFIG_X86_IO_APIC: */ # define NR_IRQS NR_IRQS_LEGACY diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index de00c4619a5..d9cd1f1b9c0 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3840,7 +3840,7 @@ int __init arch_probe_nr_irqs(void) /* * for MSI and HT dyn irq */ - nr += nr_irqs_gsi * 16; + nr += nr_irqs_gsi * 64; #endif if (nr < nr_irqs) nr_irqs = nr; -- cgit v1.2.3 From 99d113b17e8ca5a8b68a9d3f7691e2f552dd6a06 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 4 Jan 2010 16:16:06 -0800 Subject: x86, apic: Reclaim IDT vectors 0x20-0x2f Reclaim 16 IDT vectors and make them available for general allocation. Reclaim vectors 0x20-0x2f by reallocating the IRQ_MOVE_CLEANUP_VECTOR to vector 0x1f. This is in the range of vector numbers that is officially reserved for the CPU (for exceptions), however, the use of the APIC to generate any vector 0x10 or above is documented, and the CPU internally can receive any vector number (the legacy BIOS uses INT 0x08-0x0f for interrupts, as messed up as that is.) Since IRQ_MOVE_CLEANUP_VECTOR has to be alone in the lowest-numbered priority level (block of 16), this effectively enables us to reclaim an otherwise-unusable APIC priority level and put it to use. Since this is a transient kernel-only allocation we can change it at any time, and if/when there is an exception at vector 0x1f this assignment needs to be changed as part of OS enabling that new feature. Signed-off-by: Yinghai Lu LKML-Reference: <4B4284C6.9030107@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/irq_vectors.h | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 3ab43df089c..dbc81acb7e9 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -30,26 +30,38 @@ /* * IDT vectors usable for external interrupt sources start * at 0x20: + * hpa said we can start from 0x1f. + * 0x1f is documented as reserved. However, the ability for the APIC + * to generate vectors starting at 0x10 is documented, as is the + * ability for the CPU to receive any vector number as an interrupt. + * 0x1f is used for IRQ_MOVE_CLEANUP_VECTOR since that vector needs + * an entire privilege level (16 vectors) all by itself at a higher + * priority than any actual device vector. Thus, by placing it in the + * otherwise-unusable 0x10 privilege level, we avoid wasting a full + * 16-vector block. */ -#define FIRST_EXTERNAL_VECTOR 0x20 +#define FIRST_EXTERNAL_VECTOR 0x1f +#define IA32_SYSCALL_VECTOR 0x80 #ifdef CONFIG_X86_32 # define SYSCALL_VECTOR 0x80 -# define IA32_SYSCALL_VECTOR 0x80 -#else -# define IA32_SYSCALL_VECTOR 0x80 #endif /* - * Reserve the lowest usable priority level 0x20 - 0x2f for triggering + * Reserve the lowest usable priority level 0x10 - 0x1f for triggering * cleanup after irq migration. + * this overlaps with the reserved range for cpu exceptions so this + * will need to be changed to 0x20 - 0x2f if the last cpu exception is + * ever allocated. */ + #define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR /* - * Vectors 0x30-0x3f are used for ISA interrupts. + * Vectors 0x20-0x2f are used for ISA interrupts. + * round up to the next 16-vector boundary */ -#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10) +#define IRQ0_VECTOR ((FIRST_EXTERNAL_VECTOR + 16) & ~15) #define IRQ1_VECTOR (IRQ0_VECTOR + 1) #define IRQ2_VECTOR (IRQ0_VECTOR + 2) @@ -122,7 +134,7 @@ /* * First APIC vector available to drivers: (vectors 0x30-0xee) we - * start at 0x31(0x41) to spread out vectors evenly between priority + * start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) -- cgit v1.2.3 From ea94396629a3e0cb9a3a9c75335b1de255b30426 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 4 Jan 2010 21:14:41 -0800 Subject: x86, apic: Don't waste a vector to improve vector spread We want to use a vector-assignment sequence that avoids stumbling onto 0x80 earlier in the sequence, in order to improve the spread of vectors across priority levels on machines with a small number of interrupt sources. Right now, this is done by simply making the first vector (0x31 or 0x41) completely unusable. This is unnecessary; all we need is to start assignment at a +1 offset, we don't actually need to prohibit the usage of this vector once we have wrapped around. Signed-off-by: H. Peter Anvin LKML-Reference: <4B426550.6000209@kernel.org> --- arch/x86/include/asm/irq_vectors.h | 9 +++++---- arch/x86/kernel/apic/io_apic.c | 3 ++- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index dbc81acb7e9..585a42810cf 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -133,11 +133,12 @@ #define MCE_SELF_VECTOR 0xeb /* - * First APIC vector available to drivers: (vectors 0x30-0xee) we - * start at 0x31 to spread out vectors evenly between priority - * levels. (0x80 is the syscall vector) + * First APIC vector available to drivers: (vectors 0x30-0xee). We + * start allocating at 0x31 to spread out vectors evenly between + * priority levels. (0x80 is the syscall vector) */ -#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) +#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 1) +#define VECTOR_OFFSET_START 1 #define NR_VECTORS 256 diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index d9cd1f1b9c0..e9ba0903e9d 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1162,7 +1162,8 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) * Also, we've got to be careful not to trash gate * 0x80, because int 0x80 is hm, kind of importantish. ;) */ - static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; + static int current_vector = FIRST_DEVICE_VECTOR + VECTOR_OFFSET_START; + static int current_offset = VECTOR_OFFSET_START % 8; unsigned int old_vector; int cpu, err; cpumask_var_t tmp_mask; -- cgit v1.2.3 From 722b3654852e48b93367a63f8ada9ee1cd43f2d3 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 13 Jan 2010 16:19:10 -0800 Subject: x86, vmi: Fix vmi_get_timer_vector() to use IRQ0_VECTOR FIRST_DEVICE_VECTOR is going away and it looks like a bad hack to steal FIRST_DEVICE_VECTOR / FIRST_EXTERNAL_VECTOR, when it looks like it needs IRQ0_VECTOR. Fix vmi_get_timer_vector() to use IRQ0_VECTOR. Signed-off-by: Suresh Siddha LKML-Reference: <20100114002118.436172066@sbs-t61.sc.intel.com> Cc: Alok N Kataria Cc: Zach Amsden Signed-off-by: H. Peter Anvin --- arch/x86/kernel/vmiclock_32.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index 74c92bb194d..1268d993e9c 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -79,11 +79,7 @@ unsigned long vmi_tsc_khz(void) static inline unsigned int vmi_get_timer_vector(void) { -#ifdef CONFIG_X86_IO_APIC - return FIRST_DEVICE_VECTOR; -#else - return FIRST_EXTERNAL_VECTOR; -#endif + return IRQ0_VECTOR; } /** vmi clockchip */ @@ -239,8 +235,6 @@ void __init vmi_time_init(void) vmi_time_init_clockevent(); setup_irq(0, &vmi_clock_action); - for_each_possible_cpu(cpu) - per_cpu(vector_irq, cpu)[vmi_get_timer_vector()] = 0; } #ifdef CONFIG_X86_LOCAL_APIC -- cgit v1.2.3 From 6579b474572fd54c583ac074e8e7aaae926c62ef Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 13 Jan 2010 16:19:11 -0800 Subject: x86, irq: Use 0x20 for the IRQ_MOVE_CLEANUP_VECTOR instead of 0x1f After talking to some more folks inside intel (Peter Anvin, Asit Mallick), the safest option (for future compatibility etc) seen was to use vector 0x20 for IRQ_MOVE_CLEANUP_VECTOR instead of using vector 0x1f (which is documented as reserved vector in the Intel IA32 manuals). Also we don't need to reserve the entire privilege level (all 16 vectors in the priority bucket that IRQ_MOVE_CLEANUP_VECTOR falls into), as the x86 architecture (section 10.9.3 in SDM Vol3a) specifies that with in the priority level, the higher the vector number the higher the priority. And hence we don't need to reserve the complete priority level 0x20-0x2f for the IRQ migration cleanup logic. So change the IRQ_MOVE_CLEANUP_VECTOR to 0x20 and allow 0x21-0x2f to be used for device interrupts. 0x30-0x3f will be used for ISA interrupts (these also can be migrated in the context of IOAPIC and hence need to be at a higher priority level than IRQ_MOVE_CLEANUP_VECTOR). Signed-off-by: Suresh Siddha LKML-Reference: <20100114002118.521826763@sbs-t61.sc.intel.com> Cc: Yinghai Lu Cc: Eric W. Biederman Cc: Maciej W. Rozycki Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/irq_vectors.h | 47 +++++++++++++------------------------- arch/x86/kernel/apic/io_apic.c | 4 ++-- 2 files changed, 18 insertions(+), 33 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 585a42810cf..8767d99c4f6 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -28,19 +28,22 @@ #define MCE_VECTOR 0x12 /* - * IDT vectors usable for external interrupt sources start - * at 0x20: - * hpa said we can start from 0x1f. - * 0x1f is documented as reserved. However, the ability for the APIC - * to generate vectors starting at 0x10 is documented, as is the - * ability for the CPU to receive any vector number as an interrupt. - * 0x1f is used for IRQ_MOVE_CLEANUP_VECTOR since that vector needs - * an entire privilege level (16 vectors) all by itself at a higher - * priority than any actual device vector. Thus, by placing it in the - * otherwise-unusable 0x10 privilege level, we avoid wasting a full - * 16-vector block. + * IDT vectors usable for external interrupt sources start at 0x20. + * (0x80 is the syscall vector, 0x30-0x3f are for ISA) */ -#define FIRST_EXTERNAL_VECTOR 0x1f +#define FIRST_EXTERNAL_VECTOR 0x20 +/* + * We start allocating at 0x21 to spread out vectors evenly between + * priority levels. (0x80 is the syscall vector) + */ +#define VECTOR_OFFSET_START 1 + +/* + * Reserve the lowest usable vector (and hence lowest priority) 0x20 for + * triggering cleanup after irq migration. 0x21-0x2f will still be used + * for device interrupts. + */ +#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR #define IA32_SYSCALL_VECTOR 0x80 #ifdef CONFIG_X86_32 @@ -48,17 +51,7 @@ #endif /* - * Reserve the lowest usable priority level 0x10 - 0x1f for triggering - * cleanup after irq migration. - * this overlaps with the reserved range for cpu exceptions so this - * will need to be changed to 0x20 - 0x2f if the last cpu exception is - * ever allocated. - */ - -#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR - -/* - * Vectors 0x20-0x2f are used for ISA interrupts. + * Vectors 0x30-0x3f are used for ISA interrupts. * round up to the next 16-vector boundary */ #define IRQ0_VECTOR ((FIRST_EXTERNAL_VECTOR + 16) & ~15) @@ -132,14 +125,6 @@ */ #define MCE_SELF_VECTOR 0xeb -/* - * First APIC vector available to drivers: (vectors 0x30-0xee). We - * start allocating at 0x31 to spread out vectors evenly between - * priority levels. (0x80 is the syscall vector) - */ -#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 1) -#define VECTOR_OFFSET_START 1 - #define NR_VECTORS 256 #define FPU_IRQ 13 diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index e9ba0903e9d..409f4943dc1 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1162,7 +1162,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) * Also, we've got to be careful not to trash gate * 0x80, because int 0x80 is hm, kind of importantish. ;) */ - static int current_vector = FIRST_DEVICE_VECTOR + VECTOR_OFFSET_START; + static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; static int current_offset = VECTOR_OFFSET_START % 8; unsigned int old_vector; int cpu, err; @@ -1199,7 +1199,7 @@ next: if (vector >= first_system_vector) { /* If out of vectors on large boxen, must share them. */ offset = (offset + 1) % 8; - vector = FIRST_DEVICE_VECTOR + offset; + vector = FIRST_EXTERNAL_VECTOR + offset; } if (unlikely(current_vector == vector)) continue; -- cgit v1.2.3 From 97943390b043bcafca69f9163b86bbf627b75589 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 19 Jan 2010 12:20:54 -0800 Subject: x86, irq: Don't block IRQ0_VECTOR..IRQ15_VECTOR's on all cpu's Currently IRQ0..IRQ15 are assigned to IRQ0_VECTOR..IRQ15_VECTOR's on all the cpu's. If these IRQ's are handled by legacy pic controller, then the kernel handles them only on cpu 0. So there is no need to block this vector space on all cpu's. Similarly if these IRQ's are handled by IO-APIC, then the IRQ affinity will determine on which cpu's we need allocate the vector resource for that particular IRQ. This can be done dynamically and here also there is no need to block 16 vectors for IRQ0..IRQ15 on all cpu's. Fix this by initially assigning IRQ0..IRQ15 to IRQ0_VECTOR..IRQ15_VECTOR's only on cpu 0. If the legacy controllers like pic handles these irq's, then this configuration will be fixed. If more modern controllers like IO-APIC handle these IRQ's, then we start with this configuration and as IRQ's migrate, vectors (/and cpu's) associated with these IRQ's change dynamically. This will freeup the block of 16 vectors on other cpu's which don't handle IRQ0..IRQ15, which can now be used for other IRQ's that the particular cpu handle. [ hpa: this also an architectural cleanup for future legacy-PIC-free configurations. ] [ hpa: fixed typo NR_LEGACY_IRQS -> NR_IRQS_LEGACY ] Signed-off-by: Suresh Siddha LKML-Reference: <1263932453.2814.52.camel@sbs-t61.sc.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/irq.h | 1 + arch/x86/kernel/apic/io_apic.c | 33 ++++++++++----------------------- arch/x86/kernel/irqinit.c | 35 +++++++++++++++++------------------ arch/x86/kernel/vmiclock_32.c | 2 ++ 4 files changed, 30 insertions(+), 41 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 5458380b6ef..262292729fc 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -48,5 +48,6 @@ extern DECLARE_BITMAP(used_vectors, NR_VECTORS); extern int vector_used_by_percpu_irq(unsigned int vector); extern void init_ISA_irqs(void); +extern int nr_legacy_irqs; #endif /* _ASM_X86_IRQ_H */ diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 409f4943dc1..1a30587a6bc 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -94,8 +94,6 @@ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* # of MP IRQ source entries */ int mp_irq_entries; -/* Number of legacy interrupts */ -static int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY; /* GSI interrupts */ static int nr_irqs_gsi = NR_IRQS_LEGACY; @@ -140,27 +138,10 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node) /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ #ifdef CONFIG_SPARSE_IRQ -static struct irq_cfg irq_cfgx[] = { +static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY]; #else -static struct irq_cfg irq_cfgx[NR_IRQS] = { +static struct irq_cfg irq_cfgx[NR_IRQS]; #endif - [0] = { .vector = IRQ0_VECTOR, }, - [1] = { .vector = IRQ1_VECTOR, }, - [2] = { .vector = IRQ2_VECTOR, }, - [3] = { .vector = IRQ3_VECTOR, }, - [4] = { .vector = IRQ4_VECTOR, }, - [5] = { .vector = IRQ5_VECTOR, }, - [6] = { .vector = IRQ6_VECTOR, }, - [7] = { .vector = IRQ7_VECTOR, }, - [8] = { .vector = IRQ8_VECTOR, }, - [9] = { .vector = IRQ9_VECTOR, }, - [10] = { .vector = IRQ10_VECTOR, }, - [11] = { .vector = IRQ11_VECTOR, }, - [12] = { .vector = IRQ12_VECTOR, }, - [13] = { .vector = IRQ13_VECTOR, }, - [14] = { .vector = IRQ14_VECTOR, }, - [15] = { .vector = IRQ15_VECTOR, }, -}; void __init io_apic_disable_legacy(void) { @@ -185,8 +166,14 @@ int __init arch_early_irq_init(void) desc->chip_data = &cfg[i]; zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node); - if (i < nr_legacy_irqs) - cpumask_setall(cfg[i].domain); + /* + * For legacy IRQ's, start with assigning irq0 to irq15 to + * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0. + */ + if (i < nr_legacy_irqs) { + cfg[i].vector = IRQ0_VECTOR + i; + cpumask_set_cpu(0, cfg[i].domain); + } } return 0; diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index d5932226614..fce55d53263 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -84,24 +84,7 @@ static struct irqaction irq2 = { }; DEFINE_PER_CPU(vector_irq_t, vector_irq) = { - [0 ... IRQ0_VECTOR - 1] = -1, - [IRQ0_VECTOR] = 0, - [IRQ1_VECTOR] = 1, - [IRQ2_VECTOR] = 2, - [IRQ3_VECTOR] = 3, - [IRQ4_VECTOR] = 4, - [IRQ5_VECTOR] = 5, - [IRQ6_VECTOR] = 6, - [IRQ7_VECTOR] = 7, - [IRQ8_VECTOR] = 8, - [IRQ9_VECTOR] = 9, - [IRQ10_VECTOR] = 10, - [IRQ11_VECTOR] = 11, - [IRQ12_VECTOR] = 12, - [IRQ13_VECTOR] = 13, - [IRQ14_VECTOR] = 14, - [IRQ15_VECTOR] = 15, - [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 + [0 ... NR_VECTORS - 1] = -1, }; int vector_used_by_percpu_irq(unsigned int vector) @@ -116,6 +99,9 @@ int vector_used_by_percpu_irq(unsigned int vector) return 0; } +/* Number of legacy interrupts */ +int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY; + void __init init_ISA_irqs(void) { int i; @@ -142,6 +128,19 @@ void __init init_ISA_irqs(void) void __init init_IRQ(void) { + int i; + + /* + * On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15. + * If these IRQ's are handled by legacy interrupt-controllers like PIC, + * then this configuration will likely be static after the boot. If + * these IRQ's are handled by more mordern controllers like IO-APIC, + * then this vector space can be freed and re-used dynamically as the + * irq's migrate etc. + */ + for (i = 0; i < nr_legacy_irqs; i++) + per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i; + x86_init.irqs.intr_init(); } diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index 1268d993e9c..2f1ca561429 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -235,6 +235,8 @@ void __init vmi_time_init(void) vmi_time_init_clockevent(); setup_irq(0, &vmi_clock_action); + for_each_possible_cpu(cpu) + per_cpu(vector_irq, cpu)[vmi_get_timer_vector()] = 0; } #ifdef CONFIG_X86_LOCAL_APIC -- cgit v1.2.3 From 69c89efb51510b3dc0fa336f7fa257c6e1799ee4 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 29 Jan 2010 11:42:20 -0800 Subject: x86, irq: Update the vector domain for legacy irqs handled by io-apic In the recent change of not reserving IRQ0_VECTOR..IRQ15_VECTOR's on all cpu's, we start with irq 0..15 getting directed to (and handled on) cpu-0. In the logical flat mode, once the AP's are online (and before irqbalance comes into picture), kernel intends to handle these IRQ's on any cpu (as the logical flat mode allows to specify multiple cpu's for the irq destination and the chipset based routing can deliver to the interrupt to any one of the specified cpu's). This was broken with our recent change, which was ending up using only cpu 0 as the destination, even when the kernel was specifying to use all online cpu's for the logical flat mode case. Fix this by updating vector allocation domain (cfg->domain) for legacy irqs, when the IO-APIC handles them. Signed-off-by: Suresh Siddha LKML-Reference: <20100129194330.207790269@sbs-t61.sc.intel.com> Tested-by: Li Zefan Cc: Yinghai Lu Cc: Eric W. Biederman Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/io_apic.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 1a30587a6bc..2430b31c985 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1428,6 +1428,14 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq cfg = desc->chip_data; + /* + * For legacy irqs, cfg->domain starts with cpu 0 for legacy + * controllers like 8259. Now that IO-APIC can handle this irq, update + * the cfg->domain. + */ + if (irq < nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain)) + apic->vector_allocation_domain(0, cfg->domain); + if (assign_irq_vector(irq, cfg, apic->target_cpus())) return; -- cgit v1.2.3 From 9d133e5db993d577bd868b54083869fe5479fcff Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 29 Jan 2010 11:42:21 -0800 Subject: x86, irq: Move __setup_vector_irq() before the first irq enable in cpu online path Lowest priority delivery of logical flat mode is broken on some systems, such that even when IO-APIC RTE says deliver the interrupt to a particular CPU, interrupt subsystem delivers the interrupt to totally different CPU. For example, this behavior was observed on a P4 based system with SiS chipset which was reported by Li Zefan. We have been handling this kind of behavior by making sure that in logical flat mode, we assign the same vector to irq mappings on all the 8 possible logical cpu's. But we have been doing this initial assignment (__setup_vector_irq()) a little late (before which interrupts were already enabled for a short duration). Move the __setup_vector_irq() before the first irq enable point in the cpu online path to avoid the issue of not handling some interrupts that wrongly hit the cpu which is still coming online. Signed-off-by: Suresh Siddha LKML-Reference: <20100129194330.283696385@sbs-t61.sc.intel.com> Tested-by: Li Zefan Cc: Yinghai Lu Cc: Eric W. Biederman Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/io_apic.c | 8 +++++++- arch/x86/kernel/smpboot.c | 6 +++++- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 2430b31c985..937150e4c06 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1256,11 +1256,16 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg) void __setup_vector_irq(int cpu) { /* Initialize vector_irq on a new cpu */ - /* This function must be called with vector_lock held */ int irq, vector; struct irq_cfg *cfg; struct irq_desc *desc; + /* + * vector_lock will make sure that we don't run into irq vector + * assignments that might be happening on another cpu in parallel, + * while we setup our initial vector to irq mappings. + */ + spin_lock(&vector_lock); /* Mark the inuse vectors */ for_each_irq_desc(irq, desc) { cfg = desc->chip_data; @@ -1279,6 +1284,7 @@ void __setup_vector_irq(int cpu) if (!cpumask_test_cpu(cpu, cfg->domain)) per_cpu(vector_irq, cpu)[vector] = -1; } + spin_unlock(&vector_lock); } static struct irq_chip ioapic_chip; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 678d0b8c26f..b2ebcba729d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -241,6 +241,11 @@ static void __cpuinit smp_callin(void) map_cpu_to_logical_apicid(); notify_cpu_starting(cpuid); + + /* + * Need to setup vector mappings before we enable interrupts. + */ + __setup_vector_irq(smp_processor_id()); /* * Get our bogomips. * @@ -315,7 +320,6 @@ notrace static void __cpuinit start_secondary(void *unused) */ ipi_call_lock(); lock_vector_lock(); - __setup_vector_irq(smp_processor_id()); set_cpu_online(smp_processor_id(), true); unlock_vector_lock(); ipi_call_unlock(); -- cgit v1.2.3 From 318f6b228ba88a394ef560efc1bfe028ad5ae6b6 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 10 Feb 2010 20:55:16 +0100 Subject: x86, ia32_aout: do not kill argument mapping Do not set current->mm->mmap to NULL in 32-bit emulation on 64-bit load_aout_binary after flush_old_exec as it would destroy already set brpm mapping with arguments. Introduced by b6a2fea39318e43fee84fa7b0b90d68bed92d2ba mm: variable length argument support where the argument mapping in bprm was added. [ hpa: this is a regression from 2.6.22... time to kill a.out? ] Signed-off-by: Jiri Slaby LKML-Reference: <1265831716-7668-1-git-send-email-jslaby@suse.cz> Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Ollie Wild Cc: x86@kernel.org Cc: Signed-off-by: H. Peter Anvin --- arch/x86/ia32/ia32_aout.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index f9f47246275..14531abdd0c 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -327,7 +327,6 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) current->mm->free_area_cache = TASK_UNMAPPED_BASE; current->mm->cached_hole_size = 0; - current->mm->mmap = NULL; install_exec_creds(bprm); current->flags &= ~PF_FORKNOEXEC; -- cgit v1.2.3 From 18dce6ba5c8c6bd0f3ab4efa4cbdd698dab5c40a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 10 Feb 2010 01:20:05 -0800 Subject: x86: Fix SCI on IOAPIC != 0 Thomas Renninger reported on IBM x3330 booting a latest kernel on this machine results in: PCI: PCI BIOS revision 2.10 entry at 0xfd61c, last bus=1 PCI: Using configuration type 1 for base access bio: create slab at 0 ACPI: SCI (IRQ30) allocation failed ACPI Exception: AE_NOT_ACQUIRED, Unable to install System Control Interrupt handler (20090903/evevent-161) ACPI: Unable to start the ACPI Interpreter Later all kind of devices fail... and bisect it down to this commit: commit b9c61b70075c87a8612624736faf4a2de5b1ed30 x86/pci: update pirq_enable_irq() to setup io apic routing it turns out we need to set irq routing for the sci on ioapic1 early. -v2: make it work without sparseirq too. -v3: fix checkpatch.pl warning, and cc to stable Reported-by: Thomas Renninger Bisected-by: Thomas Renninger Tested-by: Thomas Renninger Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-2-git-send-email-yinghai@kernel.org> Cc: stable@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/io_apic.h | 1 + arch/x86/kernel/acpi/boot.c | 9 +++++++- arch/x86/kernel/apic/io_apic.c | 50 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 7c7c16cde1f..5f61f6e0ffd 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -160,6 +160,7 @@ extern int io_apic_get_redir_entries(int ioapic); struct io_apic_irq_attr; extern int io_apic_set_pci_routing(struct device *dev, int irq, struct io_apic_irq_attr *irq_attr); +void setup_IO_APIC_irq_extra(u32 gsi); extern int (*ioapic_renumber_irq)(int ioapic, int irq); extern void ioapic_init_mappings(void); extern void ioapic_insert_resources(void); diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 0acbcdfa5ca..5c96b75c6ea 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -446,6 +446,12 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) { *irq = gsi; + +#ifdef CONFIG_X86_IO_APIC + if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) + setup_IO_APIC_irq_extra(gsi); +#endif + return 0; } @@ -473,7 +479,8 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity); } #endif - acpi_gsi_to_irq(plat_gsi, &irq); + irq = plat_gsi; + return irq; } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 53243ca7816..5e4cce254e4 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1538,6 +1538,56 @@ static void __init setup_IO_APIC_irqs(void) " (apicid-pin) not connected\n"); } +/* + * for the gsit that is not in first ioapic + * but could not use acpi_register_gsi() + * like some special sci in IBM x3330 + */ +void setup_IO_APIC_irq_extra(u32 gsi) +{ + int apic_id = 0, pin, idx, irq; + int node = cpu_to_node(boot_cpu_id); + struct irq_desc *desc; + struct irq_cfg *cfg; + + /* + * Convert 'gsi' to 'ioapic.pin'. + */ + apic_id = mp_find_ioapic(gsi); + if (apic_id < 0) + return; + + pin = mp_find_ioapic_pin(apic_id, gsi); + idx = find_irq_entry(apic_id, pin, mp_INT); + if (idx == -1) + return; + + irq = pin_2_irq(idx, apic_id, pin); +#ifdef CONFIG_SPARSE_IRQ + desc = irq_to_desc(irq); + if (desc) + return; +#endif + desc = irq_to_desc_alloc_node(irq, node); + if (!desc) { + printk(KERN_INFO "can not get irq_desc for %d\n", irq); + return; + } + + cfg = desc->chip_data; + add_pin_to_irq_node(cfg, node, apic_id, pin); + + if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) { + pr_debug("Pin %d-%d already programmed\n", + mp_ioapics[apic_id].apicid, pin); + return; + } + set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed); + + setup_IO_APIC_irq(apic_id, pin, irq, desc, + irq_trigger(idx), irq_polarity(idx)); +} + /* * Set up the timer pin, possibly with the 8259A-master behind. */ -- cgit v1.2.3 From ced5b697a76d325e7a7ac7d382dbbb632c765093 Mon Sep 17 00:00:00 2001 From: Brandon Phiilps Date: Wed, 10 Feb 2010 01:20:06 -0800 Subject: x86: Avoid race condition in pci_enable_msix() Keep chip_data in create_irq_nr and destroy_irq. When two drivers are setting up MSI-X at the same time via pci_enable_msix() there is a race. See this dmesg excerpt: [ 85.170610] ixgbe 0000:02:00.1: irq 97 for MSI/MSI-X [ 85.170611] alloc irq_desc for 99 on node -1 [ 85.170613] igb 0000:08:00.1: irq 98 for MSI/MSI-X [ 85.170614] alloc kstat_irqs on node -1 [ 85.170616] alloc irq_2_iommu on node -1 [ 85.170617] alloc irq_desc for 100 on node -1 [ 85.170619] alloc kstat_irqs on node -1 [ 85.170621] alloc irq_2_iommu on node -1 [ 85.170625] ixgbe 0000:02:00.1: irq 99 for MSI/MSI-X [ 85.170626] alloc irq_desc for 101 on node -1 [ 85.170628] igb 0000:08:00.1: irq 100 for MSI/MSI-X [ 85.170630] alloc kstat_irqs on node -1 [ 85.170631] alloc irq_2_iommu on node -1 [ 85.170635] alloc irq_desc for 102 on node -1 [ 85.170636] alloc kstat_irqs on node -1 [ 85.170639] alloc irq_2_iommu on node -1 [ 85.170646] BUG: unable to handle kernel NULL pointer dereference at 0000000000000088 As you can see igb and ixgbe are both alternating on create_irq_nr() via pci_enable_msix() in their probe function. ixgbe: While looping through irq_desc_ptrs[] via create_irq_nr() ixgbe choses irq_desc_ptrs[102] and exits the loop, drops vector_lock and calls dynamic_irq_init. Then it sets irq_desc_ptrs[102]->chip_data = NULL via dynamic_irq_init(). igb: Grabs the vector_lock now and starts looping over irq_desc_ptrs[] via create_irq_nr(). It gets to irq_desc_ptrs[102] and does this: cfg_new = irq_desc_ptrs[102]->chip_data; if (cfg_new->vector != 0) continue; This hits the NULL deref. Another possible race exists via pci_disable_msix() in a driver or in the number of error paths that call free_msi_irqs(): destroy_irq() dynamic_irq_cleanup() which sets desc->chip_data = NULL ...race window... desc->chip_data = cfg; Remove the save and restore code for cfg in create_irq_nr() and destroy_irq() and take the desc->lock when checking the irq_cfg. Reported-and-analyzed-by: Brandon Philips Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-3-git-send-email-yinghai@kernel.org> Signed-off-by: Brandon Phililps Cc: stable@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/io_apic.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 53243ca7816..c86591b906f 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3228,12 +3228,9 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) } spin_unlock_irqrestore(&vector_lock, flags); - if (irq > 0) { - dynamic_irq_init(irq); - /* restore it, in case dynamic_irq_init clear it */ - if (desc_new) - desc_new->chip_data = cfg_new; - } + if (irq > 0) + dynamic_irq_init_keep_chip_data(irq); + return irq; } @@ -3256,17 +3253,12 @@ void destroy_irq(unsigned int irq) { unsigned long flags; struct irq_cfg *cfg; - struct irq_desc *desc; - /* store it, in case dynamic_irq_cleanup clear it */ - desc = irq_to_desc(irq); - cfg = desc->chip_data; - dynamic_irq_cleanup(irq); - /* connect back irq_cfg */ - desc->chip_data = cfg; + dynamic_irq_cleanup_keep_chip_data(irq); free_irte(irq); spin_lock_irqsave(&vector_lock, flags); + cfg = irq_to_desc(irq)->chip_data; __clear_irq_vector(irq, cfg); spin_unlock_irqrestore(&vector_lock, flags); } -- cgit v1.2.3 From dade7716925a4e9a31f249f9ca1ed4e2f1495a8c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 25 Jul 2009 18:39:36 +0200 Subject: x86: Convert ioapic_lock and vector_lock to raw_spinlock Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/io_apic.c | 106 ++++++++++++++++++++--------------------- 1 file changed, 53 insertions(+), 53 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 937150e4c06..d55e43d352b 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -73,8 +73,8 @@ */ int sis_apic_bug = -1; -static DEFINE_SPINLOCK(ioapic_lock); -static DEFINE_SPINLOCK(vector_lock); +static DEFINE_RAW_SPINLOCK(ioapic_lock); +static DEFINE_RAW_SPINLOCK(vector_lock); /* * # of IRQ routing registers @@ -393,7 +393,7 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) struct irq_pin_list *entry; unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); for_each_irq_pin(entry, cfg->irq_2_pin) { unsigned int reg; int pin; @@ -402,11 +402,11 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) reg = io_apic_read(entry->apic, 0x10 + pin*2); /* Is the remote IRR bit set? */ if (reg & IO_APIC_REDIR_REMOTE_IRR) { - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); return true; } } - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); return false; } @@ -420,10 +420,10 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) { union entry_union eu; unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); return eu.entry; } @@ -446,9 +446,9 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) { unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); __ioapic_write_entry(apic, pin, e); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); } /* @@ -461,10 +461,10 @@ static void ioapic_mask_entry(int apic, int pin) unsigned long flags; union entry_union eu = { .entry.mask = 1 }; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0x10 + 2*pin, eu.w1); io_apic_write(apic, 0x11 + 2*pin, eu.w2); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); } /* @@ -591,9 +591,9 @@ static void mask_IO_APIC_irq_desc(struct irq_desc *desc) BUG_ON(!cfg); - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); __mask_IO_APIC_irq(cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); } static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) @@ -601,9 +601,9 @@ static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) struct irq_cfg *cfg = desc->chip_data; unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); __unmask_IO_APIC_irq(cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); } static void mask_IO_APIC_irq(unsigned int irq) @@ -1127,12 +1127,12 @@ void lock_vector_lock(void) /* Used to the online set of cpus does not change * during assign_irq_vector. */ - spin_lock(&vector_lock); + raw_spin_lock(&vector_lock); } void unlock_vector_lock(void) { - spin_unlock(&vector_lock); + raw_spin_unlock(&vector_lock); } static int @@ -1220,9 +1220,9 @@ int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) int err; unsigned long flags; - spin_lock_irqsave(&vector_lock, flags); + raw_spin_lock_irqsave(&vector_lock, flags); err = __assign_irq_vector(irq, cfg, mask); - spin_unlock_irqrestore(&vector_lock, flags); + raw_spin_unlock_irqrestore(&vector_lock, flags); return err; } @@ -1265,7 +1265,7 @@ void __setup_vector_irq(int cpu) * assignments that might be happening on another cpu in parallel, * while we setup our initial vector to irq mappings. */ - spin_lock(&vector_lock); + raw_spin_lock(&vector_lock); /* Mark the inuse vectors */ for_each_irq_desc(irq, desc) { cfg = desc->chip_data; @@ -1284,7 +1284,7 @@ void __setup_vector_irq(int cpu) if (!cpumask_test_cpu(cpu, cfg->domain)) per_cpu(vector_irq, cpu)[vector] = -1; } - spin_unlock(&vector_lock); + raw_spin_unlock(&vector_lock); } static struct irq_chip ioapic_chip; @@ -1603,14 +1603,14 @@ __apicdebuginit(void) print_IO_APIC(void) for (apic = 0; apic < nr_ioapics; apic++) { - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic, 0); reg_01.raw = io_apic_read(apic, 1); if (reg_01.bits.version >= 0x10) reg_02.raw = io_apic_read(apic, 2); if (reg_01.bits.version >= 0x20) reg_03.raw = io_apic_read(apic, 3); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); printk("\n"); printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid); @@ -1905,9 +1905,9 @@ void __init enable_IO_APIC(void) * The number of IO-APIC IRQ registers (== #pins): */ for (apic = 0; apic < nr_ioapics; apic++) { - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); reg_01.raw = io_apic_read(apic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); nr_ioapic_registers[apic] = reg_01.bits.entries+1; } @@ -2047,9 +2047,9 @@ void __init setup_ioapic_ids_from_mpc(void) for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { /* Read the register 0 value */ - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic_id, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); old_id = mp_ioapics[apic_id].apicid; @@ -2108,16 +2108,16 @@ void __init setup_ioapic_ids_from_mpc(void) mp_ioapics[apic_id].apicid); reg_00.bits.ID = mp_ioapics[apic_id].apicid; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic_id, 0, reg_00.raw); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); /* * Sanity check */ - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic_id, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); if (reg_00.bits.ID != mp_ioapics[apic_id].apicid) printk("could not set ID!\n"); else @@ -2200,7 +2200,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq) unsigned long flags; struct irq_cfg *cfg; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); if (irq < nr_legacy_irqs) { disable_8259A_irq(irq); if (i8259A_irq_pending(irq)) @@ -2208,7 +2208,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq) } cfg = irq_cfg(irq); __unmask_IO_APIC_irq(cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); return was_pending; } @@ -2219,9 +2219,9 @@ static int ioapic_retrigger_irq(unsigned int irq) struct irq_cfg *cfg = irq_cfg(irq); unsigned long flags; - spin_lock_irqsave(&vector_lock, flags); + raw_spin_lock_irqsave(&vector_lock, flags); apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); - spin_unlock_irqrestore(&vector_lock, flags); + raw_spin_unlock_irqrestore(&vector_lock, flags); return 1; } @@ -2314,14 +2314,14 @@ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) irq = desc->irq; cfg = desc->chip_data; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); ret = set_desc_affinity(desc, mask, &dest); if (!ret) { /* Only the high 8 bits are valid. */ dest = SET_APIC_LOGICAL_ID(dest); __target_IO_APIC_irq(irq, dest, cfg); } - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); return ret; } @@ -2549,9 +2549,9 @@ static void eoi_ioapic_irq(struct irq_desc *desc) irq = desc->irq; cfg = desc->chip_data; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); __eoi_ioapic_irq(irq, cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); } static void ack_apic_level(unsigned int irq) @@ -3133,13 +3133,13 @@ static int ioapic_resume(struct sys_device *dev) data = container_of(dev, struct sysfs_ioapic_data, dev); entry = data->entry; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(dev->id, 0); if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) { reg_00.bits.ID = mp_ioapics[dev->id].apicid; io_apic_write(dev->id, 0, reg_00.raw); } - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); for (i = 0; i < nr_ioapic_registers[dev->id]; i++) ioapic_write_entry(dev->id, i, entry[i]); @@ -3202,7 +3202,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) if (irq_want < nr_irqs_gsi) irq_want = nr_irqs_gsi; - spin_lock_irqsave(&vector_lock, flags); + raw_spin_lock_irqsave(&vector_lock, flags); for (new = irq_want; new < nr_irqs; new++) { desc_new = irq_to_desc_alloc_node(new, node); if (!desc_new) { @@ -3221,7 +3221,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) irq = new; break; } - spin_unlock_irqrestore(&vector_lock, flags); + raw_spin_unlock_irqrestore(&vector_lock, flags); if (irq > 0) { dynamic_irq_init(irq); @@ -3261,9 +3261,9 @@ void destroy_irq(unsigned int irq) desc->chip_data = cfg; free_irte(irq); - spin_lock_irqsave(&vector_lock, flags); + raw_spin_lock_irqsave(&vector_lock, flags); __clear_irq_vector(irq, cfg); - spin_unlock_irqrestore(&vector_lock, flags); + raw_spin_unlock_irqrestore(&vector_lock, flags); } /* @@ -3800,9 +3800,9 @@ int __init io_apic_get_redir_entries (int ioapic) union IO_APIC_reg_01 reg_01; unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); reg_01.raw = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); return reg_01.bits.entries; } @@ -3964,9 +3964,9 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) if (physids_empty(apic_id_map)) apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map); - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); if (apic_id >= get_physical_broadcast()) { printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " @@ -4000,10 +4000,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) if (reg_00.bits.ID != apic_id) { reg_00.bits.ID = apic_id; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(ioapic, 0, reg_00.raw); reg_00.raw = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); /* Sanity check */ if (reg_00.bits.ID != apic_id) { @@ -4024,9 +4024,9 @@ int __init io_apic_get_version(int ioapic) union IO_APIC_reg_01 reg_01; unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); reg_01.raw = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); return reg_01.bits.version; } -- cgit v1.2.3 From 0fdc7a8022c3eaff6b5ee27ffb9e913e5e58d8e9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 25 Jul 2009 16:49:55 +0200 Subject: x86: Convert nmi_lock to raw_spinlock nmi_lock must be a spinning spinlock in -rt. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/nmi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index 0159a69396c..24e7742d633 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -416,13 +416,13 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) /* We can be called before check_nmi_watchdog, hence NULL check. */ if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { - static DEFINE_SPINLOCK(lock); /* Serialise the printks */ + static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */ - spin_lock(&lock); + raw_spin_lock(&lock); printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); show_regs(regs); dump_stack(); - spin_unlock(&lock); + raw_spin_unlock(&lock); cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); rc = 1; -- cgit v1.2.3 From 5619c28061ff9d2559a93eaba492935530f2a513 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 25 Jul 2009 18:35:11 +0200 Subject: x86: Convert i8259_lock to raw_spinlock Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/i8259.h | 2 +- arch/x86/kernel/apic/io_apic.c | 4 ++-- arch/x86/kernel/i8259.c | 30 +++++++++++++++--------------- arch/x86/kernel/time.c | 4 ++-- arch/x86/kernel/visws_quirks.c | 6 +++--- 5 files changed, 23 insertions(+), 23 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h index 58d7091eeb1..7ec65b18085 100644 --- a/arch/x86/include/asm/i8259.h +++ b/arch/x86/include/asm/i8259.h @@ -24,7 +24,7 @@ extern unsigned int cached_irq_mask; #define SLAVE_ICW4_DEFAULT 0x01 #define PIC_ICW4_AEOI 2 -extern spinlock_t i8259A_lock; +extern raw_spinlock_t i8259A_lock; extern void init_8259A(int auto_eoi); extern void enable_8259A_irq(unsigned int irq); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index c86591b906f..f5e40339622 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1830,7 +1830,7 @@ __apicdebuginit(void) print_PIC(void) printk(KERN_DEBUG "\nprinting PIC contents\n"); - spin_lock_irqsave(&i8259A_lock, flags); + raw_spin_lock_irqsave(&i8259A_lock, flags); v = inb(0xa1) << 8 | inb(0x21); printk(KERN_DEBUG "... PIC IMR: %04x\n", v); @@ -1844,7 +1844,7 @@ __apicdebuginit(void) print_PIC(void) outb(0x0a,0xa0); outb(0x0a,0x20); - spin_unlock_irqrestore(&i8259A_lock, flags); + raw_spin_unlock_irqrestore(&i8259A_lock, flags); printk(KERN_DEBUG "... PIC ISR: %04x\n", v); diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index df89102bef8..8c93a84bb62 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -32,7 +32,7 @@ */ static int i8259A_auto_eoi; -DEFINE_SPINLOCK(i8259A_lock); +DEFINE_RAW_SPINLOCK(i8259A_lock); static void mask_and_ack_8259A(unsigned int); struct irq_chip i8259A_chip = { @@ -68,13 +68,13 @@ void disable_8259A_irq(unsigned int irq) unsigned int mask = 1 << irq; unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + raw_spin_lock_irqsave(&i8259A_lock, flags); cached_irq_mask |= mask; if (irq & 8) outb(cached_slave_mask, PIC_SLAVE_IMR); else outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); + raw_spin_unlock_irqrestore(&i8259A_lock, flags); } void enable_8259A_irq(unsigned int irq) @@ -82,13 +82,13 @@ void enable_8259A_irq(unsigned int irq) unsigned int mask = ~(1 << irq); unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + raw_spin_lock_irqsave(&i8259A_lock, flags); cached_irq_mask &= mask; if (irq & 8) outb(cached_slave_mask, PIC_SLAVE_IMR); else outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); + raw_spin_unlock_irqrestore(&i8259A_lock, flags); } int i8259A_irq_pending(unsigned int irq) @@ -97,12 +97,12 @@ int i8259A_irq_pending(unsigned int irq) unsigned long flags; int ret; - spin_lock_irqsave(&i8259A_lock, flags); + raw_spin_lock_irqsave(&i8259A_lock, flags); if (irq < 8) ret = inb(PIC_MASTER_CMD) & mask; else ret = inb(PIC_SLAVE_CMD) & (mask >> 8); - spin_unlock_irqrestore(&i8259A_lock, flags); + raw_spin_unlock_irqrestore(&i8259A_lock, flags); return ret; } @@ -150,7 +150,7 @@ static void mask_and_ack_8259A(unsigned int irq) unsigned int irqmask = 1 << irq; unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + raw_spin_lock_irqsave(&i8259A_lock, flags); /* * Lightweight spurious IRQ detection. We do not want * to overdo spurious IRQ handling - it's usually a sign @@ -183,7 +183,7 @@ handle_real_irq: outb(cached_master_mask, PIC_MASTER_IMR); outb(0x60+irq, PIC_MASTER_CMD); /* 'Specific EOI to master */ } - spin_unlock_irqrestore(&i8259A_lock, flags); + raw_spin_unlock_irqrestore(&i8259A_lock, flags); return; spurious_8259A_irq: @@ -285,24 +285,24 @@ void mask_8259A(void) { unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + raw_spin_lock_irqsave(&i8259A_lock, flags); outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ - spin_unlock_irqrestore(&i8259A_lock, flags); + raw_spin_unlock_irqrestore(&i8259A_lock, flags); } void unmask_8259A(void) { unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + raw_spin_lock_irqsave(&i8259A_lock, flags); outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ - spin_unlock_irqrestore(&i8259A_lock, flags); + raw_spin_unlock_irqrestore(&i8259A_lock, flags); } void init_8259A(int auto_eoi) @@ -311,7 +311,7 @@ void init_8259A(int auto_eoi) i8259A_auto_eoi = auto_eoi; - spin_lock_irqsave(&i8259A_lock, flags); + raw_spin_lock_irqsave(&i8259A_lock, flags); outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ @@ -356,5 +356,5 @@ void init_8259A(int auto_eoi) outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ - spin_unlock_irqrestore(&i8259A_lock, flags); + raw_spin_unlock_irqrestore(&i8259A_lock, flags); } diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index be2573448ed..fb5cc5e14cf 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -70,11 +70,11 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) * manually to deassert NMI lines for the watchdog if run * on an 82489DX-based system. */ - spin_lock(&i8259A_lock); + raw_spin_lock(&i8259A_lock); outb(0x0c, PIC_MASTER_OCW3); /* Ack the IRQ; AEOI will end it automatically. */ inb(PIC_MASTER_POLL); - spin_unlock(&i8259A_lock); + raw_spin_unlock(&i8259A_lock); } global_clock_event->event_handler(global_clock_event); diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 34a279a7471..ab38ce0984f 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -559,7 +559,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) struct irq_desc *desc; unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + raw_spin_lock_irqsave(&i8259A_lock, flags); /* Find out what's interrupting in the PIIX4 master 8259 */ outb(0x0c, 0x20); /* OCW3 Poll command */ @@ -596,7 +596,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) outb(0x60 + realirq, 0x20); } - spin_unlock_irqrestore(&i8259A_lock, flags); + raw_spin_unlock_irqrestore(&i8259A_lock, flags); desc = irq_to_desc(realirq); @@ -614,7 +614,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) return IRQ_HANDLED; out_unlock: - spin_unlock_irqrestore(&i8259A_lock, flags); + raw_spin_unlock_irqrestore(&i8259A_lock, flags); return IRQ_NONE; } -- cgit v1.2.3 From 0a832320f1bae6a4169bf683e201378f2437cfc1 Mon Sep 17 00:00:00 2001 From: "Justin P. Mattock" Date: Tue, 16 Feb 2010 15:17:29 -0800 Subject: x86: Add iMac9,1 to pci_reboot_dmi_table On the iMac9,1 /sbin/reboot results in a black mangled screen. Adding this DMI entry gets the machine to reboot cleanly as it should. Signed-off-by: Justin P. Mattock LKML-Reference: <1266362249-3337-1-git-send-email-justinmattock@gmail.com> Signed-off-by: H. Peter Anvin Cc: Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 704bddcdf64..8e1aac86b50 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -461,6 +461,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Macmini3,1"), }, }, + { /* Handle problems with rebooting on the iMac9,1. */ + .callback = set_pci_reboot, + .ident = "Apple iMac9,1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"), + }, + }, { } }; -- cgit v1.2.3 From 6738762d73a237ec322b04d8b9d55c8fd5d84713 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 10 Feb 2010 01:20:36 -0800 Subject: x86, irq: Remove arch_probe_nr_irqs So keep nr_irqs == NR_IRQS. With radix trees is matters less. Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-33-git-send-email-yinghai@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/io_apic.c | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index f5e40339622..c64ddd9d997 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3826,28 +3826,6 @@ void __init probe_nr_irqs_gsi(void) printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); } -#ifdef CONFIG_SPARSE_IRQ -int __init arch_probe_nr_irqs(void) -{ - int nr; - - if (nr_irqs > (NR_VECTORS * nr_cpu_ids)) - nr_irqs = NR_VECTORS * nr_cpu_ids; - - nr = nr_irqs_gsi + 8 * nr_cpu_ids; -#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ) - /* - * for MSI and HT dyn irq - */ - nr += nr_irqs_gsi * 16; -#endif - if (nr < nr_irqs) - nr_irqs = nr; - - return 0; -} -#endif - static int __io_apic_set_pci_routing(struct device *dev, int irq, struct io_apic_irq_attr *irq_attr) { -- cgit v1.2.3 From 2b633e3fac5efada088b57d31e65401f22bcc18f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 10 Feb 2010 01:20:37 -0800 Subject: smp: Use nr_cpus= to set nr_cpu_ids early On x86, before prefill_possible_map(), nr_cpu_ids will be NR_CPUS aka CONFIG_NR_CPUS. Add nr_cpus= to set nr_cpu_ids. so we can simulate cpus <=8 are installed on normal config. -v2: accordging to Christoph, acpi_numa_init should use nr_cpu_ids in stead of NR_CPUS. -v3: add doc in kernel-parameters.txt according to Andrew. Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-34-git-send-email-yinghai@kernel.org> Acked-by: Linus Torvalds Signed-off-by: H. Peter Anvin Cc: Tony Luck --- arch/x86/kernel/smpboot.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 678d0b8c26f..eff2fe17542 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1213,11 +1213,12 @@ __init void prefill_possible_map(void) total_cpus = max_t(int, possible, num_processors + disabled_cpus); - if (possible > CONFIG_NR_CPUS) { + /* nr_cpu_ids could be reduced via nr_cpus= */ + if (possible > nr_cpu_ids) { printk(KERN_WARNING "%d Processors exceeds NR_CPUS limit of %d\n", - possible, CONFIG_NR_CPUS); - possible = CONFIG_NR_CPUS; + possible, nr_cpu_ids); + possible = nr_cpu_ids; } printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", -- cgit v1.2.3 From eb5b3794062824ba12d883901eea49ea89d0a678 Mon Sep 17 00:00:00 2001 From: Brandon Philips Date: Sun, 7 Feb 2010 13:02:50 -0800 Subject: x86, irq: Keep chip_data in create_irq_nr and destroy_irq Version 4: use get_irq_chip_data() in destroy_irq() to get rid of some local vars. When two drivers are setting up MSI-X at the same time via pci_enable_msix() there is a race. See this dmesg excerpt: [ 85.170610] ixgbe 0000:02:00.1: irq 97 for MSI/MSI-X [ 85.170611] alloc irq_desc for 99 on node -1 [ 85.170613] igb 0000:08:00.1: irq 98 for MSI/MSI-X [ 85.170614] alloc kstat_irqs on node -1 [ 85.170616] alloc irq_2_iommu on node -1 [ 85.170617] alloc irq_desc for 100 on node -1 [ 85.170619] alloc kstat_irqs on node -1 [ 85.170621] alloc irq_2_iommu on node -1 [ 85.170625] ixgbe 0000:02:00.1: irq 99 for MSI/MSI-X [ 85.170626] alloc irq_desc for 101 on node -1 [ 85.170628] igb 0000:08:00.1: irq 100 for MSI/MSI-X [ 85.170630] alloc kstat_irqs on node -1 [ 85.170631] alloc irq_2_iommu on node -1 [ 85.170635] alloc irq_desc for 102 on node -1 [ 85.170636] alloc kstat_irqs on node -1 [ 85.170639] alloc irq_2_iommu on node -1 [ 85.170646] BUG: unable to handle kernel NULL pointer dereference at 0000000000000088 As you can see igb and ixgbe are both alternating on create_irq_nr() via pci_enable_msix() in their probe function. ixgbe: While looping through irq_desc_ptrs[] via create_irq_nr() ixgbe choses irq_desc_ptrs[102] and exits the loop, drops vector_lock and calls dynamic_irq_init. Then it sets irq_desc_ptrs[102]->chip_data = NULL via dynamic_irq_init(). igb: Grabs the vector_lock now and starts looping over irq_desc_ptrs[] via create_irq_nr(). It gets to irq_desc_ptrs[102] and does this: cfg_new = irq_desc_ptrs[102]->chip_data; if (cfg_new->vector != 0) continue; This hits the NULL deref. Another possible race exists via pci_disable_msix() in a driver or in the number of error paths that call free_msi_irqs(): destroy_irq() dynamic_irq_cleanup() which sets desc->chip_data = NULL ...race window... desc->chip_data = cfg; Remove the save and restore code for cfg in create_irq_nr() and destroy_irq() and take the desc->lock when checking the irq_cfg. Reported-and-analyzed-by: Brandon Philips Signed-off-by: Yinghai Lu LKML-Reference: <20100207210250.GB8256@jenkins.home.ifup.org> Signed-off-by: Brandon Phiilps Cc: stable@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/io_apic.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 5e4cce254e4..e93a76bc867 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3278,12 +3278,9 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) } spin_unlock_irqrestore(&vector_lock, flags); - if (irq > 0) { - dynamic_irq_init(irq); - /* restore it, in case dynamic_irq_init clear it */ - if (desc_new) - desc_new->chip_data = cfg_new; - } + if (irq > 0) + dynamic_irq_init_keep_chip_data(irq); + return irq; } @@ -3305,19 +3302,12 @@ int create_irq(void) void destroy_irq(unsigned int irq) { unsigned long flags; - struct irq_cfg *cfg; - struct irq_desc *desc; - /* store it, in case dynamic_irq_cleanup clear it */ - desc = irq_to_desc(irq); - cfg = desc->chip_data; - dynamic_irq_cleanup(irq); - /* connect back irq_cfg */ - desc->chip_data = cfg; + dynamic_irq_cleanup_keep_chip_data(irq); free_irte(irq); spin_lock_irqsave(&vector_lock, flags); - __clear_irq_vector(irq, cfg); + __clear_irq_vector(irq, get_irq_chip_data(irq)); spin_unlock_irqrestore(&vector_lock, flags); } -- cgit v1.2.3 From 21c2fd9970cc8e457058f84016450a2e9876125e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 26 Feb 2010 11:17:16 +0100 Subject: x86: apic: Fix mismerge, add arch_probe_nr_irqs() again Merge commit aef55d4922 mis-merged io_apic.c so we lost the arch_probe_nr_irqs() method. This caused subtle boot breakages (udev confusion likely due to missing drivers) with certain configs. Cc: H. Peter Anvin Cc: Yinghai Lu LKML-Reference: <20100207210250.GB8256@jenkins.home.ifup.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 979589881c8..72ac2a33299 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3876,6 +3876,28 @@ void __init probe_nr_irqs_gsi(void) printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); } +#ifdef CONFIG_SPARSE_IRQ +int __init arch_probe_nr_irqs(void) +{ + int nr; + + if (nr_irqs > (NR_VECTORS * nr_cpu_ids)) + nr_irqs = NR_VECTORS * nr_cpu_ids; + + nr = nr_irqs_gsi + 8 * nr_cpu_ids; +#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ) + /* + * for MSI and HT dyn irq + */ + nr += nr_irqs_gsi * 16; +#endif + if (nr < nr_irqs) + nr_irqs = nr; + + return 0; +} +#endif + static int __io_apic_set_pci_routing(struct device *dev, int irq, struct io_apic_irq_attr *irq_attr) { -- cgit v1.2.3 From fad539956c9e69749a03f7817d22d1bab87657bf Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 28 Feb 2010 01:06:34 -0800 Subject: x86: Fix out of order of gsi Iranna D Ankad reported that IBM x3950 systems have boot problems after this commit: | | commit b9c61b70075c87a8612624736faf4a2de5b1ed30 | | x86/pci: update pirq_enable_irq() to setup io apic routing | The problem is that with the patch, the machine freezes when console=ttyS0,... kernel serial parameter is passed. It seem to freeze at DVD initialization and the whole problem seem to be DVD/pata related, but somehow exposed through the serial parameter. Such apic problems can expose really weird behavior: ACPI: IOAPIC (id[0x10] address[0xfecff000] gsi_base[0]) IOAPIC[0]: apic_id 16, version 0, address 0xfecff000, GSI 0-2 ACPI: IOAPIC (id[0x0f] address[0xfec00000] gsi_base[3]) IOAPIC[1]: apic_id 15, version 0, address 0xfec00000, GSI 3-38 ACPI: IOAPIC (id[0x0e] address[0xfec01000] gsi_base[39]) IOAPIC[2]: apic_id 14, version 0, address 0xfec01000, GSI 39-74 ACPI: INT_SRC_OVR (bus 0 bus_irq 1 global_irq 4 dfl dfl) ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 5 dfl dfl) ACPI: INT_SRC_OVR (bus 0 bus_irq 3 global_irq 6 dfl dfl) ACPI: INT_SRC_OVR (bus 0 bus_irq 4 global_irq 7 dfl dfl) ACPI: INT_SRC_OVR (bus 0 bus_irq 6 global_irq 9 dfl dfl) ACPI: INT_SRC_OVR (bus 0 bus_irq 7 global_irq 10 dfl dfl) ACPI: INT_SRC_OVR (bus 0 bus_irq 8 global_irq 11 low edge) ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 12 dfl dfl) ACPI: INT_SRC_OVR (bus 0 bus_irq 12 global_irq 15 dfl dfl) ACPI: INT_SRC_OVR (bus 0 bus_irq 13 global_irq 16 dfl dfl) ACPI: INT_SRC_OVR (bus 0 bus_irq 14 global_irq 17 low edge) ACPI: INT_SRC_OVR (bus 0 bus_irq 15 global_irq 18 dfl dfl) It turns out that the system has three io apic controllers, but boot ioapic routing is in the second one, and that gsi_base is not 0 - it is using a bunch of INT_SRC_OVR... So these recent changes: 1. one set routing for first io apic controller 2. assume irq = gsi ... will break that system. So try to remap those gsis, need to seperate boot_ioapic_idx detection out of enable_IO_APIC() and call them early. So introduce boot_ioapic_idx, and remap_ioapic_gsi()... -v2: shift gsi with delta instead of gsi_base of boot_ioapic_idx -v3: double check with find_isa_irq_apic(0, mp_INT) to get right boot_ioapic_idx -v4: nr_legacy_irqs -v5: add print out for boot_ioapic_idx, and also make it could be applied for current kernel and previous kernel -v6: add bus_irq, in acpi_sci_ioapic_setup, so can get overwride for sci right mapping... -v7: looks like pnpacpi get irq instead of gsi, so need to revert them back... -v8: split into two patches -v9: according to Eric, use fixed 16 for shifting instead of remap -v10: still need to touch rsparser.c -v11: just revert back to way Eric suggest... anyway the ioapic in first ioapic is blocked by second... -v12: two patches, this one will add more loop but check apic_id and irq > 16 Reported-by: Iranna D Ankad Bisected-by: Iranna D Ankad Tested-by: Gary Hade Signed-off-by: Yinghai Lu Cc: Eric W. Biederman Cc: Thomas Renninger Cc: Eric W. Biederman Cc: Suresh Siddha Cc: len.brown@intel.com LKML-Reference: <4B8A321A.1000008@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 72ac2a33299..97e1e3ec2ed 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1475,7 +1475,7 @@ static struct { static void __init setup_IO_APIC_irqs(void) { - int apic_id = 0, pin, idx, irq; + int apic_id, pin, idx, irq; int notcon = 0; struct irq_desc *desc; struct irq_cfg *cfg; @@ -1483,14 +1483,7 @@ static void __init setup_IO_APIC_irqs(void) apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); -#ifdef CONFIG_ACPI - if (!acpi_disabled && acpi_ioapic) { - apic_id = mp_find_ioapic(0); - if (apic_id < 0) - apic_id = 0; - } -#endif - + for (apic_id = 0; apic_id < nr_ioapics; apic_id++) for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { idx = find_irq_entry(apic_id, pin, mp_INT); if (idx == -1) { @@ -1512,6 +1505,9 @@ static void __init setup_IO_APIC_irqs(void) irq = pin_2_irq(idx, apic_id, pin); + if ((apic_id > 0) && (irq > 16)) + continue; + /* * Skip the timer IRQ if there's a quirk handler * installed and if it returns 1: @@ -4105,27 +4101,23 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) #ifdef CONFIG_SMP void __init setup_ioapic_dest(void) { - int pin, ioapic = 0, irq, irq_entry; + int pin, ioapic, irq, irq_entry; struct irq_desc *desc; const struct cpumask *mask; if (skip_ioapic_setup == 1) return; -#ifdef CONFIG_ACPI - if (!acpi_disabled && acpi_ioapic) { - ioapic = mp_find_ioapic(0); - if (ioapic < 0) - ioapic = 0; - } -#endif - + for (ioapic = 0; ioapic < nr_ioapics; ioapic++) for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { irq_entry = find_irq_entry(ioapic, pin, mp_INT); if (irq_entry == -1) continue; irq = pin_2_irq(irq_entry, ioapic, pin); + if ((ioapic > 0) && (irq > 16)) + continue; + desc = irq_to_desc(irq); /* -- cgit v1.2.3