From cbf74cea070fa1f705de4712e25d9e56ae6543c7 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 30 May 2011 16:31:11 +0200 Subject: oprofile, x86: Add comments to IBS LVT offset initialization Adding a comment in the code as IBS LVT setup is not obvious at all ... Signed-off-by: Robert Richter --- arch/x86/kernel/apic/apic.c | 3 ++- arch/x86/oprofile/op_model_amd.c | 13 +++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index fabf01eff77..a0bf78a0918 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -390,7 +390,8 @@ static unsigned int reserve_eilvt_offset(int offset, unsigned int new) /* * If mask=1, the LVT entry does not generate interrupts while mask=0 - * enables the vector. See also the BKDGs. + * enables the vector. See also the BKDGs. Must be called with + * preemption disabled. */ int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask) diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 9fd8a567fe1..9cbb710dc94 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -609,16 +609,21 @@ static int setup_ibs_ctl(int ibs_eilvt_off) return 0; } +/* + * This runs only on the current cpu. We try to find an LVT offset and + * setup the local APIC. For this we must disable preemption. On + * success we initialize all nodes with this offset. This updates then + * the offset in the IBS_CTL per-node msr. The per-core APIC setup of + * the IBS interrupt vector is called from op_amd_setup_ctrs()/op_- + * amd_cpu_shutdown() using the new offset. + */ static int force_ibs_eilvt_setup(void) { int offset; int ret; - /* - * find the next free available EILVT entry, skip offset 0, - * pin search to this cpu - */ preempt_disable(); + /* find the next free available EILVT entry, skip offset 0 */ for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) { if (get_eilvt(offset)) break; -- cgit v1.2.3 From 6e33a852a37dee02979ec9d82bea26c07cee5bce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rton=20N=C3=A9meth?= Date: Sat, 14 May 2011 19:27:33 +0200 Subject: x86/PCI/ACPI: fix type mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The flags field of struct resource from linux/ioport.h is "unsigned long". Change the "type" parameter of coalesce_windows() function to match that field. This fixes the following warning messages when compiling with "make C=1 W=1 bzImage modules": arch/x86/pci/acpi.c: In function ‘coalesce_windows’: arch/x86/pci/acpi.c:198: warning: conversion to ‘long unsigned int’ from ‘int’ may change the sign of the result arch/x86/pci/acpi.c:203: warning: conversion to ‘long unsigned int’ from ‘int’ may change the sign of the result Signed-off-by: Márton Németh Signed-off-by: Jesse Barnes --- arch/x86/pci/acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 0972315c386..68c3c139520 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -188,7 +188,7 @@ static bool resource_contains(struct resource *res, resource_size_t point) return false; } -static void coalesce_windows(struct pci_root_info *info, int type) +static void coalesce_windows(struct pci_root_info *info, unsigned long type) { int i, j; struct resource *res1, *res2; -- cgit v1.2.3 From f124c6ae59e193705c9ddac57684d50006d710e6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 3 Jun 2011 07:45:28 +0300 Subject: xen: off by one errors in multicalls.c b->args[] has MC_ARGS elements, so the comparison here should be ">=" instead of ">". Otherwise we read past the end of the array one space. CC: stable@kernel.org Signed-off-by: Dan Carpenter Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/multicalls.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 8bff7e7c290..1b2b73ff0a6 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c @@ -189,10 +189,10 @@ struct multicall_space __xen_mc_entry(size_t args) unsigned argidx = roundup(b->argidx, sizeof(u64)); BUG_ON(preemptible()); - BUG_ON(b->argidx > MC_ARGS); + BUG_ON(b->argidx >= MC_ARGS); if (b->mcidx == MC_BATCH || - (argidx + args) > MC_ARGS) { + (argidx + args) >= MC_ARGS) { mc_stats_flush(b->mcidx == MC_BATCH ? FL_SLOTS : FL_ARGS); xen_mc_flush(); argidx = roundup(b->argidx, sizeof(u64)); @@ -206,7 +206,7 @@ struct multicall_space __xen_mc_entry(size_t args) ret.args = &b->args[argidx]; b->argidx = argidx + args; - BUG_ON(b->argidx > MC_ARGS); + BUG_ON(b->argidx >= MC_ARGS); return ret; } @@ -216,7 +216,7 @@ struct multicall_space xen_mc_extend_args(unsigned long op, size_t size) struct multicall_space ret = { NULL, NULL }; BUG_ON(preemptible()); - BUG_ON(b->argidx > MC_ARGS); + BUG_ON(b->argidx >= MC_ARGS); if (b->mcidx == 0) return ret; @@ -224,14 +224,14 @@ struct multicall_space xen_mc_extend_args(unsigned long op, size_t size) if (b->entries[b->mcidx - 1].op != op) return ret; - if ((b->argidx + size) > MC_ARGS) + if ((b->argidx + size) >= MC_ARGS) return ret; ret.mc = &b->entries[b->mcidx - 1]; ret.args = &b->args[b->argidx]; b->argidx += size; - BUG_ON(b->argidx > MC_ARGS); + BUG_ON(b->argidx >= MC_ARGS); return ret; } -- cgit v1.2.3 From 221192bdff2583834984639121595fc9296120d3 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 30 May 2011 15:23:14 -0300 Subject: KVM: x86: use proper port value when checking io instruction permission Commit f6511935f42 moved the permission check for io instructions to the ->check_perm callback. It failed to copy the port value from RDX register for string and "in,out ax,dx" instructions. Fix it by reading RDX register at decode stage when appropriate. Fixes FC8.32 installation. Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 82 +++++++++++++++++++++++++++++--------------------- 1 file changed, 47 insertions(+), 35 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index d6e2477feb1..6df88c7885c 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -47,38 +47,40 @@ #define DstDI (5<<1) /* Destination is in ES:(E)DI */ #define DstMem64 (6<<1) /* 64bit memory operand */ #define DstImmUByte (7<<1) /* 8-bit unsigned immediate operand */ -#define DstMask (7<<1) +#define DstDX (8<<1) /* Destination is in DX register */ +#define DstMask (0xf<<1) /* Source operand type. */ -#define SrcNone (0<<4) /* No source operand. */ -#define SrcReg (1<<4) /* Register operand. */ -#define SrcMem (2<<4) /* Memory operand. */ -#define SrcMem16 (3<<4) /* Memory operand (16-bit). */ -#define SrcMem32 (4<<4) /* Memory operand (32-bit). */ -#define SrcImm (5<<4) /* Immediate operand. */ -#define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */ -#define SrcOne (7<<4) /* Implied '1' */ -#define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */ -#define SrcImmU (9<<4) /* Immediate operand, unsigned */ -#define SrcSI (0xa<<4) /* Source is in the DS:RSI */ -#define SrcImmFAddr (0xb<<4) /* Source is immediate far address */ -#define SrcMemFAddr (0xc<<4) /* Source is far address in memory */ -#define SrcAcc (0xd<<4) /* Source Accumulator */ -#define SrcImmU16 (0xe<<4) /* Immediate operand, unsigned, 16 bits */ -#define SrcMask (0xf<<4) +#define SrcNone (0<<5) /* No source operand. */ +#define SrcReg (1<<5) /* Register operand. */ +#define SrcMem (2<<5) /* Memory operand. */ +#define SrcMem16 (3<<5) /* Memory operand (16-bit). */ +#define SrcMem32 (4<<5) /* Memory operand (32-bit). */ +#define SrcImm (5<<5) /* Immediate operand. */ +#define SrcImmByte (6<<5) /* 8-bit sign-extended immediate operand. */ +#define SrcOne (7<<5) /* Implied '1' */ +#define SrcImmUByte (8<<5) /* 8-bit unsigned immediate operand. */ +#define SrcImmU (9<<5) /* Immediate operand, unsigned */ +#define SrcSI (0xa<<5) /* Source is in the DS:RSI */ +#define SrcImmFAddr (0xb<<5) /* Source is immediate far address */ +#define SrcMemFAddr (0xc<<5) /* Source is far address in memory */ +#define SrcAcc (0xd<<5) /* Source Accumulator */ +#define SrcImmU16 (0xe<<5) /* Immediate operand, unsigned, 16 bits */ +#define SrcDX (0xf<<5) /* Source is in DX register */ +#define SrcMask (0xf<<5) /* Generic ModRM decode. */ -#define ModRM (1<<8) +#define ModRM (1<<9) /* Destination is only written; never read. */ -#define Mov (1<<9) -#define BitOp (1<<10) -#define MemAbs (1<<11) /* Memory operand is absolute displacement */ -#define String (1<<12) /* String instruction (rep capable) */ -#define Stack (1<<13) /* Stack instruction (push/pop) */ -#define GroupMask (7<<14) /* Opcode uses one of the group mechanisms */ -#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ -#define GroupDual (2<<14) /* Alternate decoding of mod == 3 */ -#define Prefix (3<<14) /* Instruction varies with 66/f2/f3 prefix */ -#define RMExt (4<<14) /* Opcode extension in ModRM r/m if mod == 3 */ -#define Sse (1<<17) /* SSE Vector instruction */ +#define Mov (1<<10) +#define BitOp (1<<11) +#define MemAbs (1<<12) /* Memory operand is absolute displacement */ +#define String (1<<13) /* String instruction (rep capable) */ +#define Stack (1<<14) /* Stack instruction (push/pop) */ +#define GroupMask (7<<15) /* Opcode uses one of the group mechanisms */ +#define Group (1<<15) /* Bits 3:5 of modrm byte extend opcode */ +#define GroupDual (2<<15) /* Alternate decoding of mod == 3 */ +#define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */ +#define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */ +#define Sse (1<<18) /* SSE Vector instruction */ /* Misc flags */ #define Prot (1<<21) /* instruction generates #UD if not in prot-mode */ #define VendorSpecific (1<<22) /* Vendor specific instruction */ @@ -3154,8 +3156,8 @@ static struct opcode opcode_table[256] = { I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op), I(SrcImmByte | Mov | Stack, em_push), I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op), - D2bvIP(DstDI | Mov | String, ins, check_perm_in), /* insb, insw/insd */ - D2bvIP(SrcSI | ImplicitOps | String, outs, check_perm_out), /* outsb, outsw/outsd */ + D2bvIP(DstDI | SrcDX | Mov | String, ins, check_perm_in), /* insb, insw/insd */ + D2bvIP(SrcSI | DstDX | String, outs, check_perm_out), /* outsb, outsw/outsd */ /* 0x70 - 0x7F */ X16(D(SrcImmByte)), /* 0x80 - 0x87 */ @@ -3212,8 +3214,8 @@ static struct opcode opcode_table[256] = { /* 0xE8 - 0xEF */ D(SrcImm | Stack), D(SrcImm | ImplicitOps), D(SrcImmFAddr | No64), D(SrcImmByte | ImplicitOps), - D2bvIP(SrcNone | DstAcc, in, check_perm_in), - D2bvIP(SrcAcc | ImplicitOps, out, check_perm_out), + D2bvIP(SrcDX | DstAcc, in, check_perm_in), + D2bvIP(SrcAcc | DstDX, out, check_perm_out), /* 0xF0 - 0xF7 */ N, DI(ImplicitOps, icebp), N, N, DI(ImplicitOps | Priv, hlt), D(ImplicitOps), @@ -3613,6 +3615,12 @@ done_prefixes: memop.bytes = c->op_bytes + 2; goto srcmem_common; break; + case SrcDX: + c->src.type = OP_REG; + c->src.bytes = 2; + c->src.addr.reg = &c->regs[VCPU_REGS_RDX]; + fetch_register_operand(&c->src); + break; } if (rc != X86EMUL_CONTINUE) @@ -3682,6 +3690,12 @@ done_prefixes: c->dst.addr.mem.seg = VCPU_SREG_ES; c->dst.val = 0; break; + case DstDX: + c->dst.type = OP_REG; + c->dst.bytes = 2; + c->dst.addr.reg = &c->regs[VCPU_REGS_RDX]; + fetch_register_operand(&c->dst); + break; case ImplicitOps: /* Special instructions do their own operand decoding. */ default: @@ -4027,7 +4041,6 @@ special_insn: break; case 0xec: /* in al,dx */ case 0xed: /* in (e/r)ax,dx */ - c->src.val = c->regs[VCPU_REGS_RDX]; do_io_in: if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, &c->dst.val)) @@ -4035,7 +4048,6 @@ special_insn: break; case 0xee: /* out dx,al */ case 0xef: /* out dx,(e/r)ax */ - c->dst.val = c->regs[VCPU_REGS_RDX]; do_io_out: ops->pio_out_emulated(ctxt, c->src.bytes, c->dst.val, &c->src.val, 1); -- cgit v1.2.3 From 0de66d5b35ee148455e268b2782873204ffdef4b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 6 Jun 2011 16:04:02 +0200 Subject: x86/amd-iommu: Fix 3 possible endless loops The driver contains several loops counting on an u16 value where the exit-condition is checked against variables that can have values up to 0xffff. In this case the loops will never exit. This patch fixed 3 such loops. Cc: stable@kernel.org Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 9179c21120a..bfc8453bd98 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -731,8 +731,8 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, { u8 *p = (u8 *)h; u8 *end = p, flags = 0; - u16 dev_i, devid = 0, devid_start = 0, devid_to = 0; - u32 ext_flags = 0; + u16 devid = 0, devid_start = 0, devid_to = 0; + u32 dev_i, ext_flags = 0; bool alias = false; struct ivhd_entry *e; @@ -887,7 +887,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, /* Initializes the device->iommu mapping for the driver */ static int __init init_iommu_devices(struct amd_iommu *iommu) { - u16 i; + u32 i; for (i = iommu->first_device; i <= iommu->last_device; ++i) set_iommu_for_device(iommu, i); @@ -1177,7 +1177,7 @@ static int __init init_memory_definitions(struct acpi_table_header *table) */ static void init_device_table(void) { - u16 devid; + u32 devid; for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { set_dev_entry_bit(devid, DEV_ENTRY_VALID); -- cgit v1.2.3 From 27c2127a15d340706c0aa84e311188a14468d841 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 30 May 2011 15:56:24 +0200 Subject: x86/amd-iommu: Use only per-device dma_ops Unfortunatly there are systems where the AMD IOMMU does not cover all devices. This breaks with the current driver as it initializes the global dma_ops variable. This patch limits the AMD IOMMU to the devices listed in the IVRS table fixing DMA for devices not covered by the IOMMU. Cc: stable@kernel.org Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index cd8cbeb5fa3..b2c309b0742 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -2383,6 +2384,23 @@ static struct dma_map_ops amd_iommu_dma_ops = { .dma_supported = amd_iommu_dma_supported, }; +static unsigned device_dma_ops_init(void) +{ + struct pci_dev *pdev = NULL; + unsigned unhandled = 0; + + for_each_pci_dev(pdev) { + if (!check_device(&pdev->dev)) { + unhandled += 1; + continue; + } + + pdev->dev.archdata.dma_ops = &amd_iommu_dma_ops; + } + + return unhandled; +} + /* * The function which clues the AMD IOMMU driver into dma_ops. */ @@ -2395,7 +2413,7 @@ void __init amd_iommu_init_api(void) int __init amd_iommu_init_dma_ops(void) { struct amd_iommu *iommu; - int ret; + int ret, unhandled; /* * first allocate a default protection domain for every IOMMU we @@ -2421,7 +2439,11 @@ int __init amd_iommu_init_dma_ops(void) swiotlb = 0; /* Make the driver finally visible to the drivers */ - dma_ops = &amd_iommu_dma_ops; + unhandled = device_dma_ops_init(); + if (unhandled && max_pfn > MAX_DMA32_PFN) { + /* There are unhandled devices - initialize swiotlb for them */ + swiotlb = 1; + } amd_iommu_stats_init(); -- cgit v1.2.3 From 26018874e3584f1658570d41d57d4c34f6a53aa0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 6 Jun 2011 16:50:14 +0200 Subject: x86/amd-iommu: Fix boot crash with hidden PCI devices Some PCIe cards ship with a PCI-PCIe bridge which is not visible as a PCI device in Linux. But the device-id of the bridge is present in the IOMMU tables which causes a boot crash in the IOMMU driver. This patch fixes by removing these cards from the IOMMU handling. This is a pure -stable fix, a real fix to handle this situation appriatly will follow for the next merge window. Cc: stable@kernel.org # > 2.6.32 Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index b2c309b0742..7c3a95e54ec 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -155,6 +155,10 @@ static int iommu_init_device(struct device *dev) pdev = pci_get_bus_and_slot(PCI_BUS(alias), alias & 0xff); if (pdev) dev_data->alias = &pdev->dev; + else { + kfree(dev_data); + return -ENOTSUPP; + } atomic_set(&dev_data->bind, 0); @@ -164,6 +168,20 @@ static int iommu_init_device(struct device *dev) return 0; } +static void iommu_ignore_device(struct device *dev) +{ + u16 devid, alias; + + devid = get_device_id(dev); + alias = amd_iommu_alias_table[devid]; + + memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry)); + memset(&amd_iommu_dev_table[alias], 0, sizeof(struct dev_table_entry)); + + amd_iommu_rlookup_table[devid] = NULL; + amd_iommu_rlookup_table[alias] = NULL; +} + static void iommu_uninit_device(struct device *dev) { kfree(dev->archdata.iommu); @@ -193,7 +211,9 @@ int __init amd_iommu_init_devices(void) continue; ret = iommu_init_device(&pdev->dev); - if (ret) + if (ret == -ENOTSUPP) + iommu_ignore_device(&pdev->dev); + else if (ret) goto out_free; } -- cgit v1.2.3 From fd8a7de177b6f56a0fc59ad211c197a7df06b1ad Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 20 Jul 2010 14:34:50 +0200 Subject: x86: cpu-hotplug: Prevent softirq wakeup on wrong CPU After a newly plugged CPU sets the cpu_online bit it enables interrupts and goes idle. The cpu which brought up the new cpu waits for the cpu_online bit and when it observes it, it sets the cpu_active bit for this cpu. The cpu_active bit is the relevant one for the scheduler to consider the cpu as a viable target. With forced threaded interrupt handlers which imply forced threaded softirqs we observed the following race: cpu 0 cpu 1 bringup(cpu1); set_cpu_online(smp_processor_id(), true); local_irq_enable(); while (!cpu_online(cpu1)); timer_interrupt() -> wake_up(softirq_thread_cpu1); -> enqueue_on(softirq_thread_cpu1, cpu0); ^^^^ cpu_notify(CPU_ONLINE, cpu1); -> sched_cpu_active(cpu1) -> set_cpu_active((cpu1, true); When an interrupt happens before the cpu_active bit is set by the cpu which brought up the newly onlined cpu, then the scheduler refuses to enqueue the woken thread which is bound to that newly onlined cpu on that newly onlined cpu due to the not yet set cpu_active bit and selects a fallback runqueue. Not really an expected and desirable behaviour. So far this has only been observed with forced hard/softirq threading, but in theory this could happen without forced threaded hard/softirqs as well. It's probably unobservable as it would take a massive interrupt storm on the newly onlined cpu which causes the softirq loop to wake up the softirq thread and an even longer delay of the cpu which waits for the cpu_online bit. Signed-off-by: Thomas Gleixner Reviewed-by: Peter Zijlstra Cc: stable@kernel.org # 2.6.39 --- arch/x86/kernel/smpboot.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 33a0c11797d..9fd3137230d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -285,6 +285,19 @@ notrace static void __cpuinit start_secondary(void *unused) per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; x86_platform.nmi_init(); + /* + * Wait until the cpu which brought this one up marked it + * online before enabling interrupts. If we don't do that then + * we can end up waking up the softirq thread before this cpu + * reached the active state, which makes the scheduler unhappy + * and schedule the softirq thread on the wrong cpu. This is + * only observable with forced threaded interrupts, but in + * theory it could also happen w/o them. It's just way harder + * to achieve. + */ + while (!cpumask_test_cpu(smp_processor_id(), cpu_active_mask)) + cpu_relax(); + /* enable local interrupts */ local_irq_enable(); -- cgit v1.2.3 From a91d92875ee94e4703fd017ccaadb48cfb344994 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Fri, 3 Jun 2011 09:51:34 +0000 Subject: xen: partially revert "xen: set max_pfn_mapped to the last pfn mapped" We only need to set max_pfn_mapped to the last pfn mapped on x86_64 to make sure that cleanup_highmap doesn't remove important mappings at _end. We don't need to do this on x86_32 because cleanup_highmap is not called on x86_32. Besides lowering max_pfn_mapped on x86_32 has the unwanted side effect of limiting the amount of memory available for the 1:1 kernel pagetable allocation. This patch reverts the x86_32 part of the original patch. CC: stable@kernel.org Signed-off-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/mmu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index dc708dcc62f..afe1d54f980 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1599,6 +1599,11 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { pte_t pte; +#ifdef CONFIG_X86_32 + if (pfn > max_pfn_mapped) + max_pfn_mapped = pfn; +#endif + if (!pte_none(pte_page[pteidx])) continue; @@ -1766,7 +1771,9 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, initial_kernel_pmd = extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); - max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); + max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + + xen_start_info->nr_pt_frames * PAGE_SIZE + + 512*1024); kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); -- cgit v1.2.3 From 977cb76d52e7aa040e18a84b29fe6fd80d79319b Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 6 Jun 2011 10:15:49 +0200 Subject: x86: devicetree: Add missing early_init_dt_setup_initrd_arch stub This patch fixes the following build failure: drivers/built-in.o: In function `early_init_dt_check_for_initrd': /home/florian/dev/kernel/x86/linux-2.6-x86/drivers/of/fdt.c:571: undefined reference to `early_init_dt_setup_initrd_arch' make: *** [.tmp_vmlinux1] Error 1 which happens as soon as we enable initrd support on a x86 devicetree platform such as Intel CE4100. Signed-off-by: Florian Fainelli Acked-by: Grant Likely Cc: Maxime Bizon Acked-by: Sebastian Andrzej Siewior Cc: stable@kernel.org # 2.6.39 Link: http://lkml.kernel.org/r/201106061015.50039.ffainelli@freebox.fr Signed-off-by: Thomas Gleixner --- arch/x86/kernel/devicetree.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 690bc846183..9aeb78a23de 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -98,6 +99,16 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) return __alloc_bootmem(size, align, __pa(MAX_DMA_ADDRESS)); } +#ifdef CONFIG_BLK_DEV_INITRD +void __init early_init_dt_setup_initrd_arch(unsigned long start, + unsigned long end) +{ + initrd_start = (unsigned long)__va(start); + initrd_end = (unsigned long)__va(end); + initrd_below_start_ok = 1; +} +#endif + void __init add_dtb(u64 data) { initial_dtb = data + offsetof(struct setup_data, data); -- cgit v1.2.3 From dac853ae89043f1b7752875300faf614de43c74b Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 9 Jun 2011 20:05:18 +0200 Subject: exec: delay address limit change until point of no return Unconditionally changing the address limit to USER_DS and not restoring it to its old value in the error path is wrong because it prevents us using kernel memory on repeated calls to this function. This, in fact, breaks the fallback of hard coded paths to the init program from being ever successful if the first candidate fails to load. With this patch applied switching to USER_DS is delayed until the point of no return is reached which makes it possible to have a multi-arch rootfs with one arch specific init binary for each of the (hard coded) probed paths. Since the address limit is already set to USER_DS when start_thread() will be invoked, this redundancy can be safely removed. Signed-off-by: Mathias Krause Cc: Al Viro Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- arch/x86/kernel/process_32.c | 1 - arch/x86/kernel/process_64.c | 1 - 2 files changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 8d128783af4..a3d0dc59067 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -245,7 +245,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) { set_user_gs(regs, 0); regs->fs = 0; - set_fs(USER_DS); regs->ds = __USER_DS; regs->es = __USER_DS; regs->ss = __USER_DS; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 6c9dd922ac0..ca6f7ab8df3 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -338,7 +338,6 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip, regs->cs = _cs; regs->ss = _ss; regs->flags = X86_EFLAGS_IF; - set_fs(USER_DS); /* * Free the old FP and other extended state */ -- cgit v1.2.3 From 7ad35cf288fd63a19bf50e490440a992de808b2b Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 25 May 2011 14:00:49 +1000 Subject: x86/uv/x2apic: update for change in pci bridge handling. When I added 3448a19da479b6bd1e28e2a2be9fa16c6a6feb39 I forgot about the special uv handling code for this, so this patch fixes it up. Acked-by: Jesse Barnes Acked-by: Ingo Molnar Signed-off-by: Dave Airlie --- arch/x86/kernel/apic/x2apic_uv_x.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index b511a011b7d..adc66c3a1fe 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -632,14 +632,14 @@ late_initcall(uv_init_heartbeat); /* Direct Legacy VGA I/O traffic to designated IOH */ int uv_set_vga_state(struct pci_dev *pdev, bool decode, - unsigned int command_bits, bool change_bridge) + unsigned int command_bits, u32 flags) { int domain, bus, rc; - PR_DEVEL("devfn %x decode %d cmd %x chg_brdg %d\n", - pdev->devfn, decode, command_bits, change_bridge); + PR_DEVEL("devfn %x decode %d cmd %x flags %d\n", + pdev->devfn, decode, command_bits, flags); - if (!change_bridge) + if (!(flags & PCI_VGA_STATE_CHANGE_BRIDGE)) return 0; if ((command_bits & PCI_COMMAND_IO) == 0) -- cgit v1.2.3 From 60b8b1de0dd2bf246f0e074d287bb3f0bc42a755 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Tue, 14 Jun 2011 12:45:10 -0700 Subject: x86 idle: APM requires pm_idle/default_idle unconditionally when a module [ Also from Ben Hutchings and Vitaliy Ivanov ] Commit 06ae40ce073d ("x86 idle: EXPORT_SYMBOL(default_idle, pm_idle) only when APM demands it") removed the export for pm_idle/default_idle unless the apm module was modularised and CONFIG_APM_CPU_IDLE was set. But the apm module uses pm_idle/default_idle unconditionally, CONFIG_APM_CPU_IDLE only affects the bios idle threshold. Adjust the export accordingly. [ Used #ifdef instead of #if defined() as it's shorter, and what both Ben and Vitaliy used.. Andy, you're out-voted ;) - Linus ] Reported-by: Randy Dunlap Acked-by: Jiri Kosina Acked-by: Ingo Molnar Acked-by: Len Brown Signed-off-by: Andy Whitcroft Signed-off-by: Vitaliy Ivanov Signed-off-by: Ben Hutchings Signed-off-by: Linus Torvalds --- arch/x86/kernel/process.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 2e4928d45a2..e1ba8cb24e4 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -337,7 +337,7 @@ EXPORT_SYMBOL(boot_option_idle_override); * Powermanagement idle function, if any.. */ void (*pm_idle)(void); -#if defined(CONFIG_APM_MODULE) && defined(CONFIG_APM_CPU_IDLE) +#ifdef CONFIG_APM_MODULE EXPORT_SYMBOL(pm_idle); #endif @@ -399,7 +399,7 @@ void default_idle(void) cpu_relax(); } } -#if defined(CONFIG_APM_MODULE) && defined(CONFIG_APM_CPU_IDLE) +#ifdef CONFIG_APM_MODULE EXPORT_SYMBOL(default_idle); #endif -- cgit v1.2.3 From 8fe7e94eb71430cf63a742f3c19739d82a662758 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 1 Jun 2011 15:31:44 +0200 Subject: oprofile, x86: Fix race in nmi handler while starting counters In some rare cases, nmis are generated immediately after the nmi handler of the cpu was started. This causes the counter not to be enabled. Before enabling the nmi handlers we need to set variable ctr_running first and make sure its value is written to memory. Also, the patch makes all existing barriers a memory barrier instead of a compiler barrier only. Reported-by: Suravee Suthikulpanit Cc: # .35+ Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index cf9750004a0..68894fdc034 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -112,8 +112,10 @@ static void nmi_cpu_start(void *dummy) static int nmi_start(void) { get_online_cpus(); - on_each_cpu(nmi_cpu_start, NULL, 1); ctr_running = 1; + /* make ctr_running visible to the nmi handler: */ + smp_mb(); + on_each_cpu(nmi_cpu_start, NULL, 1); put_online_cpus(); return 0; } @@ -504,15 +506,18 @@ static int nmi_setup(void) nmi_enabled = 0; ctr_running = 0; - barrier(); + /* make variables visible to the nmi handler: */ + smp_mb(); err = register_die_notifier(&profile_exceptions_nb); if (err) goto fail; get_online_cpus(); register_cpu_notifier(&oprofile_cpu_nb); - on_each_cpu(nmi_cpu_setup, NULL, 1); nmi_enabled = 1; + /* make nmi_enabled visible to the nmi handler: */ + smp_mb(); + on_each_cpu(nmi_cpu_setup, NULL, 1); put_online_cpus(); return 0; @@ -531,7 +536,8 @@ static void nmi_shutdown(void) nmi_enabled = 0; ctr_running = 0; put_online_cpus(); - barrier(); + /* make variables visible to the nmi handler: */ + smp_mb(); unregister_die_notifier(&profile_exceptions_nb); msrs = &get_cpu_var(cpu_msrs); model->shutdown(msrs); -- cgit v1.2.3 From 900cba8881b39dfbc7c8062098504ab93f5387a8 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Fri, 18 Dec 2009 10:31:31 +0100 Subject: xen: support CONFIG_MAXSMP The MAXSMP config option requires CPUMASK_OFFSTACK, which in turn requires we init the memory for the maps while we bring up the cpus. MAXSMP also increases NR_CPUS to 4096. This increase in size exposed an issue in the argument construction for multicalls from xen_flush_tlb_others. The args should only need space for the actual number of cpus. Also in 2.6.39 it exposes a bootup problem. BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] set_cpu_sibling_map+0x123/0x30d ... Call Trace: [] ? xen_restore_fl_direct_reloc+0x4/0x4 [] xen_smp_prepare_cpus+0x36/0x135 .. CC: stable@kernel.org Signed-off-by: Andrew Jones [v2: Updated to compile on 3.0] [v3: Updated to compile when CONFIG_SMP is not defined] Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/mmu.c | 3 ++- arch/x86/xen/smp.c | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index afe1d54f980..673e968df3c 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -59,6 +59,7 @@ #include #include #include +#include #include #include @@ -1231,7 +1232,7 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, { struct { struct mmuext_op op; - DECLARE_BITMAP(mask, NR_CPUS); + DECLARE_BITMAP(mask, num_processors); } *args; struct multicall_space mcs; diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 41038c01de4..b4533a86d7e 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -205,11 +205,18 @@ static void __init xen_smp_prepare_boot_cpu(void) static void __init xen_smp_prepare_cpus(unsigned int max_cpus) { unsigned cpu; + unsigned int i; xen_init_lock_cpu(0); smp_store_cpu_info(0); cpu_data(0).x86_max_cores = 1; + + for_each_possible_cpu(i) { + zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); + zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); + zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL); + } set_cpu_sibling_map(0); if (xen_smp_intr_init(0)) -- cgit v1.2.3 From b2abe50688dcb470e2e46109da7e7e02245ed59b Mon Sep 17 00:00:00 2001 From: Tom Goetz Date: Mon, 16 May 2011 15:06:26 -0400 Subject: xen: When calling power_off, don't call the halt function. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit .. As it won't actually power off the machine. Reported-by: Sven Köhler Tested-by: Sven Köhler Signed-off-by: Tom Goetz Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index dd7b88f2ec7..5525163a039 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1033,6 +1033,13 @@ static void xen_machine_halt(void) xen_reboot(SHUTDOWN_poweroff); } +static void xen_machine_power_off(void) +{ + if (pm_power_off) + pm_power_off(); + xen_reboot(SHUTDOWN_poweroff); +} + static void xen_crash_shutdown(struct pt_regs *regs) { xen_reboot(SHUTDOWN_crash); @@ -1058,7 +1065,7 @@ int xen_panic_handler_init(void) static const struct machine_ops xen_machine_ops __initconst = { .restart = xen_restart, .halt = xen_machine_halt, - .power_off = xen_machine_halt, + .power_off = xen_machine_power_off, .shutdown = xen_machine_halt, .crash_shutdown = xen_crash_shutdown, .emergency_restart = xen_emergency_restart, -- cgit v1.2.3 From acd049c6e99d2ad1195666195230f6881d1c1588 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 16 Jun 2011 13:07:19 -0400 Subject: xen/setup: Fix for incorrect xen_extra_mem_start. The earlier attempts (24bdb0b62cc82120924762ae6bc85afc8c3f2b26) at fixing this problem caused other problems to surface (PV guests with no PCI passthrough would have SWIOTLB turned on - which meant 64MB of precious contingous DMA32 memory being eaten up per guest). The problem was: "on xen we add an extra memory region at the end of the e820, and on this particular machine this extra memory region would start below 4g and cross over the 4g boundary: [0xfee01000-0x192655000) Unfortunately e820_end_of_low_ram_pfn does not expect an e820 layout like that so it returns 4g, therefore initial_memory_mapping will map [0 - 0x100000000), that is a memory range that includes some reserved memory regions." The memory range was the IOAPIC regions, and with the 1-1 mapping turned on, it would map them as RAM, not as MMIO regions. This caused the hypervisor to complain. Fortunately this is experienced only under the initial domain so we guard for it. Acked-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/setup.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index be1a464f6d6..60aeeb56948 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -227,11 +227,7 @@ char * __init xen_memory_setup(void) memcpy(map_raw, map, sizeof(map)); e820.nr_map = 0; -#ifdef CONFIG_X86_32 xen_extra_mem_start = mem_end; -#else - xen_extra_mem_start = max((1ULL << 32), mem_end); -#endif for (i = 0; i < memmap.nr_entries; i++) { unsigned long long end; @@ -266,6 +262,12 @@ char * __init xen_memory_setup(void) if (map[i].size > 0) e820_add_region(map[i].addr, map[i].size, map[i].type); } + /* Align the balloon area so that max_low_pfn does not get set + * to be at the _end_ of the PCI gap at the far end (fee01000). + * Note that xen_extra_mem_start gets set in the loop above to be + * past the last E820 region. */ + if (xen_initial_domain() && (xen_extra_mem_start < (1ULL<<32))) + xen_extra_mem_start = (1ULL<<32); /* * In domU, the ISA region is normal, usable memory, but we -- cgit v1.2.3 From 7d68dc3f1003a38948c55c803c32d1989dd49198 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 14 Jun 2011 19:53:09 +0200 Subject: x86, efi: Do not reserve boot services regions within reserved areas Commit 916f676f8dc started reserving boot service code since some systems require you to keep that code around until SetVirtualAddressMap is called. However, in some cases those areas will overlap with reserved regions. The proper medium-term fix is to fix the bootloader to prevent the conflicts from occurring by moving the kernel to a better position, but the kernel should check for this possibility, and only reserve regions which can be reserved. Signed-off-by: Maarten Lankhorst Link: http://lkml.kernel.org/r/4DF7A005.1050407@gmail.com Acked-by: Matthew Garrett Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/include/asm/memblock.h | 2 +- arch/x86/mm/memblock.c | 4 ++-- arch/x86/platform/efi/efi.c | 29 +++++++++++++++++++++++++---- 3 files changed, 28 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/memblock.h b/arch/x86/include/asm/memblock.h index 19ae14ba697..0cd3800f33b 100644 --- a/arch/x86/include/asm/memblock.h +++ b/arch/x86/include/asm/memblock.h @@ -4,7 +4,6 @@ #define ARCH_DISCARD_MEMBLOCK u64 memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align); -void memblock_x86_to_bootmem(u64 start, u64 end); void memblock_x86_reserve_range(u64 start, u64 end, char *name); void memblock_x86_free_range(u64 start, u64 end); @@ -19,5 +18,6 @@ u64 memblock_x86_hole_size(u64 start, u64 end); u64 memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align); u64 memblock_x86_free_memory_in_range(u64 addr, u64 limit); u64 memblock_x86_memory_in_range(u64 addr, u64 limit); +bool memblock_x86_check_reserved_size(u64 *addrp, u64 *sizep, u64 align); #endif diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c index aa1169392b8..992da5ec5a6 100644 --- a/arch/x86/mm/memblock.c +++ b/arch/x86/mm/memblock.c @@ -8,7 +8,7 @@ #include /* Check for already reserved areas */ -static bool __init check_with_memblock_reserved_size(u64 *addrp, u64 *sizep, u64 align) +bool __init memblock_x86_check_reserved_size(u64 *addrp, u64 *sizep, u64 align) { struct memblock_region *r; u64 addr = *addrp, last; @@ -59,7 +59,7 @@ u64 __init memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align) if (addr >= ei_last) continue; *sizep = ei_last - addr; - while (check_with_memblock_reserved_size(&addr, sizep, align)) + while (memblock_x86_check_reserved_size(&addr, sizep, align)) ; if (*sizep) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 0d3a4fa3456..474356b98ed 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -310,14 +310,31 @@ void __init efi_reserve_boot_services(void) for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { efi_memory_desc_t *md = p; - unsigned long long start = md->phys_addr; - unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; + u64 start = md->phys_addr; + u64 size = md->num_pages << EFI_PAGE_SHIFT; if (md->type != EFI_BOOT_SERVICES_CODE && md->type != EFI_BOOT_SERVICES_DATA) continue; - - memblock_x86_reserve_range(start, start + size, "EFI Boot"); + /* Only reserve where possible: + * - Not within any already allocated areas + * - Not over any memory area (really needed, if above?) + * - Not within any part of the kernel + * - Not the bios reserved area + */ + if ((start+size >= virt_to_phys(_text) + && start <= virt_to_phys(_end)) || + !e820_all_mapped(start, start+size, E820_RAM) || + memblock_x86_check_reserved_size(&start, &size, + 1<num_pages = 0; + memblock_dbg(PFX "Could not reserve boot range " + "[0x%010llx-0x%010llx]\n", + start, start+size-1); + } else + memblock_x86_reserve_range(start, start+size, + "EFI Boot"); } } @@ -334,6 +351,10 @@ static void __init efi_free_boot_services(void) md->type != EFI_BOOT_SERVICES_DATA) continue; + /* Could not reserve boot area */ + if (!size) + continue; + free_bootmem_late(start, size); } } -- cgit v1.2.3 From b72336355bb4c92d4a2be3f975dbea47089c83c1 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 30 May 2011 22:11:17 +0200 Subject: KVM: MMU: Fix build warnings in walk_addr_generic() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On 3.0-rc1 I get In file included from arch/x86/kvm/mmu.c:2856: arch/x86/kvm/paging_tmpl.h: In function ‘paging32_walk_addr_generic’: arch/x86/kvm/paging_tmpl.h:124: warning: ‘ptep_user’ may be used uninitialized in this function In file included from arch/x86/kvm/mmu.c:2852: arch/x86/kvm/paging_tmpl.h: In function ‘paging64_walk_addr_generic’: arch/x86/kvm/paging_tmpl.h:124: warning: ‘ptep_user’ may be used uninitialized in this function caused by 6e2ca7d1802bf8ed9908435e34daa116662e7790. According to Takuya Yoshikawa, ptep_user won't be used uninitialized so shut up gcc. Cc: Takuya Yoshikawa Link: http://lkml.kernel.org/r/20110530094604.GC21833@liondog.tnic Signed-off-by: Borislav Petkov Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 6c4dc010c4c..9d03ad4dd5e 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -121,7 +121,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, gva_t addr, u32 access) { pt_element_t pte; - pt_element_t __user *ptep_user; + pt_element_t __user *uninitialized_var(ptep_user); gfn_t table_gfn; unsigned index, pt_access, uninitialized_var(pte_access); gpa_t pte_gpa; -- cgit v1.2.3 From 5233dd51ece1615d54ab96c4cbe9ac3cc595e955 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 6 Jun 2011 14:27:47 -0300 Subject: KVM: VMX: do not overwrite uptodate vcpu->arch.cr3 on KVM_SET_SREGS Only decache guest CR3 value if vcpu->arch.cr3 is stale. Fixes loadvm with live guest. Signed-off-by: Marcelo Tosatti Tested-by: Markus Schade Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4c3fa0f6746..d48ec60ea42 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2047,7 +2047,8 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, unsigned long cr0, struct kvm_vcpu *vcpu) { - vmx_decache_cr3(vcpu); + if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) + vmx_decache_cr3(vcpu); if (!(cr0 & X86_CR0_PG)) { /* From paging/starting to nonpaging */ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, -- cgit v1.2.3 From a0a8eaba1661232f094654422bdabe2df4e26863 Mon Sep 17 00:00:00 2001 From: Steve Date: Fri, 17 Jun 2011 10:25:39 +0800 Subject: KVM: MMU: fix opposite condition in mapping_level_dirty_bitmap The condition is opposite, it always maps huge page for the dirty tracked page Reported-by: Steve Signed-off-by: Steve Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index bd14bb4c859..aee38623b76 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -565,7 +565,7 @@ gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn, static bool mapping_level_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t large_gfn) { - return gfn_to_memslot_dirty_bitmap(vcpu, large_gfn, true); + return !gfn_to_memslot_dirty_bitmap(vcpu, large_gfn, true); } static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) -- cgit v1.2.3 From de2d1a524e94a79078d9fe22c57c0c6009237547 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Wed, 15 Jun 2011 20:50:04 -0700 Subject: KVM: Fix register corruption in pvclock_scale_delta The 128-bit multiply in pvclock.h was missing an output constraint for EDX which caused a register corruption to appear. Thanks to Ulrich for diagnosing the EDX corruption and Avi for providing this fix. Signed-off-by: Zachary Amsden Signed-off-by: Avi Kivity --- arch/x86/include/asm/pvclock.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 31d84acc151..a518c0a4504 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -22,6 +22,8 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) u64 product; #ifdef __i386__ u32 tmp1, tmp2; +#else + ulong tmp; #endif if (shift < 0) @@ -42,8 +44,11 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); #elif defined(__x86_64__) __asm__ ( - "mul %%rdx ; shrd $32,%%rdx,%%rax" - : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) ); + "mul %[mul_frac] ; shrd $32, %[hi], %[lo]" + : [lo]"=a"(product), + [hi]"=d"(tmp) + : "0"(delta), + [mul_frac]"rm"((u64)mul_frac)); #else #error implement me! #endif -- cgit v1.2.3 From c6830c22603aaecf65405af23f6da2d55892f9cb Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Thu, 16 Jun 2011 17:28:07 +0900 Subject: Fix node_start/end_pfn() definition for mm/page_cgroup.c commit 21a3c96 uses node_start/end_pfn(nid) for detection start/end of nodes. But, it's not defined in linux/mmzone.h but defined in /arch/???/include/mmzone.h which is included only under CONFIG_NEED_MULTIPLE_NODES=y. Then, we see mm/page_cgroup.c: In function 'page_cgroup_init': mm/page_cgroup.c:308: error: implicit declaration of function 'node_start_pfn' mm/page_cgroup.c:309: error: implicit declaration of function 'node_end_pfn' So, fixiing page_cgroup.c is an idea... But node_start_pfn()/node_end_pfn() is a very generic macro and should be implemented in the same manner for all archs. (m32r has different implementation...) This patch removes definitions of node_start/end_pfn() in each archs and defines a unified one in linux/mmzone.h. It's not under CONFIG_NEED_MULTIPLE_NODES, now. A result of macro expansion is here (mm/page_cgroup.c) for !NUMA start_pfn = ((&contig_page_data)->node_start_pfn); end_pfn = ({ pg_data_t *__pgdat = (&contig_page_data); __pgdat->node_start_pfn + __pgdat->node_spanned_pages;}); for NUMA (x86-64) start_pfn = ((node_data[nid])->node_start_pfn); end_pfn = ({ pg_data_t *__pgdat = (node_data[nid]); __pgdat->node_start_pfn + __pgdat->node_spanned_pages;}); Changelog: - fixed to avoid using "nid" twice in node_end_pfn() macro. Reported-and-acked-by: Randy Dunlap Reported-and-tested-by: Ingo Molnar Acked-by: Mel Gorman Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Linus Torvalds --- arch/x86/include/asm/mmzone_32.h | 11 ----------- arch/x86/include/asm/mmzone_64.h | 3 --- 2 files changed, 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index 5e83a416eca..224e8c5eb30 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h @@ -48,17 +48,6 @@ static inline int pfn_to_nid(unsigned long pfn) #endif } -/* - * Following are macros that each numa implmentation must define. - */ - -#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) -#define node_end_pfn(nid) \ -({ \ - pg_data_t *__pgdat = NODE_DATA(nid); \ - __pgdat->node_start_pfn + __pgdat->node_spanned_pages; \ -}) - static inline int pfn_valid(int pfn) { int nid = pfn_to_nid(pfn); diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h index b3f88d7867c..129d9aa3ceb 100644 --- a/arch/x86/include/asm/mmzone_64.h +++ b/arch/x86/include/asm/mmzone_64.h @@ -13,8 +13,5 @@ extern struct pglist_data *node_data[]; #define NODE_DATA(nid) (node_data[nid]) -#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) -#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ - NODE_DATA(nid)->node_spanned_pages) #endif #endif /* _ASM_X86_MMZONE_64_H */ -- cgit v1.2.3 From e376fd664b1547e29e264e3cfb97553a1be9054b Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Thu, 26 May 2011 11:12:53 +0200 Subject: watchdog: Intel SCU Watchdog: Fix build and remove duplicate code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trying to build the Intel SCU Watchdog fails for me with gcc 4.6.0 - $ gcc --version | head -n 1 gcc (GCC) 4.6.0 20110513 (prerelease) like this : CC drivers/watchdog/intel_scu_watchdog.o In file included from drivers/watchdog/intel_scu_watchdog.c:49:0: /home/jj/src/linux-2.6/arch/x86/include/asm/apb_timer.h: In function ‘apbt_time_init’: /home/jj/src/linux-2.6/arch/x86/include/asm/apb_timer.h:65:42: warning: ‘return’ with a value, in function returning void [enabled by default] drivers/watchdog/intel_scu_watchdog.c: In function ‘intel_scu_watchdog_init’: drivers/watchdog/intel_scu_watchdog.c:468:2: error: implicit declaration of function ‘sfi_get_mtmr’ [-Werror=implicit-function-declaration] drivers/watchdog/intel_scu_watchdog.c:468:32: warning: assignment makes pointer from integer without a cast [enabled by default] cc1: some warnings being treated as errors make[1]: *** [drivers/watchdog/intel_scu_watchdog.o] Error 1 make: *** [drivers/watchdog/intel_scu_watchdog.o] Error 2 Additionally, linux/types.h is needlessly being included twice in drivers/watchdog/intel_scu_watchdog.c Signed-off-by: Jesper Juhl Signed-off-by: Wim Van Sebroeck --- arch/x86/include/asm/apb_timer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/apb_timer.h b/arch/x86/include/asm/apb_timer.h index 2fefa501d3b..af60d8a2e28 100644 --- a/arch/x86/include/asm/apb_timer.h +++ b/arch/x86/include/asm/apb_timer.h @@ -62,7 +62,7 @@ extern int sfi_mtimer_num; #else /* CONFIG_APB_TIMER */ static inline unsigned long apbt_quick_calibrate(void) {return 0; } -static inline void apbt_time_init(void) {return 0; } +static inline void apbt_time_init(void) { } #endif #endif /* ASM_X86_APBT_H */ -- cgit v1.2.3 From cb16c348760ad2bc79b67b20aefac05529569ed7 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 19 Jun 2011 19:21:11 +0300 Subject: KVM: x86 emulator: fix %rip-relative addressing with immediate source operand %rip-relative addressing is relative to the first byte of the next instruction, so we need to add %rip only after we've fetched any immediate bytes. Based on original patch by Li Xin . Signed-off-by: Avi Kivity Acked-by: Li Xin Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 6df88c7885c..adc98675cda 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3372,7 +3372,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) int def_op_bytes, def_ad_bytes, goffset, simd_prefix; bool op_prefix = false; struct opcode opcode; - struct operand memop = { .type = OP_NONE }; + struct operand memop = { .type = OP_NONE }, *memopp = NULL; c->eip = ctxt->eip; c->fetch.start = c->eip; @@ -3547,9 +3547,6 @@ done_prefixes: if (memop.type == OP_MEM && c->ad_bytes != 8) memop.addr.mem.ea = (u32)memop.addr.mem.ea; - if (memop.type == OP_MEM && c->rip_relative) - memop.addr.mem.ea += c->eip; - /* * Decode and fetch the source operand: register, memory * or immediate. @@ -3571,6 +3568,7 @@ done_prefixes: c->op_bytes; srcmem_common: c->src = memop; + memopp = &c->src; break; case SrcImmU16: rc = decode_imm(ctxt, &c->src, 2, false); @@ -3667,6 +3665,7 @@ done_prefixes: case DstMem: case DstMem64: c->dst = memop; + memopp = &c->dst; if ((c->d & DstMask) == DstMem64) c->dst.bytes = 8; else @@ -3700,10 +3699,13 @@ done_prefixes: /* Special instructions do their own operand decoding. */ default: c->dst.type = OP_NONE; /* Disable writeback. */ - return 0; + break; } done: + if (memopp && memopp->type == OP_MEM && c->rip_relative) + memopp->addr.mem.ea += c->eip; + return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; } -- cgit v1.2.3 From 32dd11942aeb47f91209a446d6b10063c5b69389 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 30 Jun 2011 09:12:40 -0400 Subject: xen/mmu: Fix for linker errors when CONFIG_SMP is not defined. Simple enough - we use an extern defined symbol which is not defined when CONFIG_SMP is not defined. This fixes the linker dying. CC: stable@kernel.org Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/mmu.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 673e968df3c..0ccccb67a99 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1232,7 +1232,11 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, { struct { struct mmuext_op op; +#ifdef CONFIG_SMP DECLARE_BITMAP(mask, num_processors); +#else + DECLARE_BITMAP(mask, NR_CPUS); +#endif } *args; struct multicall_space mcs; -- cgit v1.2.3 From 155a16f21923bc2f04161ac92acca986371ef27b Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 30 Jun 2011 09:18:27 -0400 Subject: xen/pci: Use the INT_SRC_OVR IRQ (instead of GSI) to preset the ACPI SCI IRQ. In the past we would use the GSI value to preset the ACPI SCI IRQ which worked great as GSI == IRQ: ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 low level) While that is most often seen, there are some oddities: ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 20 low level) which means that GSI 20 (or pin 20) is to be overriden for IRQ 9. Our code that presets the interrupt for ACPI SCI however would use the GSI 20 instead of IRQ 9 ending up with: xen: sci override: global_irq=20 trigger=0 polarity=1 xen: registering gsi 20 triggering 0 polarity 1 xen: --> pirq=20 -> irq=20 xen: acpi sci 20 .. snip.. calling acpi_init+0x0/0xbc @ 1 ACPI: SCI (IRQ9) allocation failed ACPI Exception: AE_NOT_ACQUIRED, Unable to install System Control Interrupt handler (20110413/evevent-119) ACPI: Unable to start the ACPI Interpreter as the ACPI interpreter made a call to 'acpi_gsi_to_irq' which got nine. It used that value to request an IRQ (request_irq) and since that was not present it failed. The fix is to recognize that for interrupts that are overriden (in our case we only care about the ACPI SCI) we should use the IRQ number to present the IRQ instead of the using GSI. End result is that we get: xen: sci override: global_irq=20 trigger=0 polarity=1 xen: registering gsi 20 triggering 0 polarity 1 xen: --> pirq=20 -> irq=9 (gsi=9) xen: acpi sci 9 which fixes the ACPI interpreter failing on startup. CC: stable@kernel.org Reported-by: Liwei Tested-by: Liwei [http://lists.xensource.com/archives/html/xen-devel/2011-06/msg01727.html] Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/pci/xen.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 8214724ce54..fe008309ffe 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -333,6 +333,7 @@ static int xen_register_pirq(u32 gsi, int triggering) struct physdev_map_pirq map_irq; int shareable = 0; char *name; + bool gsi_override = false; if (!xen_pv_domain()) return -1; @@ -349,11 +350,32 @@ static int xen_register_pirq(u32 gsi, int triggering) if (pirq < 0) goto out; - irq = xen_bind_pirq_gsi_to_irq(gsi, pirq, shareable, name); + /* Before we bind the GSI to a Linux IRQ, check whether + * we need to override it with bus_irq (IRQ) value. Usually for + * IRQs below IRQ_LEGACY_IRQ this holds IRQ == GSI, as so: + * ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 low level) + * but there are oddballs where the IRQ != GSI: + * ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 20 low level) + * which ends up being: gsi_to_irq[9] == 20 + * (which is what acpi_gsi_to_irq ends up calling when starting the + * the ACPI interpreter and keels over since IRQ 9 has not been + * setup as we had setup IRQ 20 for it). + */ + if (gsi == acpi_sci_override_gsi) { + /* Check whether the GSI != IRQ */ + acpi_gsi_to_irq(gsi, &irq); + if (irq != gsi) + /* Bugger, we MUST have that IRQ. */ + gsi_override = true; + } + if (gsi_override) + irq = xen_bind_pirq_gsi_to_irq(irq, pirq, shareable, name); + else + irq = xen_bind_pirq_gsi_to_irq(gsi, pirq, shareable, name); if (irq < 0) goto out; - printk(KERN_DEBUG "xen: --> pirq=%d -> irq=%d\n", pirq, irq); + printk(KERN_DEBUG "xen: --> pirq=%d -> irq=%d (gsi=%d)\n", pirq, irq, gsi); map_irq.domid = DOMID_SELF; map_irq.type = MAP_PIRQ_TYPE_GSI; -- cgit v1.2.3 From a26474e8649643e82d71e3a386d5c4bcc0b207ef Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 28 Jun 2011 11:41:07 +0200 Subject: x86-32, NUMA: Fix boot regression caused by NUMA init unification on highmem machines During 32/64 NUMA init unification, commit 797390d855 ("x86-32, NUMA: use sparse_memory_present_with_active_regions()") made 32bit mm init call memory_present() automatically from active_regions instead of leaving it to each NUMA init path. This commit description is inaccurate - memory_present() calls aren't the same for flat and numaq. After the commit, memory_present() is only called for the intersection of e820 and NUMA layout. Before, on flatmem, memory_present() would be called from 0 to max_pfn. After, it would be called only on the areas that e820 indicates to be populated. This is how x86_64 works and should be okay as memmap is allowed to contain holes; however, x86_32 DISCONTIGMEM is missing early_pfn_valid(), which makes memmap_init_zone() assume that memmap doesn't contain any hole. This leads to the following oops if e820 map contains holes as it often does on machine with near or more 4GiB of memory by calling pfn_to_page() on a pfn which isn't mapped to a NUMA node, a reported by Conny Seidel: BUG: unable to handle kernel paging request at 000012b0 IP: [] memmap_init_zone+0x6c/0xf2 *pdpt =3D 0000000000000000 *pde =3D f000eef3f000ee00 Oops: 0000 [#1] SMP last sysfs file: Modules linked in: Pid: 0, comm: swapper Not tainted 2.6.39-rc5-00164-g797390d #1 To Be Filled By O.E.M. To Be Filled By O.E.M./E350M1 EIP: 0060:[] EFLAGS: 00010012 CPU: 0 EIP is at memmap_init_zone+0x6c/0xf2 EAX: 00000000 EBX: 000a8000 ECX: 000a7fff EDX: f2c00b80 ESI: 000a8000 EDI: f2c00800 EBP: c19ffe54 ESP: c19ffe34 DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 Process swapper (pid: 0, ti=3Dc19fe000 task=3Dc1a07f60 task.ti=3Dc19fe000) Stack: 00000002 00000000 0023f000 00000000 10000000 00000a00 f2c00000 f2c00b58 c19ffeb0 c1a80f24 000375fe 00000000 f2c00800 00000800 00000100 00000030 c1abb768 0000003c 00000000 00000000 00000004 00207a02 f2c00800 000375fe Call Trace: [] free_area_init_node+0x358/0x385 [] free_area_init_nodes+0x420/0x487 [] paging_init+0x114/0x11b [] setup_arch+0xb37/0xc0a [] start_kernel+0x76/0x316 [] i386_start_kernel+0xa8/0xb0 This patch fixes the bug by defining early_pfn_valid() to be the same as pfn_valid() when DISCONTIGMEM. Reported-bisected-and-tested-by: Conny Seidel Signed-off-by: Tejun Heo Cc: hans.rosenfeld@amd.com Cc: Christoph Lameter Cc: Conny Seidel Link: http://lkml.kernel.org/r/20110628094107.GB3386@htj.dyndns.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mmzone_32.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index 224e8c5eb30..ffa037f28d3 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h @@ -57,6 +57,8 @@ static inline int pfn_valid(int pfn) return 0; } +#define early_pfn_valid(pfn) pfn_valid((pfn)) + #endif /* CONFIG_DISCONTIGMEM */ #ifdef CONFIG_NEED_MULTIPLE_NODES -- cgit v1.2.3 From b49c78d4827be8d7e67e5b94adac6b30a4a9ad14 Mon Sep 17 00:00:00 2001 From: Peter Chubb Date: Wed, 6 Jul 2011 10:56:30 +1000 Subject: x86, reboot: Acer Aspire One A110 reboot quirk Since git commit 660e34cebf0a11d54f2d5dd8838607452355f321 x86: reorder reboot method preferences, my Acer Aspire One hangs on reboot. It appears that its ACPI method for rebooting is broken. The attached patch adds a quirk so that the machine will reboot via the BIOS. [ hpa: verified that the ACPI control on this machine is just plain broken. ] Signed-off-by: Peter Chubb Link: http://lkml.kernel.org/r/w439iki5vl.wl%25peter@chubb.wattle.id.au Signed-off-by: H. Peter Anvin --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 0c016f72769..4f0d46fefa7 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -294,6 +294,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "VersaLogic Menlow board"), }, }, + { /* Handle reboot issue on Acer Aspire one */ + .callback = set_bios_reboot, + .ident = "Acer Aspire One A110", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"), + }, + }, { } }; -- cgit v1.2.3 From 7a3136666bc0f0419f7aaa7b1fabb4b0e0a7fb76 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 6 Jul 2011 18:10:34 -0700 Subject: x86, suspend: Restore MISC_ENABLE MSR in realmode wakeup Some BIOSes will reset the Intel MISC_ENABLE MSR (specifically the XD_DISABLE bit) when resuming from S3, which can interact poorly with ebba638ae723d8a8fc2f7abce5ec18b688b791d7. In 32bit PAE mode, this can lead to a fault when EFER is restored by the kernel wakeup routines, due to it setting the NX bit for a CPU that (thanks to the BIOS reset) now incorrectly thinks it lacks the NX feature. (64bit is not affected because it uses a common CPU bring-up that specifically handles the XD_DISABLE bit.) The need for MISC_ENABLE being restored so early is specific to the S3 resume path. Normally, MISC_ENABLE is saved in save_processor_state(), but this happens after the resume header is created, so just reproduce the logic here. (acpi_suspend_lowlevel() creates the header, calls do_suspend_lowlevel, which calls save_processor_state(), so the saved processor context isn't available during resume header creation.) [ hpa: Consider for stable if OK in mainline ] Signed-off-by: Kees Cook Link: http://lkml.kernel.org/r/20110707011034.GA8523@outflux.net Signed-off-by: H. Peter Anvin Cc: Rafael J. Wysocki Cc: 2.6.38+ --- arch/x86/kernel/acpi/realmode/wakeup.S | 14 ++++++++++++++ arch/x86/kernel/acpi/realmode/wakeup.h | 6 ++++++ arch/x86/kernel/acpi/sleep.c | 6 ++++++ 3 files changed, 26 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S index ead21b66311..b4fd836e405 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.S @@ -28,6 +28,8 @@ pmode_cr3: .long 0 /* Saved %cr3 */ pmode_cr4: .long 0 /* Saved %cr4 */ pmode_efer: .quad 0 /* Saved EFER */ pmode_gdt: .quad 0 +pmode_misc_en: .quad 0 /* Saved MISC_ENABLE MSR */ +pmode_behavior: .long 0 /* Wakeup behavior flags */ realmode_flags: .long 0 real_magic: .long 0 trampoline_segment: .word 0 @@ -91,6 +93,18 @@ wakeup_code: /* Call the C code */ calll main + /* Restore MISC_ENABLE before entering protected mode, in case + BIOS decided to clear XD_DISABLE during S3. */ + movl pmode_behavior, %eax + btl $WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE, %eax + jnc 1f + + movl pmode_misc_en, %eax + movl pmode_misc_en + 4, %edx + movl $MSR_IA32_MISC_ENABLE, %ecx + wrmsr +1: + /* Do any other stuff... */ #ifndef CONFIG_64BIT diff --git a/arch/x86/kernel/acpi/realmode/wakeup.h b/arch/x86/kernel/acpi/realmode/wakeup.h index e1828c07e79..97a29e1430e 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.h +++ b/arch/x86/kernel/acpi/realmode/wakeup.h @@ -21,6 +21,9 @@ struct wakeup_header { u32 pmode_efer_low; /* Protected mode EFER */ u32 pmode_efer_high; u64 pmode_gdt; + u32 pmode_misc_en_low; /* Protected mode MISC_ENABLE */ + u32 pmode_misc_en_high; + u32 pmode_behavior; /* Wakeup routine behavior flags */ u32 realmode_flags; u32 real_magic; u16 trampoline_segment; /* segment with trampoline code, 64-bit only */ @@ -39,4 +42,7 @@ extern struct wakeup_header wakeup_header; #define WAKEUP_HEADER_SIGNATURE 0x51ee1111 #define WAKEUP_END_SIGNATURE 0x65a22c82 +/* Wakeup behavior bits */ +#define WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE 0 + #endif /* ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H */ diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 18a857ba7a2..103b6ab368d 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -77,6 +77,12 @@ int acpi_suspend_lowlevel(void) header->pmode_cr0 = read_cr0(); header->pmode_cr4 = read_cr4_safe(); + header->pmode_behavior = 0; + if (!rdmsr_safe(MSR_IA32_MISC_ENABLE, + &header->pmode_misc_en_low, + &header->pmode_misc_en_high)) + header->pmode_behavior |= + (1 << WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE); header->realmode_flags = acpi_realmode_flags; header->real_magic = 0x12345678; -- cgit v1.2.3 From f70e957cda22d309c769805cbb932407a5232219 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Wed, 6 Jul 2011 16:52:37 -0400 Subject: x86: Don't use the EFI reboot method by default Testing suggests that at least some Lenovos and some Intels will fail to reboot via EFI, attempting to jump to an unmapped physical address. In the long run we could handle this by providing a page table with a 1:1 mapping of physical addresses, but for now it's probably just easier to assume that ACPI or legacy methods will be present and reboot via those. Signed-off-by: Matthew Garrett Cc: Linus Torvalds Cc: Andrew Morton Cc: Alan Cox Link: http://lkml.kernel.org/r/1309985557-15350-1-git-send-email-mjg@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 474356b98ed..899e393d8e7 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -504,9 +504,6 @@ void __init efi_init(void) x86_platform.set_wallclock = efi_set_rtc_mmss; #endif - /* Setup for EFI runtime service */ - reboot_type = BOOT_EFI; - #if EFI_DEBUG print_efi_memmap(); #endif -- cgit v1.2.3 From ee339fe63ac408e4604c1c88b1f9a428f2511b70 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 6 Jul 2011 09:43:16 -0400 Subject: xen/pci: Move check for acpi_sci_override_gsi to xen_setup_acpi_sci. Previously we would check for acpi_sci_override_gsi == gsi every time a PCI device was enabled. That works during early bootup, but later on it could lead to triggering unnecessarily the acpi_gsi_to_irq(..) lookup. The reason is that acpi_sci_override_gsi was declared in __initdata and after early bootup could contain bogus values. This patch moves the check for acpi_sci_override_gsi to the site where the ACPI SCI is preset. CC: stable@kernel.org Reported-by: Raghavendra D Prabhu Tested-by: Raghavendra D Prabhu [http://lists.xensource.com/archives/html/xen-devel/2011-07/msg00154.html] Suggested-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/pci/xen.c | 56 +++++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index fe008309ffe..f567965c062 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -327,13 +327,12 @@ int __init pci_xen_hvm_init(void) } #ifdef CONFIG_XEN_DOM0 -static int xen_register_pirq(u32 gsi, int triggering) +static int xen_register_pirq(u32 gsi, int gsi_override, int triggering) { int rc, pirq, irq = -1; struct physdev_map_pirq map_irq; int shareable = 0; char *name; - bool gsi_override = false; if (!xen_pv_domain()) return -1; @@ -345,31 +344,12 @@ static int xen_register_pirq(u32 gsi, int triggering) shareable = 1; name = "ioapic-level"; } - pirq = xen_allocate_pirq_gsi(gsi); if (pirq < 0) goto out; - /* Before we bind the GSI to a Linux IRQ, check whether - * we need to override it with bus_irq (IRQ) value. Usually for - * IRQs below IRQ_LEGACY_IRQ this holds IRQ == GSI, as so: - * ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 low level) - * but there are oddballs where the IRQ != GSI: - * ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 20 low level) - * which ends up being: gsi_to_irq[9] == 20 - * (which is what acpi_gsi_to_irq ends up calling when starting the - * the ACPI interpreter and keels over since IRQ 9 has not been - * setup as we had setup IRQ 20 for it). - */ - if (gsi == acpi_sci_override_gsi) { - /* Check whether the GSI != IRQ */ - acpi_gsi_to_irq(gsi, &irq); - if (irq != gsi) - /* Bugger, we MUST have that IRQ. */ - gsi_override = true; - } - if (gsi_override) - irq = xen_bind_pirq_gsi_to_irq(irq, pirq, shareable, name); + if (gsi_override >= 0) + irq = xen_bind_pirq_gsi_to_irq(gsi_override, pirq, shareable, name); else irq = xen_bind_pirq_gsi_to_irq(gsi, pirq, shareable, name); if (irq < 0) @@ -392,7 +372,7 @@ out: return irq; } -static int xen_register_gsi(u32 gsi, int triggering, int polarity) +static int xen_register_gsi(u32 gsi, int gsi_override, int triggering, int polarity) { int rc, irq; struct physdev_setup_gsi setup_gsi; @@ -403,7 +383,7 @@ static int xen_register_gsi(u32 gsi, int triggering, int polarity) printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n", gsi, triggering, polarity); - irq = xen_register_pirq(gsi, triggering); + irq = xen_register_pirq(gsi, gsi_override, triggering); setup_gsi.gsi = gsi; setup_gsi.triggering = (triggering == ACPI_EDGE_SENSITIVE ? 0 : 1); @@ -425,6 +405,8 @@ static __init void xen_setup_acpi_sci(void) int rc; int trigger, polarity; int gsi = acpi_sci_override_gsi; + int irq = -1; + int gsi_override = -1; if (!gsi) return; @@ -441,7 +423,25 @@ static __init void xen_setup_acpi_sci(void) printk(KERN_INFO "xen: sci override: global_irq=%d trigger=%d " "polarity=%d\n", gsi, trigger, polarity); - gsi = xen_register_gsi(gsi, trigger, polarity); + /* Before we bind the GSI to a Linux IRQ, check whether + * we need to override it with bus_irq (IRQ) value. Usually for + * IRQs below IRQ_LEGACY_IRQ this holds IRQ == GSI, as so: + * ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 low level) + * but there are oddballs where the IRQ != GSI: + * ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 20 low level) + * which ends up being: gsi_to_irq[9] == 20 + * (which is what acpi_gsi_to_irq ends up calling when starting the + * the ACPI interpreter and keels over since IRQ 9 has not been + * setup as we had setup IRQ 20 for it). + */ + /* Check whether the GSI != IRQ */ + if (acpi_gsi_to_irq(gsi, &irq) == 0) { + if (irq >= 0 && irq != gsi) + /* Bugger, we MUST have that IRQ. */ + gsi_override = irq; + } + + gsi = xen_register_gsi(gsi, gsi_override, trigger, polarity); printk(KERN_INFO "xen: acpi sci %d\n", gsi); return; @@ -450,7 +450,7 @@ static __init void xen_setup_acpi_sci(void) static int acpi_register_gsi_xen(struct device *dev, u32 gsi, int trigger, int polarity) { - return xen_register_gsi(gsi, trigger, polarity); + return xen_register_gsi(gsi, -1 /* no GSI override */, trigger, polarity); } static int __init pci_xen_initial_domain(void) @@ -489,7 +489,7 @@ void __init xen_setup_pirqs(void) if (acpi_get_override_irq(irq, &trigger, &polarity) == -1) continue; - xen_register_pirq(irq, + xen_register_pirq(irq, -1 /* no GSI override */, trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE); } } -- cgit v1.2.3 From 5da0ef9a8554a8d03dc880a53f213289fe7b576d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 11 Jul 2011 10:34:32 +0200 Subject: x86: Disable AMD_NUMA for 32bit for now Commit 2706a0bf7b ("x86, NUMA: Enable CONFIG_AMD_NUMA on 32bit too") enabled AMD NUMA for 32bit too. Unfortunately, SPARSEMEM on 32bit had rather coarse (512MiB) addr->node mapping granularity due to lack of space in page->flags. This led to boot failure on certain AMD NUMA machines which had 128MiB alignment on nodes. Patches to properly detect this condition and reject NUMA configuration are posted[1] but deemed too pervasive for merge at this point (-rc6). Disable AMD NUMA for 32bit for now and re-enable once the detection logic is merged. [1] http://thread.gmane.org/gmane.linux.kernel/1161279/focus=1162583 Reported-by: Hans Rosenfeld Signed-off-by: Tejun Heo Cc: Conny Seidel Link: http://lkml.kernel.org/r/20110711083432.GC943@htj.dyndns.org Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index da349723d41..37357a599dc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1170,7 +1170,7 @@ comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" config AMD_NUMA def_bool y prompt "Old style AMD Opteron NUMA detection" - depends on NUMA && PCI + depends on X86_64 && NUMA && PCI ---help--- Enable AMD NUMA node topology detection. You should say Y here if you have a multi processor AMD system. This uses an old method to -- cgit v1.2.3 From a63fdc5156f2ef5690b6cf03d72b0c4917efbba7 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 14 Jun 2011 10:57:50 +1000 Subject: mm: Move definition of MIN_MEMORY_BLOCK_SIZE to a header The macro MIN_MEMORY_BLOCK_SIZE is currently defined twice in two .c files, and I need it in a third one to fix a powerpc bug, so let's first move it into a header Signed-off-by: Benjamin Herrenschmidt Acked-by: Ingo Molnar --- arch/x86/mm/init_64.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index d865c4aeec5..bbaaa005bf0 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -895,8 +896,6 @@ const char *arch_vma_name(struct vm_area_struct *vma) } #ifdef CONFIG_X86_UV -#define MIN_MEMORY_BLOCK_SIZE (1 << SECTION_SIZE_BITS) - unsigned long memory_block_size_bytes(void) { if (is_uv_system()) { -- cgit v1.2.3 From 3628c3f5c818cfc6e588d1ccb31f19aa12345c02 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 28 Jun 2011 15:57:31 +0200 Subject: x86. reboot: Make Dell Latitude E6320 use reboot=pci The Dell Latitude E6320 doesn't reboot unless reboot=pci is set. Force it thanks to DMI. Signed-off-by: Maxime Ripard Link: http://lkml.kernel.org/r/1309269451-4966-1-git-send-email-maxime.ripard@free-electrons.com Cc: Matthew Garrett Signed-off-by: H. Peter Anvin --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 4f0d46fefa7..14eed214b58 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -419,6 +419,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"), }, }, + { /* Handle problems with rebooting on the Latitude E6320. */ + .callback = set_pci_reboot, + .ident = "Dell Latitude E6320", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6320"), + }, + }, { } }; -- cgit v1.2.3 From b7798d28ec15d20fd34b70fa57eb13f0cf6d1ecd Mon Sep 17 00:00:00 2001 From: Daniel J Blueman Date: Fri, 13 May 2011 09:04:59 +0800 Subject: x86: Make Dell Latitude E5420 use reboot=pci Rebooting on the Dell E5420 often hangs with the keyboard or ACPI methods, but is reliable via the PCI method. [ hpa: this was deferred because we believed for a long time that the recent reshuffling of the boot priorities in commit 660e34cebf0a11d54f2d5dd8838607452355f321 fixed this platform. Unfortunately that turned out to be incorrect. ] Signed-off-by: Daniel J Blueman Link: http://lkml.kernel.org/r/1305248699-2347-1-git-send-email-daniel.blueman@gmail.com Signed-off-by: H. Peter Anvin Cc: --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 14eed214b58..3dfd38f122d 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -427,6 +427,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6320"), }, }, + { /* Handle problems with rebooting on the Latitude E5420. */ + .callback = set_pci_reboot, + .ident = "Dell Latitude E5420", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E5420"), + }, + }, { } }; -- cgit v1.2.3 From a536877e77f73ea22d12d94a019fedd9671b6acd Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 21 Jul 2011 11:22:21 -0700 Subject: x86: Make Dell Latitude E6420 use reboot=pci Yet another variant of the Dell Latitude series which requires reboot=pci. From the E5420 bug report by Daniel J Blueman: > The E6420 is affected also (same platform, different casing and > features), which provides an external confirmation of the issue; I can > submit a patch for that later or include it if you prefer: > http://linux.koolsolutions.com/2009/08/04/howto-fix-linux-hangfreeze-during-reboots-and-restarts/ Reported-by: Daniel J Blueman Signed-off-by: H. Peter Anvin Cc: --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 3dfd38f122d..9242436e993 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -435,6 +435,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E5420"), }, }, + { /* Handle problems with rebooting on the Latitude E6420. */ + .callback = set_pci_reboot, + .ident = "Dell Latitude E6420", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6420"), + }, + }, { } }; -- cgit v1.2.3