Diffstat (limited to 'arch')
-rw-r--r--  arch/alpha/Kconfig  1
-rw-r--r--  arch/arm/Kconfig  19
-rw-r--r--  arch/avr32/Kconfig  1
-rw-r--r--  arch/blackfin/Kconfig  1
-rw-r--r--  arch/cris/Kconfig  1
-rw-r--r--  arch/frv/Kconfig  1
-rw-r--r--  arch/h8300/Kconfig  1
-rw-r--r--  arch/ia64/Kconfig  1
-rw-r--r--  arch/ia64/pci/pci.c  25
-rw-r--r--  arch/ia64/sn/pci/tioce_provider.c  16
-rw-r--r--  arch/m32r/Kconfig  1
-rw-r--r--  arch/m68k/Kconfig  1
-rw-r--r--  arch/m68knommu/Kconfig  1
-rw-r--r--  arch/mips/Kconfig  1
-rw-r--r--  arch/parisc/Kconfig  1
-rw-r--r--  arch/powerpc/Kconfig  1
-rw-r--r--  arch/ppc/Kconfig  1
-rw-r--r--  arch/s390/defconfig  87
-rw-r--r--  arch/s390/kernel/binfmt_elf32.c  11
-rw-r--r--  arch/s390/kernel/compat_wrapper.S  20
-rw-r--r--  arch/s390/kernel/process.c  1
-rw-r--r--  arch/s390/kernel/setup.c  1
-rw-r--r--  arch/s390/kernel/syscalls.S  3
-rw-r--r--  arch/s390/kernel/traps.c  3
-rw-r--r--  arch/s390/mm/fault.c  40
-rw-r--r--  arch/s390/mm/init.c  3
-rw-r--r--  arch/s390/mm/mmap.c  65
-rw-r--r--  arch/s390/mm/pgtable.c  176
-rw-r--r--  arch/s390/mm/vmem.c  28
-rw-r--r--  arch/sh/Kconfig  1
-rw-r--r--  arch/sparc/Kconfig  1
-rw-r--r--  arch/sparc/kernel/errtbls.c  144
-rw-r--r--  arch/sparc/kernel/of_device.c  27
-rw-r--r--  arch/sparc/kernel/vmlinux.lds.S  4
-rw-r--r--  arch/sparc64/Kconfig  5
-rw-r--r--  arch/sparc64/kernel/iommu.c  352
-rw-r--r--  arch/sparc64/kernel/iommu_common.h  33
-rw-r--r--  arch/sparc64/kernel/kprobes.c  113
-rw-r--r--  arch/sparc64/kernel/of_device.c  26
-rw-r--r--  arch/sparc64/kernel/pci_sun4v.c  278
-rw-r--r--  arch/v850/Kconfig  1
-rw-r--r--  arch/x86/Kconfig  4
-rw-r--r--  arch/x86/Kconfig.debug  6
-rw-r--r--  arch/x86/Makefile  4
-rw-r--r--  arch/x86/boot/.gitignore  2
-rw-r--r--  arch/x86/boot/printf.c  24
-rw-r--r--  arch/x86/configs/i386_defconfig  1
-rw-r--r--  arch/x86/configs/x86_64_defconfig  1
-rw-r--r--  arch/x86/kernel/Makefile  2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/therm_throt.c  2
-rw-r--r--  arch/x86/kernel/entry_32.S  15
-rw-r--r--  arch/x86/kernel/entry_64.S  18
-rw-r--r--  arch/x86/kernel/geode_32.c  5
-rw-r--r--  arch/x86/kernel/head_32.S  151
-rw-r--r--  arch/x86/kernel/mfgpt_32.c  123
-rw-r--r--  arch/x86/kernel/quirks.c  2
-rw-r--r--  arch/x86/kernel/setup_32.c  4
-rw-r--r--  arch/x86/kernel/topology.c  2
-rw-r--r--  arch/x86/mm/init_32.c  74
-rw-r--r--  arch/x86/mm/init_64.c  8
-rw-r--r--  arch/x86/mm/ioremap.c  55
-rw-r--r--  arch/x86/mm/pageattr.c  140
-rw-r--r--  arch/x86/pci/common.c  25
-rw-r--r--  arch/x86/pci/direct.c  4
-rw-r--r--  arch/x86/pci/fixup.c  6
-rw-r--r--  arch/x86/pci/legacy.c  2
-rw-r--r--  arch/x86/pci/mmconfig-shared.c  41
-rw-r--r--  arch/x86/pci/mmconfig_32.c  20
-rw-r--r--  arch/x86/pci/mmconfig_64.c  18
-rw-r--r--  arch/x86/pci/pci.h  22
-rw-r--r--  arch/x86/pci/visws.c  3
-rw-r--r--  arch/x86/power/Makefile  4
-rw-r--r--  arch/x86/power/cpu_32.c (renamed from arch/x86/power/cpu.c)  2
-rw-r--r--  arch/x86/power/cpu_64.c (renamed from arch/x86/kernel/suspend_64.c)  160
-rw-r--r--  arch/x86/power/hibernate_32.c (renamed from arch/x86/power/suspend.c)  6
-rw-r--r--  arch/x86/power/hibernate_64.c  169
-rw-r--r--  arch/x86/power/hibernate_asm_32.S (renamed from arch/x86/power/swsusp.S)  3
-rw-r--r--  arch/x86/power/hibernate_asm_64.S (renamed from arch/x86/kernel/suspend_asm_64.S)  9
-rw-r--r--  arch/x86/xen/mmu.c  6
-rw-r--r--  arch/x86/xen/time.c  10
-rw-r--r--  arch/xtensa/Kconfig  1
81 files changed, 1624 insertions, 1027 deletions
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 5b7dcd5a0e7..002703b8c0b 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -5,6 +5,7 @@
config ALPHA
bool
default y
+ select HAVE_IDE
select HAVE_OPROFILE
help
The Alpha is a 64-bit general-purpose processor designed and
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 4127af93c5f..9619c43783f 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -222,6 +222,7 @@ config ARCH_CLPS7500
select TIMER_ACORN
select ISA
select NO_IOPORT
+ select HAVE_IDE
help
Support for the Cirrus Logic PS7500FE system-on-a-chip.
@@ -234,6 +235,7 @@ config ARCH_CO285
bool "Co-EBSA285"
select FOOTBRIDGE
select FOOTBRIDGE_ADDIN
+ select HAVE_IDE
help
Support for Intel's EBSA285 companion chip.
@@ -258,6 +260,7 @@ config ARCH_EP93XX
config ARCH_FOOTBRIDGE
bool "FootBridge"
select FOOTBRIDGE
+ select HAVE_IDE
help
Support for systems based on the DC21285 companion chip
("FootBridge"), such as the Simtec CATS and the Rebel NetWinder.
@@ -296,6 +299,7 @@ config ARCH_IOP32X
depends on MMU
select PLAT_IOP
select PCI
+ select HAVE_IDE
help
Support for Intel's 80219 and IOP32X (XScale) family of
processors.
@@ -305,12 +309,14 @@ config ARCH_IOP33X
depends on MMU
select PLAT_IOP
select PCI
+ select HAVE_IDE
help
Support for Intel's IOP33X (XScale) family of processors.
config ARCH_IXP23XX
bool "IXP23XX-based"
depends on MMU
+ select HAVE_IDE
select PCI
help
Support for Intel's IXP23xx (XScale) family of processors.
@@ -328,12 +334,14 @@ config ARCH_IXP4XX
select GENERIC_GPIO
select GENERIC_TIME
select GENERIC_CLOCKEVENTS
+ select HAVE_IDE
help
Support for Intel's IXP4XX (XScale) family of processors.
config ARCH_L7200
bool "LinkUp-L7200"
select FIQ
+ select HAVE_IDE
help
Say Y here if you intend to run this kernel on a LinkUp Systems
L7200 Software Development Board which uses an ARM720T processor.
@@ -388,6 +396,7 @@ config ARCH_PXA
depends on MMU
select ARCH_MTD_XIP
select GENERIC_GPIO
+ select HAVE_IDE
select HAVE_GPIO_LIB
select GENERIC_TIME
select GENERIC_CLOCKEVENTS
@@ -403,6 +412,7 @@ config ARCH_RPC
select ARCH_MAY_HAVE_PC_FDC
select ISA_DMA_API
select NO_IOPORT
+ select HAVE_IDE
help
On the Acorn Risc-PC, Linux can support the internal IDE disk and
CD-ROM interface, serial and parallel port, and the floppy drive.
@@ -414,12 +424,14 @@ config ARCH_SA1100
select ARCH_MTD_XIP
select GENERIC_GPIO
select GENERIC_TIME
+ select HAVE_IDE
help
Support for StrongARM 11x0 based boards.
config ARCH_S3C2410
bool "Samsung S3C2410, S3C2412, S3C2413, S3C2440, S3C2442, S3C2443"
select GENERIC_GPIO
+ select HAVE_IDE
help
Samsung S3C2410X CPU based systems, such as the Simtec Electronics
BAST (<http://www.simtec.co.uk/products/EB110ITX/>), the IPAQ 1940 or
@@ -427,6 +439,7 @@ config ARCH_S3C2410
config ARCH_SHARK
bool "Shark"
+ select HAVE_IDE
select ISA
select ISA_DMA
select PCI
@@ -436,6 +449,7 @@ config ARCH_SHARK
config ARCH_LH7A40X
bool "Sharp LH7A40X"
+ select HAVE_IDE
help
Say Y here for systems based on one of the Sharp LH7A40X
System on a Chip processors. These CPUs include an ARM922T
@@ -1093,12 +1107,7 @@ source "drivers/block/Kconfig"
source "drivers/misc/Kconfig"
-if PCMCIA || ARCH_CLPS7500 || ARCH_IOP32X || ARCH_IOP33X || ARCH_IXP4XX \
- || ARCH_L7200 || ARCH_LH7A40X || ARCH_PXA || ARCH_RPC \
- || ARCH_S3C2410 || ARCH_SA1100 || ARCH_SHARK || FOOTBRIDGE \
- || ARCH_IXP23XX
source "drivers/ide/Kconfig"
-endif
source "drivers/scsi/Kconfig"
diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig
index 28e0caf4156..c75d7089f98 100644
--- a/arch/avr32/Kconfig
+++ b/arch/avr32/Kconfig
@@ -10,6 +10,7 @@ config AVR32
# With EMBEDDED=n, we get lots of stuff automatically selected
# that we usually don't need on AVR32.
select EMBEDDED
+ select HAVE_IDE
select HAVE_OPROFILE
select HAVE_KPROBES
help
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index 368bc7fe167..589c6aca480 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -24,6 +24,7 @@ config RWSEM_XCHGADD_ALGORITHM
config BLACKFIN
bool
default y
+ select HAVE_IDE
select HAVE_OPROFILE
config ZONE_DMA
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 8456bc8efb7..9389d38f222 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -54,6 +54,7 @@ config FORCE_MAX_ZONEORDER
config CRIS
bool
default y
+ select HAVE_IDE
config HZ
int
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index 9e561ede092..a5aac1b0756 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -5,6 +5,7 @@
config FRV
bool
default y
+ select HAVE_IDE
config ZONE_DMA
bool
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index f69e5ea3855..085dc6ec152 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -8,6 +8,7 @@ mainmenu "uClinux/h8300 (w/o MMU) Kernel Configuration"
config H8300
bool
default y
+ select HAVE_IDE
config MMU
bool
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index b0de1132dfc..2d4fcd01bc9 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -15,6 +15,7 @@ config IA64
select ACPI if (!IA64_HP_SIM)
select PM if (!IA64_HP_SIM)
select ARCH_SUPPORTS_MSI
+ select HAVE_IDE
select HAVE_OPROFILE
select HAVE_KPROBES
default y
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 488e48a5dee..8fd7e825192 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -43,8 +43,7 @@
#define PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg) \
(((u64) seg << 28) | (bus << 20) | (devfn << 12) | (reg))
-static int
-pci_sal_read (unsigned int seg, unsigned int bus, unsigned int devfn,
+int raw_pci_read(unsigned int seg, unsigned int bus, unsigned int devfn,
int reg, int len, u32 *value)
{
u64 addr, data = 0;
@@ -68,8 +67,7 @@ pci_sal_read (unsigned int seg, unsigned int bus, unsigned int devfn,
return 0;
}
-static int
-pci_sal_write (unsigned int seg, unsigned int bus, unsigned int devfn,
+int raw_pci_write(unsigned int seg, unsigned int bus, unsigned int devfn,
int reg, int len, u32 value)
{
u64 addr;
@@ -91,24 +89,17 @@ pci_sal_write (unsigned int seg, unsigned int bus, unsigned int devfn,
return 0;
}
-static struct pci_raw_ops pci_sal_ops = {
- .read = pci_sal_read,
- .write = pci_sal_write
-};
-
-struct pci_raw_ops *raw_pci_ops = &pci_sal_ops;
-
-static int
-pci_read (struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value)
+static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
+ int size, u32 *value)
{
- return raw_pci_ops->read(pci_domain_nr(bus), bus->number,
+ return raw_pci_read(pci_domain_nr(bus), bus->number,
devfn, where, size, value);
}
-static int
-pci_write (struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value)
+static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
+ int size, u32 value)
{
- return raw_pci_ops->write(pci_domain_nr(bus), bus->number,
+ return raw_pci_write(pci_domain_nr(bus), bus->number,
devfn, where, size, value);
}
diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c
index e1a3e19d3d9..999f14f986e 100644
--- a/arch/ia64/sn/pci/tioce_provider.c
+++ b/arch/ia64/sn/pci/tioce_provider.c
@@ -752,13 +752,13 @@ tioce_kern_init(struct tioce_common *tioce_common)
* Determine the secondary bus number of the port2 logical PPB.
* This is used to decide whether a given pci device resides on
* port1 or port2. Note: We don't have enough plumbing set up
- * here to use pci_read_config_xxx() so use the raw_pci_ops vector.
+ * here to use pci_read_config_xxx() so use raw_pci_read().
*/
seg = tioce_common->ce_pcibus.bs_persist_segment;
bus = tioce_common->ce_pcibus.bs_persist_busnum;
- raw_pci_ops->read(seg, bus, PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1,&tmp);
+ raw_pci_read(seg, bus, PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1,&tmp);
tioce_kern->ce_port1_secondary = (u8) tmp;
/*
@@ -799,11 +799,11 @@ tioce_kern_init(struct tioce_common *tioce_common)
/* mem base/limit */
- raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+ raw_pci_read(seg, bus, PCI_DEVFN(dev, 0),
PCI_MEMORY_BASE, 2, &tmp);
base = (u64)tmp << 16;
- raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+ raw_pci_read(seg, bus, PCI_DEVFN(dev, 0),
PCI_MEMORY_LIMIT, 2, &tmp);
limit = (u64)tmp << 16;
limit |= 0xfffffUL;
@@ -817,21 +817,21 @@ tioce_kern_init(struct tioce_common *tioce_common)
* attributes.
*/
- raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+ raw_pci_read(seg, bus, PCI_DEVFN(dev, 0),
PCI_PREF_MEMORY_BASE, 2, &tmp);
base = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16;
- raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+ raw_pci_read(seg, bus, PCI_DEVFN(dev, 0),
PCI_PREF_BASE_UPPER32, 4, &tmp);
base |= (u64)tmp << 32;
- raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+ raw_pci_read(seg, bus, PCI_DEVFN(dev, 0),
PCI_PREF_MEMORY_LIMIT, 2, &tmp);
limit = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16;
limit |= 0xfffffUL;
- raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+ raw_pci_read(seg, bus, PCI_DEVFN(dev, 0),
PCI_PREF_LIMIT_UPPER32, 4, &tmp);
limit |= (u64)tmp << 32;
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index d4679ab55b9..de153de2ea9 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -8,6 +8,7 @@ mainmenu "Linux/M32R Kernel Configuration"
config M32R
bool
default y
+ select HAVE_IDE
select HAVE_OPROFILE
config SBUS
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 2b0ed89cd17..65db2261b9e 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -5,6 +5,7 @@
config M68K
bool
default y
+ select HAVE_IDE
config MMU
bool
diff --git a/arch/m68knommu/Kconfig b/arch/m68knommu/Kconfig
index 548a7b32163..07eb4c4bab8 100644
--- a/arch/m68knommu/Kconfig
+++ b/arch/m68knommu/Kconfig
@@ -8,6 +8,7 @@ mainmenu "uClinux/68k (w/o MMU) Kernel Configuration"
config M68K
bool
default y
+ select HAVE_IDE
config MMU
bool
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index ec78a5762e9..ade230d445d 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1,6 +1,7 @@
config MIPS
bool
default y
+ select HAVE_IDE
select HAVE_OPROFILE
# Horrible source of confusion. Die, die, die ...
select EMBEDDED
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index d929ac84f25..bc7a19da624 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -7,6 +7,7 @@ mainmenu "Linux/PA-RISC Kernel Configuration"
config PARISC
def_bool y
+ select HAVE_IDE
select HAVE_OPROFILE
help
The PA-RISC microprocessor is designed by Hewlett-Packard and used
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 26b963c33c8..485513c9f1a 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -87,6 +87,7 @@ config ARCH_NO_VIRT_TO_BUS
config PPC
bool
default y
+ select HAVE_IDE
select HAVE_OPROFILE
select HAVE_KPROBES
diff --git a/arch/ppc/Kconfig b/arch/ppc/Kconfig
index 531156f8919..abc877faf12 100644
--- a/arch/ppc/Kconfig
+++ b/arch/ppc/Kconfig
@@ -42,6 +42,7 @@ config GENERIC_CALIBRATE_DELAY
config PPC
bool
default y
+ select HAVE_IDE
select HAVE_OPROFILE
select HAVE_KPROBES
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index ece7b99da89..39921f3a968 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -1,12 +1,13 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.23
-# Mon Oct 22 12:10:44 2007
+# Linux kernel version: 2.6.24
+# Sat Feb 9 12:13:01 2008
#
CONFIG_MMU=y
CONFIG_ZONE_DMA=y
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_HAVE_LATENCYTOP_SUPPORT=y
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
# CONFIG_ARCH_HAS_ILOG2_U32 is not set
# CONFIG_ARCH_HAS_ILOG2_U64 is not set
@@ -15,6 +16,7 @@ CONFIG_GENERIC_TIME=y
CONFIG_GENERIC_BUG=y
CONFIG_NO_IOMEM=y
CONFIG_NO_DMA=y
+CONFIG_GENERIC_LOCKBREAK=y
CONFIG_S390=y
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
@@ -32,7 +34,6 @@ CONFIG_SYSVIPC_SYSCTL=y
CONFIG_POSIX_MQUEUE=y
# CONFIG_BSD_PROCESS_ACCT is not set
# CONFIG_TASKSTATS is not set
-# CONFIG_USER_NS is not set
CONFIG_AUDIT=y
# CONFIG_AUDITSYSCALL is not set
CONFIG_IKCONFIG=y
@@ -41,13 +42,19 @@ CONFIG_LOG_BUF_SHIFT=17
CONFIG_CGROUPS=y
# CONFIG_CGROUP_DEBUG is not set
CONFIG_CGROUP_NS=y
-CONFIG_CGROUP_CPUACCT=y
# CONFIG_CPUSETS is not set
CONFIG_FAIR_GROUP_SCHED=y
CONFIG_FAIR_USER_SCHED=y
# CONFIG_FAIR_CGROUP_SCHED is not set
+# CONFIG_CGROUP_CPUACCT is not set
+# CONFIG_RESOURCE_COUNTERS is not set
CONFIG_SYSFS_DEPRECATED=y
# CONFIG_RELAY is not set
+CONFIG_NAMESPACES=y
+CONFIG_UTS_NS=y
+CONFIG_IPC_NS=y
+# CONFIG_USER_NS is not set
+# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_INITRAMFS_SOURCE=""
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
@@ -61,17 +68,26 @@ CONFIG_HOTPLUG=y
CONFIG_PRINTK=y
CONFIG_BUG=y
CONFIG_ELF_CORE=y
+# CONFIG_COMPAT_BRK is not set
CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
CONFIG_ANON_INODES=y
CONFIG_EPOLL=y
CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
CONFIG_EVENTFD=y
CONFIG_SHMEM=y
CONFIG_VM_EVENT_COUNTERS=y
CONFIG_SLAB=y
# CONFIG_SLUB is not set
# CONFIG_SLOB is not set
+# CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
+CONFIG_HAVE_OPROFILE=y
+CONFIG_KPROBES=y
+CONFIG_HAVE_KPROBES=y
+CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_SLABINFO=y
CONFIG_RT_MUTEXES=y
# CONFIG_TINY_SHMEM is not set
CONFIG_BASE_SMALL=0
@@ -99,6 +115,8 @@ CONFIG_DEFAULT_DEADLINE=y
# CONFIG_DEFAULT_CFQ is not set
# CONFIG_DEFAULT_NOOP is not set
CONFIG_DEFAULT_IOSCHED="deadline"
+CONFIG_CLASSIC_RCU=y
+# CONFIG_PREEMPT_RCU is not set
#
# Base setup
@@ -137,7 +155,7 @@ CONFIG_ARCH_POPULATES_NODE_MAP=y
# CONFIG_PREEMPT_NONE is not set
# CONFIG_PREEMPT_VOLUNTARY is not set
CONFIG_PREEMPT=y
-CONFIG_PREEMPT_BKL=y
+# CONFIG_RCU_TRACE is not set
CONFIG_SELECT_MEMORY_MODEL=y
CONFIG_FLATMEM_MANUAL=y
# CONFIG_DISCONTIGMEM_MANUAL is not set
@@ -151,7 +169,6 @@ CONFIG_RESOURCES_64BIT=y
CONFIG_ZONE_DMA_FLAG=1
CONFIG_BOUNCE=y
CONFIG_VIRT_TO_BUS=y
-CONFIG_HOLES_IN_ZONE=y
#
# I/O subsystem configuration
@@ -180,6 +197,7 @@ CONFIG_HZ_100=y
# CONFIG_HZ_300 is not set
# CONFIG_HZ_1000 is not set
CONFIG_HZ=100
+# CONFIG_SCHED_HRTICK is not set
CONFIG_NO_IDLE_HZ=y
CONFIG_NO_IDLE_HZ_INIT=y
CONFIG_S390_HYPFS_FS=y
@@ -201,6 +219,7 @@ CONFIG_XFRM=y
# CONFIG_XFRM_USER is not set
# CONFIG_XFRM_SUB_POLICY is not set
# CONFIG_XFRM_MIGRATE is not set
+# CONFIG_XFRM_STATISTICS is not set
CONFIG_NET_KEY=y
# CONFIG_NET_KEY_MIGRATE is not set
CONFIG_IUCV=m
@@ -251,6 +270,7 @@ CONFIG_IPV6_SIT=y
# CONFIG_NETWORK_SECMARK is not set
CONFIG_NETFILTER=y
# CONFIG_NETFILTER_DEBUG is not set
+CONFIG_NETFILTER_ADVANCED=y
#
# Core Netfilter Configuration
@@ -258,7 +278,6 @@ CONFIG_NETFILTER=y
CONFIG_NETFILTER_NETLINK=m
CONFIG_NETFILTER_NETLINK_QUEUE=m
CONFIG_NETFILTER_NETLINK_LOG=m
-CONFIG_NF_CONNTRACK_ENABLED=m
CONFIG_NF_CONNTRACK=m
# CONFIG_NF_CT_ACCT is not set
# CONFIG_NF_CONNTRACK_MARK is not set
@@ -286,7 +305,7 @@ CONFIG_NF_CONNTRACK=m
# CONFIG_IP_NF_ARPTABLES is not set
#
-# IPv6: Netfilter Configuration (EXPERIMENTAL)
+# IPv6: Netfilter Configuration
#
# CONFIG_NF_CONNTRACK_IPV6 is not set
# CONFIG_IP6_NF_QUEUE is not set
@@ -343,6 +362,7 @@ CONFIG_NET_CLS_U32=m
CONFIG_CLS_U32_MARK=y
CONFIG_NET_CLS_RSVP=m
CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_FLOW=m
# CONFIG_NET_EMATCH is not set
CONFIG_NET_CLS_ACT=y
CONFIG_NET_ACT_POLICE=y
@@ -351,7 +371,6 @@ CONFIG_NET_ACT_POLICE=y
CONFIG_NET_ACT_NAT=m
# CONFIG_NET_ACT_PEDIT is not set
# CONFIG_NET_ACT_SIMP is not set
-CONFIG_NET_CLS_POLICE=y
# CONFIG_NET_CLS_IND is not set
CONFIG_NET_SCH_FIFO=y
@@ -360,6 +379,15 @@ CONFIG_NET_SCH_FIFO=y
#
# CONFIG_NET_PKTGEN is not set
# CONFIG_NET_TCPPROBE is not set
+CONFIG_CAN=m
+CONFIG_CAN_RAW=m
+CONFIG_CAN_BCM=m
+
+#
+# CAN Device Drivers
+#
+CONFIG_CAN_VCAN=m
+# CONFIG_CAN_DEBUG_DEVICES is not set
# CONFIG_AF_RXRPC is not set
# CONFIG_RFKILL is not set
# CONFIG_NET_9P is not set
@@ -389,7 +417,7 @@ CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=16
CONFIG_BLK_DEV_RAM_SIZE=4096
-CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024
+CONFIG_BLK_DEV_XIP=y
# CONFIG_CDROM_PKTCDVD is not set
# CONFIG_ATA_OVER_ETH is not set
@@ -406,6 +434,7 @@ CONFIG_DASD_DIAG=y
CONFIG_DASD_EER=y
CONFIG_MISC_DEVICES=y
# CONFIG_EEPROM_93CX6 is not set
+# CONFIG_ENCLOSURE_SERVICES is not set
#
# SCSI device support
@@ -487,6 +516,7 @@ CONFIG_NET_ETHERNET=y
# CONFIG_IBM_NEW_EMAC_TAH is not set
# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
CONFIG_NETDEV_1000=y
+# CONFIG_E1000E_ENABLED is not set
CONFIG_NETDEV_10000=y
# CONFIG_TR is not set
# CONFIG_WAN is not set
@@ -508,7 +538,6 @@ CONFIG_QETH=y
CONFIG_CCWGROUP=y
# CONFIG_PPP is not set
# CONFIG_SLIP is not set
-# CONFIG_SHAPER is not set
# CONFIG_NETCONSOLE is not set
# CONFIG_NETPOLL is not set
# CONFIG_NET_POLL_CONTROLLER is not set
@@ -558,6 +587,7 @@ CONFIG_S390_TAPE_34XX=m
CONFIG_MONWRITER=m
CONFIG_S390_VMUR=m
# CONFIG_POWER_SUPPLY is not set
+# CONFIG_THERMAL is not set
# CONFIG_WATCHDOG is not set
#
@@ -584,12 +614,10 @@ CONFIG_FS_POSIX_ACL=y
# CONFIG_XFS_FS is not set
# CONFIG_GFS2_FS is not set
# CONFIG_OCFS2_FS is not set
-# CONFIG_MINIX_FS is not set
-# CONFIG_ROMFS_FS is not set
+CONFIG_DNOTIFY=y
CONFIG_INOTIFY=y
CONFIG_INOTIFY_USER=y
# CONFIG_QUOTA is not set
-CONFIG_DNOTIFY=y
# CONFIG_AUTOFS_FS is not set
# CONFIG_AUTOFS4_FS is not set
# CONFIG_FUSE_FS is not set
@@ -632,8 +660,10 @@ CONFIG_CONFIGFS_FS=m
# CONFIG_EFS_FS is not set
# CONFIG_CRAMFS is not set
# CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
# CONFIG_HPFS_FS is not set
# CONFIG_QNX4FS_FS is not set
+# CONFIG_ROMFS_FS is not set
# CONFIG_SYSV_FS is not set
# CONFIG_UFS_FS is not set
CONFIG_NETWORK_FILESYSTEMS=y
@@ -686,16 +716,13 @@ CONFIG_MSDOS_PARTITION=y
# CONFIG_NLS is not set
CONFIG_DLM=m
# CONFIG_DLM_DEBUG is not set
-CONFIG_INSTRUMENTATION=y
-# CONFIG_PROFILING is not set
-CONFIG_KPROBES=y
-# CONFIG_MARKERS is not set
#
# Kernel hacking
#
CONFIG_TRACE_IRQFLAGS_SUPPORT=y
# CONFIG_PRINTK_TIME is not set
+CONFIG_ENABLE_WARN_DEPRECATED=y
CONFIG_ENABLE_MUST_CHECK=y
CONFIG_MAGIC_SYSRQ=y
# CONFIG_UNUSED_SYMBOLS is not set
@@ -721,12 +748,18 @@ CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_INFO is not set
# CONFIG_DEBUG_VM is not set
# CONFIG_DEBUG_LIST is not set
+# CONFIG_DEBUG_SG is not set
# CONFIG_FRAME_POINTER is not set
CONFIG_FORCED_INLINING=y
# CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_KPROBES_SANITY_TEST is not set
+# CONFIG_BACKTRACE_SELF_TEST is not set
# CONFIG_LKDTM is not set
# CONFIG_FAULT_INJECTION is not set
+# CONFIG_LATENCYTOP is not set
CONFIG_SAMPLES=y
+# CONFIG_SAMPLE_KOBJECT is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
#
# Security options
@@ -738,6 +771,7 @@ CONFIG_CRYPTO=y
CONFIG_CRYPTO_ALGAPI=y
CONFIG_CRYPTO_AEAD=m
CONFIG_CRYPTO_BLKCIPHER=y
+CONFIG_CRYPTO_SEQIV=m
CONFIG_CRYPTO_HASH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_HMAC=m
@@ -745,17 +779,20 @@ CONFIG_CRYPTO_HMAC=m
# CONFIG_CRYPTO_NULL is not set
# CONFIG_CRYPTO_MD4 is not set
CONFIG_CRYPTO_MD5=m
-# CONFIG_CRYPTO_SHA1 is not set
+CONFIG_CRYPTO_SHA1=m
# CONFIG_CRYPTO_SHA256 is not set
# CONFIG_CRYPTO_SHA512 is not set
# CONFIG_CRYPTO_WP512 is not set
# CONFIG_CRYPTO_TGR192 is not set
-# CONFIG_CRYPTO_GF128MUL is not set
+CONFIG_CRYPTO_GF128MUL=m
CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_PCBC=m
# CONFIG_CRYPTO_LRW is not set
# CONFIG_CRYPTO_XTS is not set
+CONFIG_CRYPTO_CTR=m
+CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CCM=m
# CONFIG_CRYPTO_CRYPTD is not set
# CONFIG_CRYPTO_DES is not set
CONFIG_CRYPTO_FCRYPT=m
@@ -770,20 +807,22 @@ CONFIG_CRYPTO_FCRYPT=m
# CONFIG_CRYPTO_KHAZAD is not set
# CONFIG_CRYPTO_ANUBIS is not set
CONFIG_CRYPTO_SEED=m
+CONFIG_CRYPTO_SALSA20=m
# CONFIG_CRYPTO_DEFLATE is not set
# CONFIG_CRYPTO_MICHAEL_MIC is not set
# CONFIG_CRYPTO_CRC32C is not set
CONFIG_CRYPTO_CAMELLIA=m
# CONFIG_CRYPTO_TEST is not set
CONFIG_CRYPTO_AUTHENC=m
+CONFIG_CRYPTO_LZO=m
CONFIG_CRYPTO_HW=y
+CONFIG_ZCRYPT=m
+# CONFIG_ZCRYPT_MONOLITHIC is not set
# CONFIG_CRYPTO_SHA1_S390 is not set
# CONFIG_CRYPTO_SHA256_S390 is not set
# CONFIG_CRYPTO_DES_S390 is not set
# CONFIG_CRYPTO_AES_S390 is not set
CONFIG_S390_PRNG=m
-CONFIG_ZCRYPT=m
-# CONFIG_ZCRYPT_MONOLITHIC is not set
#
# Library routines
@@ -794,5 +833,7 @@ CONFIG_BITREVERSE=m
# CONFIG_CRC_ITU_T is not set
CONFIG_CRC32=m
CONFIG_CRC7=m
-# CONFIG_LIBCRC32C is not set
+CONFIG_LIBCRC32C=m
+CONFIG_LZO_COMPRESS=m
+CONFIG_LZO_DECOMPRESS=m
CONFIG_PLIST=y
diff --git a/arch/s390/kernel/binfmt_elf32.c b/arch/s390/kernel/binfmt_elf32.c
index f1e40ca00d8..3e1c315b736 100644
--- a/arch/s390/kernel/binfmt_elf32.c
+++ b/arch/s390/kernel/binfmt_elf32.c
@@ -134,6 +134,7 @@ static inline int dump_task_fpu(struct task_struct *tsk, elf_fpregset_t *fpregs)
}
#include <asm/processor.h>
+#include <asm/pgalloc.h>
#include <linux/module.h>
#include <linux/elfcore.h>
#include <linux/binfmts.h>
@@ -183,6 +184,16 @@ struct elf_prpsinfo32
#undef start_thread
#define start_thread start_thread31
+static inline void start_thread31(struct pt_regs *regs, unsigned long new_psw,
+ unsigned long new_stackp)
+{
+ set_fs(USER_DS);
+ regs->psw.mask = psw_user32_bits;
+ regs->psw.addr = new_psw;
+ regs->gprs[15] = new_stackp;
+ crst_table_downgrade(current->mm, 1UL << 31);
+}
+
MODULE_DESCRIPTION("Binary format loader for compatibility with 32bit Linux for S390 binaries,"
" Copyright 2000 IBM Corporation");
MODULE_AUTHOR("Gerhard Tonn <ton@de.ibm.com>");
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 062c3d4c039..743d54f0b8d 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -1712,3 +1712,23 @@ sys_fallocate_wrapper:
sllg %r5,%r6,32 # get high word of 64bit loff_t
l %r5,164(%r15) # get low word of 64bit loff_t
jg sys_fallocate
+
+ .globl sys_timerfd_create_wrapper
+sys_timerfd_create_wrapper:
+ lgfr %r2,%r2 # int
+ lgfr %r3,%r3 # int
+ jg sys_timerfd_create
+
+ .globl compat_sys_timerfd_settime_wrapper
+compat_sys_timerfd_settime_wrapper:
+ lgfr %r2,%r2 # int
+ lgfr %r3,%r3 # int
+ llgtr %r4,%r4 # struct compat_itimerspec *
+ llgtr %r5,%r5 # struct compat_itimerspec *
+ jg compat_sys_timerfd_settime
+
+ .globl compat_sys_timerfd_gettime_wrapper
+compat_sys_timerfd_gettime_wrapper:
+ lgfr %r2,%r2 # int
+ llgtr %r3,%r3 # struct compat_itimerspec *
+ jg compat_sys_timerfd_gettime
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 0e7aca03930..a6a4729e0e9 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -29,7 +29,6 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/user.h>
-#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/reboot.h>
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index f9f8779022a..290e504061a 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -24,7 +24,6 @@
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/user.h>
-#include <linux/a.out.h>
#include <linux/tty.h>
#include <linux/ioport.h>
#include <linux/delay.h>
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 25eac7802fc..c87ec687d4c 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -327,3 +327,6 @@ SYSCALL(sys_utimensat,sys_utimensat,compat_sys_utimensat_wrapper) /* 315 */
SYSCALL(sys_signalfd,sys_signalfd,compat_sys_signalfd_wrapper)
NI_SYSCALL /* 317 old sys_timer_fd */
SYSCALL(sys_eventfd,sys_eventfd,sys_eventfd_wrapper)
+SYSCALL(sys_timerfd_create,sys_timerfd_create,sys_timerfd_create_wrapper)
+SYSCALL(sys_timerfd_settime,sys_timerfd_settime,compat_sys_timerfd_settime_wrapper) /* 320 */
+SYSCALL(sys_timerfd_gettime,sys_timerfd_gettime,compat_sys_timerfd_gettime_wrapper)
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index a4d29025ddb..60f728aeaf1 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -60,6 +60,7 @@ int sysctl_userprocess_debug = 0;
extern pgm_check_handler_t do_protection_exception;
extern pgm_check_handler_t do_dat_exception;
extern pgm_check_handler_t do_monitor_call;
+extern pgm_check_handler_t do_asce_exception;
#define stack_pointer ({ void **sp; asm("la %0,0(15)" : "=&d" (sp)); sp; })
@@ -730,7 +731,7 @@ void __init trap_init(void)
pgm_check_table[0x12] = &translation_exception;
pgm_check_table[0x13] = &special_op_exception;
#ifdef CONFIG_64BIT
- pgm_check_table[0x38] = &do_dat_exception;
+ pgm_check_table[0x38] = &do_asce_exception;
pgm_check_table[0x39] = &do_dat_exception;
pgm_check_table[0x3A] = &do_dat_exception;
pgm_check_table[0x3B] = &do_dat_exception;
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 2456b52ed06..ed13d429a48 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -32,6 +32,7 @@
#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/s390_ext.h>
+#include <asm/mmu_context.h>
#ifndef CONFIG_64BIT
#define __FAIL_ADDR_MASK 0x7ffff000
@@ -444,6 +445,45 @@ void __kprobes do_dat_exception(struct pt_regs *regs, unsigned long error_code)
do_exception(regs, error_code & 0xff, 0);
}
+#ifdef CONFIG_64BIT
+void __kprobes do_asce_exception(struct pt_regs *regs, unsigned long error_code)
+{
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ unsigned long address;
+ int space;
+
+ mm = current->mm;
+ address = S390_lowcore.trans_exc_code & __FAIL_ADDR_MASK;
+ space = check_space(current);
+
+ if (unlikely(space == 0 || in_atomic() || !mm))
+ goto no_context;
+
+ local_irq_enable();
+
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, address);
+ up_read(&mm->mmap_sem);
+
+ if (vma) {
+ update_mm(mm, current);
+ return;
+ }
+
+ /* User mode accesses just cause a SIGSEGV */
+ if (regs->psw.mask & PSW_MASK_PSTATE) {
+ current->thread.prot_addr = address;
+ current->thread.trap_no = error_code;
+ do_sigsegv(regs, error_code, SEGV_MAPERR, address);
+ return;
+ }
+
+no_context:
+ do_no_context(regs, error_code, address);
+}
+#endif
+
#ifdef CONFIG_PFAULT
/*
* 'pfault' pseudo page faults routines.
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 983ec6ec0e7..8053245fe25 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -112,6 +112,7 @@ void __init paging_init(void)
init_mm.pgd = swapper_pg_dir;
S390_lowcore.kernel_asce = __pa(init_mm.pgd) & PAGE_MASK;
#ifdef CONFIG_64BIT
+ /* A three level page table (4TB) is enough for the kernel space. */
S390_lowcore.kernel_asce |= _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
pgd_type = _REGION3_ENTRY_EMPTY;
#else
@@ -184,7 +185,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
pmd = pmd_offset(pud, address);
pte = pte_offset_kernel(pmd, address);
if (!enable) {
- ptep_invalidate(address, pte);
+ ptep_invalidate(&init_mm, address, pte);
continue;
}
*pte = mk_pte_phys(address, __pgprot(_PAGE_TYPE_RW));
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 356257c171d..5932a824547 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -27,6 +27,7 @@
#include <linux/personality.h>
#include <linux/mm.h>
#include <linux/module.h>
+#include <asm/pgalloc.h>
/*
* Top of mmap area (just below the process stack).
@@ -62,6 +63,8 @@ static inline int mmap_is_legacy(void)
current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY;
}
+#ifndef CONFIG_64BIT
+
/*
* This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use:
@@ -84,3 +87,65 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
}
EXPORT_SYMBOL_GPL(arch_pick_mmap_layout);
+#else
+
+static unsigned long
+s390_get_unmapped_area(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+ struct mm_struct *mm = current->mm;
+ int rc;
+
+ addr = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
+ if (addr & ~PAGE_MASK)
+ return addr;
+ if (unlikely(mm->context.asce_limit < addr + len)) {
+ rc = crst_table_upgrade(mm, addr + len);
+ if (rc)
+ return (unsigned long) rc;
+ }
+ return addr;
+}
+
+static unsigned long
+s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+ const unsigned long len, const unsigned long pgoff,
+ const unsigned long flags)
+{
+ struct mm_struct *mm = current->mm;
+ unsigned long addr = addr0;
+ int rc;
+
+ addr = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
+ if (addr & ~PAGE_MASK)
+ return addr;
+ if (unlikely(mm->context.asce_limit < addr + len)) {
+ rc = crst_table_upgrade(mm, addr + len);
+ if (rc)
+ return (unsigned long) rc;
+ }
+ return addr;
+}
+/*
+ * This function, called very early during the creation of a new
+ * process VM image, sets up which VM layout function to use:
+ */
+void arch_pick_mmap_layout(struct mm_struct *mm)
+{
+ /*
+ * Fall back to the standard layout if the personality
+ * bit is set, or if the expected stack growth is unlimited:
+ */
+ if (mmap_is_legacy()) {
+ mm->mmap_base = TASK_UNMAPPED_BASE;
+ mm->get_unmapped_area = s390_get_unmapped_area;
+ mm->unmap_area = arch_unmap_area;
+ } else {
+ mm->mmap_base = mmap_base();
+ mm->get_unmapped_area = s390_get_unmapped_area_topdown;
+ mm->unmap_area = arch_unmap_area_topdown;
+ }
+}
+EXPORT_SYMBOL_GPL(arch_pick_mmap_layout);
+
+#endif
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 019f518cd5a..fd072013f88 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -23,11 +23,18 @@
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
#ifndef CONFIG_64BIT
#define ALLOC_ORDER 1
+#define TABLES_PER_PAGE 4
+#define FRAG_MASK 15UL
+#define SECOND_HALVES 10UL
#else
#define ALLOC_ORDER 2
+#define TABLES_PER_PAGE 2
+#define FRAG_MASK 3UL
+#define SECOND_HALVES 2UL
#endif
unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
@@ -45,52 +52,179 @@ unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
}
page->index = page_to_phys(shadow);
}
+ spin_lock(&mm->page_table_lock);
+ list_add(&page->lru, &mm->context.crst_list);
+ spin_unlock(&mm->page_table_lock);
return (unsigned long *) page_to_phys(page);
}
-void crst_table_free(unsigned long *table)
+void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
unsigned long *shadow = get_shadow_table(table);
+ struct page *page = virt_to_page(table);
+ spin_lock(&mm->page_table_lock);
+ list_del(&page->lru);
+ spin_unlock(&mm->page_table_lock);
if (shadow)
free_pages((unsigned long) shadow, ALLOC_ORDER);
free_pages((unsigned long) table, ALLOC_ORDER);
}
+#ifdef CONFIG_64BIT
+int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
+{
+ unsigned long *table, *pgd;
+ unsigned long entry;
+
+ BUG_ON(limit > (1UL << 53));
+repeat:
+ table = crst_table_alloc(mm, mm->context.noexec);
+ if (!table)
+ return -ENOMEM;
+ spin_lock(&mm->page_table_lock);
+ if (mm->context.asce_limit < limit) {
+ pgd = (unsigned long *) mm->pgd;
+ if (mm->context.asce_limit <= (1UL << 31)) {
+ entry = _REGION3_ENTRY_EMPTY;
+ mm->context.asce_limit = 1UL << 42;
+ mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS |
+ _ASCE_TYPE_REGION3;
+ } else {
+ entry = _REGION2_ENTRY_EMPTY;
+ mm->context.asce_limit = 1UL << 53;
+ mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS |
+ _ASCE_TYPE_REGION2;
+ }
+ crst_table_init(table, entry);
+ pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
+ mm->pgd = (pgd_t *) table;
+ table = NULL;
+ }
+ spin_unlock(&mm->page_table_lock);
+ if (table)
+ crst_table_free(mm, table);
+ if (mm->context.asce_limit < limit)
+ goto repeat;
+ update_mm(mm, current);
+ return 0;
+}
+
+void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
+{
+ pgd_t *pgd;
+
+ if (mm->context.asce_limit <= limit)
+ return;
+ __tlb_flush_mm(mm);
+ while (mm->context.asce_limit > limit) {
+ pgd = mm->pgd;
+ switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
+ case _REGION_ENTRY_TYPE_R2:
+ mm->context.asce_limit = 1UL << 42;
+ mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS |
+ _ASCE_TYPE_REGION3;
+ break;
+ case _REGION_ENTRY_TYPE_R3:
+ mm->context.asce_limit = 1UL << 31;
+ mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS |
+ _ASCE_TYPE_SEGMENT;
+ break;
+ default:
+ BUG();
+ }
+ mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
+ crst_table_free(mm, (unsigned long *) pgd);
+ }
+ update_mm(mm, current);
+}
+#endif
+
/*
* page table entry allocation/free routines.
*/
-unsigned long *page_table_alloc(int noexec)
+unsigned long *page_table_alloc(struct mm_struct *mm)
{
- struct page *page = alloc_page(GFP_KERNEL);
+ struct page *page;
unsigned long *table;
+ unsigned long bits;
- if (!page)
- return NULL;
- page->index = 0;
- if (noexec) {
- struct page *shadow = alloc_page(GFP_KERNEL);
- if (!shadow) {
- __free_page(page);
+ bits = mm->context.noexec ? 3UL : 1UL;
+ spin_lock(&mm->page_table_lock);
+ page = NULL;
+ if (!list_empty(&mm->context.pgtable_list)) {
+ page = list_first_entry(&mm->context.pgtable_list,
+ struct page, lru);
+ if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
+ page = NULL;
+ }
+ if (!page) {
+ spin_unlock(&mm->page_table_lock);
+ page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+ if (!page)
return NULL;
- }
- table = (unsigned long *) page_to_phys(shadow);
+ pgtable_page_ctor(page);
+ page->flags &= ~FRAG_MASK;
+ table = (unsigned long *) page_to_phys(page);
clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
- page->index = (addr_t) table;
+ spin_lock(&mm->page_table_lock);
+ list_add(&page->lru, &mm->context.pgtable_list);
}
- pgtable_page_ctor(page);
table = (unsigned long *) page_to_phys(page);
- clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+ while (page->flags & bits) {
+ table += 256;
+ bits <<= 1;
+ }
+ page->flags |= bits;
+ if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
+ list_move_tail(&page->lru, &mm->context.pgtable_list);
+ spin_unlock(&mm->page_table_lock);
return table;
}
-void page_table_free(unsigned long *table)
+void page_table_free(struct mm_struct *mm, unsigned long *table)
{
- unsigned long *shadow = get_shadow_pte(table);
+ struct page *page;
+ unsigned long bits;
- pgtable_page_dtor(virt_to_page(table));
- if (shadow)
- free_page((unsigned long) shadow);
- free_page((unsigned long) table);
+ bits = mm->context.noexec ? 3UL : 1UL;
+ bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ spin_lock(&mm->page_table_lock);
+ page->flags ^= bits;
+ if (page->flags & FRAG_MASK) {
+ /* Page now has some free pgtable fragments. */
+ list_move(&page->lru, &mm->context.pgtable_list);
+ page = NULL;
+ } else
+ /* All fragments of the 4K page have been freed. */
+ list_del(&page->lru);
+ spin_unlock(&mm->page_table_lock);
+ if (page) {
+ pgtable_page_dtor(page);
+ __free_page(page);
+ }
+}
+void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
+{
+ struct page *page;
+
+ spin_lock(&mm->page_table_lock);
+ /* Free shadow region and segment tables. */
+ list_for_each_entry(page, &mm->context.crst_list, lru)
+ if (page->index) {
+ free_pages((unsigned long) page->index, ALLOC_ORDER);
+ page->index = 0;
+ }
+ /* "Free" second halves of page tables. */
+ list_for_each_entry(page, &mm->context.pgtable_list, lru)
+ page->flags &= ~SECOND_HALVES;
+ spin_unlock(&mm->page_table_lock);
+ mm->context.noexec = 0;
+ update_mm(mm, tsk);
}
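
Taken together, the hunks above give 64-bit s390 dynamically sized process page tables: crst_table_upgrade() grows a 2 GB (1UL << 31) segment-table address space to a 4 TB (1UL << 42) region-third table and then to 8 PB (1UL << 53) with a region-second table, s390_get_unmapped_area() triggers the upgrade whenever a new mapping does not fit under the current ASCE limit, and start_thread31() downgrades compat tasks back to 2 GB. A minimal caller-side sketch of that check, using only helpers introduced by the patch (sketch_ensure_limit() itself is hypothetical, for illustration only):

/* Sketch only: the hunks above perform this check inside
 * s390_get_unmapped_area(); sketch_ensure_limit() is not part of
 * the patch. */
static int sketch_ensure_limit(struct mm_struct *mm, unsigned long end)
{
	if (end <= mm->context.asce_limit)
		return 0;	/* current table already covers the range */
	/* crst_table_upgrade() BUG()s beyond 1UL << 53, so reject first */
	if (end > (1UL << 53))
		return -ENOMEM;
	/* Iterates internally: 2 GB -> 4 TB -> 8 PB, one level per pass */
	return crst_table_upgrade(mm, end);
}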
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 7c1287ccf78..35d90a4720f 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -69,7 +69,19 @@ static void __ref *vmem_alloc_pages(unsigned int order)
return alloc_bootmem_pages((1 << order) * PAGE_SIZE);
}
-#define vmem_pud_alloc() ({ BUG(); ((pud_t *) NULL); })
+static inline pud_t *vmem_pud_alloc(void)
+{
+ pud_t *pud = NULL;
+
+#ifdef CONFIG_64BIT
+ pud = vmem_alloc_pages(2);
+ if (!pud)
+ return NULL;
+ pud_val(*pud) = _REGION3_ENTRY_EMPTY;
+ memcpy(pud + 1, pud, (PTRS_PER_PUD - 1)*sizeof(pud_t));
+#endif
+ return pud;
+}
static inline pmd_t *vmem_pmd_alloc(void)
{
@@ -84,13 +96,18 @@ static inline pmd_t *vmem_pmd_alloc(void)
return pmd;
}
-static inline pte_t *vmem_pte_alloc(void)
+static pte_t __init_refok *vmem_pte_alloc(void)
{
- pte_t *pte = vmem_alloc_pages(0);
+ pte_t *pte;
+ if (slab_is_available())
+ pte = (pte_t *) page_table_alloc(&init_mm);
+ else
+ pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t));
if (!pte)
return NULL;
- clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+ clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY,
+ PTRS_PER_PTE * sizeof(pte_t));
return pte;
}
@@ -360,6 +377,9 @@ void __init vmem_map_init(void)
{
int i;
+ INIT_LIST_HEAD(&init_mm.context.crst_list);
+ INIT_LIST_HEAD(&init_mm.context.pgtable_list);
+ init_mm.context.noexec = 0;
NODE_DATA(0)->node_mem_map = VMEM_MAP;
for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++)
vmem_add_mem(memory_chunk[i].addr, memory_chunk[i].size);
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 6e035d1cf78..d87d4bf8880 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -8,6 +8,7 @@ mainmenu "Linux/SuperH Kernel Configuration"
config SUPERH
def_bool y
select EMBEDDED
+ select HAVE_IDE
select HAVE_OPROFILE
help
The SuperH is a RISC processor targeted for use in embedded systems
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 7c674a3503b..c40343c5492 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -69,6 +69,7 @@ config NR_CPUS
config SPARC
bool
default y
+ select HAVE_IDE
select HAVE_OPROFILE
# Identify this as a Sparc32 build
diff --git a/arch/sparc/kernel/errtbls.c b/arch/sparc/kernel/errtbls.c
index bb36f6eadfe..ed14df7116e 100644
--- a/arch/sparc/kernel/errtbls.c
+++ b/arch/sparc/kernel/errtbls.c
@@ -1,21 +1,18 @@
-/* $Id: errtbls.c,v 1.2 1995/11/25 00:57:55 davem Exp $
- * errtbls.c: Error number conversion tables between various syscall
- * OS semantics.
+/* errtbls.c: Error number conversion tables.
*
- * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1995, 2007 David S. Miller (davem@davemloft.net)
*
* Based upon preliminary work which is:
*
* Copyright (C) 1995 Adrian M. Rodriguez (adrian@remus.rutgers.edu)
*/
-#include <asm/bsderrno.h> /* NetBSD (bsd4.4) errnos */
#include <asm/solerrno.h> /* Solaris errnos */
-/* Here are tables which convert between Linux/SunOS error number
- * values to the equivalent in other OSs. Note that since the Linux
- * ones have been set up to match exactly those of SunOS, no
- * translation table is needed for that OS.
+/* Here is the table which converts between Linux error number values
+ * to the equivalent under Solaris. Note that since the Linux ones
+ * have been set up to match exactly those of SunOS, no translation
+ * table is needed for that OS.
*/
int solaris_errno[] = {
@@ -145,132 +142,3 @@ int solaris_errno[] = {
SOL_ELIBMAX,
SOL_ELIBSCN,
};
-
-int netbsd_errno[] = {
- 0,
- BSD_EPERM,
- BSD_ENOENT,
- BSD_ESRCH,
- BSD_EINTR,
- BSD_EIO,
- BSD_ENXIO,
- BSD_E2BIG,
- BSD_ENOEXEC,
- BSD_EBADF,
- BSD_ECHILD,
- BSD_EAGAIN,
- BSD_ENOMEM,
- BSD_EACCES,
- BSD_EFAULT,
- BSD_NOTBLK,
- BSD_EBUSY,
- BSD_EEXIST,
- BSD_EXDEV,
- BSD_ENODEV,
- BSD_ENOTDIR,
- BSD_EISDIR,
- BSD_EINVAL,
- BSD_ENFILE,
- BSD_EMFILE,
- BSD_ENOTTY,
- BSD_ETXTBSY,
- BSD_EFBIG,
- BSD_ENOSPC,
- BSD_ESPIPE,
- BSD_EROFS,
- BSD_EMLINK,
- BSD_EPIPE,
- BSD_EDOM,
- BSD_ERANGE,
- BSD_EWOULDBLOCK,
- BSD_EINPROGRESS,
- BSD_EALREADY,
- BSD_ENOTSOCK,
- BSD_EDESTADDRREQ,
- BSD_EMSGSIZE,
- BSD_EPROTOTYPE,
- BSD_ENOPROTOOPT,
- BSD_EPROTONOSUPPORT,
- BSD_ESOCKTNOSUPPORT,
- BSD_EOPNOTSUPP,
- BSD_EPFNOSUPPORT,
- BSD_EAFNOSUPPORT,
- BSD_EADDRINUSE,
- BSD_EADDRNOTAVAIL,
- BSD_ENETDOWN,
- BSD_ENETUNREACH,
- BSD_ENETRESET,
- BSD_ECONNABORTED,
- BSD_ECONNRESET,
- BSD_ENOBUFS,
- BSD_EISCONN,
- BSD_ENOTONN,
- BSD_ESHUTDOWN,
- BSD_ETOOMANYREFS,
- BSD_ETIMEDOUT,
- BSD_ECONNREFUSED,
- BSD_ELOOP,
- BSD_ENAMETOOLONG,
- BSD_EHOSTDOWN,
- BSD_EHOSTUNREACH,
- BSD_ENOTEMPTY,
- BSD_EPROCLIM,
- BSD_EUSERS,
- BSD_EDQUOT,
- BSD_ESTALE,
- BSD_EREMOTE,
- BSD_ENOSTR,
- BSD_ETIME,
- BSD_ENOSR,
- BSD_ENOMSG,
- BSD_EBADMSG,
- BSD_IDRM,
- BSD_EDEADLK,
- BSD_ENOLCK,
- BSD_ENONET,
- BSD_ERREMOTE,
- BSD_ENOLINK,
- BSD_EADV,
- BSD_ESRMNT,
- BSD_ECOMM,
- BSD_EPROTO,
- BSD_EMULTIHOP,
- BSD_EINVAL, /* EDOTDOT XXX??? */
- BSD_REMCHG,
- BSD_NOSYS,
- BSD_STRPIPE,
- BSD_EOVERFLOW,
- BSD_EBADFD,
- BSD_ECHRNG,
- BSD_EL2NSYNC,
- BSD_EL3HLT,
- BSD_EL3RST,
- BSD_NRNG,
- BSD_EUNATCH,
- BSD_ENOCSI,
- BSD_EL2HLT,
- BSD_EBADE,
- BSD_EBADR,
- BSD_EXFULL,
- BSD_ENOANO,
- BSD_EBADRQC,
- BSD_EBADSLT,
- BSD_EDEADLOCK,
- BSD_EBFONT,
- BSD_ELIBEXEC,
- BSD_ENODATA,
- BSD_ELIBBAD,
- BSD_ENOPKG,
- BSD_ELIBACC,
- BSD_ENOTUNIQ,
- BSD_ERESTART,
- BSD_EUCLEAN,
- BSD_ENOTNAM,
- BSD_ENAVAIL,
- BSD_EISNAM,
- BSD_EREMOTEIO,
- BSD_EILSEQ,
- BSD_ELIBMAX,
- BSD_ELIBSCN,
-};
-
diff --git a/arch/sparc/kernel/of_device.c b/arch/sparc/kernel/of_device.c
index 3ea000d15e3..cc4c235c4f5 100644
--- a/arch/sparc/kernel/of_device.c
+++ b/arch/sparc/kernel/of_device.c
@@ -584,30 +584,3 @@ static int __init of_debug(char *str)
}
__setup("of_debug=", of_debug);
-
-struct of_device* of_platform_device_create(struct device_node *np,
- const char *bus_id,
- struct device *parent,
- struct bus_type *bus)
-{
- struct of_device *dev;
-
- dev = kzalloc(sizeof(*dev), GFP_KERNEL);
- if (!dev)
- return NULL;
-
- dev->dev.parent = parent;
- dev->dev.bus = bus;
- dev->dev.release = of_release_dev;
-
- strlcpy(dev->dev.bus_id, bus_id, BUS_ID_SIZE);
-
- if (of_device_register(dev) != 0) {
- kfree(dev);
- return NULL;
- }
-
- return dev;
-}
-
-EXPORT_SYMBOL(of_platform_device_create);
diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S
index 216147d6e61..b1002c60719 100644
--- a/arch/sparc/kernel/vmlinux.lds.S
+++ b/arch/sparc/kernel/vmlinux.lds.S
@@ -89,6 +89,10 @@ SECTIONS
.data.cacheline_aligned : {
*(.data.cacheline_aligned)
}
+ . = ALIGN(32);
+ .data.read_mostly : {
+ *(.data.read_mostly)
+ }
__bss_start = .;
.sbss : {
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index b810f2b7526..3af378ddb6a 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -14,6 +14,7 @@ config SPARC
config SPARC64
bool
default y
+ select HAVE_IDE
help
SPARC is a family of RISC microprocessors designed and marketed by
Sun Microsystems, incorporated. This port covers the newer 64-bit
@@ -40,6 +41,10 @@ config MMU
bool
default y
+config IOMMU_HELPER
+ bool
+ default y
+
config QUICKLIST
bool
default y
diff --git a/arch/sparc64/kernel/iommu.c b/arch/sparc64/kernel/iommu.c
index 5623a4d59df..d3276ebcfb4 100644
--- a/arch/sparc64/kernel/iommu.c
+++ b/arch/sparc64/kernel/iommu.c
@@ -1,6 +1,6 @@
/* iommu.c: Generic sparc64 IOMMU support.
*
- * Copyright (C) 1999, 2007 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1999, 2007, 2008 David S. Miller (davem@davemloft.net)
* Copyright (C) 1999, 2000 Jakub Jelinek (jakub@redhat.com)
*/
@@ -10,6 +10,7 @@
#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/errno.h>
+#include <linux/iommu-helper.h>
#ifdef CONFIG_PCI
#include <linux/pci.h>
@@ -41,7 +42,7 @@
"i" (ASI_PHYS_BYPASS_EC_E))
/* Must be invoked under the IOMMU lock. */
-static void __iommu_flushall(struct iommu *iommu)
+static void iommu_flushall(struct iommu *iommu)
{
if (iommu->iommu_flushinv) {
iommu_write(iommu->iommu_flushinv, ~(u64)0);
@@ -83,54 +84,91 @@ static inline void iopte_make_dummy(struct iommu *iommu, iopte_t *iopte)
iopte_val(*iopte) = val;
}
-/* Based largely upon the ppc64 iommu allocator. */
-static long arena_alloc(struct iommu *iommu, unsigned long npages)
+/* Based almost entirely upon the ppc64 iommu allocator. If you use the 'handle'
+ * facility it must all be done in one pass while under the iommu lock.
+ *
+ * On sun4u platforms, we only flush the IOMMU once every time we've passed
+ * over the entire page table doing allocations. Therefore we only ever advance
+ * the hint and cannot backtrack it.
+ */
+unsigned long iommu_range_alloc(struct device *dev,
+ struct iommu *iommu,
+ unsigned long npages,
+ unsigned long *handle)
{
+ unsigned long n, end, start, limit, boundary_size;
struct iommu_arena *arena = &iommu->arena;
- unsigned long n, i, start, end, limit;
- int pass;
+ int pass = 0;
+
+ /* This allocator was derived from x86_64's bit string search */
+
+ /* Sanity check */
+ if (unlikely(npages == 0)) {
+ if (printk_ratelimit())
+ WARN_ON(1);
+ return DMA_ERROR_CODE;
+ }
+
+ if (handle && *handle)
+ start = *handle;
+ else
+ start = arena->hint;
limit = arena->limit;
- start = arena->hint;
- pass = 0;
-again:
- n = find_next_zero_bit(arena->map, limit, start);
- end = n + npages;
- if (unlikely(end >= limit)) {
+ /* The case below can happen if we have a small segment appended
+ * to a large, or when the previous alloc was at the very end of
+ * the available space. If so, go back to the beginning and flush.
+ */
+ if (start >= limit) {
+ start = 0;
+ if (iommu->flush_all)
+ iommu->flush_all(iommu);
+ }
+
+ again:
+
+ if (dev)
+ boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+ 1 << IO_PAGE_SHIFT);
+ else
+ boundary_size = ALIGN(1UL << 32, 1 << IO_PAGE_SHIFT);
+
+ n = iommu_area_alloc(arena->map, limit, start, npages, 0,
+ boundary_size >> IO_PAGE_SHIFT, 0);
+ if (n == -1) {
if (likely(pass < 1)) {
- limit = start;
+ /* First failure, rescan from the beginning. */
start = 0;
- __iommu_flushall(iommu);
+ if (iommu->flush_all)
+ iommu->flush_all(iommu);
pass++;
goto again;
} else {
- /* Scanned the whole thing, give up. */
- return -1;
- }
- }
-
- for (i = n; i < end; i++) {
- if (test_bit(i, arena->map)) {
- start = i + 1;
- goto again;
+ /* Second failure, give up */
+ return DMA_ERROR_CODE;
}
}
- for (i = n; i < end; i++)
- __set_bit(i, arena->map);
+ end = n + npages;
arena->hint = end;
+ /* Update handle for SG allocations */
+ if (handle)
+ *handle = end;
+
return n;
}
-static void arena_free(struct iommu_arena *arena, unsigned long base, unsigned long npages)
+void iommu_range_free(struct iommu *iommu, dma_addr_t dma_addr, unsigned long npages)
{
- unsigned long i;
+ struct iommu_arena *arena = &iommu->arena;
+ unsigned long entry;
- for (i = base; i < (base + npages); i++)
- __clear_bit(i, arena->map);
+ entry = (dma_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
+
+ iommu_area_free(arena->map, entry, npages);
}
int iommu_table_init(struct iommu *iommu, int tsbsize,
@@ -156,6 +194,9 @@ int iommu_table_init(struct iommu *iommu, int tsbsize,
}
iommu->arena.limit = num_tsb_entries;
+ if (tlb_type != hypervisor)
+ iommu->flush_all = iommu_flushall;
+
/* Allocate and initialize the dummy page which we
* set inactive IO PTEs to point to.
*/
@@ -192,22 +233,18 @@ out_free_map:
return -ENOMEM;
}
-static inline iopte_t *alloc_npages(struct iommu *iommu, unsigned long npages)
+static inline iopte_t *alloc_npages(struct device *dev, struct iommu *iommu,
+ unsigned long npages)
{
- long entry;
+ unsigned long entry;
- entry = arena_alloc(iommu, npages);
- if (unlikely(entry < 0))
+ entry = iommu_range_alloc(dev, iommu, npages, NULL);
+ if (unlikely(entry == DMA_ERROR_CODE))
return NULL;
return iommu->page_table + entry;
}
-static inline void free_npages(struct iommu *iommu, dma_addr_t base, unsigned long npages)
-{
- arena_free(&iommu->arena, base >> IO_PAGE_SHIFT, npages);
-}
-
static int iommu_alloc_ctx(struct iommu *iommu)
{
int lowest = iommu->ctx_lowest_free;
@@ -258,7 +295,7 @@ static void *dma_4u_alloc_coherent(struct device *dev, size_t size,
iommu = dev->archdata.iommu;
spin_lock_irqsave(&iommu->lock, flags);
- iopte = alloc_npages(iommu, size >> IO_PAGE_SHIFT);
+ iopte = alloc_npages(dev, iommu, size >> IO_PAGE_SHIFT);
spin_unlock_irqrestore(&iommu->lock, flags);
if (unlikely(iopte == NULL)) {
@@ -296,7 +333,7 @@ static void dma_4u_free_coherent(struct device *dev, size_t size,
spin_lock_irqsave(&iommu->lock, flags);
- free_npages(iommu, dvma - iommu->page_table_map_base, npages);
+ iommu_range_free(iommu, dvma, npages);
spin_unlock_irqrestore(&iommu->lock, flags);
@@ -327,7 +364,7 @@ static dma_addr_t dma_4u_map_single(struct device *dev, void *ptr, size_t sz,
npages >>= IO_PAGE_SHIFT;
spin_lock_irqsave(&iommu->lock, flags);
- base = alloc_npages(iommu, npages);
+ base = alloc_npages(dev, iommu, npages);
ctx = 0;
if (iommu->iommu_ctxflush)
ctx = iommu_alloc_ctx(iommu);
@@ -465,7 +502,7 @@ static void dma_4u_unmap_single(struct device *dev, dma_addr_t bus_addr,
for (i = 0; i < npages; i++)
iopte_make_dummy(iommu, base + i);
- free_npages(iommu, bus_addr - iommu->page_table_map_base, npages);
+ iommu_range_free(iommu, bus_addr, npages);
iommu_free_ctx(iommu, ctx);
@@ -475,124 +512,209 @@ static void dma_4u_unmap_single(struct device *dev, dma_addr_t bus_addr,
static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction direction)
{
- unsigned long flags, ctx, i, npages, iopte_protection;
- struct scatterlist *sg;
+ struct scatterlist *s, *outs, *segstart;
+ unsigned long flags, handle, prot, ctx;
+ dma_addr_t dma_next = 0, dma_addr;
+ unsigned int max_seg_size;
+ int outcount, incount, i;
struct strbuf *strbuf;
struct iommu *iommu;
- iopte_t *base;
- u32 dma_base;
-
- /* Fast path single entry scatterlists. */
- if (nelems == 1) {
- sglist->dma_address =
- dma_4u_map_single(dev, sg_virt(sglist),
- sglist->length, direction);
- if (unlikely(sglist->dma_address == DMA_ERROR_CODE))
- return 0;
- sglist->dma_length = sglist->length;
- return 1;
- }
+
+ BUG_ON(direction == DMA_NONE);
iommu = dev->archdata.iommu;
strbuf = dev->archdata.stc;
-
- if (unlikely(direction == DMA_NONE))
- goto bad_no_ctx;
-
- npages = calc_npages(sglist, nelems);
+ if (nelems == 0 || !iommu)
+ return 0;
spin_lock_irqsave(&iommu->lock, flags);
- base = alloc_npages(iommu, npages);
ctx = 0;
if (iommu->iommu_ctxflush)
ctx = iommu_alloc_ctx(iommu);
- spin_unlock_irqrestore(&iommu->lock, flags);
-
- if (base == NULL)
- goto bad;
-
- dma_base = iommu->page_table_map_base +
- ((base - iommu->page_table) << IO_PAGE_SHIFT);
-
if (strbuf->strbuf_enabled)
- iopte_protection = IOPTE_STREAMING(ctx);
+ prot = IOPTE_STREAMING(ctx);
else
- iopte_protection = IOPTE_CONSISTENT(ctx);
+ prot = IOPTE_CONSISTENT(ctx);
if (direction != DMA_TO_DEVICE)
- iopte_protection |= IOPTE_WRITE;
-
- for_each_sg(sglist, sg, nelems, i) {
- unsigned long paddr = SG_ENT_PHYS_ADDRESS(sg);
- unsigned long slen = sg->length;
- unsigned long this_npages;
+ prot |= IOPTE_WRITE;
+
+ outs = s = segstart = &sglist[0];
+ outcount = 1;
+ incount = nelems;
+ handle = 0;
+
+ /* Init first segment length for backout at failure */
+ outs->dma_length = 0;
+
+ max_seg_size = dma_get_max_seg_size(dev);
+ for_each_sg(sglist, s, nelems, i) {
+ unsigned long paddr, npages, entry, slen;
+ iopte_t *base;
+
+ slen = s->length;
+ /* Sanity check */
+ if (slen == 0) {
+ dma_next = 0;
+ continue;
+ }
+ /* Allocate iommu entries for that segment */
+ paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
+ npages = iommu_num_pages(paddr, slen);
+ entry = iommu_range_alloc(dev, iommu, npages, &handle);
+
+ /* Handle failure */
+ if (unlikely(entry == DMA_ERROR_CODE)) {
+ if (printk_ratelimit())
+ printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx"
+ " npages %lx\n", iommu, paddr, npages);
+ goto iommu_map_failed;
+ }
- this_npages = iommu_num_pages(paddr, slen);
+ base = iommu->page_table + entry;
- sg->dma_address = dma_base | (paddr & ~IO_PAGE_MASK);
- sg->dma_length = slen;
+ /* Convert entry to a dma_addr_t */
+ dma_addr = iommu->page_table_map_base +
+ (entry << IO_PAGE_SHIFT);
+ dma_addr |= (s->offset & ~IO_PAGE_MASK);
+ /* Insert into HW table */
paddr &= IO_PAGE_MASK;
- while (this_npages--) {
- iopte_val(*base) = iopte_protection | paddr;
-
+ while (npages--) {
+ iopte_val(*base) = prot | paddr;
base++;
paddr += IO_PAGE_SIZE;
- dma_base += IO_PAGE_SIZE;
}
+
+ /* If we are in an open segment, try merging */
+ if (segstart != s) {
+ /* We cannot merge if:
+ * - allocated dma_addr isn't contiguous to previous allocation
+ */
+ if ((dma_addr != dma_next) ||
+ (outs->dma_length + s->length > max_seg_size)) {
+ /* Can't merge: create a new segment */
+ segstart = s;
+ outcount++;
+ outs = sg_next(outs);
+ } else {
+ outs->dma_length += s->length;
+ }
+ }
+
+ if (segstart == s) {
+ /* This is a new segment, fill entries */
+ outs->dma_address = dma_addr;
+ outs->dma_length = slen;
+ }
+
+ /* Calculate next page pointer for contiguous check */
+ dma_next = dma_addr + slen;
}
- return nelems;
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ if (outcount < incount) {
+ outs = sg_next(outs);
+ outs->dma_address = DMA_ERROR_CODE;
+ outs->dma_length = 0;
+ }
+
+ return outcount;
+
+iommu_map_failed:
+ for_each_sg(sglist, s, nelems, i) {
+ if (s->dma_length != 0) {
+ unsigned long vaddr, npages, entry, i;
+ iopte_t *base;
+
+ vaddr = s->dma_address & IO_PAGE_MASK;
+ npages = iommu_num_pages(s->dma_address, s->dma_length);
+ iommu_range_free(iommu, vaddr, npages);
+
+ entry = (vaddr - iommu->page_table_map_base)
+ >> IO_PAGE_SHIFT;
+ base = iommu->page_table + entry;
+
+ for (i = 0; i < npages; i++)
+ iopte_make_dummy(iommu, base + i);
+
+ s->dma_address = DMA_ERROR_CODE;
+ s->dma_length = 0;
+ }
+ if (s == outs)
+ break;
+ }
+ spin_unlock_irqrestore(&iommu->lock, flags);
-bad:
- iommu_free_ctx(iommu, ctx);
-bad_no_ctx:
- if (printk_ratelimit())
- WARN_ON(1);
return 0;
}
+/* If contexts are being used, they are the same in all of the mappings
+ * we make for a particular SG.
+ */
+static unsigned long fetch_sg_ctx(struct iommu *iommu, struct scatterlist *sg)
+{
+ unsigned long ctx = 0;
+
+ if (iommu->iommu_ctxflush) {
+ iopte_t *base;
+ u32 bus_addr;
+
+ bus_addr = sg->dma_address & IO_PAGE_MASK;
+ base = iommu->page_table +
+ ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+
+ ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;
+ }
+ return ctx;
+}
+
static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction direction)
{
- unsigned long flags, ctx, i, npages;
+ unsigned long flags, ctx;
+ struct scatterlist *sg;
struct strbuf *strbuf;
struct iommu *iommu;
- iopte_t *base;
- u32 bus_addr;
- if (unlikely(direction == DMA_NONE)) {
- if (printk_ratelimit())
- WARN_ON(1);
- }
+ BUG_ON(direction == DMA_NONE);
iommu = dev->archdata.iommu;
strbuf = dev->archdata.stc;
- bus_addr = sglist->dma_address & IO_PAGE_MASK;
+ ctx = fetch_sg_ctx(iommu, sglist);
- npages = calc_npages(sglist, nelems);
+ spin_lock_irqsave(&iommu->lock, flags);
- base = iommu->page_table +
- ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+ sg = sglist;
+ while (nelems--) {
+ dma_addr_t dma_handle = sg->dma_address;
+ unsigned int len = sg->dma_length;
+ unsigned long npages, entry;
+ iopte_t *base;
+ int i;
- spin_lock_irqsave(&iommu->lock, flags);
+ if (!len)
+ break;
+ npages = iommu_num_pages(dma_handle, len);
+ iommu_range_free(iommu, dma_handle, npages);
- /* Record the context, if any. */
- ctx = 0;
- if (iommu->iommu_ctxflush)
- ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;
+ entry = ((dma_handle - iommu->page_table_map_base)
+ >> IO_PAGE_SHIFT);
+ base = iommu->page_table + entry;
- /* Step 1: Kick data out of streaming buffers if necessary. */
- if (strbuf->strbuf_enabled)
- strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);
+ dma_handle &= IO_PAGE_MASK;
+ if (strbuf->strbuf_enabled)
+ strbuf_flush(strbuf, iommu, dma_handle, ctx,
+ npages, direction);
- /* Step 2: Clear out the TSB entries. */
- for (i = 0; i < npages; i++)
- iopte_make_dummy(iommu, base + i);
+ for (i = 0; i < npages; i++)
+ iopte_make_dummy(iommu, base + i);
- free_npages(iommu, bus_addr - iommu->page_table_map_base, npages);
+ sg = sg_next(sg);
+ }
iommu_free_ctx(iommu, ctx);
diff --git a/arch/sparc64/kernel/iommu_common.h b/arch/sparc64/kernel/iommu_common.h
index 4b5cafa2877..0713bd58499 100644
--- a/arch/sparc64/kernel/iommu_common.h
+++ b/arch/sparc64/kernel/iommu_common.h
@@ -1,9 +1,11 @@
-/* $Id: iommu_common.h,v 1.5 2001/12/11 09:41:01 davem Exp $
- * iommu_common.h: UltraSparc SBUS/PCI common iommu declarations.
+/* iommu_common.h: UltraSparc SBUS/PCI common iommu declarations.
*
- * Copyright (C) 1999 David S. Miller (davem@redhat.com)
+ * Copyright (C) 1999, 2008 David S. Miller (davem@davemloft.net)
*/
+#ifndef _IOMMU_COMMON_H
+#define _IOMMU_COMMON_H
+
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/sched.h>
@@ -56,21 +58,12 @@ static inline unsigned long calc_npages(struct scatterlist *sglist, int nelems)
return npages;
}
-/* You are _strongly_ advised to enable the following debugging code
- * any time you make changes to the sg code below, run it for a while
- * with filesystems mounted read-only before buying the farm... -DaveM
- */
-#undef VERIFY_SG
-
-#ifdef VERIFY_SG
-extern void verify_sglist(struct scatterlist *sg, int nents, iopte_t *iopte, int npages);
-#endif
-
-/* Two addresses are "virtually contiguous" if and only if:
- * 1) They are equal, or...
- * 2) They are both on a page boundary
- */
-#define VCONTIG(__X, __Y) (((__X) == (__Y)) || \
- (((__X) | (__Y)) << (64UL - PAGE_SHIFT)) == 0UL)
+extern unsigned long iommu_range_alloc(struct device *dev,
+ struct iommu *iommu,
+ unsigned long npages,
+ unsigned long *handle);
+extern void iommu_range_free(struct iommu *iommu,
+ dma_addr_t dma_addr,
+ unsigned long npages);
-extern unsigned long prepare_sg(struct device *dev, struct scatterlist *sg, int nents);
+#endif /* _IOMMU_COMMON_H */
diff --git a/arch/sparc64/kernel/kprobes.c b/arch/sparc64/kernel/kprobes.c
index d94f901d321..34fc3ddd500 100644
--- a/arch/sparc64/kernel/kprobes.c
+++ b/arch/sparc64/kernel/kprobes.c
@@ -480,8 +480,117 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
return 0;
}
-/* architecture specific initialization */
-int arch_init_kprobes(void)
+/* Called with kretprobe_lock held. The value stored in the return
+ * address register is actually 2 instructions before where the
+ * callee will return to. Sequences usually look something like this
+ *
+ * call some_function <--- return register points here
+ * nop <--- call delay slot
+ * whatever <--- where callee returns to
+ *
+ * To keep trampoline_probe_handler logic simpler, we normalize the
+ * value kept in ri->ret_addr so we don't need to keep adjusting it
+ * back and forth.
+ */
+void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
+ struct pt_regs *regs)
+{
+ ri->ret_addr = (kprobe_opcode_t *)(regs->u_regs[UREG_RETPC] + 8);
+
+ /* Replace the return addr with trampoline addr */
+ regs->u_regs[UREG_RETPC] =
+ ((unsigned long)kretprobe_trampoline) - 8;
+}
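A quick arithmetic check of the normalization, with illustrative addresses only (sparc64 returns to %o7 + 8 because of the call delay slot):

#include <assert.h>

int main(void)
{
	unsigned long retpc = 0x1000;      /* %o7: address of the call itself */
	unsigned long trampoline = 0x2000; /* stands in for kretprobe_trampoline */

	/* The callee really resumes at retpc + 8 (call + delay slot),
	 * which is what gets saved in ri->ret_addr ... */
	unsigned long saved = retpc + 8;
	/* ... and storing trampoline - 8 makes the hardware "return"
	 * to the trampoline itself. */
	assert((trampoline - 8) + 8 == trampoline);
	assert(saved == 0x1008);
	return 0;
}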
+
+/*
+ * Called when the probe at kretprobe trampoline is hit
+ */
+int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kretprobe_instance *ri = NULL;
+ struct hlist_head *head, empty_rp;
+ struct hlist_node *node, *tmp;
+ unsigned long flags, orig_ret_address = 0;
+	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
+
+ INIT_HLIST_HEAD(&empty_rp);
+ spin_lock_irqsave(&kretprobe_lock, flags);
+ head = kretprobe_inst_table_head(current);
+
+ /*
+	 * It is possible to have multiple instances associated with a given
+	 * task either because multiple functions in the call path have
+	 * a return probe installed on them, and/or more than one return
+	 * probe was registered for a target function.
+ *
+ * We can handle this because:
+ * - instances are always inserted at the head of the list
+ * - when multiple return probes are registered for the same
+ * function, the first instance's ret_addr will point to the
+ * real return address, and all the rest will point to
+ * kretprobe_trampoline
+ */
+ hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ if (ri->rp && ri->rp->handler)
+ ri->rp->handler(ri, regs);
+
+ orig_ret_address = (unsigned long)ri->ret_addr;
+ recycle_rp_inst(ri, &empty_rp);
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+			 * other calls deeper in the call stack.
+ */
+ break;
+ }
+
+ kretprobe_assert(ri, orig_ret_address, trampoline_address);
+ regs->tpc = orig_ret_address;
+ regs->tnpc = orig_ret_address + 4;
+
+ reset_current_kprobe();
+ spin_unlock_irqrestore(&kretprobe_lock, flags);
+ preempt_enable_no_resched();
+
+ hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+ hlist_del(&ri->hlist);
+ kfree(ri);
+ }
+ /*
+ * By returning a non-zero value, we are telling
+ * kprobe_handler() that we don't want the post_handler
+ * to run (and have re-enabled preemption)
+ */
+ return 1;
+}
+
+void kretprobe_trampoline_holder(void)
+{
+ asm volatile(".global kretprobe_trampoline\n"
+ "kretprobe_trampoline:\n"
+ "\tnop\n"
+ "\tnop\n");
+}
+static struct kprobe trampoline_p = {
+ .addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+ .pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
{
+ return register_kprobe(&trampoline_p);
+}
+
+int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+{
+ if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
+ return 1;
+
return 0;
}
diff --git a/arch/sparc64/kernel/of_device.c b/arch/sparc64/kernel/of_device.c
index fc5c0cc793b..0fd9db95b89 100644
--- a/arch/sparc64/kernel/of_device.c
+++ b/arch/sparc64/kernel/of_device.c
@@ -868,29 +868,3 @@ static int __init of_debug(char *str)
}
__setup("of_debug=", of_debug);
-
-struct of_device* of_platform_device_create(struct device_node *np,
- const char *bus_id,
- struct device *parent,
- struct bus_type *bus)
-{
- struct of_device *dev;
-
- dev = kzalloc(sizeof(*dev), GFP_KERNEL);
- if (!dev)
- return NULL;
-
- dev->dev.parent = parent;
- dev->dev.bus = bus;
- dev->dev.release = of_release_dev;
-
- strlcpy(dev->dev.bus_id, bus_id, BUS_ID_SIZE);
-
- if (of_device_register(dev) != 0) {
- kfree(dev);
- return NULL;
- }
-
- return dev;
-}
-EXPORT_SYMBOL(of_platform_device_create);
diff --git a/arch/sparc64/kernel/pci_sun4v.c b/arch/sparc64/kernel/pci_sun4v.c
index 61baf8dc095..ddca6c6c0b4 100644
--- a/arch/sparc64/kernel/pci_sun4v.c
+++ b/arch/sparc64/kernel/pci_sun4v.c
@@ -1,6 +1,6 @@
/* pci_sun4v.c: SUN4V specific PCI controller support.
*
- * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 2006, 2007, 2008 David S. Miller (davem@davemloft.net)
*/
#include <linux/kernel.h>
@@ -89,6 +89,17 @@ static long iommu_batch_flush(struct iommu_batch *p)
return 0;
}
+static inline void iommu_batch_new_entry(unsigned long entry)
+{
+ struct iommu_batch *p = &__get_cpu_var(iommu_batch);
+
+ if (p->entry + p->npages == entry)
+ return;
+ if (p->entry != ~0UL)
+ iommu_batch_flush(p);
+ p->entry = entry;
+}
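The rule is simply flush-on-discontiguity; a minimal sketch of the decision, with the batch state passed explicitly instead of via the per-cpu variable:

#include <stdbool.h>

/* Returns true when 'entry' extends the pending batch, i.e. it is the
 * very next IOMMU slot after the pages already queued; anything else
 * forces a flush before a new batch can start. */
static bool extends_batch(unsigned long batch_entry,
			  unsigned long batch_npages,
			  unsigned long entry)
{
	return batch_entry + batch_npages == entry;
}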
+
/* Interrupts must be disabled. */
static inline long iommu_batch_add(u64 phys_page)
{
@@ -113,54 +124,6 @@ static inline long iommu_batch_end(void)
return iommu_batch_flush(p);
}
-static long arena_alloc(struct iommu_arena *arena, unsigned long npages)
-{
- unsigned long n, i, start, end, limit;
- int pass;
-
- limit = arena->limit;
- start = arena->hint;
- pass = 0;
-
-again:
- n = find_next_zero_bit(arena->map, limit, start);
- end = n + npages;
- if (unlikely(end >= limit)) {
- if (likely(pass < 1)) {
- limit = start;
- start = 0;
- pass++;
- goto again;
- } else {
- /* Scanned the whole thing, give up. */
- return -1;
- }
- }
-
- for (i = n; i < end; i++) {
- if (test_bit(i, arena->map)) {
- start = i + 1;
- goto again;
- }
- }
-
- for (i = n; i < end; i++)
- __set_bit(i, arena->map);
-
- arena->hint = end;
-
- return n;
-}
-
-static void arena_free(struct iommu_arena *arena, unsigned long base,
- unsigned long npages)
-{
- unsigned long i;
-
- for (i = base; i < (base + npages); i++)
- __clear_bit(i, arena->map);
-}
-
static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addrp, gfp_t gfp)
{
@@ -185,11 +148,11 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
iommu = dev->archdata.iommu;
spin_lock_irqsave(&iommu->lock, flags);
- entry = arena_alloc(&iommu->arena, npages);
+ entry = iommu_range_alloc(dev, iommu, npages, NULL);
spin_unlock_irqrestore(&iommu->lock, flags);
- if (unlikely(entry < 0L))
- goto arena_alloc_fail;
+ if (unlikely(entry == DMA_ERROR_CODE))
+ goto range_alloc_fail;
*dma_addrp = (iommu->page_table_map_base +
(entry << IO_PAGE_SHIFT));
@@ -219,10 +182,10 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
iommu_map_fail:
/* Interrupts are disabled. */
spin_lock(&iommu->lock);
- arena_free(&iommu->arena, entry, npages);
+ iommu_range_free(iommu, *dma_addrp, npages);
spin_unlock_irqrestore(&iommu->lock, flags);
-arena_alloc_fail:
+range_alloc_fail:
free_pages(first_page, order);
return NULL;
}
@@ -243,7 +206,7 @@ static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
spin_lock_irqsave(&iommu->lock, flags);
- arena_free(&iommu->arena, entry, npages);
+ iommu_range_free(iommu, dvma, npages);
do {
unsigned long num;
@@ -281,10 +244,10 @@ static dma_addr_t dma_4v_map_single(struct device *dev, void *ptr, size_t sz,
npages >>= IO_PAGE_SHIFT;
spin_lock_irqsave(&iommu->lock, flags);
- entry = arena_alloc(&iommu->arena, npages);
+ entry = iommu_range_alloc(dev, iommu, npages, NULL);
spin_unlock_irqrestore(&iommu->lock, flags);
- if (unlikely(entry < 0L))
+ if (unlikely(entry == DMA_ERROR_CODE))
goto bad;
bus_addr = (iommu->page_table_map_base +
@@ -319,7 +282,7 @@ bad:
iommu_map_fail:
/* Interrupts are disabled. */
spin_lock(&iommu->lock);
- arena_free(&iommu->arena, entry, npages);
+ iommu_range_free(iommu, bus_addr, npages);
spin_unlock_irqrestore(&iommu->lock, flags);
return DMA_ERROR_CODE;
@@ -350,9 +313,9 @@ static void dma_4v_unmap_single(struct device *dev, dma_addr_t bus_addr,
spin_lock_irqsave(&iommu->lock, flags);
- entry = (bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
- arena_free(&iommu->arena, entry, npages);
+ iommu_range_free(iommu, bus_addr, npages);
+ entry = (bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
do {
unsigned long num;
@@ -368,88 +331,131 @@ static void dma_4v_unmap_single(struct device *dev, dma_addr_t bus_addr,
static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction direction)
{
- unsigned long flags, npages, i, prot;
- struct scatterlist *sg;
+ struct scatterlist *s, *outs, *segstart;
+ unsigned long flags, handle, prot;
+ dma_addr_t dma_next = 0, dma_addr;
+ unsigned int max_seg_size;
+ int outcount, incount, i;
struct iommu *iommu;
- long entry, err;
- u32 dma_base;
-
- /* Fast path single entry scatterlists. */
- if (nelems == 1) {
- sglist->dma_address =
- dma_4v_map_single(dev, sg_virt(sglist),
- sglist->length, direction);
- if (unlikely(sglist->dma_address == DMA_ERROR_CODE))
- return 0;
- sglist->dma_length = sglist->length;
- return 1;
- }
+ long err;
+
+ BUG_ON(direction == DMA_NONE);
iommu = dev->archdata.iommu;
+ if (nelems == 0 || !iommu)
+ return 0;
- if (unlikely(direction == DMA_NONE))
- goto bad;
-
- npages = calc_npages(sglist, nelems);
+ prot = HV_PCI_MAP_ATTR_READ;
+ if (direction != DMA_TO_DEVICE)
+ prot |= HV_PCI_MAP_ATTR_WRITE;
- spin_lock_irqsave(&iommu->lock, flags);
- entry = arena_alloc(&iommu->arena, npages);
- spin_unlock_irqrestore(&iommu->lock, flags);
+ outs = s = segstart = &sglist[0];
+ outcount = 1;
+ incount = nelems;
+ handle = 0;
- if (unlikely(entry < 0L))
- goto bad;
+ /* Init first segment length for backout at failure */
+ outs->dma_length = 0;
- dma_base = iommu->page_table_map_base +
- (entry << IO_PAGE_SHIFT);
+ spin_lock_irqsave(&iommu->lock, flags);
- prot = HV_PCI_MAP_ATTR_READ;
- if (direction != DMA_TO_DEVICE)
- prot |= HV_PCI_MAP_ATTR_WRITE;
+ iommu_batch_start(dev, prot, ~0UL);
- local_irq_save(flags);
+ max_seg_size = dma_get_max_seg_size(dev);
+ for_each_sg(sglist, s, nelems, i) {
+ unsigned long paddr, npages, entry, slen;
- iommu_batch_start(dev, prot, entry);
+ slen = s->length;
+ /* Sanity check */
+ if (slen == 0) {
+ dma_next = 0;
+ continue;
+ }
+ /* Allocate iommu entries for that segment */
+ paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
+ npages = iommu_num_pages(paddr, slen);
+ entry = iommu_range_alloc(dev, iommu, npages, &handle);
- for_each_sg(sglist, sg, nelems, i) {
- unsigned long paddr = SG_ENT_PHYS_ADDRESS(sg);
- unsigned long slen = sg->length;
- unsigned long this_npages;
+ /* Handle failure */
+ if (unlikely(entry == DMA_ERROR_CODE)) {
+ if (printk_ratelimit())
+ printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx"
+ " npages %lx\n", iommu, paddr, npages);
+ goto iommu_map_failed;
+ }
- this_npages = iommu_num_pages(paddr, slen);
+ iommu_batch_new_entry(entry);
- sg->dma_address = dma_base | (paddr & ~IO_PAGE_MASK);
- sg->dma_length = slen;
+ /* Convert entry to a dma_addr_t */
+ dma_addr = iommu->page_table_map_base +
+ (entry << IO_PAGE_SHIFT);
+ dma_addr |= (s->offset & ~IO_PAGE_MASK);
+ /* Insert into HW table */
paddr &= IO_PAGE_MASK;
- while (this_npages--) {
+ while (npages--) {
err = iommu_batch_add(paddr);
- if (unlikely(err < 0L)) {
- local_irq_restore(flags);
+ if (unlikely(err < 0L))
goto iommu_map_failed;
+ paddr += IO_PAGE_SIZE;
+ }
+
+ /* If we are in an open segment, try merging */
+ if (segstart != s) {
+			/* We cannot merge if:
+			 * - allocated dma_addr isn't contiguous to the
+			 *   previous allocation, or
+			 * - the merged segment would exceed max_seg_size
+			 */
+ if ((dma_addr != dma_next) ||
+ (outs->dma_length + s->length > max_seg_size)) {
+ /* Can't merge: create a new segment */
+ segstart = s;
+ outcount++;
+ outs = sg_next(outs);
+ } else {
+ outs->dma_length += s->length;
}
+ }
- paddr += IO_PAGE_SIZE;
- dma_base += IO_PAGE_SIZE;
+ if (segstart == s) {
+ /* This is a new segment, fill entries */
+ outs->dma_address = dma_addr;
+ outs->dma_length = slen;
}
+
+ /* Calculate next page pointer for contiguous check */
+ dma_next = dma_addr + slen;
}
err = iommu_batch_end();
- local_irq_restore(flags);
-
if (unlikely(err < 0L))
goto iommu_map_failed;
- return nelems;
+ spin_unlock_irqrestore(&iommu->lock, flags);
-bad:
- if (printk_ratelimit())
- WARN_ON(1);
- return 0;
+ if (outcount < incount) {
+ outs = sg_next(outs);
+ outs->dma_address = DMA_ERROR_CODE;
+ outs->dma_length = 0;
+ }
+
+ return outcount;
iommu_map_failed:
- spin_lock_irqsave(&iommu->lock, flags);
- arena_free(&iommu->arena, entry, npages);
+ for_each_sg(sglist, s, nelems, i) {
+ if (s->dma_length != 0) {
+ unsigned long vaddr, npages;
+
+ vaddr = s->dma_address & IO_PAGE_MASK;
+ npages = iommu_num_pages(s->dma_address, s->dma_length);
+ iommu_range_free(iommu, vaddr, npages);
+ /* XXX demap? XXX */
+ s->dma_address = DMA_ERROR_CODE;
+ s->dma_length = 0;
+ }
+ if (s == outs)
+ break;
+ }
spin_unlock_irqrestore(&iommu->lock, flags);
return 0;
@@ -458,39 +464,43 @@ iommu_map_failed:
static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction direction)
{
- unsigned long flags, npages;
struct pci_pbm_info *pbm;
- u32 devhandle, bus_addr;
+ struct scatterlist *sg;
struct iommu *iommu;
- long entry;
+ unsigned long flags;
+ u32 devhandle;
- if (unlikely(direction == DMA_NONE)) {
- if (printk_ratelimit())
- WARN_ON(1);
- }
+ BUG_ON(direction == DMA_NONE);
iommu = dev->archdata.iommu;
pbm = dev->archdata.host_controller;
devhandle = pbm->devhandle;
- bus_addr = sglist->dma_address & IO_PAGE_MASK;
-
- npages = calc_npages(sglist, nelems);
-
- entry = ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
-
spin_lock_irqsave(&iommu->lock, flags);
- arena_free(&iommu->arena, entry, npages);
-
- do {
- unsigned long num;
+ sg = sglist;
+ while (nelems--) {
+ dma_addr_t dma_handle = sg->dma_address;
+ unsigned int len = sg->dma_length;
+ unsigned long npages, entry;
+
+ if (!len)
+ break;
+ npages = iommu_num_pages(dma_handle, len);
+ iommu_range_free(iommu, dma_handle, npages);
+
+ entry = ((dma_handle - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+ while (npages) {
+ unsigned long num;
+
+ num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
+ npages);
+ entry += num;
+ npages -= num;
+ }
- num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
- npages);
- entry += num;
- npages -= num;
- } while (npages != 0);
+ sg = sg_next(sg);
+ }
spin_unlock_irqrestore(&iommu->lock, flags);
}
diff --git a/arch/v850/Kconfig b/arch/v850/Kconfig
index 7b6d3716efc..4379f43505e 100644
--- a/arch/v850/Kconfig
+++ b/arch/v850/Kconfig
@@ -78,6 +78,7 @@ config MCA
config V850
bool
default y
+ select HAVE_IDE
menu "Processor type and features"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 65a70b777c1..aaed1a3b92d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -18,6 +18,7 @@ config X86_64
### Arch settings
config X86
def_bool y
+ select HAVE_IDE
select HAVE_OPROFILE
select HAVE_KPROBES
@@ -102,6 +103,9 @@ config ARCH_HAS_ILOG2_U32
config ARCH_HAS_ILOG2_U64
def_bool n
+config ARCH_HAS_CPU_IDLE_WAIT
+ def_bool y
+
config GENERIC_CALIBRATE_DELAY
def_bool y
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index fa555148823..864affc9a7b 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -34,13 +34,9 @@ config DEBUG_STACK_USAGE
This option will slow down process creation somewhat.
-comment "Page alloc debug is incompatible with Software Suspend on i386"
- depends on DEBUG_KERNEL && HIBERNATION
- depends on X86_32
-
config DEBUG_PAGEALLOC
bool "Debug page memory allocations"
- depends on DEBUG_KERNEL && X86_32
+ depends on DEBUG_KERNEL
help
Unmap pages from the kernel linear mapping after free_pages().
This results in a large slowdown, but helps to find certain types
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 364865b1b08..204af43535c 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -191,8 +191,10 @@ drivers-$(CONFIG_PCI) += arch/x86/pci/
# must be linked after kernel/
drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/
-ifeq ($(CONFIG_X86_32),y)
+# suspend and hibernation support
drivers-$(CONFIG_PM) += arch/x86/power/
+
+ifeq ($(CONFIG_X86_32),y)
drivers-$(CONFIG_FB) += arch/x86/video/
endif
diff --git a/arch/x86/boot/.gitignore b/arch/x86/boot/.gitignore
index 18465143cfa..b1bdc4c6f9f 100644
--- a/arch/x86/boot/.gitignore
+++ b/arch/x86/boot/.gitignore
@@ -3,3 +3,5 @@ bzImage
setup
setup.bin
setup.elf
+cpustr.h
+mkcpustr
diff --git a/arch/x86/boot/printf.c b/arch/x86/boot/printf.c
index 1a09f9309d3..7e7e890699b 100644
--- a/arch/x86/boot/printf.c
+++ b/arch/x86/boot/printf.c
@@ -33,8 +33,8 @@ static int skip_atoi(const char **s)
#define PLUS 4 /* show plus */
#define SPACE 8 /* space if plus */
#define LEFT 16 /* left justified */
-#define SPECIAL 32 /* 0x */
-#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */
+#define SMALL 32 /* Must be 32 == 0x20 */
+#define SPECIAL 64 /* 0x */
#define do_div(n,base) ({ \
int __res; \
@@ -45,12 +45,16 @@ __res; })
static char *number(char *str, long num, int base, int size, int precision,
int type)
{
- char c, sign, tmp[66];
- const char *digits = "0123456789abcdefghijklmnopqrstuvwxyz";
+	/* we are called with base 8, 10 or 16 only, thus don't need "G..." */
+ static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
+
+ char tmp[66];
+ char c, sign, locase;
int i;
- if (type & LARGE)
- digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+	/* locase = 0 or 0x20. ORing digits or letters with 'locase'
+	 * produces the same digits or (maybe lowercased) letters */
+ locase = (type & SMALL);
if (type & LEFT)
type &= ~ZEROPAD;
if (base < 2 || base > 36)
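Why ORing with SMALL (0x20) is safe for both digits and letters: ASCII lowercase differs from uppercase only in bit 0x20, and the digit characters 0x30-0x39 already have that bit set. A standalone check, not part of the patch:

#include <assert.h>

int main(void)
{
	/* letters: OR-ing in 0x20 folds to lowercase */
	assert(('X' | 0x20) == 'x');
	assert(('A' | 0x20) == 'a');
	/* digits: 0x30-0x39 already carry bit 0x20, so they pass through */
	assert(('0' | 0x20) == '0');
	assert(('9' | 0x20) == '9');
	return 0;
}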
@@ -81,7 +85,7 @@ static char *number(char *str, long num, int base, int size, int precision,
tmp[i++] = '0';
else
while (num != 0)
- tmp[i++] = digits[do_div(num, base)];
+ tmp[i++] = (digits[do_div(num, base)] | locase);
if (i > precision)
precision = i;
size -= precision;
@@ -95,7 +99,7 @@ static char *number(char *str, long num, int base, int size, int precision,
*str++ = '0';
else if (base == 16) {
*str++ = '0';
- *str++ = digits[33];
+ *str++ = ('X' | locase);
}
}
if (!(type & LEFT))
@@ -244,9 +248,9 @@ int vsprintf(char *buf, const char *fmt, va_list args)
base = 8;
break;
- case 'X':
- flags |= LARGE;
case 'x':
+ flags |= SMALL;
+ case 'X':
base = 16;
break;
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 77562e7cdab..3df340b54e5 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1421,7 +1421,6 @@ CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_VM is not set
# CONFIG_DEBUG_LIST is not set
# CONFIG_FRAME_POINTER is not set
-# CONFIG_FORCED_INLINING is not set
# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_LKDTM is not set
# CONFIG_FAULT_INJECTION is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 9e2b0ef851d..eef98cb00c6 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1346,7 +1346,6 @@ CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_VM is not set
# CONFIG_DEBUG_LIST is not set
# CONFIG_FRAME_POINTER is not set
-# CONFIG_FORCED_INLINING is not set
# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_LKDTM is not set
# CONFIG_FAULT_INJECTION is not set
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 21dc1a061bf..76ec0f8f138 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -84,8 +84,6 @@ ifeq ($(CONFIG_X86_64),y)
obj-y += genapic_64.o genapic_flat_64.o
obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
obj-$(CONFIG_AUDIT) += audit_64.o
- obj-$(CONFIG_PM) += suspend_64.o
- obj-$(CONFIG_HIBERNATION) += suspend_asm_64.o
obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o
obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 24885be5c48..9b7e01daa1c 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -118,7 +118,7 @@ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev)
static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev)
{
- return sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
+ sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
}
/* Mutex protecting device creation against CPU hotplug */
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index be5c31d0488..824e21b80aa 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -409,7 +409,8 @@ restore_nocheck_notrace:
RESTORE_REGS
addl $4, %esp # skip orig_eax/error_code
CFI_ADJUST_CFA_OFFSET -4
-1: INTERRUPT_RETURN
+ENTRY(irq_return)
+ INTERRUPT_RETURN
.section .fixup,"ax"
iret_exc:
pushl $0 # no error code
@@ -418,7 +419,7 @@ iret_exc:
.previous
.section __ex_table,"a"
.align 4
- .long 1b,iret_exc
+ .long irq_return,iret_exc
.previous
CFI_RESTORE_STATE
@@ -865,20 +866,16 @@ nmi_espfix_stack:
RESTORE_REGS
lss 12+4(%esp), %esp # back to espfix stack
CFI_ADJUST_CFA_OFFSET -24
-1: INTERRUPT_RETURN
+ jmp irq_return
CFI_ENDPROC
-.section __ex_table,"a"
- .align 4
- .long 1b,iret_exc
-.previous
KPROBE_END(nmi)
#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
-1: iret
+ iret
.section __ex_table,"a"
.align 4
- .long 1b,iret_exc
+ .long native_iret, iret_exc
.previous
END(native_iret)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index c7341e81941..6be39a387c5 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -581,16 +581,24 @@ retint_restore_args: /* return to kernel space */
*/
TRACE_IRQS_IRETQ
restore_args:
- RESTORE_ARGS 0,8,0
-#ifdef CONFIG_PARAVIRT
+ RESTORE_ARGS 0,8,0
+
+ENTRY(irq_return)
INTERRUPT_RETURN
-#endif
+
+ .section __ex_table, "a"
+ .quad irq_return, bad_iret
+ .previous
+
+#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
iretq
.section __ex_table,"a"
.quad native_iret, bad_iret
.previous
+#endif
+
.section .fixup,"ax"
bad_iret:
/*
@@ -804,7 +812,7 @@ paranoid_swapgs\trace:
SWAPGS_UNSAFE_STACK
paranoid_restore\trace:
RESTORE_ALL 8
- INTERRUPT_RETURN
+ jmp irq_return
paranoid_userspace\trace:
GET_THREAD_INFO(%rcx)
movl threadinfo_flags(%rcx),%ebx
@@ -919,7 +927,7 @@ error_kernelspace:
iret run with kernel gs again, so don't set the user space flag.
	   B stepping K8s sometimes report a truncated RIP for IRET
exceptions returning to compat mode. Check for these here too. */
- leaq native_iret(%rip),%rbp
+ leaq irq_return(%rip),%rbp
cmpq %rbp,RIP(%rsp)
je error_swapgs
movl %ebp,%ebp /* zero extend */
diff --git a/arch/x86/kernel/geode_32.c b/arch/x86/kernel/geode_32.c
index 9c7f7d39596..9dad6ca6cd7 100644
--- a/arch/x86/kernel/geode_32.c
+++ b/arch/x86/kernel/geode_32.c
@@ -163,14 +163,11 @@ EXPORT_SYMBOL_GPL(geode_gpio_setup_event);
static int __init geode_southbridge_init(void)
{
- int timers;
-
if (!is_geode())
return -ENODEV;
init_lbars();
- timers = geode_mfgpt_detect();
- printk(KERN_INFO "geode: %d MFGPT timers available.\n", timers);
+ (void) mfgpt_timer_setup();
return 0;
}
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 5d8c5730686..74ef4a41f22 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -19,6 +19,10 @@
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <asm/setup.h>
+#include <asm/processor-flags.h>
+
+/* Physical address */
+#define pa(X) ((X) - __PAGE_OFFSET)
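Before paging is enabled the kernel executes at its physical load address, so every link-time (virtual) symbol must be shifted down by __PAGE_OFFSET. A worked instance, assuming the default 3G/1G split:

#include <assert.h>

#define __PAGE_OFFSET 0xC0000000UL	/* assumed 3G/1G VMSPLIT */
#define pa(X) ((X) - __PAGE_OFFSET)

int main(void)
{
	/* a symbol linked at 0xC0100000 lives at physical 0x00100000 */
	assert(pa(0xC0100000UL) == 0x00100000UL);
	return 0;
}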
/*
* References to members of the new_cpu_data structure.
@@ -80,10 +84,6 @@ INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_
*/
.section .text.head,"ax",@progbits
ENTRY(startup_32)
- /* check to see if KEEP_SEGMENTS flag is meaningful */
- cmpw $0x207, BP_version(%esi)
- jb 1f
-
/* test KEEP_SEGMENTS flag to see if the bootloader is asking
us to not reload segments */
testb $(1<<6), BP_loadflags(%esi)
@@ -92,7 +92,7 @@ ENTRY(startup_32)
/*
* Set segments to known values.
*/
-1: lgdt boot_gdt_descr - __PAGE_OFFSET
+ lgdt pa(boot_gdt_descr)
movl $(__BOOT_DS),%eax
movl %eax,%ds
movl %eax,%es
@@ -105,8 +105,8 @@ ENTRY(startup_32)
*/
cld
xorl %eax,%eax
- movl $__bss_start - __PAGE_OFFSET,%edi
- movl $__bss_stop - __PAGE_OFFSET,%ecx
+ movl $pa(__bss_start),%edi
+ movl $pa(__bss_stop),%ecx
subl %edi,%ecx
shrl $2,%ecx
rep ; stosl
@@ -118,31 +118,32 @@ ENTRY(startup_32)
* (kexec on panic case). Hence copy out the parameters before initializing
* page tables.
*/
- movl $(boot_params - __PAGE_OFFSET),%edi
+ movl $pa(boot_params),%edi
movl $(PARAM_SIZE/4),%ecx
cld
rep
movsl
- movl boot_params - __PAGE_OFFSET + NEW_CL_POINTER,%esi
+ movl pa(boot_params) + NEW_CL_POINTER,%esi
andl %esi,%esi
	jz 1f			# No command line
- movl $(boot_command_line - __PAGE_OFFSET),%edi
+ movl $pa(boot_command_line),%edi
movl $(COMMAND_LINE_SIZE/4),%ecx
rep
movsl
1:
#ifdef CONFIG_PARAVIRT
- cmpw $0x207, (boot_params + BP_version - __PAGE_OFFSET)
+	/* This can only trip for a broken bootloader... */
+ cmpw $0x207, pa(boot_params + BP_version)
jb default_entry
/* Paravirt-compatible boot parameters. Look to see what architecture
we're booting under. */
- movl (boot_params + BP_hardware_subarch - __PAGE_OFFSET), %eax
+ movl pa(boot_params + BP_hardware_subarch), %eax
cmpl $num_subarch_entries, %eax
jae bad_subarch
- movl subarch_entries - __PAGE_OFFSET(,%eax,4), %eax
+ movl pa(subarch_entries)(,%eax,4), %eax
subl $__PAGE_OFFSET, %eax
jmp *%eax
@@ -170,17 +171,68 @@ num_subarch_entries = (. - subarch_entries) / 4
* Mappings are created both at virtual address 0 (identity mapping)
* and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
*
- * Warning: don't use %esi or the stack in this code. However, %esp
- * can be used as a GPR if you really need it...
+ * Note that the stack is not yet set up!
*/
-page_pde_offset = (__PAGE_OFFSET >> 20);
+#define PTE_ATTR 0x007 /* PRESENT+RW+USER */
+#define PDE_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */
+#define PGD_ATTR 0x001 /* PRESENT (no other attributes) */
default_entry:
- movl $(pg0 - __PAGE_OFFSET), %edi
- movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
- movl $0x007, %eax /* 0x007 = PRESENT+RW+USER */
+#ifdef CONFIG_X86_PAE
+
+ /*
+ * In PAE mode swapper_pg_dir is statically defined to contain enough
+ * entries to cover the VMSPLIT option (that is the top 1, 2 or 3
+ * entries). The identity mapping is handled by pointing two PGD
+ * entries to the first kernel PMD.
+ *
+	 * Note that the upper half of each PMD or PTE is always zero
+	 * at this stage.
+ */
+
+#define KPMDS ((0x100000000-__PAGE_OFFSET) >> 30) /* Number of kernel PMDs */
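Each PAE PMD maps 1 GiB, so KPMDS is just the size of the kernel half of the address space in gigabytes; a worked instance assuming the default split:

#include <stdio.h>

int main(void)
{
	unsigned long long page_offset = 0xC0000000ULL; /* assumed 3G/1G split */
	/* (4 GiB - __PAGE_OFFSET) / 1 GiB -> number of kernel PMDs */
	printf("KPMDS = %llu\n", (0x100000000ULL - page_offset) >> 30); /* 1 */
	return 0;
}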
+
+ xorl %ebx,%ebx /* %ebx is kept at zero */
+
+ movl $pa(pg0), %edi
+ movl $pa(swapper_pg_pmd), %edx
+ movl $PTE_ATTR, %eax
+10:
+ leal PDE_ATTR(%edi),%ecx /* Create PMD entry */
+ movl %ecx,(%edx) /* Store PMD entry */
+ /* Upper half already zero */
+ addl $8,%edx
+ movl $512,%ecx
+11:
+ stosl
+ xchgl %eax,%ebx
+ stosl
+ xchgl %eax,%ebx
+ addl $0x1000,%eax
+ loop 11b
+
+ /*
+ * End condition: we must map up to and including INIT_MAP_BEYOND_END
+ * bytes beyond the end of our own page tables.
+ */
+ leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp
+ cmpl %ebp,%eax
+ jb 10b
+1:
+ movl %edi,pa(init_pg_tables_end)
+
+ /* Do early initialization of the fixmap area */
+ movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
+ movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8)
+#else /* Not PAE */
+
+page_pde_offset = (__PAGE_OFFSET >> 20);
+
+ movl $pa(pg0), %edi
+ movl $pa(swapper_pg_dir), %edx
+ movl $PTE_ATTR, %eax
10:
- leal 0x007(%edi),%ecx /* Create PDE entry */
+ leal PDE_ATTR(%edi),%ecx /* Create PDE entry */
movl %ecx,(%edx) /* Store identity PDE entry */
movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */
addl $4,%edx
@@ -189,19 +241,20 @@ default_entry:
stosl
addl $0x1000,%eax
loop 11b
- /* End condition: we must map up to and including INIT_MAP_BEYOND_END */
- /* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */
- leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp
+ /*
+ * End condition: we must map up to and including INIT_MAP_BEYOND_END
+ * bytes beyond the end of our own page tables; the +0x007 is
+ * the attribute bits
+ */
+ leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp
cmpl %ebp,%eax
jb 10b
- movl %edi,(init_pg_tables_end - __PAGE_OFFSET)
-
- /* Do an early initialization of the fixmap area */
- movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
- movl $(swapper_pg_pmd - __PAGE_OFFSET), %eax
- addl $0x67, %eax /* 0x67 == _PAGE_TABLE */
- movl %eax, 4092(%edx)
+ movl %edi,pa(init_pg_tables_end)
+ /* Do early initialization of the fixmap area */
+ movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
+ movl %eax,pa(swapper_pg_dir+0xffc)
+#endif
jmp 3f
/*
* Non-boot CPU entry point; entered from trampoline.S
@@ -241,7 +294,7 @@ ENTRY(startup_32_smp)
* NOTE! We have to correct for the fact that we're
* not yet offset PAGE_OFFSET..
*/
-#define cr4_bits mmu_cr4_features-__PAGE_OFFSET
+#define cr4_bits pa(mmu_cr4_features)
movl cr4_bits,%edx
andl %edx,%edx
jz 6f
@@ -276,10 +329,10 @@ ENTRY(startup_32_smp)
/*
* Enable paging
*/
- movl $swapper_pg_dir-__PAGE_OFFSET,%eax
+ movl $pa(swapper_pg_dir),%eax
movl %eax,%cr3 /* set the page table pointer.. */
movl %cr0,%eax
- orl $0x80000000,%eax
+ orl $X86_CR0_PG,%eax
movl %eax,%cr0 /* ..and set paging (PG) bit */
ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */
1:
@@ -552,16 +605,44 @@ ENTRY(_stext)
*/
.section ".bss.page_aligned","wa"
.align PAGE_SIZE_asm
+#ifdef CONFIG_X86_PAE
+ENTRY(swapper_pg_pmd)
+ .fill 1024*KPMDS,4,0
+#else
ENTRY(swapper_pg_dir)
.fill 1024,4,0
-ENTRY(swapper_pg_pmd)
+#endif
+ENTRY(swapper_pg_fixmap)
.fill 1024,4,0
ENTRY(empty_zero_page)
.fill 4096,1,0
-
/*
* This starts the data section.
*/
+#ifdef CONFIG_X86_PAE
+.section ".data.page_aligned","wa"
+ /* Page-aligned for the benefit of paravirt? */
+ .align PAGE_SIZE_asm
+ENTRY(swapper_pg_dir)
+ .long pa(swapper_pg_pmd+PGD_ATTR),0 /* low identity map */
+# if KPMDS == 3
+ .long pa(swapper_pg_pmd+PGD_ATTR),0
+ .long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0
+ .long pa(swapper_pg_pmd+PGD_ATTR+0x2000),0
+# elif KPMDS == 2
+ .long 0,0
+ .long pa(swapper_pg_pmd+PGD_ATTR),0
+ .long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0
+# elif KPMDS == 1
+ .long 0,0
+ .long 0,0
+ .long pa(swapper_pg_pmd+PGD_ATTR),0
+# else
+# error "Kernel PMDs should be 1, 2 or 3"
+# endif
+ .align PAGE_SIZE_asm /* needs to be page-sized too */
+#endif
+
.data
ENTRY(stack_start)
.long init_thread_union+THREAD_SIZE
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 219f86eb612..027fc067b39 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -12,48 +12,37 @@
*/
/*
- * We are using the 32Khz input clock - its the only one that has the
+ * We are using the 32.768kHz input clock - it's the only one that has the
* ranges we find desirable. The following table lists the suitable
- * divisors and the associated hz, minimum interval
- * and the maximum interval:
+ * divisors and the associated Hz, minimum interval and the maximum interval:
*
- * Divisor Hz Min Delta (S) Max Delta (S)
- * 1 32000 .0005 2.048
- * 2 16000 .001 4.096
- * 4 8000 .002 8.192
- * 8 4000 .004 16.384
- * 16 2000 .008 32.768
- * 32 1000 .016 65.536
- * 64 500 .032 131.072
- * 128 250 .064 262.144
- * 256 125 .128 524.288
+ * Divisor Hz Min Delta (s) Max Delta (s)
+ * 1 32768 .00048828125 2.000
+ * 2 16384 .0009765625 4.000
+ * 4 8192 .001953125 8.000
+ * 8 4096 .00390625 16.000
+ * 16 2048 .0078125 32.000
+ * 32 1024 .015625 64.000
+ * 64 512 .03125 128.000
+ * 128 256 .0625 256.000
+ * 256 128 .125 512.000
*/
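The table follows directly from the 16-bit counter and the 32.768 kHz input: with divisor d the tick rate is 32768/d Hz. This sketch reproduces the figures above, assuming a 16-tick minimum programmable delta and the full 2^16 ticks for the maximum:

#include <stdio.h>

int main(void)
{
	for (int d = 1; d <= 256; d <<= 1) {
		double hz = 32768.0 / d;
		/* min delta: 16 ticks; max delta: full 16-bit counter */
		printf("%4d %8.0f %14.11f %9.3f\n",
		       d, hz, 16.0 / hz, 65536.0 / hz);
	}
	return 0;
}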
#include <linux/kernel.h>
#include <linux/interrupt.h>
-#include <linux/module.h>
#include <asm/geode.h>
-#define F_AVAIL 0x01
-
static struct mfgpt_timer_t {
- int flags;
- struct module *owner;
+ unsigned int avail:1;
} mfgpt_timers[MFGPT_MAX_TIMERS];
/* Selected from the table above */
#define MFGPT_DIVISOR 16
#define MFGPT_SCALE 4 /* divisor = 2^(scale) */
-#define MFGPT_HZ (32000 / MFGPT_DIVISOR)
+#define MFGPT_HZ (32768 / MFGPT_DIVISOR)
#define MFGPT_PERIODIC (MFGPT_HZ / HZ)
-#ifdef CONFIG_GEODE_MFGPT_TIMER
-static int __init mfgpt_timer_setup(void);
-#else
-#define mfgpt_timer_setup() (0)
-#endif
-
/* Allow for disabling of MFGPTs */
static int disable;
static int __init mfgpt_disable(char *s)
@@ -85,28 +74,37 @@ __setup("mfgptfix", mfgpt_fix);
* In other cases (such as with VSAless OpenFirmware), the system firmware
* leaves timers available for us to use.
*/
-int __init geode_mfgpt_detect(void)
+
+
+static int timers = -1;
+
+static void geode_mfgpt_detect(void)
{
- int count = 0, i;
+ int i;
u16 val;
+ timers = 0;
+
if (disable) {
- printk(KERN_INFO "geode-mfgpt: Skipping MFGPT setup\n");
- return 0;
+ printk(KERN_INFO "geode-mfgpt: MFGPT support is disabled\n");
+ goto done;
+ }
+
+ if (!geode_get_dev_base(GEODE_DEV_MFGPT)) {
+ printk(KERN_INFO "geode-mfgpt: MFGPT LBAR is not set up\n");
+ goto done;
}
for (i = 0; i < MFGPT_MAX_TIMERS; i++) {
val = geode_mfgpt_read(i, MFGPT_REG_SETUP);
if (!(val & MFGPT_SETUP_SETUP)) {
- mfgpt_timers[i].flags = F_AVAIL;
- count++;
+ mfgpt_timers[i].avail = 1;
+ timers++;
}
}
- /* set up clock event device, if desired */
- i = mfgpt_timer_setup();
-
- return count;
+done:
+ printk(KERN_INFO "geode-mfgpt: %d MFGPT timers available.\n", timers);
}
int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable)
@@ -183,36 +181,41 @@ int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable)
return 0;
}
-static int mfgpt_get(int timer, struct module *owner)
+static int mfgpt_get(int timer)
{
- mfgpt_timers[timer].flags &= ~F_AVAIL;
- mfgpt_timers[timer].owner = owner;
+ mfgpt_timers[timer].avail = 0;
printk(KERN_INFO "geode-mfgpt: Registered timer %d\n", timer);
return timer;
}
-int geode_mfgpt_alloc_timer(int timer, int domain, struct module *owner)
+int geode_mfgpt_alloc_timer(int timer, int domain)
{
int i;
- if (!geode_get_dev_base(GEODE_DEV_MFGPT))
- return -ENODEV;
+ if (timers == -1) {
+ /* timers haven't been detected yet */
+ geode_mfgpt_detect();
+ }
+
+ if (!timers)
+ return -1;
+
if (timer >= MFGPT_MAX_TIMERS)
- return -EIO;
+ return -1;
if (timer < 0) {
/* Try to find an available timer */
for (i = 0; i < MFGPT_MAX_TIMERS; i++) {
- if (mfgpt_timers[i].flags & F_AVAIL)
- return mfgpt_get(i, owner);
+ if (mfgpt_timers[i].avail)
+ return mfgpt_get(i);
if (i == 5 && domain == MFGPT_DOMAIN_WORKING)
break;
}
} else {
/* If they requested a specific timer, try to honor that */
- if (mfgpt_timers[timer].flags & F_AVAIL)
- return mfgpt_get(timer, owner);
+ if (mfgpt_timers[timer].avail)
+ return mfgpt_get(timer);
}
/* No timers available - too bad */
@@ -244,10 +247,11 @@ static int __init mfgpt_setup(char *str)
}
__setup("mfgpt_irq=", mfgpt_setup);
-static inline void mfgpt_disable_timer(u16 clock)
+static void mfgpt_disable_timer(u16 clock)
{
- u16 val = geode_mfgpt_read(clock, MFGPT_REG_SETUP);
- geode_mfgpt_write(clock, MFGPT_REG_SETUP, val & ~MFGPT_SETUP_CNTEN);
+ /* avoid races by clearing CMP1 and CMP2 unconditionally */
+ geode_mfgpt_write(clock, MFGPT_REG_SETUP, (u16) ~MFGPT_SETUP_CNTEN |
+ MFGPT_SETUP_CMP1 | MFGPT_SETUP_CMP2);
}
static int mfgpt_next_event(unsigned long, struct clock_event_device *);
@@ -263,7 +267,7 @@ static struct clock_event_device mfgpt_clockevent = {
.shift = 32
};
-static inline void mfgpt_start_timer(u16 clock, u16 delta)
+static void mfgpt_start_timer(u16 delta)
{
geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_CMP2, (u16) delta);
geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_COUNTER, 0);
@@ -278,21 +282,25 @@ static void mfgpt_set_mode(enum clock_event_mode mode,
mfgpt_disable_timer(mfgpt_event_clock);
if (mode == CLOCK_EVT_MODE_PERIODIC)
- mfgpt_start_timer(mfgpt_event_clock, MFGPT_PERIODIC);
+ mfgpt_start_timer(MFGPT_PERIODIC);
mfgpt_tick_mode = mode;
}
static int mfgpt_next_event(unsigned long delta, struct clock_event_device *evt)
{
- mfgpt_start_timer(mfgpt_event_clock, delta);
+ mfgpt_start_timer(delta);
return 0;
}
-/* Assume (foolishly?), that this interrupt was due to our tick */
-
static irqreturn_t mfgpt_tick(int irq, void *dev_id)
{
+ u16 val = geode_mfgpt_read(mfgpt_event_clock, MFGPT_REG_SETUP);
+
+ /* See if the interrupt was for us */
+ if (!(val & (MFGPT_SETUP_SETUP | MFGPT_SETUP_CMP2 | MFGPT_SETUP_CMP1)))
+ return IRQ_NONE;
+
/* Turn off the clock (and clear the event) */
mfgpt_disable_timer(mfgpt_event_clock);
@@ -320,13 +328,12 @@ static struct irqaction mfgptirq = {
.name = "mfgpt-timer"
};
-static int __init mfgpt_timer_setup(void)
+int __init mfgpt_timer_setup(void)
{
int timer, ret;
u16 val;
- timer = geode_mfgpt_alloc_timer(MFGPT_TIMER_ANY, MFGPT_DOMAIN_WORKING,
- THIS_MODULE);
+ timer = geode_mfgpt_alloc_timer(MFGPT_TIMER_ANY, MFGPT_DOMAIN_WORKING);
if (timer < 0) {
printk(KERN_ERR
"mfgpt-timer: Could not allocate a MFPGT timer\n");
@@ -363,7 +370,7 @@ static int __init mfgpt_timer_setup(void)
&mfgpt_clockevent);
printk(KERN_INFO
- "mfgpt-timer: registering the MFGT timer as a clock event.\n");
+ "mfgpt-timer: registering the MFGPT timer as a clock event.\n");
clockevents_register_device(&mfgpt_clockevent);
return 0;
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 6ba33ca8715..1941482d4ca 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -27,7 +27,7 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
pci_write_config_byte(dev, 0xf4, config|0x2);
/* read xTPR register */
- raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word);
+ raw_pci_read(0, 0, 0x40, 0x4c, 2, &word);
if (!(word & (1 << 13))) {
dev_info(&dev->dev, "Intel E7520/7320/7525 detected; "
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index d1d8c347cc0..691ab4cb167 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -154,7 +154,11 @@ struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
EXPORT_SYMBOL(boot_cpu_data);
+#ifndef CONFIG_X86_PAE
unsigned long mmu_cr4_features;
+#else
+unsigned long mmu_cr4_features = X86_CR4_PAE;
+#endif
/* for MCA, but anyone else can use it if they want */
unsigned int machine_id;
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index e6757aaa202..a40051b71d9 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -53,7 +53,7 @@ EXPORT_SYMBOL(arch_register_cpu);
void arch_unregister_cpu(int num)
{
- return unregister_cpu(&per_cpu(cpu_devices, num).cpu);
+ unregister_cpu(&per_cpu(cpu_devices, num).cpu);
}
EXPORT_SYMBOL(arch_unregister_cpu);
#else
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index d1bc04006d1..8106bba41ec 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -46,6 +46,7 @@
#include <asm/pgalloc.h>
#include <asm/sections.h>
#include <asm/paravirt.h>
+#include <asm/setup.h>
unsigned int __VMALLOC_RESERVE = 128 << 20;
@@ -328,44 +329,38 @@ pteval_t __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;
void __init native_pagetable_setup_start(pgd_t *base)
{
-#ifdef CONFIG_X86_PAE
- int i;
+ unsigned long pfn, va;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
/*
- * Init entries of the first-level page table to the
- * zero page, if they haven't already been set up.
- *
- * In a normal native boot, we'll be running on a
- * pagetable rooted in swapper_pg_dir, but not in PAE
- * mode, so this will end up clobbering the mappings
- * for the lower 24Mbytes of the address space,
- * without affecting the kernel address space.
+ * Remove any mappings which extend past the end of physical
+ * memory from the boot time page table:
*/
- for (i = 0; i < USER_PTRS_PER_PGD; i++)
- set_pgd(&base[i],
- __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
-
- /* Make sure kernel address space is empty so that a pagetable
- will be allocated for it. */
- memset(&base[USER_PTRS_PER_PGD], 0,
- KERNEL_PGD_PTRS * sizeof(pgd_t));
-#else
+ for (pfn = max_low_pfn + 1; pfn < 1<<(32-PAGE_SHIFT); pfn++) {
+ va = PAGE_OFFSET + (pfn<<PAGE_SHIFT);
+ pgd = base + pgd_index(va);
+ if (!pgd_present(*pgd))
+ break;
+
+ pud = pud_offset(pgd, va);
+ pmd = pmd_offset(pud, va);
+ if (!pmd_present(*pmd))
+ break;
+
+ pte = pte_offset_kernel(pmd, va);
+ if (!pte_present(*pte))
+ break;
+
+ pte_clear(NULL, va, pte);
+ }
paravirt_alloc_pd(&init_mm, __pa(base) >> PAGE_SHIFT);
-#endif
}
void __init native_pagetable_setup_done(pgd_t *base)
{
-#ifdef CONFIG_X86_PAE
- /*
- * Add low memory identity-mappings - SMP needs it when
- * starting up on an AP from real-mode. In the non-PAE
- * case we already have these mappings through head.S.
- * All user-space mappings are explicitly cleared after
- * SMP startup.
- */
- set_pgd(&base[0], base[USER_PTRS_PER_PGD]);
-#endif
}
/*
@@ -374,9 +369,8 @@ void __init native_pagetable_setup_done(pgd_t *base)
* the boot process.
*
* If we're booting on native hardware, this will be a pagetable
- * constructed in arch/i386/kernel/head.S, and not running in PAE mode
- * (even if we'll end up running in PAE). The root of the pagetable
- * will be swapper_pg_dir.
+ * constructed in arch/x86/kernel/head_32.S. The root of the
+ * pagetable will be swapper_pg_dir.
*
* If we're booting paravirtualized under a hypervisor, then there are
* more options: we may already be running PAE, and the pagetable may
@@ -537,14 +531,6 @@ void __init paging_init(void)
load_cr3(swapper_pg_dir);
-#ifdef CONFIG_X86_PAE
- /*
- * We will bail out later - printk doesn't work right now so
- * the user would just see a hanging kernel.
- */
- if (cpu_has_pae)
- set_in_cr4(X86_CR4_PAE);
-#endif
__flush_tlb_all();
kmap_init();
@@ -675,13 +661,11 @@ void __init mem_init(void)
BUG_ON((unsigned long)high_memory > VMALLOC_START);
#endif /* double-sanity-check paranoia */
-#ifdef CONFIG_X86_PAE
- if (!cpu_has_pae)
- panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
-#endif
if (boot_cpu_data.wp_works_ok < 0)
test_wp_bit();
+ cpa_init();
+
/*
	 * Subtle. SMP is doing its boot stuff late (because it has to
* fork idle threads) - but it also needs low mappings for the
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 5fe880fc305..b59fc238151 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -528,13 +528,15 @@ void __init mem_init(void)
reservedpages << (PAGE_SHIFT-10),
datasize >> 10,
initsize >> 10);
+
+ cpa_init();
}
void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
- unsigned long addr;
+ unsigned long addr = begin;
- if (begin >= end)
+ if (addr >= end)
return;
/*
@@ -549,7 +551,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
#else
printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
- for (addr = begin; addr < end; addr += PAGE_SIZE) {
+ for (; addr < end; addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
init_page_count(virt_to_page(addr));
memset((void *)(addr & ~(PAGE_SIZE-1)),
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index ee6648fe6b1..a4897a85268 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -260,41 +260,46 @@ static int __init early_ioremap_debug_setup(char *str)
early_param("early_ioremap_debug", early_ioremap_debug_setup);
static __initdata int after_paging_init;
-static __initdata unsigned long bm_pte[1024]
+static __initdata pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
__attribute__((aligned(PAGE_SIZE)));
-static inline unsigned long * __init early_ioremap_pgd(unsigned long addr)
+static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
{
- return (unsigned long *)swapper_pg_dir + ((addr >> 22) & 1023);
+ pgd_t *pgd = &swapper_pg_dir[pgd_index(addr)];
+ pud_t *pud = pud_offset(pgd, addr);
+ pmd_t *pmd = pmd_offset(pud, addr);
+
+ return pmd;
}
-static inline unsigned long * __init early_ioremap_pte(unsigned long addr)
+static inline pte_t * __init early_ioremap_pte(unsigned long addr)
{
- return bm_pte + ((addr >> PAGE_SHIFT) & 1023);
+ return &bm_pte[pte_index(addr)];
}
void __init early_ioremap_init(void)
{
- unsigned long *pgd;
+ pmd_t *pmd;
if (early_ioremap_debug)
printk(KERN_INFO "early_ioremap_init()\n");
- pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
- *pgd = __pa(bm_pte) | _PAGE_TABLE;
+ pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
memset(bm_pte, 0, sizeof(bm_pte));
+ pmd_populate_kernel(&init_mm, pmd, bm_pte);
+
/*
- * The boot-ioremap range spans multiple pgds, for which
+ * The boot-ioremap range spans multiple pmds, for which
* we are not prepared:
*/
- if (pgd != early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END))) {
+ if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
WARN_ON(1);
- printk(KERN_WARNING "pgd %p != %p\n",
- pgd, early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END)));
+ printk(KERN_WARNING "pmd %p != %p\n",
+ pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
- fix_to_virt(FIX_BTMAP_BEGIN));
+ fix_to_virt(FIX_BTMAP_BEGIN));
printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END): %08lx\n",
- fix_to_virt(FIX_BTMAP_END));
+ fix_to_virt(FIX_BTMAP_END));
printk(KERN_WARNING "FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
printk(KERN_WARNING "FIX_BTMAP_BEGIN: %d\n",
@@ -304,28 +309,29 @@ void __init early_ioremap_init(void)
void __init early_ioremap_clear(void)
{
- unsigned long *pgd;
+ pmd_t *pmd;
if (early_ioremap_debug)
printk(KERN_INFO "early_ioremap_clear()\n");
- pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
- *pgd = 0;
- paravirt_release_pt(__pa(pgd) >> PAGE_SHIFT);
+ pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
+ pmd_clear(pmd);
+ paravirt_release_pt(__pa(bm_pte) >> PAGE_SHIFT);
__flush_tlb_all();
}
void __init early_ioremap_reset(void)
{
enum fixed_addresses idx;
- unsigned long *pte, phys, addr;
+ unsigned long addr, phys;
+ pte_t *pte;
after_paging_init = 1;
for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) {
addr = fix_to_virt(idx);
pte = early_ioremap_pte(addr);
- if (*pte & _PAGE_PRESENT) {
- phys = *pte & PAGE_MASK;
+ if (pte_present(*pte)) {
+ phys = pte_val(*pte) & PAGE_MASK;
set_fixmap(idx, phys);
}
}
@@ -334,7 +340,8 @@ void __init early_ioremap_reset(void)
static void __init __early_set_fixmap(enum fixed_addresses idx,
unsigned long phys, pgprot_t flags)
{
- unsigned long *pte, addr = __fix_to_virt(idx);
+ unsigned long addr = __fix_to_virt(idx);
+ pte_t *pte;
if (idx >= __end_of_fixed_addresses) {
BUG();
@@ -342,9 +349,9 @@ static void __init __early_set_fixmap(enum fixed_addresses idx,
}
pte = early_ioremap_pte(addr);
if (pgprot_val(flags))
- *pte = (phys & PAGE_MASK) | pgprot_val(flags);
+ set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
else
- *pte = 0;
+ pte_clear(NULL, addr, pte);
__flush_tlb_one(addr);
}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 8493c855582..440210a2277 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -8,6 +8,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>
+#include <linux/interrupt.h>
#include <asm/e820.h>
#include <asm/processor.h>
@@ -191,7 +192,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
* or when the present bit is not set. Otherwise we would return a
* pointer to a nonexisting mapping.
*/
-pte_t *lookup_address(unsigned long address, int *level)
+pte_t *lookup_address(unsigned long address, unsigned int *level)
{
pgd_t *pgd = pgd_offset_k(address);
pud_t *pud;
@@ -252,10 +253,11 @@ static int
try_preserve_large_page(pte_t *kpte, unsigned long address,
struct cpa_data *cpa)
{
- unsigned long nextpage_addr, numpages, pmask, psize, flags;
+ unsigned long nextpage_addr, numpages, pmask, psize, flags, addr;
pte_t new_pte, old_pte, *tmp;
pgprot_t old_prot, new_prot;
- int level, do_split = 1;
+ int i, do_split = 1;
+ unsigned int level;
spin_lock_irqsave(&pgd_lock, flags);
/*
@@ -302,6 +304,19 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
new_prot = static_protections(new_prot, address);
/*
+	 * We need to check the full range, i.e. whether
+	 * static_protections() requires a different pgprot for any of
+	 * the pages in the range we are trying to preserve:
+ */
+ addr = address + PAGE_SIZE;
+ for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE) {
+ pgprot_t chk_prot = static_protections(new_prot, addr);
+
+ if (pgprot_val(chk_prot) != pgprot_val(new_prot))
+ goto out_unlock;
+ }
+
+ /*
* If there are no changes, return. maxpages has been updated
* above:
*/
@@ -335,23 +350,103 @@ out_unlock:
return do_split;
}
+static LIST_HEAD(page_pool);
+static unsigned long pool_size, pool_pages, pool_low;
+static unsigned long pool_used, pool_failed, pool_refill;
+
+static void cpa_fill_pool(void)
+{
+ struct page *p;
+ gfp_t gfp = GFP_KERNEL;
+
+ /* Do not allocate from interrupt context */
+ if (in_irq() || irqs_disabled())
+ return;
+ /*
+	 * Check unlocked. It does not matter if we have one more
+	 * page in the pool. The bit lock avoids recursive pool
+ * allocations:
+ */
+ if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill))
+ return;
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ /*
+ * We could do:
+ * gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL;
+ * but this fails on !PREEMPT kernels
+ */
+ gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
+#endif
+
+ while (pool_pages < pool_size) {
+ p = alloc_pages(gfp, 0);
+ if (!p) {
+ pool_failed++;
+ break;
+ }
+ spin_lock_irq(&pgd_lock);
+ list_add(&p->lru, &page_pool);
+ pool_pages++;
+ spin_unlock_irq(&pgd_lock);
+ }
+ clear_bit_unlock(0, &pool_refill);
+}
+
+#define SHIFT_MB (20 - PAGE_SHIFT)
+#define ROUND_MB_GB ((1 << 10) - 1)
+#define SHIFT_MB_GB 10
+#define POOL_PAGES_PER_GB 16
+
+void __init cpa_init(void)
+{
+ struct sysinfo si;
+ unsigned long gb;
+
+ si_meminfo(&si);
+ /*
+ * Calculate the number of pool pages:
+ *
+ * Convert totalram (nr of pages) to MiB and round to the next
+	 * GiB. Shift MiB to GiB and multiply the result by
+ * POOL_PAGES_PER_GB:
+ */
+ gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
+ pool_size = POOL_PAGES_PER_GB * gb;
+ pool_low = pool_size;
+
+ cpa_fill_pool();
+ printk(KERN_DEBUG
+ "CPA: page pool initialized %lu of %lu pages preallocated\n",
+ pool_pages, pool_size);
+}
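A worked instance of the pool sizing above (4 KiB pages assumed): a 512 MiB machine rounds up to 1 GiB and therefore preallocates 16 pool pages.

#include <stdio.h>

#define PAGE_SHIFT 12			/* assumed 4 KiB pages */
#define SHIFT_MB (20 - PAGE_SHIFT)
#define ROUND_MB_GB ((1 << 10) - 1)
#define SHIFT_MB_GB 10
#define POOL_PAGES_PER_GB 16

int main(void)
{
	unsigned long totalram = (512UL << 20) >> PAGE_SHIFT; /* 512 MiB in pages */
	unsigned long gb = ((totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
	printf("pool_size = %lu pages\n", POOL_PAGES_PER_GB * gb); /* 16 */
	return 0;
}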
+
static int split_large_page(pte_t *kpte, unsigned long address)
{
unsigned long flags, pfn, pfninc = 1;
- gfp_t gfp_flags = GFP_KERNEL;
unsigned int i, level;
pte_t *pbase, *tmp;
pgprot_t ref_prot;
struct page *base;
-#ifdef CONFIG_DEBUG_PAGEALLOC
- gfp_flags = GFP_ATOMIC | __GFP_NOWARN;
-#endif
- base = alloc_pages(gfp_flags, 0);
- if (!base)
+ /*
+ * Get a page from the pool. The pool list is protected by the
+ * pgd_lock, which we have to take anyway for the split
+ * operation:
+ */
+ spin_lock_irqsave(&pgd_lock, flags);
+ if (list_empty(&page_pool)) {
+ spin_unlock_irqrestore(&pgd_lock, flags);
return -ENOMEM;
+ }
+
+ base = list_first_entry(&page_pool, struct page, lru);
+ list_del(&base->lru);
+ pool_pages--;
+
+ if (pool_pages < pool_low)
+ pool_low = pool_pages;
- spin_lock_irqsave(&pgd_lock, flags);
/*
* Check for races, another CPU might have split this page
* up for us already:
@@ -396,17 +491,24 @@ static int split_large_page(pte_t *kpte, unsigned long address)
base = NULL;
out_unlock:
+ /*
+ * If we dropped out via the lookup_address check under
+ * pgd_lock then stick the page back into the pool:
+ */
+ if (base) {
+ list_add(&base->lru, &page_pool);
+ pool_pages++;
+ } else
+ pool_used++;
spin_unlock_irqrestore(&pgd_lock, flags);
- if (base)
- __free_pages(base, 0);
-
return 0;
}
static int __change_page_attr(unsigned long address, struct cpa_data *cpa)
{
- int level, do_split, err;
+ int do_split, err;
+ unsigned int level;
struct page *kpte_page;
pte_t *kpte;
@@ -598,7 +700,7 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
* Check whether we really changed something:
*/
if (!cpa.flushtlb)
- return ret;
+ goto out;
/*
* No need to flush, when we did not set any of the caching
@@ -617,6 +719,8 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
else
cpa_flush_all(cache);
+out:
+ cpa_fill_pool();
return ret;
}
@@ -770,6 +874,12 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
* but that can deadlock->flush only current cpu:
*/
__flush_tlb_all();
+
+ /*
+ * Try to refill the page pool here. We can do this only after
+ * the tlb flush.
+ */
+ cpa_fill_pool();
}
#endif
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 52deabc72a6..b7c67a187b6 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -26,16 +26,37 @@ int pcibios_last_bus = -1;
unsigned long pirq_table_addr;
struct pci_bus *pci_root_bus;
struct pci_raw_ops *raw_pci_ops;
+struct pci_raw_ops *raw_pci_ext_ops;
+
+int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn,
+ int reg, int len, u32 *val)
+{
+ if (reg < 256 && raw_pci_ops)
+ return raw_pci_ops->read(domain, bus, devfn, reg, len, val);
+ if (raw_pci_ext_ops)
+ return raw_pci_ext_ops->read(domain, bus, devfn, reg, len, val);
+ return -EINVAL;
+}
+
+int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn,
+ int reg, int len, u32 val)
+{
+ if (reg < 256 && raw_pci_ops)
+ return raw_pci_ops->write(domain, bus, devfn, reg, len, val);
+ if (raw_pci_ext_ops)
+ return raw_pci_ext_ops->write(domain, bus, devfn, reg, len, val);
+ return -EINVAL;
+}
static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value)
{
- return raw_pci_ops->read(pci_domain_nr(bus), bus->number,
+ return raw_pci_read(pci_domain_nr(bus), bus->number,
devfn, where, size, value);
}
static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value)
{
- return raw_pci_ops->write(pci_domain_nr(bus), bus->number,
+ return raw_pci_write(pci_domain_nr(bus), bus->number,
devfn, where, size, value);
}
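The dispatch rule introduced here: a register offset below 256 is served by the legacy raw_pci_ops backend when one is registered, while offsets of 256 and above need the extended raw_pci_ext_ops (mmconfig) backend. A sketch of a caller, kernel-context only and not part of the patch (PCI_DEVFN and PCI_VENDOR_ID are the usual kernel macros):

    u32 vendor, ext_reg;

    /* reg 0x00 < 256: routed to raw_pci_ops (conf1/conf2) if present */
    raw_pci_read(0, 0, PCI_DEVFN(0, 0), PCI_VENDOR_ID, 2, &vendor);

    /* reg 0x100 >= 256: only raw_pci_ext_ops (mmconfig) can reach it */
    raw_pci_read(0, 0, PCI_DEVFN(0, 0), 0x100, 4, &ext_reg);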
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c
index 431c9a51b15..42f3e4cad17 100644
--- a/arch/x86/pci/direct.c
+++ b/arch/x86/pci/direct.c
@@ -14,7 +14,7 @@
#define PCI_CONF1_ADDRESS(bus, devfn, reg) \
(0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3))
-int pci_conf1_read(unsigned int seg, unsigned int bus,
+static int pci_conf1_read(unsigned int seg, unsigned int bus,
unsigned int devfn, int reg, int len, u32 *value)
{
unsigned long flags;
@@ -45,7 +45,7 @@ int pci_conf1_read(unsigned int seg, unsigned int bus,
return 0;
}
-int pci_conf1_write(unsigned int seg, unsigned int bus,
+static int pci_conf1_write(unsigned int seg, unsigned int bus,
unsigned int devfn, int reg, int len, u32 value)
{
unsigned long flags;
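PCI_CONF1_ADDRESS packs the enable bit, bus, devfn and the dword-aligned register offset into the address word written to port 0xCF8. A standalone check of the encoding (PCI_DEVFN reproduced from the kernel headers; parentheses added around the macro arguments here for hygiene):

    #include <stdio.h>

    #define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07))
    #define PCI_CONF1_ADDRESS(bus, devfn, reg) \
            (0x80000000 | ((bus) << 16) | ((devfn) << 8) | ((reg) & ~3))

    int main(void)
    {
            /* bus 0, device 2, function 0, register 0x48 */
            printf("0x%08x\n", PCI_CONF1_ADDRESS(0, PCI_DEVFN(2, 0), 0x48));
            /* prints 0x80001048 */
            return 0;
    }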
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 74d30ff33c4..a5ef5f55137 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -215,7 +215,8 @@ static int quirk_aspm_offset[MAX_PCIEROOT << 3];
static int quirk_pcie_aspm_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value)
{
- return raw_pci_ops->read(0, bus->number, devfn, where, size, value);
+ return raw_pci_read(pci_domain_nr(bus), bus->number,
+ devfn, where, size, value);
}
/*
@@ -231,7 +232,8 @@ static int quirk_pcie_aspm_write(struct pci_bus *bus, unsigned int devfn, int wh
if ((offset) && (where == offset))
value = value & 0xfffffffc;
- return raw_pci_ops->write(0, bus->number, devfn, where, size, value);
+ return raw_pci_write(pci_domain_nr(bus), bus->number,
+ devfn, where, size, value);
}
static struct pci_ops quirk_pcie_aspm_ops = {
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index 5565d7016b7..e041ced0ce1 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -22,7 +22,7 @@ static void __devinit pcibios_fixup_peer_bridges(void)
if (pci_find_bus(0, n))
continue;
for (devfn = 0; devfn < 256; devfn += 8) {
- if (!raw_pci_ops->read(0, n, devfn, PCI_VENDOR_ID, 2, &l) &&
+ if (!raw_pci_read(0, n, devfn, PCI_VENDOR_ID, 2, &l) &&
l != 0x0000 && l != 0xffff) {
DBG("Found device at %02x:%02x [%04x]\n", n, devfn, l);
printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n);
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 4df637e34f8..8d54df4dfaa 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -22,46 +22,13 @@
#define MMCONFIG_APER_MIN (2 * 1024*1024)
#define MMCONFIG_APER_MAX (256 * 1024*1024)
-DECLARE_BITMAP(pci_mmcfg_fallback_slots, 32*PCI_MMCFG_MAX_CHECK_BUS);
-
/* Indicate if the mmcfg resources have been placed into the resource table. */
static int __initdata pci_mmcfg_resources_inserted;
-/* K8 systems have some devices (typically in the builtin northbridge)
- that are only accessible using type1
- Normally this can be expressed in the MCFG by not listing them
- and assigning suitable _SEGs, but this isn't implemented in some BIOS.
- Instead try to discover all devices on bus 0 that are unreachable using MM
- and fallback for them. */
-static void __init unreachable_devices(void)
-{
- int i, bus;
- /* Use the max bus number from ACPI here? */
- for (bus = 0; bus < PCI_MMCFG_MAX_CHECK_BUS; bus++) {
- for (i = 0; i < 32; i++) {
- unsigned int devfn = PCI_DEVFN(i, 0);
- u32 val1, val2;
-
- pci_conf1_read(0, bus, devfn, 0, 4, &val1);
- if (val1 == 0xffffffff)
- continue;
-
- if (pci_mmcfg_arch_reachable(0, bus, devfn)) {
- raw_pci_ops->read(0, bus, devfn, 0, 4, &val2);
- if (val1 == val2)
- continue;
- }
- set_bit(i + 32 * bus, pci_mmcfg_fallback_slots);
- printk(KERN_NOTICE "PCI: No mmconfig possible on device"
- " %02x:%02x\n", bus, i);
- }
- }
-}
-
static const char __init *pci_mmcfg_e7520(void)
{
u32 win;
- pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0xce, 2, &win);
+ pci_direct_conf1.read(0, 0, PCI_DEVFN(0,0), 0xce, 2, &win);
win = win & 0xf000;
if(win == 0x0000 || win == 0xf000)
@@ -86,7 +53,7 @@ static const char __init *pci_mmcfg_intel_945(void)
pci_mmcfg_config_num = 1;
- pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0x48, 4, &pciexbar);
+ pci_direct_conf1.read(0, 0, PCI_DEVFN(0,0), 0x48, 4, &pciexbar);
/* Enable bit */
if (!(pciexbar & 1))
@@ -151,7 +118,7 @@ static int __init pci_mmcfg_check_hostbridge(void)
int i;
const char *name;
- pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0, 4, &l);
+ pci_direct_conf1.read(0, 0, PCI_DEVFN(0,0), 0, 4, &l);
vendor = l & 0xffff;
device = (l >> 16) & 0xffff;
@@ -270,8 +237,6 @@ void __init pci_mmcfg_init(int type)
return;
if (pci_mmcfg_arch_init()) {
- if (type == 1)
- unreachable_devices();
if (known_bridge)
pci_mmcfg_insert_resources(IORESOURCE_BUSY);
pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c
index 1bf5816d34c..081816ada05 100644
--- a/arch/x86/pci/mmconfig_32.c
+++ b/arch/x86/pci/mmconfig_32.c
@@ -30,10 +30,6 @@ static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn)
struct acpi_mcfg_allocation *cfg;
int cfg_num;
- if (seg == 0 && bus < PCI_MMCFG_MAX_CHECK_BUS &&
- test_bit(PCI_SLOT(devfn) + 32*bus, pci_mmcfg_fallback_slots))
- return 0;
-
for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) {
cfg = &pci_mmcfg_config[cfg_num];
if (cfg->pci_segment == seg &&
@@ -68,13 +64,13 @@ static int pci_mmcfg_read(unsigned int seg, unsigned int bus,
u32 base;
if ((bus > 255) || (devfn > 255) || (reg > 4095)) {
- *value = -1;
+err: *value = -1;
return -EINVAL;
}
base = get_base_addr(seg, bus, devfn);
if (!base)
- return pci_conf1_read(seg,bus,devfn,reg,len,value);
+ goto err;
spin_lock_irqsave(&pci_config_lock, flags);
@@ -107,7 +103,7 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus,
base = get_base_addr(seg, bus, devfn);
if (!base)
- return pci_conf1_write(seg,bus,devfn,reg,len,value);
+ return -EINVAL;
spin_lock_irqsave(&pci_config_lock, flags);
@@ -134,15 +130,9 @@ static struct pci_raw_ops pci_mmcfg = {
.write = pci_mmcfg_write,
};
-int __init pci_mmcfg_arch_reachable(unsigned int seg, unsigned int bus,
- unsigned int devfn)
-{
- return get_base_addr(seg, bus, devfn) != 0;
-}
-
int __init pci_mmcfg_arch_init(void)
{
- printk(KERN_INFO "PCI: Using MMCONFIG\n");
- raw_pci_ops = &pci_mmcfg;
+ printk(KERN_INFO "PCI: Using MMCONFIG for extended config space\n");
+ raw_pci_ext_ops = &pci_mmcfg;
return 1;
}
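Behind get_base_addr(), each MCFG entry supplies a base address for a (segment, bus range) pair, and the per-function window inside that aperture follows the ECAM layout: 1 MiB per bus, 4 KiB per function, with the register offset inside the 4 KiB page. A small userspace model of the offset math (ecam_offset() is a made-up name):

    #include <stdio.h>

    static unsigned long ecam_offset(unsigned int bus, unsigned int devfn,
                                     unsigned int reg)
    {
            return ((unsigned long)bus << 20) | (devfn << 12) | (reg & 0xfff);
    }

    int main(void)
    {
            /* bus 1, device 2, function 0, register 0x100 */
            printf("0x%lx\n", ecam_offset(1, 0x10, 0x100)); /* 0x110100 */
            return 0;
    }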
diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c
index 4095e4d66a1..9207fd49233 100644
--- a/arch/x86/pci/mmconfig_64.c
+++ b/arch/x86/pci/mmconfig_64.c
@@ -40,9 +40,7 @@ static char __iomem *get_virt(unsigned int seg, unsigned bus)
static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn)
{
char __iomem *addr;
- if (seg == 0 && bus < PCI_MMCFG_MAX_CHECK_BUS &&
- test_bit(32*bus + PCI_SLOT(devfn), pci_mmcfg_fallback_slots))
- return NULL;
+
addr = get_virt(seg, bus);
if (!addr)
return NULL;
@@ -56,13 +54,13 @@ static int pci_mmcfg_read(unsigned int seg, unsigned int bus,
/* Why do we have this when nobody checks it. How about a BUG()!? -AK */
if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) {
- *value = -1;
+err: *value = -1;
return -EINVAL;
}
addr = pci_dev_base(seg, bus, devfn);
if (!addr)
- return pci_conf1_read(seg,bus,devfn,reg,len,value);
+ goto err;
switch (len) {
case 1:
@@ -90,7 +88,7 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus,
addr = pci_dev_base(seg, bus, devfn);
if (!addr)
- return pci_conf1_write(seg,bus,devfn,reg,len,value);
+ return -EINVAL;
switch (len) {
case 1:
@@ -126,12 +124,6 @@ static void __iomem * __init mcfg_ioremap(struct acpi_mcfg_allocation *cfg)
return addr;
}
-int __init pci_mmcfg_arch_reachable(unsigned int seg, unsigned int bus,
- unsigned int devfn)
-{
- return pci_dev_base(seg, bus, devfn) != NULL;
-}
-
int __init pci_mmcfg_arch_init(void)
{
int i;
@@ -152,6 +144,6 @@ int __init pci_mmcfg_arch_init(void)
return 0;
}
}
- raw_pci_ops = &pci_mmcfg;
+ raw_pci_ext_ops = &pci_mmcfg;
return 1;
}
diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h
index ac56d3916c5..3431518d921 100644
--- a/arch/x86/pci/pci.h
+++ b/arch/x86/pci/pci.h
@@ -85,10 +85,17 @@ extern spinlock_t pci_config_lock;
extern int (*pcibios_enable_irq)(struct pci_dev *dev);
extern void (*pcibios_disable_irq)(struct pci_dev *dev);
-extern int pci_conf1_write(unsigned int seg, unsigned int bus,
- unsigned int devfn, int reg, int len, u32 value);
-extern int pci_conf1_read(unsigned int seg, unsigned int bus,
- unsigned int devfn, int reg, int len, u32 *value);
+struct pci_raw_ops {
+ int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn,
+ int reg, int len, u32 *val);
+ int (*write)(unsigned int domain, unsigned int bus, unsigned int devfn,
+ int reg, int len, u32 val);
+};
+
+extern struct pci_raw_ops *raw_pci_ops;
+extern struct pci_raw_ops *raw_pci_ext_ops;
+
+extern struct pci_raw_ops pci_direct_conf1;
extern int pci_direct_probe(void);
extern void pci_direct_init(int type);
@@ -98,13 +105,6 @@ extern void pcibios_sort(void);
/* pci-mmconfig.c */
-/* Verify the first 16 busses. We assume that systems with more busses
- get MCFG right. */
-#define PCI_MMCFG_MAX_CHECK_BUS 16
-extern DECLARE_BITMAP(pci_mmcfg_fallback_slots, 32*PCI_MMCFG_MAX_CHECK_BUS);
-
-extern int __init pci_mmcfg_arch_reachable(unsigned int seg, unsigned int bus,
- unsigned int devfn);
extern int __init pci_mmcfg_arch_init(void);
/*
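With struct pci_raw_ops declared here, a config-space backend registers itself by filling one in and assigning the appropriate pointer, just as mmconfig_32.c and mmconfig_64.c assign raw_pci_ext_ops above. A hypothetical stub, sketched for shape rather than buildable in isolation:

    static int my_ext_read(unsigned int domain, unsigned int bus,
                           unsigned int devfn, int reg, int len, u32 *val)
    {
            *val = ~0;      /* pretend the device is absent */
            return 0;
    }

    static int my_ext_write(unsigned int domain, unsigned int bus,
                            unsigned int devfn, int reg, int len, u32 val)
    {
            return 0;
    }

    static struct pci_raw_ops my_ext_ops = {
            .read  = my_ext_read,
            .write = my_ext_write,
    };

    /* in an init path, once the backend's mappings are ready: */
    raw_pci_ext_ops = &my_ext_ops;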
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c
index 8ecb1c72259..c2df4e97eed 100644
--- a/arch/x86/pci/visws.c
+++ b/arch/x86/pci/visws.c
@@ -13,9 +13,6 @@
#include "pci.h"
-
-extern struct pci_raw_ops pci_direct_conf1;
-
static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; }
static void pci_visws_disable_irq(struct pci_dev *dev) { }
diff --git a/arch/x86/power/Makefile b/arch/x86/power/Makefile
index d764ec95006..9ff4d5b55ad 100644
--- a/arch/x86/power/Makefile
+++ b/arch/x86/power/Makefile
@@ -1,2 +1,2 @@
-obj-$(CONFIG_PM) += cpu.o
-obj-$(CONFIG_HIBERNATION) += swsusp.o suspend.o
+obj-$(CONFIG_PM_SLEEP) += cpu_$(BITS).o
+obj-$(CONFIG_HIBERNATION) += hibernate_$(BITS).o hibernate_asm_$(BITS).o
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu_32.c
index efcf620d143..7f9c6da04a4 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu_32.c
@@ -40,7 +40,7 @@ static void __save_processor_state(struct saved_context *ctxt)
savesegment(ss, ctxt->ss);
/*
- * control registers
+ * control registers
*/
ctxt->cr0 = read_cr0();
ctxt->cr2 = read_cr2();
diff --git a/arch/x86/kernel/suspend_64.c b/arch/x86/power/cpu_64.c
index 7ac7130022f..66bdfb591fd 100644
--- a/arch/x86/kernel/suspend_64.c
+++ b/arch/x86/power/cpu_64.c
@@ -1,8 +1,9 @@
/*
- * Suspend support specific for i386.
+ * Suspend and hibernation support for x86-64
*
* Distribute under GPLv2
*
+ * Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl>
* Copyright (c) 2002 Pavel Machek <pavel@suse.cz>
* Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
*/
@@ -14,9 +15,6 @@
#include <asm/pgtable.h>
#include <asm/mtrr.h>
-/* References to section boundaries */
-extern const void __nosave_begin, __nosave_end;
-
static void fix_processor_context(void);
struct saved_context saved_context;
@@ -63,7 +61,7 @@ static void __save_processor_state(struct saved_context *ctxt)
mtrr_save_fixed_ranges(NULL);
/*
- * control registers
+ * control registers
*/
rdmsrl(MSR_EFER, ctxt->efer);
ctxt->cr0 = read_cr0();
@@ -166,155 +164,3 @@ static void fix_processor_context(void)
loaddebug(&current->thread, 7);
}
}
-
-#ifdef CONFIG_HIBERNATION
-/* Defined in arch/x86_64/kernel/suspend_asm.S */
-extern int restore_image(void);
-
-/*
- * Address to jump to in the last phase of restore in order to get to the image
- * kernel's text (this value is passed in the image header).
- */
-unsigned long restore_jump_address;
-
-/*
- * Value of the cr3 register from before the hibernation (this value is passed
- * in the image header).
- */
-unsigned long restore_cr3;
-
-pgd_t *temp_level4_pgt;
-
-void *relocated_restore_code;
-
-static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
-{
- long i, j;
-
- i = pud_index(address);
- pud = pud + i;
- for (; i < PTRS_PER_PUD; pud++, i++) {
- unsigned long paddr;
- pmd_t *pmd;
-
- paddr = address + i*PUD_SIZE;
- if (paddr >= end)
- break;
-
- pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
- if (!pmd)
- return -ENOMEM;
- set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
- for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
- unsigned long pe;
-
- if (paddr >= end)
- break;
- pe = __PAGE_KERNEL_LARGE_EXEC | paddr;
- pe &= __supported_pte_mask;
- set_pmd(pmd, __pmd(pe));
- }
- }
- return 0;
-}
-
-static int set_up_temporary_mappings(void)
-{
- unsigned long start, end, next;
- int error;
-
- temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
- if (!temp_level4_pgt)
- return -ENOMEM;
-
- /* It is safe to reuse the original kernel mapping */
- set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
- init_level4_pgt[pgd_index(__START_KERNEL_map)]);
-
- /* Set up the direct mapping from scratch */
- start = (unsigned long)pfn_to_kaddr(0);
- end = (unsigned long)pfn_to_kaddr(end_pfn);
-
- for (; start < end; start = next) {
- pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC);
- if (!pud)
- return -ENOMEM;
- next = start + PGDIR_SIZE;
- if (next > end)
- next = end;
- if ((error = res_phys_pud_init(pud, __pa(start), __pa(next))))
- return error;
- set_pgd(temp_level4_pgt + pgd_index(start),
- mk_kernel_pgd(__pa(pud)));
- }
- return 0;
-}
-
-int swsusp_arch_resume(void)
-{
- int error;
-
- /* We have got enough memory and from now on we cannot recover */
- if ((error = set_up_temporary_mappings()))
- return error;
-
- relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC);
- if (!relocated_restore_code)
- return -ENOMEM;
- memcpy(relocated_restore_code, &core_restore_code,
- &restore_registers - &core_restore_code);
-
- restore_image();
- return 0;
-}
-
-/*
- * pfn_is_nosave - check if given pfn is in the 'nosave' section
- */
-
-int pfn_is_nosave(unsigned long pfn)
-{
- unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
- unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
- return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
-}
-
-struct restore_data_record {
- unsigned long jump_address;
- unsigned long cr3;
- unsigned long magic;
-};
-
-#define RESTORE_MAGIC 0x0123456789ABCDEFUL
-
-/**
- * arch_hibernation_header_save - populate the architecture specific part
- * of a hibernation image header
- * @addr: address to save the data at
- */
-int arch_hibernation_header_save(void *addr, unsigned int max_size)
-{
- struct restore_data_record *rdr = addr;
-
- if (max_size < sizeof(struct restore_data_record))
- return -EOVERFLOW;
- rdr->jump_address = restore_jump_address;
- rdr->cr3 = restore_cr3;
- rdr->magic = RESTORE_MAGIC;
- return 0;
-}
-
-/**
- * arch_hibernation_header_restore - read the architecture specific data
- * from the hibernation image header
- * @addr: address to read the data from
- */
-int arch_hibernation_header_restore(void *addr)
-{
- struct restore_data_record *rdr = addr;
-
- restore_jump_address = rdr->jump_address;
- restore_cr3 = rdr->cr3;
- return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL;
-}
-#endif /* CONFIG_HIBERNATION */
diff --git a/arch/x86/power/suspend.c b/arch/x86/power/hibernate_32.c
index a0020b913f3..f2b6e3f11bf 100644
--- a/arch/x86/power/suspend.c
+++ b/arch/x86/power/hibernate_32.c
@@ -1,5 +1,5 @@
/*
- * Suspend support specific for i386 - temporary page tables
+ * Hibernation support specific for i386 - temporary page tables
*
* Distribute under GPLv2
*
@@ -13,7 +13,7 @@
#include <asm/page.h>
#include <asm/pgtable.h>
-/* Defined in arch/i386/power/swsusp.S */
+/* Defined in hibernate_asm_32.S */
extern int restore_image(void);
/* References to section boundaries */
@@ -23,7 +23,7 @@ extern const void __nosave_begin, __nosave_end;
pgd_t *resume_pg_dir;
/* The following three functions are based on the analogous code in
- * arch/i386/mm/init.c
+ * arch/x86/mm/init_32.c
*/
/*
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
new file mode 100644
index 00000000000..b542355e0e3
--- /dev/null
+++ b/arch/x86/power/hibernate_64.c
@@ -0,0 +1,169 @@
+/*
+ * Hibernation support for x86-64
+ *
+ * Distribute under GPLv2
+ *
+ * Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl>
+ * Copyright (c) 2002 Pavel Machek <pavel@suse.cz>
+ * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
+ */
+
+#include <linux/smp.h>
+#include <linux/suspend.h>
+#include <asm/proto.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/mtrr.h>
+
+/* References to section boundaries */
+extern const void __nosave_begin, __nosave_end;
+
+/* Defined in hibernate_asm_64.S */
+extern int restore_image(void);
+
+/*
+ * Address to jump to in the last phase of restore in order to get to the image
+ * kernel's text (this value is passed in the image header).
+ */
+unsigned long restore_jump_address;
+
+/*
+ * Value of the cr3 register from before the hibernation (this value is passed
+ * in the image header).
+ */
+unsigned long restore_cr3;
+
+pgd_t *temp_level4_pgt;
+
+void *relocated_restore_code;
+
+static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
+{
+ long i, j;
+
+ i = pud_index(address);
+ pud = pud + i;
+ for (; i < PTRS_PER_PUD; pud++, i++) {
+ unsigned long paddr;
+ pmd_t *pmd;
+
+ paddr = address + i*PUD_SIZE;
+ if (paddr >= end)
+ break;
+
+ pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
+ if (!pmd)
+ return -ENOMEM;
+ set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+ for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
+ unsigned long pe;
+
+ if (paddr >= end)
+ break;
+ pe = __PAGE_KERNEL_LARGE_EXEC | paddr;
+ pe &= __supported_pte_mask;
+ set_pmd(pmd, __pmd(pe));
+ }
+ }
+ return 0;
+}
+
+static int set_up_temporary_mappings(void)
+{
+ unsigned long start, end, next;
+ int error;
+
+ temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
+ if (!temp_level4_pgt)
+ return -ENOMEM;
+
+ /* It is safe to reuse the original kernel mapping */
+ set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
+ init_level4_pgt[pgd_index(__START_KERNEL_map)]);
+
+ /* Set up the direct mapping from scratch */
+ start = (unsigned long)pfn_to_kaddr(0);
+ end = (unsigned long)pfn_to_kaddr(end_pfn);
+
+ for (; start < end; start = next) {
+ pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC);
+ if (!pud)
+ return -ENOMEM;
+ next = start + PGDIR_SIZE;
+ if (next > end)
+ next = end;
+ if ((error = res_phys_pud_init(pud, __pa(start), __pa(next))))
+ return error;
+ set_pgd(temp_level4_pgt + pgd_index(start),
+ mk_kernel_pgd(__pa(pud)));
+ }
+ return 0;
+}
+
+int swsusp_arch_resume(void)
+{
+ int error;
+
+ /* We have got enough memory and from now on we cannot recover */
+ if ((error = set_up_temporary_mappings()))
+ return error;
+
+ relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC);
+ if (!relocated_restore_code)
+ return -ENOMEM;
+ memcpy(relocated_restore_code, &core_restore_code,
+ &restore_registers - &core_restore_code);
+
+ restore_image();
+ return 0;
+}
+
+/*
+ * pfn_is_nosave - check if given pfn is in the 'nosave' section
+ */
+
+int pfn_is_nosave(unsigned long pfn)
+{
+ unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
+ unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
+ return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
+}
+
+struct restore_data_record {
+ unsigned long jump_address;
+ unsigned long cr3;
+ unsigned long magic;
+};
+
+#define RESTORE_MAGIC 0x0123456789ABCDEFUL
+
+/**
+ * arch_hibernation_header_save - populate the architecture specific part
+ * of a hibernation image header
+ * @addr: address to save the data at
+ */
+int arch_hibernation_header_save(void *addr, unsigned int max_size)
+{
+ struct restore_data_record *rdr = addr;
+
+ if (max_size < sizeof(struct restore_data_record))
+ return -EOVERFLOW;
+ rdr->jump_address = restore_jump_address;
+ rdr->cr3 = restore_cr3;
+ rdr->magic = RESTORE_MAGIC;
+ return 0;
+}
+
+/**
+ * arch_hibernation_header_restore - read the architecture specific data
+ * from the hibernation image header
+ * @addr: address to read the data from
+ */
+int arch_hibernation_header_restore(void *addr)
+{
+ struct restore_data_record *rdr = addr;
+
+ restore_jump_address = rdr->jump_address;
+ restore_cr3 = rdr->cr3;
+ return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL;
+}
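The restore_data_record round trip can be exercised in plain C: the save side copies the record into the header area, the restore side copies it back and accepts the image only if the magic matches. A minimal demonstration assuming an LP64 build (unsigned long is 64 bits); both addresses are invented:

    #include <stdio.h>
    #include <string.h>

    struct restore_data_record {
            unsigned long jump_address;
            unsigned long cr3;
            unsigned long magic;
    };
    #define RESTORE_MAGIC 0x0123456789ABCDEFUL

    int main(void)
    {
            unsigned char header[64];
            struct restore_data_record out = {
                    .jump_address = 0xffffffff80200000UL,
                    .cr3          = 0x1000,
                    .magic        = RESTORE_MAGIC,
            }, in;

            memcpy(header, &out, sizeof(out));  /* ..._header_save() side */
            memcpy(&in, header, sizeof(in));    /* ..._header_restore() side */
            printf("%s\n", in.magic == RESTORE_MAGIC ? "header ok" : "-EINVAL");
            return 0;
    }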
diff --git a/arch/x86/power/swsusp.S b/arch/x86/power/hibernate_asm_32.S
index 53662e05b39..b95aa6cfe3c 100644
--- a/arch/x86/power/swsusp.S
+++ b/arch/x86/power/hibernate_asm_32.S
@@ -1,7 +1,6 @@
.text
-/* Originally gcc generated, modified by hand
- *
+/*
* This may not use any stack, nor any variable that is not "NoSave":
*
 * It's rewriting one kernel image with another. What is stack in "old"
diff --git a/arch/x86/kernel/suspend_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index aeb9a4d7681..1deb3244b99 100644
--- a/arch/x86/kernel/suspend_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -1,7 +1,12 @@
-/* Copyright 2004,2005 Pavel Machek <pavel@suse.cz>, Andi Kleen <ak@suse.de>, Rafael J. Wysocki <rjw@sisk.pl>
+/*
+ * Hibernation support for x86-64
*
* Distribute under GPLv2.
*
+ * Copyright 2007 Rafael J. Wysocki <rjw@sisk.pl>
+ * Copyright 2005 Andi Kleen <ak@suse.de>
+ * Copyright 2004 Pavel Machek <pavel@suse.cz>
+ *
* swsusp_arch_resume must not use any stack or any nonlocal variables while
* copying pages:
*
@@ -9,7 +14,7 @@
* image could very well be data page in "new" image, and overwriting
* your own stack under you is bad idea.
*/
-
+
.text
#include <linux/linkage.h>
#include <asm/segment.h>
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 45aa771e73a..0144395448a 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -58,7 +58,7 @@
xmaddr_t arbitrary_virt_to_machine(unsigned long address)
{
- int level;
+ unsigned int level;
pte_t *pte = lookup_address(address, &level);
unsigned offset = address & PAGE_MASK;
@@ -71,7 +71,7 @@ void make_lowmem_page_readonly(void *vaddr)
{
pte_t *pte, ptev;
unsigned long address = (unsigned long)vaddr;
- int level;
+ unsigned int level;
pte = lookup_address(address, &level);
BUG_ON(pte == NULL);
@@ -86,7 +86,7 @@ void make_lowmem_page_readwrite(void *vaddr)
{
pte_t *pte, ptev;
unsigned long address = (unsigned long)vaddr;
- int level;
+ unsigned int level;
pte = lookup_address(address, &level);
BUG_ON(pte == NULL);
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index b3721fd6877..c39e1a5aa24 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -217,17 +217,17 @@ unsigned long long xen_sched_clock(void)
/* Get the CPU speed from Xen */
unsigned long xen_cpu_khz(void)
{
- u64 cpu_khz = 1000000ULL << 32;
+ u64 xen_khz = 1000000ULL << 32;
const struct vcpu_time_info *info =
&HYPERVISOR_shared_info->vcpu_info[0].time;
- do_div(cpu_khz, info->tsc_to_system_mul);
+ do_div(xen_khz, info->tsc_to_system_mul);
if (info->tsc_shift < 0)
- cpu_khz <<= -info->tsc_shift;
+ xen_khz <<= -info->tsc_shift;
else
- cpu_khz >>= info->tsc_shift;
+ xen_khz >>= info->tsc_shift;
- return cpu_khz;
+ return xen_khz;
}
/*
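The renamed xen_khz variable inverts Xen's 32.32 fixed-point tsc_to_system_mul (nanoseconds per TSC tick, combined with tsc_shift) to recover the TSC frequency in kHz. A standalone check with an invented multiplier describing a 2 GHz TSC:

    #include <stdio.h>

    int main(void)
    {
            /* 0x80000000 in 32.32 fixed point == 0.5 ns per tick (2 GHz) */
            unsigned int tsc_to_system_mul = 0x80000000u;
            int tsc_shift = 0;
            unsigned long long khz = 1000000ULL << 32;

            khz /= tsc_to_system_mul;           /* the do_div() step */
            if (tsc_shift < 0)
                    khz <<= -tsc_shift;
            else
                    khz >>= tsc_shift;
            printf("%llu kHz\n", khz);          /* prints 2000000 */
            return 0;
    }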
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index fd36764d7fb..9fc8551a1cf 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -14,6 +14,7 @@ config ZONE_DMA
config XTENSA
bool
default y
+ select HAVE_IDE
help
Xtensa processors are 32-bit RISC machines designed by Tensilica
primarily for embedded systems. These processors are both