diff options
Diffstat (limited to 'arch')
157 files changed, 2131 insertions, 1541 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 4b0669cbb3b..2505740b81d 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -30,6 +30,10 @@ config OPROFILE_EVENT_MULTIPLEX config HAVE_OPROFILE bool +config OPROFILE_NMI_TIMER + def_bool y + depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI + config KPROBES bool "Kprobes" depends on MODULES diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 776d76b8cb6..b259c7c644e 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1246,7 +1246,7 @@ config PL310_ERRATA_588369 config ARM_ERRATA_720789 bool "ARM errata: TLBIASIDIS and TLBIMVAIS operations can broadcast a faulty ASID" - depends on CPU_V7 && SMP + depends on CPU_V7 help This option enables the workaround for the 720789 Cortex-A9 (prior to r2p0) erratum. A faulty ASID can be sent to the other CPUs for the @@ -1282,7 +1282,7 @@ config ARM_ERRATA_743622 config ARM_ERRATA_751472 bool "ARM errata: Interrupted ICIALLUIS may prevent completion of broadcasted operation" - depends on CPU_V7 && SMP + depends on CPU_V7 help This option enables the workaround for the 751472 Cortex-A9 (prior to r3p0) erratum. An interrupted ICIALLUIS operation may prevent the diff --git a/arch/arm/common/pl330.c b/arch/arm/common/pl330.c index f407a6b35d3..8d8df744f7a 100644 --- a/arch/arm/common/pl330.c +++ b/arch/arm/common/pl330.c @@ -221,17 +221,6 @@ */ #define MCODE_BUFF_PER_REQ 256 -/* - * Mark a _pl330_req as free. - * We do it by writing DMAEND as the first instruction - * because no valid request is going to have DMAEND as - * its first instruction to execute. - */ -#define MARK_FREE(req) do { \ - _emit_END(0, (req)->mc_cpu); \ - (req)->mc_len = 0; \ - } while (0) - /* If the _pl330_req is available to the client */ #define IS_FREE(req) (*((u8 *)((req)->mc_cpu)) == CMD_DMAEND) @@ -301,8 +290,10 @@ struct pl330_thread { struct pl330_dmac *dmac; /* Only two at a time */ struct _pl330_req req[2]; - /* Index of the last submitted request */ + /* Index of the last enqueued request */ unsigned lstenq; + /* Index of the last submitted request or -1 if the DMA is stopped */ + int req_running; }; enum pl330_dmac_state { @@ -778,6 +769,22 @@ static inline void _execute_DBGINSN(struct pl330_thread *thrd, writel(0, regs + DBGCMD); } +/* + * Mark a _pl330_req as free. + * We do it by writing DMAEND as the first instruction + * because no valid request is going to have DMAEND as + * its first instruction to execute. + */ +static void mark_free(struct pl330_thread *thrd, int idx) +{ + struct _pl330_req *req = &thrd->req[idx]; + + _emit_END(0, req->mc_cpu); + req->mc_len = 0; + + thrd->req_running = -1; +} + static inline u32 _state(struct pl330_thread *thrd) { void __iomem *regs = thrd->dmac->pinfo->base; @@ -836,31 +843,6 @@ static inline u32 _state(struct pl330_thread *thrd) } } -/* If the request 'req' of thread 'thrd' is currently active */ -static inline bool _req_active(struct pl330_thread *thrd, - struct _pl330_req *req) -{ - void __iomem *regs = thrd->dmac->pinfo->base; - u32 buf = req->mc_bus, pc = readl(regs + CPC(thrd->id)); - - if (IS_FREE(req)) - return false; - - return (pc >= buf && pc <= buf + req->mc_len) ? true : false; -} - -/* Returns 0 if the thread is inactive, ID of active req + 1 otherwise */ -static inline unsigned _thrd_active(struct pl330_thread *thrd) -{ - if (_req_active(thrd, &thrd->req[0])) - return 1; /* First req active */ - - if (_req_active(thrd, &thrd->req[1])) - return 2; /* Second req active */ - - return 0; -} - static void _stop(struct pl330_thread *thrd) { void __iomem *regs = thrd->dmac->pinfo->base; @@ -892,17 +874,22 @@ static bool _trigger(struct pl330_thread *thrd) struct _arg_GO go; unsigned ns; u8 insn[6] = {0, 0, 0, 0, 0, 0}; + int idx; /* Return if already ACTIVE */ if (_state(thrd) != PL330_STATE_STOPPED) return true; - if (!IS_FREE(&thrd->req[1 - thrd->lstenq])) - req = &thrd->req[1 - thrd->lstenq]; - else if (!IS_FREE(&thrd->req[thrd->lstenq])) - req = &thrd->req[thrd->lstenq]; - else - req = NULL; + idx = 1 - thrd->lstenq; + if (!IS_FREE(&thrd->req[idx])) + req = &thrd->req[idx]; + else { + idx = thrd->lstenq; + if (!IS_FREE(&thrd->req[idx])) + req = &thrd->req[idx]; + else + req = NULL; + } /* Return if no request */ if (!req || !req->r) @@ -933,6 +920,8 @@ static bool _trigger(struct pl330_thread *thrd) /* Only manager can execute GO */ _execute_DBGINSN(thrd, insn, true); + thrd->req_running = idx; + return true; } @@ -1382,8 +1371,8 @@ static void pl330_dotask(unsigned long data) thrd->req[0].r = NULL; thrd->req[1].r = NULL; - MARK_FREE(&thrd->req[0]); - MARK_FREE(&thrd->req[1]); + mark_free(thrd, 0); + mark_free(thrd, 1); /* Clear the reset flag */ pl330->dmac_tbd.reset_chan &= ~(1 << i); @@ -1461,14 +1450,12 @@ int pl330_update(const struct pl330_info *pi) thrd = &pl330->channels[id]; - active = _thrd_active(thrd); - if (!active) /* Aborted */ + active = thrd->req_running; + if (active == -1) /* Aborted */ continue; - active -= 1; - rqdone = &thrd->req[active]; - MARK_FREE(rqdone); + mark_free(thrd, active); /* Get going again ASAP */ _start(thrd); @@ -1509,7 +1496,7 @@ int pl330_chan_ctrl(void *ch_id, enum pl330_chan_op op) struct pl330_thread *thrd = ch_id; struct pl330_dmac *pl330; unsigned long flags; - int ret = 0, active; + int ret = 0, active = thrd->req_running; if (!thrd || thrd->free || thrd->dmac->state == DYING) return -EINVAL; @@ -1525,28 +1512,24 @@ int pl330_chan_ctrl(void *ch_id, enum pl330_chan_op op) thrd->req[0].r = NULL; thrd->req[1].r = NULL; - MARK_FREE(&thrd->req[0]); - MARK_FREE(&thrd->req[1]); + mark_free(thrd, 0); + mark_free(thrd, 1); break; case PL330_OP_ABORT: - active = _thrd_active(thrd); - /* Make sure the channel is stopped */ _stop(thrd); /* ABORT is only for the active req */ - if (!active) + if (active == -1) break; - active--; - thrd->req[active].r = NULL; - MARK_FREE(&thrd->req[active]); + mark_free(thrd, active); /* Start the next */ case PL330_OP_START: - if (!_thrd_active(thrd) && !_start(thrd)) + if ((active == -1) && !_start(thrd)) ret = -EIO; break; @@ -1587,14 +1570,13 @@ int pl330_chan_status(void *ch_id, struct pl330_chanstatus *pstatus) else pstatus->faulting = false; - active = _thrd_active(thrd); + active = thrd->req_running; - if (!active) { + if (active == -1) { /* Indicate that the thread is not running */ pstatus->top_req = NULL; pstatus->wait_req = NULL; } else { - active--; pstatus->top_req = thrd->req[active].r; pstatus->wait_req = !IS_FREE(&thrd->req[1 - active]) ? thrd->req[1 - active].r : NULL; @@ -1659,9 +1641,9 @@ void *pl330_request_channel(const struct pl330_info *pi) thrd->free = false; thrd->lstenq = 1; thrd->req[0].r = NULL; - MARK_FREE(&thrd->req[0]); + mark_free(thrd, 0); thrd->req[1].r = NULL; - MARK_FREE(&thrd->req[1]); + mark_free(thrd, 1); break; } } @@ -1767,14 +1749,14 @@ static inline void _reset_thread(struct pl330_thread *thrd) thrd->req[0].mc_bus = pl330->mcode_bus + (thrd->id * pi->mcbufsz); thrd->req[0].r = NULL; - MARK_FREE(&thrd->req[0]); + mark_free(thrd, 0); thrd->req[1].mc_cpu = thrd->req[0].mc_cpu + pi->mcbufsz / 2; thrd->req[1].mc_bus = thrd->req[0].mc_bus + pi->mcbufsz / 2; thrd->req[1].r = NULL; - MARK_FREE(&thrd->req[1]); + mark_free(thrd, 1); } static int dmac_alloc_threads(struct pl330_dmac *pl330) diff --git a/arch/arm/configs/imx_v4_v5_defconfig b/arch/arm/configs/imx_v4_v5_defconfig index 11a4192197c..cf497ce41df 100644 --- a/arch/arm/configs/imx_v4_v5_defconfig +++ b/arch/arm/configs/imx_v4_v5_defconfig @@ -18,9 +18,10 @@ CONFIG_ARCH_MXC=y CONFIG_ARCH_IMX_V4_V5=y CONFIG_ARCH_MX1ADS=y CONFIG_MACH_SCB9328=y +CONFIG_MACH_APF9328=y CONFIG_MACH_MX21ADS=y CONFIG_MACH_MX25_3DS=y -CONFIG_MACH_EUKREA_CPUIMX25=y +CONFIG_MACH_EUKREA_CPUIMX25SD=y CONFIG_MACH_MX27ADS=y CONFIG_MACH_PCM038=y CONFIG_MACH_CPUIMX27=y @@ -72,17 +73,16 @@ CONFIG_MTD_CFI_GEOMETRY=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_PHYSMAP=y CONFIG_MTD_NAND=y +CONFIG_MTD_NAND_MXC=y CONFIG_MTD_UBI=y CONFIG_MISC_DEVICES=y CONFIG_EEPROM_AT24=y CONFIG_EEPROM_AT25=y CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -CONFIG_SMC91X=y CONFIG_DM9000=y +CONFIG_SMC91X=y CONFIG_SMC911X=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set +CONFIG_SMSC_PHY=y # CONFIG_INPUT_MOUSEDEV is not set CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_KEYBOARD is not set @@ -100,6 +100,7 @@ CONFIG_I2C_CHARDEV=y CONFIG_I2C_IMX=y CONFIG_SPI=y CONFIG_SPI_IMX=y +CONFIG_SPI_SPIDEV=y CONFIG_W1=y CONFIG_W1_MASTER_MXC=y CONFIG_W1_SLAVE_THERM=y @@ -139,6 +140,7 @@ CONFIG_MMC=y CONFIG_MMC_MXC=y CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y +CONFIG_LEDS_GPIO=y CONFIG_LEDS_MC13783=y CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=y diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 3d0c6fb74ae..e8e8fe505df 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -183,7 +183,8 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); + rcu_idle_enter(); leds_event(led_idle_start); while (!need_resched()) { #ifdef CONFIG_HOTPLUG_CPU @@ -213,7 +214,8 @@ void cpu_idle(void) } } leds_event(led_idle_end); - tick_nohz_restart_sched_tick(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 8fc2c8fcbdc..c0b59bff6be 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -52,6 +52,7 @@ #include <asm/mach/time.h> #include <asm/traps.h> #include <asm/unwind.h> +#include <asm/memblock.h> #if defined(CONFIG_DEPRECATED_PARAM_STRUCT) #include "compat.h" diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c index 3f03fe0c326..00df012c467 100644 --- a/arch/arm/kernel/unwind.c +++ b/arch/arm/kernel/unwind.c @@ -160,12 +160,12 @@ static const struct unwind_idx *unwind_find_origin( const struct unwind_idx *start, const struct unwind_idx *stop) { pr_debug("%s(%p, %p)\n", __func__, start, stop); - while (start < stop - 1) { + while (start < stop) { const struct unwind_idx *mid = start + ((stop - start) >> 1); if (mid->addr_offset >= 0x40000000) /* negative offset */ - start = mid; + start = mid + 1; else /* positive offset */ stop = mid; diff --git a/arch/arm/mach-exynos/cpu.c b/arch/arm/mach-exynos/cpu.c index 90ec247f3b3..cc8d4bd6d0f 100644 --- a/arch/arm/mach-exynos/cpu.c +++ b/arch/arm/mach-exynos/cpu.c @@ -111,11 +111,6 @@ static struct map_desc exynos4_iodesc[] __initdata = { .length = SZ_4K, .type = MT_DEVICE, }, { - .virtual = (unsigned long)S5P_VA_SROMC, - .pfn = __phys_to_pfn(EXYNOS4_PA_SROMC), - .length = SZ_4K, - .type = MT_DEVICE, - }, { .virtual = (unsigned long)S3C_VA_USB_HSPHY, .pfn = __phys_to_pfn(EXYNOS4_PA_HSPHY), .length = SZ_4K, diff --git a/arch/arm/mach-exynos/mct.c b/arch/arm/mach-exynos/mct.c index 97343df8f13..85b5527d091 100644 --- a/arch/arm/mach-exynos/mct.c +++ b/arch/arm/mach-exynos/mct.c @@ -44,8 +44,6 @@ struct mct_clock_event_device { char name[10]; }; -static DEFINE_PER_CPU(struct mct_clock_event_device, percpu_mct_tick); - static void exynos4_mct_write(unsigned int value, void *addr) { void __iomem *stat_addr; @@ -264,6 +262,9 @@ static void exynos4_clockevent_init(void) } #ifdef CONFIG_LOCAL_TIMERS + +static DEFINE_PER_CPU(struct mct_clock_event_device, percpu_mct_tick); + /* Clock event handling */ static void exynos4_mct_tick_stop(struct mct_clock_event_device *mevt) { @@ -428,9 +429,13 @@ int __cpuinit local_timer_setup(struct clock_event_device *evt) void local_timer_stop(struct clock_event_device *evt) { + unsigned int cpu = smp_processor_id(); evt->set_mode(CLOCK_EVT_MODE_UNUSED, evt); if (mct_int_type == MCT_INT_SPI) - disable_irq(evt->irq); + if (cpu == 0) + remove_irq(evt->irq, &mct_tick0_event_irq); + else + remove_irq(evt->irq, &mct_tick1_event_irq); else disable_percpu_irq(IRQ_MCT_LOCALTIMER); } @@ -443,6 +448,7 @@ static void __init exynos4_timer_resources(void) clk_rate = clk_get_rate(mct_clk); +#ifdef CONFIG_LOCAL_TIMERS if (mct_int_type == MCT_INT_PPI) { int err; @@ -452,6 +458,7 @@ static void __init exynos4_timer_resources(void) WARN(err, "MCT: can't request IRQ %d (%d)\n", IRQ_MCT_LOCALTIMER, err); } +#endif /* CONFIG_LOCAL_TIMERS */ } static void __init exynos4_timer_init(void) diff --git a/arch/arm/mach-imx/Kconfig b/arch/arm/mach-imx/Kconfig index c44aa974e79..0e6f1af260b 100644 --- a/arch/arm/mach-imx/Kconfig +++ b/arch/arm/mach-imx/Kconfig @@ -132,7 +132,7 @@ config MACH_MX25_3DS select IMX_HAVE_PLATFORM_MXC_NAND select IMX_HAVE_PLATFORM_SDHCI_ESDHC_IMX -config MACH_EUKREA_CPUIMX25 +config MACH_EUKREA_CPUIMX25SD bool "Support Eukrea CPUIMX25 Platform" select SOC_IMX25 select IMX_HAVE_PLATFORM_FLEXCAN @@ -148,7 +148,7 @@ config MACH_EUKREA_CPUIMX25 choice prompt "Baseboard" - depends on MACH_EUKREA_CPUIMX25 + depends on MACH_EUKREA_CPUIMX25SD default MACH_EUKREA_MBIMXSD25_BASEBOARD config MACH_EUKREA_MBIMXSD25_BASEBOARD @@ -542,7 +542,7 @@ config MACH_MX35_3DS Include support for MX35PDK platform. This includes specific configurations for the board and its peripherals. -config MACH_EUKREA_CPUIMX35 +config MACH_EUKREA_CPUIMX35SD bool "Support Eukrea CPUIMX35 Platform" select SOC_IMX35 select IMX_HAVE_PLATFORM_FLEXCAN @@ -560,7 +560,7 @@ config MACH_EUKREA_CPUIMX35 choice prompt "Baseboard" - depends on MACH_EUKREA_CPUIMX35 + depends on MACH_EUKREA_CPUIMX35SD default MACH_EUKREA_MBIMXSD35_BASEBOARD config MACH_EUKREA_MBIMXSD35_BASEBOARD diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile index aba73214c2a..d97f409ce98 100644 --- a/arch/arm/mach-imx/Makefile +++ b/arch/arm/mach-imx/Makefile @@ -24,7 +24,7 @@ obj-$(CONFIG_MACH_MX21ADS) += mach-mx21ads.o # i.MX25 based machines obj-$(CONFIG_MACH_MX25_3DS) += mach-mx25_3ds.o -obj-$(CONFIG_MACH_EUKREA_CPUIMX25) += mach-eukrea_cpuimx25.o +obj-$(CONFIG_MACH_EUKREA_CPUIMX25SD) += mach-eukrea_cpuimx25.o obj-$(CONFIG_MACH_EUKREA_MBIMXSD25_BASEBOARD) += eukrea_mbimxsd25-baseboard.o # i.MX27 based machines @@ -57,7 +57,7 @@ obj-$(CONFIG_MACH_BUG) += mach-bug.o # i.MX35 based machines obj-$(CONFIG_MACH_PCM043) += mach-pcm043.o obj-$(CONFIG_MACH_MX35_3DS) += mach-mx35_3ds.o -obj-$(CONFIG_MACH_EUKREA_CPUIMX35) += mach-cpuimx35.o +obj-$(CONFIG_MACH_EUKREA_CPUIMX35SD) += mach-cpuimx35.o obj-$(CONFIG_MACH_EUKREA_MBIMXSD35_BASEBOARD) += eukrea_mbimxsd35-baseboard.o obj-$(CONFIG_MACH_VPR200) += mach-vpr200.o diff --git a/arch/arm/mach-imx/clock-imx35.c b/arch/arm/mach-imx/clock-imx35.c index 8116f119517..ac8238caecb 100644 --- a/arch/arm/mach-imx/clock-imx35.c +++ b/arch/arm/mach-imx/clock-imx35.c @@ -507,7 +507,7 @@ static struct clk_lookup lookups[] = { int __init mx35_clocks_init() { - unsigned int cgr2 = 3 << 26, cgr3 = 0; + unsigned int cgr2 = 3 << 26; #if defined(CONFIG_DEBUG_LL) && !defined(CONFIG_DEBUG_ICEDCC) cgr2 |= 3 << 16; @@ -521,6 +521,12 @@ int __init mx35_clocks_init() __raw_writel((3 << 18), CCM_BASE + CCM_CGR0); __raw_writel((3 << 2) | (3 << 4) | (3 << 6) | (3 << 8) | (3 << 16), CCM_BASE + CCM_CGR1); + __raw_writel(cgr2, CCM_BASE + CCM_CGR2); + __raw_writel(0, CCM_BASE + CCM_CGR3); + + clk_enable(&iim_clk); + imx_print_silicon_rev("i.MX35", mx35_revision()); + clk_disable(&iim_clk); /* * Check if we came up in internal boot mode. If yes, we need some @@ -529,17 +535,11 @@ int __init mx35_clocks_init() */ if (!(__raw_readl(CCM_BASE + CCM_RCSR) & (3 << 10))) { /* Additionally turn on UART1, SCC, and IIM clocks */ - cgr2 |= 3 << 16 | 3 << 4; - cgr3 |= 3 << 2; + clk_enable(&iim_clk); + clk_enable(&uart1_clk); + clk_enable(&scc_clk); } - __raw_writel(cgr2, CCM_BASE + CCM_CGR2); - __raw_writel(cgr3, CCM_BASE + CCM_CGR3); - - clk_enable(&iim_clk); - imx_print_silicon_rev("i.MX35", mx35_revision()); - clk_disable(&iim_clk); - #ifdef CONFIG_MXC_USE_EPIT epit_timer_init(&epit1_clk, MX35_IO_ADDRESS(MX35_EPIT1_BASE_ADDR), MX35_INT_EPIT1); diff --git a/arch/arm/mach-imx/mach-cpuimx35.c b/arch/arm/mach-imx/mach-cpuimx35.c index 66af2e8f7e5..362aae78060 100644 --- a/arch/arm/mach-imx/mach-cpuimx35.c +++ b/arch/arm/mach-imx/mach-cpuimx35.c @@ -53,12 +53,18 @@ static const struct imxi2c_platform_data .bitrate = 100000, }; +#define TSC2007_IRQGPIO IMX_GPIO_NR(3, 2) +static int tsc2007_get_pendown_state(void) +{ + return !gpio_get_value(TSC2007_IRQGPIO); +} + static struct tsc2007_platform_data tsc2007_info = { .model = 2007, .x_plate_ohms = 180, + .get_pendown_state = tsc2007_get_pendown_state, }; -#define TSC2007_IRQGPIO IMX_GPIO_NR(3, 2) static struct i2c_board_info eukrea_cpuimx35_i2c_devices[] = { { I2C_BOARD_INFO("pcf8563", 0x51), diff --git a/arch/arm/mach-mx5/board-mx51_babbage.c b/arch/arm/mach-mx5/board-mx51_babbage.c index 5c837603ff0..24994bb5214 100644 --- a/arch/arm/mach-mx5/board-mx51_babbage.c +++ b/arch/arm/mach-mx5/board-mx51_babbage.c @@ -362,7 +362,7 @@ static void __init mx51_babbage_init(void) { iomux_v3_cfg_t usbh1stp = MX51_PAD_USBH1_STP__USBH1_STP; iomux_v3_cfg_t power_key = NEW_PAD_CTRL(MX51_PAD_EIM_A27__GPIO2_21, - PAD_CTL_SRE_FAST | PAD_CTL_DSE_HIGH | PAD_CTL_PUS_100K_UP); + PAD_CTL_SRE_FAST | PAD_CTL_DSE_HIGH); imx51_soc_init(); diff --git a/arch/arm/mach-mx5/board-mx53_evk.c b/arch/arm/mach-mx5/board-mx53_evk.c index 6bea31ab8f8..64bbfcea6f3 100644 --- a/arch/arm/mach-mx5/board-mx53_evk.c +++ b/arch/arm/mach-mx5/board-mx53_evk.c @@ -106,7 +106,7 @@ static inline void mx53_evk_fec_reset(void) gpio_set_value(MX53_EVK_FEC_PHY_RST, 1); } -static struct fec_platform_data mx53_evk_fec_pdata = { +static const struct fec_platform_data mx53_evk_fec_pdata __initconst = { .phy = PHY_INTERFACE_MODE_RMII, }; diff --git a/arch/arm/mach-mx5/board-mx53_loco.c b/arch/arm/mach-mx5/board-mx53_loco.c index 7678f7734db..237bdecd933 100644 --- a/arch/arm/mach-mx5/board-mx53_loco.c +++ b/arch/arm/mach-mx5/board-mx53_loco.c @@ -242,7 +242,7 @@ static inline void mx53_loco_fec_reset(void) gpio_set_value(LOCO_FEC_PHY_RST, 1); } -static struct fec_platform_data mx53_loco_fec_data = { +static const struct fec_platform_data mx53_loco_fec_data __initconst = { .phy = PHY_INTERFACE_MODE_RMII, }; diff --git a/arch/arm/mach-mx5/board-mx53_smd.c b/arch/arm/mach-mx5/board-mx53_smd.c index 59c0845eb4a..d42132a80e8 100644 --- a/arch/arm/mach-mx5/board-mx53_smd.c +++ b/arch/arm/mach-mx5/board-mx53_smd.c @@ -104,7 +104,7 @@ static inline void mx53_smd_fec_reset(void) gpio_set_value(SMD_FEC_PHY_RST, 1); } -static struct fec_platform_data mx53_smd_fec_data = { +static const struct fec_platform_data mx53_smd_fec_data __initconst = { .phy = PHY_INTERFACE_MODE_RMII, }; diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c b/arch/arm/mach-omap2/board-rx51-peripherals.c index ba1aa07bdb2..c15c5c9c908 100644 --- a/arch/arm/mach-omap2/board-rx51-peripherals.c +++ b/arch/arm/mach-omap2/board-rx51-peripherals.c @@ -193,7 +193,7 @@ static struct platform_device rx51_charger_device = { static void __init rx51_charger_init(void) { WARN_ON(gpio_request_one(RX51_USB_TRANSCEIVER_RST_GPIO, - GPIOF_OUT_INIT_LOW, "isp1704_reset")); + GPIOF_OUT_INIT_HIGH, "isp1704_reset")); platform_device_register(&rx51_charger_device); } diff --git a/arch/arm/mach-omap2/mcbsp.c b/arch/arm/mach-omap2/mcbsp.c index 292eee3be15..28fcb27005d 100644 --- a/arch/arm/mach-omap2/mcbsp.c +++ b/arch/arm/mach-omap2/mcbsp.c @@ -145,6 +145,9 @@ static int omap_init_mcbsp(struct omap_hwmod *oh, void *unused) pdata->reg_size = 4; pdata->has_ccr = true; } + pdata->set_clk_src = omap2_mcbsp_set_clk_src; + if (id == 1) + pdata->mux_signal = omap2_mcbsp1_mux_rx_clk; if (oh->class->rev == MCBSP_CONFIG_TYPE3) { if (id == 2) @@ -174,9 +177,6 @@ static int omap_init_mcbsp(struct omap_hwmod *oh, void *unused) name, oh->name); return PTR_ERR(pdev); } - pdata->set_clk_src = omap2_mcbsp_set_clk_src; - if (id == 1) - pdata->mux_signal = omap2_mcbsp1_mux_rx_clk; omap_mcbsp_count++; return 0; } diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c index 7f8915ad509..eef43e2e163 100644 --- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c @@ -3247,18 +3247,14 @@ static __initdata struct omap_hwmod *omap3xxx_hwmods[] = { /* 3430ES1-only hwmods */ static __initdata struct omap_hwmod *omap3430es1_hwmods[] = { - &omap3xxx_iva_hwmod, &omap3430es1_dss_core_hwmod, - &omap3xxx_mailbox_hwmod, NULL }; /* 3430ES2+-only hwmods */ static __initdata struct omap_hwmod *omap3430es2plus_hwmods[] = { - &omap3xxx_iva_hwmod, &omap3xxx_dss_core_hwmod, &omap3xxx_usbhsotg_hwmod, - &omap3xxx_mailbox_hwmod, NULL }; diff --git a/arch/arm/mach-s5pv210/mach-smdkv210.c b/arch/arm/mach-s5pv210/mach-smdkv210.c index a9106c39239..8662ef6e568 100644 --- a/arch/arm/mach-s5pv210/mach-smdkv210.c +++ b/arch/arm/mach-s5pv210/mach-smdkv210.c @@ -273,6 +273,7 @@ static struct samsung_bl_gpio_info smdkv210_bl_gpio_info = { static struct platform_pwm_backlight_data smdkv210_bl_data = { .pwm_id = 3, + .pwm_period_ns = 1000, }; static void __init smdkv210_map_io(void) diff --git a/arch/arm/mach-shmobile/board-ag5evm.c b/arch/arm/mach-shmobile/board-ag5evm.c index b862e9f81e3..7119b87cbfa 100644 --- a/arch/arm/mach-shmobile/board-ag5evm.c +++ b/arch/arm/mach-shmobile/board-ag5evm.c @@ -607,6 +607,7 @@ struct sys_timer ag5evm_timer = { MACHINE_START(AG5EVM, "ag5evm") .map_io = ag5evm_map_io, + .nr_irqs = NR_IRQS_LEGACY, .init_irq = sh73a0_init_irq, .handle_irq = shmobile_handle_irq_gic, .init_machine = ag5evm_init, diff --git a/arch/arm/mach-shmobile/board-kota2.c b/arch/arm/mach-shmobile/board-kota2.c index bd9a78424d6..f44150b5ae4 100644 --- a/arch/arm/mach-shmobile/board-kota2.c +++ b/arch/arm/mach-shmobile/board-kota2.c @@ -33,6 +33,7 @@ #include <linux/input/sh_keysc.h> #include <linux/gpio_keys.h> #include <linux/leds.h> +#include <linux/platform_data/leds-renesas-tpu.h> #include <linux/mmc/host.h> #include <linux/mmc/sh_mmcif.h> #include <linux/mfd/tmio.h> @@ -56,7 +57,7 @@ static struct resource smsc9220_resources[] = { .flags = IORESOURCE_MEM, }, [1] = { - .start = gic_spi(33), /* PINTA2 @ PORT144 */ + .start = SH73A0_PINT0_IRQ(2), /* PINTA2 */ .flags = IORESOURCE_IRQ, }, }; @@ -157,10 +158,6 @@ static struct platform_device gpio_keys_device = { #define GPIO_LED(n, g) { .name = n, .gpio = g } static struct gpio_led gpio_leds[] = { - GPIO_LED("V2513", GPIO_PORT153), /* PORT153 [TPU1T02] -> V2513 */ - GPIO_LED("V2514", GPIO_PORT199), /* PORT199 [TPU4TO1] -> V2514 */ - GPIO_LED("V2515", GPIO_PORT197), /* PORT197 [TPU2TO1] -> V2515 */ - GPIO_LED("KEYLED", GPIO_PORT163), /* PORT163 [TPU3TO0] -> KEYLED */ GPIO_LED("G", GPIO_PORT20), /* PORT20 [GPO0] -> LED7 -> "G" */ GPIO_LED("H", GPIO_PORT21), /* PORT21 [GPO1] -> LED8 -> "H" */ GPIO_LED("J", GPIO_PORT22), /* PORT22 [GPO2] -> LED9 -> "J" */ @@ -179,6 +176,119 @@ static struct platform_device gpio_leds_device = { }, }; +/* TPU LED */ +static struct led_renesas_tpu_config led_renesas_tpu12_pdata = { + .name = "V2513", + .pin_gpio_fn = GPIO_FN_TPU1TO2, + .pin_gpio = GPIO_PORT153, + .channel_offset = 0x90, + .timer_bit = 2, + .max_brightness = 1000, +}; + +static struct resource tpu12_resources[] = { + [0] = { + .name = "TPU12", + .start = 0xe6610090, + .end = 0xe66100b5, + .flags = IORESOURCE_MEM, + }, +}; + +static struct platform_device leds_tpu12_device = { + .name = "leds-renesas-tpu", + .id = 12, + .dev = { + .platform_data = &led_renesas_tpu12_pdata, + }, + .num_resources = ARRAY_SIZE(tpu12_resources), + .resource = tpu12_resources, +}; + +static struct led_renesas_tpu_config led_renesas_tpu41_pdata = { + .name = "V2514", + .pin_gpio_fn = GPIO_FN_TPU4TO1, + .pin_gpio = GPIO_PORT199, + .channel_offset = 0x50, + .timer_bit = 1, + .max_brightness = 1000, +}; + +static struct resource tpu41_resources[] = { + [0] = { + .name = "TPU41", + .start = 0xe6640050, + .end = 0xe6640075, + .flags = IORESOURCE_MEM, + }, +}; + +static struct platform_device leds_tpu41_device = { + .name = "leds-renesas-tpu", + .id = 41, + .dev = { + .platform_data = &led_renesas_tpu41_pdata, + }, + .num_resources = ARRAY_SIZE(tpu41_resources), + .resource = tpu41_resources, +}; + +static struct led_renesas_tpu_config led_renesas_tpu21_pdata = { + .name = "V2515", + .pin_gpio_fn = GPIO_FN_TPU2TO1, + .pin_gpio = GPIO_PORT197, + .channel_offset = 0x50, + .timer_bit = 1, + .max_brightness = 1000, +}; + +static struct resource tpu21_resources[] = { + [0] = { + .name = "TPU21", + .start = 0xe6620050, + .end = 0xe6620075, + .flags = IORESOURCE_MEM, + }, +}; + +static struct platform_device leds_tpu21_device = { + .name = "leds-renesas-tpu", + .id = 21, + .dev = { + .platform_data = &led_renesas_tpu21_pdata, + }, + .num_resources = ARRAY_SIZE(tpu21_resources), + .resource = tpu21_resources, +}; + +static struct led_renesas_tpu_config led_renesas_tpu30_pdata = { + .name = "KEYLED", + .pin_gpio_fn = GPIO_FN_TPU3TO0, + .pin_gpio = GPIO_PORT163, + .channel_offset = 0x10, + .timer_bit = 0, + .max_brightness = 1000, +}; + +static struct resource tpu30_resources[] = { + [0] = { + .name = "TPU30", + .start = 0xe6630010, + .end = 0xe6630035, + .flags = IORESOURCE_MEM, + }, +}; + +static struct platform_device leds_tpu30_device = { + .name = "leds-renesas-tpu", + .id = 30, + .dev = { + .platform_data = &led_renesas_tpu30_pdata, + }, + .num_resources = ARRAY_SIZE(tpu30_resources), + .resource = tpu30_resources, +}; + /* MMCIF */ static struct resource mmcif_resources[] = { [0] = { @@ -291,6 +401,10 @@ static struct platform_device *kota2_devices[] __initdata = { &keysc_device, &gpio_keys_device, &gpio_leds_device, + &leds_tpu12_device, + &leds_tpu41_device, + &leds_tpu21_device, + &leds_tpu30_device, &mmcif_device, &sdhi0_device, &sdhi1_device, @@ -317,18 +431,6 @@ static void __init kota2_map_io(void) shmobile_setup_console(); } -#define PINTER0A 0xe69000a0 -#define PINTCR0A 0xe69000b0 - -void __init kota2_init_irq(void) -{ - sh73a0_init_irq(); - - /* setup PINT: enable PINTA2 as active low */ - __raw_writel(1 << 29, PINTER0A); - __raw_writew(2 << 10, PINTCR0A); -} - static void __init kota2_init(void) { sh73a0_pinmux_init(); @@ -447,7 +549,8 @@ struct sys_timer kota2_timer = { MACHINE_START(KOTA2, "kota2") .map_io = kota2_map_io, - .init_irq = kota2_init_irq, + .nr_irqs = NR_IRQS_LEGACY, + .init_irq = sh73a0_init_irq, .handle_irq = shmobile_handle_irq_gic, .init_machine = kota2_init, .timer = &kota2_timer, diff --git a/arch/arm/mach-shmobile/clock-sh73a0.c b/arch/arm/mach-shmobile/clock-sh73a0.c index 61a846bb30f..1370a89ca35 100644 --- a/arch/arm/mach-shmobile/clock-sh73a0.c +++ b/arch/arm/mach-shmobile/clock-sh73a0.c @@ -113,6 +113,12 @@ static struct clk main_clk = { .ops = &main_clk_ops, }; +/* Divide Main clock by two */ +static struct clk main_div2_clk = { + .ops = &div2_clk_ops, + .parent = &main_clk, +}; + /* PLL0, PLL1, PLL2, PLL3 */ static unsigned long pll_recalc(struct clk *clk) { @@ -181,6 +187,7 @@ static struct clk *main_clks[] = { &extal1_div2_clk, &extal2_div2_clk, &main_clk, + &main_div2_clk, &pll0_clk, &pll1_clk, &pll2_clk, @@ -243,7 +250,7 @@ static struct clk div6_clks[DIV6_NR] = { [DIV6_VCK1] = SH_CLK_DIV6(&pll1_div2_clk, VCLKCR1, 0), [DIV6_VCK2] = SH_CLK_DIV6(&pll1_div2_clk, VCLKCR2, 0), [DIV6_VCK3] = SH_CLK_DIV6(&pll1_div2_clk, VCLKCR3, 0), - [DIV6_ZB1] = SH_CLK_DIV6(&pll1_div2_clk, ZBCKCR, 0), + [DIV6_ZB1] = SH_CLK_DIV6(&pll1_div2_clk, ZBCKCR, CLK_ENABLE_ON_INIT), [DIV6_FLCTL] = SH_CLK_DIV6(&pll1_div2_clk, FLCKCR, 0), [DIV6_SDHI0] = SH_CLK_DIV6(&pll1_div2_clk, SD0CKCR, 0), [DIV6_SDHI1] = SH_CLK_DIV6(&pll1_div2_clk, SD1CKCR, 0), @@ -268,6 +275,7 @@ enum { MSTP001, MSTP207, MSTP206, MSTP204, MSTP203, MSTP202, MSTP201, MSTP200, MSTP331, MSTP329, MSTP325, MSTP323, MSTP318, MSTP314, MSTP313, MSTP312, MSTP311, + MSTP303, MSTP302, MSTP301, MSTP300, MSTP411, MSTP410, MSTP403, MSTP_NR }; @@ -301,6 +309,10 @@ static struct clk mstp_clks[MSTP_NR] = { [MSTP313] = MSTP(&div6_clks[DIV6_SDHI1], SMSTPCR3, 13, 0), /* SDHI1 */ [MSTP312] = MSTP(&div4_clks[DIV4_HP], SMSTPCR3, 12, 0), /* MMCIF0 */ [MSTP311] = MSTP(&div6_clks[DIV6_SDHI2], SMSTPCR3, 11, 0), /* SDHI2 */ + [MSTP303] = MSTP(&main_div2_clk, SMSTPCR3, 3, 0), /* TPU1 */ + [MSTP302] = MSTP(&main_div2_clk, SMSTPCR3, 2, 0), /* TPU2 */ + [MSTP301] = MSTP(&main_div2_clk, SMSTPCR3, 1, 0), /* TPU3 */ + [MSTP300] = MSTP(&main_div2_clk, SMSTPCR3, 0, 0), /* TPU4 */ [MSTP411] = MSTP(&div4_clks[DIV4_HP], SMSTPCR4, 11, 0), /* IIC3 */ [MSTP410] = MSTP(&div4_clks[DIV4_HP], SMSTPCR4, 10, 0), /* IIC4 */ [MSTP403] = MSTP(&r_clk, SMSTPCR4, 3, 0), /* KEYSC */ @@ -350,6 +362,10 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("sh_mobile_sdhi.1", &mstp_clks[MSTP313]), /* SDHI1 */ CLKDEV_DEV_ID("sh_mmcif.0", &mstp_clks[MSTP312]), /* MMCIF0 */ CLKDEV_DEV_ID("sh_mobile_sdhi.2", &mstp_clks[MSTP311]), /* SDHI2 */ + CLKDEV_DEV_ID("leds-renesas-tpu.12", &mstp_clks[MSTP303]), /* TPU1 */ + CLKDEV_DEV_ID("leds-renesas-tpu.21", &mstp_clks[MSTP302]), /* TPU2 */ + CLKDEV_DEV_ID("leds-renesas-tpu.30", &mstp_clks[MSTP301]), /* TPU3 */ + CLKDEV_DEV_ID("leds-renesas-tpu.41", &mstp_clks[MSTP300]), /* TPU4 */ CLKDEV_DEV_ID("i2c-sh_mobile.3", &mstp_clks[MSTP411]), /* I2C3 */ CLKDEV_DEV_ID("i2c-sh_mobile.4", &mstp_clks[MSTP410]), /* I2C4 */ CLKDEV_DEV_ID("sh_keysc.0", &mstp_clks[MSTP403]), /* KEYSC */ diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index fbdd12ea3a5..7c38474e533 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -32,6 +32,7 @@ #include <asm/mach/arch.h> #include <asm/mach/map.h> +#include <asm/memblock.h> #include "mm.h" @@ -332,7 +333,6 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc) sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]), meminfo_cmp, NULL); - memblock_init(); for (i = 0; i < mi->nr_banks; i++) memblock_add(mi->bank[i].start, mi->bank[i].size); @@ -371,7 +371,7 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc) if (mdesc->reserve) mdesc->reserve(); - memblock_analyze(); + memblock_allow_resize(); memblock_dump_all(); } diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 2c559ac3814..e70a73731ea 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -363,11 +363,13 @@ __v7_setup: orreq r10, r10, #1 << 6 @ set bit #6 mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register #endif -#ifdef CONFIG_ARM_ERRATA_751472 - cmp r6, #0x30 @ present prior to r3p0 +#if defined(CONFIG_ARM_ERRATA_751472) && defined(CONFIG_SMP) + ALT_SMP(cmp r6, #0x30) @ present prior to r3p0 + ALT_UP_B(1f) mrclt p15, 0, r10, c15, c0, 1 @ read diagnostic register orrlt r10, r10, #1 << 11 @ set bit #11 mcrlt p15, 0, r10, c15, c0, 1 @ write diagnostic register +1: #endif 3: mov r10, #0 diff --git a/arch/arm/oprofile/common.c b/arch/arm/oprofile/common.c index c074e66ad22..4e0a371630b 100644 --- a/arch/arm/oprofile/common.c +++ b/arch/arm/oprofile/common.c @@ -116,7 +116,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) return oprofile_perf_init(ops); } -void __exit oprofile_arch_exit(void) +void oprofile_arch_exit(void) { oprofile_perf_exit(); } diff --git a/arch/arm/plat-mxc/cpufreq.c b/arch/arm/plat-mxc/cpufreq.c index 74aac96cda2..73db34bf588 100644 --- a/arch/arm/plat-mxc/cpufreq.c +++ b/arch/arm/plat-mxc/cpufreq.c @@ -17,6 +17,7 @@ * the CPU clock speed on the fly. */ +#include <linux/module.h> #include <linux/cpufreq.h> #include <linux/clk.h> #include <linux/err.h> @@ -97,7 +98,7 @@ static int mxc_set_target(struct cpufreq_policy *policy, return ret; } -static int __init mxc_cpufreq_init(struct cpufreq_policy *policy) +static int mxc_cpufreq_init(struct cpufreq_policy *policy) { int ret; int i; diff --git a/arch/arm/plat-mxc/include/mach/uncompress.h b/arch/arm/plat-mxc/include/mach/uncompress.h index 88fd4045256..477971b0093 100644 --- a/arch/arm/plat-mxc/include/mach/uncompress.h +++ b/arch/arm/plat-mxc/include/mach/uncompress.h @@ -98,6 +98,7 @@ static __inline__ void __arch_decomp_setup(unsigned long arch_id) case MACH_TYPE_PCM043: case MACH_TYPE_LILLY1131: case MACH_TYPE_VPR200: + case MACH_TYPE_EUKREA_CPUIMX35SD: uart_base = MX3X_UART1_BASE_ADDR; break; case MACH_TYPE_MAGX_ZN5: diff --git a/arch/arm/plat-mxc/pwm.c b/arch/arm/plat-mxc/pwm.c index 42d74ea5908..e032717f7d0 100644 --- a/arch/arm/plat-mxc/pwm.c +++ b/arch/arm/plat-mxc/pwm.c @@ -32,6 +32,9 @@ #define MX3_PWMSAR 0x0C /* PWM Sample Register */ #define MX3_PWMPR 0x10 /* PWM Period Register */ #define MX3_PWMCR_PRESCALER(x) (((x - 1) & 0xFFF) << 4) +#define MX3_PWMCR_DOZEEN (1 << 24) +#define MX3_PWMCR_WAITEN (1 << 23) +#define MX3_PWMCR_DBGEN (1 << 22) #define MX3_PWMCR_CLKSRC_IPG_HIGH (2 << 16) #define MX3_PWMCR_CLKSRC_IPG (1 << 16) #define MX3_PWMCR_EN (1 << 0) @@ -74,10 +77,21 @@ int pwm_config(struct pwm_device *pwm, int duty_ns, int period_ns) do_div(c, period_ns); duty_cycles = c; + /* + * according to imx pwm RM, the real period value should be + * PERIOD value in PWMPR plus 2. + */ + if (period_cycles > 2) + period_cycles -= 2; + else + period_cycles = 0; + writel(duty_cycles, pwm->mmio_base + MX3_PWMSAR); writel(period_cycles, pwm->mmio_base + MX3_PWMPR); - cr = MX3_PWMCR_PRESCALER(prescale) | MX3_PWMCR_EN; + cr = MX3_PWMCR_PRESCALER(prescale) | + MX3_PWMCR_DOZEEN | MX3_PWMCR_WAITEN | + MX3_PWMCR_DBGEN | MX3_PWMCR_EN; if (cpu_is_mx25()) cr |= MX3_PWMCR_CLKSRC_IPG; diff --git a/arch/arm/plat-orion/gpio.c b/arch/arm/plat-orion/gpio.c index 41ab97ebe4c..10d16088813 100644 --- a/arch/arm/plat-orion/gpio.c +++ b/arch/arm/plat-orion/gpio.c @@ -384,12 +384,16 @@ void __init orion_gpio_init(int gpio_base, int ngpio, struct orion_gpio_chip *ochip; struct irq_chip_generic *gc; struct irq_chip_type *ct; + char gc_label[16]; if (orion_gpio_chip_count == ARRAY_SIZE(orion_gpio_chips)) return; + snprintf(gc_label, sizeof(gc_label), "orion_gpio%d", + orion_gpio_chip_count); + ochip = orion_gpio_chips + orion_gpio_chip_count; - ochip->chip.label = "orion_gpio"; + ochip->chip.label = kstrdup(gc_label, GFP_KERNEL); ochip->chip.request = orion_gpio_request; ochip->chip.direction_input = orion_gpio_direction_input; ochip->chip.get = orion_gpio_get; diff --git a/arch/arm/plat-samsung/dev-backlight.c b/arch/arm/plat-samsung/dev-backlight.c index e657305644c..a976c023b28 100644 --- a/arch/arm/plat-samsung/dev-backlight.c +++ b/arch/arm/plat-samsung/dev-backlight.c @@ -15,7 +15,6 @@ #include <linux/slab.h> #include <linux/io.h> #include <linux/pwm_backlight.h> -#include <linux/slab.h> #include <plat/devs.h> #include <plat/gpio-cfg.h> diff --git a/arch/arm/plat-samsung/include/plat/cpu-freq-core.h b/arch/arm/plat-samsung/include/plat/cpu-freq-core.h index dac4760c0f0..95509d8eb14 100644 --- a/arch/arm/plat-samsung/include/plat/cpu-freq-core.h +++ b/arch/arm/plat-samsung/include/plat/cpu-freq-core.h @@ -202,14 +202,6 @@ extern int s3c_plltab_register(struct cpufreq_frequency_table *plls, extern struct s3c_cpufreq_config *s3c_cpufreq_getconfig(void); extern struct s3c_iotimings *s3c_cpufreq_getiotimings(void); -extern void s3c2410_iotiming_debugfs(struct seq_file *seq, - struct s3c_cpufreq_config *cfg, - union s3c_iobank *iob); - -extern void s3c2412_iotiming_debugfs(struct seq_file *seq, - struct s3c_cpufreq_config *cfg, - union s3c_iobank *iob); - #ifdef CONFIG_CPU_FREQ_S3C24XX_DEBUGFS #define s3c_cpufreq_debugfs_call(x) x #else @@ -226,6 +218,10 @@ extern void s3c2410_cpufreq_setrefresh(struct s3c_cpufreq_config *cfg); extern void s3c2410_set_fvco(struct s3c_cpufreq_config *cfg); #ifdef CONFIG_S3C2410_IOTIMING +extern void s3c2410_iotiming_debugfs(struct seq_file *seq, + struct s3c_cpufreq_config *cfg, + union s3c_iobank *iob); + extern int s3c2410_iotiming_calc(struct s3c_cpufreq_config *cfg, struct s3c_iotimings *iot); @@ -235,6 +231,7 @@ extern int s3c2410_iotiming_get(struct s3c_cpufreq_config *cfg, extern void s3c2410_iotiming_set(struct s3c_cpufreq_config *cfg, struct s3c_iotimings *iot); #else +#define s3c2410_iotiming_debugfs NULL #define s3c2410_iotiming_calc NULL #define s3c2410_iotiming_get NULL #define s3c2410_iotiming_set NULL @@ -242,8 +239,10 @@ extern void s3c2410_iotiming_set(struct s3c_cpufreq_config *cfg, /* S3C2412 compatible routines */ -extern int s3c2412_iotiming_get(struct s3c_cpufreq_config *cfg, - struct s3c_iotimings *timings); +#ifdef CONFIG_S3C2412_IOTIMING +extern void s3c2412_iotiming_debugfs(struct seq_file *seq, + struct s3c_cpufreq_config *cfg, + union s3c_iobank *iob); extern int s3c2412_iotiming_get(struct s3c_cpufreq_config *cfg, struct s3c_iotimings *timings); @@ -253,6 +252,12 @@ extern int s3c2412_iotiming_calc(struct s3c_cpufreq_config *cfg, extern void s3c2412_iotiming_set(struct s3c_cpufreq_config *cfg, struct s3c_iotimings *iot); +#else +#define s3c2412_iotiming_debugfs NULL +#define s3c2412_iotiming_calc NULL +#define s3c2412_iotiming_get NULL +#define s3c2412_iotiming_set NULL +#endif /* CONFIG_S3C2412_IOTIMING */ #ifdef CONFIG_CPU_FREQ_S3C24XX_DEBUG #define s3c_freq_dbg(x...) printk(KERN_INFO x) diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index ef5a2a08fcc..ea339575032 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -34,10 +34,12 @@ void cpu_idle(void) { /* endless idle loop with no priority at all */ while (1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); + rcu_idle_enter(); while (!need_resched()) cpu_idle_sleep(); - tick_nohz_restart_sched_tick(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index 6a80a9e9fc4..8dd0416673c 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -88,10 +88,12 @@ void cpu_idle(void) #endif if (!idle) idle = default_idle; - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); + rcu_idle_enter(); while (!need_resched()) idle(); - tick_nohz_restart_sched_tick(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 27489b6dd53..3b7a7c48378 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -23,6 +23,9 @@ config IA64 select HAVE_ARCH_TRACEHOOK select HAVE_DMA_API_DEBUG select HAVE_GENERIC_HARDIRQS + select HAVE_MEMBLOCK + select HAVE_MEMBLOCK_NODE_MAP + select ARCH_DISCARD_MEMBLOCK select GENERIC_IRQ_PROBE select GENERIC_PENDING_IRQ if SMP select IRQ_PER_CPU @@ -474,9 +477,6 @@ config NODES_SHIFT MAX_NUMNODES will be 2^(This value). If in doubt, use the default. -config ARCH_POPULATES_NODE_MAP - def_bool y - # VIRTUAL_MEM_MAP and FLAT_NODE_MEM_MAP are functionally equivalent. # VIRTUAL_MEM_MAP has been retained for historical reasons. config VIRTUAL_MEM_MAP diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h index 461e52f0277..3deac956d32 100644 --- a/arch/ia64/include/asm/cputime.h +++ b/arch/ia64/include/asm/cputime.h @@ -50,6 +50,8 @@ typedef u64 __nocast cputime64_t; ((__force u64)(__ct) / NSEC_PER_USEC) #define usecs_to_cputime(__usecs) \ (__force cputime_t)((__usecs) * NSEC_PER_USEC) +#define usecs_to_cputime64(__usecs) \ + (__force cputime64_t)((__usecs) * NSEC_PER_USEC) /* * Convert cputime <-> seconds diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index f114a3b14c6..1516d1dc11f 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -16,6 +16,7 @@ */ #include <linux/bootmem.h> #include <linux/efi.h> +#include <linux/memblock.h> #include <linux/mm.h> #include <linux/nmi.h> #include <linux/swap.h> @@ -348,7 +349,7 @@ paging_init (void) printk("Virtual mem_map starts at 0x%p\n", mem_map); } #else /* !CONFIG_VIRTUAL_MEM_MAP */ - add_active_range(0, 0, max_low_pfn); + memblock_add_node(0, PFN_PHYS(max_low_pfn), 0); free_area_init_nodes(max_zone_pfns); #endif /* !CONFIG_VIRTUAL_MEM_MAP */ zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 00cb0e26c64..13df239dbed 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -10,6 +10,7 @@ #include <linux/bootmem.h> #include <linux/efi.h> #include <linux/elf.h> +#include <linux/memblock.h> #include <linux/mm.h> #include <linux/mmzone.h> #include <linux/module.h> @@ -557,8 +558,7 @@ int __init register_active_ranges(u64 start, u64 len, int nid) #endif if (start < end) - add_active_range(nid, __pa(start) >> PAGE_SHIFT, - __pa(end) >> PAGE_SHIFT); + memblock_add_node(__pa(start), end - start, nid); return 0; } diff --git a/arch/microblaze/include/asm/memblock.h b/arch/microblaze/include/asm/memblock.h deleted file mode 100644 index 20a8e257c77..00000000000 --- a/arch/microblaze/include/asm/memblock.h +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Copyright (C) 2008 Michal Simek <monstr@monstr.eu> - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ - -#ifndef _ASM_MICROBLAZE_MEMBLOCK_H -#define _ASM_MICROBLAZE_MEMBLOCK_H - -#endif /* _ASM_MICROBLAZE_MEMBLOCK_H */ - - diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index 95cc295976a..7dcb5bfffb7 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -103,10 +103,12 @@ void cpu_idle(void) if (!idle) idle = default_idle; - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); + rcu_idle_enter(); while (!need_resched()) idle(); - tick_nohz_restart_sched_tick(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c index 977484add21..80d314e8190 100644 --- a/arch/microblaze/kernel/prom.c +++ b/arch/microblaze/kernel/prom.c @@ -122,7 +122,6 @@ void __init early_init_devtree(void *params) of_scan_flat_dt(early_init_dt_scan_chosen, cmd_line); /* Scan memory nodes and rebuild MEMBLOCKs */ - memblock_init(); of_scan_flat_dt(early_init_dt_scan_root, NULL); of_scan_flat_dt(early_init_dt_scan_memory, NULL); @@ -130,7 +129,7 @@ void __init early_init_devtree(void *params) strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE); parse_early_param(); - memblock_analyze(); + memblock_allow_resize(); pr_debug("Phys. mem: %lx\n", (unsigned long) memblock_phys_mem_size()); diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index d46f1da18a3..9c652eb68aa 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -25,6 +25,9 @@ config MIPS select GENERIC_IRQ_SHOW select HAVE_ARCH_JUMP_LABEL select IRQ_FORCED_THREADING + select HAVE_MEMBLOCK + select HAVE_MEMBLOCK_NODE_MAP + select ARCH_DISCARD_MEMBLOCK menu "Machine selection" @@ -2064,9 +2067,6 @@ config ARCH_DISCONTIGMEM_ENABLE or have huge holes in the physical address space for other reasons. See <file:Documentation/vm/numa> for more. -config ARCH_POPULATES_NODE_MAP - def_bool y - config ARCH_SPARSEMEM_ENABLE bool select SPARSEMEM_STATIC diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index c47f96e453c..7955409051c 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -56,7 +56,8 @@ void __noreturn cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); + rcu_idle_enter(); while (!need_resched() && cpu_online(cpu)) { #ifdef CONFIG_MIPS_MT_SMTC extern void smtc_idle_loop_hook(void); @@ -77,7 +78,8 @@ void __noreturn cpu_idle(void) system_state == SYSTEM_BOOTING)) play_dead(); #endif - tick_nohz_restart_sched_tick(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 84af26ab221..b1cb8f87d7b 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -14,6 +14,7 @@ #include <linux/ioport.h> #include <linux/export.h> #include <linux/screen_info.h> +#include <linux/memblock.h> #include <linux/bootmem.h> #include <linux/initrd.h> #include <linux/root_dev.h> @@ -352,7 +353,7 @@ static void __init bootmem_init(void) continue; #endif - add_active_range(0, start, end); + memblock_add_node(PFN_PHYS(start), PFN_PHYS(end - start), 0); } /* diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index bc1297109cc..b105eca3c02 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -12,6 +12,7 @@ */ #include <linux/init.h> #include <linux/kernel.h> +#include <linux/memblock.h> #include <linux/mm.h> #include <linux/mmzone.h> #include <linux/module.h> @@ -381,8 +382,8 @@ static void __init szmem(void) continue; } num_physpages += slot_psize; - add_active_range(node, slot_getbasepfn(node, slot), - slot_getbasepfn(node, slot) + slot_psize); + memblock_add_node(PFN_PHYS(slot_getbasepfn(node, slot)), + PFN_PHYS(slot_psize), node); } } } diff --git a/arch/openrisc/include/asm/memblock.h b/arch/openrisc/include/asm/memblock.h deleted file mode 100644 index bbe5a1c788c..00000000000 --- a/arch/openrisc/include/asm/memblock.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * OpenRISC Linux - * - * Linux architectural port borrowing liberally from similar works of - * others. All original copyrights apply as per the original source - * declaration. - * - * OpenRISC implementation: - * Copyright (C) 2003 Matjaz Breskvar <phoenix@bsemi.com> - * Copyright (C) 2010-2011 Jonas Bonn <jonas@southpole.se> - * et al. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#ifndef __ASM_OPENRISC_MEMBLOCK_H -#define __ASM_OPENRISC_MEMBLOCK_H - -/* empty */ - -#endif /* __ASM_OPENRISC_MEMBLOCK_H */ diff --git a/arch/openrisc/kernel/idle.c b/arch/openrisc/kernel/idle.c index d5bc5f813e8..e5fc7887783 100644 --- a/arch/openrisc/kernel/idle.c +++ b/arch/openrisc/kernel/idle.c @@ -51,7 +51,8 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); + rcu_idle_enter(); while (!need_resched()) { check_pgt_cache(); @@ -69,7 +70,8 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); } - tick_nohz_restart_sched_tick(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/openrisc/kernel/prom.c b/arch/openrisc/kernel/prom.c index 1bb58ba89af..3d4478f6c94 100644 --- a/arch/openrisc/kernel/prom.c +++ b/arch/openrisc/kernel/prom.c @@ -76,14 +76,13 @@ void __init early_init_devtree(void *params) of_scan_flat_dt(early_init_dt_scan_chosen, cmd_line); /* Scan memory nodes and rebuild MEMBLOCKs */ - memblock_init(); of_scan_flat_dt(early_init_dt_scan_root, NULL); of_scan_flat_dt(early_init_dt_scan_memory, NULL); /* Save command line for /proc/cmdline and then parse parameters */ strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE); - memblock_analyze(); + memblock_allow_resize(); /* We must copy the flattend device tree from init memory to regular * memory because the device tree references the strings in it diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 951e18f5335..ead0bc68439 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -117,6 +117,7 @@ config PPC select HAVE_KRETPROBES select HAVE_ARCH_TRACEHOOK select HAVE_MEMBLOCK + select HAVE_MEMBLOCK_NODE_MAP select HAVE_DMA_ATTRS select HAVE_DMA_API_DEBUG select USE_GENERIC_SMP_HELPERS if SMP @@ -421,9 +422,6 @@ config ARCH_SPARSEMEM_DEFAULT def_bool y depends on (SMP && PPC_PSERIES) || PPC_PS3 -config ARCH_POPULATES_NODE_MAP - def_bool y - config SYS_SUPPORTS_HUGETLBFS bool diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index e94935c5201..6ec1c380a4d 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -134,6 +134,8 @@ static inline cputime_t usecs_to_cputime(const unsigned long us) return (__force cputime_t) ct; } +#define usecs_to_cputime64(us) usecs_to_cputime(us) + /* * Convert cputime <-> seconds */ diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index d4df013ad77..69c7377d207 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -381,39 +381,6 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) } #endif -static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, - unsigned long pte_index) -{ - unsigned long rb, va_low; - - rb = (v & ~0x7fUL) << 16; /* AVA field */ - va_low = pte_index >> 3; - if (v & HPTE_V_SECONDARY) - va_low = ~va_low; - /* xor vsid from AVA */ - if (!(v & HPTE_V_1TB_SEG)) - va_low ^= v >> 12; - else - va_low ^= v >> 24; - va_low &= 0x7ff; - if (v & HPTE_V_LARGE) { - rb |= 1; /* L field */ - if (cpu_has_feature(CPU_FTR_ARCH_206) && - (r & 0xff000)) { - /* non-16MB large page, must be 64k */ - /* (masks depend on page size) */ - rb |= 0x1000; /* page encoding in LP field */ - rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */ - rb |= (va_low & 0xfe); /* AVAL field (P7 doesn't seem to care) */ - } - } else { - /* 4kB page */ - rb |= (va_low & 0x7ff) << 12; /* remaining 11b of VA */ - } - rb |= (v >> 54) & 0x300; /* B field */ - return rb; -} - /* Magic register values loaded into r3 and r4 before the 'sc' assembly * instruction for the OSI hypercalls */ #define OSI_SC_MAGIC_R3 0x113724FA diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index e43fe42b987..d0ac94f98f9 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -29,4 +29,37 @@ static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu) #define SPAPR_TCE_SHIFT 12 +static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, + unsigned long pte_index) +{ + unsigned long rb, va_low; + + rb = (v & ~0x7fUL) << 16; /* AVA field */ + va_low = pte_index >> 3; + if (v & HPTE_V_SECONDARY) + va_low = ~va_low; + /* xor vsid from AVA */ + if (!(v & HPTE_V_1TB_SEG)) + va_low ^= v >> 12; + else + va_low ^= v >> 24; + va_low &= 0x7ff; + if (v & HPTE_V_LARGE) { + rb |= 1; /* L field */ + if (cpu_has_feature(CPU_FTR_ARCH_206) && + (r & 0xff000)) { + /* non-16MB large page, must be 64k */ + /* (masks depend on page size) */ + rb |= 0x1000; /* page encoding in LP field */ + rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */ + rb |= (va_low & 0xfe); /* AVAL field (P7 doesn't seem to care) */ + } + } else { + /* 4kB page */ + rb |= (va_low & 0x7ff) << 12; /* remaining 11b of VA */ + } + rb |= (v >> 54) & 0x300; /* B field */ + return rb; +} + #endif /* __ASM_KVM_BOOK3S_64_H__ */ diff --git a/arch/powerpc/include/asm/memblock.h b/arch/powerpc/include/asm/memblock.h deleted file mode 100644 index 43efc345065..00000000000 --- a/arch/powerpc/include/asm/memblock.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _ASM_POWERPC_MEMBLOCK_H -#define _ASM_POWERPC_MEMBLOCK_H - -#include <asm/udbg.h> - -#define MEMBLOCK_DBG(fmt...) udbg_printf(fmt) - -#endif /* _ASM_POWERPC_MEMBLOCK_H */ diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 39a2baa6ad5..9c3cd490b1b 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -46,6 +46,12 @@ static int __init powersave_off(char *arg) } __setup("powersave=off", powersave_off); +#if defined(CONFIG_PPC_PSERIES) && defined(CONFIG_TRACEPOINTS) +static const bool idle_uses_rcu = 1; +#else +static const bool idle_uses_rcu; +#endif + /* * The body of the idle task. */ @@ -56,7 +62,10 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); + if (!idle_uses_rcu) + rcu_idle_enter(); + while (!need_resched() && !cpu_should_die()) { ppc64_runlatch_off(); @@ -93,7 +102,9 @@ void cpu_idle(void) HMT_medium(); ppc64_runlatch_on(); - tick_nohz_restart_sched_tick(); + if (!idle_uses_rcu) + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); if (cpu_should_die()) cpu_die(); diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index 9ce1672afb5..a2158a395d9 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -107,9 +107,6 @@ void __init reserve_crashkernel(void) unsigned long long crash_size, crash_base; int ret; - /* this is necessary because of memblock_phys_mem_size() */ - memblock_analyze(); - /* use common parsing */ ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base); diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index fa1235b0503..abe405dab34 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -733,8 +733,6 @@ void __init early_init_devtree(void *params) of_scan_flat_dt(early_init_dt_scan_chosen_ppc, cmd_line); /* Scan memory nodes and rebuild MEMBLOCKs */ - memblock_init(); - of_scan_flat_dt(early_init_dt_scan_root, NULL); of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); @@ -756,20 +754,14 @@ void __init early_init_devtree(void *params) early_reserve_mem(); phyp_dump_reserve_mem(); - limit = memory_limit; - if (! limit) { - phys_addr_t memsize; - - /* Ensure that total memory size is page-aligned, because - * otherwise mark_bootmem() gets upset. */ - memblock_analyze(); - memsize = memblock_phys_mem_size(); - if ((memsize & PAGE_MASK) != memsize) - limit = memsize & PAGE_MASK; - } + /* + * Ensure that total memory size is page-aligned, because otherwise + * mark_bootmem() gets upset. + */ + limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE); memblock_enforce_memory_limit(limit); - memblock_analyze(); + memblock_allow_resize(); memblock_dump_all(); DBG("Phys. mem: %llx\n", memblock_phys_mem_size()); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 0cb137a9b03..336983da9e7 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -538,7 +538,7 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu) tpaca->kvm_hstate.napping = 0; vcpu->cpu = vc->pcpu; smp_wmb(); -#ifdef CONFIG_PPC_ICP_NATIVE +#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP) if (vcpu->arch.ptid) { tpaca->cpu_start = 0x80; wmb(); diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 3c791e1eb67..e2cfb9e1e20 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -658,10 +658,12 @@ program_interrupt: ulong cmd = kvmppc_get_gpr(vcpu, 3); int i; +#ifdef CONFIG_KVM_BOOK3S_64_PR if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) { r = RESUME_GUEST; break; } +#endif run->papr_hcall.nr = cmd; for (i = 0; i < 9; ++i) { diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 26d20903f2b..8c0d45a6faf 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -15,6 +15,7 @@ #include <linux/kvm_host.h> #include <linux/slab.h> #include <linux/err.h> +#include <linux/export.h> #include <asm/reg.h> #include <asm/cputable.h> diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 161cefde5c1..58861fa1220 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -134,8 +134,7 @@ void __init MMU_init(void) if (memblock.memory.cnt > 1) { #ifndef CONFIG_WII - memblock.memory.cnt = 1; - memblock_analyze(); + memblock_enforce_memory_limit(memblock.memory.regions[0].size); printk(KERN_WARNING "Only using first contiguous memory region"); #else wii_memory_fixups(); @@ -158,7 +157,6 @@ void __init MMU_init(void) #ifndef CONFIG_HIGHMEM total_memory = total_lowmem; memblock_enforce_memory_limit(total_lowmem); - memblock_analyze(); #endif /* CONFIG_HIGHMEM */ } diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 2dd6bdd31fe..8e2eb6611b0 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -199,7 +199,7 @@ void __init do_init_bootmem(void) unsigned long start_pfn, end_pfn; start_pfn = memblock_region_memory_base_pfn(reg); end_pfn = memblock_region_memory_end_pfn(reg); - add_active_range(0, start_pfn, end_pfn); + memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0); } /* Add all physical memory to the bootmem map, mark each area diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b22a83a91cb..e6eea0ac80c 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -127,45 +127,25 @@ static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn, } /* - * get_active_region_work_fn - A helper function for get_node_active_region - * Returns datax set to the start_pfn and end_pfn if they contain - * the initial value of datax->start_pfn between them - * @start_pfn: start page(inclusive) of region to check - * @end_pfn: end page(exclusive) of region to check - * @datax: comes in with ->start_pfn set to value to search for and - * goes out with active range if it contains it - * Returns 1 if search value is in range else 0 - */ -static int __init get_active_region_work_fn(unsigned long start_pfn, - unsigned long end_pfn, void *datax) -{ - struct node_active_region *data; - data = (struct node_active_region *)datax; - - if (start_pfn <= data->start_pfn && end_pfn > data->start_pfn) { - data->start_pfn = start_pfn; - data->end_pfn = end_pfn; - return 1; - } - return 0; - -} - -/* - * get_node_active_region - Return active region containing start_pfn + * get_node_active_region - Return active region containing pfn * Active range returned is empty if none found. - * @start_pfn: The page to return the region for. - * @node_ar: Returned set to the active region containing start_pfn + * @pfn: The page to return the region for + * @node_ar: Returned set to the active region containing @pfn */ -static void __init get_node_active_region(unsigned long start_pfn, - struct node_active_region *node_ar) +static void __init get_node_active_region(unsigned long pfn, + struct node_active_region *node_ar) { - int nid = early_pfn_to_nid(start_pfn); + unsigned long start_pfn, end_pfn; + int i, nid; - node_ar->nid = nid; - node_ar->start_pfn = start_pfn; - node_ar->end_pfn = start_pfn; - work_with_active_regions(nid, get_active_region_work_fn, node_ar); + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { + if (pfn >= start_pfn && pfn < end_pfn) { + node_ar->nid = nid; + node_ar->start_pfn = start_pfn; + node_ar->end_pfn = end_pfn; + break; + } + } } static void map_cpu_to_node(int cpu, int node) @@ -710,9 +690,7 @@ static void __init parse_drconf_memory(struct device_node *memory) node_set_online(nid); sz = numa_enforce_memory_limit(base, size); if (sz) - add_active_range(nid, base >> PAGE_SHIFT, - (base >> PAGE_SHIFT) - + (sz >> PAGE_SHIFT)); + memblock_set_node(base, sz, nid); } while (--ranges); } } @@ -802,8 +780,7 @@ new_range: continue; } - add_active_range(nid, start >> PAGE_SHIFT, - (start >> PAGE_SHIFT) + (size >> PAGE_SHIFT)); + memblock_set_node(start, size, nid); if (--ranges) goto new_range; @@ -839,7 +816,8 @@ static void __init setup_nonnuma(void) end_pfn = memblock_region_memory_end_pfn(reg); fake_numa_create_new_node(end_pfn, &nid); - add_active_range(nid, start_pfn, end_pfn); + memblock_set_node(PFN_PHYS(start_pfn), + PFN_PHYS(end_pfn - start_pfn), nid); node_set_online(nid); } } diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 4e13d6f9023..573ba3b69d1 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -615,7 +615,6 @@ static void __early_init_mmu(int boot_cpu) /* limit memory so we dont have linear faults */ memblock_enforce_memory_limit(linear_map_top); - memblock_analyze(); patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e); patch_exception(0x1e0, exc_instruction_tlb_miss_bolted_book3e); diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index 1b5dc1a2e14..6d8dadf19f0 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -79,24 +79,19 @@ void __init wii_memory_fixups(void) BUG_ON(memblock.memory.cnt != 2); BUG_ON(!page_aligned(p[0].base) || !page_aligned(p[1].base)); - p[0].size = _ALIGN_DOWN(p[0].size, PAGE_SIZE); - p[1].size = _ALIGN_DOWN(p[1].size, PAGE_SIZE); + /* trim unaligned tail */ + memblock_remove(ALIGN(p[1].base + p[1].size, PAGE_SIZE), + (phys_addr_t)ULLONG_MAX); - wii_hole_start = p[0].base + p[0].size; + /* determine hole, add & reserve them */ + wii_hole_start = ALIGN(p[0].base + p[0].size, PAGE_SIZE); wii_hole_size = p[1].base - wii_hole_start; - - pr_info("MEM1: <%08llx %08llx>\n", p[0].base, p[0].size); - pr_info("HOLE: <%08lx %08lx>\n", wii_hole_start, wii_hole_size); - pr_info("MEM2: <%08llx %08llx>\n", p[1].base, p[1].size); - - p[0].size += wii_hole_size + p[1].size; - - memblock.memory.cnt = 1; - memblock_analyze(); - - /* reserve the hole */ + memblock_add(wii_hole_start, wii_hole_size); memblock_reserve(wii_hole_start, wii_hole_size); + BUG_ON(memblock.memory.cnt != 1); + __memblock_dump_all(); + /* allow ioremapping the address space in the hole */ __allow_ioremap_reserved = 1; } diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c index ea0acbd8966..8fc62586a97 100644 --- a/arch/powerpc/platforms/iseries/setup.c +++ b/arch/powerpc/platforms/iseries/setup.c @@ -563,7 +563,8 @@ static void yield_shared_processor(void) static void iseries_shared_idle(void) { while (1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); + rcu_idle_enter(); while (!need_resched() && !hvlpevent_is_pending()) { local_irq_disable(); ppc64_runlatch_off(); @@ -577,7 +578,8 @@ static void iseries_shared_idle(void) } ppc64_runlatch_on(); - tick_nohz_restart_sched_tick(); + rcu_idle_exit(); + tick_nohz_idle_exit(); if (hvlpevent_is_pending()) process_iSeries_events(); @@ -593,7 +595,8 @@ static void iseries_dedicated_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); + rcu_idle_enter(); if (!need_resched()) { while (!need_resched()) { ppc64_runlatch_off(); @@ -610,7 +613,8 @@ static void iseries_dedicated_idle(void) } ppc64_runlatch_on(); - tick_nohz_restart_sched_tick(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index 72714ad2784..8bd6ba54269 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -319,7 +319,6 @@ static int __init ps3_mm_add_memory(void) } memblock_add(start_addr, map.r1.size); - memblock_analyze(); result = online_pages(start_pfn, nr_pages); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 27a49508b41..52d429be6c7 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -555,6 +555,8 @@ void __trace_hcall_entry(unsigned long opcode, unsigned long *args) (*depth)++; trace_hcall_entry(opcode, args); + if (opcode == H_CEDE) + rcu_idle_enter(); (*depth)--; out: @@ -575,6 +577,8 @@ void __trace_hcall_exit(long opcode, unsigned long retval, goto out; (*depth)++; + if (opcode == H_CEDE) + rcu_idle_exit(); trace_hcall_exit(opcode, retval, retbuf); (*depth)--; diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 373679b3744..d48ede33443 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -92,6 +92,9 @@ config S390 select HAVE_ARCH_JUMP_LABEL if !MARCH_G5 select HAVE_RCU_TABLE_FREE if SMP select ARCH_SAVE_PAGE_KEYS if HIBERNATION + select HAVE_MEMBLOCK + select HAVE_MEMBLOCK_NODE_MAP + select ARCH_DISCARD_MEMBLOCK select ARCH_INLINE_SPIN_TRYLOCK select ARCH_INLINE_SPIN_TRYLOCK_BH select ARCH_INLINE_SPIN_LOCK @@ -345,9 +348,6 @@ config WARN_DYNAMIC_STACK Say N if you are unsure. -config ARCH_POPULATES_NODE_MAP - def_bool y - comment "Kernel preemption" source "kernel/Kconfig.preempt" diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 0887a0463e3..c23c3900c30 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -72,6 +72,8 @@ static inline cputime_t usecs_to_cputime(const unsigned int m) return (__force cputime_t)(m * 4096ULL); } +#define usecs_to_cputime64(m) usecs_to_cputime(m) + /* * Convert cputime to milliseconds and back. */ diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 9451b210a1b..3201ae44799 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -91,10 +91,12 @@ static void default_idle(void) void cpu_idle(void) { for (;;) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); + rcu_idle_enter(); while (!need_resched()) default_idle(); - tick_nohz_restart_sched_tick(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index e54c4ff8aba..f11d1b037c5 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -21,6 +21,7 @@ #include <linux/module.h> #include <linux/sched.h> #include <linux/kernel.h> +#include <linux/memblock.h> #include <linux/mm.h> #include <linux/stddef.h> #include <linux/unistd.h> @@ -820,7 +821,8 @@ setup_memory(void) end_chunk = min(end_chunk, end_pfn); if (start_chunk >= end_chunk) continue; - add_active_range(0, start_chunk, end_chunk); + memblock_add_node(PFN_PHYS(start_chunk), + PFN_PHYS(end_chunk - start_chunk), 0); pfn = max(start_chunk, start_pfn); for (; pfn < end_chunk; pfn++) page_set_storage_key(PFN_PHYS(pfn), diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index f43c0e4282a..9daee91e6c3 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -22,6 +22,7 @@ #include <asm/irq.h> #include "hwsampler.h" +#include "op_counter.h" #define MAX_NUM_SDB 511 #define MIN_NUM_SDB 1 @@ -896,6 +897,8 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, if (sample_data_ptr->P == 1) { /* userspace sample */ unsigned int pid = sample_data_ptr->prim_asn; + if (!counter_config.user) + goto skip_sample; rcu_read_lock(); tsk = pid_task(find_vpid(pid), PIDTYPE_PID); if (tsk) @@ -903,6 +906,8 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, rcu_read_unlock(); } else { /* kernelspace sample */ + if (!counter_config.kernel) + goto skip_sample; regs = task_pt_regs(current); } @@ -910,7 +915,7 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0, !sample_data_ptr->P, tsk); mutex_unlock(&hws_sem); - + skip_sample: sample_data_ptr++; } } diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 6efc18b5e60..2297be406c6 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -2,10 +2,11 @@ * arch/s390/oprofile/init.c * * S390 Version - * Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright (C) 2002-2011 IBM Deutschland Entwicklung GmbH, IBM Corporation * Author(s): Thomas Spatzier (tspat@de.ibm.com) * Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com) * Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com) + * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com) * * @remark Copyright 2002-2011 OProfile authors */ @@ -14,6 +15,8 @@ #include <linux/init.h> #include <linux/errno.h> #include <linux/fs.h> +#include <linux/module.h> +#include <asm/processor.h> #include "../../../drivers/oprofile/oprof.h" @@ -22,6 +25,7 @@ extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth); #ifdef CONFIG_64BIT #include "hwsampler.h" +#include "op_counter.h" #define DEFAULT_INTERVAL 4127518 @@ -35,16 +39,41 @@ static unsigned long oprofile_max_interval; static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS; static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS; -static int hwsampler_file; +static int hwsampler_enabled; static int hwsampler_running; /* start_mutex must be held to change */ +static int hwsampler_available; static struct oprofile_operations timer_ops; +struct op_counter_config counter_config; + +enum __force_cpu_type { + reserved = 0, /* do not force */ + timer, +}; +static int force_cpu_type; + +static int set_cpu_type(const char *str, struct kernel_param *kp) +{ + if (!strcmp(str, "timer")) { + force_cpu_type = timer; + printk(KERN_INFO "oprofile: forcing timer to be returned " + "as cpu type\n"); + } else { + force_cpu_type = 0; + } + + return 0; +} +module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0); +MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling" + "(report cpu_type \"timer\""); + static int oprofile_hwsampler_start(void) { int retval; - hwsampler_running = hwsampler_file; + hwsampler_running = hwsampler_enabled; if (!hwsampler_running) return timer_ops.start(); @@ -72,10 +101,16 @@ static void oprofile_hwsampler_stop(void) return; } +/* + * File ops used for: + * /dev/oprofile/0/enabled + * /dev/oprofile/hwsampling/hwsampler (cpu_type = timer) + */ + static ssize_t hwsampler_read(struct file *file, char __user *buf, size_t count, loff_t *offset) { - return oprofilefs_ulong_to_user(hwsampler_file, buf, count, offset); + return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset); } static ssize_t hwsampler_write(struct file *file, char const __user *buf, @@ -88,9 +123,12 @@ static ssize_t hwsampler_write(struct file *file, char const __user *buf, return -EINVAL; retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval) + if (retval <= 0) return retval; + if (val != 0 && val != 1) + return -EINVAL; + if (oprofile_started) /* * save to do without locking as we set @@ -99,7 +137,7 @@ static ssize_t hwsampler_write(struct file *file, char const __user *buf, */ return -EBUSY; - hwsampler_file = val; + hwsampler_enabled = val; return count; } @@ -109,38 +147,311 @@ static const struct file_operations hwsampler_fops = { .write = hwsampler_write, }; +/* + * File ops used for: + * /dev/oprofile/0/count + * /dev/oprofile/hwsampling/hw_interval (cpu_type = timer) + * + * Make sure that the value is within the hardware range. + */ + +static ssize_t hw_interval_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(oprofile_hw_interval, buf, + count, offset); +} + +static ssize_t hw_interval_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + if (val < oprofile_min_interval) + oprofile_hw_interval = oprofile_min_interval; + else if (val > oprofile_max_interval) + oprofile_hw_interval = oprofile_max_interval; + else + oprofile_hw_interval = val; + + return count; +} + +static const struct file_operations hw_interval_fops = { + .read = hw_interval_read, + .write = hw_interval_write, +}; + +/* + * File ops used for: + * /dev/oprofile/0/event + * Only a single event with number 0 is supported with this counter. + * + * /dev/oprofile/0/unit_mask + * This is a dummy file needed by the user space tools. + * No value other than 0 is accepted or returned. + */ + +static ssize_t hwsampler_zero_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(0, buf, count, offset); +} + +static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + if (val != 0) + return -EINVAL; + return count; +} + +static const struct file_operations zero_fops = { + .read = hwsampler_zero_read, + .write = hwsampler_zero_write, +}; + +/* /dev/oprofile/0/kernel file ops. */ + +static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(counter_config.kernel, + buf, count, offset); +} + +static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + if (val != 0 && val != 1) + return -EINVAL; + + counter_config.kernel = val; + + return count; +} + +static const struct file_operations kernel_fops = { + .read = hwsampler_kernel_read, + .write = hwsampler_kernel_write, +}; + +/* /dev/oprofile/0/user file ops. */ + +static ssize_t hwsampler_user_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(counter_config.user, + buf, count, offset); +} + +static ssize_t hwsampler_user_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + if (val != 0 && val != 1) + return -EINVAL; + + counter_config.user = val; + + return count; +} + +static const struct file_operations user_fops = { + .read = hwsampler_user_read, + .write = hwsampler_user_write, +}; + + +/* + * File ops used for: /dev/oprofile/timer/enabled + * The value always has to be the inverted value of hwsampler_enabled. So + * no separate variable is created. That way we do not need locking. + */ + +static ssize_t timer_enabled_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset); +} + +static ssize_t timer_enabled_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + if (val != 0 && val != 1) + return -EINVAL; + + /* Timer cannot be disabled without having hardware sampling. */ + if (val == 0 && !hwsampler_available) + return -EINVAL; + + if (oprofile_started) + /* + * save to do without locking as we set + * hwsampler_running in start() when start_mutex is + * held + */ + return -EBUSY; + + hwsampler_enabled = !val; + + return count; +} + +static const struct file_operations timer_enabled_fops = { + .read = timer_enabled_read, + .write = timer_enabled_write, +}; + + static int oprofile_create_hwsampling_files(struct super_block *sb, - struct dentry *root) + struct dentry *root) { - struct dentry *hw_dir; + struct dentry *dir; + + dir = oprofilefs_mkdir(sb, root, "timer"); + if (!dir) + return -EINVAL; + + oprofilefs_create_file(sb, dir, "enabled", &timer_enabled_fops); + + if (!hwsampler_available) + return 0; /* reinitialize default values */ - hwsampler_file = 1; + hwsampler_enabled = 1; + counter_config.kernel = 1; + counter_config.user = 1; - hw_dir = oprofilefs_mkdir(sb, root, "hwsampling"); - if (!hw_dir) - return -EINVAL; + if (!force_cpu_type) { + /* + * Create the counter file system. A single virtual + * counter is created which can be used to + * enable/disable hardware sampling dynamically from + * user space. The user space will configure a single + * counter with a single event. The value of 'event' + * and 'unit_mask' are not evaluated by the kernel code + * and can only be set to 0. + */ + + dir = oprofilefs_mkdir(sb, root, "0"); + if (!dir) + return -EINVAL; - oprofilefs_create_file(sb, hw_dir, "hwsampler", &hwsampler_fops); - oprofilefs_create_ulong(sb, hw_dir, "hw_interval", - &oprofile_hw_interval); - oprofilefs_create_ro_ulong(sb, hw_dir, "hw_min_interval", - &oprofile_min_interval); - oprofilefs_create_ro_ulong(sb, hw_dir, "hw_max_interval", - &oprofile_max_interval); - oprofilefs_create_ulong(sb, hw_dir, "hw_sdbt_blocks", - &oprofile_sdbt_blocks); + oprofilefs_create_file(sb, dir, "enabled", &hwsampler_fops); + oprofilefs_create_file(sb, dir, "event", &zero_fops); + oprofilefs_create_file(sb, dir, "count", &hw_interval_fops); + oprofilefs_create_file(sb, dir, "unit_mask", &zero_fops); + oprofilefs_create_file(sb, dir, "kernel", &kernel_fops); + oprofilefs_create_file(sb, dir, "user", &user_fops); + oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks", + &oprofile_sdbt_blocks); + } else { + /* + * Hardware sampling can be used but the cpu_type is + * forced to timer in order to deal with legacy user + * space tools. The /dev/oprofile/hwsampling fs is + * provided in that case. + */ + dir = oprofilefs_mkdir(sb, root, "hwsampling"); + if (!dir) + return -EINVAL; + + oprofilefs_create_file(sb, dir, "hwsampler", + &hwsampler_fops); + oprofilefs_create_file(sb, dir, "hw_interval", + &hw_interval_fops); + oprofilefs_create_ro_ulong(sb, dir, "hw_min_interval", + &oprofile_min_interval); + oprofilefs_create_ro_ulong(sb, dir, "hw_max_interval", + &oprofile_max_interval); + oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks", + &oprofile_sdbt_blocks); + } return 0; } static int oprofile_hwsampler_init(struct oprofile_operations *ops) { + /* + * Initialize the timer mode infrastructure as well in order + * to be able to switch back dynamically. oprofile_timer_init + * is not supposed to fail. + */ + if (oprofile_timer_init(ops)) + BUG(); + + memcpy(&timer_ops, ops, sizeof(timer_ops)); + ops->create_files = oprofile_create_hwsampling_files; + + /* + * If the user space tools do not support newer cpu types, + * the force_cpu_type module parameter + * can be used to always return \"timer\" as cpu type. + */ + if (force_cpu_type != timer) { + struct cpuid id; + + get_cpu_id (&id); + + switch (id.machine) { + case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break; + case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break; + default: return -ENODEV; + } + } + if (hwsampler_setup()) return -ENODEV; /* - * create hwsampler files only if hwsampler_setup() succeeds. + * Query the range for the sampling interval from the + * hardware. */ oprofile_min_interval = hwsampler_query_min_interval(); if (oprofile_min_interval == 0) @@ -155,23 +466,17 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops) if (oprofile_hw_interval > oprofile_max_interval) oprofile_hw_interval = oprofile_max_interval; - if (oprofile_timer_init(ops)) - return -ENODEV; - - printk(KERN_INFO "oprofile: using hardware sampling\n"); - - memcpy(&timer_ops, ops, sizeof(timer_ops)); + printk(KERN_INFO "oprofile: System z hardware sampling " + "facility found.\n"); ops->start = oprofile_hwsampler_start; ops->stop = oprofile_hwsampler_stop; - ops->create_files = oprofile_create_hwsampling_files; return 0; } static void oprofile_hwsampler_exit(void) { - oprofile_timer_exit(); hwsampler_shutdown(); } @@ -182,7 +487,15 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) ops->backtrace = s390_backtrace; #ifdef CONFIG_64BIT - return oprofile_hwsampler_init(ops); + + /* + * -ENODEV is not reported to the caller. The module itself + * will use the timer mode sampling as fallback and this is + * always available. + */ + hwsampler_available = oprofile_hwsampler_init(ops) == 0; + + return 0; #else return -ENODEV; #endif diff --git a/arch/s390/oprofile/op_counter.h b/arch/s390/oprofile/op_counter.h new file mode 100644 index 00000000000..1a8d3ca0901 --- /dev/null +++ b/arch/s390/oprofile/op_counter.h @@ -0,0 +1,23 @@ +/** + * arch/s390/oprofile/op_counter.h + * + * Copyright (C) 2011 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com) + * + * @remark Copyright 2011 OProfile authors + */ + +#ifndef OP_COUNTER_H +#define OP_COUNTER_H + +struct op_counter_config { + /* `enabled' maps to the hwsampler_file variable. */ + /* `count' maps to the oprofile_hw_interval variable. */ + /* `event' and `unit_mask' are unused. */ + unsigned long kernel; + unsigned long user; +}; + +extern struct op_counter_config counter_config; + +#endif /* OP_COUNTER_H */ diff --git a/arch/score/Kconfig b/arch/score/Kconfig index df169e84db4..8b0c9464aa9 100644 --- a/arch/score/Kconfig +++ b/arch/score/Kconfig @@ -4,6 +4,9 @@ config SCORE def_bool y select HAVE_GENERIC_HARDIRQS select GENERIC_IRQ_SHOW + select HAVE_MEMBLOCK + select HAVE_MEMBLOCK_NODE_MAP + select ARCH_DISCARD_MEMBLOCK choice prompt "System type" @@ -60,9 +63,6 @@ config 32BIT config ARCH_FLATMEM_ENABLE def_bool y -config ARCH_POPULATES_NODE_MAP - def_bool y - source "mm/Kconfig" config MEMORY_START diff --git a/arch/score/kernel/setup.c b/arch/score/kernel/setup.c index 6f898c05787..b48459afefd 100644 --- a/arch/score/kernel/setup.c +++ b/arch/score/kernel/setup.c @@ -26,6 +26,7 @@ #include <linux/bootmem.h> #include <linux/initrd.h> #include <linux/ioport.h> +#include <linux/memblock.h> #include <linux/mm.h> #include <linux/seq_file.h> #include <linux/screen_info.h> @@ -54,7 +55,8 @@ static void __init bootmem_init(void) /* Initialize the boot-time allocator with low memory only. */ bootmap_size = init_bootmem_node(NODE_DATA(0), start_pfn, min_low_pfn, max_low_pfn); - add_active_range(0, min_low_pfn, max_low_pfn); + memblock_add_node(PFN_PHYS(min_low_pfn), + PFN_PHYS(max_low_pfn - min_low_pfn), 0); free_bootmem(PFN_PHYS(start_pfn), (max_low_pfn - start_pfn) << PAGE_SHIFT); diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 5629e209913..47a2f1c2cb0 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -4,6 +4,7 @@ config SUPERH select CLKDEV_LOOKUP select HAVE_IDE if HAS_IOPORT select HAVE_MEMBLOCK + select HAVE_MEMBLOCK_NODE_MAP select HAVE_OPROFILE select HAVE_GENERIC_DMA_COHERENT select HAVE_ARCH_TRACEHOOK diff --git a/arch/sh/boards/board-sh7757lcr.c b/arch/sh/boards/board-sh7757lcr.c index ec8c84c14b1..895e337c79b 100644 --- a/arch/sh/boards/board-sh7757lcr.c +++ b/arch/sh/boards/board-sh7757lcr.c @@ -50,9 +50,9 @@ static struct platform_device heartbeat_device = { #define GBECONT 0xffc10100 #define GBECONT_RMII1 BIT(17) #define GBECONT_RMII0 BIT(16) -static void sh7757_eth_set_mdio_gate(unsigned long addr) +static void sh7757_eth_set_mdio_gate(void *addr) { - if ((addr & 0x00000fff) < 0x0800) + if (((unsigned long)addr & 0x00000fff) < 0x0800) writel(readl(GBECONT) | GBECONT_RMII0, GBECONT); else writel(readl(GBECONT) | GBECONT_RMII1, GBECONT); @@ -116,9 +116,9 @@ static struct platform_device sh7757_eth1_device = { }, }; -static void sh7757_eth_giga_set_mdio_gate(unsigned long addr) +static void sh7757_eth_giga_set_mdio_gate(void *addr) { - if ((addr & 0x00000fff) < 0x0800) { + if (((unsigned long)addr & 0x00000fff) < 0x0800) { gpio_set_value(GPIO_PTT4, 1); writel(readl(GBECONT) & ~GBECONT_RMII0, GBECONT); } else { @@ -210,8 +210,12 @@ static struct resource sh_mmcif_resources[] = { }; static struct sh_mmcif_dma sh7757lcr_mmcif_dma = { - .chan_priv_tx = SHDMA_SLAVE_MMCIF_TX, - .chan_priv_rx = SHDMA_SLAVE_MMCIF_RX, + .chan_priv_tx = { + .slave_id = SHDMA_SLAVE_MMCIF_TX, + }, + .chan_priv_rx = { + .slave_id = SHDMA_SLAVE_MMCIF_RX, + } }; static struct sh_mmcif_plat_data sh_mmcif_plat = { diff --git a/arch/sh/include/asm/memblock.h b/arch/sh/include/asm/memblock.h deleted file mode 100644 index e87063fad2e..00000000000 --- a/arch/sh/include/asm/memblock.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef __ASM_SH_MEMBLOCK_H -#define __ASM_SH_MEMBLOCK_H - -#endif /* __ASM_SH_MEMBLOCK_H */ diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c index db4ecd731a0..406508d4ce7 100644 --- a/arch/sh/kernel/idle.c +++ b/arch/sh/kernel/idle.c @@ -89,7 +89,8 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); + rcu_idle_enter(); while (!need_resched()) { check_pgt_cache(); @@ -111,7 +112,8 @@ void cpu_idle(void) start_critical_timings(); } - tick_nohz_restart_sched_tick(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/sh/kernel/machine_kexec.c b/arch/sh/kernel/machine_kexec.c index c5a33f007f8..9fea49f6e66 100644 --- a/arch/sh/kernel/machine_kexec.c +++ b/arch/sh/kernel/machine_kexec.c @@ -157,9 +157,6 @@ void __init reserve_crashkernel(void) unsigned long long crash_size, crash_base; int ret; - /* this is necessary because of memblock_phys_mem_size() */ - memblock_analyze(); - ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base); if (ret == 0 && crash_size > 0) { diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index 1a0e946679a..7b57bf1dc85 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -230,7 +230,8 @@ void __init __add_active_range(unsigned int nid, unsigned long start_pfn, pmb_bolt_mapping((unsigned long)__va(start), start, end - start, PAGE_KERNEL); - add_active_range(nid, start_pfn, end_pfn); + memblock_set_node(PFN_PHYS(start_pfn), + PFN_PHYS(end_pfn - start_pfn), nid); } void __init __weak plat_early_device_setup(void) diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig index c3e61b36649..cb8f9920f4d 100644 --- a/arch/sh/mm/Kconfig +++ b/arch/sh/mm/Kconfig @@ -143,9 +143,6 @@ config MAX_ACTIVE_REGIONS CPU_SUBTYPE_SH7785) default "1" -config ARCH_POPULATES_NODE_MAP - def_bool y - config ARCH_SELECT_MEMORY_MODEL def_bool y diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 939ca0f356f..82cc576fab1 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -324,7 +324,6 @@ void __init paging_init(void) unsigned long vaddr, end; int nid; - memblock_init(); sh_mv.mv_mem_init(); early_reserve_mem(); @@ -337,7 +336,7 @@ void __init paging_init(void) sh_mv.mv_mem_reserve(); memblock_enforce_memory_limit(memory_limit); - memblock_analyze(); + memblock_allow_resize(); memblock_dump_all(); diff --git a/arch/sh/oprofile/common.c b/arch/sh/oprofile/common.c index b4c2d2b946d..e4dd5d5a111 100644 --- a/arch/sh/oprofile/common.c +++ b/arch/sh/oprofile/common.c @@ -49,7 +49,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) return oprofile_perf_init(ops); } -void __exit oprofile_arch_exit(void) +void oprofile_arch_exit(void) { oprofile_perf_exit(); kfree(sh_pmu_op_name); @@ -60,5 +60,5 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) ops->backtrace = sh_backtrace; return -ENODEV; } -void __exit oprofile_arch_exit(void) {} +void oprofile_arch_exit(void) {} #endif /* CONFIG_HW_PERF_EVENTS */ diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index f92602e8660..70ae9d81870 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -43,6 +43,7 @@ config SPARC64 select HAVE_KPROBES select HAVE_RCU_TABLE_FREE if SMP select HAVE_MEMBLOCK + select HAVE_MEMBLOCK_NODE_MAP select HAVE_SYSCALL_WRAPPERS select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD @@ -352,9 +353,6 @@ config NODES_SPAN_OTHER_NODES def_bool y depends on NEED_MULTIPLE_NODES -config ARCH_POPULATES_NODE_MAP - def_bool y if SPARC64 - config ARCH_SELECT_MEMORY_MODEL def_bool y if SPARC64 diff --git a/arch/sparc/include/asm/memblock.h b/arch/sparc/include/asm/memblock.h deleted file mode 100644 index c67b047ef85..00000000000 --- a/arch/sparc/include/asm/memblock.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _SPARC64_MEMBLOCK_H -#define _SPARC64_MEMBLOCK_H - -#include <asm/oplib.h> - -#define MEMBLOCK_DBG(fmt...) prom_printf(fmt) - -#endif /* !(_SPARC64_MEMBLOCK_H) */ diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c index 7429b47c3ac..381edcd5bc2 100644 --- a/arch/sparc/kernel/ds.c +++ b/arch/sparc/kernel/ds.c @@ -1181,13 +1181,11 @@ static int __devinit ds_probe(struct vio_dev *vdev, dp->rcv_buf_len = 4096; - dp->ds_states = kzalloc(sizeof(ds_states_template), - GFP_KERNEL); + dp->ds_states = kmemdup(ds_states_template, + sizeof(ds_states_template), GFP_KERNEL); if (!dp->ds_states) goto out_free_rcv_buf; - memcpy(dp->ds_states, ds_states_template, - sizeof(ds_states_template)); dp->num_ds_states = ARRAY_SIZE(ds_states_template); for (i = 0; i < dp->num_ds_states; i++) diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index b272cda35a0..af5755d20fb 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -849,10 +849,10 @@ static int pci_sun4v_msiq_build_irq(struct pci_pbm_info *pbm, if (!irq) return -ENOMEM; - if (pci_sun4v_msiq_setstate(pbm->devhandle, msiqid, HV_MSIQSTATE_IDLE)) - return -EINVAL; if (pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_VALID)) return -EINVAL; + if (pci_sun4v_msiq_setstate(pbm->devhandle, msiqid, HV_MSIQSTATE_IDLE)) + return -EINVAL; return irq; } diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 3739a06a76c..39d8b05201a 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -95,12 +95,14 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while(1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); + rcu_idle_enter(); while (!need_resched() && !cpu_is_offline(cpu)) sparc64_yield(cpu); - tick_nohz_restart_sched_tick(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); diff --git a/arch/sparc/kernel/prom_common.c b/arch/sparc/kernel/prom_common.c index 46614807a57..741df916c12 100644 --- a/arch/sparc/kernel/prom_common.c +++ b/arch/sparc/kernel/prom_common.c @@ -58,12 +58,10 @@ int of_set_property(struct device_node *dp, const char *name, void *val, int len void *new_val; int err; - new_val = kmalloc(len, GFP_KERNEL); + new_val = kmemdup(val, len, GFP_KERNEL); if (!new_val) return -ENOMEM; - memcpy(new_val, val, len); - err = -ENODEV; mutex_lock(&of_set_property_mutex); diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c index fe1e3fc31bc..ffb883ddd0f 100644 --- a/arch/sparc/kernel/setup_32.c +++ b/arch/sparc/kernel/setup_32.c @@ -84,7 +84,7 @@ static void prom_sync_me(void) prom_printf("PROM SYNC COMMAND...\n"); show_free_areas(0); - if(current->pid != 0) { + if (!is_idle_task(current)) { local_irq_enable(); sys_sync(); local_irq_disable(); diff --git a/arch/sparc/mm/btfixup.c b/arch/sparc/mm/btfixup.c index 5175ac2f482..8a7f81743c1 100644 --- a/arch/sparc/mm/btfixup.c +++ b/arch/sparc/mm/btfixup.c @@ -302,8 +302,7 @@ void __init btfixup(void) case 'i': /* INT */ if ((insn & 0xc1c00000) == 0x01000000) /* %HI */ set_addr(addr, q[1], fmangled, (insn & 0xffc00000) | (p[1] >> 10)); - else if ((insn & 0x80002000) == 0x80002000 && - (insn & 0x01800000) != 0x01800000) /* %LO */ + else if ((insn & 0x80002000) == 0x80002000) /* %LO */ set_addr(addr, q[1], fmangled, (insn & 0xffffe000) | (p[1] & 0x3ff)); else { prom_printf(insn_i, p, addr, insn); diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 8e073d80213..b3f5e7dfea5 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -790,7 +790,7 @@ static int find_node(unsigned long addr) return -1; } -u64 memblock_nid_range(u64 start, u64 end, int *nid) +static u64 memblock_nid_range(u64 start, u64 end, int *nid) { *nid = find_node(start); start += PAGE_SIZE; @@ -808,7 +808,7 @@ u64 memblock_nid_range(u64 start, u64 end, int *nid) return start; } #else -u64 memblock_nid_range(u64 start, u64 end, int *nid) +static u64 memblock_nid_range(u64 start, u64 end, int *nid) { *nid = 0; return end; @@ -816,7 +816,7 @@ u64 memblock_nid_range(u64 start, u64 end, int *nid) #endif /* This must be invoked after performing all of the necessary - * add_active_range() calls for 'nid'. We need to be able to get + * memblock_set_node() calls for 'nid'. We need to be able to get * correct data from get_pfn_range_for_nid(). */ static void __init allocate_node_data(int nid) @@ -987,14 +987,11 @@ static void __init add_node_ranges(void) this_end = memblock_nid_range(start, end, &nid); - numadbg("Adding active range nid[%d] " + numadbg("Setting memblock NUMA node nid[%d] " "start[%lx] end[%lx]\n", nid, start, this_end); - add_active_range(nid, - start >> PAGE_SHIFT, - this_end >> PAGE_SHIFT); - + memblock_set_node(start, this_end - start, nid); start = this_end; } } @@ -1282,7 +1279,6 @@ static void __init bootmem_init_nonnuma(void) { unsigned long top_of_ram = memblock_end_of_DRAM(); unsigned long total_ram = memblock_phys_mem_size(); - struct memblock_region *reg; numadbg("bootmem_init_nonnuma()\n"); @@ -1292,20 +1288,8 @@ static void __init bootmem_init_nonnuma(void) (top_of_ram - total_ram) >> 20); init_node_masks_nonnuma(); - - for_each_memblock(memory, reg) { - unsigned long start_pfn, end_pfn; - - if (!reg->size) - continue; - - start_pfn = memblock_region_memory_base_pfn(reg); - end_pfn = memblock_region_memory_end_pfn(reg); - add_active_range(0, start_pfn, end_pfn); - } - + memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0); allocate_node_data(0); - node_set_online(0); } @@ -1769,8 +1753,6 @@ void __init paging_init(void) sun4v_ktsb_init(); } - memblock_init(); - /* Find available physical memory... * * Read it twice in order to work around a bug in openfirmware. @@ -1796,7 +1778,7 @@ void __init paging_init(void) memblock_enforce_memory_limit(cmdline_memory_size); - memblock_analyze(); + memblock_allow_resize(); memblock_dump_all(); set_bit(0, mmu_context_bmap); diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 9c45d8bbdf5..4c1ac6e5347 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -85,7 +85,8 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); + rcu_idle_enter(); while (!need_resched()) { if (cpu_is_offline(cpu)) BUG(); /* no HOTPLUG_CPU */ @@ -105,7 +106,8 @@ void cpu_idle(void) local_irq_enable(); current_thread_info()->status |= TS_POLLING; } - tick_nohz_restart_sched_tick(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 25b7b90fd62..c1eaaa1fcc2 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -54,7 +54,7 @@ static noinline void force_sig_info_fault(const char *type, int si_signo, if (unlikely(tsk->pid < 2)) { panic("Signal %d (code %d) at %#lx sent to %s!", si_signo, si_code & 0xffff, address, - tsk->pid ? "init" : "the idle task"); + is_idle_task(tsk) ? "the idle task" : "init"); } info.si_signo = si_signo; @@ -515,7 +515,7 @@ no_context: if (unlikely(tsk->pid < 2)) { panic("Kernel page fault running %s!", - tsk->pid ? "init" : "the idle task"); + is_idle_task(tsk) ? "the idle task" : "init"); } /* diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index c5338351aec..69f24905abd 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -246,10 +246,12 @@ void default_idle(void) if (need_resched()) schedule(); - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); + rcu_idle_enter(); nsecs = disable_timer(); idle_sleep(nsecs); - tick_nohz_restart_sched_tick(); + rcu_idle_exit(); + tick_nohz_idle_exit(); } } diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c index ba401df971e..52edc2b6287 100644 --- a/arch/unicore32/kernel/process.c +++ b/arch/unicore32/kernel/process.c @@ -55,7 +55,8 @@ void cpu_idle(void) { /* endless idle loop with no priority at all */ while (1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); + rcu_idle_enter(); while (!need_resched()) { local_irq_disable(); stop_critical_timings(); @@ -63,7 +64,8 @@ void cpu_idle(void) local_irq_enable(); start_critical_timings(); } - tick_nohz_restart_sched_tick(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/unicore32/kernel/setup.c b/arch/unicore32/kernel/setup.c index 471b6bca8da..673d7a89d8f 100644 --- a/arch/unicore32/kernel/setup.c +++ b/arch/unicore32/kernel/setup.c @@ -37,6 +37,7 @@ #include <asm/cacheflush.h> #include <asm/tlbflush.h> #include <asm/traps.h> +#include <asm/memblock.h> #include "setup.h" diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c index 3b379cddbc6..de186bde897 100644 --- a/arch/unicore32/mm/init.c +++ b/arch/unicore32/mm/init.c @@ -26,6 +26,7 @@ #include <asm/setup.h> #include <asm/sizes.h> #include <asm/tlb.h> +#include <asm/memblock.h> #include <mach/map.h> #include "mm.h" @@ -245,7 +246,6 @@ void __init uc32_memblock_init(struct meminfo *mi) sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]), meminfo_cmp, NULL); - memblock_init(); for (i = 0; i < mi->nr_banks; i++) memblock_add(mi->bank[i].start, mi->bank[i].size); @@ -264,7 +264,7 @@ void __init uc32_memblock_init(struct meminfo *mi) uc32_mm_memblock_reserve(); - memblock_analyze(); + memblock_allow_resize(); memblock_dump_all(); } diff --git a/arch/unicore32/mm/mmu.c b/arch/unicore32/mm/mmu.c index 3e5c3e5a0b4..43c20b40e44 100644 --- a/arch/unicore32/mm/mmu.c +++ b/arch/unicore32/mm/mmu.c @@ -25,6 +25,7 @@ #include <asm/setup.h> #include <asm/sizes.h> #include <asm/tlb.h> +#include <asm/memblock.h> #include <mach/map.h> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index efb42949cc0..67d6af3581b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -26,6 +26,8 @@ config X86 select HAVE_IOREMAP_PROT select HAVE_KPROBES select HAVE_MEMBLOCK + select HAVE_MEMBLOCK_NODE_MAP + select ARCH_DISCARD_MEMBLOCK select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_FRAME_POINTERS select HAVE_DMA_ATTRS @@ -204,9 +206,6 @@ config ZONE_DMA32 bool default X86_64 -config ARCH_POPULATES_NODE_MAP - def_bool y - config AUDIT_ARCH bool default X86_64 diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 908b96957d8..37782566af2 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -117,7 +117,7 @@ static inline void early_memtest(unsigned long start, unsigned long end) extern unsigned long e820_end_of_ram_pfn(void); extern unsigned long e820_end_of_low_ram_pfn(void); -extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); +extern u64 early_reserve_e820(u64 sizet, u64 align); void memblock_x86_fill(void); void memblock_find_dma_reserve(void); diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 88c765e1641..74df3f1eddf 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -137,6 +137,13 @@ static inline int insn_is_avx(struct insn *insn) return (insn->vex_prefix.value != 0); } +/* Ensure this instruction is decoded completely */ +static inline int insn_complete(struct insn *insn) +{ + return insn->opcode.got && insn->modrm.got && insn->sib.got && + insn->displacement.got && insn->immediate.got; +} + static inline insn_byte_t insn_vex_m_bits(struct insn *insn) { if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ diff --git a/arch/x86/include/asm/memblock.h b/arch/x86/include/asm/memblock.h deleted file mode 100644 index 0cd3800f33b..00000000000 --- a/arch/x86/include/asm/memblock.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef _X86_MEMBLOCK_H -#define _X86_MEMBLOCK_H - -#define ARCH_DISCARD_MEMBLOCK - -u64 memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align); - -void memblock_x86_reserve_range(u64 start, u64 end, char *name); -void memblock_x86_free_range(u64 start, u64 end); -struct range; -int __get_free_all_memory_range(struct range **range, int nodeid, - unsigned long start_pfn, unsigned long end_pfn); -int get_free_all_memory_range(struct range **rangep, int nodeid); - -void memblock_x86_register_active_regions(int nid, unsigned long start_pfn, - unsigned long last_pfn); -u64 memblock_x86_hole_size(u64 start, u64 end); -u64 memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align); -u64 memblock_x86_free_memory_in_range(u64 addr, u64 limit); -u64 memblock_x86_memory_in_range(u64 addr, u64 limit); -bool memblock_x86_check_reserved_size(u64 *addrp, u64 *sizep, u64 align); - -#endif diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index f61c62f7d5d..096c975e099 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -57,6 +57,7 @@ (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) #define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 +#define ARCH_PERFMON_EVENTS_COUNT 7 /* * Intel "Architectural Performance Monitoring" CPUID @@ -72,6 +73,19 @@ union cpuid10_eax { unsigned int full; }; +union cpuid10_ebx { + struct { + unsigned int no_unhalted_core_cycles:1; + unsigned int no_instructions_retired:1; + unsigned int no_unhalted_reference_cycles:1; + unsigned int no_llc_reference:1; + unsigned int no_llc_misses:1; + unsigned int no_branch_instruction_retired:1; + unsigned int no_branch_misses_retired:1; + } split; + unsigned int full; +}; + union cpuid10_edx { struct { unsigned int num_counters_fixed:5; @@ -81,6 +95,15 @@ union cpuid10_edx { unsigned int full; }; +struct x86_pmu_capability { + int version; + int num_counters_gp; + int num_counters_fixed; + int bit_width_gp; + int bit_width_fixed; + unsigned int events_mask; + int events_mask_len; +}; /* * Fixed-purpose performance events: @@ -89,23 +112,24 @@ union cpuid10_edx { /* * All 3 fixed-mode PMCs are configured via this single MSR: */ -#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d +#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d /* * The counts are available in three separate MSRs: */ /* Instr_Retired.Any: */ -#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 -#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0) +#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 +#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0) /* CPU_CLK_Unhalted.Core: */ -#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a -#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1) +#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a +#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1) /* CPU_CLK_Unhalted.Ref: */ -#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b -#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) +#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b +#define X86_PMC_IDX_FIXED_REF_CYCLES (X86_PMC_IDX_FIXED + 2) +#define X86_PMC_MSK_FIXED_REF_CYCLES (1ULL << X86_PMC_IDX_FIXED_REF_CYCLES) /* * We model BTS tracing as another fixed-mode PMC. @@ -202,6 +226,7 @@ struct perf_guest_switch_msr { }; extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); +extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap); #else static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr) { @@ -209,6 +234,11 @@ static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr) return NULL; } +static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) +{ + memset(cap, 0, sizeof(*cap)); +} + static inline void perf_events_lapic_init(void) { } #endif diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 3d2661ca654..6e76c191a83 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -88,13 +88,13 @@ static u32 __init allocate_aperture(void) */ addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR, aper_size, aper_size); - if (addr == MEMBLOCK_ERROR || addr + aper_size > GART_MAX_ADDR) { + if (!addr || addr + aper_size > GART_MAX_ADDR) { printk(KERN_ERR "Cannot allocate aperture memory hole (%lx,%uK)\n", addr, aper_size>>10); return 0; } - memblock_x86_reserve_range(addr, addr + aper_size, "aperture64"); + memblock_reserve(addr, aper_size); /* * Kmemleak should not scan this block as it may not be mapped via the * kernel direct mapping. diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index f98d84caf94..2cd2d93643d 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -876,8 +876,8 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) * Besides, if we don't timer interrupts ignore the global * interrupt lock, which is the WrongThing (tm) to do. */ - exit_idle(); irq_enter(); + exit_idle(); local_apic_timer_interrupt(); irq_exit(); @@ -1809,8 +1809,8 @@ void smp_spurious_interrupt(struct pt_regs *regs) { u32 v; - exit_idle(); irq_enter(); + exit_idle(); /* * Check if this really is a spurious interrupt and ACK it * if it is a vectored one. Just in case... @@ -1846,8 +1846,8 @@ void smp_error_interrupt(struct pt_regs *regs) "Illegal register address", /* APIC Error Bit 7 */ }; - exit_idle(); irq_enter(); + exit_idle(); /* First tickle the hardware, only then report what went on. -- REW */ v0 = apic_read(APIC_ESR); apic_write(APIC_ESR, 0); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 6d939d7847e..89805558551 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2421,8 +2421,8 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) unsigned vector, me; ack_APIC_irq(); - exit_idle(); irq_enter(); + exit_idle(); me = smp_processor_id(); for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c index 452932d3473..5da1269e8dd 100644 --- a/arch/x86/kernel/check.c +++ b/arch/x86/kernel/check.c @@ -62,7 +62,8 @@ early_param("memory_corruption_check_size", set_corruption_check_size); void __init setup_bios_corruption_check(void) { - u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */ + phys_addr_t start, end; + u64 i; if (memory_corruption_check == -1) { memory_corruption_check = @@ -82,28 +83,23 @@ void __init setup_bios_corruption_check(void) corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); - while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { - u64 size; - addr = memblock_x86_find_in_range_size(addr, &size, PAGE_SIZE); + for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) { + start = clamp_t(phys_addr_t, round_up(start, PAGE_SIZE), + PAGE_SIZE, corruption_check_size); + end = clamp_t(phys_addr_t, round_down(end, PAGE_SIZE), + PAGE_SIZE, corruption_check_size); + if (start >= end) + continue; - if (addr == MEMBLOCK_ERROR) - break; - - if (addr >= corruption_check_size) - break; - - if ((addr + size) > corruption_check_size) - size = corruption_check_size - addr; - - memblock_x86_reserve_range(addr, addr + size, "SCAN RAM"); - scan_areas[num_scan_areas].addr = addr; - scan_areas[num_scan_areas].size = size; - num_scan_areas++; + memblock_reserve(start, end - start); + scan_areas[num_scan_areas].addr = start; + scan_areas[num_scan_areas].size = end - start; /* Assume we've already mapped this early memory */ - memset(__va(addr), 0, size); + memset(__va(start), 0, end - start); - addr += size; + if (++num_scan_areas >= MAX_SCAN_AREAS) + break; } if (num_scan_areas) diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 787e06c84ea..ce215616d5b 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -397,8 +397,8 @@ static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) { - exit_idle(); irq_enter(); + exit_idle(); inc_irq_stat(irq_thermal_count); smp_thermal_vector(); irq_exit(); diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c index d746df2909c..aa578cadb94 100644 --- a/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/arch/x86/kernel/cpu/mcheck/threshold.c @@ -19,8 +19,8 @@ void (*mce_threshold_vector)(void) = default_threshold_interrupt; asmlinkage void smp_threshold_interrupt(void) { - exit_idle(); irq_enter(); + exit_idle(); inc_irq_stat(irq_threshold_count); mce_threshold_vector(); irq_exit(); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 2bda212a001..5adce1040b1 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -484,18 +484,195 @@ static inline int is_x86_event(struct perf_event *event) return event->pmu == &pmu; } +/* + * Event scheduler state: + * + * Assign events iterating over all events and counters, beginning + * with events with least weights first. Keep the current iterator + * state in struct sched_state. + */ +struct sched_state { + int weight; + int event; /* event index */ + int counter; /* counter index */ + int unassigned; /* number of events to be assigned left */ + unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; +}; + +/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */ +#define SCHED_STATES_MAX 2 + +struct perf_sched { + int max_weight; + int max_events; + struct event_constraint **constraints; + struct sched_state state; + int saved_states; + struct sched_state saved[SCHED_STATES_MAX]; +}; + +/* + * Initialize interator that runs through all events and counters. + */ +static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c, + int num, int wmin, int wmax) +{ + int idx; + + memset(sched, 0, sizeof(*sched)); + sched->max_events = num; + sched->max_weight = wmax; + sched->constraints = c; + + for (idx = 0; idx < num; idx++) { + if (c[idx]->weight == wmin) + break; + } + + sched->state.event = idx; /* start with min weight */ + sched->state.weight = wmin; + sched->state.unassigned = num; +} + +static void perf_sched_save_state(struct perf_sched *sched) +{ + if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX)) + return; + + sched->saved[sched->saved_states] = sched->state; + sched->saved_states++; +} + +static bool perf_sched_restore_state(struct perf_sched *sched) +{ + if (!sched->saved_states) + return false; + + sched->saved_states--; + sched->state = sched->saved[sched->saved_states]; + + /* continue with next counter: */ + clear_bit(sched->state.counter++, sched->state.used); + + return true; +} + +/* + * Select a counter for the current event to schedule. Return true on + * success. + */ +static bool __perf_sched_find_counter(struct perf_sched *sched) +{ + struct event_constraint *c; + int idx; + + if (!sched->state.unassigned) + return false; + + if (sched->state.event >= sched->max_events) + return false; + + c = sched->constraints[sched->state.event]; + + /* Prefer fixed purpose counters */ + if (x86_pmu.num_counters_fixed) { + idx = X86_PMC_IDX_FIXED; + for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) { + if (!__test_and_set_bit(idx, sched->state.used)) + goto done; + } + } + /* Grab the first unused counter starting with idx */ + idx = sched->state.counter; + for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) { + if (!__test_and_set_bit(idx, sched->state.used)) + goto done; + } + + return false; + +done: + sched->state.counter = idx; + + if (c->overlap) + perf_sched_save_state(sched); + + return true; +} + +static bool perf_sched_find_counter(struct perf_sched *sched) +{ + while (!__perf_sched_find_counter(sched)) { + if (!perf_sched_restore_state(sched)) + return false; + } + + return true; +} + +/* + * Go through all unassigned events and find the next one to schedule. + * Take events with the least weight first. Return true on success. + */ +static bool perf_sched_next_event(struct perf_sched *sched) +{ + struct event_constraint *c; + + if (!sched->state.unassigned || !--sched->state.unassigned) + return false; + + do { + /* next event */ + sched->state.event++; + if (sched->state.event >= sched->max_events) { + /* next weight */ + sched->state.event = 0; + sched->state.weight++; + if (sched->state.weight > sched->max_weight) + return false; + } + c = sched->constraints[sched->state.event]; + } while (c->weight != sched->state.weight); + + sched->state.counter = 0; /* start with first counter */ + + return true; +} + +/* + * Assign a counter for each event. + */ +static int perf_assign_events(struct event_constraint **constraints, int n, + int wmin, int wmax, int *assign) +{ + struct perf_sched sched; + + perf_sched_init(&sched, constraints, n, wmin, wmax); + + do { + if (!perf_sched_find_counter(&sched)) + break; /* failed */ + if (assign) + assign[sched.state.event] = sched.state.counter; + } while (perf_sched_next_event(&sched)); + + return sched.state.unassigned; +} + int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) { struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - int i, j, w, wmax, num = 0; + int i, wmin, wmax, num = 0; struct hw_perf_event *hwc; bitmap_zero(used_mask, X86_PMC_IDX_MAX); - for (i = 0; i < n; i++) { + for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); constraints[i] = c; + wmin = min(wmin, c->weight); + wmax = max(wmax, c->weight); } /* @@ -521,60 +698,12 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (assign) assign[i] = hwc->idx; } - if (i == n) - goto done; - - /* - * begin slow path - */ - - bitmap_zero(used_mask, X86_PMC_IDX_MAX); - /* - * weight = number of possible counters - * - * 1 = most constrained, only works on one counter - * wmax = least constrained, works on any counter - * - * assign events to counters starting with most - * constrained events. - */ - wmax = x86_pmu.num_counters; + /* slow path */ + if (i != n) + num = perf_assign_events(constraints, n, wmin, wmax, assign); /* - * when fixed event counters are present, - * wmax is incremented by 1 to account - * for one more choice - */ - if (x86_pmu.num_counters_fixed) - wmax++; - - for (w = 1, num = n; num && w <= wmax; w++) { - /* for each event */ - for (i = 0; num && i < n; i++) { - c = constraints[i]; - hwc = &cpuc->event_list[i]->hw; - - if (c->weight != w) - continue; - - for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) { - if (!test_bit(j, used_mask)) - break; - } - - if (j == X86_PMC_IDX_MAX) - break; - - __set_bit(j, used_mask); - - if (assign) - assign[i] = j; - num--; - } - } -done: - /* * scheduling failed or is just a simulation, * free resources if necessary */ @@ -1119,6 +1248,7 @@ static void __init pmu_check_apic(void) static int __init init_hw_perf_events(void) { + struct x86_pmu_quirk *quirk; struct event_constraint *c; int err; @@ -1147,8 +1277,8 @@ static int __init init_hw_perf_events(void) pr_cont("%s PMU driver.\n", x86_pmu.name); - if (x86_pmu.quirks) - x86_pmu.quirks(); + for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next) + quirk->func(); if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", @@ -1171,12 +1301,18 @@ static int __init init_hw_perf_events(void) unconstrained = (struct event_constraint) __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, - 0, x86_pmu.num_counters); + 0, x86_pmu.num_counters, 0); if (x86_pmu.event_constraints) { + /* + * event on fixed counter2 (REF_CYCLES) only works on this + * counter, so do not extend mask to generic counters + */ for_each_event_constraint(c, x86_pmu.event_constraints) { - if (c->cmask != X86_RAW_EVENT_MASK) + if (c->cmask != X86_RAW_EVENT_MASK + || c->idxmsk64 == X86_PMC_MSK_FIXED_REF_CYCLES) { continue; + } c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; c->weight += x86_pmu.num_counters; @@ -1566,3 +1702,15 @@ unsigned long perf_misc_flags(struct pt_regs *regs) return misc; } + +void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) +{ + cap->version = x86_pmu.version; + cap->num_counters_gp = x86_pmu.num_counters; + cap->num_counters_fixed = x86_pmu.num_counters_fixed; + cap->bit_width_gp = x86_pmu.cntval_bits; + cap->bit_width_fixed = x86_pmu.cntval_bits; + cap->events_mask = (unsigned int)x86_pmu.events_maskl; + cap->events_mask_len = x86_pmu.events_mask_len; +} +EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability); diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index b9698d40ac4..8944062f46e 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -45,6 +45,7 @@ struct event_constraint { u64 code; u64 cmask; int weight; + int overlap; }; struct amd_nb { @@ -151,15 +152,40 @@ struct cpu_hw_events { void *kfree_on_online; }; -#define __EVENT_CONSTRAINT(c, n, m, w) {\ +#define __EVENT_CONSTRAINT(c, n, m, w, o) {\ { .idxmsk64 = (n) }, \ .code = (c), \ .cmask = (m), \ .weight = (w), \ + .overlap = (o), \ } #define EVENT_CONSTRAINT(c, n, m) \ - __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) + __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0) + +/* + * The overlap flag marks event constraints with overlapping counter + * masks. This is the case if the counter mask of such an event is not + * a subset of any other counter mask of a constraint with an equal or + * higher weight, e.g.: + * + * c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0); + * c_another1 = EVENT_CONSTRAINT(0, 0x07, 0); + * c_another2 = EVENT_CONSTRAINT(0, 0x38, 0); + * + * The event scheduler may not select the correct counter in the first + * cycle because it needs to know which subsequent events will be + * scheduled. It may fail to schedule the events then. So we set the + * overlap flag for such constraints to give the scheduler a hint which + * events to select for counter rescheduling. + * + * Care must be taken as the rescheduling algorithm is O(n!) which + * will increase scheduling cycles for an over-commited system + * dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros + * and its counter masks must be kept at a minimum. + */ +#define EVENT_CONSTRAINT_OVERLAP(c, n, m) \ + __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1) /* * Constraint on the Event code. @@ -235,6 +261,11 @@ union perf_capabilities { u64 capabilities; }; +struct x86_pmu_quirk { + struct x86_pmu_quirk *next; + void (*func)(void); +}; + /* * struct x86_pmu - generic x86 pmu */ @@ -259,6 +290,11 @@ struct x86_pmu { int num_counters_fixed; int cntval_bits; u64 cntval_mask; + union { + unsigned long events_maskl; + unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)]; + }; + int events_mask_len; int apic; u64 max_period; struct event_constraint * @@ -268,7 +304,7 @@ struct x86_pmu { void (*put_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event); struct event_constraint *event_constraints; - void (*quirks)(void); + struct x86_pmu_quirk *quirks; int perfctr_second_write; int (*cpu_prepare)(int cpu); @@ -309,6 +345,15 @@ struct x86_pmu { struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); }; +#define x86_add_quirk(func_) \ +do { \ + static struct x86_pmu_quirk __quirk __initdata = { \ + .func = func_, \ + }; \ + __quirk.next = x86_pmu.quirks; \ + x86_pmu.quirks = &__quirk; \ +} while (0) + #define ERF_NO_HT_SHARING 1 #define ERF_HAS_RSP_1 2 diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index aeefd45697a..0397b23be8e 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -492,7 +492,7 @@ static __initconst const struct x86_pmu amd_pmu = { static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0); static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0); -static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0); +static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0); static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 8d601b18bf9..3bd37bdf1b8 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -28,6 +28,7 @@ static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, + [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */ }; static struct event_constraint intel_core_event_constraints[] __read_mostly = @@ -45,12 +46,7 @@ static struct event_constraint intel_core2_event_constraints[] __read_mostly = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ - /* - * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event - * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed - * ratio between these counters. - */ - /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ @@ -68,7 +64,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ - /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ @@ -90,7 +86,7 @@ static struct event_constraint intel_westmere_event_constraints[] __read_mostly { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ - /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ @@ -102,7 +98,7 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ - /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ @@ -125,7 +121,7 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ - /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ EVENT_CONSTRAINT_END }; @@ -1169,7 +1165,7 @@ again: */ c = &unconstrained; } else if (intel_try_alt_er(event, orig_idx)) { - raw_spin_unlock(&era->lock); + raw_spin_unlock_irqrestore(&era->lock, flags); goto again; } raw_spin_unlock_irqrestore(&era->lock, flags); @@ -1519,7 +1515,7 @@ static __initconst const struct x86_pmu intel_pmu = { .guest_get_msrs = intel_guest_get_msrs, }; -static void intel_clovertown_quirks(void) +static __init void intel_clovertown_quirk(void) { /* * PEBS is unreliable due to: @@ -1545,19 +1541,60 @@ static void intel_clovertown_quirks(void) x86_pmu.pebs_constraints = NULL; } -static void intel_sandybridge_quirks(void) +static __init void intel_sandybridge_quirk(void) { printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); x86_pmu.pebs = 0; x86_pmu.pebs_constraints = NULL; } +static const struct { int id; char *name; } intel_arch_events_map[] __initconst = { + { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" }, + { PERF_COUNT_HW_INSTRUCTIONS, "instructions" }, + { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" }, + { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" }, + { PERF_COUNT_HW_CACHE_MISSES, "cache misses" }, + { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" }, + { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" }, +}; + +static __init void intel_arch_events_quirk(void) +{ + int bit; + + /* disable event that reported as not presend by cpuid */ + for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) { + intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0; + printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n", + intel_arch_events_map[bit].name); + } +} + +static __init void intel_nehalem_quirk(void) +{ + union cpuid10_ebx ebx; + + ebx.full = x86_pmu.events_maskl; + if (ebx.split.no_branch_misses_retired) { + /* + * Erratum AAJ80 detected, we work it around by using + * the BR_MISP_EXEC.ANY event. This will over-count + * branch-misses, but it's still much better than the + * architectural event which is often completely bogus: + */ + intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89; + ebx.split.no_branch_misses_retired = 0; + x86_pmu.events_maskl = ebx.full; + printk(KERN_INFO "CPU erratum AAJ80 worked around\n"); + } +} + __init int intel_pmu_init(void) { union cpuid10_edx edx; union cpuid10_eax eax; + union cpuid10_ebx ebx; unsigned int unused; - unsigned int ebx; int version; if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { @@ -1574,8 +1611,8 @@ __init int intel_pmu_init(void) * Check whether the Architectural PerfMon supports * Branch Misses Retired hw_event or not. */ - cpuid(10, &eax.full, &ebx, &unused, &edx.full); - if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) + cpuid(10, &eax.full, &ebx.full, &unused, &edx.full); + if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT) return -ENODEV; version = eax.split.version_id; @@ -1589,6 +1626,9 @@ __init int intel_pmu_init(void) x86_pmu.cntval_bits = eax.split.bit_width; x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; + x86_pmu.events_maskl = ebx.full; + x86_pmu.events_mask_len = eax.split.mask_length; + /* * Quirk: v2 perfmon does not report fixed-purpose events, so * assume at least 3 events: @@ -1608,6 +1648,8 @@ __init int intel_pmu_init(void) intel_ds_init(); + x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */ + /* * Install the hw-cache-events table: */ @@ -1617,7 +1659,7 @@ __init int intel_pmu_init(void) break; case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ - x86_pmu.quirks = intel_clovertown_quirks; + x86_add_quirk(intel_clovertown_quirk); case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ case 29: /* six-core 45 nm xeon "Dunnington" */ @@ -1651,17 +1693,8 @@ __init int intel_pmu_init(void) /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; - if (ebx & 0x40) { - /* - * Erratum AAJ80 detected, we work it around by using - * the BR_MISP_EXEC.ANY event. This will over-count - * branch-misses, but it's still much better than the - * architectural event which is often completely bogus: - */ - intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89; + x86_add_quirk(intel_nehalem_quirk); - pr_cont("erratum AAJ80 worked around, "); - } pr_cont("Nehalem events, "); break; @@ -1701,7 +1734,7 @@ __init int intel_pmu_init(void) break; case 42: /* SandyBridge */ - x86_pmu.quirks = intel_sandybridge_quirks; + x86_add_quirk(intel_sandybridge_quirk); case 45: /* SandyBridge, "Romely-EP" */ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -1738,5 +1771,6 @@ __init int intel_pmu_init(void) break; } } + return 0; } diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 3b97a80ce32..c99f9ed013d 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -116,16 +116,16 @@ void show_registers(struct pt_regs *regs) for (i = 0; i < code_len; i++, ip++) { if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { - printk(" Bad EIP value."); + printk(KERN_CONT " Bad EIP value."); break; } if (ip == (u8 *)regs->ip) - printk("<%02x> ", c); + printk(KERN_CONT "<%02x> ", c); else - printk("%02x ", c); + printk(KERN_CONT "%02x ", c); } } - printk("\n"); + printk(KERN_CONT "\n"); } int is_valid_bugaddr(unsigned long ip) diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 19853ad8afc..6d728d9284b 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -284,16 +284,16 @@ void show_registers(struct pt_regs *regs) for (i = 0; i < code_len; i++, ip++) { if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { - printk(" Bad RIP value."); + printk(KERN_CONT " Bad RIP value."); break; } if (ip == (u8 *)regs->ip) - printk("<%02x> ", c); + printk(KERN_CONT "<%02x> ", c); else - printk("%02x ", c); + printk(KERN_CONT "%02x ", c); } } - printk("\n"); + printk(KERN_CONT "\n"); } int is_valid_bugaddr(unsigned long ip) diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 303a0e48f07..8071e2f3d6e 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -738,35 +738,17 @@ core_initcall(e820_mark_nvs_memory); /* * pre allocated 4k and reserved it in memblock and e820_saved */ -u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) +u64 __init early_reserve_e820(u64 size, u64 align) { - u64 size = 0; u64 addr; - u64 start; - for (start = startt; ; start += size) { - start = memblock_x86_find_in_range_size(start, &size, align); - if (start == MEMBLOCK_ERROR) - return 0; - if (size >= sizet) - break; + addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); + if (addr) { + e820_update_range_saved(addr, size, E820_RAM, E820_RESERVED); + printk(KERN_INFO "update e820_saved for early_reserve_e820\n"); + update_e820_saved(); } -#ifdef CONFIG_X86_32 - if (start >= MAXMEM) - return 0; - if (start + size > MAXMEM) - size = MAXMEM - start; -#endif - - addr = round_down(start + size - sizet, align); - if (addr < start) - return 0; - memblock_x86_reserve_range(addr, addr + sizet, "new next"); - e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED); - printk(KERN_INFO "update e820_saved for early_reserve_e820\n"); - update_e820_saved(); - return addr; } @@ -1090,7 +1072,7 @@ void __init memblock_x86_fill(void) * We are safe to enable resizing, beause memblock_x86_fill() * is rather later for x86 */ - memblock_can_resize = 1; + memblock_allow_resize(); for (i = 0; i < e820.nr_map; i++) { struct e820entry *ei = &e820.map[i]; @@ -1105,22 +1087,36 @@ void __init memblock_x86_fill(void) memblock_add(ei->addr, ei->size); } - memblock_analyze(); memblock_dump_all(); } void __init memblock_find_dma_reserve(void) { #ifdef CONFIG_X86_64 - u64 free_size_pfn; - u64 mem_size_pfn; + u64 nr_pages = 0, nr_free_pages = 0; + unsigned long start_pfn, end_pfn; + phys_addr_t start, end; + int i; + u64 u; + /* * need to find out used area below MAX_DMA_PFN * need to use memblock to get free size in [0, MAX_DMA_PFN] * at first, and assume boot_mem will not take below MAX_DMA_PFN */ - mem_size_pfn = memblock_x86_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT; - free_size_pfn = memblock_x86_free_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT; - set_dma_reserve(mem_size_pfn - free_size_pfn); + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) { + start_pfn = min_t(unsigned long, start_pfn, MAX_DMA_PFN); + end_pfn = min_t(unsigned long, end_pfn, MAX_DMA_PFN); + nr_pages += end_pfn - start_pfn; + } + + for_each_free_mem_range(u, MAX_NUMNODES, &start, &end, NULL) { + start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN); + end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN); + if (start_pfn < end_pfn) + nr_free_pages += end_pfn - start_pfn; + } + + set_dma_reserve(nr_pages - nr_free_pages); #endif } diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c index af0699ba48c..48d9d4ea102 100644 --- a/arch/x86/kernel/head.c +++ b/arch/x86/kernel/head.c @@ -52,5 +52,5 @@ void __init reserve_ebda_region(void) lowmem = 0x9f000; /* reserve all memory between lowmem and the 1MB mark */ - memblock_x86_reserve_range(lowmem, 0x100000, "* BIOS reserved"); + memblock_reserve(lowmem, 0x100000 - lowmem); } diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 3bb08509a7a..51ff18616d5 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -31,9 +31,8 @@ static void __init i386_default_early_setup(void) void __init i386_start_kernel(void) { - memblock_init(); - - memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); + memblock_reserve(__pa_symbol(&_text), + __pa_symbol(&__bss_stop) - __pa_symbol(&_text)); #ifdef CONFIG_BLK_DEV_INITRD /* Reserve INITRD */ @@ -42,7 +41,7 @@ void __init i386_start_kernel(void) u64 ramdisk_image = boot_params.hdr.ramdisk_image; u64 ramdisk_size = boot_params.hdr.ramdisk_size; u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); - memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK"); + memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image); } #endif diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 5655c2272ad..3a3b779f41d 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -98,9 +98,8 @@ void __init x86_64_start_reservations(char *real_mode_data) { copy_bootdata(__va(real_mode_data)); - memblock_init(); - - memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); + memblock_reserve(__pa_symbol(&_text), + __pa_symbol(&__bss_stop) - __pa_symbol(&_text)); #ifdef CONFIG_BLK_DEV_INITRD /* Reserve INITRD */ @@ -109,7 +108,7 @@ void __init x86_64_start_reservations(char *real_mode_data) unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); - memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK"); + memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image); } #endif diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 429e0c92924..5d31e5bdbf8 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -181,8 +181,8 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) unsigned vector = ~regs->orig_ax; unsigned irq; - exit_idle(); irq_enter(); + exit_idle(); irq = __this_cpu_read(vector_irq[vector]); @@ -209,10 +209,10 @@ void smp_x86_platform_ipi(struct pt_regs *regs) ack_APIC_irq(); - exit_idle(); - irq_enter(); + exit_idle(); + inc_irq_stat(x86_platform_ipis); if (x86_platform_ipi_callback) diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index ea9d5f2f13e..2889b3d4388 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -50,7 +50,7 @@ void arch_jump_label_transform(struct jump_entry *entry, put_online_cpus(); } -void arch_jump_label_transform_static(struct jump_entry *entry, +__init_or_module void arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type) { __jump_label_transform(entry, type, text_poke_early); diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 0741b062a30..ca470e4c92d 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -564,9 +564,7 @@ void __init default_get_smp_config(unsigned int early) static void __init smp_reserve_memory(struct mpf_intel *mpf) { - unsigned long size = get_mpc_size(mpf->physptr); - - memblock_x86_reserve_range(mpf->physptr, mpf->physptr+size, "* MP-table mpc"); + memblock_reserve(mpf->physptr, get_mpc_size(mpf->physptr)); } static int __init smp_scan_config(unsigned long base, unsigned long length) @@ -595,7 +593,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) mpf, (u64)virt_to_phys(mpf)); mem = virt_to_phys(mpf); - memblock_x86_reserve_range(mem, mem + sizeof(*mpf), "* MP-table mpf"); + memblock_reserve(mem, sizeof(*mpf)); if (mpf->physptr) smp_reserve_memory(mpf); @@ -836,10 +834,8 @@ early_param("alloc_mptable", parse_alloc_mptable_opt); void __init early_reserve_e820_mpc_new(void) { - if (enable_update_mptable && alloc_mptable) { - u64 startt = 0; - mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4); - } + if (enable_update_mptable && alloc_mptable) + mpc_new_phys = early_reserve_e820(mpc_new_length, 4); } static int __init update_mp_table(void) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 795b79f984c..485204f58cd 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -99,7 +99,8 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); + rcu_idle_enter(); while (!need_resched()) { check_pgt_cache(); @@ -116,7 +117,8 @@ void cpu_idle(void) pm_idle(); start_critical_timings(); } - tick_nohz_restart_sched_tick(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3bd7e6eebf3..64e926c89a6 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -122,7 +122,7 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); while (!need_resched()) { rmb(); @@ -139,8 +139,14 @@ void cpu_idle(void) enter_idle(); /* Don't trace irqs off for idle */ stop_critical_timings(); + + /* enter_idle() needs rcu for notifiers */ + rcu_idle_enter(); + if (cpuidle_idle_call()) pm_idle(); + + rcu_idle_exit(); start_critical_timings(); /* In many cases the interrupt that ended idle @@ -149,7 +155,7 @@ void cpu_idle(void) __exit_idle(); } - tick_nohz_restart_sched_tick(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index cf0ef986cb6..d05444ac2ae 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -306,7 +306,8 @@ static void __init cleanup_highmap(void) static void __init reserve_brk(void) { if (_brk_end > _brk_start) - memblock_x86_reserve_range(__pa(_brk_start), __pa(_brk_end), "BRK"); + memblock_reserve(__pa(_brk_start), + __pa(_brk_end) - __pa(_brk_start)); /* Mark brk area as locked down and no longer taking any new allocations */ @@ -331,13 +332,13 @@ static void __init relocate_initrd(void) ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size, PAGE_SIZE); - if (ramdisk_here == MEMBLOCK_ERROR) + if (!ramdisk_here) panic("Cannot find place for new RAMDISK of size %lld\n", ramdisk_size); /* Note: this includes all the lowmem currently occupied by the initrd, we rely on that fact to keep the data intact. */ - memblock_x86_reserve_range(ramdisk_here, ramdisk_here + area_size, "NEW RAMDISK"); + memblock_reserve(ramdisk_here, area_size); initrd_start = ramdisk_here + PAGE_OFFSET; initrd_end = initrd_start + ramdisk_size; printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n", @@ -393,7 +394,7 @@ static void __init reserve_initrd(void) initrd_start = 0; if (ramdisk_size >= (end_of_lowmem>>1)) { - memblock_x86_free_range(ramdisk_image, ramdisk_end); + memblock_free(ramdisk_image, ramdisk_end - ramdisk_image); printk(KERN_ERR "initrd too large to handle, " "disabling initrd\n"); return; @@ -416,7 +417,7 @@ static void __init reserve_initrd(void) relocate_initrd(); - memblock_x86_free_range(ramdisk_image, ramdisk_end); + memblock_free(ramdisk_image, ramdisk_end - ramdisk_image); } #else static void __init reserve_initrd(void) @@ -490,15 +491,13 @@ static void __init memblock_x86_reserve_range_setup_data(void) { struct setup_data *data; u64 pa_data; - char buf[32]; if (boot_params.hdr.version < 0x0209) return; pa_data = boot_params.hdr.setup_data; while (pa_data) { data = early_memremap(pa_data, sizeof(*data)); - sprintf(buf, "setup data %x", data->type); - memblock_x86_reserve_range(pa_data, pa_data+sizeof(*data)+data->len, buf); + memblock_reserve(pa_data, sizeof(*data) + data->len); pa_data = data->next; early_iounmap(data, sizeof(*data)); } @@ -554,7 +553,7 @@ static void __init reserve_crashkernel(void) crash_base = memblock_find_in_range(alignment, CRASH_KERNEL_ADDR_MAX, crash_size, alignment); - if (crash_base == MEMBLOCK_ERROR) { + if (!crash_base) { pr_info("crashkernel reservation failed - No suitable area found.\n"); return; } @@ -568,7 +567,7 @@ static void __init reserve_crashkernel(void) return; } } - memblock_x86_reserve_range(crash_base, crash_base + crash_size, "CRASH KERNEL"); + memblock_reserve(crash_base, crash_size); printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " "for crashkernel (System RAM: %ldMB)\n", @@ -626,7 +625,7 @@ static __init void reserve_ibft_region(void) addr = find_ibft_region(&size); if (size) - memblock_x86_reserve_range(addr, addr + size, "* ibft"); + memblock_reserve(addr, size); } static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index a91ae7709b4..a73b61055ad 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c @@ -14,11 +14,11 @@ void __init setup_trampolines(void) /* Has to be in very low memory so we can execute real-mode AP code. */ mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); - if (mem == MEMBLOCK_ERROR) + if (!mem) panic("Cannot allocate trampoline\n"); x86_trampoline_base = __va(mem); - memblock_x86_reserve_range(mem, mem + size, "TRAMPOLINE"); + memblock_reserve(mem, size); printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", x86_trampoline_base, (unsigned long long)mem, size); diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 76e3f1cd036..405f2620392 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -338,11 +338,15 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) return HRTIMER_NORESTART; } -static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) +static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) { + struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; struct kvm_timer *pt = &ps->pit_timer; s64 interval; + if (!irqchip_in_kernel(kvm)) + return; + interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); pr_debug("create pit timer, interval is %llu nsec\n", interval); @@ -394,13 +398,13 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val) /* FIXME: enhance mode 4 precision */ case 4: if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)) { - create_pit_timer(ps, val, 0); + create_pit_timer(kvm, val, 0); } break; case 2: case 3: if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)){ - create_pit_timer(ps, val, 1); + create_pit_timer(kvm, val, 1); } break; default: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c38efd7b792..4c938da2ba0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -602,7 +602,6 @@ static void update_cpuid(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; struct kvm_lapic *apic = vcpu->arch.apic; - u32 timer_mode_mask; best = kvm_find_cpuid_entry(vcpu, 1, 0); if (!best) @@ -615,15 +614,12 @@ static void update_cpuid(struct kvm_vcpu *vcpu) best->ecx |= bit(X86_FEATURE_OSXSAVE); } - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && - best->function == 0x1) { - best->ecx |= bit(X86_FEATURE_TSC_DEADLINE_TIMER); - timer_mode_mask = 3 << 17; - } else - timer_mode_mask = 1 << 17; - - if (apic) - apic->lapic_timer.timer_mode_mask = timer_mode_mask; + if (apic) { + if (best->ecx & bit(X86_FEATURE_TSC_DEADLINE_TIMER)) + apic->lapic_timer.timer_mode_mask = 3 << 17; + else + apic->lapic_timer.timer_mode_mask = 1 << 17; + } } int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) @@ -2135,6 +2131,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_TSC_CONTROL: r = kvm_has_tsc_control; break; + case KVM_CAP_TSC_DEADLINE_TIMER: + r = boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER); + break; default: r = 0; break; diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c index 46fc4ee09fc..88ad5fbda6e 100644 --- a/arch/x86/lib/inat.c +++ b/arch/x86/lib/inat.c @@ -82,9 +82,16 @@ insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, const insn_attr_t *table; if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) return 0; - table = inat_avx_tables[vex_m][vex_p]; + /* At first, this checks the master table */ + table = inat_avx_tables[vex_m][0]; if (!table) return 0; + if (!inat_is_group(table[opcode]) && vex_p) { + /* If this is not a group, get attribute directly */ + table = inat_avx_tables[vex_m][vex_p]; + if (!table) + return 0; + } return table[opcode]; } diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 374562ed670..5a1f9f3e3fb 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -202,7 +202,7 @@ void insn_get_opcode(struct insn *insn) m = insn_vex_m_bits(insn); p = insn_vex_p_bits(insn); insn->attr = inat_get_avx_attribute(op, m, p); - if (!inat_accept_vex(insn->attr)) + if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr)) insn->attr = 0; /* This instruction is bad */ goto end; /* VEX has only 1 byte for opcode */ } @@ -249,6 +249,8 @@ void insn_get_modrm(struct insn *insn) pfx = insn_last_prefix(insn); insn->attr = inat_get_group_attribute(mod, pfx, insn->attr); + if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) + insn->attr = 0; /* This is bad */ } } diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index a793da5e560..5b83c51c12e 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -1,5 +1,11 @@ # x86 Opcode Maps # +# This is (mostly) based on following documentations. +# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2 +# (#325383-040US, October 2011) +# - Intel(R) Advanced Vector Extensions Programming Reference +# (#319433-011,JUNE 2011). +# #<Opcode maps> # Table: table-name # Referrer: escaped-name @@ -15,10 +21,13 @@ # EndTable # # AVX Superscripts -# (VEX): this opcode can accept VEX prefix. -# (oVEX): this opcode requires VEX prefix. -# (o128): this opcode only supports 128bit VEX. -# (o256): this opcode only supports 256bit VEX. +# (v): this opcode requires VEX prefix. +# (v1): this opcode only supports 128bit VEX. +# +# Last Prefix Superscripts +# - (66): the last prefix is 0x66 +# - (F3): the last prefix is 0xF3 +# - (F2): the last prefix is 0xF2 # Table: one byte opcode @@ -199,8 +208,8 @@ a0: MOV AL,Ob a1: MOV rAX,Ov a2: MOV Ob,AL a3: MOV Ov,rAX -a4: MOVS/B Xb,Yb -a5: MOVS/W/D/Q Xv,Yv +a4: MOVS/B Yb,Xb +a5: MOVS/W/D/Q Yv,Xv a6: CMPS/B Xb,Yb a7: CMPS/W/D Xv,Yv a8: TEST AL,Ib @@ -233,8 +242,8 @@ c0: Grp2 Eb,Ib (1A) c1: Grp2 Ev,Ib (1A) c2: RETN Iw (f64) c3: RETN -c4: LES Gz,Mp (i64) | 3bytes-VEX (Prefix) -c5: LDS Gz,Mp (i64) | 2bytes-VEX (Prefix) +c4: LES Gz,Mp (i64) | VEX+2byte (Prefix) +c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix) c6: Grp11 Eb,Ib (1A) c7: Grp11 Ev,Iz (1A) c8: ENTER Iw,Ib @@ -320,14 +329,19 @@ AVXcode: 1 # 3DNow! uses the last imm byte as opcode extension. 0f: 3DNow! Pq,Qq,Ib # 0x0f 0x10-0x1f -10: movups Vps,Wps (VEX) | movss Vss,Wss (F3),(VEX),(o128) | movupd Vpd,Wpd (66),(VEX) | movsd Vsd,Wsd (F2),(VEX),(o128) -11: movups Wps,Vps (VEX) | movss Wss,Vss (F3),(VEX),(o128) | movupd Wpd,Vpd (66),(VEX) | movsd Wsd,Vsd (F2),(VEX),(o128) -12: movlps Vq,Mq (VEX),(o128) | movlpd Vq,Mq (66),(VEX),(o128) | movhlps Vq,Uq (VEX),(o128) | movddup Vq,Wq (F2),(VEX) | movsldup Vq,Wq (F3),(VEX) -13: mpvlps Mq,Vq (VEX),(o128) | movlpd Mq,Vq (66),(VEX),(o128) -14: unpcklps Vps,Wq (VEX) | unpcklpd Vpd,Wq (66),(VEX) -15: unpckhps Vps,Wq (VEX) | unpckhpd Vpd,Wq (66),(VEX) -16: movhps Vq,Mq (VEX),(o128) | movhpd Vq,Mq (66),(VEX),(o128) | movlsps Vq,Uq (VEX),(o128) | movshdup Vq,Wq (F3),(VEX) -17: movhps Mq,Vq (VEX),(o128) | movhpd Mq,Vq (66),(VEX),(o128) +# NOTE: According to Intel SDM opcode map, vmovups and vmovupd has no operands +# but it actually has operands. And also, vmovss and vmovsd only accept 128bit. +# MOVSS/MOVSD has too many forms(3) on SDM. This map just shows a typical form. +# Many AVX instructions lack v1 superscript, according to Intel AVX-Prgramming +# Reference A.1 +10: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1) +11: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1) +12: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2) +13: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1) +14: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66) +15: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66) +16: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3) +17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1) 18: Grp16 (1A) 19: 1a: @@ -345,14 +359,14 @@ AVXcode: 1 25: 26: 27: -28: movaps Vps,Wps (VEX) | movapd Vpd,Wpd (66),(VEX) -29: movaps Wps,Vps (VEX) | movapd Wpd,Vpd (66),(VEX) -2a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3),(VEX),(o128) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2),(VEX),(o128) -2b: movntps Mps,Vps (VEX) | movntpd Mpd,Vpd (66),(VEX) -2c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3),(VEX),(o128) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2),(VEX),(o128) -2d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3),(VEX),(o128) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2),(VEX),(o128) -2e: ucomiss Vss,Wss (VEX),(o128) | ucomisd Vsd,Wsd (66),(VEX),(o128) -2f: comiss Vss,Wss (VEX),(o128) | comisd Vsd,Wsd (66),(VEX),(o128) +28: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66) +29: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66) +2a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1) +2b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66) +2c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1) +2d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1) +2e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1) +2f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1) # 0x0f 0x30-0x3f 30: WRMSR 31: RDTSC @@ -388,65 +402,66 @@ AVXcode: 1 4e: CMOVLE/NG Gv,Ev 4f: CMOVNLE/G Gv,Ev # 0x0f 0x50-0x5f -50: movmskps Gd/q,Ups (VEX) | movmskpd Gd/q,Upd (66),(VEX) -51: sqrtps Vps,Wps (VEX) | sqrtss Vss,Wss (F3),(VEX),(o128) | sqrtpd Vpd,Wpd (66),(VEX) | sqrtsd Vsd,Wsd (F2),(VEX),(o128) -52: rsqrtps Vps,Wps (VEX) | rsqrtss Vss,Wss (F3),(VEX),(o128) -53: rcpps Vps,Wps (VEX) | rcpss Vss,Wss (F3),(VEX),(o128) -54: andps Vps,Wps (VEX) | andpd Vpd,Wpd (66),(VEX) -55: andnps Vps,Wps (VEX) | andnpd Vpd,Wpd (66),(VEX) -56: orps Vps,Wps (VEX) | orpd Vpd,Wpd (66),(VEX) -57: xorps Vps,Wps (VEX) | xorpd Vpd,Wpd (66),(VEX) -58: addps Vps,Wps (VEX) | addss Vss,Wss (F3),(VEX),(o128) | addpd Vpd,Wpd (66),(VEX) | addsd Vsd,Wsd (F2),(VEX),(o128) -59: mulps Vps,Wps (VEX) | mulss Vss,Wss (F3),(VEX),(o128) | mulpd Vpd,Wpd (66),(VEX) | mulsd Vsd,Wsd (F2),(VEX),(o128) -5a: cvtps2pd Vpd,Wps (VEX) | cvtss2sd Vsd,Wss (F3),(VEX),(o128) | cvtpd2ps Vps,Wpd (66),(VEX) | cvtsd2ss Vsd,Wsd (F2),(VEX),(o128) -5b: cvtdq2ps Vps,Wdq (VEX) | cvtps2dq Vdq,Wps (66),(VEX) | cvttps2dq Vdq,Wps (F3),(VEX) -5c: subps Vps,Wps (VEX) | subss Vss,Wss (F3),(VEX),(o128) | subpd Vpd,Wpd (66),(VEX) | subsd Vsd,Wsd (F2),(VEX),(o128) -5d: minps Vps,Wps (VEX) | minss Vss,Wss (F3),(VEX),(o128) | minpd Vpd,Wpd (66),(VEX) | minsd Vsd,Wsd (F2),(VEX),(o128) -5e: divps Vps,Wps (VEX) | divss Vss,Wss (F3),(VEX),(o128) | divpd Vpd,Wpd (66),(VEX) | divsd Vsd,Wsd (F2),(VEX),(o128) -5f: maxps Vps,Wps (VEX) | maxss Vss,Wss (F3),(VEX),(o128) | maxpd Vpd,Wpd (66),(VEX) | maxsd Vsd,Wsd (F2),(VEX),(o128) +50: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66) +51: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1) +52: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1) +53: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1) +54: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66) +55: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66) +56: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66) +57: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66) +58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1) +59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1) +5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1) +5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) +5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1) +5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1) +5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1) +5f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1) # 0x0f 0x60-0x6f -60: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66),(VEX),(o128) -61: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66),(VEX),(o128) -62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66),(VEX),(o128) -63: packsswb Pq,Qq | packsswb Vdq,Wdq (66),(VEX),(o128) -64: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66),(VEX),(o128) -65: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66),(VEX),(o128) -66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66),(VEX),(o128) -67: packuswb Pq,Qq | packuswb Vdq,Wdq (66),(VEX),(o128) -68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66),(VEX),(o128) -69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66),(VEX),(o128) -6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66),(VEX),(o128) -6b: packssdw Pq,Qd | packssdw Vdq,Wdq (66),(VEX),(o128) -6c: punpcklqdq Vdq,Wdq (66),(VEX),(o128) -6d: punpckhqdq Vdq,Wdq (66),(VEX),(o128) -6e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66),(VEX),(o128) -6f: movq Pq,Qq | movdqa Vdq,Wdq (66),(VEX) | movdqu Vdq,Wdq (F3),(VEX) +60: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1) +61: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1) +62: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1) +63: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1) +64: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1) +65: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1) +66: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1) +67: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1) +68: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1) +69: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1) +6a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1) +6b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1) +6c: vpunpcklqdq Vx,Hx,Wx (66),(v1) +6d: vpunpckhqdq Vx,Hx,Wx (66),(v1) +6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1) +6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3) # 0x0f 0x70-0x7f -70: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66),(VEX),(o128) | pshufhw Vdq,Wdq,Ib (F3),(VEX),(o128) | pshuflw VdqWdq,Ib (F2),(VEX),(o128) +70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1) 71: Grp12 (1A) 72: Grp13 (1A) 73: Grp14 (1A) -74: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66),(VEX),(o128) -75: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66),(VEX),(o128) -76: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66),(VEX),(o128) -77: emms/vzeroupper/vzeroall (VEX) -78: VMREAD Ed/q,Gd/q -79: VMWRITE Gd/q,Ed/q +74: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1) +75: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1) +76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1) +# Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX. +77: emms | vzeroupper | vzeroall +78: VMREAD Ey,Gy +79: VMWRITE Gy,Ey 7a: 7b: -7c: haddps Vps,Wps (F2),(VEX) | haddpd Vpd,Wpd (66),(VEX) -7d: hsubps Vps,Wps (F2),(VEX) | hsubpd Vpd,Wpd (66),(VEX) -7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66),(VEX),(o128) | movq Vq,Wq (F3),(VEX),(o128) -7f: movq Qq,Pq | movdqa Wdq,Vdq (66),(VEX) | movdqu Wdq,Vdq (F3),(VEX) +7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2) +7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2) +7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1) +7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3) # 0x0f 0x80-0x8f 80: JO Jz (f64) 81: JNO Jz (f64) -82: JB/JNAE/JC Jz (f64) -83: JNB/JAE/JNC Jz (f64) -84: JZ/JE Jz (f64) -85: JNZ/JNE Jz (f64) +82: JB/JC/JNAE Jz (f64) +83: JAE/JNB/JNC Jz (f64) +84: JE/JZ Jz (f64) +85: JNE/JNZ Jz (f64) 86: JBE/JNA Jz (f64) -87: JNBE/JA Jz (f64) +87: JA/JNBE Jz (f64) 88: JS Jz (f64) 89: JNS Jz (f64) 8a: JP/JPE Jz (f64) @@ -502,18 +517,18 @@ b8: JMPE | POPCNT Gv,Ev (F3) b9: Grp10 (1A) ba: Grp8 Ev,Ib (1A) bb: BTC Ev,Gv -bc: BSF Gv,Ev -bd: BSR Gv,Ev +bc: BSF Gv,Ev | TZCNT Gv,Ev (F3) +bd: BSR Gv,Ev | LZCNT Gv,Ev (F3) be: MOVSX Gv,Eb bf: MOVSX Gv,Ew # 0x0f 0xc0-0xcf c0: XADD Eb,Gb c1: XADD Ev,Gv -c2: cmpps Vps,Wps,Ib (VEX) | cmpss Vss,Wss,Ib (F3),(VEX),(o128) | cmppd Vpd,Wpd,Ib (66),(VEX) | cmpsd Vsd,Wsd,Ib (F2),(VEX) -c3: movnti Md/q,Gd/q -c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66),(VEX),(o128) -c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66),(VEX),(o128) -c6: shufps Vps,Wps,Ib (VEX) | shufpd Vpd,Wpd,Ib (66),(VEX) +c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1) +c3: movnti My,Gy +c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1) +c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1) +c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66) c7: Grp9 (1A) c8: BSWAP RAX/EAX/R8/R8D c9: BSWAP RCX/ECX/R9/R9D @@ -524,55 +539,55 @@ cd: BSWAP RBP/EBP/R13/R13D ce: BSWAP RSI/ESI/R14/R14D cf: BSWAP RDI/EDI/R15/R15D # 0x0f 0xd0-0xdf -d0: addsubps Vps,Wps (F2),(VEX) | addsubpd Vpd,Wpd (66),(VEX) -d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66),(VEX),(o128) -d2: psrld Pq,Qq | psrld Vdq,Wdq (66),(VEX),(o128) -d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66),(VEX),(o128) -d4: paddq Pq,Qq | paddq Vdq,Wdq (66),(VEX),(o128) -d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66),(VEX),(o128) -d6: movq Wq,Vq (66),(VEX),(o128) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) -d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66),(VEX),(o128) -d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66),(VEX),(o128) -d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66),(VEX),(o128) -da: pminub Pq,Qq | pminub Vdq,Wdq (66),(VEX),(o128) -db: pand Pq,Qq | pand Vdq,Wdq (66),(VEX),(o128) -dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66),(VEX),(o128) -dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66),(VEX),(o128) -de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66),(VEX),(o128) -df: pandn Pq,Qq | pandn Vdq,Wdq (66),(VEX),(o128) +d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2) +d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1) +d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1) +d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1) +d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1) +d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1) +d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) +d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1) +d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1) +d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1) +da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1) +db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) +dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1) +dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1) +de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1) +df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) # 0x0f 0xe0-0xef -e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66),(VEX),(o128) -e1: psraw Pq,Qq | psraw Vdq,Wdq (66),(VEX),(o128) -e2: psrad Pq,Qq | psrad Vdq,Wdq (66),(VEX),(o128) -e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66),(VEX),(o128) -e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66),(VEX),(o128) -e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66),(VEX),(o128) -e6: cvtpd2dq Vdq,Wpd (F2),(VEX) | cvttpd2dq Vdq,Wpd (66),(VEX) | cvtdq2pd Vpd,Wdq (F3),(VEX) -e7: movntq Mq,Pq | movntdq Mdq,Vdq (66),(VEX) -e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66),(VEX),(o128) -e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66),(VEX),(o128) -ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66),(VEX),(o128) -eb: por Pq,Qq | por Vdq,Wdq (66),(VEX),(o128) -ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66),(VEX),(o128) -ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66),(VEX),(o128) -ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66),(VEX),(o128) -ef: pxor Pq,Qq | pxor Vdq,Wdq (66),(VEX),(o128) +e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1) +e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1) +e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1) +e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1) +e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1) +e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1) +e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2) +e7: movntq Mq,Pq | vmovntdq Mx,Vx (66) +e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1) +e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1) +ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1) +eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) +ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1) +ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1) +ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1) +ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) # 0x0f 0xf0-0xff -f0: lddqu Vdq,Mdq (F2),(VEX) -f1: psllw Pq,Qq | psllw Vdq,Wdq (66),(VEX),(o128) -f2: pslld Pq,Qq | pslld Vdq,Wdq (66),(VEX),(o128) -f3: psllq Pq,Qq | psllq Vdq,Wdq (66),(VEX),(o128) -f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66),(VEX),(o128) -f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66),(VEX),(o128) -f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66),(VEX),(o128) -f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66),(VEX),(o128) -f8: psubb Pq,Qq | psubb Vdq,Wdq (66),(VEX),(o128) -f9: psubw Pq,Qq | psubw Vdq,Wdq (66),(VEX),(o128) -fa: psubd Pq,Qq | psubd Vdq,Wdq (66),(VEX),(o128) -fb: psubq Pq,Qq | psubq Vdq,Wdq (66),(VEX),(o128) -fc: paddb Pq,Qq | paddb Vdq,Wdq (66),(VEX),(o128) -fd: paddw Pq,Qq | paddw Vdq,Wdq (66),(VEX),(o128) -fe: paddd Pq,Qq | paddd Vdq,Wdq (66),(VEX),(o128) +f0: vlddqu Vx,Mx (F2) +f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1) +f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1) +f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1) +f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1) +f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1) +f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1) +f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1) +f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1) +f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1) +fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1) +fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1) +fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1) +fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1) +fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) ff: EndTable @@ -580,155 +595,193 @@ Table: 3-byte opcode 1 (0x0f 0x38) Referrer: 3-byte escape 1 AVXcode: 2 # 0x0f 0x38 0x00-0x0f -00: pshufb Pq,Qq | pshufb Vdq,Wdq (66),(VEX),(o128) -01: phaddw Pq,Qq | phaddw Vdq,Wdq (66),(VEX),(o128) -02: phaddd Pq,Qq | phaddd Vdq,Wdq (66),(VEX),(o128) -03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66),(VEX),(o128) -04: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66),(VEX),(o128) -05: phsubw Pq,Qq | phsubw Vdq,Wdq (66),(VEX),(o128) -06: phsubd Pq,Qq | phsubd Vdq,Wdq (66),(VEX),(o128) -07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66),(VEX),(o128) -08: psignb Pq,Qq | psignb Vdq,Wdq (66),(VEX),(o128) -09: psignw Pq,Qq | psignw Vdq,Wdq (66),(VEX),(o128) -0a: psignd Pq,Qq | psignd Vdq,Wdq (66),(VEX),(o128) -0b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66),(VEX),(o128) -0c: Vpermilps /r (66),(oVEX) -0d: Vpermilpd /r (66),(oVEX) -0e: vtestps /r (66),(oVEX) -0f: vtestpd /r (66),(oVEX) +00: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1) +01: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1) +02: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1) +03: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1) +04: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1) +05: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1) +06: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1) +07: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1) +08: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1) +09: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1) +0a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1) +0b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1) +0c: vpermilps Vx,Hx,Wx (66),(v) +0d: vpermilpd Vx,Hx,Wx (66),(v) +0e: vtestps Vx,Wx (66),(v) +0f: vtestpd Vx,Wx (66),(v) # 0x0f 0x38 0x10-0x1f 10: pblendvb Vdq,Wdq (66) 11: 12: -13: +13: vcvtph2ps Vx,Wx,Ib (66),(v) 14: blendvps Vdq,Wdq (66) 15: blendvpd Vdq,Wdq (66) -16: -17: ptest Vdq,Wdq (66),(VEX) -18: vbroadcastss /r (66),(oVEX) -19: vbroadcastsd /r (66),(oVEX),(o256) -1a: vbroadcastf128 /r (66),(oVEX),(o256) +16: vpermps Vqq,Hqq,Wqq (66),(v) +17: vptest Vx,Wx (66) +18: vbroadcastss Vx,Wd (66),(v) +19: vbroadcastsd Vqq,Wq (66),(v) +1a: vbroadcastf128 Vqq,Mdq (66),(v) 1b: -1c: pabsb Pq,Qq | pabsb Vdq,Wdq (66),(VEX),(o128) -1d: pabsw Pq,Qq | pabsw Vdq,Wdq (66),(VEX),(o128) -1e: pabsd Pq,Qq | pabsd Vdq,Wdq (66),(VEX),(o128) +1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1) +1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1) +1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1) 1f: # 0x0f 0x38 0x20-0x2f -20: pmovsxbw Vdq,Udq/Mq (66),(VEX),(o128) -21: pmovsxbd Vdq,Udq/Md (66),(VEX),(o128) -22: pmovsxbq Vdq,Udq/Mw (66),(VEX),(o128) -23: pmovsxwd Vdq,Udq/Mq (66),(VEX),(o128) -24: pmovsxwq Vdq,Udq/Md (66),(VEX),(o128) -25: pmovsxdq Vdq,Udq/Mq (66),(VEX),(o128) +20: vpmovsxbw Vx,Ux/Mq (66),(v1) +21: vpmovsxbd Vx,Ux/Md (66),(v1) +22: vpmovsxbq Vx,Ux/Mw (66),(v1) +23: vpmovsxwd Vx,Ux/Mq (66),(v1) +24: vpmovsxwq Vx,Ux/Md (66),(v1) +25: vpmovsxdq Vx,Ux/Mq (66),(v1) 26: 27: -28: pmuldq Vdq,Wdq (66),(VEX),(o128) -29: pcmpeqq Vdq,Wdq (66),(VEX),(o128) -2a: movntdqa Vdq,Mdq (66),(VEX),(o128) -2b: packusdw Vdq,Wdq (66),(VEX),(o128) -2c: vmaskmovps(ld) /r (66),(oVEX) -2d: vmaskmovpd(ld) /r (66),(oVEX) -2e: vmaskmovps(st) /r (66),(oVEX) -2f: vmaskmovpd(st) /r (66),(oVEX) +28: vpmuldq Vx,Hx,Wx (66),(v1) +29: vpcmpeqq Vx,Hx,Wx (66),(v1) +2a: vmovntdqa Vx,Mx (66),(v1) +2b: vpackusdw Vx,Hx,Wx (66),(v1) +2c: vmaskmovps Vx,Hx,Mx (66),(v) +2d: vmaskmovpd Vx,Hx,Mx (66),(v) +2e: vmaskmovps Mx,Hx,Vx (66),(v) +2f: vmaskmovpd Mx,Hx,Vx (66),(v) # 0x0f 0x38 0x30-0x3f -30: pmovzxbw Vdq,Udq/Mq (66),(VEX),(o128) -31: pmovzxbd Vdq,Udq/Md (66),(VEX),(o128) -32: pmovzxbq Vdq,Udq/Mw (66),(VEX),(o128) -33: pmovzxwd Vdq,Udq/Mq (66),(VEX),(o128) -34: pmovzxwq Vdq,Udq/Md (66),(VEX),(o128) -35: pmovzxdq Vdq,Udq/Mq (66),(VEX),(o128) -36: -37: pcmpgtq Vdq,Wdq (66),(VEX),(o128) -38: pminsb Vdq,Wdq (66),(VEX),(o128) -39: pminsd Vdq,Wdq (66),(VEX),(o128) -3a: pminuw Vdq,Wdq (66),(VEX),(o128) -3b: pminud Vdq,Wdq (66),(VEX),(o128) -3c: pmaxsb Vdq,Wdq (66),(VEX),(o128) -3d: pmaxsd Vdq,Wdq (66),(VEX),(o128) -3e: pmaxuw Vdq,Wdq (66),(VEX),(o128) -3f: pmaxud Vdq,Wdq (66),(VEX),(o128) +30: vpmovzxbw Vx,Ux/Mq (66),(v1) +31: vpmovzxbd Vx,Ux/Md (66),(v1) +32: vpmovzxbq Vx,Ux/Mw (66),(v1) +33: vpmovzxwd Vx,Ux/Mq (66),(v1) +34: vpmovzxwq Vx,Ux/Md (66),(v1) +35: vpmovzxdq Vx,Ux/Mq (66),(v1) +36: vpermd Vqq,Hqq,Wqq (66),(v) +37: vpcmpgtq Vx,Hx,Wx (66),(v1) +38: vpminsb Vx,Hx,Wx (66),(v1) +39: vpminsd Vx,Hx,Wx (66),(v1) +3a: vpminuw Vx,Hx,Wx (66),(v1) +3b: vpminud Vx,Hx,Wx (66),(v1) +3c: vpmaxsb Vx,Hx,Wx (66),(v1) +3d: vpmaxsd Vx,Hx,Wx (66),(v1) +3e: vpmaxuw Vx,Hx,Wx (66),(v1) +3f: vpmaxud Vx,Hx,Wx (66),(v1) # 0x0f 0x38 0x40-0x8f -40: pmulld Vdq,Wdq (66),(VEX),(o128) -41: phminposuw Vdq,Wdq (66),(VEX),(o128) -80: INVEPT Gd/q,Mdq (66) -81: INVPID Gd/q,Mdq (66) +40: vpmulld Vx,Hx,Wx (66),(v1) +41: vphminposuw Vdq,Wdq (66),(v1) +42: +43: +44: +45: vpsrlvd/q Vx,Hx,Wx (66),(v) +46: vpsravd Vx,Hx,Wx (66),(v) +47: vpsllvd/q Vx,Hx,Wx (66),(v) +# Skip 0x48-0x57 +58: vpbroadcastd Vx,Wx (66),(v) +59: vpbroadcastq Vx,Wx (66),(v) +5a: vbroadcasti128 Vqq,Mdq (66),(v) +# Skip 0x5b-0x77 +78: vpbroadcastb Vx,Wx (66),(v) +79: vpbroadcastw Vx,Wx (66),(v) +# Skip 0x7a-0x7f +80: INVEPT Gy,Mdq (66) +81: INVPID Gy,Mdq (66) +82: INVPCID Gy,Mdq (66) +8c: vpmaskmovd/q Vx,Hx,Mx (66),(v) +8e: vpmaskmovd/q Mx,Vx,Hx (66),(v) # 0x0f 0x38 0x90-0xbf (FMA) -96: vfmaddsub132pd/ps /r (66),(VEX) -97: vfmsubadd132pd/ps /r (66),(VEX) -98: vfmadd132pd/ps /r (66),(VEX) -99: vfmadd132sd/ss /r (66),(VEX),(o128) -9a: vfmsub132pd/ps /r (66),(VEX) -9b: vfmsub132sd/ss /r (66),(VEX),(o128) -9c: vfnmadd132pd/ps /r (66),(VEX) -9d: vfnmadd132sd/ss /r (66),(VEX),(o128) -9e: vfnmsub132pd/ps /r (66),(VEX) -9f: vfnmsub132sd/ss /r (66),(VEX),(o128) -a6: vfmaddsub213pd/ps /r (66),(VEX) -a7: vfmsubadd213pd/ps /r (66),(VEX) -a8: vfmadd213pd/ps /r (66),(VEX) -a9: vfmadd213sd/ss /r (66),(VEX),(o128) -aa: vfmsub213pd/ps /r (66),(VEX) -ab: vfmsub213sd/ss /r (66),(VEX),(o128) -ac: vfnmadd213pd/ps /r (66),(VEX) -ad: vfnmadd213sd/ss /r (66),(VEX),(o128) -ae: vfnmsub213pd/ps /r (66),(VEX) -af: vfnmsub213sd/ss /r (66),(VEX),(o128) -b6: vfmaddsub231pd/ps /r (66),(VEX) -b7: vfmsubadd231pd/ps /r (66),(VEX) -b8: vfmadd231pd/ps /r (66),(VEX) -b9: vfmadd231sd/ss /r (66),(VEX),(o128) -ba: vfmsub231pd/ps /r (66),(VEX) -bb: vfmsub231sd/ss /r (66),(VEX),(o128) -bc: vfnmadd231pd/ps /r (66),(VEX) -bd: vfnmadd231sd/ss /r (66),(VEX),(o128) -be: vfnmsub231pd/ps /r (66),(VEX) -bf: vfnmsub231sd/ss /r (66),(VEX),(o128) +90: vgatherdd/q Vx,Hx,Wx (66),(v) +91: vgatherqd/q Vx,Hx,Wx (66),(v) +92: vgatherdps/d Vx,Hx,Wx (66),(v) +93: vgatherqps/d Vx,Hx,Wx (66),(v) +94: +95: +96: vfmaddsub132ps/d Vx,Hx,Wx (66),(v) +97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v) +98: vfmadd132ps/d Vx,Hx,Wx (66),(v) +99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1) +9a: vfmsub132ps/d Vx,Hx,Wx (66),(v) +9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1) +9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v) +9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1) +9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v) +9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1) +a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v) +a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v) +a8: vfmadd213ps/d Vx,Hx,Wx (66),(v) +a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1) +aa: vfmsub213ps/d Vx,Hx,Wx (66),(v) +ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1) +ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v) +ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1) +ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v) +af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1) +b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v) +b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v) +b8: vfmadd231ps/d Vx,Hx,Wx (66),(v) +b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1) +ba: vfmsub231ps/d Vx,Hx,Wx (66),(v) +bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1) +bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v) +bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1) +be: vfnmsub231ps/d Vx,Hx,Wx (66),(v) +bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1) # 0x0f 0x38 0xc0-0xff -db: aesimc Vdq,Wdq (66),(VEX),(o128) -dc: aesenc Vdq,Wdq (66),(VEX),(o128) -dd: aesenclast Vdq,Wdq (66),(VEX),(o128) -de: aesdec Vdq,Wdq (66),(VEX),(o128) -df: aesdeclast Vdq,Wdq (66),(VEX),(o128) -f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2) -f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2) +db: VAESIMC Vdq,Wdq (66),(v1) +dc: VAESENC Vdq,Hdq,Wdq (66),(v1) +dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) +de: VAESDEC Vdq,Hdq,Wdq (66),(v1) +df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1) +f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) +f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) +f3: ANDN Gy,By,Ey (v) +f4: Grp17 (1A) +f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) +f6: MULX By,Gy,rDX,Ey (F2),(v) +f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) EndTable Table: 3-byte opcode 2 (0x0f 0x3a) Referrer: 3-byte escape 2 AVXcode: 3 # 0x0f 0x3a 0x00-0xff -04: vpermilps /r,Ib (66),(oVEX) -05: vpermilpd /r,Ib (66),(oVEX) -06: vperm2f128 /r,Ib (66),(oVEX),(o256) -08: roundps Vdq,Wdq,Ib (66),(VEX) -09: roundpd Vdq,Wdq,Ib (66),(VEX) -0a: roundss Vss,Wss,Ib (66),(VEX),(o128) -0b: roundsd Vsd,Wsd,Ib (66),(VEX),(o128) -0c: blendps Vdq,Wdq,Ib (66),(VEX) -0d: blendpd Vdq,Wdq,Ib (66),(VEX) -0e: pblendw Vdq,Wdq,Ib (66),(VEX),(o128) -0f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66),(VEX),(o128) -14: pextrb Rd/Mb,Vdq,Ib (66),(VEX),(o128) -15: pextrw Rd/Mw,Vdq,Ib (66),(VEX),(o128) -16: pextrd/pextrq Ed/q,Vdq,Ib (66),(VEX),(o128) -17: extractps Ed,Vdq,Ib (66),(VEX),(o128) -18: vinsertf128 /r,Ib (66),(oVEX),(o256) -19: vextractf128 /r,Ib (66),(oVEX),(o256) -20: pinsrb Vdq,Rd/q/Mb,Ib (66),(VEX),(o128) -21: insertps Vdq,Udq/Md,Ib (66),(VEX),(o128) -22: pinsrd/pinsrq Vdq,Ed/q,Ib (66),(VEX),(o128) -40: dpps Vdq,Wdq,Ib (66),(VEX) -41: dppd Vdq,Wdq,Ib (66),(VEX),(o128) -42: mpsadbw Vdq,Wdq,Ib (66),(VEX),(o128) -44: pclmulq Vdq,Wdq,Ib (66),(VEX),(o128) -4a: vblendvps /r,Ib (66),(oVEX) -4b: vblendvpd /r,Ib (66),(oVEX) -4c: vpblendvb /r,Ib (66),(oVEX),(o128) -60: pcmpestrm Vdq,Wdq,Ib (66),(VEX),(o128) -61: pcmpestri Vdq,Wdq,Ib (66),(VEX),(o128) -62: pcmpistrm Vdq,Wdq,Ib (66),(VEX),(o128) -63: pcmpistri Vdq,Wdq,Ib (66),(VEX),(o128) -df: aeskeygenassist Vdq,Wdq,Ib (66),(VEX),(o128) +00: vpermq Vqq,Wqq,Ib (66),(v) +01: vpermpd Vqq,Wqq,Ib (66),(v) +02: vpblendd Vx,Hx,Wx,Ib (66),(v) +03: +04: vpermilps Vx,Wx,Ib (66),(v) +05: vpermilpd Vx,Wx,Ib (66),(v) +06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) +07: +08: vroundps Vx,Wx,Ib (66) +09: vroundpd Vx,Wx,Ib (66) +0a: vroundss Vss,Wss,Ib (66),(v1) +0b: vroundsd Vsd,Wsd,Ib (66),(v1) +0c: vblendps Vx,Hx,Wx,Ib (66) +0d: vblendpd Vx,Hx,Wx,Ib (66) +0e: vpblendw Vx,Hx,Wx,Ib (66),(v1) +0f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1) +14: vpextrb Rd/Mb,Vdq,Ib (66),(v1) +15: vpextrw Rd/Mw,Vdq,Ib (66),(v1) +16: vpextrd/q Ey,Vdq,Ib (66),(v1) +17: vextractps Ed,Vdq,Ib (66),(v1) +18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) +19: vextractf128 Wdq,Vqq,Ib (66),(v) +1d: vcvtps2ph Wx,Vx,Ib (66),(v) +20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1) +21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1) +22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) +38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) +39: vextracti128 Wdq,Vqq,Ib (66),(v) +40: vdpps Vx,Hx,Wx,Ib (66) +41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1) +42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) +44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1) +46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v) +4a: vblendvps Vx,Hx,Wx,Lx (66),(v) +4b: vblendvpd Vx,Hx,Wx,Lx (66),(v) +4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1) +60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) +61: vpcmpestri Vdq,Wdq,Ib (66),(v1) +62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) +63: vpcmpistri Vdq,Wdq,Ib (66),(v1) +df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) +f0: RORX Gy,Ey,Ib (F2),(v) EndTable GrpTable: Grp1 @@ -790,7 +843,7 @@ GrpTable: Grp5 2: CALLN Ev (f64) 3: CALLF Ep 4: JMPN Ev (f64) -5: JMPF Ep +5: JMPF Mp 6: PUSH Ev (d64) 7: EndTable @@ -807,7 +860,7 @@ EndTable GrpTable: Grp7 0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) 1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001) -2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) +2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) 3: LIDT Ms 4: SMSW Mw/Rv 5: @@ -824,44 +877,45 @@ EndTable GrpTable: Grp9 1: CMPXCHG8B/16B Mq/Mdq -6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) -7: VMPTRST Mq +6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) +7: VMPTRST Mq | VMPTRST Mq (F3) EndTable GrpTable: Grp10 EndTable GrpTable: Grp11 +# Note: the operands are given by group opcode 0: MOV EndTable GrpTable: Grp12 -2: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B),(VEX),(o128) -4: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B),(VEX),(o128) -6: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B),(VEX),(o128) +2: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1) +4: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1) +6: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1) EndTable GrpTable: Grp13 -2: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B),(VEX),(o128) -4: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B),(VEX),(o128) -6: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B),(VEX),(o128) +2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1) +4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) +6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1) EndTable GrpTable: Grp14 -2: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B),(VEX),(o128) -3: psrldq Udq,Ib (66),(11B),(VEX),(o128) -6: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B),(VEX),(o128) -7: pslldq Udq,Ib (66),(11B),(VEX),(o128) +2: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1) +3: vpsrldq Hx,Ux,Ib (66),(11B),(v1) +6: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1) +7: vpslldq Hx,Ux,Ib (66),(11B),(v1) EndTable GrpTable: Grp15 -0: fxsave -1: fxstor -2: ldmxcsr (VEX) -3: stmxcsr (VEX) +0: fxsave | RDFSBASE Ry (F3),(11B) +1: fxstor | RDGSBASE Ry (F3),(11B) +2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B) +3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) 4: XSAVE 5: XRSTOR | lfence (11B) -6: mfence (11B) +6: XSAVEOPT | mfence (11B) 7: clflush | sfence (11B) EndTable @@ -872,6 +926,12 @@ GrpTable: Grp16 3: prefetch T2 EndTable +GrpTable: Grp17 +1: BLSR By,Ey (v) +2: BLSMSK By,Ey (v) +3: BLSI By,Ey (v) +EndTable + # AMD's Prefetch Group GrpTable: GrpP 0: PREFETCH diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 3d11327c9ab..23d8e5fecf7 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -27,6 +27,4 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology.o obj-$(CONFIG_ACPI_NUMA) += srat.o obj-$(CONFIG_NUMA_EMU) += numa_emulation.o -obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o - obj-$(CONFIG_MEMTEST) += memtest.o diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 87488b93a65..a298914058f 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -67,7 +67,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse, good_end = max_pfn_mapped << PAGE_SHIFT; base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE); - if (base == MEMBLOCK_ERROR) + if (!base) panic("Cannot find space for the kernel page tables"); pgt_buf_start = base >> PAGE_SHIFT; @@ -80,7 +80,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse, void __init native_pagetable_reserve(u64 start, u64 end) { - memblock_x86_reserve_range(start, end, "PGTABLE"); + memblock_reserve(start, end - start); } struct map_range { @@ -279,8 +279,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top) * so that they can be reused for other purposes. * - * On native it just means calling memblock_x86_reserve_range, on Xen it - * also means marking RW the pagetable pages that we allocated before + * On native it just means calling memblock_reserve, on Xen it also + * means marking RW the pagetable pages that we allocated before * but that haven't been used. * * In fact on xen we mark RO the whole range pgt_buf_start - diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 29f7c6d9817..0c1da394a63 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -427,23 +427,17 @@ static void __init add_one_highpage_init(struct page *page) void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn, unsigned long end_pfn) { - struct range *range; - int nr_range; - int i; - - nr_range = __get_free_all_memory_range(&range, nid, start_pfn, end_pfn); - - for (i = 0; i < nr_range; i++) { - struct page *page; - int node_pfn; - - for (node_pfn = range[i].start; node_pfn < range[i].end; - node_pfn++) { - if (!pfn_valid(node_pfn)) - continue; - page = pfn_to_page(node_pfn); - add_one_highpage_init(page); - } + phys_addr_t start, end; + u64 i; + + for_each_free_mem_range(i, nid, &start, &end, NULL) { + unsigned long pfn = clamp_t(unsigned long, PFN_UP(start), + start_pfn, end_pfn); + unsigned long e_pfn = clamp_t(unsigned long, PFN_DOWN(end), + start_pfn, end_pfn); + for ( ; pfn < e_pfn; pfn++) + if (pfn_valid(pfn)) + add_one_highpage_init(pfn_to_page(pfn)); } } #else @@ -650,18 +644,18 @@ void __init initmem_init(void) highstart_pfn = highend_pfn = max_pfn; if (max_pfn > max_low_pfn) highstart_pfn = max_low_pfn; - memblock_x86_register_active_regions(0, 0, highend_pfn); - sparse_memory_present_with_active_regions(0); printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", pages_to_mb(highend_pfn - highstart_pfn)); num_physpages = highend_pfn; high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; #else - memblock_x86_register_active_regions(0, 0, max_low_pfn); - sparse_memory_present_with_active_regions(0); num_physpages = max_low_pfn; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; #endif + + memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0); + sparse_memory_present_with_active_regions(0); + #ifdef CONFIG_FLATMEM max_mapnr = num_physpages; #endif diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index bbaaa005bf0..a8a56ce3a96 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -608,7 +608,7 @@ kernel_physical_mapping_init(unsigned long start, #ifndef CONFIG_NUMA void __init initmem_init(void) { - memblock_x86_register_active_regions(0, 0, max_pfn); + memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0); } #endif diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c deleted file mode 100644 index 992da5ec5a6..00000000000 --- a/arch/x86/mm/memblock.c +++ /dev/null @@ -1,348 +0,0 @@ -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/init.h> -#include <linux/bitops.h> -#include <linux/memblock.h> -#include <linux/bootmem.h> -#include <linux/mm.h> -#include <linux/range.h> - -/* Check for already reserved areas */ -bool __init memblock_x86_check_reserved_size(u64 *addrp, u64 *sizep, u64 align) -{ - struct memblock_region *r; - u64 addr = *addrp, last; - u64 size = *sizep; - bool changed = false; - -again: - last = addr + size; - for_each_memblock(reserved, r) { - if (last > r->base && addr < r->base) { - size = r->base - addr; - changed = true; - goto again; - } - if (last > (r->base + r->size) && addr < (r->base + r->size)) { - addr = round_up(r->base + r->size, align); - size = last - addr; - changed = true; - goto again; - } - if (last <= (r->base + r->size) && addr >= r->base) { - *sizep = 0; - return false; - } - } - if (changed) { - *addrp = addr; - *sizep = size; - } - return changed; -} - -/* - * Find next free range after start, and size is returned in *sizep - */ -u64 __init memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align) -{ - struct memblock_region *r; - - for_each_memblock(memory, r) { - u64 ei_start = r->base; - u64 ei_last = ei_start + r->size; - u64 addr; - - addr = round_up(ei_start, align); - if (addr < start) - addr = round_up(start, align); - if (addr >= ei_last) - continue; - *sizep = ei_last - addr; - while (memblock_x86_check_reserved_size(&addr, sizep, align)) - ; - - if (*sizep) - return addr; - } - - return MEMBLOCK_ERROR; -} - -static __init struct range *find_range_array(int count) -{ - u64 end, size, mem; - struct range *range; - - size = sizeof(struct range) * count; - end = memblock.current_limit; - - mem = memblock_find_in_range(0, end, size, sizeof(struct range)); - if (mem == MEMBLOCK_ERROR) - panic("can not find more space for range array"); - - /* - * This range is tempoaray, so don't reserve it, it will not be - * overlapped because We will not alloccate new buffer before - * We discard this one - */ - range = __va(mem); - memset(range, 0, size); - - return range; -} - -static void __init memblock_x86_subtract_reserved(struct range *range, int az) -{ - u64 final_start, final_end; - struct memblock_region *r; - - /* Take out region array itself at first*/ - memblock_free_reserved_regions(); - - memblock_dbg("Subtract (%ld early reservations)\n", memblock.reserved.cnt); - - for_each_memblock(reserved, r) { - memblock_dbg(" [%010llx-%010llx]\n", (u64)r->base, (u64)r->base + r->size - 1); - final_start = PFN_DOWN(r->base); - final_end = PFN_UP(r->base + r->size); - if (final_start >= final_end) - continue; - subtract_range(range, az, final_start, final_end); - } - - /* Put region array back ? */ - memblock_reserve_reserved_regions(); -} - -struct count_data { - int nr; -}; - -static int __init count_work_fn(unsigned long start_pfn, - unsigned long end_pfn, void *datax) -{ - struct count_data *data = datax; - - data->nr++; - - return 0; -} - -static int __init count_early_node_map(int nodeid) -{ - struct count_data data; - - data.nr = 0; - work_with_active_regions(nodeid, count_work_fn, &data); - - return data.nr; -} - -int __init __get_free_all_memory_range(struct range **rangep, int nodeid, - unsigned long start_pfn, unsigned long end_pfn) -{ - int count; - struct range *range; - int nr_range; - - count = (memblock.reserved.cnt + count_early_node_map(nodeid)) * 2; - - range = find_range_array(count); - nr_range = 0; - - /* - * Use early_node_map[] and memblock.reserved.region to get range array - * at first - */ - nr_range = add_from_early_node_map(range, count, nr_range, nodeid); - subtract_range(range, count, 0, start_pfn); - subtract_range(range, count, end_pfn, -1ULL); - - memblock_x86_subtract_reserved(range, count); - nr_range = clean_sort_range(range, count); - - *rangep = range; - return nr_range; -} - -int __init get_free_all_memory_range(struct range **rangep, int nodeid) -{ - unsigned long end_pfn = -1UL; - -#ifdef CONFIG_X86_32 - end_pfn = max_low_pfn; -#endif - return __get_free_all_memory_range(rangep, nodeid, 0, end_pfn); -} - -static u64 __init __memblock_x86_memory_in_range(u64 addr, u64 limit, bool get_free) -{ - int i, count; - struct range *range; - int nr_range; - u64 final_start, final_end; - u64 free_size; - struct memblock_region *r; - - count = (memblock.reserved.cnt + memblock.memory.cnt) * 2; - - range = find_range_array(count); - nr_range = 0; - - addr = PFN_UP(addr); - limit = PFN_DOWN(limit); - - for_each_memblock(memory, r) { - final_start = PFN_UP(r->base); - final_end = PFN_DOWN(r->base + r->size); - if (final_start >= final_end) - continue; - if (final_start >= limit || final_end <= addr) - continue; - - nr_range = add_range(range, count, nr_range, final_start, final_end); - } - subtract_range(range, count, 0, addr); - subtract_range(range, count, limit, -1ULL); - - /* Subtract memblock.reserved.region in range ? */ - if (!get_free) - goto sort_and_count_them; - for_each_memblock(reserved, r) { - final_start = PFN_DOWN(r->base); - final_end = PFN_UP(r->base + r->size); - if (final_start >= final_end) - continue; - if (final_start >= limit || final_end <= addr) - continue; - - subtract_range(range, count, final_start, final_end); - } - -sort_and_count_them: - nr_range = clean_sort_range(range, count); - - free_size = 0; - for (i = 0; i < nr_range; i++) - free_size += range[i].end - range[i].start; - - return free_size << PAGE_SHIFT; -} - -u64 __init memblock_x86_free_memory_in_range(u64 addr, u64 limit) -{ - return __memblock_x86_memory_in_range(addr, limit, true); -} - -u64 __init memblock_x86_memory_in_range(u64 addr, u64 limit) -{ - return __memblock_x86_memory_in_range(addr, limit, false); -} - -void __init memblock_x86_reserve_range(u64 start, u64 end, char *name) -{ - if (start == end) - return; - - if (WARN_ONCE(start > end, "memblock_x86_reserve_range: wrong range [%#llx, %#llx)\n", start, end)) - return; - - memblock_dbg(" memblock_x86_reserve_range: [%#010llx-%#010llx] %16s\n", start, end - 1, name); - - memblock_reserve(start, end - start); -} - -void __init memblock_x86_free_range(u64 start, u64 end) -{ - if (start == end) - return; - - if (WARN_ONCE(start > end, "memblock_x86_free_range: wrong range [%#llx, %#llx)\n", start, end)) - return; - - memblock_dbg(" memblock_x86_free_range: [%#010llx-%#010llx]\n", start, end - 1); - - memblock_free(start, end - start); -} - -/* - * Need to call this function after memblock_x86_register_active_regions, - * so early_node_map[] is filled already. - */ -u64 __init memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align) -{ - u64 addr; - addr = find_memory_core_early(nid, size, align, start, end); - if (addr != MEMBLOCK_ERROR) - return addr; - - /* Fallback, should already have start end within node range */ - return memblock_find_in_range(start, end, size, align); -} - -/* - * Finds an active region in the address range from start_pfn to last_pfn and - * returns its range in ei_startpfn and ei_endpfn for the memblock entry. - */ -static int __init memblock_x86_find_active_region(const struct memblock_region *ei, - unsigned long start_pfn, - unsigned long last_pfn, - unsigned long *ei_startpfn, - unsigned long *ei_endpfn) -{ - u64 align = PAGE_SIZE; - - *ei_startpfn = round_up(ei->base, align) >> PAGE_SHIFT; - *ei_endpfn = round_down(ei->base + ei->size, align) >> PAGE_SHIFT; - - /* Skip map entries smaller than a page */ - if (*ei_startpfn >= *ei_endpfn) - return 0; - - /* Skip if map is outside the node */ - if (*ei_endpfn <= start_pfn || *ei_startpfn >= last_pfn) - return 0; - - /* Check for overlaps */ - if (*ei_startpfn < start_pfn) - *ei_startpfn = start_pfn; - if (*ei_endpfn > last_pfn) - *ei_endpfn = last_pfn; - - return 1; -} - -/* Walk the memblock.memory map and register active regions within a node */ -void __init memblock_x86_register_active_regions(int nid, unsigned long start_pfn, - unsigned long last_pfn) -{ - unsigned long ei_startpfn; - unsigned long ei_endpfn; - struct memblock_region *r; - - for_each_memblock(memory, r) - if (memblock_x86_find_active_region(r, start_pfn, last_pfn, - &ei_startpfn, &ei_endpfn)) - add_active_range(nid, ei_startpfn, ei_endpfn); -} - -/* - * Find the hole size (in bytes) in the memory range. - * @start: starting address of the memory range to scan - * @end: ending address of the memory range to scan - */ -u64 __init memblock_x86_hole_size(u64 start, u64 end) -{ - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long last_pfn = end >> PAGE_SHIFT; - unsigned long ei_startpfn, ei_endpfn, ram = 0; - struct memblock_region *r; - - for_each_memblock(memory, r) - if (memblock_x86_find_active_region(r, start_pfn, last_pfn, - &ei_startpfn, &ei_endpfn)) - ram += ei_endpfn - ei_startpfn; - - return end - start - ((u64)ram << PAGE_SHIFT); -} diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index 92faf3a1c53..c80b9fb9573 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c @@ -34,7 +34,7 @@ static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad) (unsigned long long) pattern, (unsigned long long) start_bad, (unsigned long long) end_bad); - memblock_x86_reserve_range(start_bad, end_bad, "BAD RAM"); + memblock_reserve(start_bad, end_bad - start_bad); } static void __init memtest(u64 pattern, u64 start_phys, u64 size) @@ -70,24 +70,19 @@ static void __init memtest(u64 pattern, u64 start_phys, u64 size) static void __init do_one_pass(u64 pattern, u64 start, u64 end) { - u64 size = 0; - - while (start < end) { - start = memblock_x86_find_in_range_size(start, &size, 1); - - /* done ? */ - if (start >= end) - break; - if (start + size > end) - size = end - start; - - printk(KERN_INFO " %010llx - %010llx pattern %016llx\n", - (unsigned long long) start, - (unsigned long long) start + size, - (unsigned long long) cpu_to_be64(pattern)); - memtest(pattern, start, size); - - start += size; + u64 i; + phys_addr_t this_start, this_end; + + for_each_free_mem_range(i, MAX_NUMNODES, &this_start, &this_end, NULL) { + this_start = clamp_t(phys_addr_t, this_start, start, end); + this_end = clamp_t(phys_addr_t, this_end, start, end); + if (this_start < this_end) { + printk(KERN_INFO " %010llx - %010llx pattern %016llx\n", + (unsigned long long)this_start, + (unsigned long long)this_end, + (unsigned long long)cpu_to_be64(pattern)); + memtest(pattern, this_start, this_end - this_start); + } } } diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index fbeaaf41661..496f494593b 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -192,8 +192,6 @@ int __init numa_add_memblk(int nid, u64 start, u64 end) /* Initialize NODE_DATA for a node on the local memory */ static void __init setup_node_data(int nid, u64 start, u64 end) { - const u64 nd_low = PFN_PHYS(MAX_DMA_PFN); - const u64 nd_high = PFN_PHYS(max_pfn_mapped); const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); bool remapped = false; u64 nd_pa; @@ -224,17 +222,12 @@ static void __init setup_node_data(int nid, u64 start, u64 end) nd_pa = __pa(nd); remapped = true; } else { - nd_pa = memblock_x86_find_in_range_node(nid, nd_low, nd_high, - nd_size, SMP_CACHE_BYTES); - if (nd_pa == MEMBLOCK_ERROR) - nd_pa = memblock_find_in_range(nd_low, nd_high, - nd_size, SMP_CACHE_BYTES); - if (nd_pa == MEMBLOCK_ERROR) { + nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid); + if (!nd_pa) { pr_err("Cannot find %zu bytes in node %d\n", nd_size, nid); return; } - memblock_x86_reserve_range(nd_pa, nd_pa + nd_size, "NODE_DATA"); nd = __va(nd_pa); } @@ -371,8 +364,7 @@ void __init numa_reset_distance(void) /* numa_distance could be 1LU marking allocation failure, test cnt */ if (numa_distance_cnt) - memblock_x86_free_range(__pa(numa_distance), - __pa(numa_distance) + size); + memblock_free(__pa(numa_distance), size); numa_distance_cnt = 0; numa_distance = NULL; /* enable table creation */ } @@ -395,13 +387,13 @@ static int __init numa_alloc_distance(void) phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), size, PAGE_SIZE); - if (phys == MEMBLOCK_ERROR) { + if (!phys) { pr_warning("NUMA: Warning: can't allocate distance table!\n"); /* don't retry until explicitly reset */ numa_distance = (void *)1LU; return -ENOMEM; } - memblock_x86_reserve_range(phys, phys + size, "NUMA DIST"); + memblock_reserve(phys, size); numa_distance = __va(phys); numa_distance_cnt = cnt; @@ -482,8 +474,8 @@ static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) numaram = 0; } - e820ram = max_pfn - (memblock_x86_hole_size(0, - PFN_PHYS(max_pfn)) >> PAGE_SHIFT); + e820ram = max_pfn - absent_pages_in_range(0, max_pfn); + /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) { printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n", @@ -505,13 +497,10 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) if (WARN_ON(nodes_empty(node_possible_map))) return -EINVAL; - for (i = 0; i < mi->nr_blks; i++) - memblock_x86_register_active_regions(mi->blk[i].nid, - mi->blk[i].start >> PAGE_SHIFT, - mi->blk[i].end >> PAGE_SHIFT); - - /* for out of order entries */ - sort_node_map(); + for (i = 0; i < mi->nr_blks; i++) { + struct numa_memblk *mb = &mi->blk[i]; + memblock_set_node(mb->start, mb->end - mb->start, mb->nid); + } /* * If sections array is gonna be used for pfn -> nid mapping, check @@ -545,6 +534,8 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) setup_node_data(nid, start, end); } + /* Dump memblock with node info and return. */ + memblock_dump_all(); return 0; } @@ -582,7 +573,7 @@ static int __init numa_init(int (*init_func)(void)) nodes_clear(node_possible_map); nodes_clear(node_online_map); memset(&numa_meminfo, 0, sizeof(numa_meminfo)); - remove_all_active_ranges(); + WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES)); numa_reset_distance(); ret = init_func(); diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 3adebe7e536..534255a36b6 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -199,23 +199,23 @@ void __init init_alloc_remap(int nid, u64 start, u64 end) /* allocate node memory and the lowmem remap area */ node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES); - if (node_pa == MEMBLOCK_ERROR) { + if (!node_pa) { pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n", size, nid); return; } - memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM"); + memblock_reserve(node_pa, size); remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT, size, LARGE_PAGE_BYTES); - if (remap_pa == MEMBLOCK_ERROR) { + if (!remap_pa) { pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n", size, nid); - memblock_x86_free_range(node_pa, node_pa + size); + memblock_free(node_pa, size); return; } - memblock_x86_reserve_range(remap_pa, remap_pa + size, "KVA PG"); + memblock_reserve(remap_pa, size); remap_va = phys_to_virt(remap_pa); /* perform actual remap */ diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index dd27f401f0a..92e27119ee1 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -19,7 +19,7 @@ unsigned long __init numa_free_all_bootmem(void) for_each_online_node(i) pages += free_all_bootmem_node(NODE_DATA(i)); - pages += free_all_memory_core_early(MAX_NUMNODES); + pages += free_low_memory_core_early(MAX_NUMNODES); return pages; } diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index d0ed086b624..46db56845f1 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c @@ -28,6 +28,16 @@ static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi) return -ENOENT; } +static u64 mem_hole_size(u64 start, u64 end) +{ + unsigned long start_pfn = PFN_UP(start); + unsigned long end_pfn = PFN_DOWN(end); + + if (start_pfn < end_pfn) + return PFN_PHYS(absent_pages_in_range(start_pfn, end_pfn)); + return 0; +} + /* * Sets up nid to range from @start to @end. The return value is -errno if * something went wrong, 0 otherwise. @@ -89,7 +99,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei, * Calculate target node size. x86_32 freaks on __udivdi3() so do * the division in ulong number of pages and convert back. */ - size = max_addr - addr - memblock_x86_hole_size(addr, max_addr); + size = max_addr - addr - mem_hole_size(addr, max_addr); size = PFN_PHYS((unsigned long)(size >> PAGE_SHIFT) / nr_nodes); /* @@ -135,8 +145,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei, * Continue to add memory to this fake node if its * non-reserved memory is less than the per-node size. */ - while (end - start - - memblock_x86_hole_size(start, end) < size) { + while (end - start - mem_hole_size(start, end) < size) { end += FAKE_NODE_MIN_SIZE; if (end > limit) { end = limit; @@ -150,7 +159,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei, * this one must extend to the boundary. */ if (end < dma32_end && dma32_end - end - - memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) + mem_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) end = dma32_end; /* @@ -158,8 +167,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei, * next node, this one must extend to the end of the * physical node. */ - if (limit - end - - memblock_x86_hole_size(end, limit) < size) + if (limit - end - mem_hole_size(end, limit) < size) end = limit; ret = emu_setup_memblk(ei, pi, nid++ % nr_nodes, @@ -180,7 +188,7 @@ static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size) { u64 end = start + size; - while (end - start - memblock_x86_hole_size(start, end) < size) { + while (end - start - mem_hole_size(start, end) < size) { end += FAKE_NODE_MIN_SIZE; if (end > max_addr) { end = max_addr; @@ -211,8 +219,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei, * creates a uniform distribution of node sizes across the entire * machine (but not necessarily over physical nodes). */ - min_size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / - MAX_NUMNODES; + min_size = (max_addr - addr - mem_hole_size(addr, max_addr)) / MAX_NUMNODES; min_size = max(min_size, FAKE_NODE_MIN_SIZE); if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size) min_size = (min_size + FAKE_NODE_MIN_SIZE) & @@ -252,7 +259,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei, * this one must extend to the boundary. */ if (end < dma32_end && dma32_end - end - - memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) + mem_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) end = dma32_end; /* @@ -260,8 +267,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei, * next node, this one must extend to the end of the * physical node. */ - if (limit - end - - memblock_x86_hole_size(end, limit) < size) + if (limit - end - mem_hole_size(end, limit) < size) end = limit; ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES, @@ -351,11 +357,11 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt) phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), phys_size, PAGE_SIZE); - if (phys == MEMBLOCK_ERROR) { + if (!phys) { pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n"); goto no_emu; } - memblock_x86_reserve_range(phys, phys + phys_size, "TMP NUMA DIST"); + memblock_reserve(phys, phys_size); phys_dist = __va(phys); for (i = 0; i < numa_dist_cnt; i++) @@ -424,7 +430,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt) /* free the copied physical distance table */ if (phys_dist) - memblock_x86_free_range(__pa(phys_dist), __pa(phys_dist) + phys_size); + memblock_free(__pa(phys_dist), phys_size); return; no_emu: diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index bfab3fa10ed..7b65f752c5f 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -568,8 +568,8 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; break; } if (filter[i].jt != 0) { - if (filter[i].jf) - t_offset += is_near(f_offset) ? 2 : 6; + if (filter[i].jf && f_offset) + t_offset += is_near(f_offset) ? 2 : 5; EMIT_COND_JMP(t_op, t_offset); if (filter[i].jf) EMIT_JMP(f_offset); diff --git a/arch/x86/oprofile/Makefile b/arch/x86/oprofile/Makefile index 446902b2a6b..1599f568f0e 100644 --- a/arch/x86/oprofile/Makefile +++ b/arch/x86/oprofile/Makefile @@ -4,9 +4,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ oprof.o cpu_buffer.o buffer_sync.o \ event_buffer.o oprofile_files.o \ oprofilefs.o oprofile_stats.o \ - timer_int.o ) + timer_int.o nmi_timer_int.o ) oprofile-y := $(DRIVER_OBJS) init.o backtrace.o oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \ op_model_ppro.o op_model_p4.o -oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o diff --git a/arch/x86/oprofile/init.c b/arch/x86/oprofile/init.c index f148cf65267..9e138d00ad3 100644 --- a/arch/x86/oprofile/init.c +++ b/arch/x86/oprofile/init.c @@ -16,37 +16,23 @@ * with the NMI mode driver. */ +#ifdef CONFIG_X86_LOCAL_APIC extern int op_nmi_init(struct oprofile_operations *ops); -extern int op_nmi_timer_init(struct oprofile_operations *ops); extern void op_nmi_exit(void); -extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); +#else +static int op_nmi_init(struct oprofile_operations *ops) { return -ENODEV; } +static void op_nmi_exit(void) { } +#endif -static int nmi_timer; +extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); int __init oprofile_arch_init(struct oprofile_operations *ops) { - int ret; - - ret = -ENODEV; - -#ifdef CONFIG_X86_LOCAL_APIC - ret = op_nmi_init(ops); -#endif - nmi_timer = (ret != 0); -#ifdef CONFIG_X86_IO_APIC - if (nmi_timer) - ret = op_nmi_timer_init(ops); -#endif ops->backtrace = x86_backtrace; - - return ret; + return op_nmi_init(ops); } - void oprofile_arch_exit(void) { -#ifdef CONFIG_X86_LOCAL_APIC - if (!nmi_timer) - op_nmi_exit(); -#endif + op_nmi_exit(); } diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 75f9528e037..26b8a8514ee 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -595,24 +595,36 @@ static int __init p4_init(char **cpu_type) return 0; } -static int force_arch_perfmon; -static int force_cpu_type(const char *str, struct kernel_param *kp) +enum __force_cpu_type { + reserved = 0, /* do not force */ + timer, + arch_perfmon, +}; + +static int force_cpu_type; + +static int set_cpu_type(const char *str, struct kernel_param *kp) { - if (!strcmp(str, "arch_perfmon")) { - force_arch_perfmon = 1; + if (!strcmp(str, "timer")) { + force_cpu_type = timer; + printk(KERN_INFO "oprofile: forcing NMI timer mode\n"); + } else if (!strcmp(str, "arch_perfmon")) { + force_cpu_type = arch_perfmon; printk(KERN_INFO "oprofile: forcing architectural perfmon\n"); + } else { + force_cpu_type = 0; } return 0; } -module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0); +module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0); static int __init ppro_init(char **cpu_type) { __u8 cpu_model = boot_cpu_data.x86_model; struct op_x86_model_spec *spec = &op_ppro_spec; /* default */ - if (force_arch_perfmon && cpu_has_arch_perfmon) + if (force_cpu_type == arch_perfmon && cpu_has_arch_perfmon) return 0; /* @@ -679,6 +691,9 @@ int __init op_nmi_init(struct oprofile_operations *ops) if (!cpu_has_apic) return -ENODEV; + if (force_cpu_type == timer) + return -ENODEV; + switch (vendor) { case X86_VENDOR_AMD: /* Needs to be at least an Athlon (or hammer in 32bit mode) */ diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c deleted file mode 100644 index 7f8052cd662..00000000000 --- a/arch/x86/oprofile/nmi_timer_int.c +++ /dev/null @@ -1,50 +0,0 @@ -/** - * @file nmi_timer_int.c - * - * @remark Copyright 2003 OProfile authors - * @remark Read the file COPYING - * - * @author Zwane Mwaikambo <zwane@linuxpower.ca> - */ - -#include <linux/init.h> -#include <linux/smp.h> -#include <linux/errno.h> -#include <linux/oprofile.h> -#include <linux/rcupdate.h> -#include <linux/kdebug.h> - -#include <asm/nmi.h> -#include <asm/apic.h> -#include <asm/ptrace.h> - -static int profile_timer_exceptions_notify(unsigned int val, struct pt_regs *regs) -{ - oprofile_add_sample(regs, 0); - return NMI_HANDLED; -} - -static int timer_start(void) -{ - if (register_nmi_handler(NMI_LOCAL, profile_timer_exceptions_notify, - 0, "oprofile-timer")) - return 1; - return 0; -} - - -static void timer_stop(void) -{ - unregister_nmi_handler(NMI_LOCAL, "oprofile-timer"); - synchronize_sched(); /* Allow already-started NMIs to complete. */ -} - - -int __init op_nmi_timer_init(struct oprofile_operations *ops) -{ - ops->start = timer_start; - ops->stop = timer_stop; - ops->cpu_type = "timer"; - printk(KERN_INFO "oprofile: using NMI timer interrupt.\n"); - return 0; -} diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 37718f0f053..4a01967f02e 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -352,8 +352,7 @@ void __init efi_memblock_x86_reserve_range(void) boot_params.efi_info.efi_memdesc_size; memmap.desc_version = boot_params.efi_info.efi_memdesc_version; memmap.desc_size = boot_params.efi_info.efi_memdesc_size; - memblock_x86_reserve_range(pmap, pmap + memmap.nr_map * memmap.desc_size, - "EFI memmap"); + memblock_reserve(pmap, memmap.nr_map * memmap.desc_size); } #if EFI_DEBUG @@ -397,16 +396,14 @@ void __init efi_reserve_boot_services(void) if ((start+size >= virt_to_phys(_text) && start <= virt_to_phys(_end)) || !e820_all_mapped(start, start+size, E820_RAM) || - memblock_x86_check_reserved_size(&start, &size, - 1<<EFI_PAGE_SHIFT)) { + memblock_is_region_reserved(start, size)) { /* Could not reserve, skip it */ md->num_pages = 0; memblock_dbg(PFX "Could not reserve boot range " "[0x%010llx-0x%010llx]\n", start, start+size-1); } else - memblock_x86_reserve_range(start, start+size, - "EFI Boot"); + memblock_reserve(start, size); } } diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile index f8208267733..d511aa97533 100644 --- a/arch/x86/tools/Makefile +++ b/arch/x86/tools/Makefile @@ -18,14 +18,21 @@ chkobjdump = $(srctree)/arch/x86/tools/chkobjdump.awk quiet_cmd_posttest = TEST $@ cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose) -posttest: $(obj)/test_get_len vmlinux +quiet_cmd_sanitytest = TEST $@ + cmd_sanitytest = $(obj)/insn_sanity $(posttest_64bit) -m 1000000 + +posttest: $(obj)/test_get_len vmlinux $(obj)/insn_sanity $(call cmd,posttest) + $(call cmd,sanitytest) -hostprogs-y := test_get_len +hostprogs-y += test_get_len insn_sanity # -I needed for generated C source and C source which in the kernel tree. HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ +HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ + # Dependencies are also needed. $(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c +$(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index eaf11f52fc0..5f6a5b6c3a1 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -47,7 +47,7 @@ BEGIN { sep_expr = "^\\|$" group_expr = "^Grp[0-9A-Za-z]+" - imm_expr = "^[IJAO][a-z]" + imm_expr = "^[IJAOL][a-z]" imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" @@ -59,6 +59,7 @@ BEGIN { imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)" imm_flag["Ob"] = "INAT_MOFFSET" imm_flag["Ov"] = "INAT_MOFFSET" + imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])" force64_expr = "\\([df]64\\)" @@ -70,8 +71,12 @@ BEGIN { lprefix3_expr = "\\(F2\\)" max_lprefix = 4 - vexok_expr = "\\(VEX\\)" - vexonly_expr = "\\(oVEX\\)" + # All opcodes starting with lower-case 'v' or with (v1) superscript + # accepts VEX prefix + vexok_opcode_expr = "^v.*" + vexok_expr = "\\(v1\\)" + # All opcodes with (v) superscript supports *only* VEX prefix + vexonly_expr = "\\(v\\)" prefix_expr = "\\(Prefix\\)" prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" @@ -85,8 +90,8 @@ BEGIN { prefix_num["SEG=GS"] = "INAT_PFX_GS" prefix_num["SEG=SS"] = "INAT_PFX_SS" prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" - prefix_num["2bytes-VEX"] = "INAT_PFX_VEX2" - prefix_num["3bytes-VEX"] = "INAT_PFX_VEX3" + prefix_num["VEX+1byte"] = "INAT_PFX_VEX2" + prefix_num["VEX+2byte"] = "INAT_PFX_VEX3" clear_vars() } @@ -310,12 +315,10 @@ function convert_operands(count,opnd, i,j,imm,mod) if (match(opcode, fpu_expr)) flags = add_flags(flags, "INAT_MODRM") - # check VEX only code + # check VEX codes if (match(ext, vexonly_expr)) flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") - - # check VEX only code - if (match(ext, vexok_expr)) + else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr)) flags = add_flags(flags, "INAT_VEXOK") # check prefixes diff --git a/arch/x86/tools/insn_sanity.c b/arch/x86/tools/insn_sanity.c new file mode 100644 index 00000000000..cc2f8c13128 --- /dev/null +++ b/arch/x86/tools/insn_sanity.c @@ -0,0 +1,275 @@ +/* + * x86 decoder sanity test - based on test_get_insn.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2009 + * Copyright (C) Hitachi, Ltd., 2011 + */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +#define unlikely(cond) (cond) +#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0])) + +#include <asm/insn.h> +#include <inat.c> +#include <insn.c> + +/* + * Test of instruction analysis against tampering. + * Feed random binary to instruction decoder and ensure not to + * access out-of-instruction-buffer. + */ + +#define DEFAULT_MAX_ITER 10000 +#define INSN_NOP 0x90 + +static const char *prog; /* Program name */ +static int verbose; /* Verbosity */ +static int x86_64; /* x86-64 bit mode flag */ +static unsigned int seed; /* Random seed */ +static unsigned long iter_start; /* Start of iteration number */ +static unsigned long iter_end = DEFAULT_MAX_ITER; /* End of iteration number */ +static FILE *input_file; /* Input file name */ + +static void usage(const char *err) +{ + if (err) + fprintf(stderr, "Error: %s\n\n", err); + fprintf(stderr, "Usage: %s [-y|-n|-v] [-s seed[,no]] [-m max] [-i input]\n", prog); + fprintf(stderr, "\t-y 64bit mode\n"); + fprintf(stderr, "\t-n 32bit mode\n"); + fprintf(stderr, "\t-v Verbosity(-vv dumps any decoded result)\n"); + fprintf(stderr, "\t-s Give a random seed (and iteration number)\n"); + fprintf(stderr, "\t-m Give a maximum iteration number\n"); + fprintf(stderr, "\t-i Give an input file with decoded binary\n"); + exit(1); +} + +static void dump_field(FILE *fp, const char *name, const char *indent, + struct insn_field *field) +{ + fprintf(fp, "%s.%s = {\n", indent, name); + fprintf(fp, "%s\t.value = %d, bytes[] = {%x, %x, %x, %x},\n", + indent, field->value, field->bytes[0], field->bytes[1], + field->bytes[2], field->bytes[3]); + fprintf(fp, "%s\t.got = %d, .nbytes = %d},\n", indent, + field->got, field->nbytes); +} + +static void dump_insn(FILE *fp, struct insn *insn) +{ + fprintf(fp, "Instruction = {\n"); + dump_field(fp, "prefixes", "\t", &insn->prefixes); + dump_field(fp, "rex_prefix", "\t", &insn->rex_prefix); + dump_field(fp, "vex_prefix", "\t", &insn->vex_prefix); + dump_field(fp, "opcode", "\t", &insn->opcode); + dump_field(fp, "modrm", "\t", &insn->modrm); + dump_field(fp, "sib", "\t", &insn->sib); + dump_field(fp, "displacement", "\t", &insn->displacement); + dump_field(fp, "immediate1", "\t", &insn->immediate1); + dump_field(fp, "immediate2", "\t", &insn->immediate2); + fprintf(fp, "\t.attr = %x, .opnd_bytes = %d, .addr_bytes = %d,\n", + insn->attr, insn->opnd_bytes, insn->addr_bytes); + fprintf(fp, "\t.length = %d, .x86_64 = %d, .kaddr = %p}\n", + insn->length, insn->x86_64, insn->kaddr); +} + +static void dump_stream(FILE *fp, const char *msg, unsigned long nr_iter, + unsigned char *insn_buf, struct insn *insn) +{ + int i; + + fprintf(fp, "%s:\n", msg); + + dump_insn(fp, insn); + + fprintf(fp, "You can reproduce this with below command(s);\n"); + + /* Input a decoded instruction sequence directly */ + fprintf(fp, " $ echo "); + for (i = 0; i < MAX_INSN_SIZE; i++) + fprintf(fp, " %02x", insn_buf[i]); + fprintf(fp, " | %s -i -\n", prog); + + if (!input_file) { + fprintf(fp, "Or \n"); + /* Give a seed and iteration number */ + fprintf(fp, " $ %s -s 0x%x,%lu\n", prog, seed, nr_iter); + } +} + +static void init_random_seed(void) +{ + int fd; + + fd = open("/dev/urandom", O_RDONLY); + if (fd < 0) + goto fail; + + if (read(fd, &seed, sizeof(seed)) != sizeof(seed)) + goto fail; + + close(fd); + return; +fail: + usage("Failed to open /dev/urandom"); +} + +/* Read given instruction sequence from the input file */ +static int read_next_insn(unsigned char *insn_buf) +{ + char buf[256] = "", *tmp; + int i; + + tmp = fgets(buf, ARRAY_SIZE(buf), input_file); + if (tmp == NULL || feof(input_file)) + return 0; + + for (i = 0; i < MAX_INSN_SIZE; i++) { + insn_buf[i] = (unsigned char)strtoul(tmp, &tmp, 16); + if (*tmp != ' ') + break; + } + + return i; +} + +static int generate_insn(unsigned char *insn_buf) +{ + int i; + + if (input_file) + return read_next_insn(insn_buf); + + /* Fills buffer with random binary up to MAX_INSN_SIZE */ + for (i = 0; i < MAX_INSN_SIZE - 1; i += 2) + *(unsigned short *)(&insn_buf[i]) = random() & 0xffff; + + while (i < MAX_INSN_SIZE) + insn_buf[i++] = random() & 0xff; + + return i; +} + +static void parse_args(int argc, char **argv) +{ + int c; + char *tmp = NULL; + int set_seed = 0; + + prog = argv[0]; + while ((c = getopt(argc, argv, "ynvs:m:i:")) != -1) { + switch (c) { + case 'y': + x86_64 = 1; + break; + case 'n': + x86_64 = 0; + break; + case 'v': + verbose++; + break; + case 'i': + if (strcmp("-", optarg) == 0) + input_file = stdin; + else + input_file = fopen(optarg, "r"); + if (!input_file) + usage("Failed to open input file"); + break; + case 's': + seed = (unsigned int)strtoul(optarg, &tmp, 0); + if (*tmp == ',') { + optarg = tmp + 1; + iter_start = strtoul(optarg, &tmp, 0); + } + if (*tmp != '\0' || tmp == optarg) + usage("Failed to parse seed"); + set_seed = 1; + break; + case 'm': + iter_end = strtoul(optarg, &tmp, 0); + if (*tmp != '\0' || tmp == optarg) + usage("Failed to parse max_iter"); + break; + default: + usage(NULL); + } + } + + /* Check errors */ + if (iter_end < iter_start) + usage("Max iteration number must be bigger than iter-num"); + + if (set_seed && input_file) + usage("Don't use input file (-i) with random seed (-s)"); + + /* Initialize random seed */ + if (!input_file) { + if (!set_seed) /* No seed is given */ + init_random_seed(); + srand(seed); + } +} + +int main(int argc, char **argv) +{ + struct insn insn; + int insns = 0; + int errors = 0; + unsigned long i; + unsigned char insn_buf[MAX_INSN_SIZE * 2]; + + parse_args(argc, argv); + + /* Prepare stop bytes with NOPs */ + memset(insn_buf + MAX_INSN_SIZE, INSN_NOP, MAX_INSN_SIZE); + + for (i = 0; i < iter_end; i++) { + if (generate_insn(insn_buf) <= 0) + break; + + if (i < iter_start) /* Skip to given iteration number */ + continue; + + /* Decode an instruction */ + insn_init(&insn, insn_buf, x86_64); + insn_get_length(&insn); + + if (insn.next_byte <= insn.kaddr || + insn.kaddr + MAX_INSN_SIZE < insn.next_byte) { + /* Access out-of-range memory */ + dump_stream(stderr, "Error: Found an access violation", i, insn_buf, &insn); + errors++; + } else if (verbose && !insn_complete(&insn)) + dump_stream(stdout, "Info: Found an undecodable input", i, insn_buf, &insn); + else if (verbose >= 2) + dump_insn(stdout, &insn); + insns++; + } + + fprintf(stdout, "%s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n", (errors) ? "Failure" : "Success", insns, (input_file) ? "given" : "random", errors, seed); + + return errors ? 1 : 0; +} diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 1f928659c33..12eb07bfb26 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1215,8 +1215,6 @@ asmlinkage void __init xen_start_kernel(void) local_irq_disable(); early_boot_irqs_disabled = true; - memblock_init(); - xen_raw_console_write("mapping kernel into physical memory\n"); pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); xen_ident_map_ISA(); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 87f6673b120..f4bf8aa574f 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1774,10 +1774,8 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, __xen_write_cr3(true, __pa(pgd)); xen_mc_issue(PARAVIRT_LAZY_CPU); - memblock_x86_reserve_range(__pa(xen_start_info->pt_base), - __pa(xen_start_info->pt_base + - xen_start_info->nr_pt_frames * PAGE_SIZE), - "XEN PAGETABLES"); + memblock_reserve(__pa(xen_start_info->pt_base), + xen_start_info->nr_pt_frames * PAGE_SIZE); return pgd; } @@ -1853,10 +1851,8 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, PFN_DOWN(__pa(initial_page_table))); xen_write_cr3(__pa(initial_page_table)); - memblock_x86_reserve_range(__pa(xen_start_info->pt_base), - __pa(xen_start_info->pt_base + - xen_start_info->nr_pt_frames * PAGE_SIZE), - "XEN PAGETABLES"); + memblock_reserve(__pa(xen_start_info->pt_base), + xen_start_info->nr_pt_frames * PAGE_SIZE)); return initial_page_table; } diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 1093f80c162..e03c6369217 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -75,7 +75,7 @@ static void __init xen_add_extra_mem(u64 start, u64 size) if (i == XEN_EXTRA_MEM_MAX_REGIONS) printk(KERN_WARNING "Warning: not enough extra memory regions\n"); - memblock_x86_reserve_range(start, start + size, "XEN EXTRA"); + memblock_reserve(start, size); xen_max_p2m_pfn = PFN_DOWN(start + size); @@ -173,9 +173,21 @@ static unsigned long __init xen_get_max_pages(void) domid_t domid = DOMID_SELF; int ret; - ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid); - if (ret > 0) - max_pages = ret; + /* + * For the initial domain we use the maximum reservation as + * the maximum page. + * + * For guest domains the current maximum reservation reflects + * the current maximum rather than the static maximum. In this + * case the e820 map provided to us will cover the static + * maximum region. + */ + if (xen_initial_domain()) { + ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid); + if (ret > 0) + max_pages = ret; + } + return min(max_pages, MAX_DOMAIN_PAGES); } @@ -299,9 +311,8 @@ char * __init xen_memory_setup(void) * - xen_start_info * See comment above "struct start_info" in <xen/interface/xen.h> */ - memblock_x86_reserve_range(__pa(xen_start_info->mfn_list), - __pa(xen_start_info->pt_base), - "XEN START INFO"); + memblock_reserve(__pa(xen_start_info->mfn_list), + xen_start_info->pt_base - xen_start_info->mfn_list); sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); |