summaryrefslogtreecommitdiff
path: root/arch/powerpc
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/Makefile2
-rw-r--r--arch/powerpc/boot/dts/a3m071.dts144
-rw-r--r--arch/powerpc/boot/dts/fsl/p5020si-post.dtsi1
-rw-r--r--arch/powerpc/boot/dts/fsl/p5020si-pre.dtsi6
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-raid1.0-0.dtsi85
-rw-r--r--arch/powerpc/configs/pseries_defconfig2
-rw-r--r--arch/powerpc/include/asm/Kbuild2
-rw-r--r--arch/powerpc/include/asm/bitops.h75
-rw-r--r--arch/powerpc/include/asm/cputable.h12
-rw-r--r--arch/powerpc/include/asm/cputime.h2
-rw-r--r--arch/powerpc/include/asm/dbell.h2
-rw-r--r--arch/powerpc/include/asm/dma-mapping.h1
-rw-r--r--arch/powerpc/include/asm/epapr_hcalls.h83
-rw-r--r--arch/powerpc/include/asm/exception-64s.h97
-rw-r--r--arch/powerpc/include/asm/firmware.h4
-rw-r--r--arch/powerpc/include/asm/fsl_gtm.h2
-rw-r--r--arch/powerpc/include/asm/fsl_guts.h4
-rw-r--r--arch/powerpc/include/asm/fsl_hcalls.h36
-rw-r--r--arch/powerpc/include/asm/hvcall.h23
-rw-r--r--arch/powerpc/include/asm/immap_qe.h2
-rw-r--r--arch/powerpc/include/asm/kvm_asm.h1
-rw-r--r--arch/powerpc/include/asm/kvm_book3s.h12
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h33
-rw-r--r--arch/powerpc/include/asm/kvm_booke_hv_asm.h29
-rw-r--r--arch/powerpc/include/asm/kvm_host.h68
-rw-r--r--arch/powerpc/include/asm/kvm_para.h15
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h87
-rw-r--r--arch/powerpc/include/asm/machdep.h39
-rw-r--r--arch/powerpc/include/asm/mmu-book3e.h2
-rw-r--r--arch/powerpc/include/asm/mmu-hash64.h10
-rw-r--r--arch/powerpc/include/asm/mmu.h1
-rw-r--r--arch/powerpc/include/asm/oprofile_impl.h2
-rw-r--r--arch/powerpc/include/asm/pSeries_reconfig.h47
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h9
-rw-r--r--arch/powerpc/include/asm/prom.h16
-rw-r--r--arch/powerpc/include/asm/pte-hash64-64k.h2
-rw-r--r--arch/powerpc/include/asm/qe.h2
-rw-r--r--arch/powerpc/include/asm/qe_ic.h2
-rw-r--r--arch/powerpc/include/asm/reg.h4
-rw-r--r--arch/powerpc/include/asm/reg_booke.h7
-rw-r--r--arch/powerpc/include/asm/rtas.h5
-rw-r--r--arch/powerpc/include/asm/setup.h29
-rw-r--r--arch/powerpc/include/asm/smp.h8
-rw-r--r--arch/powerpc/include/asm/smu.h4
-rw-r--r--arch/powerpc/include/asm/systbl.h3
-rw-r--r--arch/powerpc/include/asm/ucc.h2
-rw-r--r--arch/powerpc/include/asm/ucc_fast.h2
-rw-r--r--arch/powerpc/include/asm/ucc_slow.h2
-rw-r--r--arch/powerpc/include/asm/udbg.h1
-rw-r--r--arch/powerpc/include/asm/unistd.h3
-rw-r--r--arch/powerpc/include/uapi/asm/Kbuild1
-rw-r--r--arch/powerpc/include/uapi/asm/epapr_hcalls.h98
-rw-r--r--arch/powerpc/include/uapi/asm/ioctls.h3
-rw-r--r--arch/powerpc/include/uapi/asm/kvm.h86
-rw-r--r--arch/powerpc/include/uapi/asm/kvm_para.h7
-rw-r--r--arch/powerpc/include/uapi/asm/setup.h31
-rw-r--r--arch/powerpc/include/uapi/asm/socket.h1
-rw-r--r--arch/powerpc/include/uapi/asm/unistd.h1
-rw-r--r--arch/powerpc/kernel/Makefile2
-rw-r--r--arch/powerpc/kernel/asm-offsets.c4
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.S (renamed from arch/powerpc/kernel/cpu_setup_power7.S)32
-rw-r--r--arch/powerpc/kernel/cputable.c38
-rw-r--r--arch/powerpc/kernel/entry_64.S2
-rw-r--r--arch/powerpc/kernel/epapr_hcalls.S28
-rw-r--r--arch/powerpc/kernel/epapr_paravirt.c11
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S308
-rw-r--r--arch/powerpc/kernel/head_64.S6
-rw-r--r--arch/powerpc/kernel/idle.c3
-rw-r--r--arch/powerpc/kernel/iommu.c16
-rw-r--r--arch/powerpc/kernel/kvm.c2
-rw-r--r--arch/powerpc/kernel/legacy_serial.c2
-rw-r--r--arch/powerpc/kernel/machine_kexec.c14
-rw-r--r--arch/powerpc/kernel/machine_kexec_64.c8
-rw-r--r--arch/powerpc/kernel/of_platform.c2
-rw-r--r--arch/powerpc/kernel/pci-common.c4
-rw-r--r--arch/powerpc/kernel/pci_32.c2
-rw-r--r--arch/powerpc/kernel/pci_64.c4
-rw-r--r--arch/powerpc/kernel/ppc_ksyms.c5
-rw-r--r--arch/powerpc/kernel/prom.c7
-rw-r--r--arch/powerpc/kernel/prom_init.c11
-rw-r--r--arch/powerpc/kernel/ptrace.c90
-rw-r--r--arch/powerpc/kernel/rtas.c1
-rw-r--r--arch/powerpc/kernel/rtas_flash.c4
-rw-r--r--arch/powerpc/kernel/setup_64.c5
-rw-r--r--arch/powerpc/kernel/signal.c4
-rw-r--r--arch/powerpc/kernel/signal_64.c2
-rw-r--r--arch/powerpc/kernel/smp.c46
-rw-r--r--arch/powerpc/kernel/sys_ppc32.c17
-rw-r--r--arch/powerpc/kernel/sysfs.c4
-rw-r--r--arch/powerpc/kernel/time.c20
-rw-r--r--arch/powerpc/kernel/udbg.c23
-rw-r--r--arch/powerpc/kernel/uprobes.c6
-rw-r--r--arch/powerpc/kvm/44x.c1
-rw-r--r--arch/powerpc/kvm/44x_emulate.c112
-rw-r--r--arch/powerpc/kvm/Kconfig4
-rw-r--r--arch/powerpc/kvm/Makefile5
-rw-r--r--arch/powerpc/kvm/book3s.c125
-rw-r--r--arch/powerpc/kvm/book3s_32_mmu_host.c3
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_host.c3
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c474
-rw-r--r--arch/powerpc/kvm/book3s_emulate.c16
-rw-r--r--arch/powerpc/kvm/book3s_exports.c3
-rw-r--r--arch/powerpc/kvm/book3s_hv.c655
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c4
-rw-r--r--arch/powerpc/kvm/book3s_hv_ras.c144
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c143
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S142
-rw-r--r--arch/powerpc/kvm/book3s_mmu_hpte.c5
-rw-r--r--arch/powerpc/kvm/book3s_pr.c294
-rw-r--r--arch/powerpc/kvm/book3s_rmhandlers.S18
-rw-r--r--arch/powerpc/kvm/booke.c346
-rw-r--r--arch/powerpc/kvm/booke.h1
-rw-r--r--arch/powerpc/kvm/booke_emulate.c36
-rw-r--r--arch/powerpc/kvm/bookehv_interrupts.S145
-rw-r--r--arch/powerpc/kvm/e500.h11
-rw-r--r--arch/powerpc/kvm/e500_emulate.c14
-rw-r--r--arch/powerpc/kvm/e500_tlb.c132
-rw-r--r--arch/powerpc/kvm/emulate.c221
-rw-r--r--arch/powerpc/kvm/powerpc.c187
-rw-r--r--arch/powerpc/kvm/trace.h200
-rw-r--r--arch/powerpc/mm/fault.c27
-rw-r--r--arch/powerpc/mm/numa.c12
-rw-r--r--arch/powerpc/mm/slice.c2
-rw-r--r--arch/powerpc/mm/tlb_nohash_low.S15
-rw-r--r--arch/powerpc/net/bpf_jit.h6
-rw-r--r--arch/powerpc/net/bpf_jit_comp.c25
-rw-r--r--arch/powerpc/perf/power7-pmu.c17
-rw-r--r--arch/powerpc/platforms/512x/Kconfig1
-rw-r--r--arch/powerpc/platforms/512x/mpc5121_ads.c3
-rw-r--r--arch/powerpc/platforms/512x/mpc512x.h11
-rw-r--r--arch/powerpc/platforms/512x/mpc512x_shared.c25
-rw-r--r--arch/powerpc/platforms/52xx/lite5200.c2
-rw-r--r--arch/powerpc/platforms/52xx/mpc5200_simple.c1
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_gpt.c2
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c16
-rw-r--r--arch/powerpc/platforms/82xx/pq2ads-pci-pic.c8
-rw-r--r--arch/powerpc/platforms/83xx/mpc832x_mds.c2
-rw-r--r--arch/powerpc/platforms/83xx/mpc836x_mds.c2
-rw-r--r--arch/powerpc/platforms/83xx/mpc836x_rdk.c2
-rw-r--r--arch/powerpc/platforms/83xx/mpc837x_rdb.c2
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx_mds.c2
-rw-r--r--arch/powerpc/platforms/85xx/p1022_ds.c8
-rw-r--r--arch/powerpc/platforms/85xx/smp.c49
-rw-r--r--arch/powerpc/platforms/86xx/mpc8610_hpcd.c2
-rw-r--r--arch/powerpc/platforms/Kconfig1
-rw-r--r--arch/powerpc/platforms/cell/celleb_pci.c4
-rw-r--r--arch/powerpc/platforms/cell/iommu.c2
-rw-r--r--arch/powerpc/platforms/cell/spider-pic.c6
-rw-r--r--arch/powerpc/platforms/cell/spufs/sched.c2
-rw-r--r--arch/powerpc/platforms/powermac/cpufreq_32.c5
-rw-r--r--arch/powerpc/platforms/powermac/pfunc_core.c2
-rw-r--r--arch/powerpc/platforms/powermac/pic.c2
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c25
-rw-r--r--arch/powerpc/platforms/ps3/os-area.c6
-rw-r--r--arch/powerpc/platforms/pseries/dlpar.c34
-rw-r--r--arch/powerpc/platforms/pseries/firmware.c1
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c8
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c60
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c10
-rw-r--r--arch/powerpc/platforms/pseries/mobility.c4
-rw-r--r--arch/powerpc/platforms/pseries/plpar_wrappers.h31
-rw-r--r--arch/powerpc/platforms/pseries/processor_idle.c4
-rw-r--r--arch/powerpc/platforms/pseries/reconfig.c122
-rw-r--r--arch/powerpc/platforms/pseries/setup.c77
-rw-r--r--arch/powerpc/platforms/pseries/smp.c1
-rw-r--r--arch/powerpc/sysdev/fsl_gtm.c2
-rw-r--r--arch/powerpc/sysdev/fsl_msi.c9
-rw-r--r--arch/powerpc/sysdev/fsl_pci.c39
-rw-r--r--arch/powerpc/sysdev/fsl_soc.c2
-rw-r--r--arch/powerpc/sysdev/pmi.c13
-rw-r--r--arch/powerpc/sysdev/qe_lib/qe.c2
-rw-r--r--arch/powerpc/sysdev/qe_lib/qe_ic.c2
-rw-r--r--arch/powerpc/sysdev/qe_lib/qe_ic.h2
-rw-r--r--arch/powerpc/sysdev/qe_lib/qe_io.c2
-rw-r--r--arch/powerpc/sysdev/qe_lib/ucc.c2
-rw-r--r--arch/powerpc/sysdev/qe_lib/ucc_fast.c2
-rw-r--r--arch/powerpc/sysdev/qe_lib/ucc_slow.c2
-rw-r--r--arch/powerpc/sysdev/qe_lib/usb.c2
-rw-r--r--arch/powerpc/sysdev/scom.c2
-rw-r--r--arch/powerpc/xmon/Makefile2
-rw-r--r--arch/powerpc/xmon/nonstdio.c53
-rw-r--r--arch/powerpc/xmon/nonstdio.h6
-rw-r--r--arch/powerpc/xmon/start.c34
-rw-r--r--arch/powerpc/xmon/xmon.c26
184 files changed, 4658 insertions, 1632 deletions
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 159e94f4b22..b639852116f 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -181,7 +181,7 @@ $(BOOT_TARGETS2): vmlinux
bootwrapper_install:
$(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
-%.dtb:
+%.dtb: scripts
$(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
define archhelp
diff --git a/arch/powerpc/boot/dts/a3m071.dts b/arch/powerpc/boot/dts/a3m071.dts
new file mode 100644
index 00000000000..877a28cb77e
--- /dev/null
+++ b/arch/powerpc/boot/dts/a3m071.dts
@@ -0,0 +1,144 @@
+/*
+ * a3m071 board Device Tree Source
+ *
+ * Copyright 2012 Stefan Roese <sr@denx.de>
+ *
+ * Copyright (C) 2011 DENX Software Engineering GmbH
+ * Heiko Schocher <hs@denx.de>
+ *
+ * Copyright (C) 2007 Semihalf
+ * Marian Balakowicz <m8@semihalf.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+/include/ "mpc5200b.dtsi"
+
+/ {
+ model = "anonymous,a3m071";
+ compatible = "anonymous,a3m071";
+
+ soc5200@f0000000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,mpc5200b-immr";
+ ranges = <0 0xf0000000 0x0000c000>;
+ reg = <0xf0000000 0x00000100>;
+ bus-frequency = <0>; /* From boot loader */
+ system-frequency = <0>; /* From boot loader */
+
+ timer@600 {
+ fsl,has-wdt;
+ };
+
+ spi@f00 {
+ status = "disabled";
+ };
+
+ usb: usb@1000 {
+ status = "disabled";
+ };
+
+ psc@2000 {
+ compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+ reg = <0x2000 0x100>;
+ interrupts = <2 1 0>;
+ };
+
+ psc@2200 {
+ status = "disabled";
+ };
+
+ psc@2400 {
+ status = "disabled";
+ };
+
+ psc@2600 {
+ status = "disabled";
+ };
+
+ psc@2800 {
+ status = "disabled";
+ };
+
+ psc@2c00 { // PSC6
+ compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart";
+ reg = <0x2c00 0x100>;
+ interrupts = <2 4 0>;
+ };
+
+ ethernet@3000 {
+ phy-handle = <&phy0>;
+ };
+
+ mdio@3000 {
+ phy0: ethernet-phy@3 {
+ reg = <0x03>;
+ };
+ };
+
+ ata@3a00 {
+ status = "disabled";
+ };
+
+ i2c@3d00 {
+ status = "disabled";
+ };
+
+ i2c@3d40 {
+ status = "disabled";
+ };
+ };
+
+ localbus {
+ compatible = "fsl,mpc5200b-lpb","simple-bus";
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0xfc000000 0x02000000
+ 3 0 0xe9000000 0x00080000
+ 5 0 0xe8000000 0x00010000>;
+
+ flash@0,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0 0x0 0x02000000>;
+ compatible = "cfi-flash";
+ bank-width = <2>;
+ partition@0x0 {
+ label = "u-boot";
+ reg = <0x00000000 0x00040000>;
+ read-only;
+ };
+ partition@0x00040000 {
+ label = "env";
+ reg = <0x00040000 0x00020000>;
+ };
+ partition@0x00060000 {
+ label = "dtb";
+ reg = <0x00060000 0x00020000>;
+ };
+ partition@0x00080000 {
+ label = "kernel";
+ reg = <0x00080000 0x00500000>;
+ };
+ partition@0x00580000 {
+ label = "root";
+ reg = <0x00580000 0x00A80000>;
+ };
+ };
+
+ fpga@3,0 {
+ compatible = "anonymous,a3m071-fpga";
+ reg = <3 0x0 0x00080000
+ 5 0x0 0x00010000>;
+ interrupts = <0 0 3>; /* level low */
+ };
+ };
+
+ pci@f0000d00 {
+ status = "disabled";
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi b/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi
index 64b6abea846..5d7205b7bb0 100644
--- a/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi
@@ -354,4 +354,5 @@
/include/ "qoriq-sata2-0.dtsi"
/include/ "qoriq-sata2-1.dtsi"
/include/ "qoriq-sec4.2-0.dtsi"
+/include/ "qoriq-raid1.0-0.dtsi"
};
diff --git a/arch/powerpc/boot/dts/fsl/p5020si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p5020si-pre.dtsi
index 0a198b0a77e..8df47fc45ab 100644
--- a/arch/powerpc/boot/dts/fsl/p5020si-pre.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p5020si-pre.dtsi
@@ -73,6 +73,12 @@
rtic_c = &rtic_c;
rtic_d = &rtic_d;
sec_mon = &sec_mon;
+
+ raideng = &raideng;
+ raideng_jr0 = &raideng_jr0;
+ raideng_jr1 = &raideng_jr1;
+ raideng_jr2 = &raideng_jr2;
+ raideng_jr3 = &raideng_jr3;
};
cpus {
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-raid1.0-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-raid1.0-0.dtsi
new file mode 100644
index 00000000000..8d2e8aa6cf8
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-raid1.0-0.dtsi
@@ -0,0 +1,85 @@
+/*
+ * QorIQ RAID 1.0 device tree stub [ controller @ offset 0x320000 ]
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+raideng: raideng@320000 {
+ compatible = "fsl,raideng-v1.0";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x320000 0x10000>;
+ ranges = <0 0x320000 0x10000>;
+
+ raideng_jq0@1000 {
+ compatible = "fsl,raideng-v1.0-job-queue";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x1000 0x1000>;
+ ranges = <0x0 0x1000 0x1000>;
+
+ raideng_jr0: jr@0 {
+ compatible = "fsl,raideng-v1.0-job-ring", "fsl,raideng-v1.0-hp-ring";
+ reg = <0x0 0x400>;
+ interrupts = <139 2 0 0>;
+ interrupt-parent = <&mpic>;
+ };
+
+ raideng_jr1: jr@400 {
+ compatible = "fsl,raideng-v1.0-job-ring", "fsl,raideng-v1.0-lp-ring";
+ reg = <0x400 0x400>;
+ interrupts = <140 2 0 0>;
+ interrupt-parent = <&mpic>;
+ };
+ };
+
+ raideng_jq1@2000 {
+ compatible = "fsl,raideng-v1.0-job-queue";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x2000 0x1000>;
+ ranges = <0x0 0x2000 0x1000>;
+
+ raideng_jr2: jr@0 {
+ compatible = "fsl,raideng-v1.0-job-ring", "fsl,raideng-v1.0-hp-ring";
+ reg = <0x0 0x400>;
+ interrupts = <141 2 0 0>;
+ interrupt-parent = <&mpic>;
+ };
+
+ raideng_jr3: jr@400 {
+ compatible = "fsl,raideng-v1.0-job-ring", "fsl,raideng-v1.0-lp-ring";
+ reg = <0x400 0x400>;
+ interrupts = <142 2 0 0>;
+ interrupt-parent = <&mpic>;
+ };
+ };
+};
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 1f710a32ffa..5b8e1e50827 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -2,7 +2,7 @@ CONFIG_PPC64=y
CONFIG_ALTIVEC=y
CONFIG_VSX=y
CONFIG_SMP=y
-CONFIG_NR_CPUS=1024
+CONFIG_NR_CPUS=2048
CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index a4fe15e33c6..650757c300d 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -1,4 +1,4 @@
-
generic-y += clkdev.h
generic-y += rwsem.h
+generic-y += trace_clock.h
diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
index dc2cf9c6d9e..ef918a2328b 100644
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -52,8 +52,6 @@
#define smp_mb__before_clear_bit() smp_mb()
#define smp_mb__after_clear_bit() smp_mb()
-#define BITOP_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
-#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
#define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7)
/* Macro for generating the ***_bits() functions */
@@ -83,22 +81,22 @@ DEFINE_BITOP(change_bits, xor, "", "")
static __inline__ void set_bit(int nr, volatile unsigned long *addr)
{
- set_bits(BITOP_MASK(nr), addr + BITOP_WORD(nr));
+ set_bits(BIT_MASK(nr), addr + BIT_WORD(nr));
}
static __inline__ void clear_bit(int nr, volatile unsigned long *addr)
{
- clear_bits(BITOP_MASK(nr), addr + BITOP_WORD(nr));
+ clear_bits(BIT_MASK(nr), addr + BIT_WORD(nr));
}
static __inline__ void clear_bit_unlock(int nr, volatile unsigned long *addr)
{
- clear_bits_unlock(BITOP_MASK(nr), addr + BITOP_WORD(nr));
+ clear_bits_unlock(BIT_MASK(nr), addr + BIT_WORD(nr));
}
static __inline__ void change_bit(int nr, volatile unsigned long *addr)
{
- change_bits(BITOP_MASK(nr), addr + BITOP_WORD(nr));
+ change_bits(BIT_MASK(nr), addr + BIT_WORD(nr));
}
/* Like DEFINE_BITOP(), with changes to the arguments to 'op' and the output
@@ -136,26 +134,26 @@ DEFINE_TESTOP(test_and_change_bits, xor, PPC_ATOMIC_ENTRY_BARRIER,
static __inline__ int test_and_set_bit(unsigned long nr,
volatile unsigned long *addr)
{
- return test_and_set_bits(BITOP_MASK(nr), addr + BITOP_WORD(nr)) != 0;
+ return test_and_set_bits(BIT_MASK(nr), addr + BIT_WORD(nr)) != 0;
}
static __inline__ int test_and_set_bit_lock(unsigned long nr,
volatile unsigned long *addr)
{
- return test_and_set_bits_lock(BITOP_MASK(nr),
- addr + BITOP_WORD(nr)) != 0;
+ return test_and_set_bits_lock(BIT_MASK(nr),
+ addr + BIT_WORD(nr)) != 0;
}
static __inline__ int test_and_clear_bit(unsigned long nr,
volatile unsigned long *addr)
{
- return test_and_clear_bits(BITOP_MASK(nr), addr + BITOP_WORD(nr)) != 0;
+ return test_and_clear_bits(BIT_MASK(nr), addr + BIT_WORD(nr)) != 0;
}
static __inline__ int test_and_change_bit(unsigned long nr,
volatile unsigned long *addr)
{
- return test_and_change_bits(BITOP_MASK(nr), addr + BITOP_WORD(nr)) != 0;
+ return test_and_change_bits(BIT_MASK(nr), addr + BIT_WORD(nr)) != 0;
}
#include <asm-generic/bitops/non-atomic.h>
@@ -280,61 +278,8 @@ unsigned long __arch_hweight64(__u64 w);
#include <asm-generic/bitops/find.h>
/* Little-endian versions */
+#include <asm-generic/bitops/le.h>
-static __inline__ int test_bit_le(unsigned long nr,
- __const__ void *addr)
-{
- __const__ unsigned char *tmp = (__const__ unsigned char *) addr;
- return (tmp[nr >> 3] >> (nr & 7)) & 1;
-}
-
-static inline void set_bit_le(int nr, void *addr)
-{
- set_bit(nr ^ BITOP_LE_SWIZZLE, addr);
-}
-
-static inline void clear_bit_le(int nr, void *addr)
-{
- clear_bit(nr ^ BITOP_LE_SWIZZLE, addr);
-}
-
-static inline void __set_bit_le(int nr, void *addr)
-{
- __set_bit(nr ^ BITOP_LE_SWIZZLE, addr);
-}
-
-static inline void __clear_bit_le(int nr, void *addr)
-{
- __clear_bit(nr ^ BITOP_LE_SWIZZLE, addr);
-}
-
-static inline int test_and_set_bit_le(int nr, void *addr)
-{
- return test_and_set_bit(nr ^ BITOP_LE_SWIZZLE, addr);
-}
-
-static inline int test_and_clear_bit_le(int nr, void *addr)
-{
- return test_and_clear_bit(nr ^ BITOP_LE_SWIZZLE, addr);
-}
-
-static inline int __test_and_set_bit_le(int nr, void *addr)
-{
- return __test_and_set_bit(nr ^ BITOP_LE_SWIZZLE, addr);
-}
-
-static inline int __test_and_clear_bit_le(int nr, void *addr)
-{
- return __test_and_clear_bit(nr ^ BITOP_LE_SWIZZLE, addr);
-}
-
-#define find_first_zero_bit_le(addr, size) \
- find_next_zero_bit_le((addr), (size), 0)
-unsigned long find_next_zero_bit_le(const void *addr,
- unsigned long size, unsigned long offset);
-
-unsigned long find_next_bit_le(const void *addr,
- unsigned long size, unsigned long offset);
/* Bitmap functions for the ext2 filesystem */
#include <asm-generic/bitops/ext2-atomic-setbit.h>
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 21a0687b8c4..76f81bd64f1 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -401,6 +401,14 @@ extern const char *powerpc_base_platform;
CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY)
+#define CPU_FTRS_POWER8 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
+ CPU_FTR_MMCRA | CPU_FTR_SMT | \
+ CPU_FTR_COHERENT_ICACHE | \
+ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
+ CPU_FTR_DSCR | CPU_FTR_SAO | \
+ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
+ CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY)
#define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
@@ -421,8 +429,8 @@ extern const char *powerpc_base_platform;
#define CPU_FTRS_POSSIBLE \
(CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 | \
CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_POWER6 | \
- CPU_FTRS_POWER7 | CPU_FTRS_CELL | CPU_FTRS_PA6T | \
- CPU_FTR_VSX)
+ CPU_FTRS_POWER7 | CPU_FTRS_POWER8 | CPU_FTRS_CELL | \
+ CPU_FTRS_PA6T | CPU_FTR_VSX)
#endif
#else
enum {
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index 487d46ff68a..483733bd06d 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -228,6 +228,8 @@ static inline cputime_t clock_t_to_cputime(const unsigned long clk)
#define cputime64_to_clock_t(ct) cputime_to_clock_t((cputime_t)(ct))
+static inline void arch_vtime_task_switch(struct task_struct *tsk) { }
+
#endif /* __KERNEL__ */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
#endif /* __POWERPC_CPUTIME_H */
diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h
index 154c067761b..607e4eeeb69 100644
--- a/arch/powerpc/include/asm/dbell.h
+++ b/arch/powerpc/include/asm/dbell.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2009 Freescale Semicondutor, Inc.
+ * Copyright 2009 Freescale Semiconductor, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
index 78160874809..e27e9ad6818 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -172,6 +172,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
struct dma_map_ops *dma_ops = get_dma_ops(dev);
+ debug_dma_mapping_error(dev, dma_addr);
if (dma_ops->mapping_error)
return dma_ops->mapping_error(dev, dma_addr);
diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h
index bf2c06c3387..d3d634274d2 100644
--- a/arch/powerpc/include/asm/epapr_hcalls.h
+++ b/arch/powerpc/include/asm/epapr_hcalls.h
@@ -50,64 +50,13 @@
#ifndef _EPAPR_HCALLS_H
#define _EPAPR_HCALLS_H
+#include <uapi/asm/epapr_hcalls.h>
+
+#ifndef __ASSEMBLY__
#include <linux/types.h>
#include <linux/errno.h>
#include <asm/byteorder.h>
-#define EV_BYTE_CHANNEL_SEND 1
-#define EV_BYTE_CHANNEL_RECEIVE 2
-#define EV_BYTE_CHANNEL_POLL 3
-#define EV_INT_SET_CONFIG 4
-#define EV_INT_GET_CONFIG 5
-#define EV_INT_SET_MASK 6
-#define EV_INT_GET_MASK 7
-#define EV_INT_IACK 9
-#define EV_INT_EOI 10
-#define EV_INT_SEND_IPI 11
-#define EV_INT_SET_TASK_PRIORITY 12
-#define EV_INT_GET_TASK_PRIORITY 13
-#define EV_DOORBELL_SEND 14
-#define EV_MSGSND 15
-#define EV_IDLE 16
-
-/* vendor ID: epapr */
-#define EV_LOCAL_VENDOR_ID 0 /* for private use */
-#define EV_EPAPR_VENDOR_ID 1
-#define EV_FSL_VENDOR_ID 2 /* Freescale Semiconductor */
-#define EV_IBM_VENDOR_ID 3 /* IBM */
-#define EV_GHS_VENDOR_ID 4 /* Green Hills Software */
-#define EV_ENEA_VENDOR_ID 5 /* Enea */
-#define EV_WR_VENDOR_ID 6 /* Wind River Systems */
-#define EV_AMCC_VENDOR_ID 7 /* Applied Micro Circuits */
-#define EV_KVM_VENDOR_ID 42 /* KVM */
-
-/* The max number of bytes that a byte channel can send or receive per call */
-#define EV_BYTE_CHANNEL_MAX_BYTES 16
-
-
-#define _EV_HCALL_TOKEN(id, num) (((id) << 16) | (num))
-#define EV_HCALL_TOKEN(hcall_num) _EV_HCALL_TOKEN(EV_EPAPR_VENDOR_ID, hcall_num)
-
-/* epapr error codes */
-#define EV_EPERM 1 /* Operation not permitted */
-#define EV_ENOENT 2 /* Entry Not Found */
-#define EV_EIO 3 /* I/O error occured */
-#define EV_EAGAIN 4 /* The operation had insufficient
- * resources to complete and should be
- * retried
- */
-#define EV_ENOMEM 5 /* There was insufficient memory to
- * complete the operation */
-#define EV_EFAULT 6 /* Bad guest address */
-#define EV_ENODEV 7 /* No such device */
-#define EV_EINVAL 8 /* An argument supplied to the hcall
- was out of range or invalid */
-#define EV_INTERNAL 9 /* An internal error occured */
-#define EV_CONFIG 10 /* A configuration error was detected */
-#define EV_INVALID_STATE 11 /* The object is in an invalid state */
-#define EV_UNIMPLEMENTED 12 /* Unimplemented hypercall */
-#define EV_BUFFER_OVERFLOW 13 /* Caller-supplied buffer too small */
-
/*
* Hypercall register clobber list
*
@@ -193,7 +142,7 @@ static inline unsigned int ev_int_set_config(unsigned int interrupt,
r5 = priority;
r6 = destination;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6)
: : EV_HCALL_CLOBBERS4
);
@@ -222,7 +171,7 @@ static inline unsigned int ev_int_get_config(unsigned int interrupt,
r11 = EV_HCALL_TOKEN(EV_INT_GET_CONFIG);
r3 = interrupt;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5), "=r" (r6)
: : EV_HCALL_CLOBBERS4
);
@@ -252,7 +201,7 @@ static inline unsigned int ev_int_set_mask(unsigned int interrupt,
r3 = interrupt;
r4 = mask;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "+r" (r4)
: : EV_HCALL_CLOBBERS2
);
@@ -277,7 +226,7 @@ static inline unsigned int ev_int_get_mask(unsigned int interrupt,
r11 = EV_HCALL_TOKEN(EV_INT_GET_MASK);
r3 = interrupt;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "=r" (r4)
: : EV_HCALL_CLOBBERS2
);
@@ -305,7 +254,7 @@ static inline unsigned int ev_int_eoi(unsigned int interrupt)
r11 = EV_HCALL_TOKEN(EV_INT_EOI);
r3 = interrupt;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3)
: : EV_HCALL_CLOBBERS1
);
@@ -344,7 +293,7 @@ static inline unsigned int ev_byte_channel_send(unsigned int handle,
r7 = be32_to_cpu(p[2]);
r8 = be32_to_cpu(p[3]);
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3),
"+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7), "+r" (r8)
: : EV_HCALL_CLOBBERS6
@@ -383,7 +332,7 @@ static inline unsigned int ev_byte_channel_receive(unsigned int handle,
r3 = handle;
r4 = *count;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "+r" (r4),
"=r" (r5), "=r" (r6), "=r" (r7), "=r" (r8)
: : EV_HCALL_CLOBBERS6
@@ -421,7 +370,7 @@ static inline unsigned int ev_byte_channel_poll(unsigned int handle,
r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_POLL);
r3 = handle;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5)
: : EV_HCALL_CLOBBERS3
);
@@ -454,7 +403,7 @@ static inline unsigned int ev_int_iack(unsigned int handle,
r11 = EV_HCALL_TOKEN(EV_INT_IACK);
r3 = handle;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "=r" (r4)
: : EV_HCALL_CLOBBERS2
);
@@ -478,7 +427,7 @@ static inline unsigned int ev_doorbell_send(unsigned int handle)
r11 = EV_HCALL_TOKEN(EV_DOORBELL_SEND);
r3 = handle;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3)
: : EV_HCALL_CLOBBERS1
);
@@ -498,12 +447,12 @@ static inline unsigned int ev_idle(void)
r11 = EV_HCALL_TOKEN(EV_IDLE);
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "=r" (r3)
: : EV_HCALL_CLOBBERS1
);
return r3;
}
-
-#endif
+#endif /* !__ASSEMBLY__ */
+#endif /* _EPAPR_HCALLS_H */
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index a43c1473915..ad708dda3ba 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -48,6 +48,35 @@
#define EX_LR 72
#define EX_CFAR 80
+#ifdef CONFIG_RELOCATABLE
+#define EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \
+ ld r12,PACAKBASE(r13); /* get high part of &label */ \
+ mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \
+ LOAD_HANDLER(r12,label); \
+ mtlr r12; \
+ mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \
+ li r10,MSR_RI; \
+ mtmsrd r10,1; /* Set RI (EE=0) */ \
+ blr;
+#else
+/* If not relocatable, we can jump directly -- and save messing with LR */
+#define EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \
+ mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \
+ mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \
+ li r10,MSR_RI; \
+ mtmsrd r10,1; /* Set RI (EE=0) */ \
+ b label;
+#endif
+
+/*
+ * As EXCEPTION_PROLOG_PSERIES(), except we've already got relocation on
+ * so no need to rfid. Save lr in case we're CONFIG_RELOCATABLE, in which
+ * case EXCEPTION_RELON_PROLOG_PSERIES_1 will be using lr.
+ */
+#define EXCEPTION_RELON_PROLOG_PSERIES(area, label, h, extra, vec) \
+ EXCEPTION_PROLOG_1(area, extra, vec); \
+ EXCEPTION_RELON_PROLOG_PSERIES_1(label, h)
+
/*
* We're short on space and time in the exception prolog, so we can't
* use the normal SET_REG_IMMEDIATE macro. Normally we just need the
@@ -55,12 +84,29 @@
* word.
*/
#define LOAD_HANDLER(reg, label) \
- addi reg,reg,(label)-_stext; /* virt addr of handler ... */
+ /* Handlers must be within 64K of kbase, which must be 64k aligned */ \
+ ori reg,reg,(label)-_stext; /* virt addr of handler ... */
/* Exception register prefixes */
#define EXC_HV H
#define EXC_STD
+#if defined(CONFIG_RELOCATABLE)
+/*
+ * If we support interrupts with relocation on AND we're a relocatable
+ * kernel, we need to use LR to get to the 2nd level handler. So, save/restore
+ * it when required.
+ */
+#define SAVE_LR(reg, area) mflr reg ; std reg,area+EX_LR(r13)
+#define GET_LR(reg, area) ld reg,area+EX_LR(r13)
+#define RESTORE_LR(reg, area) ld reg,area+EX_LR(r13) ; mtlr reg
+#else
+/* ...else LR is unused and in register. */
+#define SAVE_LR(reg, area)
+#define GET_LR(reg, area) mflr reg
+#define RESTORE_LR(reg, area)
+#endif
+
#define __EXCEPTION_PROLOG_1(area, extra, vec) \
GET_PACA(r13); \
std r9,area+EX_R9(r13); /* save r9 - r12 */ \
@@ -69,6 +115,7 @@
mfspr r10,SPRN_CFAR; \
std r10,area+EX_CFAR(r13); \
END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66); \
+ SAVE_LR(r10, area); \
mfcr r9; \
extra(vec); \
std r11,area+EX_R11(r13); \
@@ -169,6 +216,7 @@ do_kvm_##n: \
sth r1,PACA_TRAP_SAVE(r13); \
std r3,area+EX_R3(r13); \
addi r3,r13,area; /* r3 -> where regs are saved*/ \
+ RESTORE_LR(r1, area); \
b bad_stack; \
3: std r9,_CCR(r1); /* save CR in stackframe */ \
std r11,_NIP(r1); /* save SRR0 in stackframe */ \
@@ -194,8 +242,8 @@ do_kvm_##n: \
ld r10,area+EX_CFAR(r13); \
std r10,ORIG_GPR3(r1); \
END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66); \
+ GET_LR(r9,area); /* Get LR, later save to stack */ \
ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \
- mflr r9; /* save LR in stackframe */ \
std r9,_LINK(r1); \
mfctr r10; /* save CTR in stackframe */ \
std r10,_CTR(r1); \
@@ -232,6 +280,26 @@ label##_hv: \
EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, \
EXC_HV, KVMTEST, vec)
+#define STD_RELON_EXCEPTION_PSERIES(loc, vec, label) \
+ . = loc; \
+ .globl label##_relon_pSeries; \
+label##_relon_pSeries: \
+ HMT_MEDIUM; \
+ /* No guest interrupts come through here */ \
+ SET_SCRATCH0(r13); /* save r13 */ \
+ EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \
+ EXC_STD, KVMTEST_PR, vec)
+
+#define STD_RELON_EXCEPTION_HV(loc, vec, label) \
+ . = loc; \
+ .globl label##_relon_hv; \
+label##_relon_hv: \
+ HMT_MEDIUM; \
+ /* No guest interrupts come through here */ \
+ SET_SCRATCH0(r13); /* save r13 */ \
+ EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \
+ EXC_HV, KVMTEST, vec)
+
/* This associate vector numbers with bits in paca->irq_happened */
#define SOFTEN_VALUE_0x500 PACA_IRQ_EE
#define SOFTEN_VALUE_0x502 PACA_IRQ_EE
@@ -257,6 +325,9 @@ label##_hv: \
KVMTEST(vec); \
_SOFTEN_TEST(EXC_STD, vec)
+#define SOFTEN_NOTEST_PR(vec) _SOFTEN_TEST(EXC_STD, vec)
+#define SOFTEN_NOTEST_HV(vec) _SOFTEN_TEST(EXC_HV, vec)
+
#define __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) \
HMT_MEDIUM; \
SET_SCRATCH0(r13); /* save r13 */ \
@@ -279,6 +350,28 @@ label##_hv: \
_MASKABLE_EXCEPTION_PSERIES(vec, label, \
EXC_HV, SOFTEN_TEST_HV)
+#define __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) \
+ HMT_MEDIUM; \
+ SET_SCRATCH0(r13); /* save r13 */ \
+ __EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec); \
+ EXCEPTION_RELON_PROLOG_PSERIES_1(label##_common, h);
+#define _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) \
+ __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra)
+
+#define MASKABLE_RELON_EXCEPTION_PSERIES(loc, vec, label) \
+ . = loc; \
+ .globl label##_relon_pSeries; \
+label##_relon_pSeries: \
+ _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \
+ EXC_STD, SOFTEN_NOTEST_PR)
+
+#define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label) \
+ . = loc; \
+ .globl label##_relon_hv; \
+label##_relon_hv: \
+ _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \
+ EXC_HV, SOFTEN_NOTEST_HV)
+
/*
* Our exception common code can be passed various "additions"
* to specify the behaviour of interrupts, whether to kick the
diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index ad0b751b0d7..973cc3be011 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -49,6 +49,7 @@
#define FW_FEATURE_XCMO ASM_CONST(0x0000000008000000)
#define FW_FEATURE_OPAL ASM_CONST(0x0000000010000000)
#define FW_FEATURE_OPALv2 ASM_CONST(0x0000000020000000)
+#define FW_FEATURE_SET_MODE ASM_CONST(0x0000000040000000)
#ifndef __ASSEMBLY__
@@ -62,7 +63,8 @@ enum {
FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN |
FW_FEATURE_BULK_REMOVE | FW_FEATURE_XDABR |
FW_FEATURE_MULTITCE | FW_FEATURE_SPLPAR | FW_FEATURE_LPAR |
- FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO,
+ FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO |
+ FW_FEATURE_SET_MODE,
FW_FEATURE_PSERIES_ALWAYS = 0,
FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_OPALv2,
FW_FEATURE_POWERNV_ALWAYS = 0,
diff --git a/arch/powerpc/include/asm/fsl_gtm.h b/arch/powerpc/include/asm/fsl_gtm.h
index 8e8c9b5032d..3b05808f9ca 100644
--- a/arch/powerpc/include/asm/fsl_gtm.h
+++ b/arch/powerpc/include/asm/fsl_gtm.h
@@ -1,7 +1,7 @@
/*
* Freescale General-purpose Timers Module
*
- * Copyright (c) Freescale Semicondutor, Inc. 2006.
+ * Copyright 2006 Freescale Semiconductor, Inc.
* Shlomi Gridish <gridish@freescale.com>
* Jerry Huang <Chang-Ming.Huang@freescale.com>
* Copyright (c) MontaVista Software, Inc. 2008.
diff --git a/arch/powerpc/include/asm/fsl_guts.h b/arch/powerpc/include/asm/fsl_guts.h
index dd5ba2c2277..77ced0b3d81 100644
--- a/arch/powerpc/include/asm/fsl_guts.h
+++ b/arch/powerpc/include/asm/fsl_guts.h
@@ -71,7 +71,9 @@ struct ccsr_guts {
u8 res0c4[0x224 - 0xc4];
__be32 iodelay1; /* 0x.0224 - IO delay control register 1 */
__be32 iodelay2; /* 0x.0228 - IO delay control register 2 */
- u8 res22c[0x800 - 0x22c];
+ u8 res22c[0x604 - 0x22c];
+ __be32 pamubypenr; /* 0x.604 - PAMU bypass enable register */
+ u8 res608[0x800 - 0x608];
__be32 clkdvdr; /* 0x.0800 - Clock Divide Register */
u8 res804[0x900 - 0x804];
__be32 ircr; /* 0x.0900 - Infrared Control Register */
diff --git a/arch/powerpc/include/asm/fsl_hcalls.h b/arch/powerpc/include/asm/fsl_hcalls.h
index 922d9b5fe3d..3abb58394da 100644
--- a/arch/powerpc/include/asm/fsl_hcalls.h
+++ b/arch/powerpc/include/asm/fsl_hcalls.h
@@ -96,7 +96,7 @@ static inline unsigned int fh_send_nmi(unsigned int vcpu_mask)
r11 = FH_HCALL_TOKEN(FH_SEND_NMI);
r3 = vcpu_mask;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3)
: : EV_HCALL_CLOBBERS1
);
@@ -151,7 +151,7 @@ static inline unsigned int fh_partition_get_dtprop(int handle,
r9 = (uint32_t)propvalue_addr;
r10 = *propvalue_len;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11),
"+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7),
"+r" (r8), "+r" (r9), "+r" (r10)
@@ -205,7 +205,7 @@ static inline unsigned int fh_partition_set_dtprop(int handle,
r9 = (uint32_t)propvalue_addr;
r10 = propvalue_len;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11),
"+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7),
"+r" (r8), "+r" (r9), "+r" (r10)
@@ -229,7 +229,7 @@ static inline unsigned int fh_partition_restart(unsigned int partition)
r11 = FH_HCALL_TOKEN(FH_PARTITION_RESTART);
r3 = partition;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3)
: : EV_HCALL_CLOBBERS1
);
@@ -262,7 +262,7 @@ static inline unsigned int fh_partition_get_status(unsigned int partition,
r11 = FH_HCALL_TOKEN(FH_PARTITION_GET_STATUS);
r3 = partition;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "=r" (r4)
: : EV_HCALL_CLOBBERS2
);
@@ -295,7 +295,7 @@ static inline unsigned int fh_partition_start(unsigned int partition,
r4 = entry_point;
r5 = load;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5)
: : EV_HCALL_CLOBBERS3
);
@@ -317,7 +317,7 @@ static inline unsigned int fh_partition_stop(unsigned int partition)
r11 = FH_HCALL_TOKEN(FH_PARTITION_STOP);
r3 = partition;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3)
: : EV_HCALL_CLOBBERS1
);
@@ -376,7 +376,7 @@ static inline unsigned int fh_partition_memcpy(unsigned int source,
#endif
r7 = count;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11),
"+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7)
: : EV_HCALL_CLOBBERS5
@@ -399,7 +399,7 @@ static inline unsigned int fh_dma_enable(unsigned int liodn)
r11 = FH_HCALL_TOKEN(FH_DMA_ENABLE);
r3 = liodn;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3)
: : EV_HCALL_CLOBBERS1
);
@@ -421,7 +421,7 @@ static inline unsigned int fh_dma_disable(unsigned int liodn)
r11 = FH_HCALL_TOKEN(FH_DMA_DISABLE);
r3 = liodn;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3)
: : EV_HCALL_CLOBBERS1
);
@@ -447,7 +447,7 @@ static inline unsigned int fh_vmpic_get_msir(unsigned int interrupt,
r11 = FH_HCALL_TOKEN(FH_VMPIC_GET_MSIR);
r3 = interrupt;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "=r" (r4)
: : EV_HCALL_CLOBBERS2
);
@@ -469,7 +469,7 @@ static inline unsigned int fh_system_reset(void)
r11 = FH_HCALL_TOKEN(FH_SYSTEM_RESET);
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "=r" (r3)
: : EV_HCALL_CLOBBERS1
);
@@ -506,7 +506,7 @@ static inline unsigned int fh_err_get_info(int queue, uint32_t *bufsize,
r6 = addr_lo;
r7 = peek;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6),
"+r" (r7)
: : EV_HCALL_CLOBBERS5
@@ -542,7 +542,7 @@ static inline unsigned int fh_get_core_state(unsigned int handle,
r3 = handle;
r4 = vcpu;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "+r" (r4)
: : EV_HCALL_CLOBBERS2
);
@@ -572,7 +572,7 @@ static inline unsigned int fh_enter_nap(unsigned int handle, unsigned int vcpu)
r3 = handle;
r4 = vcpu;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "+r" (r4)
: : EV_HCALL_CLOBBERS2
);
@@ -597,7 +597,7 @@ static inline unsigned int fh_exit_nap(unsigned int handle, unsigned int vcpu)
r3 = handle;
r4 = vcpu;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "+r" (r4)
: : EV_HCALL_CLOBBERS2
);
@@ -618,7 +618,7 @@ static inline unsigned int fh_claim_device(unsigned int handle)
r11 = FH_HCALL_TOKEN(FH_CLAIM_DEVICE);
r3 = handle;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3)
: : EV_HCALL_CLOBBERS1
);
@@ -645,7 +645,7 @@ static inline unsigned int fh_partition_stop_dma(unsigned int handle)
r11 = FH_HCALL_TOKEN(FH_PARTITION_STOP_DMA);
r3 = handle;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3)
: : EV_HCALL_CLOBBERS1
);
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 7a867065db7..0975e5c0bb1 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -267,7 +267,8 @@
#define H_RANDOM 0x300
#define H_COP 0x304
#define H_GET_MPP_X 0x314
-#define MAX_HCALL_OPCODE H_GET_MPP_X
+#define H_SET_MODE 0x31C
+#define MAX_HCALL_OPCODE H_SET_MODE
#ifndef __ASSEMBLY__
@@ -355,6 +356,26 @@ struct hvcall_mpp_x_data {
int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data);
+static inline unsigned int get_longbusy_msecs(int longbusy_rc)
+{
+ switch (longbusy_rc) {
+ case H_LONG_BUSY_ORDER_1_MSEC:
+ return 1;
+ case H_LONG_BUSY_ORDER_10_MSEC:
+ return 10;
+ case H_LONG_BUSY_ORDER_100_MSEC:
+ return 100;
+ case H_LONG_BUSY_ORDER_1_SEC:
+ return 1000;
+ case H_LONG_BUSY_ORDER_10_SEC:
+ return 10000;
+ case H_LONG_BUSY_ORDER_100_SEC:
+ return 100000;
+ default:
+ return 1;
+ }
+}
+
#ifdef CONFIG_PPC_PSERIES
extern int CMO_PrPSP;
extern int CMO_SecPSP;
diff --git a/arch/powerpc/include/asm/immap_qe.h b/arch/powerpc/include/asm/immap_qe.h
index 61e8490786b..bedbff89142 100644
--- a/arch/powerpc/include/asm/immap_qe.h
+++ b/arch/powerpc/include/asm/immap_qe.h
@@ -3,7 +3,7 @@
* The Internal Memory Map for devices with QE on them. This
* is the superset of all QE devices (8360, etc.).
- * Copyright (C) 2006. Freescale Semicondutor, Inc. All rights reserved.
+ * Copyright (C) 2006. Freescale Semiconductor, Inc. All rights reserved.
*
* Authors: Shlomi Gridish <gridish@freescale.com>
* Li Yang <leoli@freescale.com>
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 76fdcfef088..aabcdba8f6b 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -118,6 +118,7 @@
#define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */
#define RESUME_FLAG_HOST (1<<1) /* Resume host? */
+#define RESUME_FLAG_ARCH1 (1<<2)
#define RESUME_GUEST 0
#define RESUME_GUEST_NV RESUME_FLAG_NV
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 7aefdb3e1ce..5a56e1c5f85 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -81,6 +81,8 @@ struct kvmppc_vcpu_book3s {
u64 sdr1;
u64 hior;
u64 msr_mask;
+ u64 purr_offset;
+ u64 spurr_offset;
#ifdef CONFIG_PPC_BOOK3S_32
u32 vsid_pool[VSID_POOL_SIZE];
u32 vsid_next;
@@ -157,10 +159,14 @@ extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr);
extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
long pte_index, unsigned long pteh, unsigned long ptel);
-extern long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
- long pte_index, unsigned long pteh, unsigned long ptel);
+extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
+ long pte_index, unsigned long pteh, unsigned long ptel,
+ pgd_t *pgdir, bool realmode, unsigned long *idx_ret);
+extern long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
+ unsigned long pte_index, unsigned long avpn,
+ unsigned long *hpret);
extern long kvmppc_hv_get_dirty_log(struct kvm *kvm,
- struct kvm_memory_slot *memslot);
+ struct kvm_memory_slot *memslot, unsigned long *map);
extern void kvmppc_entry_trampoline(void);
extern void kvmppc_hv_entry_trampoline(void);
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 0dd1d86d3e3..38bec1dc992 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -50,6 +50,15 @@ extern int kvm_hpt_order; /* order of preallocated HPTs */
#define HPTE_V_HVLOCK 0x40UL
#define HPTE_V_ABSENT 0x20UL
+/*
+ * We use this bit in the guest_rpte field of the revmap entry
+ * to indicate a modified HPTE.
+ */
+#define HPTE_GR_MODIFIED (1ul << 62)
+
+/* These bits are reserved in the guest view of the HPTE */
+#define HPTE_GR_RESERVED HPTE_GR_MODIFIED
+
static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits)
{
unsigned long tmp, old;
@@ -60,7 +69,7 @@ static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits)
" ori %0,%0,%4\n"
" stdcx. %0,0,%2\n"
" beq+ 2f\n"
- " li %1,%3\n"
+ " mr %1,%3\n"
"2: isync"
: "=&r" (tmp), "=&r" (old)
: "r" (hpte), "r" (bits), "i" (HPTE_V_HVLOCK)
@@ -237,4 +246,26 @@ static inline bool slot_is_aligned(struct kvm_memory_slot *memslot,
return !(memslot->base_gfn & mask) && !(memslot->npages & mask);
}
+/*
+ * This works for 4k, 64k and 16M pages on POWER7,
+ * and 4k and 16M pages on PPC970.
+ */
+static inline unsigned long slb_pgsize_encoding(unsigned long psize)
+{
+ unsigned long senc = 0;
+
+ if (psize > 0x1000) {
+ senc = SLB_VSID_L;
+ if (psize == 0x10000)
+ senc |= SLB_VSID_LP_01;
+ }
+ return senc;
+}
+
+static inline int is_vrma_hpte(unsigned long hpte_v)
+{
+ return (hpte_v & ~0xffffffUL) ==
+ (HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)));
+}
+
#endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_booke_hv_asm.h b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
index 30a600fa1b6..3a79f532571 100644
--- a/arch/powerpc/include/asm/kvm_booke_hv_asm.h
+++ b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
@@ -17,6 +17,7 @@
* there are no exceptions for which we fall through directly to
* the normal host handler.
*
+ * 32-bit host
* Expected inputs (normal exceptions):
* SCRATCH0 = saved r10
* r10 = thread struct
@@ -33,14 +34,38 @@
* *(r8 + GPR9) = saved r9
* *(r8 + GPR10) = saved r10 (r10 not yet clobbered)
* *(r8 + GPR11) = saved r11
+ *
+ * 64-bit host
+ * Expected inputs (GEN/GDBELL/DBG/MC exception types):
+ * r10 = saved CR
+ * r13 = PACA_POINTER
+ * *(r13 + PACA_EX##type + EX_R10) = saved r10
+ * *(r13 + PACA_EX##type + EX_R11) = saved r11
+ * SPRN_SPRG_##type##_SCRATCH = saved r13
+ *
+ * Expected inputs (CRIT exception type):
+ * r10 = saved CR
+ * r13 = PACA_POINTER
+ * *(r13 + PACA_EX##type + EX_R10) = saved r10
+ * *(r13 + PACA_EX##type + EX_R11) = saved r11
+ * *(r13 + PACA_EX##type + EX_R13) = saved r13
+ *
+ * Expected inputs (TLB exception type):
+ * r10 = saved CR
+ * r13 = PACA_POINTER
+ * *(r13 + PACA_EX##type + EX_TLB_R10) = saved r10
+ * *(r13 + PACA_EX##type + EX_TLB_R11) = saved r11
+ * SPRN_SPRG_GEN_SCRATCH = saved r13
+ *
+ * Only the bolted version of TLB miss exception handlers is supported now.
*/
.macro DO_KVM intno srr1
#ifdef CONFIG_KVM_BOOKE_HV
BEGIN_FTR_SECTION
mtocrf 0x80, r11 /* check MSR[GS] without clobbering reg */
- bf 3, kvmppc_resume_\intno\()_\srr1
+ bf 3, 1975f
b kvmppc_handler_\intno\()_\srr1
-kvmppc_resume_\intno\()_\srr1:
+1975:
END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
#endif
.endm
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 28e8f5e5c63..ca9bf459db6 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -46,7 +46,7 @@
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#endif
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#if !defined(CONFIG_KVM_440)
#include <linux/mmu_notifier.h>
#define KVM_ARCH_WANT_MMU_NOTIFIER
@@ -204,7 +204,7 @@ struct revmap_entry {
};
/*
- * We use the top bit of each memslot->rmap entry as a lock bit,
+ * We use the top bit of each memslot->arch.rmap entry as a lock bit,
* and bit 32 as a present flag. The bottom 32 bits are the
* index in the guest HPT of a HPTE that points to the page.
*/
@@ -215,14 +215,17 @@ struct revmap_entry {
#define KVMPPC_RMAP_PRESENT 0x100000000ul
#define KVMPPC_RMAP_INDEX 0xfffffffful
-/* Low-order bits in kvm->arch.slot_phys[][] */
+/* Low-order bits in memslot->arch.slot_phys[] */
#define KVMPPC_PAGE_ORDER_MASK 0x1f
#define KVMPPC_PAGE_NO_CACHE HPTE_R_I /* 0x20 */
#define KVMPPC_PAGE_WRITETHRU HPTE_R_W /* 0x40 */
#define KVMPPC_GOT_PAGE 0x80
struct kvm_arch_memory_slot {
+#ifdef CONFIG_KVM_BOOK3S_64_HV
unsigned long *rmap;
+ unsigned long *slot_phys;
+#endif /* CONFIG_KVM_BOOK3S_64_HV */
};
struct kvm_arch {
@@ -243,12 +246,12 @@ struct kvm_arch {
int using_mmu_notifiers;
u32 hpt_order;
atomic_t vcpus_running;
+ u32 online_vcores;
unsigned long hpt_npte;
unsigned long hpt_mask;
+ atomic_t hpte_mod_interest;
spinlock_t slot_phys_lock;
- unsigned long *slot_phys[KVM_MEM_SLOTS_NUM];
- int slot_npages[KVM_MEM_SLOTS_NUM];
- unsigned short last_vcpu[NR_CPUS];
+ cpumask_t need_tlb_flush;
struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
struct kvmppc_linear_info *hpt_li;
#endif /* CONFIG_KVM_BOOK3S_64_HV */
@@ -273,6 +276,7 @@ struct kvmppc_vcore {
int nap_count;
int napping_threads;
u16 pcpu;
+ u16 last_cpu;
u8 vcore_state;
u8 in_guest;
struct list_head runnable_threads;
@@ -288,9 +292,10 @@ struct kvmppc_vcore {
/* Values for vcore_state */
#define VCORE_INACTIVE 0
-#define VCORE_RUNNING 1
-#define VCORE_EXITING 2
-#define VCORE_SLEEPING 3
+#define VCORE_SLEEPING 1
+#define VCORE_STARTING 2
+#define VCORE_RUNNING 3
+#define VCORE_EXITING 4
/*
* Struct used to manage memory for a virtual processor area
@@ -346,6 +351,27 @@ struct kvmppc_slb {
bool class : 1;
};
+# ifdef CONFIG_PPC_FSL_BOOK3E
+#define KVMPPC_BOOKE_IAC_NUM 2
+#define KVMPPC_BOOKE_DAC_NUM 2
+# else
+#define KVMPPC_BOOKE_IAC_NUM 4
+#define KVMPPC_BOOKE_DAC_NUM 2
+# endif
+#define KVMPPC_BOOKE_MAX_IAC 4
+#define KVMPPC_BOOKE_MAX_DAC 2
+
+struct kvmppc_booke_debug_reg {
+ u32 dbcr0;
+ u32 dbcr1;
+ u32 dbcr2;
+#ifdef CONFIG_KVM_E500MC
+ u32 dbcr4;
+#endif
+ u64 iac[KVMPPC_BOOKE_MAX_IAC];
+ u64 dac[KVMPPC_BOOKE_MAX_DAC];
+};
+
struct kvm_vcpu_arch {
ulong host_stack;
u32 host_pid;
@@ -380,13 +406,18 @@ struct kvm_vcpu_arch {
u32 host_mas4;
u32 host_mas6;
u32 shadow_epcr;
- u32 epcr;
u32 shadow_msrp;
u32 eplc;
u32 epsc;
u32 oldpir;
#endif
+#if defined(CONFIG_BOOKE)
+#if defined(CONFIG_KVM_BOOKE_HV) || defined(CONFIG_64BIT)
+ u32 epcr;
+#endif
+#endif
+
#ifdef CONFIG_PPC_BOOK3S
/* For Gekko paired singles */
u32 qpr[32];
@@ -440,8 +471,6 @@ struct kvm_vcpu_arch {
u32 ccr0;
u32 ccr1;
- u32 dbcr0;
- u32 dbcr1;
u32 dbsr;
u64 mmcr[3];
@@ -471,9 +500,12 @@ struct kvm_vcpu_arch {
ulong fault_esr;
ulong queued_dear;
ulong queued_esr;
+ spinlock_t wdt_lock;
+ struct timer_list wdt_timer;
u32 tlbcfg[4];
u32 mmucfg;
u32 epr;
+ struct kvmppc_booke_debug_reg dbg_reg;
#endif
gpa_t paddr_accessed;
gva_t vaddr_accessed;
@@ -486,6 +518,7 @@ struct kvm_vcpu_arch {
u8 osi_needed;
u8 osi_enabled;
u8 papr_enabled;
+ u8 watchdog_enabled;
u8 sane;
u8 cpu_type;
u8 hcall_needed;
@@ -497,7 +530,6 @@ struct kvm_vcpu_arch {
u64 dec_jiffies;
u64 dec_expires;
unsigned long pending_exceptions;
- u16 last_cpu;
u8 ceded;
u8 prodded;
u32 last_inst;
@@ -534,13 +566,17 @@ struct kvm_vcpu_arch {
unsigned long dtl_index;
u64 stolen_logged;
struct kvmppc_vpa slb_shadow;
+
+ spinlock_t tbacct_lock;
+ u64 busy_stolen;
+ u64 busy_preempt;
#endif
};
/* Values for vcpu->arch.state */
-#define KVMPPC_VCPU_STOPPED 0
-#define KVMPPC_VCPU_BUSY_IN_HOST 1
-#define KVMPPC_VCPU_RUNNABLE 2
+#define KVMPPC_VCPU_NOTREADY 0
+#define KVMPPC_VCPU_RUNNABLE 1
+#define KVMPPC_VCPU_BUSY_IN_HOST 2
/* Values for vcpu->arch.io_gpr */
#define KVM_MMIO_REG_MASK 0x001f
diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h
index 9365860fb7f..2b119654b4c 100644
--- a/arch/powerpc/include/asm/kvm_para.h
+++ b/arch/powerpc/include/asm/kvm_para.h
@@ -21,7 +21,6 @@
#include <uapi/asm/kvm_para.h>
-
#ifdef CONFIG_KVM_GUEST
#include <linux/of.h>
@@ -55,7 +54,7 @@ static unsigned long kvm_hypercall(unsigned long *in,
unsigned long *out,
unsigned long nr)
{
- return HC_EV_UNIMPLEMENTED;
+ return EV_UNIMPLEMENTED;
}
#endif
@@ -66,7 +65,7 @@ static inline long kvm_hypercall0_1(unsigned int nr, unsigned long *r2)
unsigned long out[8];
unsigned long r;
- r = kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+ r = kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
*r2 = out[0];
return r;
@@ -77,7 +76,7 @@ static inline long kvm_hypercall0(unsigned int nr)
unsigned long in[8];
unsigned long out[8];
- return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+ return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
}
static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
@@ -86,7 +85,7 @@ static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
unsigned long out[8];
in[0] = p1;
- return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+ return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
}
static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,
@@ -97,7 +96,7 @@ static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,
in[0] = p1;
in[1] = p2;
- return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+ return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
}
static inline long kvm_hypercall3(unsigned int nr, unsigned long p1,
@@ -109,7 +108,7 @@ static inline long kvm_hypercall3(unsigned int nr, unsigned long p1,
in[0] = p1;
in[1] = p2;
in[2] = p3;
- return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+ return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
}
static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
@@ -123,7 +122,7 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
in[1] = p2;
in[2] = p3;
in[3] = p4;
- return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+ return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
}
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index e006f0bdea9..572aa753061 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -28,6 +28,7 @@
#include <linux/types.h>
#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
+#include <linux/bug.h>
#ifdef CONFIG_PPC_BOOK3S
#include <asm/kvm_book3s.h>
#else
@@ -68,6 +69,8 @@ extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
extern void kvmppc_decrementer_func(unsigned long data);
extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
+extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu);
+extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu);
/* Core-specific hooks */
@@ -104,6 +107,7 @@ extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq);
extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq);
+extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu);
extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int op, int *advance);
@@ -111,6 +115,7 @@ extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn,
ulong val);
extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn,
ulong *val);
+extern int kvmppc_core_check_requests(struct kvm_vcpu *vcpu);
extern int kvmppc_booke_init(void);
extern void kvmppc_booke_exit(void);
@@ -139,16 +144,28 @@ extern struct kvmppc_linear_info *kvm_alloc_hpt(void);
extern void kvm_release_hpt(struct kvmppc_linear_info *li);
extern int kvmppc_core_init_vm(struct kvm *kvm);
extern void kvmppc_core_destroy_vm(struct kvm *kvm);
+extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont);
+extern int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+ unsigned long npages);
extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem);
extern void kvmppc_core_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem);
+ struct kvm_userspace_memory_region *mem,
+ struct kvm_memory_slot old);
extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm,
struct kvm_ppc_smmu_info *info);
+extern void kvmppc_core_flush_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *memslot);
extern int kvmppc_bookehv_init(void);
extern void kvmppc_bookehv_exit(void);
+extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu);
+
+extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *);
+
/*
* Cuts out inst bits with ordering according to spec.
* That means the leftmost bit is zero. All given bits are included.
@@ -182,6 +199,41 @@ static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value)
return r;
}
+union kvmppc_one_reg {
+ u32 wval;
+ u64 dval;
+ vector128 vval;
+ u64 vsxval[2];
+ struct {
+ u64 addr;
+ u64 length;
+ } vpaval;
+};
+
+#define one_reg_size(id) \
+ (1ul << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
+
+#define get_reg_val(id, reg) ({ \
+ union kvmppc_one_reg __u; \
+ switch (one_reg_size(id)) { \
+ case 4: __u.wval = (reg); break; \
+ case 8: __u.dval = (reg); break; \
+ default: BUG(); \
+ } \
+ __u; \
+})
+
+
+#define set_reg_val(id, val) ({ \
+ u64 __v; \
+ switch (one_reg_size(id)) { \
+ case 4: __v = (val).wval; break; \
+ case 8: __v = (val).dval; break; \
+ default: BUG(); \
+ } \
+ __v; \
+})
+
void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
@@ -190,6 +242,8 @@ int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg);
int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg);
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *);
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *);
void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
@@ -230,5 +284,36 @@ static inline void kvmppc_mmu_flush_icache(pfn_t pfn)
}
}
+/* Please call after prepare_to_enter. This function puts the lazy ee state
+ back to normal mode, without actually enabling interrupts. */
+static inline void kvmppc_lazy_ee_enable(void)
+{
+#ifdef CONFIG_PPC64
+ /* Only need to enable IRQs by hard enabling them after this */
+ local_paca->irq_happened = 0;
+ local_paca->soft_enabled = 1;
+#endif
+}
+
+static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb)
+{
+ ulong ea;
+ ulong msr_64bit = 0;
+
+ ea = kvmppc_get_gpr(vcpu, rb);
+ if (ra)
+ ea += kvmppc_get_gpr(vcpu, ra);
+
+#if defined(CONFIG_PPC_BOOK3E_64)
+ msr_64bit = MSR_CM;
+#elif defined(CONFIG_PPC_BOOK3S_64)
+ msr_64bit = MSR_SF;
+#endif
+
+ if (!(vcpu->arch.shared->msr & msr_64bit))
+ ea = (uint32_t)ea;
+
+ return ea;
+}
#endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index c4231973edd..19d9d96eb8d 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -166,9 +166,6 @@ struct machdep_calls {
unsigned long size,
pgprot_t vma_prot);
- /* Idle loop for this platform, leave empty for default idle loop */
- void (*idle_loop)(void);
-
/*
* Function for waiting for work with reduced power in idle loop;
* called with interrupts disabled.
@@ -320,28 +317,28 @@ static inline void log_error(char *buf, unsigned int err_type, int fatal)
ppc_md.log_error(buf, err_type, fatal);
}
-#define __define_machine_initcall(mach,level,fn,id) \
+#define __define_machine_initcall(mach, fn, id) \
static int __init __machine_initcall_##mach##_##fn(void) { \
if (machine_is(mach)) return fn(); \
return 0; \
} \
- __define_initcall(level,__machine_initcall_##mach##_##fn,id);
-
-#define machine_core_initcall(mach,fn) __define_machine_initcall(mach,"1",fn,1)
-#define machine_core_initcall_sync(mach,fn) __define_machine_initcall(mach,"1s",fn,1s)
-#define machine_postcore_initcall(mach,fn) __define_machine_initcall(mach,"2",fn,2)
-#define machine_postcore_initcall_sync(mach,fn) __define_machine_initcall(mach,"2s",fn,2s)
-#define machine_arch_initcall(mach,fn) __define_machine_initcall(mach,"3",fn,3)
-#define machine_arch_initcall_sync(mach,fn) __define_machine_initcall(mach,"3s",fn,3s)
-#define machine_subsys_initcall(mach,fn) __define_machine_initcall(mach,"4",fn,4)
-#define machine_subsys_initcall_sync(mach,fn) __define_machine_initcall(mach,"4s",fn,4s)
-#define machine_fs_initcall(mach,fn) __define_machine_initcall(mach,"5",fn,5)
-#define machine_fs_initcall_sync(mach,fn) __define_machine_initcall(mach,"5s",fn,5s)
-#define machine_rootfs_initcall(mach,fn) __define_machine_initcall(mach,"rootfs",fn,rootfs)
-#define machine_device_initcall(mach,fn) __define_machine_initcall(mach,"6",fn,6)
-#define machine_device_initcall_sync(mach,fn) __define_machine_initcall(mach,"6s",fn,6s)
-#define machine_late_initcall(mach,fn) __define_machine_initcall(mach,"7",fn,7)
-#define machine_late_initcall_sync(mach,fn) __define_machine_initcall(mach,"7s",fn,7s)
+ __define_initcall(__machine_initcall_##mach##_##fn, id);
+
+#define machine_core_initcall(mach, fn) __define_machine_initcall(mach, fn, 1)
+#define machine_core_initcall_sync(mach, fn) __define_machine_initcall(mach, fn, 1s)
+#define machine_postcore_initcall(mach, fn) __define_machine_initcall(mach, fn, 2)
+#define machine_postcore_initcall_sync(mach, fn) __define_machine_initcall(mach, fn, 2s)
+#define machine_arch_initcall(mach, fn) __define_machine_initcall(mach, fn, 3)
+#define machine_arch_initcall_sync(mach, fn) __define_machine_initcall(mach, fn, 3s)
+#define machine_subsys_initcall(mach, fn) __define_machine_initcall(mach, fn, 4)
+#define machine_subsys_initcall_sync(mach, fn) __define_machine_initcall(mach, fn, 4s)
+#define machine_fs_initcall(mach, fn) __define_machine_initcall(mach, fn, 5)
+#define machine_fs_initcall_sync(mach, fn) __define_machine_initcall(mach, fn, 5s)
+#define machine_rootfs_initcall(mach, fn) __define_machine_initcall(mach, fn, rootfs)
+#define machine_device_initcall(mach, fn) __define_machine_initcall(mach, fn, 6)
+#define machine_device_initcall_sync(mach, fn) __define_machine_initcall(mach, fn, 6s)
+#define machine_late_initcall(mach, fn) __define_machine_initcall(mach, fn, 7)
+#define machine_late_initcall_sync(mach, fn) __define_machine_initcall(mach, fn, 7s)
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_MACHDEP_H */
diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
index eeabcdbc30f..99d43e0c1e4 100644
--- a/arch/powerpc/include/asm/mmu-book3e.h
+++ b/arch/powerpc/include/asm/mmu-book3e.h
@@ -59,7 +59,7 @@
#define MAS1_TSIZE_SHIFT 7
#define MAS1_TSIZE(x) (((x) << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK)
-#define MAS2_EPN 0xFFFFF000
+#define MAS2_EPN (~0xFFFUL)
#define MAS2_X0 0x00000040
#define MAS2_X1 0x00000020
#define MAS2_W 0x00000010
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index 9673f73eb8d..2fdb47a19ef 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -121,6 +121,16 @@ extern char initial_stab[];
#define PP_RXRX 3 /* Supervisor read, User read */
#define PP_RXXX (HPTE_R_PP0 | 2) /* Supervisor read, user none */
+/* Fields for tlbiel instruction in architecture 2.06 */
+#define TLBIEL_INVAL_SEL_MASK 0xc00 /* invalidation selector */
+#define TLBIEL_INVAL_PAGE 0x000 /* invalidate a single page */
+#define TLBIEL_INVAL_SET_LPID 0x800 /* invalidate a set for current LPID */
+#define TLBIEL_INVAL_SET 0xc00 /* invalidate a set for all LPIDs */
+#define TLBIEL_INVAL_SET_MASK 0xfff000 /* set number to inval. */
+#define TLBIEL_INVAL_SET_SHIFT 12
+
+#define POWER7_TLB_SETS 128 /* # sets in POWER7 TLB */
+
#ifndef __ASSEMBLY__
struct hash_pte {
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 5e38eedea21..691fd8aca93 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -101,6 +101,7 @@
#define MMU_FTRS_POWER5 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
#define MMU_FTRS_POWER6 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
#define MMU_FTRS_POWER7 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
+#define MMU_FTRS_POWER8 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
#define MMU_FTRS_CELL MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
MMU_FTR_CI_LARGE_PAGE
#define MMU_FTRS_PA6T MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
diff --git a/arch/powerpc/include/asm/oprofile_impl.h b/arch/powerpc/include/asm/oprofile_impl.h
index 639dc96077a..d697b08994c 100644
--- a/arch/powerpc/include/asm/oprofile_impl.h
+++ b/arch/powerpc/include/asm/oprofile_impl.h
@@ -34,7 +34,7 @@ struct op_system_config {
unsigned long mmcra;
#ifdef CONFIG_OPROFILE_CELL
/* Register for oprofile user tool to check cell kernel profiling
- * suport.
+ * support.
*/
unsigned long cell_support;
#endif
diff --git a/arch/powerpc/include/asm/pSeries_reconfig.h b/arch/powerpc/include/asm/pSeries_reconfig.h
deleted file mode 100644
index c07edfe98b9..00000000000
--- a/arch/powerpc/include/asm/pSeries_reconfig.h
+++ /dev/null
@@ -1,47 +0,0 @@
-#ifndef _PPC64_PSERIES_RECONFIG_H
-#define _PPC64_PSERIES_RECONFIG_H
-#ifdef __KERNEL__
-
-#include <linux/notifier.h>
-
-/*
- * Use this API if your code needs to know about OF device nodes being
- * added or removed on pSeries systems.
- */
-
-#define PSERIES_RECONFIG_ADD 0x0001
-#define PSERIES_RECONFIG_REMOVE 0x0002
-#define PSERIES_DRCONF_MEM_ADD 0x0003
-#define PSERIES_DRCONF_MEM_REMOVE 0x0004
-#define PSERIES_UPDATE_PROPERTY 0x0005
-
-/**
- * pSeries_reconfig_notify - Notifier value structure for OFDT property updates
- *
- * @node: Device tree node which owns the property being updated
- * @property: Updated property
- */
-struct pSeries_reconfig_prop_update {
- struct device_node *node;
- struct property *property;
-};
-
-#ifdef CONFIG_PPC_PSERIES
-extern int pSeries_reconfig_notifier_register(struct notifier_block *);
-extern void pSeries_reconfig_notifier_unregister(struct notifier_block *);
-extern int pSeries_reconfig_notify(unsigned long action, void *p);
-/* Not the best place to put this, will be fixed when we move some
- * of the rtas suspend-me stuff to pseries */
-extern void pSeries_coalesce_init(void);
-#else /* !CONFIG_PPC_PSERIES */
-static inline int pSeries_reconfig_notifier_register(struct notifier_block *nb)
-{
- return 0;
-}
-static inline void pSeries_reconfig_notifier_unregister(struct notifier_block *nb) { }
-static inline void pSeries_coalesce_init(void) { }
-#endif /* CONFIG_PPC_PSERIES */
-
-
-#endif /* __KERNEL__ */
-#endif /* _PPC64_PSERIES_RECONFIG_H */
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 5f73ce63fca..51fb00a20d7 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2009 Freescale Semicondutor, Inc.
+ * Copyright 2009 Freescale Semiconductor, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -86,6 +86,7 @@
#define PPC_INST_DCBA_MASK 0xfc0007fe
#define PPC_INST_DCBAL 0x7c2005ec
#define PPC_INST_DCBZL 0x7c2007ec
+#define PPC_INST_ICBT 0x7c00002c
#define PPC_INST_ISEL 0x7c00001e
#define PPC_INST_ISEL_MASK 0xfc00003e
#define PPC_INST_LDARX 0x7c0000a8
@@ -168,9 +169,12 @@
#define PPC_INST_AND 0x7c000038
#define PPC_INST_ANDDOT 0x7c000039
#define PPC_INST_OR 0x7c000378
+#define PPC_INST_XOR 0x7c000278
#define PPC_INST_ANDI 0x70000000
#define PPC_INST_ORI 0x60000000
#define PPC_INST_ORIS 0x64000000
+#define PPC_INST_XORI 0x68000000
+#define PPC_INST_XORIS 0x6c000000
#define PPC_INST_NEG 0x7c0000d0
#define PPC_INST_BRANCH 0x48000000
#define PPC_INST_BRANCH_COND 0x40800000
@@ -198,6 +202,7 @@
#define __PPC_MB(s) (((s) & 0x1f) << 6)
#define __PPC_ME(s) (((s) & 0x1f) << 1)
#define __PPC_BI(s) (((s) & 0x1f) << 16)
+#define __PPC_CT(t) (((t) & 0x0f) << 21)
/*
* Only use the larx hint bit on 64bit CPUs. e500v1/v2 based CPUs will treat a
@@ -260,6 +265,8 @@
__PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(b))
#define PPC_SLBFEE_DOT(t, b) stringify_in_c(.long PPC_INST_SLBFEE | \
__PPC_RT(t) | __PPC_RB(b))
+#define PPC_ICBT(c,a,b) stringify_in_c(.long PPC_INST_ICBT | \
+ __PPC_CT(c) | __PPC_RA0(a) | __PPC_RB(b))
/* PASemi instructions */
#define LBZCIX(t,a,b) stringify_in_c(.long PPC_INST_LBZCIX | \
__PPC_RT(t) | __PPC_RA(a) | __PPC_RB(b))
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index b5c91901e38..99c92d5363e 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -58,6 +58,22 @@ static inline int of_node_to_nid(struct device_node *device) { return 0; }
extern void of_instantiate_rtc(void);
+/* The of_drconf_cell struct defines the layout of the LMB array
+ * specified in the device tree property
+ * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory
+ */
+struct of_drconf_cell {
+ u64 base_addr;
+ u32 drc_index;
+ u32 reserved;
+ u32 aa_index;
+ u32 flags;
+};
+
+#define DRCONF_MEM_ASSIGNED 0x00000008
+#define DRCONF_MEM_AI_INVALID 0x00000040
+#define DRCONF_MEM_RESERVED 0x00000080
+
/* These includes are put at the bottom because they may contain things
* that are overridden by this file. Ideally they shouldn't be included
* by this file, but there are a bunch of .c files that currently depend
diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h
index eedf427c912..3e13e23e4fd 100644
--- a/arch/powerpc/include/asm/pte-hash64-64k.h
+++ b/arch/powerpc/include/asm/pte-hash64-64k.h
@@ -23,7 +23,7 @@
/* Note the full page bits must be in the same location as for normal
* 4k pages as the same assembly will be used to insert 64K pages
- * wether the kernel has CONFIG_PPC_64K_PAGES or not
+ * whether the kernel has CONFIG_PPC_64K_PAGES or not
*/
#define _PAGE_F_SECOND 0x00008000 /* full page: hidx bits */
#define _PAGE_F_GIX 0x00007000 /* full page: hidx bits */
diff --git a/arch/powerpc/include/asm/qe.h b/arch/powerpc/include/asm/qe.h
index 229571a4939..32b9bfa0c9b 100644
--- a/arch/powerpc/include/asm/qe.h
+++ b/arch/powerpc/include/asm/qe.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2006 Freescale Semicondutor, Inc. All rights reserved.
+ * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
*
* Authors: Shlomi Gridish <gridish@freescale.com>
* Li Yang <leoli@freescale.com>
diff --git a/arch/powerpc/include/asm/qe_ic.h b/arch/powerpc/include/asm/qe_ic.h
index f706164b0bd..25784cc959a 100644
--- a/arch/powerpc/include/asm/qe_ic.h
+++ b/arch/powerpc/include/asm/qe_ic.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2006 Freescale Semicondutor, Inc. All rights reserved.
+ * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
*
* Authors: Shlomi Gridish <gridish@freescale.com>
* Li Yang <leoli@freescale.com>
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index d24c1416396..3d5c9dc8917 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -249,6 +249,8 @@
#define LPCR_RMLS 0x1C000000 /* impl dependent rmo limit sel */
#define LPCR_RMLS_SH (63-37)
#define LPCR_ILE 0x02000000 /* !HV irqs set MSR:LE */
+#define LPCR_AIL_0 0x00000000 /* MMU off exception offset 0x0 */
+#define LPCR_AIL_3 0x01800000 /* MMU on exception offset 0xc00...4xxx */
#define LPCR_PECE 0x00007000 /* powersave exit cause enable */
#define LPCR_PECE0 0x00004000 /* ext. exceptions can cause exit */
#define LPCR_PECE1 0x00002000 /* decrementer can cause exit */
@@ -518,6 +520,7 @@
#define SRR1_WS_DEEPER 0x00020000 /* Some resources not maintained */
#define SRR1_WS_DEEP 0x00010000 /* All resources maintained */
#define SRR1_PROGFPE 0x00100000 /* Floating Point Enabled */
+#define SRR1_PROGILL 0x00080000 /* Illegal instruction */
#define SRR1_PROGPRIV 0x00040000 /* Privileged instruction */
#define SRR1_PROGTRAP 0x00020000 /* Trap */
#define SRR1_PROGADDR 0x00010000 /* SRR0 contains subsequent addr */
@@ -1029,6 +1032,7 @@
#define PVR_970MP 0x0044
#define PVR_970GX 0x0045
#define PVR_POWER7p 0x004A
+#define PVR_POWER8 0x004B
#define PVR_BE 0x0070
#define PVR_PA6T 0x0090
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 2d916c4982c..e07e6af5e1f 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -539,6 +539,13 @@
#define TCR_FIE 0x00800000 /* FIT Interrupt Enable */
#define TCR_ARE 0x00400000 /* Auto Reload Enable */
+#ifdef CONFIG_E500
+#define TCR_GET_WP(tcr) ((((tcr) & 0xC0000000) >> 30) | \
+ (((tcr) & 0x1E0000) >> 15))
+#else
+#define TCR_GET_WP(tcr) (((tcr) & 0xC0000000) >> 30)
+#endif
+
/* Bit definitions for the TSR. */
#define TSR_ENW 0x80000000 /* Enable Next Watchdog */
#define TSR_WIS 0x40000000 /* WDT Interrupt Status */
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 557cff845de..aef00c67590 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -353,8 +353,13 @@ static inline int page_is_rtas_user_buf(unsigned long pfn)
return 1;
return 0;
}
+
+/* Not the best place to put pSeries_coalesce_init, will be fixed when we
+ * move some of the rtas suspend-me stuff to pseries */
+extern void pSeries_coalesce_init(void);
#else
static inline int page_is_rtas_user_buf(unsigned long pfn) { return 0;}
+static inline void pSeries_coalesce_init(void) { }
#endif
extern int call_rtas(const char *, int, int, unsigned long *, ...);
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
new file mode 100644
index 00000000000..d3ca85529b8
--- /dev/null
+++ b/arch/powerpc/include/asm/setup.h
@@ -0,0 +1,29 @@
+#ifndef _ASM_POWERPC_SETUP_H
+#define _ASM_POWERPC_SETUP_H
+
+#include <uapi/asm/setup.h>
+
+#ifndef __ASSEMBLY__
+extern void ppc_printk_progress(char *s, unsigned short hex);
+
+extern unsigned int rtas_data;
+extern int mem_init_done; /* set on boot once kmalloc can be called */
+extern int init_bootmem_done; /* set once bootmem is available */
+extern unsigned long long memory_limit;
+extern unsigned long klimit;
+extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
+
+struct device_node;
+extern void note_scsi_host(struct device_node *, void *);
+
+/* Used in very early kernel initialization. */
+extern unsigned long reloc_offset(void);
+extern unsigned long add_reloc_offset(unsigned long);
+extern void reloc_got2(unsigned long);
+
+#define PTRRELOC(x) ((typeof(x)) add_reloc_offset((unsigned long)(x)))
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_SETUP_H */
+
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index e807e9d8e3f..5a4e437c238 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -67,6 +67,14 @@ void generic_mach_cpu_die(void);
void generic_set_cpu_dead(unsigned int cpu);
void generic_set_cpu_up(unsigned int cpu);
int generic_check_cpu_restart(unsigned int cpu);
+
+extern void inhibit_secondary_onlining(void);
+extern void uninhibit_secondary_onlining(void);
+
+#else /* HOTPLUG_CPU */
+static inline void inhibit_secondary_onlining(void) {}
+static inline void uninhibit_secondary_onlining(void) {}
+
#endif
#ifdef CONFIG_PPC64
diff --git a/arch/powerpc/include/asm/smu.h b/arch/powerpc/include/asm/smu.h
index ae20ce1af4c..6e909f3e6a4 100644
--- a/arch/powerpc/include/asm/smu.h
+++ b/arch/powerpc/include/asm/smu.h
@@ -132,7 +132,7 @@
*
* At this point, the OF driver seems to have a limitation on transfer
* sizes of 0xd bytes on reads and 0x5 bytes on writes. I do not know
- * wether this is just an OF limit due to some temporary buffer size
+ * whether this is just an OF limit due to some temporary buffer size
* or if this is an SMU imposed limit. This driver has the same limitation
* for now as I use a 0x10 bytes temporary buffer as well
*
@@ -236,7 +236,7 @@
* 3 (optional): enable nmi? [0x00 or 0x01]
*
* Returns:
- * If parameter 2 is 0x00 and parameter 3 is not specified, returns wether
+ * If parameter 2 is 0x00 and parameter 3 is not specified, returns whether
* NMI is enabled. Otherwise unknown.
*/
#define SMU_CMD_MISC_df_NMI_OPTION 0x04
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 84083876985..97909d3b1d7 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -164,7 +164,7 @@ COMPAT_SYS_SPU(sched_getscheduler)
SYSCALL_SPU(sched_yield)
COMPAT_SYS_SPU(sched_get_priority_max)
COMPAT_SYS_SPU(sched_get_priority_min)
-COMPAT_SYS_SPU(sched_rr_get_interval)
+SYSX_SPU(sys_sched_rr_get_interval,compat_sys_sched_rr_get_interval_wrapper,sys_sched_rr_get_interval)
COMPAT_SYS_SPU(nanosleep)
SYSCALL_SPU(mremap)
SYSCALL_SPU(setresuid)
@@ -356,3 +356,4 @@ COMPAT_SYS_SPU(sendmmsg)
SYSCALL_SPU(setns)
COMPAT_SYS(process_vm_readv)
COMPAT_SYS(process_vm_writev)
+SYSCALL(finit_module)
diff --git a/arch/powerpc/include/asm/ucc.h b/arch/powerpc/include/asm/ucc.h
index 46b09ba6bea..6927ac26516 100644
--- a/arch/powerpc/include/asm/ucc.h
+++ b/arch/powerpc/include/asm/ucc.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2006 Freescale Semicondutor, Inc. All rights reserved.
+ * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
*
* Authors: Shlomi Gridish <gridish@freescale.com>
* Li Yang <leoli@freescale.com>
diff --git a/arch/powerpc/include/asm/ucc_fast.h b/arch/powerpc/include/asm/ucc_fast.h
index 4644c840e2f..72ea9bab07d 100644
--- a/arch/powerpc/include/asm/ucc_fast.h
+++ b/arch/powerpc/include/asm/ucc_fast.h
@@ -1,7 +1,7 @@
/*
* Internal header file for UCC FAST unit routines.
*
- * Copyright (C) 2006 Freescale Semicondutor, Inc. All rights reserved.
+ * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
*
* Authors: Shlomi Gridish <gridish@freescale.com>
* Li Yang <leoli@freescale.com>
diff --git a/arch/powerpc/include/asm/ucc_slow.h b/arch/powerpc/include/asm/ucc_slow.h
index cf131ffdb8d..c44131e68e1 100644
--- a/arch/powerpc/include/asm/ucc_slow.h
+++ b/arch/powerpc/include/asm/ucc_slow.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2006 Freescale Semicondutor, Inc. All rights reserved.
+ * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
*
* Authors: Shlomi Gridish <gridish@freescale.com>
* Li Yang <leoli@freescale.com>
diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h
index b3038817b8d..5a7510e9d09 100644
--- a/arch/powerpc/include/asm/udbg.h
+++ b/arch/powerpc/include/asm/udbg.h
@@ -21,7 +21,6 @@ extern int (*udbg_getc_poll)(void);
extern void udbg_puts(const char *s);
extern int udbg_write(const char *s, int n);
-extern int udbg_read(char *buf, int buflen);
extern void register_early_udbg_console(void);
extern void udbg_printf(const char *fmt, ...)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 784872f9371..1d4864a40e3 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -12,7 +12,7 @@
#include <uapi/asm/unistd.h>
-#define __NR_syscalls 353
+#define __NR_syscalls 354
#define __NR__exit __NR_exit
#define NR_syscalls __NR_syscalls
@@ -54,6 +54,7 @@
#define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
#define __ARCH_WANT_SYS_NEWFSTATAT
#define __ARCH_WANT_COMPAT_SYS_SENDFILE
+#define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL
#endif
#define __ARCH_WANT_SYS_FORK
#define __ARCH_WANT_SYS_VFORK
diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild
index a33c3c03bb2..f7bca637074 100644
--- a/arch/powerpc/include/uapi/asm/Kbuild
+++ b/arch/powerpc/include/uapi/asm/Kbuild
@@ -7,6 +7,7 @@ header-y += bootx.h
header-y += byteorder.h
header-y += cputable.h
header-y += elf.h
+header-y += epapr_hcalls.h
header-y += errno.h
header-y += fcntl.h
header-y += ioctl.h
diff --git a/arch/powerpc/include/uapi/asm/epapr_hcalls.h b/arch/powerpc/include/uapi/asm/epapr_hcalls.h
new file mode 100644
index 00000000000..7f9c74b4670
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/epapr_hcalls.h
@@ -0,0 +1,98 @@
+/*
+ * ePAPR hcall interface
+ *
+ * Copyright 2008-2011 Freescale Semiconductor, Inc.
+ *
+ * Author: Timur Tabi <timur@freescale.com>
+ *
+ * This file is provided under a dual BSD/GPL license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _UAPI_ASM_POWERPC_EPAPR_HCALLS_H
+#define _UAPI_ASM_POWERPC_EPAPR_HCALLS_H
+
+#define EV_BYTE_CHANNEL_SEND 1
+#define EV_BYTE_CHANNEL_RECEIVE 2
+#define EV_BYTE_CHANNEL_POLL 3
+#define EV_INT_SET_CONFIG 4
+#define EV_INT_GET_CONFIG 5
+#define EV_INT_SET_MASK 6
+#define EV_INT_GET_MASK 7
+#define EV_INT_IACK 9
+#define EV_INT_EOI 10
+#define EV_INT_SEND_IPI 11
+#define EV_INT_SET_TASK_PRIORITY 12
+#define EV_INT_GET_TASK_PRIORITY 13
+#define EV_DOORBELL_SEND 14
+#define EV_MSGSND 15
+#define EV_IDLE 16
+
+/* vendor ID: epapr */
+#define EV_LOCAL_VENDOR_ID 0 /* for private use */
+#define EV_EPAPR_VENDOR_ID 1
+#define EV_FSL_VENDOR_ID 2 /* Freescale Semiconductor */
+#define EV_IBM_VENDOR_ID 3 /* IBM */
+#define EV_GHS_VENDOR_ID 4 /* Green Hills Software */
+#define EV_ENEA_VENDOR_ID 5 /* Enea */
+#define EV_WR_VENDOR_ID 6 /* Wind River Systems */
+#define EV_AMCC_VENDOR_ID 7 /* Applied Micro Circuits */
+#define EV_KVM_VENDOR_ID 42 /* KVM */
+
+/* The max number of bytes that a byte channel can send or receive per call */
+#define EV_BYTE_CHANNEL_MAX_BYTES 16
+
+
+#define _EV_HCALL_TOKEN(id, num) (((id) << 16) | (num))
+#define EV_HCALL_TOKEN(hcall_num) _EV_HCALL_TOKEN(EV_EPAPR_VENDOR_ID, hcall_num)
+
+/* epapr return codes */
+#define EV_SUCCESS 0
+#define EV_EPERM 1 /* Operation not permitted */
+#define EV_ENOENT 2 /* Entry Not Found */
+#define EV_EIO 3 /* I/O error occured */
+#define EV_EAGAIN 4 /* The operation had insufficient
+ * resources to complete and should be
+ * retried
+ */
+#define EV_ENOMEM 5 /* There was insufficient memory to
+ * complete the operation */
+#define EV_EFAULT 6 /* Bad guest address */
+#define EV_ENODEV 7 /* No such device */
+#define EV_EINVAL 8 /* An argument supplied to the hcall
+ was out of range or invalid */
+#define EV_INTERNAL 9 /* An internal error occured */
+#define EV_CONFIG 10 /* A configuration error was detected */
+#define EV_INVALID_STATE 11 /* The object is in an invalid state */
+#define EV_UNIMPLEMENTED 12 /* Unimplemented hypercall */
+#define EV_BUFFER_OVERFLOW 13 /* Caller-supplied buffer too small */
+
+#endif /* _UAPI_ASM_POWERPC_EPAPR_HCALLS_H */
diff --git a/arch/powerpc/include/uapi/asm/ioctls.h b/arch/powerpc/include/uapi/asm/ioctls.h
index e9b78870aaa..49a25796a61 100644
--- a/arch/powerpc/include/uapi/asm/ioctls.h
+++ b/arch/powerpc/include/uapi/asm/ioctls.h
@@ -97,6 +97,9 @@
#define TIOCGDEV _IOR('T',0x32, unsigned int) /* Get primary device node of /dev/console */
#define TIOCSIG _IOW('T',0x36, int) /* Generate signal on Pty slave */
#define TIOCVHANGUP 0x5437
+#define TIOCGPKT _IOR('T', 0x38, int) /* Get packet mode state */
+#define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */
+#define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */
#define TIOCSERCONFIG 0x5453
#define TIOCSERGWILD 0x5454
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 1bea4d8ea6f..2fba8a66fb1 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -221,6 +221,12 @@ struct kvm_sregs {
__u32 dbsr; /* KVM_SREGS_E_UPDATE_DBSR */
__u32 dbcr[3];
+ /*
+ * iac/dac registers are 64bit wide, while this API
+ * interface provides only lower 32 bits on 64 bit
+ * processors. ONE_REG interface is added for 64bit
+ * iac/dac registers.
+ */
__u32 iac[4];
__u32 dac[2];
__u32 dvc[2];
@@ -325,6 +331,86 @@ struct kvm_book3e_206_tlb_params {
__u32 reserved[8];
};
+/* For KVM_PPC_GET_HTAB_FD */
+struct kvm_get_htab_fd {
+ __u64 flags;
+ __u64 start_index;
+ __u64 reserved[2];
+};
+
+/* Values for kvm_get_htab_fd.flags */
+#define KVM_GET_HTAB_BOLTED_ONLY ((__u64)0x1)
+#define KVM_GET_HTAB_WRITE ((__u64)0x2)
+
+/*
+ * Data read on the file descriptor is formatted as a series of
+ * records, each consisting of a header followed by a series of
+ * `n_valid' HPTEs (16 bytes each), which are all valid. Following
+ * those valid HPTEs there are `n_invalid' invalid HPTEs, which
+ * are not represented explicitly in the stream. The same format
+ * is used for writing.
+ */
+struct kvm_get_htab_header {
+ __u32 index;
+ __u16 n_valid;
+ __u16 n_invalid;
+};
+
#define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1)
+#define KVM_REG_PPC_IAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2)
+#define KVM_REG_PPC_IAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3)
+#define KVM_REG_PPC_IAC3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x4)
+#define KVM_REG_PPC_IAC4 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x5)
+#define KVM_REG_PPC_DAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x6)
+#define KVM_REG_PPC_DAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x7)
+#define KVM_REG_PPC_DABR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8)
+#define KVM_REG_PPC_DSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x9)
+#define KVM_REG_PPC_PURR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa)
+#define KVM_REG_PPC_SPURR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb)
+#define KVM_REG_PPC_DAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc)
+#define KVM_REG_PPC_DSISR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xd)
+#define KVM_REG_PPC_AMR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xe)
+#define KVM_REG_PPC_UAMOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xf)
+
+#define KVM_REG_PPC_MMCR0 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x10)
+#define KVM_REG_PPC_MMCR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x11)
+#define KVM_REG_PPC_MMCRA (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x12)
+
+#define KVM_REG_PPC_PMC1 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x18)
+#define KVM_REG_PPC_PMC2 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x19)
+#define KVM_REG_PPC_PMC3 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1a)
+#define KVM_REG_PPC_PMC4 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1b)
+#define KVM_REG_PPC_PMC5 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1c)
+#define KVM_REG_PPC_PMC6 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1d)
+#define KVM_REG_PPC_PMC7 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1e)
+#define KVM_REG_PPC_PMC8 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1f)
+
+/* 32 floating-point registers */
+#define KVM_REG_PPC_FPR0 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x20)
+#define KVM_REG_PPC_FPR(n) (KVM_REG_PPC_FPR0 + (n))
+#define KVM_REG_PPC_FPR31 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3f)
+
+/* 32 VMX/Altivec vector registers */
+#define KVM_REG_PPC_VR0 (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x40)
+#define KVM_REG_PPC_VR(n) (KVM_REG_PPC_VR0 + (n))
+#define KVM_REG_PPC_VR31 (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x5f)
+
+/* 32 double-width FP registers for VSX */
+/* High-order halves overlap with FP regs */
+#define KVM_REG_PPC_VSR0 (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x60)
+#define KVM_REG_PPC_VSR(n) (KVM_REG_PPC_VSR0 + (n))
+#define KVM_REG_PPC_VSR31 (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x7f)
+
+/* FP and vector status/control registers */
+#define KVM_REG_PPC_FPSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x80)
+#define KVM_REG_PPC_VSCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x81)
+
+/* Virtual processor areas */
+/* For SLB & DTL, address in high (first) half, length in low half */
+#define KVM_REG_PPC_VPA_ADDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x82)
+#define KVM_REG_PPC_VPA_SLB (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x83)
+#define KVM_REG_PPC_VPA_DTL (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x84)
+
+#define KVM_REG_PPC_EPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85)
#endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/include/uapi/asm/kvm_para.h b/arch/powerpc/include/uapi/asm/kvm_para.h
index 5e04383a1db..ed0e0254b47 100644
--- a/arch/powerpc/include/uapi/asm/kvm_para.h
+++ b/arch/powerpc/include/uapi/asm/kvm_para.h
@@ -75,9 +75,10 @@ struct kvm_vcpu_arch_shared {
};
#define KVM_SC_MAGIC_R0 0x4b564d21 /* "KVM!" */
-#define HC_VENDOR_KVM (42 << 16)
-#define HC_EV_SUCCESS 0
-#define HC_EV_UNIMPLEMENTED 12
+
+#define KVM_HCALL_TOKEN(num) _EV_HCALL_TOKEN(EV_KVM_VENDOR_ID, num)
+
+#include <uapi/asm/epapr_hcalls.h>
#define KVM_FEATURE_MAGIC_PAGE 1
diff --git a/arch/powerpc/include/uapi/asm/setup.h b/arch/powerpc/include/uapi/asm/setup.h
index 8b9a306260b..552df83f1a4 100644
--- a/arch/powerpc/include/uapi/asm/setup.h
+++ b/arch/powerpc/include/uapi/asm/setup.h
@@ -1,32 +1 @@
-#ifndef _ASM_POWERPC_SETUP_H
-#define _ASM_POWERPC_SETUP_H
-
#include <asm-generic/setup.h>
-
-#ifndef __ASSEMBLY__
-extern void ppc_printk_progress(char *s, unsigned short hex);
-
-extern unsigned int rtas_data;
-extern int mem_init_done; /* set on boot once kmalloc can be called */
-extern int init_bootmem_done; /* set once bootmem is available */
-extern unsigned long long memory_limit;
-extern unsigned long klimit;
-extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
-
-extern void via_cuda_init(void);
-extern void read_rtc_time(void);
-extern void pmac_find_display(void);
-
-struct device_node;
-extern void note_scsi_host(struct device_node *, void *);
-
-/* Used in very early kernel initialization. */
-extern unsigned long reloc_offset(void);
-extern unsigned long add_reloc_offset(unsigned long);
-extern void reloc_got2(unsigned long);
-
-#define PTRRELOC(x) ((typeof(x)) add_reloc_offset((unsigned long)(x)))
-
-#endif /* !__ASSEMBLY__ */
-
-#endif /* _ASM_POWERPC_SETUP_H */
diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h
index 3d5179bb122..eb0b1864d40 100644
--- a/arch/powerpc/include/uapi/asm/socket.h
+++ b/arch/powerpc/include/uapi/asm/socket.h
@@ -47,6 +47,7 @@
/* Socket filtering */
#define SO_ATTACH_FILTER 26
#define SO_DETACH_FILTER 27
+#define SO_GET_FILTER SO_ATTACH_FILTER
#define SO_PEERNAME 28
#define SO_TIMESTAMP 29
diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h
index 380b5d37a90..8c478c6c6b1 100644
--- a/arch/powerpc/include/uapi/asm/unistd.h
+++ b/arch/powerpc/include/uapi/asm/unistd.h
@@ -375,6 +375,7 @@
#define __NR_setns 350
#define __NR_process_vm_readv 351
#define __NR_process_vm_writev 352
+#define __NR_finit_module 353
#endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index cde12f8a4eb..8f619342f14 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -38,7 +38,7 @@ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
paca.o nvram_64.o firmware.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
-obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power7.o
+obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
obj64-$(CONFIG_RELOCATABLE) += reloc_64.o
obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o
obj-$(CONFIG_PPC_A2) += cpu_setup_a2.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 7523539cfe9..4e23ba2f3ca 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -441,8 +441,7 @@ int main(void)
DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1));
DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock));
- DEFINE(KVM_ONLINE_CPUS, offsetof(struct kvm, online_vcpus.counter));
- DEFINE(KVM_LAST_VCPU, offsetof(struct kvm, arch.last_vcpu));
+ DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits));
DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr));
DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor));
DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
@@ -470,7 +469,6 @@ int main(void)
DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb));
DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max));
DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr));
- DEFINE(VCPU_LAST_CPU, offsetof(struct kvm_vcpu, arch.last_cpu));
DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr));
DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar));
DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
diff --git a/arch/powerpc/kernel/cpu_setup_power7.S b/arch/powerpc/kernel/cpu_setup_power.S
index 76797c5105d..57cf14065ae 100644
--- a/arch/powerpc/kernel/cpu_setup_power7.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -27,6 +27,7 @@ _GLOBAL(__setup_cpu_power7)
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ mfspr r3,SPRN_LPCR
bl __init_LPCR
bl __init_TLB
mtlr r11
@@ -39,6 +40,35 @@ _GLOBAL(__restore_cpu_power7)
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ mfspr r3,SPRN_LPCR
+ bl __init_LPCR
+ bl __init_TLB
+ mtlr r11
+ blr
+
+_GLOBAL(__setup_cpu_power8)
+ mflr r11
+ bl __init_hvmode_206
+ mtlr r11
+ beqlr
+ li r0,0
+ mtspr SPRN_LPID,r0
+ mfspr r3,SPRN_LPCR
+ oris r3, r3, LPCR_AIL_3@h
+ bl __init_LPCR
+ bl __init_TLB
+ mtlr r11
+ blr
+
+_GLOBAL(__restore_cpu_power8)
+ mflr r11
+ mfmsr r3
+ rldicl. r0,r3,4,63
+ beqlr
+ li r0,0
+ mtspr SPRN_LPID,r0
+ mfspr r3,SPRN_LPCR
+ oris r3, r3, LPCR_AIL_3@h
bl __init_LPCR
bl __init_TLB
mtlr r11
@@ -57,6 +87,7 @@ __init_hvmode_206:
__init_LPCR:
/* Setup a sane LPCR:
+ * Called with initial LPCR in R3
*
* LPES = 0b01 (HSRR0/1 used for 0x500)
* PECE = 0b111
@@ -67,7 +98,6 @@ __init_LPCR:
*
* Other bits untouched for now
*/
- mfspr r3,SPRN_LPCR
li r5,1
rldimi r3,r5, LPCR_LPES_SH, 64-LPCR_LPES_SH-2
ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2)
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 0514c21f138..75a3d71b895 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -68,6 +68,8 @@ extern void __restore_cpu_pa6t(void);
extern void __restore_cpu_ppc970(void);
extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec);
extern void __restore_cpu_power7(void);
+extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec);
+extern void __restore_cpu_power8(void);
extern void __restore_cpu_a2(void);
#endif /* CONFIG_PPC64 */
#if defined(CONFIG_E500)
@@ -94,6 +96,10 @@ extern void __restore_cpu_e5500(void);
PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
PPC_FEATURE_TRUE_LE | \
PPC_FEATURE_PSERIES_PERFMON_COMPAT)
+#define COMMON_USER_POWER8 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\
+ PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
+ PPC_FEATURE_TRUE_LE | \
+ PPC_FEATURE_PSERIES_PERFMON_COMPAT)
#define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\
PPC_FEATURE_TRUE_LE | \
PPC_FEATURE_HAS_ALTIVEC_COMP)
@@ -429,6 +435,21 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_restore = __restore_cpu_power7,
.platform = "power7",
},
+ { /* 2.07-compliant processor, i.e. Power8 "architected" mode */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x0f000004,
+ .cpu_name = "POWER8 (architected)",
+ .cpu_features = CPU_FTRS_POWER8,
+ .cpu_user_features = COMMON_USER_POWER8,
+ .mmu_features = MMU_FTRS_POWER8,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .oprofile_type = PPC_OPROFILE_POWER4,
+ .oprofile_cpu_type = "ppc64/ibm-compat-v1",
+ .cpu_setup = __setup_cpu_power8,
+ .cpu_restore = __restore_cpu_power8,
+ .platform = "power8",
+ },
{ /* Power7 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x003f0000,
@@ -463,6 +484,23 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_restore = __restore_cpu_power7,
.platform = "power7+",
},
+ { /* Power8 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x004b0000,
+ .cpu_name = "POWER8 (raw)",
+ .cpu_features = CPU_FTRS_POWER8,
+ .cpu_user_features = COMMON_USER_POWER8,
+ .mmu_features = MMU_FTRS_POWER8,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power8",
+ .oprofile_type = PPC_OPROFILE_POWER4,
+ .cpu_setup = __setup_cpu_power8,
+ .cpu_restore = __restore_cpu_power8,
+ .platform = "power8",
+ },
{ /* Cell Broadband Engine */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00700000,
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index e9a906c2723..b310a057362 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -373,6 +373,8 @@ _GLOBAL(ret_from_fork)
_GLOBAL(ret_from_kernel_thread)
bl .schedule_tail
REST_NVGPRS(r1)
+ li r3,0
+ std r3,0(r1)
ld r14, 0(r14)
mtlr r14
mr r3,r15
diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S
index 697b390ebfd..62c0dc23782 100644
--- a/arch/powerpc/kernel/epapr_hcalls.S
+++ b/arch/powerpc/kernel/epapr_hcalls.S
@@ -8,13 +8,41 @@
*/
#include <linux/threads.h>
+#include <asm/epapr_hcalls.h>
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
+#include <asm/asm-compat.h>
#include <asm/asm-offsets.h>
+/* epapr_ev_idle() was derived from e500_idle() */
+_GLOBAL(epapr_ev_idle)
+ CURRENT_THREAD_INFO(r3, r1)
+ PPC_LL r4, TI_LOCAL_FLAGS(r3) /* set napping bit */
+ ori r4, r4,_TLF_NAPPING /* so when we take an exception */
+ PPC_STL r4, TI_LOCAL_FLAGS(r3) /* it will return to our caller */
+
+ wrteei 1
+
+idle_loop:
+ LOAD_REG_IMMEDIATE(r11, EV_HCALL_TOKEN(EV_IDLE))
+
+.global epapr_ev_idle_start
+epapr_ev_idle_start:
+ li r3, -1
+ nop
+ nop
+ nop
+
+ /*
+ * Guard against spurious wakeups from a hypervisor --
+ * only interrupt will cause us to return to LR due to
+ * _TLF_NAPPING.
+ */
+ b idle_loop
+
/* Hypercall entry point. Will be patched with device tree instructions. */
.global epapr_hypercall_start
epapr_hypercall_start:
diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c
index 028aeae370b..f3eab8594d9 100644
--- a/arch/powerpc/kernel/epapr_paravirt.c
+++ b/arch/powerpc/kernel/epapr_paravirt.c
@@ -21,6 +21,10 @@
#include <asm/epapr_hcalls.h>
#include <asm/cacheflush.h>
#include <asm/code-patching.h>
+#include <asm/machdep.h>
+
+extern void epapr_ev_idle(void);
+extern u32 epapr_ev_idle_start[];
bool epapr_paravirt_enabled;
@@ -41,8 +45,13 @@ static int __init epapr_paravirt_init(void)
if (len % 4 || len > (4 * 4))
return -ENODEV;
- for (i = 0; i < (len / 4); i++)
+ for (i = 0; i < (len / 4); i++) {
patch_instruction(epapr_hypercall_start + i, insts[i]);
+ patch_instruction(epapr_ev_idle_start + i, insts[i]);
+ }
+
+ if (of_get_property(hyper_node, "has-idle", NULL))
+ ppc_md.power_save = epapr_ev_idle;
epapr_paravirt_enabled = true;
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 10b658ad65e..4665e82fa37 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -19,12 +19,76 @@
/*
* We layout physical memory as follows:
* 0x0000 - 0x00ff : Secondary processor spin code
- * 0x0100 - 0x2fff : pSeries Interrupt prologs
- * 0x3000 - 0x5fff : interrupt support common interrupt prologs
- * 0x6000 - 0x6fff : Initial (CPU0) segment table
+ * 0x0100 - 0x17ff : pSeries Interrupt prologs
+ * 0x1800 - 0x4000 : interrupt support common interrupt prologs
+ * 0x4000 - 0x5fff : pSeries interrupts with IR=1,DR=1
+ * 0x6000 - 0x6fff : more interrupt support including for IR=1,DR=1
* 0x7000 - 0x7fff : FWNMI data area
- * 0x8000 - : Early init and support code
+ * 0x8000 - 0x8fff : Initial (CPU0) segment table
+ * 0x9000 - : Early init and support code
*/
+ /* Syscall routine is used twice, in reloc-off and reloc-on paths */
+#define SYSCALL_PSERIES_1 \
+BEGIN_FTR_SECTION \
+ cmpdi r0,0x1ebe ; \
+ beq- 1f ; \
+END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
+ mr r9,r13 ; \
+ GET_PACA(r13) ; \
+ mfspr r11,SPRN_SRR0 ; \
+0:
+
+#define SYSCALL_PSERIES_2_RFID \
+ mfspr r12,SPRN_SRR1 ; \
+ ld r10,PACAKBASE(r13) ; \
+ LOAD_HANDLER(r10, system_call_entry) ; \
+ mtspr SPRN_SRR0,r10 ; \
+ ld r10,PACAKMSR(r13) ; \
+ mtspr SPRN_SRR1,r10 ; \
+ rfid ; \
+ b . ; /* prevent speculative execution */
+
+#define SYSCALL_PSERIES_3 \
+ /* Fast LE/BE switch system call */ \
+1: mfspr r12,SPRN_SRR1 ; \
+ xori r12,r12,MSR_LE ; \
+ mtspr SPRN_SRR1,r12 ; \
+ rfid ; /* return to userspace */ \
+ b . ; \
+2: mfspr r12,SPRN_SRR1 ; \
+ andi. r12,r12,MSR_PR ; \
+ bne 0b ; \
+ mtspr SPRN_SRR0,r3 ; \
+ mtspr SPRN_SRR1,r4 ; \
+ mtspr SPRN_SDR1,r5 ; \
+ rfid ; \
+ b . ; /* prevent speculative execution */
+
+#if defined(CONFIG_RELOCATABLE)
+ /*
+ * We can't branch directly; in the direct case we use LR
+ * and system_call_entry restores LR. (We thus need to move
+ * LR to r10 in the RFID case too.)
+ */
+#define SYSCALL_PSERIES_2_DIRECT \
+ mflr r10 ; \
+ ld r12,PACAKBASE(r13) ; \
+ LOAD_HANDLER(r12, system_call_entry_direct) ; \
+ mtlr r12 ; \
+ mfspr r12,SPRN_SRR1 ; \
+ /* Re-use of r13... No spare regs to do this */ \
+ li r13,MSR_RI ; \
+ mtmsrd r13,1 ; \
+ GET_PACA(r13) ; /* get r13 back */ \
+ blr ;
+#else
+ /* We can branch directly */
+#define SYSCALL_PSERIES_2_DIRECT \
+ mfspr r12,SPRN_SRR1 ; \
+ li r10,MSR_RI ; \
+ mtmsrd r10,1 ; /* Set RI (EE=0) */ \
+ b system_call_entry_direct ;
+#endif
/*
* This is the start of the interrupt handlers for pSeries
@@ -207,31 +271,11 @@ system_call_pSeries:
KVMTEST(0xc00)
GET_SCRATCH0(r13)
#endif
-BEGIN_FTR_SECTION
- cmpdi r0,0x1ebe
- beq- 1f
-END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
- mr r9,r13
- GET_PACA(r13)
- mfspr r11,SPRN_SRR0
- mfspr r12,SPRN_SRR1
- ld r10,PACAKBASE(r13)
- LOAD_HANDLER(r10, system_call_entry)
- mtspr SPRN_SRR0,r10
- ld r10,PACAKMSR(r13)
- mtspr SPRN_SRR1,r10
- rfid
- b . /* prevent speculative execution */
-
+ SYSCALL_PSERIES_1
+ SYSCALL_PSERIES_2_RFID
+ SYSCALL_PSERIES_3
KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00)
-/* Fast LE/BE switch system call */
-1: mfspr r12,SPRN_SRR1
- xori r12,r12,MSR_LE
- mtspr SPRN_SRR1,r12
- rfid /* return to userspace */
- b .
-
STD_EXCEPTION_PSERIES(0xd00, 0xd00, single_step)
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xd00)
@@ -276,7 +320,7 @@ vsx_unavailable_pSeries_1:
KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_STD, 0x1300)
. = 0x1500
- .global denorm_Hypervisor
+ .global denorm_exception_hv
denorm_exception_hv:
HMT_MEDIUM
mtspr SPRN_SPRG_HSCRATCH0,r13
@@ -311,12 +355,14 @@ denorm_exception_hv:
#ifdef CONFIG_CBE_RAS
STD_EXCEPTION_HV(0x1800, 0x1802, cbe_thermal)
KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1802)
+#else
+ . = 0x1800
#endif /* CONFIG_CBE_RAS */
- . = 0x3000
/*** Out of line interrupts support ***/
+ .align 7
/* moved from 0x200 */
machine_check_pSeries:
.globl machine_check_fwnmi
@@ -575,16 +621,12 @@ slb_miss_user_pseries:
b . /* prevent spec. execution */
#endif /* __DISABLED__ */
- .align 7
- .globl __end_interrupts
-__end_interrupts:
-
/*
* Code from here down to __end_handlers is invoked from the
* exception prologs above. Because the prologs assemble the
* addresses of these handlers using the LOAD_HANDLER macro,
- * which uses an addi instruction, these handlers must be in
- * the first 32k of the kernel image.
+ * which uses an ori instruction, these handlers must be in
+ * the first 64k of the kernel image.
*/
/*** Common interrupt handlers ***/
@@ -613,8 +655,8 @@ machine_check_common:
STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception)
STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception)
STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception)
- STD_EXCEPTION_COMMON(0xe40, emulation_assist, .program_check_exception)
- STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception)
+ STD_EXCEPTION_COMMON(0xe40, emulation_assist, .program_check_exception)
+ STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception)
STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, .performance_monitor_exception)
STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception)
STD_EXCEPTION_COMMON(0x1502, denorm, .unknown_exception)
@@ -629,7 +671,158 @@ machine_check_common:
STD_EXCEPTION_COMMON(0x1800, cbe_thermal, .cbe_thermal_exception)
#endif /* CONFIG_CBE_RAS */
+ /*
+ * Relocation-on interrupts: A subset of the interrupts can be delivered
+ * with IR=1/DR=1, if AIL==2 and MSR.HV won't be changed by delivering
+ * it. Addresses are the same as the original interrupt addresses, but
+ * offset by 0xc000000000004000.
+ * It's impossible to receive interrupts below 0x300 via this mechanism.
+ * KVM: None of these traps are from the guest ; anything that escalated
+ * to HV=1 from HV=0 is delivered via real mode handlers.
+ */
+
+ /*
+ * This uses the standard macro, since the original 0x300 vector
+ * only has extra guff for STAB-based processors -- which never
+ * come here.
+ */
+ STD_RELON_EXCEPTION_PSERIES(0x4300, 0x300, data_access)
+ . = 0x4380
+ .globl data_access_slb_relon_pSeries
+data_access_slb_relon_pSeries:
+ HMT_MEDIUM
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380)
+ std r3,PACA_EXSLB+EX_R3(r13)
+ mfspr r3,SPRN_DAR
+ mfspr r12,SPRN_SRR1
+#ifndef CONFIG_RELOCATABLE
+ b .slb_miss_realmode
+#else
+ /*
+ * We can't just use a direct branch to .slb_miss_realmode
+ * because the distance from here to there depends on where
+ * the kernel ends up being put.
+ */
+ mfctr r11
+ ld r10,PACAKBASE(r13)
+ LOAD_HANDLER(r10, .slb_miss_realmode)
+ mtctr r10
+ bctr
+#endif
+
+ STD_RELON_EXCEPTION_PSERIES(0x4400, 0x400, instruction_access)
+ . = 0x4480
+ .globl instruction_access_slb_relon_pSeries
+instruction_access_slb_relon_pSeries:
+ HMT_MEDIUM
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480)
+ std r3,PACA_EXSLB+EX_R3(r13)
+ mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
+ mfspr r12,SPRN_SRR1
+#ifndef CONFIG_RELOCATABLE
+ b .slb_miss_realmode
+#else
+ mfctr r11
+ ld r10,PACAKBASE(r13)
+ LOAD_HANDLER(r10, .slb_miss_realmode)
+ mtctr r10
+ bctr
+#endif
+
+ . = 0x4500
+ .globl hardware_interrupt_relon_pSeries;
+ .globl hardware_interrupt_relon_hv;
+hardware_interrupt_relon_pSeries:
+hardware_interrupt_relon_hv:
+ BEGIN_FTR_SECTION
+ _MASKABLE_RELON_EXCEPTION_PSERIES(0x502, hardware_interrupt, EXC_HV, SOFTEN_TEST_HV)
+ FTR_SECTION_ELSE
+ _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt, EXC_STD, SOFTEN_TEST_PR)
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_206)
+ STD_RELON_EXCEPTION_PSERIES(0x4600, 0x600, alignment)
+ STD_RELON_EXCEPTION_PSERIES(0x4700, 0x700, program_check)
+ STD_RELON_EXCEPTION_PSERIES(0x4800, 0x800, fp_unavailable)
+ MASKABLE_RELON_EXCEPTION_PSERIES(0x4900, 0x900, decrementer)
+ STD_RELON_EXCEPTION_HV(0x4980, 0x982, hdecrementer)
+ STD_RELON_EXCEPTION_PSERIES(0x4b00, 0xb00, trap_0b)
+
+ . = 0x4c00
+ .globl system_call_relon_pSeries
+system_call_relon_pSeries:
+ HMT_MEDIUM
+ SYSCALL_PSERIES_1
+ SYSCALL_PSERIES_2_DIRECT
+ SYSCALL_PSERIES_3
+
+ STD_RELON_EXCEPTION_PSERIES(0x4d00, 0xd00, single_step)
+
+ . = 0x4e00
+ b h_data_storage_relon_hv
+
+ . = 0x4e20
+ b h_instr_storage_relon_hv
+
+ . = 0x4e40
+ b emulation_assist_relon_hv
+
+ . = 0x4e50
+ b hmi_exception_relon_hv
+
+ . = 0x4e60
+ b hmi_exception_relon_hv
+
+ /* For when we support the doorbell interrupt:
+ STD_RELON_EXCEPTION_HYPERVISOR(0x4e80, 0xe80, doorbell_hyper)
+ */
+
+performance_monitor_relon_pSeries_1:
+ . = 0x4f00
+ b performance_monitor_relon_pSeries
+
+altivec_unavailable_relon_pSeries_1:
+ . = 0x4f20
+ b altivec_unavailable_relon_pSeries
+
+vsx_unavailable_relon_pSeries_1:
+ . = 0x4f40
+ b vsx_unavailable_relon_pSeries
+
+#ifdef CONFIG_CBE_RAS
+ STD_RELON_EXCEPTION_HV(0x5200, 0x1202, cbe_system_error)
+#endif /* CONFIG_CBE_RAS */
+ STD_RELON_EXCEPTION_PSERIES(0x5300, 0x1300, instruction_breakpoint)
+#ifdef CONFIG_PPC_DENORMALISATION
+ . = 0x5500
+ b denorm_exception_hv
+#endif
+#ifdef CONFIG_CBE_RAS
+ STD_RELON_EXCEPTION_HV(0x5600, 0x1602, cbe_maintenance)
+#else
+#ifdef CONFIG_HVC_SCOM
+ STD_RELON_EXCEPTION_HV(0x5600, 0x1600, maintence_interrupt)
+ KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1600)
+#endif /* CONFIG_HVC_SCOM */
+#endif /* CONFIG_CBE_RAS */
+ STD_RELON_EXCEPTION_PSERIES(0x5700, 0x1700, altivec_assist)
+#ifdef CONFIG_CBE_RAS
+ STD_RELON_EXCEPTION_HV(0x5800, 0x1802, cbe_thermal)
+#endif /* CONFIG_CBE_RAS */
+
+ /* Other future vectors */
.align 7
+ .globl __end_interrupts
+__end_interrupts:
+
+ .align 7
+system_call_entry_direct:
+#if defined(CONFIG_RELOCATABLE)
+ /* The first level prologue may have used LR to get here, saving
+ * orig in r10. To save hacking/ifdeffing common code, restore here.
+ */
+ mtlr r10
+#endif
system_call_entry:
b system_call_common
@@ -714,21 +907,21 @@ data_access_common:
ld r3,PACA_EXGEN+EX_DAR(r13)
lwz r4,PACA_EXGEN+EX_DSISR(r13)
li r5,0x300
- b .do_hash_page /* Try to handle as hpte fault */
+ b .do_hash_page /* Try to handle as hpte fault */
.align 7
- .globl h_data_storage_common
+ .globl h_data_storage_common
h_data_storage_common:
- mfspr r10,SPRN_HDAR
- std r10,PACA_EXGEN+EX_DAR(r13)
- mfspr r10,SPRN_HDSISR
- stw r10,PACA_EXGEN+EX_DSISR(r13)
- EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN)
- bl .save_nvgprs
+ mfspr r10,SPRN_HDAR
+ std r10,PACA_EXGEN+EX_DAR(r13)
+ mfspr r10,SPRN_HDSISR
+ stw r10,PACA_EXGEN+EX_DSISR(r13)
+ EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN)
+ bl .save_nvgprs
DISABLE_INTS
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl .unknown_exception
- b .ret_from_except
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .unknown_exception
+ b .ret_from_except
.align 7
.globl instruction_access_common
@@ -741,7 +934,7 @@ instruction_access_common:
li r5,0x400
b .do_hash_page /* Try to handle as hpte fault */
- STD_EXCEPTION_COMMON(0xe20, h_instr_storage, .unknown_exception)
+ STD_EXCEPTION_COMMON(0xe20, h_instr_storage, .unknown_exception)
/*
* Here is the common SLB miss user that is used when going to virtual
@@ -1152,6 +1345,21 @@ _GLOBAL(do_stab_bolted)
rfid
b . /* prevent speculative execution */
+
+ /* Equivalents to the above handlers for relocation-on interrupt vectors */
+ STD_RELON_EXCEPTION_HV(., 0xe00, h_data_storage)
+ KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe00)
+ STD_RELON_EXCEPTION_HV(., 0xe20, h_instr_storage)
+ KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe20)
+ STD_RELON_EXCEPTION_HV(., 0xe40, emulation_assist)
+ KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe40)
+ STD_RELON_EXCEPTION_HV(., 0xe60, hmi_exception)
+ KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe60)
+
+ STD_RELON_EXCEPTION_PSERIES(., 0xf00, performance_monitor)
+ STD_RELON_EXCEPTION_PSERIES(., 0xf20, altivec_unavailable)
+ STD_RELON_EXCEPTION_PSERIES(., 0xf40, vsx_unavailable)
+
#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
/*
* Data area reserved for FWNMI option.
@@ -1164,7 +1372,7 @@ fwnmi_data_area:
/* pseries and powernv need to keep the whole page from
* 0x7000 to 0x8000 free for use by the firmware
*/
- . = 0x8000
+ . = 0x8000
#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
/* Space for CPU0's segment table */
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 58bddee8e1e..116f0868695 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -422,7 +422,7 @@ _STATIC(__after_prom_start)
tovirt(r6,r6) /* on booke, we already run at PAGE_OFFSET */
#endif
-#ifdef CONFIG_CRASH_DUMP
+#ifdef CONFIG_RELOCATABLE
/*
* Check if the kernel has to be running as relocatable kernel based on the
* variable __run_at_load, if it is set the kernel is treated as relocatable
@@ -432,7 +432,8 @@ _STATIC(__after_prom_start)
cmplwi cr0,r7,1
bne 3f
- li r5,__end_interrupts - _stext /* just copy interrupts */
+ /* just copy interrupts */
+ LOAD_REG_IMMEDIATE(r5, __end_interrupts - _stext)
b 5f
3:
#endif
@@ -703,6 +704,7 @@ _INIT_STATIC(start_here_multiplatform)
#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
/* Setup OPAL entry */
+ LOAD_REG_ADDR(r11, opal)
std r28,0(r11);
std r29,8(r11);
#endif
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index 2099d9a879e..ea78761aa16 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -55,9 +55,6 @@ __setup("powersave=off", powersave_off);
*/
void cpu_idle(void)
{
- if (ppc_md.idle_loop)
- ppc_md.idle_loop(); /* doesn't return */
-
set_thread_flag(TIF_POLLING_NRFLAG);
while (1) {
tick_nohz_idle_enter();
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 8226c6cb348..c862fd716fe 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -656,7 +656,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
struct iommu_pool *p;
/* number of bytes needed for the bitmap */
- sz = (tbl->it_size + 7) >> 3;
+ sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
page = alloc_pages_node(nid, GFP_ATOMIC, get_order(sz));
if (!page)
@@ -708,7 +708,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
void iommu_free_table(struct iommu_table *tbl, const char *node_name)
{
- unsigned long bitmap_sz, i;
+ unsigned long bitmap_sz;
unsigned int order;
if (!tbl || !tbl->it_map) {
@@ -718,17 +718,11 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
}
/* verify that table contains no entries */
- /* it_size is in entries, and we're examining 64 at a time */
- for (i = 0; i < (tbl->it_size/64); i++) {
- if (tbl->it_map[i] != 0) {
- printk(KERN_WARNING "%s: Unexpected TCEs for %s\n",
- __func__, node_name);
- break;
- }
- }
+ if (!bitmap_empty(tbl->it_map, tbl->it_size))
+ pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name);
/* calculate bitmap size in bytes */
- bitmap_sz = (tbl->it_size + 7) / 8;
+ bitmap_sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
/* free bitmap */
order = get_order(bitmap_sz);
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 867db1de894..a61b133c4f9 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -419,7 +419,7 @@ static void kvm_map_magic_page(void *data)
in[0] = KVM_MAGIC_PAGE;
in[1] = KVM_MAGIC_PAGE;
- kvm_hypercall(in, out, HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE);
+ kvm_hypercall(in, out, KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE));
*features = out[0];
}
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index bedd12e1cfb..0733b05eb85 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -387,7 +387,7 @@ void __init find_legacy_serial_ports(void)
of_node_put(parent);
continue;
}
- /* Check for known pciclass, and also check wether we have
+ /* Check for known pciclass, and also check whether we have
* a device with child nodes for ports or not
*/
if (of_device_is_compatible(np, "pciclass,0700") ||
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index fa9f6c72f55..e1ec57e87b3 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -218,23 +218,23 @@ static void __init export_crashk_values(struct device_node *node)
* be sure what's in them, so remove them. */
prop = of_find_property(node, "linux,crashkernel-base", NULL);
if (prop)
- prom_remove_property(node, prop);
+ of_remove_property(node, prop);
prop = of_find_property(node, "linux,crashkernel-size", NULL);
if (prop)
- prom_remove_property(node, prop);
+ of_remove_property(node, prop);
if (crashk_res.start != 0) {
- prom_add_property(node, &crashk_base_prop);
+ of_add_property(node, &crashk_base_prop);
crashk_size = resource_size(&crashk_res);
- prom_add_property(node, &crashk_size_prop);
+ of_add_property(node, &crashk_size_prop);
}
/*
* memory_limit is required by the kexec-tools to limit the
* crash regions to the actual memory used.
*/
- prom_update_property(node, &memory_limit_prop);
+ of_update_property(node, &memory_limit_prop);
}
static int __init kexec_setup(void)
@@ -249,11 +249,11 @@ static int __init kexec_setup(void)
/* remove any stale properties so ours can be found */
prop = of_find_property(node, kernel_end_prop.name, NULL);
if (prop)
- prom_remove_property(node, prop);
+ of_remove_property(node, prop);
/* information needed by userspace when using default_machine_kexec */
kernel_end = __pa(_end);
- prom_add_property(node, &kernel_end_prop);
+ of_add_property(node, &kernel_end_prop);
export_crashk_values(node);
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index d7f609086a9..7206701b1ff 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -389,14 +389,14 @@ static int __init export_htab_values(void)
/* remove any stale propertys so ours can be found */
prop = of_find_property(node, htab_base_prop.name, NULL);
if (prop)
- prom_remove_property(node, prop);
+ of_remove_property(node, prop);
prop = of_find_property(node, htab_size_prop.name, NULL);
if (prop)
- prom_remove_property(node, prop);
+ of_remove_property(node, prop);
htab_base = __pa(htab_address);
- prom_add_property(node, &htab_base_prop);
- prom_add_property(node, &htab_size_prop);
+ of_add_property(node, &htab_base_prop);
+ of_add_property(node, &htab_size_prop);
of_node_put(node);
return 0;
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index 2049f2d00ff..9db8ec07ec9 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -82,7 +82,7 @@ static int __devinit of_pci_phb_probe(struct platform_device *dev)
return -ENXIO;
/* Claim resources. This might need some rework as well depending
- * wether we are doing probe-only or not, like assigning unassigned
+ * whether we are doing probe-only or not, like assigning unassigned
* resources etc...
*/
pcibios_claim_one_bus(phb->bus);
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 7f94f760dd0..abc0d085699 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1428,8 +1428,6 @@ void __init pcibios_resource_survey(void)
ppc_md.pcibios_fixup();
}
-#ifdef CONFIG_HOTPLUG
-
/* This is used by the PCI hotplug driver to allocate resource
* of newly plugged busses. We can try to consolidate with the
* rest of the code later, for now, keep it as-is as our main
@@ -1488,8 +1486,6 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus)
}
EXPORT_SYMBOL_GPL(pcibios_finish_adding_to_bus);
-#endif /* CONFIG_HOTPLUG */
-
int pcibios_enable_device(struct pci_dev *dev, int mask)
{
if (ppc_md.pcibios_enable_device_hook)
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index 4b06ec5a502..64f526a321f 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -208,7 +208,7 @@ pci_create_OF_bus_map(void)
of_prop->name = "pci-OF-bus-map";
of_prop->length = 256;
of_prop->value = &of_prop[1];
- prom_add_property(dn, of_prop);
+ of_add_property(dn, of_prop);
of_node_put(dn);
}
}
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 4ff190ff24a..2cbe6768fdd 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -74,8 +74,6 @@ static int __init pcibios_init(void)
subsys_initcall(pcibios_init);
-#ifdef CONFIG_HOTPLUG
-
int pcibios_unmap_io_space(struct pci_bus *bus)
{
struct pci_controller *hose;
@@ -124,8 +122,6 @@ int pcibios_unmap_io_space(struct pci_bus *bus)
}
EXPORT_SYMBOL_GPL(pcibios_unmap_io_space);
-#endif /* CONFIG_HOTPLUG */
-
static int __devinit pcibios_map_phb_io_space(struct pci_controller *hose)
{
struct vm_struct *area;
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 19e4288d848..78b8766fd79 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -43,6 +43,7 @@
#include <asm/dcr.h>
#include <asm/ftrace.h>
#include <asm/switch_to.h>
+#include <asm/epapr_hcalls.h>
#ifdef CONFIG_PPC32
extern void transfer_to_handler(void);
@@ -191,3 +192,7 @@ EXPORT_SYMBOL(__arch_hweight64);
#ifdef CONFIG_PPC_BOOK3S_64
EXPORT_SYMBOL_GPL(mmu_psize_defs);
#endif
+
+#ifdef CONFIG_EPAPR_PARAVIRT
+EXPORT_SYMBOL(epapr_hypercall_start);
+#endif
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 37725e86651..8b6f7a99cce 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -32,6 +32,7 @@
#include <linux/debugfs.h>
#include <linux/irq.h>
#include <linux/memblock.h>
+#include <linux/of.h>
#include <asm/prom.h>
#include <asm/rtas.h>
@@ -49,11 +50,11 @@
#include <asm/btext.h>
#include <asm/sections.h>
#include <asm/machdep.h>
-#include <asm/pSeries_reconfig.h>
#include <asm/pci-bridge.h>
#include <asm/kexec.h>
#include <asm/opal.h>
#include <asm/fadump.h>
+#include <asm/debug.h>
#include <mm/mmu_decl.h>
@@ -802,7 +803,7 @@ static int prom_reconfig_notifier(struct notifier_block *nb,
int err;
switch (action) {
- case PSERIES_RECONFIG_ADD:
+ case OF_RECONFIG_ATTACH_NODE:
err = of_finish_dynamic_node(node);
if (err < 0)
printk(KERN_ERR "finish_node returned %d\n", err);
@@ -821,7 +822,7 @@ static struct notifier_block prom_reconfig_nb = {
static int __init prom_reconfig_setup(void)
{
- return pSeries_reconfig_notifier_register(&prom_reconfig_nb);
+ return of_reconfig_notifier_register(&prom_reconfig_nb);
}
__initcall(prom_reconfig_setup);
#endif
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index cb6c123722a..779f34049a5 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -671,6 +671,7 @@ static void __init early_cmdline_parse(void)
#define OV1_PPC_2_04 0x08 /* set if we support PowerPC 2.04 */
#define OV1_PPC_2_05 0x04 /* set if we support PowerPC 2.05 */
#define OV1_PPC_2_06 0x02 /* set if we support PowerPC 2.06 */
+#define OV1_PPC_2_07 0x01 /* set if we support PowerPC 2.07 */
/* Option vector 2: Open Firmware options supported */
#define OV2_REAL_MODE 0x20 /* set if we want OF in real mode */
@@ -707,6 +708,7 @@ static void __init early_cmdline_parse(void)
#define OV5_PFO_HW_RNG 0x80 /* PFO Random Number Generator */
#define OV5_PFO_HW_842 0x40 /* PFO Compression Accelerator */
#define OV5_PFO_HW_ENCR 0x20 /* PFO Encryption Accelerator */
+#define OV5_SUB_PROCESSORS 0x01 /* 1,2,or 4 Sub-Processors supported */
/* Option Vector 6: IBM PAPR hints */
#define OV6_LINUX 0x02 /* Linux is our OS */
@@ -719,6 +721,8 @@ static unsigned char ibm_architecture_vec[] = {
W(0xfffe0000), W(0x003a0000), /* POWER5/POWER5+ */
W(0xffff0000), W(0x003e0000), /* POWER6 */
W(0xffff0000), W(0x003f0000), /* POWER7 */
+ W(0xffff0000), W(0x004b0000), /* POWER8 */
+ W(0xffffffff), W(0x0f000004), /* all 2.07-compliant */
W(0xffffffff), W(0x0f000003), /* all 2.06-compliant */
W(0xffffffff), W(0x0f000002), /* all 2.05-compliant */
W(0xfffffffe), W(0x0f000001), /* all 2.04-compliant and earlier */
@@ -728,7 +732,7 @@ static unsigned char ibm_architecture_vec[] = {
3 - 2, /* length */
0, /* don't ignore, don't halt */
OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 |
- OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06,
+ OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07,
/* option vector 2: Open Firmware options supported */
34 - 2, /* length */
@@ -755,7 +759,7 @@ static unsigned char ibm_architecture_vec[] = {
OV4_MIN_ENT_CAP, /* minimum VP entitled capacity */
/* option vector 5: PAPR/OF options */
- 18 - 2, /* length */
+ 19 - 2, /* length */
0, /* don't ignore, don't halt */
OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY |
OV5_DONATE_DEDICATE_CPU | OV5_MSI,
@@ -769,13 +773,14 @@ static unsigned char ibm_architecture_vec[] = {
* must match by the macro below. Update the definition if
* the structure layout changes.
*/
-#define IBM_ARCH_VEC_NRCORES_OFFSET 101
+#define IBM_ARCH_VEC_NRCORES_OFFSET 117
W(NR_CPUS), /* number of cores supported */
0,
0,
0,
0,
OV5_PFO_HW_RNG | OV5_PFO_HW_ENCR | OV5_PFO_HW_842,
+ OV5_SUB_PROCESSORS,
/* option vector 6: IBM PAPR hints */
4 - 2, /* length */
0,
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 79d8e56470d..c4970004d44 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -952,6 +952,10 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
arch_bp_generic_fields(data &
(DABR_DATA_WRITE | DABR_DATA_READ),
&attr.bp_type);
+
+ /* Enable breakpoint */
+ attr.disabled = false;
+
ret = modify_user_hw_breakpoint(bp, &attr);
if (ret) {
ptrace_put_breakpoints(task);
@@ -1037,7 +1041,7 @@ void ptrace_disable(struct task_struct *child)
}
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
-static long set_intruction_bp(struct task_struct *child,
+static long set_instruction_bp(struct task_struct *child,
struct ppc_hw_breakpoint *bp_info)
{
int slot;
@@ -1338,6 +1342,12 @@ static int set_dac_range(struct task_struct *child,
static long ppc_set_hwdebug(struct task_struct *child,
struct ppc_hw_breakpoint *bp_info)
{
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ int len = 0;
+ struct thread_struct *thread = &(child->thread);
+ struct perf_event *bp;
+ struct perf_event_attr attr;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
#ifndef CONFIG_PPC_ADV_DEBUG_REGS
unsigned long dabr;
#endif
@@ -1365,7 +1375,7 @@ static long ppc_set_hwdebug(struct task_struct *child,
if ((bp_info->trigger_type != PPC_BREAKPOINT_TRIGGER_EXECUTE) ||
(bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE))
return -EINVAL;
- return set_intruction_bp(child, bp_info);
+ return set_instruction_bp(child, bp_info);
}
if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
return set_dac(child, bp_info);
@@ -1381,13 +1391,9 @@ static long ppc_set_hwdebug(struct task_struct *child,
*/
if ((bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_RW) == 0 ||
(bp_info->trigger_type & ~PPC_BREAKPOINT_TRIGGER_RW) != 0 ||
- bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT ||
bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
return -EINVAL;
- if (child->thread.dabr)
- return -ENOSPC;
-
if ((unsigned long)bp_info->addr >= TASK_SIZE)
return -EIO;
@@ -1397,6 +1403,50 @@ static long ppc_set_hwdebug(struct task_struct *child,
dabr |= DABR_DATA_READ;
if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
dabr |= DABR_DATA_WRITE;
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ if (ptrace_get_breakpoints(child) < 0)
+ return -ESRCH;
+
+ /*
+ * Check if the request is for 'range' breakpoints. We can
+ * support it if range < 8 bytes.
+ */
+ if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) {
+ len = bp_info->addr2 - bp_info->addr;
+ } else if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) {
+ ptrace_put_breakpoints(child);
+ return -EINVAL;
+ }
+ bp = thread->ptrace_bps[0];
+ if (bp) {
+ ptrace_put_breakpoints(child);
+ return -ENOSPC;
+ }
+
+ /* Create a new breakpoint request if one doesn't exist already */
+ hw_breakpoint_init(&attr);
+ attr.bp_addr = (unsigned long)bp_info->addr & ~HW_BREAKPOINT_ALIGN;
+ attr.bp_len = len;
+ arch_bp_generic_fields(dabr & (DABR_DATA_WRITE | DABR_DATA_READ),
+ &attr.bp_type);
+
+ thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr,
+ ptrace_triggered, NULL, child);
+ if (IS_ERR(bp)) {
+ thread->ptrace_bps[0] = NULL;
+ ptrace_put_breakpoints(child);
+ return PTR_ERR(bp);
+ }
+
+ ptrace_put_breakpoints(child);
+ return 1;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
+ if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT)
+ return -EINVAL;
+
+ if (child->thread.dabr)
+ return -ENOSPC;
child->thread.dabr = dabr;
child->thread.dabrx = DABRX_ALL;
@@ -1405,8 +1455,13 @@ static long ppc_set_hwdebug(struct task_struct *child,
#endif /* !CONFIG_PPC_ADV_DEBUG_DVCS */
}
-static long ppc_del_hwdebug(struct task_struct *child, long addr, long data)
+static long ppc_del_hwdebug(struct task_struct *child, long data)
{
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ int ret = 0;
+ struct thread_struct *thread = &(child->thread);
+ struct perf_event *bp;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
int rc;
@@ -1426,10 +1481,25 @@ static long ppc_del_hwdebug(struct task_struct *child, long addr, long data)
#else
if (data != 1)
return -EINVAL;
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ if (ptrace_get_breakpoints(child) < 0)
+ return -ESRCH;
+
+ bp = thread->ptrace_bps[0];
+ if (bp) {
+ unregister_hw_breakpoint(bp);
+ thread->ptrace_bps[0] = NULL;
+ } else
+ ret = -ENOENT;
+ ptrace_put_breakpoints(child);
+ return ret;
+#else /* CONFIG_HAVE_HW_BREAKPOINT */
if (child->thread.dabr == 0)
return -ENOENT;
child->thread.dabr = 0;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
return 0;
#endif
@@ -1536,7 +1606,11 @@ long arch_ptrace(struct task_struct *child, long request,
dbginfo.data_bp_alignment = 4;
#endif
dbginfo.sizeof_condition = 0;
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ dbginfo.features = PPC_DEBUG_FEATURE_DATA_BP_RANGE;
+#else
dbginfo.features = 0;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
if (!access_ok(VERIFY_WRITE, datavp,
@@ -1563,7 +1637,7 @@ long arch_ptrace(struct task_struct *child, long request,
}
case PPC_PTRACE_DELHWDEBUG: {
- ret = ppc_del_hwdebug(child, addr, data);
+ ret = ppc_del_hwdebug(child, data);
break;
}
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index fcec38241f7..1fd6e7b2f39 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -42,7 +42,6 @@
#include <asm/time.h>
#include <asm/mmu.h>
#include <asm/topology.h>
-#include <asm/pSeries_reconfig.h>
struct rtas_t rtas = {
.lock = __ARCH_SPIN_LOCK_UNLOCKED
diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
index 20b0120db0c..8329190312c 100644
--- a/arch/powerpc/kernel/rtas_flash.c
+++ b/arch/powerpc/kernel/rtas_flash.c
@@ -650,10 +650,8 @@ static int initialize_flash_pde_data(const char *rtas_call_name,
int token;
dp->data = kzalloc(buf_size, GFP_KERNEL);
- if (dp->data == NULL) {
- remove_flash_pde(dp);
+ if (dp->data == NULL)
return -ENOMEM;
- }
/*
* This code assumes that the status int is the first member of the
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index efb6a41b313..6da881b35da 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -601,6 +601,11 @@ void __init setup_arch(char **cmdline_p)
kvm_linear_init();
+ /* Interrupt code needs to be 64K-aligned */
+ if ((unsigned long)_stext & 0xffff)
+ panic("Kernelbase not 64K-aligned (0x%lx)!\n",
+ (unsigned long)_stext);
+
ppc64_boot_msg(0x15, "Setup Done");
}
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index a2dc75793bd..3b997118df5 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -158,10 +158,8 @@ static int do_signal(struct pt_regs *regs)
void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
{
- if (thread_info_flags & _TIF_UPROBE) {
- clear_thread_flag(TIF_UPROBE);
+ if (thread_info_flags & _TIF_UPROBE)
uprobe_notify_resume(regs);
- }
if (thread_info_flags & _TIF_SIGPENDING)
do_signal(regs);
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index d183f8719a5..1ca045d4432 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -83,7 +83,7 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
* the context). This is very important because we must ensure we
* don't lose the VRSAVE content that may have been set prior to
* the process doing its first vector operation
- * Userland shall check AT_HWCAP to know wether it can rely on the
+ * Userland shall check AT_HWCAP to know whether it can rely on the
* v_regs pointer or not
*/
#ifdef CONFIG_ALTIVEC
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 2b952b5386f..e5b133ebd8a 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -427,6 +427,45 @@ int generic_check_cpu_restart(unsigned int cpu)
{
return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE;
}
+
+static atomic_t secondary_inhibit_count;
+
+/*
+ * Don't allow secondary CPU threads to come online
+ */
+void inhibit_secondary_onlining(void)
+{
+ /*
+ * This makes secondary_inhibit_count stable during cpu
+ * online/offline operations.
+ */
+ get_online_cpus();
+
+ atomic_inc(&secondary_inhibit_count);
+ put_online_cpus();
+}
+EXPORT_SYMBOL_GPL(inhibit_secondary_onlining);
+
+/*
+ * Allow secondary CPU threads to come online again
+ */
+void uninhibit_secondary_onlining(void)
+{
+ get_online_cpus();
+ atomic_dec(&secondary_inhibit_count);
+ put_online_cpus();
+}
+EXPORT_SYMBOL_GPL(uninhibit_secondary_onlining);
+
+static int secondaries_inhibited(void)
+{
+ return atomic_read(&secondary_inhibit_count);
+}
+
+#else /* HOTPLUG_CPU */
+
+#define secondaries_inhibited() 0
+
#endif
static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
@@ -445,6 +484,13 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
int rc, c;
+ /*
+ * Don't allow secondary threads to come online if inhibited
+ */
+ if (threads_per_core > 1 && secondaries_inhibited() &&
+ cpu % threads_per_core != 0)
+ return -EBUSY;
+
if (smp_ops == NULL ||
(smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu)))
return -EINVAL;
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index 9c2ed90ece8..8a93778ed9f 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -175,19 +175,10 @@ asmlinkage long compat_sys_prctl(u32 option, u32 arg2, u32 arg3, u32 arg4, u32 a
* proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
* and the register representation of a signed int (msr in 64-bit mode) is performed.
*/
-asmlinkage long compat_sys_sched_rr_get_interval(u32 pid, struct compat_timespec __user *interval)
-{
- struct timespec t;
- int ret;
- mm_segment_t old_fs = get_fs ();
-
- /* The __user pointer cast is valid because of the set_fs() */
- set_fs (KERNEL_DS);
- ret = sys_sched_rr_get_interval((int)pid, (struct timespec __user *) &t);
- set_fs (old_fs);
- if (put_compat_timespec(&t, interval))
- return -EFAULT;
- return ret;
+asmlinkage long compat_sys_sched_rr_get_interval_wrapper(u32 pid,
+ struct compat_timespec __user *interval)
+{
+ return compat_sys_sched_rr_get_interval((int)pid, interval);
}
/* Note: it is necessary to treat mode as an unsigned int,
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index cf357a059dd..3ce1f864c2d 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -607,7 +607,7 @@ static void register_nodes(void)
int sysfs_add_device_to_node(struct device *dev, int nid)
{
- struct node *node = &node_devices[nid];
+ struct node *node = node_devices[nid];
return sysfs_create_link(&node->dev.kobj, &dev->kobj,
kobject_name(&dev->kobj));
}
@@ -615,7 +615,7 @@ EXPORT_SYMBOL_GPL(sysfs_add_device_to_node);
void sysfs_remove_device_from_node(struct device *dev, int nid)
{
- struct node *node = &node_devices[nid];
+ struct node *node = node_devices[nid];
sysfs_remove_link(&node->dev.kobj, kobject_name(&dev->kobj));
}
EXPORT_SYMBOL_GPL(sysfs_remove_device_from_node);
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index ce4cb772dc7..b3b14352b05 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -297,6 +297,8 @@ static u64 vtime_delta(struct task_struct *tsk,
u64 now, nowscaled, deltascaled;
u64 udelta, delta, user_scaled;
+ WARN_ON_ONCE(!irqs_disabled());
+
now = mftb();
nowscaled = read_spurr(now);
get_paca()->system_time += now - get_paca()->starttime;
@@ -355,15 +357,15 @@ void vtime_account_idle(struct task_struct *tsk)
}
/*
- * Transfer the user and system times accumulated in the paca
- * by the exception entry and exit code to the generic process
- * user and system time records.
+ * Transfer the user time accumulated in the paca
+ * by the exception entry and exit code to the generic
+ * process user time records.
* Must be called with interrupts disabled.
- * Assumes that vtime_account() has been called recently
- * (i.e. since the last entry from usermode) so that
+ * Assumes that vtime_account_system/idle() has been called
+ * recently (i.e. since the last entry from usermode) so that
* get_paca()->user_time_scaled is up to date.
*/
-void account_process_tick(struct task_struct *tsk, int user_tick)
+void vtime_account_user(struct task_struct *tsk)
{
cputime_t utime, utimescaled;
@@ -375,12 +377,6 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
account_user_time(tsk, utime, utimescaled);
}
-void vtime_task_switch(struct task_struct *prev)
-{
- vtime_account(prev);
- account_process_tick(prev, 0);
-}
-
#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */
#define calc_cputime_factors()
#endif
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index c39c1ca77f4..f9748498fe5 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -122,29 +122,6 @@ int udbg_write(const char *s, int n)
return n - remain;
}
-int udbg_read(char *buf, int buflen)
-{
- char *p = buf;
- int i, c;
-
- if (!udbg_getc)
- return 0;
-
- for (i = 0; i < buflen; ++i) {
- do {
- c = udbg_getc();
- if (c == -1 && i == 0)
- return -1;
-
- } while (c == 0x11 || c == 0x13);
- if (c == 0 || c == -1)
- break;
- *p++ = c;
- }
-
- return i;
-}
-
#define UDBG_BUFSIZE 256
void udbg_printf(const char *fmt, ...)
{
diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
index d2d46d1014f..bc77834dbf4 100644
--- a/arch/powerpc/kernel/uprobes.c
+++ b/arch/powerpc/kernel/uprobes.c
@@ -64,6 +64,8 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
autask->saved_trap_nr = current->thread.trap_nr;
current->thread.trap_nr = UPROBE_TRAP_NR;
regs->nip = current->utask->xol_vaddr;
+
+ user_enable_single_step(current);
return 0;
}
@@ -119,6 +121,8 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
* to be executed.
*/
regs->nip = utask->vaddr + MAX_UINSN_BYTES;
+
+ user_disable_single_step(current);
return 0;
}
@@ -162,6 +166,8 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
current->thread.trap_nr = utask->autask.saved_trap_nr;
instruction_pointer_set(regs, utask->vaddr);
+
+ user_disable_single_step(current);
}
/*
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 50e7dbc7356..3d7fd21c65f 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -83,6 +83,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
vcpu_44x->shadow_refs[i].gtlb_index = -1;
vcpu->arch.cpu_type = KVM_CPU_440;
+ vcpu->arch.pvr = mfspr(SPRN_PVR);
return 0;
}
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index c8c61578fdf..35ec0a8547d 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -27,12 +27,70 @@
#include "booke.h"
#include "44x_tlb.h"
+#define XOP_MFDCRX 259
#define XOP_MFDCR 323
+#define XOP_MTDCRX 387
#define XOP_MTDCR 451
#define XOP_TLBSX 914
#define XOP_ICCCI 966
#define XOP_TLBWE 978
+static int emulate_mtdcr(struct kvm_vcpu *vcpu, int rs, int dcrn)
+{
+ /* emulate some access in kernel */
+ switch (dcrn) {
+ case DCRN_CPR0_CONFIG_ADDR:
+ vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs);
+ return EMULATE_DONE;
+ default:
+ vcpu->run->dcr.dcrn = dcrn;
+ vcpu->run->dcr.data = kvmppc_get_gpr(vcpu, rs);
+ vcpu->run->dcr.is_write = 1;
+ vcpu->arch.dcr_is_write = 1;
+ vcpu->arch.dcr_needed = 1;
+ kvmppc_account_exit(vcpu, DCR_EXITS);
+ return EMULATE_DO_DCR;
+ }
+}
+
+static int emulate_mfdcr(struct kvm_vcpu *vcpu, int rt, int dcrn)
+{
+ /* The guest may access CPR0 registers to determine the timebase
+ * frequency, and it must know the real host frequency because it
+ * can directly access the timebase registers.
+ *
+ * It would be possible to emulate those accesses in userspace,
+ * but userspace can really only figure out the end frequency.
+ * We could decompose that into the factors that compute it, but
+ * that's tricky math, and it's easier to just report the real
+ * CPR0 values.
+ */
+ switch (dcrn) {
+ case DCRN_CPR0_CONFIG_ADDR:
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr);
+ break;
+ case DCRN_CPR0_CONFIG_DATA:
+ local_irq_disable();
+ mtdcr(DCRN_CPR0_CONFIG_ADDR,
+ vcpu->arch.cpr0_cfgaddr);
+ kvmppc_set_gpr(vcpu, rt,
+ mfdcr(DCRN_CPR0_CONFIG_DATA));
+ local_irq_enable();
+ break;
+ default:
+ vcpu->run->dcr.dcrn = dcrn;
+ vcpu->run->dcr.data = 0;
+ vcpu->run->dcr.is_write = 0;
+ vcpu->arch.dcr_is_write = 0;
+ vcpu->arch.io_gpr = rt;
+ vcpu->arch.dcr_needed = 1;
+ kvmppc_account_exit(vcpu, DCR_EXITS);
+ return EMULATE_DO_DCR;
+ }
+
+ return EMULATE_DONE;
+}
+
int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int inst, int *advance)
{
@@ -50,55 +108,21 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
switch (get_xop(inst)) {
case XOP_MFDCR:
- /* The guest may access CPR0 registers to determine the timebase
- * frequency, and it must know the real host frequency because it
- * can directly access the timebase registers.
- *
- * It would be possible to emulate those accesses in userspace,
- * but userspace can really only figure out the end frequency.
- * We could decompose that into the factors that compute it, but
- * that's tricky math, and it's easier to just report the real
- * CPR0 values.
- */
- switch (dcrn) {
- case DCRN_CPR0_CONFIG_ADDR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr);
- break;
- case DCRN_CPR0_CONFIG_DATA:
- local_irq_disable();
- mtdcr(DCRN_CPR0_CONFIG_ADDR,
- vcpu->arch.cpr0_cfgaddr);
- kvmppc_set_gpr(vcpu, rt,
- mfdcr(DCRN_CPR0_CONFIG_DATA));
- local_irq_enable();
- break;
- default:
- run->dcr.dcrn = dcrn;
- run->dcr.data = 0;
- run->dcr.is_write = 0;
- vcpu->arch.io_gpr = rt;
- vcpu->arch.dcr_needed = 1;
- kvmppc_account_exit(vcpu, DCR_EXITS);
- emulated = EMULATE_DO_DCR;
- }
+ emulated = emulate_mfdcr(vcpu, rt, dcrn);
+ break;
+ case XOP_MFDCRX:
+ emulated = emulate_mfdcr(vcpu, rt,
+ kvmppc_get_gpr(vcpu, ra));
break;
case XOP_MTDCR:
- /* emulate some access in kernel */
- switch (dcrn) {
- case DCRN_CPR0_CONFIG_ADDR:
- vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs);
- break;
- default:
- run->dcr.dcrn = dcrn;
- run->dcr.data = kvmppc_get_gpr(vcpu, rs);
- run->dcr.is_write = 1;
- vcpu->arch.dcr_needed = 1;
- kvmppc_account_exit(vcpu, DCR_EXITS);
- emulated = EMULATE_DO_DCR;
- }
+ emulated = emulate_mtdcr(vcpu, rs, dcrn);
+ break;
+ case XOP_MTDCRX:
+ emulated = emulate_mtdcr(vcpu, rs,
+ kvmppc_get_gpr(vcpu, ra));
break;
case XOP_TLBWE:
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index f4dacb9c57f..4730c953f43 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -20,6 +20,7 @@ config KVM
bool
select PREEMPT_NOTIFIERS
select ANON_INODES
+ select HAVE_KVM_EVENTFD
config KVM_BOOK3S_HANDLER
bool
@@ -36,6 +37,7 @@ config KVM_BOOK3S_64_HANDLER
config KVM_BOOK3S_PR
bool
select KVM_MMIO
+ select MMU_NOTIFIER
config KVM_BOOK3S_32
tristate "KVM support for PowerPC book3s_32 processors"
@@ -123,6 +125,7 @@ config KVM_E500V2
depends on EXPERIMENTAL && E500 && !PPC_E500MC
select KVM
select KVM_MMIO
+ select MMU_NOTIFIER
---help---
Support running unmodified E500 guest kernels in virtual machines on
E500v2 host processors.
@@ -138,6 +141,7 @@ config KVM_E500MC
select KVM
select KVM_MMIO
select KVM_BOOKE_HV
+ select MMU_NOTIFIER
---help---
Support running unmodified E500MC/E5500 (32-bit) guest kernels in
virtual machines on E500MC/E5500 host processors.
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index c2a08636e6d..1e473d46322 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -6,7 +6,8 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
-common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
+common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o \
+ eventfd.o)
CFLAGS_44x_tlb.o := -I.
CFLAGS_e500_tlb.o := -I.
@@ -72,10 +73,12 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
book3s_hv_rmhandlers.o \
book3s_hv_rm_mmu.o \
book3s_64_vio_hv.o \
+ book3s_hv_ras.o \
book3s_hv_builtin.o
kvm-book3s_64-module-objs := \
../../../virt/kvm/kvm_main.o \
+ ../../../virt/kvm/eventfd.o \
powerpc.o \
emulate.o \
book3s.o \
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 3f2a8360c85..a4b64528524 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -411,6 +411,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
return 0;
}
+int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
+void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+}
+
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
int i;
@@ -476,6 +485,122 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
return -ENOTSUPP;
}
+int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+ int r;
+ union kvmppc_one_reg val;
+ int size;
+ long int i;
+
+ size = one_reg_size(reg->id);
+ if (size > sizeof(val))
+ return -EINVAL;
+
+ r = kvmppc_get_one_reg(vcpu, reg->id, &val);
+
+ if (r == -EINVAL) {
+ r = 0;
+ switch (reg->id) {
+ case KVM_REG_PPC_DAR:
+ val = get_reg_val(reg->id, vcpu->arch.shared->dar);
+ break;
+ case KVM_REG_PPC_DSISR:
+ val = get_reg_val(reg->id, vcpu->arch.shared->dsisr);
+ break;
+ case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
+ i = reg->id - KVM_REG_PPC_FPR0;
+ val = get_reg_val(reg->id, vcpu->arch.fpr[i]);
+ break;
+ case KVM_REG_PPC_FPSCR:
+ val = get_reg_val(reg->id, vcpu->arch.fpscr);
+ break;
+#ifdef CONFIG_ALTIVEC
+ case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ val.vval = vcpu->arch.vr[reg->id - KVM_REG_PPC_VR0];
+ break;
+ case KVM_REG_PPC_VSCR:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]);
+ break;
+#endif /* CONFIG_ALTIVEC */
+ default:
+ r = -EINVAL;
+ break;
+ }
+ }
+ if (r)
+ return r;
+
+ if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
+ r = -EFAULT;
+
+ return r;
+}
+
+int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+ int r;
+ union kvmppc_one_reg val;
+ int size;
+ long int i;
+
+ size = one_reg_size(reg->id);
+ if (size > sizeof(val))
+ return -EINVAL;
+
+ if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
+ return -EFAULT;
+
+ r = kvmppc_set_one_reg(vcpu, reg->id, &val);
+
+ if (r == -EINVAL) {
+ r = 0;
+ switch (reg->id) {
+ case KVM_REG_PPC_DAR:
+ vcpu->arch.shared->dar = set_reg_val(reg->id, val);
+ break;
+ case KVM_REG_PPC_DSISR:
+ vcpu->arch.shared->dsisr = set_reg_val(reg->id, val);
+ break;
+ case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
+ i = reg->id - KVM_REG_PPC_FPR0;
+ vcpu->arch.fpr[i] = set_reg_val(reg->id, val);
+ break;
+ case KVM_REG_PPC_FPSCR:
+ vcpu->arch.fpscr = set_reg_val(reg->id, val);
+ break;
+#ifdef CONFIG_ALTIVEC
+ case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ vcpu->arch.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
+ break;
+ case KVM_REG_PPC_VSCR:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val);
+ break;
+#endif /* CONFIG_ALTIVEC */
+ default:
+ r = -EINVAL;
+ break;
+ }
+ }
+
+ return r;
+}
+
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
struct kvm_translation *tr)
{
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index b0f625a3334..00e619bf608 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -155,7 +155,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
/* Get host physical address for gpa */
hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
- if (is_error_pfn(hpaddr)) {
+ if (is_error_noslot_pfn(hpaddr)) {
printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n",
orig_pte->eaddr);
r = -EINVAL;
@@ -254,6 +254,7 @@ next_pteg:
kvmppc_mmu_hpte_cache_map(vcpu, pte);
+ kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT);
out:
return r;
}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index 4d72f9ebc55..ead58e31729 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -93,7 +93,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
/* Get host physical address for gpa */
hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
- if (is_error_pfn(hpaddr)) {
+ if (is_error_noslot_pfn(hpaddr)) {
printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr);
r = -EINVAL;
goto out;
@@ -171,6 +171,7 @@ map_again:
kvmppc_mmu_hpte_cache_map(vcpu, pte);
}
+ kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT);
out:
return r;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index d95d11322a1..8cc18abd6dd 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -24,6 +24,9 @@
#include <linux/slab.h>
#include <linux/hugetlb.h>
#include <linux/vmalloc.h>
+#include <linux/srcu.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
@@ -40,6 +43,11 @@
/* Power architecture requires HPT is at least 256kB */
#define PPC_MIN_HPT_ORDER 18
+static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
+ long pte_index, unsigned long pteh,
+ unsigned long ptel, unsigned long *pte_idx_ret);
+static void kvmppc_rmap_reset(struct kvm *kvm);
+
long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
{
unsigned long hpt;
@@ -137,10 +145,11 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
/* Set the entire HPT to 0, i.e. invalid HPTEs */
memset((void *)kvm->arch.hpt_virt, 0, 1ul << order);
/*
- * Set the whole last_vcpu array to an invalid vcpu number.
- * This ensures that each vcpu will flush its TLB on next entry.
+ * Reset all the reverse-mapping chains for all memslots
*/
- memset(kvm->arch.last_vcpu, 0xff, sizeof(kvm->arch.last_vcpu));
+ kvmppc_rmap_reset(kvm);
+ /* Ensure that each vcpu will flush its TLB on next entry. */
+ cpumask_setall(&kvm->arch.need_tlb_flush);
*htab_orderp = order;
err = 0;
} else {
@@ -184,6 +193,7 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
unsigned long addr, hash;
unsigned long psize;
unsigned long hp0, hp1;
+ unsigned long idx_ret;
long ret;
struct kvm *kvm = vcpu->kvm;
@@ -215,7 +225,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
hash = (hash << 3) + 7;
hp_v = hp0 | ((addr >> 16) & ~0x7fUL);
hp_r = hp1 | addr;
- ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, hash, hp_v, hp_r);
+ ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, hash, hp_v, hp_r,
+ &idx_ret);
if (ret != H_SUCCESS) {
pr_err("KVM: map_vrma at %lx failed, ret=%ld\n",
addr, ret);
@@ -260,7 +271,7 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
/*
* This is called to get a reference to a guest page if there isn't
- * one already in the kvm->arch.slot_phys[][] arrays.
+ * one already in the memslot->arch.slot_phys[] array.
*/
static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
struct kvm_memory_slot *memslot,
@@ -275,7 +286,7 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
struct vm_area_struct *vma;
unsigned long pfn, i, npages;
- physp = kvm->arch.slot_phys[memslot->id];
+ physp = memslot->arch.slot_phys;
if (!physp)
return -EINVAL;
if (physp[gfn - memslot->base_gfn])
@@ -353,15 +364,10 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
return err;
}
-/*
- * We come here on a H_ENTER call from the guest when we are not
- * using mmu notifiers and we don't have the requested page pinned
- * already.
- */
-long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
- long pte_index, unsigned long pteh, unsigned long ptel)
+long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
+ long pte_index, unsigned long pteh,
+ unsigned long ptel, unsigned long *pte_idx_ret)
{
- struct kvm *kvm = vcpu->kvm;
unsigned long psize, gpa, gfn;
struct kvm_memory_slot *memslot;
long ret;
@@ -389,8 +395,8 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
do_insert:
/* Protect linux PTE lookup from page table destruction */
rcu_read_lock_sched(); /* this disables preemption too */
- vcpu->arch.pgdir = current->mm->pgd;
- ret = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel);
+ ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
+ current->mm->pgd, false, pte_idx_ret);
rcu_read_unlock_sched();
if (ret == H_TOO_HARD) {
/* this can't happen */
@@ -401,6 +407,19 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
}
+/*
+ * We come here on a H_ENTER call from the guest when we are not
+ * using mmu notifiers and we don't have the requested page pinned
+ * already.
+ */
+long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+ long pte_index, unsigned long pteh,
+ unsigned long ptel)
+{
+ return kvmppc_virtmode_do_h_enter(vcpu->kvm, flags, pte_index,
+ pteh, ptel, &vcpu->arch.gpr[4]);
+}
+
static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu,
gva_t eaddr)
{
@@ -570,7 +589,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
struct kvm *kvm = vcpu->kvm;
unsigned long *hptep, hpte[3], r;
unsigned long mmu_seq, psize, pte_size;
- unsigned long gfn, hva, pfn;
+ unsigned long gpa, gfn, hva, pfn;
struct kvm_memory_slot *memslot;
unsigned long *rmap;
struct revmap_entry *rev;
@@ -608,15 +627,14 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* Translate the logical address and get the page */
psize = hpte_page_size(hpte[0], r);
- gfn = hpte_rpn(r, psize);
+ gpa = (r & HPTE_R_RPN & ~(psize - 1)) | (ea & (psize - 1));
+ gfn = gpa >> PAGE_SHIFT;
memslot = gfn_to_memslot(kvm, gfn);
/* No memslot means it's an emulated MMIO region */
- if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
- unsigned long gpa = (gfn << PAGE_SHIFT) | (ea & (psize - 1));
+ if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
dsisr & DSISR_ISSTORE);
- }
if (!kvm->arch.using_mmu_notifiers)
return -EFAULT; /* should never get here */
@@ -710,7 +728,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* Check if we might have been invalidated; let the guest retry if so */
ret = RESUME_GUEST;
- if (mmu_notifier_retry(vcpu, mmu_seq)) {
+ if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
unlock_rmap(rmap);
goto out_unlock;
}
@@ -756,6 +774,25 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
goto out_put;
}
+static void kvmppc_rmap_reset(struct kvm *kvm)
+{
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+ int srcu_idx;
+
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ slots = kvm->memslots;
+ kvm_for_each_memslot(memslot, slots) {
+ /*
+ * This assumes it is acceptable to lose reference and
+ * change bits across a reset.
+ */
+ memset(memslot->arch.rmap, 0,
+ memslot->npages * sizeof(*memslot->arch.rmap));
+ }
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+}
+
static int kvm_handle_hva_range(struct kvm *kvm,
unsigned long start,
unsigned long end,
@@ -850,7 +887,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
psize = hpte_page_size(hptep[0], ptel);
if ((hptep[0] & HPTE_V_VALID) &&
hpte_rpn(ptel, psize) == gfn) {
- hptep[0] |= HPTE_V_ABSENT;
+ if (kvm->arch.using_mmu_notifiers)
+ hptep[0] |= HPTE_V_ABSENT;
kvmppc_invalidate_hpte(kvm, hptep, i);
/* Harvest R and C */
rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
@@ -877,6 +915,28 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
return 0;
}
+void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
+{
+ unsigned long *rmapp;
+ unsigned long gfn;
+ unsigned long n;
+
+ rmapp = memslot->arch.rmap;
+ gfn = memslot->base_gfn;
+ for (n = memslot->npages; n; --n) {
+ /*
+ * Testing the present bit without locking is OK because
+ * the memslot has been marked invalid already, and hence
+ * no new HPTEs referencing this page can be created,
+ * thus the present bit can't go from 0 to 1.
+ */
+ if (*rmapp & KVMPPC_RMAP_PRESENT)
+ kvm_unmap_rmapp(kvm, rmapp, gfn);
+ ++rmapp;
+ ++gfn;
+ }
+}
+
static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
unsigned long gfn)
{
@@ -1030,16 +1090,16 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
return ret;
}
-long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
+long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ unsigned long *map)
{
unsigned long i;
- unsigned long *rmapp, *map;
+ unsigned long *rmapp;
preempt_disable();
rmapp = memslot->arch.rmap;
- map = memslot->dirty_bitmap;
for (i = 0; i < memslot->npages; ++i) {
- if (kvm_test_clear_dirty(kvm, rmapp))
+ if (kvm_test_clear_dirty(kvm, rmapp) && map)
__set_bit_le(i, map);
++rmapp;
}
@@ -1057,20 +1117,22 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
unsigned long hva, psize, offset;
unsigned long pa;
unsigned long *physp;
+ int srcu_idx;
+ srcu_idx = srcu_read_lock(&kvm->srcu);
memslot = gfn_to_memslot(kvm, gfn);
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
- return NULL;
+ goto err;
if (!kvm->arch.using_mmu_notifiers) {
- physp = kvm->arch.slot_phys[memslot->id];
+ physp = memslot->arch.slot_phys;
if (!physp)
- return NULL;
+ goto err;
physp += gfn - memslot->base_gfn;
pa = *physp;
if (!pa) {
if (kvmppc_get_guest_page(kvm, gfn, memslot,
PAGE_SIZE) < 0)
- return NULL;
+ goto err;
pa = *physp;
}
page = pfn_to_page(pa >> PAGE_SHIFT);
@@ -1079,9 +1141,11 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
hva = gfn_to_hva_memslot(memslot, gfn);
npages = get_user_pages_fast(hva, 1, 1, pages);
if (npages < 1)
- return NULL;
+ goto err;
page = pages[0];
}
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+
psize = PAGE_SIZE;
if (PageHuge(page)) {
page = compound_head(page);
@@ -1091,6 +1155,10 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
if (nb_ret)
*nb_ret = psize - offset;
return page_address(page) + offset;
+
+ err:
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ return NULL;
}
void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
@@ -1100,6 +1168,348 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
put_page(page);
}
+/*
+ * Functions for reading and writing the hash table via reads and
+ * writes on a file descriptor.
+ *
+ * Reads return the guest view of the hash table, which has to be
+ * pieced together from the real hash table and the guest_rpte
+ * values in the revmap array.
+ *
+ * On writes, each HPTE written is considered in turn, and if it
+ * is valid, it is written to the HPT as if an H_ENTER with the
+ * exact flag set was done. When the invalid count is non-zero
+ * in the header written to the stream, the kernel will make
+ * sure that that many HPTEs are invalid, and invalidate them
+ * if not.
+ */
+
+struct kvm_htab_ctx {
+ unsigned long index;
+ unsigned long flags;
+ struct kvm *kvm;
+ int first_pass;
+};
+
+#define HPTE_SIZE (2 * sizeof(unsigned long))
+
+static long record_hpte(unsigned long flags, unsigned long *hptp,
+ unsigned long *hpte, struct revmap_entry *revp,
+ int want_valid, int first_pass)
+{
+ unsigned long v, r;
+ int ok = 1;
+ int valid, dirty;
+
+ /* Unmodified entries are uninteresting except on the first pass */
+ dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
+ if (!first_pass && !dirty)
+ return 0;
+
+ valid = 0;
+ if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) {
+ valid = 1;
+ if ((flags & KVM_GET_HTAB_BOLTED_ONLY) &&
+ !(hptp[0] & HPTE_V_BOLTED))
+ valid = 0;
+ }
+ if (valid != want_valid)
+ return 0;
+
+ v = r = 0;
+ if (valid || dirty) {
+ /* lock the HPTE so it's stable and read it */
+ preempt_disable();
+ while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
+ cpu_relax();
+ v = hptp[0];
+ if (v & HPTE_V_ABSENT) {
+ v &= ~HPTE_V_ABSENT;
+ v |= HPTE_V_VALID;
+ }
+ /* re-evaluate valid and dirty from synchronized HPTE value */
+ valid = !!(v & HPTE_V_VALID);
+ if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED))
+ valid = 0;
+ r = revp->guest_rpte | (hptp[1] & (HPTE_R_R | HPTE_R_C));
+ dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
+ /* only clear modified if this is the right sort of entry */
+ if (valid == want_valid && dirty) {
+ r &= ~HPTE_GR_MODIFIED;
+ revp->guest_rpte = r;
+ }
+ asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
+ hptp[0] &= ~HPTE_V_HVLOCK;
+ preempt_enable();
+ if (!(valid == want_valid && (first_pass || dirty)))
+ ok = 0;
+ }
+ hpte[0] = v;
+ hpte[1] = r;
+ return ok;
+}
+
+static ssize_t kvm_htab_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct kvm_htab_ctx *ctx = file->private_data;
+ struct kvm *kvm = ctx->kvm;
+ struct kvm_get_htab_header hdr;
+ unsigned long *hptp;
+ struct revmap_entry *revp;
+ unsigned long i, nb, nw;
+ unsigned long __user *lbuf;
+ struct kvm_get_htab_header __user *hptr;
+ unsigned long flags;
+ int first_pass;
+ unsigned long hpte[2];
+
+ if (!access_ok(VERIFY_WRITE, buf, count))
+ return -EFAULT;
+
+ first_pass = ctx->first_pass;
+ flags = ctx->flags;
+
+ i = ctx->index;
+ hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
+ revp = kvm->arch.revmap + i;
+ lbuf = (unsigned long __user *)buf;
+
+ nb = 0;
+ while (nb + sizeof(hdr) + HPTE_SIZE < count) {
+ /* Initialize header */
+ hptr = (struct kvm_get_htab_header __user *)buf;
+ hdr.n_valid = 0;
+ hdr.n_invalid = 0;
+ nw = nb;
+ nb += sizeof(hdr);
+ lbuf = (unsigned long __user *)(buf + sizeof(hdr));
+
+ /* Skip uninteresting entries, i.e. clean on not-first pass */
+ if (!first_pass) {
+ while (i < kvm->arch.hpt_npte &&
+ !(revp->guest_rpte & HPTE_GR_MODIFIED)) {
+ ++i;
+ hptp += 2;
+ ++revp;
+ }
+ }
+ hdr.index = i;
+
+ /* Grab a series of valid entries */
+ while (i < kvm->arch.hpt_npte &&
+ hdr.n_valid < 0xffff &&
+ nb + HPTE_SIZE < count &&
+ record_hpte(flags, hptp, hpte, revp, 1, first_pass)) {
+ /* valid entry, write it out */
+ ++hdr.n_valid;
+ if (__put_user(hpte[0], lbuf) ||
+ __put_user(hpte[1], lbuf + 1))
+ return -EFAULT;
+ nb += HPTE_SIZE;
+ lbuf += 2;
+ ++i;
+ hptp += 2;
+ ++revp;
+ }
+ /* Now skip invalid entries while we can */
+ while (i < kvm->arch.hpt_npte &&
+ hdr.n_invalid < 0xffff &&
+ record_hpte(flags, hptp, hpte, revp, 0, first_pass)) {
+ /* found an invalid entry */
+ ++hdr.n_invalid;
+ ++i;
+ hptp += 2;
+ ++revp;
+ }
+
+ if (hdr.n_valid || hdr.n_invalid) {
+ /* write back the header */
+ if (__copy_to_user(hptr, &hdr, sizeof(hdr)))
+ return -EFAULT;
+ nw = nb;
+ buf = (char __user *)lbuf;
+ } else {
+ nb = nw;
+ }
+
+ /* Check if we've wrapped around the hash table */
+ if (i >= kvm->arch.hpt_npte) {
+ i = 0;
+ ctx->first_pass = 0;
+ break;
+ }
+ }
+
+ ctx->index = i;
+
+ return nb;
+}
+
+static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct kvm_htab_ctx *ctx = file->private_data;
+ struct kvm *kvm = ctx->kvm;
+ struct kvm_get_htab_header hdr;
+ unsigned long i, j;
+ unsigned long v, r;
+ unsigned long __user *lbuf;
+ unsigned long *hptp;
+ unsigned long tmp[2];
+ ssize_t nb;
+ long int err, ret;
+ int rma_setup;
+
+ if (!access_ok(VERIFY_READ, buf, count))
+ return -EFAULT;
+
+ /* lock out vcpus from running while we're doing this */
+ mutex_lock(&kvm->lock);
+ rma_setup = kvm->arch.rma_setup_done;
+ if (rma_setup) {
+ kvm->arch.rma_setup_done = 0; /* temporarily */
+ /* order rma_setup_done vs. vcpus_running */
+ smp_mb();
+ if (atomic_read(&kvm->arch.vcpus_running)) {
+ kvm->arch.rma_setup_done = 1;
+ mutex_unlock(&kvm->lock);
+ return -EBUSY;
+ }
+ }
+
+ err = 0;
+ for (nb = 0; nb + sizeof(hdr) <= count; ) {
+ err = -EFAULT;
+ if (__copy_from_user(&hdr, buf, sizeof(hdr)))
+ break;
+
+ err = 0;
+ if (nb + hdr.n_valid * HPTE_SIZE > count)
+ break;
+
+ nb += sizeof(hdr);
+ buf += sizeof(hdr);
+
+ err = -EINVAL;
+ i = hdr.index;
+ if (i >= kvm->arch.hpt_npte ||
+ i + hdr.n_valid + hdr.n_invalid > kvm->arch.hpt_npte)
+ break;
+
+ hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
+ lbuf = (unsigned long __user *)buf;
+ for (j = 0; j < hdr.n_valid; ++j) {
+ err = -EFAULT;
+ if (__get_user(v, lbuf) || __get_user(r, lbuf + 1))
+ goto out;
+ err = -EINVAL;
+ if (!(v & HPTE_V_VALID))
+ goto out;
+ lbuf += 2;
+ nb += HPTE_SIZE;
+
+ if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT))
+ kvmppc_do_h_remove(kvm, 0, i, 0, tmp);
+ err = -EIO;
+ ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r,
+ tmp);
+ if (ret != H_SUCCESS) {
+ pr_err("kvm_htab_write ret %ld i=%ld v=%lx "
+ "r=%lx\n", ret, i, v, r);
+ goto out;
+ }
+ if (!rma_setup && is_vrma_hpte(v)) {
+ unsigned long psize = hpte_page_size(v, r);
+ unsigned long senc = slb_pgsize_encoding(psize);
+ unsigned long lpcr;
+
+ kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
+ (VRMA_VSID << SLB_VSID_SHIFT_1T);
+ lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
+ lpcr |= senc << (LPCR_VRMASD_SH - 4);
+ kvm->arch.lpcr = lpcr;
+ rma_setup = 1;
+ }
+ ++i;
+ hptp += 2;
+ }
+
+ for (j = 0; j < hdr.n_invalid; ++j) {
+ if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT))
+ kvmppc_do_h_remove(kvm, 0, i, 0, tmp);
+ ++i;
+ hptp += 2;
+ }
+ err = 0;
+ }
+
+ out:
+ /* Order HPTE updates vs. rma_setup_done */
+ smp_wmb();
+ kvm->arch.rma_setup_done = rma_setup;
+ mutex_unlock(&kvm->lock);
+
+ if (err)
+ return err;
+ return nb;
+}
+
+static int kvm_htab_release(struct inode *inode, struct file *filp)
+{
+ struct kvm_htab_ctx *ctx = filp->private_data;
+
+ filp->private_data = NULL;
+ if (!(ctx->flags & KVM_GET_HTAB_WRITE))
+ atomic_dec(&ctx->kvm->arch.hpte_mod_interest);
+ kvm_put_kvm(ctx->kvm);
+ kfree(ctx);
+ return 0;
+}
+
+static struct file_operations kvm_htab_fops = {
+ .read = kvm_htab_read,
+ .write = kvm_htab_write,
+ .llseek = default_llseek,
+ .release = kvm_htab_release,
+};
+
+int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
+{
+ int ret;
+ struct kvm_htab_ctx *ctx;
+ int rwflag;
+
+ /* reject flags we don't recognize */
+ if (ghf->flags & ~(KVM_GET_HTAB_BOLTED_ONLY | KVM_GET_HTAB_WRITE))
+ return -EINVAL;
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+ kvm_get_kvm(kvm);
+ ctx->kvm = kvm;
+ ctx->index = ghf->start_index;
+ ctx->flags = ghf->flags;
+ ctx->first_pass = 1;
+
+ rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY;
+ ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag);
+ if (ret < 0) {
+ kvm_put_kvm(kvm);
+ return ret;
+ }
+
+ if (rwflag == O_RDONLY) {
+ mutex_lock(&kvm->slots_lock);
+ atomic_inc(&kvm->arch.hpte_mod_interest);
+ /* make sure kvmppc_do_h_enter etc. see the increment */
+ synchronize_srcu_expedited(&kvm->srcu);
+ mutex_unlock(&kvm->slots_lock);
+ }
+
+ return ret;
+}
+
void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
{
struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index b9a989dc76c..d31a716f7f2 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -22,6 +22,7 @@
#include <asm/kvm_book3s.h>
#include <asm/reg.h>
#include <asm/switch_to.h>
+#include <asm/time.h>
#define OP_19_XOP_RFID 18
#define OP_19_XOP_RFI 50
@@ -395,6 +396,12 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
(mfmsr() & MSR_HV))
vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
break;
+ case SPRN_PURR:
+ to_book3s(vcpu)->purr_offset = spr_val - get_tb();
+ break;
+ case SPRN_SPURR:
+ to_book3s(vcpu)->spurr_offset = spr_val - get_tb();
+ break;
case SPRN_GQR0:
case SPRN_GQR1:
case SPRN_GQR2:
@@ -412,6 +419,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
case SPRN_CTRLF:
case SPRN_CTRLT:
case SPRN_L2CR:
+ case SPRN_DSCR:
case SPRN_MMCR0_GEKKO:
case SPRN_MMCR1_GEKKO:
case SPRN_PMC1_GEKKO:
@@ -483,9 +491,15 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
*spr_val = to_book3s(vcpu)->hid[5];
break;
case SPRN_CFAR:
- case SPRN_PURR:
+ case SPRN_DSCR:
*spr_val = 0;
break;
+ case SPRN_PURR:
+ *spr_val = get_tb() + to_book3s(vcpu)->purr_offset;
+ break;
+ case SPRN_SPURR:
+ *spr_val = get_tb() + to_book3s(vcpu)->purr_offset;
+ break;
case SPRN_GQR0:
case SPRN_GQR1:
case SPRN_GQR2:
diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c
index a150817d6d4..7057a02f090 100644
--- a/arch/powerpc/kvm/book3s_exports.c
+++ b/arch/powerpc/kvm/book3s_exports.c
@@ -28,8 +28,5 @@ EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu);
#ifdef CONFIG_ALTIVEC
EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec);
#endif
-#ifdef CONFIG_VSX
-EXPORT_SYMBOL_GPL(kvmppc_load_up_vsx);
-#endif
#endif
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 721d4603a23..71d0c90b62b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -30,6 +30,7 @@
#include <linux/cpumask.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
+#include <linux/srcu.h>
#include <asm/reg.h>
#include <asm/cputable.h>
@@ -46,6 +47,7 @@
#include <asm/page.h>
#include <asm/hvcall.h>
#include <asm/switch_to.h>
+#include <asm/smp.h>
#include <linux/gfp.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
@@ -55,25 +57,77 @@
/* #define EXIT_DEBUG_SIMPLE */
/* #define EXIT_DEBUG_INT */
+/* Used to indicate that a guest page fault needs to be handled */
+#define RESUME_PAGE_FAULT (RESUME_GUEST | RESUME_FLAG_ARCH1)
+
+/* Used as a "null" value for timebase values */
+#define TB_NIL (~(u64)0)
+
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
+/*
+ * We use the vcpu_load/put functions to measure stolen time.
+ * Stolen time is counted as time when either the vcpu is able to
+ * run as part of a virtual core, but the task running the vcore
+ * is preempted or sleeping, or when the vcpu needs something done
+ * in the kernel by the task running the vcpu, but that task is
+ * preempted or sleeping. Those two things have to be counted
+ * separately, since one of the vcpu tasks will take on the job
+ * of running the core, and the other vcpu tasks in the vcore will
+ * sleep waiting for it to do that, but that sleep shouldn't count
+ * as stolen time.
+ *
+ * Hence we accumulate stolen time when the vcpu can run as part of
+ * a vcore using vc->stolen_tb, and the stolen time when the vcpu
+ * needs its task to do other things in the kernel (for example,
+ * service a page fault) in busy_stolen. We don't accumulate
+ * stolen time for a vcore when it is inactive, or for a vcpu
+ * when it is in state RUNNING or NOTREADY. NOTREADY is a bit of
+ * a misnomer; it means that the vcpu task is not executing in
+ * the KVM_VCPU_RUN ioctl, i.e. it is in userspace or elsewhere in
+ * the kernel. We don't have any way of dividing up that time
+ * between time that the vcpu is genuinely stopped, time that
+ * the task is actively working on behalf of the vcpu, and time
+ * that the task is preempted, so we don't count any of it as
+ * stolen.
+ *
+ * Updates to busy_stolen are protected by arch.tbacct_lock;
+ * updates to vc->stolen_tb are protected by the arch.tbacct_lock
+ * of the vcpu that has taken responsibility for running the vcore
+ * (i.e. vc->runner). The stolen times are measured in units of
+ * timebase ticks. (Note that the != TB_NIL checks below are
+ * purely defensive; they should never fail.)
+ */
+
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
- local_paca->kvm_hstate.kvm_vcpu = vcpu;
- local_paca->kvm_hstate.kvm_vcore = vc;
- if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
+ spin_lock(&vcpu->arch.tbacct_lock);
+ if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE &&
+ vc->preempt_tb != TB_NIL) {
vc->stolen_tb += mftb() - vc->preempt_tb;
+ vc->preempt_tb = TB_NIL;
+ }
+ if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
+ vcpu->arch.busy_preempt != TB_NIL) {
+ vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;
+ vcpu->arch.busy_preempt = TB_NIL;
+ }
+ spin_unlock(&vcpu->arch.tbacct_lock);
}
void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ spin_lock(&vcpu->arch.tbacct_lock);
if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
vc->preempt_tb = mftb();
+ if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
+ vcpu->arch.busy_preempt = mftb();
+ spin_unlock(&vcpu->arch.tbacct_lock);
}
void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
@@ -142,6 +196,22 @@ static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
vpa->yield_count = 1;
}
+static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v,
+ unsigned long addr, unsigned long len)
+{
+ /* check address is cacheline aligned */
+ if (addr & (L1_CACHE_BYTES - 1))
+ return -EINVAL;
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ if (v->next_gpa != addr || v->len != len) {
+ v->next_gpa = addr;
+ v->len = addr ? len : 0;
+ v->update_pending = 1;
+ }
+ spin_unlock(&vcpu->arch.vpa_update_lock);
+ return 0;
+}
+
/* Length for a per-processor buffer is passed in at offset 4 in the buffer */
struct reg_vpa {
u32 dummy;
@@ -317,10 +387,16 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
{
+ if (!(vcpu->arch.vpa.update_pending ||
+ vcpu->arch.slb_shadow.update_pending ||
+ vcpu->arch.dtl.update_pending))
+ return;
+
spin_lock(&vcpu->arch.vpa_update_lock);
if (vcpu->arch.vpa.update_pending) {
kvmppc_update_vpa(vcpu, &vcpu->arch.vpa);
- init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
+ if (vcpu->arch.vpa.pinned_addr)
+ init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
}
if (vcpu->arch.dtl.update_pending) {
kvmppc_update_vpa(vcpu, &vcpu->arch.dtl);
@@ -332,24 +408,61 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
spin_unlock(&vcpu->arch.vpa_update_lock);
}
+/*
+ * Return the accumulated stolen time for the vcore up until `now'.
+ * The caller should hold the vcore lock.
+ */
+static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
+{
+ u64 p;
+
+ /*
+ * If we are the task running the vcore, then since we hold
+ * the vcore lock, we can't be preempted, so stolen_tb/preempt_tb
+ * can't be updated, so we don't need the tbacct_lock.
+ * If the vcore is inactive, it can't become active (since we
+ * hold the vcore lock), so the vcpu load/put functions won't
+ * update stolen_tb/preempt_tb, and we don't need tbacct_lock.
+ */
+ if (vc->vcore_state != VCORE_INACTIVE &&
+ vc->runner->arch.run_task != current) {
+ spin_lock(&vc->runner->arch.tbacct_lock);
+ p = vc->stolen_tb;
+ if (vc->preempt_tb != TB_NIL)
+ p += now - vc->preempt_tb;
+ spin_unlock(&vc->runner->arch.tbacct_lock);
+ } else {
+ p = vc->stolen_tb;
+ }
+ return p;
+}
+
static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
struct kvmppc_vcore *vc)
{
struct dtl_entry *dt;
struct lppaca *vpa;
- unsigned long old_stolen;
+ unsigned long stolen;
+ unsigned long core_stolen;
+ u64 now;
dt = vcpu->arch.dtl_ptr;
vpa = vcpu->arch.vpa.pinned_addr;
- old_stolen = vcpu->arch.stolen_logged;
- vcpu->arch.stolen_logged = vc->stolen_tb;
+ now = mftb();
+ core_stolen = vcore_stolen_time(vc, now);
+ stolen = core_stolen - vcpu->arch.stolen_logged;
+ vcpu->arch.stolen_logged = core_stolen;
+ spin_lock(&vcpu->arch.tbacct_lock);
+ stolen += vcpu->arch.busy_stolen;
+ vcpu->arch.busy_stolen = 0;
+ spin_unlock(&vcpu->arch.tbacct_lock);
if (!dt || !vpa)
return;
memset(dt, 0, sizeof(struct dtl_entry));
dt->dispatch_reason = 7;
dt->processor_id = vc->pcpu + vcpu->arch.ptid;
- dt->timebase = mftb();
- dt->enqueue_to_dispatch_time = vc->stolen_tb - old_stolen;
+ dt->timebase = now;
+ dt->enqueue_to_dispatch_time = stolen;
dt->srr0 = kvmppc_get_pc(vcpu);
dt->srr1 = vcpu->arch.shregs.msr;
++dt;
@@ -366,13 +479,16 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
unsigned long req = kvmppc_get_gpr(vcpu, 3);
unsigned long target, ret = H_SUCCESS;
struct kvm_vcpu *tvcpu;
+ int idx;
switch (req) {
case H_ENTER:
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6),
kvmppc_get_gpr(vcpu, 7));
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
case H_CEDE:
break;
@@ -429,6 +545,17 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
case BOOK3S_INTERRUPT_PERFMON:
r = RESUME_GUEST;
break;
+ case BOOK3S_INTERRUPT_MACHINE_CHECK:
+ /*
+ * Deliver a machine check interrupt to the guest.
+ * We have to do this, even if the host has handled the
+ * machine check, because machine checks use SRR0/1 and
+ * the interrupt might have trashed guest state in them.
+ */
+ kvmppc_book3s_queue_irqprio(vcpu,
+ BOOK3S_INTERRUPT_MACHINE_CHECK);
+ r = RESUME_GUEST;
+ break;
case BOOK3S_INTERRUPT_PROGRAM:
{
ulong flags;
@@ -470,12 +597,12 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
* have been handled already.
*/
case BOOK3S_INTERRUPT_H_DATA_STORAGE:
- r = kvmppc_book3s_hv_page_fault(run, vcpu,
- vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
+ r = RESUME_PAGE_FAULT;
break;
case BOOK3S_INTERRUPT_H_INST_STORAGE:
- r = kvmppc_book3s_hv_page_fault(run, vcpu,
- kvmppc_get_pc(vcpu), 0);
+ vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
+ vcpu->arch.fault_dsisr = 0;
+ r = RESUME_PAGE_FAULT;
break;
/*
* This occurs if the guest executes an illegal instruction.
@@ -535,36 +662,174 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
return 0;
}
-int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
{
- int r = -EINVAL;
+ int r = 0;
+ long int i;
- switch (reg->id) {
+ switch (id) {
case KVM_REG_PPC_HIOR:
- r = put_user(0, (u64 __user *)reg->addr);
+ *val = get_reg_val(id, 0);
+ break;
+ case KVM_REG_PPC_DABR:
+ *val = get_reg_val(id, vcpu->arch.dabr);
+ break;
+ case KVM_REG_PPC_DSCR:
+ *val = get_reg_val(id, vcpu->arch.dscr);
+ break;
+ case KVM_REG_PPC_PURR:
+ *val = get_reg_val(id, vcpu->arch.purr);
+ break;
+ case KVM_REG_PPC_SPURR:
+ *val = get_reg_val(id, vcpu->arch.spurr);
+ break;
+ case KVM_REG_PPC_AMR:
+ *val = get_reg_val(id, vcpu->arch.amr);
+ break;
+ case KVM_REG_PPC_UAMOR:
+ *val = get_reg_val(id, vcpu->arch.uamor);
+ break;
+ case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA:
+ i = id - KVM_REG_PPC_MMCR0;
+ *val = get_reg_val(id, vcpu->arch.mmcr[i]);
+ break;
+ case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
+ i = id - KVM_REG_PPC_PMC1;
+ *val = get_reg_val(id, vcpu->arch.pmc[i]);
+ break;
+#ifdef CONFIG_VSX
+ case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
+ if (cpu_has_feature(CPU_FTR_VSX)) {
+ /* VSX => FP reg i is stored in arch.vsr[2*i] */
+ long int i = id - KVM_REG_PPC_FPR0;
+ *val = get_reg_val(id, vcpu->arch.vsr[2 * i]);
+ } else {
+ /* let generic code handle it */
+ r = -EINVAL;
+ }
+ break;
+ case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
+ if (cpu_has_feature(CPU_FTR_VSX)) {
+ long int i = id - KVM_REG_PPC_VSR0;
+ val->vsxval[0] = vcpu->arch.vsr[2 * i];
+ val->vsxval[1] = vcpu->arch.vsr[2 * i + 1];
+ } else {
+ r = -ENXIO;
+ }
+ break;
+#endif /* CONFIG_VSX */
+ case KVM_REG_PPC_VPA_ADDR:
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ *val = get_reg_val(id, vcpu->arch.vpa.next_gpa);
+ spin_unlock(&vcpu->arch.vpa_update_lock);
+ break;
+ case KVM_REG_PPC_VPA_SLB:
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ val->vpaval.addr = vcpu->arch.slb_shadow.next_gpa;
+ val->vpaval.length = vcpu->arch.slb_shadow.len;
+ spin_unlock(&vcpu->arch.vpa_update_lock);
+ break;
+ case KVM_REG_PPC_VPA_DTL:
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ val->vpaval.addr = vcpu->arch.dtl.next_gpa;
+ val->vpaval.length = vcpu->arch.dtl.len;
+ spin_unlock(&vcpu->arch.vpa_update_lock);
break;
default:
+ r = -EINVAL;
break;
}
return r;
}
-int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
{
- int r = -EINVAL;
+ int r = 0;
+ long int i;
+ unsigned long addr, len;
- switch (reg->id) {
+ switch (id) {
case KVM_REG_PPC_HIOR:
- {
- u64 hior;
/* Only allow this to be set to zero */
- r = get_user(hior, (u64 __user *)reg->addr);
- if (!r && (hior != 0))
+ if (set_reg_val(id, *val))
r = -EINVAL;
break;
- }
+ case KVM_REG_PPC_DABR:
+ vcpu->arch.dabr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_DSCR:
+ vcpu->arch.dscr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_PURR:
+ vcpu->arch.purr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_SPURR:
+ vcpu->arch.spurr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_AMR:
+ vcpu->arch.amr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_UAMOR:
+ vcpu->arch.uamor = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA:
+ i = id - KVM_REG_PPC_MMCR0;
+ vcpu->arch.mmcr[i] = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
+ i = id - KVM_REG_PPC_PMC1;
+ vcpu->arch.pmc[i] = set_reg_val(id, *val);
+ break;
+#ifdef CONFIG_VSX
+ case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
+ if (cpu_has_feature(CPU_FTR_VSX)) {
+ /* VSX => FP reg i is stored in arch.vsr[2*i] */
+ long int i = id - KVM_REG_PPC_FPR0;
+ vcpu->arch.vsr[2 * i] = set_reg_val(id, *val);
+ } else {
+ /* let generic code handle it */
+ r = -EINVAL;
+ }
+ break;
+ case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
+ if (cpu_has_feature(CPU_FTR_VSX)) {
+ long int i = id - KVM_REG_PPC_VSR0;
+ vcpu->arch.vsr[2 * i] = val->vsxval[0];
+ vcpu->arch.vsr[2 * i + 1] = val->vsxval[1];
+ } else {
+ r = -ENXIO;
+ }
+ break;
+#endif /* CONFIG_VSX */
+ case KVM_REG_PPC_VPA_ADDR:
+ addr = set_reg_val(id, *val);
+ r = -EINVAL;
+ if (!addr && (vcpu->arch.slb_shadow.next_gpa ||
+ vcpu->arch.dtl.next_gpa))
+ break;
+ r = set_vpa(vcpu, &vcpu->arch.vpa, addr, sizeof(struct lppaca));
+ break;
+ case KVM_REG_PPC_VPA_SLB:
+ addr = val->vpaval.addr;
+ len = val->vpaval.length;
+ r = -EINVAL;
+ if (addr && !vcpu->arch.vpa.next_gpa)
+ break;
+ r = set_vpa(vcpu, &vcpu->arch.slb_shadow, addr, len);
+ break;
+ case KVM_REG_PPC_VPA_DTL:
+ addr = val->vpaval.addr;
+ len = val->vpaval.length;
+ r = -EINVAL;
+ if (addr && (len < sizeof(struct dtl_entry) ||
+ !vcpu->arch.vpa.next_gpa))
+ break;
+ len -= len % sizeof(struct dtl_entry);
+ r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
+ break;
default:
+ r = -EINVAL;
break;
}
@@ -599,20 +864,18 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
goto free_vcpu;
vcpu->arch.shared = &vcpu->arch.shregs;
- vcpu->arch.last_cpu = -1;
vcpu->arch.mmcr[0] = MMCR0_FC;
vcpu->arch.ctrl = CTRL_RUNLATCH;
/* default to host PVR, since we can't spoof it */
vcpu->arch.pvr = mfspr(SPRN_PVR);
kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
spin_lock_init(&vcpu->arch.vpa_update_lock);
+ spin_lock_init(&vcpu->arch.tbacct_lock);
+ vcpu->arch.busy_preempt = TB_NIL;
kvmppc_mmu_book3s_hv_init(vcpu);
- /*
- * We consider the vcpu stopped until we see the first run ioctl for it.
- */
- vcpu->arch.state = KVMPPC_VCPU_STOPPED;
+ vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
init_waitqueue_head(&vcpu->arch.cpu_run);
@@ -624,9 +887,10 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
INIT_LIST_HEAD(&vcore->runnable_threads);
spin_lock_init(&vcore->lock);
init_waitqueue_head(&vcore->wq);
- vcore->preempt_tb = mftb();
+ vcore->preempt_tb = TB_NIL;
}
kvm->arch.vcores[core] = vcore;
+ kvm->arch.online_vcores++;
}
mutex_unlock(&kvm->lock);
@@ -637,7 +901,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
++vcore->num_threads;
spin_unlock(&vcore->lock);
vcpu->arch.vcore = vcore;
- vcpu->arch.stolen_logged = vcore->stolen_tb;
vcpu->arch.cpu_type = KVM_CPU_3S_64;
kvmppc_sanity_check(vcpu);
@@ -697,17 +960,18 @@ extern void xics_wake_cpu(int cpu);
static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *v;
+ u64 now;
if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
return;
+ spin_lock(&vcpu->arch.tbacct_lock);
+ now = mftb();
+ vcpu->arch.busy_stolen += vcore_stolen_time(vc, now) -
+ vcpu->arch.stolen_logged;
+ vcpu->arch.busy_preempt = now;
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
+ spin_unlock(&vcpu->arch.tbacct_lock);
--vc->n_runnable;
- ++vc->n_busy;
- /* decrement the physical thread id of each following vcpu */
- v = vcpu;
- list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list)
- --v->arch.ptid;
list_del(&vcpu->arch.run_list);
}
@@ -720,6 +984,7 @@ static int kvmppc_grab_hwthread(int cpu)
/* Ensure the thread won't go into the kernel if it wakes */
tpaca->kvm_hstate.hwthread_req = 1;
+ tpaca->kvm_hstate.kvm_vcpu = NULL;
/*
* If the thread is already executing in the kernel (e.g. handling
@@ -769,7 +1034,6 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
smp_wmb();
#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
if (vcpu->arch.ptid) {
- kvmppc_grab_hwthread(cpu);
xics_wake_cpu(cpu);
++vc->n_woken;
}
@@ -795,7 +1059,8 @@ static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)
/*
* Check that we are on thread 0 and that any other threads in
- * this core are off-line.
+ * this core are off-line. Then grab the threads so they can't
+ * enter the kernel.
*/
static int on_primary_thread(void)
{
@@ -807,6 +1072,17 @@ static int on_primary_thread(void)
while (++thr < threads_per_core)
if (cpu_online(cpu + thr))
return 0;
+
+ /* Grab all hw threads so they can't go into the kernel */
+ for (thr = 1; thr < threads_per_core; ++thr) {
+ if (kvmppc_grab_hwthread(cpu + thr)) {
+ /* Couldn't grab one; let the others go */
+ do {
+ kvmppc_release_hwthread(cpu + thr);
+ } while (--thr > 0);
+ return 0;
+ }
+ }
return 1;
}
@@ -814,21 +1090,24 @@ static int on_primary_thread(void)
* Run a set of guest threads on a physical core.
* Called with vc->lock held.
*/
-static int kvmppc_run_core(struct kvmppc_vcore *vc)
+static void kvmppc_run_core(struct kvmppc_vcore *vc)
{
struct kvm_vcpu *vcpu, *vcpu0, *vnext;
long ret;
u64 now;
int ptid, i, need_vpa_update;
+ int srcu_idx;
+ struct kvm_vcpu *vcpus_to_update[threads_per_core];
/* don't start if any threads have a signal pending */
need_vpa_update = 0;
list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
if (signal_pending(vcpu->arch.run_task))
- return 0;
- need_vpa_update |= vcpu->arch.vpa.update_pending |
- vcpu->arch.slb_shadow.update_pending |
- vcpu->arch.dtl.update_pending;
+ return;
+ if (vcpu->arch.vpa.update_pending ||
+ vcpu->arch.slb_shadow.update_pending ||
+ vcpu->arch.dtl.update_pending)
+ vcpus_to_update[need_vpa_update++] = vcpu;
}
/*
@@ -838,7 +1117,7 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
vc->n_woken = 0;
vc->nap_count = 0;
vc->entry_exit_count = 0;
- vc->vcore_state = VCORE_RUNNING;
+ vc->vcore_state = VCORE_STARTING;
vc->in_guest = 0;
vc->napping_threads = 0;
@@ -848,24 +1127,12 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
*/
if (need_vpa_update) {
spin_unlock(&vc->lock);
- list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
- kvmppc_update_vpas(vcpu);
+ for (i = 0; i < need_vpa_update; ++i)
+ kvmppc_update_vpas(vcpus_to_update[i]);
spin_lock(&vc->lock);
}
/*
- * Make sure we are running on thread 0, and that
- * secondary threads are offline.
- * XXX we should also block attempts to bring any
- * secondary threads online.
- */
- if (threads_per_core > 1 && !on_primary_thread()) {
- list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
- vcpu->arch.ret = -EBUSY;
- goto out;
- }
-
- /*
* Assign physical thread IDs, first to non-ceded vcpus
* and then to ceded ones.
*/
@@ -879,28 +1146,36 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
}
}
if (!vcpu0)
- return 0; /* nothing to run */
+ goto out; /* nothing to run; should never happen */
list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
if (vcpu->arch.ceded)
vcpu->arch.ptid = ptid++;
- vc->stolen_tb += mftb() - vc->preempt_tb;
+ /*
+ * Make sure we are running on thread 0, and that
+ * secondary threads are offline.
+ */
+ if (threads_per_core > 1 && !on_primary_thread()) {
+ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+ vcpu->arch.ret = -EBUSY;
+ goto out;
+ }
+
vc->pcpu = smp_processor_id();
list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
kvmppc_start_thread(vcpu);
kvmppc_create_dtl_entry(vcpu, vc);
}
- /* Grab any remaining hw threads so they can't go into the kernel */
- for (i = ptid; i < threads_per_core; ++i)
- kvmppc_grab_hwthread(vc->pcpu + i);
+ vc->vcore_state = VCORE_RUNNING;
preempt_disable();
spin_unlock(&vc->lock);
kvm_guest_enter();
+
+ srcu_idx = srcu_read_lock(&vcpu0->kvm->srcu);
+
__kvmppc_vcore_entry(NULL, vcpu0);
- for (i = 0; i < threads_per_core; ++i)
- kvmppc_release_hwthread(vc->pcpu + i);
spin_lock(&vc->lock);
/* disable sending of IPIs on virtual external irqs */
@@ -909,10 +1184,14 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
/* wait for secondary threads to finish writing their state to memory */
if (vc->nap_count < vc->n_woken)
kvmppc_wait_for_nap(vc);
+ for (i = 0; i < threads_per_core; ++i)
+ kvmppc_release_hwthread(vc->pcpu + i);
/* prevent other vcpu threads from doing kvmppc_start_thread() now */
vc->vcore_state = VCORE_EXITING;
spin_unlock(&vc->lock);
+ srcu_read_unlock(&vcpu0->kvm->srcu, srcu_idx);
+
/* make sure updates to secondary vcpu structs are visible now */
smp_mb();
kvm_guest_exit();
@@ -920,6 +1199,7 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
preempt_enable();
kvm_resched(vcpu);
+ spin_lock(&vc->lock);
now = get_tb();
list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
/* cancel pending dec exception if dec is positive */
@@ -943,10 +1223,8 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
}
}
- spin_lock(&vc->lock);
out:
vc->vcore_state = VCORE_INACTIVE;
- vc->preempt_tb = mftb();
list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
arch.run_list) {
if (vcpu->arch.ret != RESUME_GUEST) {
@@ -954,8 +1232,6 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
wake_up(&vcpu->arch.cpu_run);
}
}
-
- return 1;
}
/*
@@ -979,20 +1255,11 @@ static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
{
DEFINE_WAIT(wait);
- struct kvm_vcpu *v;
- int all_idle = 1;
prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
vc->vcore_state = VCORE_SLEEPING;
spin_unlock(&vc->lock);
- list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
- if (!v->arch.ceded || v->arch.pending_exceptions) {
- all_idle = 0;
- break;
- }
- }
- if (all_idle)
- schedule();
+ schedule();
finish_wait(&vc->wq, &wait);
spin_lock(&vc->lock);
vc->vcore_state = VCORE_INACTIVE;
@@ -1001,13 +1268,13 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
int n_ceded;
- int prev_state;
struct kvmppc_vcore *vc;
struct kvm_vcpu *v, *vn;
kvm_run->exit_reason = 0;
vcpu->arch.ret = RESUME_GUEST;
vcpu->arch.trap = 0;
+ kvmppc_update_vpas(vcpu);
/*
* Synchronize with other threads in this virtual core
@@ -1017,8 +1284,9 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
vcpu->arch.ceded = 0;
vcpu->arch.run_task = current;
vcpu->arch.kvm_run = kvm_run;
- prev_state = vcpu->arch.state;
+ vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
+ vcpu->arch.busy_preempt = TB_NIL;
list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
++vc->n_runnable;
@@ -1027,33 +1295,26 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
* If the vcore is already running, we may be able to start
* this thread straight away and have it join in.
*/
- if (prev_state == KVMPPC_VCPU_STOPPED) {
+ if (!signal_pending(current)) {
if (vc->vcore_state == VCORE_RUNNING &&
VCORE_EXIT_COUNT(vc) == 0) {
vcpu->arch.ptid = vc->n_runnable - 1;
+ kvmppc_create_dtl_entry(vcpu, vc);
kvmppc_start_thread(vcpu);
+ } else if (vc->vcore_state == VCORE_SLEEPING) {
+ wake_up(&vc->wq);
}
- } else if (prev_state == KVMPPC_VCPU_BUSY_IN_HOST)
- --vc->n_busy;
+ }
while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
!signal_pending(current)) {
- if (vc->n_busy || vc->vcore_state != VCORE_INACTIVE) {
+ if (vc->vcore_state != VCORE_INACTIVE) {
spin_unlock(&vc->lock);
kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE);
spin_lock(&vc->lock);
continue;
}
- vc->runner = vcpu;
- n_ceded = 0;
- list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
- n_ceded += v->arch.ceded;
- if (n_ceded == vc->n_runnable)
- kvmppc_vcore_blocked(vc);
- else
- kvmppc_run_core(vc);
-
list_for_each_entry_safe(v, vn, &vc->runnable_threads,
arch.run_list) {
kvmppc_core_prepare_to_enter(v);
@@ -1065,22 +1326,40 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
wake_up(&v->arch.cpu_run);
}
}
+ if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
+ break;
+ vc->runner = vcpu;
+ n_ceded = 0;
+ list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
+ if (!v->arch.pending_exceptions)
+ n_ceded += v->arch.ceded;
+ if (n_ceded == vc->n_runnable)
+ kvmppc_vcore_blocked(vc);
+ else
+ kvmppc_run_core(vc);
vc->runner = NULL;
}
- if (signal_pending(current)) {
- if (vc->vcore_state == VCORE_RUNNING ||
- vc->vcore_state == VCORE_EXITING) {
- spin_unlock(&vc->lock);
- kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE);
- spin_lock(&vc->lock);
- }
- if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
- kvmppc_remove_runnable(vc, vcpu);
- vcpu->stat.signal_exits++;
- kvm_run->exit_reason = KVM_EXIT_INTR;
- vcpu->arch.ret = -EINTR;
- }
+ while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
+ (vc->vcore_state == VCORE_RUNNING ||
+ vc->vcore_state == VCORE_EXITING)) {
+ spin_unlock(&vc->lock);
+ kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE);
+ spin_lock(&vc->lock);
+ }
+
+ if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
+ kvmppc_remove_runnable(vc, vcpu);
+ vcpu->stat.signal_exits++;
+ kvm_run->exit_reason = KVM_EXIT_INTR;
+ vcpu->arch.ret = -EINTR;
+ }
+
+ if (vc->n_runnable && vc->vcore_state == VCORE_INACTIVE) {
+ /* Wake up some vcpu to run the core */
+ v = list_first_entry(&vc->runnable_threads,
+ struct kvm_vcpu, arch.run_list);
+ wake_up(&v->arch.cpu_run);
}
spin_unlock(&vc->lock);
@@ -1090,6 +1369,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
int r;
+ int srcu_idx;
if (!vcpu->arch.sane) {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
@@ -1120,6 +1400,7 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
flush_vsx_to_thread(current);
vcpu->arch.wqp = &vcpu->arch.vcore->wq;
vcpu->arch.pgdir = current->mm->pgd;
+ vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
do {
r = kvmppc_run_vcpu(run, vcpu);
@@ -1128,10 +1409,16 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
!(vcpu->arch.shregs.msr & MSR_PR)) {
r = kvmppc_pseries_do_hcall(vcpu);
kvmppc_core_prepare_to_enter(vcpu);
+ } else if (r == RESUME_PAGE_FAULT) {
+ srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ r = kvmppc_book3s_hv_page_fault(run, vcpu,
+ vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
+ srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
}
} while (r == RESUME_GUEST);
out:
+ vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
atomic_dec(&vcpu->kvm->arch.vcpus_running);
return r;
}
@@ -1273,7 +1560,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
n = kvm_dirty_bitmap_bytes(memslot);
memset(memslot->dirty_bitmap, 0, n);
- r = kvmppc_hv_get_dirty_log(kvm, memslot);
+ r = kvmppc_hv_get_dirty_log(kvm, memslot, memslot->dirty_bitmap);
if (r)
goto out;
@@ -1287,67 +1574,88 @@ out:
return r;
}
-static unsigned long slb_pgsize_encoding(unsigned long psize)
+static void unpin_slot(struct kvm_memory_slot *memslot)
{
- unsigned long senc = 0;
+ unsigned long *physp;
+ unsigned long j, npages, pfn;
+ struct page *page;
- if (psize > 0x1000) {
- senc = SLB_VSID_L;
- if (psize == 0x10000)
- senc |= SLB_VSID_LP_01;
+ physp = memslot->arch.slot_phys;
+ npages = memslot->npages;
+ if (!physp)
+ return;
+ for (j = 0; j < npages; j++) {
+ if (!(physp[j] & KVMPPC_GOT_PAGE))
+ continue;
+ pfn = physp[j] >> PAGE_SHIFT;
+ page = pfn_to_page(pfn);
+ SetPageDirty(page);
+ put_page(page);
+ }
+}
+
+void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont)
+{
+ if (!dont || free->arch.rmap != dont->arch.rmap) {
+ vfree(free->arch.rmap);
+ free->arch.rmap = NULL;
+ }
+ if (!dont || free->arch.slot_phys != dont->arch.slot_phys) {
+ unpin_slot(free);
+ vfree(free->arch.slot_phys);
+ free->arch.slot_phys = NULL;
}
- return senc;
+}
+
+int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+ unsigned long npages)
+{
+ slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
+ if (!slot->arch.rmap)
+ return -ENOMEM;
+ slot->arch.slot_phys = NULL;
+
+ return 0;
}
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem)
+ struct kvm_memory_slot *memslot,
+ struct kvm_userspace_memory_region *mem)
{
- unsigned long npages;
unsigned long *phys;
- /* Allocate a slot_phys array */
- phys = kvm->arch.slot_phys[mem->slot];
- if (!kvm->arch.using_mmu_notifiers && !phys) {
- npages = mem->memory_size >> PAGE_SHIFT;
- phys = vzalloc(npages * sizeof(unsigned long));
+ /* Allocate a slot_phys array if needed */
+ phys = memslot->arch.slot_phys;
+ if (!kvm->arch.using_mmu_notifiers && !phys && memslot->npages) {
+ phys = vzalloc(memslot->npages * sizeof(unsigned long));
if (!phys)
return -ENOMEM;
- kvm->arch.slot_phys[mem->slot] = phys;
- kvm->arch.slot_npages[mem->slot] = npages;
+ memslot->arch.slot_phys = phys;
}
return 0;
}
-static void unpin_slot(struct kvm *kvm, int slot_id)
+void kvmppc_core_commit_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ struct kvm_memory_slot old)
{
- unsigned long *physp;
- unsigned long j, npages, pfn;
- struct page *page;
+ unsigned long npages = mem->memory_size >> PAGE_SHIFT;
+ struct kvm_memory_slot *memslot;
- physp = kvm->arch.slot_phys[slot_id];
- npages = kvm->arch.slot_npages[slot_id];
- if (physp) {
- spin_lock(&kvm->arch.slot_phys_lock);
- for (j = 0; j < npages; j++) {
- if (!(physp[j] & KVMPPC_GOT_PAGE))
- continue;
- pfn = physp[j] >> PAGE_SHIFT;
- page = pfn_to_page(pfn);
- SetPageDirty(page);
- put_page(page);
- }
- kvm->arch.slot_phys[slot_id] = NULL;
- spin_unlock(&kvm->arch.slot_phys_lock);
- vfree(physp);
+ if (npages && old.npages) {
+ /*
+ * If modifying a memslot, reset all the rmap dirty bits.
+ * If this is a new memslot, we don't need to do anything
+ * since the rmap array starts out as all zeroes,
+ * i.e. no pages are dirty.
+ */
+ memslot = id_to_memslot(kvm->memslots, mem->slot);
+ kvmppc_hv_get_dirty_log(kvm, memslot, NULL);
}
}
-void kvmppc_core_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem)
-{
-}
-
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
{
int err = 0;
@@ -1362,6 +1670,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
unsigned long rmls;
unsigned long *physp;
unsigned long i, npages;
+ int srcu_idx;
mutex_lock(&kvm->lock);
if (kvm->arch.rma_setup_done)
@@ -1377,12 +1686,13 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
}
/* Look up the memslot for guest physical address 0 */
+ srcu_idx = srcu_read_lock(&kvm->srcu);
memslot = gfn_to_memslot(kvm, 0);
/* We must have some memory at 0 by now */
err = -EINVAL;
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
- goto out;
+ goto out_srcu;
/* Look up the VMA for the start of this memory slot */
hva = memslot->userspace_addr;
@@ -1406,14 +1716,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
err = -EPERM;
if (cpu_has_feature(CPU_FTR_ARCH_201)) {
pr_err("KVM: CPU requires an RMO\n");
- goto out;
+ goto out_srcu;
}
/* We can handle 4k, 64k or 16M pages in the VRMA */
err = -EINVAL;
if (!(psize == 0x1000 || psize == 0x10000 ||
psize == 0x1000000))
- goto out;
+ goto out_srcu;
/* Update VRMASD field in the LPCR */
senc = slb_pgsize_encoding(psize);
@@ -1436,7 +1746,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
err = -EINVAL;
if (rmls < 0) {
pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
- goto out;
+ goto out_srcu;
}
atomic_inc(&ri->use_count);
kvm->arch.rma = ri;
@@ -1465,17 +1775,24 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
/* Initialize phys addrs of pages in RMO */
npages = ri->npages;
porder = __ilog2(npages);
- physp = kvm->arch.slot_phys[memslot->id];
- spin_lock(&kvm->arch.slot_phys_lock);
- for (i = 0; i < npages; ++i)
- physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + porder;
- spin_unlock(&kvm->arch.slot_phys_lock);
+ physp = memslot->arch.slot_phys;
+ if (physp) {
+ if (npages > memslot->npages)
+ npages = memslot->npages;
+ spin_lock(&kvm->arch.slot_phys_lock);
+ for (i = 0; i < npages; ++i)
+ physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) +
+ porder;
+ spin_unlock(&kvm->arch.slot_phys_lock);
+ }
}
/* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
smp_wmb();
kvm->arch.rma_setup_done = 1;
err = 0;
+ out_srcu:
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
out:
mutex_unlock(&kvm->lock);
return err;
@@ -1496,6 +1813,13 @@ int kvmppc_core_init_vm(struct kvm *kvm)
return -ENOMEM;
kvm->arch.lpid = lpid;
+ /*
+ * Since we don't flush the TLB when tearing down a VM,
+ * and this lpid might have previously been used,
+ * make sure we flush on each core before running the new VM.
+ */
+ cpumask_setall(&kvm->arch.need_tlb_flush);
+
INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
kvm->arch.rma = NULL;
@@ -1523,16 +1847,19 @@ int kvmppc_core_init_vm(struct kvm *kvm)
kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206);
spin_lock_init(&kvm->arch.slot_phys_lock);
+
+ /*
+ * Don't allow secondary CPU threads to come online
+ * while any KVM VMs exist.
+ */
+ inhibit_secondary_onlining();
+
return 0;
}
void kvmppc_core_destroy_vm(struct kvm *kvm)
{
- unsigned long i;
-
- if (!kvm->arch.using_mmu_notifiers)
- for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
- unpin_slot(kvm, i);
+ uninhibit_secondary_onlining();
if (kvm->arch.rma) {
kvm_release_rma(kvm->arch.rma);
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index fb4eac290fe..ec0a9e5de10 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -157,8 +157,8 @@ static void __init kvm_linear_init_one(ulong size, int count, int type)
linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info));
for (i = 0; i < count; ++i) {
linear = alloc_bootmem_align(size, size);
- pr_info("Allocated KVM %s at %p (%ld MB)\n", typestr, linear,
- size >> 20);
+ pr_debug("Allocated KVM %s at %p (%ld MB)\n", typestr, linear,
+ size >> 20);
linear_info[i].base_virt = linear;
linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT;
linear_info[i].npages = npages;
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
new file mode 100644
index 00000000000..35f3cf0269b
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -0,0 +1,144 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * Copyright 2012 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/kernel.h>
+#include <asm/opal.h>
+
+/* SRR1 bits for machine check on POWER7 */
+#define SRR1_MC_LDSTERR (1ul << (63-42))
+#define SRR1_MC_IFETCH_SH (63-45)
+#define SRR1_MC_IFETCH_MASK 0x7
+#define SRR1_MC_IFETCH_SLBPAR 2 /* SLB parity error */
+#define SRR1_MC_IFETCH_SLBMULTI 3 /* SLB multi-hit */
+#define SRR1_MC_IFETCH_SLBPARMULTI 4 /* SLB parity + multi-hit */
+#define SRR1_MC_IFETCH_TLBMULTI 5 /* I-TLB multi-hit */
+
+/* DSISR bits for machine check on POWER7 */
+#define DSISR_MC_DERAT_MULTI 0x800 /* D-ERAT multi-hit */
+#define DSISR_MC_TLB_MULTI 0x400 /* D-TLB multi-hit */
+#define DSISR_MC_SLB_PARITY 0x100 /* SLB parity error */
+#define DSISR_MC_SLB_MULTI 0x080 /* SLB multi-hit */
+#define DSISR_MC_SLB_PARMULTI 0x040 /* SLB parity + multi-hit */
+
+/* POWER7 SLB flush and reload */
+static void reload_slb(struct kvm_vcpu *vcpu)
+{
+ struct slb_shadow *slb;
+ unsigned long i, n;
+
+ /* First clear out SLB */
+ asm volatile("slbmte %0,%0; slbia" : : "r" (0));
+
+ /* Do they have an SLB shadow buffer registered? */
+ slb = vcpu->arch.slb_shadow.pinned_addr;
+ if (!slb)
+ return;
+
+ /* Sanity check */
+ n = min_t(u32, slb->persistent, SLB_MIN_SIZE);
+ if ((void *) &slb->save_area[n] > vcpu->arch.slb_shadow.pinned_end)
+ return;
+
+ /* Load up the SLB from that */
+ for (i = 0; i < n; ++i) {
+ unsigned long rb = slb->save_area[i].esid;
+ unsigned long rs = slb->save_area[i].vsid;
+
+ rb = (rb & ~0xFFFul) | i; /* insert entry number */
+ asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
+ }
+}
+
+/* POWER7 TLB flush */
+static void flush_tlb_power7(struct kvm_vcpu *vcpu)
+{
+ unsigned long i, rb;
+
+ rb = TLBIEL_INVAL_SET_LPID;
+ for (i = 0; i < POWER7_TLB_SETS; ++i) {
+ asm volatile("tlbiel %0" : : "r" (rb));
+ rb += 1 << TLBIEL_INVAL_SET_SHIFT;
+ }
+}
+
+/*
+ * On POWER7, see if we can handle a machine check that occurred inside
+ * the guest in real mode, without switching to the host partition.
+ *
+ * Returns: 0 => exit guest, 1 => deliver machine check to guest
+ */
+static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
+{
+ unsigned long srr1 = vcpu->arch.shregs.msr;
+ struct opal_machine_check_event *opal_evt;
+ long handled = 1;
+
+ if (srr1 & SRR1_MC_LDSTERR) {
+ /* error on load/store */
+ unsigned long dsisr = vcpu->arch.shregs.dsisr;
+
+ if (dsisr & (DSISR_MC_SLB_PARMULTI | DSISR_MC_SLB_MULTI |
+ DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI)) {
+ /* flush and reload SLB; flushes D-ERAT too */
+ reload_slb(vcpu);
+ dsisr &= ~(DSISR_MC_SLB_PARMULTI | DSISR_MC_SLB_MULTI |
+ DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI);
+ }
+ if (dsisr & DSISR_MC_TLB_MULTI) {
+ flush_tlb_power7(vcpu);
+ dsisr &= ~DSISR_MC_TLB_MULTI;
+ }
+ /* Any other errors we don't understand? */
+ if (dsisr & 0xffffffffUL)
+ handled = 0;
+ }
+
+ switch ((srr1 >> SRR1_MC_IFETCH_SH) & SRR1_MC_IFETCH_MASK) {
+ case 0:
+ break;
+ case SRR1_MC_IFETCH_SLBPAR:
+ case SRR1_MC_IFETCH_SLBMULTI:
+ case SRR1_MC_IFETCH_SLBPARMULTI:
+ reload_slb(vcpu);
+ break;
+ case SRR1_MC_IFETCH_TLBMULTI:
+ flush_tlb_power7(vcpu);
+ break;
+ default:
+ handled = 0;
+ }
+
+ /*
+ * See if OPAL has already handled the condition.
+ * We assume that if the condition is recovered then OPAL
+ * will have generated an error log event that we will pick
+ * up and log later.
+ */
+ opal_evt = local_paca->opal_mc_evt;
+ if (opal_evt->version == OpalMCE_V1 &&
+ (opal_evt->severity == OpalMCE_SEV_NO_ERROR ||
+ opal_evt->disposition == OpalMCE_DISPOSITION_RECOVERED))
+ handled = 1;
+
+ if (handled)
+ opal_evt->in_use = 0;
+
+ return handled;
+}
+
+long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_206))
+ return kvmppc_realmode_mc_power7(vcpu);
+
+ return 0;
+}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index fb0e821622d..19c93bae1ae 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -35,6 +35,37 @@ static void *real_vmalloc_addr(void *x)
return __va(addr);
}
+/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
+static int global_invalidates(struct kvm *kvm, unsigned long flags)
+{
+ int global;
+
+ /*
+ * If there is only one vcore, and it's currently running,
+ * we can use tlbiel as long as we mark all other physical
+ * cores as potentially having stale TLB entries for this lpid.
+ * If we're not using MMU notifiers, we never take pages away
+ * from the guest, so we can use tlbiel if requested.
+ * Otherwise, don't use tlbiel.
+ */
+ if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcore)
+ global = 0;
+ else if (kvm->arch.using_mmu_notifiers)
+ global = 1;
+ else
+ global = !(flags & H_LOCAL);
+
+ if (!global) {
+ /* any other core might now have stale TLB entries... */
+ smp_wmb();
+ cpumask_setall(&kvm->arch.need_tlb_flush);
+ cpumask_clear_cpu(local_paca->kvm_hstate.kvm_vcore->pcpu,
+ &kvm->arch.need_tlb_flush);
+ }
+
+ return global;
+}
+
/*
* Add this HPTE into the chain for the real page.
* Must be called with the chain locked; it unlocks the chain.
@@ -59,13 +90,24 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
head->back = pte_index;
} else {
rev->forw = rev->back = pte_index;
- i = pte_index;
+ *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
+ pte_index | KVMPPC_RMAP_PRESENT;
}
- smp_wmb();
- *rmap = i | KVMPPC_RMAP_REFERENCED | KVMPPC_RMAP_PRESENT; /* unlock */
+ unlock_rmap(rmap);
}
EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
+/*
+ * Note modification of an HPTE; set the HPTE modified bit
+ * if anyone is interested.
+ */
+static inline void note_hpte_modification(struct kvm *kvm,
+ struct revmap_entry *rev)
+{
+ if (atomic_read(&kvm->arch.hpte_mod_interest))
+ rev->guest_rpte |= HPTE_GR_MODIFIED;
+}
+
/* Remove this HPTE from the chain for a real page */
static void remove_revmap_chain(struct kvm *kvm, long pte_index,
struct revmap_entry *rev,
@@ -81,7 +123,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
ptel = rev->guest_rpte |= rcbits;
gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
- if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
+ if (!memslot)
return;
rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
@@ -103,14 +145,14 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
unlock_rmap(rmap);
}
-static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva,
+static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
int writing, unsigned long *pte_sizep)
{
pte_t *ptep;
unsigned long ps = *pte_sizep;
unsigned int shift;
- ptep = find_linux_pte_or_hugepte(vcpu->arch.pgdir, hva, &shift);
+ ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
if (!ptep)
return __pte(0);
if (shift)
@@ -130,15 +172,15 @@ static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
hpte[0] = hpte_v;
}
-long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
- long pte_index, unsigned long pteh, unsigned long ptel)
+long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
+ long pte_index, unsigned long pteh, unsigned long ptel,
+ pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
{
- struct kvm *kvm = vcpu->kvm;
unsigned long i, pa, gpa, gfn, psize;
unsigned long slot_fn, hva;
unsigned long *hpte;
struct revmap_entry *rev;
- unsigned long g_ptel = ptel;
+ unsigned long g_ptel;
struct kvm_memory_slot *memslot;
unsigned long *physp, pte_size;
unsigned long is_io;
@@ -147,13 +189,14 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned int writing;
unsigned long mmu_seq;
unsigned long rcbits;
- bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING;
psize = hpte_page_size(pteh, ptel);
if (!psize)
return H_PARAMETER;
writing = hpte_is_writable(ptel);
pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
+ ptel &= ~HPTE_GR_RESERVED;
+ g_ptel = ptel;
/* used later to detect if we might have been invalidated */
mmu_seq = kvm->mmu_notifier_seq;
@@ -183,7 +226,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
rmap = &memslot->arch.rmap[slot_fn];
if (!kvm->arch.using_mmu_notifiers) {
- physp = kvm->arch.slot_phys[memslot->id];
+ physp = memslot->arch.slot_phys;
if (!physp)
return H_PARAMETER;
physp += slot_fn;
@@ -201,7 +244,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
/* Look up the Linux PTE for the backing page */
pte_size = psize;
- pte = lookup_linux_pte(vcpu, hva, writing, &pte_size);
+ pte = lookup_linux_pte(pgdir, hva, writing, &pte_size);
if (pte_present(pte)) {
if (writing && !pte_write(pte))
/* make the actual HPTE be read-only */
@@ -210,6 +253,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
pa = pte_pfn(pte) << PAGE_SHIFT;
}
}
+
if (pte_size < psize)
return H_PARAMETER;
if (pa && pte_size > psize)
@@ -287,8 +331,10 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
rev = &kvm->arch.revmap[pte_index];
if (realmode)
rev = real_vmalloc_addr(rev);
- if (rev)
+ if (rev) {
rev->guest_rpte = g_ptel;
+ note_hpte_modification(kvm, rev);
+ }
/* Link HPTE into reverse-map chain */
if (pteh & HPTE_V_VALID) {
@@ -297,7 +343,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
lock_rmap(rmap);
/* Check for pending invalidations under the rmap chain lock */
if (kvm->arch.using_mmu_notifiers &&
- mmu_notifier_retry(vcpu, mmu_seq)) {
+ mmu_notifier_retry(kvm, mmu_seq)) {
/* inval in progress, write a non-present HPTE */
pteh |= HPTE_V_ABSENT;
pteh &= ~HPTE_V_VALID;
@@ -318,10 +364,17 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
hpte[0] = pteh;
asm volatile("ptesync" : : : "memory");
- vcpu->arch.gpr[4] = pte_index;
+ *pte_idx_ret = pte_index;
return H_SUCCESS;
}
-EXPORT_SYMBOL_GPL(kvmppc_h_enter);
+EXPORT_SYMBOL_GPL(kvmppc_do_h_enter);
+
+long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+ long pte_index, unsigned long pteh, unsigned long ptel)
+{
+ return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel,
+ vcpu->arch.pgdir, true, &vcpu->arch.gpr[4]);
+}
#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token))
@@ -343,11 +396,10 @@ static inline int try_lock_tlbie(unsigned int *lock)
return old == 0;
}
-long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
- unsigned long pte_index, unsigned long avpn,
- unsigned long va)
+long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
+ unsigned long pte_index, unsigned long avpn,
+ unsigned long *hpret)
{
- struct kvm *kvm = vcpu->kvm;
unsigned long *hpte;
unsigned long v, r, rb;
struct revmap_entry *rev;
@@ -369,7 +421,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
if (v & HPTE_V_VALID) {
hpte[0] &= ~HPTE_V_VALID;
rb = compute_tlbie_rb(v, hpte[1], pte_index);
- if (!(flags & H_LOCAL) && atomic_read(&kvm->online_vcpus) > 1) {
+ if (global_invalidates(kvm, flags)) {
while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
cpu_relax();
asm volatile("ptesync" : : : "memory");
@@ -385,13 +437,22 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
/* Read PTE low word after tlbie to get final R/C values */
remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
}
- r = rev->guest_rpte;
+ r = rev->guest_rpte & ~HPTE_GR_RESERVED;
+ note_hpte_modification(kvm, rev);
unlock_hpte(hpte, 0);
- vcpu->arch.gpr[4] = v;
- vcpu->arch.gpr[5] = r;
+ hpret[0] = v;
+ hpret[1] = r;
return H_SUCCESS;
}
+EXPORT_SYMBOL_GPL(kvmppc_do_h_remove);
+
+long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index, unsigned long avpn)
+{
+ return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
+ &vcpu->arch.gpr[4]);
+}
long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
{
@@ -459,6 +520,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
args[j] = ((0x80 | flags) << 56) + pte_index;
rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+ note_hpte_modification(kvm, rev);
if (!(hp[0] & HPTE_V_VALID)) {
/* insert R and C bits from PTE */
@@ -534,8 +596,6 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
return H_NOT_FOUND;
}
- if (atomic_read(&kvm->online_vcpus) == 1)
- flags |= H_LOCAL;
v = hpte[0];
bits = (flags << 55) & HPTE_R_PP0;
bits |= (flags << 48) & HPTE_R_KEY_HI;
@@ -548,6 +608,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
if (rev) {
r = (rev->guest_rpte & ~mask) | bits;
rev->guest_rpte = r;
+ note_hpte_modification(kvm, rev);
}
r = (hpte[1] & ~mask) | bits;
@@ -555,7 +616,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
if (v & HPTE_V_VALID) {
rb = compute_tlbie_rb(v, r, pte_index);
hpte[0] = v & ~HPTE_V_VALID;
- if (!(flags & H_LOCAL)) {
+ if (global_invalidates(kvm, flags)) {
while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
cpu_relax();
asm volatile("ptesync" : : : "memory");
@@ -568,6 +629,28 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
asm volatile("tlbiel %0" : : "r" (rb));
asm volatile("ptesync" : : : "memory");
}
+ /*
+ * If the host has this page as readonly but the guest
+ * wants to make it read/write, reduce the permissions.
+ * Checking the host permissions involves finding the
+ * memslot and then the Linux PTE for the page.
+ */
+ if (hpte_is_writable(r) && kvm->arch.using_mmu_notifiers) {
+ unsigned long psize, gfn, hva;
+ struct kvm_memory_slot *memslot;
+ pgd_t *pgdir = vcpu->arch.pgdir;
+ pte_t pte;
+
+ psize = hpte_page_size(v, r);
+ gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
+ memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
+ if (memslot) {
+ hva = __gfn_to_hva_memslot(memslot, gfn);
+ pte = lookup_linux_pte(pgdir, hva, 1, &psize);
+ if (pte_present(pte) && !pte_write(pte))
+ r = hpte_make_readonly(r);
+ }
+ }
}
hpte[1] = r;
eieio();
@@ -599,8 +682,10 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
v &= ~HPTE_V_ABSENT;
v |= HPTE_V_VALID;
}
- if (v & HPTE_V_VALID)
+ if (v & HPTE_V_VALID) {
r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
+ r &= ~HPTE_GR_RESERVED;
+ }
vcpu->arch.gpr[4 + i * 2] = v;
vcpu->arch.gpr[5 + i * 2] = r;
}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 74a24bbb963..10b6c358dd7 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -27,6 +27,7 @@
#include <asm/asm-offsets.h>
#include <asm/exception-64s.h>
#include <asm/kvm_book3s_asm.h>
+#include <asm/mmu-hash64.h>
/*****************************************************************************
* *
@@ -134,8 +135,11 @@ kvm_start_guest:
27: /* XXX should handle hypervisor maintenance interrupts etc. here */
+ /* reload vcpu pointer after clearing the IPI */
+ ld r4,HSTATE_KVM_VCPU(r13)
+ cmpdi r4,0
/* if we have no vcpu to run, go back to sleep */
- beq cr1,kvm_no_guest
+ beq kvm_no_guest
/* were we napping due to cede? */
lbz r0,HSTATE_NAPPING(r13)
@@ -310,7 +314,33 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
mtspr SPRN_SDR1,r6 /* switch to partition page table */
mtspr SPRN_LPID,r7
isync
+
+ /* See if we need to flush the TLB */
+ lhz r6,PACAPACAINDEX(r13) /* test_bit(cpu, need_tlb_flush) */
+ clrldi r7,r6,64-6 /* extract bit number (6 bits) */
+ srdi r6,r6,6 /* doubleword number */
+ sldi r6,r6,3 /* address offset */
+ add r6,r6,r9
+ addi r6,r6,KVM_NEED_FLUSH /* dword in kvm->arch.need_tlb_flush */
li r0,1
+ sld r0,r0,r7
+ ld r7,0(r6)
+ and. r7,r7,r0
+ beq 22f
+23: ldarx r7,0,r6 /* if set, clear the bit */
+ andc r7,r7,r0
+ stdcx. r7,0,r6
+ bne 23b
+ li r6,128 /* and flush the TLB */
+ mtctr r6
+ li r7,0x800 /* IS field = 0b10 */
+ ptesync
+28: tlbiel r7
+ addi r7,r7,0x1000
+ bdnz 28b
+ ptesync
+
+22: li r0,1
stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */
b 10f
@@ -333,36 +363,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
mr r9,r4
blt hdec_soon
- /*
- * Invalidate the TLB if we could possibly have stale TLB
- * entries for this partition on this core due to the use
- * of tlbiel.
- * XXX maybe only need this on primary thread?
- */
- ld r9,VCPU_KVM(r4) /* pointer to struct kvm */
- lwz r5,VCPU_VCPUID(r4)
- lhz r6,PACAPACAINDEX(r13)
- rldimi r6,r5,0,62 /* XXX map as if threads 1:1 p:v */
- lhz r8,VCPU_LAST_CPU(r4)
- sldi r7,r6,1 /* see if this is the same vcpu */
- add r7,r7,r9 /* as last ran on this pcpu */
- lhz r0,KVM_LAST_VCPU(r7)
- cmpw r6,r8 /* on the same cpu core as last time? */
- bne 3f
- cmpw r0,r5 /* same vcpu as this core last ran? */
- beq 1f
-3: sth r6,VCPU_LAST_CPU(r4) /* if not, invalidate partition TLB */
- sth r5,KVM_LAST_VCPU(r7)
- li r6,128
- mtctr r6
- li r7,0x800 /* IS field = 0b10 */
- ptesync
-2: tlbiel r7
- addi r7,r7,0x1000
- bdnz 2b
- ptesync
-1:
-
/* Save purr/spurr */
mfspr r5,SPRN_PURR
mfspr r6,SPRN_SPURR
@@ -679,8 +679,7 @@ BEGIN_FTR_SECTION
1:
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
-nohpte_cont:
-hcall_real_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
+guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
/* Save DEC */
mfspr r5,SPRN_DEC
mftb r6
@@ -701,6 +700,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
std r6, VCPU_FAULT_DAR(r9)
stw r7, VCPU_FAULT_DSISR(r9)
+ /* See if it is a machine check */
+ cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
+ beq machine_check_realmode
+mc_cont:
+
/* Save guest CTRL register, set runlatch to 1 */
6: mfspr r6,SPRN_CTRLF
stw r6,VCPU_CTRL(r9)
@@ -1113,38 +1117,41 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
/*
* For external and machine check interrupts, we need
* to call the Linux handler to process the interrupt.
- * We do that by jumping to the interrupt vector address
- * which we have in r12. The [h]rfid at the end of the
+ * We do that by jumping to absolute address 0x500 for
+ * external interrupts, or the machine_check_fwnmi label
+ * for machine checks (since firmware might have patched
+ * the vector area at 0x200). The [h]rfid at the end of the
* handler will return to the book3s_hv_interrupts.S code.
* For other interrupts we do the rfid to get back
- * to the book3s_interrupts.S code here.
+ * to the book3s_hv_interrupts.S code here.
*/
ld r8, HSTATE_VMHANDLER(r13)
ld r7, HSTATE_HOST_MSR(r13)
+ cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
+BEGIN_FTR_SECTION
beq 11f
- cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
/* RFI into the highmem handler, or branch to interrupt handler */
-12: mfmsr r6
- mtctr r12
+ mfmsr r6
li r0, MSR_RI
andc r6, r6, r0
mtmsrd r6, 1 /* Clear RI in MSR */
mtsrr0 r8
mtsrr1 r7
- beqctr
+ beqa 0x500 /* external interrupt (PPC970) */
+ beq cr1, 13f /* machine check */
RFI
-11:
-BEGIN_FTR_SECTION
- b 12b
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
- mtspr SPRN_HSRR0, r8
+ /* On POWER7, we have external interrupts set to use HSRR0/1 */
+11: mtspr SPRN_HSRR0, r8
mtspr SPRN_HSRR1, r7
ba 0x500
+13: b machine_check_fwnmi
+
/*
* Check whether an HDSI is an HPTE not found fault or something else.
* If it is an HPTE not found fault that is due to the guest accessing
@@ -1177,7 +1184,7 @@ kvmppc_hdsi:
cmpdi r3, 0 /* retry the instruction */
beq 6f
cmpdi r3, -1 /* handle in kernel mode */
- beq nohpte_cont
+ beq guest_exit_cont
cmpdi r3, -2 /* MMIO emulation; need instr word */
beq 2f
@@ -1191,6 +1198,7 @@ kvmppc_hdsi:
li r10, BOOK3S_INTERRUPT_DATA_STORAGE
li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
rotldi r11, r11, 63
+fast_interrupt_c_return:
6: ld r7, VCPU_CTR(r9)
lwz r8, VCPU_XER(r9)
mtctr r7
@@ -1223,7 +1231,7 @@ kvmppc_hdsi:
/* Unset guest mode. */
li r0, KVM_GUEST_MODE_NONE
stb r0, HSTATE_IN_GUEST(r13)
- b nohpte_cont
+ b guest_exit_cont
/*
* Similarly for an HISI, reflect it to the guest as an ISI unless
@@ -1249,9 +1257,9 @@ kvmppc_hisi:
ld r11, VCPU_MSR(r9)
li r12, BOOK3S_INTERRUPT_H_INST_STORAGE
cmpdi r3, 0 /* retry the instruction */
- beq 6f
+ beq fast_interrupt_c_return
cmpdi r3, -1 /* handle in kernel mode */
- beq nohpte_cont
+ beq guest_exit_cont
/* Synthesize an ISI for the guest */
mr r11, r3
@@ -1260,12 +1268,7 @@ kvmppc_hisi:
li r10, BOOK3S_INTERRUPT_INST_STORAGE
li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
rotldi r11, r11, 63
-6: ld r7, VCPU_CTR(r9)
- lwz r8, VCPU_XER(r9)
- mtctr r7
- mtxer r8
- mr r4, r9
- b fast_guest_return
+ b fast_interrupt_c_return
3: ld r6, VCPU_KVM(r9) /* not relocated, use VRMA */
ld r5, KVM_VRMA_SLB_V(r6)
@@ -1281,14 +1284,14 @@ kvmppc_hisi:
hcall_try_real_mode:
ld r3,VCPU_GPR(R3)(r9)
andi. r0,r11,MSR_PR
- bne hcall_real_cont
+ bne guest_exit_cont
clrrdi r3,r3,2
cmpldi r3,hcall_real_table_end - hcall_real_table
- bge hcall_real_cont
+ bge guest_exit_cont
LOAD_REG_ADDR(r4, hcall_real_table)
lwzx r3,r3,r4
cmpwi r3,0
- beq hcall_real_cont
+ beq guest_exit_cont
add r3,r3,r4
mtctr r3
mr r3,r9 /* get vcpu pointer */
@@ -1309,7 +1312,7 @@ hcall_real_fallback:
li r12,BOOK3S_INTERRUPT_SYSCALL
ld r9, HSTATE_KVM_VCPU(r13)
- b hcall_real_cont
+ b guest_exit_cont
.globl hcall_real_table
hcall_real_table:
@@ -1568,6 +1571,21 @@ kvm_cede_exit:
li r3,H_TOO_HARD
blr
+ /* Try to handle a machine check in real mode */
+machine_check_realmode:
+ mr r3, r9 /* get vcpu pointer */
+ bl .kvmppc_realmode_machine_check
+ nop
+ cmpdi r3, 0 /* continue exiting from guest? */
+ ld r9, HSTATE_KVM_VCPU(r13)
+ li r12, BOOK3S_INTERRUPT_MACHINE_CHECK
+ beq mc_cont
+ /* If not, deliver a machine check. SRR0/1 are already set */
+ li r10, BOOK3S_INTERRUPT_MACHINE_CHECK
+ li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
+ rotldi r11, r11, 63
+ b fast_interrupt_c_return
+
secondary_too_late:
ld r5,HSTATE_KVM_VCORE(r13)
HMT_LOW
@@ -1587,6 +1605,10 @@ secondary_too_late:
.endr
secondary_nap:
+ /* Clear our vcpu pointer so we don't come back in early */
+ li r0, 0
+ std r0, HSTATE_KVM_VCPU(r13)
+ lwsync
/* Clear any pending IPI - assume we're a secondary thread */
ld r5, HSTATE_XICS_PHYS(r13)
li r7, XICS_XIRR
@@ -1612,8 +1634,6 @@ secondary_nap:
kvm_no_guest:
li r0, KVM_HWTHREAD_IN_NAP
stb r0, HSTATE_HWTHREAD_STATE(r13)
- li r0, 0
- std r0, HSTATE_KVM_VCPU(r13)
li r3, LPCR_PECE0
mfspr r4, SPRN_LPCR
diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
index 41cb0017e75..2c86b0d6371 100644
--- a/arch/powerpc/kvm/book3s_mmu_hpte.c
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -114,11 +114,6 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
hlist_del_init_rcu(&pte->list_vpte);
hlist_del_init_rcu(&pte->list_vpte_long);
- if (pte->pte.may_write)
- kvm_release_pfn_dirty(pte->pfn);
- else
- kvm_release_pfn_clean(pte->pfn);
-
spin_unlock(&vcpu3s->mmu_lock);
vcpu3s->hpte_cache_count--;
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 05c28f59f77..28d38adeca7 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -52,8 +52,6 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
#define MSR_USER32 MSR_USER
#define MSR_USER64 MSR_USER
#define HW_PAGE_SIZE PAGE_SIZE
-#define __hard_irq_disable local_irq_disable
-#define __hard_irq_enable local_irq_enable
#endif
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -66,7 +64,7 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max;
svcpu_put(svcpu);
#endif
-
+ vcpu->cpu = smp_processor_id();
#ifdef CONFIG_PPC_BOOK3S_32
current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu;
#endif
@@ -83,17 +81,71 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
svcpu_put(svcpu);
#endif
- kvmppc_giveup_ext(vcpu, MSR_FP);
- kvmppc_giveup_ext(vcpu, MSR_VEC);
- kvmppc_giveup_ext(vcpu, MSR_VSX);
+ kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
+ vcpu->cpu = -1;
+}
+
+int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
+{
+ int r = 1; /* Indicate we want to get back into the guest */
+
+ /* We misuse TLB_FLUSH to indicate that we want to clear
+ all shadow cache entries */
+ if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
+ kvmppc_mmu_pte_flush(vcpu, 0, 0);
+
+ return r;
+}
+
+/************* MMU Notifiers *************/
+
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+ trace_kvm_unmap_hva(hva);
+
+ /*
+ * Flush all shadow tlb entries everywhere. This is slow, but
+ * we are 100% sure that we catch the to be unmapped page
+ */
+ kvm_flush_remote_tlbs(kvm);
+
+ return 0;
+}
+
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+ /* kvm_unmap_hva flushes everything anyways */
+ kvm_unmap_hva(kvm, start);
+
+ return 0;
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ /* XXX could be more clever ;) */
+ return 0;
+}
+
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ /* XXX could be more clever ;) */
+ return 0;
}
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+ /* The page will get remapped properly on its next fault */
+ kvm_unmap_hva(kvm, hva);
+}
+
+/*****************************************/
+
static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
{
ulong smsr = vcpu->arch.shared->msr;
/* Guest MSR values */
- smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_DE;
+ smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE;
/* Process MSR values */
smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
/* External providers the guest reserved */
@@ -379,10 +431,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
static inline int get_fpr_index(int i)
{
-#ifdef CONFIG_VSX
- i *= 2;
-#endif
- return i;
+ return i * TS_FPRWIDTH;
}
/* Give up external provider (FPU, Altivec, VSX) */
@@ -396,41 +445,49 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
u64 *thread_fpr = (u64*)t->fpr;
int i;
- if (!(vcpu->arch.guest_owned_ext & msr))
+ /*
+ * VSX instructions can access FP and vector registers, so if
+ * we are giving up VSX, make sure we give up FP and VMX as well.
+ */
+ if (msr & MSR_VSX)
+ msr |= MSR_FP | MSR_VEC;
+
+ msr &= vcpu->arch.guest_owned_ext;
+ if (!msr)
return;
#ifdef DEBUG_EXT
printk(KERN_INFO "Giving up ext 0x%lx\n", msr);
#endif
- switch (msr) {
- case MSR_FP:
+ if (msr & MSR_FP) {
+ /*
+ * Note that on CPUs with VSX, giveup_fpu stores
+ * both the traditional FP registers and the added VSX
+ * registers into thread.fpr[].
+ */
giveup_fpu(current);
for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
vcpu->arch.fpscr = t->fpscr.val;
- break;
- case MSR_VEC:
+
+#ifdef CONFIG_VSX
+ if (cpu_has_feature(CPU_FTR_VSX))
+ for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++)
+ vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
+#endif
+ }
+
#ifdef CONFIG_ALTIVEC
+ if (msr & MSR_VEC) {
giveup_altivec(current);
memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
vcpu->arch.vscr = t->vscr;
-#endif
- break;
- case MSR_VSX:
-#ifdef CONFIG_VSX
- __giveup_vsx(current);
- for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
- vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
-#endif
- break;
- default:
- BUG();
}
+#endif
- vcpu->arch.guest_owned_ext &= ~msr;
- current->thread.regs->msr &= ~msr;
+ vcpu->arch.guest_owned_ext &= ~(msr | MSR_VSX);
kvmppc_recalc_shadow_msr(vcpu);
}
@@ -490,47 +547,56 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
return RESUME_GUEST;
}
- /* We already own the ext */
- if (vcpu->arch.guest_owned_ext & msr) {
- return RESUME_GUEST;
+ if (msr == MSR_VSX) {
+ /* No VSX? Give an illegal instruction interrupt */
+#ifdef CONFIG_VSX
+ if (!cpu_has_feature(CPU_FTR_VSX))
+#endif
+ {
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+ return RESUME_GUEST;
+ }
+
+ /*
+ * We have to load up all the FP and VMX registers before
+ * we can let the guest use VSX instructions.
+ */
+ msr = MSR_FP | MSR_VEC | MSR_VSX;
}
+ /* See if we already own all the ext(s) needed */
+ msr &= ~vcpu->arch.guest_owned_ext;
+ if (!msr)
+ return RESUME_GUEST;
+
#ifdef DEBUG_EXT
printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
#endif
current->thread.regs->msr |= msr;
- switch (msr) {
- case MSR_FP:
+ if (msr & MSR_FP) {
for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];
-
+#ifdef CONFIG_VSX
+ for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++)
+ thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
+#endif
t->fpscr.val = vcpu->arch.fpscr;
t->fpexc_mode = 0;
kvmppc_load_up_fpu();
- break;
- case MSR_VEC:
+ }
+
+ if (msr & MSR_VEC) {
#ifdef CONFIG_ALTIVEC
memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
t->vscr = vcpu->arch.vscr;
t->vrsave = -1;
kvmppc_load_up_altivec();
#endif
- break;
- case MSR_VSX:
-#ifdef CONFIG_VSX
- for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
- thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
- kvmppc_load_up_vsx();
-#endif
- break;
- default:
- BUG();
}
vcpu->arch.guest_owned_ext |= msr;
-
kvmppc_recalc_shadow_msr(vcpu);
return RESUME_GUEST;
@@ -540,18 +606,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int exit_nr)
{
int r = RESUME_HOST;
+ int s;
vcpu->stat.sum_exits++;
run->exit_reason = KVM_EXIT_UNKNOWN;
run->ready_for_interrupt_injection = 1;
- /* We get here with MSR.EE=0, so enable it to be a nice citizen */
- __hard_irq_enable();
+ /* We get here with MSR.EE=1 */
+
+ trace_kvm_exit(exit_nr, vcpu);
+ kvm_guest_exit();
- trace_kvm_book3s_exit(exit_nr, vcpu);
- preempt_enable();
- kvm_resched(vcpu);
switch (exit_nr) {
case BOOK3S_INTERRUPT_INST_STORAGE:
{
@@ -802,7 +868,6 @@ program_interrupt:
}
}
- preempt_disable();
if (!(r & RESUME_HOST)) {
/* To avoid clobbering exit_reason, only check for signals if
* we aren't already exiting to userspace for some other
@@ -814,20 +879,13 @@ program_interrupt:
* and if we really did time things so badly, then we just exit
* again due to a host external interrupt.
*/
- __hard_irq_disable();
- if (signal_pending(current)) {
- __hard_irq_enable();
-#ifdef EXIT_DEBUG
- printk(KERN_EMERG "KVM: Going back to host\n");
-#endif
- vcpu->stat.signal_exits++;
- run->exit_reason = KVM_EXIT_INTR;
- r = -EINTR;
+ local_irq_disable();
+ s = kvmppc_prepare_to_enter(vcpu);
+ if (s <= 0) {
+ local_irq_enable();
+ r = s;
} else {
- /* In case an interrupt came in that was triggered
- * from userspace (like DEC), we need to check what
- * to inject now! */
- kvmppc_core_prepare_to_enter(vcpu);
+ kvmppc_lazy_ee_enable();
}
}
@@ -899,34 +957,59 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
return 0;
}
-int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
{
- int r = -EINVAL;
+ int r = 0;
- switch (reg->id) {
+ switch (id) {
case KVM_REG_PPC_HIOR:
- r = copy_to_user((u64 __user *)(long)reg->addr,
- &to_book3s(vcpu)->hior, sizeof(u64));
+ *val = get_reg_val(id, to_book3s(vcpu)->hior);
break;
+#ifdef CONFIG_VSX
+ case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: {
+ long int i = id - KVM_REG_PPC_VSR0;
+
+ if (!cpu_has_feature(CPU_FTR_VSX)) {
+ r = -ENXIO;
+ break;
+ }
+ val->vsxval[0] = vcpu->arch.fpr[i];
+ val->vsxval[1] = vcpu->arch.vsr[i];
+ break;
+ }
+#endif /* CONFIG_VSX */
default:
+ r = -EINVAL;
break;
}
return r;
}
-int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
{
- int r = -EINVAL;
+ int r = 0;
- switch (reg->id) {
+ switch (id) {
case KVM_REG_PPC_HIOR:
- r = copy_from_user(&to_book3s(vcpu)->hior,
- (u64 __user *)(long)reg->addr, sizeof(u64));
- if (!r)
- to_book3s(vcpu)->hior_explicit = true;
+ to_book3s(vcpu)->hior = set_reg_val(id, *val);
+ to_book3s(vcpu)->hior_explicit = true;
+ break;
+#ifdef CONFIG_VSX
+ case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: {
+ long int i = id - KVM_REG_PPC_VSR0;
+
+ if (!cpu_has_feature(CPU_FTR_VSX)) {
+ r = -ENXIO;
+ break;
+ }
+ vcpu->arch.fpr[i] = val->vsxval[0];
+ vcpu->arch.vsr[i] = val->vsxval[1];
break;
+ }
+#endif /* CONFIG_VSX */
default:
+ r = -EINVAL;
break;
}
@@ -1020,8 +1103,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
#endif
ulong ext_msr;
- preempt_disable();
-
/* Check if we can run the vcpu at all */
if (!vcpu->arch.sane) {
kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
@@ -1029,21 +1110,16 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
goto out;
}
- kvmppc_core_prepare_to_enter(vcpu);
-
/*
* Interrupts could be timers for the guest which we have to inject
* again, so let's postpone them until we're in the guest and if we
* really did time things so badly, then we just exit again due to
* a host external interrupt.
*/
- __hard_irq_disable();
-
- /* No need to go into the guest when all we do is going out */
- if (signal_pending(current)) {
- __hard_irq_enable();
- kvm_run->exit_reason = KVM_EXIT_INTR;
- ret = -EINTR;
+ local_irq_disable();
+ ret = kvmppc_prepare_to_enter(vcpu);
+ if (ret <= 0) {
+ local_irq_enable();
goto out;
}
@@ -1070,7 +1146,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
/* Save VSX state in stack */
used_vsr = current->thread.used_vsr;
if (used_vsr && (current->thread.regs->msr & MSR_VSX))
- __giveup_vsx(current);
+ __giveup_vsx(current);
#endif
/* Remember the MSR with disabled extensions */
@@ -1080,20 +1156,19 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
if (vcpu->arch.shared->msr & MSR_FP)
kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
- kvm_guest_enter();
+ kvmppc_lazy_ee_enable();
ret = __kvmppc_vcpu_run(kvm_run, vcpu);
- kvm_guest_exit();
-
- current->thread.regs->msr = ext_msr;
+ /* No need for kvm_guest_exit. It's done in handle_exit.
+ We also get here with interrupts enabled. */
/* Make sure we save the guest FPU/Altivec/VSX state */
- kvmppc_giveup_ext(vcpu, MSR_FP);
- kvmppc_giveup_ext(vcpu, MSR_VEC);
- kvmppc_giveup_ext(vcpu, MSR_VSX);
+ kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
+
+ current->thread.regs->msr = ext_msr;
- /* Restore FPU state from stack */
+ /* Restore FPU/VSX state from stack */
memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr));
current->thread.fpscr.val = fpscr;
current->thread.fpexc_mode = fpexc_mode;
@@ -1113,7 +1188,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
#endif
out:
- preempt_enable();
+ vcpu->mode = OUTSIDE_GUEST_MODE;
return ret;
}
@@ -1181,14 +1256,31 @@ int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
}
#endif /* CONFIG_PPC64 */
+void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont)
+{
+}
+
+int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+ unsigned long npages)
+{
+ return 0;
+}
+
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem)
{
return 0;
}
void kvmppc_core_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem)
+ struct kvm_userspace_memory_region *mem,
+ struct kvm_memory_slot old)
+{
+}
+
+void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
}
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index 9ecf6e35cd8..8f7633e3afb 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -170,20 +170,21 @@ kvmppc_handler_skip_ins:
* Call kvmppc_handler_trampoline_enter in real mode
*
* On entry, r4 contains the guest shadow MSR
+ * MSR.EE has to be 0 when calling this function
*/
_GLOBAL(kvmppc_entry_trampoline)
mfmsr r5
LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter)
toreal(r7)
- li r9, MSR_RI
- ori r9, r9, MSR_EE
- andc r9, r5, r9 /* Clear EE and RI in MSR value */
li r6, MSR_IR | MSR_DR
- ori r6, r6, MSR_EE
- andc r6, r5, r6 /* Clear EE, DR and IR in MSR value */
- MTMSR_EERI(r9) /* Clear EE and RI in MSR */
- mtsrr0 r7 /* before we set srr0/1 */
+ andc r6, r5, r6 /* Clear DR and IR in MSR value */
+ /*
+ * Set EE in HOST_MSR so that it's enabled when we get into our
+ * C exit handler function
+ */
+ ori r5, r5, MSR_EE
+ mtsrr0 r7
mtsrr1 r6
RFI
@@ -233,8 +234,5 @@ define_load_up(fpu)
#ifdef CONFIG_ALTIVEC
define_load_up(altivec)
#endif
-#ifdef CONFIG_VSX
-define_load_up(vsx)
-#endif
#include "book3s_segment.S"
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index d25a097c852..69f11401578 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -36,9 +36,11 @@
#include <asm/dbell.h>
#include <asm/hw_irq.h>
#include <asm/irq.h>
+#include <asm/time.h>
#include "timing.h"
#include "booke.h"
+#include "trace.h"
unsigned long kvmppc_booke_handlers;
@@ -62,6 +64,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
{ "doorbell", VCPU_STAT(dbell_exits) },
{ "guest doorbell", VCPU_STAT(gdbell_exits) },
+ { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
{ NULL }
};
@@ -120,6 +123,16 @@ static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu)
}
#endif
+static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
+{
+#if defined(CONFIG_PPC_FPU) && !defined(CONFIG_KVM_BOOKE_HV)
+ /* We always treat the FP bit as enabled from the host
+ perspective, so only need to adjust the shadow MSR */
+ vcpu->arch.shadow_msr &= ~MSR_FP;
+ vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_FP;
+#endif
+}
+
/*
* Helper function for "full" MSR writes. No need to call this if only
* EE/CE/ME/DE/RI are changing.
@@ -136,11 +149,13 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
kvmppc_mmu_msr_notify(vcpu, old_msr);
kvmppc_vcpu_sync_spe(vcpu);
+ kvmppc_vcpu_sync_fpu(vcpu);
}
static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
unsigned int priority)
{
+ trace_kvm_booke_queue_irqprio(vcpu, priority);
set_bit(priority, &vcpu->arch.pending_exceptions);
}
@@ -206,6 +221,16 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
}
+static void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu)
+{
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG);
+}
+
+static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu)
+{
+ clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions);
+}
+
static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
{
#ifdef CONFIG_KVM_BOOKE_HV
@@ -287,6 +312,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
bool crit;
bool keep_irq = false;
enum int_class int_class;
+ ulong new_msr = vcpu->arch.shared->msr;
/* Truncate crit indicators in 32 bit mode */
if (!(vcpu->arch.shared->msr & MSR_SF)) {
@@ -325,6 +351,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
msr_mask = MSR_CE | MSR_ME | MSR_DE;
int_class = INT_CLASS_NONCRIT;
break;
+ case BOOKE_IRQPRIO_WATCHDOG:
case BOOKE_IRQPRIO_CRITICAL:
case BOOKE_IRQPRIO_DBELL_CRIT:
allowed = vcpu->arch.shared->msr & MSR_CE;
@@ -381,7 +408,13 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
set_guest_esr(vcpu, vcpu->arch.queued_esr);
if (update_dear == true)
set_guest_dear(vcpu, vcpu->arch.queued_dear);
- kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask);
+
+ new_msr &= msr_mask;
+#if defined(CONFIG_64BIT)
+ if (vcpu->arch.epcr & SPRN_EPCR_ICM)
+ new_msr |= MSR_CM;
+#endif
+ kvmppc_set_msr(vcpu, new_msr);
if (!keep_irq)
clear_bit(priority, &vcpu->arch.pending_exceptions);
@@ -404,12 +437,121 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
return allowed;
}
+/*
+ * Return the number of jiffies until the next timeout. If the timeout is
+ * longer than the NEXT_TIMER_MAX_DELTA, then return NEXT_TIMER_MAX_DELTA
+ * because the larger value can break the timer APIs.
+ */
+static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu)
+{
+ u64 tb, wdt_tb, wdt_ticks = 0;
+ u64 nr_jiffies = 0;
+ u32 period = TCR_GET_WP(vcpu->arch.tcr);
+
+ wdt_tb = 1ULL << (63 - period);
+ tb = get_tb();
+ /*
+ * The watchdog timeout will hapeen when TB bit corresponding
+ * to watchdog will toggle from 0 to 1.
+ */
+ if (tb & wdt_tb)
+ wdt_ticks = wdt_tb;
+
+ wdt_ticks += wdt_tb - (tb & (wdt_tb - 1));
+
+ /* Convert timebase ticks to jiffies */
+ nr_jiffies = wdt_ticks;
+
+ if (do_div(nr_jiffies, tb_ticks_per_jiffy))
+ nr_jiffies++;
+
+ return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA);
+}
+
+static void arm_next_watchdog(struct kvm_vcpu *vcpu)
+{
+ unsigned long nr_jiffies;
+ unsigned long flags;
+
+ /*
+ * If TSR_ENW and TSR_WIS are not set then no need to exit to
+ * userspace, so clear the KVM_REQ_WATCHDOG request.
+ */
+ if ((vcpu->arch.tsr & (TSR_ENW | TSR_WIS)) != (TSR_ENW | TSR_WIS))
+ clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
+
+ spin_lock_irqsave(&vcpu->arch.wdt_lock, flags);
+ nr_jiffies = watchdog_next_timeout(vcpu);
+ /*
+ * If the number of jiffies of watchdog timer >= NEXT_TIMER_MAX_DELTA
+ * then do not run the watchdog timer as this can break timer APIs.
+ */
+ if (nr_jiffies < NEXT_TIMER_MAX_DELTA)
+ mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies);
+ else
+ del_timer(&vcpu->arch.wdt_timer);
+ spin_unlock_irqrestore(&vcpu->arch.wdt_lock, flags);
+}
+
+void kvmppc_watchdog_func(unsigned long data)
+{
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+ u32 tsr, new_tsr;
+ int final;
+
+ do {
+ new_tsr = tsr = vcpu->arch.tsr;
+ final = 0;
+
+ /* Time out event */
+ if (tsr & TSR_ENW) {
+ if (tsr & TSR_WIS)
+ final = 1;
+ else
+ new_tsr = tsr | TSR_WIS;
+ } else {
+ new_tsr = tsr | TSR_ENW;
+ }
+ } while (cmpxchg(&vcpu->arch.tsr, tsr, new_tsr) != tsr);
+
+ if (new_tsr & TSR_WIS) {
+ smp_wmb();
+ kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+ kvm_vcpu_kick(vcpu);
+ }
+
+ /*
+ * If this is final watchdog expiry and some action is required
+ * then exit to userspace.
+ */
+ if (final && (vcpu->arch.tcr & TCR_WRC_MASK) &&
+ vcpu->arch.watchdog_enabled) {
+ smp_wmb();
+ kvm_make_request(KVM_REQ_WATCHDOG, vcpu);
+ kvm_vcpu_kick(vcpu);
+ }
+
+ /*
+ * Stop running the watchdog timer after final expiration to
+ * prevent the host from being flooded with timers if the
+ * guest sets a short period.
+ * Timers will resume when TSR/TCR is updated next time.
+ */
+ if (!final)
+ arm_next_watchdog(vcpu);
+}
+
static void update_timer_ints(struct kvm_vcpu *vcpu)
{
if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS))
kvmppc_core_queue_dec(vcpu);
else
kvmppc_core_dequeue_dec(vcpu);
+
+ if ((vcpu->arch.tcr & TCR_WIE) && (vcpu->arch.tsr & TSR_WIS))
+ kvmppc_core_queue_watchdog(vcpu);
+ else
+ kvmppc_core_dequeue_watchdog(vcpu);
}
static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
@@ -417,13 +559,6 @@ static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
unsigned long *pending = &vcpu->arch.pending_exceptions;
unsigned int priority;
- if (vcpu->requests) {
- if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) {
- smp_mb();
- update_timer_ints(vcpu);
- }
- }
-
priority = __ffs(*pending);
while (priority < BOOKE_IRQPRIO_MAX) {
if (kvmppc_booke_irqprio_deliver(vcpu, priority))
@@ -459,37 +594,20 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
return r;
}
-/*
- * Common checks before entering the guest world. Call with interrupts
- * disabled.
- *
- * returns !0 if a signal is pending and check_signal is true
- */
-static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
+int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
{
- int r = 0;
+ int r = 1; /* Indicate we want to get back into the guest */
- WARN_ON_ONCE(!irqs_disabled());
- while (true) {
- if (need_resched()) {
- local_irq_enable();
- cond_resched();
- local_irq_disable();
- continue;
- }
-
- if (signal_pending(current)) {
- r = 1;
- break;
- }
-
- if (kvmppc_core_prepare_to_enter(vcpu)) {
- /* interrupts got enabled in between, so we
- are back at square 1 */
- continue;
- }
+ if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu))
+ update_timer_ints(vcpu);
+#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
+ if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
+ kvmppc_core_flush_tlb(vcpu);
+#endif
- break;
+ if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
+ vcpu->run->exit_reason = KVM_EXIT_WATCHDOG;
+ r = 0;
}
return r;
@@ -497,7 +615,7 @@ static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
- int ret;
+ int ret, s;
#ifdef CONFIG_PPC_FPU
unsigned int fpscr;
int fpexc_mode;
@@ -510,11 +628,13 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
}
local_irq_disable();
- if (kvmppc_prepare_to_enter(vcpu)) {
- kvm_run->exit_reason = KVM_EXIT_INTR;
- ret = -EINTR;
+ s = kvmppc_prepare_to_enter(vcpu);
+ if (s <= 0) {
+ local_irq_enable();
+ ret = s;
goto out;
}
+ kvmppc_lazy_ee_enable();
kvm_guest_enter();
@@ -542,6 +662,9 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
ret = __kvmppc_vcpu_run(kvm_run, vcpu);
+ /* No need for kvm_guest_exit. It's done in handle_exit.
+ We also get here with interrupts enabled. */
+
#ifdef CONFIG_PPC_FPU
kvmppc_save_guest_fp(vcpu);
@@ -557,10 +680,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
current->thread.fpexc_mode = fpexc_mode;
#endif
- kvm_guest_exit();
-
out:
- local_irq_enable();
+ vcpu->mode = OUTSIDE_GUEST_MODE;
return ret;
}
@@ -668,6 +789,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int exit_nr)
{
int r = RESUME_HOST;
+ int s;
/* update before a new last_exit_type is rewritten */
kvmppc_update_timing_stats(vcpu);
@@ -677,6 +799,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
local_irq_enable();
+ trace_kvm_exit(exit_nr, vcpu);
+ kvm_guest_exit();
+
run->exit_reason = KVM_EXIT_UNKNOWN;
run->ready_for_interrupt_injection = 1;
@@ -971,10 +1096,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
*/
if (!(r & RESUME_HOST)) {
local_irq_disable();
- if (kvmppc_prepare_to_enter(vcpu)) {
- run->exit_reason = KVM_EXIT_INTR;
- r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
- kvmppc_account_exit(vcpu, SIGNAL_EXITS);
+ s = kvmppc_prepare_to_enter(vcpu);
+ if (s <= 0) {
+ local_irq_enable();
+ r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
+ } else {
+ kvmppc_lazy_ee_enable();
}
}
@@ -1011,6 +1138,21 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
return r;
}
+int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ /* setup watchdog timer once */
+ spin_lock_init(&vcpu->arch.wdt_lock);
+ setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func,
+ (unsigned long)vcpu);
+
+ return 0;
+}
+
+void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+ del_timer_sync(&vcpu->arch.wdt_timer);
+}
+
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
int i;
@@ -1106,7 +1248,13 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
}
if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
+ u32 old_tsr = vcpu->arch.tsr;
+
vcpu->arch.tsr = sregs->u.e.tsr;
+
+ if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS))
+ arm_next_watchdog(vcpu);
+
update_timer_ints(vcpu);
}
@@ -1221,12 +1369,70 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
{
- return -EINVAL;
+ int r = -EINVAL;
+
+ switch (reg->id) {
+ case KVM_REG_PPC_IAC1:
+ case KVM_REG_PPC_IAC2:
+ case KVM_REG_PPC_IAC3:
+ case KVM_REG_PPC_IAC4: {
+ int iac = reg->id - KVM_REG_PPC_IAC1;
+ r = copy_to_user((u64 __user *)(long)reg->addr,
+ &vcpu->arch.dbg_reg.iac[iac], sizeof(u64));
+ break;
+ }
+ case KVM_REG_PPC_DAC1:
+ case KVM_REG_PPC_DAC2: {
+ int dac = reg->id - KVM_REG_PPC_DAC1;
+ r = copy_to_user((u64 __user *)(long)reg->addr,
+ &vcpu->arch.dbg_reg.dac[dac], sizeof(u64));
+ break;
+ }
+#if defined(CONFIG_64BIT)
+ case KVM_REG_PPC_EPCR:
+ r = put_user(vcpu->arch.epcr, (u32 __user *)(long)reg->addr);
+ break;
+#endif
+ default:
+ break;
+ }
+ return r;
}
int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
{
- return -EINVAL;
+ int r = -EINVAL;
+
+ switch (reg->id) {
+ case KVM_REG_PPC_IAC1:
+ case KVM_REG_PPC_IAC2:
+ case KVM_REG_PPC_IAC3:
+ case KVM_REG_PPC_IAC4: {
+ int iac = reg->id - KVM_REG_PPC_IAC1;
+ r = copy_from_user(&vcpu->arch.dbg_reg.iac[iac],
+ (u64 __user *)(long)reg->addr, sizeof(u64));
+ break;
+ }
+ case KVM_REG_PPC_DAC1:
+ case KVM_REG_PPC_DAC2: {
+ int dac = reg->id - KVM_REG_PPC_DAC1;
+ r = copy_from_user(&vcpu->arch.dbg_reg.dac[dac],
+ (u64 __user *)(long)reg->addr, sizeof(u64));
+ break;
+ }
+#if defined(CONFIG_64BIT)
+ case KVM_REG_PPC_EPCR: {
+ u32 new_epcr;
+ r = get_user(new_epcr, (u32 __user *)(long)reg->addr);
+ if (r == 0)
+ kvmppc_set_epcr(vcpu, new_epcr);
+ break;
+ }
+#endif
+ default:
+ break;
+ }
+ return r;
}
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
@@ -1253,20 +1459,50 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
return -ENOTSUPP;
}
+void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont)
+{
+}
+
+int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+ unsigned long npages)
+{
+ return 0;
+}
+
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem)
{
return 0;
}
void kvmppc_core_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem)
+ struct kvm_userspace_memory_region *mem,
+ struct kvm_memory_slot old)
+{
+}
+
+void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
+{
+}
+
+void kvmppc_set_epcr(struct kvm_vcpu *vcpu, u32 new_epcr)
{
+#if defined(CONFIG_64BIT)
+ vcpu->arch.epcr = new_epcr;
+#ifdef CONFIG_KVM_BOOKE_HV
+ vcpu->arch.shadow_epcr &= ~SPRN_EPCR_GICM;
+ if (vcpu->arch.epcr & SPRN_EPCR_ICM)
+ vcpu->arch.shadow_epcr |= SPRN_EPCR_GICM;
+#endif
+#endif
}
void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
{
vcpu->arch.tcr = new_tcr;
+ arm_next_watchdog(vcpu);
update_timer_ints(vcpu);
}
@@ -1281,6 +1517,14 @@ void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
{
clear_bits(tsr_bits, &vcpu->arch.tsr);
+
+ /*
+ * We may have stopped the watchdog due to
+ * being stuck on final expiration.
+ */
+ if (tsr_bits & (TSR_ENW | TSR_WIS))
+ arm_next_watchdog(vcpu);
+
update_timer_ints(vcpu);
}
@@ -1298,12 +1542,14 @@ void kvmppc_decrementer_func(unsigned long data)
void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
+ vcpu->cpu = smp_processor_id();
current->thread.kvm_vcpu = vcpu;
}
void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu)
{
current->thread.kvm_vcpu = NULL;
+ vcpu->cpu = -1;
}
int __init kvmppc_booke_init(void)
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index ba61974c1e2..e9b88e433f6 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -69,6 +69,7 @@ extern unsigned long kvmppc_booke_handlers;
void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr);
void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr);
+void kvmppc_set_epcr(struct kvm_vcpu *vcpu, u32 new_epcr);
void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr);
void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 12834bb608a..4685b8cf224 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -133,10 +133,10 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
vcpu->arch.csrr1 = spr_val;
break;
case SPRN_DBCR0:
- vcpu->arch.dbcr0 = spr_val;
+ vcpu->arch.dbg_reg.dbcr0 = spr_val;
break;
case SPRN_DBCR1:
- vcpu->arch.dbcr1 = spr_val;
+ vcpu->arch.dbg_reg.dbcr1 = spr_val;
break;
case SPRN_DBSR:
vcpu->arch.dbsr &= ~spr_val;
@@ -145,6 +145,14 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
kvmppc_clr_tsr_bits(vcpu, spr_val);
break;
case SPRN_TCR:
+ /*
+ * WRC is a 2-bit field that is supposed to preserve its
+ * value once written to non-zero.
+ */
+ if (vcpu->arch.tcr & TCR_WRC_MASK) {
+ spr_val &= ~TCR_WRC_MASK;
+ spr_val |= vcpu->arch.tcr & TCR_WRC_MASK;
+ }
kvmppc_set_tcr(vcpu, spr_val);
break;
@@ -229,7 +237,17 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
case SPRN_IVOR15:
vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = spr_val;
break;
-
+ case SPRN_MCSR:
+ vcpu->arch.mcsr &= ~spr_val;
+ break;
+#if defined(CONFIG_64BIT)
+ case SPRN_EPCR:
+ kvmppc_set_epcr(vcpu, spr_val);
+#ifdef CONFIG_KVM_BOOKE_HV
+ mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr);
+#endif
+ break;
+#endif
default:
emulated = EMULATE_FAIL;
}
@@ -258,10 +276,10 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
*spr_val = vcpu->arch.csrr1;
break;
case SPRN_DBCR0:
- *spr_val = vcpu->arch.dbcr0;
+ *spr_val = vcpu->arch.dbg_reg.dbcr0;
break;
case SPRN_DBCR1:
- *spr_val = vcpu->arch.dbcr1;
+ *spr_val = vcpu->arch.dbg_reg.dbcr1;
break;
case SPRN_DBSR:
*spr_val = vcpu->arch.dbsr;
@@ -321,6 +339,14 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
case SPRN_IVOR15:
*spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
break;
+ case SPRN_MCSR:
+ *spr_val = vcpu->arch.mcsr;
+ break;
+#if defined(CONFIG_64BIT)
+ case SPRN_EPCR:
+ *spr_val = vcpu->arch.epcr;
+ break;
+#endif
default:
emulated = EMULATE_FAIL;
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
index 099fe8272b5..e8ed7d659c5 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -16,6 +16,7 @@
*
* Author: Varun Sethi <varun.sethi@freescale.com>
* Author: Scott Wood <scotwood@freescale.com>
+ * Author: Mihai Caraman <mihai.caraman@freescale.com>
*
* This file is derived from arch/powerpc/kvm/booke_interrupts.S
*/
@@ -30,31 +31,33 @@
#include <asm/bitsperlong.h>
#include <asm/thread_info.h>
+#ifdef CONFIG_64BIT
+#include <asm/exception-64e.h>
+#else
#include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */
-
-#define GET_VCPU(vcpu, thread) \
- PPC_LL vcpu, THREAD_KVM_VCPU(thread)
+#endif
#define LONGBYTES (BITS_PER_LONG / 8)
#define VCPU_GUEST_SPRG(n) (VCPU_GUEST_SPRGS + (n * LONGBYTES))
/* The host stack layout: */
-#define HOST_R1 (0 * LONGBYTES) /* Implied by stwu. */
-#define HOST_CALLEE_LR (1 * LONGBYTES)
-#define HOST_RUN (2 * LONGBYTES) /* struct kvm_run */
+#define HOST_R1 0 /* Implied by stwu. */
+#define HOST_CALLEE_LR PPC_LR_STKOFF
+#define HOST_RUN (HOST_CALLEE_LR + LONGBYTES)
/*
* r2 is special: it holds 'current', and it made nonvolatile in the
* kernel with the -ffixed-r2 gcc option.
*/
-#define HOST_R2 (3 * LONGBYTES)
-#define HOST_CR (4 * LONGBYTES)
-#define HOST_NV_GPRS (5 * LONGBYTES)
+#define HOST_R2 (HOST_RUN + LONGBYTES)
+#define HOST_CR (HOST_R2 + LONGBYTES)
+#define HOST_NV_GPRS (HOST_CR + LONGBYTES)
#define __HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * LONGBYTES))
#define HOST_NV_GPR(n) __HOST_NV_GPR(__REG_##n)
#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(R31) + LONGBYTES)
#define HOST_STACK_SIZE ((HOST_MIN_STACK_SIZE + 15) & ~15) /* Align. */
-#define HOST_STACK_LR (HOST_STACK_SIZE + LONGBYTES) /* In caller stack frame. */
+/* LR in caller stack frame. */
+#define HOST_STACK_LR (HOST_STACK_SIZE + PPC_LR_STKOFF)
#define NEED_EMU 0x00000001 /* emulation -- save nv regs */
#define NEED_DEAR 0x00000002 /* save faulting DEAR */
@@ -201,12 +204,128 @@
b kvmppc_resume_host
.endm
+#ifdef CONFIG_64BIT
+/* Exception types */
+#define EX_GEN 1
+#define EX_GDBELL 2
+#define EX_DBG 3
+#define EX_MC 4
+#define EX_CRIT 5
+#define EX_TLB 6
+
+/*
+ * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h
+ */
+.macro kvm_handler intno type scratch, paca_ex, ex_r10, ex_r11, srr0, srr1, flags
+ _GLOBAL(kvmppc_handler_\intno\()_\srr1)
+ mr r11, r4
+ /*
+ * Get vcpu from Paca: paca->__current.thread->kvm_vcpu
+ */
+ PPC_LL r4, PACACURRENT(r13)
+ PPC_LL r4, (THREAD + THREAD_KVM_VCPU)(r4)
+ stw r10, VCPU_CR(r4)
+ PPC_STL r11, VCPU_GPR(R4)(r4)
+ PPC_STL r5, VCPU_GPR(R5)(r4)
+ .if \type == EX_CRIT
+ PPC_LL r5, (\paca_ex + EX_R13)(r13)
+ .else
+ mfspr r5, \scratch
+ .endif
+ PPC_STL r6, VCPU_GPR(R6)(r4)
+ PPC_STL r8, VCPU_GPR(R8)(r4)
+ PPC_STL r9, VCPU_GPR(R9)(r4)
+ PPC_STL r5, VCPU_GPR(R13)(r4)
+ PPC_LL r6, (\paca_ex + \ex_r10)(r13)
+ PPC_LL r8, (\paca_ex + \ex_r11)(r13)
+ PPC_STL r3, VCPU_GPR(R3)(r4)
+ PPC_STL r7, VCPU_GPR(R7)(r4)
+ PPC_STL r12, VCPU_GPR(R12)(r4)
+ PPC_STL r6, VCPU_GPR(R10)(r4)
+ PPC_STL r8, VCPU_GPR(R11)(r4)
+ mfctr r5
+ PPC_STL r5, VCPU_CTR(r4)
+ mfspr r5, \srr0
+ mfspr r6, \srr1
+ kvm_handler_common \intno, \srr0, \flags
+.endm
+
+#define EX_PARAMS(type) \
+ EX_##type, \
+ SPRN_SPRG_##type##_SCRATCH, \
+ PACA_EX##type, \
+ EX_R10, \
+ EX_R11
+
+#define EX_PARAMS_TLB \
+ EX_TLB, \
+ SPRN_SPRG_GEN_SCRATCH, \
+ PACA_EXTLB, \
+ EX_TLB_R10, \
+ EX_TLB_R11
+
+kvm_handler BOOKE_INTERRUPT_CRITICAL, EX_PARAMS(CRIT), \
+ SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_handler BOOKE_INTERRUPT_MACHINE_CHECK, EX_PARAMS(MC), \
+ SPRN_MCSRR0, SPRN_MCSRR1, 0
+kvm_handler BOOKE_INTERRUPT_DATA_STORAGE, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1,(NEED_EMU | NEED_DEAR | NEED_ESR)
+kvm_handler BOOKE_INTERRUPT_INST_STORAGE, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, NEED_ESR
+kvm_handler BOOKE_INTERRUPT_EXTERNAL, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_ALIGNMENT, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1,(NEED_DEAR | NEED_ESR)
+kvm_handler BOOKE_INTERRUPT_PROGRAM, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1,NEED_ESR
+kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_DECREMENTER, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_FIT, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_WATCHDOG, EX_PARAMS(CRIT),\
+ SPRN_CSRR0, SPRN_CSRR1, 0
+/*
+ * Only bolted TLB miss exception handlers are supported for now
+ */
+kvm_handler BOOKE_INTERRUPT_DTLB_MISS, EX_PARAMS_TLB, \
+ SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
+kvm_handler BOOKE_INTERRUPT_ITLB_MISS, EX_PARAMS_TLB, \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_DOORBELL, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, EX_PARAMS(CRIT), \
+ SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_handler BOOKE_INTERRUPT_HV_PRIV, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, NEED_EMU
+kvm_handler BOOKE_INTERRUPT_HV_SYSCALL, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_GUEST_DBELL, EX_PARAMS(GDBELL), \
+ SPRN_GSRR0, SPRN_GSRR1, 0
+kvm_handler BOOKE_INTERRUPT_GUEST_DBELL_CRIT, EX_PARAMS(CRIT), \
+ SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(DBG), \
+ SPRN_DSRR0, SPRN_DSRR1, 0
+kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(CRIT), \
+ SPRN_CSRR0, SPRN_CSRR1, 0
+#else
/*
* For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h
*/
.macro kvm_handler intno srr0, srr1, flags
_GLOBAL(kvmppc_handler_\intno\()_\srr1)
- GET_VCPU(r11, r10)
+ PPC_LL r11, THREAD_KVM_VCPU(r10)
PPC_STL r3, VCPU_GPR(R3)(r11)
mfspr r3, SPRN_SPRG_RSCRATCH0
PPC_STL r4, VCPU_GPR(R4)(r11)
@@ -233,7 +352,7 @@ _GLOBAL(kvmppc_handler_\intno\()_\srr1)
.macro kvm_lvl_handler intno scratch srr0, srr1, flags
_GLOBAL(kvmppc_handler_\intno\()_\srr1)
mfspr r10, SPRN_SPRG_THREAD
- GET_VCPU(r11, r10)
+ PPC_LL r11, THREAD_KVM_VCPU(r10)
PPC_STL r3, VCPU_GPR(R3)(r11)
mfspr r3, \scratch
PPC_STL r4, VCPU_GPR(R4)(r11)
@@ -295,7 +414,7 @@ kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \
SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \
SPRN_SPRG_RSCRATCH_DBG, SPRN_DSRR0, SPRN_DSRR1, 0
-
+#endif
/* Registers:
* SPRG_SCRATCH0: guest r10
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index aa8b81428bf..c70d37ed770 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -27,8 +27,7 @@
#define E500_TLB_NUM 2
#define E500_TLB_VALID 1
-#define E500_TLB_DIRTY 2
-#define E500_TLB_BITMAP 4
+#define E500_TLB_BITMAP 2
struct tlbe_ref {
pfn_t pfn;
@@ -130,9 +129,9 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500,
ulong value);
int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu);
int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu);
-int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb);
-int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int rt, int ra, int rb);
-int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb);
+int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea);
+int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int type, gva_t ea);
+int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea);
int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500);
void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500);
@@ -155,7 +154,7 @@ get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe)
static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe)
{
- return tlbe->mas2 & 0xfffff000;
+ return tlbe->mas2 & MAS2_EPN;
}
static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe)
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index e04b0ef55ce..e78f353a836 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -89,6 +89,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
int ra = get_ra(inst);
int rb = get_rb(inst);
int rt = get_rt(inst);
+ gva_t ea;
switch (get_op(inst)) {
case 31:
@@ -113,15 +114,20 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case XOP_TLBSX:
- emulated = kvmppc_e500_emul_tlbsx(vcpu,rb);
+ ea = kvmppc_get_ea_indexed(vcpu, ra, rb);
+ emulated = kvmppc_e500_emul_tlbsx(vcpu, ea);
break;
- case XOP_TLBILX:
- emulated = kvmppc_e500_emul_tlbilx(vcpu, rt, ra, rb);
+ case XOP_TLBILX: {
+ int type = rt & 0x3;
+ ea = kvmppc_get_ea_indexed(vcpu, ra, rb);
+ emulated = kvmppc_e500_emul_tlbilx(vcpu, type, ea);
break;
+ }
case XOP_TLBIVAX:
- emulated = kvmppc_e500_emul_tlbivax(vcpu, ra, rb);
+ ea = kvmppc_get_ea_indexed(vcpu, ra, rb);
+ emulated = kvmppc_e500_emul_tlbivax(vcpu, ea);
break;
default:
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index ff38b664195..cf3f1801237 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -304,17 +304,13 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,
ref->flags = E500_TLB_VALID;
if (tlbe_is_writable(gtlbe))
- ref->flags |= E500_TLB_DIRTY;
+ kvm_set_pfn_dirty(pfn);
}
static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
{
if (ref->flags & E500_TLB_VALID) {
- if (ref->flags & E500_TLB_DIRTY)
- kvm_release_pfn_dirty(ref->pfn);
- else
- kvm_release_pfn_clean(ref->pfn);
-
+ trace_kvm_booke206_ref_release(ref->pfn, ref->flags);
ref->flags = 0;
}
}
@@ -357,6 +353,13 @@ static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500)
clear_tlb_privs(vcpu_e500);
}
+void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ clear_tlb_refs(vcpu_e500);
+ clear_tlb1_bitmap(vcpu_e500);
+}
+
static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
unsigned int eaddr, int as)
{
@@ -412,7 +415,8 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
struct tlbe_ref *ref)
{
struct kvm_memory_slot *slot;
- unsigned long pfn, hva;
+ unsigned long pfn = 0; /* silence GCC warning */
+ unsigned long hva;
int pfnmap = 0;
int tsize = BOOK3E_PAGESZ_4K;
@@ -521,7 +525,7 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
if (likely(!pfnmap)) {
unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
pfn = gfn_to_pfn_memslot(slot, gfn);
- if (is_error_pfn(pfn)) {
+ if (is_error_noslot_pfn(pfn)) {
printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
(long)gfn);
return;
@@ -541,6 +545,9 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
/* Clear i-cache for new pages */
kvmppc_mmu_flush_icache(pfn);
+
+ /* Drop refcount on page, so that mmu notifiers can clear it */
+ kvm_release_pfn_clean(pfn);
}
/* XXX only map the one-one case, for now use TLB0 */
@@ -682,14 +689,11 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value)
return EMULATE_DONE;
}
-int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
+int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
unsigned int ia;
int esel, tlbsel;
- gva_t ea;
-
- ea = ((ra) ? kvmppc_get_gpr(vcpu, ra) : 0) + kvmppc_get_gpr(vcpu, rb);
ia = (ea >> 2) & 0x1;
@@ -716,7 +720,7 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
}
static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
- int pid, int rt)
+ int pid, int type)
{
struct kvm_book3e_206_tlb_entry *tlbe;
int tid, esel;
@@ -725,7 +729,7 @@ static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries; esel++) {
tlbe = get_entry(vcpu_e500, tlbsel, esel);
tid = get_tlb_tid(tlbe);
- if (rt == 0 || tid == pid) {
+ if (type == 0 || tid == pid) {
inval_gtlbe_on_host(vcpu_e500, tlbsel, esel);
kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
}
@@ -733,14 +737,9 @@ static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
}
static void tlbilx_one(struct kvmppc_vcpu_e500 *vcpu_e500, int pid,
- int ra, int rb)
+ gva_t ea)
{
int tlbsel, esel;
- gva_t ea;
-
- ea = kvmppc_get_gpr(&vcpu_e500->vcpu, rb);
- if (ra)
- ea += kvmppc_get_gpr(&vcpu_e500->vcpu, ra);
for (tlbsel = 0; tlbsel < 2; tlbsel++) {
esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, -1);
@@ -752,16 +751,16 @@ static void tlbilx_one(struct kvmppc_vcpu_e500 *vcpu_e500, int pid,
}
}
-int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int rt, int ra, int rb)
+int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int type, gva_t ea)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
int pid = get_cur_spid(vcpu);
- if (rt == 0 || rt == 1) {
- tlbilx_all(vcpu_e500, 0, pid, rt);
- tlbilx_all(vcpu_e500, 1, pid, rt);
- } else if (rt == 3) {
- tlbilx_one(vcpu_e500, pid, ra, rb);
+ if (type == 0 || type == 1) {
+ tlbilx_all(vcpu_e500, 0, pid, type);
+ tlbilx_all(vcpu_e500, 1, pid, type);
+ } else if (type == 3) {
+ tlbilx_one(vcpu_e500, pid, ea);
}
return EMULATE_DONE;
@@ -786,16 +785,13 @@ int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}
-int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
+int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
int as = !!get_cur_sas(vcpu);
unsigned int pid = get_cur_spid(vcpu);
int esel, tlbsel;
struct kvm_book3e_206_tlb_entry *gtlbe = NULL;
- gva_t ea;
-
- ea = kvmppc_get_gpr(vcpu, rb);
for (tlbsel = 0; tlbsel < 2; tlbsel++) {
esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
@@ -875,6 +871,8 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
gtlbe->mas1 = vcpu->arch.shared->mas1;
gtlbe->mas2 = vcpu->arch.shared->mas2;
+ if (!(vcpu->arch.shared->msr & MSR_CM))
+ gtlbe->mas2 &= 0xffffffffUL;
gtlbe->mas7_3 = vcpu->arch.shared->mas7_3;
trace_kvm_booke206_gtlb_write(vcpu->arch.shared->mas0, gtlbe->mas1,
@@ -1039,8 +1037,12 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
sesel = 0; /* unused */
priv = &vcpu_e500->gtlb_priv[tlbsel][esel];
- kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K,
- &priv->ref, eaddr, &stlbe);
+ /* Only triggers after clear_tlb_refs */
+ if (unlikely(!(priv->ref.flags & E500_TLB_VALID)))
+ kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe);
+ else
+ kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K,
+ &priv->ref, eaddr, &stlbe);
break;
case 1: {
@@ -1060,6 +1062,49 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel);
}
+/************* MMU Notifiers *************/
+
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+ trace_kvm_unmap_hva(hva);
+
+ /*
+ * Flush all shadow tlb entries everywhere. This is slow, but
+ * we are 100% sure that we catch the to be unmapped page
+ */
+ kvm_flush_remote_tlbs(kvm);
+
+ return 0;
+}
+
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+ /* kvm_unmap_hva flushes everything anyways */
+ kvm_unmap_hva(kvm, start);
+
+ return 0;
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ /* XXX could be more clever ;) */
+ return 0;
+}
+
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ /* XXX could be more clever ;) */
+ return 0;
+}
+
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+ /* The page will get remapped properly on its next fault */
+ kvm_unmap_hva(kvm, hva);
+}
+
+/*****************************************/
+
static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
{
int i;
@@ -1081,6 +1126,8 @@ static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
}
vcpu_e500->num_shared_tlb_pages = 0;
+
+ kfree(vcpu_e500->shared_tlb_pages);
vcpu_e500->shared_tlb_pages = NULL;
} else {
kfree(vcpu_e500->gtlb_arch);
@@ -1178,21 +1225,27 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
}
virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL);
- if (!virt)
+ if (!virt) {
+ ret = -ENOMEM;
goto err_put_page;
+ }
privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0],
GFP_KERNEL);
privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1],
GFP_KERNEL);
- if (!privs[0] || !privs[1])
- goto err_put_page;
+ if (!privs[0] || !privs[1]) {
+ ret = -ENOMEM;
+ goto err_privs;
+ }
g2h_bitmap = kzalloc(sizeof(u64) * params.tlb_sizes[1],
GFP_KERNEL);
- if (!g2h_bitmap)
- goto err_put_page;
+ if (!g2h_bitmap) {
+ ret = -ENOMEM;
+ goto err_privs;
+ }
free_gtlb(vcpu_e500);
@@ -1232,10 +1285,11 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
kvmppc_recalc_tlb1map_range(vcpu_e500);
return 0;
-err_put_page:
+err_privs:
kfree(privs[0]);
kfree(privs[1]);
+err_put_page:
for (i = 0; i < num_pages; i++)
put_page(pages[i]);
@@ -1332,7 +1386,7 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
if (!vcpu_e500->gtlb_priv[1])
goto err;
- vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(unsigned int) *
+ vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(u64) *
vcpu_e500->gtlb_params[1].entries,
GFP_KERNEL);
if (!vcpu_e500->g2h_tlb1_map)
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index ee04abaefe2..b0855e5d890 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -131,6 +131,125 @@ u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
return vcpu->arch.dec - jd;
}
+static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+{
+ enum emulation_result emulated = EMULATE_DONE;
+ ulong spr_val = kvmppc_get_gpr(vcpu, rs);
+
+ switch (sprn) {
+ case SPRN_SRR0:
+ vcpu->arch.shared->srr0 = spr_val;
+ break;
+ case SPRN_SRR1:
+ vcpu->arch.shared->srr1 = spr_val;
+ break;
+
+ /* XXX We need to context-switch the timebase for
+ * watchdog and FIT. */
+ case SPRN_TBWL: break;
+ case SPRN_TBWU: break;
+
+ case SPRN_MSSSR0: break;
+
+ case SPRN_DEC:
+ vcpu->arch.dec = spr_val;
+ kvmppc_emulate_dec(vcpu);
+ break;
+
+ case SPRN_SPRG0:
+ vcpu->arch.shared->sprg0 = spr_val;
+ break;
+ case SPRN_SPRG1:
+ vcpu->arch.shared->sprg1 = spr_val;
+ break;
+ case SPRN_SPRG2:
+ vcpu->arch.shared->sprg2 = spr_val;
+ break;
+ case SPRN_SPRG3:
+ vcpu->arch.shared->sprg3 = spr_val;
+ break;
+
+ default:
+ emulated = kvmppc_core_emulate_mtspr(vcpu, sprn,
+ spr_val);
+ if (emulated == EMULATE_FAIL)
+ printk(KERN_INFO "mtspr: unknown spr "
+ "0x%x\n", sprn);
+ break;
+ }
+
+ kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
+
+ return emulated;
+}
+
+static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+{
+ enum emulation_result emulated = EMULATE_DONE;
+ ulong spr_val = 0;
+
+ switch (sprn) {
+ case SPRN_SRR0:
+ spr_val = vcpu->arch.shared->srr0;
+ break;
+ case SPRN_SRR1:
+ spr_val = vcpu->arch.shared->srr1;
+ break;
+ case SPRN_PVR:
+ spr_val = vcpu->arch.pvr;
+ break;
+ case SPRN_PIR:
+ spr_val = vcpu->vcpu_id;
+ break;
+ case SPRN_MSSSR0:
+ spr_val = 0;
+ break;
+
+ /* Note: mftb and TBRL/TBWL are user-accessible, so
+ * the guest can always access the real TB anyways.
+ * In fact, we probably will never see these traps. */
+ case SPRN_TBWL:
+ spr_val = get_tb() >> 32;
+ break;
+ case SPRN_TBWU:
+ spr_val = get_tb();
+ break;
+
+ case SPRN_SPRG0:
+ spr_val = vcpu->arch.shared->sprg0;
+ break;
+ case SPRN_SPRG1:
+ spr_val = vcpu->arch.shared->sprg1;
+ break;
+ case SPRN_SPRG2:
+ spr_val = vcpu->arch.shared->sprg2;
+ break;
+ case SPRN_SPRG3:
+ spr_val = vcpu->arch.shared->sprg3;
+ break;
+ /* Note: SPRG4-7 are user-readable, so we don't get
+ * a trap. */
+
+ case SPRN_DEC:
+ spr_val = kvmppc_get_dec(vcpu, get_tb());
+ break;
+ default:
+ emulated = kvmppc_core_emulate_mfspr(vcpu, sprn,
+ &spr_val);
+ if (unlikely(emulated == EMULATE_FAIL)) {
+ printk(KERN_INFO "mfspr: unknown spr "
+ "0x%x\n", sprn);
+ }
+ break;
+ }
+
+ if (emulated == EMULATE_DONE)
+ kvmppc_set_gpr(vcpu, rt, spr_val);
+ kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
+
+ return emulated;
+}
+
/* XXX to do:
* lhax
* lhaux
@@ -156,7 +275,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
int sprn = get_sprn(inst);
enum emulation_result emulated = EMULATE_DONE;
int advance = 1;
- ulong spr_val = 0;
/* this default type might be overwritten by subcategories */
kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
@@ -236,62 +354,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
break;
case OP_31_XOP_MFSPR:
- switch (sprn) {
- case SPRN_SRR0:
- spr_val = vcpu->arch.shared->srr0;
- break;
- case SPRN_SRR1:
- spr_val = vcpu->arch.shared->srr1;
- break;
- case SPRN_PVR:
- spr_val = vcpu->arch.pvr;
- break;
- case SPRN_PIR:
- spr_val = vcpu->vcpu_id;
- break;
- case SPRN_MSSSR0:
- spr_val = 0;
- break;
-
- /* Note: mftb and TBRL/TBWL are user-accessible, so
- * the guest can always access the real TB anyways.
- * In fact, we probably will never see these traps. */
- case SPRN_TBWL:
- spr_val = get_tb() >> 32;
- break;
- case SPRN_TBWU:
- spr_val = get_tb();
- break;
-
- case SPRN_SPRG0:
- spr_val = vcpu->arch.shared->sprg0;
- break;
- case SPRN_SPRG1:
- spr_val = vcpu->arch.shared->sprg1;
- break;
- case SPRN_SPRG2:
- spr_val = vcpu->arch.shared->sprg2;
- break;
- case SPRN_SPRG3:
- spr_val = vcpu->arch.shared->sprg3;
- break;
- /* Note: SPRG4-7 are user-readable, so we don't get
- * a trap. */
-
- case SPRN_DEC:
- spr_val = kvmppc_get_dec(vcpu, get_tb());
- break;
- default:
- emulated = kvmppc_core_emulate_mfspr(vcpu, sprn,
- &spr_val);
- if (unlikely(emulated == EMULATE_FAIL)) {
- printk(KERN_INFO "mfspr: unknown spr "
- "0x%x\n", sprn);
- }
- break;
- }
- kvmppc_set_gpr(vcpu, rt, spr_val);
- kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
+ emulated = kvmppc_emulate_mfspr(vcpu, sprn, rt);
break;
case OP_31_XOP_STHX:
@@ -308,49 +371,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
break;
case OP_31_XOP_MTSPR:
- spr_val = kvmppc_get_gpr(vcpu, rs);
- switch (sprn) {
- case SPRN_SRR0:
- vcpu->arch.shared->srr0 = spr_val;
- break;
- case SPRN_SRR1:
- vcpu->arch.shared->srr1 = spr_val;
- break;
-
- /* XXX We need to context-switch the timebase for
- * watchdog and FIT. */
- case SPRN_TBWL: break;
- case SPRN_TBWU: break;
-
- case SPRN_MSSSR0: break;
-
- case SPRN_DEC:
- vcpu->arch.dec = spr_val;
- kvmppc_emulate_dec(vcpu);
- break;
-
- case SPRN_SPRG0:
- vcpu->arch.shared->sprg0 = spr_val;
- break;
- case SPRN_SPRG1:
- vcpu->arch.shared->sprg1 = spr_val;
- break;
- case SPRN_SPRG2:
- vcpu->arch.shared->sprg2 = spr_val;
- break;
- case SPRN_SPRG3:
- vcpu->arch.shared->sprg3 = spr_val;
- break;
-
- default:
- emulated = kvmppc_core_emulate_mtspr(vcpu, sprn,
- spr_val);
- if (emulated == EMULATE_FAIL)
- printk(KERN_INFO "mtspr: unknown spr "
- "0x%x\n", sprn);
- break;
- }
- kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
+ emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs);
break;
case OP_31_XOP_DCBI:
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 4d213b8b0fb..70739a08956 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -30,6 +30,7 @@
#include <asm/kvm_ppc.h>
#include <asm/tlbflush.h>
#include <asm/cputhreads.h>
+#include <asm/irqflags.h>
#include "timing.h"
#include "../mm/mmu_decl.h"
@@ -38,8 +39,7 @@
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
- return !(v->arch.shared->msr & MSR_WE) ||
- !!(v->arch.pending_exceptions) ||
+ return !!(v->arch.pending_exceptions) ||
v->requests;
}
@@ -48,6 +48,85 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
return 1;
}
+#ifndef CONFIG_KVM_BOOK3S_64_HV
+/*
+ * Common checks before entering the guest world. Call with interrupts
+ * disabled.
+ *
+ * returns:
+ *
+ * == 1 if we're ready to go into guest state
+ * <= 0 if we need to go back to the host with return value
+ */
+int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
+{
+ int r = 1;
+
+ WARN_ON_ONCE(!irqs_disabled());
+ while (true) {
+ if (need_resched()) {
+ local_irq_enable();
+ cond_resched();
+ local_irq_disable();
+ continue;
+ }
+
+ if (signal_pending(current)) {
+ kvmppc_account_exit(vcpu, SIGNAL_EXITS);
+ vcpu->run->exit_reason = KVM_EXIT_INTR;
+ r = -EINTR;
+ break;
+ }
+
+ vcpu->mode = IN_GUEST_MODE;
+
+ /*
+ * Reading vcpu->requests must happen after setting vcpu->mode,
+ * so we don't miss a request because the requester sees
+ * OUTSIDE_GUEST_MODE and assumes we'll be checking requests
+ * before next entering the guest (and thus doesn't IPI).
+ */
+ smp_mb();
+
+ if (vcpu->requests) {
+ /* Make sure we process requests preemptable */
+ local_irq_enable();
+ trace_kvm_check_requests(vcpu);
+ r = kvmppc_core_check_requests(vcpu);
+ local_irq_disable();
+ if (r > 0)
+ continue;
+ break;
+ }
+
+ if (kvmppc_core_prepare_to_enter(vcpu)) {
+ /* interrupts got enabled in between, so we
+ are back at square 1 */
+ continue;
+ }
+
+#ifdef CONFIG_PPC64
+ /* lazy EE magic */
+ hard_irq_disable();
+ if (lazy_irq_pending()) {
+ /* Got an interrupt in between, try again */
+ local_irq_enable();
+ local_irq_disable();
+ kvm_guest_exit();
+ continue;
+ }
+
+ trace_hardirqs_on();
+#endif
+
+ kvm_guest_enter();
+ break;
+ }
+
+ return r;
+}
+#endif /* CONFIG_KVM_BOOK3S_64_HV */
+
int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
{
int nr = kvmppc_get_gpr(vcpu, 11);
@@ -67,18 +146,18 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
}
switch (nr) {
- case HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE:
+ case KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE):
{
vcpu->arch.magic_page_pa = param1;
vcpu->arch.magic_page_ea = param2;
r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7;
- r = HC_EV_SUCCESS;
+ r = EV_SUCCESS;
break;
}
- case HC_VENDOR_KVM | KVM_HC_FEATURES:
- r = HC_EV_SUCCESS;
+ case KVM_HCALL_TOKEN(KVM_HC_FEATURES):
+ r = EV_SUCCESS;
#if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500V2)
/* XXX Missing magic page on 44x */
r2 |= (1 << KVM_FEATURE_MAGIC_PAGE);
@@ -86,8 +165,13 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
/* Second return value is in r4 */
break;
+ case EV_HCALL_TOKEN(EV_IDLE):
+ r = EV_SUCCESS;
+ kvm_vcpu_block(vcpu);
+ clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
+ break;
default:
- r = HC_EV_UNIMPLEMENTED;
+ r = EV_UNIMPLEMENTED;
break;
}
@@ -220,6 +304,7 @@ int kvm_dev_ioctl_check_extension(long ext)
switch (ext) {
#ifdef CONFIG_BOOKE
case KVM_CAP_PPC_BOOKE_SREGS:
+ case KVM_CAP_PPC_BOOKE_WATCHDOG:
#else
case KVM_CAP_PPC_SEGSTATE:
case KVM_CAP_PPC_HIOR:
@@ -229,6 +314,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_PPC_IRQ_LEVEL:
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_ONE_REG:
+ case KVM_CAP_IOEVENTFD:
r = 1;
break;
#ifndef CONFIG_KVM_BOOK3S_64_HV
@@ -260,10 +346,22 @@ int kvm_dev_ioctl_check_extension(long ext)
if (cpu_has_feature(CPU_FTR_ARCH_201))
r = 2;
break;
+#endif
case KVM_CAP_SYNC_MMU:
+#ifdef CONFIG_KVM_BOOK3S_64_HV
r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
+#elif defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+ r = 1;
+#else
+ r = 0;
+ break;
+#endif
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+ case KVM_CAP_PPC_HTAB_FD:
+ r = 1;
break;
#endif
+ break;
case KVM_CAP_NR_VCPUS:
/*
* Recommending a number of CPUs is somewhat arbitrary; we
@@ -302,19 +400,12 @@ long kvm_arch_dev_ioctl(struct file *filp,
void kvm_arch_free_memslot(struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
- if (!dont || free->arch.rmap != dont->arch.rmap) {
- vfree(free->arch.rmap);
- free->arch.rmap = NULL;
- }
+ kvmppc_core_free_memslot(free, dont);
}
int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
{
- slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
- if (!slot->arch.rmap)
- return -ENOMEM;
-
- return 0;
+ return kvmppc_core_create_memslot(slot, npages);
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -323,7 +414,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
int user_alloc)
{
- return kvmppc_core_prepare_memory_region(kvm, mem);
+ return kvmppc_core_prepare_memory_region(kvm, memslot, mem);
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
@@ -331,7 +422,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
struct kvm_memory_slot old,
int user_alloc)
{
- kvmppc_core_commit_memory_region(kvm, mem);
+ kvmppc_core_commit_memory_region(kvm, mem, old);
}
void kvm_arch_flush_shadow_all(struct kvm *kvm)
@@ -341,6 +432,7 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
+ kvmppc_core_flush_memslot(kvm, slot);
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
@@ -354,6 +446,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
return vcpu;
}
+int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
/* Make sure we're not using the vcpu anymore */
@@ -390,6 +487,8 @@ enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
+ int ret;
+
hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu);
vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
@@ -398,13 +497,14 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
#ifdef CONFIG_KVM_EXIT_TIMING
mutex_init(&vcpu->arch.exit_timing_lock);
#endif
-
- return 0;
+ ret = kvmppc_subarch_vcpu_init(vcpu);
+ return ret;
}
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
kvmppc_mmu_destroy(vcpu);
+ kvmppc_subarch_vcpu_uninit(vcpu);
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -420,7 +520,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
#endif
kvmppc_core_vcpu_load(vcpu, cpu);
- vcpu->cpu = smp_processor_id();
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -429,7 +528,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
#ifdef CONFIG_BOOKE
vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
#endif
- vcpu->cpu = -1;
}
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
@@ -527,6 +625,13 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
vcpu->mmio_is_write = 0;
vcpu->arch.mmio_sign_extend = 0;
+ if (!kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
+ bytes, &run->mmio.data)) {
+ kvmppc_complete_mmio_load(vcpu, run);
+ vcpu->mmio_needed = 0;
+ return EMULATE_DONE;
+ }
+
return EMULATE_DO_MMIO;
}
@@ -536,8 +641,8 @@ int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
{
int r;
- r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian);
vcpu->arch.mmio_sign_extend = 1;
+ r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian);
return r;
}
@@ -575,6 +680,13 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
}
+ if (!kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
+ bytes, &run->mmio.data)) {
+ kvmppc_complete_mmio_load(vcpu, run);
+ vcpu->mmio_needed = 0;
+ return EMULATE_DONE;
+ }
+
return EMULATE_DO_MMIO;
}
@@ -649,6 +761,12 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
r = 0;
vcpu->arch.papr_enabled = true;
break;
+#ifdef CONFIG_BOOKE
+ case KVM_CAP_PPC_BOOKE_WATCHDOG:
+ r = 0;
+ vcpu->arch.watchdog_enabled = true;
+ break;
+#endif
#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
case KVM_CAP_SW_TLB: {
struct kvm_config_tlb cfg;
@@ -751,9 +869,16 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
{
+ u32 inst_nop = 0x60000000;
+#ifdef CONFIG_KVM_BOOKE_HV
+ u32 inst_sc1 = 0x44000022;
+ pvinfo->hcall[0] = inst_sc1;
+ pvinfo->hcall[1] = inst_nop;
+ pvinfo->hcall[2] = inst_nop;
+ pvinfo->hcall[3] = inst_nop;
+#else
u32 inst_lis = 0x3c000000;
u32 inst_ori = 0x60000000;
- u32 inst_nop = 0x60000000;
u32 inst_sc = 0x44000002;
u32 inst_imm_mask = 0xffff;
@@ -770,6 +895,9 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
pvinfo->hcall[1] = inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask);
pvinfo->hcall[2] = inst_sc;
pvinfo->hcall[3] = inst_nop;
+#endif
+
+ pvinfo->flags = KVM_PPC_PVINFO_FLAGS_EV_IDLE;
return 0;
}
@@ -832,6 +960,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = 0;
break;
}
+
+ case KVM_PPC_GET_HTAB_FD: {
+ struct kvm *kvm = filp->private_data;
+ struct kvm_get_htab_fd ghf;
+
+ r = -EFAULT;
+ if (copy_from_user(&ghf, argp, sizeof(ghf)))
+ break;
+ r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf);
+ break;
+ }
#endif /* CONFIG_KVM_BOOK3S_64_HV */
#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
index ddb6a2149d4..e326489a542 100644
--- a/arch/powerpc/kvm/trace.h
+++ b/arch/powerpc/kvm/trace.h
@@ -31,6 +31,126 @@ TRACE_EVENT(kvm_ppc_instr,
__entry->inst, __entry->pc, __entry->emulate)
);
+#ifdef CONFIG_PPC_BOOK3S
+#define kvm_trace_symbol_exit \
+ {0x100, "SYSTEM_RESET"}, \
+ {0x200, "MACHINE_CHECK"}, \
+ {0x300, "DATA_STORAGE"}, \
+ {0x380, "DATA_SEGMENT"}, \
+ {0x400, "INST_STORAGE"}, \
+ {0x480, "INST_SEGMENT"}, \
+ {0x500, "EXTERNAL"}, \
+ {0x501, "EXTERNAL_LEVEL"}, \
+ {0x502, "EXTERNAL_HV"}, \
+ {0x600, "ALIGNMENT"}, \
+ {0x700, "PROGRAM"}, \
+ {0x800, "FP_UNAVAIL"}, \
+ {0x900, "DECREMENTER"}, \
+ {0x980, "HV_DECREMENTER"}, \
+ {0xc00, "SYSCALL"}, \
+ {0xd00, "TRACE"}, \
+ {0xe00, "H_DATA_STORAGE"}, \
+ {0xe20, "H_INST_STORAGE"}, \
+ {0xe40, "H_EMUL_ASSIST"}, \
+ {0xf00, "PERFMON"}, \
+ {0xf20, "ALTIVEC"}, \
+ {0xf40, "VSX"}
+#else
+#define kvm_trace_symbol_exit \
+ {0, "CRITICAL"}, \
+ {1, "MACHINE_CHECK"}, \
+ {2, "DATA_STORAGE"}, \
+ {3, "INST_STORAGE"}, \
+ {4, "EXTERNAL"}, \
+ {5, "ALIGNMENT"}, \
+ {6, "PROGRAM"}, \
+ {7, "FP_UNAVAIL"}, \
+ {8, "SYSCALL"}, \
+ {9, "AP_UNAVAIL"}, \
+ {10, "DECREMENTER"}, \
+ {11, "FIT"}, \
+ {12, "WATCHDOG"}, \
+ {13, "DTLB_MISS"}, \
+ {14, "ITLB_MISS"}, \
+ {15, "DEBUG"}, \
+ {32, "SPE_UNAVAIL"}, \
+ {33, "SPE_FP_DATA"}, \
+ {34, "SPE_FP_ROUND"}, \
+ {35, "PERFORMANCE_MONITOR"}, \
+ {36, "DOORBELL"}, \
+ {37, "DOORBELL_CRITICAL"}, \
+ {38, "GUEST_DBELL"}, \
+ {39, "GUEST_DBELL_CRIT"}, \
+ {40, "HV_SYSCALL"}, \
+ {41, "HV_PRIV"}
+#endif
+
+TRACE_EVENT(kvm_exit,
+ TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
+ TP_ARGS(exit_nr, vcpu),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, exit_nr )
+ __field( unsigned long, pc )
+ __field( unsigned long, msr )
+ __field( unsigned long, dar )
+#ifdef CONFIG_KVM_BOOK3S_PR
+ __field( unsigned long, srr1 )
+#endif
+ __field( unsigned long, last_inst )
+ ),
+
+ TP_fast_assign(
+#ifdef CONFIG_KVM_BOOK3S_PR
+ struct kvmppc_book3s_shadow_vcpu *svcpu;
+#endif
+ __entry->exit_nr = exit_nr;
+ __entry->pc = kvmppc_get_pc(vcpu);
+ __entry->dar = kvmppc_get_fault_dar(vcpu);
+ __entry->msr = vcpu->arch.shared->msr;
+#ifdef CONFIG_KVM_BOOK3S_PR
+ svcpu = svcpu_get(vcpu);
+ __entry->srr1 = svcpu->shadow_srr1;
+ svcpu_put(svcpu);
+#endif
+ __entry->last_inst = vcpu->arch.last_inst;
+ ),
+
+ TP_printk("exit=%s"
+ " | pc=0x%lx"
+ " | msr=0x%lx"
+ " | dar=0x%lx"
+#ifdef CONFIG_KVM_BOOK3S_PR
+ " | srr1=0x%lx"
+#endif
+ " | last_inst=0x%lx"
+ ,
+ __print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit),
+ __entry->pc,
+ __entry->msr,
+ __entry->dar,
+#ifdef CONFIG_KVM_BOOK3S_PR
+ __entry->srr1,
+#endif
+ __entry->last_inst
+ )
+);
+
+TRACE_EVENT(kvm_unmap_hva,
+ TP_PROTO(unsigned long hva),
+ TP_ARGS(hva),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, hva )
+ ),
+
+ TP_fast_assign(
+ __entry->hva = hva;
+ ),
+
+ TP_printk("unmap hva 0x%lx\n", __entry->hva)
+);
+
TRACE_EVENT(kvm_stlb_inval,
TP_PROTO(unsigned int stlb_index),
TP_ARGS(stlb_index),
@@ -98,41 +218,31 @@ TRACE_EVENT(kvm_gtlb_write,
__entry->word1, __entry->word2)
);
-
-/*************************************************************************
- * Book3S trace points *
- *************************************************************************/
-
-#ifdef CONFIG_KVM_BOOK3S_PR
-
-TRACE_EVENT(kvm_book3s_exit,
- TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
- TP_ARGS(exit_nr, vcpu),
+TRACE_EVENT(kvm_check_requests,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+ TP_ARGS(vcpu),
TP_STRUCT__entry(
- __field( unsigned int, exit_nr )
- __field( unsigned long, pc )
- __field( unsigned long, msr )
- __field( unsigned long, dar )
- __field( unsigned long, srr1 )
+ __field( __u32, cpu_nr )
+ __field( __u32, requests )
),
TP_fast_assign(
- struct kvmppc_book3s_shadow_vcpu *svcpu;
- __entry->exit_nr = exit_nr;
- __entry->pc = kvmppc_get_pc(vcpu);
- __entry->dar = kvmppc_get_fault_dar(vcpu);
- __entry->msr = vcpu->arch.shared->msr;
- svcpu = svcpu_get(vcpu);
- __entry->srr1 = svcpu->shadow_srr1;
- svcpu_put(svcpu);
+ __entry->cpu_nr = vcpu->vcpu_id;
+ __entry->requests = vcpu->requests;
),
- TP_printk("exit=0x%x | pc=0x%lx | msr=0x%lx | dar=0x%lx | srr1=0x%lx",
- __entry->exit_nr, __entry->pc, __entry->msr, __entry->dar,
- __entry->srr1)
+ TP_printk("vcpu=%x requests=%x",
+ __entry->cpu_nr, __entry->requests)
);
+
+/*************************************************************************
+ * Book3S trace points *
+ *************************************************************************/
+
+#ifdef CONFIG_KVM_BOOK3S_PR
+
TRACE_EVENT(kvm_book3s_reenter,
TP_PROTO(int r, struct kvm_vcpu *vcpu),
TP_ARGS(r, vcpu),
@@ -395,6 +505,44 @@ TRACE_EVENT(kvm_booke206_gtlb_write,
__entry->mas2, __entry->mas7_3)
);
+TRACE_EVENT(kvm_booke206_ref_release,
+ TP_PROTO(__u64 pfn, __u32 flags),
+ TP_ARGS(pfn, flags),
+
+ TP_STRUCT__entry(
+ __field( __u64, pfn )
+ __field( __u32, flags )
+ ),
+
+ TP_fast_assign(
+ __entry->pfn = pfn;
+ __entry->flags = flags;
+ ),
+
+ TP_printk("pfn=%llx flags=%x",
+ __entry->pfn, __entry->flags)
+);
+
+TRACE_EVENT(kvm_booke_queue_irqprio,
+ TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority),
+ TP_ARGS(vcpu, priority),
+
+ TP_STRUCT__entry(
+ __field( __u32, cpu_nr )
+ __field( __u32, priority )
+ __field( unsigned long, pending )
+ ),
+
+ TP_fast_assign(
+ __entry->cpu_nr = vcpu->vcpu_id;
+ __entry->priority = priority;
+ __entry->pending = vcpu->arch.pending_exceptions;
+ ),
+
+ TP_printk("vcpu=%x prio=%x pending=%lx",
+ __entry->cpu_nr, __entry->priority, __entry->pending)
+);
+
#endif
#endif /* _TRACE_KVM_H */
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 0a6b28336eb..3a8489a354e 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -113,19 +113,6 @@ static int store_updates_sp(struct pt_regs *regs)
#define MM_FAULT_CONTINUE -1
#define MM_FAULT_ERR(sig) (sig)
-static int out_of_memory(struct pt_regs *regs)
-{
- /*
- * We ran out of memory, or some other thing happened to us that made
- * us unable to handle the page fault gracefully.
- */
- up_read(&current->mm->mmap_sem);
- if (!user_mode(regs))
- return MM_FAULT_ERR(SIGKILL);
- pagefault_out_of_memory();
- return MM_FAULT_RETURN;
-}
-
static int do_sigbus(struct pt_regs *regs, unsigned long address)
{
siginfo_t info;
@@ -169,8 +156,18 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
return MM_FAULT_CONTINUE;
/* Out of memory */
- if (fault & VM_FAULT_OOM)
- return out_of_memory(regs);
+ if (fault & VM_FAULT_OOM) {
+ up_read(&current->mm->mmap_sem);
+
+ /*
+ * We ran out of memory, or some other thing happened to us that
+ * made us unable to handle the page fault gracefully.
+ */
+ if (!user_mode(regs))
+ return MM_FAULT_ERR(SIGKILL);
+ pagefault_out_of_memory();
+ return MM_FAULT_RETURN;
+ }
/* Bus error. x86 handles HWPOISON here, we'll add this if/when
* we support the feature in HW
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 59213cfaeca..bba87ca2b4d 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -399,18 +399,6 @@ static unsigned long read_n_cells(int n, const unsigned int **buf)
return result;
}
-struct of_drconf_cell {
- u64 base_addr;
- u32 drc_index;
- u32 reserved;
- u32 aa_index;
- u32 flags;
-};
-
-#define DRCONF_MEM_ASSIGNED 0x00000008
-#define DRCONF_MEM_AI_INVALID 0x00000040
-#define DRCONF_MEM_RESERVED 0x00000080
-
/*
* Read the next memblock list entry from the ibm,dynamic-memory property
* and return the information in the provided of_drconf_cell structure.
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 5829d2a950d..cf9dada734b 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -722,7 +722,7 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
}
/*
- * is_hugepage_only_range() is used by generic code to verify wether
+ * is_hugepage_only_range() is used by generic code to verify whether
* a normal mmap mapping (non hugetlbfs) is valid on a given area.
*
* until the generic code provides a more generic hook and/or starts
diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S
index fab919fd138..626ad081639 100644
--- a/arch/powerpc/mm/tlb_nohash_low.S
+++ b/arch/powerpc/mm/tlb_nohash_low.S
@@ -191,12 +191,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
#ifdef CONFIG_PPC_47x
/*
- * 47x variant of icbt
- */
-# define ICBT(CT,RA,RB) \
- .long 0x7c00002c | ((CT) << 21) | ((RA) << 16) | ((RB) << 11)
-
-/*
* _tlbivax_bcast is only on 47x. We don't bother doing a runtime
* check though, it will blow up soon enough if we mistakenly try
* to use it on a 440.
@@ -208,8 +202,7 @@ _GLOBAL(_tlbivax_bcast)
wrteei 0
mtspr SPRN_MMUCR,r5
isync
-/* tlbivax 0,r3 - use .long to avoid binutils deps */
- .long 0x7c000624 | (r3 << 11)
+ PPC_TLBIVAX(0, R3)
isync
eieio
tlbsync
@@ -227,11 +220,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_476_DD2)
bl 2f
2: mflr r6
li r7,32
- ICBT(0,r6,r7) /* touch next cache line */
+ PPC_ICBT(0,R6,R7) /* touch next cache line */
add r6,r6,r7
- ICBT(0,r6,r7) /* touch next cache line */
+ PPC_ICBT(0,R6,R7) /* touch next cache line */
add r6,r6,r7
- ICBT(0,r6,r7) /* touch next cache line */
+ PPC_ICBT(0,R6,R7) /* touch next cache line */
sync
nop
nop
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 1fc8109bf2f..8a5dfaf5c6b 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -134,6 +134,12 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh);
___PPC_RS(a) | IMM_L(i))
#define PPC_ORIS(d, a, i) EMIT(PPC_INST_ORIS | ___PPC_RA(d) | \
___PPC_RS(a) | IMM_L(i))
+#define PPC_XOR(d, a, b) EMIT(PPC_INST_XOR | ___PPC_RA(d) | \
+ ___PPC_RS(a) | ___PPC_RB(b))
+#define PPC_XORI(d, a, i) EMIT(PPC_INST_XORI | ___PPC_RA(d) | \
+ ___PPC_RS(a) | IMM_L(i))
+#define PPC_XORIS(d, a, i) EMIT(PPC_INST_XORIS | ___PPC_RA(d) | \
+ ___PPC_RS(a) | IMM_L(i))
#define PPC_SLW(d, a, s) EMIT(PPC_INST_SLW | ___PPC_RA(d) | \
___PPC_RS(a) | ___PPC_RB(s))
#define PPC_SRW(d, a, s) EMIT(PPC_INST_SRW | ___PPC_RA(d) | \
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index dd1130642d0..e834f1ec23c 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -13,6 +13,8 @@
#include <asm/cacheflush.h>
#include <linux/netdevice.h>
#include <linux/filter.h>
+#include <linux/if_vlan.h>
+
#include "bpf_jit.h"
#ifndef __BIG_ENDIAN
@@ -89,6 +91,8 @@ static void bpf_jit_build_prologue(struct sk_filter *fp, u32 *image,
case BPF_S_ANC_IFINDEX:
case BPF_S_ANC_MARK:
case BPF_S_ANC_RXHASH:
+ case BPF_S_ANC_VLAN_TAG:
+ case BPF_S_ANC_VLAN_TAG_PRESENT:
case BPF_S_ANC_CPU:
case BPF_S_ANC_QUEUE:
case BPF_S_LD_W_ABS:
@@ -232,6 +236,17 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
if (K >= 65536)
PPC_ORIS(r_A, r_A, IMM_H(K));
break;
+ case BPF_S_ANC_ALU_XOR_X:
+ case BPF_S_ALU_XOR_X: /* A ^= X */
+ ctx->seen |= SEEN_XREG;
+ PPC_XOR(r_A, r_A, r_X);
+ break;
+ case BPF_S_ALU_XOR_K: /* A ^= K */
+ if (IMM_L(K))
+ PPC_XORI(r_A, r_A, IMM_L(K));
+ if (K >= 65536)
+ PPC_XORIS(r_A, r_A, IMM_H(K));
+ break;
case BPF_S_ALU_LSH_X: /* A <<= X; */
ctx->seen |= SEEN_XREG;
PPC_SLW(r_A, r_A, r_X);
@@ -371,6 +386,16 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
rxhash));
break;
+ case BPF_S_ANC_VLAN_TAG:
+ case BPF_S_ANC_VLAN_TAG_PRESENT:
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
+ PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
+ vlan_tci));
+ if (filter[i].code == BPF_S_ANC_VLAN_TAG)
+ PPC_ANDI(r_A, r_A, VLAN_VID_MASK);
+ else
+ PPC_ANDI(r_A, r_A, VLAN_TAG_PRESENT);
+ break;
case BPF_S_ANC_QUEUE:
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
queue_mapping) != 2);
diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c
index 441af08edf4..2ee01e38d5e 100644
--- a/arch/powerpc/perf/power7-pmu.c
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -54,8 +54,10 @@
* Layout of constraint bits:
* 6666555555555544444444443333333333222222222211111111110000000000
* 3210987654321098765432109876543210987654321098765432109876543210
- * [ ><><><><><><>
- * NC P6P5P4P3P2P1
+ * < >< ><><><><><><>
+ * L2 NC P6P5P4P3P2P1
+ *
+ * L2 - 16-18 - Required L2SEL value (select field)
*
* NC - number of counters
* 15: NC error 0x8000
@@ -72,7 +74,7 @@
static int power7_get_constraint(u64 event, unsigned long *maskp,
unsigned long *valp)
{
- int pmc, sh;
+ int pmc, sh, unit;
unsigned long mask = 0, value = 0;
pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
@@ -90,6 +92,15 @@ static int power7_get_constraint(u64 event, unsigned long *maskp,
mask |= 0x8000;
value |= 0x1000;
}
+
+ unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
+ if (unit == 6) {
+ /* L2SEL must be identical across events */
+ int l2sel = (event >> PM_L2SEL_SH) & PM_L2SEL_MSK;
+ mask |= 0x7 << 16;
+ value |= l2sel << 16;
+ }
+
*maskp = mask;
*valp = value;
return 0;
diff --git a/arch/powerpc/platforms/512x/Kconfig b/arch/powerpc/platforms/512x/Kconfig
index b62508b113d..c16999802ec 100644
--- a/arch/powerpc/platforms/512x/Kconfig
+++ b/arch/powerpc/platforms/512x/Kconfig
@@ -2,7 +2,6 @@ config PPC_MPC512x
bool "512x-based boards"
depends on 6xx
select FSL_SOC
- select FB_FSL_DIU
select IPIC
select PPC_CLOCK
select PPC_PCI_CHOICE
diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.c b/arch/powerpc/platforms/512x/mpc5121_ads.c
index dcef6ade48e..0a134e0469e 100644
--- a/arch/powerpc/platforms/512x/mpc5121_ads.c
+++ b/arch/powerpc/platforms/512x/mpc5121_ads.c
@@ -42,7 +42,10 @@ static void __init mpc5121_ads_setup_arch(void)
for_each_compatible_node(np, "pci", "fsl,mpc5121-pci")
mpc83xx_add_bridge(np);
#endif
+
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
mpc512x_setup_diu();
+#endif
}
static void __init mpc5121_ads_init_IRQ(void)
diff --git a/arch/powerpc/platforms/512x/mpc512x.h b/arch/powerpc/platforms/512x/mpc512x.h
index 1ab6d11d0b1..c32b399eb95 100644
--- a/arch/powerpc/platforms/512x/mpc512x.h
+++ b/arch/powerpc/platforms/512x/mpc512x.h
@@ -16,6 +16,13 @@ extern void __init mpc512x_init(void);
extern int __init mpc5121_clk_init(void);
void __init mpc512x_declare_of_platform_devices(void);
extern void mpc512x_restart(char *cmd);
-extern void mpc512x_init_diu(void);
-extern void mpc512x_setup_diu(void);
+
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+void mpc512x_init_diu(void);
+void mpc512x_setup_diu(void);
+#else
+#define mpc512x_init_diu NULL
+#define mpc512x_setup_diu NULL
+#endif
+
#endif /* __MPC512X_H__ */
diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c
index 1650e090ef3..35f14fda108 100644
--- a/arch/powerpc/platforms/512x/mpc512x_shared.c
+++ b/arch/powerpc/platforms/512x/mpc512x_shared.c
@@ -58,6 +58,8 @@ void mpc512x_restart(char *cmd)
;
}
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+
struct fsl_diu_shared_fb {
u8 gamma[0x300]; /* 32-bit aligned! */
struct diu_ad ad0; /* 32-bit aligned! */
@@ -66,25 +68,6 @@ struct fsl_diu_shared_fb {
bool in_use;
};
-u32 mpc512x_get_pixel_format(enum fsl_diu_monitor_port port,
- unsigned int bits_per_pixel)
-{
- switch (bits_per_pixel) {
- case 32:
- return 0x88883316;
- case 24:
- return 0x88082219;
- case 16:
- return 0x65053118;
- }
- return 0x00000400;
-}
-
-void mpc512x_set_gamma_table(enum fsl_diu_monitor_port port,
- char *gamma_table_base)
-{
-}
-
void mpc512x_set_monitor_port(enum fsl_diu_monitor_port port)
{
}
@@ -320,14 +303,14 @@ void __init mpc512x_setup_diu(void)
}
}
- diu_ops.get_pixel_format = mpc512x_get_pixel_format;
- diu_ops.set_gamma_table = mpc512x_set_gamma_table;
diu_ops.set_monitor_port = mpc512x_set_monitor_port;
diu_ops.set_pixel_clock = mpc512x_set_pixel_clock;
diu_ops.valid_monitor_port = mpc512x_valid_monitor_port;
diu_ops.release_bootmem = mpc512x_release_bootmem;
}
+#endif
+
void __init mpc512x_init_IRQ(void)
{
struct device_node *np;
diff --git a/arch/powerpc/platforms/52xx/lite5200.c b/arch/powerpc/platforms/52xx/lite5200.c
index 448d862bcf3..1843bc93201 100644
--- a/arch/powerpc/platforms/52xx/lite5200.c
+++ b/arch/powerpc/platforms/52xx/lite5200.c
@@ -4,7 +4,7 @@
* Written by: Grant Likely <grant.likely@secretlab.ca>
*
* Copyright (C) Secret Lab Technologies Ltd. 2006. All rights reserved.
- * Copyright (C) Freescale Semicondutor, Inc. 2006. All rights reserved.
+ * Copyright 2006 Freescale Semiconductor, Inc. All rights reserved.
*
* Description:
* This program is free software; you can redistribute it and/or modify it
diff --git a/arch/powerpc/platforms/52xx/mpc5200_simple.c b/arch/powerpc/platforms/52xx/mpc5200_simple.c
index 9cf36020cf0..792a301a0bf 100644
--- a/arch/powerpc/platforms/52xx/mpc5200_simple.c
+++ b/arch/powerpc/platforms/52xx/mpc5200_simple.c
@@ -50,6 +50,7 @@ static void __init mpc5200_simple_setup_arch(void)
/* list of the supported boards */
static const char *board[] __initdata = {
+ "anonymous,a3m071",
"anonymous,a4m072",
"anon,charon",
"ifm,o2d",
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index 028470b9588..a51cb07bd66 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -526,7 +526,7 @@ EXPORT_SYMBOL(mpc52xx_gpt_timer_period);
#define WDT_IDENTITY "mpc52xx watchdog on GPT0"
-/* wdt_is_active stores wether or not the /dev/watchdog device is opened */
+/* wdt_is_active stores whether or not the /dev/watchdog device is opened */
static unsigned long wdt_is_active;
/* wdt-capable gpt */
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
index 2351f9e0fb6..16150fa430f 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
@@ -578,18 +578,4 @@ static struct platform_driver mpc52xx_lpbfifo_driver = {
.probe = mpc52xx_lpbfifo_probe,
.remove = __devexit_p(mpc52xx_lpbfifo_remove),
};
-
-/***********************************************************************
- * Module init/exit
- */
-static int __init mpc52xx_lpbfifo_init(void)
-{
- return platform_driver_register(&mpc52xx_lpbfifo_driver);
-}
-module_init(mpc52xx_lpbfifo_init);
-
-static void __exit mpc52xx_lpbfifo_exit(void)
-{
- platform_driver_unregister(&mpc52xx_lpbfifo_driver);
-}
-module_exit(mpc52xx_lpbfifo_exit);
+module_platform_driver(mpc52xx_lpbfifo_driver);
diff --git a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
index 328d221fd1c..74861a7fb80 100644
--- a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
+++ b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
@@ -16,7 +16,6 @@
#include <linux/spinlock.h>
#include <linux/irq.h>
#include <linux/types.h>
-#include <linux/bootmem.h>
#include <linux/slab.h>
#include <asm/io.h>
@@ -149,7 +148,7 @@ int __init pq2ads_pci_init_irq(void)
priv->regs = of_iomap(np, 0);
if (!priv->regs) {
printk(KERN_ERR "Cannot map PCI PIC registers.\n");
- goto out_free_bootmem;
+ goto out_free_kmalloc;
}
/* mask all PCI interrupts */
@@ -171,9 +170,8 @@ int __init pq2ads_pci_init_irq(void)
out_unmap_regs:
iounmap(priv->regs);
-out_free_bootmem:
- free_bootmem((unsigned long)priv,
- sizeof(struct pq2ads_pci_pic));
+out_free_kmalloc:
+ kfree(priv);
of_node_put(np);
out_unmap_irq:
irq_dispose_mapping(irq);
diff --git a/arch/powerpc/platforms/83xx/mpc832x_mds.c b/arch/powerpc/platforms/83xx/mpc832x_mds.c
index d440435e055..8d762203eef 100644
--- a/arch/powerpc/platforms/83xx/mpc832x_mds.c
+++ b/arch/powerpc/platforms/83xx/mpc832x_mds.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) Freescale Semicondutor, Inc. 2006. All rights reserved.
+ * Copyright 2006 Freescale Semiconductor, Inc. All rights reserved.
*
* Description:
* MPC832xE MDS board specific routines.
diff --git a/arch/powerpc/platforms/83xx/mpc836x_mds.c b/arch/powerpc/platforms/83xx/mpc836x_mds.c
index 1b1f6c8a1a1..1a26d2f8340 100644
--- a/arch/powerpc/platforms/83xx/mpc836x_mds.c
+++ b/arch/powerpc/platforms/83xx/mpc836x_mds.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) Freescale Semicondutor, Inc. 2006. All rights reserved.
+ * Copyright 2006 Freescale Semiconductor, Inc. All rights reserved.
*
* Author: Li Yang <LeoLi@freescale.com>
* Yin Olivia <Hong-hua.Yin@freescale.com>
diff --git a/arch/powerpc/platforms/83xx/mpc836x_rdk.c b/arch/powerpc/platforms/83xx/mpc836x_rdk.c
index f8769d713d6..b63b42d11d6 100644
--- a/arch/powerpc/platforms/83xx/mpc836x_rdk.c
+++ b/arch/powerpc/platforms/83xx/mpc836x_rdk.c
@@ -1,7 +1,7 @@
/*
* MPC8360E-RDK board file.
*
- * Copyright (c) 2006 Freescale Semicondutor, Inc.
+ * Copyright (c) 2006 Freescale Semiconductor, Inc.
* Copyright (c) 2007-2008 MontaVista Software, Inc.
*
* Author: Anton Vorontsov <avorontsov@ru.mvista.com>
diff --git a/arch/powerpc/platforms/83xx/mpc837x_rdb.c b/arch/powerpc/platforms/83xx/mpc837x_rdb.c
index eca1f0960ff..9813c81e8e5 100644
--- a/arch/powerpc/platforms/83xx/mpc837x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc837x_rdb.c
@@ -1,7 +1,7 @@
/*
* arch/powerpc/platforms/83xx/mpc837x_rdb.c
*
- * Copyright (C) 2007 Freescale Semicondutor, Inc. All rights reserved.
+ * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
*
* MPC837x RDB board specific routines
*
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
index 8498f732347..bd12588fa25 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2006-2010, 2012 Freescale Semicondutor, Inc.
+ * Copyright (C) 2006-2010, 2012 Freescale Semiconductor, Inc.
* All rights reserved.
*
* Author: Andy Fleming <afleming@freescale.com>
diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c
index 848a3e98e1c..7328b8d7412 100644
--- a/arch/powerpc/platforms/85xx/p1022_ds.c
+++ b/arch/powerpc/platforms/85xx/p1022_ds.c
@@ -249,7 +249,7 @@ static void p1022ds_set_monitor_port(enum fsl_diu_monitor_port port)
goto exit;
}
- iprop = of_get_property(law_node, "fsl,num-laws", 0);
+ iprop = of_get_property(law_node, "fsl,num-laws", NULL);
if (!iprop) {
pr_err("p1022ds: LAW node is missing fsl,num-laws property\n");
goto exit;
@@ -539,7 +539,7 @@ static void __init p1022_ds_setup_arch(void)
};
/*
- * prom_update_property() is called before
+ * of_update_property() is called before
* kmalloc() is available, so the 'new' object
* should be allocated in the global area.
* The easiest way is to do that is to
@@ -548,7 +548,7 @@ static void __init p1022_ds_setup_arch(void)
*/
pr_info("p1022ds: disabling %s node",
np2->full_name);
- prom_update_property(np2, &nor_status);
+ of_update_property(np2, &nor_status);
of_node_put(np2);
}
@@ -564,7 +564,7 @@ static void __init p1022_ds_setup_arch(void)
pr_info("p1022ds: disabling %s node",
np2->full_name);
- prom_update_property(np2, &nand_status);
+ of_update_property(np2, &nand_status);
of_node_put(np2);
}
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index 6fcfa12e5c5..148c2f2d978 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -128,6 +128,19 @@ static void __cpuinit smp_85xx_mach_cpu_die(void)
}
#endif
+static inline void flush_spin_table(void *spin_table)
+{
+ flush_dcache_range((ulong)spin_table,
+ (ulong)spin_table + sizeof(struct epapr_spin_table));
+}
+
+static inline u32 read_spin_table_addr_l(void *spin_table)
+{
+ flush_dcache_range((ulong)spin_table,
+ (ulong)spin_table + sizeof(struct epapr_spin_table));
+ return in_be32(&((struct epapr_spin_table *)spin_table)->addr_l);
+}
+
static int __cpuinit smp_85xx_kick_cpu(int nr)
{
unsigned long flags;
@@ -161,8 +174,8 @@ static int __cpuinit smp_85xx_kick_cpu(int nr)
/* Map the spin table */
if (ioremappable)
- spin_table = ioremap(*cpu_rel_addr,
- sizeof(struct epapr_spin_table));
+ spin_table = ioremap_prot(*cpu_rel_addr,
+ sizeof(struct epapr_spin_table), _PAGE_COHERENT);
else
spin_table = phys_to_virt(*cpu_rel_addr);
@@ -173,7 +186,16 @@ static int __cpuinit smp_85xx_kick_cpu(int nr)
generic_set_cpu_up(nr);
if (system_state == SYSTEM_RUNNING) {
+ /*
+ * To keep it compatible with old boot program which uses
+ * cache-inhibit spin table, we need to flush the cache
+ * before accessing spin table to invalidate any staled data.
+ * We also need to flush the cache after writing to spin
+ * table to push data out.
+ */
+ flush_spin_table(spin_table);
out_be32(&spin_table->addr_l, 0);
+ flush_spin_table(spin_table);
/*
* We don't set the BPTR register here since it already points
@@ -181,9 +203,14 @@ static int __cpuinit smp_85xx_kick_cpu(int nr)
*/
mpic_reset_core(hw_cpu);
- /* wait until core is ready... */
- if (!spin_event_timeout(in_be32(&spin_table->addr_l) == 1,
- 10000, 100)) {
+ /*
+ * wait until core is ready...
+ * We need to invalidate the stale data, in case the boot
+ * loader uses a cache-inhibited spin table.
+ */
+ if (!spin_event_timeout(
+ read_spin_table_addr_l(spin_table) == 1,
+ 10000, 100)) {
pr_err("%s: timeout waiting for core %d to reset\n",
__func__, hw_cpu);
ret = -ENOENT;
@@ -194,12 +221,10 @@ static int __cpuinit smp_85xx_kick_cpu(int nr)
__secondary_hold_acknowledge = -1;
}
#endif
+ flush_spin_table(spin_table);
out_be32(&spin_table->pir, hw_cpu);
out_be32(&spin_table->addr_l, __pa(__early_start));
-
- if (!ioremappable)
- flush_dcache_range((ulong)spin_table,
- (ulong)spin_table + sizeof(struct epapr_spin_table));
+ flush_spin_table(spin_table);
/* Wait a bit for the CPU to ack. */
if (!spin_event_timeout(__secondary_hold_acknowledge == hw_cpu,
@@ -213,13 +238,11 @@ out:
#else
smp_generic_kick_cpu(nr);
+ flush_spin_table(spin_table);
out_be32(&spin_table->pir, hw_cpu);
out_be64((u64 *)(&spin_table->addr_h),
__pa((u64)*((unsigned long long *)generic_secondary_smp_init)));
-
- if (!ioremappable)
- flush_dcache_range((ulong)spin_table,
- (ulong)spin_table + sizeof(struct epapr_spin_table));
+ flush_spin_table(spin_table);
#endif
local_irq_restore(flags);
diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
index a817398a56d..04d9d317f74 100644
--- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
+++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
@@ -353,5 +353,7 @@ define_machine(mpc86xx_hpcd) {
.time_init = mpc86xx_time_init,
.calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
+#ifdef CONFIG_PCI
.pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+#endif
};
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index e7a896acd98..48a920d5148 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -90,6 +90,7 @@ config MPIC
config PPC_EPAPR_HV_PIC
bool
default n
+ select EPAPR_PARAVIRT
config MPIC_WEIRD
bool
diff --git a/arch/powerpc/platforms/cell/celleb_pci.c b/arch/powerpc/platforms/cell/celleb_pci.c
index abc8af43ea7..173568140a3 100644
--- a/arch/powerpc/platforms/cell/celleb_pci.c
+++ b/arch/powerpc/platforms/cell/celleb_pci.c
@@ -401,11 +401,11 @@ error:
} else {
if (config && *config) {
size = 256;
- free_bootmem((unsigned long)(*config), size);
+ free_bootmem(__pa(*config), size);
}
if (res && *res) {
size = sizeof(struct celleb_pci_resource);
- free_bootmem((unsigned long)(*res), size);
+ free_bootmem(__pa(*res), size);
}
}
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index dca21366674..e56bb651da1 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -728,7 +728,7 @@ static struct cbe_iommu * __init cell_iommu_alloc(struct device_node *np)
nid, np->full_name);
/* XXX todo: If we can have multiple windows on the same IOMMU, which
- * isn't the case today, we probably want here to check wether the
+ * isn't the case today, we probably want here to check whether the
* iommu for that node is already setup.
* However, there might be issue with getting the size right so let's
* ignore that for now. We might want to completely get rid of the
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
index d8b7cc8a66c..8e299447127 100644
--- a/arch/powerpc/platforms/cell/spider-pic.c
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -148,7 +148,7 @@ static int spider_set_irq_type(struct irq_data *d, unsigned int type)
/* Configure the source. One gross hack that was there before and
* that I've kept around is the priority to the BE which I set to
- * be the same as the interrupt source number. I don't know wether
+ * be the same as the interrupt source number. I don't know whether
* that's supposed to make any kind of sense however, we'll have to
* decide that, but for now, I'm not changing the behaviour.
*/
@@ -220,7 +220,7 @@ static void spider_irq_cascade(unsigned int irq, struct irq_desc *desc)
/* For hooking up the cascace we have a problem. Our device-tree is
* crap and we don't know on which BE iic interrupt we are hooked on at
* least not the "standard" way. We can reconstitute it based on two
- * informations though: which BE node we are connected to and wether
+ * informations though: which BE node we are connected to and whether
* we are connected to IOIF0 or IOIF1. Right now, we really only care
* about the IBM cell blade and we know that its firmware gives us an
* interrupt-map property which is pretty strange.
@@ -232,7 +232,7 @@ static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic)
int imaplen, intsize, unit;
struct device_node *iic;
- /* First, we check wether we have a real "interrupts" in the device
+ /* First, we check whether we have a real "interrupts" in the device
* tree in case the device-tree is ever fixed
*/
struct of_irq oirq;
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 965d381abd7..25db92a8e1c 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -1094,7 +1094,7 @@ static int show_spu_loadavg(struct seq_file *s, void *private)
LOAD_INT(c), LOAD_FRAC(c),
count_active_contexts(),
atomic_read(&nr_spu_contexts),
- current->nsproxy->pid_ns->last_pid);
+ task_active_pid_ns(current)->last_pid);
return 0;
}
diff --git a/arch/powerpc/platforms/powermac/cpufreq_32.c b/arch/powerpc/platforms/powermac/cpufreq_32.c
index 64171198535..311b804353b 100644
--- a/arch/powerpc/platforms/powermac/cpufreq_32.c
+++ b/arch/powerpc/platforms/powermac/cpufreq_32.c
@@ -55,6 +55,7 @@ static unsigned int low_freq;
static unsigned int hi_freq;
static unsigned int cur_freq;
static unsigned int sleep_freq;
+static unsigned long transition_latency;
/*
* Different models uses different mechanisms to switch the frequency
@@ -403,7 +404,7 @@ static int pmac_cpufreq_cpu_init(struct cpufreq_policy *policy)
if (policy->cpu != 0)
return -ENODEV;
- policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+ policy->cpuinfo.transition_latency = transition_latency;
policy->cur = cur_freq;
cpufreq_frequency_table_get_attr(pmac_cpu_freqs, policy->cpu);
@@ -658,12 +659,14 @@ static int __init pmac_cpufreq_setup(void)
if (!value)
goto out;
cur_freq = (*value) / 1000;
+ transition_latency = CPUFREQ_ETERNAL;
/* Check for 7447A based MacRISC3 */
if (of_machine_is_compatible("MacRISC3") &&
of_get_property(cpunode, "dynamic-power-step", NULL) &&
PVR_VER(mfspr(SPRN_PVR)) == 0x8003) {
pmac_cpufreq_init_7447A(cpunode);
+ transition_latency = 8000000;
/* Check for other MacRISC3 machines */
} else if (of_machine_is_compatible("PowerBook3,4") ||
of_machine_is_compatible("PowerBook3,5") ||
diff --git a/arch/powerpc/platforms/powermac/pfunc_core.c b/arch/powerpc/platforms/powermac/pfunc_core.c
index b0c3777528a..d588e48dff7 100644
--- a/arch/powerpc/platforms/powermac/pfunc_core.c
+++ b/arch/powerpc/platforms/powermac/pfunc_core.c
@@ -686,7 +686,7 @@ static int pmf_add_functions(struct pmf_device *dev, void *driverdata)
int count = 0;
for (pp = dev->node->properties; pp != 0; pp = pp->next) {
- char *name;
+ const char *name;
if (strncmp(pp->name, PP_PREFIX, plen) != 0)
continue;
name = pp->name + plen;
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index c4e630576ff..31036b56670 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -529,7 +529,7 @@ static int __init pmac_pic_probe_mpic(void)
void __init pmac_pic_init(void)
{
/* We configure the OF parsing based on our oldworld vs. newworld
- * platform type and wether we were booted by BootX.
+ * platform type and whether we were booted by BootX.
*/
#ifdef CONFIG_PPC32
if (!pmac_newworld)
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 471aa3ccd9f..53d052e95cf 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -34,24 +34,12 @@
#include "powernv.h"
#include "pci.h"
-static int __pe_printk(const char *level, const struct pnv_ioda_pe *pe,
- struct va_format *vaf)
-{
- char pfix[32];
-
- if (pe->pdev)
- strlcpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix));
- else
- sprintf(pfix, "%04x:%02x ",
- pci_domain_nr(pe->pbus), pe->pbus->number);
- return printk("pci %s%s: [PE# %.3d] %pV", level, pfix, pe->pe_number, vaf);
-}
-
#define define_pe_printk_level(func, kern_level) \
static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...) \
{ \
struct va_format vaf; \
va_list args; \
+ char pfix[32]; \
int r; \
\
va_start(args, fmt); \
@@ -59,7 +47,16 @@ static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...) \
vaf.fmt = fmt; \
vaf.va = &args; \
\
- r = __pe_printk(kern_level, pe, &vaf); \
+ if (pe->pdev) \
+ strlcpy(pfix, dev_name(&pe->pdev->dev), \
+ sizeof(pfix)); \
+ else \
+ sprintf(pfix, "%04x:%02x ", \
+ pci_domain_nr(pe->pbus), \
+ pe->pbus->number); \
+ r = printk(kern_level "pci %s: [PE# %.3d] %pV", \
+ pfix, pe->pe_number, &vaf); \
+ \
va_end(args); \
\
return r; \
diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c
index 56d26bc4fd4..09787139834 100644
--- a/arch/powerpc/platforms/ps3/os-area.c
+++ b/arch/powerpc/platforms/ps3/os-area.c
@@ -280,13 +280,13 @@ static void os_area_set_property(struct device_node *node,
if (tmp) {
pr_debug("%s:%d found %s\n", __func__, __LINE__, prop->name);
- prom_remove_property(node, tmp);
+ of_remove_property(node, tmp);
}
- result = prom_add_property(node, prop);
+ result = of_add_property(node, prop);
if (result)
- pr_debug("%s:%d prom_set_property failed\n", __func__,
+ pr_debug("%s:%d of_set_property failed\n", __func__,
__LINE__);
}
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 0f1b706506e..a1a7b9a67ff 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -13,17 +13,16 @@
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/notifier.h>
-#include <linux/proc_fs.h>
#include <linux/spinlock.h>
#include <linux/cpu.h>
#include <linux/slab.h>
+#include <linux/of.h>
#include "offline_states.h"
#include <asm/prom.h>
#include <asm/machdep.h>
#include <asm/uaccess.h>
#include <asm/rtas.h>
-#include <asm/pSeries_reconfig.h>
struct cc_workarea {
u32 drc_index;
@@ -255,9 +254,6 @@ static struct device_node *derive_parent(const char *path)
int dlpar_attach_node(struct device_node *dn)
{
-#ifdef CONFIG_PROC_DEVICETREE
- struct proc_dir_entry *ent;
-#endif
int rc;
of_node_set_flag(dn, OF_DYNAMIC);
@@ -266,44 +262,26 @@ int dlpar_attach_node(struct device_node *dn)
if (!dn->parent)
return -ENOMEM;
- rc = pSeries_reconfig_notify(PSERIES_RECONFIG_ADD, dn);
+ rc = of_attach_node(dn);
if (rc) {
printk(KERN_ERR "Failed to add device node %s\n",
dn->full_name);
return rc;
}
- of_attach_node(dn);
-
-#ifdef CONFIG_PROC_DEVICETREE
- ent = proc_mkdir(strrchr(dn->full_name, '/') + 1, dn->parent->pde);
- if (ent)
- proc_device_tree_add_node(dn, ent);
-#endif
-
of_node_put(dn->parent);
return 0;
}
int dlpar_detach_node(struct device_node *dn)
{
-#ifdef CONFIG_PROC_DEVICETREE
- struct device_node *parent = dn->parent;
- struct property *prop = dn->properties;
-
- while (prop) {
- remove_proc_entry(prop->name, dn->pde);
- prop = prop->next;
- }
+ int rc;
- if (dn->pde)
- remove_proc_entry(dn->pde->name, parent->pde);
-#endif
+ rc = of_detach_node(dn);
+ if (rc)
+ return rc;
- pSeries_reconfig_notify(PSERIES_RECONFIG_REMOVE, dn);
- of_detach_node(dn);
of_node_put(dn); /* Must decrement the refcount */
-
return 0;
}
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
index 0b0eff0cce3..7b56118f531 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -56,6 +56,7 @@ firmware_features_table[FIRMWARE_MAX_FEATURES] = {
{FW_FEATURE_MULTITCE, "hcall-multi-tce"},
{FW_FEATURE_SPLPAR, "hcall-splpar"},
{FW_FEATURE_VPHN, "hcall-vphn"},
+ {FW_FEATURE_SET_MODE, "hcall-set-mode"},
};
/* Build up the firmware features bitmask using the contents of
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 64c97d8ac0c..a38956269fb 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -23,12 +23,12 @@
#include <linux/delay.h>
#include <linux/sched.h> /* for idle_task_exit */
#include <linux/cpu.h>
+#include <linux/of.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/firmware.h>
#include <asm/machdep.h>
#include <asm/vdso_datapage.h>
-#include <asm/pSeries_reconfig.h>
#include <asm/xics.h>
#include "plpar_wrappers.h"
#include "offline_states.h"
@@ -333,10 +333,10 @@ static int pseries_smp_notifier(struct notifier_block *nb,
int err = 0;
switch (action) {
- case PSERIES_RECONFIG_ADD:
+ case OF_RECONFIG_ATTACH_NODE:
err = pseries_add_processor(node);
break;
- case PSERIES_RECONFIG_REMOVE:
+ case OF_RECONFIG_DETACH_NODE:
pseries_remove_processor(node);
break;
}
@@ -399,7 +399,7 @@ static int __init pseries_cpu_hotplug_init(void)
/* Processors can be added/removed only on LPAR */
if (firmware_has_feature(FW_FEATURE_LPAR)) {
- pSeries_reconfig_notifier_register(&pseries_smp_nb);
+ of_reconfig_notifier_register(&pseries_smp_nb);
cpu_maps_update_begin();
if (cede_offline_enabled && parse_cede_parameters() == 0) {
default_offline_state = CPU_STATE_INACTIVE;
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index ecdb0a6b317..2372c609fa2 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -16,7 +16,6 @@
#include <asm/firmware.h>
#include <asm/machdep.h>
-#include <asm/pSeries_reconfig.h>
#include <asm/sparsemem.h>
static unsigned long get_memblock_size(void)
@@ -187,42 +186,69 @@ static int pseries_add_memory(struct device_node *np)
return (ret < 0) ? -EINVAL : 0;
}
-static int pseries_drconf_memory(unsigned long *base, unsigned int action)
+static int pseries_update_drconf_memory(struct of_prop_reconfig *pr)
{
+ struct of_drconf_cell *new_drmem, *old_drmem;
unsigned long memblock_size;
- int rc;
+ u32 entries;
+ u32 *p;
+ int i, rc = -EINVAL;
memblock_size = get_memblock_size();
if (!memblock_size)
return -EINVAL;
- if (action == PSERIES_DRCONF_MEM_ADD) {
- rc = memblock_add(*base, memblock_size);
- rc = (rc < 0) ? -EINVAL : 0;
- } else if (action == PSERIES_DRCONF_MEM_REMOVE) {
- rc = pseries_remove_memblock(*base, memblock_size);
- } else {
- rc = -EINVAL;
+ p = (u32 *)of_get_property(pr->dn, "ibm,dynamic-memory", NULL);
+ if (!p)
+ return -EINVAL;
+
+ /* The first int of the property is the number of lmb's described
+ * by the property. This is followed by an array of of_drconf_cell
+ * entries. Get the niumber of entries and skip to the array of
+ * of_drconf_cell's.
+ */
+ entries = *p++;
+ old_drmem = (struct of_drconf_cell *)p;
+
+ p = (u32 *)pr->prop->value;
+ p++;
+ new_drmem = (struct of_drconf_cell *)p;
+
+ for (i = 0; i < entries; i++) {
+ if ((old_drmem[i].flags & DRCONF_MEM_ASSIGNED) &&
+ (!(new_drmem[i].flags & DRCONF_MEM_ASSIGNED))) {
+ rc = pseries_remove_memblock(old_drmem[i].base_addr,
+ memblock_size);
+ break;
+ } else if ((!(old_drmem[i].flags & DRCONF_MEM_ASSIGNED)) &&
+ (new_drmem[i].flags & DRCONF_MEM_ASSIGNED)) {
+ rc = memblock_add(old_drmem[i].base_addr,
+ memblock_size);
+ rc = (rc < 0) ? -EINVAL : 0;
+ break;
+ }
}
return rc;
}
static int pseries_memory_notifier(struct notifier_block *nb,
- unsigned long action, void *node)
+ unsigned long action, void *node)
{
+ struct of_prop_reconfig *pr;
int err = 0;
switch (action) {
- case PSERIES_RECONFIG_ADD:
+ case OF_RECONFIG_ATTACH_NODE:
err = pseries_add_memory(node);
break;
- case PSERIES_RECONFIG_REMOVE:
+ case OF_RECONFIG_DETACH_NODE:
err = pseries_remove_memory(node);
break;
- case PSERIES_DRCONF_MEM_ADD:
- case PSERIES_DRCONF_MEM_REMOVE:
- err = pseries_drconf_memory(node, action);
+ case OF_RECONFIG_UPDATE_PROPERTY:
+ pr = (struct of_prop_reconfig *)node;
+ if (!strcmp(pr->prop->name, "ibm,dynamic-memory"))
+ err = pseries_update_drconf_memory(pr);
break;
}
return notifier_from_errno(err);
@@ -235,7 +261,7 @@ static struct notifier_block pseries_mem_nb = {
static int __init pseries_memory_hotplug_init(void)
{
if (firmware_has_feature(FW_FEATURE_LPAR))
- pSeries_reconfig_notifier_register(&pseries_mem_nb);
+ of_reconfig_notifier_register(&pseries_mem_nb);
return 0;
}
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 6153eea27ce..e2685badb5d 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -36,13 +36,13 @@
#include <linux/dma-mapping.h>
#include <linux/crash_dump.h>
#include <linux/memory.h>
+#include <linux/of.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
-#include <asm/pSeries_reconfig.h>
#include <asm/firmware.h>
#include <asm/tce.h>
#include <asm/ppc-pci.h>
@@ -760,7 +760,7 @@ static void remove_ddw(struct device_node *np)
__remove_ddw(np, ddw_avail, liobn);
delprop:
- ret = prom_remove_property(np, win64);
+ ret = of_remove_property(np, win64);
if (ret)
pr_warning("%s: failed to remove direct window property: %d\n",
np->full_name, ret);
@@ -1070,7 +1070,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
goto out_free_window;
}
- ret = prom_add_property(pdn, win64);
+ ret = of_add_property(pdn, win64);
if (ret) {
dev_err(&dev->dev, "unable to add dma window property for %s: %d",
pdn->full_name, ret);
@@ -1294,7 +1294,7 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti
struct direct_window *window;
switch (action) {
- case PSERIES_RECONFIG_REMOVE:
+ case OF_RECONFIG_DETACH_NODE:
if (pci && pci->iommu_table)
iommu_free_table(pci->iommu_table, np->full_name);
@@ -1357,7 +1357,7 @@ void iommu_init_early_pSeries(void)
}
- pSeries_reconfig_notifier_register(&iommu_reconfig_nb);
+ of_reconfig_notifier_register(&iommu_reconfig_nb);
register_memory_notifier(&iommu_mem_nb);
set_pci_dma_ops(&dma_iommu_ops);
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index dd30b12edfe..6573808cc5f 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -116,7 +116,7 @@ static int update_dt_property(struct device_node *dn, struct property **prop,
}
if (!more) {
- prom_update_property(dn, new_prop);
+ of_update_property(dn, new_prop);
new_prop = NULL;
}
@@ -172,7 +172,7 @@ static int update_dt_node(u32 phandle)
case 0x80000000:
prop = of_find_property(dn, prop_name, NULL);
- prom_remove_property(dn, prop);
+ of_remove_property(dn, prop);
prop = NULL;
break;
diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h
index 13e8cc43adf..e6cc34a6705 100644
--- a/arch/powerpc/platforms/pseries/plpar_wrappers.h
+++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h
@@ -273,4 +273,35 @@ static inline long plpar_put_term_char(unsigned long termno, unsigned long len,
lbuf[1]);
}
+/* Set various resource mode parameters */
+static inline long plpar_set_mode(unsigned long mflags, unsigned long resource,
+ unsigned long value1, unsigned long value2)
+{
+ return plpar_hcall_norets(H_SET_MODE, mflags, resource, value1, value2);
+}
+
+/*
+ * Enable relocation on exceptions on this partition
+ *
+ * Note: this call has a partition wide scope and can take a while to complete.
+ * If it returns H_LONG_BUSY_* it should be retried periodically until it
+ * returns H_SUCCESS.
+ */
+static inline long enable_reloc_on_exceptions(void)
+{
+ /* mflags = 3: Exceptions at 0xC000000000004000 */
+ return plpar_set_mode(3, 3, 0, 0);
+}
+
+/*
+ * Disable relocation on exceptions on this partition
+ *
+ * Note: this call has a partition wide scope and can take a while to complete.
+ * If it returns H_LONG_BUSY_* it should be retried periodically until it
+ * returns H_SUCCESS.
+ */
+static inline long disable_reloc_on_exceptions(void) {
+ return plpar_set_mode(0, 3, 0, 0);
+}
+
#endif /* _PSERIES_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/platforms/pseries/processor_idle.c b/arch/powerpc/platforms/pseries/processor_idle.c
index 45d00e5fe14..4d806b41960 100644
--- a/arch/powerpc/platforms/pseries/processor_idle.c
+++ b/arch/powerpc/platforms/pseries/processor_idle.c
@@ -36,7 +36,7 @@ static struct cpuidle_state *cpuidle_state_table;
static inline void idle_loop_prolog(unsigned long *in_purr, ktime_t *kt_before)
{
- *kt_before = ktime_get_real();
+ *kt_before = ktime_get();
*in_purr = mfspr(SPRN_PURR);
/*
* Indicate to the HV that we are idle. Now would be
@@ -50,7 +50,7 @@ static inline s64 idle_loop_epilog(unsigned long in_purr, ktime_t kt_before)
get_lppaca()->wait_state_cycles += mfspr(SPRN_PURR) - in_purr;
get_lppaca()->idle = 0;
- return ktime_to_us(ktime_sub(ktime_get_real(), kt_before));
+ return ktime_to_us(ktime_sub(ktime_get(), kt_before));
}
static int snooze_loop(struct cpuidle_device *dev,
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index 39f71fba9b3..d6491bd481d 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -16,55 +16,13 @@
#include <linux/notifier.h>
#include <linux/proc_fs.h>
#include <linux/slab.h>
+#include <linux/of.h>
#include <asm/prom.h>
#include <asm/machdep.h>
#include <asm/uaccess.h>
-#include <asm/pSeries_reconfig.h>
#include <asm/mmu.h>
-
-
-/*
- * Routines for "runtime" addition and removal of device tree nodes.
- */
-#ifdef CONFIG_PROC_DEVICETREE
-/*
- * Add a node to /proc/device-tree.
- */
-static void add_node_proc_entries(struct device_node *np)
-{
- struct proc_dir_entry *ent;
-
- ent = proc_mkdir(strrchr(np->full_name, '/') + 1, np->parent->pde);
- if (ent)
- proc_device_tree_add_node(np, ent);
-}
-
-static void remove_node_proc_entries(struct device_node *np)
-{
- struct property *pp = np->properties;
- struct device_node *parent = np->parent;
-
- while (pp) {
- remove_proc_entry(pp->name, np->pde);
- pp = pp->next;
- }
- if (np->pde)
- remove_proc_entry(np->pde->name, parent->pde);
-}
-#else /* !CONFIG_PROC_DEVICETREE */
-static void add_node_proc_entries(struct device_node *np)
-{
- return;
-}
-
-static void remove_node_proc_entries(struct device_node *np)
-{
- return;
-}
-#endif /* CONFIG_PROC_DEVICETREE */
-
/**
* derive_parent - basically like dirname(1)
* @path: the full_name of a node to be added to the tree
@@ -97,28 +55,6 @@ static struct device_node *derive_parent(const char *path)
return parent;
}
-static BLOCKING_NOTIFIER_HEAD(pSeries_reconfig_chain);
-
-int pSeries_reconfig_notifier_register(struct notifier_block *nb)
-{
- return blocking_notifier_chain_register(&pSeries_reconfig_chain, nb);
-}
-EXPORT_SYMBOL_GPL(pSeries_reconfig_notifier_register);
-
-void pSeries_reconfig_notifier_unregister(struct notifier_block *nb)
-{
- blocking_notifier_chain_unregister(&pSeries_reconfig_chain, nb);
-}
-EXPORT_SYMBOL_GPL(pSeries_reconfig_notifier_unregister);
-
-int pSeries_reconfig_notify(unsigned long action, void *p)
-{
- int err = blocking_notifier_call_chain(&pSeries_reconfig_chain,
- action, p);
-
- return notifier_to_errno(err);
-}
-
static int pSeries_reconfig_add_node(const char *path, struct property *proplist)
{
struct device_node *np;
@@ -142,16 +78,12 @@ static int pSeries_reconfig_add_node(const char *path, struct property *proplist
goto out_err;
}
- err = pSeries_reconfig_notify(PSERIES_RECONFIG_ADD, np);
+ err = of_attach_node(np);
if (err) {
printk(KERN_ERR "Failed to add device node %s\n", path);
goto out_err;
}
- of_attach_node(np);
-
- add_node_proc_entries(np);
-
of_node_put(np->parent);
return 0;
@@ -179,11 +111,7 @@ static int pSeries_reconfig_remove_node(struct device_node *np)
return -EBUSY;
}
- remove_node_proc_entries(np);
-
- pSeries_reconfig_notify(PSERIES_RECONFIG_REMOVE, np);
of_detach_node(np);
-
of_node_put(parent);
of_node_put(np); /* Must decrement the refcount */
return 0;
@@ -281,12 +209,11 @@ static struct property *new_property(const char *name, const int length,
if (!new)
return NULL;
- if (!(new->name = kmalloc(strlen(name) + 1, GFP_KERNEL)))
+ if (!(new->name = kstrdup(name, GFP_KERNEL)))
goto cleanup;
if (!(new->value = kmalloc(length + 1, GFP_KERNEL)))
goto cleanup;
- strcpy(new->name, name);
memcpy(new->value, value, length);
*(((char *)new->value) + length) = 0;
new->length = length;
@@ -398,7 +325,7 @@ static int do_add_property(char *buf, size_t bufsize)
if (!prop)
return -ENOMEM;
- prom_add_property(np, prop);
+ of_add_property(np, prop);
return 0;
}
@@ -422,16 +349,15 @@ static int do_remove_property(char *buf, size_t bufsize)
prop = of_find_property(np, buf, NULL);
- return prom_remove_property(np, prop);
+ return of_remove_property(np, prop);
}
static int do_update_property(char *buf, size_t bufsize)
{
struct device_node *np;
- struct pSeries_reconfig_prop_update upd_value;
unsigned char *value;
char *name, *end, *next_prop;
- int rc, length;
+ int length;
struct property *newprop;
buf = parse_node(buf, bufsize, &np);
end = buf + bufsize;
@@ -453,41 +379,7 @@ static int do_update_property(char *buf, size_t bufsize)
if (!strcmp(name, "slb-size") || !strcmp(name, "ibm,slb-size"))
slb_set_size(*(int *)value);
- upd_value.node = np;
- upd_value.property = newprop;
- pSeries_reconfig_notify(PSERIES_UPDATE_PROPERTY, &upd_value);
-
- rc = prom_update_property(np, newprop);
- if (rc)
- return rc;
-
- /* For memory under the ibm,dynamic-reconfiguration-memory node
- * of the device tree, adding and removing memory is just an update
- * to the ibm,dynamic-memory property instead of adding/removing a
- * memory node in the device tree. For these cases we still need to
- * involve the notifier chain.
- */
- if (!strcmp(name, "ibm,dynamic-memory")) {
- int action;
-
- next_prop = parse_next_property(next_prop, end, &name,
- &length, &value);
- if (!next_prop)
- return -EINVAL;
-
- if (!strcmp(name, "add"))
- action = PSERIES_DRCONF_MEM_ADD;
- else
- action = PSERIES_DRCONF_MEM_REMOVE;
-
- rc = pSeries_reconfig_notify(action, value);
- if (rc) {
- prom_update_property(np, newprop);
- return rc;
- }
- }
-
- return 0;
+ return of_update_property(np, newprop);
}
/**
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index e3cb7ae6165..ca55882465d 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -40,6 +40,8 @@
#include <linux/seq_file.h>
#include <linux/root_dev.h>
#include <linux/cpuidle.h>
+#include <linux/of.h>
+#include <linux/kexec.h>
#include <asm/mmu.h>
#include <asm/processor.h>
@@ -63,7 +65,6 @@
#include <asm/smp.h>
#include <asm/firmware.h>
#include <asm/eeh.h>
-#include <asm/pSeries_reconfig.h>
#include "plpar_wrappers.h"
#include "pseries.h"
@@ -258,7 +259,7 @@ static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long act
int err = NOTIFY_OK;
switch (action) {
- case PSERIES_RECONFIG_ADD:
+ case OF_RECONFIG_ATTACH_NODE:
pci = np->parent->data;
if (pci) {
update_dn_pci_info(np, pci->phb);
@@ -367,6 +368,65 @@ static void pSeries_idle(void)
}
}
+/*
+ * Enable relocation on during exceptions. This has partition wide scope and
+ * may take a while to complete, if it takes longer than one second we will
+ * just give up rather than wasting any more time on this - if that turns out
+ * to ever be a problem in practice we can move this into a kernel thread to
+ * finish off the process later in boot.
+ */
+static int __init pSeries_enable_reloc_on_exc(void)
+{
+ long rc;
+ unsigned int delay, total_delay = 0;
+
+ while (1) {
+ rc = enable_reloc_on_exceptions();
+ if (!H_IS_LONG_BUSY(rc))
+ return rc;
+
+ delay = get_longbusy_msecs(rc);
+ total_delay += delay;
+ if (total_delay > 1000) {
+ pr_warn("Warning: Giving up waiting to enable "
+ "relocation on exceptions (%u msec)!\n",
+ total_delay);
+ return rc;
+ }
+
+ mdelay(delay);
+ }
+}
+
+#ifdef CONFIG_KEXEC
+static long pSeries_disable_reloc_on_exc(void)
+{
+ long rc;
+
+ while (1) {
+ rc = disable_reloc_on_exceptions();
+ if (!H_IS_LONG_BUSY(rc))
+ return rc;
+ mdelay(get_longbusy_msecs(rc));
+ }
+}
+
+static void pSeries_machine_kexec(struct kimage *image)
+{
+ long rc;
+
+ if (firmware_has_feature(FW_FEATURE_SET_MODE) &&
+ (image->type != KEXEC_TYPE_CRASH)) {
+ rc = pSeries_disable_reloc_on_exc();
+ if (rc != H_SUCCESS)
+ pr_warning("Warning: Failed to disable relocation on "
+ "exceptions: %ld\n", rc);
+ }
+
+ default_machine_kexec(image);
+}
+#endif
+
static void __init pSeries_setup_arch(void)
{
panic_timeout = 10;
@@ -389,7 +449,7 @@ static void __init pSeries_setup_arch(void)
/* Find and initialize PCI host bridges */
init_pci_config_tokens();
find_and_init_phbs();
- pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb);
+ of_reconfig_notifier_register(&pci_dn_reconfig_nb);
pSeries_nvram_init();
@@ -402,6 +462,14 @@ static void __init pSeries_setup_arch(void)
ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
else
ppc_md.enable_pmcs = power4_enable_pmcs;
+
+ if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
+ long rc;
+ if ((rc = pSeries_enable_reloc_on_exc()) != H_SUCCESS) {
+ pr_warn("Unable to enable relocation on exceptions: "
+ "%ld\n", rc);
+ }
+ }
}
static int __init pSeries_init_panel(void)
@@ -659,4 +727,7 @@ define_machine(pseries) {
.progress = rtas_progress,
.system_reset_exception = pSeries_system_reset_exception,
.machine_check_exception = pSeries_machine_check_exception,
+#ifdef CONFIG_KEXEC
+ .machine_kexec = pSeries_machine_kexec,
+#endif
};
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 71706bc34a0..9fc0a494190 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -38,7 +38,6 @@
#include <asm/cputable.h>
#include <asm/firmware.h>
#include <asm/rtas.h>
-#include <asm/pSeries_reconfig.h>
#include <asm/mpic.h>
#include <asm/vdso_datapage.h>
#include <asm/cputhreads.h>
diff --git a/arch/powerpc/sysdev/fsl_gtm.c b/arch/powerpc/sysdev/fsl_gtm.c
index 02cf1e7e77f..0eb871cc343 100644
--- a/arch/powerpc/sysdev/fsl_gtm.c
+++ b/arch/powerpc/sysdev/fsl_gtm.c
@@ -1,7 +1,7 @@
/*
* Freescale General-purpose Timers Module
*
- * Copyright (c) Freescale Semicondutor, Inc. 2006.
+ * Copyright (c) Freescale Semiconductor, Inc. 2006.
* Shlomi Gridish <gridish@freescale.com>
* Jerry Huang <Chang-Ming.Huang@freescale.com>
* Copyright (c) MontaVista Software, Inc. 2008.
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index 51ffafae561..63c5f04ea58 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -236,7 +236,6 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc)
u32 intr_index;
u32 have_shift = 0;
struct fsl_msi_cascade_data *cascade_data;
- unsigned int ret;
cascade_data = irq_get_handler_data(irq);
msi_data = cascade_data->msi_data;
@@ -268,7 +267,9 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc)
case FSL_PIC_IP_IPIC:
msir_value = fsl_msi_read(msi_data->msi_regs, msir_index * 0x4);
break;
- case FSL_PIC_IP_VMPIC:
+#ifdef CONFIG_EPAPR_PARAVIRT
+ case FSL_PIC_IP_VMPIC: {
+ unsigned int ret;
ret = fh_vmpic_get_msir(virq_to_hw(irq), &msir_value);
if (ret) {
pr_err("fsl-msi: fh_vmpic_get_msir() failed for "
@@ -277,6 +278,8 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc)
}
break;
}
+#endif
+ }
while (msir_value) {
intr_index = ffs(msir_value) - 1;
@@ -508,10 +511,12 @@ static const struct of_device_id fsl_of_msi_ids[] = {
.compatible = "fsl,ipic-msi",
.data = &ipic_msi_feature,
},
+#ifdef CONFIG_EPAPR_PARAVIRT
{
.compatible = "fsl,vmpic-msi",
.data = &vmpic_msi_feature,
},
+#endif
{}
};
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index ffb93ae9379..5ba325bff3a 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -89,7 +89,7 @@ static int fsl_pci_dma_set_mask(struct device *dev, u64 dma_mask)
return 0;
}
-static int __init setup_one_atmu(struct ccsr_pci __iomem *pci,
+static int setup_one_atmu(struct ccsr_pci __iomem *pci,
unsigned int index, const struct resource *res,
resource_size_t offset)
{
@@ -126,7 +126,7 @@ static int __init setup_one_atmu(struct ccsr_pci __iomem *pci,
}
/* atmu setup for fsl pci/pcie controller */
-static void __init setup_pci_atmu(struct pci_controller *hose,
+static void setup_pci_atmu(struct pci_controller *hose,
struct resource *rsrc)
{
struct ccsr_pci __iomem *pci;
@@ -136,7 +136,7 @@ static void __init setup_pci_atmu(struct pci_controller *hose,
u32 pcicsrbar = 0, pcicsrbar_sz;
u32 piwar = PIWAR_EN | PIWAR_PF | PIWAR_TGI_LOCAL |
PIWAR_READ_SNOOP | PIWAR_WRITE_SNOOP;
- char *name = hose->dn->full_name;
+ const char *name = hose->dn->full_name;
const u64 *reg;
int len;
@@ -902,9 +902,42 @@ static int __devinit fsl_pci_probe(struct platform_device *pdev)
return 0;
}
+#ifdef CONFIG_PM
+static int fsl_pci_resume(struct device *dev)
+{
+ struct pci_controller *hose;
+ struct resource pci_rsrc;
+
+ hose = pci_find_hose_for_OF_device(dev->of_node);
+ if (!hose)
+ return -ENODEV;
+
+ if (of_address_to_resource(dev->of_node, 0, &pci_rsrc)) {
+ dev_err(dev, "Get pci register base failed.");
+ return -ENODEV;
+ }
+
+ setup_pci_atmu(hose, &pci_rsrc);
+
+ return 0;
+}
+
+static const struct dev_pm_ops pci_pm_ops = {
+ .resume = fsl_pci_resume,
+};
+
+#define PCI_PM_OPS (&pci_pm_ops)
+
+#else
+
+#define PCI_PM_OPS NULL
+
+#endif
+
static struct platform_driver fsl_pci_driver = {
.driver = {
.name = "fsl-pci",
+ .pm = PCI_PM_OPS,
.of_match_table = pci_ids,
},
.probe = fsl_pci_probe,
diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c
index c449dbd1c93..97118dc3d28 100644
--- a/arch/powerpc/sysdev/fsl_soc.c
+++ b/arch/powerpc/sysdev/fsl_soc.c
@@ -253,6 +253,7 @@ struct platform_diu_data_ops diu_ops;
EXPORT_SYMBOL(diu_ops);
#endif
+#ifdef CONFIG_EPAPR_PARAVIRT
/*
* Restart the current partition
*
@@ -278,3 +279,4 @@ void fsl_hv_halt(void)
pr_info("hv exit\n");
fh_partition_stop(-1);
}
+#endif
diff --git a/arch/powerpc/sysdev/pmi.c b/arch/powerpc/sysdev/pmi.c
index 8f0465422b1..5aaf86c0389 100644
--- a/arch/powerpc/sysdev/pmi.c
+++ b/arch/powerpc/sysdev/pmi.c
@@ -214,18 +214,7 @@ static struct platform_driver pmi_of_platform_driver = {
.of_match_table = pmi_match,
},
};
-
-static int __init pmi_module_init(void)
-{
- return platform_driver_register(&pmi_of_platform_driver);
-}
-module_init(pmi_module_init);
-
-static void __exit pmi_module_exit(void)
-{
- platform_driver_unregister(&pmi_of_platform_driver);
-}
-module_exit(pmi_module_exit);
+module_platform_driver(pmi_of_platform_driver);
int pmi_send_message(pmi_message_t msg)
{
diff --git a/arch/powerpc/sysdev/qe_lib/qe.c b/arch/powerpc/sysdev/qe_lib/qe.c
index b0436752972..238a07b97f2 100644
--- a/arch/powerpc/sysdev/qe_lib/qe.c
+++ b/arch/powerpc/sysdev/qe_lib/qe.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2006-2010 Freescale Semicondutor, Inc. All rights reserved.
+ * Copyright (C) 2006-2010 Freescale Semiconductor, Inc. All rights reserved.
*
* Authors: Shlomi Gridish <gridish@freescale.com>
* Li Yang <leoli@freescale.com>
diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.c b/arch/powerpc/sysdev/qe_lib/qe_ic.c
index 2fba6ef2f95..b2b87c30e26 100644
--- a/arch/powerpc/sysdev/qe_lib/qe_ic.c
+++ b/arch/powerpc/sysdev/qe_lib/qe_ic.c
@@ -1,7 +1,7 @@
/*
* arch/powerpc/sysdev/qe_lib/qe_ic.c
*
- * Copyright (C) 2006 Freescale Semicondutor, Inc. All rights reserved.
+ * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
*
* Author: Li Yang <leoli@freescale.com>
* Based on code from Shlomi Gridish <gridish@freescale.com>
diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.h b/arch/powerpc/sysdev/qe_lib/qe_ic.h
index c327872ed35..efef7ab9b75 100644
--- a/arch/powerpc/sysdev/qe_lib/qe_ic.h
+++ b/arch/powerpc/sysdev/qe_lib/qe_ic.h
@@ -3,7 +3,7 @@
*
* QUICC ENGINE Interrupt Controller Header
*
- * Copyright (C) 2006 Freescale Semicondutor, Inc. All rights reserved.
+ * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
*
* Author: Li Yang <leoli@freescale.com>
* Based on code from Shlomi Gridish <gridish@freescale.com>
diff --git a/arch/powerpc/sysdev/qe_lib/qe_io.c b/arch/powerpc/sysdev/qe_lib/qe_io.c
index fd1a6c3b172..a88807b3dd5 100644
--- a/arch/powerpc/sysdev/qe_lib/qe_io.c
+++ b/arch/powerpc/sysdev/qe_lib/qe_io.c
@@ -3,7 +3,7 @@
*
* QE Parallel I/O ports configuration routines
*
- * Copyright (C) Freescale Semicondutor, Inc. 2006. All rights reserved.
+ * Copyright 2006 Freescale Semiconductor, Inc. All rights reserved.
*
* Author: Li Yang <LeoLi@freescale.com>
* Based on code from Shlomi Gridish <gridish@freescale.com>
diff --git a/arch/powerpc/sysdev/qe_lib/ucc.c b/arch/powerpc/sysdev/qe_lib/ucc.c
index 04677505f20..134b07d2943 100644
--- a/arch/powerpc/sysdev/qe_lib/ucc.c
+++ b/arch/powerpc/sysdev/qe_lib/ucc.c
@@ -3,7 +3,7 @@
*
* QE UCC API Set - UCC specific routines implementations.
*
- * Copyright (C) 2006 Freescale Semicondutor, Inc. All rights reserved.
+ * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
*
* Authors: Shlomi Gridish <gridish@freescale.com>
* Li Yang <leoli@freescale.com>
diff --git a/arch/powerpc/sysdev/qe_lib/ucc_fast.c b/arch/powerpc/sysdev/qe_lib/ucc_fast.c
index fba02440d12..cceb2e36673 100644
--- a/arch/powerpc/sysdev/qe_lib/ucc_fast.c
+++ b/arch/powerpc/sysdev/qe_lib/ucc_fast.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2006 Freescale Semicondutor, Inc. All rights reserved.
+ * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
*
* Authors: Shlomi Gridish <gridish@freescale.com>
* Li Yang <leoli@freescale.com>
diff --git a/arch/powerpc/sysdev/qe_lib/ucc_slow.c b/arch/powerpc/sysdev/qe_lib/ucc_slow.c
index 524c0ead941..1c062f48f1a 100644
--- a/arch/powerpc/sysdev/qe_lib/ucc_slow.c
+++ b/arch/powerpc/sysdev/qe_lib/ucc_slow.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2006 Freescale Semicondutor, Inc. All rights reserved.
+ * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
*
* Authors: Shlomi Gridish <gridish@freescale.com>
* Li Yang <leoli@freescale.com>
diff --git a/arch/powerpc/sysdev/qe_lib/usb.c b/arch/powerpc/sysdev/qe_lib/usb.c
index 9162828f5da..27f23bd15eb 100644
--- a/arch/powerpc/sysdev/qe_lib/usb.c
+++ b/arch/powerpc/sysdev/qe_lib/usb.c
@@ -1,7 +1,7 @@
/*
* QE USB routines
*
- * Copyright (c) Freescale Semicondutor, Inc. 2006.
+ * Copyright 2006 Freescale Semiconductor, Inc.
* Shlomi Gridish <gridish@freescale.com>
* Jerry Huang <Chang-Ming.Huang@freescale.com>
* Copyright (c) MontaVista Software, Inc. 2008.
diff --git a/arch/powerpc/sysdev/scom.c b/arch/powerpc/sysdev/scom.c
index 702256a1ca1..9193e12df69 100644
--- a/arch/powerpc/sysdev/scom.c
+++ b/arch/powerpc/sysdev/scom.c
@@ -157,7 +157,7 @@ static int scom_debug_init_one(struct dentry *root, struct device_node *dn,
ent->map = SCOM_MAP_INVALID;
spin_lock_init(&ent->lock);
snprintf(ent->name, 8, "scom%d", i);
- ent->blob.data = dn->full_name;
+ ent->blob.data = (void*) dn->full_name;
ent->blob.size = strlen(dn->full_name);
dir = debugfs_create_dir(ent->name, root);
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
index c168c54e3c4..b49fdbd1580 100644
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -6,7 +6,7 @@ GCOV_PROFILE := n
ccflags-$(CONFIG_PPC64) := -mno-minimal-toc
-obj-y += xmon.o start.o nonstdio.o
+obj-y += xmon.o nonstdio.o
ifdef CONFIG_XMON_DISASSEMBLY
obj-y += ppc-dis.o ppc-opc.o
diff --git a/arch/powerpc/xmon/nonstdio.c b/arch/powerpc/xmon/nonstdio.c
index bfac84fbe78..bce3dcfe505 100644
--- a/arch/powerpc/xmon/nonstdio.c
+++ b/arch/powerpc/xmon/nonstdio.c
@@ -7,9 +7,23 @@
* 2 of the License, or (at your option) any later version.
*/
#include <linux/string.h>
+#include <asm/udbg.h>
#include <asm/time.h>
#include "nonstdio.h"
+
+static int xmon_write(const void *ptr, int nb)
+{
+ return udbg_write(ptr, nb);
+}
+
+static int xmon_readchar(void)
+{
+ if (udbg_getc)
+ return udbg_getc();
+ return -1;
+}
+
int xmon_putchar(int c)
{
char ch = c;
@@ -23,34 +37,7 @@ static char line[256];
static char *lineptr;
static int lineleft;
-int xmon_expect(const char *str, unsigned long timeout)
-{
- int c;
- unsigned long t0;
-
- /* assume 25MHz default timebase if tb_ticks_per_sec not set yet */
- timeout *= tb_ticks_per_sec? tb_ticks_per_sec: 25000000;
- t0 = get_tbl();
- do {
- lineptr = line;
- for (;;) {
- c = xmon_read_poll();
- if (c == -1) {
- if (get_tbl() - t0 > timeout)
- return 0;
- continue;
- }
- if (c == '\n')
- break;
- if (c != '\r' && lineptr < &line[sizeof(line) - 1])
- *lineptr++ = c;
- }
- *lineptr = 0;
- } while (strstr(line, str) == NULL);
- return 1;
-}
-
-int xmon_getchar(void)
+static int xmon_getchar(void)
{
int c;
@@ -124,13 +111,19 @@ char *xmon_gets(char *str, int nb)
void xmon_printf(const char *format, ...)
{
va_list args;
- int n;
static char xmon_outbuf[1024];
+ int rc, n;
va_start(args, format);
n = vsnprintf(xmon_outbuf, sizeof(xmon_outbuf), format, args);
va_end(args);
- xmon_write(xmon_outbuf, n);
+
+ rc = xmon_write(xmon_outbuf, n);
+
+ if (n && rc == 0) {
+ /* No udbg hooks, fallback to printk() - dangerous */
+ printk(xmon_outbuf);
+ }
}
void xmon_puts(const char *str)
diff --git a/arch/powerpc/xmon/nonstdio.h b/arch/powerpc/xmon/nonstdio.h
index 23dd95f4599..18a51ded4ff 100644
--- a/arch/powerpc/xmon/nonstdio.h
+++ b/arch/powerpc/xmon/nonstdio.h
@@ -4,12 +4,6 @@
#define putchar xmon_putchar
extern int xmon_putchar(int c);
-extern int xmon_getchar(void);
extern void xmon_puts(const char *);
extern char *xmon_gets(char *, int);
extern void xmon_printf(const char *, ...);
-extern void xmon_map_scc(void);
-extern int xmon_expect(const char *str, unsigned long timeout);
-extern int xmon_write(const void *ptr, int nb);
-extern int xmon_readchar(void);
-extern int xmon_read_poll(void);
diff --git a/arch/powerpc/xmon/start.c b/arch/powerpc/xmon/start.c
deleted file mode 100644
index 8864de2af38..00000000000
--- a/arch/powerpc/xmon/start.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (C) 1996 Paul Mackerras.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <asm/machdep.h>
-#include <asm/udbg.h>
-#include "nonstdio.h"
-
-void xmon_map_scc(void)
-{
-}
-
-int xmon_write(const void *ptr, int nb)
-{
- return udbg_write(ptr, nb);
-}
-
-int xmon_readchar(void)
-{
- if (udbg_getc)
- return udbg_getc();
- return -1;
-}
-
-int xmon_read_poll(void)
-{
- if (udbg_getc_poll)
- return udbg_getc_poll();
- return -1;
-}
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 3a56a639a92..1f8d2f10a43 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -52,9 +52,6 @@
#include "nonstdio.h"
#include "dis-asm.h"
-#define scanhex xmon_scanhex
-#define skipbl xmon_skipbl
-
#ifdef CONFIG_SMP
static cpumask_t cpus_in_xmon = CPU_MASK_NONE;
static unsigned long xmon_taken = 1;
@@ -169,12 +166,8 @@ extern void xmon_leave(void);
#ifdef CONFIG_PPC64
#define REG "%.16lx"
-#define REGS_PER_LINE 4
-#define LAST_VOLATILE 13
#else
#define REG "%.8lx"
-#define REGS_PER_LINE 8
-#define LAST_VOLATILE 12
#endif
#define GETWORD(v) (((v)[0] << 24) + ((v)[1] << 16) + ((v)[2] << 8) + (v)[3])
@@ -1288,27 +1281,19 @@ static void get_function_bounds(unsigned long pc, unsigned long *startp,
catch_memory_errors = 0;
}
-static int xmon_depth_to_print = 64;
-
#define LRSAVE_OFFSET (STACK_FRAME_LR_SAVE * sizeof(unsigned long))
#define MARKER_OFFSET (STACK_FRAME_MARKER * sizeof(unsigned long))
-#ifdef __powerpc64__
-#define REGS_OFFSET 0x70
-#else
-#define REGS_OFFSET 16
-#endif
-
static void xmon_show_stack(unsigned long sp, unsigned long lr,
unsigned long pc)
{
+ int max_to_print = 64;
unsigned long ip;
unsigned long newsp;
unsigned long marker;
- int count = 0;
struct pt_regs regs;
- do {
+ while (max_to_print--) {
if (sp < PAGE_OFFSET) {
if (sp != 0)
printf("SP (%lx) is in userspace\n", sp);
@@ -1362,10 +1347,10 @@ static void xmon_show_stack(unsigned long sp, unsigned long lr,
an exception frame. */
if (mread(sp + MARKER_OFFSET, &marker, sizeof(unsigned long))
&& marker == STACK_FRAME_REGS_MARKER) {
- if (mread(sp + REGS_OFFSET, &regs, sizeof(regs))
+ if (mread(sp + STACK_FRAME_OVERHEAD, &regs, sizeof(regs))
!= sizeof(regs)) {
printf("Couldn't read registers at %lx\n",
- sp + REGS_OFFSET);
+ sp + STACK_FRAME_OVERHEAD);
break;
}
printf("--- Exception: %lx %s at ", regs.trap,
@@ -1379,7 +1364,7 @@ static void xmon_show_stack(unsigned long sp, unsigned long lr,
break;
sp = newsp;
- } while (count++ < xmon_depth_to_print);
+ }
}
static void backtrace(struct pt_regs *excp)
@@ -2943,7 +2928,6 @@ static void xmon_init(int enable)
__debugger_dabr_match = NULL;
__debugger_fault_handler = NULL;
}
- xmon_map_scc();
}
#ifdef CONFIG_MAGIC_SYSRQ