path: root/hw/i386
author     SeokYeon Hwang <syeon.hwang@samsung.com>  2016-12-20 10:13:15 +0900
committer  SeokYeon Hwang <syeon.hwang@samsung.com>  2016-12-20 10:13:15 +0900
commit     dc36664b156b6aa2b55f2bca5fd0c643b6417ddb (patch)
tree       bb319daf3cd759c2d91dd541bb2ee24d8ca4ee1a /hw/i386
parent     100d9fdc18f28d813f9d22025d783a7cdcc4bb4b (diff)
parent     6a928d25b6d8bc3729c3d28326c6db13b9481059 (diff)
Merge tag 'v2.8.0-rc4' into develop
v2.8.0-rc4 release

Change-Id: I0158b5078d1af545dc32a51f10d2f8f0b96543a6
Signed-off-by: SeokYeon Hwang <syeon.hwang@samsung.com>
Diffstat (limited to 'hw/i386')
-rw-r--r--  hw/i386/Makefile.objs           |    1
-rw-r--r--  hw/i386/acpi-build.c            |  214
-rw-r--r--  hw/i386/amd_iommu.c             | 1212
-rw-r--r--  hw/i386/amd_iommu.h             |  289
-rw-r--r--  hw/i386/intel_iommu.c           |  119
-rw-r--r--  hw/i386/intel_iommu_internal.h  |    2
-rw-r--r--  hw/i386/kvm/apic.c              |   66
-rw-r--r--  hw/i386/kvm/i8259.c             |    2
-rw-r--r--  hw/i386/kvm/pci-assign.c        |    4
-rw-r--r--  hw/i386/kvmvapic.c              |   20
-rw-r--r--  hw/i386/pc.c                    |  185
-rw-r--r--  hw/i386/pc_piix.c               |   33
-rw-r--r--  hw/i386/pc_q35.c                |   30
-rw-r--r--  hw/i386/trace-events            |   33
-rw-r--r--  hw/i386/x86-iommu.c             |    6
-rw-r--r--  hw/i386/xen/xen_apic.c          |    6
-rw-r--r--  hw/i386/xen/xen_platform.c      |   33
17 files changed, 2040 insertions(+), 215 deletions(-)
diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs
index 90e94ffefd..909ead6a77 100644
--- a/hw/i386/Makefile.objs
+++ b/hw/i386/Makefile.objs
@@ -3,6 +3,7 @@ obj-y += multiboot.o
obj-y += pc.o pc_piix.o pc_q35.o
obj-y += pc_sysfw.o
obj-y += x86-iommu.o intel_iommu.o
+obj-y += amd_iommu.o
obj-$(CONFIG_XEN) += ../xenpv/ xen/
obj-y += kvmvapic.o
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index a26a4bb03f..9708cdc463 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -53,13 +53,13 @@
#include "hw/pci/pci_bus.h"
#include "hw/pci-host/q35.h"
#include "hw/i386/x86-iommu.h"
-#include "hw/timer/hpet.h"
#include "hw/acpi/aml-build.h"
#include "qapi/qmp/qint.h"
#include "qom/qom-qobject.h"
-#include "hw/i386/x86-iommu.h"
+#include "hw/i386/amd_iommu.h"
+#include "hw/i386/intel_iommu.h"
#include "hw/acpi/ipmi.h"
@@ -339,24 +339,38 @@ build_fadt(GArray *table_data, BIOSLinker *linker, AcpiPmInfo *pm,
void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid,
CPUArchIdList *apic_ids, GArray *entry)
{
- int apic_id;
- AcpiMadtProcessorApic *apic = acpi_data_push(entry, sizeof *apic);
+ uint32_t apic_id = apic_ids->cpus[uid].arch_id;
- apic_id = apic_ids->cpus[uid].arch_id;
- apic->type = ACPI_APIC_PROCESSOR;
- apic->length = sizeof(*apic);
- apic->processor_id = uid;
- apic->local_apic_id = apic_id;
- if (apic_ids->cpus[uid].cpu != NULL) {
- apic->flags = cpu_to_le32(1);
+ /* ACPI spec says that LAPIC entry for non present
+ * CPU may be omitted from MADT or it must be marked
+ * as disabled. However omitting non present CPU from
+ * MADT breaks hotplug on linux. So possible CPUs
+ * should be put in MADT but kept disabled.
+ */
+ if (apic_id < 255) {
+ AcpiMadtProcessorApic *apic = acpi_data_push(entry, sizeof *apic);
+
+ apic->type = ACPI_APIC_PROCESSOR;
+ apic->length = sizeof(*apic);
+ apic->processor_id = uid;
+ apic->local_apic_id = apic_id;
+ if (apic_ids->cpus[uid].cpu != NULL) {
+ apic->flags = cpu_to_le32(1);
+ } else {
+ apic->flags = cpu_to_le32(0);
+ }
} else {
- /* ACPI spec says that LAPIC entry for non present
- * CPU may be omitted from MADT or it must be marked
- * as disabled. However omitting non present CPU from
- * MADT breaks hotplug on linux. So possible CPUs
- * should be put in MADT but kept disabled.
- */
- apic->flags = cpu_to_le32(0);
+ AcpiMadtProcessorX2Apic *apic = acpi_data_push(entry, sizeof *apic);
+
+ apic->type = ACPI_APIC_LOCAL_X2APIC;
+ apic->length = sizeof(*apic);
+ apic->uid = cpu_to_le32(uid);
+ apic->x2apic_id = cpu_to_le32(apic_id);
+ if (apic_ids->cpus[uid].cpu != NULL) {
+ apic->flags = cpu_to_le32(1);
+ } else {
+ apic->flags = cpu_to_le32(0);
+ }
}
}
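
The branch above follows a single rule: APIC IDs 0..254 fit the one-byte Local APIC ID field of a conventional LAPIC entry, while 0xFF is the LAPIC broadcast ID, so any ID of 255 or above has to be described with a 32-bit x2APIC entry instead. A minimal sketch of that decision rule (a hypothetical helper, not part of this patch):

    #include <stdbool.h>
    #include <stdint.h>

    /* Sketch: choose the MADT entry type for a given APIC ID. 0xFF is
     * the LAPIC broadcast ID, so the 8-bit Local APIC entry can only
     * describe IDs 0..254; larger IDs need the 32-bit x2APIC entry. */
    static bool madt_needs_x2apic(uint32_t apic_id)
    {
        return apic_id >= 255;
    }
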
@@ -368,11 +382,11 @@ build_madt(GArray *table_data, BIOSLinker *linker, PCMachineState *pcms)
int madt_start = table_data->len;
AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(pcms->acpi_dev);
AcpiDeviceIf *adev = ACPI_DEVICE_IF(pcms->acpi_dev);
+ bool x2apic_mode = false;
AcpiMultipleApicTable *madt;
AcpiMadtIoApic *io_apic;
AcpiMadtIntsrcovr *intsrcovr;
- AcpiMadtLocalNmi *local_nmi;
int i;
madt = acpi_data_push(table_data, sizeof *madt);
@@ -381,6 +395,9 @@ build_madt(GArray *table_data, BIOSLinker *linker, PCMachineState *pcms)
for (i = 0; i < apic_ids->len; i++) {
adevc->madt_cpu(adev, i, apic_ids, table_data);
+ if (apic_ids->cpus[i].arch_id > 254) {
+ x2apic_mode = true;
+ }
}
g_free(apic_ids);
@@ -413,12 +430,25 @@ build_madt(GArray *table_data, BIOSLinker *linker, PCMachineState *pcms)
intsrcovr->flags = cpu_to_le16(0xd); /* active high, level triggered */
}
- local_nmi = acpi_data_push(table_data, sizeof *local_nmi);
- local_nmi->type = ACPI_APIC_LOCAL_NMI;
- local_nmi->length = sizeof(*local_nmi);
- local_nmi->processor_id = 0xff; /* all processors */
- local_nmi->flags = cpu_to_le16(0);
- local_nmi->lint = 1; /* ACPI_LINT1 */
+ if (x2apic_mode) {
+ AcpiMadtLocalX2ApicNmi *local_nmi;
+
+ local_nmi = acpi_data_push(table_data, sizeof *local_nmi);
+ local_nmi->type = ACPI_APIC_LOCAL_X2APIC_NMI;
+ local_nmi->length = sizeof(*local_nmi);
+ local_nmi->uid = 0xFFFFFFFF; /* all processors */
+ local_nmi->flags = cpu_to_le16(0);
+ local_nmi->lint = 1; /* ACPI_LINT1 */
+ } else {
+ AcpiMadtLocalNmi *local_nmi;
+
+ local_nmi = acpi_data_push(table_data, sizeof *local_nmi);
+ local_nmi->type = ACPI_APIC_LOCAL_NMI;
+ local_nmi->length = sizeof(*local_nmi);
+ local_nmi->processor_id = 0xff; /* all processors */
+ local_nmi->flags = cpu_to_le16(0);
+ local_nmi->lint = 1; /* ACPI_LINT1 */
+ }
build_header(linker, table_data,
(void *)(table_data->data + madt_start), "APIC",
@@ -789,7 +819,7 @@ static gint crs_range_compare(gconstpointer a, gconstpointer b)
static void crs_replace_with_free_ranges(GPtrArray *ranges,
uint64_t start, uint64_t end)
{
- GPtrArray *free_ranges = g_ptr_array_new_with_free_func(crs_range_free);
+ GPtrArray *free_ranges = g_ptr_array_new();
uint64_t free_base = start;
int i;
@@ -813,7 +843,7 @@ static void crs_replace_with_free_ranges(GPtrArray *ranges,
g_ptr_array_add(ranges, g_ptr_array_index(free_ranges, i));
}
- g_ptr_array_free(free_ranges, false);
+ g_ptr_array_free(free_ranges, true);
}
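
The two-line change above is an ownership fix: every range in free_ranges is handed over to ranges, so the temporary array must not carry an element free-func, and only its pointer storage is released at the end. A hedged sketch of the GLib pattern being relied on (standard g_ptr_array API; move_all is an illustrative name):

    #include <glib.h>

    /* Sketch: transfer element ownership between GPtrArrays. 'tmp' has
     * no element free-func, so handing its pointers to 'dst' is safe;
     * g_ptr_array_free(tmp, TRUE) then releases only the container's
     * pointer storage, never the elements now owned by 'dst'. */
    static void move_all(GPtrArray *dst, GPtrArray *tmp)
    {
        for (guint i = 0; i < tmp->len; i++) {
            g_ptr_array_add(dst, g_ptr_array_index(tmp, i));
        }
        g_ptr_array_free(tmp, TRUE);
    }
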
/*
@@ -2038,6 +2068,13 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
method = aml_method("_E03", 0, AML_NOTSERIALIZED);
aml_append(method, aml_call0(MEMORY_HOTPLUG_HANDLER_PATH));
aml_append(scope, method);
+
+ if (pcms->acpi_nvdimm_state.is_enabled) {
+ method = aml_method("_E04", 0, AML_NOTSERIALIZED);
+ aml_append(method, aml_notify(aml_name("\\_SB.NVDR"),
+ aml_int(0x80)));
+ aml_append(scope, method);
+ }
}
aml_append(dsdt, scope);
@@ -2390,7 +2427,6 @@ static void
build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
{
AcpiSystemResourceAffinityTable *srat;
- AcpiSratProcessorAffinity *core;
AcpiSratMemoryAffinity *numamem;
int i;
@@ -2409,22 +2445,34 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
srat->reserved1 = cpu_to_le32(1);
for (i = 0; i < apic_ids->len; i++) {
- int j;
- int apic_id = apic_ids->cpus[i].arch_id;
-
- core = acpi_data_push(table_data, sizeof *core);
- core->type = ACPI_SRAT_PROCESSOR_APIC;
- core->length = sizeof(*core);
- core->local_apic_id = apic_id;
- for (j = 0; j < nb_numa_nodes; j++) {
- if (test_bit(i, numa_info[j].node_cpu)) {
+ int j = numa_get_node_for_cpu(i);
+ uint32_t apic_id = apic_ids->cpus[i].arch_id;
+
+ if (apic_id < 255) {
+ AcpiSratProcessorAffinity *core;
+
+ core = acpi_data_push(table_data, sizeof *core);
+ core->type = ACPI_SRAT_PROCESSOR_APIC;
+ core->length = sizeof(*core);
+ core->local_apic_id = apic_id;
+ if (j < nb_numa_nodes) {
core->proximity_lo = j;
- break;
}
+ memset(core->proximity_hi, 0, 3);
+ core->local_sapic_eid = 0;
+ core->flags = cpu_to_le32(1);
+ } else {
+ AcpiSratProcessorX2ApicAffinity *core;
+
+ core = acpi_data_push(table_data, sizeof *core);
+ core->type = ACPI_SRAT_PROCESSOR_x2APIC;
+ core->length = sizeof(*core);
+ core->x2apic_id = cpu_to_le32(apic_id);
+ if (j < nb_numa_nodes) {
+ core->proximity_domain = cpu_to_le32(j);
+ }
+ core->flags = cpu_to_le32(1);
}
- memset(core->proximity_hi, 0, 3);
- core->local_sapic_eid = 0;
- core->flags = cpu_to_le32(1);
}
@@ -2557,11 +2605,68 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
scope->length = ioapic_scope_size;
scope->enumeration_id = ACPI_BUILD_IOAPIC_ID;
scope->bus = Q35_PSEUDO_BUS_PLATFORM;
- scope->path[0] = cpu_to_le16(Q35_PSEUDO_DEVFN_IOAPIC);
+ scope->path[0].device = PCI_SLOT(Q35_PSEUDO_DEVFN_IOAPIC);
+ scope->path[0].function = PCI_FUNC(Q35_PSEUDO_DEVFN_IOAPIC);
build_header(linker, table_data, (void *)(table_data->data + dmar_start),
"DMAR", table_data->len - dmar_start, 1, NULL, NULL);
}
+/*
+ * IVRS table as specified in AMD IOMMU Specification v2.62, Section 5.2
+ * accessible here http://support.amd.com/TechDocs/48882_IOMMU.pdf
+ */
+static void
+build_amd_iommu(GArray *table_data, BIOSLinker *linker)
+{
+ int iommu_start = table_data->len;
+ AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default());
+
+ /* IVRS header */
+ acpi_data_push(table_data, sizeof(AcpiTableHeader));
+ /* IVinfo - IO virtualization information common to all
+ * IOMMU units in a system
+ */
+ build_append_int_noprefix(table_data, 40UL << 8/* PASize */, 4);
+ /* reserved */
+ build_append_int_noprefix(table_data, 0, 8);
+
+ /* IVHD definition - type 10h */
+ build_append_int_noprefix(table_data, 0x10, 1);
+ /* virtualization flags */
+ build_append_int_noprefix(table_data,
+ (1UL << 0) | /* HtTunEn */
+ (1UL << 4) | /* iotblSup */
+ (1UL << 6) | /* PrefSup */
+ (1UL << 7), /* PPRSup */
+ 1);
+ /* IVHD length */
+ build_append_int_noprefix(table_data, 0x24, 2);
+ /* DeviceID */
+ build_append_int_noprefix(table_data, s->devid, 2);
+ /* Capability offset */
+ build_append_int_noprefix(table_data, s->capab_offset, 2);
+ /* IOMMU base address */
+ build_append_int_noprefix(table_data, s->mmio.addr, 8);
+ /* PCI Segment Group */
+ build_append_int_noprefix(table_data, 0, 2);
+ /* IOMMU info */
+ build_append_int_noprefix(table_data, 0, 2);
+ /* IOMMU Feature Reporting */
+ build_append_int_noprefix(table_data,
+ (48UL << 30) | /* HATS */
+ (48UL << 28) | /* GATS */
+ (1UL << 2), /* GTSup */
+ 4);
+ /*
+ * Type 1 device entry reporting all devices
+     * These are 4-byte device entries currently reporting the range of
+     * all devices. Refer to Spec - Table 95: IVHD Device Entry Type
+     * Codes (4-byte)
+ */
+ build_append_int_noprefix(table_data, 0x0000001, 4);
+
+ build_header(linker, table_data, (void *)(table_data->data + iommu_start),
+ "IVRS", table_data->len - iommu_start, 1, NULL, NULL);
+}
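
build_amd_iommu() emits the IVRS table as a flat byte stream, appending each field with build_append_int_noprefix(blob, value, size). A sketch of what that helper is assumed to do, an n-byte little-endian append, written against a plain byte buffer (append_le is an illustrative name, not QEMU's API):

    #include <stddef.h>
    #include <stdint.h>

    /* Sketch: append 'value' to 'buf' at 'off' as 'size' little-endian
     * bytes, the assumed behaviour of build_append_int_noprefix(). */
    static size_t append_le(uint8_t *buf, size_t off, uint64_t value, int size)
    {
        for (int i = 0; i < size; i++) {
            buf[off++] = (uint8_t)(value >> (8 * i));
        }
        return off;
    }
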
static GArray *
build_rsdp(GArray *rsdp_table, BIOSLinker *linker, unsigned rsdt_tbl_offset)
@@ -2622,11 +2727,6 @@ static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg)
return true;
}
-static bool acpi_has_iommu(void)
-{
- return !!x86_iommu_get_default();
-}
-
static
void acpi_build(AcpiBuildTables *tables, MachineState *machine)
{
@@ -2706,13 +2806,19 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
acpi_add_table(table_offsets, tables_blob);
build_mcfg_q35(tables_blob, tables->linker, &mcfg);
}
- if (acpi_has_iommu()) {
- acpi_add_table(table_offsets, tables_blob);
- build_dmar_q35(tables_blob, tables->linker);
+ if (x86_iommu_get_default()) {
+ IommuType IOMMUType = x86_iommu_get_type();
+ if (IOMMUType == TYPE_AMD) {
+ acpi_add_table(table_offsets, tables_blob);
+ build_amd_iommu(tables_blob, tables->linker);
+ } else if (IOMMUType == TYPE_INTEL) {
+ acpi_add_table(table_offsets, tables_blob);
+ build_dmar_q35(tables_blob, tables->linker);
+ }
}
if (pcms->acpi_nvdimm_state.is_enabled) {
nvdimm_build_acpi(table_offsets, tables_blob, tables->linker,
- pcms->acpi_nvdimm_state.dsm_mem);
+ &pcms->acpi_nvdimm_state, machine->ram_slots);
}
/* Add tables supplied by user (if any) */
@@ -2754,7 +2860,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
*/
int legacy_aml_len =
pcmc->legacy_acpi_table_size +
- ACPI_BUILD_LEGACY_CPU_AML_SIZE * max_cpus;
+ ACPI_BUILD_LEGACY_CPU_AML_SIZE * pcms->apic_id_limit;
int legacy_table_size =
ROUND_UP(tables_blob->len - aml_len + legacy_aml_len,
ACPI_BUILD_ALIGN_SIZE);
@@ -2830,7 +2936,7 @@ static MemoryRegion *acpi_add_rom_blob(AcpiBuildState *build_state,
uint64_t max_size)
{
return rom_add_blob(name, blob->data, acpi_data_len(blob), max_size, -1,
- name, acpi_build_update, build_state);
+ name, acpi_build_update, build_state, NULL);
}
static const VMStateDescription vmstate_acpi_build = {
@@ -2855,7 +2961,7 @@ void acpi_setup(void)
return;
}
- if (!pcmc->has_acpi_build) {
+ if (!pcms->acpi_build_enabled) {
ACPI_BUILD_DPRINTF("ACPI build disabled. Bailing out.\n");
return;
}
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
new file mode 100644
index 0000000000..47b79d9112
--- /dev/null
+++ b/hw/i386/amd_iommu.c
@@ -0,0 +1,1212 @@
+/*
+ * QEMU emulation of AMD IOMMU (AMD-Vi)
+ *
+ * Copyright (C) 2011 Eduard - Gabriel Munteanu
+ * Copyright (C) 2015 David Kiarie, <davidkiarie4@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Cache implementation inspired by hw/i386/intel_iommu.c
+ */
+#include "qemu/osdep.h"
+#include "hw/i386/amd_iommu.h"
+#include "qemu/error-report.h"
+#include "trace.h"
+
+/* used AMD-Vi MMIO registers */
+const char *amdvi_mmio_low[] = {
+ "AMDVI_MMIO_DEVTAB_BASE",
+ "AMDVI_MMIO_CMDBUF_BASE",
+ "AMDVI_MMIO_EVTLOG_BASE",
+ "AMDVI_MMIO_CONTROL",
+ "AMDVI_MMIO_EXCL_BASE",
+ "AMDVI_MMIO_EXCL_LIMIT",
+ "AMDVI_MMIO_EXT_FEATURES",
+ "AMDVI_MMIO_PPR_BASE",
+ "UNHANDLED"
+};
+const char *amdvi_mmio_high[] = {
+ "AMDVI_MMIO_COMMAND_HEAD",
+ "AMDVI_MMIO_COMMAND_TAIL",
+ "AMDVI_MMIO_EVTLOG_HEAD",
+ "AMDVI_MMIO_EVTLOG_TAIL",
+ "AMDVI_MMIO_STATUS",
+ "AMDVI_MMIO_PPR_HEAD",
+ "AMDVI_MMIO_PPR_TAIL",
+ "UNHANDLED"
+};
+
+struct AMDVIAddressSpace {
+ uint8_t bus_num; /* bus number */
+ uint8_t devfn; /* device function */
+ AMDVIState *iommu_state; /* AMDVI - one per machine */
+ MemoryRegion iommu; /* Device's address translation region */
+ MemoryRegion iommu_ir; /* Device's interrupt remapping region */
+ AddressSpace as; /* device's corresponding address space */
+};
+
+/* AMDVI cache entry */
+typedef struct AMDVIIOTLBEntry {
+ uint16_t domid; /* assigned domain id */
+ uint16_t devid; /* device owning entry */
+ uint64_t perms; /* access permissions */
+ uint64_t translated_addr; /* translated address */
+ uint64_t page_mask; /* physical page size */
+} AMDVIIOTLBEntry;
+
+/* configure MMIO registers at startup/reset */
+static void amdvi_set_quad(AMDVIState *s, hwaddr addr, uint64_t val,
+ uint64_t romask, uint64_t w1cmask)
+{
+ stq_le_p(&s->mmior[addr], val);
+ stq_le_p(&s->romask[addr], romask);
+ stq_le_p(&s->w1cmask[addr], w1cmask);
+}
+
+static uint16_t amdvi_readw(AMDVIState *s, hwaddr addr)
+{
+ return lduw_le_p(&s->mmior[addr]);
+}
+
+static uint32_t amdvi_readl(AMDVIState *s, hwaddr addr)
+{
+ return ldl_le_p(&s->mmior[addr]);
+}
+
+static uint64_t amdvi_readq(AMDVIState *s, hwaddr addr)
+{
+ return ldq_le_p(&s->mmior[addr]);
+}
+
+/* internal write */
+static void amdvi_writeq_raw(AMDVIState *s, uint64_t val, hwaddr addr)
+{
+ stq_le_p(&s->mmior[addr], val);
+}
+
+/* external write */
+static void amdvi_writew(AMDVIState *s, hwaddr addr, uint16_t val)
+{
+ uint16_t romask = lduw_le_p(&s->romask[addr]);
+ uint16_t w1cmask = lduw_le_p(&s->w1cmask[addr]);
+ uint16_t oldval = lduw_le_p(&s->mmior[addr]);
+ stw_le_p(&s->mmior[addr],
+ ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
+}
+
+static void amdvi_writel(AMDVIState *s, hwaddr addr, uint32_t val)
+{
+ uint32_t romask = ldl_le_p(&s->romask[addr]);
+ uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]);
+ uint32_t oldval = ldl_le_p(&s->mmior[addr]);
+ stl_le_p(&s->mmior[addr],
+ ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
+}
+
+static void amdvi_writeq(AMDVIState *s, hwaddr addr, uint64_t val)
+{
+ uint64_t romask = ldq_le_p(&s->romask[addr]);
+ uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]);
+    uint64_t oldval = ldq_le_p(&s->mmior[addr]);
+ stq_le_p(&s->mmior[addr],
+ ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
+}
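
The three write helpers above fold two common hardware register behaviours into one expression: bits set in romask keep their old value no matter what the guest writes, and bits set in w1cmask are cleared when the guest writes a 1 to them (write-1-to-clear). A self-contained worked example of the masking rule (values picked purely for illustration):

    #include <assert.h>
    #include <stdint.h>

    /* Sketch: the masking rule used by amdvi_writew/l/q above. */
    static uint8_t reg_write(uint8_t oldval, uint8_t val,
                             uint8_t romask, uint8_t w1cmask)
    {
        return ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask);
    }

    int main(void)
    {
        /* bit0 read-only (stays 1), bit1 write-1-to-clear (writing 1
         * clears it), bit2 plain read/write (takes the new value). */
        uint8_t r = reg_write(/*oldval*/0x03, /*val*/0x06,
                              /*romask*/0x01, /*w1cmask*/0x02);
        assert(r == 0x05); /* bit0 kept, bit1 cleared, bit2 written */
        return 0;
    }
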
+
+/* test whether any of the given bits are set in a 64-bit register */
+static bool amdvi_test_mask(AMDVIState *s, hwaddr addr, uint64_t val)
+{
+    return amdvi_readq(s, addr) & val;
+}
+
+/* OR a 64-bit register with a 64-bit value storing result in the register */
+static void amdvi_assign_orq(AMDVIState *s, hwaddr addr, uint64_t val)
+{
+ amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) | val);
+}
+
+/* AND a 64-bit register with a 64-bit value storing result in the register */
+static void amdvi_assign_andq(AMDVIState *s, hwaddr addr, uint64_t val)
+{
+ amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) & val);
+}
+
+static void amdvi_generate_msi_interrupt(AMDVIState *s)
+{
+ MSIMessage msg = {};
+ MemTxAttrs attrs = {
+ .requester_id = pci_requester_id(&s->pci.dev)
+ };
+
+ if (msi_enabled(&s->pci.dev)) {
+ msg = msi_get_message(&s->pci.dev, 0);
+ address_space_stl_le(&address_space_memory, msg.address, msg.data,
+ attrs, NULL);
+ }
+}
+
+static void amdvi_log_event(AMDVIState *s, uint64_t *evt)
+{
+ /* event logging not enabled */
+ if (!s->evtlog_enabled || amdvi_test_mask(s, AMDVI_MMIO_STATUS,
+ AMDVI_MMIO_STATUS_EVT_OVF)) {
+ return;
+ }
+
+ /* event log buffer full */
+ if (s->evtlog_tail >= s->evtlog_len) {
+ amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_OVF);
+ /* generate interrupt */
+ amdvi_generate_msi_interrupt(s);
+ return;
+ }
+
+ if (dma_memory_write(&address_space_memory, s->evtlog + s->evtlog_tail,
+                         evt, AMDVI_EVENT_LEN)) {
+ trace_amdvi_evntlog_fail(s->evtlog, s->evtlog_tail);
+ }
+
+ s->evtlog_tail += AMDVI_EVENT_LEN;
+ amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT);
+ amdvi_generate_msi_interrupt(s);
+}
+
+static void amdvi_setevent_bits(uint64_t *buffer, uint64_t value, int start,
+ int length)
+{
+ int index = start / 64, bitpos = start % 64;
+ uint64_t mask = MAKE_64BIT_MASK(start, length);
+ buffer[index] &= ~mask;
+ buffer[index] |= (value << bitpos) & mask;
+}
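
amdvi_setevent_bits() is a read-modify-write bit-field insert over an array of 64-bit words: it clears length bits at start and ORs value in there. A self-contained sketch of the single-word case, with an assert showing how amdvi_encode_event() places the 16-bit device ID in bits 0:15 (set_bits64 is an illustrative stand-in):

    #include <assert.h>
    #include <stdint.h>

    /* Sketch: insert 'value' into bits [start, start+length) of one
     * 64-bit word, the same read-modify-write pattern that
     * amdvi_setevent_bits() applies for fields within a single word. */
    static void set_bits64(uint64_t *word, uint64_t value, int start, int length)
    {
        uint64_t field = (length == 64) ? ~0ULL : (1ULL << length) - 1;
        uint64_t mask = field << start;

        *word = (*word & ~mask) | ((value << start) & mask);
    }

    int main(void)
    {
        uint64_t evt0 = ~0ULL;

        set_bits64(&evt0, 0x1234, 0, 16);    /* DeviceID field, bits 0:15 */
        assert((evt0 & 0xffff) == 0x1234);
        return 0;
    }
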
+/*
+ * AMD-Vi event structure
+ * 0:15 -> DeviceID
+ * 55:63 -> event type + miscellaneous info
+ * 63:127 -> related address
+ */
+static void amdvi_encode_event(uint64_t *evt, uint16_t devid, uint64_t addr,
+ uint16_t info)
+{
+ amdvi_setevent_bits(evt, devid, 0, 16);
+ amdvi_setevent_bits(evt, info, 55, 8);
+ amdvi_setevent_bits(evt, addr, 63, 64);
+}
+/* log an error encountered during a page walk
+ *
+ * @addr: virtual address in translation request
+ */
+static void amdvi_page_fault(AMDVIState *s, uint16_t devid,
+ hwaddr addr, uint16_t info)
+{
+ uint64_t evt[4];
+
+ info |= AMDVI_EVENT_IOPF_I | AMDVI_EVENT_IOPF;
+ amdvi_encode_event(evt, devid, addr, info);
+ amdvi_log_event(s, evt);
+ pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
+ PCI_STATUS_SIG_TARGET_ABORT);
+}
+/*
+ * log a master abort accessing device table
+ * @devtab : address of device table entry
+ * @info : error flags
+ */
+static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid,
+ hwaddr devtab, uint16_t info)
+{
+ uint64_t evt[4];
+
+ info |= AMDVI_EVENT_DEV_TAB_HW_ERROR;
+
+ amdvi_encode_event(evt, devid, devtab, info);
+ amdvi_log_event(s, evt);
+ pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
+ PCI_STATUS_SIG_TARGET_ABORT);
+}
+/* log an event trying to access command buffer
+ * @addr : address that couldn't be accessed
+ */
+static void amdvi_log_command_error(AMDVIState *s, hwaddr addr)
+{
+ uint64_t evt[4], info = AMDVI_EVENT_COMMAND_HW_ERROR;
+
+ amdvi_encode_event(evt, 0, addr, info);
+ amdvi_log_event(s, evt);
+ pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
+ PCI_STATUS_SIG_TARGET_ABORT);
+}
+/* log an illegal command event
+ * @addr : address of illegal command
+ */
+static void amdvi_log_illegalcom_error(AMDVIState *s, uint16_t info,
+ hwaddr addr)
+{
+ uint64_t evt[4];
+
+ info |= AMDVI_EVENT_ILLEGAL_COMMAND_ERROR;
+ amdvi_encode_event(evt, 0, addr, info);
+ amdvi_log_event(s, evt);
+}
+/* log an error accessing device table
+ *
+ * @devid : device owning the table entry
+ * @devtab : address of device table entry
+ * @info : error flags
+ */
+static void amdvi_log_illegaldevtab_error(AMDVIState *s, uint16_t devid,
+ hwaddr addr, uint16_t info)
+{
+ uint64_t evt[4];
+
+ info |= AMDVI_EVENT_ILLEGAL_DEVTAB_ENTRY;
+ amdvi_encode_event(evt, devid, addr, info);
+ amdvi_log_event(s, evt);
+}
+/* log an error accessing a PTE entry
+ * @addr : address that couldn't be accessed
+ */
+static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid,
+ hwaddr addr, uint16_t info)
+{
+ uint64_t evt[4];
+
+ info |= AMDVI_EVENT_PAGE_TAB_HW_ERROR;
+ amdvi_encode_event(evt, devid, addr, info);
+ amdvi_log_event(s, evt);
+ pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
+ PCI_STATUS_SIG_TARGET_ABORT);
+}
+
+static gboolean amdvi_uint64_equal(gconstpointer v1, gconstpointer v2)
+{
+ return *((const uint64_t *)v1) == *((const uint64_t *)v2);
+}
+
+static guint amdvi_uint64_hash(gconstpointer v)
+{
+ return (guint)*(const uint64_t *)v;
+}
+
+static AMDVIIOTLBEntry *amdvi_iotlb_lookup(AMDVIState *s, hwaddr addr,
+ uint64_t devid)
+{
+ uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
+ ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
+ return g_hash_table_lookup(s->iotlb, &key);
+}
+
+static void amdvi_iotlb_reset(AMDVIState *s)
+{
+ assert(s->iotlb);
+ trace_amdvi_iotlb_reset();
+ g_hash_table_remove_all(s->iotlb);
+}
+
+static gboolean amdvi_iotlb_remove_by_devid(gpointer key, gpointer value,
+ gpointer user_data)
+{
+ AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value;
+ uint16_t devid = *(uint16_t *)user_data;
+ return entry->devid == devid;
+}
+
+static void amdvi_iotlb_remove_page(AMDVIState *s, hwaddr addr,
+ uint64_t devid)
+{
+ uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
+ ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
+ g_hash_table_remove(s->iotlb, &key);
+}
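
Both lookup and removal build the same 64-bit hash key: the 4K guest frame number in the low bits and the 16-bit device ID starting at bit 36 (AMDVI_DEVID_SHIFT), which leaves room for a 48-bit guest-physical address space. A sketch of composing and decomposing such a key (helper names are illustrative):

    #include <stdint.h>

    enum { DEVID_SHIFT = 36 };   /* mirrors AMDVI_DEVID_SHIFT */

    /* Sketch: compose the IOTLB hash key used above. */
    static uint64_t iotlb_key(uint64_t gpa, uint16_t devid)
    {
        return (gpa >> 12) | ((uint64_t)devid << DEVID_SHIFT);
    }

    /* ...and recover the device ID from a key. */
    static uint16_t iotlb_key_devid(uint64_t key)
    {
        return (uint16_t)(key >> DEVID_SHIFT);
    }
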
+
+static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid,
+ uint64_t gpa, IOMMUTLBEntry to_cache,
+ uint16_t domid)
+{
+ AMDVIIOTLBEntry *entry = g_new(AMDVIIOTLBEntry, 1);
+ uint64_t *key = g_new(uint64_t, 1);
+ uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K;
+
+ /* don't cache erroneous translations */
+ if (to_cache.perm != IOMMU_NONE) {
+ trace_amdvi_cache_update(domid, PCI_BUS_NUM(devid), PCI_SLOT(devid),
+ PCI_FUNC(devid), gpa, to_cache.translated_addr);
+
+ if (g_hash_table_size(s->iotlb) >= AMDVI_IOTLB_MAX_SIZE) {
+ amdvi_iotlb_reset(s);
+ }
+
+ entry->domid = domid;
+ entry->perms = to_cache.perm;
+ entry->translated_addr = to_cache.translated_addr;
+ entry->page_mask = to_cache.addr_mask;
+ *key = gfn | ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
+ g_hash_table_replace(s->iotlb, key, entry);
+ }
+}
+
+static void amdvi_completion_wait(AMDVIState *s, uint64_t *cmd)
+{
+ /* pad the last 3 bits */
+ hwaddr addr = cpu_to_le64(extract64(cmd[0], 3, 49)) << 3;
+ uint64_t data = cpu_to_le64(cmd[1]);
+
+ if (extract64(cmd[0], 51, 8)) {
+ amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
+ s->cmdbuf + s->cmdbuf_head);
+ }
+ if (extract64(cmd[0], 0, 1)) {
+ if (dma_memory_write(&address_space_memory, addr, &data,
+ AMDVI_COMPLETION_DATA_SIZE)) {
+ trace_amdvi_completion_wait_fail(addr);
+ }
+ }
+ /* set completion interrupt */
+ if (extract64(cmd[0], 1, 1)) {
+        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT);
+ /* generate interrupt */
+ amdvi_generate_msi_interrupt(s);
+ }
+ trace_amdvi_completion_wait(addr, data);
+}
+
+/* log error without aborting since linux seems to be using reserved bits */
+static void amdvi_inval_devtab_entry(AMDVIState *s, uint64_t *cmd)
+{
+ uint16_t devid = cpu_to_le16((uint16_t)extract64(cmd[0], 0, 16));
+
+    /* This command should invalidate internal caches, of which there are none */
+ if (extract64(cmd[0], 15, 16) || cmd[1]) {
+ amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
+ s->cmdbuf + s->cmdbuf_head);
+ }
+ trace_amdvi_devtab_inval(PCI_BUS_NUM(devid), PCI_SLOT(devid),
+ PCI_FUNC(devid));
+}
+
+static void amdvi_complete_ppr(AMDVIState *s, uint64_t *cmd)
+{
+ if (extract64(cmd[0], 15, 16) || extract64(cmd[0], 19, 8) ||
+ extract64(cmd[1], 0, 2) || extract64(cmd[1], 3, 29)
+ || extract64(cmd[1], 47, 16)) {
+ amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
+ s->cmdbuf + s->cmdbuf_head);
+ }
+ trace_amdvi_ppr_exec();
+}
+
+static void amdvi_inval_all(AMDVIState *s, uint64_t *cmd)
+{
+ if (extract64(cmd[0], 0, 60) || cmd[1]) {
+ amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
+ s->cmdbuf + s->cmdbuf_head);
+ }
+
+ amdvi_iotlb_reset(s);
+ trace_amdvi_all_inval();
+}
+
+static gboolean amdvi_iotlb_remove_by_domid(gpointer key, gpointer value,
+ gpointer user_data)
+{
+ AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value;
+ uint16_t domid = *(uint16_t *)user_data;
+ return entry->domid == domid;
+}
+
+/* we don't have devid - we can't remove pages by address */
+static void amdvi_inval_pages(AMDVIState *s, uint64_t *cmd)
+{
+ uint16_t domid = cpu_to_le16((uint16_t)extract64(cmd[0], 32, 16));
+
+ if (extract64(cmd[0], 20, 12) || extract64(cmd[0], 16, 12) ||
+ extract64(cmd[0], 3, 10)) {
+ amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
+ s->cmdbuf + s->cmdbuf_head);
+ }
+
+ g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_domid,
+ &domid);
+ trace_amdvi_pages_inval(domid);
+}
+
+static void amdvi_prefetch_pages(AMDVIState *s, uint64_t *cmd)
+{
+ if (extract64(cmd[0], 16, 8) || extract64(cmd[0], 20, 8) ||
+ extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) ||
+ extract64(cmd[1], 5, 7)) {
+ amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
+ s->cmdbuf + s->cmdbuf_head);
+ }
+
+ trace_amdvi_prefetch_pages();
+}
+
+static void amdvi_inval_inttable(AMDVIState *s, uint64_t *cmd)
+{
+ if (extract64(cmd[0], 16, 16) || cmd[1]) {
+ amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
+ s->cmdbuf + s->cmdbuf_head);
+ return;
+ }
+
+ trace_amdvi_intr_inval();
+}
+
+/* FIXME: Try to work with the specified size instead of all the pages
+ * when the S bit is on
+ */
+static void iommu_inval_iotlb(AMDVIState *s, uint64_t *cmd)
+{
+
+ uint16_t devid = extract64(cmd[0], 0, 16);
+ if (extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 9)) {
+ amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
+ s->cmdbuf + s->cmdbuf_head);
+ return;
+ }
+
+ if (extract64(cmd[1], 0, 1)) {
+ g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_devid,
+ &devid);
+ } else {
+ amdvi_iotlb_remove_page(s, cpu_to_le64(extract64(cmd[1], 12, 52)) << 12,
+ cpu_to_le16(extract64(cmd[1], 0, 16)));
+ }
+ trace_amdvi_iotlb_inval();
+}
+
+/* not honouring reserved bits is regarded as an illegal command */
+static void amdvi_cmdbuf_exec(AMDVIState *s)
+{
+ uint64_t cmd[2];
+
+ if (dma_memory_read(&address_space_memory, s->cmdbuf + s->cmdbuf_head,
+ cmd, AMDVI_COMMAND_SIZE)) {
+ trace_amdvi_command_read_fail(s->cmdbuf, s->cmdbuf_head);
+ amdvi_log_command_error(s, s->cmdbuf + s->cmdbuf_head);
+ return;
+ }
+
+ switch (extract64(cmd[0], 60, 4)) {
+ case AMDVI_CMD_COMPLETION_WAIT:
+ amdvi_completion_wait(s, cmd);
+ break;
+ case AMDVI_CMD_INVAL_DEVTAB_ENTRY:
+ amdvi_inval_devtab_entry(s, cmd);
+ break;
+ case AMDVI_CMD_INVAL_AMDVI_PAGES:
+ amdvi_inval_pages(s, cmd);
+ break;
+ case AMDVI_CMD_INVAL_IOTLB_PAGES:
+ iommu_inval_iotlb(s, cmd);
+ break;
+ case AMDVI_CMD_INVAL_INTR_TABLE:
+ amdvi_inval_inttable(s, cmd);
+ break;
+ case AMDVI_CMD_PREFETCH_AMDVI_PAGES:
+ amdvi_prefetch_pages(s, cmd);
+ break;
+ case AMDVI_CMD_COMPLETE_PPR_REQUEST:
+ amdvi_complete_ppr(s, cmd);
+ break;
+ case AMDVI_CMD_INVAL_AMDVI_ALL:
+ amdvi_inval_all(s, cmd);
+ break;
+ default:
+ trace_amdvi_unhandled_command(extract64(cmd[1], 60, 4));
+ /* log illegal command */
+ amdvi_log_illegalcom_error(s, extract64(cmd[1], 60, 4),
+ s->cmdbuf + s->cmdbuf_head);
+ }
+}
+
+static void amdvi_cmdbuf_run(AMDVIState *s)
+{
+ if (!s->cmdbuf_enabled) {
+ trace_amdvi_command_error(amdvi_readq(s, AMDVI_MMIO_CONTROL));
+ return;
+ }
+
+ /* check if there is work to do. */
+ while (s->cmdbuf_head != s->cmdbuf_tail) {
+ trace_amdvi_command_exec(s->cmdbuf_head, s->cmdbuf_tail, s->cmdbuf);
+ amdvi_cmdbuf_exec(s);
+ s->cmdbuf_head += AMDVI_COMMAND_SIZE;
+ amdvi_writeq_raw(s, s->cmdbuf_head, AMDVI_MMIO_COMMAND_HEAD);
+
+ /* wrap head pointer */
+ if (s->cmdbuf_head >= s->cmdbuf_len * AMDVI_COMMAND_SIZE) {
+ s->cmdbuf_head = 0;
+ }
+ }
+}
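
amdvi_cmdbuf_run() drains a classic producer/consumer ring: software advances the tail, the IOMMU consumes 16-byte commands at the head, and the head wraps to zero once it passes cmdbuf_len entries. The advance-and-wrap step in isolation (a hypothetical helper):

    #include <stdint.h>

    /* Sketch: the head advance-and-wrap step of amdvi_cmdbuf_run().
     * 'nentries' corresponds to s->cmdbuf_len, 'entry_size' to
     * AMDVI_COMMAND_SIZE (16 bytes). */
    static uint32_t ring_advance(uint32_t head, uint32_t entry_size,
                                 uint32_t nentries)
    {
        head += entry_size;
        return head >= nentries * entry_size ? 0 : head;
    }
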
+
+static void amdvi_mmio_trace(hwaddr addr, unsigned size)
+{
+ uint8_t index = (addr & ~0x2000) / 8;
+
+ if ((addr & 0x2000)) {
+ /* high table */
+ index = index >= AMDVI_MMIO_REGS_HIGH ? AMDVI_MMIO_REGS_HIGH : index;
+ trace_amdvi_mmio_read(amdvi_mmio_high[index], addr, size, addr & ~0x07);
+ } else {
+ index = index >= AMDVI_MMIO_REGS_LOW ? AMDVI_MMIO_REGS_LOW : index;
+        trace_amdvi_mmio_read(amdvi_mmio_low[index], addr, size, addr & ~0x07);
+ }
+}
+
+static uint64_t amdvi_mmio_read(void *opaque, hwaddr addr, unsigned size)
+{
+ AMDVIState *s = opaque;
+
+ uint64_t val = -1;
+ if (addr + size > AMDVI_MMIO_SIZE) {
+ trace_amdvi_mmio_read("error: addr outside region: max ",
+ (uint64_t)AMDVI_MMIO_SIZE, addr, size);
+ return (uint64_t)-1;
+ }
+
+ if (size == 2) {
+ val = amdvi_readw(s, addr);
+ } else if (size == 4) {
+ val = amdvi_readl(s, addr);
+ } else if (size == 8) {
+ val = amdvi_readq(s, addr);
+ }
+ amdvi_mmio_trace(addr, size);
+
+ return val;
+}
+
+static void amdvi_handle_control_write(AMDVIState *s)
+{
+ unsigned long control = amdvi_readq(s, AMDVI_MMIO_CONTROL);
+ s->enabled = !!(control & AMDVI_MMIO_CONTROL_AMDVIEN);
+
+ s->ats_enabled = !!(control & AMDVI_MMIO_CONTROL_HTTUNEN);
+ s->evtlog_enabled = s->enabled && !!(control &
+ AMDVI_MMIO_CONTROL_EVENTLOGEN);
+
+ s->evtlog_intr = !!(control & AMDVI_MMIO_CONTROL_EVENTINTEN);
+ s->completion_wait_intr = !!(control & AMDVI_MMIO_CONTROL_COMWAITINTEN);
+ s->cmdbuf_enabled = s->enabled && !!(control &
+ AMDVI_MMIO_CONTROL_CMDBUFLEN);
+
+ /* update the flags depending on the control register */
+ if (s->cmdbuf_enabled) {
+ amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_CMDBUF_RUN);
+ } else {
+ amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_CMDBUF_RUN);
+ }
+ if (s->evtlog_enabled) {
+ amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_RUN);
+ } else {
+ amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_EVT_RUN);
+ }
+
+ trace_amdvi_control_status(control);
+ amdvi_cmdbuf_run(s);
+}
+
+static inline void amdvi_handle_devtab_write(AMDVIState *s)
+{
+ uint64_t val = amdvi_readq(s, AMDVI_MMIO_DEVICE_TABLE);
+ s->devtab = (val & AMDVI_MMIO_DEVTAB_BASE_MASK);
+
+ /* set device table length */
+    s->devtab_len = ((val & AMDVI_MMIO_DEVTAB_SIZE_MASK) + 1) *
+                    (AMDVI_MMIO_DEVTAB_SIZE_UNIT /
+                     AMDVI_MMIO_DEVTAB_ENTRY_SIZE);
+}
+
+static inline void amdvi_handle_cmdhead_write(AMDVIState *s)
+{
+ s->cmdbuf_head = amdvi_readq(s, AMDVI_MMIO_COMMAND_HEAD)
+ & AMDVI_MMIO_CMDBUF_HEAD_MASK;
+ amdvi_cmdbuf_run(s);
+}
+
+static inline void amdvi_handle_cmdbase_write(AMDVIState *s)
+{
+ s->cmdbuf = amdvi_readq(s, AMDVI_MMIO_COMMAND_BASE)
+ & AMDVI_MMIO_CMDBUF_BASE_MASK;
+ s->cmdbuf_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_CMDBUF_SIZE_BYTE)
+ & AMDVI_MMIO_CMDBUF_SIZE_MASK);
+ s->cmdbuf_head = s->cmdbuf_tail = 0;
+}
+
+static inline void amdvi_handle_cmdtail_write(AMDVIState *s)
+{
+ s->cmdbuf_tail = amdvi_readq(s, AMDVI_MMIO_COMMAND_TAIL)
+ & AMDVI_MMIO_CMDBUF_TAIL_MASK;
+ amdvi_cmdbuf_run(s);
+}
+
+static inline void amdvi_handle_excllim_write(AMDVIState *s)
+{
+ uint64_t val = amdvi_readq(s, AMDVI_MMIO_EXCL_LIMIT);
+ s->excl_limit = (val & AMDVI_MMIO_EXCL_LIMIT_MASK) |
+ AMDVI_MMIO_EXCL_LIMIT_LOW;
+}
+
+static inline void amdvi_handle_evtbase_write(AMDVIState *s)
+{
+ uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_BASE);
+ s->evtlog = val & AMDVI_MMIO_EVTLOG_BASE_MASK;
+ s->evtlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_EVTLOG_SIZE_BYTE)
+ & AMDVI_MMIO_EVTLOG_SIZE_MASK);
+}
+
+static inline void amdvi_handle_evttail_write(AMDVIState *s)
+{
+ uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_TAIL);
+ s->evtlog_tail = val & AMDVI_MMIO_EVTLOG_TAIL_MASK;
+}
+
+static inline void amdvi_handle_evthead_write(AMDVIState *s)
+{
+ uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_HEAD);
+ s->evtlog_head = val & AMDVI_MMIO_EVTLOG_HEAD_MASK;
+}
+
+static inline void amdvi_handle_pprbase_write(AMDVIState *s)
+{
+ uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_BASE);
+ s->ppr_log = val & AMDVI_MMIO_PPRLOG_BASE_MASK;
+ s->pprlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_PPRLOG_SIZE_BYTE)
+ & AMDVI_MMIO_PPRLOG_SIZE_MASK);
+}
+
+static inline void amdvi_handle_pprhead_write(AMDVIState *s)
+{
+ uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_HEAD);
+ s->pprlog_head = val & AMDVI_MMIO_PPRLOG_HEAD_MASK;
+}
+
+static inline void amdvi_handle_pprtail_write(AMDVIState *s)
+{
+ uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_TAIL);
+ s->pprlog_tail = val & AMDVI_MMIO_PPRLOG_TAIL_MASK;
+}
+
+/* FIXME: something might go wrong if System Software writes in chunks
+ * of one byte. Linux writes in chunks of 4 bytes, so currently this
+ * works correctly with Linux, but it will definitely break if software
+ * reads/writes 8 bytes at a time.
+ */
+static void amdvi_mmio_reg_write(AMDVIState *s, unsigned size, uint64_t val,
+ hwaddr addr)
+{
+ if (size == 2) {
+ amdvi_writew(s, addr, val);
+ } else if (size == 4) {
+ amdvi_writel(s, addr, val);
+ } else if (size == 8) {
+ amdvi_writeq(s, addr, val);
+ }
+}
+
+static void amdvi_mmio_write(void *opaque, hwaddr addr, uint64_t val,
+ unsigned size)
+{
+ AMDVIState *s = opaque;
+ unsigned long offset = addr & 0x07;
+
+ if (addr + size > AMDVI_MMIO_SIZE) {
+ trace_amdvi_mmio_write("error: addr outside region: max ",
+ (uint64_t)AMDVI_MMIO_SIZE, size, val, offset);
+ return;
+ }
+
+ amdvi_mmio_trace(addr, size);
+ switch (addr & ~0x07) {
+ case AMDVI_MMIO_CONTROL:
+ amdvi_mmio_reg_write(s, size, val, addr);
+ amdvi_handle_control_write(s);
+ break;
+ case AMDVI_MMIO_DEVICE_TABLE:
+ amdvi_mmio_reg_write(s, size, val, addr);
+ /* set device table address
+ * This also suffers from inability to tell whether software
+ * is done writing
+ */
+ if (offset || (size == 8)) {
+ amdvi_handle_devtab_write(s);
+ }
+ break;
+ case AMDVI_MMIO_COMMAND_HEAD:
+ amdvi_mmio_reg_write(s, size, val, addr);
+ amdvi_handle_cmdhead_write(s);
+ break;
+ case AMDVI_MMIO_COMMAND_BASE:
+ amdvi_mmio_reg_write(s, size, val, addr);
+        /* FIXME - make sure System Software has finished writing, in
+         * case it writes in chunks of less than 8 bytes, in a robust
+         * way. For now, this hack works for the Linux driver.
+ */
+ if (offset || (size == 8)) {
+ amdvi_handle_cmdbase_write(s);
+ }
+ break;
+ case AMDVI_MMIO_COMMAND_TAIL:
+ amdvi_mmio_reg_write(s, size, val, addr);
+ amdvi_handle_cmdtail_write(s);
+ break;
+ case AMDVI_MMIO_EVENT_BASE:
+ amdvi_mmio_reg_write(s, size, val, addr);
+ amdvi_handle_evtbase_write(s);
+ break;
+ case AMDVI_MMIO_EVENT_HEAD:
+ amdvi_mmio_reg_write(s, size, val, addr);
+ amdvi_handle_evthead_write(s);
+ break;
+ case AMDVI_MMIO_EVENT_TAIL:
+ amdvi_mmio_reg_write(s, size, val, addr);
+ amdvi_handle_evttail_write(s);
+ break;
+ case AMDVI_MMIO_EXCL_LIMIT:
+ amdvi_mmio_reg_write(s, size, val, addr);
+ amdvi_handle_excllim_write(s);
+ break;
+ /* PPR log base - unused for now */
+ case AMDVI_MMIO_PPR_BASE:
+ amdvi_mmio_reg_write(s, size, val, addr);
+ amdvi_handle_pprbase_write(s);
+ break;
+ /* PPR log head - also unused for now */
+ case AMDVI_MMIO_PPR_HEAD:
+ amdvi_mmio_reg_write(s, size, val, addr);
+ amdvi_handle_pprhead_write(s);
+ break;
+ /* PPR log tail - unused for now */
+ case AMDVI_MMIO_PPR_TAIL:
+ amdvi_mmio_reg_write(s, size, val, addr);
+ amdvi_handle_pprtail_write(s);
+ break;
+ }
+}
+
+static inline uint64_t amdvi_get_perms(uint64_t entry)
+{
+ return (entry & (AMDVI_DEV_PERM_READ | AMDVI_DEV_PERM_WRITE)) >>
+ AMDVI_DEV_PERM_SHIFT;
+}
+
+/* a valid entry should have V = 1 and reserved bits honoured */
+static bool amdvi_validate_dte(AMDVIState *s, uint16_t devid,
+ uint64_t *dte)
+{
+ if ((dte[0] & AMDVI_DTE_LOWER_QUAD_RESERVED)
+ || (dte[1] & AMDVI_DTE_MIDDLE_QUAD_RESERVED)
+ || (dte[2] & AMDVI_DTE_UPPER_QUAD_RESERVED) || dte[3]) {
+ amdvi_log_illegaldevtab_error(s, devid,
+ s->devtab +
+ devid * AMDVI_DEVTAB_ENTRY_SIZE, 0);
+ return false;
+ }
+
+ return dte[0] & AMDVI_DEV_VALID;
+}
+
+/* get a device table entry given the devid */
+static bool amdvi_get_dte(AMDVIState *s, int devid, uint64_t *entry)
+{
+ uint32_t offset = devid * AMDVI_DEVTAB_ENTRY_SIZE;
+
+ if (dma_memory_read(&address_space_memory, s->devtab + offset, entry,
+ AMDVI_DEVTAB_ENTRY_SIZE)) {
+ trace_amdvi_dte_get_fail(s->devtab, offset);
+ /* log error accessing dte */
+ amdvi_log_devtab_error(s, devid, s->devtab + offset, 0);
+ return false;
+ }
+
+ *entry = le64_to_cpu(*entry);
+ if (!amdvi_validate_dte(s, devid, entry)) {
+ trace_amdvi_invalid_dte(entry[0]);
+ return false;
+ }
+
+ return true;
+}
+
+/* get pte translation mode */
+static inline uint8_t get_pte_translation_mode(uint64_t pte)
+{
+ return (pte >> AMDVI_DEV_MODE_RSHIFT) & AMDVI_DEV_MODE_MASK;
+}
+
+static inline uint64_t pte_override_page_mask(uint64_t pte)
+{
+ uint8_t page_mask = 12;
+ uint64_t addr = (pte & AMDVI_DEV_PT_ROOT_MASK) ^ AMDVI_DEV_PT_ROOT_MASK;
+ /* find the first zero bit */
+ while (addr & 1) {
+ page_mask++;
+ addr = addr >> 1;
+ }
+
+ return ~((1ULL << page_mask) - 1);
+}
+
+static inline uint64_t pte_get_page_mask(uint64_t oldlevel)
+{
+ return ~((1UL << ((oldlevel * 9) + 3)) - 1);
+}
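
These two helpers encode AMD-Vi's page-size rules: a walk that ends after reading a PTE at level N maps a page of 2^(9*N + 3) bytes, and the special level-7 "page size override" instead derives the size from the run of low one-bits in the PTE's address field. A short worked check of the level-based formula:

    #include <assert.h>
    #include <stdint.h>

    /* Sketch: page sizes implied by pte_get_page_mask(). A walk that
     * ends at level N maps a page of (1 << (9 * N + 3)) bytes. */
    int main(void)
    {
        uint64_t mask_l1 = ~((1ULL << (9 * 1 + 3)) - 1);   /* 4 KiB pages */
        uint64_t mask_l2 = ~((1ULL << (9 * 2 + 3)) - 1);   /* 2 MiB pages */

        assert(mask_l1 == ~(uint64_t)0xfff);
        assert(mask_l2 == ~(uint64_t)0x1fffff);
        return 0;
    }
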
+
+static inline uint64_t amdvi_get_pte_entry(AMDVIState *s, uint64_t pte_addr,
+ uint16_t devid)
+{
+ uint64_t pte;
+
+ if (dma_memory_read(&address_space_memory, pte_addr, &pte, sizeof(pte))) {
+ trace_amdvi_get_pte_hwerror(pte_addr);
+ amdvi_log_pagetab_error(s, devid, pte_addr, 0);
+ pte = 0;
+ return pte;
+ }
+
+ pte = le64_to_cpu(pte);
+ return pte;
+}
+
+static void amdvi_page_walk(AMDVIAddressSpace *as, uint64_t *dte,
+ IOMMUTLBEntry *ret, unsigned perms,
+ hwaddr addr)
+{
+ unsigned level, present, pte_perms, oldlevel;
+ uint64_t pte = dte[0], pte_addr, page_mask;
+
+ /* make sure the DTE has TV = 1 */
+ if (pte & AMDVI_DEV_TRANSLATION_VALID) {
+ level = get_pte_translation_mode(pte);
+ if (level >= 7) {
+ trace_amdvi_mode_invalid(level, addr);
+ return;
+ }
+ if (level == 0) {
+ goto no_remap;
+ }
+
+ /* we are at the leaf page table or page table encodes a huge page */
+ while (level > 0) {
+ pte_perms = amdvi_get_perms(pte);
+ present = pte & 1;
+ if (!present || perms != (perms & pte_perms)) {
+ amdvi_page_fault(as->iommu_state, as->devfn, addr, perms);
+ trace_amdvi_page_fault(addr);
+ return;
+ }
+
+ /* go to the next lower level */
+ pte_addr = pte & AMDVI_DEV_PT_ROOT_MASK;
+ /* add offset and load pte */
+ pte_addr += ((addr >> (3 + 9 * level)) & 0x1FF) << 3;
+ pte = amdvi_get_pte_entry(as->iommu_state, pte_addr, as->devfn);
+ if (!pte) {
+ return;
+ }
+ oldlevel = level;
+ level = get_pte_translation_mode(pte);
+ if (level == 0x7) {
+ break;
+ }
+ }
+
+ if (level == 0x7) {
+ page_mask = pte_override_page_mask(pte);
+ } else {
+ page_mask = pte_get_page_mask(oldlevel);
+ }
+
+ /* get access permissions from pte */
+ ret->iova = addr & page_mask;
+ ret->translated_addr = (pte & AMDVI_DEV_PT_ROOT_MASK) & page_mask;
+ ret->addr_mask = ~page_mask;
+ ret->perm = amdvi_get_perms(pte);
+ return;
+ }
+no_remap:
+ ret->iova = addr & AMDVI_PAGE_MASK_4K;
+ ret->translated_addr = addr & AMDVI_PAGE_MASK_4K;
+ ret->addr_mask = ~AMDVI_PAGE_MASK_4K;
+ ret->perm = amdvi_get_perms(pte);
+}
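
The index arithmetic in the walk deserves a note: (addr >> (3 + 9 * level)) & 0x1FF extracts a 9-bit table index, and the << 3 on the next line scales it by the 8-byte entry size. The shift 3 + 9*level is just 12 + 9*(level - 1) rewritten: skip the 12-bit page offset, then 9 index bits for each level already consumed. A self-contained sketch:

    #include <assert.h>
    #include <stdint.h>

    /* Sketch: per-level index extraction from amdvi_page_walk().
     * 3 + 9 * level == 12 + 9 * (level - 1): skip the 12-bit page
     * offset, plus 9 index bits per level already walked. */
    static uint64_t pt_index(uint64_t addr, unsigned level)
    {
        return (addr >> (3 + 9 * level)) & 0x1FF;
    }

    int main(void)
    {
        assert(pt_index(0x7000, 1) == 7);        /* bits 20:12 */
        assert(pt_index(1ULL << 21, 2) == 1);    /* bits 29:21 */
        return 0;
    }
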
+
+static void amdvi_do_translate(AMDVIAddressSpace *as, hwaddr addr,
+ bool is_write, IOMMUTLBEntry *ret)
+{
+ AMDVIState *s = as->iommu_state;
+ uint16_t devid = PCI_BUILD_BDF(as->bus_num, as->devfn);
+ AMDVIIOTLBEntry *iotlb_entry = amdvi_iotlb_lookup(s, addr, devid);
+ uint64_t entry[4];
+
+ if (iotlb_entry) {
+ trace_amdvi_iotlb_hit(PCI_BUS_NUM(devid), PCI_SLOT(devid),
+ PCI_FUNC(devid), addr, iotlb_entry->translated_addr);
+ ret->iova = addr & ~iotlb_entry->page_mask;
+ ret->translated_addr = iotlb_entry->translated_addr;
+ ret->addr_mask = iotlb_entry->page_mask;
+ ret->perm = iotlb_entry->perms;
+ return;
+ }
+
+ /* devices with V = 0 are not translated */
+ if (!amdvi_get_dte(s, devid, entry)) {
+ goto out;
+ }
+
+ amdvi_page_walk(as, entry, ret,
+ is_write ? AMDVI_PERM_WRITE : AMDVI_PERM_READ, addr);
+
+ amdvi_update_iotlb(s, devid, addr, *ret,
+ entry[1] & AMDVI_DEV_DOMID_ID_MASK);
+ return;
+
+out:
+ ret->iova = addr & AMDVI_PAGE_MASK_4K;
+ ret->translated_addr = addr & AMDVI_PAGE_MASK_4K;
+ ret->addr_mask = ~AMDVI_PAGE_MASK_4K;
+ ret->perm = IOMMU_RW;
+}
+
+static inline bool amdvi_is_interrupt_addr(hwaddr addr)
+{
+ return addr >= AMDVI_INT_ADDR_FIRST && addr <= AMDVI_INT_ADDR_LAST;
+}
+
+static IOMMUTLBEntry amdvi_translate(MemoryRegion *iommu, hwaddr addr,
+ bool is_write)
+{
+ AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
+ AMDVIState *s = as->iommu_state;
+ IOMMUTLBEntry ret = {
+ .target_as = &address_space_memory,
+ .iova = addr,
+ .translated_addr = 0,
+ .addr_mask = ~(hwaddr)0,
+ .perm = IOMMU_NONE
+ };
+
+ if (!s->enabled) {
+ /* AMDVI disabled - corresponds to iommu=off not
+ * failure to provide any parameter
+ */
+ ret.iova = addr & AMDVI_PAGE_MASK_4K;
+ ret.translated_addr = addr & AMDVI_PAGE_MASK_4K;
+ ret.addr_mask = ~AMDVI_PAGE_MASK_4K;
+ ret.perm = IOMMU_RW;
+ return ret;
+ } else if (amdvi_is_interrupt_addr(addr)) {
+ ret.iova = addr & AMDVI_PAGE_MASK_4K;
+ ret.translated_addr = addr & AMDVI_PAGE_MASK_4K;
+ ret.addr_mask = ~AMDVI_PAGE_MASK_4K;
+ ret.perm = IOMMU_WO;
+ return ret;
+ }
+
+ amdvi_do_translate(as, addr, is_write, &ret);
+ trace_amdvi_translation_result(as->bus_num, PCI_SLOT(as->devfn),
+ PCI_FUNC(as->devfn), addr, ret.translated_addr);
+ return ret;
+}
+
+static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
+{
+ AMDVIState *s = opaque;
+ AMDVIAddressSpace **iommu_as;
+ int bus_num = pci_bus_num(bus);
+
+ iommu_as = s->address_spaces[bus_num];
+
+ /* allocate memory during the first run */
+ if (!iommu_as) {
+ iommu_as = g_malloc0(sizeof(AMDVIAddressSpace *) * PCI_DEVFN_MAX);
+ s->address_spaces[bus_num] = iommu_as;
+ }
+
+ /* set up AMD-Vi region */
+ if (!iommu_as[devfn]) {
+ iommu_as[devfn] = g_malloc0(sizeof(AMDVIAddressSpace));
+ iommu_as[devfn]->bus_num = (uint8_t)bus_num;
+ iommu_as[devfn]->devfn = (uint8_t)devfn;
+ iommu_as[devfn]->iommu_state = s;
+
+ memory_region_init_iommu(&iommu_as[devfn]->iommu, OBJECT(s),
+ &s->iommu_ops, "amd-iommu", UINT64_MAX);
+ address_space_init(&iommu_as[devfn]->as, &iommu_as[devfn]->iommu,
+ "amd-iommu");
+ }
+ return &iommu_as[devfn]->as;
+}
+
+static const MemoryRegionOps mmio_mem_ops = {
+ .read = amdvi_mmio_read,
+ .write = amdvi_mmio_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .impl = {
+ .min_access_size = 1,
+ .max_access_size = 8,
+ .unaligned = false,
+ },
+ .valid = {
+ .min_access_size = 1,
+ .max_access_size = 8,
+ }
+};
+
+static void amdvi_iommu_notify_flag_changed(MemoryRegion *iommu,
+ IOMMUNotifierFlag old,
+ IOMMUNotifierFlag new)
+{
+ AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
+
+ if (new & IOMMU_NOTIFIER_MAP) {
+ error_report("device %02x.%02x.%x requires iommu notifier which is not "
+ "currently supported", as->bus_num, PCI_SLOT(as->devfn),
+ PCI_FUNC(as->devfn));
+ exit(1);
+ }
+}
+
+static void amdvi_init(AMDVIState *s)
+{
+ amdvi_iotlb_reset(s);
+
+ s->iommu_ops.translate = amdvi_translate;
+ s->iommu_ops.notify_flag_changed = amdvi_iommu_notify_flag_changed;
+ s->devtab_len = 0;
+ s->cmdbuf_len = 0;
+ s->cmdbuf_head = 0;
+ s->cmdbuf_tail = 0;
+ s->evtlog_head = 0;
+ s->evtlog_tail = 0;
+ s->excl_enabled = false;
+ s->excl_allow = false;
+ s->mmio_enabled = false;
+ s->enabled = false;
+ s->ats_enabled = false;
+ s->cmdbuf_enabled = false;
+
+ /* reset MMIO */
+ memset(s->mmior, 0, AMDVI_MMIO_SIZE);
+ amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES, AMDVI_EXT_FEATURES,
+ 0xffffffffffffffef, 0);
+ amdvi_set_quad(s, AMDVI_MMIO_STATUS, 0, 0x98, 0x67);
+
+ /* reset device ident */
+ pci_config_set_vendor_id(s->pci.dev.config, PCI_VENDOR_ID_AMD);
+ pci_config_set_prog_interface(s->pci.dev.config, 00);
+ pci_config_set_device_id(s->pci.dev.config, s->devid);
+ pci_config_set_class(s->pci.dev.config, 0x0806);
+
+ /* reset AMDVI specific capabilities, all r/o */
+ pci_set_long(s->pci.dev.config + s->capab_offset, AMDVI_CAPAB_FEATURES);
+ pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_BAR_LOW,
+ s->mmio.addr & ~(0xffff0000));
+ pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_BAR_HIGH,
+ (s->mmio.addr & ~(0xffff)) >> 16);
+ pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_RANGE,
+ 0xff000000);
+ pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_MISC, 0);
+ pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_MISC,
+ AMDVI_MAX_PH_ADDR | AMDVI_MAX_GVA_ADDR | AMDVI_MAX_VA_ADDR);
+}
+
+static void amdvi_reset(DeviceState *dev)
+{
+ AMDVIState *s = AMD_IOMMU_DEVICE(dev);
+
+ msi_reset(&s->pci.dev);
+ amdvi_init(s);
+}
+
+static void amdvi_realize(DeviceState *dev, Error **err)
+{
+ int ret = 0;
+ AMDVIState *s = AMD_IOMMU_DEVICE(dev);
+ X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
+ PCIBus *bus = PC_MACHINE(qdev_get_machine())->bus;
+ s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
+ amdvi_uint64_equal, g_free, g_free);
+
+ /* This device should take care of IOMMU PCI properties */
+ x86_iommu->type = TYPE_AMD;
+ qdev_set_parent_bus(DEVICE(&s->pci), &bus->qbus);
+ object_property_set_bool(OBJECT(&s->pci), true, "realized", err);
+ s->capab_offset = pci_add_capability(&s->pci.dev, AMDVI_CAPAB_ID_SEC, 0,
+ AMDVI_CAPAB_SIZE);
+ assert(s->capab_offset > 0);
+ ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_MSI, 0, AMDVI_CAPAB_REG_SIZE);
+ assert(ret > 0);
+ ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_HT, 0, AMDVI_CAPAB_REG_SIZE);
+ assert(ret > 0);
+
+ /* set up MMIO */
+ memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, "amdvi-mmio",
+ AMDVI_MMIO_SIZE);
+
+ sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->mmio);
+ sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, AMDVI_BASE_ADDR);
+ pci_setup_iommu(bus, amdvi_host_dma_iommu, s);
+ s->devid = object_property_get_int(OBJECT(&s->pci), "addr", err);
+ msi_init(&s->pci.dev, 0, 1, true, false, err);
+ amdvi_init(s);
+}
+
+static const VMStateDescription vmstate_amdvi = {
+ .name = "amd-iommu",
+ .unmigratable = 1
+};
+
+static void amdvi_instance_init(Object *klass)
+{
+ AMDVIState *s = AMD_IOMMU_DEVICE(klass);
+
+ object_initialize(&s->pci, sizeof(s->pci), TYPE_AMD_IOMMU_PCI);
+}
+
+static void amdvi_class_init(ObjectClass *klass, void* data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ X86IOMMUClass *dc_class = X86_IOMMU_CLASS(klass);
+
+ dc->reset = amdvi_reset;
+ dc->vmsd = &vmstate_amdvi;
+ dc->hotpluggable = false;
+ dc_class->realize = amdvi_realize;
+}
+
+static const TypeInfo amdvi = {
+ .name = TYPE_AMD_IOMMU_DEVICE,
+ .parent = TYPE_X86_IOMMU_DEVICE,
+ .instance_size = sizeof(AMDVIState),
+ .instance_init = amdvi_instance_init,
+ .class_init = amdvi_class_init
+};
+
+static const TypeInfo amdviPCI = {
+ .name = "AMDVI-PCI",
+ .parent = TYPE_PCI_DEVICE,
+ .instance_size = sizeof(AMDVIPCIState),
+};
+
+static void amdviPCI_register_types(void)
+{
+ type_register_static(&amdviPCI);
+ type_register_static(&amdvi);
+}
+
+type_init(amdviPCI_register_types);
diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
new file mode 100644
index 0000000000..884926e9e7
--- /dev/null
+++ b/hw/i386/amd_iommu.h
@@ -0,0 +1,289 @@
+/*
+ * QEMU emulation of an AMD IOMMU (AMD-Vi)
+ *
+ * Copyright (C) 2011 Eduard - Gabriel Munteanu
+ * Copyright (C) 2015 David Kiarie, <davidkiarie4@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef AMD_IOMMU_H_
+#define AMD_IOMMU_H_
+
+#include "hw/hw.h"
+#include "hw/pci/pci.h"
+#include "hw/pci/msi.h"
+#include "hw/sysbus.h"
+#include "sysemu/dma.h"
+#include "hw/i386/pc.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/i386/x86-iommu.h"
+
+/* Capability registers */
+#define AMDVI_CAPAB_BAR_LOW 0x04
+#define AMDVI_CAPAB_BAR_HIGH 0x08
+#define AMDVI_CAPAB_RANGE 0x0C
+#define AMDVI_CAPAB_MISC 0x10
+
+#define AMDVI_CAPAB_SIZE 0x18
+#define AMDVI_CAPAB_REG_SIZE 0x04
+
+/* Capability header data */
+#define AMDVI_CAPAB_ID_SEC 0xf
+#define AMDVI_CAPAB_FLAT_EXT (1 << 28)
+#define AMDVI_CAPAB_EFR_SUP (1 << 27)
+#define AMDVI_CAPAB_FLAG_NPCACHE (1 << 26)
+#define AMDVI_CAPAB_FLAG_HTTUNNEL (1 << 25)
+#define AMDVI_CAPAB_FLAG_IOTLBSUP (1 << 24)
+#define AMDVI_CAPAB_INIT_TYPE (3 << 16)
+
+/* No. of used MMIO registers */
+#define AMDVI_MMIO_REGS_HIGH 8
+#define AMDVI_MMIO_REGS_LOW 7
+
+/* MMIO registers */
+#define AMDVI_MMIO_DEVICE_TABLE 0x0000
+#define AMDVI_MMIO_COMMAND_BASE 0x0008
+#define AMDVI_MMIO_EVENT_BASE 0x0010
+#define AMDVI_MMIO_CONTROL 0x0018
+#define AMDVI_MMIO_EXCL_BASE 0x0020
+#define AMDVI_MMIO_EXCL_LIMIT 0x0028
+#define AMDVI_MMIO_EXT_FEATURES 0x0030
+#define AMDVI_MMIO_COMMAND_HEAD 0x2000
+#define AMDVI_MMIO_COMMAND_TAIL 0x2008
+#define AMDVI_MMIO_EVENT_HEAD 0x2010
+#define AMDVI_MMIO_EVENT_TAIL 0x2018
+#define AMDVI_MMIO_STATUS 0x2020
+#define AMDVI_MMIO_PPR_BASE 0x0038
+#define AMDVI_MMIO_PPR_HEAD 0x2030
+#define AMDVI_MMIO_PPR_TAIL 0x2038
+
+#define AMDVI_MMIO_SIZE 0x4000
+
+#define AMDVI_MMIO_DEVTAB_SIZE_MASK ((1ULL << 12) - 1)
+#define AMDVI_MMIO_DEVTAB_BASE_MASK (((1ULL << 52) - 1) & ~ \
+ AMDVI_MMIO_DEVTAB_SIZE_MASK)
+#define AMDVI_MMIO_DEVTAB_ENTRY_SIZE 32
+#define AMDVI_MMIO_DEVTAB_SIZE_UNIT 4096
+
+/* some of these are similar, but kept separate for readability */
+#define AMDVI_MMIO_CMDBUF_SIZE_BYTE (AMDVI_MMIO_COMMAND_BASE + 7)
+#define AMDVI_MMIO_CMDBUF_SIZE_MASK 0x0f
+#define AMDVI_MMIO_CMDBUF_BASE_MASK AMDVI_MMIO_DEVTAB_BASE_MASK
+#define AMDVI_MMIO_CMDBUF_HEAD_MASK (((1ULL << 19) - 1) & ~0x0f)
+#define AMDVI_MMIO_CMDBUF_TAIL_MASK AMDVI_MMIO_EVTLOG_HEAD_MASK
+
+#define AMDVI_MMIO_EVTLOG_SIZE_BYTE (AMDVI_MMIO_EVENT_BASE + 7)
+#define AMDVI_MMIO_EVTLOG_SIZE_MASK AMDVI_MMIO_CMDBUF_SIZE_MASK
+#define AMDVI_MMIO_EVTLOG_BASE_MASK AMDVI_MMIO_CMDBUF_BASE_MASK
+#define AMDVI_MMIO_EVTLOG_HEAD_MASK (((1ULL << 19) - 1) & ~0x0f)
+#define AMDVI_MMIO_EVTLOG_TAIL_MASK AMDVI_MMIO_EVTLOG_HEAD_MASK
+
+#define AMDVI_MMIO_PPRLOG_SIZE_BYTE (AMDVI_MMIO_EVENT_BASE + 7)
+#define AMDVI_MMIO_PPRLOG_HEAD_MASK AMDVI_MMIO_EVTLOG_HEAD_MASK
+#define AMDVI_MMIO_PPRLOG_TAIL_MASK AMDVI_MMIO_EVTLOG_HEAD_MASK
+#define AMDVI_MMIO_PPRLOG_BASE_MASK AMDVI_MMIO_EVTLOG_BASE_MASK
+#define AMDVI_MMIO_PPRLOG_SIZE_MASK AMDVI_MMIO_EVTLOG_SIZE_MASK
+
+#define AMDVI_MMIO_EXCL_ENABLED_MASK (1ULL << 0)
+#define AMDVI_MMIO_EXCL_ALLOW_MASK (1ULL << 1)
+#define AMDVI_MMIO_EXCL_LIMIT_MASK AMDVI_MMIO_DEVTAB_BASE_MASK
+#define AMDVI_MMIO_EXCL_LIMIT_LOW 0xfff
+
+/* mmio control register flags */
+#define AMDVI_MMIO_CONTROL_AMDVIEN (1ULL << 0)
+#define AMDVI_MMIO_CONTROL_HTTUNEN (1ULL << 1)
+#define AMDVI_MMIO_CONTROL_EVENTLOGEN (1ULL << 2)
+#define AMDVI_MMIO_CONTROL_EVENTINTEN (1ULL << 3)
+#define AMDVI_MMIO_CONTROL_COMWAITINTEN (1ULL << 4)
+#define AMDVI_MMIO_CONTROL_CMDBUFLEN (1ULL << 12)
+
+/* MMIO status register bits */
+#define AMDVI_MMIO_STATUS_CMDBUF_RUN (1 << 4)
+#define AMDVI_MMIO_STATUS_EVT_RUN (1 << 3)
+#define AMDVI_MMIO_STATUS_COMP_INT (1 << 2)
+#define AMDVI_MMIO_STATUS_EVT_OVF (1 << 0)
+
+#define AMDVI_CMDBUF_ID_BYTE 0x07
+#define AMDVI_CMDBUF_ID_RSHIFT 4
+
+#define AMDVI_CMD_COMPLETION_WAIT 0x01
+#define AMDVI_CMD_INVAL_DEVTAB_ENTRY 0x02
+#define AMDVI_CMD_INVAL_AMDVI_PAGES 0x03
+#define AMDVI_CMD_INVAL_IOTLB_PAGES 0x04
+#define AMDVI_CMD_INVAL_INTR_TABLE 0x05
+#define AMDVI_CMD_PREFETCH_AMDVI_PAGES 0x06
+#define AMDVI_CMD_COMPLETE_PPR_REQUEST 0x07
+#define AMDVI_CMD_INVAL_AMDVI_ALL 0x08
+
+#define AMDVI_DEVTAB_ENTRY_SIZE 32
+
+/* Device table entry bits 0:63 */
+#define AMDVI_DEV_VALID (1ULL << 0)
+#define AMDVI_DEV_TRANSLATION_VALID (1ULL << 1)
+#define AMDVI_DEV_MODE_MASK 0x7
+#define AMDVI_DEV_MODE_RSHIFT 9
+#define AMDVI_DEV_PT_ROOT_MASK 0xffffffffff000
+#define AMDVI_DEV_PT_ROOT_RSHIFT 12
+#define AMDVI_DEV_PERM_SHIFT 61
+#define AMDVI_DEV_PERM_READ (1ULL << 61)
+#define AMDVI_DEV_PERM_WRITE (1ULL << 62)
+
+/* Device table entry bits 64:127 */
+#define AMDVI_DEV_DOMID_ID_MASK ((1ULL << 16) - 1)
+
+/* Event codes and flags, as stored in the info field */
+#define AMDVI_EVENT_ILLEGAL_DEVTAB_ENTRY (0x1U << 12)
+#define AMDVI_EVENT_IOPF (0x2U << 12)
+#define AMDVI_EVENT_IOPF_I (1U << 3)
+#define AMDVI_EVENT_DEV_TAB_HW_ERROR (0x3U << 12)
+#define AMDVI_EVENT_PAGE_TAB_HW_ERROR (0x4U << 12)
+#define AMDVI_EVENT_ILLEGAL_COMMAND_ERROR (0x5U << 12)
+#define AMDVI_EVENT_COMMAND_HW_ERROR (0x6U << 12)
+
+#define AMDVI_EVENT_LEN 16
+#define AMDVI_PERM_READ (1 << 0)
+#define AMDVI_PERM_WRITE (1 << 1)
+
+#define AMDVI_FEATURE_PREFETCH (1ULL << 0) /* page prefetch */
+#define AMDVI_FEATURE_PPR (1ULL << 1) /* PPR Support */
+#define AMDVI_FEATURE_GT (1ULL << 4) /* Guest Translation */
+#define AMDVI_FEATURE_IA (1ULL << 6) /* inval all support */
+#define AMDVI_FEATURE_GA (1ULL << 7) /* guest VAPIC support */
+#define AMDVI_FEATURE_HE (1ULL << 8) /* hardware error regs */
+#define AMDVI_FEATURE_PC (1ULL << 9) /* Perf counters */
+
+/* reserved DTE bits */
+#define AMDVI_DTE_LOWER_QUAD_RESERVED 0x80300000000000fc
+#define AMDVI_DTE_MIDDLE_QUAD_RESERVED 0x0000000000000100
+#define AMDVI_DTE_UPPER_QUAD_RESERVED 0x08f0000000000000
+
+/* AMDVI paging mode */
+#define AMDVI_GATS_MODE (6ULL << 12)
+#define AMDVI_HATS_MODE (6ULL << 10)
+
+/* IOTLB */
+#define AMDVI_IOTLB_MAX_SIZE 1024
+#define AMDVI_DEVID_SHIFT 36
+
+/* extended feature support */
+#define AMDVI_EXT_FEATURES (AMDVI_FEATURE_PREFETCH | AMDVI_FEATURE_PPR | \
+ AMDVI_FEATURE_IA | AMDVI_FEATURE_GT | AMDVI_FEATURE_HE | \
+ AMDVI_GATS_MODE | AMDVI_HATS_MODE)
+
+/* capabilities header */
+#define AMDVI_CAPAB_FEATURES (AMDVI_CAPAB_FLAT_EXT | \
+ AMDVI_CAPAB_FLAG_NPCACHE | AMDVI_CAPAB_FLAG_IOTLBSUP \
+ | AMDVI_CAPAB_ID_SEC | AMDVI_CAPAB_INIT_TYPE | \
+ AMDVI_CAPAB_FLAG_HTTUNNEL | AMDVI_CAPAB_EFR_SUP)
+
+/* AMDVI default address */
+#define AMDVI_BASE_ADDR 0xfed80000
+
+/* page management constants */
+#define AMDVI_PAGE_SHIFT 12
+#define AMDVI_PAGE_SIZE (1ULL << AMDVI_PAGE_SHIFT)
+
+#define AMDVI_PAGE_SHIFT_4K 12
+#define AMDVI_PAGE_MASK_4K (~((1ULL << AMDVI_PAGE_SHIFT_4K) - 1))
+
+#define AMDVI_MAX_VA_ADDR (48UL << 5)
+#define AMDVI_MAX_PH_ADDR (40UL << 8)
+#define AMDVI_MAX_GVA_ADDR (48UL << 15)
+
+/* Completion Wait data size */
+#define AMDVI_COMPLETION_DATA_SIZE 8
+
+#define AMDVI_COMMAND_SIZE 16
+
+#define AMDVI_INT_ADDR_FIRST 0xfee00000
+#define AMDVI_INT_ADDR_LAST 0xfeefffff
+
+#define TYPE_AMD_IOMMU_DEVICE "amd-iommu"
+#define AMD_IOMMU_DEVICE(obj)\
+ OBJECT_CHECK(AMDVIState, (obj), TYPE_AMD_IOMMU_DEVICE)
+
+#define TYPE_AMD_IOMMU_PCI "AMDVI-PCI"
+
+typedef struct AMDVIAddressSpace AMDVIAddressSpace;
+
+/* functions to steal PCI config space */
+typedef struct AMDVIPCIState {
+ PCIDevice dev; /* The PCI device itself */
+} AMDVIPCIState;
+
+typedef struct AMDVIState {
+ X86IOMMUState iommu; /* IOMMU bus device */
+ AMDVIPCIState pci; /* IOMMU PCI device */
+
+ uint32_t version;
+ uint32_t capab_offset; /* capability offset pointer */
+
+ uint64_t mmio_addr;
+
+ uint32_t devid; /* auto-assigned devid */
+
+ bool enabled; /* IOMMU enabled */
+ bool ats_enabled; /* address translation enabled */
+ bool cmdbuf_enabled; /* command buffer enabled */
+ bool evtlog_enabled; /* event log enabled */
+ bool excl_enabled;
+
+ hwaddr devtab; /* base address device table */
+ size_t devtab_len; /* device table length */
+
+ hwaddr cmdbuf; /* command buffer base address */
+ uint64_t cmdbuf_len; /* command buffer length */
+ uint32_t cmdbuf_head; /* current IOMMU read position */
+ uint32_t cmdbuf_tail; /* next Software write position */
+ bool completion_wait_intr;
+
+ hwaddr evtlog; /* base address event log */
+ bool evtlog_intr;
+ uint32_t evtlog_len; /* event log length */
+ uint32_t evtlog_head; /* current IOMMU write position */
+ uint32_t evtlog_tail; /* current Software read position */
+
+ /* unused for now */
+ hwaddr excl_base; /* base DVA - IOMMU exclusion range */
+ hwaddr excl_limit; /* limit of IOMMU exclusion range */
+ bool excl_allow; /* translate accesses to the exclusion range */
+ bool excl_enable; /* exclusion range enabled */
+
+ hwaddr ppr_log; /* base address ppr log */
+ uint32_t pprlog_len; /* ppr log len */
+ uint32_t pprlog_head; /* ppr log head */
+ uint32_t pprlog_tail; /* ppr log tail */
+
+ MemoryRegion mmio; /* MMIO region */
+ uint8_t mmior[AMDVI_MMIO_SIZE]; /* read/write MMIO */
+ uint8_t w1cmask[AMDVI_MMIO_SIZE]; /* read/write 1 clear mask */
+ uint8_t romask[AMDVI_MMIO_SIZE]; /* MMIO read/only mask */
+ bool mmio_enabled;
+
+ /* IOMMU function */
+ MemoryRegionIOMMUOps iommu_ops;
+
+ /* for each served device */
+ AMDVIAddressSpace **address_spaces[PCI_BUS_MAX];
+
+ /* IOTLB */
+ GHashTable *iotlb;
+} AMDVIState;
+
+#endif
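The three shadow arrays in AMDVIState (mmior, w1cmask, romask) imply the usual read-only / write-1-to-clear register discipline. A plausible 64-bit write path, sketched with QEMU's ldq_le_p/stq_le_p accessors (illustrative, not necessarily the patch's exact helper):

    uint64_t old = ldq_le_p(&s->mmior[addr]);
    uint64_t ro  = ldq_le_p(&s->romask[addr]);
    uint64_t w1c = ldq_le_p(&s->w1cmask[addr]);
    /* keep read-only bits, merge writable bits from val,
     * then clear whatever write-1-to-clear bits val set */
    stq_le_p(&s->mmior[addr],
             ((old & ro) | (val & ~ro)) & ~(val & w1c));
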
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 28c31a2cdf..5f3e35123d 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -21,16 +21,20 @@
#include "qemu/osdep.h"
#include "qemu/error-report.h"
+#include "qapi/error.h"
#include "hw/sysbus.h"
#include "exec/address-spaces.h"
#include "intel_iommu_internal.h"
#include "hw/pci/pci.h"
#include "hw/pci/pci_bus.h"
#include "hw/i386/pc.h"
+#include "hw/i386/apic-msidef.h"
#include "hw/boards.h"
#include "hw/i386/x86-iommu.h"
#include "hw/pci-host/q35.h"
#include "sysemu/kvm.h"
+#include "hw/i386/apic_internal.h"
+#include "kvm_i386.h"
/*#define DEBUG_INTEL_IOMMU*/
#ifdef DEBUG_INTEL_IOMMU
@@ -214,7 +218,7 @@ static void vtd_reset_iotlb(IntelIOMMUState *s)
g_hash_table_remove_all(s->iotlb);
}
-static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint8_t source_id,
+static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint16_t source_id,
uint32_t level)
{
return gfn | ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT) |
@@ -279,18 +283,17 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
static void vtd_generate_interrupt(IntelIOMMUState *s, hwaddr mesg_addr_reg,
hwaddr mesg_data_reg)
{
- hwaddr addr;
- uint32_t data;
+ MSIMessage msi;
assert(mesg_data_reg < DMAR_REG_SIZE);
assert(mesg_addr_reg < DMAR_REG_SIZE);
- addr = vtd_get_long_raw(s, mesg_addr_reg);
- data = vtd_get_long_raw(s, mesg_data_reg);
+ msi.address = vtd_get_long_raw(s, mesg_addr_reg);
+ msi.data = vtd_get_long_raw(s, mesg_data_reg);
- VTD_DPRINTF(FLOG, "msi: addr 0x%"PRIx64 " data 0x%"PRIx32, addr, data);
- address_space_stl_le(&address_space_memory, addr, data,
- MEMTXATTRS_UNSPECIFIED, NULL);
+ VTD_DPRINTF(FLOG, "msi: addr 0x%"PRIx64 " data 0x%"PRIx32,
+ msi.address, msi.data);
+ apic_get_class()->send_msi(&msi);
}
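The rewrite above routes fault-event MSIs through the per-APIC send_msi hook (added to APICCommonClass elsewhere in this merge) instead of a raw memory store. A caller-side sketch with illustrative values:

    MSIMessage msi = {
        .address = 0xfee00000,   /* example LAPIC doorbell address */
        .data    = 0x0041,       /* example vector/delivery payload */
    };
    apic_get_class()->send_msi(&msi);
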
/* Generate a fault event to software via MSI if conditions are met.
@@ -985,6 +988,7 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
mask = 7; /* Mask bit 2:0 in the SID field */
break;
}
+ mask = ~mask;
VTD_DPRINTF(INV, "device-selective invalidation source 0x%"PRIx16
" mask %"PRIu16, source_id, mask);
vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id));
@@ -1974,14 +1978,20 @@ static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr,
return ret;
}
-static void vtd_iommu_notify_started(MemoryRegion *iommu)
+static void vtd_iommu_notify_flag_changed(MemoryRegion *iommu,
+ IOMMUNotifierFlag old,
+ IOMMUNotifierFlag new)
{
VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
- hw_error("Device at bus %s addr %02x.%d requires iommu notifier which "
- "is currently not supported by intel-iommu emulation",
- vtd_as->bus->qbus.name, PCI_SLOT(vtd_as->devfn),
- PCI_FUNC(vtd_as->devfn));
+ if (new & IOMMU_NOTIFIER_MAP) {
+ error_report("Device at bus %s addr %02x.%d requires iommu "
+ "notifier which is currently not supported by "
+ "intel-iommu emulation",
+ vtd_as->bus->qbus.name, PCI_SLOT(vtd_as->devfn),
+ PCI_FUNC(vtd_as->devfn));
+ exit(1);
+ }
}
static const VMStateDescription vtd_vmstate = {
@@ -2005,6 +2015,9 @@ static const MemoryRegionOps vtd_mem_ops = {
static Property vtd_properties[] = {
DEFINE_PROP_UINT32("version", IntelIOMMUState, version, 0),
+ DEFINE_PROP_ON_OFF_AUTO("eim", IntelIOMMUState, intr_eim,
+ ON_OFF_AUTO_AUTO),
+ DEFINE_PROP_BOOL("x-buggy-eim", IntelIOMMUState, buggy_eim, false),
DEFINE_PROP_END_OF_LIST(),
};
@@ -2127,6 +2140,7 @@ static void vtd_generate_msi_message(VTDIrq *irq, MSIMessage *msg_out)
msg.dest_mode = irq->dest_mode;
msg.redir_hint = irq->redir_hint;
msg.dest = irq->dest;
+ msg.__addr_hi = irq->dest & 0xffffff00;
msg.__addr_head = cpu_to_le32(0xfee);
/* Keep this from original MSI address bits */
msg.__not_used = irq->msi_addr_last_bits;
@@ -2167,7 +2181,7 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
}
addr.data = origin->address & VTD_MSI_ADDR_LO_MASK;
- if (le16_to_cpu(addr.addr.__head) != 0xfee) {
+ if (addr.addr.__head != 0xfee) {
VTD_DPRINTF(GENERAL, "error: MSI addr low 32 bits invalid: "
"0x%"PRIx32, addr.data);
return -VTD_FR_IR_REQ_RSVD;
@@ -2203,6 +2217,8 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
}
} else {
uint8_t vector = origin->data & 0xff;
+ uint8_t trigger_mode = (origin->data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
+
VTD_DPRINTF(IR, "received IOAPIC interrupt");
/* IOAPIC entry vector should be aligned with IRTE vector
* (see vt-d spec 5.1.5.1). */
@@ -2211,6 +2227,15 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
"entry: %d, IRTE: %d, index: %d",
vector, irq.vector, index);
}
+
+ /* The Trigger Mode field must match the Trigger Mode in the IRTE.
+ * (see vt-d spec 5.1.5.1). */
+ if (trigger_mode != irq.trigger_mode) {
+ VTD_DPRINTF(GENERAL, "IOAPIC trigger mode inconsistent: "
+ "entry: %u, IRTE: %u, index: %d",
+ trigger_mode, irq.trigger_mode, index);
+ }
+
}
/*
@@ -2275,11 +2300,7 @@ static MemTxResult vtd_mem_ir_write(void *opaque, hwaddr addr,
" for device sid 0x%04x",
to.address, to.data, sid);
- if (dma_memory_write(&address_space_memory, to.address,
- &to.data, size)) {
- VTD_DPRINTF(GENERAL, "error: fail to write 0x%"PRIx64
- " value 0x%"PRIx32, to.address, to.data);
- }
+ apic_get_class()->send_msi(&to);
return MEMTX_OK;
}
@@ -2348,7 +2369,7 @@ static void vtd_init(IntelIOMMUState *s)
memset(s->womask, 0, DMAR_REG_SIZE);
s->iommu_ops.translate = vtd_iommu_translate;
- s->iommu_ops.notify_started = vtd_iommu_notify_started;
+ s->iommu_ops.notify_flag_changed = vtd_iommu_notify_flag_changed;
s->root = 0;
s->root_extended = false;
s->dmar_enabled = false;
@@ -2364,7 +2385,11 @@ static void vtd_init(IntelIOMMUState *s)
s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
if (x86_iommu->intr_supported) {
- s->ecap |= VTD_ECAP_IR | VTD_ECAP_EIM | VTD_ECAP_MHMV;
+ s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV;
+ if (s->intr_eim == ON_OFF_AUTO_ON) {
+ s->ecap |= VTD_ECAP_EIM;
+ }
+ assert(s->intr_eim != ON_OFF_AUTO_AUTO);
}
vtd_reset_context_cache(s);
@@ -2439,12 +2464,48 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
IntelIOMMUState *s = opaque;
VTDAddressSpace *vtd_as;
- assert(0 <= devfn && devfn <= X86_IOMMU_PCI_DEVFN_MAX);
+ assert(0 <= devfn && devfn < X86_IOMMU_PCI_DEVFN_MAX);
vtd_as = vtd_find_add_as(s, bus, devfn);
return &vtd_as->as;
}
+static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
+{
+ X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
+
+ /* Currently Intel IOMMU IR only support "kernel-irqchip={off|split}" */
+ if (x86_iommu->intr_supported && kvm_irqchip_in_kernel() &&
+ !kvm_irqchip_is_split()) {
+ error_setg(errp, "Intel Interrupt Remapping cannot work with "
+ "kernel-irqchip=on, please use 'split|off'.");
+ return false;
+ }
+ if (s->intr_eim == ON_OFF_AUTO_ON && !x86_iommu->intr_supported) {
+ error_setg(errp, "eim=on cannot be selected without intremap=on");
+ return false;
+ }
+
+ if (s->intr_eim == ON_OFF_AUTO_AUTO) {
+ s->intr_eim = (kvm_irqchip_in_kernel() || s->buggy_eim)
+ && x86_iommu->intr_supported ?
+ ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
+ }
+ if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) {
+ if (!kvm_irqchip_in_kernel()) {
+ error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split");
+ return false;
+ }
+ if (!kvm_enable_x2apic()) {
+ error_setg(errp, "eim=on requires support on the KVM side"
+ "(X2APIC_API, first shipped in v4.7)");
+ return false;
+ }
+ }
+
+ return true;
+}
+
static void vtd_realize(DeviceState *dev, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
@@ -2453,6 +2514,12 @@ static void vtd_realize(DeviceState *dev, Error **errp)
X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
VTD_DPRINTF(GENERAL, "");
+ x86_iommu->type = TYPE_INTEL;
+
+ if (!vtd_decide_config(s, errp)) {
+ return;
+ }
+
memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
"intel_iommu", DMAR_REG_SIZE);
@@ -2467,14 +2534,6 @@ static void vtd_realize(DeviceState *dev, Error **errp)
pci_setup_iommu(bus, vtd_host_dma_iommu, dev);
/* Pseudo address space under root PCI bus. */
pcms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC);
-
- /* Currently Intel IOMMU IR only support "kernel-irqchip={off|split}" */
- if (x86_iommu->intr_supported && kvm_irqchip_in_kernel() &&
- !kvm_irqchip_is_split()) {
- error_report("Intel Interrupt Remapping cannot work with "
- "kernel-irqchip=on, please use 'split|off'.");
- exit(1);
- }
}
static void vtd_class_init(ObjectClass *klass, void *data)
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 0829a5064f..11abfa2233 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -115,7 +115,7 @@
/* The shift of source_id in the key of IOTLB hash table */
#define VTD_IOTLB_SID_SHIFT 36
-#define VTD_IOTLB_LVL_SHIFT 44
+#define VTD_IOTLB_LVL_SHIFT 52
#define VTD_IOTLB_MAX_SIZE 1024 /* Max size of the hash table */
/* IOTLB_REG */
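The shift bump lines up with the widened 16-bit source_id in vtd_get_iotlb_key earlier in this merge: the gfn occupies bits 0..35 and the SID bits 36..51, so a level field starting at bit 44 collided with the SID. A hedged restatement of the key layout:

    static uint64_t key(uint64_t gfn, uint16_t sid, uint32_t level)
    {
        return gfn | ((uint64_t)sid << VTD_IOTLB_SID_SHIFT) |   /* 36 */
               ((uint64_t)level << VTD_IOTLB_LVL_SHIFT);        /* 52 */
    }
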
diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c
index 2bd0de82b4..01cbaa88d2 100644
--- a/hw/i386/kvm/apic.c
+++ b/hw/i386/kvm/apic.c
@@ -15,6 +15,7 @@
#include "hw/i386/apic_internal.h"
#include "hw/pci/msi.h"
#include "sysemu/kvm.h"
+#include "target-i386/kvm_i386.h"
static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic,
int reg_id, uint32_t val)
@@ -28,13 +29,16 @@ static inline uint32_t kvm_apic_get_reg(struct kvm_lapic_state *kapic,
return *((uint32_t *)(kapic->regs + (reg_id << 4)));
}
-void kvm_put_apic_state(DeviceState *dev, struct kvm_lapic_state *kapic)
+static void kvm_put_apic_state(APICCommonState *s, struct kvm_lapic_state *kapic)
{
- APICCommonState *s = APIC_COMMON(dev);
int i;
memset(kapic, 0, sizeof(*kapic));
- kvm_apic_set_reg(kapic, 0x2, s->id << 24);
+ if (kvm_has_x2apic_api() && s->apicbase & MSR_IA32_APICBASE_EXTD) {
+ kvm_apic_set_reg(kapic, 0x2, s->initial_apic_id);
+ } else {
+ kvm_apic_set_reg(kapic, 0x2, s->id << 24);
+ }
kvm_apic_set_reg(kapic, 0x8, s->tpr);
kvm_apic_set_reg(kapic, 0xd, s->log_dest << 24);
kvm_apic_set_reg(kapic, 0xe, s->dest_mode << 28 | 0x0fffffff);
@@ -59,7 +63,11 @@ void kvm_get_apic_state(DeviceState *dev, struct kvm_lapic_state *kapic)
APICCommonState *s = APIC_COMMON(dev);
int i, v;
- s->id = kvm_apic_get_reg(kapic, 0x2) >> 24;
+ if (kvm_has_x2apic_api() && s->apicbase & MSR_IA32_APICBASE_EXTD) {
+ assert(kvm_apic_get_reg(kapic, 0x2) == s->initial_apic_id);
+ } else {
+ s->id = kvm_apic_get_reg(kapic, 0x2) >> 24;
+ }
s->tpr = kvm_apic_get_reg(kapic, 0x8);
s->arb_id = kvm_apic_get_reg(kapic, 0x9);
s->log_dest = kvm_apic_get_reg(kapic, 0xd) >> 24;
@@ -125,10 +133,30 @@ static void kvm_apic_vapic_base_update(APICCommonState *s)
}
}
-static void do_inject_external_nmi(void *data)
+static void kvm_apic_put(CPUState *cs, run_on_cpu_data data)
+{
+ APICCommonState *s = data.host_ptr;
+ struct kvm_lapic_state kapic;
+ int ret;
+
+ kvm_put_apicbase(s->cpu, s->apicbase);
+ kvm_put_apic_state(s, &kapic);
+
+ ret = kvm_vcpu_ioctl(CPU(s->cpu), KVM_SET_LAPIC, &kapic);
+ if (ret < 0) {
+ fprintf(stderr, "KVM_SET_LAPIC failed: %s\n", strerror(ret));
+ abort();
+ }
+}
+
+static void kvm_apic_post_load(APICCommonState *s)
+{
+ run_on_cpu(CPU(s->cpu), kvm_apic_put, RUN_ON_CPU_HOST_PTR(s));
+}
+
+static void do_inject_external_nmi(CPUState *cpu, run_on_cpu_data data)
{
- APICCommonState *s = data;
- CPUState *cpu = CPU(s->cpu);
+ APICCommonState *s = data.host_ptr;
uint32_t lvt;
int ret;
@@ -146,7 +174,18 @@ static void do_inject_external_nmi(void *data)
static void kvm_apic_external_nmi(APICCommonState *s)
{
- run_on_cpu(CPU(s->cpu), do_inject_external_nmi, s);
+ run_on_cpu(CPU(s->cpu), do_inject_external_nmi, RUN_ON_CPU_HOST_PTR(s));
+}
+
+static void kvm_send_msi(MSIMessage *msg)
+{
+ int ret;
+
+ ret = kvm_irqchip_send_msi(kvm_state, *msg);
+ if (ret < 0) {
+ fprintf(stderr, "KVM: injection failed, MSI lost (%s)\n",
+ strerror(-ret));
+ }
}
static uint64_t kvm_apic_mem_read(void *opaque, hwaddr addr,
@@ -159,13 +198,8 @@ static void kvm_apic_mem_write(void *opaque, hwaddr addr,
uint64_t data, unsigned size)
{
MSIMessage msg = { .address = addr, .data = data };
- int ret;
- ret = kvm_irqchip_send_msi(kvm_state, msg);
- if (ret < 0) {
- fprintf(stderr, "KVM: injection failed, MSI lost (%s)\n",
- strerror(-ret));
- }
+ kvm_send_msi(&msg);
}
static const MemoryRegionOps kvm_apic_io_ops = {
@@ -178,6 +212,8 @@ static void kvm_apic_reset(APICCommonState *s)
{
/* Not used by KVM, which uses the CPU mp_state instead. */
s->wait_for_sipi = 0;
+
+ run_on_cpu(CPU(s->cpu), kvm_apic_put, RUN_ON_CPU_HOST_PTR(s));
}
static void kvm_apic_realize(DeviceState *dev, Error **errp)
@@ -206,9 +242,11 @@ static void kvm_apic_class_init(ObjectClass *klass, void *data)
k->set_base = kvm_apic_set_base;
k->set_tpr = kvm_apic_set_tpr;
k->get_tpr = kvm_apic_get_tpr;
+ k->post_load = kvm_apic_post_load;
k->enable_tpr_reporting = kvm_apic_enable_tpr_reporting;
k->vapic_base_update = kvm_apic_vapic_base_update;
k->external_nmi = kvm_apic_external_nmi;
+ k->send_msi = kvm_send_msi;
}
static const TypeInfo kvm_apic_info = {
diff --git a/hw/i386/kvm/i8259.c b/hw/i386/kvm/i8259.c
index 2b207de01b..11d1b726b6 100644
--- a/hw/i386/kvm/i8259.c
+++ b/hw/i386/kvm/i8259.c
@@ -92,7 +92,7 @@ static void kvm_pic_put(PICCommonState *s)
ret = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, &chip);
if (ret < 0) {
- fprintf(stderr, "KVM_GET_IRQCHIP failed: %s\n", strerror(ret));
+ fprintf(stderr, "KVM_SET_IRQCHIP failed: %s\n", strerror(ret));
abort();
}
}
diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
index 8238fbc630..87dcbdd51a 100644
--- a/hw/i386/kvm/pci-assign.c
+++ b/hw/i386/kvm/pci-assign.c
@@ -1251,6 +1251,7 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev, Error **errp)
error_propagate(errp, local_err);
return -ENOTSUP;
}
+ dev->dev.cap_present |= QEMU_PCI_CAP_MSI;
dev->cap.available |= ASSIGNED_DEVICE_CAP_MSI;
/* Only 32-bit/no-mask currently supported */
ret = pci_add_capability2(pci_dev, PCI_CAP_ID_MSI, pos, 10,
@@ -1285,6 +1286,7 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev, Error **errp)
error_propagate(errp, local_err);
return -ENOTSUP;
}
+ dev->dev.cap_present |= QEMU_PCI_CAP_MSIX;
dev->cap.available |= ASSIGNED_DEVICE_CAP_MSIX;
ret = pci_add_capability2(pci_dev, PCI_CAP_ID_MSIX, pos, 12,
&local_err);
@@ -1648,6 +1650,7 @@ static void assigned_dev_register_msix_mmio(AssignedDevice *dev, Error **errp)
dev->msix_table = NULL;
return;
}
+ dev->dev.msix_table = (uint8_t *)dev->msix_table;
assigned_dev_msix_reset(dev);
@@ -1665,6 +1668,7 @@ static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev)
error_report("error unmapping msix_table! %s", strerror(errno));
}
dev->msix_table = NULL;
+ dev->dev.msix_table = NULL;
}
static const VMStateDescription vmstate_assigned_device = {
diff --git a/hw/i386/kvmvapic.c b/hw/i386/kvmvapic.c
index 3bf1ddd976..b30d1b90c6 100644
--- a/hw/i386/kvmvapic.c
+++ b/hw/i386/kvmvapic.c
@@ -17,6 +17,7 @@
#include "sysemu/kvm.h"
#include "hw/i386/apic_internal.h"
#include "hw/sysbus.h"
+#include "tcg/tcg.h"
#define VAPIC_IO_PORT 0x7e
@@ -449,6 +450,9 @@ static void patch_instruction(VAPICROMState *s, X86CPU *cpu, target_ulong ip)
resume_all_vcpus();
if (!kvm_enabled()) {
+ /* tb_lock will be reset when cpu_loop_exit_noexc longjmps
+ * back into the cpu_exec loop. */
+ tb_lock();
tb_gen_code(cs, current_pc, current_cs_base, current_flags, 1);
cpu_loop_exit_noexc(cs);
}
@@ -483,10 +487,9 @@ typedef struct VAPICEnableTPRReporting {
bool enable;
} VAPICEnableTPRReporting;
-static void vapic_do_enable_tpr_reporting(void *data)
+static void vapic_do_enable_tpr_reporting(CPUState *cpu, run_on_cpu_data data)
{
- VAPICEnableTPRReporting *info = data;
-
+ VAPICEnableTPRReporting *info = data.host_ptr;
apic_enable_tpr_access_reporting(info->apic, info->enable);
}
@@ -501,7 +504,7 @@ static void vapic_enable_tpr_reporting(bool enable)
CPU_FOREACH(cs) {
cpu = X86_CPU(cs);
info.apic = cpu->apic_state;
- run_on_cpu(cs, vapic_do_enable_tpr_reporting, &info);
+ run_on_cpu(cs, vapic_do_enable_tpr_reporting, RUN_ON_CPU_HOST_PTR(&info));
}
}
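Several hunks in this merge move to the new run_on_cpu calling convention: handlers take a run_on_cpu_data argument, and host pointers travel through RUN_ON_CPU_HOST_PTR(). A minimal sketch, with MyState as a placeholder type:

    static void my_handler(CPUState *cpu, run_on_cpu_data data)
    {
        MyState *s = data.host_ptr;   /* unwrap the host pointer */
        /* ... runs in the target vCPU's context ... */
    }

    run_on_cpu(cs, my_handler, RUN_ON_CPU_HOST_PTR(s));
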
@@ -734,10 +737,10 @@ static void vapic_realize(DeviceState *dev, Error **errp)
nb_option_roms++;
}
-static void do_vapic_enable(void *data)
+static void do_vapic_enable(CPUState *cs, run_on_cpu_data data)
{
- VAPICROMState *s = data;
- X86CPU *cpu = X86_CPU(first_cpu);
+ VAPICROMState *s = data.host_ptr;
+ X86CPU *cpu = X86_CPU(cs);
static const uint8_t enabled = 1;
cpu_physical_memory_write(s->vapic_paddr + offsetof(VAPICState, enabled),
@@ -758,7 +761,7 @@ static void kvmvapic_vm_state_change(void *opaque, int running,
if (s->state == VAPIC_ACTIVE) {
if (smp_cpus == 1) {
- run_on_cpu(first_cpu, do_vapic_enable, s);
+ run_on_cpu(first_cpu, do_vapic_enable, RUN_ON_CPU_HOST_PTR(s));
} else {
zero = g_malloc0(s->rom_state.vapic_size);
cpu_physical_memory_write(s->vapic_paddr, zero,
@@ -768,6 +771,7 @@ static void kvmvapic_vm_state_change(void *opaque, int running,
}
qemu_del_vm_change_state_handler(s->vmsentry);
+ s->vmsentry = NULL;
}
static int vapic_post_load(void *opaque, int version_id)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 0daa4d1f7f..0779fa2639 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -68,6 +68,7 @@
#include "qapi-visit.h"
#include "qom/cpu.h"
#include "hw/nmi.h"
+#include "hw/i386/intel_iommu.h"
#include "sysemu/hax.h"
@@ -163,13 +164,15 @@ int cpu_get_pic_interrupt(CPUX86State *env)
X86CPU *cpu = x86_env_get_cpu(env);
int intno;
- intno = apic_get_interrupt(cpu->apic_state);
- if (intno >= 0) {
- return intno;
- }
- /* read the irq from the PIC */
- if (!apic_accept_pic_intr(cpu->apic_state)) {
- return -1;
+ if (!kvm_irqchip_in_kernel()) {
+ intno = apic_get_interrupt(cpu->apic_state);
+ if (intno >= 0) {
+ return intno;
+ }
+ /* read the irq from the PIC */
+ if (!apic_accept_pic_intr(cpu->apic_state)) {
+ return -1;
+ }
}
intno = pic_read_irq(isa_pic);
@@ -182,7 +185,7 @@ static void pic_irq_request(void *opaque, int irq, int level)
X86CPU *cpu = X86_CPU(cs);
DPRINTF("pic_irqs: %s irq %d\n", level? "raise" : "lower", irq);
- if (cpu->apic_state) {
+ if (cpu->apic_state && !kvm_irqchip_in_kernel()) {
CPU_FOREACH(cs) {
cpu = X86_CPU(cs);
if (apic_accept_pic_intr(cpu->apic_state)) {
@@ -532,9 +535,9 @@ static uint64_t port92_read(void *opaque, hwaddr addr,
return ret;
}
-static void port92_init(ISADevice *dev, qemu_irq *a20_out)
+static void port92_init(ISADevice *dev, qemu_irq a20_out)
{
- qdev_connect_gpio_out_named(DEVICE(dev), PORT92_A20_LINE, 0, *a20_out);
+ qdev_connect_gpio_out_named(DEVICE(dev), PORT92_A20_LINE, 0, a20_out);
}
static const VMStateDescription vmstate_port92_isa = {
@@ -743,20 +746,19 @@ static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms)
int i, j;
fw_cfg = fw_cfg_init_io_dma(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4, as);
+ fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus);
/* FW_CFG_MAX_CPUS is a bit confusing/problematic on x86:
*
- * SeaBIOS needs FW_CFG_MAX_CPUS for CPU hotplug, but the CPU hotplug
- * QEMU<->SeaBIOS interface is not based on the "CPU index", but on the APIC
- * ID of hotplugged CPUs[1]. This means that FW_CFG_MAX_CPUS is not the
- * "maximum number of CPUs", but the "limit to the APIC ID values SeaBIOS
- * may see".
+ * For machine types prior to 1.8, SeaBIOS needs FW_CFG_MAX_CPUS for
+ * building the MPTable, ACPI MADT, ACPI CPU hotplug and ACPI SRAT
+ * tables; those tables are based on the xAPIC ID, and the
+ * QEMU<->SeaBIOS interface for CPU hotplug also uses APIC IDs, not
+ * the "CPU index".
+ * This means that FW_CFG_MAX_CPUS is not the "maximum number of CPUs",
+ * but the "limit to the APIC ID values SeaBIOS may see".
*
- * So, this means we must not use max_cpus, here, but the maximum possible
- * APIC ID value, plus one.
- *
- * [1] The only kind of "CPU identifier" used between SeaBIOS and QEMU is
- * the APIC ID, not the "CPU index"
+ * So for compatibility reasons with old BIOSes we are stuck with
+ * "etc/max-cpus" actually being apic_id_limit
*/
fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)pcms->apic_id_limit);
fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
@@ -779,11 +781,9 @@ static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms)
for (i = 0; i < max_cpus; i++) {
unsigned int apic_id = x86_cpu_apic_id_from_index(i);
assert(apic_id < pcms->apic_id_limit);
- for (j = 0; j < nb_numa_nodes; j++) {
- if (test_bit(i, numa_info[j].node_cpu)) {
- numa_fw_cfg[apic_id + 1] = cpu_to_le64(j);
- break;
- }
+ j = numa_get_node_for_cpu(i);
+ if (j < nb_numa_nodes) {
+ numa_fw_cfg[apic_id + 1] = cpu_to_le64(j);
}
}
for (i = 0; i < nb_numa_nodes; i++) {
@@ -1093,17 +1093,6 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level)
}
}
-static int pc_present_cpus_count(PCMachineState *pcms)
-{
- int i, boot_cpus = 0;
- for (i = 0; i < pcms->possible_cpus->len; i++) {
- if (pcms->possible_cpus->cpus[i].cpu) {
- boot_cpus++;
- }
- }
- return boot_cpus;
-}
-
static X86CPU *pc_new_cpu(const char *typename, int64_t apic_id,
Error **errp)
{
@@ -1196,12 +1185,6 @@ void pc_cpus_init(PCMachineState *pcms)
* This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init().
*/
pcms->apic_id_limit = x86_cpu_apic_id_from_index(max_cpus - 1) + 1;
- if (pcms->apic_id_limit > ACPI_CPU_HOTPLUG_ID_LIMIT) {
- error_report("max_cpus is too large. APIC ID of last CPU is %u",
- pcms->apic_id_limit - 1);
- exit(1);
- }
-
pcms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
sizeof(CPUArchId) * max_cpus);
for (i = 0; i < max_cpus; i++) {
@@ -1246,6 +1229,19 @@ static void pc_build_feature_control_file(PCMachineState *pcms)
fw_cfg_add_file(pcms->fw_cfg, "etc/msr_feature_control", val, sizeof(*val));
}
+static void rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count)
+{
+ if (cpus_count > 0xff) {
+ /* If the number of CPUs can't be represented in 8 bits, the
+ * BIOS must use "FW_CFG_NB_CPUS". Set RTC field to 0 just
+ * to make old BIOSes fail more predictably.
+ */
+ rtc_set_memory(rtc, 0x5f, 0);
+ } else {
+ rtc_set_memory(rtc, 0x5f, cpus_count - 1);
+ }
+}
+
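Worked example of the rule above, with illustrative values:

    rtc_set_cpus_count(rtc, 4);    /* CMOS 0x5f = 3 */
    rtc_set_cpus_count(rtc, 300);  /* CMOS 0x5f = 0; firmware must read
                                    * the FW_CFG_NB_CPUS fw_cfg key */
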
static
void pc_machine_done(Notifier *notifier, void *data)
{
@@ -1254,7 +1250,7 @@ void pc_machine_done(Notifier *notifier, void *data)
PCIBus *bus = pcms->bus;
/* set the number of CPUs */
- rtc_set_memory(pcms->rtc, 0x5f, pc_present_cpus_count(pcms) - 1);
+ rtc_set_cpus_count(pcms->rtc, pcms->boot_cpus);
if (bus) {
int extra_hosts = 0;
@@ -1277,6 +1273,21 @@ void pc_machine_done(Notifier *notifier, void *data)
if (pcms->fw_cfg) {
pc_build_smbios(pcms->fw_cfg);
pc_build_feature_control_file(pcms);
+ /* update FW_CFG_NB_CPUS to account for -device added CPUs */
+ fw_cfg_modify_i16(pcms->fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus);
+ }
+
+ if (pcms->apic_id_limit > 255) {
+ IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default());
+
+ if (!iommu || !iommu->x86_iommu.intr_supported ||
+ iommu->intr_eim != ON_OFF_AUTO_ON) {
+ error_report("current -smp configuration requires "
+ "Extended Interrupt Mode enabled. "
+ "You can add an IOMMU using: "
+ "-device intel-iommu,intremap=on,eim=on");
+ exit(EXIT_FAILURE);
+ }
}
}
@@ -1341,6 +1352,7 @@ void xen_load_linux(PCMachineState *pcms)
assert(MACHINE(pcms)->kernel_filename != NULL);
fw_cfg = fw_cfg_init_io(FW_CFG_IO_BASE);
+ fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus);
rom_set_fw(fw_cfg);
load_linux(pcms, fw_cfg);
@@ -1595,12 +1607,12 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
pcspk_init(isa_bus, pit);
}
- serial_hds_isa_init(isa_bus, MAX_SERIAL_PORTS);
+ serial_hds_isa_init(isa_bus, 0, MAX_SERIAL_PORTS);
parallel_hds_isa_init(isa_bus, MAX_PARALLEL_PORTS);
a20_line = qemu_allocate_irqs(handle_a20_line_change, first_cpu, 2);
i8042 = isa_create_simple(isa_bus, "i8042");
- i8042_setup_a20_line(i8042, &a20_line[0]);
+ i8042_setup_a20_line(i8042, a20_line[0]);
if (!no_vmport) {
vmport_init(isa_bus);
vmmouse = isa_try_create(isa_bus, "vmmouse");
@@ -1613,7 +1625,8 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
qdev_init_nofail(dev);
}
port92 = isa_create_simple(isa_bus, "port92");
- port92_init(port92, &a20_line[1]);
+ port92_init(port92, a20_line[1]);
+ g_free(a20_line);
DMA_init(isa_bus, 0);
@@ -1705,6 +1718,10 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
goto out;
}
+ if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) {
+ nvdimm_plug(&pcms->acpi_nvdimm_state);
+ }
+
hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &error_abort);
out:
@@ -1724,6 +1741,12 @@ static void pc_dimm_unplug_request(HotplugHandler *hotplug_dev,
goto out;
}
+ if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) {
+ error_setg(&local_err,
+ "nvdimm device hot unplug is not supported yet.");
+ goto out;
+ }
+
hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
hhc->unplug_request(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
@@ -1799,9 +1822,11 @@ static void pc_cpu_plug(HotplugHandler *hotplug_dev,
}
}
+ /* increment the number of CPUs */
+ pcms->boot_cpus++;
if (dev->hotplugged) {
- /* increment the number of CPUs */
- rtc_set_memory(pcms->rtc, 0x5f, rtc_get_memory(pcms->rtc, 0x5f) + 1);
+ rtc_set_cpus_count(pcms->rtc, pcms->boot_cpus);
+ fw_cfg_modify_i16(pcms->fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus);
}
found_cpu = pc_find_cpu_slot(pcms, CPU(dev), NULL);
@@ -1855,7 +1880,11 @@ static void pc_cpu_unplug_cb(HotplugHandler *hotplug_dev,
found_cpu->cpu = NULL;
object_unparent(OBJECT(dev));
- rtc_set_memory(pcms->rtc, 0x5f, rtc_get_memory(pcms->rtc, 0x5f) - 1);
+ /* decrement the number of CPUs */
+ pcms->boot_cpus--;
+ /* Update the number of CPUs in CMOS */
+ rtc_set_cpus_count(pcms->rtc, pcms->boot_cpus);
+ fw_cfg_modify_i16(pcms->fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus);
out:
error_propagate(errp, local_err);
}
@@ -2141,41 +2170,13 @@ static void pc_machine_initfn(Object *obj)
{
PCMachineState *pcms = PC_MACHINE(obj);
- object_property_add(obj, PC_MACHINE_MEMHP_REGION_SIZE, "int",
- pc_machine_get_hotplug_memory_region_size,
- NULL, NULL, NULL, &error_abort);
-
pcms->max_ram_below_4g = 0; /* use default */
- object_property_add(obj, PC_MACHINE_MAX_RAM_BELOW_4G, "size",
- pc_machine_get_max_ram_below_4g,
- pc_machine_set_max_ram_below_4g,
- NULL, NULL, &error_abort);
- object_property_set_description(obj, PC_MACHINE_MAX_RAM_BELOW_4G,
- "Maximum ram below the 4G boundary (32bit boundary)",
- &error_abort);
-
pcms->smm = ON_OFF_AUTO_AUTO;
- object_property_add(obj, PC_MACHINE_SMM, "OnOffAuto",
- pc_machine_get_smm,
- pc_machine_set_smm,
- NULL, NULL, &error_abort);
- object_property_set_description(obj, PC_MACHINE_SMM,
- "Enable SMM (pc & q35)",
- &error_abort);
-
pcms->vmport = ON_OFF_AUTO_AUTO;
- object_property_add(obj, PC_MACHINE_VMPORT, "OnOffAuto",
- pc_machine_get_vmport,
- pc_machine_set_vmport,
- NULL, NULL, &error_abort);
- object_property_set_description(obj, PC_MACHINE_VMPORT,
- "Enable vmport (pc & q35)",
- &error_abort);
-
/* nvdimm is disabled on default. */
pcms->acpi_nvdimm_state.is_enabled = false;
- object_property_add_bool(obj, PC_MACHINE_NVDIMM, pc_machine_get_nvdimm,
- pc_machine_set_nvdimm, &error_abort);
+ /* acpi build is enabled by default if machine supports it */
+ pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build;
}
static void pc_machine_reset(void)
@@ -2310,6 +2311,32 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
hc->unplug_request = pc_machine_device_unplug_request_cb;
hc->unplug = pc_machine_device_unplug_cb;
nc->nmi_monitor_handler = x86_nmi;
+
+ object_class_property_add(oc, PC_MACHINE_MEMHP_REGION_SIZE, "int",
+ pc_machine_get_hotplug_memory_region_size, NULL,
+ NULL, NULL, &error_abort);
+
+ object_class_property_add(oc, PC_MACHINE_MAX_RAM_BELOW_4G, "size",
+ pc_machine_get_max_ram_below_4g, pc_machine_set_max_ram_below_4g,
+ NULL, NULL, &error_abort);
+
+ object_class_property_set_description(oc, PC_MACHINE_MAX_RAM_BELOW_4G,
+ "Maximum ram below the 4G boundary (32bit boundary)", &error_abort);
+
+ object_class_property_add(oc, PC_MACHINE_SMM, "OnOffAuto",
+ pc_machine_get_smm, pc_machine_set_smm,
+ NULL, NULL, &error_abort);
+ object_class_property_set_description(oc, PC_MACHINE_SMM,
+ "Enable SMM (pc & q35)", &error_abort);
+
+ object_class_property_add(oc, PC_MACHINE_VMPORT, "OnOffAuto",
+ pc_machine_get_vmport, pc_machine_set_vmport,
+ NULL, NULL, &error_abort);
+ object_class_property_set_description(oc, PC_MACHINE_VMPORT,
+ "Enable vmport (pc & q35)", &error_abort);
+
+ object_class_property_add_bool(oc, PC_MACHINE_NVDIMM,
+ pc_machine_get_nvdimm, pc_machine_set_nvdimm, &error_abort);
}
static const TypeInfo pc_machine_info = {
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 84241296a8..68835dde0b 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -74,7 +74,6 @@ static void pc_init1(MachineState *machine,
ISABus *isa_bus;
PCII440FXState *i440fx_state;
int piix3_devfn = -1;
- qemu_irq *gsi;
qemu_irq *i8259;
qemu_irq smi_irq;
GSIState *gsi_state;
@@ -185,16 +184,16 @@ static void pc_init1(MachineState *machine,
gsi_state = g_malloc0(sizeof(*gsi_state));
if (kvm_ioapic_in_kernel()) {
kvm_pc_setup_irq_routing(pcmc->pci_enabled);
- gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state,
- GSI_NUM_PINS);
+ pcms->gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state,
+ GSI_NUM_PINS);
} else {
- gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS);
+ pcms->gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS);
}
if (pcmc->pci_enabled) {
pci_bus = i440fx_init(host_type,
pci_type,
- &i440fx_state, &piix3_devfn, &isa_bus, gsi,
+ &i440fx_state, &piix3_devfn, &isa_bus, pcms->gsi,
system_memory, system_io, machine->ram_size,
pcms->below_4g_mem_size,
pcms->above_4g_mem_size,
@@ -207,7 +206,7 @@ static void pc_init1(MachineState *machine,
&error_abort);
no_hpet = 1;
}
- isa_bus_irqs(isa_bus, gsi);
+ isa_bus_irqs(isa_bus, pcms->gsi);
if (kvm_pic_in_kernel()) {
i8259 = kvm_i8259_init(isa_bus);
@@ -225,7 +224,7 @@ static void pc_init1(MachineState *machine,
ioapic_init_gsi(gsi_state, "i440fx");
}
- pc_register_ferr_irq(gsi[13]);
+ pc_register_ferr_irq(pcms->gsi[13]);
pc_vga_init(isa_bus, pcmc->pci_enabled ? pci_bus : NULL);
@@ -235,7 +234,7 @@ static void pc_init1(MachineState *machine,
}
/* init basic PC hardware */
- pc_basic_device_init(isa_bus, gsi, &rtc_state, true,
+ pc_basic_device_init(isa_bus, pcms->gsi, &rtc_state, true,
(pcms->vmport != ON_OFF_AUTO_ON), 0x4);
pc_nic_init(isa_bus, pci_bus);
@@ -279,7 +278,7 @@ static void pc_init1(MachineState *machine,
smi_irq = qemu_allocate_irq(pc_acpi_smi_interrupt, first_cpu, 0);
/* TODO: Populate SPD eeprom data. */
smbus = piix4_pm_init(pci_bus, piix3_devfn + 3, 0xb100,
- gsi[9], smi_irq,
+ pcms->gsi[9], smi_irq,
pc_machine_is_smm_enabled(pcms),
&piix4_pm);
smbus_eeprom_init(smbus, 8, NULL, 0);
@@ -447,13 +446,25 @@ static void pc_i440fx_machine_options(MachineClass *m)
m->default_display = "std";
}
-static void pc_i440fx_2_7_machine_options(MachineClass *m)
+static void pc_i440fx_2_8_machine_options(MachineClass *m)
{
pc_i440fx_machine_options(m);
m->alias = "pc";
m->is_default = 1;
}
+DEFINE_I440FX_MACHINE(v2_8, "pc-i440fx-2.8", NULL,
+ pc_i440fx_2_8_machine_options);
+
+
+static void pc_i440fx_2_7_machine_options(MachineClass *m)
+{
+ pc_i440fx_2_8_machine_options(m);
+ m->is_default = 0;
+ m->alias = NULL;
+ SET_MACHINE_COMPAT(m, PC_COMPAT_2_7);
+}
+
DEFINE_I440FX_MACHINE(v2_7, "pc-i440fx-2.7", NULL,
pc_i440fx_2_7_machine_options);
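The 2.8/2.7 pair above follows the versioned machine-type pattern: only the newest version keeps the alias and default flag, and each older version inherits the newer options before stacking its compat properties. A hedged restatement with placeholder version names:

    static void pc_i440fx_OLD_machine_options(MachineClass *m)
    {
        pc_i440fx_NEW_machine_options(m);  /* inherit newer defaults */
        m->is_default = 0;                 /* only the newest is default */
        m->alias = NULL;                   /* "pc" points at the newest */
        SET_MACHINE_COMPAT(m, PC_COMPAT_OLD);
    }
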
@@ -462,8 +473,6 @@ static void pc_i440fx_2_6_machine_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
pc_i440fx_2_7_machine_options(m);
- m->is_default = 0;
- m->alias = NULL;
pcmc->legacy_cpu_hotplug = true;
SET_MACHINE_COMPAT(m, PC_COMPAT_2_6);
}
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index c0b9961928..b40d19ee00 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -69,7 +69,6 @@ static void pc_q35_init(MachineState *machine)
MemoryRegion *ram_memory;
GSIState *gsi_state;
ISABus *isa_bus;
- qemu_irq *gsi;
qemu_irq *i8259;
int i;
ICH9LPCState *ich9_lpc;
@@ -153,10 +152,10 @@ static void pc_q35_init(MachineState *machine)
gsi_state = g_malloc0(sizeof(*gsi_state));
if (kvm_ioapic_in_kernel()) {
kvm_pc_setup_irq_routing(pcmc->pci_enabled);
- gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state,
- GSI_NUM_PINS);
+ pcms->gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state,
+ GSI_NUM_PINS);
} else {
- gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS);
+ pcms->gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS);
}
/* create pci host bus */
@@ -195,7 +194,7 @@ static void pc_q35_init(MachineState *machine)
ich9_lpc = ICH9_LPC_DEVICE(lpc);
lpc_dev = DEVICE(lpc);
for (i = 0; i < GSI_NUM_PINS; i++) {
- qdev_connect_gpio_out_named(lpc_dev, ICH9_GPIO_GSI, i, gsi[i]);
+ qdev_connect_gpio_out_named(lpc_dev, ICH9_GPIO_GSI, i, pcms->gsi[i]);
}
pci_bus_irqs(host_bus, ich9_lpc_set_irq, ich9_lpc_map_irq, ich9_lpc,
ICH9_LPC_NB_PIRQS);
@@ -213,11 +212,13 @@ static void pc_q35_init(MachineState *machine)
for (i = 0; i < ISA_NUM_IRQS; i++) {
gsi_state->i8259_irq[i] = i8259[i];
}
+ g_free(i8259);
+
if (pcmc->pci_enabled) {
ioapic_init_gsi(gsi_state, "q35");
}
- pc_register_ferr_irq(gsi[13]);
+ pc_register_ferr_irq(pcms->gsi[13]);
assert(pcms->vmport != ON_OFF_AUTO__MAX);
if (pcms->vmport == ON_OFF_AUTO_AUTO) {
@@ -225,7 +226,7 @@ static void pc_q35_init(MachineState *machine)
}
/* init basic PC hardware */
- pc_basic_device_init(isa_bus, gsi, &rtc_state, !mc->no_floppy,
+ pc_basic_device_init(isa_bus, pcms->gsi, &rtc_state, !mc->no_floppy,
(pcms->vmport != ON_OFF_AUTO_ON), 0xff0104);
/* connect pm stuff to lpc */
@@ -290,14 +291,26 @@ static void pc_q35_machine_options(MachineClass *m)
m->default_display = "std";
m->no_floppy = 1;
m->has_dynamic_sysbus = true;
+ m->max_cpus = 288;
}
-static void pc_q35_2_7_machine_options(MachineClass *m)
+static void pc_q35_2_8_machine_options(MachineClass *m)
{
pc_q35_machine_options(m);
m->alias = "q35";
}
+DEFINE_Q35_MACHINE(v2_8, "pc-q35-2.8", NULL,
+ pc_q35_2_8_machine_options);
+
+static void pc_q35_2_7_machine_options(MachineClass *m)
+{
+ pc_q35_2_8_machine_options(m);
+ m->alias = NULL;
+ m->max_cpus = 255;
+ SET_MACHINE_COMPAT(m, PC_COMPAT_2_7);
+}
+
DEFINE_Q35_MACHINE(v2_7, "pc-q35-2.7", NULL,
pc_q35_2_7_machine_options);
@@ -305,7 +318,6 @@ static void pc_q35_2_6_machine_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
pc_q35_2_7_machine_options(m);
- m->alias = NULL;
pcmc->legacy_cpu_hotplug = true;
SET_MACHINE_COMPAT(m, PC_COMPAT_2_6);
}
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index 7735e46eaf..d2b497327e 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -7,9 +7,34 @@ xen_platform_log(char *s) "xen platform: %s"
xen_pv_mmio_read(uint64_t addr) "WARNING: read from Xen PV Device MMIO space (address %"PRIx64")"
xen_pv_mmio_write(uint64_t addr) "WARNING: write to Xen PV Device MMIO space (address %"PRIx64")"
-# hw/i386/pc.c
-mhp_pc_dimm_assigned_slot(int slot) "0x%d"
-mhp_pc_dimm_assigned_address(uint64_t addr) "0x%"PRIx64
-
# hw/i386/x86-iommu.c
x86_iommu_iec_notify(bool global, uint32_t index, uint32_t mask) "Notify IEC invalidation: global=%d index=%" PRIu32 " mask=%" PRIu32
+
+# hw/i386/amd_iommu.c
+amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: failed to write at addr 0x%"PRIx64" + offset 0x%"PRIx32
+amdvi_cache_update(uint16_t domid, uint8_t bus, uint8_t slot, uint8_t func, uint64_t gpa, uint64_t txaddr) "update iotlb domid 0x%"PRIx16" devid: %02x:%02x.%x gpa 0x%"PRIx64" hpa 0x%"PRIx64
+amdvi_completion_wait_fail(uint64_t addr) "error: failed to write at address 0x%"PRIx64
+amdvi_mmio_write(const char *reg, uint64_t addr, unsigned size, uint64_t val, uint64_t offset) "%s write addr 0x%"PRIx64", size %u, val 0x%"PRIx64", offset 0x%"PRIx64
+amdvi_mmio_read(const char *reg, uint64_t addr, unsigned size, uint64_t offset) "%s read addr 0x%"PRIx64", size %u offset 0x%"PRIx64
+amdvi_command_error(uint64_t status) "error: Executing commands with command buffer disabled 0x%"PRIx64
+amdvi_command_read_fail(uint64_t addr, uint32_t head) "error: failed to access memory at 0x%"PRIx64" + 0x%"PRIx32
+amdvi_command_exec(uint32_t head, uint32_t tail, uint64_t buf) "command buffer head at 0x%"PRIx32" command buffer tail at 0x%"PRIx32" command buffer base at 0x%"PRIx64
+amdvi_unhandled_command(uint8_t type) "unhandled command 0x%"PRIx8
+amdvi_intr_inval(void) "Interrupt table invalidated"
+amdvi_iotlb_inval(void) "IOTLB pages invalidated"
+amdvi_prefetch_pages(void) "Pre-fetch of AMD-Vi pages requested"
+amdvi_pages_inval(uint16_t domid) "AMD-Vi pages for domain 0x%"PRIx16 " invalidated"
+amdvi_all_inval(void) "Invalidation of all AMD-Vi cache requested"
+amdvi_ppr_exec(void) "Execution of PPR queue requested"
+amdvi_devtab_inval(uint8_t bus, uint8_t slot, uint8_t func) "device table entry for devid: %02x:%02x.%x invalidated"
+amdvi_completion_wait(uint64_t addr, uint64_t data) "completion wait requested with store address 0x%"PRIx64" and store data 0x%"PRIx64
+amdvi_control_status(uint64_t val) "MMIO_STATUS state 0x%"PRIx64
+amdvi_iotlb_reset(void) "IOTLB exceeded size limit - reset"
+amdvi_completion_wait_exec(uint64_t addr, uint64_t data) "completion wait requested with store address 0x%"PRIx64" and store data 0x%"PRIx64
+amdvi_dte_get_fail(uint64_t addr, uint32_t offset) "error: failed to access Device Entry devtab 0x%"PRIx64" offset 0x%"PRIx32
+amdvi_invalid_dte(uint64_t addr) "PTE entry at 0x%"PRIx64" is invalid"
+amdvi_get_pte_hwerror(uint64_t addr) "hardware error accessing PTE at addr 0x%"PRIx64
+amdvi_mode_invalid(uint8_t level, uint64_t addr) "error: invalid translation level 0x%"PRIx8" translating addr 0x%"PRIx64
+amdvi_page_fault(uint64_t addr) "error: page fault accessing guest physical address 0x%"PRIx64
+amdvi_iotlb_hit(uint8_t bus, uint8_t slot, uint8_t func, uint64_t addr, uint64_t txaddr) "hit iotlb devid %02x:%02x.%x gpa 0x%"PRIx64" hpa 0x%"PRIx64
+amdvi_translation_result(uint8_t bus, uint8_t slot, uint8_t func, uint64_t addr, uint64_t txaddr) "devid: %02x:%02x.%x gpa 0x%"PRIx64" hpa 0x%"PRIx64
diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c
index ce26b2a71d..2278af7c32 100644
--- a/hw/i386/x86-iommu.c
+++ b/hw/i386/x86-iommu.c
@@ -71,6 +71,11 @@ X86IOMMUState *x86_iommu_get_default(void)
return x86_iommu_default;
}
+IommuType x86_iommu_get_type(void)
+{
+ return x86_iommu_default->type;
+}
+
static void x86_iommu_realize(DeviceState *dev, Error **errp)
{
X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
@@ -79,6 +84,7 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp)
if (x86_class->realize) {
x86_class->realize(dev, errp);
}
+
x86_iommu_set_default(X86_IOMMU_DEVICE(dev));
}
diff --git a/hw/i386/xen/xen_apic.c b/hw/i386/xen/xen_apic.c
index 21d68ee04b..55769eba7e 100644
--- a/hw/i386/xen/xen_apic.c
+++ b/hw/i386/xen/xen_apic.c
@@ -68,6 +68,11 @@ static void xen_apic_external_nmi(APICCommonState *s)
{
}
+static void xen_send_msi(MSIMessage *msi)
+{
+ xen_hvm_inject_msi(msi->address, msi->data);
+}
+
static void xen_apic_class_init(ObjectClass *klass, void *data)
{
APICCommonClass *k = APIC_COMMON_CLASS(klass);
@@ -78,6 +83,7 @@ static void xen_apic_class_init(ObjectClass *klass, void *data)
k->get_tpr = xen_apic_get_tpr;
k->vapic_base_update = xen_apic_vapic_base_update;
k->external_nmi = xen_apic_external_nmi;
+ k->send_msi = xen_send_msi;
}
static const TypeInfo xen_apic_info = {
diff --git a/hw/i386/xen/xen_platform.c b/hw/i386/xen/xen_platform.c
index aa7839324c..2e1e543881 100644
--- a/hw/i386/xen/xen_platform.c
+++ b/hw/i386/xen/xen_platform.c
@@ -114,6 +114,10 @@ static void unplug_disks(PCIBus *b, PCIDevice *d, void *o)
PCI_CLASS_STORAGE_IDE
&& strcmp(d->name, "xen-pci-passthrough") != 0) {
pci_piix3_xen_ide_unplug(DEVICE(d));
+ } else if (pci_get_word(d->config + PCI_CLASS_DEVICE) ==
+ PCI_CLASS_STORAGE_SCSI
+ && strcmp(d->name, "xen-pci-passthrough") != 0) {
+ object_unparent(OBJECT(d));
}
}
@@ -134,8 +138,6 @@ static void platform_fixed_ioport_writew(void *opaque, uint32_t addr, uint32_t v
devices, and bit 2 the non-primary-master IDE devices. */
if (val & UNPLUG_ALL_IDE_DISKS) {
DPRINTF("unplug disks\n");
- blk_drain_all();
- blk_flush_all();
pci_unplug_disks(pci_dev->bus);
}
if (val & UNPLUG_ALL_NICS) {
@@ -309,13 +311,38 @@ static void xen_platform_ioport_writeb(void *opaque, hwaddr addr,
uint64_t val, unsigned int size)
{
PCIXenPlatformState *s = opaque;
+ PCIDevice *pci_dev = PCI_DEVICE(s);
switch (addr) {
case 0: /* Platform flags */
platform_fixed_ioport_writeb(opaque, 0, (uint32_t)val);
break;
+ case 4:
+ if (val == 1) {
+ /*
+ * SUSE unplug for Xenlinux
+ * xen-kmp used this since xen-3.0.4, instead of the official
+ * protocol from xen-3.3+. It did an unconditional "outl(1, (ioaddr + 4));"
+ * Pre VMDP 1.7 used 4 and 8 depending on how VMDP was configured.
+ * If VMDP was to control both disk and LAN it would use 4.
+ * If it controlled just disk or just LAN, it would use 8 below.
+ */
+ pci_unplug_disks(pci_dev->bus);
+ pci_unplug_nics(pci_dev->bus);
+ }
+ break;
case 8:
- log_writeb(s, (uint32_t)val);
+ switch (val) {
+ case 1:
+ pci_unplug_disks(pci_dev->bus);
+ break;
+ case 2:
+ pci_unplug_nics(pci_dev->bus);
+ break;
+ default:
+ log_writeb(s, (uint32_t)val);
+ break;
+ }
break;
default:
break;
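Guest-side view of the unplug writes handled above (a hedged sketch; ioaddr stands for the platform device's I/O base, which a real guest reads from the device's PCI BAR):

    outl(1, ioaddr + 4);   /* SUSE xen-kmp: unplug disks and NICs */
    outb(1, ioaddr + 8);   /* VMDP-style: unplug emulated disks */
    outb(2, ioaddr + 8);   /* VMDP-style: unplug emulated NICs */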