summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-03-15 12:22:10 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2026-03-15 12:22:10 -0700
commit11e8c7e9471cf8e6ae6ec7324a3174191cd965e3 (patch)
tree4e18e18bf7c1ff66b5f5a0d609766215ecc56472 /tools
parent4f3df2e5ea69f5717d2721922aff263c31957548 (diff)
parentd2ea4ff1ce50787a98a3900b3fb1636f3620b7cf (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini: "Quite a large pull request, partly due to skipping last week and therefore having material from ~all submaintainers in this one. About a fourth of it is a new selftest, and a couple more changes are large in number of files touched (fixing a -Wflex-array-member-not-at-end compiler warning) or lines changed (reformatting of a table in the API documentation, thanks rST). But who am I kidding---it's a lot of commits and there are a lot of bugs being fixed here, some of them on the nastier side like the RISC-V ones. ARM: - Correctly handle deactivation of interrupts that were activated from LRs. Since EOIcount only denotes deactivation of interrupts that are not present in an LR, start EOIcount deactivation walk *after* the last irq that made it into an LR - Avoid calling into the stubs to probe for ICH_VTR_EL2.TDS when pKVM is already enabled -- not only thhis isn't possible (pKVM will reject the call), but it is also useless: this can only happen for a CPU that has already booted once, and the capability will not change - Fix a couple of low-severity bugs in our S2 fault handling path, affecting the recently introduced LS64 handling and the even more esoteric handling of hwpoison in a nested context - Address yet another syzkaller finding in the vgic initialisation, where we would end-up destroying an uninitialised vgic with nasty consequences - Address an annoying case of pKVM failing to boot when some of the memblock regions that the host is faulting in are not page-aligned - Inject some sanity in the NV stage-2 walker by checking the limits against the advertised PA size, and correctly report the resulting faults PPC: - Fix a PPC e500 build error due to a long-standing wart that was exposed by the recent conversion to kmalloc_obj(); rip out all the ugliness that led to the wart RISC-V: - Prevent speculative out-of-bounds access using array_index_nospec() in APLIC interrupt handling, ONE_REG regiser access, AIA CSR access, float register access, and PMU counter access - Fix potential use-after-free issues in kvm_riscv_gstage_get_leaf(), kvm_riscv_aia_aplic_has_attr(), and kvm_riscv_aia_imsic_has_attr() - Fix potential null pointer dereference in kvm_riscv_vcpu_aia_rmw_topei() - Fix off-by-one array access in SBI PMU - Skip THP support check during dirty logging - Fix error code returned for Smstateen and Ssaia ONE_REG interface - Check host Ssaia extension when creating AIA irqchip x86: - Fix cases where CPUID mitigation features were incorrectly marked as available whenever the kernel used scattered feature words for them - Validate _all_ GVAs, rather than just the first GVA, when processing a range of GVAs for Hyper-V's TLB flush hypercalls - Fix a brown paper bug in add_atomic_switch_msr() - Use hlist_for_each_entry_srcu() when traversing mask_notifier_list, to fix a lockdep warning; KVM doesn't hold RCU, just irq_srcu - Ensure AVIC VMCB fields are initialized if the VM has an in-kernel local APIC (and AVIC is enabled at the module level) - Update CR8 write interception when AVIC is (de)activated, to fix a bug where the guest can run in perpetuity with the CR8 intercept enabled - Add a quirk to skip the consistency check on FREEZE_IN_SMM, i.e. to allow L1 hypervisors to set FREEZE_IN_SMM. This reverts (by default) an unintentional tightening of userspace ABI in 6.17, and provides some amount of backwards compatibility with hypervisors who want to freeze PMCs on VM-Entry - Validate the VMCS/VMCB on return to a nested guest from SMM, because either userspace or the guest could stash invalid values in memory and trigger the processor's consistency checks Generic: - Remove a subtle pseudo-overlay of kvm_stats_desc, which, aside from being unnecessary and confusing, triggered compiler warnings due to -Wflex-array-member-not-at-end - Document that vcpu->mutex is take outside of kvm->slots_lock and kvm->slots_arch_lock, which is intentional and desirable despite being rather unintuitive Selftests: - Increase the maximum number of NUMA nodes in the guest_memfd selftest to 64 (from 8)" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (43 commits) KVM: selftests: Verify SEV+ guests can read and write EFER, CR0, CR4, and CR8 Documentation: kvm: fix formatting of the quirks table KVM: x86: clarify leave_smm() return value selftests: kvm: add a test that VMX validates controls on RSM selftests: kvm: extract common functionality out of smm_test.c KVM: SVM: check validity of VMCB controls when returning from SMM KVM: VMX: check validity of VMCS controls when returning from SMM KVM: SVM: Set/clear CR8 write interception when AVIC is (de)activated KVM: SVM: Initialize AVIC VMCB fields if AVIC is enabled with in-kernel APIC KVM: x86: Introduce KVM_X86_QUIRK_VMCS12_ALLOW_FREEZE_IN_SMM KVM: x86: Fix SRCU list traversal in kvm_fire_mask_notifiers() KVM: VMX: Fix a wrong MSR update in add_atomic_switch_msr() KVM: x86: hyper-v: Validate all GVAs during PV TLB flush KVM: x86: synthesize CPUID bits only if CPU capability is set KVM: PPC: e500: Rip out "struct tlbe_ref" KVM: PPC: e500: Fix build error due to using kmalloc_obj() with wrong type KVM: selftests: Increase 'maxnode' for guest_memfd tests KVM: arm64: pkvm: Don't reprobe for ICH_VTR_EL2.TDS on CPU hotplug KVM: arm64: vgic: Pick EOIcount deactivations from AP-list tail KVM: arm64: Remove the redundant ISB in __kvm_at_s1e2() ...
Diffstat (limited to 'tools')
-rw-r--r--tools/testing/selftests/kvm/Makefile.kvm1
-rw-r--r--tools/testing/selftests/kvm/guest_memfd_test.c2
-rw-r--r--tools/testing/selftests/kvm/include/x86/processor.h23
-rw-r--r--tools/testing/selftests/kvm/include/x86/smm.h17
-rw-r--r--tools/testing/selftests/kvm/lib/x86/processor.c26
-rw-r--r--tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c150
-rw-r--r--tools/testing/selftests/kvm/x86/sev_smoke_test.c30
-rw-r--r--tools/testing/selftests/kvm/x86/smm_test.c27
8 files changed, 250 insertions, 26 deletions
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index fdec90e85467..dc68371f76a3 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -71,6 +71,7 @@ TEST_GEN_PROGS_x86 += x86/cpuid_test
TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test
TEST_GEN_PROGS_x86 += x86/feature_msrs_test
+TEST_GEN_PROGS_x86 += x86/evmcs_smm_controls_test
TEST_GEN_PROGS_x86 += x86/exit_on_emulation_failure_test
TEST_GEN_PROGS_x86 += x86/fastops_test
TEST_GEN_PROGS_x86 += x86/fix_hypercall_test
diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c
index 618c937f3c90..cc329b57ce2e 100644
--- a/tools/testing/selftests/kvm/guest_memfd_test.c
+++ b/tools/testing/selftests/kvm/guest_memfd_test.c
@@ -80,7 +80,7 @@ static void test_mbind(int fd, size_t total_size)
{
const unsigned long nodemask_0 = 1; /* nid: 0 */
unsigned long nodemask = 0;
- unsigned long maxnode = 8;
+ unsigned long maxnode = BITS_PER_TYPE(nodemask);
int policy;
char *mem;
int ret;
diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
index 4ebae4269e68..469a22122157 100644
--- a/tools/testing/selftests/kvm/include/x86/processor.h
+++ b/tools/testing/selftests/kvm/include/x86/processor.h
@@ -557,6 +557,11 @@ static inline uint64_t get_cr0(void)
return cr0;
}
+static inline void set_cr0(uint64_t val)
+{
+ __asm__ __volatile__("mov %0, %%cr0" : : "r" (val) : "memory");
+}
+
static inline uint64_t get_cr3(void)
{
uint64_t cr3;
@@ -566,6 +571,11 @@ static inline uint64_t get_cr3(void)
return cr3;
}
+static inline void set_cr3(uint64_t val)
+{
+ __asm__ __volatile__("mov %0, %%cr3" : : "r" (val) : "memory");
+}
+
static inline uint64_t get_cr4(void)
{
uint64_t cr4;
@@ -580,6 +590,19 @@ static inline void set_cr4(uint64_t val)
__asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
}
+static inline uint64_t get_cr8(void)
+{
+ uint64_t cr8;
+
+ __asm__ __volatile__("mov %%cr8, %[cr8]" : [cr8]"=r"(cr8));
+ return cr8;
+}
+
+static inline void set_cr8(uint64_t val)
+{
+ __asm__ __volatile__("mov %0, %%cr8" : : "r" (val) : "memory");
+}
+
static inline void set_idt(const struct desc_ptr *idt_desc)
{
__asm__ __volatile__("lidt %0"::"m"(*idt_desc));
diff --git a/tools/testing/selftests/kvm/include/x86/smm.h b/tools/testing/selftests/kvm/include/x86/smm.h
new file mode 100644
index 000000000000..19337c34f13e
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86/smm.h
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef SELFTEST_KVM_SMM_H
+#define SELFTEST_KVM_SMM_H
+
+#include "kvm_util.h"
+
+#define SMRAM_SIZE 65536
+#define SMRAM_MEMSLOT ((1 << 16) | 1)
+#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE)
+
+void setup_smram(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
+ uint64_t smram_gpa,
+ const void *smi_handler, size_t handler_size);
+
+void inject_smi(struct kvm_vcpu *vcpu);
+
+#endif /* SELFTEST_KVM_SMM_H */
diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c
index fab18e9be66c..23a44941e283 100644
--- a/tools/testing/selftests/kvm/lib/x86/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86/processor.c
@@ -8,6 +8,7 @@
#include "kvm_util.h"
#include "pmu.h"
#include "processor.h"
+#include "smm.h"
#include "svm_util.h"
#include "sev.h"
#include "vmx.h"
@@ -1444,3 +1445,28 @@ bool kvm_arch_has_default_irqchip(void)
{
return true;
}
+
+void setup_smram(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
+ uint64_t smram_gpa,
+ const void *smi_handler, size_t handler_size)
+{
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, smram_gpa,
+ SMRAM_MEMSLOT, SMRAM_PAGES, 0);
+ TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, smram_gpa,
+ SMRAM_MEMSLOT) == smram_gpa,
+ "Could not allocate guest physical addresses for SMRAM");
+
+ memset(addr_gpa2hva(vm, smram_gpa), 0x0, SMRAM_SIZE);
+ memcpy(addr_gpa2hva(vm, smram_gpa) + 0x8000, smi_handler, handler_size);
+ vcpu_set_msr(vcpu, MSR_IA32_SMBASE, smram_gpa);
+}
+
+void inject_smi(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events;
+
+ vcpu_events_get(vcpu, &events);
+ events.smi.pending = 1;
+ events.flags |= KVM_VCPUEVENT_VALID_SMM;
+ vcpu_events_set(vcpu, &events);
+}
diff --git a/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c b/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c
new file mode 100644
index 000000000000..af7c90103396
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2026, Red Hat, Inc.
+ *
+ * Test that vmx_leave_smm() validates vmcs12 controls before re-entering
+ * nested guest mode on RSM.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "smm.h"
+#include "hyperv.h"
+#include "vmx.h"
+
+#define SMRAM_GPA 0x1000000
+#define SMRAM_STAGE 0xfe
+
+#define SYNC_PORT 0xe
+
+#define STR(x) #x
+#define XSTR(s) STR(s)
+
+/*
+ * SMI handler: runs in real-address mode.
+ * Reports SMRAM_STAGE via port IO, then does RSM.
+ */
+static uint8_t smi_handler[] = {
+ 0xb0, SMRAM_STAGE, /* mov $SMRAM_STAGE, %al */
+ 0xe4, SYNC_PORT, /* in $SYNC_PORT, %al */
+ 0x0f, 0xaa, /* rsm */
+};
+
+static inline void sync_with_host(uint64_t phase)
+{
+ asm volatile("in $" XSTR(SYNC_PORT) ", %%al \n"
+ : "+a" (phase));
+}
+
+static void l2_guest_code(void)
+{
+ sync_with_host(1);
+
+ /* After SMI+RSM with invalid controls, we should not reach here. */
+ vmcall();
+}
+
+static void guest_code(struct vmx_pages *vmx_pages,
+ struct hyperv_test_pages *hv_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ /* Set up Hyper-V enlightenments and eVMCS */
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
+ evmcs_enable();
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_evmcs(hv_pages));
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_ASSERT(!vmlaunch());
+
+ /* L2 exits via vmcall if test fails */
+ sync_with_host(2);
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0;
+ struct hyperv_test_pages *hv;
+ struct hv_enlightened_vmcs *evmcs;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct kvm_regs regs;
+ int stage_reported;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_SMM));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ setup_smram(vm, vcpu, SMRAM_GPA, smi_handler, sizeof(smi_handler));
+
+ vcpu_set_hv_cpuid(vcpu);
+ vcpu_enable_evmcs(vcpu);
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ hv = vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
+ vcpu_args_set(vcpu, 2, vmx_pages_gva, hv_pages_gva);
+
+ vcpu_run(vcpu);
+
+ /* L2 is running and syncs with host. */
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ vcpu_regs_get(vcpu, &regs);
+ stage_reported = regs.rax & 0xff;
+ TEST_ASSERT(stage_reported == 1,
+ "Expected stage 1, got %d", stage_reported);
+
+ /* Inject SMI while L2 is running. */
+ inject_smi(vcpu);
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ vcpu_regs_get(vcpu, &regs);
+ stage_reported = regs.rax & 0xff;
+ TEST_ASSERT(stage_reported == SMRAM_STAGE,
+ "Expected SMM handler stage %#x, got %#x",
+ SMRAM_STAGE, stage_reported);
+
+ /*
+ * Guest is now paused in the SMI handler, about to execute RSM.
+ * Hack the eVMCS page to set-up invalid pin-based execution
+ * control (PIN_BASED_VIRTUAL_NMIS without PIN_BASED_NMI_EXITING).
+ */
+ evmcs = hv->enlightened_vmcs_hva;
+ evmcs->pin_based_vm_exec_control |= PIN_BASED_VIRTUAL_NMIS;
+ evmcs->hv_clean_fields = 0;
+
+ /*
+ * Trigger copy_enlightened_to_vmcs12() via KVM_GET_NESTED_STATE,
+ * copying the invalid pin_based_vm_exec_control into cached_vmcs12.
+ */
+ union {
+ struct kvm_nested_state state;
+ char state_[16384];
+ } nested_state_buf;
+
+ memset(&nested_state_buf, 0, sizeof(nested_state_buf));
+ nested_state_buf.state.size = sizeof(nested_state_buf);
+ vcpu_nested_state_get(vcpu, &nested_state_buf.state);
+
+ /*
+ * Resume the guest. The SMI handler executes RSM, which calls
+ * vmx_leave_smm(). nested_vmx_check_controls() should detect
+ * VIRTUAL_NMIS without NMI_EXITING and cause a triple fault.
+ */
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
index 86ad1c7d068f..8bd37a476f15 100644
--- a/tools/testing/selftests/kvm/x86/sev_smoke_test.c
+++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
@@ -13,6 +13,30 @@
#include "linux/psp-sev.h"
#include "sev.h"
+static void guest_sev_test_msr(uint32_t msr)
+{
+ uint64_t val = rdmsr(msr);
+
+ wrmsr(msr, val);
+ GUEST_ASSERT(val == rdmsr(msr));
+}
+
+#define guest_sev_test_reg(reg) \
+do { \
+ uint64_t val = get_##reg(); \
+ \
+ set_##reg(val); \
+ GUEST_ASSERT(val == get_##reg()); \
+} while (0)
+
+static void guest_sev_test_regs(void)
+{
+ guest_sev_test_msr(MSR_EFER);
+ guest_sev_test_reg(cr0);
+ guest_sev_test_reg(cr3);
+ guest_sev_test_reg(cr4);
+ guest_sev_test_reg(cr8);
+}
#define XFEATURE_MASK_X87_AVX (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM)
@@ -24,6 +48,8 @@ static void guest_snp_code(void)
GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_ES_ENABLED);
GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_SNP_ENABLED);
+ guest_sev_test_regs();
+
wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ);
vmgexit();
}
@@ -34,6 +60,8 @@ static void guest_sev_es_code(void)
GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ES_ENABLED);
+ guest_sev_test_regs();
+
/*
* TODO: Add GHCB and ucall support for SEV-ES guests. For now, simply
* force "termination" to signal "done" via the GHCB MSR protocol.
@@ -47,6 +75,8 @@ static void guest_sev_code(void)
GUEST_ASSERT(this_cpu_has(X86_FEATURE_SEV));
GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
+ guest_sev_test_regs();
+
GUEST_DONE();
}
diff --git a/tools/testing/selftests/kvm/x86/smm_test.c b/tools/testing/selftests/kvm/x86/smm_test.c
index 55c88d664a94..ade8412bf94a 100644
--- a/tools/testing/selftests/kvm/x86/smm_test.c
+++ b/tools/testing/selftests/kvm/x86/smm_test.c
@@ -14,13 +14,11 @@
#include "test_util.h"
#include "kvm_util.h"
+#include "smm.h"
#include "vmx.h"
#include "svm_util.h"
-#define SMRAM_SIZE 65536
-#define SMRAM_MEMSLOT ((1 << 16) | 1)
-#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE)
#define SMRAM_GPA 0x1000000
#define SMRAM_STAGE 0xfe
@@ -113,18 +111,6 @@ static void guest_code(void *arg)
sync_with_host(DONE);
}
-void inject_smi(struct kvm_vcpu *vcpu)
-{
- struct kvm_vcpu_events events;
-
- vcpu_events_get(vcpu, &events);
-
- events.smi.pending = 1;
- events.flags |= KVM_VCPUEVENT_VALID_SMM;
-
- vcpu_events_set(vcpu, &events);
-}
-
int main(int argc, char *argv[])
{
vm_vaddr_t nested_gva = 0;
@@ -140,16 +126,7 @@ int main(int argc, char *argv[])
/* Create VM */
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA,
- SMRAM_MEMSLOT, SMRAM_PAGES, 0);
- TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, SMRAM_GPA, SMRAM_MEMSLOT)
- == SMRAM_GPA, "could not allocate guest physical addresses?");
-
- memset(addr_gpa2hva(vm, SMRAM_GPA), 0x0, SMRAM_SIZE);
- memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler,
- sizeof(smi_handler));
-
- vcpu_set_msr(vcpu, MSR_IA32_SMBASE, SMRAM_GPA);
+ setup_smram(vm, vcpu, SMRAM_GPA, smi_handler, sizeof(smi_handler));
if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
if (kvm_cpu_has(X86_FEATURE_SVM))