summaryrefslogtreecommitdiff
path: root/virt
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2026-02-09 19:08:17 +0100
committerPaolo Bonzini <pbonzini@redhat.com>2026-02-11 12:45:12 -0500
commit9123c5f956b1fbedd63821eb528ece55ddd0e49c (patch)
treeefdc67b3e97c7df743b18d5d80182ebd8d101378 /virt
parent54f15ebfc61ee8499a97f2dbfc18b1b13fdcb524 (diff)
parent2a62345b30529e488beb6a1220577b3495933724 (diff)
Merge tag 'kvm-x86-gmem-6.20' of https://github.com/kvm-x86/linux into HEAD
KVM guest_memfd changes for 6.20 - Remove kvm_gmem_populate()'s preparation tracking and half-baked hugepage handling, and instead rely on SNP (the only user of the tracking) to do its own tracking via the RMP. - Retroactively document and enforce (for SNP) that KVM_SEV_SNP_LAUNCH_UPDATE and KVM_TDX_INIT_MEM_REGION require the source page to be 4KiB aligned, to avoid non-trivial complexity for a non-existent usecase (and because in-place conversion simply can't support unaligned sources). - When populating guest_memfd memory, GUP the source page in common code and pass the refcounted page to the vendor callback, instead of letting vendor code do the heavy lifting. Doing so avoids a looming deadlock bug with in-place conversion due to an AB-BA conflict between mmap_lock and guest_memfd's filemap invalidate lock.
Diffstat (limited to 'virt')
-rw-r--r--virt/kvm/guest_memfd.c139
1 file changed, 76 insertions, 63 deletions
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index fdaea3422c30..923c51a3a525 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -76,11 +76,6 @@ static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slo
return 0;
}
-static inline void kvm_gmem_mark_prepared(struct folio *folio)
-{
- folio_mark_uptodate(folio);
-}
-
/*
* Process @folio, which contains @gfn, so that the guest can use it.
* The folio must be locked and the gfn must be contained in @slot.
@@ -90,13 +85,7 @@ static inline void kvm_gmem_mark_prepared(struct folio *folio)
static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
gfn_t gfn, struct folio *folio)
{
- unsigned long nr_pages, i;
pgoff_t index;
- int r;
-
- nr_pages = folio_nr_pages(folio);
- for (i = 0; i < nr_pages; i++)
- clear_highpage(folio_page(folio, i));
/*
* Preparing huge folios should always be safe, since it should
@@ -114,11 +103,8 @@ static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
WARN_ON(!IS_ALIGNED(slot->gmem.pgoff, folio_nr_pages(folio)));
index = kvm_gmem_get_index(slot, gfn);
index = ALIGN_DOWN(index, folio_nr_pages(folio));
- r = __kvm_gmem_prepare_folio(kvm, slot, index, folio);
- if (!r)
- kvm_gmem_mark_prepared(folio);
- return r;
+ return __kvm_gmem_prepare_folio(kvm, slot, index, folio);
}
/*
@@ -151,6 +137,15 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
mapping_gfp_mask(inode->i_mapping), policy);
mpol_cond_put(policy);
+ /*
+ * External interfaces like kvm_gmem_get_pfn() support dealing
+ * with hugepages to a degree, but internally, guest_memfd currently
+ * assumes that all folios are order-0 and handling would need
+ * to be updated for anything otherwise (e.g. page-clearing
+ * operations).
+ */
+ WARN_ON_ONCE(!IS_ERR(folio) && folio_order(folio));
+
return folio;
}
@@ -420,7 +415,7 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
if (!folio_test_uptodate(folio)) {
clear_highpage(folio_page(folio, 0));
- kvm_gmem_mark_prepared(folio);
+ folio_mark_uptodate(folio);
}
vmf->page = folio_file_page(folio, vmf->pgoff);
@@ -757,7 +752,7 @@ void kvm_gmem_unbind(struct kvm_memory_slot *slot)
static struct folio *__kvm_gmem_get_pfn(struct file *file,
struct kvm_memory_slot *slot,
pgoff_t index, kvm_pfn_t *pfn,
- bool *is_prepared, int *max_order)
+ int *max_order)
{
struct file *slot_file = READ_ONCE(slot->gmem.file);
struct gmem_file *f = file->private_data;
@@ -787,7 +782,6 @@ static struct folio *__kvm_gmem_get_pfn(struct file *file,
if (max_order)
*max_order = 0;
- *is_prepared = folio_test_uptodate(folio);
return folio;
}
@@ -797,19 +791,22 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
{
pgoff_t index = kvm_gmem_get_index(slot, gfn);
struct folio *folio;
- bool is_prepared = false;
int r = 0;
CLASS(gmem_get_file, file)(slot);
if (!file)
return -EFAULT;
- folio = __kvm_gmem_get_pfn(file, slot, index, pfn, &is_prepared, max_order);
+ folio = __kvm_gmem_get_pfn(file, slot, index, pfn, max_order);
if (IS_ERR(folio))
return PTR_ERR(folio);
- if (!is_prepared)
- r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio);
+ if (!folio_test_uptodate(folio)) {
+ clear_highpage(folio_page(folio, 0));
+ folio_mark_uptodate(folio);
+ }
+
+ r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio);
folio_unlock(folio);
@@ -823,13 +820,49 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_gmem_get_pfn);
#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_POPULATE
+
+static long __kvm_gmem_populate(struct kvm *kvm, struct kvm_memory_slot *slot,
+ struct file *file, gfn_t gfn, struct page *src_page,
+ kvm_gmem_populate_cb post_populate, void *opaque)
+{
+ pgoff_t index = kvm_gmem_get_index(slot, gfn);
+ struct folio *folio;
+ kvm_pfn_t pfn;
+ int ret;
+
+ filemap_invalidate_lock(file->f_mapping);
+
+ folio = __kvm_gmem_get_pfn(file, slot, index, &pfn, NULL);
+ if (IS_ERR(folio)) {
+ ret = PTR_ERR(folio);
+ goto out_unlock;
+ }
+
+ folio_unlock(folio);
+
+ if (!kvm_range_has_memory_attributes(kvm, gfn, gfn + 1,
+ KVM_MEMORY_ATTRIBUTE_PRIVATE,
+ KVM_MEMORY_ATTRIBUTE_PRIVATE)) {
+ ret = -EINVAL;
+ goto out_put_folio;
+ }
+
+ ret = post_populate(kvm, gfn, pfn, src_page, opaque);
+ if (!ret)
+ folio_mark_uptodate(folio);
+
+out_put_folio:
+ folio_put(folio);
+out_unlock:
+ filemap_invalidate_unlock(file->f_mapping);
+ return ret;
+}
+
long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages,
kvm_gmem_populate_cb post_populate, void *opaque)
{
struct kvm_memory_slot *slot;
- void __user *p;
-
- int ret = 0, max_order;
+ int ret = 0;
long i;
lockdep_assert_held(&kvm->slots_lock);
@@ -837,6 +870,9 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
if (WARN_ON_ONCE(npages <= 0))
return -EINVAL;
+ if (WARN_ON_ONCE(!PAGE_ALIGNED(src)))
+ return -EINVAL;
+
slot = gfn_to_memslot(kvm, start_gfn);
if (!kvm_slot_has_gmem(slot))
return -EINVAL;
@@ -845,60 +881,37 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
if (!file)
return -EFAULT;
- filemap_invalidate_lock(file->f_mapping);
-
npages = min_t(ulong, slot->npages - (start_gfn - slot->base_gfn), npages);
- for (i = 0; i < npages; i += (1 << max_order)) {
- struct folio *folio;
- gfn_t gfn = start_gfn + i;
- pgoff_t index = kvm_gmem_get_index(slot, gfn);
- bool is_prepared = false;
- kvm_pfn_t pfn;
+ for (i = 0; i < npages; i++) {
+ struct page *src_page = NULL;
if (signal_pending(current)) {
ret = -EINTR;
break;
}
- folio = __kvm_gmem_get_pfn(file, slot, index, &pfn, &is_prepared, &max_order);
- if (IS_ERR(folio)) {
- ret = PTR_ERR(folio);
- break;
- }
+ if (src) {
+ unsigned long uaddr = (unsigned long)src + i * PAGE_SIZE;
- if (is_prepared) {
- folio_unlock(folio);
- folio_put(folio);
- ret = -EEXIST;
- break;
+ ret = get_user_pages_fast(uaddr, 1, 0, &src_page);
+ if (ret < 0)
+ break;
+ if (ret != 1) {
+ ret = -ENOMEM;
+ break;
+ }
}
- folio_unlock(folio);
- WARN_ON(!IS_ALIGNED(gfn, 1 << max_order) ||
- (npages - i) < (1 << max_order));
+ ret = __kvm_gmem_populate(kvm, slot, file, start_gfn + i, src_page,
+ post_populate, opaque);
- ret = -EINVAL;
- while (!kvm_range_has_memory_attributes(kvm, gfn, gfn + (1 << max_order),
- KVM_MEMORY_ATTRIBUTE_PRIVATE,
- KVM_MEMORY_ATTRIBUTE_PRIVATE)) {
- if (!max_order)
- goto put_folio_and_exit;
- max_order--;
- }
-
- p = src ? src + i * PAGE_SIZE : NULL;
- ret = post_populate(kvm, gfn, pfn, p, max_order, opaque);
- if (!ret)
- kvm_gmem_mark_prepared(folio);
+ if (src_page)
+ put_page(src_page);
-put_folio_and_exit:
- folio_put(folio);
if (ret)
break;
}
- filemap_invalidate_unlock(file->f_mapping);
-
return ret && !i ? ret : i;
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_gmem_populate);