diff options
| author | Paolo Bonzini <pbonzini@redhat.com> | 2026-02-09 19:08:17 +0100 |
|---|---|---|
| committer | Paolo Bonzini <pbonzini@redhat.com> | 2026-02-11 12:45:12 -0500 |
| commit | 9123c5f956b1fbedd63821eb528ece55ddd0e49c (patch) | |
| tree | efdc67b3e97c7df743b18d5d80182ebd8d101378 /virt | |
| parent | 54f15ebfc61ee8499a97f2dbfc18b1b13fdcb524 (diff) | |
| parent | 2a62345b30529e488beb6a1220577b3495933724 (diff) | |
Merge tag 'kvm-x86-gmem-6.20' of https://github.com/kvm-x86/linux into HEAD
KVM guest_memfd changes for 6.20
- Remove kvm_gmem_populate()'s preparation tracking and half-baked hugepage
handling, and instead rely on SNP (the only user of the tracking) to do its
own tracking via the RMP.
- Retroactively document and enforce (for SNP) that KVM_SEV_SNP_LAUNCH_UPDATE
and KVM_TDX_INIT_MEM_REGION require the source page to be 4KiB aligned, to
avoid non-trivial complexity for a non-existent use case (and because
in-place conversion simply can't support unaligned sources).
- When populating guest_memfd memory, GUP the source page in common code and
pass the refcounted page to the vendor callback, instead of letting vendor
code do the heavy lifting. Doing so avoids a looming deadlock bug with
in-place conversion due to an AB-BA conflict between mmap_lock and guest_memfd's filemap
invalidate lock.
Diffstat (limited to 'virt')
| -rw-r--r-- | virt/kvm/guest_memfd.c | 139 |
1 files changed, 76 insertions, 63 deletions
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index fdaea3422c30..923c51a3a525 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -76,11 +76,6 @@ static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slo return 0; } -static inline void kvm_gmem_mark_prepared(struct folio *folio) -{ - folio_mark_uptodate(folio); -} - /* * Process @folio, which contains @gfn, so that the guest can use it. * The folio must be locked and the gfn must be contained in @slot. @@ -90,13 +85,7 @@ static inline void kvm_gmem_mark_prepared(struct folio *folio) static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, struct folio *folio) { - unsigned long nr_pages, i; pgoff_t index; - int r; - - nr_pages = folio_nr_pages(folio); - for (i = 0; i < nr_pages; i++) - clear_highpage(folio_page(folio, i)); /* * Preparing huge folios should always be safe, since it should @@ -114,11 +103,8 @@ static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot, WARN_ON(!IS_ALIGNED(slot->gmem.pgoff, folio_nr_pages(folio))); index = kvm_gmem_get_index(slot, gfn); index = ALIGN_DOWN(index, folio_nr_pages(folio)); - r = __kvm_gmem_prepare_folio(kvm, slot, index, folio); - if (!r) - kvm_gmem_mark_prepared(folio); - return r; + return __kvm_gmem_prepare_folio(kvm, slot, index, folio); } /* @@ -151,6 +137,15 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index) mapping_gfp_mask(inode->i_mapping), policy); mpol_cond_put(policy); + /* + * External interfaces like kvm_gmem_get_pfn() support dealing + * with hugepages to a degree, but internally, guest_memfd currently + * assumes that all folios are order-0 and handling would need + * to be updated for anything otherwise (e.g. page-clearing + * operations). 
+ */ + WARN_ON_ONCE(!IS_ERR(folio) && folio_order(folio)); + return folio; } @@ -420,7 +415,7 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf) if (!folio_test_uptodate(folio)) { clear_highpage(folio_page(folio, 0)); - kvm_gmem_mark_prepared(folio); + folio_mark_uptodate(folio); } vmf->page = folio_file_page(folio, vmf->pgoff); @@ -757,7 +752,7 @@ void kvm_gmem_unbind(struct kvm_memory_slot *slot) static struct folio *__kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot, pgoff_t index, kvm_pfn_t *pfn, - bool *is_prepared, int *max_order) + int *max_order) { struct file *slot_file = READ_ONCE(slot->gmem.file); struct gmem_file *f = file->private_data; @@ -787,7 +782,6 @@ static struct folio *__kvm_gmem_get_pfn(struct file *file, if (max_order) *max_order = 0; - *is_prepared = folio_test_uptodate(folio); return folio; } @@ -797,19 +791,22 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, { pgoff_t index = kvm_gmem_get_index(slot, gfn); struct folio *folio; - bool is_prepared = false; int r = 0; CLASS(gmem_get_file, file)(slot); if (!file) return -EFAULT; - folio = __kvm_gmem_get_pfn(file, slot, index, pfn, &is_prepared, max_order); + folio = __kvm_gmem_get_pfn(file, slot, index, pfn, max_order); if (IS_ERR(folio)) return PTR_ERR(folio); - if (!is_prepared) - r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio); + if (!folio_test_uptodate(folio)) { + clear_highpage(folio_page(folio, 0)); + folio_mark_uptodate(folio); + } + + r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio); folio_unlock(folio); @@ -823,13 +820,49 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_gmem_get_pfn); #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_POPULATE + +static long __kvm_gmem_populate(struct kvm *kvm, struct kvm_memory_slot *slot, + struct file *file, gfn_t gfn, struct page *src_page, + kvm_gmem_populate_cb post_populate, void *opaque) +{ + pgoff_t index = kvm_gmem_get_index(slot, gfn); + 
struct folio *folio; + kvm_pfn_t pfn; + int ret; + + filemap_invalidate_lock(file->f_mapping); + + folio = __kvm_gmem_get_pfn(file, slot, index, &pfn, NULL); + if (IS_ERR(folio)) { + ret = PTR_ERR(folio); + goto out_unlock; + } + + folio_unlock(folio); + + if (!kvm_range_has_memory_attributes(kvm, gfn, gfn + 1, + KVM_MEMORY_ATTRIBUTE_PRIVATE, + KVM_MEMORY_ATTRIBUTE_PRIVATE)) { + ret = -EINVAL; + goto out_put_folio; + } + + ret = post_populate(kvm, gfn, pfn, src_page, opaque); + if (!ret) + folio_mark_uptodate(folio); + +out_put_folio: + folio_put(folio); +out_unlock: + filemap_invalidate_unlock(file->f_mapping); + return ret; +} + long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages, kvm_gmem_populate_cb post_populate, void *opaque) { struct kvm_memory_slot *slot; - void __user *p; - - int ret = 0, max_order; + int ret = 0; long i; lockdep_assert_held(&kvm->slots_lock); @@ -837,6 +870,9 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long if (WARN_ON_ONCE(npages <= 0)) return -EINVAL; + if (WARN_ON_ONCE(!PAGE_ALIGNED(src))) + return -EINVAL; + slot = gfn_to_memslot(kvm, start_gfn); if (!kvm_slot_has_gmem(slot)) return -EINVAL; @@ -845,60 +881,37 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long if (!file) return -EFAULT; - filemap_invalidate_lock(file->f_mapping); - npages = min_t(ulong, slot->npages - (start_gfn - slot->base_gfn), npages); - for (i = 0; i < npages; i += (1 << max_order)) { - struct folio *folio; - gfn_t gfn = start_gfn + i; - pgoff_t index = kvm_gmem_get_index(slot, gfn); - bool is_prepared = false; - kvm_pfn_t pfn; + for (i = 0; i < npages; i++) { + struct page *src_page = NULL; if (signal_pending(current)) { ret = -EINTR; break; } - folio = __kvm_gmem_get_pfn(file, slot, index, &pfn, &is_prepared, &max_order); - if (IS_ERR(folio)) { - ret = PTR_ERR(folio); - break; - } + if (src) { + unsigned long uaddr = (unsigned long)src + i * PAGE_SIZE; - if 
(is_prepared) { - folio_unlock(folio); - folio_put(folio); - ret = -EEXIST; - break; + ret = get_user_pages_fast(uaddr, 1, 0, &src_page); + if (ret < 0) + break; + if (ret != 1) { + ret = -ENOMEM; + break; + } } - folio_unlock(folio); - WARN_ON(!IS_ALIGNED(gfn, 1 << max_order) || - (npages - i) < (1 << max_order)); + ret = __kvm_gmem_populate(kvm, slot, file, start_gfn + i, src_page, + post_populate, opaque); - ret = -EINVAL; - while (!kvm_range_has_memory_attributes(kvm, gfn, gfn + (1 << max_order), - KVM_MEMORY_ATTRIBUTE_PRIVATE, - KVM_MEMORY_ATTRIBUTE_PRIVATE)) { - if (!max_order) - goto put_folio_and_exit; - max_order--; - } - - p = src ? src + i * PAGE_SIZE : NULL; - ret = post_populate(kvm, gfn, pfn, p, max_order, opaque); - if (!ret) - kvm_gmem_mark_prepared(folio); + if (src_page) + put_page(src_page); -put_folio_and_exit: - folio_put(folio); if (ret) break; } - filemap_invalidate_unlock(file->f_mapping); - return ret && !i ? ret : i; } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_gmem_populate); |
