summaryrefslogtreecommitdiff
path: root/arch/s390/kvm/gmap.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/kvm/gmap.c')
-rw-r--r-- arch/s390/kvm/gmap.c | 160
1 file changed, 114 insertions(+), 46 deletions(-)
diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c
index ef0c6ebfdde2..645c32c767d2 100644
--- a/arch/s390/kvm/gmap.c
+++ b/arch/s390/kvm/gmap.c
@@ -313,13 +313,16 @@ static long gmap_clear_young_crste(union crste *crstep, gfn_t gfn, gfn_t end, st
struct clear_young_pte_priv *priv = walk->priv;
union crste crste, new;
- crste = READ_ONCE(*crstep);
+ do {
+ crste = READ_ONCE(*crstep);
+
+ if (!crste.h.fc)
+ return 0;
+ if (!crste.s.fc1.y && crste.h.i)
+ return 0;
+ if (crste_prefix(crste) && !gmap_mkold_prefix(priv->gmap, gfn, end))
+ break;
- if (!crste.h.fc)
- return 0;
- if (!crste.s.fc1.y && crste.h.i)
- return 0;
- if (!crste_prefix(crste) || gmap_mkold_prefix(priv->gmap, gfn, end)) {
new = crste;
new.h.i = 1;
new.s.fc1.y = 0;
@@ -328,8 +331,8 @@ static long gmap_clear_young_crste(union crste *crstep, gfn_t gfn, gfn_t end, st
folio_set_dirty(phys_to_folio(crste_origin_large(crste)));
new.s.fc1.d = 0;
new.h.p = 1;
- dat_crstep_xchg(crstep, new, gfn, walk->asce);
- }
+ } while (!dat_crstep_xchg_atomic(crstep, crste, new, gfn, walk->asce));
+
priv->young = 1;
return 0;
}
@@ -391,14 +394,18 @@ static long _gmap_unmap_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct
{
struct gmap_unmap_priv *priv = walk->priv;
struct folio *folio = NULL;
+ union crste old = *crstep;
- if (crstep->h.fc) {
- if (crstep->s.fc1.pr && test_bit(GMAP_FLAG_EXPORT_ON_UNMAP, &priv->gmap->flags))
- folio = phys_to_folio(crste_origin_large(*crstep));
- gmap_crstep_xchg(priv->gmap, crstep, _CRSTE_EMPTY(crstep->h.tt), gfn);
- if (folio)
- uv_convert_from_secure_folio(folio);
- }
+ if (!old.h.fc)
+ return 0;
+
+ if (old.s.fc1.pr && test_bit(GMAP_FLAG_EXPORT_ON_UNMAP, &priv->gmap->flags))
+ folio = phys_to_folio(crste_origin_large(old));
+ /* No races should happen because kvm->mmu_lock is held in write mode */
+ KVM_BUG_ON(!gmap_crstep_xchg_atomic(priv->gmap, crstep, old, _CRSTE_EMPTY(old.h.tt), gfn),
+ priv->gmap->kvm);
+ if (folio)
+ uv_convert_from_secure_folio(folio);
return 0;
}
@@ -474,23 +481,24 @@ static long _crste_test_and_clear_softdirty(union crste *table, gfn_t gfn, gfn_t
if (fatal_signal_pending(current))
return 1;
- crste = READ_ONCE(*table);
- if (!crste.h.fc)
- return 0;
- if (crste.h.p && !crste.s.fc1.sd)
- return 0;
+ do {
+ crste = READ_ONCE(*table);
+ if (!crste.h.fc)
+ return 0;
+ if (crste.h.p && !crste.s.fc1.sd)
+ return 0;
- /*
- * If this large page contains one or more prefixes of vCPUs that are
- * currently running, do not reset the protection, leave it marked as
- * dirty.
- */
- if (!crste.s.fc1.prefix_notif || gmap_mkold_prefix(gmap, gfn, end)) {
+ /*
+ * If this large page contains one or more prefixes of vCPUs that are
+ * currently running, do not reset the protection, leave it marked as
+ * dirty.
+ */
+ if (crste.s.fc1.prefix_notif && !gmap_mkold_prefix(gmap, gfn, end))
+ break;
new = crste;
new.h.p = 1;
new.s.fc1.sd = 0;
- gmap_crstep_xchg(gmap, table, new, gfn);
- }
+ } while (!gmap_crstep_xchg_atomic(gmap, table, crste, new, gfn));
for ( ; gfn < end; gfn++)
mark_page_dirty(gmap->kvm, gfn);
@@ -511,7 +519,7 @@ void gmap_sync_dirty_log(struct gmap *gmap, gfn_t start, gfn_t end)
_dat_walk_gfn_range(start, end, gmap->asce, &walk_ops, 0, gmap);
}
-static int gmap_handle_minor_crste_fault(union asce asce, struct guest_fault *f)
+static int gmap_handle_minor_crste_fault(struct gmap *gmap, struct guest_fault *f)
{
union crste newcrste, oldcrste = READ_ONCE(*f->crstep);
@@ -536,10 +544,8 @@ static int gmap_handle_minor_crste_fault(union asce asce, struct guest_fault *f)
newcrste.s.fc1.d = 1;
newcrste.s.fc1.sd = 1;
}
- if (!oldcrste.s.fc1.d && newcrste.s.fc1.d)
- SetPageDirty(phys_to_page(crste_origin_large(newcrste)));
/* In case of races, let the slow path deal with it. */
- return !dat_crstep_xchg_atomic(f->crstep, oldcrste, newcrste, f->gfn, asce);
+ return !gmap_crstep_xchg_atomic(gmap, f->crstep, oldcrste, newcrste, f->gfn);
}
/* Trying to write on a read-only page, let the slow path deal with it. */
return 1;
@@ -568,8 +574,6 @@ static int _gmap_handle_minor_pte_fault(struct gmap *gmap, union pgste *pgste,
newpte.s.d = 1;
newpte.s.sd = 1;
}
- if (!oldpte.s.d && newpte.s.d)
- SetPageDirty(pfn_to_page(newpte.h.pfra));
*pgste = gmap_ptep_xchg(gmap, f->ptep, newpte, *pgste, f->gfn);
return 0;
@@ -606,7 +610,7 @@ int gmap_try_fixup_minor(struct gmap *gmap, struct guest_fault *fault)
fault->callback(fault);
pgste_set_unlock(fault->ptep, pgste);
} else {
- rc = gmap_handle_minor_crste_fault(gmap->asce, fault);
+ rc = gmap_handle_minor_crste_fault(gmap, fault);
if (!rc && fault->callback)
fault->callback(fault);
}
@@ -623,10 +627,61 @@ static inline bool gmap_1m_allowed(struct gmap *gmap, gfn_t gfn)
return test_bit(GMAP_FLAG_ALLOW_HPAGE_1M, &gmap->flags);
}
+static int _gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, int level,
+ struct guest_fault *f)
+{
+ union crste oldval, newval;
+ union pte newpte, oldpte;
+ union pgste pgste;
+ int rc = 0;
+
+ rc = dat_entry_walk(mc, f->gfn, gmap->asce, DAT_WALK_ALLOC_CONTINUE, level,
+ &f->crstep, &f->ptep);
+ if (rc == -ENOMEM)
+ return rc;
+ if (KVM_BUG_ON(rc == -EINVAL, gmap->kvm))
+ return rc;
+ if (rc)
+ return -EAGAIN;
+ if (KVM_BUG_ON(get_level(f->crstep, f->ptep) > level, gmap->kvm))
+ return -EINVAL;
+
+ if (f->ptep) {
+ pgste = pgste_get_lock(f->ptep);
+ oldpte = *f->ptep;
+ newpte = _pte(f->pfn, f->writable, f->write_attempt | oldpte.s.d, !f->page);
+ newpte.s.sd = oldpte.s.sd;
+ oldpte.s.sd = 0;
+ if (oldpte.val == _PTE_EMPTY.val || oldpte.h.pfra == f->pfn) {
+ pgste = gmap_ptep_xchg(gmap, f->ptep, newpte, pgste, f->gfn);
+ if (f->callback)
+ f->callback(f);
+ } else {
+ rc = -EAGAIN;
+ }
+ pgste_set_unlock(f->ptep, pgste);
+ } else {
+ do {
+ oldval = READ_ONCE(*f->crstep);
+ newval = _crste_fc1(f->pfn, oldval.h.tt, f->writable,
+ f->write_attempt | oldval.s.fc1.d);
+ newval.s.fc1.s = !f->page;
+ newval.s.fc1.sd = oldval.s.fc1.sd;
+ if (oldval.val != _CRSTE_EMPTY(oldval.h.tt).val &&
+ crste_origin_large(oldval) != crste_origin_large(newval))
+ return -EAGAIN;
+ } while (!gmap_crstep_xchg_atomic(gmap, f->crstep, oldval, newval, f->gfn));
+ if (f->callback)
+ f->callback(f);
+ }
+
+ return rc;
+}
+
int gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, struct guest_fault *f)
{
unsigned int order;
- int rc, level;
+ int level;
lockdep_assert_held(&gmap->kvm->mmu_lock);
@@ -638,16 +693,14 @@ int gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, struct guest_fau
else if (order >= get_order(_SEGMENT_SIZE) && gmap_1m_allowed(gmap, f->gfn))
level = TABLE_TYPE_SEGMENT;
}
- rc = dat_link(mc, gmap->asce, level, uses_skeys(gmap), f);
- KVM_BUG_ON(rc == -EINVAL, gmap->kvm);
- return rc;
+ return _gmap_link(mc, gmap, level, f);
}
static int gmap_ucas_map_one(struct kvm_s390_mmu_cache *mc, struct gmap *gmap,
gfn_t p_gfn, gfn_t c_gfn, bool force_alloc)
{
+ union crste newcrste, oldcrste;
struct page_table *pt;
- union crste newcrste;
union crste *crstep;
union pte *ptep;
int rc;
@@ -673,7 +726,11 @@ static int gmap_ucas_map_one(struct kvm_s390_mmu_cache *mc, struct gmap *gmap,
&crstep, &ptep);
if (rc)
return rc;
- dat_crstep_xchg(crstep, newcrste, c_gfn, gmap->asce);
+ do {
+ oldcrste = READ_ONCE(*crstep);
+ if (oldcrste.val == newcrste.val)
+ break;
+ } while (!dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, c_gfn, gmap->asce));
return 0;
}
@@ -777,8 +834,10 @@ static void gmap_ucas_unmap_one(struct gmap *gmap, gfn_t c_gfn)
int rc;
rc = dat_entry_walk(NULL, c_gfn, gmap->asce, 0, TABLE_TYPE_SEGMENT, &crstep, &ptep);
- if (!rc)
- dat_crstep_xchg(crstep, _PMD_EMPTY, c_gfn, gmap->asce);
+ if (rc)
+ return;
+ while (!dat_crstep_xchg_atomic(crstep, READ_ONCE(*crstep), _PMD_EMPTY, c_gfn, gmap->asce))
+ ;
}
void gmap_ucas_unmap(struct gmap *gmap, gfn_t c_gfn, unsigned long count)
@@ -1017,8 +1076,8 @@ static void gmap_unshadow_level(struct gmap *sg, gfn_t r_gfn, int level)
dat_ptep_xchg(ptep, _PTE_EMPTY, r_gfn, sg->asce, uses_skeys(sg));
return;
}
- crste = READ_ONCE(*crstep);
- dat_crstep_clear(crstep, r_gfn, sg->asce);
+
+ crste = dat_crstep_clear_atomic(crstep, r_gfn, sg->asce);
if (crste_leaf(crste) || crste.h.i)
return;
if (is_pmd(crste))
@@ -1101,6 +1160,7 @@ struct gmap_protect_asce_top_level {
static inline int __gmap_protect_asce_top_level(struct kvm_s390_mmu_cache *mc, struct gmap *sg,
struct gmap_protect_asce_top_level *context)
{
+ struct gmap *parent;
int rc, i;
guard(write_lock)(&sg->kvm->mmu_lock);
@@ -1108,7 +1168,12 @@ static inline int __gmap_protect_asce_top_level(struct kvm_s390_mmu_cache *mc, s
if (kvm_s390_array_needs_retry_safe(sg->kvm, context->seq, context->f))
return -EAGAIN;
- scoped_guard(spinlock, &sg->parent->children_lock) {
+ parent = READ_ONCE(sg->parent);
+ if (!parent)
+ return -EAGAIN;
+ scoped_guard(spinlock, &parent->children_lock) {
+ if (READ_ONCE(sg->parent) != parent)
+ return -EAGAIN;
for (i = 0; i < CRST_TABLE_PAGES; i++) {
if (!context->f[i].valid)
continue;
@@ -1191,6 +1256,9 @@ struct gmap *gmap_create_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *pare
struct gmap *sg, *new;
int rc;
+ if (WARN_ON(!parent))
+ return ERR_PTR(-EINVAL);
+
scoped_guard(spinlock, &parent->children_lock) {
sg = gmap_find_shadow(parent, asce, edat_level);
if (sg) {