author	Vlastimil Babka <vbabka@suse.cz>	2026-02-11 10:42:30 +0100
committer	Vlastimil Babka (SUSE) <vbabka@kernel.org>	2026-03-02 10:04:22 +0100
commit	48647d3f9a644d1e81af6558102d43cdb260597b (patch)
tree	17095d16ec6bc68943b3db17ee3aab8487085470
parent	e9217ca77dc35b4978db0fe901685ddb3f1e223a (diff)
slab: distinguish lock and trylock for sheaf_flush_main()
sheaf_flush_main() can be called from __pcs_replace_full_main(), where it is
fine if the trylock fails, and from pcs_flush_all(), where it is not expected
to fail, and for some flush callers (when destroying the cache or during
memory hotremove) it would actually be a problem if it failed and left the
main sheaf not flushed. The flush callers can, however, safely use
local_lock() instead of the trylock.

The trylock failure should not happen in practice on !PREEMPT_RT, but can
happen on PREEMPT_RT. The impact is limited in practice because when a
trylock fails in the kmem_cache_destroy() path, it means someone is using
the cache while destroying it, which is a bug on its own. The memory
hotremove path is unlikely to be employed in a production RT config, but it
is possible.

To fix this, split the function into sheaf_flush_main() (using local_lock())
and sheaf_try_flush_main() (using local_trylock()), where both call
__sheaf_flush_main_batch() to flush a single batch of objects. This will
also allow lockdep to verify our context assumptions.

The problem was raised in an off-list question by Marcelo.

Fixes: 2d517aa09bbc ("slab: add opt-in caching layer of percpu sheaves")
Cc: stable@vger.kernel.org
Reported-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Reviewed-by: Hao Li <hao.li@linux.dev>
Link: https://patch.msgid.link/20260211-b4-sheaf-flush-v1-1-4e7f492f0055@suse.cz
Signed-off-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
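[Editorial illustration: the lock/trylock split the patch introduces can be
sketched in userspace C, with a pthread mutex standing in for the kernel's
local_lock()/local_trylock(). The names pending, batch_flush(), flush_all()
and try_flush() are hypothetical stand-ins for the sheaf state and for
__sheaf_flush_main_batch(), sheaf_flush_main() and sheaf_try_flush_main();
this is a sketch of the pattern, not the kernel code.]

	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	#define BATCH_MAX 4	/* stands in for PCS_BATCH_MAX */

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static unsigned int pending = 10;	/* objects in the "main sheaf" */

	/*
	 * Flush one bounded batch. Called with the lock held, returns with it
	 * unlocked and reports how many objects remain. The kernel helper
	 * additionally checks this contract with lockdep_assert_held().
	 */
	static unsigned int batch_flush(void)
	{
		unsigned int batch = pending < BATCH_MAX ? pending : BATCH_MAX;
		unsigned int remaining;

		pending -= batch;
		remaining = pending;
		pthread_mutex_unlock(&lock);

		printf("flushed %u object(s), %u remaining\n", batch, remaining);
		return remaining;
	}

	/* Callers that must fully flush: take the lock outright, loop until empty. */
	static void flush_all(void)
	{
		do {
			pthread_mutex_lock(&lock);
		} while (batch_flush());
	}

	/* Callers that may give up: trylock, report whether anything was flushed. */
	static bool try_flush(void)
	{
		bool ret = false;

		do {
			if (pthread_mutex_trylock(&lock) != 0)
				return ret;
			ret = true;
		} while (batch_flush());

		return ret;
	}

	int main(void)
	{
		if (!try_flush())
			puts("contended: nothing flushed");

		pending = 10;
		flush_all();
		return 0;
	}

[Built with gcc -pthread. flush_all() mirrors the must-not-fail callers
(cache destruction, memory hotremove) and try_flush() mirrors
__pcs_replace_full_main(), matching the split described above.]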
-rw-r--r--	mm/slub.c	47
1 file changed, 37 insertions(+), 10 deletions(-)
diff --git a/mm/slub.c b/mm/slub.c
index 0c906fefc31b..b1e9f16ba435 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2858,19 +2858,19 @@ static void __kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p);
  * object pointers are moved to a on-stack array under the lock. To bound the
  * stack usage, limit each batch to PCS_BATCH_MAX.
  *
- * returns true if at least partially flushed
+ * Must be called with s->cpu_sheaves->lock locked, returns with the lock
+ * unlocked.
+ *
+ * Returns how many objects are remaining to be flushed
  */
-static bool sheaf_flush_main(struct kmem_cache *s)
+static unsigned int __sheaf_flush_main_batch(struct kmem_cache *s)
 {
 	struct slub_percpu_sheaves *pcs;
 	unsigned int batch, remaining;
 	void *objects[PCS_BATCH_MAX];
 	struct slab_sheaf *sheaf;
-	bool ret = false;
 
-next_batch:
-	if (!local_trylock(&s->cpu_sheaves->lock))
-		return ret;
+	lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock));
 
 	pcs = this_cpu_ptr(s->cpu_sheaves);
 	sheaf = pcs->main;
@@ -2888,10 +2888,37 @@ next_batch:
 
 	stat_add(s, SHEAF_FLUSH, batch);
 
-	ret = true;
+	return remaining;
+}
 
-	if (remaining)
-		goto next_batch;
+static void sheaf_flush_main(struct kmem_cache *s)
+{
+	unsigned int remaining;
+
+	do {
+		local_lock(&s->cpu_sheaves->lock);
+
+		remaining = __sheaf_flush_main_batch(s);
+
+	} while (remaining);
+}
+
+/*
+ * Returns true if the main sheaf was at least partially flushed.
+ */
+static bool sheaf_try_flush_main(struct kmem_cache *s)
+{
+	unsigned int remaining;
+	bool ret = false;
+
+	do {
+		if (!local_trylock(&s->cpu_sheaves->lock))
+			return ret;
+
+		ret = true;
+		remaining = __sheaf_flush_main_batch(s);
+
+	} while (remaining);
 
 	return ret;
 }
@@ -5704,7 +5731,7 @@ alloc_empty:
 	if (put_fail)
 		stat(s, BARN_PUT_FAIL);
 
-	if (!sheaf_flush_main(s))
+	if (!sheaf_try_flush_main(s))
 		return NULL;
 
 	if (!local_trylock(&s->cpu_sheaves->lock))