summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/userspace-api/landlock.rst23
-rw-r--r--samples/landlock/sandboxer.c5
-rw-r--r--security/landlock/domain.c3
-rw-r--r--security/landlock/ruleset.c9
-rw-r--r--security/landlock/tsync.c92
-rw-r--r--tools/testing/selftests/landlock/tsync_test.c91
6 files changed, 190 insertions, 33 deletions
diff --git a/Documentation/userspace-api/landlock.rst b/Documentation/userspace-api/landlock.rst
index 13134bccdd39..7f86d7a37dc2 100644
--- a/Documentation/userspace-api/landlock.rst
+++ b/Documentation/userspace-api/landlock.rst
@@ -8,7 +8,7 @@ Landlock: unprivileged access control
=====================================
:Author: Mickaël Salaün
-:Date: January 2026
+:Date: March 2026
The goal of Landlock is to enable restriction of ambient rights (e.g. global
filesystem or network access) for a set of processes. Because Landlock
@@ -197,12 +197,27 @@ similar backwards compatibility check is needed for the restrict flags
.. code-block:: c
- __u32 restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON;
- if (abi < 7) {
- /* Clear logging flags unsupported before ABI 7. */
+ __u32 restrict_flags =
+ LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON |
+ LANDLOCK_RESTRICT_SELF_TSYNC;
+ switch (abi) {
+ case 1 ... 6:
+ /* Removes logging flags for ABI < 7 */
restrict_flags &= ~(LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF |
LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON |
LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF);
+ __attribute__((fallthrough));
+ case 7:
+ /*
+ * Removes multithreaded enforcement flag for ABI < 8
+ *
+ * WARNING: Without this flag, calling landlock_restrict_self(2) is
+ * only equivalent if the calling process is single-threaded. Below
+ * ABI v8 (and as of ABI v8, when not using this flag), a Landlock
+ * policy would only be enforced for the calling thread and its
+ * children (and not for all threads, including parents and siblings).
+ */
+ restrict_flags &= ~LANDLOCK_RESTRICT_SELF_TSYNC;
}
The next step is to restrict the current thread from gaining more privileges
diff --git a/samples/landlock/sandboxer.c b/samples/landlock/sandboxer.c
index e7af02f98208..9f21088c0855 100644
--- a/samples/landlock/sandboxer.c
+++ b/samples/landlock/sandboxer.c
@@ -299,7 +299,7 @@ out_unset:
/* clang-format on */
-#define LANDLOCK_ABI_LAST 7
+#define LANDLOCK_ABI_LAST 8
#define XSTR(s) #s
#define STR(s) XSTR(s)
@@ -436,7 +436,8 @@ int main(const int argc, char *const argv[], char *const *const envp)
/* Removes LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON for ABI < 7 */
supported_restrict_flags &=
~LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON;
-
+ __attribute__((fallthrough));
+ case 7:
/* Must be printed for any ABI < LANDLOCK_ABI_LAST. */
fprintf(stderr,
"Hint: You should update the running kernel "
diff --git a/security/landlock/domain.c b/security/landlock/domain.c
index f5b78d4766cd..f0d83f43afa1 100644
--- a/security/landlock/domain.c
+++ b/security/landlock/domain.c
@@ -94,8 +94,7 @@ static struct landlock_details *get_current_details(void)
* allocate with GFP_KERNEL_ACCOUNT because it is independent from the
* caller.
*/
- details =
- kzalloc_flex(*details, exe_path, path_size);
+ details = kzalloc_flex(*details, exe_path, path_size);
if (!details)
return ERR_PTR(-ENOMEM);
diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
index 319873586385..73018dc8d6c7 100644
--- a/security/landlock/ruleset.c
+++ b/security/landlock/ruleset.c
@@ -32,9 +32,8 @@ static struct landlock_ruleset *create_ruleset(const u32 num_layers)
{
struct landlock_ruleset *new_ruleset;
- new_ruleset =
- kzalloc_flex(*new_ruleset, access_masks, num_layers,
- GFP_KERNEL_ACCOUNT);
+ new_ruleset = kzalloc_flex(*new_ruleset, access_masks, num_layers,
+ GFP_KERNEL_ACCOUNT);
if (!new_ruleset)
return ERR_PTR(-ENOMEM);
refcount_set(&new_ruleset->usage, 1);
@@ -559,8 +558,8 @@ landlock_merge_ruleset(struct landlock_ruleset *const parent,
if (IS_ERR(new_dom))
return new_dom;
- new_dom->hierarchy = kzalloc_obj(*new_dom->hierarchy,
- GFP_KERNEL_ACCOUNT);
+ new_dom->hierarchy =
+ kzalloc_obj(*new_dom->hierarchy, GFP_KERNEL_ACCOUNT);
if (!new_dom->hierarchy)
return ERR_PTR(-ENOMEM);
diff --git a/security/landlock/tsync.c b/security/landlock/tsync.c
index de01aa899751..4d4427ba8d93 100644
--- a/security/landlock/tsync.c
+++ b/security/landlock/tsync.c
@@ -203,6 +203,40 @@ static struct tsync_work *tsync_works_provide(struct tsync_works *s,
return ctx;
}
+/**
+ * tsync_works_trim - Put the last tsync_work element
+ *
+ * @s: TSYNC works to trim.
+ *
+ * Put the last task and decrement the size of @s.
+ *
+ * This helper does not cancel a running task, but just resets the last
+ * element to zero.
+ */
+static void tsync_works_trim(struct tsync_works *s)
+{
+ struct tsync_work *ctx;
+
+ if (WARN_ON_ONCE(s->size <= 0))
+ return;
+
+ ctx = s->works[s->size - 1];
+
+ /*
+ * For consistency, remove the task from ctx so that it does not look like
+ * we handed it a task_work.
+ */
+ put_task_struct(ctx->task);
+ *ctx = (typeof(*ctx)){};
+
+ /*
+ * Cancel the tsync_works_provide() change to recycle the reserved memory
+ * for the next thread, if any. This also ensures that cancel_tsync_works()
+ * and tsync_works_release() do not see any NULL task pointers.
+ */
+ s->size--;
+}
+
/*
* tsync_works_grow_by - preallocates space for n more contexts in s
*
@@ -256,13 +290,14 @@ static int tsync_works_grow_by(struct tsync_works *s, size_t n, gfp_t flags)
* tsync_works_contains - checks for presence of task in s
*/
static bool tsync_works_contains_task(const struct tsync_works *s,
- struct task_struct *task)
+ const struct task_struct *task)
{
size_t i;
for (i = 0; i < s->size; i++)
if (s->works[i]->task == task)
return true;
+
return false;
}
@@ -276,7 +311,7 @@ static void tsync_works_release(struct tsync_works *s)
size_t i;
for (i = 0; i < s->size; i++) {
- if (!s->works[i]->task)
+ if (WARN_ON_ONCE(!s->works[i]->task))
continue;
put_task_struct(s->works[i]->task);
@@ -284,6 +319,7 @@ static void tsync_works_release(struct tsync_works *s)
for (i = 0; i < s->capacity; i++)
kfree(s->works[i]);
+
kfree(s->works);
s->works = NULL;
s->size = 0;
@@ -295,7 +331,7 @@ static void tsync_works_release(struct tsync_works *s)
*/
static size_t count_additional_threads(const struct tsync_works *works)
{
- struct task_struct *thread, *caller;
+ const struct task_struct *caller, *thread;
size_t n = 0;
caller = current;
@@ -334,7 +370,8 @@ static bool schedule_task_work(struct tsync_works *works,
struct tsync_shared_context *shared_ctx)
{
int err;
- struct task_struct *thread, *caller;
+ const struct task_struct *caller;
+ struct task_struct *thread;
struct tsync_work *ctx;
bool found_more_threads = false;
@@ -379,16 +416,14 @@ static bool schedule_task_work(struct tsync_works *works,
init_task_work(&ctx->work, restrict_one_thread_callback);
err = task_work_add(thread, &ctx->work, TWA_SIGNAL);
- if (err) {
+ if (unlikely(err)) {
/*
* task_work_add() only fails if the task is about to exit. We
* checked that earlier, but it can happen as a race. Resume
* without setting an error, as the task is probably gone in the
- * next loop iteration. For consistency, remove the task from ctx
- * so that it does not look like we handed it a task_work.
+ * next loop iteration.
*/
- put_task_struct(ctx->task);
- ctx->task = NULL;
+ tsync_works_trim(works);
atomic_dec(&shared_ctx->num_preparing);
atomic_dec(&shared_ctx->num_unfinished);
@@ -406,12 +441,15 @@ static bool schedule_task_work(struct tsync_works *works,
 * shared_ctx->num_preparing and shared_ctx->num_unfinished and mark the two
* completions if needed, as if the task was never scheduled.
*/
-static void cancel_tsync_works(struct tsync_works *works,
+static void cancel_tsync_works(const struct tsync_works *works,
struct tsync_shared_context *shared_ctx)
{
- int i;
+ size_t i;
for (i = 0; i < works->size; i++) {
+ if (WARN_ON_ONCE(!works->works[i]->task))
+ continue;
+
if (!task_work_cancel(works->works[i]->task,
&works->works[i]->work))
continue;
@@ -448,6 +486,16 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred,
shared_ctx.set_no_new_privs = task_no_new_privs(current);
/*
+ * Serialize concurrent TSYNC operations to prevent deadlocks when
+ * multiple threads call landlock_restrict_self() simultaneously.
+ * If the lock is already held, we gracefully yield by restarting the
+ * syscall. This allows the current thread to process pending
+ * task_works before retrying.
+ */
+ if (!down_write_trylock(&current->signal->exec_update_lock))
+ return restart_syscall();
+
+ /*
* We schedule a pseudo-signal task_work for each of the calling task's
* sibling threads. In the task work, each thread:
*
@@ -527,24 +575,30 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred,
-ERESTARTNOINTR);
/*
- * Cancel task works for tasks that did not start running yet,
- * and decrement all_prepared and num_unfinished accordingly.
+ * Opportunistic improvement: try to cancel task
+ * works for tasks that did not start running
+ * yet. We do not have a guarantee that it
+ * cancels any of the enqueued task works
+ * because task_work_run() might already have
+ * dequeued them.
*/
cancel_tsync_works(&works, &shared_ctx);
/*
- * The remaining task works have started running, so waiting for
- * their completion will finish.
+ * Break the loop with error. The cleanup code
+ * after the loop unblocks the remaining
+ * task_works.
*/
- wait_for_completion(&shared_ctx.all_prepared);
+ break;
}
}
} while (found_more_threads &&
!atomic_read(&shared_ctx.preparation_error));
/*
- * We now have all sibling threads blocking and in "prepared" state in the
- * task work. Ask all threads to commit.
+ * We now have either (a) all sibling threads blocking and in "prepared"
+ * state in the task work, or (b) the preparation error is set. Ask all
+ * threads to commit (or abort).
*/
complete_all(&shared_ctx.ready_to_commit);
@@ -556,6 +610,6 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred,
wait_for_completion(&shared_ctx.all_finished);
tsync_works_release(&works);
-
+ up_write(&current->signal->exec_update_lock);
return atomic_read(&shared_ctx.preparation_error);
}
diff --git a/tools/testing/selftests/landlock/tsync_test.c b/tools/testing/selftests/landlock/tsync_test.c
index 37ef0d2270db..2b9ad4f154f4 100644
--- a/tools/testing/selftests/landlock/tsync_test.c
+++ b/tools/testing/selftests/landlock/tsync_test.c
@@ -6,9 +6,10 @@
*/
#define _GNU_SOURCE
+#include <linux/landlock.h>
#include <pthread.h>
+#include <signal.h>
#include <sys/prctl.h>
-#include <linux/landlock.h>
#include "common.h"
@@ -158,4 +159,92 @@ TEST(competing_enablement)
EXPECT_EQ(0, close(ruleset_fd));
}
+static void signal_nop_handler(int sig)
+{
+}
+
+struct signaler_data {
+ pthread_t target;
+ volatile bool stop;
+};
+
+static void *signaler_thread(void *data)
+{
+ struct signaler_data *sd = data;
+
+ while (!sd->stop)
+ pthread_kill(sd->target, SIGUSR1);
+
+ return NULL;
+}
+
+/*
+ * Number of idle sibling threads. This must be large enough that even on
+ * machines with many cores, the sibling threads cannot all complete their
+ * credential preparation in a single parallel wave, otherwise the signaler
+ * thread has no window to interrupt wait_for_completion_interruptible().
+ * 200 threads on a 64-core machine yield ~3 serialized waves, giving the
+ * tight signal loop enough time to land an interruption.
+ */
+#define NUM_IDLE_THREADS 200
+
+/*
+ * Exercises the tsync interruption and cancellation paths in tsync.c.
+ *
+ * When a signal interrupts the calling thread while it waits for sibling
+ * threads to finish their credential preparation
+ * (wait_for_completion_interruptible in landlock_restrict_sibling_threads),
+ * the kernel sets ERESTARTNOINTR, cancels queued task works that have not
+ * started yet (cancel_tsync_works), then waits for the remaining works to
+ * finish. On the error return, syscalls.c aborts the prepared credentials.
+ * The kernel automatically restarts the syscall, so userspace sees success.
+ */
+TEST(tsync_interrupt)
+{
+ size_t i;
+ pthread_t threads[NUM_IDLE_THREADS];
+ pthread_t signaler;
+ struct signaler_data sd;
+ struct sigaction sa = {};
+ const int ruleset_fd = create_ruleset(_metadata);
+
+ disable_caps(_metadata);
+
+ /* Install a no-op SIGUSR1 handler so the signal does not kill us. */
+ sa.sa_handler = signal_nop_handler;
+ sigemptyset(&sa.sa_mask);
+ ASSERT_EQ(0, sigaction(SIGUSR1, &sa, NULL));
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+
+ for (i = 0; i < NUM_IDLE_THREADS; i++)
+ ASSERT_EQ(0, pthread_create(&threads[i], NULL, idle, NULL));
+
+ /*
+ * Start a signaler thread that continuously sends SIGUSR1 to the
+ * calling thread. This maximizes the chance of interrupting
+ * wait_for_completion_interruptible() in the kernel's tsync path.
+ */
+ sd.target = pthread_self();
+ sd.stop = false;
+ ASSERT_EQ(0, pthread_create(&signaler, NULL, signaler_thread, &sd));
+
+ /*
+ * The syscall may be interrupted and transparently restarted by the
+ * kernel (ERESTARTNOINTR). From userspace, it should always succeed.
+ */
+ EXPECT_EQ(0, landlock_restrict_self(ruleset_fd,
+ LANDLOCK_RESTRICT_SELF_TSYNC));
+
+ sd.stop = true;
+ ASSERT_EQ(0, pthread_join(signaler, NULL));
+
+ for (i = 0; i < NUM_IDLE_THREADS; i++) {
+ ASSERT_EQ(0, pthread_cancel(threads[i]));
+ ASSERT_EQ(0, pthread_join(threads[i], NULL));
+ }
+
+ EXPECT_EQ(0, close(ruleset_fd));
+}
+
TEST_HARNESS_MAIN