summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-03-29 09:34:50 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2026-03-29 09:34:50 -0700
commita3d97d1d3fa68253927a6e6c2fdfe7c039073af7 (patch)
tree442a09adfce72a98f13f9ac26157617d9d877b44
parent241d4ca15de9bf2cc04bdec466a6a2b0bd5dbc19 (diff)
parent1f6ee9be92f8df85a8c9a5a78c20fd39c0c21a95 (diff)
Merge tag 'ovl-fixes-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/overlayfs/vfs
Pull overlayfs fixes from Amir Goldstein: - Fix regression in 'xino' feature detection I clumsily introduced this regression myself when working on another subsystem (fsnotify). Both the regression and the fix have almost no visible impact on users except for some kmsg prints. - Fix to performance regression in v6.12. This regression was reported by Google COS developers. It is not uncommon these days for the year-old mature LTS to get adopted by distros and get exposed to many new workloads. We made a sub-smart move of making a behavior change in v6.12 which could impact performance, without making it opt-in. Fixing this mistake retroactively, to be picked by LTS. * tag 'ovl-fixes-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/overlayfs/vfs: ovl: make fsync after metadata copy-up opt-in mount option ovl: fix wrong detection of 32bit inode numbers
-rw-r--r--Documentation/filesystems/overlayfs.rst50
-rw-r--r--fs/overlayfs/copy_up.c6
-rw-r--r--fs/overlayfs/overlayfs.h21
-rw-r--r--fs/overlayfs/ovl_entry.h7
-rw-r--r--fs/overlayfs/params.c33
-rw-r--r--fs/overlayfs/super.c2
-rw-r--r--fs/overlayfs/util.c5
7 files changed, 108 insertions, 16 deletions
diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst
index af5a69f87da4..eb846518e6ac 100644
--- a/Documentation/filesystems/overlayfs.rst
+++ b/Documentation/filesystems/overlayfs.rst
@@ -783,6 +783,56 @@ controlled by the "uuid" mount option, which supports these values:
mounted with "uuid=on".
+Durability and copy up
+----------------------
+
+The fsync(2) system call ensures that the data and metadata of a file
+are safely written to the backing storage, which is expected to
+guarantee the existence of the information post system crash.
+
+Without an fsync(2) call, there is no guarantee that the observed
+data after a system crash will be either the old or the new data, but
+in practice, the observed data after crash is often the old or new data
+or a mix of both.
+
+When an overlayfs file is modified for the first time, copy up will
+create a copy of the lower file and its parent directories in the upper
+layer. Since the Linux filesystem API does not enforce any particular
+ordering on storing changes without explicit fsync(2) calls, in case
+of a system crash, the upper file could end up with no data at all
+(i.e. zeros), which would be an unusual outcome. To avoid this
+experience, overlayfs calls fsync(2) on the upper file before completing
+data copy up with rename(2) or link(2) to make the copy up "atomic".
+
+By default, overlayfs does not explicitly call fsync(2) on copied up
+directories or on metadata-only copy up, so it provides no guarantee to
+persist the user's modification unless the user calls fsync(2).
+The fsync during copy up only guarantees that if a copy up is observed
+after a crash, the observed data is not zeroes or intermediate values
+from the copy up staging area.
+
+On traditional local filesystems with a single journal (e.g. ext4, xfs),
+fsync on a file also persists the parent directory changes, because they
+are usually modified in the same transaction, so metadata durability during
+data copy up effectively comes for free. Overlayfs further limits risk by
+disallowing network filesystems as upper layer.
+
+Overlayfs can be tuned to prefer performance or durability when storing
+to the underlying upper layer. This is controlled by the "fsync" mount
+option, which supports these values:
+
+- "auto": (default)
+ Call fsync(2) on upper file before completion of data copy up.
+ No explicit fsync(2) on directory or metadata-only copy up.
+- "strict":
+ Call fsync(2) on upper file and directories before completion of any
+ copy up.
+- "volatile": [*]
+ Prefer performance over durability (see `Volatile mount`_)
+
+[*] The mount option "volatile" is an alias to "fsync=volatile".
+
+
Volatile mount
--------------
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 758611ee4475..13cb60b52bd6 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -1146,15 +1146,15 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
return -EOVERFLOW;
/*
- * With metacopy disabled, we fsync after final metadata copyup, for
+ * With "fsync=strict", we fsync after final metadata copyup, for
* both regular files and directories to get atomic copyup semantics
* on filesystems that do not use strict metadata ordering (e.g. ubifs).
*
- * With metacopy enabled we want to avoid fsync on all meta copyup
+ * By default, we want to avoid fsync on all meta copyup, because
* that will hurt performance of workloads such as chown -R, so we
* only fsync on data copyup as legacy behavior.
*/
- ctx.metadata_fsync = !OVL_FS(dentry->d_sb)->config.metacopy &&
+ ctx.metadata_fsync = ovl_should_sync_metadata(OVL_FS(dentry->d_sb)) &&
(S_ISREG(ctx.stat.mode) || S_ISDIR(ctx.stat.mode));
ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags);
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index cad2055ebf18..63b299bf12f7 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -99,6 +99,12 @@ enum {
OVL_VERITY_REQUIRE,
};
+enum {
+ OVL_FSYNC_VOLATILE,
+ OVL_FSYNC_AUTO,
+ OVL_FSYNC_STRICT,
+};
+
/*
* The tuple (fh,uuid) is a universal unique identifier for a copy up origin,
* where:
@@ -656,6 +662,21 @@ static inline bool ovl_xino_warn(struct ovl_fs *ofs)
return ofs->config.xino == OVL_XINO_ON;
}
+static inline bool ovl_should_sync(struct ovl_fs *ofs)
+{
+ return ofs->config.fsync_mode != OVL_FSYNC_VOLATILE;
+}
+
+static inline bool ovl_should_sync_metadata(struct ovl_fs *ofs)
+{
+ return ofs->config.fsync_mode == OVL_FSYNC_STRICT;
+}
+
+static inline bool ovl_is_volatile(struct ovl_config *config)
+{
+ return config->fsync_mode == OVL_FSYNC_VOLATILE;
+}
+
/*
* To avoid regressions in existing setups with overlay lower offline changes,
* we allow lower changes only if none of the new features are used.
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 1d4828dbcf7a..80cad4ea96a3 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -18,7 +18,7 @@ struct ovl_config {
int xino;
bool metacopy;
bool userxattr;
- bool ovl_volatile;
+ int fsync_mode;
};
struct ovl_sb {
@@ -120,11 +120,6 @@ static inline struct ovl_fs *OVL_FS(struct super_block *sb)
return (struct ovl_fs *)sb->s_fs_info;
}
-static inline bool ovl_should_sync(struct ovl_fs *ofs)
-{
- return !ofs->config.ovl_volatile;
-}
-
static inline unsigned int ovl_numlower(struct ovl_entry *oe)
{
return oe ? oe->__numlower : 0;
diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c
index 8111b437ae5d..c93fcaa45d4a 100644
--- a/fs/overlayfs/params.c
+++ b/fs/overlayfs/params.c
@@ -58,6 +58,7 @@ enum ovl_opt {
Opt_xino,
Opt_metacopy,
Opt_verity,
+ Opt_fsync,
Opt_volatile,
Opt_override_creds,
};
@@ -140,6 +141,23 @@ static int ovl_verity_mode_def(void)
return OVL_VERITY_OFF;
}
+static const struct constant_table ovl_parameter_fsync[] = {
+ { "volatile", OVL_FSYNC_VOLATILE },
+ { "auto", OVL_FSYNC_AUTO },
+ { "strict", OVL_FSYNC_STRICT },
+ {}
+};
+
+static const char *ovl_fsync_mode(struct ovl_config *config)
+{
+ return ovl_parameter_fsync[config->fsync_mode].name;
+}
+
+static int ovl_fsync_mode_def(void)
+{
+ return OVL_FSYNC_AUTO;
+}
+
const struct fs_parameter_spec ovl_parameter_spec[] = {
fsparam_string_empty("lowerdir", Opt_lowerdir),
fsparam_file_or_string("lowerdir+", Opt_lowerdir_add),
@@ -155,6 +173,7 @@ const struct fs_parameter_spec ovl_parameter_spec[] = {
fsparam_enum("xino", Opt_xino, ovl_parameter_xino),
fsparam_enum("metacopy", Opt_metacopy, ovl_parameter_bool),
fsparam_enum("verity", Opt_verity, ovl_parameter_verity),
+ fsparam_enum("fsync", Opt_fsync, ovl_parameter_fsync),
fsparam_flag("volatile", Opt_volatile),
fsparam_flag_no("override_creds", Opt_override_creds),
{}
@@ -665,8 +684,11 @@ static int ovl_parse_param(struct fs_context *fc, struct fs_parameter *param)
case Opt_verity:
config->verity_mode = result.uint_32;
break;
+ case Opt_fsync:
+ config->fsync_mode = result.uint_32;
+ break;
case Opt_volatile:
- config->ovl_volatile = true;
+ config->fsync_mode = OVL_FSYNC_VOLATILE;
break;
case Opt_userxattr:
config->userxattr = true;
@@ -800,6 +822,7 @@ int ovl_init_fs_context(struct fs_context *fc)
ofs->config.nfs_export = ovl_nfs_export_def;
ofs->config.xino = ovl_xino_def();
ofs->config.metacopy = ovl_metacopy_def;
+ ofs->config.fsync_mode = ovl_fsync_mode_def();
fc->s_fs_info = ofs;
fc->fs_private = ctx;
@@ -870,9 +893,9 @@ int ovl_fs_params_verify(const struct ovl_fs_context *ctx,
config->index = false;
}
- if (!config->upperdir && config->ovl_volatile) {
+ if (!config->upperdir && ovl_is_volatile(config)) {
pr_info("option \"volatile\" is meaningless in a non-upper mount, ignoring it.\n");
- config->ovl_volatile = false;
+ config->fsync_mode = ovl_fsync_mode_def();
}
if (!config->upperdir && config->uuid == OVL_UUID_ON) {
@@ -1070,8 +1093,8 @@ int ovl_show_options(struct seq_file *m, struct dentry *dentry)
seq_printf(m, ",xino=%s", ovl_xino_mode(&ofs->config));
if (ofs->config.metacopy != ovl_metacopy_def)
seq_printf(m, ",metacopy=%s", str_on_off(ofs->config.metacopy));
- if (ofs->config.ovl_volatile)
- seq_puts(m, ",volatile");
+ if (ofs->config.fsync_mode != ovl_fsync_mode_def())
+ seq_printf(m, ",fsync=%s", ovl_fsync_mode(&ofs->config));
if (ofs->config.userxattr)
seq_puts(m, ",userxattr");
if (ofs->config.verity_mode != ovl_verity_mode_def())
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index d4c12feec039..0822987cfb51 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -776,7 +776,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
* For volatile mount, create a incompat/volatile/dirty file to keep
* track of it.
*/
- if (ofs->config.ovl_volatile) {
+ if (ovl_is_volatile(&ofs->config)) {
err = ovl_create_volatile_dirty(ofs);
if (err < 0) {
pr_err("Failed to create volatile/dirty file.\n");
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 3f1b763a8bb4..2ea769f311c3 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -85,7 +85,10 @@ int ovl_can_decode_fh(struct super_block *sb)
if (!exportfs_can_decode_fh(sb->s_export_op))
return 0;
- return sb->s_export_op->encode_fh ? -1 : FILEID_INO32_GEN;
+ if (sb->s_export_op->encode_fh == generic_encode_ino32_fh)
+ return FILEID_INO32_GEN;
+
+ return -1;
}
struct dentry *ovl_indexdir(struct super_block *sb)