diff options
| -rw-r--r-- | Documentation/filesystems/overlayfs.rst | 50 | ||||
| -rw-r--r-- | fs/overlayfs/copy_up.c | 6 | ||||
| -rw-r--r-- | fs/overlayfs/overlayfs.h | 21 | ||||
| -rw-r--r-- | fs/overlayfs/ovl_entry.h | 7 | ||||
| -rw-r--r-- | fs/overlayfs/params.c | 33 | ||||
| -rw-r--r-- | fs/overlayfs/super.c | 2 | ||||
| -rw-r--r-- | fs/overlayfs/util.c | 5 |
7 files changed, 108 insertions, 16 deletions
diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst index af5a69f87da4..eb846518e6ac 100644 --- a/Documentation/filesystems/overlayfs.rst +++ b/Documentation/filesystems/overlayfs.rst @@ -783,6 +783,56 @@ controlled by the "uuid" mount option, which supports these values: mounted with "uuid=on". +Durability and copy up +---------------------- + +The fsync(2) system call ensures that the data and metadata of a file +are safely written to the backing storage, which is expected to +guarantee the existence of the information post system crash. + +Without an fsync(2) call, there is no guarantee that the observed +data after a system crash will be either the old or the new data, but +in practice, the observed data after crash is often the old or new data +or a mix of both. + +When an overlayfs file is modified for the first time, copy up will +create a copy of the lower file and its parent directories in the upper +layer. Since the Linux filesystem API does not enforce any particular +ordering on storing changes without explicit fsync(2) calls, in case +of a system crash, the upper file could end up with no data at all +(i.e. zeros), which would be an unusual outcome. To avoid this +experience, overlayfs calls fsync(2) on the upper file before completing +data copy up with rename(2) or link(2) to make the copy up "atomic". + +By default, overlayfs does not explicitly call fsync(2) on copied up +directories or on metadata-only copy up, so it provides no guarantee to +persist the user's modification unless the user calls fsync(2). +The fsync during copy up only guarantees that if a copy up is observed +after a crash, the observed data is not zeroes or intermediate values +from the copy up staging area. + +On traditional local filesystems with a single journal (e.g. ext4, xfs), +fsync on a file also persists the parent directory changes, because they +are usually modified in the same transaction, so metadata durability during +data copy up effectively comes for free. Overlayfs further limits risk by +disallowing network filesystems as upper layer. + +Overlayfs can be tuned to prefer performance or durability when storing +to the underlying upper layer. This is controlled by the "fsync" mount +option, which supports these values: + +- "auto": (default) + Call fsync(2) on upper file before completion of data copy up. + No explicit fsync(2) on directory or metadata-only copy up. +- "strict": + Call fsync(2) on upper file and directories before completion of any + copy up. +- "volatile": [*] + Prefer performance over durability (see `Volatile mount`_) + +[*] The mount option "volatile" is an alias to "fsync=volatile". + + Volatile mount -------------- diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 758611ee4475..13cb60b52bd6 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -1146,15 +1146,15 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, return -EOVERFLOW; /* - * With metacopy disabled, we fsync after final metadata copyup, for + * With "fsync=strict", we fsync after final metadata copyup, for * both regular files and directories to get atomic copyup semantics * on filesystems that do not use strict metadata ordering (e.g. ubifs). * - * With metacopy enabled we want to avoid fsync on all meta copyup + * By default, we want to avoid fsync on all meta copyup, because * that will hurt performance of workloads such as chown -R, so we * only fsync on data copyup as legacy behavior. */ - ctx.metadata_fsync = !OVL_FS(dentry->d_sb)->config.metacopy && + ctx.metadata_fsync = ovl_should_sync_metadata(OVL_FS(dentry->d_sb)) && (S_ISREG(ctx.stat.mode) || S_ISDIR(ctx.stat.mode)); ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index cad2055ebf18..63b299bf12f7 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -99,6 +99,12 @@ enum { OVL_VERITY_REQUIRE, }; +enum { + OVL_FSYNC_VOLATILE, + OVL_FSYNC_AUTO, + OVL_FSYNC_STRICT, +}; + /* * The tuple (fh,uuid) is a universal unique identifier for a copy up origin, * where: @@ -656,6 +662,21 @@ static inline bool ovl_xino_warn(struct ovl_fs *ofs) return ofs->config.xino == OVL_XINO_ON; } +static inline bool ovl_should_sync(struct ovl_fs *ofs) +{ + return ofs->config.fsync_mode != OVL_FSYNC_VOLATILE; +} + +static inline bool ovl_should_sync_metadata(struct ovl_fs *ofs) +{ + return ofs->config.fsync_mode == OVL_FSYNC_STRICT; +} + +static inline bool ovl_is_volatile(struct ovl_config *config) +{ + return config->fsync_mode == OVL_FSYNC_VOLATILE; +} + /* * To avoid regressions in existing setups with overlay lower offline changes, * we allow lower changes only if none of the new features are used. diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 1d4828dbcf7a..80cad4ea96a3 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -18,7 +18,7 @@ struct ovl_config { int xino; bool metacopy; bool userxattr; - bool ovl_volatile; + int fsync_mode; }; struct ovl_sb { @@ -120,11 +120,6 @@ static inline struct ovl_fs *OVL_FS(struct super_block *sb) return (struct ovl_fs *)sb->s_fs_info; } -static inline bool ovl_should_sync(struct ovl_fs *ofs) -{ - return !ofs->config.ovl_volatile; -} - static inline unsigned int ovl_numlower(struct ovl_entry *oe) { return oe ? oe->__numlower : 0; diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c index 8111b437ae5d..c93fcaa45d4a 100644 --- a/fs/overlayfs/params.c +++ b/fs/overlayfs/params.c @@ -58,6 +58,7 @@ enum ovl_opt { Opt_xino, Opt_metacopy, Opt_verity, + Opt_fsync, Opt_volatile, Opt_override_creds, }; @@ -140,6 +141,23 @@ static int ovl_verity_mode_def(void) return OVL_VERITY_OFF; } +static const struct constant_table ovl_parameter_fsync[] = { + { "volatile", OVL_FSYNC_VOLATILE }, + { "auto", OVL_FSYNC_AUTO }, + { "strict", OVL_FSYNC_STRICT }, + {} +}; + +static const char *ovl_fsync_mode(struct ovl_config *config) +{ + return ovl_parameter_fsync[config->fsync_mode].name; +} + +static int ovl_fsync_mode_def(void) +{ + return OVL_FSYNC_AUTO; +} + const struct fs_parameter_spec ovl_parameter_spec[] = { fsparam_string_empty("lowerdir", Opt_lowerdir), fsparam_file_or_string("lowerdir+", Opt_lowerdir_add), @@ -155,6 +173,7 @@ const struct fs_parameter_spec ovl_parameter_spec[] = { fsparam_enum("xino", Opt_xino, ovl_parameter_xino), fsparam_enum("metacopy", Opt_metacopy, ovl_parameter_bool), fsparam_enum("verity", Opt_verity, ovl_parameter_verity), + fsparam_enum("fsync", Opt_fsync, ovl_parameter_fsync), fsparam_flag("volatile", Opt_volatile), fsparam_flag_no("override_creds", Opt_override_creds), {} @@ -665,8 +684,11 @@ static int ovl_parse_param(struct fs_context *fc, struct fs_parameter *param) case Opt_verity: config->verity_mode = result.uint_32; break; + case Opt_fsync: + config->fsync_mode = result.uint_32; + break; case Opt_volatile: - config->ovl_volatile = true; + config->fsync_mode = OVL_FSYNC_VOLATILE; break; case Opt_userxattr: config->userxattr = true; @@ -800,6 +822,7 @@ int ovl_init_fs_context(struct fs_context *fc) ofs->config.nfs_export = ovl_nfs_export_def; ofs->config.xino = ovl_xino_def(); ofs->config.metacopy = ovl_metacopy_def; + ofs->config.fsync_mode = ovl_fsync_mode_def(); fc->s_fs_info = ofs; fc->fs_private = ctx; @@ -870,9 +893,9 @@ int ovl_fs_params_verify(const struct ovl_fs_context *ctx, config->index = false; } - if (!config->upperdir && config->ovl_volatile) { + if (!config->upperdir && ovl_is_volatile(config)) { pr_info("option \"volatile\" is meaningless in a non-upper mount, ignoring it.\n"); - config->ovl_volatile = false; + config->fsync_mode = ovl_fsync_mode_def(); } if (!config->upperdir && config->uuid == OVL_UUID_ON) { @@ -1070,8 +1093,8 @@ int ovl_show_options(struct seq_file *m, struct dentry *dentry) seq_printf(m, ",xino=%s", ovl_xino_mode(&ofs->config)); if (ofs->config.metacopy != ovl_metacopy_def) seq_printf(m, ",metacopy=%s", str_on_off(ofs->config.metacopy)); - if (ofs->config.ovl_volatile) - seq_puts(m, ",volatile"); + if (ofs->config.fsync_mode != ovl_fsync_mode_def()) + seq_printf(m, ",fsync=%s", ovl_fsync_mode(&ofs->config)); if (ofs->config.userxattr) seq_puts(m, ",userxattr"); if (ofs->config.verity_mode != ovl_verity_mode_def()) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index d4c12feec039..0822987cfb51 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -776,7 +776,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, * For volatile mount, create a incompat/volatile/dirty file to keep * track of it. */ - if (ofs->config.ovl_volatile) { + if (ovl_is_volatile(&ofs->config)) { err = ovl_create_volatile_dirty(ofs); if (err < 0) { pr_err("Failed to create volatile/dirty file.\n"); diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 3f1b763a8bb4..2ea769f311c3 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -85,7 +85,10 @@ int ovl_can_decode_fh(struct super_block *sb) if (!exportfs_can_decode_fh(sb->s_export_op)) return 0; - return sb->s_export_op->encode_fh ? -1 : FILEID_INO32_GEN; + if (sb->s_export_op->encode_fh == generic_encode_ino32_fh) + return FILEID_INO32_GEN; + + return -1; } struct dentry *ovl_indexdir(struct super_block *sb) |
