提交 6b1c776d 编写于 作者: L Linus Torvalds

Merge branch 'overlayfs-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs

Pull overlayfs updates from Miklos Szeredi:
 "This work from Amir introduces the inodes index feature, which
  provides:

   - hardlinks are not broken on copy up

   - infrastructure for overlayfs NFS export

  This also fixes constant st_ino for samefs case for lower hardlinks"

* 'overlayfs-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs: (33 commits)
  ovl: mark parent impure and restore timestamp on ovl_link_up()
  ovl: document copying layers restrictions with inodes index
  ovl: cleanup orphan index entries
  ovl: persistent overlay inode nlink for indexed inodes
  ovl: implement index dir copy up
  ovl: move copy up lock out
  ovl: rearrange copy up
  ovl: add flag for upper in ovl_entry
  ovl: use struct copy_up_ctx as function argument
  ovl: base tmpfile in workdir too
  ovl: factor out ovl_copy_up_inode() helper
  ovl: extract helper to get temp file in copy up
  ovl: defer upper dir lock to tempfile link
  ovl: hash overlay non-dir inodes by copy up origin
  ovl: cleanup bad and stale index entries on mount
  ovl: lookup index entry for copy up origin
  ovl: verify index dir matches upper dir
  ovl: verify upper root dir matches lower root dir
  ovl: introduce the inodes index dir feature
  ovl: generalize ovl_create_workdir()
  ...
......@@ -201,6 +201,40 @@ rightmost one and going left. In the above example lower1 will be the
top, lower2 the middle and lower3 the bottom layer.
Sharing and copying layers
--------------------------
Lower layers may be shared among several overlay mounts and that is indeed
a very common practice. An overlay mount may use the same lower layer
path as another overlay mount and it may use a lower layer path that is
beneath or above the path of another overlay lower layer path.
Using an upper layer path and/or a workdir path that are already used by
another overlay mount is not allowed and will fail with EBUSY. Using
partially overlapping paths is not allowed but will not fail with EBUSY.
Mounting an overlay using an upper layer path, where the upper layer path
was previously used by another mounted overlay in combination with a
different lower layer path, is allowed, unless the "inodes index" feature
is enabled.
With the "inodes index" feature, on the first time mount, an NFS file
handle of the lower layer root directory, along with the UUID of the lower
filesystem, are encoded and stored in the "trusted.overlay.origin" extended
attribute on the upper layer root directory. On subsequent mount attempts,
the lower root directory file handle and lower filesystem UUID are compared
to the stored origin in upper root directory. On failure to verify the
lower root origin, mount will fail with ESTALE. An overlayfs mount with
"inodes index" enabled will fail with EOPNOTSUPP if the lower filesystem
does not support NFS export, lower filesystem does not have a valid UUID or
if the upper filesystem does not support extended attributes.
It is quite a common practice to copy overlay layers to a different
directory tree on the same or different underlying filesystem, and even
to a different machine. With the "inodes index" feature, trying to mount
the copied layers will fail the verification of the lower root file handle.
Non-standard behavior
---------------------
......
......@@ -23,3 +23,23 @@ config OVERLAY_FS_REDIRECT_DIR
Note, that redirects are not backward compatible. That is, mounting
an overlay which has redirects on a kernel that doesn't support this
feature will have unexpected results.
config OVERLAY_FS_INDEX
bool "Overlayfs: turn on inodes index feature by default"
depends on OVERLAY_FS
help
If this config option is enabled then overlay filesystems will use
the inodes index dir to map lower inodes to upper inodes by default.
In this case it is still possible to turn off index globally with the
"index=off" module option or on a filesystem instance basis with the
"index=off" mount option.
The inodes index feature prevents breaking of lower hardlinks on copy
up.
Note, that the inodes index feature is read-only backward compatible.
That is, mounting an overlay which has an index dir on a kernel that
doesn't support this feature read-only, will not have any negative
outcomes. However, mounting the same overlay with an old kernel
read-write and then mounting it again with a new kernel, will have
unexpected results.
......@@ -233,12 +233,13 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
return err;
}
static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_t *uuid)
struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper)
{
struct ovl_fh *fh;
int fh_type, fh_len, dwords;
void *buf;
int buflen = MAX_HANDLE_SZ;
uuid_t *uuid = &lower->d_sb->s_uuid;
buf = kmalloc(buflen, GFP_TEMPORARY);
if (!buf)
......@@ -271,6 +272,14 @@ static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_t *uuid)
fh->magic = OVL_FH_MAGIC;
fh->type = fh_type;
fh->flags = OVL_FH_FLAG_CPU_ENDIAN;
/*
* When we will want to decode an overlay dentry from this handle
* and all layers are on the same fs, if we get a disconncted real
* dentry when we decode fid, the only way to tell if we should assign
* it to upperdentry or to lowerstack is by checking this flag.
*/
if (is_upper)
fh->flags |= OVL_FH_FLAG_PATH_UPPER;
fh->len = fh_len;
fh->uuid = *uuid;
memcpy(fh->fid, buf, buflen);
......@@ -283,7 +292,6 @@ static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_t *uuid)
static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
struct dentry *upper)
{
struct super_block *sb = lower->d_sb;
const struct ovl_fh *fh = NULL;
int err;
......@@ -292,9 +300,8 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
* so we can use the overlay.origin xattr to distignuish between a copy
* up and a pure upper inode.
*/
if (sb->s_export_op && sb->s_export_op->fh_to_dentry &&
!uuid_is_null(&sb->s_uuid)) {
fh = ovl_encode_fh(lower, &sb->s_uuid);
if (ovl_can_decode_fh(lower->d_sb)) {
fh = ovl_encode_fh(lower, false);
if (IS_ERR(fh))
return PTR_ERR(fh);
}
......@@ -309,84 +316,156 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
return err;
}
static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
struct dentry *dentry, struct path *lowerpath,
struct kstat *stat, const char *link,
struct kstat *pstat, bool tmpfile)
struct ovl_copy_up_ctx {
struct dentry *parent;
struct dentry *dentry;
struct path lowerpath;
struct kstat stat;
struct kstat pstat;
const char *link;
struct dentry *destdir;
struct qstr destname;
struct dentry *workdir;
bool tmpfile;
bool origin;
};
static int ovl_link_up(struct ovl_copy_up_ctx *c)
{
int err;
struct dentry *upper;
struct dentry *upperdir = ovl_dentry_upper(c->parent);
struct inode *udir = d_inode(upperdir);
/* Mark parent "impure" because it may now contain non-pure upper */
err = ovl_set_impure(c->parent, upperdir);
if (err)
return err;
err = ovl_set_nlink_lower(c->dentry);
if (err)
return err;
inode_lock_nested(udir, I_MUTEX_PARENT);
upper = lookup_one_len(c->dentry->d_name.name, upperdir,
c->dentry->d_name.len);
err = PTR_ERR(upper);
if (!IS_ERR(upper)) {
err = ovl_do_link(ovl_dentry_upper(c->dentry), udir, upper,
true);
dput(upper);
if (!err) {
/* Restore timestamps on parent (best effort) */
ovl_set_timestamps(upperdir, &c->pstat);
ovl_dentry_set_upper_alias(c->dentry);
}
}
inode_unlock(udir);
ovl_set_nlink_upper(c->dentry);
return err;
}
static int ovl_install_temp(struct ovl_copy_up_ctx *c, struct dentry *temp,
struct dentry **newdentry)
{
struct inode *wdir = workdir->d_inode;
struct inode *udir = upperdir->d_inode;
struct dentry *newdentry = NULL;
struct dentry *upper = NULL;
struct dentry *temp = NULL;
int err;
struct dentry *upper;
struct inode *udir = d_inode(c->destdir);
upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
if (IS_ERR(upper))
return PTR_ERR(upper);
if (c->tmpfile)
err = ovl_do_link(temp, udir, upper, true);
else
err = ovl_do_rename(d_inode(c->workdir), temp, udir, upper, 0);
if (!err)
*newdentry = dget(c->tmpfile ? upper : temp);
dput(upper);
return err;
}
static int ovl_get_tmpfile(struct ovl_copy_up_ctx *c, struct dentry **tempp)
{
int err;
struct dentry *temp;
const struct cred *old_creds = NULL;
struct cred *new_creds = NULL;
struct cattr cattr = {
/* Can't properly set mode on creation because of the umask */
.mode = stat->mode & S_IFMT,
.rdev = stat->rdev,
.link = link
.mode = c->stat.mode & S_IFMT,
.rdev = c->stat.rdev,
.link = c->link
};
err = security_inode_copy_up(dentry, &new_creds);
err = security_inode_copy_up(c->dentry, &new_creds);
if (err < 0)
goto out;
if (new_creds)
old_creds = override_creds(new_creds);
if (tmpfile)
temp = ovl_do_tmpfile(upperdir, stat->mode);
else
temp = ovl_lookup_temp(workdir);
err = 0;
if (IS_ERR(temp)) {
err = PTR_ERR(temp);
temp = NULL;
}
if (!err && !tmpfile)
err = ovl_create_real(wdir, temp, &cattr, NULL, true);
if (c->tmpfile) {
temp = ovl_do_tmpfile(c->workdir, c->stat.mode);
if (IS_ERR(temp))
goto temp_err;
} else {
temp = ovl_lookup_temp(c->workdir);
if (IS_ERR(temp))
goto temp_err;
err = ovl_create_real(d_inode(c->workdir), temp, &cattr,
NULL, true);
if (err) {
dput(temp);
goto out;
}
}
err = 0;
*tempp = temp;
out:
if (new_creds) {
revert_creds(old_creds);
put_cred(new_creds);
}
if (err)
return err;
temp_err:
err = PTR_ERR(temp);
goto out;
}
if (S_ISREG(stat->mode)) {
static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
{
int err;
if (S_ISREG(c->stat.mode)) {
struct path upperpath;
ovl_path_upper(dentry, &upperpath);
ovl_path_upper(c->dentry, &upperpath);
BUG_ON(upperpath.dentry != NULL);
upperpath.dentry = temp;
if (tmpfile) {
inode_unlock(udir);
err = ovl_copy_up_data(lowerpath, &upperpath,
stat->size);
inode_lock_nested(udir, I_MUTEX_PARENT);
} else {
err = ovl_copy_up_data(lowerpath, &upperpath,
stat->size);
}
err = ovl_copy_up_data(&c->lowerpath, &upperpath, c->stat.size);
if (err)
goto out_cleanup;
return err;
}
err = ovl_copy_xattr(lowerpath->dentry, temp);
err = ovl_copy_xattr(c->lowerpath.dentry, temp);
if (err)
goto out_cleanup;
return err;
inode_lock(temp->d_inode);
err = ovl_set_attr(temp, stat);
err = ovl_set_attr(temp, &c->stat);
inode_unlock(temp->d_inode);
if (err)
goto out_cleanup;
return err;
/*
* Store identifier of lower inode in upper inode xattr to
......@@ -395,41 +474,48 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
* Don't set origin when we are breaking the association with a lower
* hard link.
*/
if (S_ISDIR(stat->mode) || stat->nlink == 1) {
err = ovl_set_origin(dentry, lowerpath->dentry, temp);
if (c->origin) {
err = ovl_set_origin(c->dentry, c->lowerpath.dentry, temp);
if (err)
goto out_cleanup;
return err;
}
upper = lookup_one_len(dentry->d_name.name, upperdir,
dentry->d_name.len);
if (IS_ERR(upper)) {
err = PTR_ERR(upper);
upper = NULL;
goto out_cleanup;
}
return 0;
}
if (tmpfile)
err = ovl_do_link(temp, udir, upper, true);
else
err = ovl_do_rename(wdir, temp, udir, upper, 0);
static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
{
struct inode *udir = c->destdir->d_inode;
struct dentry *newdentry = NULL;
struct dentry *temp = NULL;
int err;
err = ovl_get_tmpfile(c, &temp);
if (err)
goto out;
err = ovl_copy_up_inode(c, temp);
if (err)
goto out_cleanup;
newdentry = dget(tmpfile ? upper : temp);
ovl_dentry_update(dentry, newdentry);
ovl_inode_update(d_inode(dentry), d_inode(newdentry));
if (c->tmpfile) {
inode_lock_nested(udir, I_MUTEX_PARENT);
err = ovl_install_temp(c, temp, &newdentry);
inode_unlock(udir);
} else {
err = ovl_install_temp(c, temp, &newdentry);
}
if (err)
goto out_cleanup;
/* Restore timestamps on parent (best effort) */
ovl_set_timestamps(upperdir, pstat);
ovl_inode_update(d_inode(c->dentry), newdentry);
out:
dput(temp);
dput(upper);
return err;
out_cleanup:
if (!tmpfile)
ovl_cleanup(wdir, temp);
if (!c->tmpfile)
ovl_cleanup(d_inode(c->workdir), temp);
goto out;
}
......@@ -442,78 +528,119 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
* is possible that the copy up will lock the old parent. At that point
* the file will have already been copied up anyway.
*/
static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
{
int err;
struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info;
bool indexed = false;
if (ovl_indexdir(c->dentry->d_sb) && !S_ISDIR(c->stat.mode) &&
c->stat.nlink > 1)
indexed = true;
if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || indexed)
c->origin = true;
if (indexed) {
c->destdir = ovl_indexdir(c->dentry->d_sb);
err = ovl_get_index_name(c->lowerpath.dentry, &c->destname);
if (err)
return err;
} else {
/*
* Mark parent "impure" because it may now contain non-pure
* upper
*/
err = ovl_set_impure(c->parent, c->destdir);
if (err)
return err;
}
/* Should we copyup with O_TMPFILE or with workdir? */
if (S_ISREG(c->stat.mode) && ofs->tmpfile) {
c->tmpfile = true;
err = ovl_copy_up_locked(c);
} else {
err = -EIO;
if (lock_rename(c->workdir, c->destdir) != NULL) {
pr_err("overlayfs: failed to lock workdir+upperdir\n");
} else {
err = ovl_copy_up_locked(c);
unlock_rename(c->workdir, c->destdir);
}
}
if (indexed) {
if (!err)
ovl_set_flag(OVL_INDEX, d_inode(c->dentry));
kfree(c->destname.name);
} else if (!err) {
struct inode *udir = d_inode(c->destdir);
/* Restore timestamps on parent (best effort) */
inode_lock(udir);
ovl_set_timestamps(c->destdir, &c->pstat);
inode_unlock(udir);
ovl_dentry_set_upper_alias(c->dentry);
}
return err;
}
static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
struct path *lowerpath, struct kstat *stat)
int flags)
{
DEFINE_DELAYED_CALL(done);
struct dentry *workdir = ovl_workdir(dentry);
int err;
struct kstat pstat;
DEFINE_DELAYED_CALL(done);
struct path parentpath;
struct dentry *lowerdentry = lowerpath->dentry;
struct dentry *upperdir;
const char *link = NULL;
struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
struct ovl_copy_up_ctx ctx = {
.parent = parent,
.dentry = dentry,
.workdir = ovl_workdir(dentry),
};
if (WARN_ON(!workdir))
if (WARN_ON(!ctx.workdir))
return -EROFS;
ovl_do_check_copy_up(lowerdentry);
ovl_path_upper(parent, &parentpath);
upperdir = parentpath.dentry;
/* Mark parent "impure" because it may now contain non-pure upper */
err = ovl_set_impure(parent, upperdir);
ovl_path_lower(dentry, &ctx.lowerpath);
err = vfs_getattr(&ctx.lowerpath, &ctx.stat,
STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
if (err)
return err;
err = vfs_getattr(&parentpath, &pstat,
ovl_path_upper(parent, &parentpath);
ctx.destdir = parentpath.dentry;
ctx.destname = dentry->d_name;
err = vfs_getattr(&parentpath, &ctx.pstat,
STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT);
if (err)
return err;
if (S_ISLNK(stat->mode)) {
link = vfs_get_link(lowerdentry, &done);
if (IS_ERR(link))
return PTR_ERR(link);
/* maybe truncate regular file. this has no effect on dirs */
if (flags & O_TRUNC)
ctx.stat.size = 0;
if (S_ISLNK(ctx.stat.mode)) {
ctx.link = vfs_get_link(ctx.lowerpath.dentry, &done);
if (IS_ERR(ctx.link))
return PTR_ERR(ctx.link);
}
ovl_do_check_copy_up(ctx.lowerpath.dentry);
/* Should we copyup with O_TMPFILE or with workdir? */
if (S_ISREG(stat->mode) && ofs->tmpfile) {
err = ovl_copy_up_start(dentry);
/* err < 0: interrupted, err > 0: raced with another copy-up */
if (unlikely(err)) {
pr_debug("ovl_copy_up_start(%pd2) = %i\n", dentry, err);
if (err > 0)
err = 0;
goto out_done;
}
inode_lock_nested(upperdir->d_inode, I_MUTEX_PARENT);
err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
stat, link, &pstat, true);
inode_unlock(upperdir->d_inode);
} else {
if (!ovl_dentry_upper(dentry))
err = ovl_do_copy_up(&ctx);
if (!err && !ovl_dentry_has_upper_alias(dentry))
err = ovl_link_up(&ctx);
ovl_copy_up_end(dentry);
goto out_done;
}
err = -EIO;
if (lock_rename(workdir, upperdir) != NULL) {
pr_err("overlayfs: failed to lock workdir+upperdir\n");
goto out_unlock;
}
if (ovl_dentry_upper(dentry)) {
/* Raced with another copy-up? Nothing to do, then... */
err = 0;
goto out_unlock;
}
err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
stat, link, &pstat, false);
out_unlock:
unlock_rename(workdir, upperdir);
out_done:
do_delayed_call(&done);
return err;
......@@ -527,11 +654,22 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
while (!err) {
struct dentry *next;
struct dentry *parent;
struct path lowerpath;
struct kstat stat;
enum ovl_path_type type = ovl_path_type(dentry);
if (OVL_TYPE_UPPER(type))
/*
* Check if copy-up has happened as well as for upper alias (in
* case of hard links) is there.
*
* Both checks are lockless:
* - false negatives: will recheck under oi->lock
* - false positives:
* + ovl_dentry_upper() uses memory barriers to ensure the
* upper dentry is up-to-date
* + ovl_dentry_has_upper_alias() relies on locking of
* upper parent i_rwsem to prevent reordering copy-up
* with rename.
*/
if (ovl_dentry_upper(dentry) &&
ovl_dentry_has_upper_alias(dentry))
break;
next = dget(dentry);
......@@ -539,22 +677,14 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
for (;;) {
parent = dget_parent(next);
type = ovl_path_type(parent);
if (OVL_TYPE_UPPER(type))
if (ovl_dentry_upper(parent))
break;
dput(next);
next = parent;
}
ovl_path_lower(next, &lowerpath);
err = vfs_getattr(&lowerpath, &stat,
STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
/* maybe truncate regular file. this has no effect on dirs */
if (flags & O_TRUNC)
stat.size = 0;
if (!err)
err = ovl_copy_up_one(parent, next, &lowerpath, &stat);
err = ovl_copy_up_one(parent, next, flags);
dput(parent);
dput(next);
......
......@@ -24,7 +24,7 @@ module_param_named(redirect_max, ovl_redirect_max, ushort, 0644);
MODULE_PARM_DESC(ovl_redirect_max,
"Maximum length of absolute redirect xattr value");
void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
int ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
{
int err;
......@@ -39,6 +39,8 @@ void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n",
wdentry, err);
}
return err;
}
struct dentry *ovl_lookup_temp(struct dentry *workdir)
......@@ -154,12 +156,13 @@ static void ovl_instantiate(struct dentry *dentry, struct inode *inode,
struct dentry *newdentry, bool hardlink)
{
ovl_dentry_version_inc(dentry->d_parent);
ovl_dentry_update(dentry, newdentry);
ovl_dentry_set_upper_alias(dentry);
if (!hardlink) {
ovl_inode_update(inode, d_inode(newdentry));
ovl_inode_update(inode, newdentry);
ovl_copyattr(newdentry->d_inode, inode);
} else {
WARN_ON(ovl_inode_real(inode, NULL) != d_inode(newdentry));
WARN_ON(ovl_inode_real(inode) != d_inode(newdentry));
dput(newdentry);
inc_nlink(inode);
}
d_instantiate(dentry, inode);
......@@ -588,6 +591,7 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
struct dentry *new)
{
int err;
bool locked = false;
struct inode *inode;
err = ovl_want_write(old);
......@@ -598,6 +602,10 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
if (err)
goto out_drop_write;
err = ovl_nlink_start(old, &locked);
if (err)
goto out_drop_write;
inode = d_inode(old);
ihold(inode);
......@@ -605,12 +613,18 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
if (err)
iput(inode);
ovl_nlink_end(old, locked);
out_drop_write:
ovl_drop_write(old);
out:
return err;
}
static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper)
{
return d_inode(ovl_dentry_upper(dentry)) == d_inode(upper);
}
static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
{
struct dentry *workdir = ovl_workdir(dentry);
......@@ -646,7 +660,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
err = -ESTALE;
if ((opaquedir && upper != opaquedir) ||
(!opaquedir && ovl_dentry_upper(dentry) &&
upper != ovl_dentry_upper(dentry))) {
!ovl_matches_upper(dentry, upper))) {
goto out_dput_upper;
}
......@@ -707,7 +721,7 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
err = -ESTALE;
if ((opaquedir && upper != opaquedir) ||
(!opaquedir && upper != ovl_dentry_upper(dentry)))
(!opaquedir && !ovl_matches_upper(dentry, upper)))
goto out_dput_upper;
if (is_dir)
......@@ -735,8 +749,8 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
static int ovl_do_remove(struct dentry *dentry, bool is_dir)
{
enum ovl_path_type type;
int err;
bool locked = false;
const struct cred *old_cred;
err = ovl_want_write(dentry);
......@@ -747,7 +761,9 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
if (err)
goto out_drop_write;
type = ovl_path_type(dentry);
err = ovl_nlink_start(dentry, &locked);
if (err)
goto out_drop_write;
old_cred = ovl_override_creds(dentry->d_sb);
if (!ovl_lower_positive(dentry))
......@@ -761,6 +777,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
else
drop_nlink(dentry->d_inode);
}
ovl_nlink_end(dentry, locked);
out_drop_write:
ovl_drop_write(dentry);
out:
......@@ -883,6 +900,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
unsigned int flags)
{
int err;
bool locked = false;
struct dentry *old_upperdir;
struct dentry *new_upperdir;
struct dentry *olddentry;
......@@ -926,6 +944,10 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
err = ovl_copy_up(new);
if (err)
goto out_drop_write;
} else {
err = ovl_nlink_start(new, &locked);
if (err)
goto out_drop_write;
}
old_cred = ovl_override_creds(old->d_sb);
......@@ -985,7 +1007,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
goto out_unlock;
err = -ESTALE;
if (olddentry != ovl_dentry_upper(old))
if (!ovl_matches_upper(old, olddentry))
goto out_dput_old;
newdentry = lookup_one_len(new->d_name.name, new_upperdir,
......@@ -998,12 +1020,12 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
new_opaque = ovl_dentry_is_opaque(new);
err = -ESTALE;
if (ovl_dentry_upper(new)) {
if (d_inode(new) && ovl_dentry_upper(new)) {
if (opaquedir) {
if (newdentry != opaquedir)
goto out_dput;
} else {
if (newdentry != ovl_dentry_upper(new))
if (!ovl_matches_upper(new, newdentry))
goto out_dput;
}
} else {
......@@ -1046,6 +1068,13 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
if (cleanup_whiteout)
ovl_cleanup(old_upperdir->d_inode, newdentry);
if (overwrite && d_inode(new)) {
if (new_is_dir)
clear_nlink(d_inode(new));
else
drop_nlink(d_inode(new));
}
ovl_dentry_version_inc(old->d_parent);
ovl_dentry_version_inc(new->d_parent);
......@@ -1057,6 +1086,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
unlock_rename(new_upperdir, old_upperdir);
out_revert_creds:
revert_creds(old_cred);
ovl_nlink_end(new, locked);
out_drop_write:
ovl_drop_write(old);
out:
......
......@@ -12,6 +12,7 @@
#include <linux/cred.h>
#include <linux/xattr.h>
#include <linux/posix_acl.h>
#include <linux/ratelimit.h>
#include "overlayfs.h"
int ovl_setattr(struct dentry *dentry, struct iattr *attr)
......@@ -96,11 +97,15 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
WARN_ON_ONCE(stat->dev != lowerstat.dev);
/*
* Lower hardlinks are broken on copy up to different
* Lower hardlinks may be broken on copy up to different
* upper files, so we cannot use the lower origin st_ino
* for those different files, even for the same fs case.
* With inodes index enabled, it is safe to use st_ino
* of an indexed hardlinked origin. The index validates
* that the upper hardlink is not broken.
*/
if (is_dir || lowerstat.nlink == 1)
if (is_dir || lowerstat.nlink == 1 ||
ovl_test_flag(OVL_INDEX, d_inode(dentry)))
stat->ino = lowerstat.ino;
}
stat->dev = dentry->d_sb->s_dev;
......@@ -126,6 +131,15 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
if (is_dir && OVL_TYPE_MERGE(type))
stat->nlink = 1;
/*
* Return the overlay inode nlinks for indexed upper inodes.
* Overlay inode nlink counts the union of the upper hardlinks
* and non-covered lower hardlinks. It does not include the upper
* index hardlink.
*/
if (!is_dir && ovl_test_flag(OVL_INDEX, d_inode(dentry)))
stat->nlink = dentry->d_inode->i_nlink;
out:
revert_creds(old_cred);
......@@ -134,8 +148,8 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
int ovl_permission(struct inode *inode, int mask)
{
bool is_upper;
struct inode *realinode = ovl_inode_real(inode, &is_upper);
struct inode *upperinode = ovl_inode_upper(inode);
struct inode *realinode = upperinode ?: ovl_inode_lower(inode);
const struct cred *old_cred;
int err;
......@@ -154,7 +168,8 @@ int ovl_permission(struct inode *inode, int mask)
return err;
old_cred = ovl_override_creds(inode->i_sb);
if (!is_upper && !special_file(realinode->i_mode) && mask & MAY_WRITE) {
if (!upperinode &&
!special_file(realinode->i_mode) && mask & MAY_WRITE) {
mask &= ~(MAY_WRITE | MAY_APPEND);
/* Make sure mounter can read file for copy up later */
mask |= MAY_READ;
......@@ -286,7 +301,7 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
struct posix_acl *ovl_get_acl(struct inode *inode, int type)
{
struct inode *realinode = ovl_inode_real(inode, NULL);
struct inode *realinode = ovl_inode_real(inode);
const struct cred *old_cred;
struct posix_acl *acl;
......@@ -300,13 +315,13 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type)
return acl;
}
static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
struct dentry *realdentry)
static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
{
if (OVL_TYPE_UPPER(type))
if (ovl_dentry_upper(dentry) &&
ovl_dentry_has_upper_alias(dentry))
return false;
if (special_file(realdentry->d_inode->i_mode))
if (special_file(d_inode(dentry)->i_mode))
return false;
if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC))
......@@ -318,11 +333,8 @@ static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags)
{
int err = 0;
struct path realpath;
enum ovl_path_type type;
type = ovl_path_real(dentry, &realpath);
if (ovl_open_need_copy_up(file_flags, type, realpath.dentry)) {
if (ovl_open_need_copy_up(dentry, file_flags)) {
err = ovl_want_write(dentry);
if (!err) {
err = ovl_copy_up_flags(dentry, file_flags);
......@@ -440,6 +452,103 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev)
}
}
/*
* With inodes index enabled, an overlay inode nlink counts the union of upper
* hardlinks and non-covered lower hardlinks. During the lifetime of a non-pure
* upper inode, the following nlink modifying operations can happen:
*
* 1. Lower hardlink copy up
* 2. Upper hardlink created, unlinked or renamed over
* 3. Lower hardlink whiteout or renamed over
*
* For the first, copy up case, the union nlink does not change, whether the
* operation succeeds or fails, but the upper inode nlink may change.
* Therefore, before copy up, we store the union nlink value relative to the
* lower inode nlink in the index inode xattr trusted.overlay.nlink.
*
* For the second, upper hardlink case, the union nlink should be incremented
* or decremented IFF the operation succeeds, aligned with nlink change of the
* upper inode. Therefore, before link/unlink/rename, we store the union nlink
* value relative to the upper inode nlink in the index inode.
*
* For the last, lower cover up case, we simplify things by preceding the
* whiteout or cover up with copy up. This makes sure that there is an index
* upper inode where the nlink xattr can be stored before the copied up upper
* entry is unlink.
*/
#define OVL_NLINK_ADD_UPPER (1 << 0)
/*
* On-disk format for indexed nlink:
*
* nlink relative to the upper inode - "U[+-]NUM"
* nlink relative to the lower inode - "L[+-]NUM"
*/
static int ovl_set_nlink_common(struct dentry *dentry,
struct dentry *realdentry, const char *format)
{
struct inode *inode = d_inode(dentry);
struct inode *realinode = d_inode(realdentry);
char buf[13];
int len;
len = snprintf(buf, sizeof(buf), format,
(int) (inode->i_nlink - realinode->i_nlink));
return ovl_do_setxattr(ovl_dentry_upper(dentry),
OVL_XATTR_NLINK, buf, len, 0);
}
int ovl_set_nlink_upper(struct dentry *dentry)
{
return ovl_set_nlink_common(dentry, ovl_dentry_upper(dentry), "U%+i");
}
int ovl_set_nlink_lower(struct dentry *dentry)
{
return ovl_set_nlink_common(dentry, ovl_dentry_lower(dentry), "L%+i");
}
unsigned int ovl_get_nlink(struct dentry *lowerdentry,
struct dentry *upperdentry,
unsigned int fallback)
{
int nlink_diff;
int nlink;
char buf[13];
int err;
if (!lowerdentry || !upperdentry || d_inode(lowerdentry)->i_nlink == 1)
return fallback;
err = vfs_getxattr(upperdentry, OVL_XATTR_NLINK, &buf, sizeof(buf) - 1);
if (err < 0)
goto fail;
buf[err] = '\0';
if ((buf[0] != 'L' && buf[0] != 'U') ||
(buf[1] != '+' && buf[1] != '-'))
goto fail;
err = kstrtoint(buf + 1, 10, &nlink_diff);
if (err < 0)
goto fail;
nlink = d_inode(buf[0] == 'L' ? lowerdentry : upperdentry)->i_nlink;
nlink += nlink_diff;
if (nlink <= 0)
goto fail;
return nlink;
fail:
pr_warn_ratelimited("overlayfs: failed to get index nlink (%pd2, err=%i)\n",
upperdentry, err);
return fallback;
}
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev)
{
struct inode *inode;
......@@ -453,27 +562,87 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev)
static int ovl_inode_test(struct inode *inode, void *data)
{
return ovl_inode_real(inode, NULL) == data;
return inode->i_private == data;
}
static int ovl_inode_set(struct inode *inode, void *data)
{
inode->i_private = (void *) (((unsigned long) data) | OVL_ISUPPER_MASK);
inode->i_private = data;
return 0;
}
struct inode *ovl_get_inode(struct super_block *sb, struct inode *realinode)
static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry,
struct dentry *upperdentry)
{
struct inode *lowerinode = lowerdentry ? d_inode(lowerdentry) : NULL;
/* Lower (origin) inode must match, even if NULL */
if (ovl_inode_lower(inode) != lowerinode)
return false;
/*
* Allow non-NULL __upperdentry in inode even if upperdentry is NULL.
* This happens when finding a lower alias for a copied up hard link.
*/
if (upperdentry && ovl_inode_upper(inode) != d_inode(upperdentry))
return false;
return true;
}
struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry)
{
struct dentry *lowerdentry = ovl_dentry_lower(dentry);
struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL;
struct inode *inode;
inode = iget5_locked(sb, (unsigned long) realinode,
ovl_inode_test, ovl_inode_set, realinode);
if (inode && inode->i_state & I_NEW) {
ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev);
set_nlink(inode, realinode->i_nlink);
unlock_new_inode(inode);
if (!realinode)
realinode = d_inode(lowerdentry);
if (!S_ISDIR(realinode->i_mode) &&
(upperdentry || (lowerdentry && ovl_indexdir(dentry->d_sb)))) {
struct inode *key = d_inode(lowerdentry ?: upperdentry);
unsigned int nlink;
inode = iget5_locked(dentry->d_sb, (unsigned long) key,
ovl_inode_test, ovl_inode_set, key);
if (!inode)
goto out_nomem;
if (!(inode->i_state & I_NEW)) {
/*
* Verify that the underlying files stored in the inode
* match those in the dentry.
*/
if (!ovl_verify_inode(inode, lowerdentry, upperdentry)) {
iput(inode);
inode = ERR_PTR(-ESTALE);
goto out;
}
dput(upperdentry);
goto out;
}
nlink = ovl_get_nlink(lowerdentry, upperdentry,
realinode->i_nlink);
set_nlink(inode, nlink);
} else {
inode = new_inode(dentry->d_sb);
if (!inode)
goto out_nomem;
}
ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev);
ovl_inode_init(inode, upperdentry, lowerdentry);
if (upperdentry && ovl_is_impuredir(upperdentry))
ovl_set_flag(OVL_IMPURE, inode);
if (inode->i_state & I_NEW)
unlock_new_inode(inode);
out:
return inode;
out_nomem:
inode = ERR_PTR(-ENOMEM);
goto out;
}
......@@ -88,13 +88,10 @@ static int ovl_acceptable(void *ctx, struct dentry *dentry)
return 1;
}
static struct dentry *ovl_get_origin(struct dentry *dentry,
struct vfsmount *mnt)
static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry)
{
int res;
struct ovl_fh *fh = NULL;
struct dentry *origin = NULL;
int bytes;
res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0);
if (res < 0) {
......@@ -129,7 +126,29 @@ static struct dentry *ovl_get_origin(struct dentry *dentry,
(fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
goto out;
bytes = (fh->len - offsetof(struct ovl_fh, fid));
return fh;
out:
kfree(fh);
return NULL;
fail:
pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res);
goto out;
invalid:
pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh);
goto out;
}
static struct dentry *ovl_get_origin(struct dentry *dentry,
struct vfsmount *mnt)
{
struct dentry *origin = NULL;
struct ovl_fh *fh = ovl_get_origin_fh(dentry);
int bytes;
if (IS_ERR_OR_NULL(fh))
return (struct dentry *)fh;
/*
* Make sure that the stored uuid matches the uuid of the lower
......@@ -138,6 +157,7 @@ static struct dentry *ovl_get_origin(struct dentry *dentry,
if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid))
goto out;
bytes = (fh->len - offsetof(struct ovl_fh, fid));
origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
bytes >> 2, (int)fh->type,
ovl_acceptable, NULL);
......@@ -149,21 +169,17 @@ static struct dentry *ovl_get_origin(struct dentry *dentry,
}
if (ovl_dentry_weird(origin) ||
((d_inode(origin)->i_mode ^ d_inode(dentry)->i_mode) & S_IFMT)) {
dput(origin);
origin = NULL;
((d_inode(origin)->i_mode ^ d_inode(dentry)->i_mode) & S_IFMT))
goto invalid;
}
out:
kfree(fh);
return origin;
fail:
pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res);
goto out;
invalid:
pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh);
pr_warn_ratelimited("overlayfs: invalid origin (%pd2)\n", origin);
dput(origin);
origin = NULL;
goto out;
}
......@@ -269,33 +285,30 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
}
static int ovl_check_origin(struct dentry *dentry, struct dentry *upperdentry,
static int ovl_check_origin(struct dentry *upperdentry,
struct path *lowerstack, unsigned int numlower,
struct path **stackp, unsigned int *ctrp)
{
struct super_block *same_sb = ovl_same_sb(dentry->d_sb);
struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
struct vfsmount *mnt;
struct dentry *origin;
if (!same_sb || !roe->numlower)
return 0;
struct dentry *origin = NULL;
int i;
/*
* Since all layers are on the same fs, we use the first layer for
* decoding the file handle. We may get a disconnected dentry,
* which is fine, because we only need to hold the origin inode in
* cache and use its inode number. We may even get a connected dentry,
* that is not under the first layer's root. That is also fine for
* using it's inode number - it's the same as if we held a reference
* to a dentry in first layer that was moved under us.
*/
mnt = roe->lowerstack[0].mnt;
for (i = 0; i < numlower; i++) {
mnt = lowerstack[i].mnt;
origin = ovl_get_origin(upperdentry, mnt);
if (IS_ERR_OR_NULL(origin))
if (IS_ERR(origin))
return PTR_ERR(origin);
BUG_ON(*stackp || *ctrp);
if (origin)
break;
}
if (!origin)
return 0;
BUG_ON(*ctrp);
if (!*stackp)
*stackp = kmalloc(sizeof(struct path), GFP_TEMPORARY);
if (!*stackp) {
dput(origin);
......@@ -307,6 +320,215 @@ static int ovl_check_origin(struct dentry *dentry, struct dentry *upperdentry,
return 0;
}
/*
* Verify that @fh matches the origin file handle stored in OVL_XATTR_ORIGIN.
* Return 0 on match, -ESTALE on mismatch, < 0 on error.
*/
static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh)
{
struct ovl_fh *ofh = ovl_get_origin_fh(dentry);
int err = 0;
if (!ofh)
return -ENODATA;
if (IS_ERR(ofh))
return PTR_ERR(ofh);
if (fh->len != ofh->len || memcmp(fh, ofh, fh->len))
err = -ESTALE;
kfree(ofh);
return err;
}
/*
* Verify that an inode matches the origin file handle stored in upper inode.
*
* If @set is true and there is no stored file handle, encode and store origin
* file handle in OVL_XATTR_ORIGIN.
*
* Return 0 on match, -ESTALE on mismatch, < 0 on error.
*/
int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt,
struct dentry *origin, bool is_upper, bool set)
{
struct inode *inode;
struct ovl_fh *fh;
int err;
fh = ovl_encode_fh(origin, is_upper);
err = PTR_ERR(fh);
if (IS_ERR(fh))
goto fail;
err = ovl_verify_origin_fh(dentry, fh);
if (set && err == -ENODATA)
err = ovl_do_setxattr(dentry, OVL_XATTR_ORIGIN, fh, fh->len, 0);
if (err)
goto fail;
out:
kfree(fh);
return err;
fail:
inode = d_inode(origin);
pr_warn_ratelimited("overlayfs: failed to verify origin (%pd2, ino=%lu, err=%i)\n",
origin, inode ? inode->i_ino : 0, err);
goto out;
}
/*
* Verify that an index entry name matches the origin file handle stored in
* OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
* Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error.
*/
int ovl_verify_index(struct dentry *index, struct path *lowerstack,
unsigned int numlower)
{
struct ovl_fh *fh = NULL;
size_t len;
struct path origin = { };
struct path *stack = &origin;
unsigned int ctr = 0;
int err;
if (!d_inode(index))
return 0;
err = -EISDIR;
if (d_is_dir(index))
goto fail;
err = -EINVAL;
if (index->d_name.len < sizeof(struct ovl_fh)*2)
goto fail;
err = -ENOMEM;
len = index->d_name.len / 2;
fh = kzalloc(len, GFP_TEMPORARY);
if (!fh)
goto fail;
err = -EINVAL;
if (hex2bin((u8 *)fh, index->d_name.name, len) || len != fh->len)
goto fail;
err = ovl_verify_origin_fh(index, fh);
if (err)
goto fail;
err = ovl_check_origin(index, lowerstack, numlower, &stack, &ctr);
if (!err && !ctr)
err = -ESTALE;
if (err)
goto fail;
/* Check if index is orphan and don't warn before cleaning it */
if (d_inode(index)->i_nlink == 1 &&
ovl_get_nlink(index, origin.dentry, 0) == 0)
err = -ENOENT;
dput(origin.dentry);
out:
kfree(fh);
return err;
fail:
pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, err=%i)\n",
index, err);
goto out;
}
/*
* Lookup in indexdir for the index entry of a lower real inode or a copy up
* origin inode. The index entry name is the hex representation of the lower
* inode file handle.
*
* If the index dentry in negative, then either no lower aliases have been
* copied up yet, or aliases have been copied up in older kernels and are
* not indexed.
*
* If the index dentry for a copy up origin inode is positive, but points
* to an inode different than the upper inode, then either the upper inode
* has been copied up and not indexed or it was indexed, but since then
* index dir was cleared. Either way, that index cannot be used to indentify
* the overlay inode.
*/
int ovl_get_index_name(struct dentry *origin, struct qstr *name)
{
int err;
struct ovl_fh *fh;
char *n, *s;
fh = ovl_encode_fh(origin, false);
if (IS_ERR(fh))
return PTR_ERR(fh);
err = -ENOMEM;
n = kzalloc(fh->len * 2, GFP_TEMPORARY);
if (n) {
s = bin2hex(n, fh, fh->len);
*name = (struct qstr) QSTR_INIT(n, s - n);
err = 0;
}
kfree(fh);
return err;
}
static struct dentry *ovl_lookup_index(struct dentry *dentry,
struct dentry *upper,
struct dentry *origin)
{
struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
struct dentry *index;
struct inode *inode;
struct qstr name;
int err;
err = ovl_get_index_name(origin, &name);
if (err)
return ERR_PTR(err);
index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
if (IS_ERR(index)) {
pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%*s, err=%i);\n"
"overlayfs: mount with '-o index=off' to disable inodes index.\n",
d_inode(origin)->i_ino, name.len, name.name,
err);
goto out;
}
if (d_is_negative(index)) {
if (upper && d_inode(origin)->i_nlink > 1) {
pr_warn_ratelimited("overlayfs: hard link with origin but no index (ino=%lu).\n",
d_inode(origin)->i_ino);
goto fail;
}
dput(index);
index = NULL;
} else if (upper && d_inode(index) != d_inode(upper)) {
inode = d_inode(index);
pr_warn_ratelimited("overlayfs: wrong index found (index ino: %lu, upper ino: %lu).\n",
d_inode(index)->i_ino,
d_inode(upper)->i_ino);
goto fail;
}
out:
kfree(name.name);
return index;
fail:
dput(index);
index = ERR_PTR(-EIO);
goto out;
}
/*
* Returns next layer in stack starting from top.
* Returns -1 if this is the last layer.
......@@ -338,10 +560,10 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
struct path *stack = NULL;
struct dentry *upperdir, *upperdentry = NULL;
struct dentry *index = NULL;
unsigned int ctr = 0;
struct inode *inode = NULL;
bool upperopaque = false;
bool upperimpure = false;
char *upperredirect = NULL;
struct dentry *this;
unsigned int i;
......@@ -359,7 +581,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
return ERR_PTR(-ENAMETOOLONG);
old_cred = ovl_override_creds(dentry->d_sb);
upperdir = ovl_upperdentry_dereference(poe);
upperdir = ovl_dentry_upper(dentry->d_parent);
if (upperdir) {
err = ovl_lookup_layer(upperdir, &d, &upperdentry);
if (err)
......@@ -372,8 +594,18 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
}
if (upperdentry && !d.is_dir) {
BUG_ON(!d.stop || d.redirect);
err = ovl_check_origin(dentry, upperdentry,
&stack, &ctr);
/*
* Lookup copy up origin by decoding origin file handle.
* We may get a disconnected dentry, which is fine,
* because we only need to hold the origin inode in
* cache and use its inode number. We may even get a
* connected dentry, that is not under any of the lower
* layers root. That is also fine for using it's inode
* number - it's the same as if we held a reference
* to a dentry in lower layer that was moved under us.
*/
err = ovl_check_origin(upperdentry, roe->lowerstack,
roe->numlower, &stack, &ctr);
if (err)
goto out;
}
......@@ -386,8 +618,6 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
poe = roe;
}
upperopaque = d.opaque;
if (upperdentry && d.is_dir)
upperimpure = ovl_is_impuredir(upperdentry);
}
if (!d.stop && poe->numlower) {
......@@ -428,48 +658,56 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
}
}
/* Lookup index by lower inode and verify it matches upper inode */
if (ctr && !d.is_dir && ovl_indexdir(dentry->d_sb)) {
struct dentry *origin = stack[0].dentry;
index = ovl_lookup_index(dentry, upperdentry, origin);
if (IS_ERR(index)) {
err = PTR_ERR(index);
index = NULL;
goto out_put;
}
}
oe = ovl_alloc_entry(ctr);
err = -ENOMEM;
if (!oe)
goto out_put;
if (upperdentry || ctr) {
struct dentry *realdentry;
struct inode *realinode;
oe->opaque = upperopaque;
memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
dentry->d_fsdata = oe;
realdentry = upperdentry ? upperdentry : stack[0].dentry;
realinode = d_inode(realdentry);
if (upperdentry)
ovl_dentry_set_upper_alias(dentry);
else if (index)
upperdentry = dget(index);
err = -ENOMEM;
if (upperdentry && !d_is_dir(upperdentry)) {
inode = ovl_get_inode(dentry->d_sb, realinode);
} else {
inode = ovl_new_inode(dentry->d_sb, realinode->i_mode,
realinode->i_rdev);
if (inode)
ovl_inode_init(inode, realinode, !!upperdentry);
}
if (!inode)
if (upperdentry || ctr) {
inode = ovl_get_inode(dentry, upperdentry);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_free_oe;
ovl_copyattr(realdentry->d_inode, inode);
OVL_I(inode)->redirect = upperredirect;
if (index)
ovl_set_flag(OVL_INDEX, inode);
}
revert_creds(old_cred);
oe->opaque = upperopaque;
oe->impure = upperimpure;
oe->redirect = upperredirect;
oe->__upperdentry = upperdentry;
memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
dput(index);
kfree(stack);
kfree(d.redirect);
dentry->d_fsdata = oe;
d_add(dentry, inode);
return NULL;
out_free_oe:
dentry->d_fsdata = NULL;
kfree(oe);
out_put:
dput(index);
for (i = 0; i < ctr; i++)
dput(stack[i].dentry);
kfree(stack);
......@@ -499,7 +737,7 @@ bool ovl_lower_positive(struct dentry *dentry)
return oe->opaque;
/* Negative upper -> positive lower */
if (!oe->__upperdentry)
if (!ovl_dentry_upper(dentry))
return true;
/* Positive upper -> have to look up lower to see whether it exists */
......
......@@ -25,6 +25,12 @@ enum ovl_path_type {
#define OVL_XATTR_REDIRECT OVL_XATTR_PREFIX "redirect"
#define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin"
#define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure"
#define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink"
enum ovl_flag {
OVL_IMPURE,
OVL_INDEX,
};
/*
* The tuple (fh,uuid) is a universal unique identifier for a copy up origin,
......@@ -38,6 +44,8 @@ enum ovl_path_type {
/* CPU byte order required for fid decoding: */
#define OVL_FH_FLAG_BIG_ENDIAN (1 << 0)
#define OVL_FH_FLAG_ANY_ENDIAN (1 << 1)
/* Is the real inode encoded in fid an upper inode? */
#define OVL_FH_FLAG_PATH_UPPER (1 << 2)
#define OVL_FH_FLAG_ALL (OVL_FH_FLAG_BIG_ENDIAN | OVL_FH_FLAG_ANY_ENDIAN)
......@@ -60,8 +68,6 @@ struct ovl_fh {
u8 fid[0]; /* file identifier */
} __packed;
#define OVL_ISUPPER_MASK 1UL
static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
{
int err = vfs_rmdir(dir, dentry);
......@@ -175,22 +181,14 @@ static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode)
return ret;
}
static inline struct inode *ovl_inode_real(struct inode *inode, bool *is_upper)
{
unsigned long x = (unsigned long) READ_ONCE(inode->i_private);
if (is_upper)
*is_upper = x & OVL_ISUPPER_MASK;
return (struct inode *) (x & ~OVL_ISUPPER_MASK);
}
/* util.c */
int ovl_want_write(struct dentry *dentry);
void ovl_drop_write(struct dentry *dentry);
struct dentry *ovl_workdir(struct dentry *dentry);
const struct cred *ovl_override_creds(struct super_block *sb);
struct super_block *ovl_same_sb(struct super_block *sb);
bool ovl_can_decode_fh(struct super_block *sb);
struct dentry *ovl_indexdir(struct super_block *sb);
struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
bool ovl_dentry_remote(struct dentry *dentry);
bool ovl_dentry_weird(struct dentry *dentry);
......@@ -201,19 +199,22 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
struct dentry *ovl_dentry_upper(struct dentry *dentry);
struct dentry *ovl_dentry_lower(struct dentry *dentry);
struct dentry *ovl_dentry_real(struct dentry *dentry);
struct inode *ovl_inode_upper(struct inode *inode);
struct inode *ovl_inode_lower(struct inode *inode);
struct inode *ovl_inode_real(struct inode *inode);
struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry);
void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache);
bool ovl_dentry_is_opaque(struct dentry *dentry);
bool ovl_dentry_is_impure(struct dentry *dentry);
bool ovl_dentry_is_whiteout(struct dentry *dentry);
void ovl_dentry_set_opaque(struct dentry *dentry);
bool ovl_dentry_has_upper_alias(struct dentry *dentry);
void ovl_dentry_set_upper_alias(struct dentry *dentry);
bool ovl_redirect_dir(struct super_block *sb);
const char *ovl_dentry_get_redirect(struct dentry *dentry);
void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect);
void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
void ovl_inode_init(struct inode *inode, struct inode *realinode,
bool is_upper);
void ovl_inode_update(struct inode *inode, struct inode *upperinode);
void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
struct dentry *lowerdentry);
void ovl_inode_update(struct inode *inode, struct dentry *upperdentry);
void ovl_dentry_version_inc(struct dentry *dentry);
u64 ovl_dentry_version_get(struct dentry *dentry);
bool ovl_is_whiteout(struct dentry *dentry);
......@@ -225,6 +226,12 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
const char *name, const void *value, size_t size,
int xerr);
int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry);
void ovl_set_flag(unsigned long flag, struct inode *inode);
bool ovl_test_flag(unsigned long flag, struct inode *inode);
bool ovl_inuse_trylock(struct dentry *dentry);
void ovl_inuse_unlock(struct dentry *dentry);
int ovl_nlink_start(struct dentry *dentry, bool *locked);
void ovl_nlink_end(struct dentry *dentry, bool locked);
static inline bool ovl_is_impuredir(struct dentry *dentry)
{
......@@ -233,6 +240,11 @@ static inline bool ovl_is_impuredir(struct dentry *dentry)
/* namei.c */
int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt,
struct dentry *origin, bool is_upper, bool set);
int ovl_verify_index(struct dentry *index, struct path *lowerstack,
unsigned int numlower);
int ovl_get_index_name(struct dentry *origin, struct qstr *name);
int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags);
bool ovl_lower_positive(struct dentry *dentry);
......@@ -245,8 +257,15 @@ void ovl_cache_free(struct list_head *list);
int ovl_check_d_type_supported(struct path *realpath);
void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
struct dentry *dentry, int level);
int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
struct path *lowerstack, unsigned int numlower);
/* inode.c */
int ovl_set_nlink_upper(struct dentry *dentry);
int ovl_set_nlink_lower(struct dentry *dentry);
unsigned int ovl_get_nlink(struct dentry *lowerdentry,
struct dentry *upperdentry,
unsigned int fallback);
int ovl_setattr(struct dentry *dentry, struct iattr *attr);
int ovl_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags);
......@@ -262,7 +281,7 @@ int ovl_update_time(struct inode *inode, struct timespec *ts, int flags);
bool ovl_is_private_xattr(const char *name);
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev);
struct inode *ovl_get_inode(struct super_block *sb, struct inode *realinode);
struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry);
static inline void ovl_copyattr(struct inode *from, struct inode *to)
{
to->i_uid = from->i_uid;
......@@ -284,10 +303,11 @@ struct cattr {
int ovl_create_real(struct inode *dir, struct dentry *newdentry,
struct cattr *attr,
struct dentry *hardlink, bool debug);
void ovl_cleanup(struct inode *dir, struct dentry *dentry);
int ovl_cleanup(struct inode *dir, struct dentry *dentry);
/* copy_up.c */
int ovl_copy_up(struct dentry *dentry);
int ovl_copy_up_flags(struct dentry *dentry, int flags);
int ovl_copy_xattr(struct dentry *old, struct dentry *new);
int ovl_set_attr(struct dentry *upper, struct kstat *stat);
struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper);
......@@ -14,6 +14,7 @@ struct ovl_config {
char *workdir;
bool default_permissions;
bool redirect_dir;
bool index;
};
/* private information held for overlayfs's superblock */
......@@ -21,7 +22,12 @@ struct ovl_fs {
struct vfsmount *upper_mnt;
unsigned numlower;
struct vfsmount **lower_mnt;
/* workbasedir is the path at workdir= mount option */
struct dentry *workbasedir;
/* workdir is the 'work' directory under workbasedir */
struct dentry *workdir;
/* index directory listing overlay inodes by origin file handle */
struct dentry *indexdir;
long namelen;
/* pathnames of lower and upper dirs, for show_options */
struct ovl_config config;
......@@ -29,22 +35,16 @@ struct ovl_fs {
const struct cred *creator_cred;
bool tmpfile;
bool noxattr;
wait_queue_head_t copyup_wq;
/* sb common to all layers */
struct super_block *same_sb;
};
/* private information held for every overlayfs dentry */
struct ovl_entry {
struct dentry *__upperdentry;
struct ovl_dir_cache *cache;
union {
struct {
u64 version;
const char *redirect;
unsigned long has_upper;
bool opaque;
bool impure;
bool copying;
};
struct rcu_head rcu;
};
......@@ -54,7 +54,25 @@ struct ovl_entry {
struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
static inline struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe)
struct ovl_inode {
struct ovl_dir_cache *cache;
const char *redirect;
u64 version;
unsigned long flags;
struct inode vfs_inode;
struct dentry *__upperdentry;
struct inode *lower;
/* synchronize copy up and more */
struct mutex lock;
};
static inline struct ovl_inode *OVL_I(struct inode *inode)
{
return container_of(inode, struct ovl_inode, vfs_inode);
}
static inline struct dentry *ovl_upperdentry_dereference(struct ovl_inode *oi)
{
return lockless_dereference(oe->__upperdentry);
return lockless_dereference(oi->__upperdentry);
}
......@@ -667,3 +667,53 @@ void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
ovl_cleanup(dir, dentry);
}
}
int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
struct path *lowerstack, unsigned int numlower)
{
int err;
struct inode *dir = dentry->d_inode;
struct path path = { .mnt = mnt, .dentry = dentry };
LIST_HEAD(list);
struct ovl_cache_entry *p;
struct ovl_readdir_data rdd = {
.ctx.actor = ovl_fill_merge,
.dentry = NULL,
.list = &list,
.root = RB_ROOT,
.is_lowest = false,
};
err = ovl_dir_read(&path, &rdd);
if (err)
goto out;
inode_lock_nested(dir, I_MUTEX_PARENT);
list_for_each_entry(p, &list, l_node) {
struct dentry *index;
if (p->name[0] == '.') {
if (p->len == 1)
continue;
if (p->len == 2 && p->name[1] == '.')
continue;
}
index = lookup_one_len(p->name, dentry, p->len);
if (IS_ERR(index)) {
err = PTR_ERR(index);
break;
}
if (ovl_verify_index(index, lowerstack, numlower)) {
err = ovl_cleanup(dir, index);
if (err)
break;
}
dput(index);
}
inode_unlock(dir);
out:
ovl_cache_free(&list);
if (err)
pr_err("overlayfs: failed index dir cleanup (%i)\n", err);
return err;
}
......@@ -34,6 +34,11 @@ module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
MODULE_PARM_DESC(ovl_redirect_dir_def,
"Default to on or off for the redirect_dir feature");
static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
module_param_named(index, ovl_index_def, bool, 0644);
MODULE_PARM_DESC(ovl_index_def,
"Default to on or off for the inodes index feature");
static void ovl_dentry_release(struct dentry *dentry)
{
struct ovl_entry *oe = dentry->d_fsdata;
......@@ -41,8 +46,6 @@ static void ovl_dentry_release(struct dentry *dentry)
if (oe) {
unsigned int i;
dput(oe->__upperdentry);
kfree(oe->redirect);
for (i = 0; i < oe->numlower; i++)
dput(oe->lowerstack[i].dentry);
kfree_rcu(oe, rcu);
......@@ -165,12 +168,52 @@ static const struct dentry_operations ovl_reval_dentry_operations = {
.d_weak_revalidate = ovl_dentry_weak_revalidate,
};
static struct kmem_cache *ovl_inode_cachep;
static struct inode *ovl_alloc_inode(struct super_block *sb)
{
struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);
oi->cache = NULL;
oi->redirect = NULL;
oi->version = 0;
oi->flags = 0;
oi->__upperdentry = NULL;
oi->lower = NULL;
mutex_init(&oi->lock);
return &oi->vfs_inode;
}
static void ovl_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(ovl_inode_cachep, OVL_I(inode));
}
static void ovl_destroy_inode(struct inode *inode)
{
struct ovl_inode *oi = OVL_I(inode);
dput(oi->__upperdentry);
kfree(oi->redirect);
mutex_destroy(&oi->lock);
call_rcu(&inode->i_rcu, ovl_i_callback);
}
static void ovl_put_super(struct super_block *sb)
{
struct ovl_fs *ufs = sb->s_fs_info;
unsigned i;
dput(ufs->indexdir);
dput(ufs->workdir);
ovl_inuse_unlock(ufs->workbasedir);
dput(ufs->workbasedir);
if (ufs->upper_mnt)
ovl_inuse_unlock(ufs->upper_mnt->mnt_root);
mntput(ufs->upper_mnt);
for (i = 0; i < ufs->numlower; i++)
mntput(ufs->lower_mnt[i]);
......@@ -228,6 +271,12 @@ static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
return err;
}
/* Will this overlay be forced to mount/remount ro? */
static bool ovl_force_readonly(struct ovl_fs *ufs)
{
return (!ufs->upper_mnt || !ufs->workdir);
}
/**
* ovl_show_options
*
......@@ -249,6 +298,9 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
if (ufs->config.redirect_dir != ovl_redirect_dir_def)
seq_printf(m, ",redirect_dir=%s",
ufs->config.redirect_dir ? "on" : "off");
if (ufs->config.index != ovl_index_def)
seq_printf(m, ",index=%s",
ufs->config.index ? "on" : "off");
return 0;
}
......@@ -256,19 +308,21 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data)
{
struct ovl_fs *ufs = sb->s_fs_info;
if (!(*flags & MS_RDONLY) && (!ufs->upper_mnt || !ufs->workdir))
if (!(*flags & MS_RDONLY) && ovl_force_readonly(ufs))
return -EROFS;
return 0;
}
static const struct super_operations ovl_super_operations = {
.alloc_inode = ovl_alloc_inode,
.destroy_inode = ovl_destroy_inode,
.drop_inode = generic_delete_inode,
.put_super = ovl_put_super,
.sync_fs = ovl_sync_fs,
.statfs = ovl_statfs,
.show_options = ovl_show_options,
.remount_fs = ovl_remount,
.drop_inode = generic_delete_inode,
};
enum {
......@@ -278,6 +332,8 @@ enum {
OPT_DEFAULT_PERMISSIONS,
OPT_REDIRECT_DIR_ON,
OPT_REDIRECT_DIR_OFF,
OPT_INDEX_ON,
OPT_INDEX_OFF,
OPT_ERR,
};
......@@ -288,6 +344,8 @@ static const match_table_t ovl_tokens = {
{OPT_DEFAULT_PERMISSIONS, "default_permissions"},
{OPT_REDIRECT_DIR_ON, "redirect_dir=on"},
{OPT_REDIRECT_DIR_OFF, "redirect_dir=off"},
{OPT_INDEX_ON, "index=on"},
{OPT_INDEX_OFF, "index=off"},
{OPT_ERR, NULL}
};
......@@ -360,6 +418,14 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
config->redirect_dir = false;
break;
case OPT_INDEX_ON:
config->index = true;
break;
case OPT_INDEX_OFF:
config->index = false;
break;
default:
pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
return -EINVAL;
......@@ -378,23 +444,29 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
}
#define OVL_WORKDIR_NAME "work"
#define OVL_INDEXDIR_NAME "index"
static struct dentry *ovl_workdir_create(struct vfsmount *mnt,
struct dentry *dentry)
static struct dentry *ovl_workdir_create(struct super_block *sb,
struct ovl_fs *ufs,
struct dentry *dentry,
const char *name, bool persist)
{
struct inode *dir = dentry->d_inode;
struct vfsmount *mnt = ufs->upper_mnt;
struct dentry *work;
int err;
bool retried = false;
bool locked = false;
err = mnt_want_write(mnt);
if (err)
return ERR_PTR(err);
goto out_err;
inode_lock_nested(dir, I_MUTEX_PARENT);
locked = true;
retry:
work = lookup_one_len(OVL_WORKDIR_NAME, dentry,
strlen(OVL_WORKDIR_NAME));
work = lookup_one_len(name, dentry, strlen(name));
if (!IS_ERR(work)) {
struct iattr attr = {
......@@ -407,6 +479,9 @@ static struct dentry *ovl_workdir_create(struct vfsmount *mnt,
if (retried)
goto out_dput;
if (persist)
goto out_unlock;
retried = true;
ovl_workdir_cleanup(dir, mnt, work, 0);
dput(work);
......@@ -446,16 +521,24 @@ static struct dentry *ovl_workdir_create(struct vfsmount *mnt,
inode_unlock(work->d_inode);
if (err)
goto out_dput;
} else {
err = PTR_ERR(work);
goto out_err;
}
out_unlock:
inode_unlock(dir);
mnt_drop_write(mnt);
if (locked)
inode_unlock(dir);
return work;
out_dput:
dput(work);
work = ERR_PTR(err);
out_err:
pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
ufs->config.workdir, name, -err);
sb->s_flags |= MS_RDONLY;
work = NULL;
goto out_unlock;
}
......@@ -555,6 +638,15 @@ static int ovl_lower_dir(const char *name, struct path *path,
if (ovl_dentry_remote(path->dentry))
*remote = true;
/*
* The inodes index feature needs to encode and decode file
* handles, so it requires that all layers support them.
*/
if (ofs->config.index && !ovl_can_decode_fh(path->dentry->d_sb)) {
ofs->config.index = false;
pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off.\n", name);
}
return 0;
out_put:
......@@ -610,7 +702,7 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
size_t size, int flags)
{
struct dentry *workdir = ovl_workdir(dentry);
struct inode *realinode = ovl_inode_real(inode, NULL);
struct inode *realinode = ovl_inode_real(inode);
struct posix_acl *acl = NULL;
int err;
......@@ -652,7 +744,7 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
err = ovl_xattr_set(dentry, handler->name, value, size, flags);
if (!err)
ovl_copyattr(ovl_inode_real(inode, NULL), inode);
ovl_copyattr(ovl_inode_real(inode), inode);
return err;
......@@ -734,7 +826,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
struct path upperpath = { };
struct path workpath = { };
struct dentry *root_dentry;
struct inode *realinode;
struct ovl_entry *oe;
struct ovl_fs *ufs;
struct path *stack = NULL;
......@@ -752,8 +843,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
if (!ufs)
goto out;
init_waitqueue_head(&ufs->copyup_wq);
ufs->config.redirect_dir = ovl_redirect_dir_def;
ufs->config.index = ovl_index_def;
err = ovl_parse_opt((char *) data, &ufs->config);
if (err)
goto out_free_config;
......@@ -788,9 +879,15 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
if (err)
goto out_put_upperpath;
err = -EBUSY;
if (!ovl_inuse_trylock(upperpath.dentry)) {
pr_err("overlayfs: upperdir is in-use by another mount\n");
goto out_put_upperpath;
}
err = ovl_mount_dir(ufs->config.workdir, &workpath);
if (err)
goto out_put_upperpath;
goto out_unlock_upperdentry;
err = -EINVAL;
if (upperpath.mnt != workpath.mnt) {
......@@ -801,12 +898,20 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
goto out_put_workpath;
}
err = -EBUSY;
if (!ovl_inuse_trylock(workpath.dentry)) {
pr_err("overlayfs: workdir is in-use by another mount\n");
goto out_put_workpath;
}
ufs->workbasedir = workpath.dentry;
sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth;
}
err = -ENOMEM;
lowertmp = kstrdup(ufs->config.lowerdir, GFP_KERNEL);
if (!lowertmp)
goto out_put_workpath;
goto out_unlock_workdentry;
err = -EINVAL;
stacklen = ovl_split_lowerdirs(lowertmp);
......@@ -849,20 +954,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
pr_err("overlayfs: failed to clone upperpath\n");
goto out_put_lowerpath;
}
/* Don't inherit atime flags */
ufs->upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
sb->s_time_gran = ufs->upper_mnt->mnt_sb->s_time_gran;
ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry);
err = PTR_ERR(ufs->workdir);
if (IS_ERR(ufs->workdir)) {
pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
ufs->config.workdir, OVL_WORKDIR_NAME, -err);
sb->s_flags |= MS_RDONLY;
ufs->workdir = NULL;
}
ufs->workdir = ovl_workdir_create(sb, ufs, workpath.dentry,
OVL_WORKDIR_NAME, false);
/*
* Upper should support d_type, else whiteouts are visible.
* Given workdir and upper are on same fs, we can do
......@@ -904,6 +1003,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
} else {
vfs_removexattr(ufs->workdir, OVL_XATTR_OPAQUE);
}
/* Check if upper/work fs supports file handles */
if (ufs->config.index &&
!ovl_can_decode_fh(ufs->workdir->d_sb)) {
ufs->config.index = false;
pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n");
}
}
}
......@@ -941,6 +1047,44 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
else if (ufs->upper_mnt->mnt_sb != ufs->same_sb)
ufs->same_sb = NULL;
if (!(ovl_force_readonly(ufs)) && ufs->config.index) {
/* Verify lower root is upper root origin */
err = ovl_verify_origin(upperpath.dentry, ufs->lower_mnt[0],
stack[0].dentry, false, true);
if (err) {
pr_err("overlayfs: failed to verify upper root origin\n");
goto out_put_lower_mnt;
}
ufs->indexdir = ovl_workdir_create(sb, ufs, workpath.dentry,
OVL_INDEXDIR_NAME, true);
err = PTR_ERR(ufs->indexdir);
if (IS_ERR(ufs->indexdir))
goto out_put_lower_mnt;
if (ufs->indexdir) {
/* Verify upper root is index dir origin */
err = ovl_verify_origin(ufs->indexdir, ufs->upper_mnt,
upperpath.dentry, true, true);
if (err)
pr_err("overlayfs: failed to verify index dir origin\n");
/* Cleanup bad/stale/orphan index entries */
if (!err)
err = ovl_indexdir_cleanup(ufs->indexdir,
ufs->upper_mnt,
stack, numlower);
}
if (err || !ufs->indexdir)
pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
if (err)
goto out_put_indexdir;
}
/* Show index=off/on in /proc/mounts for any of the reasons above */
if (!ufs->indexdir)
ufs->config.index = false;
if (remote)
sb->s_d_op = &ovl_reval_dentry_operations;
else
......@@ -948,7 +1092,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
ufs->creator_cred = cred = prepare_creds();
if (!cred)
goto out_put_lower_mnt;
goto out_put_indexdir;
/* Never override disk quota limits or use reserved space */
cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
......@@ -971,12 +1115,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
mntput(upperpath.mnt);
for (i = 0; i < numlower; i++)
mntput(stack[i].mnt);
path_put(&workpath);
mntput(workpath.mnt);
kfree(lowertmp);
if (upperpath.dentry) {
oe->__upperdentry = upperpath.dentry;
oe->impure = ovl_is_impuredir(upperpath.dentry);
oe->has_upper = true;
if (ovl_is_impuredir(upperpath.dentry))
ovl_set_flag(OVL_IMPURE, d_inode(root_dentry));
}
for (i = 0; i < numlower; i++) {
oe->lowerstack[i].dentry = stack[i].dentry;
......@@ -986,9 +1131,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
root_dentry->d_fsdata = oe;
realinode = d_inode(ovl_dentry_real(root_dentry));
ovl_inode_init(d_inode(root_dentry), realinode, !!upperpath.dentry);
ovl_copyattr(realinode, d_inode(root_dentry));
ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
ovl_dentry_lower(root_dentry));
sb->s_root = root_dentry;
......@@ -998,6 +1142,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
kfree(oe);
out_put_cred:
put_cred(ufs->creator_cred);
out_put_indexdir:
dput(ufs->indexdir);
out_put_lower_mnt:
for (i = 0; i < ufs->numlower; i++)
mntput(ufs->lower_mnt[i]);
......@@ -1011,8 +1157,12 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
kfree(stack);
out_free_lowertmp:
kfree(lowertmp);
out_unlock_workdentry:
ovl_inuse_unlock(workpath.dentry);
out_put_workpath:
path_put(&workpath);
out_unlock_upperdentry:
ovl_inuse_unlock(upperpath.dentry);
out_put_upperpath:
path_put(&upperpath);
out_free_config:
......@@ -1038,14 +1188,43 @@ static struct file_system_type ovl_fs_type = {
};
MODULE_ALIAS_FS("overlay");
static void ovl_inode_init_once(void *foo)
{
struct ovl_inode *oi = foo;
inode_init_once(&oi->vfs_inode);
}
static int __init ovl_init(void)
{
return register_filesystem(&ovl_fs_type);
int err;
ovl_inode_cachep = kmem_cache_create("ovl_inode",
sizeof(struct ovl_inode), 0,
(SLAB_RECLAIM_ACCOUNT|
SLAB_MEM_SPREAD|SLAB_ACCOUNT),
ovl_inode_init_once);
if (ovl_inode_cachep == NULL)
return -ENOMEM;
err = register_filesystem(&ovl_fs_type);
if (err)
kmem_cache_destroy(ovl_inode_cachep);
return err;
}
static void __exit ovl_exit(void)
{
unregister_filesystem(&ovl_fs_type);
/*
* Make sure all delayed rcu free inodes are flushed before we
* destroy cache.
*/
rcu_barrier();
kmem_cache_destroy(ovl_inode_cachep);
}
module_init(ovl_init);
......
......@@ -12,6 +12,10 @@
#include <linux/slab.h>
#include <linux/cred.h>
#include <linux/xattr.h>
#include <linux/exportfs.h>
#include <linux/uuid.h>
#include <linux/namei.h>
#include <linux/ratelimit.h>
#include "overlayfs.h"
#include "ovl_entry.h"
......@@ -47,6 +51,19 @@ struct super_block *ovl_same_sb(struct super_block *sb)
return ofs->same_sb;
}
bool ovl_can_decode_fh(struct super_block *sb)
{
return (sb->s_export_op && sb->s_export_op->fh_to_dentry &&
!uuid_is_null(&sb->s_uuid));
}
struct dentry *ovl_indexdir(struct super_block *sb)
{
struct ovl_fs *ofs = sb->s_fs_info;
return ofs->indexdir;
}
struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
{
size_t size = offsetof(struct ovl_entry, lowerstack[numlower]);
......@@ -78,7 +95,7 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry)
struct ovl_entry *oe = dentry->d_fsdata;
enum ovl_path_type type = 0;
if (oe->__upperdentry) {
if (ovl_dentry_upper(dentry)) {
type = __OVL_PATH_UPPER;
/*
......@@ -99,10 +116,9 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry)
void ovl_path_upper(struct dentry *dentry, struct path *path)
{
struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
struct ovl_entry *oe = dentry->d_fsdata;
path->mnt = ofs->upper_mnt;
path->dentry = ovl_upperdentry_dereference(oe);
path->dentry = ovl_dentry_upper(dentry);
}
void ovl_path_lower(struct dentry *dentry, struct path *path)
......@@ -125,48 +141,48 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
}
struct dentry *ovl_dentry_upper(struct dentry *dentry)
{
return ovl_upperdentry_dereference(OVL_I(d_inode(dentry)));
}
struct dentry *ovl_dentry_lower(struct dentry *dentry)
{
struct ovl_entry *oe = dentry->d_fsdata;
return ovl_upperdentry_dereference(oe);
return oe->numlower ? oe->lowerstack[0].dentry : NULL;
}
static struct dentry *__ovl_dentry_lower(struct ovl_entry *oe)
struct dentry *ovl_dentry_real(struct dentry *dentry)
{
return oe->numlower ? oe->lowerstack[0].dentry : NULL;
return ovl_dentry_upper(dentry) ?: ovl_dentry_lower(dentry);
}
struct dentry *ovl_dentry_lower(struct dentry *dentry)
struct inode *ovl_inode_upper(struct inode *inode)
{
struct ovl_entry *oe = dentry->d_fsdata;
struct dentry *upperdentry = ovl_upperdentry_dereference(OVL_I(inode));
return __ovl_dentry_lower(oe);
return upperdentry ? d_inode(upperdentry) : NULL;
}
struct dentry *ovl_dentry_real(struct dentry *dentry)
struct inode *ovl_inode_lower(struct inode *inode)
{
struct ovl_entry *oe = dentry->d_fsdata;
struct dentry *realdentry;
realdentry = ovl_upperdentry_dereference(oe);
if (!realdentry)
realdentry = __ovl_dentry_lower(oe);
return OVL_I(inode)->lower;
}
return realdentry;
struct inode *ovl_inode_real(struct inode *inode)
{
return ovl_inode_upper(inode) ?: ovl_inode_lower(inode);
}
struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry)
{
struct ovl_entry *oe = dentry->d_fsdata;
return oe->cache;
return OVL_I(d_inode(dentry))->cache;
}
void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache)
{
struct ovl_entry *oe = dentry->d_fsdata;
oe->cache = cache;
OVL_I(d_inode(dentry))->cache = cache;
}
bool ovl_dentry_is_opaque(struct dentry *dentry)
......@@ -175,23 +191,35 @@ bool ovl_dentry_is_opaque(struct dentry *dentry)
return oe->opaque;
}
bool ovl_dentry_is_impure(struct dentry *dentry)
bool ovl_dentry_is_whiteout(struct dentry *dentry)
{
return !dentry->d_inode && ovl_dentry_is_opaque(dentry);
}
void ovl_dentry_set_opaque(struct dentry *dentry)
{
struct ovl_entry *oe = dentry->d_fsdata;
return oe->impure;
oe->opaque = true;
}
bool ovl_dentry_is_whiteout(struct dentry *dentry)
/*
* For hard links it's possible for ovl_dentry_upper() to return positive, while
* there's no actual upper alias for the inode. Copy up code needs to know
* about the existence of the upper alias, so it can't use ovl_dentry_upper().
*/
bool ovl_dentry_has_upper_alias(struct dentry *dentry)
{
return !dentry->d_inode && ovl_dentry_is_opaque(dentry);
struct ovl_entry *oe = dentry->d_fsdata;
return oe->has_upper;
}
void ovl_dentry_set_opaque(struct dentry *dentry)
void ovl_dentry_set_upper_alias(struct dentry *dentry)
{
struct ovl_entry *oe = dentry->d_fsdata;
oe->opaque = true;
oe->has_upper = true;
}
bool ovl_redirect_dir(struct super_block *sb)
......@@ -203,63 +231,59 @@ bool ovl_redirect_dir(struct super_block *sb)
const char *ovl_dentry_get_redirect(struct dentry *dentry)
{
struct ovl_entry *oe = dentry->d_fsdata;
return oe->redirect;
return OVL_I(d_inode(dentry))->redirect;
}
void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect)
{
struct ovl_entry *oe = dentry->d_fsdata;
struct ovl_inode *oi = OVL_I(d_inode(dentry));
kfree(oe->redirect);
oe->redirect = redirect;
kfree(oi->redirect);
oi->redirect = redirect;
}
void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry)
void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
struct dentry *lowerdentry)
{
struct ovl_entry *oe = dentry->d_fsdata;
if (upperdentry)
OVL_I(inode)->__upperdentry = upperdentry;
if (lowerdentry)
OVL_I(inode)->lower = d_inode(lowerdentry);
WARN_ON(!inode_is_locked(upperdentry->d_parent->d_inode));
WARN_ON(oe->__upperdentry);
/*
* Make sure upperdentry is consistent before making it visible to
* ovl_upperdentry_dereference().
*/
smp_wmb();
oe->__upperdentry = upperdentry;
ovl_copyattr(d_inode(upperdentry ?: lowerdentry), inode);
}
void ovl_inode_init(struct inode *inode, struct inode *realinode, bool is_upper)
void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
{
WRITE_ONCE(inode->i_private, (unsigned long) realinode |
(is_upper ? OVL_ISUPPER_MASK : 0));
}
struct inode *upperinode = d_inode(upperdentry);
void ovl_inode_update(struct inode *inode, struct inode *upperinode)
{
WARN_ON(!upperinode);
WARN_ON(!inode_unhashed(inode));
WRITE_ONCE(inode->i_private,
(unsigned long) upperinode | OVL_ISUPPER_MASK);
if (!S_ISDIR(upperinode->i_mode))
WARN_ON(OVL_I(inode)->__upperdentry);
/*
* Make sure upperdentry is consistent before making it visible
*/
smp_wmb();
OVL_I(inode)->__upperdentry = upperdentry;
if (!S_ISDIR(upperinode->i_mode) && inode_unhashed(inode)) {
inode->i_private = upperinode;
__insert_inode_hash(inode, (unsigned long) upperinode);
}
}
void ovl_dentry_version_inc(struct dentry *dentry)
{
struct ovl_entry *oe = dentry->d_fsdata;
struct inode *inode = d_inode(dentry);
WARN_ON(!inode_is_locked(dentry->d_inode));
oe->version++;
WARN_ON(!inode_is_locked(inode));
OVL_I(inode)->version++;
}
u64 ovl_dentry_version_get(struct dentry *dentry)
{
struct ovl_entry *oe = dentry->d_fsdata;
struct inode *inode = d_inode(dentry);
WARN_ON(!inode_is_locked(dentry->d_inode));
return oe->version;
WARN_ON(!inode_is_locked(inode));
return OVL_I(inode)->version;
}
bool ovl_is_whiteout(struct dentry *dentry)
......@@ -276,32 +300,21 @@ struct file *ovl_path_open(struct path *path, int flags)
int ovl_copy_up_start(struct dentry *dentry)
{
struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
struct ovl_entry *oe = dentry->d_fsdata;
struct ovl_inode *oi = OVL_I(d_inode(dentry));
int err;
spin_lock(&ofs->copyup_wq.lock);
err = wait_event_interruptible_locked(ofs->copyup_wq, !oe->copying);
if (!err) {
if (oe->__upperdentry)
err = mutex_lock_interruptible(&oi->lock);
if (!err && ovl_dentry_has_upper_alias(dentry)) {
err = 1; /* Already copied up */
else
oe->copying = true;
mutex_unlock(&oi->lock);
}
spin_unlock(&ofs->copyup_wq.lock);
return err;
}
void ovl_copy_up_end(struct dentry *dentry)
{
struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
struct ovl_entry *oe = dentry->d_fsdata;
spin_lock(&ofs->copyup_wq.lock);
oe->copying = false;
wake_up_locked(&ofs->copyup_wq);
spin_unlock(&ofs->copyup_wq.lock);
mutex_unlock(&OVL_I(d_inode(dentry))->lock);
}
bool ovl_check_dir_xattr(struct dentry *dentry, const char *name)
......@@ -343,9 +356,8 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry)
{
int err;
struct ovl_entry *oe = dentry->d_fsdata;
if (oe->impure)
if (ovl_test_flag(OVL_IMPURE, d_inode(dentry)))
return 0;
/*
......@@ -355,7 +367,176 @@ int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry)
err = ovl_check_setxattr(dentry, upperdentry, OVL_XATTR_IMPURE,
"y", 1, 0);
if (!err)
oe->impure = true;
ovl_set_flag(OVL_IMPURE, d_inode(dentry));
return err;
}
void ovl_set_flag(unsigned long flag, struct inode *inode)
{
set_bit(flag, &OVL_I(inode)->flags);
}
bool ovl_test_flag(unsigned long flag, struct inode *inode)
{
return test_bit(flag, &OVL_I(inode)->flags);
}
/**
* Caller must hold a reference to inode to prevent it from being freed while
* it is marked inuse.
*/
bool ovl_inuse_trylock(struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
bool locked = false;
spin_lock(&inode->i_lock);
if (!(inode->i_state & I_OVL_INUSE)) {
inode->i_state |= I_OVL_INUSE;
locked = true;
}
spin_unlock(&inode->i_lock);
return locked;
}
void ovl_inuse_unlock(struct dentry *dentry)
{
if (dentry) {
struct inode *inode = d_inode(dentry);
spin_lock(&inode->i_lock);
WARN_ON(!(inode->i_state & I_OVL_INUSE));
inode->i_state &= ~I_OVL_INUSE;
spin_unlock(&inode->i_lock);
}
}
/* Called must hold OVL_I(inode)->oi_lock */
static void ovl_cleanup_index(struct dentry *dentry)
{
struct inode *dir = ovl_indexdir(dentry->d_sb)->d_inode;
struct dentry *lowerdentry = ovl_dentry_lower(dentry);
struct dentry *upperdentry = ovl_dentry_upper(dentry);
struct dentry *index = NULL;
struct inode *inode;
struct qstr name;
int err;
err = ovl_get_index_name(lowerdentry, &name);
if (err)
goto fail;
inode = d_inode(upperdentry);
if (inode->i_nlink != 1) {
pr_warn_ratelimited("overlayfs: cleanup linked index (%pd2, ino=%lu, nlink=%u)\n",
upperdentry, inode->i_ino, inode->i_nlink);
/*
* We either have a bug with persistent union nlink or a lower
* hardlink was added while overlay is mounted. Adding a lower
* hardlink and then unlinking all overlay hardlinks would drop
* overlay nlink to zero before all upper inodes are unlinked.
* As a safety measure, when that situation is detected, set
* the overlay nlink to the index inode nlink minus one for the
* index entry itself.
*/
set_nlink(d_inode(dentry), inode->i_nlink - 1);
ovl_set_nlink_upper(dentry);
goto out;
}
inode_lock_nested(dir, I_MUTEX_PARENT);
/* TODO: whiteout instead of cleanup to block future open by handle */
index = lookup_one_len(name.name, ovl_indexdir(dentry->d_sb), name.len);
err = PTR_ERR(index);
if (!IS_ERR(index))
err = ovl_cleanup(dir, index);
inode_unlock(dir);
if (err)
goto fail;
out:
dput(index);
return;
fail:
pr_err("overlayfs: cleanup index of '%pd2' failed (%i)\n", dentry, err);
goto out;
}
/*
* Operations that change overlay inode and upper inode nlink need to be
* synchronized with copy up for persistent nlink accounting.
*/
int ovl_nlink_start(struct dentry *dentry, bool *locked)
{
struct ovl_inode *oi = OVL_I(d_inode(dentry));
const struct cred *old_cred;
int err;
if (!d_inode(dentry) || d_is_dir(dentry))
return 0;
/*
* With inodes index is enabled, we store the union overlay nlink
* in an xattr on the index inode. When whiting out lower hardlinks
* we need to decrement the overlay persistent nlink, but before the
* first copy up, we have no upper index inode to store the xattr.
*
* As a workaround, before whiteout/rename over of a lower hardlink,
* copy up to create the upper index. Creating the upper index will
* initialize the overlay nlink, so it could be dropped if unlink
* or rename succeeds.
*
* TODO: implement metadata only index copy up when called with
* ovl_copy_up_flags(dentry, O_PATH).
*/
if (ovl_indexdir(dentry->d_sb) && !ovl_dentry_has_upper_alias(dentry) &&
d_inode(ovl_dentry_lower(dentry))->i_nlink > 1) {
err = ovl_copy_up(dentry);
if (err)
return err;
}
err = mutex_lock_interruptible(&oi->lock);
if (err)
return err;
if (!ovl_test_flag(OVL_INDEX, d_inode(dentry)))
goto out;
old_cred = ovl_override_creds(dentry->d_sb);
/*
* The overlay inode nlink should be incremented/decremented IFF the
* upper operation succeeds, along with nlink change of upper inode.
* Therefore, before link/unlink/rename, we store the union nlink
* value relative to the upper inode nlink in an upper inode xattr.
*/
err = ovl_set_nlink_upper(dentry);
revert_creds(old_cred);
out:
if (err)
mutex_unlock(&oi->lock);
else
*locked = true;
return err;
}
void ovl_nlink_end(struct dentry *dentry, bool locked)
{
if (locked) {
if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) &&
d_inode(dentry)->i_nlink == 0) {
const struct cred *old_cred;
old_cred = ovl_override_creds(dentry->d_sb);
ovl_cleanup_index(dentry);
revert_creds(old_cred);
}
mutex_unlock(&OVL_I(d_inode(dentry))->lock);
}
}
......@@ -1955,6 +1955,9 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
* wb stat updates to grab mapping->tree_lock. See
* inode_switch_wb_work_fn() for details.
*
* I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper
* and work dirs among overlayfs mounts.
*
* Q: What is the difference between I_WILL_FREE and I_FREEING?
*/
#define I_DIRTY_SYNC (1 << 0)
......@@ -1975,6 +1978,7 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
#define __I_DIRTY_TIME_EXPIRED 12
#define I_DIRTY_TIME_EXPIRED (1 << __I_DIRTY_TIME_EXPIRED)
#define I_WB_SWITCH (1 << 13)
#define I_OVL_INUSE (1 << 14)
#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
#define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册