提交 7b47a9e7 编写于 作者: L Linus Torvalds

Merge branch 'work.mount' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs mount infrastructure updates from Al Viro:
 "The rest of core infrastructure; no new syscalls in that pile, but the
  old parts are switched to new infrastructure. At that point
  conversions of individual filesystems can happen independently; some
  are done here (afs, cgroup, procfs, etc.), there's also a large series
  outside of that pile dealing with NFS (quite a bit of option-parsing
  stuff is getting used there - it's one of the most convoluted
  filesystems in terms of mount-related logics), but NFS bits are the
  next cycle fodder.

  It got seriously simplified since the last cycle; documentation is
  probably the weakest bit at the moment - I considered dropping the
  commit introducing Documentation/filesystems/mount_api.txt (cutting
  the size increase by quarter ;-), but decided that it would be better
  to fix it up after -rc1 instead.

  That pile allows to do followup work in independent branches, which
  should make life much easier for the next cycle. fs/super.c size
  increase is unpleasant; there's a followup series that allows to
  shrink it considerably, but I decided to leave that until the next
  cycle"

* 'work.mount' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (41 commits)
  afs: Use fs_context to pass parameters over automount
  afs: Add fs_context support
  vfs: Add some logging to the core users of the fs_context log
  vfs: Implement logging through fs_context
  vfs: Provide documentation for new mount API
  vfs: Remove kern_mount_data()
  hugetlbfs: Convert to fs_context
  cpuset: Use fs_context
  kernfs, sysfs, cgroup, intel_rdt: Support fs_context
  cgroup: store a reference to cgroup_ns into cgroup_fs_context
  cgroup1_get_tree(): separate "get cgroup_root to use" into a separate helper
  cgroup_do_mount(): massage calling conventions
  cgroup: stash cgroup_root reference into cgroup_fs_context
  cgroup2: switch to option-by-option parsing
  cgroup1: switch to option-by-option parsing
  cgroup: take options parsing into ->parse_monolithic()
  cgroup: fold cgroup1_mount() into cgroup1_get_tree()
  cgroup: start switching to fs_context
  ipc: Convert mqueue fs to fs_context
  proc: Add fs_context support to procfs
  ...
无相关合并请求
此差异已折叠。
......@@ -4,6 +4,7 @@
#include <linux/sched.h>
#include <linux/kernfs.h>
#include <linux/fs_context.h>
#include <linux/jump_label.h>
#define MSR_IA32_L3_QOS_CFG 0xc81
......@@ -40,6 +41,21 @@
#define RMID_VAL_ERROR BIT_ULL(63)
#define RMID_VAL_UNAVAIL BIT_ULL(62)
struct rdt_fs_context {
struct kernfs_fs_context kfc;
bool enable_cdpl2;
bool enable_cdpl3;
bool enable_mba_mbps;
};
static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
{
struct kernfs_fs_context *kfc = fc->fs_private;
return container_of(kfc, struct rdt_fs_context, kfc);
}
DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
/**
......
......@@ -24,6 +24,7 @@
#include <linux/cpu.h>
#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/fs_parser.h>
#include <linux/sysfs.h>
#include <linux/kernfs.h>
#include <linux/seq_buf.h>
......@@ -32,6 +33,7 @@
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/task_work.h>
#include <linux/user_namespace.h>
#include <uapi/linux/magic.h>
......@@ -1858,46 +1860,6 @@ static void cdp_disable_all(void)
cdpl2_disable();
}
static int parse_rdtgroupfs_options(char *data)
{
char *token, *o = data;
int ret = 0;
while ((token = strsep(&o, ",")) != NULL) {
if (!*token) {
ret = -EINVAL;
goto out;
}
if (!strcmp(token, "cdp")) {
ret = cdpl3_enable();
if (ret)
goto out;
} else if (!strcmp(token, "cdpl2")) {
ret = cdpl2_enable();
if (ret)
goto out;
} else if (!strcmp(token, "mba_MBps")) {
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
ret = set_mba_sc(true);
else
ret = -EINVAL;
if (ret)
goto out;
} else {
ret = -EINVAL;
goto out;
}
}
return 0;
out:
pr_err("Invalid mount option \"%s\"\n", token);
return ret;
}
/*
* We don't allow rdtgroup directories to be created anywhere
* except the root directory. Thus when looking for the rdtgroup
......@@ -1969,13 +1931,27 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn,
struct rdtgroup *prgrp,
struct kernfs_node **mon_data_kn);
static struct dentry *rdt_mount(struct file_system_type *fs_type,
int flags, const char *unused_dev_name,
void *data)
static int rdt_enable_ctx(struct rdt_fs_context *ctx)
{
int ret = 0;
if (ctx->enable_cdpl2)
ret = cdpl2_enable();
if (!ret && ctx->enable_cdpl3)
ret = cdpl3_enable();
if (!ret && ctx->enable_mba_mbps)
ret = set_mba_sc(true);
return ret;
}
static int rdt_get_tree(struct fs_context *fc)
{
struct rdt_fs_context *ctx = rdt_fc2context(fc);
struct rdt_domain *dom;
struct rdt_resource *r;
struct dentry *dentry;
int ret;
cpus_read_lock();
......@@ -1984,53 +1960,42 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
* resctrl file system can only be mounted once.
*/
if (static_branch_unlikely(&rdt_enable_key)) {
dentry = ERR_PTR(-EBUSY);
ret = -EBUSY;
goto out;
}
ret = parse_rdtgroupfs_options(data);
if (ret) {
dentry = ERR_PTR(ret);
ret = rdt_enable_ctx(ctx);
if (ret < 0)
goto out_cdp;
}
closid_init();
ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
if (ret) {
dentry = ERR_PTR(ret);
goto out_cdp;
}
if (ret < 0)
goto out_mba;
if (rdt_mon_capable) {
ret = mongroup_create_dir(rdtgroup_default.kn,
NULL, "mon_groups",
&kn_mongrp);
if (ret) {
dentry = ERR_PTR(ret);
if (ret < 0)
goto out_info;
}
kernfs_get(kn_mongrp);
ret = mkdir_mondata_all(rdtgroup_default.kn,
&rdtgroup_default, &kn_mondata);
if (ret) {
dentry = ERR_PTR(ret);
if (ret < 0)
goto out_mongrp;
}
kernfs_get(kn_mondata);
rdtgroup_default.mon.mon_data_kn = kn_mondata;
}
ret = rdt_pseudo_lock_init();
if (ret) {
dentry = ERR_PTR(ret);
if (ret)
goto out_mondata;
}
dentry = kernfs_mount(fs_type, flags, rdt_root,
RDTGROUP_SUPER_MAGIC, NULL);
if (IS_ERR(dentry))
ret = kernfs_get_tree(fc);
if (ret < 0)
goto out_psl;
if (rdt_alloc_capable)
......@@ -2059,14 +2024,95 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
kernfs_remove(kn_mongrp);
out_info:
kernfs_remove(kn_info);
out_mba:
if (ctx->enable_mba_mbps)
set_mba_sc(false);
out_cdp:
cdp_disable_all();
out:
rdt_last_cmd_clear();
mutex_unlock(&rdtgroup_mutex);
cpus_read_unlock();
return ret;
}
enum rdt_param {
Opt_cdp,
Opt_cdpl2,
Opt_mba_mpbs,
nr__rdt_params
};
static const struct fs_parameter_spec rdt_param_specs[] = {
fsparam_flag("cdp", Opt_cdp),
fsparam_flag("cdpl2", Opt_cdpl2),
fsparam_flag("mba_mpbs", Opt_mba_mpbs),
{}
};
static const struct fs_parameter_description rdt_fs_parameters = {
.name = "rdt",
.specs = rdt_param_specs,
};
static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
struct rdt_fs_context *ctx = rdt_fc2context(fc);
struct fs_parse_result result;
int opt;
opt = fs_parse(fc, &rdt_fs_parameters, param, &result);
if (opt < 0)
return opt;
return dentry;
switch (opt) {
case Opt_cdp:
ctx->enable_cdpl3 = true;
return 0;
case Opt_cdpl2:
ctx->enable_cdpl2 = true;
return 0;
case Opt_mba_mpbs:
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return -EINVAL;
ctx->enable_mba_mbps = true;
return 0;
}
return -EINVAL;
}
static void rdt_fs_context_free(struct fs_context *fc)
{
struct rdt_fs_context *ctx = rdt_fc2context(fc);
kernfs_free_fs_context(fc);
kfree(ctx);
}
static const struct fs_context_operations rdt_fs_context_ops = {
.free = rdt_fs_context_free,
.parse_param = rdt_parse_param,
.get_tree = rdt_get_tree,
};
static int rdt_init_fs_context(struct fs_context *fc)
{
struct rdt_fs_context *ctx;
ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
ctx->kfc.root = rdt_root;
ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
fc->fs_private = &ctx->kfc;
fc->ops = &rdt_fs_context_ops;
if (fc->user_ns)
put_user_ns(fc->user_ns);
fc->user_ns = get_user_ns(&init_user_ns);
fc->global = true;
return 0;
}
static int reset_all_ctrls(struct rdt_resource *r)
......@@ -2239,9 +2285,10 @@ static void rdt_kill_sb(struct super_block *sb)
}
static struct file_system_type rdt_fs_type = {
.name = "resctrl",
.mount = rdt_mount,
.kill_sb = rdt_kill_sb,
.name = "resctrl",
.init_fs_context = rdt_init_fs_context,
.parameters = &rdt_fs_parameters,
.kill_sb = rdt_kill_sb,
};
static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
......
......@@ -8,6 +8,13 @@ menu "File systems"
config DCACHE_WORD_ACCESS
bool
config VALIDATE_FS_PARSER
bool "Validate filesystem parameter description"
default y
help
Enable this to perform validation of the parameter description for a
filesystem when it is registered.
if BLOCK
config FS_IOMAP
......
......@@ -13,7 +13,7 @@ obj-y := open.o read_write.o file_table.o super.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
pnode.o splice.o sync.o utimes.o d_path.o \
stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \
fs_types.o
fs_types.o fs_context.o fs_parser.o
ifeq ($(CONFIG_BLOCK),y)
obj-y += buffer.o block_dev.o direct-io.o mpage.o
......
......@@ -36,15 +36,14 @@
struct pagevec;
struct afs_call;
struct afs_mount_params {
bool rwpath; /* T if the parent should be considered R/W */
struct afs_fs_context {
bool force; /* T to force cell type */
bool autocell; /* T if set auto mount operation */
bool dyn_root; /* T if dynamic root */
bool no_cell; /* T if the source is "none" (for dynroot) */
afs_voltype_t type; /* type of volume requested */
int volnamesz; /* size of volume name */
unsigned int volnamesz; /* size of volume name */
const char *volname; /* name of volume to mount */
struct net *net_ns; /* Network namespace in effect */
struct afs_net *net; /* the AFS net namespace stuff */
struct afs_cell *cell; /* cell in which to find volume */
struct afs_volume *volume; /* volume record */
......@@ -1274,7 +1273,7 @@ static inline struct afs_volume *__afs_get_volume(struct afs_volume *volume)
return volume;
}
extern struct afs_volume *afs_create_volume(struct afs_mount_params *);
extern struct afs_volume *afs_create_volume(struct afs_fs_context *);
extern void afs_activate_volume(struct afs_volume *);
extern void afs_deactivate_volume(struct afs_volume *);
extern void afs_put_volume(struct afs_cell *, struct afs_volume *);
......
......@@ -17,6 +17,7 @@
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/gfp.h>
#include <linux/fs_context.h>
#include "internal.h"
......@@ -47,6 +48,8 @@ static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out);
static unsigned long afs_mntpt_expiry_timeout = 10 * 60;
static const char afs_root_volume[] = "root.cell";
/*
* no valid lookup procedure on this sort of dir
*/
......@@ -68,108 +71,112 @@ static int afs_mntpt_open(struct inode *inode, struct file *file)
}
/*
* create a vfsmount to be automounted
* Set the parameters for the proposed superblock.
*/
static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
{
struct afs_super_info *as;
struct vfsmount *mnt;
struct afs_vnode *vnode;
struct page *page;
char *devname, *options;
bool rwpath = false;
struct afs_fs_context *ctx = fc->fs_private;
struct afs_super_info *src_as = AFS_FS_S(mntpt->d_sb);
struct afs_vnode *vnode = AFS_FS_I(d_inode(mntpt));
struct afs_cell *cell;
const char *p;
int ret;
_enter("{%pd}", mntpt);
BUG_ON(!d_inode(mntpt));
ret = -ENOMEM;
devname = (char *) get_zeroed_page(GFP_KERNEL);
if (!devname)
goto error_no_devname;
options = (char *) get_zeroed_page(GFP_KERNEL);
if (!options)
goto error_no_options;
if (fc->net_ns != src_as->net_ns) {
put_net(fc->net_ns);
fc->net_ns = get_net(src_as->net_ns);
}
vnode = AFS_FS_I(d_inode(mntpt));
if (src_as->volume && src_as->volume->type == AFSVL_RWVOL) {
ctx->type = AFSVL_RWVOL;
ctx->force = true;
}
if (ctx->cell) {
afs_put_cell(ctx->net, ctx->cell);
ctx->cell = NULL;
}
if (test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags)) {
/* if the directory is a pseudo directory, use the d_name */
static const char afs_root_cell[] = ":root.cell.";
unsigned size = mntpt->d_name.len;
ret = -ENOENT;
if (size < 2 || size > AFS_MAXCELLNAME)
goto error_no_page;
if (size < 2)
return -ENOENT;
p = mntpt->d_name.name;
if (mntpt->d_name.name[0] == '.') {
devname[0] = '%';
memcpy(devname + 1, mntpt->d_name.name + 1, size - 1);
memcpy(devname + size, afs_root_cell,
sizeof(afs_root_cell));
rwpath = true;
} else {
devname[0] = '#';
memcpy(devname + 1, mntpt->d_name.name, size);
memcpy(devname + size + 1, afs_root_cell,
sizeof(afs_root_cell));
size--;
p++;
ctx->type = AFSVL_RWVOL;
ctx->force = true;
}
if (size > AFS_MAXCELLNAME)
return -ENAMETOOLONG;
cell = afs_lookup_cell(ctx->net, p, size, NULL, false);
if (IS_ERR(cell)) {
pr_err("kAFS: unable to lookup cell '%pd'\n", mntpt);
return PTR_ERR(cell);
}
ctx->cell = cell;
ctx->volname = afs_root_volume;
ctx->volnamesz = sizeof(afs_root_volume) - 1;
} else {
/* read the contents of the AFS special symlink */
struct page *page;
loff_t size = i_size_read(d_inode(mntpt));
char *buf;
ret = -EINVAL;
if (src_as->cell)
ctx->cell = afs_get_cell(src_as->cell);
if (size > PAGE_SIZE - 1)
goto error_no_page;
return -EINVAL;
page = read_mapping_page(d_inode(mntpt)->i_mapping, 0, NULL);
if (IS_ERR(page)) {
ret = PTR_ERR(page);
goto error_no_page;
}
if (IS_ERR(page))
return PTR_ERR(page);
if (PageError(page)) {
ret = afs_bad(AFS_FS_I(d_inode(mntpt)), afs_file_error_mntpt);
goto error;
put_page(page);
return ret;
}
buf = kmap_atomic(page);
memcpy(devname, buf, size);
kunmap_atomic(buf);
buf = kmap(page);
ret = vfs_parse_fs_string(fc, "source", buf, size);
kunmap(page);
put_page(page);
page = NULL;
if (ret < 0)
return ret;
}
/* work out what options we want */
as = AFS_FS_S(mntpt->d_sb);
if (as->cell) {
memcpy(options, "cell=", 5);
strcpy(options + 5, as->cell->name);
if ((as->volume && as->volume->type == AFSVL_RWVOL) || rwpath)
strcat(options, ",rwpath");
}
return 0;
}
/* try and do the mount */
_debug("--- attempting mount %s -o %s ---", devname, options);
mnt = vfs_submount(mntpt, &afs_fs_type, devname, options);
_debug("--- mount result %p ---", mnt);
/*
* create a vfsmount to be automounted
*/
static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
{
struct fs_context *fc;
struct vfsmount *mnt;
int ret;
free_page((unsigned long) devname);
free_page((unsigned long) options);
_leave(" = %p", mnt);
return mnt;
BUG_ON(!d_inode(mntpt));
error:
put_page(page);
error_no_page:
free_page((unsigned long) options);
error_no_options:
free_page((unsigned long) devname);
error_no_devname:
_leave(" = %d", ret);
return ERR_PTR(ret);
fc = fs_context_for_submount(&afs_fs_type, mntpt);
if (IS_ERR(fc))
return ERR_CAST(fc);
ret = afs_mntpt_set_params(fc, mntpt);
if (!ret)
mnt = fc_mount(fc);
else
mnt = ERR_PTR(ret);
put_fs_context(fc);
return mnt;
}
/*
......
/* AFS superblock handling
*
* Copyright (c) 2002, 2007 Red Hat, Inc. All rights reserved.
* Copyright (c) 2002, 2007, 2018 Red Hat, Inc. All rights reserved.
*
* This software may be freely redistributed under the terms of the
* GNU General Public License.
......@@ -21,7 +21,7 @@
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/parser.h>
#include <linux/fs_parser.h>
#include <linux/statfs.h>
#include <linux/sched.h>
#include <linux/nsproxy.h>
......@@ -30,21 +30,22 @@
#include "internal.h"
static void afs_i_init_once(void *foo);
static struct dentry *afs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data);
static void afs_kill_super(struct super_block *sb);
static struct inode *afs_alloc_inode(struct super_block *sb);
static void afs_destroy_inode(struct inode *inode);
static int afs_statfs(struct dentry *dentry, struct kstatfs *buf);
static int afs_show_devname(struct seq_file *m, struct dentry *root);
static int afs_show_options(struct seq_file *m, struct dentry *root);
static int afs_init_fs_context(struct fs_context *fc);
static const struct fs_parameter_description afs_fs_parameters;
struct file_system_type afs_fs_type = {
.owner = THIS_MODULE,
.name = "afs",
.mount = afs_mount,
.kill_sb = afs_kill_super,
.fs_flags = 0,
.owner = THIS_MODULE,
.name = "afs",
.init_fs_context = afs_init_fs_context,
.parameters = &afs_fs_parameters,
.kill_sb = afs_kill_super,
.fs_flags = 0,
};
MODULE_ALIAS_FS("afs");
......@@ -63,22 +64,22 @@ static const struct super_operations afs_super_ops = {
static struct kmem_cache *afs_inode_cachep;
static atomic_t afs_count_active_inodes;
enum {
afs_no_opt,
afs_opt_cell,
afs_opt_dyn,
afs_opt_rwpath,
afs_opt_vol,
afs_opt_autocell,
enum afs_param {
Opt_autocell,
Opt_dyn,
Opt_source,
};
static const match_table_t afs_options_list = {
{ afs_opt_cell, "cell=%s" },
{ afs_opt_dyn, "dyn" },
{ afs_opt_rwpath, "rwpath" },
{ afs_opt_vol, "vol=%s" },
{ afs_opt_autocell, "autocell" },
{ afs_no_opt, NULL },
static const struct fs_parameter_spec afs_param_specs[] = {
fsparam_flag ("autocell", Opt_autocell),
fsparam_flag ("dyn", Opt_dyn),
fsparam_string("source", Opt_source),
{}
};
static const struct fs_parameter_description afs_fs_parameters = {
.name = "kAFS",
.specs = afs_param_specs,
};
/*
......@@ -190,84 +191,23 @@ static int afs_show_options(struct seq_file *m, struct dentry *root)
}
/*
* parse the mount options
* - this function has been shamelessly adapted from the ext3 fs which
* shamelessly adapted it from the msdos fs
*/
static int afs_parse_options(struct afs_mount_params *params,
char *options, const char **devname)
{
struct afs_cell *cell;
substring_t args[MAX_OPT_ARGS];
char *p;
int token;
_enter("%s", options);
options[PAGE_SIZE - 1] = 0;
while ((p = strsep(&options, ","))) {
if (!*p)
continue;
token = match_token(p, afs_options_list, args);
switch (token) {
case afs_opt_cell:
rcu_read_lock();
cell = afs_lookup_cell_rcu(params->net,
args[0].from,
args[0].to - args[0].from);
rcu_read_unlock();
if (IS_ERR(cell))
return PTR_ERR(cell);
afs_put_cell(params->net, params->cell);
params->cell = cell;
break;
case afs_opt_rwpath:
params->rwpath = true;
break;
case afs_opt_vol:
*devname = args[0].from;
break;
case afs_opt_autocell:
params->autocell = true;
break;
case afs_opt_dyn:
params->dyn_root = true;
break;
default:
printk(KERN_ERR "kAFS:"
" Unknown or invalid mount option: '%s'\n", p);
return -EINVAL;
}
}
_leave(" = 0");
return 0;
}
/*
* parse a device name to get cell name, volume name, volume type and R/W
* selector
* - this can be one of the following:
* Parse the source name to get cell name, volume name, volume type and R/W
* selector.
*
* This can be one of the following:
* "%[cell:]volume[.]" R/W volume
* "#[cell:]volume[.]" R/O or R/W volume (rwpath=0),
* or R/W (rwpath=1) volume
* "#[cell:]volume[.]" R/O or R/W volume (R/O parent),
* or R/W (R/W parent) volume
* "%[cell:]volume.readonly" R/O volume
* "#[cell:]volume.readonly" R/O volume
* "%[cell:]volume.backup" Backup volume
* "#[cell:]volume.backup" Backup volume
*/
static int afs_parse_device_name(struct afs_mount_params *params,
const char *name)
static int afs_parse_source(struct fs_context *fc, struct fs_parameter *param)
{
struct afs_fs_context *ctx = fc->fs_private;
struct afs_cell *cell;
const char *cellname, *suffix;
const char *cellname, *suffix, *name = param->string;
int cellnamesz;
_enter(",%s", name);
......@@ -278,69 +218,149 @@ static int afs_parse_device_name(struct afs_mount_params *params,
}
if ((name[0] != '%' && name[0] != '#') || !name[1]) {
/* To use dynroot, we don't want to have to provide a source */
if (strcmp(name, "none") == 0) {
ctx->no_cell = true;
return 0;
}
printk(KERN_ERR "kAFS: unparsable volume name\n");
return -EINVAL;
}
/* determine the type of volume we're looking for */
params->type = AFSVL_ROVOL;
params->force = false;
if (params->rwpath || name[0] == '%') {
params->type = AFSVL_RWVOL;
params->force = true;
if (name[0] == '%') {
ctx->type = AFSVL_RWVOL;
ctx->force = true;
}
name++;
/* split the cell name out if there is one */
params->volname = strchr(name, ':');
if (params->volname) {
ctx->volname = strchr(name, ':');
if (ctx->volname) {
cellname = name;
cellnamesz = params->volname - name;
params->volname++;
cellnamesz = ctx->volname - name;
ctx->volname++;
} else {
params->volname = name;
ctx->volname = name;
cellname = NULL;
cellnamesz = 0;
}
/* the volume type is further affected by a possible suffix */
suffix = strrchr(params->volname, '.');
suffix = strrchr(ctx->volname, '.');
if (suffix) {
if (strcmp(suffix, ".readonly") == 0) {
params->type = AFSVL_ROVOL;
params->force = true;
ctx->type = AFSVL_ROVOL;
ctx->force = true;
} else if (strcmp(suffix, ".backup") == 0) {
params->type = AFSVL_BACKVOL;
params->force = true;
ctx->type = AFSVL_BACKVOL;
ctx->force = true;
} else if (suffix[1] == 0) {
} else {
suffix = NULL;
}
}
params->volnamesz = suffix ?
suffix - params->volname : strlen(params->volname);
ctx->volnamesz = suffix ?
suffix - ctx->volname : strlen(ctx->volname);
_debug("cell %*.*s [%p]",
cellnamesz, cellnamesz, cellname ?: "", params->cell);
cellnamesz, cellnamesz, cellname ?: "", ctx->cell);
/* lookup the cell record */
if (cellname || !params->cell) {
cell = afs_lookup_cell(params->net, cellname, cellnamesz,
if (cellname) {
cell = afs_lookup_cell(ctx->net, cellname, cellnamesz,
NULL, false);
if (IS_ERR(cell)) {
printk(KERN_ERR "kAFS: unable to lookup cell '%*.*s'\n",
pr_err("kAFS: unable to lookup cell '%*.*s'\n",
cellnamesz, cellnamesz, cellname ?: "");
return PTR_ERR(cell);
}
afs_put_cell(params->net, params->cell);
params->cell = cell;
afs_put_cell(ctx->net, ctx->cell);
ctx->cell = cell;
}
_debug("CELL:%s [%p] VOLUME:%*.*s SUFFIX:%s TYPE:%d%s",
params->cell->name, params->cell,
params->volnamesz, params->volnamesz, params->volname,
suffix ?: "-", params->type, params->force ? " FORCE" : "");
ctx->cell->name, ctx->cell,
ctx->volnamesz, ctx->volnamesz, ctx->volname,
suffix ?: "-", ctx->type, ctx->force ? " FORCE" : "");
fc->source = param->string;
param->string = NULL;
return 0;
}
/*
* Parse a single mount parameter.
*/
static int afs_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
struct fs_parse_result result;
struct afs_fs_context *ctx = fc->fs_private;
int opt;
opt = fs_parse(fc, &afs_fs_parameters, param, &result);
if (opt < 0)
return opt;
switch (opt) {
case Opt_source:
return afs_parse_source(fc, param);
case Opt_autocell:
ctx->autocell = true;
break;
case Opt_dyn:
ctx->dyn_root = true;
break;
default:
return -EINVAL;
}
_leave(" = 0");
return 0;
}
/*
* Validate the options, get the cell key and look up the volume.
*/
static int afs_validate_fc(struct fs_context *fc)
{
struct afs_fs_context *ctx = fc->fs_private;
struct afs_volume *volume;
struct key *key;
if (!ctx->dyn_root) {
if (ctx->no_cell) {
pr_warn("kAFS: Can only specify source 'none' with -o dyn\n");
return -EINVAL;
}
if (!ctx->cell) {
pr_warn("kAFS: No cell specified\n");
return -EDESTADDRREQ;
}
/* We try to do the mount securely. */
key = afs_request_key(ctx->cell);
if (IS_ERR(key))
return PTR_ERR(key);
ctx->key = key;
if (ctx->volume) {
afs_put_volume(ctx->cell, ctx->volume);
ctx->volume = NULL;
}
volume = afs_create_volume(ctx);
if (IS_ERR(volume))
return PTR_ERR(volume);
ctx->volume = volume;
}
return 0;
}
......@@ -348,39 +368,34 @@ static int afs_parse_device_name(struct afs_mount_params *params,
/*
* check a superblock to see if it's the one we're looking for
*/
static int afs_test_super(struct super_block *sb, void *data)
static int afs_test_super(struct super_block *sb, struct fs_context *fc)
{
struct afs_super_info *as1 = data;
struct afs_fs_context *ctx = fc->fs_private;
struct afs_super_info *as = AFS_FS_S(sb);
return (as->net_ns == as1->net_ns &&
return (as->net_ns == fc->net_ns &&
as->volume &&
as->volume->vid == as1->volume->vid &&
as->volume->vid == ctx->volume->vid &&
!as->dyn_root);
}
static int afs_dynroot_test_super(struct super_block *sb, void *data)
static int afs_dynroot_test_super(struct super_block *sb, struct fs_context *fc)
{
struct afs_super_info *as1 = data;
struct afs_super_info *as = AFS_FS_S(sb);
return (as->net_ns == as1->net_ns &&
return (as->net_ns == fc->net_ns &&
as->dyn_root);
}
static int afs_set_super(struct super_block *sb, void *data)
static int afs_set_super(struct super_block *sb, struct fs_context *fc)
{
struct afs_super_info *as = data;
sb->s_fs_info = as;
return set_anon_super(sb, NULL);
}
/*
* fill in the superblock
*/
static int afs_fill_super(struct super_block *sb,
struct afs_mount_params *params)
static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx)
{
struct afs_super_info *as = AFS_FS_S(sb);
struct afs_fid fid;
......@@ -412,13 +427,13 @@ static int afs_fill_super(struct super_block *sb,
fid.vnode = 1;
fid.vnode_hi = 0;
fid.unique = 1;
inode = afs_iget(sb, params->key, &fid, NULL, NULL, NULL);
inode = afs_iget(sb, ctx->key, &fid, NULL, NULL, NULL);
}
if (IS_ERR(inode))
return PTR_ERR(inode);
if (params->autocell || params->dyn_root)
if (ctx->autocell || as->dyn_root)
set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags);
ret = -ENOMEM;
......@@ -443,17 +458,20 @@ static int afs_fill_super(struct super_block *sb,
return ret;
}
static struct afs_super_info *afs_alloc_sbi(struct afs_mount_params *params)
static struct afs_super_info *afs_alloc_sbi(struct fs_context *fc)
{
struct afs_fs_context *ctx = fc->fs_private;
struct afs_super_info *as;
as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
if (as) {
as->net_ns = get_net(params->net_ns);
if (params->dyn_root)
as->net_ns = get_net(fc->net_ns);
if (ctx->dyn_root) {
as->dyn_root = true;
else
as->cell = afs_get_cell(params->cell);
} else {
as->cell = afs_get_cell(ctx->cell);
as->volume = __afs_get_volume(ctx->volume);
}
}
return as;
}
......@@ -475,7 +493,7 @@ static void afs_kill_super(struct super_block *sb)
if (as->dyn_root)
afs_dynroot_depopulate(sb);
/* Clear the callback interests (which will do ilookup5) before
* deactivating the superblock.
*/
......@@ -488,111 +506,103 @@ static void afs_kill_super(struct super_block *sb)
}
/*
* get an AFS superblock
* Get an AFS superblock and root directory.
*/
static struct dentry *afs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *options)
static int afs_get_tree(struct fs_context *fc)
{
struct afs_mount_params params;
struct afs_fs_context *ctx = fc->fs_private;
struct super_block *sb;
struct afs_volume *candidate;
struct key *key;
struct afs_super_info *as;
int ret;
_enter(",,%s,%p", dev_name, options);
memset(&params, 0, sizeof(params));
ret = -EINVAL;
if (current->nsproxy->net_ns != &init_net)
ret = afs_validate_fc(fc);
if (ret)
goto error;
params.net_ns = current->nsproxy->net_ns;
params.net = afs_net(params.net_ns);
/* parse the options and device name */
if (options) {
ret = afs_parse_options(&params, options, &dev_name);
if (ret < 0)
goto error;
}
if (!params.dyn_root) {
ret = afs_parse_device_name(&params, dev_name);
if (ret < 0)
goto error;
/* try and do the mount securely */
key = afs_request_key(params.cell);
if (IS_ERR(key)) {
_leave(" = %ld [key]", PTR_ERR(key));
ret = PTR_ERR(key);
goto error;
}
params.key = key;
}
_enter("");
/* allocate a superblock info record */
ret = -ENOMEM;
as = afs_alloc_sbi(&params);
as = afs_alloc_sbi(fc);
if (!as)
goto error_key;
if (!params.dyn_root) {
/* Assume we're going to need a volume record; at the very
* least we can use it to update the volume record if we have
* one already. This checks that the volume exists within the
* cell.
*/
candidate = afs_create_volume(&params);
if (IS_ERR(candidate)) {
ret = PTR_ERR(candidate);
goto error_as;
}
as->volume = candidate;
}
goto error;
fc->s_fs_info = as;
/* allocate a deviceless superblock */
sb = sget(fs_type,
as->dyn_root ? afs_dynroot_test_super : afs_test_super,
afs_set_super, flags, as);
sb = sget_fc(fc,
as->dyn_root ? afs_dynroot_test_super : afs_test_super,
afs_set_super);
if (IS_ERR(sb)) {
ret = PTR_ERR(sb);
goto error_as;
goto error;
}
if (!sb->s_root) {
/* initial superblock/root creation */
_debug("create");
ret = afs_fill_super(sb, &params);
ret = afs_fill_super(sb, ctx);
if (ret < 0)
goto error_sb;
as = NULL;
sb->s_flags |= SB_ACTIVE;
} else {
_debug("reuse");
ASSERTCMP(sb->s_flags, &, SB_ACTIVE);
afs_destroy_sbi(as);
as = NULL;
}
afs_put_cell(params.net, params.cell);
key_put(params.key);
fc->root = dget(sb->s_root);
_leave(" = 0 [%p]", sb);
return dget(sb->s_root);
return 0;
error_sb:
deactivate_locked_super(sb);
goto error_key;
error_as:
afs_destroy_sbi(as);
error_key:
key_put(params.key);
error:
afs_put_cell(params.net, params.cell);
_leave(" = %d", ret);
return ERR_PTR(ret);
return ret;
}
static void afs_free_fc(struct fs_context *fc)
{
struct afs_fs_context *ctx = fc->fs_private;
afs_destroy_sbi(fc->s_fs_info);
afs_put_volume(ctx->cell, ctx->volume);
afs_put_cell(ctx->net, ctx->cell);
key_put(ctx->key);
kfree(ctx);
}
static const struct fs_context_operations afs_context_ops = {
.free = afs_free_fc,
.parse_param = afs_parse_param,
.get_tree = afs_get_tree,
};
/*
* Set up the filesystem mount context.
*/
static int afs_init_fs_context(struct fs_context *fc)
{
struct afs_fs_context *ctx;
struct afs_cell *cell;
ctx = kzalloc(sizeof(struct afs_fs_context), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
ctx->type = AFSVL_ROVOL;
ctx->net = afs_net(fc->net_ns);
/* Default to the workstation cell. */
rcu_read_lock();
cell = afs_lookup_cell_rcu(ctx->net, NULL, 0);
rcu_read_unlock();
if (IS_ERR(cell))
cell = NULL;
ctx->cell = cell;
fc->fs_private = ctx;
fc->ops = &afs_context_ops;
return 0;
}
/*
......
......@@ -21,7 +21,7 @@ static const char *const afs_voltypes[] = { "R/W", "R/O", "BAK" };
/*
* Allocate a volume record and load it up from a vldb record.
*/
static struct afs_volume *afs_alloc_volume(struct afs_mount_params *params,
static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params,
struct afs_vldb_entry *vldb,
unsigned long type_mask)
{
......@@ -113,7 +113,7 @@ static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell,
* - Rule 3: If parent volume is R/W, then only mount R/W volume unless
* explicitly told otherwise
*/
struct afs_volume *afs_create_volume(struct afs_mount_params *params)
struct afs_volume *afs_create_volume(struct afs_fs_context *params)
{
struct afs_vldb_entry *vldb;
struct afs_volume *volume;
......
......@@ -16,6 +16,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/fs_parser.h>
/*
* Handling of filesystem drivers list.
......@@ -73,6 +74,9 @@ int register_filesystem(struct file_system_type * fs)
int res = 0;
struct file_system_type ** p;
if (fs->parameters && !fs_validate_description(fs->parameters))
return -EINVAL;
BUG_ON(strchr(fs->name, '.'));
if (fs->next)
return -EBUSY;
......
/* Provide a way to create a superblock configuration context within the kernel
* that allows a superblock to be set up prior to mounting.
*
* Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public Licence
* as published by the Free Software Foundation; either version
* 2 of the Licence, or (at your option) any later version.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/magic.h>
#include <linux/security.h>
#include <linux/mnt_namespace.h>
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
#include <net/net_namespace.h>
#include "mount.h"
#include "internal.h"
enum legacy_fs_param {
LEGACY_FS_UNSET_PARAMS,
LEGACY_FS_MONOLITHIC_PARAMS,
LEGACY_FS_INDIVIDUAL_PARAMS,
};
struct legacy_fs_context {
char *legacy_data; /* Data page for legacy filesystems */
size_t data_size;
enum legacy_fs_param param_type;
};
static int legacy_init_fs_context(struct fs_context *fc);
static const struct constant_table common_set_sb_flag[] = {
{ "dirsync", SB_DIRSYNC },
{ "lazytime", SB_LAZYTIME },
{ "mand", SB_MANDLOCK },
{ "posixacl", SB_POSIXACL },
{ "ro", SB_RDONLY },
{ "sync", SB_SYNCHRONOUS },
};
static const struct constant_table common_clear_sb_flag[] = {
{ "async", SB_SYNCHRONOUS },
{ "nolazytime", SB_LAZYTIME },
{ "nomand", SB_MANDLOCK },
{ "rw", SB_RDONLY },
{ "silent", SB_SILENT },
};
static const char *const forbidden_sb_flag[] = {
"bind",
"dev",
"exec",
"move",
"noatime",
"nodev",
"nodiratime",
"noexec",
"norelatime",
"nostrictatime",
"nosuid",
"private",
"rec",
"relatime",
"remount",
"shared",
"slave",
"strictatime",
"suid",
"unbindable",
};
/*
* Check for a common mount option that manipulates s_flags.
*/
static int vfs_parse_sb_flag(struct fs_context *fc, const char *key)
{
unsigned int token;
unsigned int i;
for (i = 0; i < ARRAY_SIZE(forbidden_sb_flag); i++)
if (strcmp(key, forbidden_sb_flag[i]) == 0)
return -EINVAL;
token = lookup_constant(common_set_sb_flag, key, 0);
if (token) {
fc->sb_flags |= token;
fc->sb_flags_mask |= token;
return 0;
}
token = lookup_constant(common_clear_sb_flag, key, 0);
if (token) {
fc->sb_flags &= ~token;
fc->sb_flags_mask |= token;
return 0;
}
return -ENOPARAM;
}
/**
* vfs_parse_fs_param - Add a single parameter to a superblock config
* @fc: The filesystem context to modify
* @param: The parameter
*
* A single mount option in string form is applied to the filesystem context
* being set up. Certain standard options (for example "ro") are translated
* into flag bits without going to the filesystem. The active security module
* is allowed to observe and poach options. Any other options are passed over
* to the filesystem to parse.
*
* This may be called multiple times for a context.
*
* Returns 0 on success and a negative error code on failure. In the event of
* failure, supplementary error information may have been set.
*/
int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param)
{
int ret;
if (!param->key)
return invalf(fc, "Unnamed parameter\n");
ret = vfs_parse_sb_flag(fc, param->key);
if (ret != -ENOPARAM)
return ret;
ret = security_fs_context_parse_param(fc, param);
if (ret != -ENOPARAM)
/* Param belongs to the LSM or is disallowed by the LSM; so
* don't pass to the FS.
*/
return ret;
if (fc->ops->parse_param) {
ret = fc->ops->parse_param(fc, param);
if (ret != -ENOPARAM)
return ret;
}
/* If the filesystem doesn't take any arguments, give it the
* default handling of source.
*/
if (strcmp(param->key, "source") == 0) {
if (param->type != fs_value_is_string)
return invalf(fc, "VFS: Non-string source");
if (fc->source)
return invalf(fc, "VFS: Multiple sources");
fc->source = param->string;
param->string = NULL;
return 0;
}
return invalf(fc, "%s: Unknown parameter '%s'",
fc->fs_type->name, param->key);
}
EXPORT_SYMBOL(vfs_parse_fs_param);
/**
* vfs_parse_fs_string - Convenience function to just parse a string.
*/
int vfs_parse_fs_string(struct fs_context *fc, const char *key,
const char *value, size_t v_size)
{
int ret;
struct fs_parameter param = {
.key = key,
.type = fs_value_is_string,
.size = v_size,
};
if (v_size > 0) {
param.string = kmemdup_nul(value, v_size, GFP_KERNEL);
if (!param.string)
return -ENOMEM;
}
ret = vfs_parse_fs_param(fc, &param);
kfree(param.string);
return ret;
}
EXPORT_SYMBOL(vfs_parse_fs_string);
/**
* generic_parse_monolithic - Parse key[=val][,key[=val]]* mount data
* @ctx: The superblock configuration to fill in.
* @data: The data to parse
*
* Parse a blob of data that's in key[=val][,key[=val]]* form. This can be
* called from the ->monolithic_mount_data() fs_context operation.
*
* Returns 0 on success or the error returned by the ->parse_option() fs_context
* operation on failure.
*/
int generic_parse_monolithic(struct fs_context *fc, void *data)
{
char *options = data, *key;
int ret = 0;
if (!options)
return 0;
ret = security_sb_eat_lsm_opts(options, &fc->security);
if (ret)
return ret;
while ((key = strsep(&options, ",")) != NULL) {
if (*key) {
size_t v_len = 0;
char *value = strchr(key, '=');
if (value) {
if (value == key)
continue;
*value++ = 0;
v_len = strlen(value);
}
ret = vfs_parse_fs_string(fc, key, value, v_len);
if (ret < 0)
break;
}
}
return ret;
}
EXPORT_SYMBOL(generic_parse_monolithic);
/**
* alloc_fs_context - Create a filesystem context.
* @fs_type: The filesystem type.
* @reference: The dentry from which this one derives (or NULL)
* @sb_flags: Filesystem/superblock flags (SB_*)
* @sb_flags_mask: Applicable members of @sb_flags
* @purpose: The purpose that this configuration shall be used for.
*
* Open a filesystem and create a mount context. The mount context is
* initialised with the supplied flags and, if a submount/automount from
* another superblock (referred to by @reference) is supplied, may have
* parameters such as namespaces copied across from that superblock.
*/
static struct fs_context *alloc_fs_context(struct file_system_type *fs_type,
struct dentry *reference,
unsigned int sb_flags,
unsigned int sb_flags_mask,
enum fs_context_purpose purpose)
{
int (*init_fs_context)(struct fs_context *);
struct fs_context *fc;
int ret = -ENOMEM;
fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
if (!fc)
return ERR_PTR(-ENOMEM);
fc->purpose = purpose;
fc->sb_flags = sb_flags;
fc->sb_flags_mask = sb_flags_mask;
fc->fs_type = get_filesystem(fs_type);
fc->cred = get_current_cred();
fc->net_ns = get_net(current->nsproxy->net_ns);
switch (purpose) {
case FS_CONTEXT_FOR_MOUNT:
fc->user_ns = get_user_ns(fc->cred->user_ns);
break;
case FS_CONTEXT_FOR_SUBMOUNT:
fc->user_ns = get_user_ns(reference->d_sb->s_user_ns);
break;
case FS_CONTEXT_FOR_RECONFIGURE:
/* We don't pin any namespaces as the superblock's
* subscriptions cannot be changed at this point.
*/
atomic_inc(&reference->d_sb->s_active);
fc->root = dget(reference);
break;
}
/* TODO: Make all filesystems support this unconditionally */
init_fs_context = fc->fs_type->init_fs_context;
if (!init_fs_context)
init_fs_context = legacy_init_fs_context;
ret = init_fs_context(fc);
if (ret < 0)
goto err_fc;
fc->need_free = true;
return fc;
err_fc:
put_fs_context(fc);
return ERR_PTR(ret);
}
struct fs_context *fs_context_for_mount(struct file_system_type *fs_type,
unsigned int sb_flags)
{
return alloc_fs_context(fs_type, NULL, sb_flags, 0,
FS_CONTEXT_FOR_MOUNT);
}
EXPORT_SYMBOL(fs_context_for_mount);
struct fs_context *fs_context_for_reconfigure(struct dentry *dentry,
unsigned int sb_flags,
unsigned int sb_flags_mask)
{
return alloc_fs_context(dentry->d_sb->s_type, dentry, sb_flags,
sb_flags_mask, FS_CONTEXT_FOR_RECONFIGURE);
}
EXPORT_SYMBOL(fs_context_for_reconfigure);
struct fs_context *fs_context_for_submount(struct file_system_type *type,
struct dentry *reference)
{
return alloc_fs_context(type, reference, 0, 0, FS_CONTEXT_FOR_SUBMOUNT);
}
EXPORT_SYMBOL(fs_context_for_submount);
void fc_drop_locked(struct fs_context *fc)
{
struct super_block *sb = fc->root->d_sb;
dput(fc->root);
fc->root = NULL;
deactivate_locked_super(sb);
}
static void legacy_fs_context_free(struct fs_context *fc);
/**
* vfs_dup_fc_config: Duplicate a filesystem context.
* @src_fc: The context to copy.
*/
struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc)
{
struct fs_context *fc;
int ret;
if (!src_fc->ops->dup)
return ERR_PTR(-EOPNOTSUPP);
fc = kmemdup(src_fc, sizeof(struct fs_context), GFP_KERNEL);
if (!fc)
return ERR_PTR(-ENOMEM);
fc->fs_private = NULL;
fc->s_fs_info = NULL;
fc->source = NULL;
fc->security = NULL;
get_filesystem(fc->fs_type);
get_net(fc->net_ns);
get_user_ns(fc->user_ns);
get_cred(fc->cred);
/* Can't call put until we've called ->dup */
ret = fc->ops->dup(fc, src_fc);
if (ret < 0)
goto err_fc;
ret = security_fs_context_dup(fc, src_fc);
if (ret < 0)
goto err_fc;
return fc;
err_fc:
put_fs_context(fc);
return ERR_PTR(ret);
}
EXPORT_SYMBOL(vfs_dup_fs_context);
#ifdef CONFIG_PRINTK
/**
* logfc - Log a message to a filesystem context
* @fc: The filesystem context to log to.
* @fmt: The format of the buffer.
*/
void logfc(struct fs_context *fc, const char *fmt, ...)
{
va_list va;
va_start(va, fmt);
switch (fmt[0]) {
case 'w':
vprintk_emit(0, LOGLEVEL_WARNING, NULL, 0, fmt, va);
break;
case 'e':
vprintk_emit(0, LOGLEVEL_ERR, NULL, 0, fmt, va);
break;
default:
vprintk_emit(0, LOGLEVEL_NOTICE, NULL, 0, fmt, va);
break;
}
pr_cont("\n");
va_end(va);
}
EXPORT_SYMBOL(logfc);
#endif
/**
* put_fs_context - Dispose of a superblock configuration context.
* @fc: The context to dispose of.
*/
void put_fs_context(struct fs_context *fc)
{
struct super_block *sb;
if (fc->root) {
sb = fc->root->d_sb;
dput(fc->root);
fc->root = NULL;
deactivate_super(sb);
}
if (fc->need_free && fc->ops && fc->ops->free)
fc->ops->free(fc);
security_free_mnt_opts(&fc->security);
put_net(fc->net_ns);
put_user_ns(fc->user_ns);
put_cred(fc->cred);
kfree(fc->subtype);
put_filesystem(fc->fs_type);
kfree(fc->source);
kfree(fc);
}
EXPORT_SYMBOL(put_fs_context);
/*
* Free the config for a filesystem that doesn't support fs_context.
*/
static void legacy_fs_context_free(struct fs_context *fc)
{
struct legacy_fs_context *ctx = fc->fs_private;
if (ctx) {
if (ctx->param_type == LEGACY_FS_INDIVIDUAL_PARAMS)
kfree(ctx->legacy_data);
kfree(ctx);
}
}
/*
* Duplicate a legacy config.
*/
static int legacy_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc)
{
struct legacy_fs_context *ctx;
struct legacy_fs_context *src_ctx = src_fc->fs_private;
ctx = kmemdup(src_ctx, sizeof(*src_ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
if (ctx->param_type == LEGACY_FS_INDIVIDUAL_PARAMS) {
ctx->legacy_data = kmemdup(src_ctx->legacy_data,
src_ctx->data_size, GFP_KERNEL);
if (!ctx->legacy_data) {
kfree(ctx);
return -ENOMEM;
}
}
fc->fs_private = ctx;
return 0;
}
/*
* Add a parameter to a legacy config. We build up a comma-separated list of
* options.
*/
static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
struct legacy_fs_context *ctx = fc->fs_private;
unsigned int size = ctx->data_size;
size_t len = 0;
if (strcmp(param->key, "source") == 0) {
if (param->type != fs_value_is_string)
return invalf(fc, "VFS: Legacy: Non-string source");
if (fc->source)
return invalf(fc, "VFS: Legacy: Multiple sources");
fc->source = param->string;
param->string = NULL;
return 0;
}
if ((fc->fs_type->fs_flags & FS_HAS_SUBTYPE) &&
strcmp(param->key, "subtype") == 0) {
if (param->type != fs_value_is_string)
return invalf(fc, "VFS: Legacy: Non-string subtype");
if (fc->subtype)
return invalf(fc, "VFS: Legacy: Multiple subtype");
fc->subtype = param->string;
param->string = NULL;
return 0;
}
if (ctx->param_type == LEGACY_FS_MONOLITHIC_PARAMS)
return invalf(fc, "VFS: Legacy: Can't mix monolithic and individual options");
switch (param->type) {
case fs_value_is_string:
len = 1 + param->size;
/* Fall through */
case fs_value_is_flag:
len += strlen(param->key);
break;
default:
return invalf(fc, "VFS: Legacy: Parameter type for '%s' not supported",
param->key);
}
if (len > PAGE_SIZE - 2 - size)
return invalf(fc, "VFS: Legacy: Cumulative options too large");
if (strchr(param->key, ',') ||
(param->type == fs_value_is_string &&
memchr(param->string, ',', param->size)))
return invalf(fc, "VFS: Legacy: Option '%s' contained comma",
param->key);
if (!ctx->legacy_data) {
ctx->legacy_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!ctx->legacy_data)
return -ENOMEM;
}
ctx->legacy_data[size++] = ',';
len = strlen(param->key);
memcpy(ctx->legacy_data + size, param->key, len);
size += len;
if (param->type == fs_value_is_string) {
ctx->legacy_data[size++] = '=';
memcpy(ctx->legacy_data + size, param->string, param->size);
size += param->size;
}
ctx->legacy_data[size] = '\0';
ctx->data_size = size;
ctx->param_type = LEGACY_FS_INDIVIDUAL_PARAMS;
return 0;
}
/*
* Add monolithic mount data.
*/
static int legacy_parse_monolithic(struct fs_context *fc, void *data)
{
struct legacy_fs_context *ctx = fc->fs_private;
if (ctx->param_type != LEGACY_FS_UNSET_PARAMS) {
pr_warn("VFS: Can't mix monolithic and individual options\n");
return -EINVAL;
}
ctx->legacy_data = data;
ctx->param_type = LEGACY_FS_MONOLITHIC_PARAMS;
if (!ctx->legacy_data)
return 0;
if (fc->fs_type->fs_flags & FS_BINARY_MOUNTDATA)
return 0;
return security_sb_eat_lsm_opts(ctx->legacy_data, &fc->security);
}
/*
* Get a mountable root with the legacy mount command.
*/
static int legacy_get_tree(struct fs_context *fc)
{
struct legacy_fs_context *ctx = fc->fs_private;
struct super_block *sb;
struct dentry *root;
root = fc->fs_type->mount(fc->fs_type, fc->sb_flags,
fc->source, ctx->legacy_data);
if (IS_ERR(root))
return PTR_ERR(root);
sb = root->d_sb;
BUG_ON(!sb);
fc->root = root;
return 0;
}
/*
* Handle remount.
*/
static int legacy_reconfigure(struct fs_context *fc)
{
struct legacy_fs_context *ctx = fc->fs_private;
struct super_block *sb = fc->root->d_sb;
if (!sb->s_op->remount_fs)
return 0;
return sb->s_op->remount_fs(sb, &fc->sb_flags,
ctx ? ctx->legacy_data : NULL);
}
const struct fs_context_operations legacy_fs_context_ops = {
.free = legacy_fs_context_free,
.dup = legacy_fs_context_dup,
.parse_param = legacy_parse_param,
.parse_monolithic = legacy_parse_monolithic,
.get_tree = legacy_get_tree,
.reconfigure = legacy_reconfigure,
};
/*
* Initialise a legacy context for a filesystem that doesn't support
* fs_context.
*/
static int legacy_init_fs_context(struct fs_context *fc)
{
fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL);
if (!fc->fs_private)
return -ENOMEM;
fc->ops = &legacy_fs_context_ops;
return 0;
}
int parse_monolithic_mount_data(struct fs_context *fc, void *data)
{
int (*monolithic_mount_data)(struct fs_context *, void *);
monolithic_mount_data = fc->ops->parse_monolithic;
if (!monolithic_mount_data)
monolithic_mount_data = generic_parse_monolithic;
return monolithic_mount_data(fc, data);
}
/* Filesystem parameter parser.
*
* Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public Licence
* as published by the Free Software Foundation; either version
* 2 of the Licence, or (at your option) any later version.
*/
#include <linux/export.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/slab.h>
#include <linux/security.h>
#include <linux/namei.h>
#include "internal.h"
static const struct constant_table bool_names[] = {
{ "0", false },
{ "1", true },
{ "false", false },
{ "no", false },
{ "true", true },
{ "yes", true },
};
/**
* lookup_constant - Look up a constant by name in an ordered table
* @tbl: The table of constants to search.
* @tbl_size: The size of the table.
* @name: The name to look up.
* @not_found: The value to return if the name is not found.
*/
int __lookup_constant(const struct constant_table *tbl, size_t tbl_size,
const char *name, int not_found)
{
unsigned int i;
for (i = 0; i < tbl_size; i++)
if (strcmp(name, tbl[i].name) == 0)
return tbl[i].value;
return not_found;
}
EXPORT_SYMBOL(__lookup_constant);
static const struct fs_parameter_spec *fs_lookup_key(
const struct fs_parameter_description *desc,
const char *name)
{
const struct fs_parameter_spec *p;
if (!desc->specs)
return NULL;
for (p = desc->specs; p->name; p++)
if (strcmp(p->name, name) == 0)
return p;
return NULL;
}
/*
* fs_parse - Parse a filesystem configuration parameter
* @fc: The filesystem context to log errors through.
* @desc: The parameter description to use.
* @param: The parameter.
* @result: Where to place the result of the parse
*
* Parse a filesystem configuration parameter and attempt a conversion for a
* simple parameter for which this is requested. If successful, the determined
* parameter ID is placed into @result->key, the desired type is indicated in
* @result->t and any converted value is placed into an appropriate member of
* the union in @result.
*
* The function returns the parameter number if the parameter was matched,
* -ENOPARAM if it wasn't matched and @desc->ignore_unknown indicated that
* unknown parameters are okay and -EINVAL if there was a conversion issue or
* the parameter wasn't recognised and unknowns aren't okay.
*/
int fs_parse(struct fs_context *fc,
const struct fs_parameter_description *desc,
struct fs_parameter *param,
struct fs_parse_result *result)
{
const struct fs_parameter_spec *p;
const struct fs_parameter_enum *e;
int ret = -ENOPARAM, b;
result->has_value = !!param->string;
result->negated = false;
result->uint_64 = 0;
p = fs_lookup_key(desc, param->key);
if (!p) {
/* If we didn't find something that looks like "noxxx", see if
* "xxx" takes the "no"-form negative - but only if there
* wasn't an value.
*/
if (result->has_value)
goto unknown_parameter;
if (param->key[0] != 'n' || param->key[1] != 'o' || !param->key[2])
goto unknown_parameter;
p = fs_lookup_key(desc, param->key + 2);
if (!p)
goto unknown_parameter;
if (!(p->flags & fs_param_neg_with_no))
goto unknown_parameter;
result->boolean = false;
result->negated = true;
}
if (p->flags & fs_param_deprecated)
warnf(fc, "%s: Deprecated parameter '%s'",
desc->name, param->key);
if (result->negated)
goto okay;
/* Certain parameter types only take a string and convert it. */
switch (p->type) {
case __fs_param_wasnt_defined:
return -EINVAL;
case fs_param_is_u32:
case fs_param_is_u32_octal:
case fs_param_is_u32_hex:
case fs_param_is_s32:
case fs_param_is_u64:
case fs_param_is_enum:
case fs_param_is_string:
if (param->type != fs_value_is_string)
goto bad_value;
if (!result->has_value) {
if (p->flags & fs_param_v_optional)
goto okay;
goto bad_value;
}
/* Fall through */
default:
break;
}
/* Try to turn the type we were given into the type desired by the
* parameter and give an error if we can't.
*/
switch (p->type) {
case fs_param_is_flag:
if (param->type != fs_value_is_flag &&
(param->type != fs_value_is_string || result->has_value))
return invalf(fc, "%s: Unexpected value for '%s'",
desc->name, param->key);
result->boolean = true;
goto okay;
case fs_param_is_bool:
switch (param->type) {
case fs_value_is_flag:
result->boolean = true;
goto okay;
case fs_value_is_string:
if (param->size == 0) {
result->boolean = true;
goto okay;
}
b = lookup_constant(bool_names, param->string, -1);
if (b == -1)
goto bad_value;
result->boolean = b;
goto okay;
default:
goto bad_value;
}
case fs_param_is_u32:
ret = kstrtouint(param->string, 0, &result->uint_32);
goto maybe_okay;
case fs_param_is_u32_octal:
ret = kstrtouint(param->string, 8, &result->uint_32);
goto maybe_okay;
case fs_param_is_u32_hex:
ret = kstrtouint(param->string, 16, &result->uint_32);
goto maybe_okay;
case fs_param_is_s32:
ret = kstrtoint(param->string, 0, &result->int_32);
goto maybe_okay;
case fs_param_is_u64:
ret = kstrtoull(param->string, 0, &result->uint_64);
goto maybe_okay;
case fs_param_is_enum:
for (e = desc->enums; e->name[0]; e++) {
if (e->opt == p->opt &&
strcmp(e->name, param->string) == 0) {
result->uint_32 = e->value;
goto okay;
}
}
goto bad_value;
case fs_param_is_string:
goto okay;
case fs_param_is_blob:
if (param->type != fs_value_is_blob)
goto bad_value;
goto okay;
case fs_param_is_fd: {
if (param->type != fs_value_is_file)
goto bad_value;
goto okay;
}
case fs_param_is_blockdev:
case fs_param_is_path:
goto okay;
default:
BUG();
}
maybe_okay:
if (ret < 0)
goto bad_value;
okay:
return p->opt;
bad_value:
return invalf(fc, "%s: Bad value for '%s'", desc->name, param->key);
unknown_parameter:
return -ENOPARAM;
}
EXPORT_SYMBOL(fs_parse);
/**
* fs_lookup_param - Look up a path referred to by a parameter
* @fc: The filesystem context to log errors through.
* @param: The parameter.
* @want_bdev: T if want a blockdev
* @_path: The result of the lookup
*/
int fs_lookup_param(struct fs_context *fc,
struct fs_parameter *param,
bool want_bdev,
struct path *_path)
{
struct filename *f;
unsigned int flags = 0;
bool put_f;
int ret;
switch (param->type) {
case fs_value_is_string:
f = getname_kernel(param->string);
if (IS_ERR(f))
return PTR_ERR(f);
put_f = true;
break;
case fs_value_is_filename_empty:
flags = LOOKUP_EMPTY;
/* Fall through */
case fs_value_is_filename:
f = param->name;
put_f = false;
break;
default:
return invalf(fc, "%s: not usable as path", param->key);
}
ret = filename_lookup(param->dirfd, f, flags, _path, NULL);
if (ret < 0) {
errorf(fc, "%s: Lookup failure for '%s'", param->key, f->name);
goto out;
}
if (want_bdev &&
!S_ISBLK(d_backing_inode(_path->dentry)->i_mode)) {
path_put(_path);
_path->dentry = NULL;
_path->mnt = NULL;
errorf(fc, "%s: Non-blockdev passed as '%s'",
param->key, f->name);
ret = -ENOTBLK;
}
out:
if (put_f)
putname(f);
return ret;
}
EXPORT_SYMBOL(fs_lookup_param);
#ifdef CONFIG_VALIDATE_FS_PARSER
/**
* validate_constant_table - Validate a constant table
* @name: Name to use in reporting
* @tbl: The constant table to validate.
* @tbl_size: The size of the table.
* @low: The lowest permissible value.
* @high: The highest permissible value.
* @special: One special permissible value outside of the range.
*/
bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size,
int low, int high, int special)
{
size_t i;
bool good = true;
if (tbl_size == 0) {
pr_warn("VALIDATE C-TBL: Empty\n");
return true;
}
for (i = 0; i < tbl_size; i++) {
if (!tbl[i].name) {
pr_err("VALIDATE C-TBL[%zu]: Null\n", i);
good = false;
} else if (i > 0 && tbl[i - 1].name) {
int c = strcmp(tbl[i-1].name, tbl[i].name);
if (c == 0) {
pr_err("VALIDATE C-TBL[%zu]: Duplicate %s\n",
i, tbl[i].name);
good = false;
}
if (c > 0) {
pr_err("VALIDATE C-TBL[%zu]: Missorted %s>=%s\n",
i, tbl[i-1].name, tbl[i].name);
good = false;
}
}
if (tbl[i].value != special &&
(tbl[i].value < low || tbl[i].value > high)) {
pr_err("VALIDATE C-TBL[%zu]: %s->%d const out of range (%d-%d)\n",
i, tbl[i].name, tbl[i].value, low, high);
good = false;
}
}
return good;
}
/**
* fs_validate_description - Validate a parameter description
* @desc: The parameter description to validate.
*/
bool fs_validate_description(const struct fs_parameter_description *desc)
{
const struct fs_parameter_spec *param, *p2;
const struct fs_parameter_enum *e;
const char *name = desc->name;
unsigned int nr_params = 0;
bool good = true, enums = false;
pr_notice("*** VALIDATE %s ***\n", name);
if (!name[0]) {
pr_err("VALIDATE Parser: No name\n");
name = "Unknown";
good = false;
}
if (desc->specs) {
for (param = desc->specs; param->name; param++) {
enum fs_parameter_type t = param->type;
/* Check that the type is in range */
if (t == __fs_param_wasnt_defined ||
t >= nr__fs_parameter_type) {
pr_err("VALIDATE %s: PARAM[%s] Bad type %u\n",
name, param->name, t);
good = false;
} else if (t == fs_param_is_enum) {
enums = true;
}
/* Check for duplicate parameter names */
for (p2 = desc->specs; p2 < param; p2++) {
if (strcmp(param->name, p2->name) == 0) {
pr_err("VALIDATE %s: PARAM[%s]: Duplicate\n",
name, param->name);
good = false;
}
}
}
nr_params = param - desc->specs;
}
if (desc->enums) {
if (!nr_params) {
pr_err("VALIDATE %s: Enum table but no parameters\n",
name);
good = false;
goto no_enums;
}
if (!enums) {
pr_err("VALIDATE %s: Enum table but no enum-type values\n",
name);
good = false;
goto no_enums;
}
for (e = desc->enums; e->name[0]; e++) {
/* Check that all entries in the enum table have at
* least one parameter that uses them.
*/
for (param = desc->specs; param->name; param++) {
if (param->opt == e->opt &&
param->type != fs_param_is_enum) {
pr_err("VALIDATE %s: e[%lu] enum val for %s\n",
name, e - desc->enums, param->name);
good = false;
}
}
}
/* Check that all enum-type parameters have at least one enum
* value in the enum table.
*/
for (param = desc->specs; param->name; param++) {
if (param->type != fs_param_is_enum)
continue;
for (e = desc->enums; e->name[0]; e++)
if (e->opt == param->opt)
break;
if (!e->name[0]) {
pr_err("VALIDATE %s: PARAM[%s] enum with no values\n",
name, param->name);
good = false;
}
}
} else {
if (enums) {
pr_err("VALIDATE %s: enum-type values, but no enum table\n",
name);
good = false;
goto no_enums;
}
}
no_enums:
return good;
}
#endif /* CONFIG_VALIDATE_FS_PARSER */
......@@ -27,7 +27,7 @@
#include <linux/backing-dev.h>
#include <linux/hugetlb.h>
#include <linux/pagevec.h>
#include <linux/parser.h>
#include <linux/fs_parser.h>
#include <linux/mman.h>
#include <linux/slab.h>
#include <linux/dnotify.h>
......@@ -45,11 +45,17 @@ const struct file_operations hugetlbfs_file_operations;
static const struct inode_operations hugetlbfs_dir_inode_operations;
static const struct inode_operations hugetlbfs_inode_operations;
struct hugetlbfs_config {
enum hugetlbfs_size_type { NO_SIZE, SIZE_STD, SIZE_PERCENT };
struct hugetlbfs_fs_context {
struct hstate *hstate;
unsigned long long max_size_opt;
unsigned long long min_size_opt;
long max_hpages;
long nr_inodes;
long min_hpages;
enum hugetlbfs_size_type max_val_type;
enum hugetlbfs_size_type min_val_type;
kuid_t uid;
kgid_t gid;
umode_t mode;
......@@ -57,22 +63,30 @@ struct hugetlbfs_config {
int sysctl_hugetlb_shm_group;
enum {
Opt_size, Opt_nr_inodes,
Opt_mode, Opt_uid, Opt_gid,
Opt_pagesize, Opt_min_size,
Opt_err,
enum hugetlb_param {
Opt_gid,
Opt_min_size,
Opt_mode,
Opt_nr_inodes,
Opt_pagesize,
Opt_size,
Opt_uid,
};
static const match_table_t tokens = {
{Opt_size, "size=%s"},
{Opt_nr_inodes, "nr_inodes=%s"},
{Opt_mode, "mode=%o"},
{Opt_uid, "uid=%u"},
{Opt_gid, "gid=%u"},
{Opt_pagesize, "pagesize=%s"},
{Opt_min_size, "min_size=%s"},
{Opt_err, NULL},
static const struct fs_parameter_spec hugetlb_param_specs[] = {
fsparam_u32 ("gid", Opt_gid),
fsparam_string("min_size", Opt_min_size),
fsparam_u32 ("mode", Opt_mode),
fsparam_string("nr_inodes", Opt_nr_inodes),
fsparam_string("pagesize", Opt_pagesize),
fsparam_string("size", Opt_size),
fsparam_u32 ("uid", Opt_uid),
{}
};
static const struct fs_parameter_description hugetlb_fs_parameters = {
.name = "hugetlbfs",
.specs = hugetlb_param_specs,
};
#ifdef CONFIG_NUMA
......@@ -708,16 +722,16 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
}
static struct inode *hugetlbfs_get_root(struct super_block *sb,
struct hugetlbfs_config *config)
struct hugetlbfs_fs_context *ctx)
{
struct inode *inode;
inode = new_inode(sb);
if (inode) {
inode->i_ino = get_next_ino();
inode->i_mode = S_IFDIR | config->mode;
inode->i_uid = config->uid;
inode->i_gid = config->gid;
inode->i_mode = S_IFDIR | ctx->mode;
inode->i_uid = ctx->uid;
inode->i_gid = ctx->gid;
inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
inode->i_op = &hugetlbfs_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
......@@ -1093,8 +1107,6 @@ static const struct super_operations hugetlbfs_ops = {
.show_options = hugetlbfs_show_options,
};
enum hugetlbfs_size_type { NO_SIZE, SIZE_STD, SIZE_PERCENT };
/*
* Convert size option passed from command line to number of huge pages
* in the pool specified by hstate. Size option could be in bytes
......@@ -1117,170 +1129,151 @@ hugetlbfs_size_to_hpages(struct hstate *h, unsigned long long size_opt,
return size_opt;
}
static int
hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
/*
* Parse one mount parameter.
*/
static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
char *p, *rest;
substring_t args[MAX_OPT_ARGS];
int option;
unsigned long long max_size_opt = 0, min_size_opt = 0;
enum hugetlbfs_size_type max_val_type = NO_SIZE, min_val_type = NO_SIZE;
if (!options)
struct hugetlbfs_fs_context *ctx = fc->fs_private;
struct fs_parse_result result;
char *rest;
unsigned long ps;
int opt;
opt = fs_parse(fc, &hugetlb_fs_parameters, param, &result);
if (opt < 0)
return opt;
switch (opt) {
case Opt_uid:
ctx->uid = make_kuid(current_user_ns(), result.uint_32);
if (!uid_valid(ctx->uid))
goto bad_val;
return 0;
while ((p = strsep(&options, ",")) != NULL) {
int token;
if (!*p)
continue;
case Opt_gid:
ctx->gid = make_kgid(current_user_ns(), result.uint_32);
if (!gid_valid(ctx->gid))
goto bad_val;
return 0;
token = match_token(p, tokens, args);
switch (token) {
case Opt_uid:
if (match_int(&args[0], &option))
goto bad_val;
pconfig->uid = make_kuid(current_user_ns(), option);
if (!uid_valid(pconfig->uid))
goto bad_val;
break;
case Opt_mode:
ctx->mode = result.uint_32 & 01777U;
return 0;
case Opt_gid:
if (match_int(&args[0], &option))
goto bad_val;
pconfig->gid = make_kgid(current_user_ns(), option);
if (!gid_valid(pconfig->gid))
goto bad_val;
break;
case Opt_size:
/* memparse() will accept a K/M/G without a digit */
if (!isdigit(param->string[0]))
goto bad_val;
ctx->max_size_opt = memparse(param->string, &rest);
ctx->max_val_type = SIZE_STD;
if (*rest == '%')
ctx->max_val_type = SIZE_PERCENT;
return 0;
case Opt_mode:
if (match_octal(&args[0], &option))
goto bad_val;
pconfig->mode = option & 01777U;
break;
case Opt_nr_inodes:
/* memparse() will accept a K/M/G without a digit */
if (!isdigit(param->string[0]))
goto bad_val;
ctx->nr_inodes = memparse(param->string, &rest);
return 0;
case Opt_size: {
/* memparse() will accept a K/M/G without a digit */
if (!isdigit(*args[0].from))
goto bad_val;
max_size_opt = memparse(args[0].from, &rest);
max_val_type = SIZE_STD;
if (*rest == '%')
max_val_type = SIZE_PERCENT;
break;
case Opt_pagesize:
ps = memparse(param->string, &rest);
ctx->hstate = size_to_hstate(ps);
if (!ctx->hstate) {
pr_err("Unsupported page size %lu MB\n", ps >> 20);
return -EINVAL;
}
return 0;
case Opt_nr_inodes:
/* memparse() will accept a K/M/G without a digit */
if (!isdigit(*args[0].from))
goto bad_val;
pconfig->nr_inodes = memparse(args[0].from, &rest);
break;
case Opt_min_size:
/* memparse() will accept a K/M/G without a digit */
if (!isdigit(param->string[0]))
goto bad_val;
ctx->min_size_opt = memparse(param->string, &rest);
ctx->min_val_type = SIZE_STD;
if (*rest == '%')
ctx->min_val_type = SIZE_PERCENT;
return 0;
case Opt_pagesize: {
unsigned long ps;
ps = memparse(args[0].from, &rest);
pconfig->hstate = size_to_hstate(ps);
if (!pconfig->hstate) {
pr_err("Unsupported page size %lu MB\n",
ps >> 20);
return -EINVAL;
}
break;
}
default:
return -EINVAL;
}
case Opt_min_size: {
/* memparse() will accept a K/M/G without a digit */
if (!isdigit(*args[0].from))
goto bad_val;
min_size_opt = memparse(args[0].from, &rest);
min_val_type = SIZE_STD;
if (*rest == '%')
min_val_type = SIZE_PERCENT;
break;
}
bad_val:
return invalf(fc, "hugetlbfs: Bad value '%s' for mount option '%s'\n",
param->string, param->key);
}
default:
pr_err("Bad mount option: \"%s\"\n", p);
return -EINVAL;
break;
}
}
/*
* Validate the parsed options.
*/
static int hugetlbfs_validate(struct fs_context *fc)
{
struct hugetlbfs_fs_context *ctx = fc->fs_private;
/*
* Use huge page pool size (in hstate) to convert the size
* options to number of huge pages. If NO_SIZE, -1 is returned.
*/
pconfig->max_hpages = hugetlbfs_size_to_hpages(pconfig->hstate,
max_size_opt, max_val_type);
pconfig->min_hpages = hugetlbfs_size_to_hpages(pconfig->hstate,
min_size_opt, min_val_type);
ctx->max_hpages = hugetlbfs_size_to_hpages(ctx->hstate,
ctx->max_size_opt,
ctx->max_val_type);
ctx->min_hpages = hugetlbfs_size_to_hpages(ctx->hstate,
ctx->min_size_opt,
ctx->min_val_type);
/*
* If max_size was specified, then min_size must be smaller
*/
if (max_val_type > NO_SIZE &&
pconfig->min_hpages > pconfig->max_hpages) {
pr_err("minimum size can not be greater than maximum size\n");
if (ctx->max_val_type > NO_SIZE &&
ctx->min_hpages > ctx->max_hpages) {
pr_err("Minimum size can not be greater than maximum size\n");
return -EINVAL;
}
return 0;
bad_val:
pr_err("Bad value '%s' for mount option '%s'\n", args[0].from, p);
return -EINVAL;
}
static int
hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
hugetlbfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
int ret;
struct hugetlbfs_config config;
struct hugetlbfs_fs_context *ctx = fc->fs_private;
struct hugetlbfs_sb_info *sbinfo;
config.max_hpages = -1; /* No limit on size by default */
config.nr_inodes = -1; /* No limit on number of inodes by default */
config.uid = current_fsuid();
config.gid = current_fsgid();
config.mode = 0755;
config.hstate = &default_hstate;
config.min_hpages = -1; /* No default minimum size */
ret = hugetlbfs_parse_options(data, &config);
if (ret)
return ret;
sbinfo = kmalloc(sizeof(struct hugetlbfs_sb_info), GFP_KERNEL);
if (!sbinfo)
return -ENOMEM;
sb->s_fs_info = sbinfo;
sbinfo->hstate = config.hstate;
spin_lock_init(&sbinfo->stat_lock);
sbinfo->max_inodes = config.nr_inodes;
sbinfo->free_inodes = config.nr_inodes;
sbinfo->spool = NULL;
sbinfo->uid = config.uid;
sbinfo->gid = config.gid;
sbinfo->mode = config.mode;
sbinfo->hstate = ctx->hstate;
sbinfo->max_inodes = ctx->nr_inodes;
sbinfo->free_inodes = ctx->nr_inodes;
sbinfo->spool = NULL;
sbinfo->uid = ctx->uid;
sbinfo->gid = ctx->gid;
sbinfo->mode = ctx->mode;
/*
* Allocate and initialize subpool if maximum or minimum size is
* specified. Any needed reservations (for minimim size) are taken
* taken when the subpool is created.
*/
if (config.max_hpages != -1 || config.min_hpages != -1) {
sbinfo->spool = hugepage_new_subpool(config.hstate,
config.max_hpages,
config.min_hpages);
if (ctx->max_hpages != -1 || ctx->min_hpages != -1) {
sbinfo->spool = hugepage_new_subpool(ctx->hstate,
ctx->max_hpages,
ctx->min_hpages);
if (!sbinfo->spool)
goto out_free;
}
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_blocksize = huge_page_size(config.hstate);
sb->s_blocksize_bits = huge_page_shift(config.hstate);
sb->s_blocksize = huge_page_size(ctx->hstate);
sb->s_blocksize_bits = huge_page_shift(ctx->hstate);
sb->s_magic = HUGETLBFS_MAGIC;
sb->s_op = &hugetlbfs_ops;
sb->s_time_gran = 1;
sb->s_root = d_make_root(hugetlbfs_get_root(sb, &config));
sb->s_root = d_make_root(hugetlbfs_get_root(sb, ctx));
if (!sb->s_root)
goto out_free;
return 0;
......@@ -1290,16 +1283,52 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
return -ENOMEM;
}
static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
static int hugetlbfs_get_tree(struct fs_context *fc)
{
int err = hugetlbfs_validate(fc);
if (err)
return err;
return vfs_get_super(fc, vfs_get_independent_super, hugetlbfs_fill_super);
}
static void hugetlbfs_fs_context_free(struct fs_context *fc)
{
kfree(fc->fs_private);
}
static const struct fs_context_operations hugetlbfs_fs_context_ops = {
.free = hugetlbfs_fs_context_free,
.parse_param = hugetlbfs_parse_param,
.get_tree = hugetlbfs_get_tree,
};
static int hugetlbfs_init_fs_context(struct fs_context *fc)
{
return mount_nodev(fs_type, flags, data, hugetlbfs_fill_super);
struct hugetlbfs_fs_context *ctx;
ctx = kzalloc(sizeof(struct hugetlbfs_fs_context), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
ctx->max_hpages = -1; /* No limit on size by default */
ctx->nr_inodes = -1; /* No limit on number of inodes by default */
ctx->uid = current_fsuid();
ctx->gid = current_fsgid();
ctx->mode = 0755;
ctx->hstate = &default_hstate;
ctx->min_hpages = -1; /* No default minimum size */
ctx->max_val_type = NO_SIZE;
ctx->min_val_type = NO_SIZE;
fc->fs_private = ctx;
fc->ops = &hugetlbfs_fs_context_ops;
return 0;
}
static struct file_system_type hugetlbfs_fs_type = {
.name = "hugetlbfs",
.mount = hugetlbfs_mount,
.kill_sb = kill_litter_super,
.name = "hugetlbfs",
.init_fs_context = hugetlbfs_init_fs_context,
.parameters = &hugetlb_fs_parameters,
.kill_sb = kill_litter_super,
};
static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE];
......@@ -1384,8 +1413,29 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
return file;
}
static struct vfsmount *__init mount_one_hugetlbfs(struct hstate *h)
{
struct fs_context *fc;
struct vfsmount *mnt;
fc = fs_context_for_mount(&hugetlbfs_fs_type, SB_KERNMOUNT);
if (IS_ERR(fc)) {
mnt = ERR_CAST(fc);
} else {
struct hugetlbfs_fs_context *ctx = fc->fs_private;
ctx->hstate = h;
mnt = fc_mount(fc);
put_fs_context(fc);
}
if (IS_ERR(mnt))
pr_err("Cannot mount internal hugetlbfs for page size %uK",
1U << (h->order + PAGE_SHIFT - 10));
return mnt;
}
static int __init init_hugetlbfs_fs(void)
{
struct vfsmount *mnt;
struct hstate *h;
int error;
int i;
......@@ -1408,24 +1458,16 @@ static int __init init_hugetlbfs_fs(void)
i = 0;
for_each_hstate(h) {
char buf[50];
unsigned ps_kb = 1U << (h->order + PAGE_SHIFT - 10);
snprintf(buf, sizeof(buf), "pagesize=%uK", ps_kb);
hugetlbfs_vfsmount[i] = kern_mount_data(&hugetlbfs_fs_type,
buf);
if (IS_ERR(hugetlbfs_vfsmount[i])) {
pr_err("Cannot mount internal hugetlbfs for "
"page size %uK", ps_kb);
error = PTR_ERR(hugetlbfs_vfsmount[i]);
hugetlbfs_vfsmount[i] = NULL;
mnt = mount_one_hugetlbfs(h);
if (IS_ERR(mnt) && i == 0) {
error = PTR_ERR(mnt);
goto out;
}
hugetlbfs_vfsmount[i] = mnt;
i++;
}
/* Non default hstates are optional */
if (!IS_ERR_OR_NULL(hugetlbfs_vfsmount[default_hstate_idx]))
return 0;
return 0;
out:
kmem_cache_destroy(hugetlbfs_inode_cachep);
......
......@@ -17,6 +17,7 @@ struct linux_binprm;
struct path;
struct mount;
struct shrink_control;
struct fs_context;
/*
* block_dev.c
......@@ -51,9 +52,17 @@ int __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
*/
extern void __init chrdev_init(void);
/*
* fs_context.c
*/
extern int parse_monolithic_mount_data(struct fs_context *, void *);
extern void fc_drop_locked(struct fs_context *);
/*
* namei.c
*/
extern int filename_lookup(int dfd, struct filename *name, unsigned flags,
struct path *path, struct path *root);
extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *);
extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
const char *, unsigned int, struct path *);
......@@ -99,10 +108,8 @@ extern struct file *alloc_empty_file_noaccount(int, const struct cred *);
/*
* super.c
*/
extern int do_remount_sb(struct super_block *, int, void *, int);
extern int reconfigure_super(struct fs_context *);
extern bool trylock_super(struct super_block *sb);
extern struct dentry *mount_fs(struct file_system_type *,
int, const char *, void *);
extern struct super_block *user_get_super(dev_t);
/*
......
......@@ -17,6 +17,7 @@
#include <linux/xattr.h>
#include <linux/kernfs.h>
#include <linux/fs_context.h>
struct kernfs_iattrs {
struct iattr ia_iattr;
......
......@@ -22,16 +22,6 @@
struct kmem_cache *kernfs_node_cache, *kernfs_iattrs_cache;
static int kernfs_sop_remount_fs(struct super_block *sb, int *flags, char *data)
{
struct kernfs_root *root = kernfs_info(sb)->root;
struct kernfs_syscall_ops *scops = root->syscall_ops;
if (scops && scops->remount_fs)
return scops->remount_fs(root, flags, data);
return 0;
}
static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry)
{
struct kernfs_root *root = kernfs_root(kernfs_dentry_node(dentry));
......@@ -60,7 +50,6 @@ const struct super_operations kernfs_sops = {
.drop_inode = generic_delete_inode,
.evict_inode = kernfs_evict_inode,
.remount_fs = kernfs_sop_remount_fs,
.show_options = kernfs_sop_show_options,
.show_path = kernfs_sop_show_path,
};
......@@ -222,7 +211,7 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn,
} while (true);
}
static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *kfc)
{
struct kernfs_super_info *info = kernfs_info(sb);
struct inode *inode;
......@@ -233,7 +222,7 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV;
sb->s_blocksize = PAGE_SIZE;
sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = magic;
sb->s_magic = kfc->magic;
sb->s_op = &kernfs_sops;
sb->s_xattr = kernfs_xattr_handlers;
if (info->root->flags & KERNFS_ROOT_SUPPORT_EXPORTOP)
......@@ -263,21 +252,20 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
return 0;
}
static int kernfs_test_super(struct super_block *sb, void *data)
static int kernfs_test_super(struct super_block *sb, struct fs_context *fc)
{
struct kernfs_super_info *sb_info = kernfs_info(sb);
struct kernfs_super_info *info = data;
struct kernfs_super_info *info = fc->s_fs_info;
return sb_info->root == info->root && sb_info->ns == info->ns;
}
static int kernfs_set_super(struct super_block *sb, void *data)
static int kernfs_set_super(struct super_block *sb, struct fs_context *fc)
{
int error;
error = set_anon_super(sb, data);
if (!error)
sb->s_fs_info = data;
return error;
struct kernfs_fs_context *kfc = fc->fs_private;
kfc->ns_tag = NULL;
return set_anon_super_fc(sb, fc);
}
/**
......@@ -294,63 +282,60 @@ const void *kernfs_super_ns(struct super_block *sb)
}
/**
* kernfs_mount_ns - kernfs mount helper
* @fs_type: file_system_type of the fs being mounted
* @flags: mount flags specified for the mount
* @root: kernfs_root of the hierarchy being mounted
* @magic: file system specific magic number
* @new_sb_created: tell the caller if we allocated a new superblock
* @ns: optional namespace tag of the mount
* kernfs_get_tree - kernfs filesystem access/retrieval helper
* @fc: The filesystem context.
*
* This is to be called from each kernfs user's file_system_type->mount()
* implementation, which should pass through the specified @fs_type and
* @flags, and specify the hierarchy and namespace tag to mount via @root
* and @ns, respectively.
*
* The return value can be passed to the vfs layer verbatim.
* This is to be called from each kernfs user's fs_context->ops->get_tree()
* implementation, which should set the specified ->@fs_type and ->@flags, and
* specify the hierarchy and namespace tag to mount via ->@root and ->@ns,
* respectively.
*/
struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
struct kernfs_root *root, unsigned long magic,
bool *new_sb_created, const void *ns)
int kernfs_get_tree(struct fs_context *fc)
{
struct kernfs_fs_context *kfc = fc->fs_private;
struct super_block *sb;
struct kernfs_super_info *info;
int error;
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return ERR_PTR(-ENOMEM);
return -ENOMEM;
info->root = root;
info->ns = ns;
info->root = kfc->root;
info->ns = kfc->ns_tag;
INIT_LIST_HEAD(&info->node);
sb = sget_userns(fs_type, kernfs_test_super, kernfs_set_super, flags,
&init_user_ns, info);
if (IS_ERR(sb) || sb->s_fs_info != info)
kfree(info);
fc->s_fs_info = info;
sb = sget_fc(fc, kernfs_test_super, kernfs_set_super);
if (IS_ERR(sb))
return ERR_CAST(sb);
if (new_sb_created)
*new_sb_created = !sb->s_root;
return PTR_ERR(sb);
if (!sb->s_root) {
struct kernfs_super_info *info = kernfs_info(sb);
error = kernfs_fill_super(sb, magic);
kfc->new_sb_created = true;
error = kernfs_fill_super(sb, kfc);
if (error) {
deactivate_locked_super(sb);
return ERR_PTR(error);
return error;
}
sb->s_flags |= SB_ACTIVE;
mutex_lock(&kernfs_mutex);
list_add(&info->node, &root->supers);
list_add(&info->node, &info->root->supers);
mutex_unlock(&kernfs_mutex);
}
return dget(sb->s_root);
fc->root = dget(sb->s_root);
return 0;
}
void kernfs_free_fs_context(struct fs_context *fc)
{
/* Note that we don't deal with kfc->ns_tag here. */
kfree(fc->s_fs_info);
fc->s_fs_info = NULL;
}
/**
......@@ -377,36 +362,6 @@ void kernfs_kill_sb(struct super_block *sb)
kfree(info);
}
/**
* kernfs_pin_sb: try to pin the superblock associated with a kernfs_root
* @kernfs_root: the kernfs_root in question
* @ns: the namespace tag
*
* Pin the superblock so the superblock won't be destroyed in subsequent
* operations. This can be used to block ->kill_sb() which may be useful
* for kernfs users which dynamically manage superblocks.
*
* Returns NULL if there's no superblock associated to this kernfs_root, or
* -EINVAL if the superblock is being freed.
*/
struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns)
{
struct kernfs_super_info *info;
struct super_block *sb = NULL;
mutex_lock(&kernfs_mutex);
list_for_each_entry(info, &root->supers, node) {
if (info->ns == ns) {
sb = info->sb;
if (!atomic_inc_not_zero(&info->sb->s_active))
sb = ERR_PTR(-EINVAL);
break;
}
}
mutex_unlock(&kernfs_mutex);
return sb;
}
void __init kernfs_init(void)
{
......
......@@ -146,3 +146,8 @@ static inline bool is_local_mountpoint(struct dentry *dentry)
return __is_local_mountpoint(dentry);
}
static inline bool is_anon_ns(struct mnt_namespace *ns)
{
return ns->seq == 0;
}
......@@ -2331,8 +2331,8 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path
return err;
}
static int filename_lookup(int dfd, struct filename *name, unsigned flags,
struct path *path, struct path *root)
int filename_lookup(int dfd, struct filename *name, unsigned flags,
struct path *path, struct path *root)
{
int retval;
struct nameidata nd;
......
......@@ -27,6 +27,7 @@
#include <linux/task_work.h>
#include <linux/sched/task.h>
#include <uapi/linux/mount.h>
#include <linux/fs_context.h>
#include "pnode.h"
#include "internal.h"
......@@ -940,38 +941,81 @@ static struct mount *skip_mnt_tree(struct mount *p)
return p;
}
struct vfsmount *
vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
/**
* vfs_create_mount - Create a mount for a configured superblock
* @fc: The configuration context with the superblock attached
*
* Create a mount to an already configured superblock. If necessary, the
* caller should invoke vfs_get_tree() before calling this.
*
* Note that this does not attach the mount to anything.
*/
struct vfsmount *vfs_create_mount(struct fs_context *fc)
{
struct mount *mnt;
struct dentry *root;
if (!type)
return ERR_PTR(-ENODEV);
if (!fc->root)
return ERR_PTR(-EINVAL);
mnt = alloc_vfsmnt(name);
mnt = alloc_vfsmnt(fc->source ?: "none");
if (!mnt)
return ERR_PTR(-ENOMEM);
if (flags & SB_KERNMOUNT)
if (fc->sb_flags & SB_KERNMOUNT)
mnt->mnt.mnt_flags = MNT_INTERNAL;
root = mount_fs(type, flags, name, data);
if (IS_ERR(root)) {
mnt_free_id(mnt);
free_vfsmnt(mnt);
return ERR_CAST(root);
}
atomic_inc(&fc->root->d_sb->s_active);
mnt->mnt.mnt_sb = fc->root->d_sb;
mnt->mnt.mnt_root = dget(fc->root);
mnt->mnt_mountpoint = mnt->mnt.mnt_root;
mnt->mnt_parent = mnt;
mnt->mnt.mnt_root = root;
mnt->mnt.mnt_sb = root->d_sb;
mnt->mnt_mountpoint = mnt->mnt.mnt_root;
mnt->mnt_parent = mnt;
lock_mount_hash();
list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts);
list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts);
unlock_mount_hash();
return &mnt->mnt;
}
EXPORT_SYMBOL(vfs_create_mount);
struct vfsmount *fc_mount(struct fs_context *fc)
{
int err = vfs_get_tree(fc);
if (!err) {
up_write(&fc->root->d_sb->s_umount);
return vfs_create_mount(fc);
}
return ERR_PTR(err);
}
EXPORT_SYMBOL(fc_mount);
struct vfsmount *vfs_kern_mount(struct file_system_type *type,
int flags, const char *name,
void *data)
{
struct fs_context *fc;
struct vfsmount *mnt;
int ret = 0;
if (!type)
return ERR_PTR(-EINVAL);
fc = fs_context_for_mount(type, flags);
if (IS_ERR(fc))
return ERR_CAST(fc);
if (name)
ret = vfs_parse_fs_string(fc, "source",
name, strlen(name));
if (!ret)
ret = parse_monolithic_mount_data(fc, data);
if (!ret)
mnt = fc_mount(fc);
else
mnt = ERR_PTR(ret);
put_fs_context(fc);
return mnt;
}
EXPORT_SYMBOL_GPL(vfs_kern_mount);
struct vfsmount *
......@@ -1013,27 +1057,6 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
mnt->mnt.mnt_flags = old->mnt.mnt_flags;
mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL);
/* Don't allow unprivileged users to change mount flags */
if (flag & CL_UNPRIVILEGED) {
mnt->mnt.mnt_flags |= MNT_LOCK_ATIME;
if (mnt->mnt.mnt_flags & MNT_READONLY)
mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
if (mnt->mnt.mnt_flags & MNT_NODEV)
mnt->mnt.mnt_flags |= MNT_LOCK_NODEV;
if (mnt->mnt.mnt_flags & MNT_NOSUID)
mnt->mnt.mnt_flags |= MNT_LOCK_NOSUID;
if (mnt->mnt.mnt_flags & MNT_NOEXEC)
mnt->mnt.mnt_flags |= MNT_LOCK_NOEXEC;
}
/* Don't allow unprivileged users to reveal what is under a mount */
if ((flag & CL_UNPRIVILEGED) &&
(!(flag & CL_EXPIRE) || list_empty(&old->mnt_expire)))
mnt->mnt.mnt_flags |= MNT_LOCKED;
atomic_inc(&sb->s_active);
mnt->mnt.mnt_sb = sb;
......@@ -1464,6 +1487,29 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
static void shrink_submounts(struct mount *mnt);
static int do_umount_root(struct super_block *sb)
{
int ret = 0;
down_write(&sb->s_umount);
if (!sb_rdonly(sb)) {
struct fs_context *fc;
fc = fs_context_for_reconfigure(sb->s_root, SB_RDONLY,
SB_RDONLY);
if (IS_ERR(fc)) {
ret = PTR_ERR(fc);
} else {
ret = parse_monolithic_mount_data(fc, NULL);
if (!ret)
ret = reconfigure_super(fc);
put_fs_context(fc);
}
}
up_write(&sb->s_umount);
return ret;
}
static int do_umount(struct mount *mnt, int flags)
{
struct super_block *sb = mnt->mnt.mnt_sb;
......@@ -1529,11 +1575,7 @@ static int do_umount(struct mount *mnt, int flags)
*/
if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
return -EPERM;
down_write(&sb->s_umount);
if (!sb_rdonly(sb))
retval = do_remount_sb(sb, SB_RDONLY, NULL, 0);
up_write(&sb->s_umount);
return retval;
return do_umount_root(sb);
}
namespace_lock();
......@@ -1839,6 +1881,33 @@ int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
return 0;
}
static void lock_mnt_tree(struct mount *mnt)
{
struct mount *p;
for (p = mnt; p; p = next_mnt(p, mnt)) {
int flags = p->mnt.mnt_flags;
/* Don't allow unprivileged users to change mount flags */
flags |= MNT_LOCK_ATIME;
if (flags & MNT_READONLY)
flags |= MNT_LOCK_READONLY;
if (flags & MNT_NODEV)
flags |= MNT_LOCK_NODEV;
if (flags & MNT_NOSUID)
flags |= MNT_LOCK_NOSUID;
if (flags & MNT_NOEXEC)
flags |= MNT_LOCK_NOEXEC;
/* Don't allow unprivileged users to reveal what is under a mount */
if (list_empty(&p->mnt_expire))
flags |= MNT_LOCKED;
p->mnt.mnt_flags = flags;
}
}
static void cleanup_group_ids(struct mount *mnt, struct mount *end)
{
struct mount *p;
......@@ -1956,6 +2025,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
struct mountpoint *dest_mp,
struct path *parent_path)
{
struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
HLIST_HEAD(tree_list);
struct mnt_namespace *ns = dest_mnt->mnt_ns;
struct mountpoint *smp;
......@@ -2006,6 +2076,9 @@ static int attach_recursive_mnt(struct mount *source_mnt,
child->mnt_mountpoint);
if (q)
mnt_change_mountpoint(child, smp, q);
/* Notice when we are propagating across user namespaces */
if (child->mnt_parent->mnt_ns->user_ns != user_ns)
lock_mnt_tree(child);
commit_tree(child);
}
put_mountpoint(smp);
......@@ -2313,7 +2386,7 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
int err;
struct super_block *sb = path->mnt->mnt_sb;
struct mount *mnt = real_mount(path->mnt);
void *sec_opts = NULL;
struct fs_context *fc;
if (!check_mnt(mnt))
return -EINVAL;
......@@ -2324,24 +2397,22 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
if (!can_change_locked_flags(mnt, mnt_flags))
return -EPERM;
if (data && !(sb->s_type->fs_flags & FS_BINARY_MOUNTDATA)) {
err = security_sb_eat_lsm_opts(data, &sec_opts);
if (err)
return err;
}
err = security_sb_remount(sb, sec_opts);
security_free_mnt_opts(&sec_opts);
if (err)
return err;
fc = fs_context_for_reconfigure(path->dentry, sb_flags, MS_RMT_MASK);
if (IS_ERR(fc))
return PTR_ERR(fc);
down_write(&sb->s_umount);
err = -EPERM;
if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
err = do_remount_sb(sb, sb_flags, data, 0);
if (!err)
set_mount_attributes(mnt, mnt_flags);
err = parse_monolithic_mount_data(fc, data);
if (!err) {
down_write(&sb->s_umount);
err = -EPERM;
if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
err = reconfigure_super(fc);
if (!err)
set_mount_attributes(mnt, mnt_flags);
}
up_write(&sb->s_umount);
}
up_write(&sb->s_umount);
put_fs_context(fc);
return err;
}
......@@ -2425,29 +2496,6 @@ static int do_move_mount(struct path *path, const char *old_name)
return err;
}
static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
{
int err;
const char *subtype = strchr(fstype, '.');
if (subtype) {
subtype++;
err = -EINVAL;
if (!subtype[0])
goto err;
} else
subtype = "";
mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL);
err = -ENOMEM;
if (!mnt->mnt_sb->s_subtype)
goto err;
return mnt;
err:
mntput(mnt);
return ERR_PTR(err);
}
/*
* add a mount into a namespace's mount tree
*/
......@@ -2492,7 +2540,39 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
return err;
}
static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags);
static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags);
/*
* Create a new mount using a superblock configuration and request it
* be added to the namespace tree.
*/
static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint,
unsigned int mnt_flags)
{
struct vfsmount *mnt;
struct super_block *sb = fc->root->d_sb;
int error;
error = security_sb_kern_mount(sb);
if (!error && mount_too_revealing(sb, &mnt_flags))
error = -EPERM;
if (unlikely(error)) {
fc_drop_locked(fc);
return error;
}
up_write(&sb->s_umount);
mnt = vfs_create_mount(fc);
if (IS_ERR(mnt))
return PTR_ERR(mnt);
error = do_add_mount(real_mount(mnt), mountpoint, mnt_flags);
if (error < 0)
mntput(mnt);
return error;
}
/*
* create a new mount for userspace and request it to be added into the
......@@ -2502,8 +2582,9 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
int mnt_flags, const char *name, void *data)
{
struct file_system_type *type;
struct vfsmount *mnt;
int err;
struct fs_context *fc;
const char *subtype = NULL;
int err = 0;
if (!fstype)
return -EINVAL;
......@@ -2512,23 +2593,37 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
if (!type)
return -ENODEV;
mnt = vfs_kern_mount(type, sb_flags, name, data);
if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
!mnt->mnt_sb->s_subtype)
mnt = fs_set_subtype(mnt, fstype);
if (type->fs_flags & FS_HAS_SUBTYPE) {
subtype = strchr(fstype, '.');
if (subtype) {
subtype++;
if (!*subtype) {
put_filesystem(type);
return -EINVAL;
}
} else {
subtype = "";
}
}
fc = fs_context_for_mount(type, sb_flags);
put_filesystem(type);
if (IS_ERR(mnt))
return PTR_ERR(mnt);
if (mount_too_revealing(mnt, &mnt_flags)) {
mntput(mnt);
return -EPERM;
}
if (IS_ERR(fc))
return PTR_ERR(fc);
if (subtype)
err = vfs_parse_fs_string(fc, "subtype",
subtype, strlen(subtype));
if (!err && name)
err = vfs_parse_fs_string(fc, "source", name, strlen(name));
if (!err)
err = parse_monolithic_mount_data(fc, data);
if (!err)
err = vfs_get_tree(fc);
if (!err)
err = do_new_mount_fc(fc, path, mnt_flags);
err = do_add_mount(real_mount(mnt), path, mnt_flags);
if (err)
mntput(mnt);
put_fs_context(fc);
return err;
}
......@@ -2863,7 +2958,8 @@ static void dec_mnt_namespaces(struct ucounts *ucounts)
static void free_mnt_ns(struct mnt_namespace *ns)
{
ns_free_inum(&ns->ns);
if (!is_anon_ns(ns))
ns_free_inum(&ns->ns);
dec_mnt_namespaces(ns->ucounts);
put_user_ns(ns->user_ns);
kfree(ns);
......@@ -2878,7 +2974,7 @@ static void free_mnt_ns(struct mnt_namespace *ns)
*/
static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool anon)
{
struct mnt_namespace *new_ns;
struct ucounts *ucounts;
......@@ -2888,28 +2984,27 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
if (!ucounts)
return ERR_PTR(-ENOSPC);
new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
new_ns = kzalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
if (!new_ns) {
dec_mnt_namespaces(ucounts);
return ERR_PTR(-ENOMEM);
}
ret = ns_alloc_inum(&new_ns->ns);
if (ret) {
kfree(new_ns);
dec_mnt_namespaces(ucounts);
return ERR_PTR(ret);
if (!anon) {
ret = ns_alloc_inum(&new_ns->ns);
if (ret) {
kfree(new_ns);
dec_mnt_namespaces(ucounts);
return ERR_PTR(ret);
}
}
new_ns->ns.ops = &mntns_operations;
new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
if (!anon)
new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
atomic_set(&new_ns->count, 1);
new_ns->root = NULL;
INIT_LIST_HEAD(&new_ns->list);
init_waitqueue_head(&new_ns->poll);
new_ns->event = 0;
new_ns->user_ns = get_user_ns(user_ns);
new_ns->ucounts = ucounts;
new_ns->mounts = 0;
new_ns->pending_mounts = 0;
return new_ns;
}
......@@ -2933,7 +3028,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
old = ns->root;
new_ns = alloc_mnt_ns(user_ns);
new_ns = alloc_mnt_ns(user_ns, false);
if (IS_ERR(new_ns))
return new_ns;
......@@ -2941,13 +3036,18 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
/* First pass: copy the tree topology */
copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
if (user_ns != ns->user_ns)
copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
copy_flags |= CL_SHARED_TO_SLAVE;
new = copy_tree(old, old->mnt.mnt_root, copy_flags);
if (IS_ERR(new)) {
namespace_unlock();
free_mnt_ns(new_ns);
return ERR_CAST(new);
}
if (user_ns != ns->user_ns) {
lock_mount_hash();
lock_mnt_tree(new);
unlock_mount_hash();
}
new_ns->root = new;
list_add_tail(&new_ns->list, &new->mnt_list);
......@@ -2988,37 +3088,25 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
return new_ns;
}
/**
* create_mnt_ns - creates a private namespace and adds a root filesystem
* @mnt: pointer to the new root filesystem mountpoint
*/
static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
{
struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns);
if (!IS_ERR(new_ns)) {
struct mount *mnt = real_mount(m);
mnt->mnt_ns = new_ns;
new_ns->root = mnt;
new_ns->mounts++;
list_add(&mnt->mnt_list, &new_ns->list);
} else {
mntput(m);
}
return new_ns;
}
struct dentry *mount_subtree(struct vfsmount *mnt, const char *name)
struct dentry *mount_subtree(struct vfsmount *m, const char *name)
{
struct mount *mnt = real_mount(m);
struct mnt_namespace *ns;
struct super_block *s;
struct path path;
int err;
ns = create_mnt_ns(mnt);
if (IS_ERR(ns))
ns = alloc_mnt_ns(&init_user_ns, true);
if (IS_ERR(ns)) {
mntput(m);
return ERR_CAST(ns);
}
mnt->mnt_ns = ns;
ns->root = mnt;
ns->mounts++;
list_add(&mnt->mnt_list, &ns->list);
err = vfs_path_lookup(mnt->mnt_root, mnt,
err = vfs_path_lookup(m->mnt_root, m,
name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
put_mnt_ns(ns);
......@@ -3228,6 +3316,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
static void __init init_mount_tree(void)
{
struct vfsmount *mnt;
struct mount *m;
struct mnt_namespace *ns;
struct path root;
struct file_system_type *type;
......@@ -3240,10 +3329,14 @@ static void __init init_mount_tree(void)
if (IS_ERR(mnt))
panic("Can't create rootfs");
ns = create_mnt_ns(mnt);
ns = alloc_mnt_ns(&init_user_ns, false);
if (IS_ERR(ns))
panic("Can't allocate initial namespace");
m = real_mount(mnt);
m->mnt_ns = ns;
ns->root = m;
ns->mounts = 1;
list_add(&m->mnt_list, &ns->list);
init_task.nsproxy->mnt_ns = ns;
get_mnt_ns(ns);
......@@ -3297,10 +3390,10 @@ void put_mnt_ns(struct mnt_namespace *ns)
free_mnt_ns(ns);
}
struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
struct vfsmount *kern_mount(struct file_system_type *type)
{
struct vfsmount *mnt;
mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, data);
mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL);
if (!IS_ERR(mnt)) {
/*
* it is a longterm mount, don't release mnt until
......@@ -3310,7 +3403,7 @@ struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
}
return mnt;
}
EXPORT_SYMBOL_GPL(kern_mount_data);
EXPORT_SYMBOL_GPL(kern_mount);
void kern_unmount(struct vfsmount *mnt)
{
......@@ -3352,7 +3445,8 @@ bool current_chrooted(void)
return chrooted;
}
static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new,
static bool mnt_already_visible(struct mnt_namespace *ns,
const struct super_block *sb,
int *new_mnt_flags)
{
int new_flags = *new_mnt_flags;
......@@ -3364,7 +3458,7 @@ static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new,
struct mount *child;
int mnt_flags;
if (mnt->mnt.mnt_sb->s_type != new->mnt_sb->s_type)
if (mnt->mnt.mnt_sb->s_type != sb->s_type)
continue;
/* This mount is not fully visible if it's root directory
......@@ -3415,7 +3509,7 @@ static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new,
return visible;
}
static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags)
static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags)
{
const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV;
struct mnt_namespace *ns = current->nsproxy->mnt_ns;
......@@ -3425,7 +3519,7 @@ static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags)
return false;
/* Can this filesystem be too revealing? */
s_iflags = mnt->mnt_sb->s_iflags;
s_iflags = sb->s_iflags;
if (!(s_iflags & SB_I_USERNS_VISIBLE))
return false;
......@@ -3435,7 +3529,7 @@ static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags)
return true;
}
return !mnt_already_visible(ns, mnt, new_mnt_flags);
return !mnt_already_visible(ns, sb, new_mnt_flags);
}
bool mnt_may_suid(struct vfsmount *mnt)
......@@ -3484,6 +3578,9 @@ static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
!ns_capable(current_user_ns(), CAP_SYS_ADMIN))
return -EPERM;
if (is_anon_ns(mnt_ns))
return -EINVAL;
if (fs->users != 1)
return -EINVAL;
......
......@@ -214,7 +214,6 @@ static struct mount *next_group(struct mount *m, struct mount *origin)
}
/* all accesses are serialized by namespace_sem */
static struct user_namespace *user_ns;
static struct mount *last_dest, *first_source, *last_source, *dest_master;
static struct mountpoint *mp;
static struct hlist_head *list;
......@@ -260,9 +259,6 @@ static int propagate_one(struct mount *m)
type |= CL_MAKE_SHARED;
}
/* Notice when we are propagating across user namespaces */
if (m->mnt_ns->user_ns != user_ns)
type |= CL_UNPRIVILEGED;
child = copy_tree(last_source, last_source->mnt.mnt_root, type);
if (IS_ERR(child))
return PTR_ERR(child);
......@@ -303,7 +299,6 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
* propagate_one(); everything is serialized by namespace_sem,
* so globals will do just fine.
*/
user_ns = current->nsproxy->mnt_ns->user_ns;
last_dest = dest_mnt;
first_source = source_mnt;
last_source = source_mnt;
......
......@@ -27,8 +27,7 @@
#define CL_MAKE_SHARED 0x08
#define CL_PRIVATE 0x10
#define CL_SHARED_TO_SLAVE 0x20
#define CL_UNPRIVILEGED 0x40
#define CL_COPY_MNT_NS_FILE 0x80
#define CL_COPY_MNT_NS_FILE 0x40
#define CL_COPY_ALL (CL_COPY_UNBINDABLE | CL_COPY_MNT_NS_FILE)
......
......@@ -24,7 +24,6 @@
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/mount.h>
#include <linux/magic.h>
#include <linux/uaccess.h>
......@@ -122,13 +121,12 @@ static int proc_show_options(struct seq_file *seq, struct dentry *root)
return 0;
}
static const struct super_operations proc_sops = {
const struct super_operations proc_sops = {
.alloc_inode = proc_alloc_inode,
.destroy_inode = proc_destroy_inode,
.drop_inode = generic_delete_inode,
.evict_inode = proc_evict_inode,
.statfs = simple_statfs,
.remount_fs = proc_remount,
.show_options = proc_show_options,
};
......@@ -488,51 +486,3 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
pde_put(de);
return inode;
}
int proc_fill_super(struct super_block *s, void *data, int silent)
{
struct pid_namespace *ns = get_pid_ns(s->s_fs_info);
struct inode *root_inode;
int ret;
if (!proc_parse_options(data, ns))
return -EINVAL;
/* User space would break if executables or devices appear on proc */
s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV;
s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC;
s->s_blocksize = 1024;
s->s_blocksize_bits = 10;
s->s_magic = PROC_SUPER_MAGIC;
s->s_op = &proc_sops;
s->s_time_gran = 1;
/*
* procfs isn't actually a stacking filesystem; however, there is
* too much magic going on inside it to permit stacking things on
* top of it
*/
s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
/* procfs dentries and inodes don't require IO to create */
s->s_shrink.seeks = 0;
pde_get(&proc_root);
root_inode = proc_get_inode(s, &proc_root);
if (!root_inode) {
pr_err("proc_fill_super: get root inode failed\n");
return -ENOMEM;
}
s->s_root = d_make_root(root_inode);
if (!s->s_root) {
pr_err("proc_fill_super: allocate dentry failed\n");
return -ENOMEM;
}
ret = proc_setup_self(s);
if (ret) {
return ret;
}
return proc_setup_thread_self(s);
}
......@@ -207,13 +207,12 @@ struct pde_opener {
struct completion *c;
} __randomize_layout;
extern const struct inode_operations proc_link_inode_operations;
extern const struct inode_operations proc_pid_link_inode_operations;
extern const struct super_operations proc_sops;
void proc_init_kmemcache(void);
void set_proc_pid_nlink(void);
extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
extern int proc_fill_super(struct super_block *, void *data, int flags);
extern void proc_entry_rundown(struct proc_dir_entry *);
/*
......@@ -271,10 +270,8 @@ static inline void proc_tty_init(void) {}
* root.c
*/
extern struct proc_dir_entry proc_root;
extern int proc_parse_options(char *options, struct pid_namespace *pid);
extern void proc_self_init(void);
extern int proc_remount(struct super_block *, int *, char *);
/*
* task_[no]mmu.c
......
......@@ -19,86 +19,178 @@
#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/user_namespace.h>
#include <linux/fs_context.h>
#include <linux/mount.h>
#include <linux/pid_namespace.h>
#include <linux/parser.h>
#include <linux/fs_parser.h>
#include <linux/cred.h>
#include <linux/magic.h>
#include <linux/slab.h>
#include "internal.h"
enum {
Opt_gid, Opt_hidepid, Opt_err,
struct proc_fs_context {
struct pid_namespace *pid_ns;
unsigned int mask;
int hidepid;
int gid;
};
static const match_table_t tokens = {
{Opt_hidepid, "hidepid=%u"},
{Opt_gid, "gid=%u"},
{Opt_err, NULL},
enum proc_param {
Opt_gid,
Opt_hidepid,
};
int proc_parse_options(char *options, struct pid_namespace *pid)
static const struct fs_parameter_spec proc_param_specs[] = {
fsparam_u32("gid", Opt_gid),
fsparam_u32("hidepid", Opt_hidepid),
{}
};
static const struct fs_parameter_description proc_fs_parameters = {
.name = "proc",
.specs = proc_param_specs,
};
static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
char *p;
substring_t args[MAX_OPT_ARGS];
int option;
if (!options)
return 1;
while ((p = strsep(&options, ",")) != NULL) {
int token;
if (!*p)
continue;
args[0].to = args[0].from = NULL;
token = match_token(p, tokens, args);
switch (token) {
case Opt_gid:
if (match_int(&args[0], &option))
return 0;
pid->pid_gid = make_kgid(current_user_ns(), option);
break;
case Opt_hidepid:
if (match_int(&args[0], &option))
return 0;
if (option < HIDEPID_OFF ||
option > HIDEPID_INVISIBLE) {
pr_err("proc: hidepid value must be between 0 and 2.\n");
return 0;
}
pid->hide_pid = option;
break;
default:
pr_err("proc: unrecognized mount option \"%s\" "
"or missing value\n", p);
return 0;
}
struct proc_fs_context *ctx = fc->fs_private;
struct fs_parse_result result;
int opt;
opt = fs_parse(fc, &proc_fs_parameters, param, &result);
if (opt < 0)
return opt;
switch (opt) {
case Opt_gid:
ctx->gid = result.uint_32;
break;
case Opt_hidepid:
ctx->hidepid = result.uint_32;
if (ctx->hidepid < HIDEPID_OFF ||
ctx->hidepid > HIDEPID_INVISIBLE)
return invalf(fc, "proc: hidepid value must be between 0 and 2.\n");
break;
default:
return -EINVAL;
}
return 1;
ctx->mask |= 1 << opt;
return 0;
}
int proc_remount(struct super_block *sb, int *flags, char *data)
static void proc_apply_options(struct super_block *s,
struct fs_context *fc,
struct pid_namespace *pid_ns,
struct user_namespace *user_ns)
{
struct proc_fs_context *ctx = fc->fs_private;
if (ctx->mask & (1 << Opt_gid))
pid_ns->pid_gid = make_kgid(user_ns, ctx->gid);
if (ctx->mask & (1 << Opt_hidepid))
pid_ns->hide_pid = ctx->hidepid;
}
static int proc_fill_super(struct super_block *s, struct fs_context *fc)
{
struct pid_namespace *pid_ns = get_pid_ns(s->s_fs_info);
struct inode *root_inode;
int ret;
proc_apply_options(s, fc, pid_ns, current_user_ns());
/* User space would break if executables or devices appear on proc */
s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV;
s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC;
s->s_blocksize = 1024;
s->s_blocksize_bits = 10;
s->s_magic = PROC_SUPER_MAGIC;
s->s_op = &proc_sops;
s->s_time_gran = 1;
/*
* procfs isn't actually a stacking filesystem; however, there is
* too much magic going on inside it to permit stacking things on
* top of it
*/
s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
/* procfs dentries and inodes don't require IO to create */
s->s_shrink.seeks = 0;
pde_get(&proc_root);
root_inode = proc_get_inode(s, &proc_root);
if (!root_inode) {
pr_err("proc_fill_super: get root inode failed\n");
return -ENOMEM;
}
s->s_root = d_make_root(root_inode);
if (!s->s_root) {
pr_err("proc_fill_super: allocate dentry failed\n");
return -ENOMEM;
}
ret = proc_setup_self(s);
if (ret) {
return ret;
}
return proc_setup_thread_self(s);
}
static int proc_reconfigure(struct fs_context *fc)
{
struct super_block *sb = fc->root->d_sb;
struct pid_namespace *pid = sb->s_fs_info;
sync_filesystem(sb);
return !proc_parse_options(data, pid);
proc_apply_options(sb, fc, pid, current_user_ns());
return 0;
}
static struct dentry *proc_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
static int proc_get_tree(struct fs_context *fc)
{
struct pid_namespace *ns;
struct proc_fs_context *ctx = fc->fs_private;
if (flags & SB_KERNMOUNT) {
ns = data;
data = NULL;
} else {
ns = task_active_pid_ns(current);
}
put_user_ns(fc->user_ns);
fc->user_ns = get_user_ns(ctx->pid_ns->user_ns);
fc->s_fs_info = ctx->pid_ns;
return vfs_get_super(fc, vfs_get_keyed_super, proc_fill_super);
}
return mount_ns(fs_type, flags, data, ns, ns->user_ns, proc_fill_super);
static void proc_fs_context_free(struct fs_context *fc)
{
struct proc_fs_context *ctx = fc->fs_private;
if (ctx->pid_ns)
put_pid_ns(ctx->pid_ns);
kfree(ctx);
}
static const struct fs_context_operations proc_fs_context_ops = {
.free = proc_fs_context_free,
.parse_param = proc_parse_param,
.get_tree = proc_get_tree,
.reconfigure = proc_reconfigure,
};
static int proc_init_fs_context(struct fs_context *fc)
{
struct proc_fs_context *ctx;
ctx = kzalloc(sizeof(struct proc_fs_context), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
ctx->pid_ns = get_pid_ns(task_active_pid_ns(current));
fc->fs_private = ctx;
fc->ops = &proc_fs_context_ops;
return 0;
}
static void proc_kill_sb(struct super_block *sb)
......@@ -115,10 +207,11 @@ static void proc_kill_sb(struct super_block *sb)
}
static struct file_system_type proc_fs_type = {
.name = "proc",
.mount = proc_mount,
.kill_sb = proc_kill_sb,
.fs_flags = FS_USERNS_MOUNT,
.name = "proc",
.init_fs_context = proc_init_fs_context,
.parameters = &proc_fs_parameters,
.kill_sb = proc_kill_sb,
.fs_flags = FS_USERNS_MOUNT,
};
void __init proc_root_init(void)
......@@ -156,7 +249,7 @@ static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentr
{
if (!proc_pid_lookup(dentry, flags))
return NULL;
return proc_lookup(dir, dentry, flags);
}
......@@ -209,9 +302,28 @@ struct proc_dir_entry proc_root = {
int pid_ns_prepare_proc(struct pid_namespace *ns)
{
struct proc_fs_context *ctx;
struct fs_context *fc;
struct vfsmount *mnt;
mnt = kern_mount_data(&proc_fs_type, ns);
fc = fs_context_for_mount(&proc_fs_type, SB_KERNMOUNT);
if (IS_ERR(fc))
return PTR_ERR(fc);
if (fc->user_ns != ns->user_ns) {
put_user_ns(fc->user_ns);
fc->user_ns = get_user_ns(ns->user_ns);
}
ctx = fc->fs_private;
if (ctx->pid_ns != ns) {
put_pid_ns(ctx->pid_ns);
get_pid_ns(ns);
ctx->pid_ns = ns;
}
mnt = fc_mount(fc);
put_fs_context(fc);
if (IS_ERR(mnt))
return PTR_ERR(mnt);
......
此差异已折叠。
......@@ -13,34 +13,69 @@
#include <linux/magic.h>
#include <linux/mount.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/user_namespace.h>
#include <linux/fs_context.h>
#include <net/net_namespace.h>
#include "sysfs.h"
static struct kernfs_root *sysfs_root;
struct kernfs_node *sysfs_root_kn;
static struct dentry *sysfs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
static int sysfs_get_tree(struct fs_context *fc)
{
struct dentry *root;
void *ns;
bool new_sb = false;
struct kernfs_fs_context *kfc = fc->fs_private;
int ret;
if (!(flags & SB_KERNMOUNT)) {
ret = kernfs_get_tree(fc);
if (ret)
return ret;
if (kfc->new_sb_created)
fc->root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE;
return 0;
}
static void sysfs_fs_context_free(struct fs_context *fc)
{
struct kernfs_fs_context *kfc = fc->fs_private;
if (kfc->ns_tag)
kobj_ns_drop(KOBJ_NS_TYPE_NET, kfc->ns_tag);
kernfs_free_fs_context(fc);
kfree(kfc);
}
static const struct fs_context_operations sysfs_fs_context_ops = {
.free = sysfs_fs_context_free,
.get_tree = sysfs_get_tree,
};
static int sysfs_init_fs_context(struct fs_context *fc)
{
struct kernfs_fs_context *kfc;
struct net *netns;
if (!(fc->sb_flags & SB_KERNMOUNT)) {
if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET))
return ERR_PTR(-EPERM);
return -EPERM;
}
ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
root = kernfs_mount_ns(fs_type, flags, sysfs_root,
SYSFS_MAGIC, &new_sb, ns);
if (!new_sb)
kobj_ns_drop(KOBJ_NS_TYPE_NET, ns);
else if (!IS_ERR(root))
root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE;
kfc = kzalloc(sizeof(struct kernfs_fs_context), GFP_KERNEL);
if (!kfc)
return -ENOMEM;
return root;
kfc->ns_tag = netns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
kfc->root = sysfs_root;
kfc->magic = SYSFS_MAGIC;
fc->fs_private = kfc;
fc->ops = &sysfs_fs_context_ops;
if (fc->user_ns)
put_user_ns(fc->user_ns);
fc->user_ns = get_user_ns(netns->user_ns);
fc->global = true;
return 0;
}
static void sysfs_kill_sb(struct super_block *sb)
......@@ -52,10 +87,10 @@ static void sysfs_kill_sb(struct super_block *sb)
}
static struct file_system_type sysfs_fs_type = {
.name = "sysfs",
.mount = sysfs_mount,
.kill_sb = sysfs_kill_sb,
.fs_flags = FS_USERNS_MOUNT,
.name = "sysfs",
.init_fs_context = sysfs_init_fs_context,
.kill_sb = sysfs_kill_sb,
.fs_flags = FS_USERNS_MOUNT,
};
int __init sysfs_init(void)
......
......@@ -18,6 +18,7 @@
#define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */
#define EPROBE_DEFER 517 /* Driver requests probe retry */
#define EOPENSTALE 518 /* open found a stale dentry */
#define ENOPARAM 519 /* Parameter not supported */
/* Defined for the NFSv3 protocol */
#define EBADHANDLE 521 /* Illegal NFS file handle */
......
......@@ -64,6 +64,8 @@ struct workqueue_struct;
struct iov_iter;
struct fscrypt_info;
struct fscrypt_operations;
struct fs_context;
struct fs_parameter_description;
extern void __init inode_init(void);
extern void __init inode_init_early(void);
......@@ -1349,6 +1351,7 @@ extern int send_sigurg(struct fown_struct *fown);
/* These sb flags are internal to the kernel */
#define SB_SUBMOUNT (1<<26)
#define SB_FORCE (1<<27)
#define SB_NOSEC (1<<28)
#define SB_BORN (1<<29)
#define SB_ACTIVE (1<<30)
......@@ -1459,7 +1462,7 @@ struct super_block {
* Filesystem subtype. If non-empty the filesystem type field
* in /proc/mounts will be "type.subtype"
*/
char *s_subtype;
const char *s_subtype;
const struct dentry_operations *s_d_op; /* default d_op for dentries */
......@@ -2170,6 +2173,8 @@ struct file_system_type {
#define FS_HAS_SUBTYPE 4
#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
int (*init_fs_context)(struct fs_context *);
const struct fs_parameter_description *parameters;
struct dentry *(*mount) (struct file_system_type *, int,
const char *, void *);
void (*kill_sb) (struct super_block *);
......@@ -2225,8 +2230,12 @@ void kill_litter_super(struct super_block *sb);
void deactivate_super(struct super_block *sb);
void deactivate_locked_super(struct super_block *sb);
int set_anon_super(struct super_block *s, void *data);
int set_anon_super_fc(struct super_block *s, struct fs_context *fc);
int get_anon_bdev(dev_t *);
void free_anon_bdev(dev_t);
struct super_block *sget_fc(struct fs_context *fc,
int (*test)(struct super_block *, struct fs_context *),
int (*set)(struct super_block *, struct fs_context *));
struct super_block *sget_userns(struct file_system_type *type,
int (*test)(struct super_block *,void *),
int (*set)(struct super_block *,void *),
......@@ -2269,8 +2278,7 @@ mount_pseudo(struct file_system_type *fs_type, char *name,
extern int register_filesystem(struct file_system_type *);
extern int unregister_filesystem(struct file_system_type *);
extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data);
#define kern_mount(type) kern_mount_data(type, NULL)
extern struct vfsmount *kern_mount(struct file_system_type *);
extern void kern_unmount(struct vfsmount *mnt);
extern int may_umount_tree(struct vfsmount *);
extern int may_umount(struct vfsmount *);
......
此差异已折叠。
此差异已折叠。
......@@ -26,7 +26,9 @@ struct vm_area_struct;
struct super_block;
struct file_system_type;
struct poll_table_struct;
struct fs_context;
struct kernfs_fs_context;
struct kernfs_open_node;
struct kernfs_iattrs;
......@@ -168,7 +170,6 @@ struct kernfs_node {
* kernfs_node parameter.
*/
struct kernfs_syscall_ops {
int (*remount_fs)(struct kernfs_root *root, int *flags, char *data);
int (*show_options)(struct seq_file *sf, struct kernfs_root *root);
int (*mkdir)(struct kernfs_node *parent, const char *name,
......@@ -272,6 +273,18 @@ struct kernfs_ops {
#endif
};
/*
* The kernfs superblock creation/mount parameter context.
*/
struct kernfs_fs_context {
struct kernfs_root *root; /* Root of the hierarchy being mounted */
void *ns_tag; /* Namespace tag of the mount (or NULL) */
unsigned long magic; /* File system specific magic number */
/* The following are set/used by kernfs_mount() */
bool new_sb_created; /* Set to T if we allocated a new sb */
};
#ifdef CONFIG_KERNFS
static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn)
......@@ -359,11 +372,9 @@ __poll_t kernfs_generic_poll(struct kernfs_open_file *of,
void kernfs_notify(struct kernfs_node *kn);
const void *kernfs_super_ns(struct super_block *sb);
struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
struct kernfs_root *root, unsigned long magic,
bool *new_sb_created, const void *ns);
int kernfs_get_tree(struct fs_context *fc);
void kernfs_free_fs_context(struct fs_context *fc);
void kernfs_kill_sb(struct super_block *sb);
struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns);
void kernfs_init(void);
......@@ -465,11 +476,10 @@ static inline void kernfs_notify(struct kernfs_node *kn) { }
static inline const void *kernfs_super_ns(struct super_block *sb)
{ return NULL; }
static inline struct dentry *
kernfs_mount_ns(struct file_system_type *fs_type, int flags,
struct kernfs_root *root, unsigned long magic,
bool *new_sb_created, const void *ns)
{ return ERR_PTR(-ENOSYS); }
static inline int kernfs_get_tree(struct fs_context *fc)
{ return -ENOSYS; }
static inline void kernfs_free_fs_context(struct fs_context *fc) { }
static inline void kernfs_kill_sb(struct super_block *sb) { }
......@@ -552,13 +562,4 @@ static inline int kernfs_rename(struct kernfs_node *kn,
return kernfs_rename_ns(kn, new_parent, new_name, NULL);
}
static inline struct dentry *
kernfs_mount(struct file_system_type *fs_type, int flags,
struct kernfs_root *root, unsigned long magic,
bool *new_sb_created)
{
return kernfs_mount_ns(fs_type, flags, root,
magic, new_sb_created, NULL);
}
#endif /* __LINUX_KERNFS_H */
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
......@@ -42,7 +42,7 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
goto fail;
err = -ENOMEM;
ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL);
ns = kzalloc(sizeof(struct ipc_namespace), GFP_KERNEL);
if (ns == NULL)
goto fail_dec;
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册
新手
引导
客服 返回
顶部