提交 64f29d88 编写于 作者: L Linus Torvalds

Merge tag 'ceph-for-5.17-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "The highlight is the new mount "device" string syntax implemented by
  Venky Shankar. It solves some long-standing issues with using
  different auth entities and/or mounting different CephFS filesystems
  from the same cluster, remounting and also misleading /proc/mounts
  contents. The existing syntax of course remains to be maintained.

  On top of that, there are a couple of fixes for edge cases in quota and
  a new mount option for turning on unbuffered I/O mode globally instead
  of on a per-file basis with ioctl(CEPH_IOC_SYNCIO)"

* tag 'ceph-for-5.17-rc1' of git://github.com/ceph/ceph-client:
  ceph: move CEPH_SUPER_MAGIC definition to magic.h
  ceph: remove redundant Lsx caps check
  ceph: add new "nopagecache" option
  ceph: don't check for quotas on MDS stray dirs
  ceph: drop send metrics debug message
  rbd: make const pointer spaces a static const array
  ceph: Fix incorrect statfs report for small quota
  ceph: mount syntax module parameter
  doc: document new CephFS mount device syntax
  ceph: record updated mon_addr on remount
  ceph: new device mount syntax
  libceph: rename parse_fsid() to ceph_parse_fsid() and export
  libceph: generalize addr/ip parsing based on delimiter
...@@ -82,7 +82,7 @@ Mount Syntax ...@@ -82,7 +82,7 @@ Mount Syntax
The basic mount syntax is:: The basic mount syntax is::
# mount -t ceph monip[:port][,monip2[:port]...]:/[subdir] mnt # mount -t ceph user@fsid.fs_name=/[subdir] mnt -o mon_addr=monip1[:port][/monip2[:port]]
You only need to specify a single monitor, as the client will get the You only need to specify a single monitor, as the client will get the
full list when it connects. (However, if the monitor you specify full list when it connects. (However, if the monitor you specify
...@@ -90,16 +90,35 @@ happens to be down, the mount won't succeed.) The port can be left ...@@ -90,16 +90,35 @@ happens to be down, the mount won't succeed.) The port can be left
off if the monitor is using the default. So if the monitor is at off if the monitor is using the default. So if the monitor is at
1.2.3.4:: 1.2.3.4::
# mount -t ceph 1.2.3.4:/ /mnt/ceph # mount -t ceph cephuser@07fe3187-00d9-42a3-814b-72a4d5e7d5be.cephfs=/ /mnt/ceph -o mon_addr=1.2.3.4
is sufficient. If /sbin/mount.ceph is installed, a hostname can be is sufficient. If /sbin/mount.ceph is installed, a hostname can be
used instead of an IP address. used instead of an IP address and the cluster FSID can be left out
(as the mount helper will fill it in by reading the ceph configuration
file)::
# mount -t ceph cephuser@cephfs=/ /mnt/ceph -o mon_addr=mon-addr
Multiple monitor addresses can be passed by separating each address with a slash (`/`)::
# mount -t ceph cephuser@cephfs=/ /mnt/ceph -o mon_addr=192.168.1.100/192.168.1.101
When using the mount helper, the monitor address can be read from the ceph
configuration file if available. Note that the cluster FSID (passed as part
of the device string) is validated by checking it against the FSID reported by
the monitor.
Mount Options Mount Options
============= =============
mon_addr=ip_address[:port][/ip_address[:port]]
Monitor address to the cluster. This is used to bootstrap the
connection to the cluster. Once connection is established, the
monitor addresses in the monitor map are followed.
fsid=cluster-id
FSID of the cluster (from `ceph fsid` command).
ip=A.B.C.D[:N] ip=A.B.C.D[:N]
Specify the IP and/or port the client should bind to locally. Specify the IP and/or port the client should bind to locally.
There is normally not much reason to do this. If the IP is not There is normally not much reason to do this. If the IP is not
......
...@@ -6189,7 +6189,7 @@ static inline size_t next_token(const char **buf) ...@@ -6189,7 +6189,7 @@ static inline size_t next_token(const char **buf)
* These are the characters that produce nonzero for * These are the characters that produce nonzero for
* isspace() in the "C" and "POSIX" locales. * isspace() in the "C" and "POSIX" locales.
*/ */
const char *spaces = " \f\n\r\t\v"; static const char spaces[] = " \f\n\r\t\v";
*buf += strspn(*buf, spaces); /* Find start of token */ *buf += strspn(*buf, spaces); /* Find start of token */
...@@ -6495,7 +6495,8 @@ static int rbd_add_parse_args(const char *buf, ...@@ -6495,7 +6495,8 @@ static int rbd_add_parse_args(const char *buf,
pctx.opts->exclusive = RBD_EXCLUSIVE_DEFAULT; pctx.opts->exclusive = RBD_EXCLUSIVE_DEFAULT;
pctx.opts->trim = RBD_TRIM_DEFAULT; pctx.opts->trim = RBD_TRIM_DEFAULT;
ret = ceph_parse_mon_ips(mon_addrs, mon_addrs_size, pctx.copts, NULL); ret = ceph_parse_mon_ips(mon_addrs, mon_addrs_size, pctx.copts, NULL,
',');
if (ret) if (ret)
goto out_err; goto out_err;
......
...@@ -3376,8 +3376,7 @@ static void handle_cap_grant(struct inode *inode, ...@@ -3376,8 +3376,7 @@ static void handle_cap_grant(struct inode *inode,
if ((newcaps & CEPH_CAP_LINK_SHARED) && if ((newcaps & CEPH_CAP_LINK_SHARED) &&
(extra_info->issued & CEPH_CAP_LINK_EXCL) == 0) { (extra_info->issued & CEPH_CAP_LINK_EXCL) == 0) {
set_nlink(inode, le32_to_cpu(grant->nlink)); set_nlink(inode, le32_to_cpu(grant->nlink));
if (inode->i_nlink == 0 && if (inode->i_nlink == 0)
(newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
deleted_inode = true; deleted_inode = true;
} }
......
...@@ -204,6 +204,8 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, ...@@ -204,6 +204,8 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
int fmode, bool isdir) int fmode, bool isdir)
{ {
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mount_options *opt =
ceph_inode_to_client(&ci->vfs_inode)->mount_options;
struct ceph_file_info *fi; struct ceph_file_info *fi;
dout("%s %p %p 0%o (%s)\n", __func__, inode, file, dout("%s %p %p 0%o (%s)\n", __func__, inode, file,
...@@ -225,6 +227,9 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, ...@@ -225,6 +227,9 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
if (!fi) if (!fi)
return -ENOMEM; return -ENOMEM;
if (opt->flags & CEPH_MOUNT_OPT_NOPAGECACHE)
fi->flags |= CEPH_F_SYNC;
file->private_data = fi; file->private_data = fi;
} }
...@@ -1541,7 +1546,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to) ...@@ -1541,7 +1546,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
bool direct_lock = iocb->ki_flags & IOCB_DIRECT; bool direct_lock = iocb->ki_flags & IOCB_DIRECT;
ssize_t ret; ssize_t ret;
int want, got = 0; int want = 0, got = 0;
int retry_op = 0, read = 0; int retry_op = 0, read = 0;
again: again:
...@@ -1556,13 +1561,14 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to) ...@@ -1556,13 +1561,14 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
else else
ceph_start_io_read(inode); ceph_start_io_read(inode);
if (!(fi->flags & CEPH_F_SYNC) && !direct_lock)
want |= CEPH_CAP_FILE_CACHE;
if (fi->fmode & CEPH_FILE_MODE_LAZY) if (fi->fmode & CEPH_FILE_MODE_LAZY)
want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; want |= CEPH_CAP_FILE_LAZYIO;
else
want = CEPH_CAP_FILE_CACHE;
ret = ceph_get_caps(filp, CEPH_CAP_FILE_RD, want, -1, &got); ret = ceph_get_caps(filp, CEPH_CAP_FILE_RD, want, -1, &got);
if (ret < 0) { if (ret < 0) {
if (iocb->ki_flags & IOCB_DIRECT) if (direct_lock)
ceph_end_io_direct(inode); ceph_end_io_direct(inode);
else else
ceph_end_io_read(inode); ceph_end_io_read(inode);
...@@ -1696,7 +1702,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) ...@@ -1696,7 +1702,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct ceph_osd_client *osdc = &fsc->client->osdc; struct ceph_osd_client *osdc = &fsc->client->osdc;
struct ceph_cap_flush *prealloc_cf; struct ceph_cap_flush *prealloc_cf;
ssize_t count, written = 0; ssize_t count, written = 0;
int err, want, got; int err, want = 0, got;
bool direct_lock = false; bool direct_lock = false;
u32 map_flags; u32 map_flags;
u64 pool_flags; u64 pool_flags;
...@@ -1771,10 +1777,10 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) ...@@ -1771,10 +1777,10 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n", dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n",
inode, ceph_vinop(inode), pos, count, i_size_read(inode)); inode, ceph_vinop(inode), pos, count, i_size_read(inode));
if (!(fi->flags & CEPH_F_SYNC) && !direct_lock)
want |= CEPH_CAP_FILE_BUFFER;
if (fi->fmode & CEPH_FILE_MODE_LAZY) if (fi->fmode & CEPH_FILE_MODE_LAZY)
want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO; want |= CEPH_CAP_FILE_LAZYIO;
else
want = CEPH_CAP_FILE_BUFFER;
got = 0; got = 0;
err = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, pos + count, &got); err = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, pos + count, &got);
if (err < 0) if (err < 0)
......
...@@ -160,8 +160,6 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc, ...@@ -160,8 +160,6 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
msg->hdr.version = cpu_to_le16(1); msg->hdr.version = cpu_to_le16(1);
msg->hdr.compat_version = cpu_to_le16(1); msg->hdr.compat_version = cpu_to_le16(1);
msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
dout("client%llu send metrics to mds%d\n",
ceph_client_gid(mdsc->fsc->client), s->s_mds);
ceph_con_send(&s->s_con, msg); ceph_con_send(&s->s_con, msg);
return true; return true;
......
...@@ -30,6 +30,9 @@ static inline bool ceph_has_realms_with_quotas(struct inode *inode) ...@@ -30,6 +30,9 @@ static inline bool ceph_has_realms_with_quotas(struct inode *inode)
/* if root is the real CephFS root, we don't have quota realms */ /* if root is the real CephFS root, we don't have quota realms */
if (root && ceph_ino(root) == CEPH_INO_ROOT) if (root && ceph_ino(root) == CEPH_INO_ROOT)
return false; return false;
/* MDS stray dirs have no quota realms */
if (ceph_vino_is_reserved(ceph_inode(inode)->i_vino))
return false;
/* otherwise, we can't know for sure */ /* otherwise, we can't know for sure */
return true; return true;
} }
...@@ -494,10 +497,24 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf) ...@@ -494,10 +497,24 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
if (ci->i_max_bytes) { if (ci->i_max_bytes) {
total = ci->i_max_bytes >> CEPH_BLOCK_SHIFT; total = ci->i_max_bytes >> CEPH_BLOCK_SHIFT;
used = ci->i_rbytes >> CEPH_BLOCK_SHIFT; used = ci->i_rbytes >> CEPH_BLOCK_SHIFT;
/* For quota size less than 4MB, use 4KB block size */
if (!total) {
total = ci->i_max_bytes >> CEPH_4K_BLOCK_SHIFT;
used = ci->i_rbytes >> CEPH_4K_BLOCK_SHIFT;
buf->f_frsize = 1 << CEPH_4K_BLOCK_SHIFT;
}
/* It is possible for a quota to be exceeded. /* It is possible for a quota to be exceeded.
* Report 'zero' in that case * Report 'zero' in that case
*/ */
free = total > used ? total - used : 0; free = total > used ? total - used : 0;
/* For quota size less than 4KB, report the
* total=used=4KB,free=0 when quota is full
* and total=free=4KB, used=0 otherwise */
if (!total) {
total = 1;
free = ci->i_max_bytes > ci->i_rbytes ? 1 : 0;
buf->f_frsize = 1 << CEPH_4K_BLOCK_SHIFT;
}
} }
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
if (total) { if (total) {
......
...@@ -27,6 +27,8 @@ ...@@ -27,6 +27,8 @@
#include <linux/ceph/auth.h> #include <linux/ceph/auth.h>
#include <linux/ceph/debugfs.h> #include <linux/ceph/debugfs.h>
#include <uapi/linux/magic.h>
static DEFINE_SPINLOCK(ceph_fsc_lock); static DEFINE_SPINLOCK(ceph_fsc_lock);
static LIST_HEAD(ceph_fsc_list); static LIST_HEAD(ceph_fsc_list);
...@@ -146,6 +148,7 @@ enum { ...@@ -146,6 +148,7 @@ enum {
Opt_mds_namespace, Opt_mds_namespace,
Opt_recover_session, Opt_recover_session,
Opt_source, Opt_source,
Opt_mon_addr,
/* string args above */ /* string args above */
Opt_dirstat, Opt_dirstat,
Opt_rbytes, Opt_rbytes,
...@@ -159,6 +162,7 @@ enum { ...@@ -159,6 +162,7 @@ enum {
Opt_quotadf, Opt_quotadf,
Opt_copyfrom, Opt_copyfrom,
Opt_wsync, Opt_wsync,
Opt_pagecache,
}; };
enum ceph_recover_session_mode { enum ceph_recover_session_mode {
...@@ -197,8 +201,10 @@ static const struct fs_parameter_spec ceph_mount_parameters[] = { ...@@ -197,8 +201,10 @@ static const struct fs_parameter_spec ceph_mount_parameters[] = {
fsparam_u32 ("rsize", Opt_rsize), fsparam_u32 ("rsize", Opt_rsize),
fsparam_string ("snapdirname", Opt_snapdirname), fsparam_string ("snapdirname", Opt_snapdirname),
fsparam_string ("source", Opt_source), fsparam_string ("source", Opt_source),
fsparam_string ("mon_addr", Opt_mon_addr),
fsparam_u32 ("wsize", Opt_wsize), fsparam_u32 ("wsize", Opt_wsize),
fsparam_flag_no ("wsync", Opt_wsync), fsparam_flag_no ("wsync", Opt_wsync),
fsparam_flag_no ("pagecache", Opt_pagecache),
{} {}
}; };
...@@ -228,9 +234,92 @@ static void canonicalize_path(char *path) ...@@ -228,9 +234,92 @@ static void canonicalize_path(char *path)
} }
/* /*
* Parse the source parameter. Distinguish the server list from the path. * Check if the mds namespace in ceph_mount_options matches
* the passed in namespace string. First time match (when
* ->mds_namespace is NULL) is treated specially, since
* ->mds_namespace needs to be initialized by the caller.
*/
/*
 * Compare the mds namespace recorded in fsopt with the len-byte string
 * starting at namespace (which need not be NUL-terminated at len).
 *
 * Returns 1 when nothing has been recorded yet (->mds_namespace is NULL)
 * or when the recorded name is exactly those len bytes, 0 otherwise.
 */
static int namespace_equals(struct ceph_mount_options *fsopt,
			    const char *namespace, size_t len)
{
	const char *recorded = fsopt->mds_namespace;

	/* first-time match: the caller is expected to initialize it */
	if (!recorded)
		return 1;

	/* a different length can never be the same name */
	if (strlen(recorded) != len)
		return 0;

	return strncmp(recorded, namespace, len) == 0;
}
/*
 * Parse an old-style device spec.
 *
 * [dev_name, dev_name_end) holds a comma-separated monitor list and
 * dev_name_end must point at the ':' separator.  The monitors are parsed
 * into pctx->copts and, on success, ->new_dev_syntax is cleared.
 *
 * Returns 0 on success, the invalfc() result when the separator is
 * missing, or the error from ceph_parse_mon_ips().
 */
static int ceph_parse_old_source(const char *dev_name, const char *dev_name_end,
				 struct fs_context *fc)
{
	struct ceph_parse_opts_ctx *pctx = fc->fs_private;
	struct ceph_mount_options *opts = pctx->opts;
	int err;

	if (*dev_name_end != ':')
		return invalfc(fc, "separator ':' missing in source");

	err = ceph_parse_mon_ips(dev_name, dev_name_end - dev_name,
				 pctx->copts, fc->log.log, ',');
	if (!err)
		opts->new_dev_syntax = false;

	return err;
}
/*
 * Parse a new-style device spec of the form <name>@<fsid>.<fs_name>.
 *
 * @dev_name:     start of the source string
 * @dev_name_end: points at the separator that ends the device spec; it
 *                must be '=' for the new syntax
 * @fc:           fs context; fc->fs_private carries the parse context
 *
 * On success the file system name is stored in ->mds_namespace (after
 * checking it against any name recorded earlier) and ->new_dev_syntax is
 * set.  The fsid is only checked for being parseable here; the parsed
 * value is not used by this function.
 *
 * Returns 0 on success, -ENOMEM if the name cannot be duplicated, and
 * -EINVAL (directly or via invalfc()) for a malformed spec.
 */
static int ceph_parse_new_source(const char *dev_name, const char *dev_name_end,
				 struct fs_context *fc)
{
	size_t len;
	struct ceph_fsid fsid;
	struct ceph_parse_opts_ctx *pctx = fc->fs_private;
	struct ceph_mount_options *fsopt = pctx->opts;
	char *fsid_start, *fs_name_start;

	if (*dev_name_end != '=') {
		dout("separator '=' missing in source");
		return -EINVAL;
	}

	/*
	 * Search for '@' and '.' only inside [dev_name, dev_name_end).
	 * The string does not stop at the separator (the path follows it),
	 * so an unbounded search could match inside the path, leave
	 * fs_name_start beyond dev_name_end and make len wrap around.
	 */
	fsid_start = memchr(dev_name, '@', dev_name_end - dev_name);
	if (!fsid_start)
		return invalfc(fc, "missing cluster fsid");
	++fsid_start; /* start of cluster fsid */

	fs_name_start = memchr(fsid_start, '.', dev_name_end - fsid_start);
	if (!fs_name_start)
		return invalfc(fc, "missing file system name");

	if (ceph_parse_fsid(fsid_start, &fsid))
		return invalfc(fc, "Invalid FSID");

	++fs_name_start; /* start of file system name */
	len = dev_name_end - fs_name_start;

	if (!namespace_equals(fsopt, fs_name_start, len))
		return invalfc(fc, "Mismatching mds_namespace");

	/* replace any previously recorded name with our own copy */
	kfree(fsopt->mds_namespace);
	fsopt->mds_namespace = kstrndup(fs_name_start, len, GFP_KERNEL);
	if (!fsopt->mds_namespace)
		return -ENOMEM;
	dout("file system (mds namespace) '%s'\n", fsopt->mds_namespace);

	fsopt->new_dev_syntax = true;
	return 0;
}
/*
* Parse the source parameter for new device format. Distinguish the device
* spec from the path. Try parsing new device format and fallback to old
* format if needed.
*
* New device syntax looks like:
* <device_spec>=/<path>
* where
* <device_spec> is name@fsid.fsname
* <path> is optional, but if present must begin with '/'
* (monitor addresses are passed via mount option)
* *
* The source will look like: * Old device syntax is:
* <server_spec>[,<server_spec>...]:[<path>] * <server_spec>[,<server_spec>...]:[<path>]
* where * where
* <server_spec> is <ip>[:<port>] * <server_spec> is <ip>[:<port>]
...@@ -263,24 +352,44 @@ static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc) ...@@ -263,24 +352,44 @@ static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc)
dev_name_end = dev_name + strlen(dev_name); dev_name_end = dev_name + strlen(dev_name);
} }
dev_name_end--; /* back up to ':' separator */ dev_name_end--; /* back up to separator */
if (dev_name_end < dev_name || *dev_name_end != ':') if (dev_name_end < dev_name)
return invalfc(fc, "No path or : separator in source"); return invalfc(fc, "Path missing in source");
dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
if (fsopt->server_path) if (fsopt->server_path)
dout("server path '%s'\n", fsopt->server_path); dout("server path '%s'\n", fsopt->server_path);
ret = ceph_parse_mon_ips(param->string, dev_name_end - dev_name, dout("trying new device syntax");
pctx->copts, fc->log.log); ret = ceph_parse_new_source(dev_name, dev_name_end, fc);
if (ret) if (ret) {
return ret; if (ret != -EINVAL)
return ret;
dout("trying old device syntax");
ret = ceph_parse_old_source(dev_name, dev_name_end, fc);
if (ret)
return ret;
}
fc->source = param->string; fc->source = param->string;
param->string = NULL; param->string = NULL;
return 0; return 0;
} }
/*
 * Handle the "mon_addr" mount option.
 *
 * Takes over param->string as the new ->mon_addr (freeing any earlier
 * value) and parses it as a '/'-separated monitor list into pctx->copts.
 *
 * Returns whatever ceph_parse_mon_ips() returns.
 */
static int ceph_parse_mon_addr(struct fs_parameter *param,
			       struct fs_context *fc)
{
	struct ceph_parse_opts_ctx *pctx = fc->fs_private;
	struct ceph_mount_options *opts = pctx->opts;
	char *addrs = param->string;

	/* the options struct owns the string from here on */
	param->string = NULL;
	kfree(opts->mon_addr);
	opts->mon_addr = addrs;

	return ceph_parse_mon_ips(addrs, strlen(addrs), pctx->copts,
				  fc->log.log, '/');
}
static int ceph_parse_mount_param(struct fs_context *fc, static int ceph_parse_mount_param(struct fs_context *fc,
struct fs_parameter *param) struct fs_parameter *param)
{ {
...@@ -306,6 +415,8 @@ static int ceph_parse_mount_param(struct fs_context *fc, ...@@ -306,6 +415,8 @@ static int ceph_parse_mount_param(struct fs_context *fc,
param->string = NULL; param->string = NULL;
break; break;
case Opt_mds_namespace: case Opt_mds_namespace:
if (!namespace_equals(fsopt, param->string, strlen(param->string)))
return invalfc(fc, "Mismatching mds_namespace");
kfree(fsopt->mds_namespace); kfree(fsopt->mds_namespace);
fsopt->mds_namespace = param->string; fsopt->mds_namespace = param->string;
param->string = NULL; param->string = NULL;
...@@ -323,6 +434,8 @@ static int ceph_parse_mount_param(struct fs_context *fc, ...@@ -323,6 +434,8 @@ static int ceph_parse_mount_param(struct fs_context *fc,
if (fc->source) if (fc->source)
return invalfc(fc, "Multiple sources specified"); return invalfc(fc, "Multiple sources specified");
return ceph_parse_source(param, fc); return ceph_parse_source(param, fc);
case Opt_mon_addr:
return ceph_parse_mon_addr(param, fc);
case Opt_wsize: case Opt_wsize:
if (result.uint_32 < PAGE_SIZE || if (result.uint_32 < PAGE_SIZE ||
result.uint_32 > CEPH_MAX_WRITE_SIZE) result.uint_32 > CEPH_MAX_WRITE_SIZE)
...@@ -455,6 +568,12 @@ static int ceph_parse_mount_param(struct fs_context *fc, ...@@ -455,6 +568,12 @@ static int ceph_parse_mount_param(struct fs_context *fc,
else else
fsopt->flags |= CEPH_MOUNT_OPT_ASYNC_DIROPS; fsopt->flags |= CEPH_MOUNT_OPT_ASYNC_DIROPS;
break; break;
case Opt_pagecache:
if (result.negated)
fsopt->flags |= CEPH_MOUNT_OPT_NOPAGECACHE;
else
fsopt->flags &= ~CEPH_MOUNT_OPT_NOPAGECACHE;
break;
default: default:
BUG(); BUG();
} }
...@@ -474,6 +593,7 @@ static void destroy_mount_options(struct ceph_mount_options *args) ...@@ -474,6 +593,7 @@ static void destroy_mount_options(struct ceph_mount_options *args)
kfree(args->mds_namespace); kfree(args->mds_namespace);
kfree(args->server_path); kfree(args->server_path);
kfree(args->fscache_uniq); kfree(args->fscache_uniq);
kfree(args->mon_addr);
kfree(args); kfree(args);
} }
...@@ -517,6 +637,10 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt, ...@@ -517,6 +637,10 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt,
if (ret) if (ret)
return ret; return ret;
ret = strcmp_null(fsopt1->mon_addr, fsopt2->mon_addr);
if (ret)
return ret;
return ceph_compare_options(new_opt, fsc->client); return ceph_compare_options(new_opt, fsc->client);
} }
...@@ -572,15 +696,22 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) ...@@ -572,15 +696,22 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0) if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0)
seq_puts(m, ",copyfrom"); seq_puts(m, ",copyfrom");
if (fsopt->mds_namespace) /* dump mds_namespace when old device syntax is in use */
if (fsopt->mds_namespace && !fsopt->new_dev_syntax)
seq_show_option(m, "mds_namespace", fsopt->mds_namespace); seq_show_option(m, "mds_namespace", fsopt->mds_namespace);
if (fsopt->mon_addr)
seq_printf(m, ",mon_addr=%s", fsopt->mon_addr);
if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER) if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER)
seq_show_option(m, "recover_session", "clean"); seq_show_option(m, "recover_session", "clean");
if (!(fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS)) if (!(fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS))
seq_puts(m, ",wsync"); seq_puts(m, ",wsync");
if (fsopt->flags & CEPH_MOUNT_OPT_NOPAGECACHE)
seq_puts(m, ",nopagecache");
if (fsopt->wsize != CEPH_MAX_WRITE_SIZE) if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
seq_printf(m, ",wsize=%u", fsopt->wsize); seq_printf(m, ",wsize=%u", fsopt->wsize);
if (fsopt->rsize != CEPH_MAX_READ_SIZE) if (fsopt->rsize != CEPH_MAX_READ_SIZE)
...@@ -1052,6 +1183,7 @@ static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc) ...@@ -1052,6 +1183,7 @@ static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc)
static int ceph_get_tree(struct fs_context *fc) static int ceph_get_tree(struct fs_context *fc)
{ {
struct ceph_parse_opts_ctx *pctx = fc->fs_private; struct ceph_parse_opts_ctx *pctx = fc->fs_private;
struct ceph_mount_options *fsopt = pctx->opts;
struct super_block *sb; struct super_block *sb;
struct ceph_fs_client *fsc; struct ceph_fs_client *fsc;
struct dentry *res; struct dentry *res;
...@@ -1063,6 +1195,8 @@ static int ceph_get_tree(struct fs_context *fc) ...@@ -1063,6 +1195,8 @@ static int ceph_get_tree(struct fs_context *fc)
if (!fc->source) if (!fc->source)
return invalfc(fc, "No source"); return invalfc(fc, "No source");
if (fsopt->new_dev_syntax && !fsopt->mon_addr)
return invalfc(fc, "No monitor address");
/* create client (which we may/may not use) */ /* create client (which we may/may not use) */
fsc = create_fs_client(pctx->opts, pctx->copts); fsc = create_fs_client(pctx->opts, pctx->copts);
...@@ -1148,6 +1282,13 @@ static int ceph_reconfigure_fc(struct fs_context *fc) ...@@ -1148,6 +1282,13 @@ static int ceph_reconfigure_fc(struct fs_context *fc)
else else
ceph_clear_mount_opt(fsc, ASYNC_DIROPS); ceph_clear_mount_opt(fsc, ASYNC_DIROPS);
if (strcmp_null(fsc->mount_options->mon_addr, fsopt->mon_addr)) {
kfree(fsc->mount_options->mon_addr);
fsc->mount_options->mon_addr = fsopt->mon_addr;
fsopt->mon_addr = NULL;
pr_notice("ceph: monitor addresses recorded, but not used for reconnection");
}
sync_filesystem(fc->root->d_sb); sync_filesystem(fc->root->d_sb);
return 0; return 0;
} }
...@@ -1325,6 +1466,14 @@ bool disable_send_metrics = false; ...@@ -1325,6 +1466,14 @@ bool disable_send_metrics = false;
module_param_cb(disable_send_metrics, &param_ops_metrics, &disable_send_metrics, 0644); module_param_cb(disable_send_metrics, &param_ops_metrics, &disable_send_metrics, 0644);
MODULE_PARM_DESC(disable_send_metrics, "Enable sending perf metrics to ceph cluster (default: on)"); MODULE_PARM_DESC(disable_send_metrics, "Enable sending perf metrics to ceph cluster (default: on)");
/*
 * Advertise the supported mount device syntaxes as module parameters.
 *
 * Both mount_syntax_v1 and mount_syntax_v2 are backed by the same
 * always-true flag, so each one reads back as enabled.
 * NOTE(review): presumably consumed by the userspace mount helper to pick
 * a device string format -- confirm against mount.ceph.
 */
static bool mount_support = true;
/* only a getter is provided: the parameters cannot be written */
static const struct kernel_param_ops param_ops_mount_syntax = {
.get = param_get_bool,
};
/* 0444: readable by everyone, writable by no one */
module_param_cb(mount_syntax_v1, &param_ops_mount_syntax, &mount_support, 0444);
module_param_cb(mount_syntax_v2, &param_ops_mount_syntax, &mount_support, 0444);
module_init(init_ceph); module_init(init_ceph);
module_exit(exit_ceph); module_exit(exit_ceph);
......
...@@ -24,13 +24,11 @@ ...@@ -24,13 +24,11 @@
#include <linux/fscache.h> #include <linux/fscache.h>
#endif #endif
/* f_type in struct statfs */
#define CEPH_SUPER_MAGIC 0x00c36400
/* large granularity for statfs utilization stats to facilitate /* large granularity for statfs utilization stats to facilitate
* large volume sizes on 32-bit machines. */ * large volume sizes on 32-bit machines. */
#define CEPH_BLOCK_SHIFT 22 /* 4 MB */ #define CEPH_BLOCK_SHIFT 22 /* 4 MB */
#define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT)
#define CEPH_4K_BLOCK_SHIFT 12 /* 4 KB */
#define CEPH_MOUNT_OPT_CLEANRECOVER (1<<1) /* auto reonnect (clean mode) after blocklisted */ #define CEPH_MOUNT_OPT_CLEANRECOVER (1<<1) /* auto reonnect (clean mode) after blocklisted */
#define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */ #define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */
...@@ -44,6 +42,7 @@ ...@@ -44,6 +42,7 @@
#define CEPH_MOUNT_OPT_NOQUOTADF (1<<13) /* no root dir quota in statfs */ #define CEPH_MOUNT_OPT_NOQUOTADF (1<<13) /* no root dir quota in statfs */
#define CEPH_MOUNT_OPT_NOCOPYFROM (1<<14) /* don't use RADOS 'copy-from' op */ #define CEPH_MOUNT_OPT_NOCOPYFROM (1<<14) /* don't use RADOS 'copy-from' op */
#define CEPH_MOUNT_OPT_ASYNC_DIROPS (1<<15) /* allow async directory ops */ #define CEPH_MOUNT_OPT_ASYNC_DIROPS (1<<15) /* allow async directory ops */
#define CEPH_MOUNT_OPT_NOPAGECACHE (1<<16) /* bypass pagecache altogether */
#define CEPH_MOUNT_OPT_DEFAULT \ #define CEPH_MOUNT_OPT_DEFAULT \
(CEPH_MOUNT_OPT_DCACHE | \ (CEPH_MOUNT_OPT_DCACHE | \
...@@ -88,6 +87,8 @@ struct ceph_mount_options { ...@@ -88,6 +87,8 @@ struct ceph_mount_options {
unsigned int max_readdir; /* max readdir result (entries) */ unsigned int max_readdir; /* max readdir result (entries) */
unsigned int max_readdir_bytes; /* max readdir result (bytes) */ unsigned int max_readdir_bytes; /* max readdir result (bytes) */
bool new_dev_syntax;
/* /*
* everything above this point can be memcmp'd; everything below * everything above this point can be memcmp'd; everything below
* is handled in compare_mount_options() * is handled in compare_mount_options()
...@@ -97,6 +98,7 @@ struct ceph_mount_options { ...@@ -97,6 +98,7 @@ struct ceph_mount_options {
char *mds_namespace; /* default NULL */ char *mds_namespace; /* default NULL */
char *server_path; /* default NULL (means "/") */ char *server_path; /* default NULL (means "/") */
char *fscache_uniq; /* default NULL */ char *fscache_uniq; /* default NULL */
char *mon_addr;
}; };
struct ceph_fs_client { struct ceph_fs_client {
...@@ -534,19 +536,23 @@ static inline int ceph_ino_compare(struct inode *inode, void *data) ...@@ -534,19 +536,23 @@ static inline int ceph_ino_compare(struct inode *inode, void *data)
* *
* These come from src/mds/mdstypes.h in the ceph sources. * These come from src/mds/mdstypes.h in the ceph sources.
*/ */
#define CEPH_MAX_MDS 0x100 #define CEPH_MAX_MDS 0x100
#define CEPH_NUM_STRAY 10 #define CEPH_NUM_STRAY 10
#define CEPH_MDS_INO_MDSDIR_OFFSET (1 * CEPH_MAX_MDS) #define CEPH_MDS_INO_MDSDIR_OFFSET (1 * CEPH_MAX_MDS)
#define CEPH_MDS_INO_LOG_OFFSET (2 * CEPH_MAX_MDS)
#define CEPH_INO_SYSTEM_BASE ((6*CEPH_MAX_MDS) + (CEPH_MAX_MDS * CEPH_NUM_STRAY)) #define CEPH_INO_SYSTEM_BASE ((6*CEPH_MAX_MDS) + (CEPH_MAX_MDS * CEPH_NUM_STRAY))
static inline bool ceph_vino_is_reserved(const struct ceph_vino vino) static inline bool ceph_vino_is_reserved(const struct ceph_vino vino)
{ {
if (vino.ino < CEPH_INO_SYSTEM_BASE && if (vino.ino >= CEPH_INO_SYSTEM_BASE ||
vino.ino >= CEPH_MDS_INO_MDSDIR_OFFSET) { vino.ino < CEPH_MDS_INO_MDSDIR_OFFSET)
WARN_RATELIMIT(1, "Attempt to access reserved inode number 0x%llx", vino.ino); return false;
return true;
} /* Don't warn on mdsdirs */
return false; WARN_RATELIMIT(vino.ino >= CEPH_MDS_INO_LOG_OFFSET,
"Attempt to access reserved inode number 0x%llx",
vino.ino);
return true;
} }
static inline struct inode *ceph_find_inode(struct super_block *sb, static inline struct inode *ceph_find_inode(struct super_block *sb,
......
...@@ -295,12 +295,13 @@ extern bool libceph_compatible(void *data); ...@@ -295,12 +295,13 @@ extern bool libceph_compatible(void *data);
extern const char *ceph_msg_type_name(int type); extern const char *ceph_msg_type_name(int type);
extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
extern int ceph_parse_fsid(const char *str, struct ceph_fsid *fsid);
struct fs_parameter; struct fs_parameter;
struct fc_log; struct fc_log;
struct ceph_options *ceph_alloc_options(void); struct ceph_options *ceph_alloc_options(void);
int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt, int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt,
struct fc_log *l); struct fc_log *l, char delim);
int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt, int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
struct fc_log *l); struct fc_log *l);
int ceph_print_client_options(struct seq_file *m, struct ceph_client *client, int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
......
...@@ -532,7 +532,7 @@ extern const char *ceph_pr_addr(const struct ceph_entity_addr *addr); ...@@ -532,7 +532,7 @@ extern const char *ceph_pr_addr(const struct ceph_entity_addr *addr);
extern int ceph_parse_ips(const char *c, const char *end, extern int ceph_parse_ips(const char *c, const char *end,
struct ceph_entity_addr *addr, struct ceph_entity_addr *addr,
int max_count, int *count); int max_count, int *count, char delim);
extern int ceph_msgr_init(void); extern int ceph_msgr_init(void);
extern void ceph_msgr_exit(void); extern void ceph_msgr_exit(void);
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#define AFFS_SUPER_MAGIC 0xadff #define AFFS_SUPER_MAGIC 0xadff
#define AFS_SUPER_MAGIC 0x5346414F #define AFS_SUPER_MAGIC 0x5346414F
#define AUTOFS_SUPER_MAGIC 0x0187 #define AUTOFS_SUPER_MAGIC 0x0187
#define CEPH_SUPER_MAGIC 0x00c36400
#define CODA_SUPER_MAGIC 0x73757245 #define CODA_SUPER_MAGIC 0x73757245
#define CRAMFS_MAGIC 0x28cd3d45 /* some random number */ #define CRAMFS_MAGIC 0x28cd3d45 /* some random number */
#define CRAMFS_MAGIC_WEND 0x453dcd28 /* magic number with the wrong endianess */ #define CRAMFS_MAGIC_WEND 0x453dcd28 /* magic number with the wrong endianess */
......
...@@ -190,14 +190,14 @@ int ceph_compare_options(struct ceph_options *new_opt, ...@@ -190,14 +190,14 @@ int ceph_compare_options(struct ceph_options *new_opt,
} }
EXPORT_SYMBOL(ceph_compare_options); EXPORT_SYMBOL(ceph_compare_options);
static int parse_fsid(const char *str, struct ceph_fsid *fsid) int ceph_parse_fsid(const char *str, struct ceph_fsid *fsid)
{ {
int i = 0; int i = 0;
char tmp[3]; char tmp[3];
int err = -EINVAL; int err = -EINVAL;
int d; int d;
dout("parse_fsid '%s'\n", str); dout("%s '%s'\n", __func__, str);
tmp[2] = 0; tmp[2] = 0;
while (*str && i < 16) { while (*str && i < 16) {
if (ispunct(*str)) { if (ispunct(*str)) {
...@@ -217,9 +217,10 @@ static int parse_fsid(const char *str, struct ceph_fsid *fsid) ...@@ -217,9 +217,10 @@ static int parse_fsid(const char *str, struct ceph_fsid *fsid)
if (i == 16) if (i == 16)
err = 0; err = 0;
dout("parse_fsid ret %d got fsid %pU\n", err, fsid); dout("%s ret %d got fsid %pU\n", __func__, err, fsid);
return err; return err;
} }
EXPORT_SYMBOL(ceph_parse_fsid);
/* /*
* ceph options * ceph options
...@@ -395,14 +396,14 @@ static int get_secret(struct ceph_crypto_key *dst, const char *name, ...@@ -395,14 +396,14 @@ static int get_secret(struct ceph_crypto_key *dst, const char *name,
} }
int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt, int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt,
struct fc_log *l) struct fc_log *l, char delim)
{ {
struct p_log log = {.prefix = "libceph", .log = l}; struct p_log log = {.prefix = "libceph", .log = l};
int ret; int ret;
/* ip1[:port1][,ip2[:port2]...] */ /* ip1[:port1][<delim>ip2[:port2]...] */
ret = ceph_parse_ips(buf, buf + len, opt->mon_addr, CEPH_MAX_MON, ret = ceph_parse_ips(buf, buf + len, opt->mon_addr, CEPH_MAX_MON,
&opt->num_mon); &opt->num_mon, delim);
if (ret) { if (ret) {
error_plog(&log, "Failed to parse monitor IPs: %d", ret); error_plog(&log, "Failed to parse monitor IPs: %d", ret);
return ret; return ret;
...@@ -428,8 +429,7 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt, ...@@ -428,8 +429,7 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
case Opt_ip: case Opt_ip:
err = ceph_parse_ips(param->string, err = ceph_parse_ips(param->string,
param->string + param->size, param->string + param->size,
&opt->my_addr, &opt->my_addr, 1, NULL, ',');
1, NULL);
if (err) { if (err) {
error_plog(&log, "Failed to parse ip: %d", err); error_plog(&log, "Failed to parse ip: %d", err);
return err; return err;
...@@ -438,7 +438,7 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt, ...@@ -438,7 +438,7 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
break; break;
case Opt_fsid: case Opt_fsid:
err = parse_fsid(param->string, &opt->fsid); err = ceph_parse_fsid(param->string, &opt->fsid);
if (err) { if (err) {
error_plog(&log, "Failed to parse fsid: %d", err); error_plog(&log, "Failed to parse fsid: %d", err);
return err; return err;
......
...@@ -1267,30 +1267,31 @@ static int ceph_parse_server_name(const char *name, size_t namelen, ...@@ -1267,30 +1267,31 @@ static int ceph_parse_server_name(const char *name, size_t namelen,
*/ */
int ceph_parse_ips(const char *c, const char *end, int ceph_parse_ips(const char *c, const char *end,
struct ceph_entity_addr *addr, struct ceph_entity_addr *addr,
int max_count, int *count) int max_count, int *count, char delim)
{ {
int i, ret = -EINVAL; int i, ret = -EINVAL;
const char *p = c; const char *p = c;
dout("parse_ips on '%.*s'\n", (int)(end-c), c); dout("parse_ips on '%.*s'\n", (int)(end-c), c);
for (i = 0; i < max_count; i++) { for (i = 0; i < max_count; i++) {
char cur_delim = delim;
const char *ipend; const char *ipend;
int port; int port;
char delim = ',';
if (*p == '[') { if (*p == '[') {
delim = ']'; cur_delim = ']';
p++; p++;
} }
ret = ceph_parse_server_name(p, end - p, &addr[i], delim, &ipend); ret = ceph_parse_server_name(p, end - p, &addr[i], cur_delim,
&ipend);
if (ret) if (ret)
goto bad; goto bad;
ret = -EINVAL; ret = -EINVAL;
p = ipend; p = ipend;
if (delim == ']') { if (cur_delim == ']') {
if (*p != ']') { if (*p != ']') {
dout("missing matching ']'\n"); dout("missing matching ']'\n");
goto bad; goto bad;
...@@ -1326,11 +1327,11 @@ int ceph_parse_ips(const char *c, const char *end, ...@@ -1326,11 +1327,11 @@ int ceph_parse_ips(const char *c, const char *end,
addr[i].type = CEPH_ENTITY_ADDR_TYPE_LEGACY; addr[i].type = CEPH_ENTITY_ADDR_TYPE_LEGACY;
addr[i].nonce = 0; addr[i].nonce = 0;
dout("parse_ips got %s\n", ceph_pr_addr(&addr[i])); dout("%s got %s\n", __func__, ceph_pr_addr(&addr[i]));
if (p == end) if (p == end)
break; break;
if (*p != ',') if (*p != delim)
goto bad; goto bad;
p++; p++;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册