提交 0a78ac4b 作者: Linus Torvalds

Merge tag 'ceph-for-4.19-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "The main things are support for cephx v2 authentication protocol and
  basic support for rbd images within namespaces (myself).

  Also included are y2038 conversion patches from Arnd, a pile of
  miscellaneous fixes from Chengguang and Zheng's feature bit
  infrastructure for the filesystem"

* tag 'ceph-for-4.19-rc1' of git://github.com/ceph/ceph-client: (40 commits)
  ceph: don't drop message if it contains more data than expected
  ceph: support cephfs' own feature bits
  crush: fix using plain integer as NULL warning
  libceph: remove unnecessary non NULL check for request_key
  ceph: refactor error handling code in ceph_reserve_caps()
  ceph: refactor ceph_unreserve_caps()
  ceph: change to void return type for __do_request()
  ceph: compare fsc->max_file_size and inode->i_size for max file size limit
  ceph: add additional size check in ceph_setattr()
  ceph: add additional offset check in ceph_write_iter()
  ceph: add additional range check in ceph_fallocate()
  ceph: add new field max_file_size in ceph_fs_client
  libceph: weaken sizeof check in ceph_x_verify_authorizer_reply()
  libceph: check authorizer reply/challenge length before reading
  libceph: implement CEPHX_V2 calculation mode
  libceph: add authorizer challenge
  libceph: factor out encrypt_authorizer()
  libceph: factor out __ceph_x_decrypt()
  libceph: factor out __prepare_write_connect()
  libceph: store ceph_auth_handshake pointer in ceph_connection
  ...
......@@ -181,6 +181,7 @@ struct rbd_image_header {
struct rbd_spec {
u64 pool_id;
const char *pool_name;
const char *pool_ns; /* NULL if default, never "" */
const char *image_id;
const char *image_name;
......@@ -735,6 +736,7 @@ enum {
Opt_lock_timeout,
Opt_last_int,
/* int args above */
Opt_pool_ns,
Opt_last_string,
/* string args above */
Opt_read_only,
......@@ -749,6 +751,7 @@ static match_table_t rbd_opts_tokens = {
{Opt_queue_depth, "queue_depth=%d"},
{Opt_lock_timeout, "lock_timeout=%d"},
/* int args above */
{Opt_pool_ns, "_pool_ns=%s"},
/* string args above */
{Opt_read_only, "read_only"},
{Opt_read_only, "ro"}, /* Alternate spelling */
......@@ -776,9 +779,14 @@ struct rbd_options {
#define RBD_EXCLUSIVE_DEFAULT false
#define RBD_TRIM_DEFAULT true
struct parse_rbd_opts_ctx {
struct rbd_spec *spec;
struct rbd_options *opts;
};
static int parse_rbd_opts_token(char *c, void *private)
{
struct rbd_options *rbd_opts = private;
struct parse_rbd_opts_ctx *pctx = private;
substring_t argstr[MAX_OPT_ARGS];
int token, intval, ret;
......@@ -786,7 +794,7 @@ static int parse_rbd_opts_token(char *c, void *private)
if (token < Opt_last_int) {
ret = match_int(&argstr[0], &intval);
if (ret < 0) {
pr_err("bad mount option arg (not int) at '%s'\n", c);
pr_err("bad option arg (not int) at '%s'\n", c);
return ret;
}
dout("got int token %d val %d\n", token, intval);
......@@ -802,7 +810,7 @@ static int parse_rbd_opts_token(char *c, void *private)
pr_err("queue_depth out of range\n");
return -EINVAL;
}
rbd_opts->queue_depth = intval;
pctx->opts->queue_depth = intval;
break;
case Opt_lock_timeout:
/* 0 is "wait forever" (i.e. infinite timeout) */
......@@ -810,22 +818,28 @@ static int parse_rbd_opts_token(char *c, void *private)
pr_err("lock_timeout out of range\n");
return -EINVAL;
}
rbd_opts->lock_timeout = msecs_to_jiffies(intval * 1000);
pctx->opts->lock_timeout = msecs_to_jiffies(intval * 1000);
break;
case Opt_pool_ns:
kfree(pctx->spec->pool_ns);
pctx->spec->pool_ns = match_strdup(argstr);
if (!pctx->spec->pool_ns)
return -ENOMEM;
break;
case Opt_read_only:
rbd_opts->read_only = true;
pctx->opts->read_only = true;
break;
case Opt_read_write:
rbd_opts->read_only = false;
pctx->opts->read_only = false;
break;
case Opt_lock_on_read:
rbd_opts->lock_on_read = true;
pctx->opts->lock_on_read = true;
break;
case Opt_exclusive:
rbd_opts->exclusive = true;
pctx->opts->exclusive = true;
break;
case Opt_notrim:
rbd_opts->trim = false;
pctx->opts->trim = false;
break;
default:
/* libceph prints "bad option" msg */
......@@ -1452,7 +1466,7 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request)
struct ceph_osd_request *osd_req = obj_request->osd_req;
osd_req->r_flags = CEPH_OSD_FLAG_WRITE;
ktime_get_real_ts(&osd_req->r_mtime);
ktime_get_real_ts64(&osd_req->r_mtime);
osd_req->r_data_offset = obj_request->ex.oe_off;
}
......@@ -1475,7 +1489,13 @@ rbd_osd_req_create(struct rbd_obj_request *obj_req, unsigned int num_ops)
req->r_callback = rbd_osd_req_callback;
req->r_priv = obj_req;
/*
* Data objects may be stored in a separate pool, but always in
* the same namespace in that pool as the header in its pool.
*/
ceph_oloc_copy(&req->r_base_oloc, &rbd_dev->header_oloc);
req->r_base_oloc.pool = rbd_dev->layout.pool_id;
if (ceph_oid_aprintf(&req->r_base_oid, GFP_NOIO, name_format,
rbd_dev->header.object_prefix, obj_req->ex.oe_objno))
goto err_req;
......@@ -4119,6 +4139,14 @@ static ssize_t rbd_pool_id_show(struct device *dev,
(unsigned long long) rbd_dev->spec->pool_id);
}
static ssize_t rbd_pool_ns_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
return sprintf(buf, "%s\n", rbd_dev->spec->pool_ns ?: "");
}
static ssize_t rbd_name_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
......@@ -4217,6 +4245,7 @@ static DEVICE_ATTR(cluster_fsid, 0444, rbd_cluster_fsid_show, NULL);
static DEVICE_ATTR(config_info, 0400, rbd_config_info_show, NULL);
static DEVICE_ATTR(pool, 0444, rbd_pool_show, NULL);
static DEVICE_ATTR(pool_id, 0444, rbd_pool_id_show, NULL);
static DEVICE_ATTR(pool_ns, 0444, rbd_pool_ns_show, NULL);
static DEVICE_ATTR(name, 0444, rbd_name_show, NULL);
static DEVICE_ATTR(image_id, 0444, rbd_image_id_show, NULL);
static DEVICE_ATTR(refresh, 0200, NULL, rbd_image_refresh);
......@@ -4235,6 +4264,7 @@ static struct attribute *rbd_attrs[] = {
&dev_attr_config_info.attr,
&dev_attr_pool.attr,
&dev_attr_pool_id.attr,
&dev_attr_pool_ns.attr,
&dev_attr_name.attr,
&dev_attr_image_id.attr,
&dev_attr_current_snap.attr,
......@@ -4295,6 +4325,7 @@ static void rbd_spec_free(struct kref *kref)
struct rbd_spec *spec = container_of(kref, struct rbd_spec, kref);
kfree(spec->pool_name);
kfree(spec->pool_ns);
kfree(spec->image_id);
kfree(spec->image_name);
kfree(spec->snap_name);
......@@ -4353,6 +4384,12 @@ static struct rbd_device *__rbd_dev_create(struct rbd_client *rbdc,
rbd_dev->header.data_pool_id = CEPH_NOPOOL;
ceph_oid_init(&rbd_dev->header_oid);
rbd_dev->header_oloc.pool = spec->pool_id;
if (spec->pool_ns) {
WARN_ON(!*spec->pool_ns);
rbd_dev->header_oloc.pool_ns =
ceph_find_or_create_string(spec->pool_ns,
strlen(spec->pool_ns));
}
mutex_init(&rbd_dev->watch_mutex);
rbd_dev->watch_state = RBD_WATCH_STATE_UNREGISTERED;
......@@ -4633,6 +4670,17 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
parent_spec->pool_id = pool_id;
parent_spec->image_id = image_id;
parent_spec->snap_id = snap_id;
/* TODO: support cloning across namespaces */
if (rbd_dev->spec->pool_ns) {
parent_spec->pool_ns = kstrdup(rbd_dev->spec->pool_ns,
GFP_KERNEL);
if (!parent_spec->pool_ns) {
ret = -ENOMEM;
goto out_err;
}
}
rbd_dev->parent_spec = parent_spec;
parent_spec = NULL; /* rbd_dev now owns this */
} else {
......@@ -5146,8 +5194,7 @@ static int rbd_add_parse_args(const char *buf,
const char *mon_addrs;
char *snap_name;
size_t mon_addrs_size;
struct rbd_spec *spec = NULL;
struct rbd_options *rbd_opts = NULL;
struct parse_rbd_opts_ctx pctx = { 0 };
struct ceph_options *copts;
int ret;
......@@ -5171,22 +5218,22 @@ static int rbd_add_parse_args(const char *buf,
goto out_err;
}
spec = rbd_spec_alloc();
if (!spec)
pctx.spec = rbd_spec_alloc();
if (!pctx.spec)
goto out_mem;
spec->pool_name = dup_token(&buf, NULL);
if (!spec->pool_name)
pctx.spec->pool_name = dup_token(&buf, NULL);
if (!pctx.spec->pool_name)
goto out_mem;
if (!*spec->pool_name) {
if (!*pctx.spec->pool_name) {
rbd_warn(NULL, "no pool name provided");
goto out_err;
}
spec->image_name = dup_token(&buf, NULL);
if (!spec->image_name)
pctx.spec->image_name = dup_token(&buf, NULL);
if (!pctx.spec->image_name)
goto out_mem;
if (!*spec->image_name) {
if (!*pctx.spec->image_name) {
rbd_warn(NULL, "no image name provided");
goto out_err;
}
......@@ -5207,24 +5254,24 @@ static int rbd_add_parse_args(const char *buf,
if (!snap_name)
goto out_mem;
*(snap_name + len) = '\0';
spec->snap_name = snap_name;
pctx.spec->snap_name = snap_name;
/* Initialize all rbd options to the defaults */
rbd_opts = kzalloc(sizeof (*rbd_opts), GFP_KERNEL);
if (!rbd_opts)
pctx.opts = kzalloc(sizeof(*pctx.opts), GFP_KERNEL);
if (!pctx.opts)
goto out_mem;
rbd_opts->read_only = RBD_READ_ONLY_DEFAULT;
rbd_opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT;
rbd_opts->lock_timeout = RBD_LOCK_TIMEOUT_DEFAULT;
rbd_opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT;
rbd_opts->exclusive = RBD_EXCLUSIVE_DEFAULT;
rbd_opts->trim = RBD_TRIM_DEFAULT;
pctx.opts->read_only = RBD_READ_ONLY_DEFAULT;
pctx.opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT;
pctx.opts->lock_timeout = RBD_LOCK_TIMEOUT_DEFAULT;
pctx.opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT;
pctx.opts->exclusive = RBD_EXCLUSIVE_DEFAULT;
pctx.opts->trim = RBD_TRIM_DEFAULT;
copts = ceph_parse_options(options, mon_addrs,
mon_addrs + mon_addrs_size - 1,
parse_rbd_opts_token, rbd_opts);
mon_addrs + mon_addrs_size - 1,
parse_rbd_opts_token, &pctx);
if (IS_ERR(copts)) {
ret = PTR_ERR(copts);
goto out_err;
......@@ -5232,15 +5279,15 @@ static int rbd_add_parse_args(const char *buf,
kfree(options);
*ceph_opts = copts;
*opts = rbd_opts;
*rbd_spec = spec;
*opts = pctx.opts;
*rbd_spec = pctx.spec;
return 0;
out_mem:
ret = -ENOMEM;
out_err:
kfree(rbd_opts);
rbd_spec_put(spec);
kfree(pctx.opts);
rbd_spec_put(pctx.spec);
kfree(options);
return ret;
......@@ -5586,8 +5633,10 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
ret = rbd_register_watch(rbd_dev);
if (ret) {
if (ret == -ENOENT)
pr_info("image %s/%s does not exist\n",
pr_info("image %s/%s%s%s does not exist\n",
rbd_dev->spec->pool_name,
rbd_dev->spec->pool_ns ?: "",
rbd_dev->spec->pool_ns ? "/" : "",
rbd_dev->spec->image_name);
goto err_out_format;
}
......@@ -5609,8 +5658,10 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
ret = rbd_spec_fill_names(rbd_dev);
if (ret) {
if (ret == -ENOENT)
pr_info("snap %s/%s@%s does not exist\n",
pr_info("snap %s/%s%s%s@%s does not exist\n",
rbd_dev->spec->pool_name,
rbd_dev->spec->pool_ns ?: "",
rbd_dev->spec->pool_ns ? "/" : "",
rbd_dev->spec->image_name,
rbd_dev->spec->snap_name);
goto err_out_probe;
......
......@@ -45,6 +45,7 @@ static inline void ceph_set_cached_acl(struct inode *inode,
struct posix_acl *ceph_get_acl(struct inode *inode, int type)
{
int size;
unsigned int retry_cnt = 0;
const char *name;
char *value = NULL;
struct posix_acl *acl;
......@@ -60,6 +61,7 @@ struct posix_acl *ceph_get_acl(struct inode *inode, int type)
BUG();
}
retry:
size = __ceph_getxattr(inode, name, "", 0);
if (size > 0) {
value = kzalloc(size, GFP_NOFS);
......@@ -68,12 +70,22 @@ struct posix_acl *ceph_get_acl(struct inode *inode, int type)
size = __ceph_getxattr(inode, name, value, size);
}
if (size > 0)
if (size == -ERANGE && retry_cnt < 10) {
retry_cnt++;
kfree(value);
value = NULL;
goto retry;
}
if (size > 0) {
acl = posix_acl_from_xattr(&init_user_ns, value, size);
else if (size == -ERANGE || size == -ENODATA || size == 0)
} else if (size == -ENODATA || size == 0) {
acl = NULL;
else
} else {
pr_err_ratelimited("get acl %llx.%llx failed, err=%d\n",
ceph_vinop(inode), size);
acl = ERR_PTR(-EIO);
}
kfree(value);
......@@ -89,6 +101,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
const char *name = NULL;
char *value = NULL;
struct iattr newattrs;
struct timespec64 old_ctime = inode->i_ctime;
umode_t new_mode = inode->i_mode, old_mode = inode->i_mode;
switch (type) {
......@@ -133,7 +146,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
if (new_mode != old_mode) {
newattrs.ia_ctime = current_time(inode);
newattrs.ia_mode = new_mode;
newattrs.ia_valid = ATTR_MODE;
newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
ret = __ceph_setattr(inode, &newattrs);
if (ret)
goto out_free;
......@@ -142,8 +155,9 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
ret = __ceph_setxattr(inode, name, value, size, 0);
if (ret) {
if (new_mode != old_mode) {
newattrs.ia_ctime = old_ctime;
newattrs.ia_mode = old_mode;
newattrs.ia_valid = ATTR_MODE;
newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
__ceph_setattr(inode, &newattrs);
}
goto out_free;
......@@ -171,10 +185,10 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
return err;
if (acl) {
int ret = posix_acl_equiv_mode(acl, mode);
if (ret < 0)
err = posix_acl_equiv_mode(acl, mode);
if (err < 0)
goto out_err;
if (ret == 0) {
if (err == 0) {
posix_acl_release(acl);
acl = NULL;
}
......
......@@ -574,7 +574,6 @@ static u64 get_writepages_data_length(struct inode *inode,
*/
static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
{
struct timespec ts;
struct inode *inode;
struct ceph_inode_info *ci;
struct ceph_fs_client *fsc;
......@@ -625,12 +624,11 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
set_page_writeback(page);
ts = timespec64_to_timespec(inode->i_mtime);
err = ceph_osdc_writepages(&fsc->client->osdc, ceph_vino(inode),
&ci->i_layout, snapc, page_off, len,
ceph_wbc.truncate_seq,
ceph_wbc.truncate_size,
&ts, &page, 1);
&inode->i_mtime, &page, 1);
if (err < 0) {
struct writeback_control tmp_wbc;
if (!wbc)
......@@ -1134,7 +1132,7 @@ static int ceph_writepages_start(struct address_space *mapping,
pages = NULL;
}
req->r_mtime = timespec64_to_timespec(inode->i_mtime);
req->r_mtime = inode->i_mtime;
rc = ceph_osdc_start_request(&fsc->client->osdc, req, true);
BUG_ON(rc);
req = NULL;
......@@ -1431,7 +1429,7 @@ static void ceph_restore_sigs(sigset_t *oldset)
/*
* vm ops
*/
static int ceph_filemap_fault(struct vm_fault *vmf)
static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct inode *inode = file_inode(vma->vm_file);
......@@ -1439,8 +1437,9 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
struct ceph_file_info *fi = vma->vm_file->private_data;
struct page *pinned_page = NULL;
loff_t off = vmf->pgoff << PAGE_SHIFT;
int want, got, ret;
int want, got, err;
sigset_t oldset;
vm_fault_t ret = VM_FAULT_SIGBUS;
ceph_block_sigs(&oldset);
......@@ -1452,8 +1451,8 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
want = CEPH_CAP_FILE_CACHE;
got = 0;
ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page);
if (ret < 0)
err = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page);
if (err < 0)
goto out_restore;
dout("filemap_fault %p %llu~%zd got cap refs on %s\n",
......@@ -1465,16 +1464,17 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
ceph_add_rw_context(fi, &rw_ctx);
ret = filemap_fault(vmf);
ceph_del_rw_context(fi, &rw_ctx);
dout("filemap_fault %p %llu~%zd drop cap refs %s ret %x\n",
inode, off, (size_t)PAGE_SIZE,
ceph_cap_string(got), ret);
} else
ret = -EAGAIN;
err = -EAGAIN;
dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n",
inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got), ret);
if (pinned_page)
put_page(pinned_page);
ceph_put_cap_refs(ci, got);
if (ret != -EAGAIN)
if (err != -EAGAIN)
goto out_restore;
/* read inline data */
......@@ -1482,7 +1482,6 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
/* does not support inline data > PAGE_SIZE */
ret = VM_FAULT_SIGBUS;
} else {
int ret1;
struct address_space *mapping = inode->i_mapping;
struct page *page = find_or_create_page(mapping, 0,
mapping_gfp_constraint(mapping,
......@@ -1491,32 +1490,32 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
ret = VM_FAULT_OOM;
goto out_inline;
}
ret1 = __ceph_do_getattr(inode, page,
err = __ceph_do_getattr(inode, page,
CEPH_STAT_CAP_INLINE_DATA, true);
if (ret1 < 0 || off >= i_size_read(inode)) {
if (err < 0 || off >= i_size_read(inode)) {
unlock_page(page);
put_page(page);
if (ret1 < 0)
ret = ret1;
if (err == -ENOMEM)
ret = VM_FAULT_OOM;
else
ret = VM_FAULT_SIGBUS;
goto out_inline;
}
if (ret1 < PAGE_SIZE)
zero_user_segment(page, ret1, PAGE_SIZE);
if (err < PAGE_SIZE)
zero_user_segment(page, err, PAGE_SIZE);
else
flush_dcache_page(page);
SetPageUptodate(page);
vmf->page = page;
ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED;
out_inline:
dout("filemap_fault %p %llu~%zd read inline data ret %d\n",
dout("filemap_fault %p %llu~%zd read inline data ret %x\n",
inode, off, (size_t)PAGE_SIZE, ret);
}
out_restore:
ceph_restore_sigs(&oldset);
if (ret < 0)
ret = (ret == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS;
if (err < 0)
ret = vmf_error(err);
return ret;
}
......@@ -1524,7 +1523,7 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
/*
* Reuse write_begin here for simplicity.
*/
static int ceph_page_mkwrite(struct vm_fault *vmf)
static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct inode *inode = file_inode(vma->vm_file);
......@@ -1535,8 +1534,9 @@ static int ceph_page_mkwrite(struct vm_fault *vmf)
loff_t off = page_offset(page);
loff_t size = i_size_read(inode);
size_t len;
int want, got, ret;
int want, got, err;
sigset_t oldset;
vm_fault_t ret = VM_FAULT_SIGBUS;
prealloc_cf = ceph_alloc_cap_flush();
if (!prealloc_cf)
......@@ -1550,10 +1550,10 @@ static int ceph_page_mkwrite(struct vm_fault *vmf)
lock_page(page);
locked_page = page;
}
ret = ceph_uninline_data(vma->vm_file, locked_page);
err = ceph_uninline_data(vma->vm_file, locked_page);
if (locked_page)
unlock_page(locked_page);
if (ret < 0)
if (err < 0)
goto out_free;
}
......@@ -1570,9 +1570,9 @@ static int ceph_page_mkwrite(struct vm_fault *vmf)
want = CEPH_CAP_FILE_BUFFER;
got = 0;
ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len,
err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len,
&got, NULL);
if (ret < 0)
if (err < 0)
goto out_free;
dout("page_mkwrite %p %llu~%zd got cap refs on %s\n",
......@@ -1590,13 +1590,13 @@ static int ceph_page_mkwrite(struct vm_fault *vmf)
break;
}
ret = ceph_update_writeable_page(vma->vm_file, off, len, page);
if (ret >= 0) {
err = ceph_update_writeable_page(vma->vm_file, off, len, page);
if (err >= 0) {
/* success. we'll keep the page locked. */
set_page_dirty(page);
ret = VM_FAULT_LOCKED;
}
} while (ret == -EAGAIN);
} while (err == -EAGAIN);
if (ret == VM_FAULT_LOCKED ||
ci->i_inline_version != CEPH_INLINE_NONE) {
......@@ -1610,14 +1610,14 @@ static int ceph_page_mkwrite(struct vm_fault *vmf)
__mark_inode_dirty(inode, dirty);
}
dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %d\n",
dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %x\n",
inode, off, len, ceph_cap_string(got), ret);
ceph_put_cap_refs(ci, got);
out_free:
ceph_restore_sigs(&oldset);
ceph_free_cap_flush(prealloc_cf);
if (ret < 0)
ret = (ret == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS;
if (err < 0)
ret = vmf_error(err);
return ret;
}
......@@ -1734,7 +1734,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
goto out;
}
req->r_mtime = timespec64_to_timespec(inode->i_mtime);
req->r_mtime = inode->i_mtime;
err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
if (!err)
err = ceph_osdc_wait_request(&fsc->client->osdc, req);
......@@ -1776,7 +1776,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
goto out_put;
}
req->r_mtime = timespec64_to_timespec(inode->i_mtime);
req->r_mtime = inode->i_mtime;
err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
if (!err)
err = ceph_osdc_wait_request(&fsc->client->osdc, req);
......@@ -1937,7 +1937,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci,
0, false, true);
err = ceph_osdc_start_request(&fsc->client->osdc, rd_req, false);
wr_req->r_mtime = timespec64_to_timespec(ci->vfs_inode.i_mtime);
wr_req->r_mtime = ci->vfs_inode.i_mtime;
err2 = ceph_osdc_start_request(&fsc->client->osdc, wr_req, false);
if (!err)
......
......@@ -25,8 +25,9 @@
#include "cache.h"
struct ceph_aux_inode {
u64 version;
struct timespec mtime;
u64 version;
u64 mtime_sec;
u64 mtime_nsec;
};
struct fscache_netfs ceph_cache_netfs = {
......@@ -130,7 +131,8 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux(
memset(&aux, 0, sizeof(aux));
aux.version = ci->i_version;
aux.mtime = timespec64_to_timespec(inode->i_mtime);
aux.mtime_sec = inode->i_mtime.tv_sec;
aux.mtime_nsec = inode->i_mtime.tv_nsec;
if (memcmp(data, &aux, sizeof(aux)) != 0)
return FSCACHE_CHECKAUX_OBSOLETE;
......@@ -163,7 +165,8 @@ void ceph_fscache_register_inode_cookie(struct inode *inode)
if (!ci->fscache) {
memset(&aux, 0, sizeof(aux));
aux.version = ci->i_version;
aux.mtime = timespec64_to_timespec(inode->i_mtime);
aux.mtime_sec = inode->i_mtime.tv_sec;
aux.mtime_nsec = inode->i_mtime.tv_nsec;
ci->fscache = fscache_acquire_cookie(fsc->fscache,
&ceph_fscache_inode_object_def,
&ci->i_vino, sizeof(ci->i_vino),
......
......@@ -156,6 +156,37 @@ void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta)
spin_unlock(&mdsc->caps_list_lock);
}
/*
 * Give back @nr_caps reserved caps: subtract them from caps_reserve_count,
 * then either free that many entries from the head of mdsc->caps_list or
 * credit them to caps_avail_count.  Does nothing when @nr_caps is 0.
 *
 * NOTE(review): the callers visible in this file invoke this with
 * mdsc->caps_list_lock held -- presumably required; confirm.
 */
static void __ceph_unreserve_caps(struct ceph_mds_client *mdsc, int nr_caps)
{
	struct ceph_cap *victim;
	int left;

	if (!nr_caps)
		return;

	BUG_ON(mdsc->caps_reserve_count < nr_caps);
	mdsc->caps_reserve_count -= nr_caps;

	if (mdsc->caps_avail_count <
	    mdsc->caps_reserve_count + mdsc->caps_min_count) {
		/* available pool is below reserve + minimum: keep the caps */
		mdsc->caps_avail_count += nr_caps;
	} else {
		/* enough caps are available already: free the returned ones */
		mdsc->caps_total_count -= nr_caps;
		for (left = nr_caps; left > 0; left--) {
			victim = list_first_entry(&mdsc->caps_list,
						  struct ceph_cap, caps_item);
			list_del(&victim->caps_item);
			kmem_cache_free(ceph_cap_cachep, victim);
		}
	}

	dout("%s: caps %d = %d used + %d resv + %d avail\n",
	     __func__,
	     mdsc->caps_total_count, mdsc->caps_use_count,
	     mdsc->caps_reserve_count, mdsc->caps_avail_count);

	/* accounting invariant: total == used + reserved + available */
	BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
					 mdsc->caps_reserve_count +
					 mdsc->caps_avail_count);
}
/*
* Called under mdsc->mutex.
*/
......@@ -167,6 +198,7 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc,
int have;
int alloc = 0;
int max_caps;
int err = 0;
bool trimmed = false;
struct ceph_mds_session *s;
LIST_HEAD(newcaps);
......@@ -233,9 +265,14 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc,
pr_warn("reserve caps ctx=%p ENOMEM need=%d got=%d\n",
ctx, need, have + alloc);
goto out_nomem;
err = -ENOMEM;
break;
}
if (!err) {
BUG_ON(have + alloc != need);
ctx->count = need;
}
BUG_ON(have + alloc != need);
spin_lock(&mdsc->caps_list_lock);
mdsc->caps_total_count += alloc;
......@@ -245,77 +282,26 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc,
BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
mdsc->caps_reserve_count +
mdsc->caps_avail_count);
if (err)
__ceph_unreserve_caps(mdsc, have + alloc);
spin_unlock(&mdsc->caps_list_lock);
ctx->count = need;
dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n",
ctx, mdsc->caps_total_count, mdsc->caps_use_count,
mdsc->caps_reserve_count, mdsc->caps_avail_count);
return 0;
out_nomem:
spin_lock(&mdsc->caps_list_lock);
mdsc->caps_avail_count += have;
mdsc->caps_reserve_count -= have;
while (!list_empty(&newcaps)) {
cap = list_first_entry(&newcaps,
struct ceph_cap, caps_item);
list_del(&cap->caps_item);
/* Keep some preallocated caps around (ceph_min_count), to
* avoid lots of free/alloc churn. */
if (mdsc->caps_avail_count >=
mdsc->caps_reserve_count + mdsc->caps_min_count) {
kmem_cache_free(ceph_cap_cachep, cap);
} else {
mdsc->caps_avail_count++;
mdsc->caps_total_count++;
list_add(&cap->caps_item, &mdsc->caps_list);
}
}
BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
mdsc->caps_reserve_count +
mdsc->caps_avail_count);
spin_unlock(&mdsc->caps_list_lock);
return -ENOMEM;
return err;
}
int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
void ceph_unreserve_caps(struct ceph_mds_client *mdsc,
struct ceph_cap_reservation *ctx)
{
int i;
struct ceph_cap *cap;
dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count);
if (ctx->count) {
spin_lock(&mdsc->caps_list_lock);
BUG_ON(mdsc->caps_reserve_count < ctx->count);
mdsc->caps_reserve_count -= ctx->count;
if (mdsc->caps_avail_count >=
mdsc->caps_reserve_count + mdsc->caps_min_count) {
mdsc->caps_total_count -= ctx->count;
for (i = 0; i < ctx->count; i++) {
cap = list_first_entry(&mdsc->caps_list,
struct ceph_cap, caps_item);
list_del(&cap->caps_item);
kmem_cache_free(ceph_cap_cachep, cap);
}
} else {
mdsc->caps_avail_count += ctx->count;
}
ctx->count = 0;
dout("unreserve caps %d = %d used + %d resv + %d avail\n",
mdsc->caps_total_count, mdsc->caps_use_count,
mdsc->caps_reserve_count, mdsc->caps_avail_count);
BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
mdsc->caps_reserve_count +
mdsc->caps_avail_count);
spin_unlock(&mdsc->caps_list_lock);
}
return 0;
spin_lock(&mdsc->caps_list_lock);
__ceph_unreserve_caps(mdsc, ctx->count);
ctx->count = 0;
spin_unlock(&mdsc->caps_list_lock);
}
struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
......@@ -1125,7 +1111,7 @@ struct cap_msg_args {
u64 flush_tid, oldest_flush_tid, size, max_size;
u64 xattr_version;
struct ceph_buffer *xattr_buf;
struct timespec atime, mtime, ctime;
struct timespec64 atime, mtime, ctime;
int op, caps, wanted, dirty;
u32 seq, issue_seq, mseq, time_warp_seq;
u32 flags;
......@@ -1146,7 +1132,7 @@ static int send_cap_msg(struct cap_msg_args *arg)
struct ceph_msg *msg;
void *p;
size_t extra_len;
struct timespec zerotime = {0};
struct timespec64 zerotime = {0};
struct ceph_osd_client *osdc = &arg->session->s_mdsc->fsc->client->osdc;
dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s"
......@@ -1186,9 +1172,9 @@ static int send_cap_msg(struct cap_msg_args *arg)
fc->size = cpu_to_le64(arg->size);
fc->max_size = cpu_to_le64(arg->max_size);
ceph_encode_timespec(&fc->mtime, &arg->mtime);
ceph_encode_timespec(&fc->atime, &arg->atime);
ceph_encode_timespec(&fc->ctime, &arg->ctime);
ceph_encode_timespec64(&fc->mtime, &arg->mtime);
ceph_encode_timespec64(&fc->atime, &arg->atime);
ceph_encode_timespec64(&fc->ctime, &arg->ctime);
fc->time_warp_seq = cpu_to_le32(arg->time_warp_seq);
fc->uid = cpu_to_le32(from_kuid(&init_user_ns, arg->uid));
......@@ -1237,7 +1223,7 @@ static int send_cap_msg(struct cap_msg_args *arg)
* We just zero these out for now, as the MDS ignores them unless
* the requisite feature flags are set (which we don't do yet).
*/
ceph_encode_timespec(p, &zerotime);
ceph_encode_timespec64(p, &zerotime);
p += sizeof(struct ceph_timespec);
ceph_encode_64(&p, 0);
......@@ -1360,9 +1346,9 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
arg.xattr_buf = NULL;
}
arg.mtime = timespec64_to_timespec(inode->i_mtime);
arg.atime = timespec64_to_timespec(inode->i_atime);
arg.ctime = timespec64_to_timespec(inode->i_ctime);
arg.mtime = inode->i_mtime;
arg.atime = inode->i_atime;
arg.ctime = inode->i_ctime;
arg.op = op;
arg.caps = cap->implemented;
......@@ -3148,11 +3134,11 @@ static void handle_cap_grant(struct inode *inode,
}
if (newcaps & CEPH_CAP_ANY_RD) {
struct timespec mtime, atime, ctime;
struct timespec64 mtime, atime, ctime;
/* ctime/mtime/atime? */
ceph_decode_timespec(&mtime, &grant->mtime);
ceph_decode_timespec(&atime, &grant->atime);
ceph_decode_timespec(&ctime, &grant->ctime);
ceph_decode_timespec64(&mtime, &grant->mtime);
ceph_decode_timespec64(&atime, &grant->atime);
ceph_decode_timespec64(&ctime, &grant->ctime);
ceph_fill_file_time(inode, extra_info->issued,
le32_to_cpu(grant->time_warp_seq),
&ctime, &mtime, &atime);
......
......@@ -827,12 +827,14 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
if (ceph_snap(dir) != CEPH_NOSNAP)
return -EROFS;
if (ceph_quota_is_max_files_exceeded(dir))
return -EDQUOT;
if (ceph_quota_is_max_files_exceeded(dir)) {
err = -EDQUOT;
goto out;
}
err = ceph_pre_init_acls(dir, &mode, &acls);
if (err < 0)
return err;
goto out;
dout("mknod in dir %p dentry %p mode 0%ho rdev %d\n",
dir, dentry, mode, rdev);
......@@ -883,8 +885,10 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
if (ceph_snap(dir) != CEPH_NOSNAP)
return -EROFS;
if (ceph_quota_is_max_files_exceeded(dir))
return -EDQUOT;
if (ceph_quota_is_max_files_exceeded(dir)) {
err = -EDQUOT;
goto out;
}
dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest);
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS);
......@@ -1393,7 +1397,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
" rfiles: %20lld\n"
" rsubdirs: %20lld\n"
"rbytes: %20lld\n"
"rctime: %10ld.%09ld\n",
"rctime: %10lld.%09ld\n",
ci->i_files + ci->i_subdirs,
ci->i_files,
ci->i_subdirs,
......@@ -1401,8 +1405,8 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
ci->i_rfiles,
ci->i_rsubdirs,
ci->i_rbytes,
(long)ci->i_rctime.tv_sec,
(long)ci->i_rctime.tv_nsec);
ci->i_rctime.tv_sec,
ci->i_rctime.tv_nsec);
}
if (*ppos >= dfi->dir_info_len)
......
......@@ -720,7 +720,7 @@ struct ceph_aio_request {
struct list_head osd_reqs;
unsigned num_reqs;
atomic_t pending_reqs;
struct timespec mtime;
struct timespec64 mtime;
struct ceph_cap_flush *prealloc_cf;
};
......@@ -922,7 +922,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
int num_pages = 0;
int flags;
int ret;
struct timespec mtime = timespec64_to_timespec(current_time(inode));
struct timespec64 mtime = current_time(inode);
size_t count = iov_iter_count(iter);
loff_t pos = iocb->ki_pos;
bool write = iov_iter_rw(iter) == WRITE;
......@@ -1130,7 +1130,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
int flags;
int ret;
bool check_caps = false;
struct timespec mtime = timespec64_to_timespec(current_time(inode));
struct timespec64 mtime = current_time(inode);
size_t count = iov_iter_count(from);
if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
......@@ -1383,12 +1383,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct ceph_file_info *fi = file->private_data;
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_osd_client *osdc =
&ceph_sb_to_client(inode->i_sb)->client->osdc;
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_cap_flush *prealloc_cf;
ssize_t count, written = 0;
int err, want, got;
loff_t pos;
loff_t limit = max(i_size_read(inode), fsc->max_file_size);
if (ceph_snap(inode) != CEPH_NOSNAP)
return -EROFS;
......@@ -1414,6 +1414,13 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
goto out;
pos = iocb->ki_pos;
if (unlikely(pos >= limit)) {
err = -EFBIG;
goto out;
} else {
iov_iter_truncate(from, limit - pos);
}
count = iov_iter_count(from);
if (ceph_quota_is_max_bytes_exceeded(inode, pos + count)) {
err = -EDQUOT;
......@@ -1435,7 +1442,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
}
/* FIXME: not complete since it doesn't account for being at quota */
if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL)) {
if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_FULL)) {
err = -ENOSPC;
goto out;
}
......@@ -1525,7 +1532,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
}
if (written >= 0) {
if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_NEARFULL))
if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_NEARFULL))
iocb->ki_flags |= IOCB_DSYNC;
written = generic_write_sync(iocb, written);
}
......@@ -1546,6 +1553,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
{
struct inode *inode = file->f_mapping->host;
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
loff_t i_size;
loff_t ret;
......@@ -1590,7 +1598,7 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
break;
}
ret = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
ret = vfs_setpos(file, offset, max(i_size, fsc->max_file_size));
out:
inode_unlock(inode);
......@@ -1662,7 +1670,7 @@ static int ceph_zero_partial_object(struct inode *inode,
goto out;
}
req->r_mtime = timespec64_to_timespec(inode->i_mtime);
req->r_mtime = inode->i_mtime;
ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
if (!ret) {
ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
......@@ -1727,8 +1735,7 @@ static long ceph_fallocate(struct file *file, int mode,
struct ceph_file_info *fi = file->private_data;
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_osd_client *osdc =
&ceph_inode_to_client(inode)->client->osdc;
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_cap_flush *prealloc_cf;
int want, got = 0;
int dirty;
......@@ -1736,6 +1743,9 @@ static long ceph_fallocate(struct file *file, int mode,
loff_t endoff = 0;
loff_t size;
if ((offset + length) > max(i_size_read(inode), fsc->max_file_size))
return -EFBIG;
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
return -EOPNOTSUPP;
......@@ -1759,7 +1769,7 @@ static long ceph_fallocate(struct file *file, int mode,
goto unlock;
}
if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) &&
if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_FULL) &&
!(mode & FALLOC_FL_PUNCH_HOLE)) {
ret = -ENOSPC;
goto unlock;
......
......@@ -658,13 +658,10 @@ int ceph_fill_file_size(struct inode *inode, int issued,
}
void ceph_fill_file_time(struct inode *inode, int issued,
u64 time_warp_seq, struct timespec *ctime,
struct timespec *mtime, struct timespec *atime)
u64 time_warp_seq, struct timespec64 *ctime,
struct timespec64 *mtime, struct timespec64 *atime)
{
struct ceph_inode_info *ci = ceph_inode(inode);
struct timespec64 ctime64 = timespec_to_timespec64(*ctime);
struct timespec64 mtime64 = timespec_to_timespec64(*mtime);
struct timespec64 atime64 = timespec_to_timespec64(*atime);
int warn = 0;
if (issued & (CEPH_CAP_FILE_EXCL|
......@@ -673,39 +670,39 @@ void ceph_fill_file_time(struct inode *inode, int issued,
CEPH_CAP_AUTH_EXCL|
CEPH_CAP_XATTR_EXCL)) {
if (ci->i_version == 0 ||
timespec64_compare(&ctime64, &inode->i_ctime) > 0) {
timespec64_compare(ctime, &inode->i_ctime) > 0) {
dout("ctime %lld.%09ld -> %lld.%09ld inc w/ cap\n",
(long long)inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
(long long)ctime->tv_sec, ctime->tv_nsec);
inode->i_ctime = ctime64;
inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
ctime->tv_sec, ctime->tv_nsec);
inode->i_ctime = *ctime;
}
if (ci->i_version == 0 ||
ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) {
/* the MDS did a utimes() */
dout("mtime %lld.%09ld -> %lld.%09ld "
"tw %d -> %d\n",
(long long)inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
(long long)mtime->tv_sec, mtime->tv_nsec,
inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
mtime->tv_sec, mtime->tv_nsec,
ci->i_time_warp_seq, (int)time_warp_seq);
inode->i_mtime = mtime64;
inode->i_atime = atime64;
inode->i_mtime = *mtime;
inode->i_atime = *atime;
ci->i_time_warp_seq = time_warp_seq;
} else if (time_warp_seq == ci->i_time_warp_seq) {
/* nobody did utimes(); take the max */
if (timespec64_compare(&mtime64, &inode->i_mtime) > 0) {
if (timespec64_compare(mtime, &inode->i_mtime) > 0) {
dout("mtime %lld.%09ld -> %lld.%09ld inc\n",
(long long)inode->i_mtime.tv_sec,
inode->i_mtime.tv_sec,
inode->i_mtime.tv_nsec,
(long long)mtime->tv_sec, mtime->tv_nsec);
inode->i_mtime = mtime64;
mtime->tv_sec, mtime->tv_nsec);
inode->i_mtime = *mtime;
}
if (timespec64_compare(&atime64, &inode->i_atime) > 0) {
if (timespec64_compare(atime, &inode->i_atime) > 0) {
dout("atime %lld.%09ld -> %lld.%09ld inc\n",
(long long)inode->i_atime.tv_sec,
inode->i_atime.tv_sec,
inode->i_atime.tv_nsec,
(long long)atime->tv_sec, atime->tv_nsec);
inode->i_atime = atime64;
atime->tv_sec, atime->tv_nsec);
inode->i_atime = *atime;
}
} else if (issued & CEPH_CAP_FILE_EXCL) {
/* we did a utimes(); ignore mds values */
......@@ -715,9 +712,9 @@ void ceph_fill_file_time(struct inode *inode, int issued,
} else {
/* we have no write|excl caps; whatever the MDS says is true */
if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) {
inode->i_ctime = ctime64;
inode->i_mtime = mtime64;
inode->i_atime = atime64;
inode->i_ctime = *ctime;
inode->i_mtime = *mtime;
inode->i_atime = *atime;
ci->i_time_warp_seq = time_warp_seq;
} else {
warn = 1;
......@@ -743,7 +740,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
struct ceph_mds_reply_inode *info = iinfo->in;
struct ceph_inode_info *ci = ceph_inode(inode);
int issued, new_issued, info_caps;
struct timespec mtime, atime, ctime;
struct timespec64 mtime, atime, ctime;
struct ceph_buffer *xattr_blob = NULL;
struct ceph_string *pool_ns = NULL;
struct ceph_cap *new_cap = NULL;
......@@ -823,9 +820,9 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
if (new_version || (new_issued & CEPH_CAP_ANY_RD)) {
/* be careful with mtime, atime, size */
ceph_decode_timespec(&atime, &info->atime);
ceph_decode_timespec(&mtime, &info->mtime);
ceph_decode_timespec(&ctime, &info->ctime);
ceph_decode_timespec64(&atime, &info->atime);
ceph_decode_timespec64(&mtime, &info->mtime);
ceph_decode_timespec64(&ctime, &info->ctime);
ceph_fill_file_time(inode, issued,
le32_to_cpu(info->time_warp_seq),
&ctime, &mtime, &atime);
......@@ -872,7 +869,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
ci->i_rbytes = le64_to_cpu(info->rbytes);
ci->i_rfiles = le64_to_cpu(info->rfiles);
ci->i_rsubdirs = le64_to_cpu(info->rsubdirs);
ceph_decode_timespec(&ci->i_rctime, &info->rctime);
ceph_decode_timespec64(&ci->i_rctime, &info->rctime);
}
}
......@@ -1954,7 +1951,6 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
int err = 0;
int inode_dirty_flags = 0;
bool lock_snap_rwsem = false;
struct timespec ts;
prealloc_cf = ceph_alloc_cap_flush();
if (!prealloc_cf)
......@@ -2030,8 +2026,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
if (ia_valid & ATTR_ATIME) {
dout("setattr %p atime %lld.%ld -> %lld.%ld\n", inode,
(long long)inode->i_atime.tv_sec, inode->i_atime.tv_nsec,
(long long)attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec);
inode->i_atime.tv_sec, inode->i_atime.tv_nsec,
attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec);
if (issued & CEPH_CAP_FILE_EXCL) {
ci->i_time_warp_seq++;
inode->i_atime = attr->ia_atime;
......@@ -2043,8 +2039,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
dirtied |= CEPH_CAP_FILE_WR;
} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
!timespec64_equal(&inode->i_atime, &attr->ia_atime)) {
ts = timespec64_to_timespec(attr->ia_atime);
ceph_encode_timespec(&req->r_args.setattr.atime, &ts);
ceph_encode_timespec64(&req->r_args.setattr.atime,
&attr->ia_atime);
mask |= CEPH_SETATTR_ATIME;
release |= CEPH_CAP_FILE_SHARED |
CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
......@@ -2052,8 +2048,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
}
if (ia_valid & ATTR_MTIME) {
dout("setattr %p mtime %lld.%ld -> %lld.%ld\n", inode,
(long long)inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
(long long)attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec);
inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec);
if (issued & CEPH_CAP_FILE_EXCL) {
ci->i_time_warp_seq++;
inode->i_mtime = attr->ia_mtime;
......@@ -2065,8 +2061,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
dirtied |= CEPH_CAP_FILE_WR;
} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
!timespec64_equal(&inode->i_mtime, &attr->ia_mtime)) {
ts = timespec64_to_timespec(attr->ia_mtime);
ceph_encode_timespec(&req->r_args.setattr.mtime, &ts);
ceph_encode_timespec64(&req->r_args.setattr.mtime,
&attr->ia_mtime);
mask |= CEPH_SETATTR_MTIME;
release |= CEPH_CAP_FILE_SHARED |
CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
......@@ -2097,8 +2093,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
bool only = (ia_valid & (ATTR_SIZE|ATTR_MTIME|ATTR_ATIME|
ATTR_MODE|ATTR_UID|ATTR_GID)) == 0;
dout("setattr %p ctime %lld.%ld -> %lld.%ld (%s)\n", inode,
(long long)inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
(long long)attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec,
inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec,
only ? "ctime only" : "ignored");
if (only) {
/*
......@@ -2140,7 +2136,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
req->r_inode_drop = release;
req->r_args.setattr.mask = cpu_to_le32(mask);
req->r_num_caps = 1;
req->r_stamp = timespec64_to_timespec(attr->ia_ctime);
req->r_stamp = attr->ia_ctime;
err = ceph_mdsc_do_request(mdsc, NULL, req);
}
dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err,
......@@ -2161,6 +2157,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
int ceph_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
int err;
if (ceph_snap(inode) != CEPH_NOSNAP)
......@@ -2170,6 +2167,10 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
if (err != 0)
return err;
if ((attr->ia_valid & ATTR_SIZE) &&
attr->ia_size > max(inode->i_size, fsc->max_file_size))
return -EFBIG;
if ((attr->ia_valid & ATTR_SIZE) &&
ceph_quota_is_max_bytes_exceeded(inode, attr->ia_size))
return -EDQUOT;
......
......@@ -902,6 +902,27 @@ static struct ceph_msg *create_session_msg(u32 op, u64 seq)
return msg;
}
/*
 * Append the client's supported-feature bitmap to a session message.
 *
 * Written at *p: a 32-bit length (via ceph_encode_32()) followed by
 * that many bytes of bitmap, where feature bit N is stored in byte
 * N / 8 at bit position N % 8.  The bitmap length is rounded up to a
 * multiple of 8 bytes.  *p is advanced past everything written.
 *
 * The bitmap is sized from the LAST entry of
 * CEPHFS_FEATURES_CLIENT_SUPPORTED, so that list must be kept in
 * ascending order.
 *
 * BUGs if the encoding would run past @end.
 */
static void encode_supported_features(void **p, void *end)
{
	static const unsigned char bits[] = CEPHFS_FEATURES_CLIENT_SUPPORTED;
	static const size_t count = ARRAY_SIZE(bits);

	if (count > 0) {
		size_t i;
		size_t size = ((size_t)bits[count - 1] + 64) / 64 * 8;
		unsigned char *bitmap;

		BUG_ON(*p + 4 + size > end);
		ceph_encode_32(p, size);
		bitmap = *p;
		memset(bitmap, 0, size);
		/*
		 * Select the byte by the feature bit number, not by the
		 * position in bits[]: the two only coincide while the
		 * list is contiguous starting at 0.
		 */
		for (i = 0; i < count; i++)
			bitmap[bits[i] / 8] |= 1 << (bits[i] % 8);
		*p += size;
	} else {
		/* no features: encode an empty bitmap */
		BUG_ON(*p + 4 > end);
		ceph_encode_32(p, 0);
	}
}
/*
* session message, specialization for CEPH_SESSION_REQUEST_OPEN
* to include additional client metadata fields.
......@@ -911,11 +932,11 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
struct ceph_msg *msg;
struct ceph_mds_session_head *h;
int i = -1;
int metadata_bytes = 0;
int extra_bytes = 0;
int metadata_key_count = 0;
struct ceph_options *opt = mdsc->fsc->client->options;
struct ceph_mount_options *fsopt = mdsc->fsc->mount_options;
void *p;
void *p, *end;
const char* metadata[][2] = {
{"hostname", mdsc->nodename},
......@@ -926,21 +947,26 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
};
/* Calculate serialized length of metadata */
metadata_bytes = 4; /* map length */
extra_bytes = 4; /* map length */
for (i = 0; metadata[i][0]; ++i) {
metadata_bytes += 8 + strlen(metadata[i][0]) +
extra_bytes += 8 + strlen(metadata[i][0]) +
strlen(metadata[i][1]);
metadata_key_count++;
}
/* supported feature */
extra_bytes += 4 + 8;
/* Allocate the message */
msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + metadata_bytes,
msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes,
GFP_NOFS, false);
if (!msg) {
pr_err("create_session_msg ENOMEM creating msg\n");
return NULL;
}
h = msg->front.iov_base;
p = msg->front.iov_base;
end = p + msg->front.iov_len;
h = p;
h->op = cpu_to_le32(CEPH_SESSION_REQUEST_OPEN);
h->seq = cpu_to_le64(seq);
......@@ -950,11 +976,11 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
*
* ClientSession messages with metadata are v2; v3 additionally carries the supported feature bits
*/
msg->hdr.version = cpu_to_le16(2);
msg->hdr.version = cpu_to_le16(3);
msg->hdr.compat_version = cpu_to_le16(1);
/* The write pointer, following the session_head structure */
p = msg->front.iov_base + sizeof(*h);
p += sizeof(*h);
/* Number of entries in the map */
ceph_encode_32(&p, metadata_key_count);
......@@ -972,6 +998,10 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
p += val_len;
}
encode_supported_features(&p, end);
msg->front.iov_len = p - msg->front.iov_base;
msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
return msg;
}
......@@ -1779,6 +1809,7 @@ struct ceph_mds_request *
ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
{
struct ceph_mds_request *req = kzalloc(sizeof(*req), GFP_NOFS);
struct timespec64 ts;
if (!req)
return ERR_PTR(-ENOMEM);
......@@ -1797,7 +1828,8 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
init_completion(&req->r_safe_completion);
INIT_LIST_HEAD(&req->r_unsafe_item);
req->r_stamp = timespec_trunc(current_kernel_time(), mdsc->fsc->sb->s_time_gran);
ktime_get_coarse_real_ts64(&ts);
req->r_stamp = timespec64_trunc(ts, mdsc->fsc->sb->s_time_gran);
req->r_op = op;
req->r_direct_mode = mode;
......@@ -2094,7 +2126,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
/* time stamp */
{
struct ceph_timespec ts;
ceph_encode_timespec(&ts, &req->r_stamp);
ceph_encode_timespec64(&ts, &req->r_stamp);
ceph_encode_copy(&p, &ts, sizeof(ts));
}
......@@ -2187,7 +2219,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
p = msg->front.iov_base + req->r_request_release_offset;
{
struct ceph_timespec ts;
ceph_encode_timespec(&ts, &req->r_stamp);
ceph_encode_timespec64(&ts, &req->r_stamp);
ceph_encode_copy(&p, &ts, sizeof(ts));
}
......@@ -2225,7 +2257,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
/*
* send request, or put it on the appropriate wait list.
*/
static int __do_request(struct ceph_mds_client *mdsc,
static void __do_request(struct ceph_mds_client *mdsc,
struct ceph_mds_request *req)
{
struct ceph_mds_session *session = NULL;
......@@ -2235,7 +2267,7 @@ static int __do_request(struct ceph_mds_client *mdsc,
if (req->r_err || test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) {
if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags))
__unregister_request(mdsc, req);
goto out;
return;
}
if (req->r_timeout &&
......@@ -2258,7 +2290,7 @@ static int __do_request(struct ceph_mds_client *mdsc,
if (mdsc->mdsmap->m_epoch == 0) {
dout("do_request no mdsmap, waiting for map\n");
list_add(&req->r_wait, &mdsc->waiting_for_map);
goto finish;
return;
}
if (!(mdsc->fsc->mount_options->flags &
CEPH_MOUNT_OPT_MOUNTWAIT) &&
......@@ -2276,7 +2308,7 @@ static int __do_request(struct ceph_mds_client *mdsc,
ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) {
dout("do_request no mds or not active, waiting for map\n");
list_add(&req->r_wait, &mdsc->waiting_for_map);
goto out;
return;
}
/* get, open session */
......@@ -2326,8 +2358,7 @@ static int __do_request(struct ceph_mds_client *mdsc,
complete_request(mdsc, req);
__unregister_request(mdsc, req);
}
out:
return err;
return;
}
/*
......@@ -2748,7 +2779,7 @@ static void handle_session(struct ceph_mds_session *session,
int wake = 0;
/* decode */
if (msg->front.iov_len != sizeof(*h))
if (msg->front.iov_len < sizeof(*h))
goto bad;
op = le32_to_cpu(h->op);
seq = le64_to_cpu(h->seq);
......@@ -2958,15 +2989,12 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
rec.v2.flock_len = (__force __le32)
((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1);
} else {
struct timespec ts;
rec.v1.cap_id = cpu_to_le64(cap->cap_id);
rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
rec.v1.issued = cpu_to_le32(cap->issued);
rec.v1.size = cpu_to_le64(inode->i_size);
ts = timespec64_to_timespec(inode->i_mtime);
ceph_encode_timespec(&rec.v1.mtime, &ts);
ts = timespec64_to_timespec(inode->i_atime);
ceph_encode_timespec(&rec.v1.atime, &ts);
ceph_encode_timespec64(&rec.v1.mtime, &inode->i_mtime);
ceph_encode_timespec64(&rec.v1.atime, &inode->i_atime);
rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
rec.v1.pathbase = cpu_to_le64(pathbase);
}
......@@ -3378,10 +3406,10 @@ static void handle_lease(struct ceph_mds_client *mdsc,
vino.ino = le64_to_cpu(h->ino);
vino.snap = CEPH_NOSNAP;
seq = le32_to_cpu(h->seq);
dname.name = (void *)h + sizeof(*h) + sizeof(u32);
dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32);
if (dname.len != get_unaligned_le32(h+1))
dname.len = get_unaligned_le32(h + 1);
if (msg->front.iov_len < sizeof(*h) + sizeof(u32) + dname.len)
goto bad;
dname.name = (void *)(h + 1) + sizeof(u32);
/* lookup inode */
inode = ceph_find_inode(sb, vino);
......@@ -3644,8 +3672,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
init_rwsem(&mdsc->pool_perm_rwsem);
mdsc->pool_perm_tree = RB_ROOT;
strncpy(mdsc->nodename, utsname()->nodename,
sizeof(mdsc->nodename) - 1);
strscpy(mdsc->nodename, utsname()->nodename,
sizeof(mdsc->nodename));
return 0;
}
......@@ -4019,7 +4047,8 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
} else {
mdsc->mdsmap = newmap; /* first mds map */
}
mdsc->fsc->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size;
mdsc->fsc->max_file_size = min((loff_t)mdsc->mdsmap->m_max_file_size,
MAX_LFS_FILESIZE);
__wake_requests(mdsc, &mdsc->waiting_for_map);
ceph_monc_got_map(&mdsc->fsc->client->monc, CEPH_SUB_MDSMAP,
......@@ -4155,6 +4184,16 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
return auth;
}
/*
 * Connection callback: hand an authorizer challenge received on @con
 * to the auth layer, using the authorizer of the MDS session that
 * owns the connection.  Returns whatever
 * ceph_auth_add_authorizer_challenge() returns.
 */
static int add_authorizer_challenge(struct ceph_connection *con,
				    void *challenge_buf, int challenge_buf_len)
{
	struct ceph_mds_session *session = con->private;
	struct ceph_auth_client *auth_client =
		session->s_mdsc->fsc->client->monc.auth;

	return ceph_auth_add_authorizer_challenge(auth_client,
						  session->s_auth.authorizer,
						  challenge_buf,
						  challenge_buf_len);
}
static int verify_authorizer_reply(struct ceph_connection *con)
{
......@@ -4218,6 +4257,7 @@ static const struct ceph_connection_operations mds_con_ops = {
.put = con_put,
.dispatch = dispatch,
.get_authorizer = get_authorizer,
.add_authorizer_challenge = add_authorizer_challenge,
.verify_authorizer_reply = verify_authorizer_reply,
.invalidate_authorizer = invalidate_authorizer,
.peer_reset = peer_reset,
......
......@@ -16,6 +16,18 @@
#include <linux/ceph/mdsmap.h>
#include <linux/ceph/auth.h>
/* The first 8 bits are reserved for old ceph releases */
#define CEPHFS_FEATURE_MIMIC 8
#define CEPHFS_FEATURES_ALL { \
0, 1, 2, 3, 4, 5, 6, 7, \
CEPHFS_FEATURE_MIMIC, \
}
#define CEPHFS_FEATURES_CLIENT_SUPPORTED CEPHFS_FEATURES_ALL
#define CEPHFS_FEATURES_CLIENT_REQUIRED {}
/*
* Some lock dependencies:
*
......@@ -229,7 +241,7 @@ struct ceph_mds_request {
int r_fmode; /* file mode, if expecting cap */
kuid_t r_uid;
kgid_t r_gid;
struct timespec r_stamp;
struct timespec64 r_stamp;
/* for choosing which mds to send this request to */
int r_direct_mode;
......
......@@ -48,7 +48,7 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc,
struct inode *inode;
struct ceph_inode_info *ci;
if (msg->front.iov_len != sizeof(*h)) {
if (msg->front.iov_len < sizeof(*h)) {
pr_err("%s corrupt message mds%d len %d\n", __func__,
session->s_mds, (int)msg->front.iov_len);
ceph_msg_dump(msg);
......
......@@ -594,9 +594,9 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
BUG_ON(capsnap->writing);
capsnap->size = inode->i_size;
capsnap->mtime = timespec64_to_timespec(inode->i_mtime);
capsnap->atime = timespec64_to_timespec(inode->i_atime);
capsnap->ctime = timespec64_to_timespec(inode->i_ctime);
capsnap->mtime = inode->i_mtime;
capsnap->atime = inode->i_atime;
capsnap->ctime = inode->i_ctime;
capsnap->time_warp_seq = ci->i_time_warp_seq;
capsnap->truncate_size = ci->i_truncate_size;
capsnap->truncate_seq = ci->i_truncate_seq;
......
......@@ -219,8 +219,7 @@ static int parse_fsopt_token(char *c, void *private)
if (token < Opt_last_int) {
ret = match_int(&argstr[0], &intval);
if (ret < 0) {
pr_err("bad mount option arg (not int) "
"at '%s'\n", c);
pr_err("bad option arg (not int) at '%s'\n", c);
return ret;
}
dout("got int token %d val %d\n", token, intval);
......@@ -941,11 +940,12 @@ static int ceph_set_super(struct super_block *s, void *data)
dout("set_super %p data %p\n", s, data);
s->s_flags = fsc->mount_options->sb_flags;
s->s_maxbytes = 1ULL << 40; /* temp value until we get mdsmap */
s->s_maxbytes = MAX_LFS_FILESIZE;
s->s_xattr = ceph_xattr_handlers;
s->s_fs_info = fsc;
fsc->sb = s;
fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */
s->s_op = &ceph_super_ops;
s->s_d_op = &ceph_dentry_ops;
......
......@@ -98,6 +98,7 @@ struct ceph_fs_client {
unsigned long mount_state;
int min_caps; /* min caps i added */
loff_t max_file_size;
struct ceph_mds_client *mdsc;
......@@ -193,7 +194,7 @@ struct ceph_cap_snap {
u64 xattr_version;
u64 size;
struct timespec mtime, atime, ctime;
struct timespec64 mtime, atime, ctime;
u64 time_warp_seq;
u64 truncate_size;
u32 truncate_seq;
......@@ -307,7 +308,7 @@ struct ceph_inode_info {
char *i_symlink;
/* for dirs */
struct timespec i_rctime;
struct timespec64 i_rctime;
u64 i_rbytes, i_rfiles, i_rsubdirs;
u64 i_files, i_subdirs;
......@@ -655,7 +656,7 @@ extern void ceph_caps_finalize(struct ceph_mds_client *mdsc);
extern void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta);
extern int ceph_reserve_caps(struct ceph_mds_client *mdsc,
struct ceph_cap_reservation *ctx, int need);
extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
extern void ceph_unreserve_caps(struct ceph_mds_client *mdsc,
struct ceph_cap_reservation *ctx);
extern void ceph_reservation_status(struct ceph_fs_client *client,
int *total, int *avail, int *used,
......@@ -857,8 +858,9 @@ extern struct inode *ceph_get_snapdir(struct inode *parent);
extern int ceph_fill_file_size(struct inode *inode, int issued,
u32 truncate_seq, u64 truncate_size, u64 size);
extern void ceph_fill_file_time(struct inode *inode, int issued,
u64 time_warp_seq, struct timespec *ctime,
struct timespec *mtime, struct timespec *atime);
u64 time_warp_seq, struct timespec64 *ctime,
struct timespec64 *mtime,
struct timespec64 *atime);
extern int ceph_fill_trace(struct super_block *sb,
struct ceph_mds_request *req);
extern int ceph_readdir_prepopulate(struct ceph_mds_request *req,
......
......@@ -224,8 +224,8 @@ static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val,
/*
 * Format ci->i_rctime into @val as "<seconds>.<nanoseconds>", with the
 * nanoseconds zero-padded to nine digits.  Returns snprintf()'s result.
 */
static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
				       size_t size)
{
	/* "%09ld", not "09%ld": the latter emits a literal "09" prefix */
	return snprintf(val, size, "%lld.%09ld", ci->i_rctime.tv_sec,
			ci->i_rctime.tv_nsec);
}
/* quotas */
......
......@@ -64,6 +64,10 @@ struct ceph_auth_client_ops {
/* ensure that an existing authorizer is up to date */
int (*update_authorizer)(struct ceph_auth_client *ac, int peer_type,
struct ceph_auth_handshake *auth);
int (*add_authorizer_challenge)(struct ceph_auth_client *ac,
struct ceph_authorizer *a,
void *challenge_buf,
int challenge_buf_len);
int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
struct ceph_authorizer *a);
void (*invalidate_authorizer)(struct ceph_auth_client *ac,
......@@ -118,6 +122,10 @@ void ceph_auth_destroy_authorizer(struct ceph_authorizer *a);
extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
int peer_type,
struct ceph_auth_handshake *a);
int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
struct ceph_authorizer *a,
void *challenge_buf,
int challenge_buf_len);
extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
struct ceph_authorizer *a);
extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac,
......
......@@ -165,9 +165,9 @@ DEFINE_CEPH_FEATURE(58, 1, FS_FILE_LAYOUT_V2) // overlap
DEFINE_CEPH_FEATURE(59, 1, FS_BTIME)
DEFINE_CEPH_FEATURE(59, 1, FS_CHANGE_ATTR) // overlap
DEFINE_CEPH_FEATURE(59, 1, MSG_ADDR2) // overlap
DEFINE_CEPH_FEATURE(60, 1, BLKIN_TRACING) // *do not share this bit*
DEFINE_CEPH_FEATURE(60, 1, OSD_RECOVERY_DELETES) // *do not share this bit*
DEFINE_CEPH_FEATURE(61, 1, CEPHX_V2) // *do not share this bit*
DEFINE_CEPH_FEATURE(61, 1, RESERVED2) // unused, but slow down!
DEFINE_CEPH_FEATURE(62, 1, RESERVED) // do not use; used as a sentinel
DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facing
......@@ -210,7 +210,8 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
CEPH_FEATURE_SERVER_JEWEL | \
CEPH_FEATURE_MON_STATEFUL_SUB | \
CEPH_FEATURE_CRUSH_TUNABLES5 | \
CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING)
CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING | \
CEPH_FEATURE_CEPHX_V2)
#define CEPH_FEATURES_REQUIRED_DEFAULT \
(CEPH_FEATURE_NOSRCADDR | \
......
......@@ -194,16 +194,22 @@ ceph_decode_skip_n(p, end, sizeof(u8), bad)
} while (0)
/*
* struct ceph_timespec <-> struct timespec
* struct ceph_timespec <-> struct timespec64
*/
static inline void ceph_decode_timespec(struct timespec *ts,
const struct ceph_timespec *tv)
static inline void ceph_decode_timespec64(struct timespec64 *ts,
const struct ceph_timespec *tv)
{
ts->tv_sec = (__kernel_time_t)le32_to_cpu(tv->tv_sec);
/*
* This will still overflow in year 2106. We could extend
* the protocol to steal two more bits from tv_nsec to
* add three more 136 year epochs after that the way ext4
* does if necessary.
*/
ts->tv_sec = (time64_t)le32_to_cpu(tv->tv_sec);
ts->tv_nsec = (long)le32_to_cpu(tv->tv_nsec);
}
static inline void ceph_encode_timespec(struct ceph_timespec *tv,
const struct timespec *ts)
static inline void ceph_encode_timespec64(struct ceph_timespec *tv,
const struct timespec64 *ts)
{
tv->tv_sec = cpu_to_le32((u32)ts->tv_sec);
tv->tv_nsec = cpu_to_le32((u32)ts->tv_nsec);
......
......@@ -31,6 +31,9 @@ struct ceph_connection_operations {
struct ceph_auth_handshake *(*get_authorizer) (
struct ceph_connection *con,
int *proto, int force_new);
int (*add_authorizer_challenge)(struct ceph_connection *con,
void *challenge_buf,
int challenge_buf_len);
int (*verify_authorizer_reply) (struct ceph_connection *con);
int (*invalidate_authorizer)(struct ceph_connection *con);
......@@ -286,9 +289,8 @@ struct ceph_connection {
attempt for this connection, client */
u32 peer_global_seq; /* peer's global seq for this connection */
struct ceph_auth_handshake *auth;
int auth_retry; /* true if we need a newer authorizer */
void *auth_reply_buf; /* where to put the authorizer reply */
int auth_reply_buf_len;
struct mutex mutex;
......@@ -330,7 +332,7 @@ struct ceph_connection {
int in_base_pos; /* bytes read */
__le64 in_temp_ack; /* for reading an ack */
struct timespec last_keepalive_ack; /* keepalive2 ack stamp */
struct timespec64 last_keepalive_ack; /* keepalive2 ack stamp */
struct delayed_work work; /* send|recv work */
unsigned long delay; /* current delay interval */
......
......@@ -91,7 +91,7 @@ struct ceph_entity_inst {
#define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */
#define CEPH_MSGR_TAG_KEEPALIVE2 14 /* keepalive2 byte + ceph_timespec */
#define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */
#define CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER 16 /* cephx v2 doing server challenge */
/*
* connection negotiation
......
......@@ -199,7 +199,7 @@ struct ceph_osd_request {
/* set by submitter */
u64 r_snapid; /* for reads, CEPH_NOSNAP o/w */
struct ceph_snap_context *r_snapc; /* for writes */
struct timespec r_mtime; /* ditto */
struct timespec64 r_mtime; /* ditto */
u64 r_data_offset; /* ditto */
bool r_linger; /* don't resend on failure */
......@@ -253,7 +253,7 @@ struct ceph_osd_linger_request {
struct ceph_osd_request_target t;
u32 map_dne_bound;
struct timespec mtime;
struct timespec64 mtime;
struct kref kref;
struct mutex lock;
......@@ -508,7 +508,7 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
struct ceph_snap_context *sc,
u64 off, u64 len,
u32 truncate_seq, u64 truncate_size,
struct timespec *mtime,
struct timespec64 *mtime,
struct page **pages, int nr_pages);
/* watch/notify */
......@@ -528,12 +528,12 @@ int ceph_osdc_notify_ack(struct ceph_osd_client *osdc,
u64 notify_id,
u64 cookie,
void *payload,
size_t payload_len);
u32 payload_len);
int ceph_osdc_notify(struct ceph_osd_client *osdc,
struct ceph_object_id *oid,
struct ceph_object_locator *oloc,
void *payload,
size_t payload_len,
u32 payload_len,
u32 timeout,
struct page ***preply_pages,
size_t *preply_len);
......
......@@ -68,7 +68,7 @@ static inline int ceph_pagelist_encode_8(struct ceph_pagelist *pl, u8 v)
return ceph_pagelist_append(pl, &v, 1);
}
static inline int ceph_pagelist_encode_string(struct ceph_pagelist *pl,
char *s, size_t len)
char *s, u32 len)
{
int ret = ceph_pagelist_encode_32(pl, len);
if (ret)
......
......@@ -41,4 +41,3 @@ config CEPH_LIB_USE_DNS_RESOLVER
Documentation/networking/dns_resolver.txt
If unsure, say N.
......@@ -15,4 +15,3 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
auth_x.o \
ceph_fs.o ceph_strings.o ceph_hash.o \
pagevec.o snapshot.o string_table.o
......@@ -315,6 +315,22 @@ int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
}
EXPORT_SYMBOL(ceph_auth_update_authorizer);
/*
 * Forward an authorizer challenge to the auth protocol's
 * add_authorizer_challenge op, holding ac->mutex around the call.
 *
 * Returns the op's result, or 0 when no ops table is installed or the
 * protocol does not provide the op.
 */
int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
				       struct ceph_authorizer *a,
				       void *challenge_buf,
				       int challenge_buf_len)
{
	int err;

	mutex_lock(&ac->mutex);
	if (!ac->ops || !ac->ops->add_authorizer_challenge)
		err = 0;
	else
		err = ac->ops->add_authorizer_challenge(ac, a, challenge_buf,
							challenge_buf_len);
	mutex_unlock(&ac->mutex);

	return err;
}
EXPORT_SYMBOL(ceph_auth_add_authorizer_challenge);
int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
struct ceph_authorizer *a)
{
......
......@@ -142,4 +142,3 @@ int ceph_auth_none_init(struct ceph_auth_client *ac)
ac->ops = &ceph_auth_none_ops;
return 0;
}
......@@ -26,4 +26,3 @@ struct ceph_auth_none_info {
int ceph_auth_none_init(struct ceph_auth_client *ac);
#endif
......@@ -9,6 +9,7 @@
#include <linux/ceph/decode.h>
#include <linux/ceph/auth.h>
#include <linux/ceph/ceph_features.h>
#include <linux/ceph/libceph.h>
#include <linux/ceph/messenger.h>
......@@ -70,25 +71,40 @@ static int ceph_x_encrypt(struct ceph_crypto_key *secret, void *buf,
return sizeof(u32) + ciphertext_len;
}
/*
 * Decrypt @ciphertext_len bytes at @p with @secret via ceph_crypt()
 * and check the magic of the ceph_x_encrypt_header found at @p
 * afterwards.
 *
 * Returns the plaintext length minus the header size, the error from
 * ceph_crypt(), or -EINVAL if the magic does not match.
 */
static int __ceph_x_decrypt(struct ceph_crypto_key *secret, void *p,
			    int ciphertext_len)
{
	struct ceph_x_encrypt_header *enc_hdr = p;
	int decrypted_len;
	int err;

	err = ceph_crypt(secret, false, p, ciphertext_len, ciphertext_len,
			 &decrypted_len);
	if (err)
		return err;

	if (le64_to_cpu(enc_hdr->magic) == CEPHX_ENC_MAGIC)
		return decrypted_len - sizeof(*enc_hdr);

	pr_err("%s bad magic\n", __func__);
	return -EINVAL;
}
static int ceph_x_decrypt(struct ceph_crypto_key *secret, void **p, void *end)
{
struct ceph_x_encrypt_header *hdr = *p + sizeof(u32);
int ciphertext_len, plaintext_len;
int ciphertext_len;
int ret;
ceph_decode_32_safe(p, end, ciphertext_len, e_inval);
ceph_decode_need(p, end, ciphertext_len, e_inval);
ret = ceph_crypt(secret, false, *p, end - *p, ciphertext_len,
&plaintext_len);
if (ret)
ret = __ceph_x_decrypt(secret, *p, ciphertext_len);
if (ret < 0)
return ret;
if (hdr->struct_v != 1 || le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC)
return -EPERM;
*p += ciphertext_len;
return plaintext_len - sizeof(struct ceph_x_encrypt_header);
return ret;
e_inval:
return -EINVAL;
......@@ -149,12 +165,12 @@ static int process_one_ticket(struct ceph_auth_client *ac,
void *dp, *dend;
int dlen;
char is_enc;
struct timespec validity;
struct timespec64 validity;
void *tp, *tpend;
void **ptp;
struct ceph_crypto_key new_session_key = { 0 };
struct ceph_buffer *new_ticket_blob;
unsigned long new_expires, new_renew_after;
time64_t new_expires, new_renew_after;
u64 new_secret_id;
int ret;
......@@ -189,11 +205,11 @@ static int process_one_ticket(struct ceph_auth_client *ac,
if (ret)
goto out;
ceph_decode_timespec(&validity, dp);
ceph_decode_timespec64(&validity, dp);
dp += sizeof(struct ceph_timespec);
new_expires = get_seconds() + validity.tv_sec;
new_expires = ktime_get_real_seconds() + validity.tv_sec;
new_renew_after = new_expires - (validity.tv_sec / 4);
dout(" expires=%lu renew_after=%lu\n", new_expires,
dout(" expires=%llu renew_after=%llu\n", new_expires,
new_renew_after);
/* ticket blob for service */
......@@ -275,6 +291,51 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
return -EINVAL;
}
/*
 * Encode and encrypt the second part (ceph_x_authorize_b) of the
 * authorizer.  The first part (ceph_x_authorize_a) should already be
 * encoded.
 *
 * @au:               authorizer whose buffer is (re)encrypted in place
 * @server_challenge: NULL for the initial authorizer; otherwise the
 *                    challenge value, which is sent back incremented
 *                    by one
 *
 * Without a challenge, the buffer length is trimmed to the end of the
 * encrypted blob.  With a challenge, the encrypted blob is expected to
 * end exactly at the current end of the buffer.
 *
 * Returns 0 on success or the negative error from ceph_x_encrypt().
 */
static int encrypt_authorizer(struct ceph_x_authorizer *au,
			      u64 *server_challenge)
{
	struct ceph_x_authorize_a *msg_a = au->buf->vec.iov_base;
	struct ceph_x_authorize_b *msg_b;
	void *p, *end;
	int ret;

	WARN_ON(msg_a->ticket_blob.secret_id != cpu_to_le64(au->secret_id));

	/* part b starts right after part a and its ticket blob */
	p = (void *)(msg_a + 1) + le32_to_cpu(msg_a->ticket_blob.blob_len);
	end = au->buf->vec.iov_base + au->buf->vec.iov_len;

	msg_b = p + ceph_x_encrypt_offset();
	msg_b->struct_v = 2;
	msg_b->nonce = cpu_to_le64(au->nonce);
	msg_b->have_challenge = server_challenge ? 1 : 0;
	msg_b->server_challenge_plus_one =
		server_challenge ? cpu_to_le64(*server_challenge + 1) : 0;

	ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b));
	if (ret < 0)
		return ret;

	p += ret;
	if (!server_challenge) {
		WARN_ON(p > end);
		au->buf->vec.iov_len = p - au->buf->vec.iov_base;
	} else {
		WARN_ON(p != end);
	}

	return 0;
}
static void ceph_x_authorizer_cleanup(struct ceph_x_authorizer *au)
{
ceph_crypto_key_destroy(&au->session_key);
......@@ -291,7 +352,6 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
int maxlen;
struct ceph_x_authorize_a *msg_a;
struct ceph_x_authorize_b *msg_b;
void *p, *end;
int ret;
int ticket_blob_len =
(th->ticket_blob ? th->ticket_blob->vec.iov_len : 0);
......@@ -335,21 +395,13 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
dout(" th %p secret_id %lld %lld\n", th, th->secret_id,
le64_to_cpu(msg_a->ticket_blob.secret_id));
p = msg_a + 1;
p += ticket_blob_len;
end = au->buf->vec.iov_base + au->buf->vec.iov_len;
msg_b = p + ceph_x_encrypt_offset();
msg_b->struct_v = 1;
get_random_bytes(&au->nonce, sizeof(au->nonce));
msg_b->nonce = cpu_to_le64(au->nonce);
ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b));
if (ret < 0)
ret = encrypt_authorizer(au, NULL);
if (ret) {
pr_err("failed to encrypt authorizer: %d", ret);
goto out_au;
}
p += ret;
WARN_ON(p > end);
au->buf->vec.iov_len = p - au->buf->vec.iov_base;
dout(" built authorizer nonce %llx len %d\n", au->nonce,
(int)au->buf->vec.iov_len);
return 0;
......@@ -385,13 +437,13 @@ static bool need_key(struct ceph_x_ticket_handler *th)
if (!th->have_key)
return true;
return get_seconds() >= th->renew_after;
return ktime_get_real_seconds() >= th->renew_after;
}
static bool have_key(struct ceph_x_ticket_handler *th)
{
if (th->have_key) {
if (get_seconds() >= th->expires)
if (ktime_get_real_seconds() >= th->expires)
th->have_key = false;
}
......@@ -626,6 +678,54 @@ static int ceph_x_update_authorizer(
return 0;
}
/*
 * Decrypt an authorizer challenge in place and extract the server's
 * challenge value.
 *
 * @au:                authorizer whose session key is used
 * @challenge_buf:     encrypted challenge (no leading length word)
 * @challenge_buf_len: number of bytes at @challenge_buf
 * @server_challenge:  out parameter, written only on success
 *
 * Returns 0 on success, the negative error from __ceph_x_decrypt(), or
 * -EINVAL if the decrypted payload is shorter than
 * struct ceph_x_authorize_challenge.
 */
static int decrypt_authorize_challenge(struct ceph_x_authorizer *au,
				       void *challenge_buf,
				       int challenge_buf_len,
				       u64 *server_challenge)
{
	struct ceph_x_authorize_challenge *ch;
	int len;

	/* no leading len */
	len = __ceph_x_decrypt(&au->session_key, challenge_buf,
			       challenge_buf_len);
	if (len < 0)
		return len;

	if (len < sizeof(*ch)) {
		pr_err("bad size %d for ceph_x_authorize_challenge\n", len);
		return -EINVAL;
	}

	/* the payload follows the encryption header in the decrypted buffer */
	ch = challenge_buf + sizeof(struct ceph_x_encrypt_header);
	*server_challenge = le64_to_cpu(ch->server_challenge);
	return 0;
}
/*
 * Fold a server challenge into an existing authorizer.
 *
 * Decrypts the challenge in @challenge_buf with the authorizer's
 * session key, then re-encrypts the authorizer with the challenge
 * value included.
 *
 * @ac:                auth client (not used by this function)
 * @a:                 authorizer; cast to struct ceph_x_authorizer
 * @challenge_buf:     encrypted challenge, decrypted in place
 * @challenge_buf_len: number of bytes at @challenge_buf
 *
 * Returns 0 on success or the negative error from the failing step.
 */
static int ceph_x_add_authorizer_challenge(struct ceph_auth_client *ac,
					   struct ceph_authorizer *a,
					   void *challenge_buf,
					   int challenge_buf_len)
{
	struct ceph_x_authorizer *au = (void *)a;
	u64 server_challenge;
	int ret;

	ret = decrypt_authorize_challenge(au, challenge_buf, challenge_buf_len,
					  &server_challenge);
	if (ret) {
		/* terminate the record so it cannot be merged with a later one */
		pr_err("failed to decrypt authorize challenge: %d\n", ret);
		return ret;
	}

	ret = encrypt_authorizer(au, &server_challenge);
	if (ret) {
		pr_err("failed to encrypt authorizer w/ challenge: %d\n", ret);
		return ret;
	}

	return 0;
}
static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
struct ceph_authorizer *a)
{
......@@ -637,8 +737,10 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
ret = ceph_x_decrypt(&au->session_key, &p, p + CEPHX_AU_ENC_BUF_LEN);
if (ret < 0)
return ret;
if (ret != sizeof(*reply))
return -EPERM;
if (ret < sizeof(*reply)) {
pr_err("bad size %d for ceph_x_authorize_reply\n", ret);
return -EINVAL;
}
if (au->nonce + 1 != le64_to_cpu(reply->nonce_plus_one))
ret = -EPERM;
......@@ -704,26 +806,64 @@ static int calc_signature(struct ceph_x_authorizer *au, struct ceph_msg *msg,
__le64 *psig)
{
void *enc_buf = au->enc_buf;
struct {
__le32 len;
__le32 header_crc;
__le32 front_crc;
__le32 middle_crc;
__le32 data_crc;
} __packed *sigblock = enc_buf + ceph_x_encrypt_offset();
int ret;
sigblock->len = cpu_to_le32(4*sizeof(u32));
sigblock->header_crc = msg->hdr.crc;
sigblock->front_crc = msg->footer.front_crc;
sigblock->middle_crc = msg->footer.middle_crc;
sigblock->data_crc = msg->footer.data_crc;
ret = ceph_x_encrypt(&au->session_key, enc_buf, CEPHX_AU_ENC_BUF_LEN,
sizeof(*sigblock));
if (ret < 0)
return ret;
if (!CEPH_HAVE_FEATURE(msg->con->peer_features, CEPHX_V2)) {
struct {
__le32 len;
__le32 header_crc;
__le32 front_crc;
__le32 middle_crc;
__le32 data_crc;
} __packed *sigblock = enc_buf + ceph_x_encrypt_offset();
sigblock->len = cpu_to_le32(4*sizeof(u32));
sigblock->header_crc = msg->hdr.crc;
sigblock->front_crc = msg->footer.front_crc;
sigblock->middle_crc = msg->footer.middle_crc;
sigblock->data_crc = msg->footer.data_crc;
ret = ceph_x_encrypt(&au->session_key, enc_buf,
CEPHX_AU_ENC_BUF_LEN, sizeof(*sigblock));
if (ret < 0)
return ret;
*psig = *(__le64 *)(enc_buf + sizeof(u32));
} else {
struct {
__le32 header_crc;
__le32 front_crc;
__le32 front_len;
__le32 middle_crc;
__le32 middle_len;
__le32 data_crc;
__le32 data_len;
__le32 seq_lower_word;
} __packed *sigblock = enc_buf;
struct {
__le64 a, b, c, d;
} __packed *penc = enc_buf;
int ciphertext_len;
sigblock->header_crc = msg->hdr.crc;
sigblock->front_crc = msg->footer.front_crc;
sigblock->front_len = msg->hdr.front_len;
sigblock->middle_crc = msg->footer.middle_crc;
sigblock->middle_len = msg->hdr.middle_len;
sigblock->data_crc = msg->footer.data_crc;
sigblock->data_len = msg->hdr.data_len;
sigblock->seq_lower_word = *(__le32 *)&msg->hdr.seq;
/* no leading len, no ceph_x_encrypt_header */
ret = ceph_crypt(&au->session_key, true, enc_buf,
CEPHX_AU_ENC_BUF_LEN, sizeof(*sigblock),
&ciphertext_len);
if (ret)
return ret;
*psig = penc->a ^ penc->b ^ penc->c ^ penc->d;
}
*psig = *(__le64 *)(enc_buf + sizeof(u32));
return 0;
}
......@@ -778,6 +918,7 @@ static const struct ceph_auth_client_ops ceph_x_ops = {
.handle_reply = ceph_x_handle_reply,
.create_authorizer = ceph_x_create_authorizer,
.update_authorizer = ceph_x_update_authorizer,
.add_authorizer_challenge = ceph_x_add_authorizer_challenge,
.verify_authorizer_reply = ceph_x_verify_authorizer_reply,
.invalidate_authorizer = ceph_x_invalidate_authorizer,
.reset = ceph_x_reset,
......@@ -823,5 +964,3 @@ int ceph_x_init(struct ceph_auth_client *ac)
out:
return ret;
}
......@@ -22,7 +22,7 @@ struct ceph_x_ticket_handler {
u64 secret_id;
struct ceph_buffer *ticket_blob;
unsigned long renew_after, expires;
time64_t renew_after, expires;
};
#define CEPHX_AU_ENC_BUF_LEN 128 /* big enough for encrypted blob */
......@@ -52,4 +52,3 @@ struct ceph_x_info {
int ceph_x_init(struct ceph_auth_client *ac);
#endif
......@@ -70,6 +70,13 @@ struct ceph_x_authorize_a {
/*
 * Second, encrypted part of the cephx authorizer.  It is filled in and
 * encrypted by encrypt_authorizer().  The struct is packed, so the
 * fields are laid out back to back with no padding.
 */
struct ceph_x_authorize_b {
/* encoding version; encrypt_authorizer() sets this to 2 */
__u8 struct_v;
/* little-endian copy of the authorizer's nonce */
__le64 nonce;
/* 1 when a server challenge is being answered, 0 otherwise */
__u8 have_challenge;
/* server's challenge value + 1, or 0 when have_challenge is 0 */
__le64 server_challenge_plus_one;
} __attribute__ ((packed));
/*
 * Decrypted payload of an authorizer challenge; it follows the
 * ceph_x_encrypt_header in the decrypted buffer (see
 * decrypt_authorize_challenge()).  Packed: no padding between fields.
 */
struct ceph_x_authorize_challenge {
/* NOTE(review): presumably an encoding version; not read by the visible code */
__u8 struct_v;
/* little-endian challenge value; the reply carries this value + 1 */
__le64 server_challenge;
} __attribute__ ((packed));
struct ceph_x_authorize_reply {
......
......@@ -304,7 +304,7 @@ static int get_secret(struct ceph_crypto_key *dst, const char *name) {
struct ceph_crypto_key *ckey;
ukey = request_key(&key_type_ceph, name, NULL);
if (!ukey || IS_ERR(ukey)) {
if (IS_ERR(ukey)) {
/* request_key errors don't map nicely to mount(2)
errors; don't even try, but still printk */
key_err = PTR_ERR(ukey);
......@@ -379,7 +379,7 @@ ceph_parse_options(char *options, const char *dev_name,
/* parse mount options */
while ((c = strsep(&options, ",")) != NULL) {
int token, intval, ret;
int token, intval;
if (!*c)
continue;
err = -EINVAL;
......@@ -394,11 +394,10 @@ ceph_parse_options(char *options, const char *dev_name,
continue;
}
if (token < Opt_last_int) {
ret = match_int(&argstr[0], &intval);
if (ret < 0) {
pr_err("bad mount option arg (not int) "
"at '%s'\n", c);
continue;
err = match_int(&argstr[0], &intval);
if (err < 0) {
pr_err("bad option arg (not int) at '%s'\n", c);
goto out;
}
dout("got int token %d val %d\n", token, intval);
} else if (token > Opt_last_int && token < Opt_last_string) {
......
......@@ -32,7 +32,7 @@ int ceph_cls_lock(struct ceph_osd_client *osdc,
int desc_len = strlen(desc);
void *p, *end;
struct page *lock_op_page;
struct timespec mtime;
struct timespec64 mtime;
int ret;
lock_op_buf_size = name_len + sizeof(__le32) +
......@@ -63,7 +63,7 @@ int ceph_cls_lock(struct ceph_osd_client *osdc,
ceph_encode_string(&p, end, desc, desc_len);
/* only support infinite duration */
memset(&mtime, 0, sizeof(mtime));
ceph_encode_timespec(p, &mtime);
ceph_encode_timespec64(p, &mtime);
p += sizeof(struct ceph_timespec);
ceph_encode_8(&p, flags);
......
......@@ -514,7 +514,7 @@ static int crush_choose_firstn(const struct crush_map *map,
in, work->work[-1-in->id],
x, r,
(choose_args ?
&choose_args[-1-in->id] : 0),
&choose_args[-1-in->id] : NULL),
outpos);
if (item >= map->max_devices) {
dprintk(" bad item %d\n", item);
......@@ -725,7 +725,7 @@ static void crush_choose_indep(const struct crush_map *map,
in, work->work[-1-in->id],
x, r,
(choose_args ?
&choose_args[-1-in->id] : 0),
&choose_args[-1-in->id] : NULL),
outpos);
if (item >= map->max_devices) {
dprintk(" bad item %d\n", item);
......
......@@ -1417,11 +1417,11 @@ static void prepare_write_keepalive(struct ceph_connection *con)
dout("prepare_write_keepalive %p\n", con);
con_out_kvec_reset(con);
if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) {
struct timespec now;
struct timespec64 now;
ktime_get_real_ts(&now);
ktime_get_real_ts64(&now);
con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2);
ceph_encode_timespec(&con->out_temp_keepalive2, &now);
ceph_encode_timespec64(&con->out_temp_keepalive2, &now);
con_out_kvec_add(con, sizeof(con->out_temp_keepalive2),
&con->out_temp_keepalive2);
} else {
......@@ -1434,24 +1434,26 @@ static void prepare_write_keepalive(struct ceph_connection *con)
* Connection negotiation.
*/
static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection *con,
int *auth_proto)
static int get_connect_authorizer(struct ceph_connection *con)
{
struct ceph_auth_handshake *auth;
int auth_proto;
if (!con->ops->get_authorizer) {
con->auth = NULL;
con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN;
con->out_connect.authorizer_len = 0;
return NULL;
return 0;
}
auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry);
auth = con->ops->get_authorizer(con, &auth_proto, con->auth_retry);
if (IS_ERR(auth))
return auth;
return PTR_ERR(auth);
con->auth_reply_buf = auth->authorizer_reply_buf;
con->auth_reply_buf_len = auth->authorizer_reply_buf_len;
return auth;
con->auth = auth;
con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto);
con->out_connect.authorizer_len = cpu_to_le32(auth->authorizer_buf_len);
return 0;
}
/*
......@@ -1467,12 +1469,22 @@ static void prepare_write_banner(struct ceph_connection *con)
con_flag_set(con, CON_FLAG_WRITE_PENDING);
}
static void __prepare_write_connect(struct ceph_connection *con)
{
con_out_kvec_add(con, sizeof(con->out_connect), &con->out_connect);
if (con->auth)
con_out_kvec_add(con, con->auth->authorizer_buf_len,
con->auth->authorizer_buf);
con->out_more = 0;
con_flag_set(con, CON_FLAG_WRITE_PENDING);
}
static int prepare_write_connect(struct ceph_connection *con)
{
unsigned int global_seq = get_global_seq(con->msgr, 0);
int proto;
int auth_proto;
struct ceph_auth_handshake *auth;
int ret;
switch (con->peer_name.type) {
case CEPH_ENTITY_TYPE_MON:
......@@ -1499,24 +1511,11 @@ static int prepare_write_connect(struct ceph_connection *con)
con->out_connect.protocol_version = cpu_to_le32(proto);
con->out_connect.flags = 0;
auth_proto = CEPH_AUTH_UNKNOWN;
auth = get_connect_authorizer(con, &auth_proto);
if (IS_ERR(auth))
return PTR_ERR(auth);
con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto);
con->out_connect.authorizer_len = auth ?
cpu_to_le32(auth->authorizer_buf_len) : 0;
con_out_kvec_add(con, sizeof (con->out_connect),
&con->out_connect);
if (auth && auth->authorizer_buf_len)
con_out_kvec_add(con, auth->authorizer_buf_len,
auth->authorizer_buf);
con->out_more = 0;
con_flag_set(con, CON_FLAG_WRITE_PENDING);
ret = get_connect_authorizer(con);
if (ret)
return ret;
__prepare_write_connect(con);
return 0;
}
......@@ -1781,11 +1780,21 @@ static int read_partial_connect(struct ceph_connection *con)
if (ret <= 0)
goto out;
size = le32_to_cpu(con->in_reply.authorizer_len);
end += size;
ret = read_partial(con, end, size, con->auth_reply_buf);
if (ret <= 0)
goto out;
if (con->auth) {
size = le32_to_cpu(con->in_reply.authorizer_len);
if (size > con->auth->authorizer_reply_buf_len) {
pr_err("authorizer reply too big: %d > %zu\n", size,
con->auth->authorizer_reply_buf_len);
ret = -EINVAL;
goto out;
}
end += size;
ret = read_partial(con, end, size,
con->auth->authorizer_reply_buf);
if (ret <= 0)
goto out;
}
dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n",
con, (int)con->in_reply.tag,
......@@ -1793,7 +1802,6 @@ static int read_partial_connect(struct ceph_connection *con)
le32_to_cpu(con->in_reply.global_seq));
out:
return ret;
}
/*
......@@ -2076,12 +2084,27 @@ static int process_connect(struct ceph_connection *con)
dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
if (con->auth_reply_buf) {
if (con->auth) {
/*
* Any connection that defines ->get_authorizer()
* should also define ->verify_authorizer_reply().
* should also define ->add_authorizer_challenge() and
* ->verify_authorizer_reply().
*
* See get_connect_authorizer().
*/
if (con->in_reply.tag == CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) {
ret = con->ops->add_authorizer_challenge(
con, con->auth->authorizer_reply_buf,
le32_to_cpu(con->in_reply.authorizer_len));
if (ret < 0)
return ret;
con_out_kvec_reset(con);
__prepare_write_connect(con);
prepare_read_connect(con);
return 0;
}
ret = con->ops->verify_authorizer_reply(con);
if (ret < 0) {
con->error_msg = "bad authorize reply";
......@@ -2555,7 +2578,7 @@ static int read_keepalive_ack(struct ceph_connection *con)
int ret = read_partial(con, size, size, &ceph_ts);
if (ret <= 0)
return ret;
ceph_decode_timespec(&con->last_keepalive_ack, &ceph_ts);
ceph_decode_timespec64(&con->last_keepalive_ack, &ceph_ts);
prepare_read_tag(con);
return 1;
}
......@@ -3223,12 +3246,12 @@ bool ceph_con_keepalive_expired(struct ceph_connection *con,
{
if (interval > 0 &&
(con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2)) {
struct timespec now;
struct timespec ts;
ktime_get_real_ts(&now);
jiffies_to_timespec(interval, &ts);
ts = timespec_add(con->last_keepalive_ack, ts);
return timespec_compare(&now, &ts) >= 0;
struct timespec64 now;
struct timespec64 ts;
ktime_get_real_ts64(&now);
jiffies_to_timespec64(interval, &ts);
ts = timespec64_add(con->last_keepalive_ack, ts);
return timespec64_compare(&now, &ts) >= 0;
}
return false;
}
......
......@@ -1249,7 +1249,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
if (monc->client->extra_mon_dispatch &&
monc->client->extra_mon_dispatch(monc->client, msg) == 0)
break;
pr_err("received unknown message type %d %s\n", type,
ceph_msg_type_name(type));
}
......
......@@ -1978,7 +1978,7 @@ static void encode_request_partial(struct ceph_osd_request *req,
p += sizeof(struct ceph_blkin_trace_info);
ceph_encode_32(&p, 0); /* client_inc, always 0 */
ceph_encode_timespec(p, &req->r_mtime);
ceph_encode_timespec64(p, &req->r_mtime);
p += sizeof(struct ceph_timespec);
encode_oloc(&p, end, &req->r_t.target_oloc);
......@@ -4512,7 +4512,7 @@ ceph_osdc_watch(struct ceph_osd_client *osdc,
ceph_oid_copy(&lreq->t.base_oid, oid);
ceph_oloc_copy(&lreq->t.base_oloc, oloc);
lreq->t.flags = CEPH_OSD_FLAG_WRITE;
ktime_get_real_ts(&lreq->mtime);
ktime_get_real_ts64(&lreq->mtime);
lreq->reg_req = alloc_linger_request(lreq);
if (!lreq->reg_req) {
......@@ -4570,7 +4570,7 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid);
ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
req->r_flags = CEPH_OSD_FLAG_WRITE;
ktime_get_real_ts(&req->r_mtime);
ktime_get_real_ts64(&req->r_mtime);
osd_req_op_watch_init(req, 0, lreq->linger_id,
CEPH_OSD_WATCH_OP_UNWATCH);
......@@ -4591,7 +4591,7 @@ EXPORT_SYMBOL(ceph_osdc_unwatch);
static int osd_req_op_notify_ack_init(struct ceph_osd_request *req, int which,
u64 notify_id, u64 cookie, void *payload,
size_t payload_len)
u32 payload_len)
{
struct ceph_osd_req_op *op;
struct ceph_pagelist *pl;
......@@ -4628,7 +4628,7 @@ int ceph_osdc_notify_ack(struct ceph_osd_client *osdc,
u64 notify_id,
u64 cookie,
void *payload,
size_t payload_len)
u32 payload_len)
{
struct ceph_osd_request *req;
int ret;
......@@ -4661,7 +4661,7 @@ EXPORT_SYMBOL(ceph_osdc_notify_ack);
static int osd_req_op_notify_init(struct ceph_osd_request *req, int which,
u64 cookie, u32 prot_ver, u32 timeout,
void *payload, size_t payload_len)
void *payload, u32 payload_len)
{
struct ceph_osd_req_op *op;
struct ceph_pagelist *pl;
......@@ -4701,7 +4701,7 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
struct ceph_object_id *oid,
struct ceph_object_locator *oloc,
void *payload,
size_t payload_len,
u32 payload_len,
u32 timeout,
struct page ***preply_pages,
size_t *preply_len)
......@@ -5136,7 +5136,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
struct ceph_snap_context *snapc,
u64 off, u64 len,
u32 truncate_seq, u64 truncate_size,
struct timespec *mtime,
struct timespec64 *mtime,
struct page **pages, int num_pages)
{
struct ceph_osd_request *req;
......@@ -5393,6 +5393,16 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
return auth;
}
/*
 * OSD connection callback: hand an authorizer challenge to the auth
 * client, using the authorizer of the OSD stored in con->private.
 *
 * Returns whatever ceph_auth_add_authorizer_challenge() returns.
 */
static int add_authorizer_challenge(struct ceph_connection *con,
				    void *challenge_buf, int challenge_buf_len)
{
	struct ceph_osd *osd = con->private;
	struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth;

	return ceph_auth_add_authorizer_challenge(ac, osd->o_auth.authorizer,
						  challenge_buf,
						  challenge_buf_len);
}
static int verify_authorizer_reply(struct ceph_connection *con)
{
......@@ -5442,6 +5452,7 @@ static const struct ceph_connection_operations osd_con_ops = {
.put = put_osd_con,
.dispatch = dispatch,
.get_authorizer = get_authorizer,
.add_authorizer_challenge = add_authorizer_challenge,
.verify_authorizer_reply = verify_authorizer_reply,
.invalidate_authorizer = invalidate_authorizer,
.alloc_msg = alloc_msg,
......
......@@ -197,4 +197,3 @@ void ceph_zero_page_vector_range(int off, int len, struct page **pages)
}
}
EXPORT_SYMBOL(ceph_zero_page_vector_range);
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册