diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 0f1410c94827e95d1985eba0cd4b7f42d42712ee..85b4234b80906efd6a872827531f13c788dee6a8 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -20,6 +20,7 @@ #include "../nfs4trace.h" #include "../iostat.h" #include "../nfs.h" +#include "../nfs42.h" #define NFSDBG_FACILITY NFSDBG_PNFS_LD @@ -271,6 +272,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, spin_lock_init(&fls->mirror_array[i]->lock); fls->mirror_array[i]->ds_count = ds_count; + fls->mirror_array[i]->lseg = &fls->generic_hdr; /* deviceid */ rc = decode_deviceid(&stream, &devid); @@ -412,6 +414,146 @@ ff_layout_get_lseg_count(struct nfs4_ff_layout_segment *fls) return 1; } +static void +nfs4_ff_start_busy_timer(struct nfs4_ff_busy_timer *timer) +{ + /* first IO request? */ + if (atomic_inc_return(&timer->n_ops) == 1) { + timer->start_time = ktime_get(); + } +} + +static ktime_t +nfs4_ff_end_busy_timer(struct nfs4_ff_busy_timer *timer) +{ + ktime_t start, now; + + if (atomic_dec_return(&timer->n_ops) < 0) + WARN_ON_ONCE(1); + + now = ktime_get(); + start = timer->start_time; + timer->start_time = now; + return ktime_sub(now, start); +} + +static ktime_t +nfs4_ff_layout_calc_completion_time(struct rpc_task *task) +{ + return ktime_sub(ktime_get(), task->tk_start); +} + +static bool +nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror, + struct nfs4_ff_layoutstat *layoutstat) +{ + static const ktime_t notime = {0}; + ktime_t now = ktime_get(); + + nfs4_ff_start_busy_timer(&layoutstat->busy_timer); + if (ktime_equal(mirror->start_time, notime)) + mirror->start_time = now; + if (ktime_equal(mirror->last_report_time, notime)) + mirror->last_report_time = now; + if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >= + FF_LAYOUTSTATS_REPORT_INTERVAL) { + mirror->last_report_time = now; + return true; + } + + return false; +} + +static void +nfs4_ff_layout_stat_io_update_requested(struct nfs4_ff_layoutstat *layoutstat, + __u64 requested) +{ + struct nfs4_ff_io_stat *iostat = &layoutstat->io_stat; + + iostat->ops_requested++; + iostat->bytes_requested += requested; +} + +static void +nfs4_ff_layout_stat_io_update_completed(struct nfs4_ff_layoutstat *layoutstat, + __u64 requested, + __u64 completed, + ktime_t time_completed) +{ + struct nfs4_ff_io_stat *iostat = &layoutstat->io_stat; + ktime_t timer; + + iostat->ops_completed++; + iostat->bytes_completed += completed; + iostat->bytes_not_delivered += requested - completed; + + timer = nfs4_ff_end_busy_timer(&layoutstat->busy_timer); + iostat->total_busy_time = + ktime_add(iostat->total_busy_time, timer); + iostat->aggregate_completion_time = + ktime_add(iostat->aggregate_completion_time, time_completed); +} + +static void +nfs4_ff_layout_stat_io_start_read(struct nfs4_ff_layout_mirror *mirror, + __u64 requested) +{ + bool report; + + spin_lock(&mirror->lock); + report = nfs4_ff_layoutstat_start_io(mirror, &mirror->read_stat); + nfs4_ff_layout_stat_io_update_requested(&mirror->read_stat, requested); + spin_unlock(&mirror->lock); + + if (report) + pnfs_report_layoutstat(mirror->lseg->pls_layout->plh_inode); +} + +static void +nfs4_ff_layout_stat_io_end_read(struct rpc_task *task, + struct nfs4_ff_layout_mirror *mirror, + __u64 requested, + __u64 completed) +{ + spin_lock(&mirror->lock); + nfs4_ff_layout_stat_io_update_completed(&mirror->read_stat, + requested, completed, + nfs4_ff_layout_calc_completion_time(task)); + spin_unlock(&mirror->lock); +} + +static void +nfs4_ff_layout_stat_io_start_write(struct nfs4_ff_layout_mirror *mirror, + __u64 requested) +{ + bool report; + + spin_lock(&mirror->lock); + report = nfs4_ff_layoutstat_start_io(mirror , &mirror->write_stat); + nfs4_ff_layout_stat_io_update_requested(&mirror->write_stat, requested); + spin_unlock(&mirror->lock); + + if (report) + pnfs_report_layoutstat(mirror->lseg->pls_layout->plh_inode); +} + +static void +nfs4_ff_layout_stat_io_end_write(struct rpc_task *task, + struct nfs4_ff_layout_mirror *mirror, + __u64 requested, + __u64 completed, + enum nfs3_stable_how committed) +{ + if (committed == NFS_UNSTABLE) + requested = completed = 0; + + spin_lock(&mirror->lock); + nfs4_ff_layout_stat_io_update_completed(&mirror->write_stat, + requested, completed, + nfs4_ff_layout_calc_completion_time(task)); + spin_unlock(&mirror->lock); +} + static int ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo, @@ -906,6 +1048,10 @@ ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx) static int ff_layout_read_prepare_common(struct rpc_task *task, struct nfs_pgio_header *hdr) { + nfs4_ff_layout_stat_io_start_read( + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count); + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { rpc_exit(task, -EIO); return -EIO; @@ -959,15 +1105,15 @@ static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; - if (ff_layout_read_prepare_common(task, hdr)) - return; - if (ff_layout_setup_sequence(hdr->ds_clp, &hdr->args.seq_args, &hdr->res.seq_res, task)) return; + if (ff_layout_read_prepare_common(task, hdr)) + return; + if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, hdr->args.lock_context, FMODE_READ) == -EIO) rpc_exit(task, -EIO); /* lost lock, terminate I/O */ @@ -979,6 +1125,10 @@ static void ff_layout_read_call_done(struct rpc_task *task, void *data) dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); + nfs4_ff_layout_stat_io_end_read(task, + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count, hdr->res.count); + if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { nfs4_sequence_done(task, &hdr->res.seq_res); @@ -1080,6 +1230,10 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, static int ff_layout_write_prepare_common(struct rpc_task *task, struct nfs_pgio_header *hdr) { + nfs4_ff_layout_stat_io_start_write( + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count); + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { rpc_exit(task, -EIO); return -EIO; @@ -1113,15 +1267,15 @@ static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; - if (ff_layout_write_prepare_common(task, hdr)) - return; - if (ff_layout_setup_sequence(hdr->ds_clp, &hdr->args.seq_args, &hdr->res.seq_res, task)) return; + if (ff_layout_write_prepare_common(task, hdr)) + return; + if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, hdr->args.lock_context, FMODE_WRITE) == -EIO) rpc_exit(task, -EIO); /* lost lock, terminate I/O */ @@ -1131,6 +1285,11 @@ static void ff_layout_write_call_done(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; + nfs4_ff_layout_stat_io_end_write(task, + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count, hdr->res.count, + hdr->res.verf->committed); + if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { nfs4_sequence_done(task, &hdr->res.seq_res); @@ -1149,8 +1308,17 @@ static void ff_layout_write_count_stats(struct rpc_task *task, void *data) &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]); } +static void ff_layout_commit_prepare_common(struct rpc_task *task, + struct nfs_commit_data *cdata) +{ + nfs4_ff_layout_stat_io_start_write( + FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index), + 0); +} + static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data) { + ff_layout_commit_prepare_common(task, data); rpc_call_start(task); } @@ -1158,10 +1326,30 @@ static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data) { struct nfs_commit_data *wdata = data; - ff_layout_setup_sequence(wdata->ds_clp, + if (ff_layout_setup_sequence(wdata->ds_clp, &wdata->args.seq_args, &wdata->res.seq_res, - task); + task)) + return; + ff_layout_commit_prepare_common(task, data); +} + +static void ff_layout_commit_done(struct rpc_task *task, void *data) +{ + struct nfs_commit_data *cdata = data; + struct nfs_page *req; + __u64 count = 0; + + if (task->tk_status == 0) { + list_for_each_entry(req, &cdata->pages, wb_list) + count += req->wb_bytes; + } + + nfs4_ff_layout_stat_io_end_write(task, + FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index), + count, count, NFS_FILE_SYNC); + + pnfs_generic_write_commit_done(task, data); } static void ff_layout_commit_count_stats(struct rpc_task *task, void *data) @@ -1202,14 +1390,14 @@ static const struct rpc_call_ops ff_layout_write_call_ops_v4 = { static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = { .rpc_call_prepare = ff_layout_commit_prepare_v3, - .rpc_call_done = pnfs_generic_write_commit_done, + .rpc_call_done = ff_layout_commit_done, .rpc_count_stats = ff_layout_commit_count_stats, .rpc_release = pnfs_generic_commit_release, }; static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = { .rpc_call_prepare = ff_layout_commit_prepare_v4, - .rpc_call_done = pnfs_generic_write_commit_done, + .rpc_call_done = ff_layout_commit_done, .rpc_count_stats = ff_layout_commit_count_stats, .rpc_release = pnfs_generic_commit_release, }; @@ -1253,7 +1441,6 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) fh = nfs4_ff_layout_select_ds_fh(lseg, idx); if (fh) hdr->args.fh = fh; - /* * Note that if we ever decide to split across DSes, * then we may need to handle dense-like offsets. @@ -1382,6 +1569,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); if (fh) data->args.fh = fh; + return nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops, vers == 3 ? &ff_layout_commit_call_ops_v3 : &ff_layout_commit_call_ops_v4, @@ -1485,6 +1673,247 @@ ff_layout_encode_layoutreturn(struct pnfs_layout_hdr *lo, dprintk("%s: Return\n", __func__); } +static int +ff_layout_ntop4(const struct sockaddr *sap, char *buf, const size_t buflen) +{ + const struct sockaddr_in *sin = (struct sockaddr_in *)sap; + + return snprintf(buf, buflen, "%pI4", &sin->sin_addr); +} + +static size_t +ff_layout_ntop6_noscopeid(const struct sockaddr *sap, char *buf, + const int buflen) +{ + const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + const struct in6_addr *addr = &sin6->sin6_addr; + + /* + * RFC 4291, Section 2.2.2 + * + * Shorthanded ANY address + */ + if (ipv6_addr_any(addr)) + return snprintf(buf, buflen, "::"); + + /* + * RFC 4291, Section 2.2.2 + * + * Shorthanded loopback address + */ + if (ipv6_addr_loopback(addr)) + return snprintf(buf, buflen, "::1"); + + /* + * RFC 4291, Section 2.2.3 + * + * Special presentation address format for mapped v4 + * addresses. + */ + if (ipv6_addr_v4mapped(addr)) + return snprintf(buf, buflen, "::ffff:%pI4", + &addr->s6_addr32[3]); + + /* + * RFC 4291, Section 2.2.1 + */ + return snprintf(buf, buflen, "%pI6c", addr); +} + +/* Derived from rpc_sockaddr2uaddr */ +static void +ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da) +{ + struct sockaddr *sap = (struct sockaddr *)&da->da_addr; + char portbuf[RPCBIND_MAXUADDRPLEN]; + char addrbuf[RPCBIND_MAXUADDRLEN]; + char *netid; + unsigned short port; + int len, netid_len; + __be32 *p; + + switch (sap->sa_family) { + case AF_INET: + if (ff_layout_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0) + return; + port = ntohs(((struct sockaddr_in *)sap)->sin_port); + netid = "tcp"; + netid_len = 3; + break; + case AF_INET6: + if (ff_layout_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)) == 0) + return; + port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port); + netid = "tcp6"; + netid_len = 4; + break; + default: + /* we only support tcp and tcp6 */ + WARN_ON_ONCE(1); + return; + } + + snprintf(portbuf, sizeof(portbuf), ".%u.%u", port >> 8, port & 0xff); + len = strlcat(addrbuf, portbuf, sizeof(addrbuf)); + + p = xdr_reserve_space(xdr, 4 + netid_len); + xdr_encode_opaque(p, netid, netid_len); + + p = xdr_reserve_space(xdr, 4 + len); + xdr_encode_opaque(p, addrbuf, len); +} + +static void +ff_layout_encode_nfstime(struct xdr_stream *xdr, + ktime_t t) +{ + struct timespec64 ts; + __be32 *p; + + p = xdr_reserve_space(xdr, 12); + ts = ktime_to_timespec64(t); + p = xdr_encode_hyper(p, ts.tv_sec); + *p++ = cpu_to_be32(ts.tv_nsec); +} + +static void +ff_layout_encode_io_latency(struct xdr_stream *xdr, + struct nfs4_ff_io_stat *stat) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, 5 * 8); + p = xdr_encode_hyper(p, stat->ops_requested); + p = xdr_encode_hyper(p, stat->bytes_requested); + p = xdr_encode_hyper(p, stat->ops_completed); + p = xdr_encode_hyper(p, stat->bytes_completed); + p = xdr_encode_hyper(p, stat->bytes_not_delivered); + ff_layout_encode_nfstime(xdr, stat->total_busy_time); + ff_layout_encode_nfstime(xdr, stat->aggregate_completion_time); +} + +static void +ff_layout_encode_layoutstats(struct xdr_stream *xdr, + struct nfs42_layoutstat_args *args, + struct nfs42_layoutstat_devinfo *devinfo) +{ + struct nfs4_ff_layout_mirror *mirror = devinfo->layout_private; + struct nfs4_pnfs_ds_addr *da; + struct nfs4_pnfs_ds *ds = mirror->mirror_ds->ds; + struct nfs_fh *fh = &mirror->fh_versions[0]; + __be32 *p, *start; + + da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node); + dprintk("%s: DS %s: encoding address %s\n", + __func__, ds->ds_remotestr, da->da_remotestr); + /* layoutupdate length */ + start = xdr_reserve_space(xdr, 4); + /* netaddr4 */ + ff_layout_encode_netaddr(xdr, da); + /* nfs_fh4 */ + p = xdr_reserve_space(xdr, 4 + fh->size); + xdr_encode_opaque(p, fh->data, fh->size); + /* ff_io_latency4 read */ + spin_lock(&mirror->lock); + ff_layout_encode_io_latency(xdr, &mirror->read_stat.io_stat); + /* ff_io_latency4 write */ + ff_layout_encode_io_latency(xdr, &mirror->write_stat.io_stat); + spin_unlock(&mirror->lock); + /* nfstime4 */ + ff_layout_encode_nfstime(xdr, ktime_sub(ktime_get(), mirror->start_time)); + /* bool */ + p = xdr_reserve_space(xdr, 4); + *p = cpu_to_be32(false); + + *start = cpu_to_be32((xdr->p - start - 1) * 4); +} + +static bool +ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args, + struct pnfs_layout_segment *pls, + int *dev_count, int dev_limit) +{ + struct nfs4_ff_layout_mirror *mirror; + struct nfs4_deviceid_node *dev; + struct nfs42_layoutstat_devinfo *devinfo; + int i; + + for (i = 0; i <= FF_LAYOUT_MIRROR_COUNT(pls); i++) { + if (*dev_count >= dev_limit) + break; + mirror = FF_LAYOUT_COMP(pls, i); + if (!mirror || !mirror->mirror_ds) + continue; + dev = FF_LAYOUT_DEVID_NODE(pls, i); + devinfo = &args->devinfo[*dev_count]; + memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE); + devinfo->offset = pls->pls_range.offset; + devinfo->length = pls->pls_range.length; + /* well, we don't really know if IO is continuous or not! */ + devinfo->read_count = mirror->read_stat.io_stat.bytes_completed; + devinfo->read_bytes = mirror->read_stat.io_stat.bytes_completed; + devinfo->write_count = mirror->write_stat.io_stat.bytes_completed; + devinfo->write_bytes = mirror->write_stat.io_stat.bytes_completed; + devinfo->layout_type = LAYOUT_FLEX_FILES; + devinfo->layoutstats_encode = ff_layout_encode_layoutstats; + devinfo->layout_private = mirror; + /* lseg refcount put in cleanup_layoutstats */ + pnfs_get_lseg(pls); + + ++(*dev_count); + } + + return *dev_count < dev_limit; +} + +static int +ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args) +{ + struct pnfs_layout_segment *pls; + int dev_count = 0; + + spin_lock(&args->inode->i_lock); + list_for_each_entry(pls, &NFS_I(args->inode)->layout->plh_segs, pls_list) { + dev_count += FF_LAYOUT_MIRROR_COUNT(pls); + } + spin_unlock(&args->inode->i_lock); + /* For now, send at most PNFS_LAYOUTSTATS_MAXDEV statistics */ + if (dev_count > PNFS_LAYOUTSTATS_MAXDEV) { + dprintk("%s: truncating devinfo to limit (%d:%d)\n", + __func__, dev_count, PNFS_LAYOUTSTATS_MAXDEV); + dev_count = PNFS_LAYOUTSTATS_MAXDEV; + } + args->devinfo = kmalloc(dev_count * sizeof(*args->devinfo), GFP_KERNEL); + if (!args->devinfo) + return -ENOMEM; + + dev_count = 0; + spin_lock(&args->inode->i_lock); + list_for_each_entry(pls, &NFS_I(args->inode)->layout->plh_segs, pls_list) { + if (!ff_layout_mirror_prepare_stats(args, pls, &dev_count, + PNFS_LAYOUTSTATS_MAXDEV)) { + break; + } + } + spin_unlock(&args->inode->i_lock); + args->num_dev = dev_count; + + return 0; +} + +static void +ff_layout_cleanup_layoutstats(struct nfs42_layoutstat_data *data) +{ + struct nfs4_ff_layout_mirror *mirror; + int i; + + for (i = 0; i < data->args.num_dev; i++) { + mirror = data->args.devinfo[i].layout_private; + data->args.devinfo[i].layout_private = NULL; + pnfs_put_lseg(mirror->lseg); + } +} + static struct pnfs_layoutdriver_type flexfilelayout_type = { .id = LAYOUT_FLEX_FILES, .name = "LAYOUT_FLEX_FILES", @@ -1507,6 +1936,8 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = { .alloc_deviceid_node = ff_layout_alloc_deviceid_node, .encode_layoutreturn = ff_layout_encode_layoutreturn, .sync = pnfs_nfs_generic_sync, + .prepare_layoutstats = ff_layout_prepare_layoutstats, + .cleanup_layoutstats = ff_layout_cleanup_layoutstats, }; static int __init nfs4flexfilelayout_init(void) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 070f20445b2d33883445038d4888538845555198..6fcd8d5e8e3d686bdac8d86cda3ed09514292b00 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -15,6 +15,9 @@ * due to network error etc. */ #define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096 +/* LAYOUTSTATS report interval in ms */ +#define FF_LAYOUTSTATS_REPORT_INTERVAL (60000L) + struct nfs4_ff_ds_version { u32 version; u32 minor_version; @@ -41,19 +44,42 @@ struct nfs4_ff_layout_ds_err { struct nfs4_deviceid deviceid; }; +struct nfs4_ff_io_stat { + __u64 ops_requested; + __u64 bytes_requested; + __u64 ops_completed; + __u64 bytes_completed; + __u64 bytes_not_delivered; + ktime_t total_busy_time; + ktime_t aggregate_completion_time; +}; + +struct nfs4_ff_busy_timer { + ktime_t start_time; + atomic_t n_ops; +}; + +struct nfs4_ff_layoutstat { + struct nfs4_ff_io_stat io_stat; + struct nfs4_ff_busy_timer busy_timer; +}; + struct nfs4_ff_layout_mirror { + struct pnfs_layout_segment *lseg; /* back pointer */ u32 ds_count; u32 efficiency; struct nfs4_ff_layout_ds *mirror_ds; u32 fh_versions_cnt; struct nfs_fh *fh_versions; nfs4_stateid stateid; - struct nfs4_string user_name; - struct nfs4_string group_name; u32 uid; u32 gid; struct rpc_cred *cred; spinlock_t lock; + struct nfs4_ff_layoutstat read_stat; + struct nfs4_ff_layoutstat write_stat; + ktime_t start_time; + ktime_t last_report_time; }; struct nfs4_ff_layout_segment { diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index 7afb8947dfdf3e299ee39c6490640f40567a70b7..ff66ae700b8991eeed513e210397f6639c0e5f70 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -5,11 +5,18 @@ #ifndef __LINUX_FS_NFS_NFS4_2_H #define __LINUX_FS_NFS_NFS4_2_H +/* + * FIXME: four LAYOUTSTATS calls per compound at most! Do we need to support + * more? Need to consider not to pre-alloc too much for a compound. + */ +#define PNFS_LAYOUTSTATS_MAXDEV (4) + /* nfs4.2proc.c */ int nfs42_proc_allocate(struct file *, loff_t, loff_t); int nfs42_proc_deallocate(struct file *, loff_t, loff_t); loff_t nfs42_proc_llseek(struct file *, loff_t, int); - +int nfs42_proc_layoutstats_generic(struct nfs_server *, + struct nfs42_layoutstat_data *); /* nfs4.2xdr.h */ extern struct rpc_procinfo nfs4_2_procedures[]; diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 3a9e75235f30e60e5a4ae974864c63fb003b969d..06c74cd9387566ff57c2b8a23109b7895d215755 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -10,6 +10,11 @@ #include #include "nfs4_fs.h" #include "nfs42.h" +#include "iostat.h" +#include "pnfs.h" +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_PNFS static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file, fmode_t fmode) @@ -165,3 +170,79 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes); } + +static void +nfs42_layoutstat_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs42_layoutstat_data *data = calldata; + struct nfs_server *server = NFS_SERVER(data->args.inode); + + nfs41_setup_sequence(nfs4_get_session(server), &data->args.seq_args, + &data->res.seq_res, task); +} + +static void +nfs42_layoutstat_done(struct rpc_task *task, void *calldata) +{ + struct nfs42_layoutstat_data *data = calldata; + + if (!nfs4_sequence_done(task, &data->res.seq_res)) + return; + + /* well, we don't care about errors at all! */ + if (task->tk_status) + dprintk("%s server returns %d\n", __func__, task->tk_status); +} + +static void +nfs42_layoutstat_release(void *calldata) +{ + struct nfs42_layoutstat_data *data = calldata; + struct nfs_server *nfss = NFS_SERVER(data->args.inode); + + if (nfss->pnfs_curr_ld->cleanup_layoutstats) + nfss->pnfs_curr_ld->cleanup_layoutstats(data); + + pnfs_put_layout_hdr(NFS_I(data->args.inode)->layout); + smp_mb__before_atomic(); + clear_bit(NFS_INO_LAYOUTSTATS, &NFS_I(data->args.inode)->flags); + smp_mb__after_atomic(); + nfs_iput_and_deactive(data->inode); + kfree(data->args.devinfo); + kfree(data); +} + +static const struct rpc_call_ops nfs42_layoutstat_ops = { + .rpc_call_prepare = nfs42_layoutstat_prepare, + .rpc_call_done = nfs42_layoutstat_done, + .rpc_release = nfs42_layoutstat_release, +}; + +int nfs42_proc_layoutstats_generic(struct nfs_server *server, + struct nfs42_layoutstat_data *data) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTSTATS], + .rpc_argp = &data->args, + .rpc_resp = &data->res, + }; + struct rpc_task_setup task_setup = { + .rpc_client = server->client, + .rpc_message = &msg, + .callback_ops = &nfs42_layoutstat_ops, + .callback_data = data, + .flags = RPC_TASK_ASYNC, + }; + struct rpc_task *task; + + data->inode = nfs_igrab_and_active(data->args.inode); + if (!data->inode) { + nfs42_layoutstat_release(data); + return -EAGAIN; + } + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); + task = rpc_run_task(&task_setup); + if (IS_ERR(task)) + return PTR_ERR(task); + return 0; +} diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 1a25b27248f2ff5fd0026b00a3032589ee8cdff5..9aae0205ef11d965e12e07dd1a550c62f6e7c729 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -4,6 +4,8 @@ #ifndef __LINUX_FS_NFS_NFS4_2XDR_H #define __LINUX_FS_NFS_NFS4_2XDR_H +#include "nfs42.h" + #define encode_fallocate_maxsz (encode_stateid_maxsz + \ 2 /* offset */ + \ 2 /* length */) @@ -22,6 +24,16 @@ 1 /* whence */ + \ 2 /* offset */ + \ 2 /* length */) +#define encode_io_info_maxsz 4 +#define encode_layoutstats_maxsz (op_decode_hdr_maxsz + \ + 2 /* offset */ + \ + 2 /* length */ + \ + encode_stateid_maxsz + \ + encode_io_info_maxsz + \ + encode_io_info_maxsz + \ + 1 /* opaque devaddr4 length */ + \ + XDR_QUADLEN(PNFS_LAYOUTSTATS_MAXSIZE)) +#define decode_layoutstats_maxsz (op_decode_hdr_maxsz) #define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ @@ -45,6 +57,14 @@ #define NFS4_dec_seek_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ decode_seek_maxsz) +#define NFS4_enc_layoutstats_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + PNFS_LAYOUTSTATS_MAXDEV * encode_layoutstats_maxsz) +#define NFS4_dec_layoutstats_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + PNFS_LAYOUTSTATS_MAXDEV * decode_layoutstats_maxsz) static void encode_fallocate(struct xdr_stream *xdr, @@ -81,6 +101,33 @@ static void encode_seek(struct xdr_stream *xdr, encode_uint32(xdr, args->sa_what); } +static void encode_layoutstats(struct xdr_stream *xdr, + struct nfs42_layoutstat_args *args, + struct nfs42_layoutstat_devinfo *devinfo, + struct compound_hdr *hdr) +{ + __be32 *p; + + encode_op_hdr(xdr, OP_LAYOUTSTATS, decode_layoutstats_maxsz, hdr); + p = reserve_space(xdr, 8 + 8); + p = xdr_encode_hyper(p, devinfo->offset); + p = xdr_encode_hyper(p, devinfo->length); + encode_nfs4_stateid(xdr, &args->stateid); + p = reserve_space(xdr, 4*8 + NFS4_DEVICEID4_SIZE + 4); + p = xdr_encode_hyper(p, devinfo->read_count); + p = xdr_encode_hyper(p, devinfo->read_bytes); + p = xdr_encode_hyper(p, devinfo->write_count); + p = xdr_encode_hyper(p, devinfo->write_bytes); + p = xdr_encode_opaque_fixed(p, devinfo->dev_id.data, + NFS4_DEVICEID4_SIZE); + /* Encode layoutupdate4 */ + *p++ = cpu_to_be32(devinfo->layout_type); + if (devinfo->layoutstats_encode != NULL) + devinfo->layoutstats_encode(xdr, args, devinfo); + else + encode_uint32(xdr, 0); +} + /* * Encode ALLOCATE request */ @@ -137,6 +184,28 @@ static void nfs4_xdr_enc_seek(struct rpc_rqst *req, encode_nops(&hdr); } +/* + * Encode LAYOUTSTATS request + */ +static void nfs4_xdr_enc_layoutstats(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs42_layoutstat_args *args) +{ + int i; + + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + WARN_ON(args->num_dev > PNFS_LAYOUTSTATS_MAXDEV); + for (i = 0; i < args->num_dev; i++) + encode_layoutstats(xdr, args, &args->devinfo[i], &hdr); + encode_nops(&hdr); +} + static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) { return decode_op_hdr(xdr, OP_ALLOCATE); @@ -169,6 +238,28 @@ static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res) return -EIO; } +static int decode_layoutstats(struct xdr_stream *xdr, + struct nfs42_layoutstat_res *res) +{ + int status; + __be32 *p; + + status = decode_op_hdr(xdr, OP_LAYOUTSTATS); + if (status) + return status; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + + res->rpc_status = be32_to_cpup(p++); + return 0; + +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + /* * Decode ALLOCATE request */ @@ -246,4 +337,35 @@ static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp, out: return status; } + +/* + * Decode LAYOUTSTATS request + */ +static int nfs4_xdr_dec_layoutstats(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs42_layoutstat_res *res) +{ + struct compound_hdr hdr; + int status, i; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + WARN_ON(res->num_dev > PNFS_LAYOUTSTATS_MAXDEV); + for (i = 0; i < res->num_dev; i++) { + status = decode_layoutstats(xdr, res); + if (status) + goto out; + } +out: + res->rpc_status = status; + return status; +} + #endif /* __LINUX_FS_NFS_NFS4_2XDR_H */ diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index fdef424b0cd3c6120ddc7d9e379e49647b7b24fb..ea3bee919a765840a267f8fc59ccdec4ef61f676 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -233,6 +233,7 @@ extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *, struct rpc_message *, struct nfs4_sequence_args *, struct nfs4_sequence_res *, int); +extern void nfs4_init_sequence(struct nfs4_sequence_args *, struct nfs4_sequence_res *, int); extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e9cd45f1d60fe476f7e143ff854fee3b535aac03..643ce3a91b2299c197292a4c5f6956320464a7f8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -476,8 +476,8 @@ struct nfs4_call_sync_data { struct nfs4_sequence_res *seq_res; }; -static void nfs4_init_sequence(struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, int cache_reply) +void nfs4_init_sequence(struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, int cache_reply) { args->sa_slot = NULL; args->sa_cache_this = cache_reply; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index a07555d6968c4c1dde12f599912160ce61af6098..558cd65dbdb752d111b5b85649b72bae36fdf040 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -7431,6 +7431,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(SEEK, enc_seek, dec_seek), PROC(ALLOCATE, enc_allocate, dec_allocate), PROC(DEALLOCATE, enc_deallocate, dec_deallocate), + PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats), #endif /* CONFIG_NFS_V4_2 */ }; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d47c188682b163b15e72dd933deb96ba6540b72f..40bacebb5b97deda58036d78e06ead361dc7c128 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -35,6 +35,7 @@ #include "iostat.h" #include "nfs4trace.h" #include "delegation.h" +#include "nfs42.h" #define NFSDBG_FACILITY NFSDBG_PNFS #define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ) @@ -2250,3 +2251,60 @@ struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) } return thp; } + +#if IS_ENABLED(CONFIG_NFS_V4_2) +int +pnfs_report_layoutstat(struct inode *inode) +{ + struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs42_layoutstat_data *data; + struct pnfs_layout_hdr *hdr; + int status = 0; + + if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats) + goto out; + + if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags)) + goto out; + + spin_lock(&inode->i_lock); + if (!NFS_I(inode)->layout) { + spin_unlock(&inode->i_lock); + goto out; + } + hdr = NFS_I(inode)->layout; + pnfs_get_layout_hdr(hdr); + spin_unlock(&inode->i_lock); + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) { + status = -ENOMEM; + goto out_put; + } + + data->args.fh = NFS_FH(inode); + data->args.inode = inode; + nfs4_stateid_copy(&data->args.stateid, &hdr->plh_stateid); + status = ld->prepare_layoutstats(&data->args); + if (status) + goto out_free; + + status = nfs42_proc_layoutstats_generic(NFS_SERVER(inode), data); + +out: + dprintk("%s returns %d\n", __func__, status); + return status; + +out_free: + kfree(data); +out_put: + pnfs_put_layout_hdr(hdr); + smp_mb__before_atomic(); + clear_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags); + smp_mb__after_atomic(); + goto out; +} +EXPORT_SYMBOL_GPL(pnfs_report_layoutstat); +#endif diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 1e6308f82fc3d5887de850e72226233e4f2138d2..3e6ab7bfbabd428425227b6f9d2a94711edd371e 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -178,6 +178,8 @@ struct pnfs_layoutdriver_type { void (*encode_layoutcommit) (struct pnfs_layout_hdr *lo, struct xdr_stream *xdr, const struct nfs4_layoutcommit_args *args); + int (*prepare_layoutstats) (struct nfs42_layoutstat_args *args); + void (*cleanup_layoutstats) (struct nfs42_layoutstat_data *data); }; struct pnfs_layout_hdr { @@ -290,7 +292,6 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *); struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); void pnfs_error_mark_layout_for_return(struct inode *inode, struct pnfs_layout_segment *lseg); - /* nfs4_deviceid_flags */ enum { NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */ @@ -689,4 +690,14 @@ static inline void nfs4_pnfs_v3_ds_connect_unload(void) #endif /* CONFIG_NFS_V4_1 */ +#if IS_ENABLED(CONFIG_NFS_V4_2) +int pnfs_report_layoutstat(struct inode *inode); +#else +static inline int +pnfs_report_layoutstat(struct inode *inode) +{ + return 0; +} +#endif + #endif /* FS_NFS_PNFS_H */ diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 32201c269890433817f1fac83026d95fa760b996..b8e72aad919cfc72ea6710ac3786dc35bbe47201 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -500,6 +500,7 @@ enum { NFSPROC4_CLNT_SEEK, NFSPROC4_CLNT_ALLOCATE, NFSPROC4_CLNT_DEALLOCATE, + NFSPROC4_CLNT_LAYOUTSTATS, }; /* nfs41 types */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b95f914ce083891325b6b8defa3f9995851cf76b..f91b5ade30c98fe8b03d06bc40deb0c434edb633 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -219,6 +219,7 @@ struct nfs_inode { #define NFS_INO_COMMIT (7) /* inode is committing unstable writes */ #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ +#define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */ static inline struct nfs_inode *NFS_I(const struct inode *inode) { diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index c959e7eb5bd4ccf96a96c33c59a1380dabf0ada7..7bbe50504211d65cc096baa8bc6d45e2e9449125 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -316,6 +316,49 @@ struct nfs4_layoutreturn { int rpc_status; }; +#define PNFS_LAYOUTSTATS_MAXSIZE 256 + +struct nfs42_layoutstat_args; +struct nfs42_layoutstat_devinfo; +typedef void (*layoutstats_encode_t)(struct xdr_stream *, + struct nfs42_layoutstat_args *, + struct nfs42_layoutstat_devinfo *); + +/* Per file per deviceid layoutstats */ +struct nfs42_layoutstat_devinfo { + struct nfs4_deviceid dev_id; + __u64 offset; + __u64 length; + __u64 read_count; + __u64 read_bytes; + __u64 write_count; + __u64 write_bytes; + __u32 layout_type; + layoutstats_encode_t layoutstats_encode; + void *layout_private; +}; + +struct nfs42_layoutstat_args { + struct nfs4_sequence_args seq_args; + struct nfs_fh *fh; + struct inode *inode; + nfs4_stateid stateid; + int num_dev; + struct nfs42_layoutstat_devinfo *devinfo; +}; + +struct nfs42_layoutstat_res { + struct nfs4_sequence_res seq_res; + int num_dev; + int rpc_status; +}; + +struct nfs42_layoutstat_data { + struct inode *inode; + struct nfs42_layoutstat_args args; + struct nfs42_layoutstat_res res; +}; + struct stateowner_id { __u64 create_time; __u32 uniquifier;