提交 aeb96447 编写于 作者: Y yu kuai 提交者: Chen Jun

fs/dirty_pages: dump the number of dirty pages for each inode

euler inclusion
category: feature
bugzilla: 46858
CVE: NA

---------------------------

In order to analyse I/O performance when using buffered I/O, it is
useful to obtain the number of dirty pages for an inode in the filesystem.

This feature is migrated from redhat-7.2. It creates 3 interfaces by using
procfs: /proc/dirty/buffer_size for buffer allocation and release;
/proc/dirty/page_threshold to filter the result; /proc/dirty/dirty_list
to get dirty pages.

Visit http://openeuler.huawei.com/bugzilla/show_bug.cgi?id=23941 for
details about modifications and implementations.
Signed-off-by: yu kuai <yukuai3@huawei.com>
Reviewed-by: zhangyi (F) <yi.zhang@huawei.com>
Signed-off-by: zhangyi (F) <yi.zhang@huawei.com>
Signed-off-by: Dianfang Zhang <zhangdianfang@huawei.com>
Acked-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Chen Jun <chenjun102@huawei.com>
上级 bf5b10b3
......@@ -52,6 +52,7 @@ obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o
obj-$(CONFIG_NFS_COMMON) += nfs_common/
obj-$(CONFIG_COREDUMP) += coredump.o
obj-$(CONFIG_SYSCTL) += drop_caches.o
obj-$(CONFIG_SYSCTL) += dirty_pages.o
obj-$(CONFIG_FHANDLE) += fhandle.o
obj-y += iomap/
......
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/proc_fs.h>
#include <linux/kdev_t.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include "internal.h"
/*
 * File-scope state shared by the three /proc/dirty files.
 */
static char *buf_dirty; /* vmalloc'ed buffer that receives the dirty-page report */
static unsigned long buf_size; /* size of buf_dirty in bytes */
static long buff_num; /* size of buf_dirty in pages, as written to buffer_size */
static int buff_limit; /* inodes with fewer dirty pages than this are not reported */
/*
 * NOTE(review): this lock is private to this file and is taken while walking
 * sb->s_inodes; confirm it is the lock that actually protects that list.
 */
static spinlock_t inode_sb_list_lock;
static struct proc_dir_entry *dirty_dir; /* the /proc/dirty directory */
static bool warn_once; /* set once the report overflowed and was marked as terminated */
static bool buff_used; /* buf_dirty is currently in use */
static struct mutex buff_lock; /* held while buf_dirty is allocated, freed or handed out */
/* proc root directory */
#define DIRTY_ROOT "dirty"
/* proc file for buffer allocation and release */
#define DIRTY_SWITCH "buffer_size"
/* proc file to obtain the dirty pages of each inode */
#define DIRTY_PAGES "dirty_list"
/* proc file to filter the result */
#define DIRTY_LIMIT "page_threshold"
/*
 * seq_set_overflow - mark the seq_file buffer as completely filled
 * @m: the seq_file to mark
 *
 * Sets the amount of stored data equal to the buffer size.
 */
static void seq_set_overflow(struct seq_file *m)
{
m->count = m->size;
}
static unsigned long dump_dirtypages_inode(struct inode *inode)
{
struct pagevec pvec;
unsigned long nr_dirtys = 0;
unsigned int nr_pages;
pgoff_t index = 0;
pagevec_init(&pvec);
while (1) {
nr_pages = pagevec_lookup_range_tag(&pvec, inode->i_mapping,
&index, (pgoff_t)-1, PAGECACHE_TAG_DIRTY);
if (!nr_pages)
break;
pagevec_release(&pvec);
cond_resched();
nr_dirtys += nr_pages;
}
return nr_dirtys;
}
static char *inode_filename(struct inode *inode, char *tmpname)
{
struct dentry *dentry;
char *filename;
dentry = d_find_alias(inode);
if (!dentry)
return ERR_PTR(-ENOENT);
tmpname[PATH_MAX-1] = '\0';
filename = dentry_path_raw(dentry, tmpname, PATH_MAX);
dput(dentry);
return filename;
}
/*
 * is_sb_writable - tell whether a super block should be scanned
 * @sb: the super block
 *
 * Return: false when the super block is read-only or its freeze state is
 * SB_FREEZE_COMPLETE, true otherwise.
 */
static inline bool is_sb_writable(struct super_block *sb)
{
	return !sb_rdonly(sb) &&
	       sb->s_writers.frozen != SB_FREEZE_COMPLETE;
}
/*
 * dump_dirtypages_sb - dump the dirty pages of each inode in the sb
 * @sb: the super block
 * @m:  the seq_file which is initialized in proc_dpages_open
 *
 * For each inode in the sb, call dump_dirtypages_inode() to get the number
 * of dirty pages, and use seq_printf() to store the result in the buffer
 * if it is not less than the threshold. Inodes in an unusual state are
 * skipped. Nothing is done for read-only or completely frozen super
 * blocks, or once a previous call has already marked the report as
 * truncated (warn_once).
 *
 * The inode list is walked under sb->s_inode_list_lock, the lock that
 * protects sb->s_inodes. The lock is dropped while an inode is examined;
 * the reference taken with __iget() keeps the inode (and therefore the
 * list cursor) valid until the next iteration has re-taken the lock.
 */
static void dump_dirtypages_sb(struct super_block *sb, struct seq_file *m)
{
	static const char trunc_msg[] = "terminated\n";
	struct inode *inode, *toput_inode = NULL;
	unsigned long nr_dirtys;
	const char *fstype;
	char *filename;
	char *tmpname;
	int limit = READ_ONCE(buff_limit);

	if (warn_once)
		return;
	if (!is_sb_writable(sb))
		return;

	tmpname = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!tmpname)
		return;

	spin_lock(&sb->s_inode_list_lock);
	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
		spin_lock(&inode->i_lock);
		/*
		 * We must skip inodes in unusual state. We may also skip
		 * inodes without pages but we deliberately won't in case
		 * we need to reschedule to avoid softlockups.
		 */
		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
		    (inode->i_mapping->nrpages == 0 && !need_resched())) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&sb->s_inode_list_lock);

		cond_resched();

		nr_dirtys = dump_dirtypages_inode(inode);
		/* a non-positive threshold filters nothing but clean inodes */
		if (!nr_dirtys || (limit > 0 && nr_dirtys < (unsigned long)limit))
			goto skip;

		filename = inode_filename(inode, tmpname);
		if (IS_ERR_OR_NULL(filename))
			filename = "unknown";

		if (sb->s_type && sb->s_type->name)
			fstype = sb->s_type->name;
		else
			fstype = "unknown";

		/*
		 * seq_printf() returns nothing. If the buffer is exhausted
		 * (m->size <= m->count) it stores nothing more, so replace
		 * the tail of the buffer with a marker to tell the user that
		 * the report is incomplete, and stop.
		 */
		if (!warn_once && m->size <= m->count) {
			warn_once = true;
			seq_set_overflow(m);
			/* only write the marker if the buffer can hold it */
			if (m->count >= sizeof(trunc_msg))
				memcpy(m->buf + m->count - sizeof(trunc_msg),
				       trunc_msg, sizeof(trunc_msg));
			/* drop the reference taken by __iget() above */
			iput(inode);
			goto done;
		}

		seq_printf(m, "FSType: %s, Dev ID: %u(%u:%u) ino %lu, dirty pages %lu, path %s\n",
			   fstype, sb->s_dev, MAJOR(sb->s_dev),
			   MINOR(sb->s_dev), inode->i_ino,
			   nr_dirtys, filename);
skip:
		/*
		 * The previous inode can be released now that we no longer
		 * need it as a list cursor; the current one is kept until
		 * the next iteration for the same reason.
		 */
		iput(toput_inode);
		toput_inode = inode;
		spin_lock(&sb->s_inode_list_lock);
	}
	spin_unlock(&sb->s_inode_list_lock);
done:
	iput(toput_inode);
	kfree(tmpname);
}
/*
 * Adapter with the exact callback type iterate_supers() expects, so that
 * dump_dirtypages_sb() is never called through an incompatible function
 * pointer type.
 */
static void dump_dirtypages_sb_cb(struct super_block *sb, void *arg)
{
	dump_dirtypages_sb(sb, arg);
}

/*
 * proc_dpages_show - fill the seq_file with the dirty-page report
 * @m: seq_file that receives the report
 * @v: unused
 *
 * Runs dump_dirtypages_sb() on every super block via iterate_supers().
 *
 * Return: always 0.
 */
static int proc_dpages_show(struct seq_file *m, void *v)
{
	iterate_supers(dump_dirtypages_sb_cb, m);
	return 0;
}
/*
 * seq_read_dirty - read handler for the dirty_list proc file
 * @file: the opened proc file; private_data is its seq_file
 * @buf:  user buffer to copy the report into
 * @size: capacity of @buf
 * @ppos: file position, advanced by the number of bytes returned
 *
 * The report is generated by the first read on which the seq_file holds no
 * data yet; later reads continue copying from where the previous one
 * stopped (m->from).
 *
 * Before touching the buffer, the function checks under buff_lock that the
 * seq_file still points at the currently allocated buffer, and then marks
 * the buffer as used. The mark is intentionally left set on return: the
 * seq_file keeps pointing at the shared buffer between reads, and
 * seq_release_dirty() clears the mark when the file is closed.
 *
 * Return: number of bytes copied (0 at end of data), -EBUSY if the buffer
 * was released or replaced after the file was opened, the error returned
 * by the show callback, or -EFAULT if copying to user space failed.
 */
static ssize_t seq_read_dirty(
	struct file *file,
	char __user *buf,
	size_t size,
	loff_t *ppos)
{
	struct seq_file *m = file->private_data;
	ssize_t copied;
	loff_t pos;
	int err;

	mutex_lock(&buff_lock);
	if (!buf_dirty || m->buf != buf_dirty || m->size > buf_size) {
		mutex_unlock(&buff_lock);
		return -EBUSY;
	}
	buff_used = true;
	mutex_unlock(&buff_lock);

	if (m->count == 0) {
		err = m->op->show(m, NULL);
		if (err < 0)
			return err;
	}

	/*
	 * m->from is a size_t; go through a real loff_t instead of casting
	 * its address, which is only correct when both types have the same
	 * width. simple_read_from_buffer() limits the copy to the data
	 * remaining after pos by itself.
	 */
	pos = m->from;
	copied = simple_read_from_buffer(buf, size, &pos, m->buf, m->count);
	if (copied < 0)
		return -EFAULT;

	m->from = pos;
	*ppos += copied;
	return copied;
}
/*
 * free_buf_dirty - release the report buffer, if there is one
 *
 * When a buffer is allocated it is vfree'd and both the pointer and the
 * recorded size are reset. Nothing is touched when no buffer exists.
 */
static void free_buf_dirty(void)
{
	if (!buf_dirty)
		return;

	vfree(buf_dirty);
	buf_size = 0;
	buf_dirty = NULL;
}
static ssize_t write_proc(
struct file *filp,
const char *buf,
size_t count,
loff_t *offp)
{
char *msg;
int ret = 0;
long old_buff_num;
msg = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!msg)
return -ENOMEM;
if (count > PAGE_SIZE) {
ret = -EINVAL;
goto error;
}
msg[count] = '\0';
if (copy_from_user(msg, buf, count)) {
ret = -EINVAL;
goto error;
}
old_buff_num = buff_num;
ret = kstrtol(msg, 10, &buff_num);
if (ret != 0 || buff_num < 0 || buff_num > 102400) {
buff_num = 0;
ret = -EINVAL;
goto error;
}
mutex_lock(&buff_lock);
if (buff_used) {
ret = -EBUSY;
goto out;
}
buff_used = true;
ret = count;
if (buff_num == 0) {
free_buf_dirty();
goto out;
}
if (buff_num == old_buff_num)
goto out;
free_buf_dirty();
buf_size = PAGE_SIZE * buff_num;
buf_dirty = vmalloc(buf_size);
if (!buf_dirty) {
ret = -ENOMEM;
goto out;
}
out:
buff_used = false;
mutex_unlock(&buff_lock);
error:
kfree(msg);
return ret;
}
static int proc_dpages_open(struct inode *inode, struct file *filp)
{
int ret;
struct seq_file *m;
ret = single_open(filp, proc_dpages_show, NULL);
m = filp->private_data;
mutex_lock(&buff_lock);
if (buff_used) {
ret = -EBUSY;
goto out;
}
if (!ret) {
if (buf_dirty == NULL || buf_size == 0) {
pr_info("please allocate buffer before getting dirty pages\n");
ret = -ENOMEM;
goto out;
} else {
warn_once = false;
memset(buf_dirty, 0, buf_size);
if (!m->buf) {
m->size = buf_size;
m->buf = buf_dirty;
}
}
}
out:
mutex_unlock(&buff_lock);
return ret;
}
/*
 * seq_release_dirty - release handler for the dirty_list proc file
 * @inode: proc inode
 * @file:  file being closed
 *
 * Marks the report buffer as no longer in use and tears down the seq_file.
 *
 * Return: always 0.
 */
static int seq_release_dirty(struct inode *inode, struct file *file)
{
struct seq_file *m = file->private_data;
buff_used = false;
/*
 * m->buf points at the shared buf_dirty buffer; detach it before the
 * seq_file is released because we don't want that buffer to be freed.
 */
m->buf = NULL;
single_release(inode, file);
return 0;
}
/* proc operations of the dirty_list file: open, read and release only */
static const struct proc_ops proc_dpages_operations = {
.proc_open = proc_dpages_open,
.proc_read = seq_read_dirty,
.proc_release = seq_release_dirty,
};
/*
 * proc_switch_show - print the current buffer size, in pages
 * @m: seq_file to print into
 * @v: unused
 *
 * Return: always 0.
 */
static int proc_switch_show(struct seq_file *m, void *v)
{
seq_printf(m, "%ld\n", buff_num);
return 0;
}
/*
 * proc_limit_show - print the current dirty-page filter threshold
 * @m: seq_file to print into
 * @v: unused
 *
 * Return: always 0.
 */
static int proc_limit_show(struct seq_file *m, void *v)
{
seq_printf(m, "%d\n", READ_ONCE(buff_limit));
return 0;
}
/* Open handler of the buffer_size file: single-record seq_file using proc_switch_show(). */
static int proc_switch_open(struct inode *inode, struct file *filp)
{
return single_open(filp, proc_switch_show, NULL);
}
/* Open handler of the page_threshold file: single-record seq_file using proc_limit_show(). */
static int proc_limit_open(struct inode *inode, struct file *filp)
{
return single_open(filp, proc_limit_show, NULL);
}
static ssize_t write_limit_proc(
struct file *filp,
const char *buf,
size_t count,
loff_t *offp)
{
char *msg;
int ret = 0;
long temp;
msg = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!msg)
return -ENOMEM;
if (count > PAGE_SIZE) {
ret = -EINVAL;
goto error;
}
msg[count] = '\0';
if (copy_from_user(msg, buf, count)) {
ret = -EINVAL;
goto error;
}
ret = kstrtol(msg, 10, &temp);
if (ret != 0 || temp < 0) {
ret = -EINVAL;
goto error;
}
WRITE_ONCE(buff_limit, temp);
ret = count;
error:
kfree(msg);
return ret;
}
/* proc operations of the buffer_size file: seq_file based read, write_proc() for writes */
static const struct proc_ops proc_switch_operations = {
.proc_open = proc_switch_open,
.proc_read = seq_read,
.proc_write = write_proc,
.proc_lseek = seq_lseek,
.proc_release = single_release,
};
/* proc operations of the page_threshold file: seq_file based read, write_limit_proc() for writes */
static const struct proc_ops proc_limit_operations = {
.proc_open = proc_limit_open,
.proc_read = seq_read,
.proc_write = write_limit_proc,
.proc_lseek = seq_lseek,
.proc_release = single_release,
};
/*
 * dpages_proc_init - create /proc/dirty and its three files
 *
 * The locks are initialised before any proc entry is created, because the
 * handlers behind those entries take them and an entry can be opened as
 * soon as it exists.
 *
 * Return: 0 on success, -ENOMEM if any entry could not be created (entries
 * created so far are removed again).
 */
static int __init dpages_proc_init(void)
{
	struct proc_dir_entry *proc_file;

	mutex_init(&buff_lock);
	spin_lock_init(&inode_sb_list_lock);

	dirty_dir = proc_mkdir(DIRTY_ROOT, NULL);
	if (!dirty_dir)
		goto fail_dir;

	proc_file = proc_create(DIRTY_PAGES, 0440,
				dirty_dir, &proc_dpages_operations);
	if (!proc_file)
		goto fail_pages;

	proc_file = proc_create(DIRTY_SWITCH, 0640,
				dirty_dir, &proc_switch_operations);
	if (!proc_file)
		goto fail_switch;

	proc_file = proc_create(DIRTY_LIMIT, 0640,
				dirty_dir, &proc_limit_operations);
	if (!proc_file)
		goto fail_limit;

	return 0;

fail_limit:
	remove_proc_entry(DIRTY_SWITCH, dirty_dir);
fail_switch:
	remove_proc_entry(DIRTY_PAGES, dirty_dir);
fail_pages:
	remove_proc_entry(DIRTY_ROOT, NULL);
fail_dir:
	return -ENOMEM;
}
static void dpages_proc_exit(void)
{
mutex_lock(&buff_lock);
free_buf_dirty();
mutex_unlock(&buff_lock);
remove_proc_entry(DIRTY_PAGES, dirty_dir);
remove_proc_entry(DIRTY_SWITCH, dirty_dir);
remove_proc_entry(DIRTY_LIMIT, dirty_dir);
remove_proc_entry(DIRTY_ROOT, NULL);
}
module_init(dpages_proc_init);
module_exit(dpages_proc_exit);
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册