提交 f34d3606 编写于 作者: L Linus Torvalds

Merge branch 'for-4.9' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup updates from Tejun Heo:

 - tracepoints for basic cgroup management operations added

 - kernfs and cgroup path formatting functions updated to behave in the
   style of strlcpy()

 - non-critical bug fixes

* 'for-4.9' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  blkcg: Unlock blkcg_pol_mutex only once when cpd == NULL
  cgroup: fix error handling regressions in proc_cgroup_show() and cgroup_release_agent()
  cpuset: fix error handling regression in proc_cpuset_show()
  cgroup: add tracepoints for basic operations
  cgroup: make cgroup_path() and friends behave in the style of strlcpy()
  kernfs: remove kernfs_path_len()
  kernfs: make kernfs_path*() behave in the style of strlcpy()
  kernfs: add dummy implementation of kernfs_path_from_node()
......@@ -1340,10 +1340,8 @@ int blkcg_policy_register(struct blkcg_policy *pol)
struct blkcg_policy_data *cpd;
cpd = pol->cpd_alloc_fn(GFP_KERNEL);
if (!cpd) {
mutex_unlock(&blkcg_pol_mutex);
if (!cpd)
goto err_free_cpds;
}
blkcg->cpd[pol->plid] = cpd;
cpd->blkcg = blkcg;
......
......@@ -110,8 +110,9 @@ static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a,
* kn_to: /n1/n2/n3 [depth=3]
* result: /../..
*
* return value: length of the string. If greater than buflen,
* then contents of buf are undefined. On error, -1 is returned.
* Returns the length of the full path. If the full length is equal to or
* greater than @buflen, @buf contains the truncated path with the trailing
* '\0'. On error, -errno is returned.
*/
static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
struct kernfs_node *kn_from,
......@@ -119,9 +120,8 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
{
struct kernfs_node *kn, *common;
const char parent_str[] = "/..";
size_t depth_from, depth_to, len = 0, nlen = 0;
char *p;
int i;
size_t depth_from, depth_to, len = 0;
int i, j;
if (!kn_from)
kn_from = kernfs_root(kn_to)->kn;
......@@ -131,7 +131,7 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
common = kernfs_common_ancestor(kn_from, kn_to);
if (WARN_ON(!common))
return -1;
return -EINVAL;
depth_to = kernfs_depth(common, kn_to);
depth_from = kernfs_depth(common, kn_from);
......@@ -144,22 +144,16 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
len < buflen ? buflen - len : 0);
/* Calculate how many bytes we need for the rest */
for (kn = kn_to; kn != common; kn = kn->parent)
nlen += strlen(kn->name) + 1;
if (len + nlen >= buflen)
return len + nlen;
p = buf + len + nlen;
*p = '\0';
for (kn = kn_to; kn != common; kn = kn->parent) {
size_t tmp = strlen(kn->name);
p -= tmp;
memcpy(p, kn->name, tmp);
*(--p) = '/';
for (i = depth_to - 1; i >= 0; i--) {
for (kn = kn_to, j = 0; j < i; j++)
kn = kn->parent;
len += strlcpy(buf + len, "/",
len < buflen ? buflen - len : 0);
len += strlcpy(buf + len, kn->name,
len < buflen ? buflen - len : 0);
}
return len + nlen;
return len;
}
/**
......@@ -185,29 +179,6 @@ int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
return ret;
}
/**
* kernfs_path_len - determine the length of the full path of a given node
* @kn: kernfs_node of interest
*
* The returned length doesn't include the space for the terminating '\0'.
*/
size_t kernfs_path_len(struct kernfs_node *kn)
{
size_t len = 0;
unsigned long flags;
spin_lock_irqsave(&kernfs_rename_lock, flags);
do {
len += strlen(kn->name) + 1;
kn = kn->parent;
} while (kn && kn->parent);
spin_unlock_irqrestore(&kernfs_rename_lock, flags);
return len;
}
/**
* kernfs_path_from_node - build path of node @to relative to @from.
* @from: parent kernfs_node relative to which we need to build the path
......@@ -220,8 +191,9 @@ size_t kernfs_path_len(struct kernfs_node *kn)
* path (which includes '..'s) as needed to reach from @from to @to is
* returned.
*
* If @buf isn't long enough, the return value will be greater than @buflen
* and @buf contents are undefined.
* Returns the length of the full path. If the full length is equal to or
* greater than @buflen, @buf contains the truncated path with the trailing
* '\0'. On error, -errno is returned.
*/
int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from,
char *buf, size_t buflen)
......@@ -236,28 +208,6 @@ int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from,
}
EXPORT_SYMBOL_GPL(kernfs_path_from_node);
/**
* kernfs_path - build full path of a given node
* @kn: kernfs_node of interest
* @buf: buffer to copy @kn's name into
* @buflen: size of @buf
*
* Builds and returns the full path of @kn in @buf of @buflen bytes. The
* path is built from the end of @buf so the returned pointer usually
* doesn't match @buf. If @buf isn't long enough, @buf is nul terminated
* and %NULL is returned.
*/
char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen)
{
int ret;
ret = kernfs_path_from_node(kn, NULL, buf, buflen);
if (ret < 0 || ret >= buflen)
return NULL;
return buf;
}
EXPORT_SYMBOL_GPL(kernfs_path);
/**
* pr_cont_kernfs_name - pr_cont name of a kernfs_node
* @kn: kernfs_node of interest
......
......@@ -21,14 +21,14 @@ DEFINE_SPINLOCK(sysfs_symlink_target_lock);
void sysfs_warn_dup(struct kernfs_node *parent, const char *name)
{
char *buf, *path = NULL;
char *buf;
buf = kzalloc(PATH_MAX, GFP_KERNEL);
if (buf)
path = kernfs_path(parent, buf, PATH_MAX);
kernfs_path(parent, buf, PATH_MAX);
WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s/%s'\n",
path, name);
buf, name);
kfree(buf);
}
......
......@@ -343,16 +343,7 @@ static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
*/
static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
{
char *p;
p = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
if (!p) {
strncpy(buf, "<unavailable>", buflen);
return -ENAMETOOLONG;
}
memmove(buf, p, buf + buflen - p);
return 0;
return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
}
/**
......
......@@ -97,7 +97,7 @@ int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
int cgroup_rm_cftypes(struct cftype *cfts);
void cgroup_file_notify(struct cgroup_file *cfile);
char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry);
int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *tsk);
......@@ -555,8 +555,7 @@ static inline int cgroup_name(struct cgroup *cgrp, char *buf, size_t buflen)
return kernfs_name(cgrp->kn, buf, buflen);
}
static inline char * __must_check cgroup_path(struct cgroup *cgrp, char *buf,
size_t buflen)
static inline int cgroup_path(struct cgroup *cgrp, char *buf, size_t buflen)
{
return kernfs_path(cgrp->kn, buf, buflen);
}
......@@ -658,7 +657,7 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
struct user_namespace *user_ns,
struct cgroup_namespace *old_ns);
char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
struct cgroup_namespace *ns);
#else /* !CONFIG_CGROUPS */
......
......@@ -269,10 +269,8 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn)
}
int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen);
size_t kernfs_path_len(struct kernfs_node *kn);
int kernfs_path_from_node(struct kernfs_node *root_kn, struct kernfs_node *kn,
char *buf, size_t buflen);
char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen);
void pr_cont_kernfs_name(struct kernfs_node *kn);
void pr_cont_kernfs_path(struct kernfs_node *kn);
struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn);
......@@ -341,12 +339,10 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn)
static inline int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
{ return -ENOSYS; }
static inline size_t kernfs_path_len(struct kernfs_node *kn)
{ return 0; }
static inline char *kernfs_path(struct kernfs_node *kn, char *buf,
size_t buflen)
{ return NULL; }
static inline int kernfs_path_from_node(struct kernfs_node *root_kn,
struct kernfs_node *kn,
char *buf, size_t buflen)
{ return -ENOSYS; }
static inline void pr_cont_kernfs_name(struct kernfs_node *kn) { }
static inline void pr_cont_kernfs_path(struct kernfs_node *kn) { }
......@@ -436,6 +432,22 @@ static inline void kernfs_init(void) { }
#endif /* CONFIG_KERNFS */
/**
* kernfs_path - build full path of a given node
* @kn: kernfs_node of interest
* @buf: buffer to copy @kn's path into
* @buflen: size of @buf
*
* Thin wrapper which calls kernfs_path_from_node() with a %NULL @from
* argument and passes its return value through unchanged. Per that
* function's documentation, the return value is the length of the full
* path; if that length is equal to or greater than @buflen, @buf contains
* the truncated path with the trailing '\0'. On error, -errno is returned.
*/
static inline int kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen)
{
return kernfs_path_from_node(kn, NULL, buf, buflen);
}
static inline struct kernfs_node *
kernfs_find_and_get(struct kernfs_node *kn, const char *name)
{
......
#undef TRACE_SYSTEM
#define TRACE_SYSTEM cgroup
#if !defined(_TRACE_CGROUP_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_CGROUP_H
#include <linux/cgroup.h>
#include <linux/tracepoint.h>
/*
* Event class for events that describe a cgroup hierarchy root. Each record
* stores the root's hierarchy_id, its subsys_mask and a copy of its name,
* and prints them as "root=... ss_mask=... name=...".
*/
DECLARE_EVENT_CLASS(cgroup_root,
TP_PROTO(struct cgroup_root *root),
TP_ARGS(root),
TP_STRUCT__entry(
__field( int, root )
__field( u16, ss_mask )
__string( name, root->name )
),
TP_fast_assign(
__entry->root = root->hierarchy_id;
__entry->ss_mask = root->subsys_mask;
__assign_str(name, root->name);
),
TP_printk("root=%d ss_mask=%#x name=%s",
__entry->root, __entry->ss_mask, __get_str(name))
);
/*
* Events using the cgroup_root class layout. Each one takes the affected
* struct cgroup_root and generates the matching trace_cgroup_setup_root(),
* trace_cgroup_destroy_root() and trace_cgroup_remount() hooks.
*/
DEFINE_EVENT(cgroup_root, cgroup_setup_root,
TP_PROTO(struct cgroup_root *root),
TP_ARGS(root)
);
DEFINE_EVENT(cgroup_root, cgroup_destroy_root,
TP_PROTO(struct cgroup_root *root),
TP_ARGS(root)
);
DEFINE_EVENT(cgroup_root, cgroup_remount,
TP_PROTO(struct cgroup_root *root),
TP_ARGS(root)
);
/*
 * Event class for events that describe a single cgroup.  Each record stores
 * the hierarchy_id of the cgroup's root, the cgroup's id and level, and its
 * path as produced by cgroup_path().  When the cgroup has no kernfs node
 * (cgrp->kn is NULL) the literal "(null)" is recorded instead of a path.
 */
DECLARE_EVENT_CLASS(cgroup,
	TP_PROTO(struct cgroup *cgrp),
	TP_ARGS(cgrp),
	TP_STRUCT__entry(
		__field(	int,		root			)
		__field(	int,		id			)
		__field(	int,		level			)
		/*
		 * cgroup_path(cgrp, NULL, 0) only measures: it returns the
		 * path length without the terminating '\0', hence the "+ 1".
		 * The "(null)" fallback needs the same extra byte because
		 * __assign_str() below copies the terminator as well; sizing
		 * it as bare strlen() would overrun the array by one byte.
		 */
		__dynamic_array(char,		path,
				cgrp->kn ? cgroup_path(cgrp, NULL, 0) + 1
					 : strlen("(null)") + 1)
	),
	TP_fast_assign(
		__entry->root = cgrp->root->hierarchy_id;
		__entry->id = cgrp->id;
		__entry->level = cgrp->level;
		if (cgrp->kn)
			/* Second pass: format the path into the reserved space. */
			cgroup_path(cgrp, __get_dynamic_array(path),
				    __get_dynamic_array_len(path));
		else
			__assign_str(path, "(null)");
	),
	TP_printk("root=%d id=%d level=%d path=%s",
		  __entry->root, __entry->id, __entry->level, __get_str(path))
);
/*
* Events using the cgroup class layout. Each one takes the affected
* struct cgroup and generates the matching trace_cgroup_mkdir(),
* trace_cgroup_rmdir(), trace_cgroup_release() and trace_cgroup_rename()
* hooks.
*/
DEFINE_EVENT(cgroup, cgroup_mkdir,
TP_PROTO(struct cgroup *cgroup),
TP_ARGS(cgroup)
);
DEFINE_EVENT(cgroup, cgroup_rmdir,
TP_PROTO(struct cgroup *cgroup),
TP_ARGS(cgroup)
);
DEFINE_EVENT(cgroup, cgroup_release,
TP_PROTO(struct cgroup *cgroup),
TP_ARGS(cgroup)
);
DEFINE_EVENT(cgroup, cgroup_rename,
TP_PROTO(struct cgroup *cgroup),
TP_ARGS(cgroup)
);
/*
 * Event class for task migration events.  Each record stores the
 * hierarchy_id of the destination cgroup's root, the destination cgroup's
 * id, level and path, plus the pid and comm of the task passed in.  The
 * @threadgroup argument is accepted but not recorded.  When the destination
 * cgroup has no kernfs node (dst_cgrp->kn is NULL) the literal "(null)" is
 * recorded instead of a path.
 */
DECLARE_EVENT_CLASS(cgroup_migrate,
	TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup),
	TP_ARGS(dst_cgrp, task, threadgroup),
	TP_STRUCT__entry(
		__field(	int,		dst_root		)
		__field(	int,		dst_id			)
		__field(	int,		dst_level		)
		/*
		 * cgroup_path(dst_cgrp, NULL, 0) only measures: it returns
		 * the path length without the terminating '\0', hence the
		 * "+ 1".  The "(null)" fallback needs the same extra byte
		 * because __assign_str() below copies the terminator as
		 * well; sizing it as bare strlen() would overrun the array
		 * by one byte.
		 */
		__dynamic_array(char,		dst_path,
				dst_cgrp->kn ? cgroup_path(dst_cgrp, NULL, 0) + 1
					     : strlen("(null)") + 1)
		__field(	int,		pid			)
		__string(	comm,		task->comm		)
	),
	TP_fast_assign(
		__entry->dst_root = dst_cgrp->root->hierarchy_id;
		__entry->dst_id = dst_cgrp->id;
		__entry->dst_level = dst_cgrp->level;
		if (dst_cgrp->kn)
			/* Second pass: format the path into the reserved space. */
			cgroup_path(dst_cgrp, __get_dynamic_array(dst_path),
				    __get_dynamic_array_len(dst_path));
		else
			__assign_str(dst_path, "(null)");
		__entry->pid = task->pid;
		__assign_str(comm, task->comm);
	),
	TP_printk("dst_root=%d dst_id=%d dst_level=%d dst_path=%s pid=%d comm=%s",
		  __entry->dst_root, __entry->dst_id, __entry->dst_level,
		  __get_str(dst_path), __entry->pid, __get_str(comm))
);
/*
* Events using the cgroup_migrate class layout. Each one takes the
* destination cgroup, the task and the threadgroup flag, and generates the
* matching trace_cgroup_attach_task() and trace_cgroup_transfer_tasks()
* hooks.
*/
DEFINE_EVENT(cgroup_migrate, cgroup_attach_task,
TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup),
TP_ARGS(dst_cgrp, task, threadgroup)
);
DEFINE_EVENT(cgroup_migrate, cgroup_transfer_tasks,
TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup),
TP_ARGS(dst_cgrp, task, threadgroup)
);
#endif /* _TRACE_CGROUP_H */
/* This part must be outside protection */
#include <trace/define_trace.h>
......@@ -64,6 +64,9 @@
#include <linux/file.h>
#include <net/sock.h>
#define CREATE_TRACE_POINTS
#include <trace/events/cgroup.h>
/*
* pidlists linger the following amount before being destroyed. The goal
* is avoiding frequent destruction in the middle of consecutive read calls
......@@ -1176,6 +1179,8 @@ static void cgroup_destroy_root(struct cgroup_root *root)
struct cgroup *cgrp = &root->cgrp;
struct cgrp_cset_link *link, *tmp_link;
trace_cgroup_destroy_root(root);
cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
BUG_ON(atomic_read(&root->nr_cgrps));
......@@ -1874,6 +1879,9 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
strcpy(root->release_agent_path, opts.release_agent);
spin_unlock(&release_agent_path_lock);
}
trace_cgroup_remount(root);
out_unlock:
kfree(opts.release_agent);
kfree(opts.name);
......@@ -2031,6 +2039,8 @@ static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
if (ret)
goto destroy_root;
trace_cgroup_setup_root(root);
/*
* There must be no failure case after here, since rebinding takes
* care of subsystems' refcounts, which are explicitly dropped in
......@@ -2315,22 +2325,18 @@ static struct file_system_type cgroup2_fs_type = {
.fs_flags = FS_USERNS_MOUNT,
};
static char *cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
static int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
struct cgroup_namespace *ns)
{
struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root);
int ret;
ret = kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
if (ret < 0 || ret >= buflen)
return NULL;
return buf;
return kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
}
char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
struct cgroup_namespace *ns)
{
char *ret;
int ret;
mutex_lock(&cgroup_mutex);
spin_lock_irq(&css_set_lock);
......@@ -2357,12 +2363,12 @@ EXPORT_SYMBOL_GPL(cgroup_path_ns);
*
* Return value is the same as kernfs_path().
*/
char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
{
struct cgroup_root *root;
struct cgroup *cgrp;
int hierarchy_id = 1;
char *path = NULL;
int ret;
mutex_lock(&cgroup_mutex);
spin_lock_irq(&css_set_lock);
......@@ -2371,16 +2377,15 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
if (root) {
cgrp = task_cgroup_from_root(task, root);
path = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
} else {
/* if no hierarchy exists, everyone is in "/" */
if (strlcpy(buf, "/", buflen) < buflen)
path = buf;
ret = strlcpy(buf, "/", buflen);
}
spin_unlock_irq(&css_set_lock);
mutex_unlock(&cgroup_mutex);
return path;
return ret;
}
EXPORT_SYMBOL_GPL(task_cgroup_path);
......@@ -2830,6 +2835,10 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp,
ret = cgroup_migrate(leader, threadgroup, dst_cgrp->root);
cgroup_migrate_finish(&preloaded_csets);
if (!ret)
trace_cgroup_attach_task(dst_cgrp, leader, threadgroup);
return ret;
}
......@@ -3611,6 +3620,8 @@ static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
mutex_lock(&cgroup_mutex);
ret = kernfs_rename(kn, new_parent, new_name_str);
if (!ret)
trace_cgroup_rename(cgrp);
mutex_unlock(&cgroup_mutex);
......@@ -4381,6 +4392,8 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
if (task) {
ret = cgroup_migrate(task, false, to->root);
if (!ret)
trace_cgroup_transfer_tasks(to, task, false);
put_task_struct(task);
}
} while (task && !ret);
......@@ -5046,6 +5059,8 @@ static void css_release_work_fn(struct work_struct *work)
ss->css_released(css);
} else {
/* cgroup release path */
trace_cgroup_release(cgrp);
cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
cgrp->id = -1;
......@@ -5332,6 +5347,8 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
if (ret)
goto out_destroy;
trace_cgroup_mkdir(cgrp);
/* let's create and online css's */
kernfs_activate(kn);
......@@ -5507,6 +5524,9 @@ static int cgroup_rmdir(struct kernfs_node *kn)
ret = cgroup_destroy_locked(cgrp);
if (!ret)
trace_cgroup_rmdir(cgrp);
cgroup_kn_unlock(kn);
return ret;
}
......@@ -5743,7 +5763,7 @@ core_initcall(cgroup_wq_init);
int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *tsk)
{
char *buf, *path;
char *buf;
int retval;
struct cgroup_root *root;
......@@ -5786,18 +5806,18 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
* " (deleted)" is appended to the cgroup path.
*/
if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
path = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
retval = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
current->nsproxy->cgroup_ns);
if (!path) {
if (retval >= PATH_MAX)
retval = -ENAMETOOLONG;
if (retval < 0)
goto out_unlock;
}
seq_puts(m, buf);
} else {
path = "/";
seq_puts(m, "/");
}
seq_puts(m, path);
if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp))
seq_puts(m, " (deleted)\n");
else
......@@ -6062,8 +6082,9 @@ static void cgroup_release_agent(struct work_struct *work)
{
struct cgroup *cgrp =
container_of(work, struct cgroup, release_agent_work);
char *pathbuf = NULL, *agentbuf = NULL, *path;
char *pathbuf = NULL, *agentbuf = NULL;
char *argv[3], *envp[3];
int ret;
mutex_lock(&cgroup_mutex);
......@@ -6073,13 +6094,13 @@ static void cgroup_release_agent(struct work_struct *work)
goto out;
spin_lock_irq(&css_set_lock);
path = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
ret = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
spin_unlock_irq(&css_set_lock);
if (!path)
if (ret < 0 || ret >= PATH_MAX)
goto out;
argv[0] = agentbuf;
argv[1] = path;
argv[1] = pathbuf;
argv[2] = NULL;
/* minimal command environment */
......
......@@ -2715,7 +2715,7 @@ void __cpuset_memory_pressure_bump(void)
int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *tsk)
{
char *buf, *p;
char *buf;
struct cgroup_subsys_state *css;
int retval;
......@@ -2724,14 +2724,15 @@ int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
if (!buf)
goto out;
retval = -ENAMETOOLONG;
css = task_get_css(tsk, cpuset_cgrp_id);
p = cgroup_path_ns(css->cgroup, buf, PATH_MAX,
retval = cgroup_path_ns(css->cgroup, buf, PATH_MAX,
current->nsproxy->cgroup_ns);
css_put(css);
if (!p)
if (retval >= PATH_MAX)
retval = -ENAMETOOLONG;
if (retval < 0)
goto out_free;
seq_puts(m, p);
seq_puts(m, buf);
seq_putc(m, '\n');
retval = 0;
out_free:
......
......@@ -415,7 +415,8 @@ static char *task_group_path(struct task_group *tg)
if (autogroup_path(tg, group_path, PATH_MAX))
return group_path;
return cgroup_path(tg->css.cgroup, group_path, PATH_MAX);
cgroup_path(tg->css.cgroup, group_path, PATH_MAX);
return group_path;
}
#endif
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册