提交 c09d7a3d 编写于 作者: F Frederic Weisbecker

Merge branch '/tip/perf/filter' of...

Merge branch '/tip/perf/filter' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-trace.git into perf/core
...@@ -208,7 +208,6 @@ struct ftrace_event_call { ...@@ -208,7 +208,6 @@ struct ftrace_event_call {
#define PERF_MAX_TRACE_SIZE 2048 #define PERF_MAX_TRACE_SIZE 2048
#define MAX_FILTER_PRED 32
#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */
extern void destroy_preds(struct ftrace_event_call *call); extern void destroy_preds(struct ftrace_event_call *call);
......
...@@ -661,8 +661,10 @@ struct ftrace_event_field { ...@@ -661,8 +661,10 @@ struct ftrace_event_field {
}; };
struct event_filter { struct event_filter {
int n_preds; int n_preds; /* Number assigned */
struct filter_pred **preds; int a_preds; /* allocated */
struct filter_pred *preds;
struct filter_pred *root;
char *filter_string; char *filter_string;
}; };
...@@ -674,11 +676,23 @@ struct event_subsystem { ...@@ -674,11 +676,23 @@ struct event_subsystem {
int nr_events; int nr_events;
}; };
#define FILTER_PRED_INVALID ((unsigned short)-1)
#define FILTER_PRED_IS_RIGHT (1 << 15)
#define FILTER_PRED_FOLD (1 << 15)
/*
* The max preds is the size of unsigned short with
* two flags at the MSBs. One bit is used for both the IS_RIGHT
* and FOLD flags. The other is reserved.
*
* 2^14 preds is way more than enough.
*/
#define MAX_FILTER_PRED 16384
struct filter_pred; struct filter_pred;
struct regex; struct regex;
typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event, typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event);
int val1, int val2);
typedef int (*regex_match_func)(char *str, struct regex *r, int len); typedef int (*regex_match_func)(char *str, struct regex *r, int len);
...@@ -700,11 +714,23 @@ struct filter_pred { ...@@ -700,11 +714,23 @@ struct filter_pred {
filter_pred_fn_t fn; filter_pred_fn_t fn;
u64 val; u64 val;
struct regex regex; struct regex regex;
/*
* Leaf nodes use field_name, ops is used by AND and OR
* nodes. The field_name is always freed when freeing a pred.
* We can overload field_name for ops and have it freed
* as well.
*/
union {
char *field_name; char *field_name;
unsigned short *ops;
};
int offset; int offset;
int not; int not;
int op; int op;
int pop_n; unsigned short index;
unsigned short parent;
unsigned short left;
unsigned short right;
}; };
extern struct list_head ftrace_common_fields; extern struct list_head ftrace_common_fields;
......
...@@ -123,9 +123,13 @@ struct filter_parse_state { ...@@ -123,9 +123,13 @@ struct filter_parse_state {
} operand; } operand;
}; };
struct pred_stack {
struct filter_pred **preds;
int index;
};
#define DEFINE_COMPARISON_PRED(type) \ #define DEFINE_COMPARISON_PRED(type) \
static int filter_pred_##type(struct filter_pred *pred, void *event, \ static int filter_pred_##type(struct filter_pred *pred, void *event) \
int val1, int val2) \
{ \ { \
type *addr = (type *)(event + pred->offset); \ type *addr = (type *)(event + pred->offset); \
type val = (type)pred->val; \ type val = (type)pred->val; \
...@@ -152,8 +156,7 @@ static int filter_pred_##type(struct filter_pred *pred, void *event, \ ...@@ -152,8 +156,7 @@ static int filter_pred_##type(struct filter_pred *pred, void *event, \
} }
#define DEFINE_EQUALITY_PRED(size) \ #define DEFINE_EQUALITY_PRED(size) \
static int filter_pred_##size(struct filter_pred *pred, void *event, \ static int filter_pred_##size(struct filter_pred *pred, void *event) \
int val1, int val2) \
{ \ { \
u##size *addr = (u##size *)(event + pred->offset); \ u##size *addr = (u##size *)(event + pred->offset); \
u##size val = (u##size)pred->val; \ u##size val = (u##size)pred->val; \
...@@ -178,23 +181,8 @@ DEFINE_EQUALITY_PRED(32); ...@@ -178,23 +181,8 @@ DEFINE_EQUALITY_PRED(32);
DEFINE_EQUALITY_PRED(16); DEFINE_EQUALITY_PRED(16);
DEFINE_EQUALITY_PRED(8); DEFINE_EQUALITY_PRED(8);
static int filter_pred_and(struct filter_pred *pred __attribute((unused)),
void *event __attribute((unused)),
int val1, int val2)
{
return val1 && val2;
}
static int filter_pred_or(struct filter_pred *pred __attribute((unused)),
void *event __attribute((unused)),
int val1, int val2)
{
return val1 || val2;
}
/* Filter predicate for fixed sized arrays of characters */ /* Filter predicate for fixed sized arrays of characters */
static int filter_pred_string(struct filter_pred *pred, void *event, static int filter_pred_string(struct filter_pred *pred, void *event)
int val1, int val2)
{ {
char *addr = (char *)(event + pred->offset); char *addr = (char *)(event + pred->offset);
int cmp, match; int cmp, match;
...@@ -207,8 +195,7 @@ static int filter_pred_string(struct filter_pred *pred, void *event, ...@@ -207,8 +195,7 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
} }
/* Filter predicate for char * pointers */ /* Filter predicate for char * pointers */
static int filter_pred_pchar(struct filter_pred *pred, void *event, static int filter_pred_pchar(struct filter_pred *pred, void *event)
int val1, int val2)
{ {
char **addr = (char **)(event + pred->offset); char **addr = (char **)(event + pred->offset);
int cmp, match; int cmp, match;
...@@ -231,8 +218,7 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event, ...@@ -231,8 +218,7 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event,
* and add it to the address of the entry, and at last we have * and add it to the address of the entry, and at last we have
* the address of the string. * the address of the string.
*/ */
static int filter_pred_strloc(struct filter_pred *pred, void *event, static int filter_pred_strloc(struct filter_pred *pred, void *event)
int val1, int val2)
{ {
u32 str_item = *(u32 *)(event + pred->offset); u32 str_item = *(u32 *)(event + pred->offset);
int str_loc = str_item & 0xffff; int str_loc = str_item & 0xffff;
...@@ -247,8 +233,7 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event, ...@@ -247,8 +233,7 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event,
return match; return match;
} }
static int filter_pred_none(struct filter_pred *pred, void *event, static int filter_pred_none(struct filter_pred *pred, void *event)
int val1, int val2)
{ {
return 0; return 0;
} }
...@@ -377,32 +362,147 @@ static void filter_build_regex(struct filter_pred *pred) ...@@ -377,32 +362,147 @@ static void filter_build_regex(struct filter_pred *pred)
pred->not ^= not; pred->not ^= not;
} }
enum move_type {
MOVE_DOWN,
MOVE_UP_FROM_LEFT,
MOVE_UP_FROM_RIGHT
};
static struct filter_pred *
get_pred_parent(struct filter_pred *pred, struct filter_pred *preds,
int index, enum move_type *move)
{
if (pred->parent & FILTER_PRED_IS_RIGHT)
*move = MOVE_UP_FROM_RIGHT;
else
*move = MOVE_UP_FROM_LEFT;
pred = &preds[pred->parent & ~FILTER_PRED_IS_RIGHT];
return pred;
}
/*
* A series of AND or ORs where found together. Instead of
* climbing up and down the tree branches, an array of the
* ops were made in order of checks. We can just move across
* the array and short circuit if needed.
*/
static int process_ops(struct filter_pred *preds,
struct filter_pred *op, void *rec)
{
struct filter_pred *pred;
int type;
int match;
int i;
/*
* Micro-optimization: We set type to true if op
* is an OR and false otherwise (AND). Then we
* just need to test if the match is equal to
* the type, and if it is, we can short circuit the
* rest of the checks:
*
* if ((match && op->op == OP_OR) ||
* (!match && op->op == OP_AND))
* return match;
*/
type = op->op == OP_OR;
for (i = 0; i < op->val; i++) {
pred = &preds[op->ops[i]];
match = pred->fn(pred, rec);
if (!!match == type)
return match;
}
return match;
}
/* return 1 if event matches, 0 otherwise (discard) */ /* return 1 if event matches, 0 otherwise (discard) */
int filter_match_preds(struct event_filter *filter, void *rec) int filter_match_preds(struct event_filter *filter, void *rec)
{ {
int match, top = 0, val1 = 0, val2 = 0; int match = -1;
int stack[MAX_FILTER_PRED]; enum move_type move = MOVE_DOWN;
struct filter_pred *preds;
struct filter_pred *pred; struct filter_pred *pred;
int i; struct filter_pred *root;
int n_preds;
int done = 0;
/* no filter is considered a match */
if (!filter)
return 1;
n_preds = filter->n_preds;
if (!n_preds)
return 1;
/*
* n_preds, root and filter->preds are protect with preemption disabled.
*/
preds = rcu_dereference_sched(filter->preds);
root = rcu_dereference_sched(filter->root);
if (!root)
return 1;
pred = root;
/* match is currently meaningless */
match = -1;
for (i = 0; i < filter->n_preds; i++) { do {
pred = filter->preds[i]; switch (move) {
if (!pred->pop_n) { case MOVE_DOWN:
match = pred->fn(pred, rec, val1, val2); /* only AND and OR have children */
stack[top++] = match; if (pred->left != FILTER_PRED_INVALID) {
/* If ops is set, then it was folded. */
if (!pred->ops) {
/* keep going to down the left side */
pred = &preds[pred->left];
continue; continue;
} }
if (pred->pop_n > top) { /* We can treat folded ops as a leaf node */
WARN_ON_ONCE(1); match = process_ops(preds, pred, rec);
return 0; } else
match = pred->fn(pred, rec);
/* If this pred is the only pred */
if (pred == root)
break;
pred = get_pred_parent(pred, preds,
pred->parent, &move);
continue;
case MOVE_UP_FROM_LEFT:
/*
* Check for short circuits.
*
* Optimization: !!match == (pred->op == OP_OR)
* is the same as:
* if ((match && pred->op == OP_OR) ||
* (!match && pred->op == OP_AND))
*/
if (!!match == (pred->op == OP_OR)) {
if (pred == root)
break;
pred = get_pred_parent(pred, preds,
pred->parent, &move);
continue;
} }
val1 = stack[--top]; /* now go down the right side of the tree. */
val2 = stack[--top]; pred = &preds[pred->right];
match = pred->fn(pred, rec, val1, val2); move = MOVE_DOWN;
stack[top++] = match; continue;
case MOVE_UP_FROM_RIGHT:
/* We finished this equation. */
if (pred == root)
break;
pred = get_pred_parent(pred, preds,
pred->parent, &move);
continue;
} }
done = 1;
} while (!done);
return stack[--top]; return match;
} }
EXPORT_SYMBOL_GPL(filter_match_preds); EXPORT_SYMBOL_GPL(filter_match_preds);
...@@ -414,6 +514,9 @@ static void parse_error(struct filter_parse_state *ps, int err, int pos) ...@@ -414,6 +514,9 @@ static void parse_error(struct filter_parse_state *ps, int err, int pos)
static void remove_filter_string(struct event_filter *filter) static void remove_filter_string(struct event_filter *filter)
{ {
if (!filter)
return;
kfree(filter->filter_string); kfree(filter->filter_string);
filter->filter_string = NULL; filter->filter_string = NULL;
} }
...@@ -473,9 +576,10 @@ static void append_filter_err(struct filter_parse_state *ps, ...@@ -473,9 +576,10 @@ static void append_filter_err(struct filter_parse_state *ps,
void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s) void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
{ {
struct event_filter *filter = call->filter; struct event_filter *filter;
mutex_lock(&event_mutex); mutex_lock(&event_mutex);
filter = call->filter;
if (filter && filter->filter_string) if (filter && filter->filter_string)
trace_seq_printf(s, "%s\n", filter->filter_string); trace_seq_printf(s, "%s\n", filter->filter_string);
else else
...@@ -486,9 +590,10 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s) ...@@ -486,9 +590,10 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
void print_subsystem_event_filter(struct event_subsystem *system, void print_subsystem_event_filter(struct event_subsystem *system,
struct trace_seq *s) struct trace_seq *s)
{ {
struct event_filter *filter = system->filter; struct event_filter *filter;
mutex_lock(&event_mutex); mutex_lock(&event_mutex);
filter = system->filter;
if (filter && filter->filter_string) if (filter && filter->filter_string)
trace_seq_printf(s, "%s\n", filter->filter_string); trace_seq_printf(s, "%s\n", filter->filter_string);
else else
...@@ -539,10 +644,58 @@ static void filter_clear_pred(struct filter_pred *pred) ...@@ -539,10 +644,58 @@ static void filter_clear_pred(struct filter_pred *pred)
pred->regex.len = 0; pred->regex.len = 0;
} }
static int filter_set_pred(struct filter_pred *dest, static int __alloc_pred_stack(struct pred_stack *stack, int n_preds)
{
stack->preds = kzalloc(sizeof(*stack->preds)*(n_preds + 1), GFP_KERNEL);
if (!stack->preds)
return -ENOMEM;
stack->index = n_preds;
return 0;
}
static void __free_pred_stack(struct pred_stack *stack)
{
kfree(stack->preds);
stack->index = 0;
}
static int __push_pred_stack(struct pred_stack *stack,
struct filter_pred *pred)
{
int index = stack->index;
if (WARN_ON(index == 0))
return -ENOSPC;
stack->preds[--index] = pred;
stack->index = index;
return 0;
}
static struct filter_pred *
__pop_pred_stack(struct pred_stack *stack)
{
struct filter_pred *pred;
int index = stack->index;
pred = stack->preds[index++];
if (!pred)
return NULL;
stack->index = index;
return pred;
}
static int filter_set_pred(struct event_filter *filter,
int idx,
struct pred_stack *stack,
struct filter_pred *src, struct filter_pred *src,
filter_pred_fn_t fn) filter_pred_fn_t fn)
{ {
struct filter_pred *dest = &filter->preds[idx];
struct filter_pred *left;
struct filter_pred *right;
*dest = *src; *dest = *src;
if (src->field_name) { if (src->field_name) {
dest->field_name = kstrdup(src->field_name, GFP_KERNEL); dest->field_name = kstrdup(src->field_name, GFP_KERNEL);
...@@ -550,116 +703,140 @@ static int filter_set_pred(struct filter_pred *dest, ...@@ -550,116 +703,140 @@ static int filter_set_pred(struct filter_pred *dest,
return -ENOMEM; return -ENOMEM;
} }
dest->fn = fn; dest->fn = fn;
dest->index = idx;
return 0; if (dest->op == OP_OR || dest->op == OP_AND) {
right = __pop_pred_stack(stack);
left = __pop_pred_stack(stack);
if (!left || !right)
return -EINVAL;
/*
* If both children can be folded
* and they are the same op as this op or a leaf,
* then this op can be folded.
*/
if (left->index & FILTER_PRED_FOLD &&
(left->op == dest->op ||
left->left == FILTER_PRED_INVALID) &&
right->index & FILTER_PRED_FOLD &&
(right->op == dest->op ||
right->left == FILTER_PRED_INVALID))
dest->index |= FILTER_PRED_FOLD;
dest->left = left->index & ~FILTER_PRED_FOLD;
dest->right = right->index & ~FILTER_PRED_FOLD;
left->parent = dest->index & ~FILTER_PRED_FOLD;
right->parent = dest->index | FILTER_PRED_IS_RIGHT;
} else {
/*
* Make dest->left invalid to be used as a quick
* way to know this is a leaf node.
*/
dest->left = FILTER_PRED_INVALID;
/* All leafs allow folding the parent ops. */
dest->index |= FILTER_PRED_FOLD;
}
return __push_pred_stack(stack, dest);
} }
static void filter_disable_preds(struct ftrace_event_call *call) static void __free_preds(struct event_filter *filter)
{ {
struct event_filter *filter = call->filter;
int i; int i;
call->flags &= ~TRACE_EVENT_FL_FILTERED; if (filter->preds) {
for (i = 0; i < filter->a_preds; i++)
kfree(filter->preds[i].field_name);
kfree(filter->preds);
filter->preds = NULL;
}
filter->a_preds = 0;
filter->n_preds = 0; filter->n_preds = 0;
for (i = 0; i < MAX_FILTER_PRED; i++)
filter->preds[i]->fn = filter_pred_none;
} }
static void __free_preds(struct event_filter *filter) static void filter_disable(struct ftrace_event_call *call)
{ {
int i; call->flags &= ~TRACE_EVENT_FL_FILTERED;
}
static void __free_filter(struct event_filter *filter)
{
if (!filter) if (!filter)
return; return;
for (i = 0; i < MAX_FILTER_PRED; i++) { __free_preds(filter);
if (filter->preds[i])
filter_free_pred(filter->preds[i]);
}
kfree(filter->preds);
kfree(filter->filter_string); kfree(filter->filter_string);
kfree(filter); kfree(filter);
} }
/*
* Called when destroying the ftrace_event_call.
* The call is being freed, so we do not need to worry about
* the call being currently used. This is for module code removing
* the tracepoints from within it.
*/
void destroy_preds(struct ftrace_event_call *call) void destroy_preds(struct ftrace_event_call *call)
{ {
__free_preds(call->filter); __free_filter(call->filter);
call->filter = NULL; call->filter = NULL;
call->flags &= ~TRACE_EVENT_FL_FILTERED;
} }
static struct event_filter *__alloc_preds(void) static struct event_filter *__alloc_filter(void)
{ {
struct event_filter *filter; struct event_filter *filter;
filter = kzalloc(sizeof(*filter), GFP_KERNEL);
return filter;
}
static int __alloc_preds(struct event_filter *filter, int n_preds)
{
struct filter_pred *pred; struct filter_pred *pred;
int i; int i;
filter = kzalloc(sizeof(*filter), GFP_KERNEL); if (filter->preds)
if (!filter) __free_preds(filter);
return ERR_PTR(-ENOMEM);
filter->n_preds = 0; filter->preds =
kzalloc(sizeof(*filter->preds) * n_preds, GFP_KERNEL);
filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL);
if (!filter->preds) if (!filter->preds)
goto oom; return -ENOMEM;
for (i = 0; i < MAX_FILTER_PRED; i++) { filter->a_preds = n_preds;
pred = kzalloc(sizeof(*pred), GFP_KERNEL); filter->n_preds = 0;
if (!pred)
goto oom; for (i = 0; i < n_preds; i++) {
pred = &filter->preds[i];
pred->fn = filter_pred_none; pred->fn = filter_pred_none;
filter->preds[i] = pred;
} }
return filter;
oom:
__free_preds(filter);
return ERR_PTR(-ENOMEM);
}
static int init_preds(struct ftrace_event_call *call)
{
if (call->filter)
return 0;
call->flags &= ~TRACE_EVENT_FL_FILTERED;
call->filter = __alloc_preds();
if (IS_ERR(call->filter))
return PTR_ERR(call->filter);
return 0; return 0;
} }
static int init_subsystem_preds(struct event_subsystem *system) static void filter_free_subsystem_preds(struct event_subsystem *system)
{ {
struct ftrace_event_call *call; struct ftrace_event_call *call;
int err;
list_for_each_entry(call, &ftrace_events, list) { list_for_each_entry(call, &ftrace_events, list) {
if (strcmp(call->class->system, system->name) != 0) if (strcmp(call->class->system, system->name) != 0)
continue; continue;
err = init_preds(call); filter_disable(call);
if (err) remove_filter_string(call->filter);
return err;
} }
return 0;
} }
static void filter_free_subsystem_preds(struct event_subsystem *system) static void filter_free_subsystem_filters(struct event_subsystem *system)
{ {
struct ftrace_event_call *call; struct ftrace_event_call *call;
list_for_each_entry(call, &ftrace_events, list) { list_for_each_entry(call, &ftrace_events, list) {
if (strcmp(call->class->system, system->name) != 0) if (strcmp(call->class->system, system->name) != 0)
continue; continue;
__free_filter(call->filter);
filter_disable_preds(call); call->filter = NULL;
remove_filter_string(call->filter);
} }
} }
...@@ -667,18 +844,19 @@ static int filter_add_pred_fn(struct filter_parse_state *ps, ...@@ -667,18 +844,19 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
struct ftrace_event_call *call, struct ftrace_event_call *call,
struct event_filter *filter, struct event_filter *filter,
struct filter_pred *pred, struct filter_pred *pred,
struct pred_stack *stack,
filter_pred_fn_t fn) filter_pred_fn_t fn)
{ {
int idx, err; int idx, err;
if (filter->n_preds == MAX_FILTER_PRED) { if (WARN_ON(filter->n_preds == filter->a_preds)) {
parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
return -ENOSPC; return -ENOSPC;
} }
idx = filter->n_preds; idx = filter->n_preds;
filter_clear_pred(filter->preds[idx]); filter_clear_pred(&filter->preds[idx]);
err = filter_set_pred(filter->preds[idx], pred, fn); err = filter_set_pred(filter, idx, stack, pred, fn);
if (err) if (err)
return err; return err;
...@@ -763,6 +941,7 @@ static int filter_add_pred(struct filter_parse_state *ps, ...@@ -763,6 +941,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
struct ftrace_event_call *call, struct ftrace_event_call *call,
struct event_filter *filter, struct event_filter *filter,
struct filter_pred *pred, struct filter_pred *pred,
struct pred_stack *stack,
bool dry_run) bool dry_run)
{ {
struct ftrace_event_field *field; struct ftrace_event_field *field;
...@@ -770,17 +949,12 @@ static int filter_add_pred(struct filter_parse_state *ps, ...@@ -770,17 +949,12 @@ static int filter_add_pred(struct filter_parse_state *ps,
unsigned long long val; unsigned long long val;
int ret; int ret;
pred->fn = filter_pred_none; fn = pred->fn = filter_pred_none;
if (pred->op == OP_AND) { if (pred->op == OP_AND)
pred->pop_n = 2;
fn = filter_pred_and;
goto add_pred_fn; goto add_pred_fn;
} else if (pred->op == OP_OR) { else if (pred->op == OP_OR)
pred->pop_n = 2;
fn = filter_pred_or;
goto add_pred_fn; goto add_pred_fn;
}
field = find_event_field(call, pred->field_name); field = find_event_field(call, pred->field_name);
if (!field) { if (!field) {
...@@ -829,7 +1003,7 @@ static int filter_add_pred(struct filter_parse_state *ps, ...@@ -829,7 +1003,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
add_pred_fn: add_pred_fn:
if (!dry_run) if (!dry_run)
return filter_add_pred_fn(ps, call, filter, pred, fn); return filter_add_pred_fn(ps, call, filter, pred, stack, fn);
return 0; return 0;
} }
...@@ -1187,6 +1361,234 @@ static int check_preds(struct filter_parse_state *ps) ...@@ -1187,6 +1361,234 @@ static int check_preds(struct filter_parse_state *ps)
return 0; return 0;
} }
static int count_preds(struct filter_parse_state *ps)
{
struct postfix_elt *elt;
int n_preds = 0;
list_for_each_entry(elt, &ps->postfix, list) {
if (elt->op == OP_NONE)
continue;
n_preds++;
}
return n_preds;
}
/*
* The tree is walked at filtering of an event. If the tree is not correctly
* built, it may cause an infinite loop. Check here that the tree does
* indeed terminate.
*/
static int check_pred_tree(struct event_filter *filter,
struct filter_pred *root)
{
struct filter_pred *preds;
struct filter_pred *pred;
enum move_type move = MOVE_DOWN;
int count = 0;
int done = 0;
int max;
/*
* The max that we can hit a node is three times.
* Once going down, once coming up from left, and
* once coming up from right. This is more than enough
* since leafs are only hit a single time.
*/
max = 3 * filter->n_preds;
preds = filter->preds;
if (!preds)
return -EINVAL;
pred = root;
do {
if (WARN_ON(count++ > max))
return -EINVAL;
switch (move) {
case MOVE_DOWN:
if (pred->left != FILTER_PRED_INVALID) {
pred = &preds[pred->left];
continue;
}
/* A leaf at the root is just a leaf in the tree */
if (pred == root)
break;
pred = get_pred_parent(pred, preds,
pred->parent, &move);
continue;
case MOVE_UP_FROM_LEFT:
pred = &preds[pred->right];
move = MOVE_DOWN;
continue;
case MOVE_UP_FROM_RIGHT:
if (pred == root)
break;
pred = get_pred_parent(pred, preds,
pred->parent, &move);
continue;
}
done = 1;
} while (!done);
/* We are fine. */
return 0;
}
static int count_leafs(struct filter_pred *preds, struct filter_pred *root)
{
struct filter_pred *pred;
enum move_type move = MOVE_DOWN;
int count = 0;
int done = 0;
pred = root;
do {
switch (move) {
case MOVE_DOWN:
if (pred->left != FILTER_PRED_INVALID) {
pred = &preds[pred->left];
continue;
}
/* A leaf at the root is just a leaf in the tree */
if (pred == root)
return 1;
count++;
pred = get_pred_parent(pred, preds,
pred->parent, &move);
continue;
case MOVE_UP_FROM_LEFT:
pred = &preds[pred->right];
move = MOVE_DOWN;
continue;
case MOVE_UP_FROM_RIGHT:
if (pred == root)
break;
pred = get_pred_parent(pred, preds,
pred->parent, &move);
continue;
}
done = 1;
} while (!done);
return count;
}
static int fold_pred(struct filter_pred *preds, struct filter_pred *root)
{
struct filter_pred *pred;
enum move_type move = MOVE_DOWN;
int count = 0;
int children;
int done = 0;
/* No need to keep the fold flag */
root->index &= ~FILTER_PRED_FOLD;
/* If the root is a leaf then do nothing */
if (root->left == FILTER_PRED_INVALID)
return 0;
/* count the children */
children = count_leafs(preds, &preds[root->left]);
children += count_leafs(preds, &preds[root->right]);
root->ops = kzalloc(sizeof(*root->ops) * children, GFP_KERNEL);
if (!root->ops)
return -ENOMEM;
root->val = children;
pred = root;
do {
switch (move) {
case MOVE_DOWN:
if (pred->left != FILTER_PRED_INVALID) {
pred = &preds[pred->left];
continue;
}
if (WARN_ON(count == children))
return -EINVAL;
pred->index &= ~FILTER_PRED_FOLD;
root->ops[count++] = pred->index;
pred = get_pred_parent(pred, preds,
pred->parent, &move);
continue;
case MOVE_UP_FROM_LEFT:
pred = &preds[pred->right];
move = MOVE_DOWN;
continue;
case MOVE_UP_FROM_RIGHT:
if (pred == root)
break;
pred = get_pred_parent(pred, preds,
pred->parent, &move);
continue;
}
done = 1;
} while (!done);
return 0;
}
/*
* To optimize the processing of the ops, if we have several "ors" or
* "ands" together, we can put them in an array and process them all
* together speeding up the filter logic.
*/
static int fold_pred_tree(struct event_filter *filter,
struct filter_pred *root)
{
struct filter_pred *preds;
struct filter_pred *pred;
enum move_type move = MOVE_DOWN;
int done = 0;
int err;
preds = filter->preds;
if (!preds)
return -EINVAL;
pred = root;
do {
switch (move) {
case MOVE_DOWN:
if (pred->index & FILTER_PRED_FOLD) {
err = fold_pred(preds, pred);
if (err)
return err;
/* Folded nodes are like leafs */
} else if (pred->left != FILTER_PRED_INVALID) {
pred = &preds[pred->left];
continue;
}
/* A leaf at the root is just a leaf in the tree */
if (pred == root)
break;
pred = get_pred_parent(pred, preds,
pred->parent, &move);
continue;
case MOVE_UP_FROM_LEFT:
pred = &preds[pred->right];
move = MOVE_DOWN;
continue;
case MOVE_UP_FROM_RIGHT:
if (pred == root)
break;
pred = get_pred_parent(pred, preds,
pred->parent, &move);
continue;
}
done = 1;
} while (!done);
return 0;
}
static int replace_preds(struct ftrace_event_call *call, static int replace_preds(struct ftrace_event_call *call,
struct event_filter *filter, struct event_filter *filter,
struct filter_parse_state *ps, struct filter_parse_state *ps,
...@@ -1195,14 +1597,32 @@ static int replace_preds(struct ftrace_event_call *call, ...@@ -1195,14 +1597,32 @@ static int replace_preds(struct ftrace_event_call *call,
{ {
char *operand1 = NULL, *operand2 = NULL; char *operand1 = NULL, *operand2 = NULL;
struct filter_pred *pred; struct filter_pred *pred;
struct filter_pred *root;
struct postfix_elt *elt; struct postfix_elt *elt;
struct pred_stack stack = { }; /* init to NULL */
int err; int err;
int n_preds = 0; int n_preds = 0;
n_preds = count_preds(ps);
if (n_preds >= MAX_FILTER_PRED) {
parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
return -ENOSPC;
}
err = check_preds(ps); err = check_preds(ps);
if (err) if (err)
return err; return err;
if (!dry_run) {
err = __alloc_pred_stack(&stack, n_preds);
if (err)
return err;
err = __alloc_preds(filter, n_preds);
if (err)
goto fail;
}
n_preds = 0;
list_for_each_entry(elt, &ps->postfix, list) { list_for_each_entry(elt, &ps->postfix, list) {
if (elt->op == OP_NONE) { if (elt->op == OP_NONE) {
if (!operand1) if (!operand1)
...@@ -1211,14 +1631,16 @@ static int replace_preds(struct ftrace_event_call *call, ...@@ -1211,14 +1631,16 @@ static int replace_preds(struct ftrace_event_call *call,
operand2 = elt->operand; operand2 = elt->operand;
else { else {
parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0); parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0);
return -EINVAL; err = -EINVAL;
goto fail;
} }
continue; continue;
} }
if (n_preds++ == MAX_FILTER_PRED) { if (WARN_ON(n_preds++ == MAX_FILTER_PRED)) {
parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
return -ENOSPC; err = -ENOSPC;
goto fail;
} }
if (elt->op == OP_AND || elt->op == OP_OR) { if (elt->op == OP_AND || elt->op == OP_OR) {
...@@ -1228,76 +1650,181 @@ static int replace_preds(struct ftrace_event_call *call, ...@@ -1228,76 +1650,181 @@ static int replace_preds(struct ftrace_event_call *call,
if (!operand1 || !operand2) { if (!operand1 || !operand2) {
parse_error(ps, FILT_ERR_MISSING_FIELD, 0); parse_error(ps, FILT_ERR_MISSING_FIELD, 0);
return -EINVAL; err = -EINVAL;
goto fail;
} }
pred = create_pred(elt->op, operand1, operand2); pred = create_pred(elt->op, operand1, operand2);
add_pred: add_pred:
if (!pred) if (!pred) {
return -ENOMEM; err = -ENOMEM;
err = filter_add_pred(ps, call, filter, pred, dry_run); goto fail;
}
err = filter_add_pred(ps, call, filter, pred, &stack, dry_run);
filter_free_pred(pred); filter_free_pred(pred);
if (err) if (err)
return err; goto fail;
operand1 = operand2 = NULL; operand1 = operand2 = NULL;
} }
return 0; if (!dry_run) {
/* We should have one item left on the stack */
pred = __pop_pred_stack(&stack);
if (!pred)
return -EINVAL;
/* This item is where we start from in matching */
root = pred;
/* Make sure the stack is empty */
pred = __pop_pred_stack(&stack);
if (WARN_ON(pred)) {
err = -EINVAL;
filter->root = NULL;
goto fail;
}
err = check_pred_tree(filter, root);
if (err)
goto fail;
/* Optimize the tree */
err = fold_pred_tree(filter, root);
if (err)
goto fail;
/* We don't set root until we know it works */
barrier();
filter->root = root;
}
err = 0;
fail:
__free_pred_stack(&stack);
return err;
} }
struct filter_list {
struct list_head list;
struct event_filter *filter;
};
static int replace_system_preds(struct event_subsystem *system, static int replace_system_preds(struct event_subsystem *system,
struct filter_parse_state *ps, struct filter_parse_state *ps,
char *filter_string) char *filter_string)
{ {
struct ftrace_event_call *call; struct ftrace_event_call *call;
struct filter_list *filter_item;
struct filter_list *tmp;
LIST_HEAD(filter_list);
bool fail = true; bool fail = true;
int err; int err;
list_for_each_entry(call, &ftrace_events, list) { list_for_each_entry(call, &ftrace_events, list) {
struct event_filter *filter = call->filter;
if (strcmp(call->class->system, system->name) != 0) if (strcmp(call->class->system, system->name) != 0)
continue; continue;
/* try to see if the filter can be applied */ /*
err = replace_preds(call, filter, ps, filter_string, true); * Try to see if the filter can be applied
* (filter arg is ignored on dry_run)
*/
err = replace_preds(call, NULL, ps, filter_string, true);
if (err) if (err)
goto fail;
}
list_for_each_entry(call, &ftrace_events, list) {
struct event_filter *filter;
if (strcmp(call->class->system, system->name) != 0)
continue; continue;
/* really apply the filter */ filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL);
filter_disable_preds(call); if (!filter_item)
err = replace_preds(call, filter, ps, filter_string, false); goto fail_mem;
list_add_tail(&filter_item->list, &filter_list);
filter_item->filter = __alloc_filter();
if (!filter_item->filter)
goto fail_mem;
filter = filter_item->filter;
/* Can only fail on no memory */
err = replace_filter_string(filter, filter_string);
if (err) if (err)
filter_disable_preds(call); goto fail_mem;
else {
err = replace_preds(call, filter, ps, filter_string, false);
if (err) {
filter_disable(call);
parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
append_filter_err(ps, filter);
} else
call->flags |= TRACE_EVENT_FL_FILTERED; call->flags |= TRACE_EVENT_FL_FILTERED;
replace_filter_string(filter, filter_string); /*
} * Regardless of if this returned an error, we still
* replace the filter for the call.
*/
filter = call->filter;
call->filter = filter_item->filter;
filter_item->filter = filter;
fail = false; fail = false;
} }
if (fail) { if (fail)
goto fail;
/*
* The calls can still be using the old filters.
* Do a synchronize_sched() to ensure all calls are
* done with them before we free them.
*/
synchronize_sched();
list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
__free_filter(filter_item->filter);
list_del(&filter_item->list);
kfree(filter_item);
}
return 0;
fail:
/* No call succeeded */
list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
list_del(&filter_item->list);
kfree(filter_item);
}
parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
return -EINVAL; return -EINVAL;
fail_mem:
/* If any call succeeded, we still need to sync */
if (!fail)
synchronize_sched();
list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
__free_filter(filter_item->filter);
list_del(&filter_item->list);
kfree(filter_item);
} }
return 0; return -ENOMEM;
} }
int apply_event_filter(struct ftrace_event_call *call, char *filter_string) int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
{ {
int err;
struct filter_parse_state *ps; struct filter_parse_state *ps;
struct event_filter *filter;
struct event_filter *tmp;
int err = 0;
mutex_lock(&event_mutex); mutex_lock(&event_mutex);
err = init_preds(call);
if (err)
goto out_unlock;
if (!strcmp(strstrip(filter_string), "0")) { if (!strcmp(strstrip(filter_string), "0")) {
filter_disable_preds(call); filter_disable(call);
remove_filter_string(call->filter); filter = call->filter;
if (!filter)
goto out_unlock;
call->filter = NULL;
/* Make sure the filter is not being used */
synchronize_sched();
__free_filter(filter);
goto out_unlock; goto out_unlock;
} }
...@@ -1306,22 +1833,41 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) ...@@ -1306,22 +1833,41 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
if (!ps) if (!ps)
goto out_unlock; goto out_unlock;
filter_disable_preds(call); filter = __alloc_filter();
replace_filter_string(call->filter, filter_string); if (!filter) {
kfree(ps);
goto out_unlock;
}
replace_filter_string(filter, filter_string);
parse_init(ps, filter_ops, filter_string); parse_init(ps, filter_ops, filter_string);
err = filter_parse(ps); err = filter_parse(ps);
if (err) { if (err) {
append_filter_err(ps, call->filter); append_filter_err(ps, filter);
goto out; goto out;
} }
err = replace_preds(call, call->filter, ps, filter_string, false); err = replace_preds(call, filter, ps, filter_string, false);
if (err) if (err) {
append_filter_err(ps, call->filter); filter_disable(call);
else append_filter_err(ps, filter);
} else
call->flags |= TRACE_EVENT_FL_FILTERED; call->flags |= TRACE_EVENT_FL_FILTERED;
out: out:
/*
* Always swap the call filter with the new filter
* even if there was an error. If there was an error
* in the filter, we disable the filter and show the error
* string
*/
tmp = call->filter;
call->filter = filter;
if (tmp) {
/* Make sure the call is done with the filter */
synchronize_sched();
__free_filter(tmp);
}
filter_opstack_clear(ps); filter_opstack_clear(ps);
postfix_clear(ps); postfix_clear(ps);
kfree(ps); kfree(ps);
...@@ -1334,18 +1880,21 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) ...@@ -1334,18 +1880,21 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
int apply_subsystem_event_filter(struct event_subsystem *system, int apply_subsystem_event_filter(struct event_subsystem *system,
char *filter_string) char *filter_string)
{ {
int err;
struct filter_parse_state *ps; struct filter_parse_state *ps;
struct event_filter *filter;
int err = 0;
mutex_lock(&event_mutex); mutex_lock(&event_mutex);
err = init_subsystem_preds(system);
if (err)
goto out_unlock;
if (!strcmp(strstrip(filter_string), "0")) { if (!strcmp(strstrip(filter_string), "0")) {
filter_free_subsystem_preds(system); filter_free_subsystem_preds(system);
remove_filter_string(system->filter); remove_filter_string(system->filter);
filter = system->filter;
system->filter = NULL;
/* Ensure all filters are no longer used */
synchronize_sched();
filter_free_subsystem_filters(system);
__free_filter(filter);
goto out_unlock; goto out_unlock;
} }
...@@ -1354,7 +1903,17 @@ int apply_subsystem_event_filter(struct event_subsystem *system, ...@@ -1354,7 +1903,17 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
if (!ps) if (!ps)
goto out_unlock; goto out_unlock;
replace_filter_string(system->filter, filter_string); filter = __alloc_filter();
if (!filter)
goto out;
replace_filter_string(filter, filter_string);
/*
* No event actually uses the system filter
* we can free it without synchronize_sched().
*/
__free_filter(system->filter);
system->filter = filter;
parse_init(ps, filter_ops, filter_string); parse_init(ps, filter_ops, filter_string);
err = filter_parse(ps); err = filter_parse(ps);
...@@ -1384,7 +1943,7 @@ void ftrace_profile_free_filter(struct perf_event *event) ...@@ -1384,7 +1943,7 @@ void ftrace_profile_free_filter(struct perf_event *event)
struct event_filter *filter = event->filter; struct event_filter *filter = event->filter;
event->filter = NULL; event->filter = NULL;
__free_preds(filter); __free_filter(filter);
} }
int ftrace_profile_set_filter(struct perf_event *event, int event_id, int ftrace_profile_set_filter(struct perf_event *event, int event_id,
...@@ -1410,8 +1969,8 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id, ...@@ -1410,8 +1969,8 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
if (event->filter) if (event->filter)
goto out_unlock; goto out_unlock;
filter = __alloc_preds(); filter = __alloc_filter();
if (IS_ERR(filter)) { if (!filter) {
err = PTR_ERR(filter); err = PTR_ERR(filter);
goto out_unlock; goto out_unlock;
} }
...@@ -1419,7 +1978,7 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id, ...@@ -1419,7 +1978,7 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
err = -ENOMEM; err = -ENOMEM;
ps = kzalloc(sizeof(*ps), GFP_KERNEL); ps = kzalloc(sizeof(*ps), GFP_KERNEL);
if (!ps) if (!ps)
goto free_preds; goto free_filter;
parse_init(ps, filter_ops, filter_str); parse_init(ps, filter_ops, filter_str);
err = filter_parse(ps); err = filter_parse(ps);
...@@ -1435,9 +1994,9 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id, ...@@ -1435,9 +1994,9 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
postfix_clear(ps); postfix_clear(ps);
kfree(ps); kfree(ps);
free_preds: free_filter:
if (err) if (err)
__free_preds(filter); __free_filter(filter);
out_unlock: out_unlock:
mutex_unlock(&event_mutex); mutex_unlock(&event_mutex);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册