提交 83cd9d23 编写于 作者: X Xiaoguang Wang 提交者: Shile Zhang

alinux: jbd2: track slow handle which is preventing transaction committing

While transaction is going to commit, it first sets its state to be
T_LOCKED and waits all outstanding handles to complete, and the
committing transaction will always be in locked state so long as it
has outstanding handles, also the whole fs will be locked and all later
fs modification operations will be stucked in wait_transaction_locked().

It's hard to tell why handles are that slow, so here we add a new staic
tracepoint to track such slow handle, and show io wait time and sched
wait time, output likes below:
  fsstress-20347 [024] ....  1570.305454: jbd2_slow_handle_stats: dev 254,17
tid 15853 type 4 line_no 3101 interval 126 sync 0 requested_blocks 24
dirtied_blocks 0 trans_wait 122 space_wait 0 sched_wait 0 io_wait 126

"trans_wait 122" means that this current committing transaction has been
locked for 122ms, due to this handle is not completed quickly.

From "io_wait 126", we can see that io is the major reason.

In this patch, we also add a per fs control file used to determine
whether a handle can be considered to be slow.
    /proc/fs/jbd2/vdb1-8/stall_thresh
default value is 100ms, users can set new threshold by echoing new value
to this file.

Later I also plan to add a proc file fs per fs to record these info.
Reviewed-by: NJoseph Qi <joseph.qi@linux.alibaba.com>
Signed-off-by: NXiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
上级 2864376f
......@@ -422,6 +422,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
write_lock(&journal->j_state_lock);
J_ASSERT(commit_transaction->t_state == T_RUNNING);
commit_transaction->t_state = T_LOCKED;
WRITE_ONCE(commit_transaction->t_locked_time, jiffies);
trace_jbd2_commit_locking(journal, commit_transaction);
stats.run.rs_wait = commit_transaction->t_max_wait;
......
......@@ -1299,6 +1299,55 @@ static const struct file_operations jbd2_seq_force_copy_fops = {
.release = single_release,
};
static int jbd2_seq_stall_thresh_show(struct seq_file *m, void *v)
{
journal_t *journal = m->private;
seq_printf(m, "%lu\n", journal->j_stall_thresh);
return 0;
}
static int jbd2_seq_stall_thresh_open(struct inode *inode, struct file *filp)
{
journal_t *journal = PDE_DATA(inode);
return single_open(filp, jbd2_seq_stall_thresh_show, journal);
}
static ssize_t jbd2_seq_stall_thresh_write(struct file *file,
const char __user *buf, size_t count, loff_t *offset)
{
struct inode *inode = file_inode(file);
journal_t *journal = PDE_DATA(inode);
char buffer[PROC_NUMBUF];
unsigned long long stall_thresh;
int err;
memset(buffer, 0, sizeof(buffer));
if (count > sizeof(buffer) - 1)
count = sizeof(buffer) - 1;
if (copy_from_user(buffer, buf, count)) {
err = -EFAULT;
goto out;
}
err = kstrtoull(strstrip(buffer), 0, &stall_thresh);
if (err)
goto out;
WRITE_ONCE(journal->j_stall_thresh, stall_thresh);
out:
return err < 0 ? err : count;
}
static const struct file_operations jbd2_seq_stall_thresh_fops = {
.owner = THIS_MODULE,
.open = jbd2_seq_stall_thresh_open,
.read = seq_read,
.write = jbd2_seq_stall_thresh_write,
.llseek = seq_lseek,
.release = single_release,
};
static struct proc_dir_entry *proc_jbd2_stats;
static void jbd2_stats_proc_init(journal_t *journal)
......@@ -1311,6 +1360,8 @@ static void jbd2_stats_proc_init(journal_t *journal)
&jbd2_seq_force_copy_fops, journal);
proc_create_data("stats", S_IRUGO, journal->j_proc_entry,
&jbd2_seq_stats_fops, journal);
proc_create_data("stall_thresh", 0644, journal->j_proc_entry,
&jbd2_seq_stall_thresh_fops, journal);
}
}
......@@ -1319,6 +1370,7 @@ static void jbd2_stats_proc_exit(journal_t *journal)
remove_proc_entry("info", journal->j_proc_entry);
remove_proc_entry("force_copy", journal->j_proc_entry);
remove_proc_entry("stats", journal->j_proc_entry);
remove_proc_entry("stall_thresh", journal->j_proc_entry);
remove_proc_entry(journal->j_devname, proc_jbd2_stats);
}
......@@ -1361,6 +1413,7 @@ static journal_t *journal_init_common(struct block_device *bdev,
journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
journal->j_min_batch_time = 0;
journal->j_max_batch_time = 15000; /* 15ms */
journal->j_stall_thresh = JBD2_DEFAULT_TRANS_STALL_THRESH;
atomic_set(&journal->j_reserved_credits, 0);
/* The journal is marked for error until we succeed with recovery! */
......
......@@ -402,6 +402,9 @@ static handle_t *new_handle(int nblocks)
return NULL;
handle->h_buffer_credits = nblocks;
handle->h_ref = 1;
handle->h_pre_start_jiffies = jiffies;
handle->h_sched_wait_sum = current->se.statistics.wait_sum;
handle->h_io_wait_sum = current->se.statistics.iowait_sum;
return handle;
}
......@@ -1803,6 +1806,34 @@ int jbd2_journal_stop(handle_t *handle)
wait_for_commit = 1;
}
do {
unsigned long transaction_locked_time, delta;
unsigned long journal_space_wait;
u64 sched_wait, io_wait;
transaction_locked_time = READ_ONCE(transaction->t_locked_time);
if (!transaction_locked_time)
break;
delta = jiffies_to_msecs(jiffies - transaction_locked_time);
if (delta < READ_ONCE(journal->j_stall_thresh))
break;
journal_space_wait = handle->h_start_jiffies -
handle->h_pre_start_jiffies;
sched_wait = current->se.statistics.wait_sum -
handle->h_sched_wait_sum;
io_wait = current->se.statistics.iowait_sum -
handle->h_io_wait_sum;
trace_jbd2_slow_handle_stats(journal->j_fs_dev->bd_dev,
transaction->t_tid, handle->h_type, handle->h_line_no,
jiffies - handle->h_start_jiffies, handle->h_sync,
handle->h_requested_credits,
handle->h_requested_credits - handle->h_buffer_credits,
delta, jiffies_to_msecs(journal_space_wait),
sched_wait / NSEC_PER_MSEC, io_wait / NSEC_PER_MSEC);
} while (0);
/*
* Once we drop t_updates, if it goes to zero the transaction
* could start committing on us and eventually disappear. So
......
......@@ -519,7 +519,10 @@ struct jbd2_journal_handle
unsigned int h_type: 8;
unsigned int h_line_no: 16;
unsigned long h_pre_start_jiffies;
unsigned long h_start_jiffies;
u64 h_sched_wait_sum;
u64 h_io_wait_sum;
unsigned int h_requested_credits;
unsigned int saved_alloc_context;
......@@ -724,6 +727,9 @@ struct transaction_s
* structures associated with the transaction
*/
struct list_head t_private_list;
/* When this transaction is locked */
unsigned long t_locked_time;
};
struct transaction_run_stats_s {
......@@ -1149,6 +1155,15 @@ struct journal_s
*/
unsigned int j_force_copy;
/**
* @j_stall_thresh: when transaction is locked and there are still
* outstanding handles, such handles will prevent transaction
* committing, trace these handles if they have stalled the transaction
* for @j_stall_thresh time, unit is millisecond, default 100ms.
*/
#define JBD2_DEFAULT_TRANS_STALL_THRESH 100
unsigned long j_stall_thresh;
/**
* @j_failed_commit: Failed journal commit ID.
*/
......
......@@ -231,6 +231,56 @@ TRACE_EVENT(jbd2_handle_stats,
__entry->dirtied_blocks)
);
TRACE_EVENT(jbd2_slow_handle_stats,
TP_PROTO(dev_t dev, unsigned long tid, unsigned int type,
unsigned int line_no, int interval, int sync,
int requested_blocks, int dirtied_blocks,
unsigned long trans_wait, unsigned long space_wait,
u64 sched_wait, u64 io_wait),
TP_ARGS(dev, tid, type, line_no, interval, sync,
requested_blocks, dirtied_blocks, trans_wait, space_wait,
sched_wait, io_wait),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( unsigned long, tid )
__field( unsigned int, type )
__field( unsigned int, line_no )
__field( int, interval )
__field( int, sync )
__field( int, requested_blocks)
__field( int, dirtied_blocks )
__field( unsigned long, trans_wait )
__field( unsigned long, space_wait )
__field( u64, sched_wait )
__field( u64, io_wait )
),
TP_fast_assign(
__entry->dev = dev;
__entry->tid = tid;
__entry->type = type;
__entry->line_no = line_no;
__entry->interval = interval;
__entry->sync = sync;
__entry->requested_blocks = requested_blocks;
__entry->dirtied_blocks = dirtied_blocks;
__entry->trans_wait = trans_wait;
__entry->space_wait = space_wait;
__entry->sched_wait = sched_wait;
__entry->io_wait = io_wait;
),
TP_printk("dev %d,%d tid %lu type %u line_no %u interval %d "
"sync %d requested_blocks %d dirtied_blocks %d "
"trans_wait %lu space_wait %lu sched_wait %llu io_wait %llu",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid,
__entry->type, __entry->line_no, __entry->interval,
__entry->sync, __entry->requested_blocks,
__entry->dirtied_blocks, __entry->trans_wait,
__entry->space_wait, __entry->sched_wait, __entry->io_wait)
);
TRACE_EVENT(jbd2_run_stats,
TP_PROTO(dev_t dev, unsigned long tid,
struct transaction_run_stats_s *stats),
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册