diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index c07f43f8eb939f610725478406b122b8c84bd634..00fd822ac6e4562ddf267fb56b31d5d8dec2f8e0 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -891,6 +891,7 @@ struct ext4_inode_info { struct ext4_es_tree i_es_tree; rwlock_t i_es_lock; struct list_head i_es_lru; + unsigned int i_es_all_nr; /* protected by i_es_lock */ unsigned int i_es_lru_nr; /* protected by i_es_lock */ unsigned long i_touch_when; /* jiffies of last accessing */ @@ -1331,8 +1332,7 @@ struct ext4_sb_info { /* Reclaim extents from extent status tree */ struct shrinker s_es_shrinker; struct list_head s_es_lru; - unsigned long s_es_last_sorted; - struct percpu_counter s_extent_cache_cnt; + struct ext4_es_stats s_es_stats; struct mb_cache *s_mb_cache; spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 95da65c60d834a81fbe3ca099eb54258a16c78e8..09fd5766726223fbd8ae30caa67cc80c9041fe8c 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -11,6 +11,8 @@ */ #include #include +#include +#include #include "ext4.h" #include "extents_status.h" @@ -313,19 +315,27 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, */ if (!ext4_es_is_delayed(es)) { EXT4_I(inode)->i_es_lru_nr++; - percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt); + percpu_counter_inc(&EXT4_SB(inode->i_sb)-> + s_es_stats.es_stats_lru_cnt); } + EXT4_I(inode)->i_es_all_nr++; + percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt); + return es; } static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) { + EXT4_I(inode)->i_es_all_nr--; + percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt); + /* Decrease the lru counter when this es is not delayed */ if (!ext4_es_is_delayed(es)) { BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0); EXT4_I(inode)->i_es_lru_nr--; - percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt); + percpu_counter_dec(&EXT4_SB(inode->i_sb)-> + s_es_stats.es_stats_lru_cnt); } kmem_cache_free(ext4_es_cachep, es); @@ -729,6 +739,7 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, struct extent_status *es) { struct ext4_es_tree *tree; + struct ext4_es_stats *stats; struct extent_status *es1 = NULL; struct rb_node *node; int found = 0; @@ -765,11 +776,15 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, } out: + stats = &EXT4_SB(inode->i_sb)->s_es_stats; if (found) { BUG_ON(!es1); es->es_lblk = es1->es_lblk; es->es_len = es1->es_len; es->es_pblk = es1->es_pblk; + stats->es_stats_cache_hits++; + } else { + stats->es_stats_cache_misses++; } read_unlock(&EXT4_I(inode)->i_es_lock); @@ -931,11 +946,16 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, struct ext4_inode_info *locked_ei) { struct ext4_inode_info *ei; + struct ext4_es_stats *es_stats; struct list_head *cur, *tmp; LIST_HEAD(skipped); + ktime_t start_time; + u64 scan_time; int nr_shrunk = 0; int retried = 0, skip_precached = 1, nr_skipped = 0; + es_stats = &sbi->s_es_stats; + start_time = ktime_get(); spin_lock(&sbi->s_es_lru_lock); retry: @@ -946,7 +966,8 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, * If we have already reclaimed all extents from extent * status tree, just stop the loop immediately. */ - if (percpu_counter_read_positive(&sbi->s_extent_cache_cnt) == 0) + if (percpu_counter_read_positive( + &es_stats->es_stats_lru_cnt) == 0) break; ei = list_entry(cur, struct ext4_inode_info, i_es_lru); @@ -956,7 +977,7 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, * time. Normally we try hard to avoid shrinking * precached inodes, but we will as a last resort. */ - if ((sbi->s_es_last_sorted < ei->i_touch_when) || + if ((es_stats->es_stats_last_sorted < ei->i_touch_when) || (skip_precached && ext4_test_inode_state(&ei->vfs_inode, EXT4_STATE_EXT_PRECACHED))) { nr_skipped++; @@ -990,7 +1011,7 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, if ((nr_shrunk == 0) && nr_skipped && !retried) { retried++; list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp); - sbi->s_es_last_sorted = jiffies; + es_stats->es_stats_last_sorted = jiffies; ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, i_es_lru); /* @@ -1008,6 +1029,22 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, if (locked_ei && nr_shrunk == 0) nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan); + scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); + if (likely(es_stats->es_stats_scan_time)) + es_stats->es_stats_scan_time = (scan_time + + es_stats->es_stats_scan_time*3) / 4; + else + es_stats->es_stats_scan_time = scan_time; + if (scan_time > es_stats->es_stats_max_scan_time) + es_stats->es_stats_max_scan_time = scan_time; + if (likely(es_stats->es_stats_shrunk)) + es_stats->es_stats_shrunk = (nr_shrunk + + es_stats->es_stats_shrunk*3) / 4; + else + es_stats->es_stats_shrunk = nr_shrunk; + + trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time, skip_precached, + nr_skipped, retried); return nr_shrunk; } @@ -1018,7 +1055,7 @@ static unsigned long ext4_es_count(struct shrinker *shrink, struct ext4_sb_info *sbi; sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); - nr = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); + nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt); trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr); return nr; } @@ -1031,7 +1068,7 @@ static unsigned long ext4_es_scan(struct shrinker *shrink, int nr_to_scan = sc->nr_to_scan; int ret, nr_shrunk; - ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); + ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt); trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret); if (!nr_to_scan) @@ -1043,19 +1080,148 @@ static unsigned long ext4_es_scan(struct shrinker *shrink, return nr_shrunk; } -void ext4_es_register_shrinker(struct ext4_sb_info *sbi) +static void *ext4_es_seq_shrinker_info_start(struct seq_file *seq, loff_t *pos) +{ + return *pos ? NULL : SEQ_START_TOKEN; +} + +static void * +ext4_es_seq_shrinker_info_next(struct seq_file *seq, void *v, loff_t *pos) +{ + return NULL; +} + +static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v) +{ + struct ext4_sb_info *sbi = seq->private; + struct ext4_es_stats *es_stats = &sbi->s_es_stats; + struct ext4_inode_info *ei, *max = NULL; + unsigned int inode_cnt = 0; + + if (v != SEQ_START_TOKEN) + return 0; + + /* here we just find an inode that has the max nr. of objects */ + spin_lock(&sbi->s_es_lru_lock); + list_for_each_entry(ei, &sbi->s_es_lru, i_es_lru) { + inode_cnt++; + if (max && max->i_es_all_nr < ei->i_es_all_nr) + max = ei; + else if (!max) + max = ei; + } + spin_unlock(&sbi->s_es_lru_lock); + + seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n", + percpu_counter_sum_positive(&es_stats->es_stats_all_cnt), + percpu_counter_sum_positive(&es_stats->es_stats_lru_cnt)); + seq_printf(seq, " %lu/%lu cache hits/misses\n", + es_stats->es_stats_cache_hits, + es_stats->es_stats_cache_misses); + if (es_stats->es_stats_last_sorted != 0) + seq_printf(seq, " %u ms last sorted interval\n", + jiffies_to_msecs(jiffies - + es_stats->es_stats_last_sorted)); + if (inode_cnt) + seq_printf(seq, " %d inodes on lru list\n", inode_cnt); + + seq_printf(seq, "average:\n %llu us scan time\n", + div_u64(es_stats->es_stats_scan_time, 1000)); + seq_printf(seq, " %lu shrunk objects\n", es_stats->es_stats_shrunk); + if (inode_cnt) + seq_printf(seq, + "maximum:\n %lu inode (%u objects, %u reclaimable)\n" + " %llu us max scan time\n", + max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_lru_nr, + div_u64(es_stats->es_stats_max_scan_time, 1000)); + + return 0; +} + +static void ext4_es_seq_shrinker_info_stop(struct seq_file *seq, void *v) +{ +} + +static const struct seq_operations ext4_es_seq_shrinker_info_ops = { + .start = ext4_es_seq_shrinker_info_start, + .next = ext4_es_seq_shrinker_info_next, + .stop = ext4_es_seq_shrinker_info_stop, + .show = ext4_es_seq_shrinker_info_show, +}; + +static int +ext4_es_seq_shrinker_info_open(struct inode *inode, struct file *file) +{ + int ret; + + ret = seq_open(file, &ext4_es_seq_shrinker_info_ops); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = PDE_DATA(inode); + } + + return ret; +} + +static int +ext4_es_seq_shrinker_info_release(struct inode *inode, struct file *file) +{ + return seq_release(inode, file); +} + +static const struct file_operations ext4_es_seq_shrinker_info_fops = { + .owner = THIS_MODULE, + .open = ext4_es_seq_shrinker_info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = ext4_es_seq_shrinker_info_release, +}; + +int ext4_es_register_shrinker(struct ext4_sb_info *sbi) { + int err; + INIT_LIST_HEAD(&sbi->s_es_lru); spin_lock_init(&sbi->s_es_lru_lock); - sbi->s_es_last_sorted = 0; + sbi->s_es_stats.es_stats_last_sorted = 0; + sbi->s_es_stats.es_stats_shrunk = 0; + sbi->s_es_stats.es_stats_cache_hits = 0; + sbi->s_es_stats.es_stats_cache_misses = 0; + sbi->s_es_stats.es_stats_scan_time = 0; + sbi->s_es_stats.es_stats_max_scan_time = 0; + err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0); + if (err) + return err; + err = percpu_counter_init(&sbi->s_es_stats.es_stats_lru_cnt, 0); + if (err) + goto err1; + sbi->s_es_shrinker.scan_objects = ext4_es_scan; sbi->s_es_shrinker.count_objects = ext4_es_count; sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; - register_shrinker(&sbi->s_es_shrinker); + err = register_shrinker(&sbi->s_es_shrinker); + if (err) + goto err2; + + if (sbi->s_proc) + proc_create_data("es_shrinker_info", S_IRUGO, sbi->s_proc, + &ext4_es_seq_shrinker_info_fops, sbi); + + return 0; + +err2: + percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt); +err1: + percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); + return err; } void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) { + if (sbi->s_proc) + remove_proc_entry("es_shrinker_info", sbi->s_proc); + percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); + percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt); unregister_shrinker(&sbi->s_es_shrinker); } diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index f1b62a41992059f3cbebd16ee6caaf25ee156283..efd5f970b5013085dec20e421b8ff3ced53246c2 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h @@ -64,6 +64,17 @@ struct ext4_es_tree { struct extent_status *cache_es; /* recently accessed extent */ }; +struct ext4_es_stats { + unsigned long es_stats_last_sorted; + unsigned long es_stats_shrunk; + unsigned long es_stats_cache_hits; + unsigned long es_stats_cache_misses; + u64 es_stats_scan_time; + u64 es_stats_max_scan_time; + struct percpu_counter es_stats_all_cnt; + struct percpu_counter es_stats_lru_cnt; +}; + extern int __init ext4_init_es(void); extern void ext4_exit_es(void); extern void ext4_es_init_tree(struct ext4_es_tree *tree); @@ -138,7 +149,7 @@ static inline void ext4_es_store_pblock_status(struct extent_status *es, (pb & ~ES_MASK)); } -extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi); +extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi); extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); extern void ext4_es_lru_add(struct inode *inode); extern void ext4_es_lru_del(struct inode *inode); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 7194a51eb2174ee5867eebc01162aec42b9b4cf1..487c65b8cff0e79d5600da441c2ba12e8eb8c516 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -820,7 +820,6 @@ static void ext4_put_super(struct super_block *sb) percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter); - percpu_counter_destroy(&sbi->s_extent_cache_cnt); brelse(sbi->s_sbh); #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) @@ -885,6 +884,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ext4_es_init_tree(&ei->i_es_tree); rwlock_init(&ei->i_es_lock); INIT_LIST_HEAD(&ei->i_es_lru); + ei->i_es_all_nr = 0; ei->i_es_lru_nr = 0; ei->i_touch_when = 0; ei->i_reserved_data_blocks = 0; @@ -3890,12 +3890,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_err_report.data = (unsigned long) sb; /* Register extent status tree shrinker */ - ext4_es_register_shrinker(sbi); - - if ((err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0)) != 0) { - ext4_msg(sb, KERN_ERR, "insufficient memory"); + if (ext4_es_register_shrinker(sbi)) goto failed_mount3; - } sbi->s_stripe = ext4_get_stripe_size(sbi); sbi->s_extent_max_zeroout_kb = 32; @@ -4225,10 +4221,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; } -failed_mount3: ext4_es_unregister_shrinker(sbi); +failed_mount3: del_timer_sync(&sbi->s_err_report); - percpu_counter_destroy(&sbi->s_extent_cache_cnt); if (sbi->s_mmp_tsk) kthread_stop(sbi->s_mmp_tsk); failed_mount2: diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 849aaba75dc884d42f40e47103513d1dabe9fc97..ff4bd1b35246a33482b997d2093dbedbe6079357 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -2450,6 +2450,37 @@ TRACE_EVENT(ext4_collapse_range, __entry->offset, __entry->len) ); +TRACE_EVENT(ext4_es_shrink, + TP_PROTO(struct super_block *sb, int nr_shrunk, u64 scan_time, + int skip_precached, int nr_skipped, int retried), + + TP_ARGS(sb, nr_shrunk, scan_time, skip_precached, nr_skipped, retried), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( int, nr_shrunk ) + __field( unsigned long long, scan_time ) + __field( int, skip_precached ) + __field( int, nr_skipped ) + __field( int, retried ) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->nr_shrunk = nr_shrunk; + __entry->scan_time = div_u64(scan_time, 1000); + __entry->skip_precached = skip_precached; + __entry->nr_skipped = nr_skipped; + __entry->retried = retried; + ), + + TP_printk("dev %d,%d nr_shrunk %d, scan_time %llu skip_precached %d " + "nr_skipped %d retried %d", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_shrunk, + __entry->scan_time, __entry->skip_precached, + __entry->nr_skipped, __entry->retried) +); + #endif /* _TRACE_EXT4_H */ /* This part must be outside protection */