diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index f85f1fb49df8fec3f6edbd5fe6f7d11bdd02452a..f5f3b6c5824053870672faca1a7a2b9265f29f22 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -864,6 +864,7 @@ struct ext4_inode_info { rwlock_t i_es_lock; struct list_head i_es_lru; unsigned int i_es_lru_nr; /* protected by i_es_lock */ + unsigned long i_touch_when; /* jiffies of last accessing */ /* ialloc */ ext4_group_t i_last_alloc_group; @@ -1303,6 +1304,7 @@ struct ext4_sb_info { /* Reclaim extents from extent status tree */ struct shrinker s_es_shrinker; struct list_head s_es_lru; + unsigned long s_es_last_sorted; struct percpu_counter s_extent_cache_cnt; spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; }; diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index e6941e622d310eb1ab47793b7c2e984609cebf0b..ee018d5f397e6ba88ec0bf21196e1ad0612d4e53 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -10,6 +10,7 @@ * Ext4 extents status tree core functions. */ #include +#include #include "ext4.h" #include "extents_status.h" #include "ext4_extents.h" @@ -291,7 +292,6 @@ void ext4_es_find_delayed_extent_range(struct inode *inode, read_unlock(&EXT4_I(inode)->i_es_lock); - ext4_es_lru_add(inode); trace_ext4_es_find_delayed_extent_range_exit(inode, es); } @@ -672,7 +672,6 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, error: write_unlock(&EXT4_I(inode)->i_es_lock); - ext4_es_lru_add(inode); ext4_es_print_tree(inode); return err; @@ -734,7 +733,6 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, read_unlock(&EXT4_I(inode)->i_es_lock); - ext4_es_lru_add(inode); trace_ext4_es_lookup_extent_exit(inode, es, found); return found; } @@ -878,12 +876,28 @@ int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex) EXTENT_STATUS_WRITTEN); } +static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a, + struct list_head *b) +{ + struct ext4_inode_info *eia, *eib; + eia = list_entry(a, struct ext4_inode_info, i_es_lru); + eib = list_entry(b, struct ext4_inode_info, i_es_lru); + + if (eia->i_touch_when == eib->i_touch_when) + return 0; + if (time_after(eia->i_touch_when, eib->i_touch_when)) + return 1; + else + return -1; +} + static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) { struct ext4_sb_info *sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); struct ext4_inode_info *ei; - struct list_head *cur, *tmp, scanned; + struct list_head *cur, *tmp; + LIST_HEAD(skiped); int nr_to_scan = sc->nr_to_scan; int ret, nr_shrunk = 0; @@ -893,23 +907,41 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) if (!nr_to_scan) return ret; - INIT_LIST_HEAD(&scanned); - spin_lock(&sbi->s_es_lru_lock); + + /* + * If the inode that is at the head of LRU list is newer than + * last_sorted time, that means that we need to sort this list. + */ + ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, i_es_lru); + if (sbi->s_es_last_sorted < ei->i_touch_when) { + list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp); + sbi->s_es_last_sorted = jiffies; + } + list_for_each_safe(cur, tmp, &sbi->s_es_lru) { - list_move_tail(cur, &scanned); + /* + * If we have already reclaimed all extents from extent + * status tree, just stop the loop immediately. + */ + if (percpu_counter_read_positive(&sbi->s_extent_cache_cnt) == 0) + break; ei = list_entry(cur, struct ext4_inode_info, i_es_lru); - read_lock(&ei->i_es_lock); - if (ei->i_es_lru_nr == 0) { - read_unlock(&ei->i_es_lock); + /* Skip the inode that is newer than the last_sorted time */ + if (sbi->s_es_last_sorted < ei->i_touch_when) { + list_move_tail(cur, &skiped); continue; } - read_unlock(&ei->i_es_lock); + + if (ei->i_es_lru_nr == 0) + continue; write_lock(&ei->i_es_lock); ret = __es_try_to_reclaim_extents(ei, nr_to_scan); + if (ei->i_es_lru_nr == 0) + list_del_init(&ei->i_es_lru); write_unlock(&ei->i_es_lock); nr_shrunk += ret; @@ -917,7 +949,9 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) if (nr_to_scan == 0) break; } - list_splice_tail(&scanned, &sbi->s_es_lru); + + /* Move the newer inodes into the tail of the LRU list. */ + list_splice_tail(&skiped, &sbi->s_es_lru); spin_unlock(&sbi->s_es_lru_lock); ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); @@ -925,21 +959,19 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) return ret; } -void ext4_es_register_shrinker(struct super_block *sb) +void ext4_es_register_shrinker(struct ext4_sb_info *sbi) { - struct ext4_sb_info *sbi; - - sbi = EXT4_SB(sb); INIT_LIST_HEAD(&sbi->s_es_lru); spin_lock_init(&sbi->s_es_lru_lock); + sbi->s_es_last_sorted = 0; sbi->s_es_shrinker.shrink = ext4_es_shrink; sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; register_shrinker(&sbi->s_es_shrinker); } -void ext4_es_unregister_shrinker(struct super_block *sb) +void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) { - unregister_shrinker(&EXT4_SB(sb)->s_es_shrinker); + unregister_shrinker(&sbi->s_es_shrinker); } void ext4_es_lru_add(struct inode *inode) @@ -947,11 +979,14 @@ void ext4_es_lru_add(struct inode *inode) struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + ei->i_touch_when = jiffies; + + if (!list_empty(&ei->i_es_lru)) + return; + spin_lock(&sbi->s_es_lru_lock); if (list_empty(&ei->i_es_lru)) list_add_tail(&ei->i_es_lru, &sbi->s_es_lru); - else - list_move_tail(&ei->i_es_lru, &sbi->s_es_lru); spin_unlock(&sbi->s_es_lru_lock); } diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index f740eb03b7079b755d3033225a08546ab450c117..e936730cc5b029c45153d47a026028aab8a23283 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h @@ -39,6 +39,7 @@ EXTENT_STATUS_DELAYED | \ EXTENT_STATUS_HOLE) +struct ext4_sb_info; struct ext4_extent; struct extent_status { @@ -119,8 +120,8 @@ static inline void ext4_es_store_status(struct extent_status *es, es->es_pblk = block; } -extern void ext4_es_register_shrinker(struct super_block *sb); -extern void ext4_es_unregister_shrinker(struct super_block *sb); +extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi); +extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); extern void ext4_es_lru_add(struct inode *inode); extern void ext4_es_lru_del(struct inode *inode); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0db830d541ecc16b73a5f339a6e2335241a8c9f0..f9ba51f687776c42a13251ca7d9771ea4339df2a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -514,6 +514,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, "logical block %lu\n", inode->i_ino, flags, map->m_len, (unsigned long) map->m_lblk); + ext4_es_lru_add(inode); + /* Lookup extent status tree firstly */ if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { @@ -1526,6 +1528,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, "logical block %lu\n", inode->i_ino, map->m_len, (unsigned long) map->m_lblk); + ext4_es_lru_add(inode); + /* Lookup extent status tree firstly */ if (ext4_es_lookup_extent(inode, iblock, &es)) { diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 54701fca45153733a4c6dd5fdc1d3a607c0f34a2..cc8201180b3074a03a6a2e0e9e55da7a9560914a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -773,7 +773,7 @@ static void ext4_put_super(struct super_block *sb) ext4_abort(sb, "Couldn't clean up the journal"); } - ext4_es_unregister_shrinker(sb); + ext4_es_unregister_shrinker(sbi); del_timer(&sbi->s_err_report); ext4_release_system_zone(sb); ext4_mb_release(sb); @@ -862,6 +862,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) rwlock_init(&ei->i_es_lock); INIT_LIST_HEAD(&ei->i_es_lru); ei->i_es_lru_nr = 0; + ei->i_touch_when = 0; ei->i_reserved_data_blocks = 0; ei->i_reserved_meta_blocks = 0; ei->i_allocated_meta_blocks = 0; @@ -3799,7 +3800,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_err_report.data = (unsigned long) sb; /* Register extent status tree shrinker */ - ext4_es_register_shrinker(sb); + ext4_es_register_shrinker(sbi); err = percpu_counter_init(&sbi->s_freeclusters_counter, ext4_count_free_clusters(sb)); @@ -4127,7 +4128,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_journal = NULL; } failed_mount3: - ext4_es_unregister_shrinker(sb); + ext4_es_unregister_shrinker(sbi); del_timer(&sbi->s_err_report); if (sbi->s_flex_groups) ext4_kvfree(sbi->s_flex_groups);