diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 7d81aa6f567266e76cd7e68e7925b2ee44c2ca7e..a26882144e8fa577624a404f41a2dc4269194812 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5150,6 +5150,8 @@ static void ocfs2_truncate_log_worker(struct work_struct *work)
 	status = ocfs2_flush_truncate_log(osb);
 	if (status < 0)
 		mlog_errno(status);
+	else
+		ocfs2_init_inode_steal_slot(osb);
 
 	mlog_exit(status);
 }
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index b6d07198118c9c3facb4dab1c2bb6359d39cbbcd..ce0dc147602acdb409c339b0812027fa985731c2 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -447,6 +447,8 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
 	iput(main_bm_inode);
 
 out:
+	if (!status)
+		ocfs2_init_inode_steal_slot(osb);
 	mlog_exit(status);
 	return status;
 }
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index ae9ad9587516641366ddcb5753b61a120d9b137b..ab5a2272d0eb4047fe41e74c602c4e939fc18519 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -424,7 +424,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 	fe->i_fs_generation = cpu_to_le32(osb->fs_generation);
 	fe->i_blkno = cpu_to_le64(fe_blkno);
 	fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
-	fe->i_suballoc_slot = cpu_to_le16(osb->slot_num);
+	fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
 	fe->i_uid = cpu_to_le32(current->fsuid);
 	if (dir->i_mode & S_ISGID) {
 		fe->i_gid = cpu_to_le32(dir->i_gid);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 9ff5811345a995bf82279aa42fe6fa8e335524cc..31692379c17059154640ed2bd9ed1e09b0d0ff03 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -208,11 +208,14 @@ struct ocfs2_super
 	u32 s_feature_incompat;
 	u32 s_feature_ro_compat;
 
-	/* Protects s_next_generaion, osb_flags. Could protect more on
-	 * osb as it's very short lived. */
+	/* Protects s_next_generation, osb_flags and s_inode_steal_slot.
+	 * Could protect more on osb as it's very short lived.
+	 */
 	spinlock_t osb_lock;
 	u32 s_next_generation;
 	unsigned long osb_flags;
+	s16 s_inode_steal_slot;
+	atomic_t s_num_inodes_stolen;
 
 	unsigned long s_mount_opt;
 	unsigned int s_atime_quantum;
@@ -537,6 +540,33 @@ static inline unsigned int ocfs2_pages_per_cluster(struct super_block *sb)
 	return pages_per_cluster;
 }
 
+static inline void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb)
+{
+	spin_lock(&osb->osb_lock);
+	osb->s_inode_steal_slot = OCFS2_INVALID_SLOT;
+	spin_unlock(&osb->osb_lock);
+	atomic_set(&osb->s_num_inodes_stolen, 0);
+}
+
+static inline void ocfs2_set_inode_steal_slot(struct ocfs2_super *osb,
+					      s16 slot)
+{
+	spin_lock(&osb->osb_lock);
+	osb->s_inode_steal_slot = slot;
+	spin_unlock(&osb->osb_lock);
+}
+
+static inline s16 ocfs2_get_inode_steal_slot(struct ocfs2_super *osb)
+{
+	s16 slot;
+
+	spin_lock(&osb->osb_lock);
+	slot = osb->s_inode_steal_slot;
+	spin_unlock(&osb->osb_lock);
+
+	return slot;
+}
+
 #define ocfs2_set_bit ext2_set_bit
 #define ocfs2_clear_bit ext2_clear_bit
 #define ocfs2_test_bit ext2_test_bit
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 33d55734c514f9af8b4435e45ae368ee7d9614d5..d2d278fb9819ee3da92577a367b79f0bdc18681f 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -49,6 +49,8 @@
 #define NOT_ALLOC_NEW_GROUP		0
 #define ALLOC_NEW_GROUP			1
 
+#define OCFS2_MAX_INODES_TO_STEAL	1024
+
 static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg);
 static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe);
 static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl);
@@ -109,7 +111,7 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode,
 						u64 *bg_blkno,
 						u16 *bg_bit_off);
 
-void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
+static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
 {
 	struct inode *inode = ac->ac_inode;
 
@@ -120,9 +122,17 @@ void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
 		mutex_unlock(&inode->i_mutex);
 
 		iput(inode);
+		ac->ac_inode = NULL;
 	}
-	if (ac->ac_bh)
+	if (ac->ac_bh) {
 		brelse(ac->ac_bh);
+		ac->ac_bh = NULL;
+	}
+}
+
+void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
+{
+	ocfs2_free_ac_resource(ac);
 	kfree(ac);
 }
 
@@ -522,10 +532,42 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
 	return status;
 }
 
+static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb,
+					      struct ocfs2_alloc_context *ac)
+{
+	int i, status = -ENOSPC;
+	s16 slot = ocfs2_get_inode_steal_slot(osb);
+
+	/* No steal slot recorded yet: start with the first slot after ours. */
+	if (slot == OCFS2_INVALID_SLOT)
+		slot = osb->slot_num + 1;
+
+	for (i = 0; i < osb->max_slots; i++, slot++) {
+		if (slot == osb->max_slots)
+			slot = 0;
+
+		if (slot == osb->slot_num)
+			continue;
+
+		status = ocfs2_reserve_suballoc_bits(osb, ac,
+						     INODE_ALLOC_SYSTEM_INODE,
+						     slot, NOT_ALLOC_NEW_GROUP);
+		if (status >= 0) {
+			ocfs2_set_inode_steal_slot(osb, slot);
+			break;
+		}
+
+		ocfs2_free_ac_resource(ac);
+	}
+
+	return status;
+}
+
 int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
 			    struct ocfs2_alloc_context **ac)
 {
 	int status;
+	s16 slot = ocfs2_get_inode_steal_slot(osb);
 
 	*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
 	if (!(*ac)) {
@@ -539,9 +581,43 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
 
 	(*ac)->ac_group_search = ocfs2_block_group_search;
 
+	/*
+	 * The steal slot is set once we successfully steal an inode from a
+	 * slot belonging to another node.  It is reset in three places:
+	 * 1. when we flush the truncate log,
+	 * 2. when we complete local alloc recovery,
+	 * 3. when we successfully allocate from our own slot.
+	 * While it is set we keep stealing, until OCFS2_MAX_INODES_TO_STEAL
+	 * steal attempts have been made and we recheck our own slot for space.
+	 */
+	if (slot != OCFS2_INVALID_SLOT &&
+	    atomic_read(&osb->s_num_inodes_stolen) < OCFS2_MAX_INODES_TO_STEAL)
+		goto inode_steal;
+
+	atomic_set(&osb->s_num_inodes_stolen, 0);
 	status = ocfs2_reserve_suballoc_bits(osb, *ac,
 					     INODE_ALLOC_SYSTEM_INODE,
 					     osb->slot_num, ALLOC_NEW_GROUP);
+	if (status >= 0) {
+		status = 0;
+
+		/*
+		 * Some inodes must have been freed by us, so try to allocate
+		 * from our own slot next time.
+		 */
+		if (slot != OCFS2_INVALID_SLOT)
+			ocfs2_init_inode_steal_slot(osb);
+		goto bail;
+	} else if (status < 0 && status != -ENOSPC) {
+		mlog_errno(status);
+		goto bail;
+	}
+
+	ocfs2_free_ac_resource(*ac);
+
+inode_steal:
+	status = ocfs2_steal_inode_from_other_nodes(osb, *ac);
+	atomic_inc(&osb->s_num_inodes_stolen);
 	if (status < 0) {
 		if (status != -ENOSPC)
 			mlog_errno(status);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 96ebe36d5d77f0e292839e1fe505ebe480204e0b..df63ba20ae9016d7957930da638b729d9ab242bb 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1394,6 +1394,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	INIT_LIST_HEAD(&osb->blocked_lock_list);
 	osb->blocked_lock_count = 0;
 	spin_lock_init(&osb->osb_lock);
+	ocfs2_init_inode_steal_slot(osb);
 
 	atomic_set(&osb->alloc_stats.moves, 0);
 	atomic_set(&osb->alloc_stats.local_data, 0);