From e90c01e148b967d30caf59e76accb3a58ca6b74b Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 12 May 2006 12:09:15 -0400 Subject: [PATCH] [GFS2] Reverse block order in build_height The original code ordered the blocks allocated in the build_height routine backwards causing excessive disk seeks during a read of the metadata. This patch reverses the order to try and reduce disk seeks. Example: A five level metadata tree, I = Inode, P = Pointers, D = Data You need to read the blocks in the order: I P5 P4 P3 P2 P1 D in order to read a single data block. The new code now orders the blocks in this way. The old code used to order them as: I P1 P2 P3 P4 P5 D requiring two extra seeks on average. Note that for files which are grown by gradual extension rather than by truncate or by llseek/write at a large offset, this doesn't apply. In the case of writing to a file linearly, this routine will only be called upon to extend the height of the tree by one block at a time, so the ordering is determined by when its called rather than by the internals of the routine itself. Optimising that part of the ordering is a much harder problem. Signed-off-by: Steven Whitehouse --- fs/gfs2/bmap.c | 102 ++++++++++++++++++++++--------------------------- 1 file changed, 46 insertions(+), 56 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 474b9a16f0f5..31c3e92820e4 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -164,72 +164,62 @@ static unsigned int calc_tree_height(struct gfs2_inode *ip, uint64_t size) * @ip: The GFS2 inode * @height: The height to build to * - * This routine makes sure that the metadata tree is tall enough to hold - * "size" bytes of data. * * Returns: errno */ -static int build_height(struct gfs2_inode *ip, int height) +static int build_height(struct inode *inode, unsigned height) { - struct gfs2_sbd *sdp = ip->i_sbd; - struct buffer_head *bh, *dibh; - uint64_t block = 0, *bp; - unsigned int x; - int new_block; + struct gfs2_inode *ip = inode->u.generic_ip; + unsigned new_height = height - ip->i_di.di_height; + struct buffer_head *dibh; + struct buffer_head *blocks[GFS2_MAX_META_HEIGHT]; int error; + u64 *bp; + u64 bn; + unsigned n; - while (ip->i_di.di_height < height) { - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - return error; - - new_block = 0; - bp = (uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode)); - for (x = 0; x < sdp->sd_diptrs; x++, bp++) - if (*bp) { - new_block = 1; - break; - } - - if (new_block) { - /* Get a new block, fill it with the old direct - pointers, and write it out */ + if (height <= ip->i_di.di_height) + return 0; - block = gfs2_alloc_meta(ip); + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + return error; - bh = gfs2_meta_new(ip->i_gl, block); - gfs2_trans_add_bh(ip->i_gl, bh, 1); - gfs2_metatype_set(bh, - GFS2_METATYPE_IN, + for(n = 0; n < new_height; n++) { + bn = gfs2_alloc_meta(ip); + blocks[n] = gfs2_meta_new(ip->i_gl, bn); + gfs2_trans_add_bh(ip->i_gl, blocks[n], 1); + } + + n = 0; + bn = blocks[0]->b_blocknr; + if (new_height > 1) { + for(; n < new_height-1; n++) { + gfs2_metatype_set(blocks[n], GFS2_METATYPE_IN, GFS2_FORMAT_IN); - gfs2_buffer_copy_tail(bh, - sizeof(struct gfs2_meta_header), - dibh, sizeof(struct gfs2_dinode)); - - brelse(bh); - } - - /* Set up the new direct pointer and write it out to disk */ - - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - - gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); - - if (new_block) { - *(uint64_t *)(dibh->b_data + - sizeof(struct gfs2_dinode)) = - cpu_to_be64(block); - ip->i_di.di_blocks++; + gfs2_buffer_clear_tail(blocks[n], + sizeof(struct gfs2_meta_header)); + bp = (u64 *)(blocks[n]->b_data + + sizeof(struct gfs2_meta_header)); + *bp = cpu_to_be64(blocks[n+1]->b_blocknr); + brelse(blocks[n]); + blocks[n] = NULL; } - - ip->i_di.di_height++; - - gfs2_dinode_out(&ip->i_di, dibh->b_data); - brelse(dibh); } - - return 0; + gfs2_metatype_set(blocks[n], GFS2_METATYPE_IN, GFS2_FORMAT_IN); + gfs2_buffer_copy_tail(blocks[n], sizeof(struct gfs2_meta_header), + dibh, sizeof(struct gfs2_dinode)); + brelse(blocks[n]); + gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); + bp = (u64 *)(dibh->b_data + sizeof(struct gfs2_dinode)); + *bp = cpu_to_be64(bn); + ip->i_di.di_height += new_height; + ip->i_di.di_blocks += new_height; + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + return error; } /** @@ -416,7 +406,7 @@ static struct buffer_head *gfs2_block_pointers(struct inode *inode, u64 lblock, if (!create) goto out; - error = build_height(ip, height); + error = build_height(inode, height); if (error) goto out; } @@ -806,7 +796,7 @@ static int do_grow(struct gfs2_inode *ip, uint64_t size) h = calc_tree_height(ip, size); if (ip->i_di.di_height < h) { down_write(&ip->i_rw_mutex); - error = build_height(ip, h); + error = build_height(ip->i_vnode, h); up_write(&ip->i_rw_mutex); if (error) goto out_end_trans; -- GitLab