diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile index 4e87b8661af0b2df3b82efde6dd2f1d5f77bbe3c..88f92794811385a5c02ad02a66c7e8b3997a10e7 100644 --- a/fs/gfs2/Makefile +++ b/fs/gfs2/Makefile @@ -10,7 +10,6 @@ gfs2-y := \ glock.o \ glops.o \ inode.o \ - jdata.o \ lm.o \ log.o \ lops.o \ diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index bd194f645c52df0a5797575e2158739284ff18e1..4efcd8a39e988f13a453410bf618ddd139371e66 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -18,12 +18,12 @@ #include "bmap.h" #include "glock.h" #include "inode.h" -#include "jdata.h" #include "meta_io.h" #include "page.h" #include "quota.h" #include "rgrp.h" #include "trans.h" +#include "dir.h" /* This doesn't need to be that large as max 64 bit pointers in a 4k * block is 512, so __u16 is fine for that. It saves stack space to @@ -90,7 +90,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, gfs2_unstuffer_t unstuffer, { struct buffer_head *bh, *dibh; uint64_t block = 0; - int journaled = gfs2_is_jdata(ip); + int isdir = gfs2_is_dir(ip); int error; down_write(&ip->i_rw_mutex); @@ -103,10 +103,10 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, gfs2_unstuffer_t unstuffer, /* Get a free block, fill it with the stuffed data, and write it out to disk */ - if (journaled) { + if (isdir) { block = gfs2_alloc_meta(ip); - error = gfs2_jdata_get_buffer(ip, block, 1, &bh); + error = gfs2_dir_get_buffer(ip, block, 1, &bh); if (error) goto out_brelse; gfs2_buffer_copy_tail(bh, @@ -168,7 +168,7 @@ static unsigned int calc_tree_height(struct gfs2_inode *ip, uint64_t size) if (ip->i_di.di_size > size) size = ip->i_di.di_size; - if (gfs2_is_jdata(ip)) { + if (gfs2_is_dir(ip)) { arr = sdp->sd_jheightsize; max = sdp->sd_max_jheight; } else { @@ -377,7 +377,7 @@ static void lookup_block(struct gfs2_inode *ip, struct buffer_head *bh, return; if (height == ip->i_di.di_height - 1 && - !gfs2_is_jdata(ip)) + !gfs2_is_dir(ip)) *block = gfs2_alloc_data(ip); else *block = gfs2_alloc_meta(ip); @@ -430,7 +430,7 @@ int gfs2_block_map(struct gfs2_inode *ip, uint64_t lblock, int *new, if (gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip))) goto out; - bsize = (gfs2_is_jdata(ip)) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize; + bsize = (gfs2_is_dir(ip)) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize; height = calc_tree_height(ip, (lblock + 1) * bsize); if (ip->i_di.di_height < height) { @@ -618,7 +618,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, sm->sm_first = 0; } - metadata = (height != ip->i_di.di_height - 1) || gfs2_is_jdata(ip); + metadata = (height != ip->i_di.di_height - 1); if (metadata) revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; @@ -814,33 +814,6 @@ static int do_grow(struct gfs2_inode *ip, uint64_t size) return error; } -static int truncator_journaled(struct gfs2_inode *ip, uint64_t size) -{ - uint64_t lbn, dbn; - uint32_t off; - struct buffer_head *bh; - int new = 0; - int error; - - lbn = size; - off = do_div(lbn, ip->i_sbd->sd_jbsize); - - error = gfs2_block_map(ip, lbn, &new, &dbn, NULL); - if (error || !dbn) - return error; - - error = gfs2_jdata_get_buffer(ip, dbn, 0, &bh); - if (error) - return error; - - gfs2_trans_add_bh(ip->i_gl, bh, 1); - gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header) + off); - - brelse(bh); - - return 0; -} - static int trunc_start(struct gfs2_inode *ip, uint64_t size) { struct gfs2_sbd *sdp = ip->i_sbd; @@ -866,12 +839,7 @@ static int trunc_start(struct gfs2_inode *ip, uint64_t size) error = 1; } else { - if (journaled) { - uint64_t junk = size; - /* we're just interested in the modulus */ - if (do_div(junk, sdp->sd_jbsize)) - error = truncator_journaled(ip, size); - } else if (size & (uint64_t)(sdp->sd_sb.sb_bsize - 1)) + if (size & (uint64_t)(sdp->sd_sb.sb_bsize - 1)) error = gfs2_block_truncate_page(ip->i_vnode->i_mapping); if (!error) { @@ -900,10 +868,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, uint64_t size) if (!size) lblock = 0; - else if (gfs2_is_jdata(ip)) { - lblock = size - 1; - do_div(lblock, ip->i_sbd->sd_jbsize); - } else + else lblock = (size - 1) >> ip->i_sbd->sd_sb.sb_bsize_shift; find_metapath(ip, lblock, &mp); @@ -1051,7 +1016,7 @@ void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len, struct gfs2_sbd *sdp = ip->i_sbd; unsigned int tmp; - if (gfs2_is_jdata(ip)) { + if (gfs2_is_dir(ip)) { *data_blocks = DIV_RU(len, sdp->sd_jbsize) + 2; *ind_blocks = 3 * (sdp->sd_max_jheight - 1); } else { @@ -1096,7 +1061,7 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, uint64_t offset, return 0; } - if (gfs2_is_jdata(ip)) { + if (gfs2_is_dir(ip)) { unsigned int bsize = sdp->sd_jbsize; lblock = offset; do_div(lblock, bsize); diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index ada283a0f5f306635c7c7797639cde9675bddb78..c77e18048d98217d16e471792450194fba49f0ac 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -86,8 +86,8 @@ typedef int (*leaf_call_t) (struct gfs2_inode *dip, uint32_t index, uint32_t len, uint64_t leaf_no, void *data); -static int gfs2_dir_get_buffer(struct gfs2_inode *ip, uint64_t block, int new, - struct buffer_head **bhp) +int gfs2_dir_get_buffer(struct gfs2_inode *ip, uint64_t block, int new, + struct buffer_head **bhp) { struct buffer_head *bh; int error = 0; diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index ff6d1c597ee94d41e30a49204f94ce5f87df4fb9..5b01497b3ab337ee8e444800052a004f6374dad2 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h @@ -45,5 +45,7 @@ int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); int gfs2_diradd_alloc_required(struct gfs2_inode *dip, struct qstr *filename, int *alloc_required); +int gfs2_dir_get_buffer(struct gfs2_inode *ip, uint64_t block, int new, + struct buffer_head **bhp); #endif /* __DIR_DOT_H__ */ diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index e42ae38d67781fe1be5d34538b0bb072511cd39c..214975c6bb22ae1828c553e5a9e652a644ca857d 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -20,6 +20,11 @@ static inline int gfs2_is_jdata(struct gfs2_inode *ip) return ip->i_di.di_flags & GFS2_DIF_JDATA; } +static inline int gfs2_is_dir(struct gfs2_inode *ip) +{ + return S_ISDIR(ip->i_di.di_mode); +} + void gfs2_inode_attr_in(struct gfs2_inode *ip); void gfs2_inode_attr_out(struct gfs2_inode *ip); struct inode *gfs2_ip2v_lookup(struct gfs2_inode *ip); @@ -72,9 +77,9 @@ static inline int gfs2_lookup_simple(struct inode *dip, char *name, err = gfs2_lookupi(get_v2ip(dip), &qstr, 1, &ip); if (err == 0) { *ipp = gfs2_ip2v(ip); + gfs2_inode_put(ip); if (*ipp == NULL) err = -ENOMEM; - gfs2_inode_put(ip); } return err; } diff --git a/fs/gfs2/jdata.c b/fs/gfs2/jdata.c deleted file mode 100644 index e43eaf133f10e0ce9200500de568eab1e31f1afc..0000000000000000000000000000000000000000 --- a/fs/gfs2/jdata.c +++ /dev/null @@ -1,389 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "gfs2.h" -#include "bmap.h" -#include "inode.h" -#include "jdata.h" -#include "meta_io.h" -#include "trans.h" - -int gfs2_internal_read(struct gfs2_inode *ip, - struct file_ra_state *ra_state, - char *buf, loff_t *pos, unsigned size) -{ - return gfs2_jdata_read_mem(ip, buf, *pos, size); -} - -int gfs2_jdata_get_buffer(struct gfs2_inode *ip, uint64_t block, int new, - struct buffer_head **bhp) -{ - struct buffer_head *bh; - int error = 0; - - if (new) { - bh = gfs2_meta_new(ip->i_gl, block); - gfs2_trans_add_bh(ip->i_gl, bh, 1); - gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD); - gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); - } else { - error = gfs2_meta_read(ip->i_gl, block, - DIO_START | DIO_WAIT, &bh); - if (error) - return error; - if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_JD)) { - brelse(bh); - return -EIO; - } - } - - *bhp = bh; - - return 0; -} - -/** - * gfs2_copy2mem - Trivial copy function for gfs2_jdata_read() - * @bh: The buffer to copy from, or NULL meaning zero the buffer - * @buf: The buffer to copy/zero - * @offset: The offset in the buffer to copy from - * @size: The amount of data to copy/zero - * - * Returns: errno - */ - -int gfs2_copy2mem(struct buffer_head *bh, char **buf, unsigned int offset, - unsigned int size) -{ - if (bh) - memcpy(*buf, bh->b_data + offset, size); - else - memset(*buf, 0, size); - *buf += size; - return 0; -} - -/** - * gfs2_copy2user - Copy bytes to user space for gfs2_jdata_read() - * @bh: The buffer - * @buf: The destination of the data - * @offset: The offset into the buffer - * @size: The amount of data to copy - * - * Returns: errno - */ - -int gfs2_copy2user(struct buffer_head *bh, char **buf, unsigned int offset, - unsigned int size) -{ - int error; - - if (bh) - error = copy_to_user(*buf, bh->b_data + offset, size); - else - error = clear_user(*buf, size); - - if (error) - error = -EFAULT; - else - *buf += size; - - return error; -} - -static int jdata_read_stuffed(struct gfs2_inode *ip, char *buf, - unsigned int offset, unsigned int size, - read_copy_fn_t copy_fn) -{ - struct buffer_head *dibh; - int error; - - error = gfs2_meta_inode_buffer(ip, &dibh); - if (!error) { - error = copy_fn(dibh, &buf, - offset + sizeof(struct gfs2_dinode), size); - brelse(dibh); - } - - return (error) ? error : size; -} - -/** - * gfs2_jdata_read - Read a jdata file - * @ip: The GFS2 Inode - * @buf: The buffer to place result into - * @offset: File offset to begin jdata_readng from - * @size: Amount of data to transfer - * @copy_fn: Function to actually perform the copy - * - * The @copy_fn only copies a maximum of a single block at once so - * we are safe calling it with int arguments. It is done so that - * we don't needlessly put 64bit arguments on the stack and it - * also makes the code in the @copy_fn nicer too. - * - * Returns: The amount of data actually copied or the error - */ - -int gfs2_jdata_read(struct gfs2_inode *ip, char __user *buf, uint64_t offset, - unsigned int size, read_copy_fn_t copy_fn) -{ - struct gfs2_sbd *sdp = ip->i_sbd; - uint64_t lblock, dblock; - uint32_t extlen = 0; - unsigned int o; - int copied = 0; - int error = 0; - - if (offset >= ip->i_di.di_size) - return 0; - - if ((offset + size) > ip->i_di.di_size) - size = ip->i_di.di_size - offset; - - if (!size) - return 0; - - if (gfs2_is_stuffed(ip)) - return jdata_read_stuffed(ip, buf, (unsigned int)offset, size, - copy_fn); - - if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip))) - return -EINVAL; - - lblock = offset; - o = do_div(lblock, sdp->sd_jbsize) + - sizeof(struct gfs2_meta_header); - - while (copied < size) { - unsigned int amount; - struct buffer_head *bh; - int new; - - amount = size - copied; - if (amount > sdp->sd_sb.sb_bsize - o) - amount = sdp->sd_sb.sb_bsize - o; - - if (!extlen) { - new = 0; - error = gfs2_block_map(ip, lblock, &new, - &dblock, &extlen); - if (error) - goto fail; - } - - if (extlen > 1) - gfs2_meta_ra(ip->i_gl, dblock, extlen); - - if (dblock) { - error = gfs2_jdata_get_buffer(ip, dblock, new, &bh); - if (error) - goto fail; - dblock++; - extlen--; - } else - bh = NULL; - - error = copy_fn(bh, &buf, o, amount); - brelse(bh); - if (error) - goto fail; - - copied += amount; - lblock++; - - o = sizeof(struct gfs2_meta_header); - } - - return copied; - - fail: - return (copied) ? copied : error; -} - -/** - * gfs2_copy_from_mem - Trivial copy function for gfs2_jdata_write() - * @bh: The buffer to copy to or clear - * @buf: The buffer to copy from - * @offset: The offset in the buffer to write to - * @size: The amount of data to write - * - * Returns: errno - */ - -int gfs2_copy_from_mem(struct gfs2_inode *ip, struct buffer_head *bh, - const char **buf, unsigned int offset, unsigned int size) -{ - gfs2_trans_add_bh(ip->i_gl, bh, 1); - memcpy(bh->b_data + offset, *buf, size); - - *buf += size; - - return 0; -} - -/** - * gfs2_copy_from_user - Copy bytes from user space for gfs2_jdata_write() - * @bh: The buffer to copy to or clear - * @buf: The buffer to copy from - * @offset: The offset in the buffer to write to - * @size: The amount of data to write - * - * Returns: errno - */ - -int gfs2_copy_from_user(struct gfs2_inode *ip, struct buffer_head *bh, - const char __user **buf, unsigned int offset, unsigned int size) -{ - int error = 0; - - gfs2_trans_add_bh(ip->i_gl, bh, 1); - if (copy_from_user(bh->b_data + offset, *buf, size)) - error = -EFAULT; - else - *buf += size; - - return error; -} - -static int jdata_write_stuffed(struct gfs2_inode *ip, char *buf, - unsigned int offset, unsigned int size, - write_copy_fn_t copy_fn) -{ - struct buffer_head *dibh; - int error; - - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - return error; - - error = copy_fn(ip, - dibh, &buf, - offset + sizeof(struct gfs2_dinode), size); - if (!error) { - if (ip->i_di.di_size < offset + size) - ip->i_di.di_size = offset + size; - ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); - gfs2_dinode_out(&ip->i_di, dibh->b_data); - } - - brelse(dibh); - - return (error) ? error : size; -} - -/** - * gfs2_jdata_write - Write bytes to a file - * @ip: The GFS2 inode - * @buf: The buffer containing information to be written - * @offset: The file offset to start writing at - * @size: The amount of data to write - * @copy_fn: Function to do the actual copying - * - * Returns: The number of bytes correctly written or error code - */ - -int gfs2_jdata_write(struct gfs2_inode *ip, const char __user *buf, uint64_t offset, - unsigned int size, write_copy_fn_t copy_fn) -{ - struct gfs2_sbd *sdp = ip->i_sbd; - struct buffer_head *dibh; - uint64_t lblock, dblock; - uint32_t extlen = 0; - unsigned int o; - int copied = 0; - int error = 0; - - if (!size) - return 0; - - if (gfs2_is_stuffed(ip) && - offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) - return jdata_write_stuffed(ip, buf, (unsigned int)offset, size, - copy_fn); - - if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip))) - return -EINVAL; - - if (gfs2_is_stuffed(ip)) { - error = gfs2_unstuff_dinode(ip, NULL, NULL); - if (error) - return error; - } - - lblock = offset; - o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header); - - while (copied < size) { - unsigned int amount; - struct buffer_head *bh; - int new; - - amount = size - copied; - if (amount > sdp->sd_sb.sb_bsize - o) - amount = sdp->sd_sb.sb_bsize - o; - - if (!extlen) { - new = 1; - error = gfs2_block_map(ip, lblock, &new, - &dblock, &extlen); - if (error) - goto fail; - error = -EIO; - if (gfs2_assert_withdraw(sdp, dblock)) - goto fail; - } - - error = gfs2_jdata_get_buffer(ip, dblock, - (amount == sdp->sd_jbsize) ? 1 : new, - &bh); - if (error) - goto fail; - - error = copy_fn(ip, bh, &buf, o, amount); - brelse(bh); - if (error) - goto fail; - - copied += amount; - lblock++; - dblock++; - extlen--; - - o = sizeof(struct gfs2_meta_header); - } - - out: - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - return error; - - if (ip->i_di.di_size < offset + copied) - ip->i_di.di_size = offset + copied; - ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); - - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); - brelse(dibh); - - return copied; - - fail: - if (copied) - goto out; - return error; -} - diff --git a/fs/gfs2/jdata.h b/fs/gfs2/jdata.h deleted file mode 100644 index 95e18fcb8f82501e80f7b210483c564bcb8d6a3c..0000000000000000000000000000000000000000 --- a/fs/gfs2/jdata.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. - */ - -#ifndef __FILE_DOT_H__ -#define __FILE_DOT_H__ - -int gfs2_jdata_get_buffer(struct gfs2_inode *ip, uint64_t block, int new, - struct buffer_head **bhp); - -typedef int (*read_copy_fn_t) (struct buffer_head *bh, char **buf, - unsigned int offset, unsigned int size); -typedef int (*write_copy_fn_t) (struct gfs2_inode *ip, - struct buffer_head *bh, const char **buf, - unsigned int offset, unsigned int size); - -int gfs2_copy2mem(struct buffer_head *bh, char **buf, - unsigned int offset, unsigned int size); -int gfs2_copy2user(struct buffer_head *bh, char __user **buf, - unsigned int offset, unsigned int size); -int gfs2_jdata_read(struct gfs2_inode *ip, char __user *buf, - uint64_t offset, unsigned int size, - read_copy_fn_t copy_fn); - -int gfs2_copy_from_mem(struct gfs2_inode *ip, - struct buffer_head *bh, const char **buf, - unsigned int offset, unsigned int size); -int gfs2_copy_from_user(struct gfs2_inode *ip, - struct buffer_head *bh, const char __user **buf, - unsigned int offset, unsigned int size); -int gfs2_jdata_write(struct gfs2_inode *ip, const char __user *buf, - uint64_t offset, unsigned int size, - write_copy_fn_t copy_fn); - -static inline int gfs2_jdata_read_mem(struct gfs2_inode *ip, char *buf, - uint64_t offset, unsigned int size) -{ - return gfs2_jdata_read(ip, (__force char __user *)buf, offset, size, gfs2_copy2mem); -} - -static inline int gfs2_jdata_write_mem(struct gfs2_inode *ip, const char *buf, - uint64_t offset, unsigned int size) -{ - return gfs2_jdata_write(ip, (__force const char __user *)buf, offset, size, gfs2_copy_from_mem); -} - -#endif /* __FILE_DOT_H__ */ diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index f6d00130f96fd60d5a0bba94826398dfd67e0093..9b4484d366caad28ba02fec3e12aa0d129f02e59 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -387,8 +387,7 @@ struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp, bh = lb->lb_bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL); atomic_set(&bh->b_count, 1); bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate); - set_bh_page(bh, virt_to_page(real->b_data), - ((unsigned long)real->b_data) & (PAGE_SIZE - 1)); + set_bh_page(bh, real->b_page, bh_offset(real)); bh->b_blocknr = blkno; bh->b_size = sdp->sd_sb.sb_bsize; bh->b_bdev = sdp->sd_vfs->s_bdev; @@ -634,6 +633,7 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp) gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf); + gfs2_assert_withdraw(sdp, !sdp->sd_log_num_jdata); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index a065f7667238c2840fa2b79eb1b2eff4a3cafbca..dd41863810d7cb8c8f9e6b93cec1e91c0ca763f3 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -428,49 +428,188 @@ static void rg_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) gfs2_assert_warn(sdp, !sdp->sd_log_num_rg); } +/** + * databuf_lo_add - Add a databuf to the transaction. + * + * This is used in two distinct cases: + * i) In ordered write mode + * We put the data buffer on a list so that we can ensure that its + * synced to disk at the right time + * ii) In journaled data mode + * We need to journal the data block in the same way as metadata in + * the functions above. The difference is that here we have a tag + * which is two __be64's being the block number (as per meta data) + * and a flag which says whether the data block needs escaping or + * not. This means we need a new log entry for each 251 or so data + * blocks, which isn't an enormous overhead but twice as much as + * for normal metadata blocks. + */ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) { - get_transaction->tr_touched = 1; + struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); + struct gfs2_trans *tr = get_transaction; + struct address_space *mapping = bd->bd_bh->b_page->mapping; + struct gfs2_inode *ip = get_v2ip(mapping->host); + tr->tr_touched = 1; + if (!list_empty(&bd->bd_list_tr) && + (ip->i_di.di_flags & GFS2_DIF_JDATA)) { + tr->tr_num_buf++; + gfs2_trans_add_gl(bd->bd_gl); + list_add(&bd->bd_list_tr, &tr->tr_list_buf); + gfs2_pin(sdp, bd->bd_bh); + } else { + clear_buffer_pinned(bd->bd_bh); + } gfs2_log_lock(sdp); + if (ip->i_di.di_flags & GFS2_DIF_JDATA) + sdp->sd_log_num_jdata++; sdp->sd_log_num_databuf++; list_add(&le->le_list, &sdp->sd_log_le_databuf); gfs2_log_unlock(sdp); } +static int gfs2_check_magic(struct buffer_head *bh) +{ + struct page *page = bh->b_page; + void *kaddr; + __be32 *ptr; + int rv = 0; + + kaddr = kmap_atomic(page, KM_USER0); + ptr = kaddr + bh_offset(bh); + if (*ptr == cpu_to_be32(GFS2_MAGIC)) + rv = 1; + kunmap_atomic(page, KM_USER0); + + return rv; +} + +/** + * databuf_lo_before_commit - Scan the data buffers, writing as we go + * + * Here we scan through the lists of buffers and make the assumption + * that any buffer thats been pinned is being journaled, and that + * any unpinned buffer is an ordered write data buffer and therefore + * will be written back rather than journaled. + */ static void databuf_lo_before_commit(struct gfs2_sbd *sdp) { - struct list_head *head = &sdp->sd_log_le_databuf; LIST_HEAD(started); - struct gfs2_bufdata *bd; - struct buffer_head *bh; + struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt; + struct buffer_head *bh = NULL; + unsigned int offset = sizeof(struct gfs2_log_descriptor); + struct gfs2_log_descriptor *ld; + unsigned int limit; + unsigned int total_dbuf = sdp->sd_log_num_databuf; + unsigned int total_jdata = sdp->sd_log_num_jdata; + unsigned int num, n; + __be64 *ptr; - while (!list_empty(head)) { - bd = list_entry(head->prev, struct gfs2_bufdata, bd_le.le_list); - list_move(&bd->bd_le.le_list, &started); + offset += (2*sizeof(__be64) - 1); + offset &= ~(2*sizeof(__be64) - 1); + limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64); - gfs2_log_lock(sdp); - bh = bd->bd_bh; + /* printk(KERN_INFO "totals: jdata=%u dbuf=%u\n", total_jdata, total_dbuf); */ + /* + * Start writing ordered buffers, write journaled buffers + * into the log along with a header + */ + bd2 = bd1 = list_prepare_entry(bd1, &sdp->sd_log_le_databuf, bd_le.le_list); + while(total_dbuf) { + num = total_jdata; + if (num > limit) + num = limit; + n = 0; + list_for_each_entry_safe_continue(bd1, bdt, &sdp->sd_log_le_databuf, bd_le.le_list) { + gfs2_log_lock(sdp); + /* An ordered write buffer */ + if (bd1->bd_bh && !buffer_pinned(bd1->bd_bh)) { + list_move(&bd1->bd_le.le_list, &started); + if (bd1 == bd2) { + bd2 = NULL; + bd2 = list_prepare_entry(bd2, &sdp->sd_log_le_databuf, bd_le.le_list); + } + total_dbuf--; + if (bd1->bd_bh) { + get_bh(bd1->bd_bh); + gfs2_log_unlock(sdp); + if (buffer_dirty(bd1->bd_bh)) { + wait_on_buffer(bd1->bd_bh); + ll_rw_block(WRITE, 1, &bd1->bd_bh); + } + brelse(bd1->bd_bh); + continue; + } + gfs2_log_unlock(sdp); + continue; + } else if (bd1->bd_bh) { /* A journaled buffer */ + int magic; + gfs2_log_unlock(sdp); + /* printk(KERN_INFO "journaled buffer\n"); */ + if (!bh) { + bh = gfs2_log_get_buf(sdp); + ld = (struct gfs2_log_descriptor *)bh->b_data; + ptr = (__be64 *)(bh->b_data + offset); + ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC); + ld->ld_header.mh_type = cpu_to_be16(GFS2_METATYPE_LD); + ld->ld_header.mh_format = cpu_to_be16(GFS2_FORMAT_LD); + ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_JDATA); + ld->ld_length = cpu_to_be32(num + 1); + ld->ld_data1 = cpu_to_be32(num); + ld->ld_data2 = cpu_to_be32(0); + memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved)); + } + magic = gfs2_check_magic(bd1->bd_bh); + *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr); + *ptr++ = cpu_to_be64((__u64)magic); + clear_buffer_escaped(bd1->bd_bh); + if (unlikely(magic != 0)) + set_buffer_escaped(bd1->bd_bh); + if (n++ > num) + break; + } + } if (bh) { - get_bh(bh); - gfs2_log_unlock(sdp); - if (buffer_dirty(bh)) { - wait_on_buffer(bh); - ll_rw_block(WRITE, 1, &bh); + set_buffer_dirty(bh); + ll_rw_block(WRITE, 1, &bh); + bh = NULL; + } + n = 0; + /* printk(KERN_INFO "totals2: jdata=%u dbuf=%u\n", total_jdata, total_dbuf); */ + list_for_each_entry_continue(bd2, &sdp->sd_log_le_databuf, bd_le.le_list) { + if (!bd2->bd_bh) + continue; + /* copy buffer if it needs escaping */ + if (unlikely(buffer_escaped(bd2->bd_bh))) { + void *kaddr; + struct page *page = bd2->bd_bh->b_page; + bh = gfs2_log_get_buf(sdp); + kaddr = kmap_atomic(page, KM_USER0); + memcpy(bh->b_data, kaddr + bh_offset(bd2->bd_bh), sdp->sd_sb.sb_bsize); + kunmap_atomic(page, KM_USER0); + *(__be32 *)bh->b_data = 0; + } else { + bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); } - brelse(bh); - } else - gfs2_log_unlock(sdp); + set_buffer_dirty(bh); + ll_rw_block(WRITE, 1, &bh); + if (++n >= num) + break; + } + bh = NULL; + total_dbuf -= num; + total_jdata -= num; } - + /* printk(KERN_INFO "wait on ordered data buffers\n"); */ + /* Wait on all ordered buffers */ while (!list_empty(&started)) { - bd = list_entry(started.next, struct gfs2_bufdata, - bd_le.le_list); - list_del(&bd->bd_le.le_list); + bd1 = list_entry(started.next, struct gfs2_bufdata, bd_le.le_list); + list_del(&bd1->bd_le.le_list); sdp->sd_log_num_databuf--; gfs2_log_lock(sdp); - bh = bd->bd_bh; + bh = bd1->bd_bh; if (bh) { set_v2bd(bh, NULL); gfs2_log_unlock(sdp); @@ -479,12 +618,103 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp) } else gfs2_log_unlock(sdp); - kfree(bd); + kfree(bd1); } + /* printk(KERN_INFO "sd_log_num_databuf %u sd_log_num_jdata %u\n", sdp->sd_log_num_databuf, sdp->sd_log_num_jdata); */ + /* We've removed all the ordered write bufs here, so only jdata left */ + gfs2_assert_warn(sdp, sdp->sd_log_num_databuf == sdp->sd_log_num_jdata); +} + +static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, + struct gfs2_log_descriptor *ld, + __be64 *ptr, int pass) +{ + struct gfs2_sbd *sdp = jd->jd_inode->i_sbd; + struct gfs2_glock *gl = jd->jd_inode->i_gl; + unsigned int blks = be32_to_cpu(ld->ld_data1); + struct buffer_head *bh_log, *bh_ip; + uint64_t blkno; + uint64_t esc; + int error = 0; + + if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA) + return 0; + + gfs2_replay_incr_blk(sdp, &start); + for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) { + blkno = be64_to_cpu(*ptr++); + esc = be64_to_cpu(*ptr++); + + sdp->sd_found_blocks++; + + if (gfs2_revoke_check(sdp, blkno, start)) + continue; + + error = gfs2_replay_read_block(jd, start, &bh_log); + if (error) + return error; + + bh_ip = gfs2_meta_new(gl, blkno); + memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size); + + /* Unescape */ + if (esc) { + __be32 *eptr = (__be32 *)bh_ip->b_data; + *eptr = cpu_to_be32(GFS2_MAGIC); + } + mark_buffer_dirty(bh_ip); + + brelse(bh_log); + brelse(bh_ip); + if (error) + break; + + sdp->sd_replayed_blocks++; + } + + return error; +} + +/* FIXME: sort out accounting for log blocks etc. */ + +static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) +{ + struct gfs2_sbd *sdp = jd->jd_inode->i_sbd; + + if (error) { + gfs2_meta_sync(jd->jd_inode->i_gl, DIO_START | DIO_WAIT); + return; + } + if (pass != 1) + return; + + /* data sync? */ + gfs2_meta_sync(jd->jd_inode->i_gl, DIO_START | DIO_WAIT); + + fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n", + jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks); +} + +static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) +{ + struct list_head *head = &sdp->sd_log_le_databuf; + struct gfs2_bufdata *bd; + + while (!list_empty(head)) { + bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list); + list_del_init(&bd->bd_le.le_list); + sdp->sd_log_num_databuf--; + sdp->sd_log_num_jdata--; + gfs2_unpin(sdp, bd->bd_bh, ai); + brelse(bd->bd_bh); + kfree(bd); + } gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf); + gfs2_assert_warn(sdp, !sdp->sd_log_num_jdata); } + struct gfs2_log_operations gfs2_glock_lops = { .lo_add = glock_lo_add, .lo_after_commit = glock_lo_after_commit, @@ -519,7 +749,11 @@ struct gfs2_log_operations gfs2_rg_lops = { struct gfs2_log_operations gfs2_databuf_lops = { .lo_add = databuf_lo_add, + .lo_incore_commit = buf_lo_incore_commit, .lo_before_commit = databuf_lo_before_commit, + .lo_after_commit = databuf_lo_after_commit, + .lo_scan_elements = databuf_lo_scan_elements, + .lo_after_scan = databuf_lo_after_scan, .lo_name = "databuf" }; diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index b6bd2ebfc2cc808153df0998f7904076cf541e78..ef58d43b67eeb85f86be48fcb74bc8a3084527e9 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -547,10 +547,12 @@ void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh, int meta { struct gfs2_bufdata *bd; - lock_page(bh->b_page); + if (meta) + lock_page(bh->b_page); if (get_v2bd(bh)) { - unlock_page(bh->b_page); + if (meta) + unlock_page(bh->b_page); return; } @@ -563,14 +565,16 @@ void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh, int meta bd->bd_gl = gl; INIT_LIST_HEAD(&bd->bd_list_tr); - if (meta) + if (meta) { lops_init_le(&bd->bd_le, &gfs2_buf_lops); - else + } else { lops_init_le(&bd->bd_le, &gfs2_databuf_lops); - + get_bh(bh); + } set_v2bd(bh, bd); - unlock_page(bh->b_page); + if (meta) + unlock_page(bh->b_page); } /** diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index d611b2ad2e97f087c0bb91863698b738009075fd..b14357e89421dbb8e61701c8302fec7752b9c191 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -20,13 +20,13 @@ #include "bmap.h" #include "glock.h" #include "inode.h" -#include "jdata.h" #include "log.h" #include "meta_io.h" #include "ops_address.h" #include "page.h" #include "quota.h" #include "trans.h" +#include "rgrp.h" /** * gfs2_get_block - Fills in a buffer head with details about a block @@ -149,33 +149,55 @@ static int get_blocks_noalloc(struct inode *inode, sector_t lblock, * * Returns: errno * - * Use Linux VFS block_write_full_page() to write one page, - * using GFS2's get_block_noalloc to find which blocks to write. + * Some of this is copied from block_write_full_page() although we still + * call it to do most of the work. */ static int gfs2_writepage(struct page *page, struct writeback_control *wbc) { + struct inode *inode = page->mapping->host; struct gfs2_inode *ip = get_v2ip(page->mapping->host); struct gfs2_sbd *sdp = ip->i_sbd; + loff_t i_size = i_size_read(inode); + pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; + unsigned offset; int error; + int done_trans = 0; atomic_inc(&sdp->sd_ops_address); - if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) { unlock_page(page); return -EIO; } - if (get_transaction) { - redirty_page_for_writepage(wbc, page); + if (get_transaction) + goto out_ignore; + + /* Is the page fully outside i_size? (truncate in progress) */ + offset = i_size & (PAGE_CACHE_SIZE-1); + if (page->index >= end_index+1 || !offset) { + page->mapping->a_ops->invalidatepage(page, 0); unlock_page(page); - return 0; + return 0; /* don't care */ } - error = block_write_full_page(page, get_block_noalloc, wbc); + if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) { + error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0); + if (error) + goto out_ignore; + gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1); + done_trans = 1; + } + error = block_write_full_page(page, get_block_noalloc, wbc); + if (done_trans) + gfs2_trans_end(sdp); gfs2_meta_cache_flush(ip); - return error; + +out_ignore: + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return 0; } /** @@ -226,41 +248,10 @@ static int zero_readpage(struct page *page) return 0; } -/** - * jdata_readpage - readpage that goes through gfs2_jdata_read_mem() - * @ip: - * @page: The page to read - * - * Returns: errno - */ - -static int jdata_readpage(struct gfs2_inode *ip, struct page *page) -{ - void *kaddr; - int ret; - - kaddr = kmap(page); - - ret = gfs2_jdata_read_mem(ip, kaddr, - (uint64_t)page->index << PAGE_CACHE_SHIFT, - PAGE_CACHE_SIZE); - if (ret >= 0) { - if (ret < PAGE_CACHE_SIZE) - memset(kaddr + ret, 0, PAGE_CACHE_SIZE - ret); - SetPageUptodate(page); - ret = 0; - } - - kunmap(page); - - unlock_page(page); - - return ret; -} - /** * gfs2_readpage - readpage with locking - * @file: The file to read a page for + * @file: The file to read a page for. N.B. This may be NULL if we are + * reading an internal file. * @page: The page to read * * Returns: errno @@ -270,31 +261,35 @@ static int gfs2_readpage(struct file *file, struct page *page) { struct gfs2_inode *ip = get_v2ip(page->mapping->host); struct gfs2_sbd *sdp = ip->i_sbd; + struct gfs2_holder gh; int error; atomic_inc(&sdp->sd_ops_address); - if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl))) { - unlock_page(page); - return -EOPNOTSUPP; - } + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); + error = gfs2_glock_nq_m_atime(1, &gh); + if (error) + goto out_unlock; - if (!gfs2_is_jdata(ip)) { - if (gfs2_is_stuffed(ip)) { - if (!page->index) { - error = stuffed_readpage(ip, page); - unlock_page(page); - } else - error = zero_readpage(page); + if (gfs2_is_stuffed(ip)) { + if (!page->index) { + error = stuffed_readpage(ip, page); + unlock_page(page); } else - error = mpage_readpage(page, gfs2_get_block); + error = zero_readpage(page); } else - error = jdata_readpage(ip, page); + error = mpage_readpage(page, gfs2_get_block); if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) error = -EIO; + gfs2_glock_dq_m(1, &gh); + gfs2_holder_uninit(&gh); +out: return error; +out_unlock: + unlock_page(page); + goto out; } /** @@ -312,28 +307,82 @@ static int gfs2_prepare_write(struct file *file, struct page *page, { struct gfs2_inode *ip = get_v2ip(page->mapping->host); struct gfs2_sbd *sdp = ip->i_sbd; + unsigned int data_blocks, ind_blocks, rblocks; + int alloc_required; int error = 0; + loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + from; + loff_t end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + struct gfs2_alloc *al; atomic_inc(&sdp->sd_ops_address); - if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl))) - return -EOPNOTSUPP; + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &ip->i_gh); + error = gfs2_glock_nq_m_atime(1, &ip->i_gh); + if (error) + goto out_uninit; - if (gfs2_is_stuffed(ip)) { - uint64_t file_size; - file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to; + gfs2_write_calc_reserv(ip, to - from, &data_blocks, &ind_blocks); + + error = gfs2_write_alloc_required(ip, pos, from - to, &alloc_required); + if (error) + goto out_unlock; - if (file_size > sdp->sd_sb.sb_bsize - - sizeof(struct gfs2_dinode)) { - error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page, - page); - if (!error) - error = block_prepare_write(page, from, to, - gfs2_get_block); - } else if (!PageUptodate(page)) + + if (alloc_required) { + al = gfs2_alloc_get(ip); + + error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto out_alloc_put; + + error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid); + if (error) + goto out_qunlock; + + al->al_requested = data_blocks + ind_blocks; + error = gfs2_inplace_reserve(ip); + if (error) + goto out_qunlock; + } + + rblocks = RES_DINODE + ind_blocks; + if (gfs2_is_jdata(ip)) + rblocks += data_blocks ? data_blocks : 1; + if (ind_blocks || data_blocks) + rblocks += RES_STATFS + RES_QUOTA; + + error = gfs2_trans_begin(sdp, rblocks, 0); + if (error) + goto out; + + if (gfs2_is_stuffed(ip)) { + if (end > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { + error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page, page); + if (error) + goto out; + } else if (!PageUptodate(page)) { error = stuffed_readpage(ip, page); - } else - error = block_prepare_write(page, from, to, gfs2_get_block); + goto out; + } + } + + error = block_prepare_write(page, from, to, gfs2_get_block); + +out: + if (error) { + gfs2_trans_end(sdp); + if (alloc_required) { + gfs2_inplace_release(ip); +out_qunlock: + gfs2_quota_unlock(ip); +out_alloc_put: + gfs2_alloc_put(ip); + } +out_unlock: + gfs2_glock_dq_m(1, &ip->i_gh); +out_uninit: + gfs2_holder_uninit(&ip->i_gh); + } return error; } @@ -354,48 +403,73 @@ static int gfs2_commit_write(struct file *file, struct page *page, struct inode *inode = page->mapping->host; struct gfs2_inode *ip = get_v2ip(inode); struct gfs2_sbd *sdp = ip->i_sbd; - int error; + int error = -EOPNOTSUPP; + struct buffer_head *dibh; + struct gfs2_alloc *al = &ip->i_alloc;; atomic_inc(&sdp->sd_ops_address); + + if (gfs2_assert_withdraw(sdp, gfs2_glock_is_locked_by_me(ip->i_gl))) + goto fail_nounlock; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto fail_endtrans; + + gfs2_trans_add_bh(ip->i_gl, dibh, 1); + if (gfs2_is_stuffed(ip)) { - struct buffer_head *dibh; uint64_t file_size; void *kaddr; file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to; - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - goto fail; - - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - - kaddr = kmap(page); + kaddr = kmap_atomic(page, KM_USER0); memcpy(dibh->b_data + sizeof(struct gfs2_dinode) + from, - (char *)kaddr + from, - to - from); - kunmap(page); - - brelse(dibh); + (char *)kaddr + from, to - from); + kunmap_atomic(page, KM_USER0); SetPageUptodate(page); if (inode->i_size < file_size) i_size_write(inode, file_size); } else { - if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED) + if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) gfs2_page_add_databufs(ip, page, from, to); error = generic_commit_write(file, page, from, to); if (error) goto fail; } + if (ip->i_di.di_size < inode->i_size) + ip->i_di.di_size = inode->i_size; + + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + gfs2_trans_end(sdp); + if (al->al_requested) { + gfs2_inplace_release(ip); + gfs2_quota_unlock(ip); + gfs2_alloc_put(ip); + } + gfs2_glock_dq_m(1, &ip->i_gh); + gfs2_holder_uninit(&ip->i_gh); return 0; - fail: +fail: + brelse(dibh); +fail_endtrans: + gfs2_trans_end(sdp); + if (al->al_requested) { + gfs2_inplace_release(ip); + gfs2_quota_unlock(ip); + gfs2_alloc_put(ip); + } + gfs2_glock_dq_m(1, &ip->i_gh); + gfs2_holder_uninit(&ip->i_gh); +fail_nounlock: ClearPageUptodate(page); - return error; } @@ -492,12 +566,16 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *io atomic_inc(&sdp->sd_ops_address); - if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)) || - gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip))) + if (gfs2_is_jdata(ip)) return -EINVAL; - if (rw == WRITE && !get_transaction) - gb = get_blocks_noalloc; + if (rw == WRITE) { + return -EOPNOTSUPP; /* for now */ + } else { + if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)) || + gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip))) + return -EINVAL; + } return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, gb, NULL); diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 0f356fc4690ca6ba9e8a3bb6fcb2b610d1099342..56820b39a9934f9caffb94daa722912114a7ae43 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -27,7 +28,6 @@ #include "glock.h" #include "glops.h" #include "inode.h" -#include "jdata.h" #include "lm.h" #include "log.h" #include "meta_io.h" @@ -67,10 +67,37 @@ struct filldir_reg { void *fdr_opaque; }; -typedef ssize_t(*do_rw_t) (struct file *file, - char __user *buf, - size_t size, loff_t *offset, - unsigned int num_gh, struct gfs2_holder *ghs); +static int gfs2_read_actor(read_descriptor_t *desc, struct page *page, + unsigned long offset, unsigned long size) +{ + char *kaddr; + unsigned long count = desc->count; + + if (size > count) + size = count; + + kaddr = kmap(page); + memcpy(desc->arg.buf, kaddr + offset, size); + kunmap(page); + + desc->count = count - size; + desc->written += size; + desc->arg.buf += size; + return size; +} + +int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state, + char *buf, loff_t *pos, unsigned size) +{ + struct inode *inode = ip->i_vnode; + read_descriptor_t desc; + desc.written = 0; + desc.arg.buf = buf; + desc.count = size; + desc.error = 0; + do_generic_mapping_read(inode->i_mapping, ra_state, NULL, pos, &desc, gfs2_read_actor); + return desc.written ? desc.written : desc.error; +} /** * gfs2_llseek - seek to a location in a file @@ -105,247 +132,114 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin) return error; } -static inline unsigned int vma2state(struct vm_area_struct *vma) -{ - if ((vma->vm_flags & (VM_MAYWRITE | VM_MAYSHARE)) == - (VM_MAYWRITE | VM_MAYSHARE)) - return LM_ST_EXCLUSIVE; - return LM_ST_SHARED; -} -static ssize_t walk_vm_hard(struct file *file, const char __user *buf, size_t size, - loff_t *offset, do_rw_t operation) +static ssize_t gfs2_direct_IO_read(struct kiocb *iocb, const struct iovec *iov, + loff_t offset, unsigned long nr_segs) { - struct gfs2_holder *ghs; - unsigned int num_gh = 0; - ssize_t count; - struct super_block *sb = file->f_dentry->d_inode->i_sb; - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - unsigned long start = (unsigned long)buf; - unsigned long end = start + size; - int dumping = (current->flags & PF_DUMPCORE); - unsigned int x = 0; - - for (vma = find_vma(mm, start); vma; vma = vma->vm_next) { - if (end <= vma->vm_start) - break; - if (vma->vm_file && - vma->vm_file->f_dentry->d_inode->i_sb == sb) { - num_gh++; - } - } - - ghs = kcalloc((num_gh + 1), sizeof(struct gfs2_holder), GFP_KERNEL); - if (!ghs) { - if (!dumping) - up_read(&mm->mmap_sem); - return -ENOMEM; - } + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + ssize_t retval; - for (vma = find_vma(mm, start); vma; vma = vma->vm_next) { - if (end <= vma->vm_start) - break; - if (vma->vm_file) { - struct inode *inode = vma->vm_file->f_dentry->d_inode; - if (inode->i_sb == sb) - gfs2_holder_init(get_v2ip(inode)->i_gl, - vma2state(vma), 0, &ghs[x++]); - } + retval = filemap_write_and_wait(mapping); + if (retval == 0) { + retval = mapping->a_ops->direct_IO(READ, iocb, iov, offset, + nr_segs); } - - if (!dumping) - up_read(&mm->mmap_sem); - - gfs2_assert(get_v2sdp(sb), x == num_gh); - - count = operation(file, buf, size, offset, num_gh, ghs); - - while (num_gh--) - gfs2_holder_uninit(&ghs[num_gh]); - kfree(ghs); - - return count; + return retval; } /** - * walk_vm - Walk the vmas associated with a buffer for read or write. - * If any of them are gfs2, pass the gfs2 inode down to the read/write - * worker function so that locks can be acquired in the correct order. - * @file: The file to read/write from/to - * @buf: The buffer to copy to/from - * @size: The amount of data requested - * @offset: The current file offset - * @operation: The read or write worker function - * - * Outputs: Offset - updated according to number of bytes written - * - * Returns: The number of bytes written, errno on failure + * __gfs2_file_aio_read - The main GFS2 read function + * + * N.B. This is almost, but not quite the same as __generic_file_aio_read() + * the important subtle different being that inode->i_size isn't valid + * unless we are holding a lock, and we do this _only_ on the O_DIRECT + * path since otherwise locking is done entirely at the page cache + * layer. */ - -static ssize_t walk_vm(struct file *file, const char __user *buf, size_t size, - loff_t *offset, do_rw_t operation) +static ssize_t __gfs2_file_aio_read(struct kiocb *iocb, + const struct iovec *iov, + unsigned long nr_segs, loff_t *ppos) { + struct file *filp = iocb->ki_filp; + struct gfs2_inode *ip = get_v2ip(filp->f_mapping->host); struct gfs2_holder gh; - - if (current->mm) { - struct super_block *sb = file->f_dentry->d_inode->i_sb; - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - unsigned long start = (unsigned long)buf; - unsigned long end = start + size; - int dumping = (current->flags & PF_DUMPCORE); - - if (!dumping) - down_read(&mm->mmap_sem); - - for (vma = find_vma(mm, start); vma; vma = vma->vm_next) { - if (end <= vma->vm_start) - break; - if (vma->vm_file && - vma->vm_file->f_dentry->d_inode->i_sb == sb) - goto do_locks; - } - - if (!dumping) - up_read(&mm->mmap_sem); - } - - return operation(file, buf, size, offset, 0, &gh); - -do_locks: - return walk_vm_hard(file, buf, size, offset, operation); -} - -static ssize_t do_jdata_read(struct file *file, char __user *buf, size_t size, - loff_t *offset) -{ - struct gfs2_inode *ip = get_v2ip(file->f_mapping->host); - ssize_t count = 0; - - if (*offset < 0) + ssize_t retval; + unsigned long seg; + size_t count; + + count = 0; + for (seg = 0; seg < nr_segs; seg++) { + const struct iovec *iv = &iov[seg]; + + /* + * If any segment has a negative length, or the cumulative + * length ever wraps negative then return -EINVAL. + */ + count += iv->iov_len; + if (unlikely((ssize_t)(count|iv->iov_len) < 0)) return -EINVAL; - if (!access_ok(VERIFY_WRITE, buf, size)) + if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len)) + continue; + if (seg == 0) return -EFAULT; + nr_segs = seg; + count -= iv->iov_len; /* This segment is no good */ + break; + } + + /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ + if (filp->f_flags & O_DIRECT) { + loff_t pos = *ppos, size; + struct address_space *mapping; + struct inode *inode; + + mapping = filp->f_mapping; + inode = mapping->host; + retval = 0; + if (!count) + goto out; /* skip atime */ + + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); + retval = gfs2_glock_nq_m_atime(1, &gh); + if (retval) + goto out; - if (!(file->f_flags & O_LARGEFILE)) { - if (*offset >= MAX_NON_LFS) - return -EFBIG; - if (*offset + size > MAX_NON_LFS) - size = MAX_NON_LFS - *offset; - } - - count = gfs2_jdata_read(ip, buf, *offset, size, gfs2_copy2user); - - if (count > 0) - *offset += count; - - return count; -} - -/** - * do_read_direct - Read bytes from a file - * @file: The file to read from - * @buf: The buffer to copy into - * @size: The amount of data requested - * @offset: The current file offset - * @num_gh: The number of other locks we need to do the read - * @ghs: the locks we need plus one for our lock - * - * Outputs: Offset - updated according to number of bytes read - * - * Returns: The number of bytes read, errno on failure - */ - -static ssize_t do_read_direct(struct file *file, char __user *buf, size_t size, - loff_t *offset, unsigned int num_gh, - struct gfs2_holder *ghs) -{ - struct inode *inode = file->f_mapping->host; - struct gfs2_inode *ip = get_v2ip(inode); - unsigned int state = LM_ST_DEFERRED; - int flags = 0; - unsigned int x; - ssize_t count = 0; - int error; - - for (x = 0; x < num_gh; x++) - if (ghs[x].gh_gl == ip->i_gl) { - state = LM_ST_SHARED; - flags |= GL_LOCAL_EXCL; - break; + size = i_size_read(inode); + if (pos < size) { + retval = gfs2_direct_IO_read(iocb, iov, pos, nr_segs); + if (retval > 0 && !is_sync_kiocb(iocb)) + retval = -EIOCBQUEUED; + if (retval > 0) + *ppos = pos + retval; } - - gfs2_holder_init(ip->i_gl, state, flags, &ghs[num_gh]); - - error = gfs2_glock_nq_m(num_gh + 1, ghs); - if (error) + file_accessed(filp); + gfs2_glock_dq_m(1, &gh); + gfs2_holder_uninit(&gh); goto out; + } - error = -EINVAL; - if (gfs2_is_jdata(ip)) - goto out_gunlock; - - if (gfs2_is_stuffed(ip)) { - size_t mask = bdev_hardsect_size(inode->i_sb->s_bdev) - 1; - - if (((*offset) & mask) || (((unsigned long)buf) & mask)) - goto out_gunlock; - - count = do_jdata_read(file, buf, size & ~mask, offset); - } else - count = generic_file_read(file, buf, size, offset); - - error = 0; - - out_gunlock: - gfs2_glock_dq_m(num_gh + 1, ghs); - - out: - gfs2_holder_uninit(&ghs[num_gh]); - - return (count) ? count : error; -} - -/** - * do_read_buf - Read bytes from a file - * @file: The file to read from - * @buf: The buffer to copy into - * @size: The amount of data requested - * @offset: The current file offset - * @num_gh: The number of other locks we need to do the read - * @ghs: the locks we need plus one for our lock - * - * Outputs: Offset - updated according to number of bytes read - * - * Returns: The number of bytes read, errno on failure - */ - -static ssize_t do_read_buf(struct file *file, char __user *buf, size_t size, - loff_t *offset, unsigned int num_gh, - struct gfs2_holder *ghs) -{ - struct gfs2_inode *ip = get_v2ip(file->f_mapping->host); - ssize_t count = 0; - int error; - - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &ghs[num_gh]); - - error = gfs2_glock_nq_m_atime(num_gh + 1, ghs); - if (error) - goto out; - - if (gfs2_is_jdata(ip)) - count = do_jdata_read(file, buf, size, offset); - else - count = generic_file_read(file, buf, size, offset); - - gfs2_glock_dq_m(num_gh + 1, ghs); - - out: - gfs2_holder_uninit(&ghs[num_gh]); - - return (count) ? count : error; + retval = 0; + if (count) { + for (seg = 0; seg < nr_segs; seg++) { + read_descriptor_t desc; + + desc.written = 0; + desc.arg.buf = iov[seg].iov_base; + desc.count = iov[seg].iov_len; + if (desc.count == 0) + continue; + desc.error = 0; + do_generic_file_read(filp,ppos,&desc,file_read_actor); + retval += desc.written; + if (desc.error) { + retval = retval ?: desc.error; + break; + } + } + } +out: + return retval; } /** @@ -360,550 +254,49 @@ static ssize_t do_read_buf(struct file *file, char __user *buf, size_t size, * Returns: The number of bytes read, errno on failure */ -static ssize_t gfs2_read(struct file *file, char __user *buf, size_t size, +static ssize_t gfs2_read(struct file *filp, char __user *buf, size_t size, loff_t *offset) { - atomic_inc(&get_v2sdp(file->f_mapping->host->i_sb)->sd_ops_file); - - if (file->f_flags & O_DIRECT) - return walk_vm(file, buf, size, offset, do_read_direct); - else - return walk_vm(file, buf, size, offset, do_read_buf); -} - -/** - * grope_mapping - feel up a mapping that needs to be written - * @buf: the start of the memory to be written - * @size: the size of the memory to be written - * - * We do this after acquiring the locks on the mapping, - * but before starting the write transaction. We need to make - * sure that we don't cause recursive transactions if blocks - * need to be allocated to the file backing the mapping. - * - * Returns: errno - */ - -static int grope_mapping(const char __user *buf, size_t size) -{ - const char __user *stop = buf + size; - char c; - - while (buf < stop) { - if (copy_from_user(&c, buf, 1)) - return -EFAULT; - buf += PAGE_CACHE_SIZE; - buf = (const char __user *)PAGE_ALIGN((unsigned long)buf); - } - - return 0; -} - -/** - * do_write_direct_alloc - Write bytes to a file - * @file: The file to write to - * @buf: The buffer to copy from - * @size: The amount of data requested - * @offset: The current file offset - * - * Outputs: Offset - updated according to number of bytes written - * - * Returns: The number of bytes written, errno on failure - */ - -static ssize_t do_write_direct_alloc(struct file *file, const char __user *buf, size_t size, - loff_t *offset) -{ - struct inode *inode = file->f_mapping->host; - struct gfs2_inode *ip = get_v2ip(inode); - struct gfs2_sbd *sdp = ip->i_sbd; - struct gfs2_alloc *al = NULL; struct iovec local_iov = { .iov_base = buf, .iov_len = size }; - struct buffer_head *dibh; - unsigned int data_blocks, ind_blocks; - ssize_t count; - int error; - - gfs2_write_calc_reserv(ip, size, &data_blocks, &ind_blocks); - - al = gfs2_alloc_get(ip); - - error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); - if (error) - goto fail; - - error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid); - if (error) - goto fail_gunlock_q; - - al->al_requested = data_blocks + ind_blocks; - - error = gfs2_inplace_reserve(ip); - if (error) - goto fail_gunlock_q; - - error = gfs2_trans_begin(sdp, - al->al_rgd->rd_ri.ri_length + ind_blocks + - RES_DINODE + RES_STATFS + RES_QUOTA, 0); - if (error) - goto fail_ipres; - - if ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)) { - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - goto fail_end_trans; - - ip->i_di.di_mode &= (ip->i_di.di_mode & S_IXGRP) ? - (~(S_ISUID | S_ISGID)) : (~S_ISUID); - - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); - brelse(dibh); - } - - if (gfs2_is_stuffed(ip)) { - error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_sync, NULL); - if (error) - goto fail_end_trans; - } - - count = generic_file_write_nolock(file, &local_iov, 1, offset); - if (count < 0) { - error = count; - goto fail_end_trans; - } - - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - goto fail_end_trans; - - if (ip->i_di.di_size < inode->i_size) - ip->i_di.di_size = inode->i_size; - ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); - - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); - brelse(dibh); - - gfs2_trans_end(sdp); + struct kiocb kiocb; + ssize_t ret; - if (file->f_flags & O_SYNC) - gfs2_log_flush_glock(ip->i_gl); - - gfs2_inplace_release(ip); - gfs2_quota_unlock(ip); - gfs2_alloc_put(ip); - - if (file->f_mapping->nrpages) { - error = filemap_fdatawrite(file->f_mapping); - if (!error) - error = filemap_fdatawait(file->f_mapping); - } - if (error) - return error; - - return count; - - fail_end_trans: - gfs2_trans_end(sdp); - - fail_ipres: - gfs2_inplace_release(ip); - - fail_gunlock_q: - gfs2_quota_unlock(ip); - - fail: - gfs2_alloc_put(ip); + atomic_inc(&get_v2sdp(filp->f_mapping->host->i_sb)->sd_ops_file); - return error; -} - -/** - * do_write_direct - Write bytes to a file - * @file: The file to write to - * @buf: The buffer to copy from - * @size: The amount of data requested - * @offset: The current file offset - * @num_gh: The number of other locks we need to do the read - * @gh: the locks we need plus one for our lock - * - * Outputs: Offset - updated according to number of bytes written - * - * Returns: The number of bytes written, errno on failure - */ - -static ssize_t do_write_direct(struct file *file, const char __user *buf, size_t size, - loff_t *offset, unsigned int num_gh, - struct gfs2_holder *ghs) -{ - struct gfs2_inode *ip = get_v2ip(file->f_mapping->host); - struct gfs2_sbd *sdp = ip->i_sbd; - struct gfs2_file *fp = get_v2fp(file); - unsigned int state = LM_ST_DEFERRED; - int alloc_required; - unsigned int x; - size_t s; - ssize_t count = 0; - int error; - - if (test_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags)) - state = LM_ST_EXCLUSIVE; - else - for (x = 0; x < num_gh; x++) - if (ghs[x].gh_gl == ip->i_gl) { - state = LM_ST_EXCLUSIVE; - break; - } - - restart: - gfs2_holder_init(ip->i_gl, state, 0, &ghs[num_gh]); - - error = gfs2_glock_nq_m(num_gh + 1, ghs); - if (error) - goto out; - - error = -EINVAL; - if (gfs2_is_jdata(ip)) - goto out_gunlock; - - if (num_gh) { - error = grope_mapping(buf, size); - if (error) - goto out_gunlock; - } - - if (file->f_flags & O_APPEND) - *offset = ip->i_di.di_size; - - if (!(file->f_flags & O_LARGEFILE)) { - error = -EFBIG; - if (*offset >= MAX_NON_LFS) - goto out_gunlock; - if (*offset + size > MAX_NON_LFS) - size = MAX_NON_LFS - *offset; - } - - if (gfs2_is_stuffed(ip) || - *offset + size > ip->i_di.di_size || - ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID))) - alloc_required = 1; - else { - error = gfs2_write_alloc_required(ip, *offset, size, - &alloc_required); - if (error) - goto out_gunlock; - } - - if (alloc_required && state != LM_ST_EXCLUSIVE) { - gfs2_glock_dq_m(num_gh + 1, ghs); - gfs2_holder_uninit(&ghs[num_gh]); - state = LM_ST_EXCLUSIVE; - goto restart; - } - - if (alloc_required) { - set_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags); - - /* split large writes into smaller atomic transactions */ - while (size) { - s = gfs2_tune_get(sdp, gt_max_atomic_write); - if (s > size) - s = size; - - error = do_write_direct_alloc(file, buf, s, offset); - if (error < 0) - goto out_gunlock; - - buf += error; - size -= error; - count += error; - } - } else { - struct iovec local_iov = { .iov_base = buf, .iov_len = size }; - struct gfs2_holder t_gh; - - clear_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags); - - error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, - GL_NEVER_RECURSE, &t_gh); - if (error) - goto out_gunlock; - - count = generic_file_write_nolock(file, &local_iov, 1, offset); - - gfs2_glock_dq_uninit(&t_gh); - } - - error = 0; - - out_gunlock: - gfs2_glock_dq_m(num_gh + 1, ghs); - - out: - gfs2_holder_uninit(&ghs[num_gh]); - - return (count) ? count : error; + init_sync_kiocb(&kiocb, filp); + ret = __gfs2_file_aio_read(&kiocb, &local_iov, 1, offset); + if (-EIOCBQUEUED == ret) + ret = wait_on_sync_kiocb(&kiocb); + return ret; } -/** - * do_do_write_buf - Write bytes to a file - * @file: The file to write to - * @buf: The buffer to copy from - * @size: The amount of data requested - * @offset: The current file offset - * - * Outputs: Offset - updated according to number of bytes written - * - * Returns: The number of bytes written, errno on failure - */ - -static ssize_t do_do_write_buf(struct file *file, const char __user *buf, size_t size, - loff_t *offset) +static ssize_t gfs2_file_readv(struct file *filp, const struct iovec *iov, + unsigned long nr_segs, loff_t *ppos) { - struct inode *inode = file->f_mapping->host; - struct gfs2_inode *ip = get_v2ip(inode); - struct gfs2_sbd *sdp = ip->i_sbd; - struct gfs2_alloc *al = NULL; - struct buffer_head *dibh; - unsigned int data_blocks, ind_blocks; - int alloc_required, journaled; - ssize_t count; - int error; - - journaled = gfs2_is_jdata(ip); - - gfs2_write_calc_reserv(ip, size, &data_blocks, &ind_blocks); - - error = gfs2_write_alloc_required(ip, *offset, size, &alloc_required); - if (error) - return error; - - if (alloc_required) { - al = gfs2_alloc_get(ip); - - error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); - if (error) - goto fail; - - error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid); - if (error) - goto fail_gunlock_q; - - al->al_requested = data_blocks + ind_blocks; - - error = gfs2_inplace_reserve(ip); - if (error) - goto fail_gunlock_q; - - error = gfs2_trans_begin(sdp, - al->al_rgd->rd_ri.ri_length + - ind_blocks + - ((journaled) ? data_blocks : 0) + - RES_DINODE + RES_STATFS + RES_QUOTA, - 0); - if (error) - goto fail_ipres; - } else { - error = gfs2_trans_begin(sdp, - ((journaled) ? data_blocks : 0) + - RES_DINODE, - 0); - if (error) - goto fail_ipres; - } - - if ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)) { - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - goto fail_end_trans; - - ip->i_di.di_mode &= (ip->i_di.di_mode & S_IXGRP) ? - (~(S_ISUID | S_ISGID)) : (~S_ISUID); - - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); - brelse(dibh); - } + struct kiocb kiocb; + ssize_t ret; - if (journaled) { - count = gfs2_jdata_write(ip, buf, *offset, size, - gfs2_copy_from_user); - if (count < 0) { - error = count; - goto fail_end_trans; - } - - *offset += count; - } else { - struct iovec local_iov = { .iov_base = buf, .iov_len = size }; - - count = generic_file_write_nolock(file, &local_iov, 1, offset); - if (count < 0) { - error = count; - goto fail_end_trans; - } - - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - goto fail_end_trans; - - if (ip->i_di.di_size < inode->i_size) - ip->i_di.di_size = inode->i_size; - ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); - - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); - brelse(dibh); - } - - gfs2_trans_end(sdp); - - if (file->f_flags & O_SYNC || IS_SYNC(inode)) { - gfs2_log_flush_glock(ip->i_gl); - error = filemap_fdatawrite(file->f_mapping); - if (error == 0) - error = filemap_fdatawait(file->f_mapping); - if (error) - goto fail_ipres; - } - - if (alloc_required) { - gfs2_assert_warn(sdp, count != size || - al->al_alloced); - gfs2_inplace_release(ip); - gfs2_quota_unlock(ip); - gfs2_alloc_put(ip); - } - - return count; - - fail_end_trans: - gfs2_trans_end(sdp); - - fail_ipres: - if (alloc_required) - gfs2_inplace_release(ip); - - fail_gunlock_q: - if (alloc_required) - gfs2_quota_unlock(ip); + atomic_inc(&get_v2sdp(filp->f_mapping->host->i_sb)->sd_ops_file); - fail: - if (alloc_required) - gfs2_alloc_put(ip); - - return error; + init_sync_kiocb(&kiocb, filp); + ret = __gfs2_file_aio_read(&kiocb, iov, nr_segs, ppos); + if (-EIOCBQUEUED == ret) + ret = wait_on_sync_kiocb(&kiocb); + return ret; } -/** - * do_write_buf - Write bytes to a file - * @file: The file to write to - * @buf: The buffer to copy from - * @size: The amount of data requested - * @offset: The current file offset - * @num_gh: The number of other locks we need to do the read - * @gh: the locks we need plus one for our lock - * - * Outputs: Offset - updated according to number of bytes written - * - * Returns: The number of bytes written, errno on failure - */ - -static ssize_t do_write_buf(struct file *file, const char __user *buf, size_t size, - loff_t *offset, unsigned int num_gh, - struct gfs2_holder *ghs) +static ssize_t gfs2_file_aio_read(struct kiocb *iocb, char __user *buf, + size_t count, loff_t pos) { - struct gfs2_inode *ip = get_v2ip(file->f_mapping->host); - struct gfs2_sbd *sdp = ip->i_sbd; - size_t s; - ssize_t count = 0; - int error; - - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[num_gh]); - - error = gfs2_glock_nq_m(num_gh + 1, ghs); - if (error) - goto out; - - if (num_gh) { - error = grope_mapping(buf, size); - if (error) - goto out_gunlock; - } - - if (file->f_flags & O_APPEND) - *offset = ip->i_di.di_size; - - if (!(file->f_flags & O_LARGEFILE)) { - error = -EFBIG; - if (*offset >= MAX_NON_LFS) - goto out_gunlock; - if (*offset + size > MAX_NON_LFS) - size = MAX_NON_LFS - *offset; - } - - /* split large writes into smaller atomic transactions */ - while (size) { - s = gfs2_tune_get(sdp, gt_max_atomic_write); - if (s > size) - s = size; - - error = do_do_write_buf(file, buf, s, offset); - if (error < 0) - goto out_gunlock; - - buf += error; - size -= error; - count += error; - } - - error = 0; + struct file *filp = iocb->ki_filp; + struct iovec local_iov = { .iov_base = buf, .iov_len = count }; - out_gunlock: - gfs2_glock_dq_m(num_gh + 1, ghs); + atomic_inc(&get_v2sdp(filp->f_mapping->host->i_sb)->sd_ops_file); - out: - gfs2_holder_uninit(&ghs[num_gh]); - - return (count) ? count : error; + BUG_ON(iocb->ki_pos != pos); + return __gfs2_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos); } -/** - * gfs2_write - Write bytes to a file - * @file: The file to write to - * @buf: The buffer to copy from - * @size: The amount of data requested - * @offset: The current file offset - * - * Outputs: Offset - updated according to number of bytes written - * - * Returns: The number of bytes written, errno on failure - */ - -static ssize_t gfs2_write(struct file *file, const char __user *buf, - size_t size, loff_t *offset) -{ - struct inode *inode = file->f_mapping->host; - ssize_t count; - - atomic_inc(&get_v2sdp(inode->i_sb)->sd_ops_file); - - if (*offset < 0) - return -EINVAL; - if (!access_ok(VERIFY_READ, buf, size)) - return -EFAULT; - - mutex_lock(&inode->i_mutex); - if (file->f_flags & O_DIRECT) - count = walk_vm(file, buf, size, offset, - do_write_direct); - else - count = walk_vm(file, buf, size, offset, do_write_buf); - mutex_unlock(&inode->i_mutex); - - return count; -} /** * filldir_reg_func - Report a directory entry to the caller of gfs2_dir_read() @@ -1158,9 +551,6 @@ static int gfs2_ioctl_flags(struct gfs2_inode *ip, unsigned int cmd, unsigned lo if (flags & (GFS2_DIF_JDATA|GFS2_DIF_DIRECTIO)) { if (!S_ISREG(ip->i_di.di_mode)) goto out; - /* FIXME: Would be nice not to require the following test */ - if ((flags & GFS2_DIF_JDATA) && ip->i_di.di_size) - goto out; } if (flags & (GFS2_DIF_INHERIT_JDATA|GFS2_DIF_INHERIT_DIRECTIO)) { if (!S_ISDIR(ip->i_di.di_mode)) @@ -1246,21 +636,14 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) return error; } - if (gfs2_is_jdata(ip)) { - if (vma->vm_flags & VM_MAYSHARE) - error = -EOPNOTSUPP; - else - vma->vm_ops = &gfs2_vm_ops_private; - } else { - /* This is VM_MAYWRITE instead of VM_WRITE because a call - to mprotect() can turn on VM_WRITE later. */ - - if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) == - (VM_MAYSHARE | VM_MAYWRITE)) - vma->vm_ops = &gfs2_vm_ops_sharewrite; - else - vma->vm_ops = &gfs2_vm_ops_private; - } + /* This is VM_MAYWRITE instead of VM_WRITE because a call + to mprotect() can turn on VM_WRITE later. */ + + if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) == + (VM_MAYSHARE | VM_MAYWRITE)) + vma->vm_ops = &gfs2_vm_ops_sharewrite; + else + vma->vm_ops = &gfs2_vm_ops_private; gfs2_glock_dq_uninit(&i_gh); @@ -1313,13 +696,6 @@ static int gfs2_open(struct inode *inode, struct file *file) if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO) file->f_flags |= O_DIRECT; - /* Don't let the user open O_DIRECT on a jdata file */ - - if ((file->f_flags & O_DIRECT) && gfs2_is_jdata(ip)) { - error = -EINVAL; - goto fail_gunlock; - } - gfs2_glock_dq_uninit(&i_gh); } @@ -1446,29 +822,10 @@ static ssize_t gfs2_sendfile(struct file *in_file, loff_t *offset, size_t count, read_actor_t actor, void *target) { struct gfs2_inode *ip = get_v2ip(in_file->f_mapping->host); - struct gfs2_holder gh; - ssize_t retval; atomic_inc(&ip->i_sbd->sd_ops_file); - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); - - retval = gfs2_glock_nq_atime(&gh); - if (retval) - goto out; - - if (gfs2_is_jdata(ip)) - retval = -EOPNOTSUPP; - else - retval = generic_file_sendfile(in_file, offset, count, actor, - target); - - gfs2_glock_dq(&gh); - - out: - gfs2_holder_uninit(&gh); - - return retval; + return generic_file_sendfile(in_file, offset, count, actor, target); } static int do_flock(struct file *file, int cmd, struct file_lock *fl) @@ -1567,7 +924,11 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) struct file_operations gfs2_file_fops = { .llseek = gfs2_llseek, .read = gfs2_read, - .write = gfs2_write, + .readv = gfs2_file_readv, + .aio_read = gfs2_file_aio_read, + .write = generic_file_write, + .writev = generic_file_writev, + .aio_write = generic_file_aio_write, .ioctl = gfs2_ioctl, .mmap = gfs2_mmap, .open = gfs2_open, diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c index a1b409ce75e1cb4f9d14821497cb12251c6bceaa..8f77bb7896bd6e4e3a05037cccd991a35795e83b 100644 --- a/fs/gfs2/ops_vm.c +++ b/fs/gfs2/ops_vm.c @@ -155,9 +155,6 @@ static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area, if (error) return NULL; - if (gfs2_is_jdata(ip)) - goto out; - set_bit(GIF_PAGED, &ip->i_flags); set_bit(GIF_SW_PAGED, &ip->i_flags); diff --git a/fs/gfs2/page.c b/fs/gfs2/page.c index ea31bceac4f288f3e68ab6713c52bbfc7fd78fb3..3542aa6b01c4355b4dd224018200a32fb69b7ddd 100644 --- a/fs/gfs2/page.c +++ b/fs/gfs2/page.c @@ -172,8 +172,8 @@ int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, map_bh(bh, inode->i_sb, block); set_buffer_uptodate(bh); - if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED) - gfs2_trans_add_databuf(sdp, bh); + if ((sdp->sd_args.ar_data == GFS2_DATA_ORDERED) || gfs2_is_jdata(ip)) + gfs2_trans_add_bh(ip->i_gl, bh, 0); mark_buffer_dirty(bh); if (release) { @@ -245,8 +245,8 @@ int gfs2_block_truncate_page(struct address_space *mapping) goto unlock; } - if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED/* || gfs2_is_jdata(ip)*/) - gfs2_trans_add_databuf(sdp, bh); + if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) + gfs2_trans_add_bh(ip->i_gl, bh, 0); kaddr = kmap_atomic(page, KM_USER0); memset(kaddr + offset, 0, length); @@ -273,7 +273,7 @@ void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, end = start + bsize; if (end <= from || start >= to) continue; - gfs2_trans_add_databuf(ip->i_sbd, bh); + gfs2_trans_add_bh(ip->i_gl, bh, 0); } } diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 69e8f4e92e572f5e4f964cf6128497eaeaa3ec1c..138fdf559a9aac7436981da6f111eca5e6cbb973 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -43,20 +43,22 @@ #include #include #include +#include #include #include "gfs2.h" #include "bmap.h" #include "glock.h" #include "glops.h" -#include "jdata.h" #include "log.h" #include "meta_io.h" #include "quota.h" #include "rgrp.h" #include "super.h" #include "trans.h" +#include "inode.h" #include "ops_file.h" +#include "ops_address.h" #define QUOTA_USER 1 #define QUOTA_GROUP 0 @@ -561,6 +563,81 @@ static void do_qc(struct gfs2_quota_data *qd, int64_t change) up(&sdp->sd_quota_mutex); } +/** + * gfs2_adjust_quota + * + * This function was mostly borrowed from gfs2_block_truncate_page which was + * in turn mostly borrowed from ext3 + */ +static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, + int64_t change, struct gfs2_quota_data *qd) +{ + struct inode *inode = gfs2_ip2v(ip); + struct address_space *mapping = inode->i_mapping; + unsigned long index = loc >> PAGE_CACHE_SHIFT; + unsigned offset = loc & (PAGE_CACHE_SHIFT - 1); + unsigned blocksize, iblock, pos; + struct buffer_head *bh; + struct page *page; + void *kaddr; + __be64 *ptr; + u64 value; + int err = -EIO; + + page = grab_cache_page(mapping, index); + if (!page) + return -ENOMEM; + + blocksize = inode->i_sb->s_blocksize; + iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); + + if (!page_has_buffers(page)) + create_empty_buffers(page, blocksize, 0); + + bh = page_buffers(page); + pos = blocksize; + while (offset >= pos) { + bh = bh->b_this_page; + iblock++; + pos += blocksize; + } + + if (!buffer_mapped(bh)) { + gfs2_get_block(inode, iblock, bh, 1); + if (!buffer_mapped(bh)) + goto unlock; + } + + if (PageUptodate(page)) + set_buffer_uptodate(bh); + + if (!buffer_uptodate(bh)) { + ll_rw_block(READ, 1, &bh); + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) + goto unlock; + } + + gfs2_trans_add_bh(ip->i_gl, bh, 0); + + kaddr = kmap_atomic(page, KM_USER0); + ptr = (__be64 *)(kaddr + offset); + value = *ptr = cpu_to_be64(be64_to_cpu(*ptr) + change); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + err = 0; + qd->qd_qb.qb_magic = cpu_to_be32(GFS2_MAGIC); +#if 0 + qd->qd_qb.qb_limit = cpu_to_be64(q.qu_limit); + qd->qd_qb.qb_warn = cpu_to_be64(q.qu_warn); +#endif + qd->qd_qb.qb_value = cpu_to_be64(value); +unlock: + unlock_page(page); + page_cache_release(page); + return err; +} + static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) { struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_sbd; @@ -635,43 +712,14 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) file_ra_state_init(&ra_state, ip->i_vnode->i_mapping); for (x = 0; x < num_qd; x++) { - char buf[sizeof(struct gfs2_quota)]; - struct gfs2_quota q; - qd = qda[x]; offset = qd2offset(qd); - - /* The quota file may not be a multiple of - sizeof(struct gfs2_quota) bytes. */ - memset(buf, 0, sizeof(struct gfs2_quota)); - - error = gfs2_internal_read(ip, &ra_state, buf, &offset, - sizeof(struct gfs2_quota)); - if (error < 0) + error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync, + (struct gfs2_quota_data *)qd->qd_gl->gl_lvb); + if (error) goto out_end_trans; - gfs2_quota_in(&q, buf); - q.qu_value += qda[x]->qd_change_sync; - gfs2_quota_out(&q, buf); - - error = gfs2_jdata_write_mem(ip, buf, offset, - sizeof(struct gfs2_quota)); - if (error < 0) - goto out_end_trans; - else if (error != sizeof(struct gfs2_quota)) { - error = -EIO; - goto out_end_trans; - } - do_qc(qd, -qd->qd_change_sync); - - memset(&qd->qd_qb, 0, sizeof(struct gfs2_quota_lvb)); - qd->qd_qb.qb_magic = GFS2_MAGIC; - qd->qd_qb.qb_limit = q.qu_limit; - qd->qd_qb.qb_warn = q.qu_warn; - qd->qd_qb.qb_value = q.qu_value; - - gfs2_quota_lvb_out(&qd->qd_qb, qd->qd_gl->gl_lvb); } error = 0; diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index b014591fa4a45db665495774856dc0f2d79b8f2e..104e664fa182d27c8e6c9de0a4c472c423edc1db 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -154,14 +154,13 @@ void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta) gfs2_attach_bufdata(gl, bh, meta); bd = get_v2bd(bh); } - lops_add(sdp, &bd->bd_le); } void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, uint64_t blkno) { struct gfs2_revoke *rv = kmalloc(sizeof(struct gfs2_revoke), - GFP_KERNEL | __GFP_NOFAIL); + GFP_NOFS | __GFP_NOFAIL); lops_init_le(&rv->rv_le, &gfs2_revoke_lops); rv->rv_blkno = blkno; lops_add(sdp, &rv->rv_le); @@ -197,19 +196,3 @@ void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd) lops_add(rgd->rd_sbd, &rgd->rd_le); } -void gfs2_trans_add_databuf(struct gfs2_sbd *sdp, struct buffer_head *bh) -{ - struct gfs2_bufdata *bd; - - bd = get_v2bd(bh); - if (!bd) { - bd = kmalloc(sizeof(struct gfs2_bufdata), - GFP_NOFS | __GFP_NOFAIL); - lops_init_le(&bd->bd_le, &gfs2_databuf_lops); - get_bh(bh); - bd->bd_bh = bh; - set_v2bd(bh, bd); - lops_add(sdp, &bd->bd_le); - } -} - diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index 5a7da1e853c9da3307bf625a8e343807f6b71e98..f7f3e2a3d5905d5c67e0c0653b800e97714f64ad 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h @@ -35,6 +35,5 @@ void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta); void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, uint64_t blkno); void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, uint64_t blkno); void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd); -void gfs2_trans_add_databuf(struct gfs2_sbd *sdp, struct buffer_head *bh); #endif /* __TRANS_DOT_H__ */ diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index ad49153c33d1411d89bda8cb7e920e3967322d40..4fb1704aac10d8093126789eda90737e251dec25 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -50,6 +50,7 @@ int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", sdp->sd_fsname, assertion, sdp->sd_fsname, function, file, line); + dump_stack(); return (me) ? -1 : -2; } @@ -75,6 +76,8 @@ int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, if (sdp->sd_args.ar_debug) BUG(); + else + dump_stack(); sdp->sd_last_warning = jiffies; diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index f1302e2616daec3547acd1eeee3829a08ea31fa8..99d7ae4f6b7e64500ca06863823968308fd651c0 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -336,6 +336,10 @@ struct gfs2_log_header { /* ld_data1 is the number of revoke blocks in the descriptor. ld_data2 is unused. */ +#define GFS2_LOG_DESC_JDATA 302 +/* ld_data1 is the number of data blocks in the descriptor. + ld_data2 is unused. */ + struct gfs2_log_descriptor { struct gfs2_meta_header ld_header; @@ -400,6 +404,7 @@ struct gfs2_quota_change { __be32 qc_id; }; +#ifdef __KERNEL__ /* Translation functions */ extern void gfs2_inum_in(struct gfs2_inum *no, char *buf); @@ -444,4 +449,6 @@ extern void gfs2_statfs_change_print(struct gfs2_statfs_change *sc); extern void gfs2_unlinked_tag_print(struct gfs2_unlinked_tag *ut); extern void gfs2_quota_change_print(struct gfs2_quota_change *qc); +#endif /* __KERNEL__ */ + #endif /* __GFS2_ONDISK_DOT_H__ */