提交 bea9a6d2 编写于 作者: L Linus Torvalds

Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2

* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2:
  ocfs2: Silence gcc warning in ocfs2_write_zero_page().
  jbd2/ocfs2: Fix block checksumming when a buffer is used in several transactions
  ocfs2/dlm: Remove BUG_ON from migration in the rare case of a down node
  ocfs2: Don't duplicate pages past i_size during CoW.
  ocfs2: tighten up strlen() checking
  ocfs2: Make xattr reflink work with new local alloc reservation.
  ocfs2: make xattr extension work with new local alloc reservation.
  ocfs2: Remove the redundant cpu_to_le64.
  ocfs2/dlm: don't access beyond bitmap size
  ocfs2: No need to zero pages past i_size.
  ocfs2: Zero the tail cluster when extending past i_size.
  ocfs2: When zero extending, do it by page.
  ocfs2: Limit default local alloc size within bitmap range.
  ocfs2: Move orphan scan work to ocfs2_wq.
  fs/ocfs2/dlm: Add missing spin_unlock
......@@ -297,7 +297,6 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
struct page *new_page;
unsigned int new_offset;
struct buffer_head *bh_in = jh2bh(jh_in);
struct jbd2_buffer_trigger_type *triggers;
journal_t *journal = transaction->t_journal;
/*
......@@ -328,21 +327,21 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
done_copy_out = 1;
new_page = virt_to_page(jh_in->b_frozen_data);
new_offset = offset_in_page(jh_in->b_frozen_data);
triggers = jh_in->b_frozen_triggers;
} else {
new_page = jh2bh(jh_in)->b_page;
new_offset = offset_in_page(jh2bh(jh_in)->b_data);
triggers = jh_in->b_triggers;
}
mapped_data = kmap_atomic(new_page, KM_USER0);
/*
* Fire any commit trigger. Do this before checking for escaping,
* as the trigger may modify the magic offset. If a copy-out
* happens afterwards, it will have the correct data in the buffer.
* Fire the data frozen trigger if the data wasn't already frozen. Do this
* before checking for escaping, as the trigger may modify the magic
* offset. If a copy-out happens afterwards, it will have the correct
* data in the buffer.
*/
jbd2_buffer_commit_trigger(jh_in, mapped_data + new_offset,
triggers);
if (!done_copy_out)
jbd2_buffer_frozen_trigger(jh_in, mapped_data + new_offset,
jh_in->b_triggers);
/*
* Check for escaping
......
......@@ -725,6 +725,9 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
page = jh2bh(jh)->b_page;
offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;
source = kmap_atomic(page, KM_USER0);
/* Fire data frozen trigger just before we copy the data */
jbd2_buffer_frozen_trigger(jh, source + offset,
jh->b_triggers);
memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
kunmap_atomic(source, KM_USER0);
......@@ -963,15 +966,15 @@ void jbd2_journal_set_triggers(struct buffer_head *bh,
jh->b_triggers = type;
}
void jbd2_buffer_commit_trigger(struct journal_head *jh, void *mapped_data,
void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data,
struct jbd2_buffer_trigger_type *triggers)
{
struct buffer_head *bh = jh2bh(jh);
if (!triggers || !triggers->t_commit)
if (!triggers || !triggers->t_frozen)
return;
triggers->t_commit(triggers, bh, mapped_data, bh->b_size);
triggers->t_frozen(triggers, bh, mapped_data, bh->b_size);
}
void jbd2_buffer_abort_trigger(struct journal_head *jh,
......
......@@ -196,15 +196,14 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock,
dump_stack();
goto bail;
}
past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
(unsigned long long)past_eof);
if (create && (iblock >= past_eof))
set_buffer_new(bh_result);
}
past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
(unsigned long long)past_eof);
if (create && (iblock >= past_eof))
set_buffer_new(bh_result);
bail:
if (err < 0)
err = -EIO;
......@@ -459,36 +458,6 @@ int walk_page_buffers( handle_t *handle,
return ret;
}
/*
 * Start a journal transaction sized with OCFS2_INODE_UPDATE_CREDITS and,
 * when ocfs2_should_order_data() is true for the inode, file the inode on
 * that transaction via ocfs2_jbd2_file_inode().
 *
 * Returns the transaction handle on success, or an ERR_PTR() value on
 * failure. The page, from and to arguments are not referenced anywhere
 * in this body.
 *
 * NOTE(review): the enclosing hunk header (-459,36 +458,6) indicates this
 * definition is on the removed side of the change.
 */
handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
struct page *page,
unsigned from,
unsigned to)
{
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
handle_t *handle;
int ret = 0;
handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
if (IS_ERR(handle)) {
/*
 * Every ocfs2_start_trans() failure is reported as -ENOMEM; the
 * error encoded in the returned pointer is not propagated.
 */
ret = -ENOMEM;
mlog_errno(ret);
goto out;
}
if (ocfs2_should_order_data(inode)) {
ret = ocfs2_jbd2_file_inode(handle, inode);
if (ret < 0)
mlog_errno(ret);
}
out:
if (ret) {
/*
 * If the transaction was started but a later step failed, commit
 * it before handing the error back so the handle is not leaked.
 */
if (!IS_ERR(handle))
ocfs2_commit_trans(osb, handle);
handle = ERR_PTR(ret);
}
return handle;
}
static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
{
sector_t status;
......@@ -1131,23 +1100,37 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno,
*/
static int ocfs2_grab_pages_for_write(struct address_space *mapping,
struct ocfs2_write_ctxt *wc,
u32 cpos, loff_t user_pos, int new,
u32 cpos, loff_t user_pos,
unsigned user_len, int new,
struct page *mmap_page)
{
int ret = 0, i;
unsigned long start, target_index, index;
unsigned long start, target_index, end_index, index;
struct inode *inode = mapping->host;
loff_t last_byte;
target_index = user_pos >> PAGE_CACHE_SHIFT;
/*
* Figure out how many pages we'll be manipulating here. For
* non allocating write, we just change the one
* page. Otherwise, we'll need a whole clusters worth.
* page. Otherwise, we'll need a whole clusters worth. If we're
* writing past i_size, we only need enough pages to cover the
* last page of the write.
*/
if (new) {
wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb);
start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos);
/*
* We need the index *past* the last page we could possibly
* touch. This is the page past the end of the write or
* i_size, whichever is greater.
*/
last_byte = max(user_pos + user_len, i_size_read(inode));
BUG_ON(last_byte < 1);
end_index = ((last_byte - 1) >> PAGE_CACHE_SHIFT) + 1;
if ((start + wc->w_num_pages) > end_index)
wc->w_num_pages = end_index - start;
} else {
wc->w_num_pages = 1;
start = target_index;
......@@ -1620,21 +1603,20 @@ static int ocfs2_try_to_write_inline_data(struct address_space *mapping,
* write path can treat it as an non-allocating write, which has no
* special case code for sparse/nonsparse files.
*/
static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
unsigned len,
static int ocfs2_expand_nonsparse_inode(struct inode *inode,
struct buffer_head *di_bh,
loff_t pos, unsigned len,
struct ocfs2_write_ctxt *wc)
{
int ret;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
loff_t newsize = pos + len;
if (ocfs2_sparse_alloc(osb))
return 0;
BUG_ON(ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)));
if (newsize <= i_size_read(inode))
return 0;
ret = ocfs2_extend_no_holes(inode, newsize, pos);
ret = ocfs2_extend_no_holes(inode, di_bh, newsize, pos);
if (ret)
mlog_errno(ret);
......@@ -1644,6 +1626,18 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
return ret;
}
/*
 * Sparse-allocation counterpart of ocfs2_expand_nonsparse_inode(): when
 * @pos lies beyond the current i_size, ask ocfs2_zero_extend() to zero
 * the range up to @pos. A @pos at or below i_size needs no work.
 *
 * Must only be called when ocfs2_sparse_alloc() is true for the volume.
 * Returns 0 when nothing needs zeroing, otherwise whatever
 * ocfs2_zero_extend() returns.
 */
static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
			   loff_t pos)
{
	BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)));

	/* Nothing lies between i_size and pos, so there is nothing to zero. */
	if (pos <= i_size_read(inode))
		return 0;

	return ocfs2_zero_extend(inode, di_bh, pos);
}
int ocfs2_write_begin_nolock(struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata,
......@@ -1679,7 +1673,11 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
}
}
ret = ocfs2_expand_nonsparse_inode(inode, pos, len, wc);
if (ocfs2_sparse_alloc(osb))
ret = ocfs2_zero_tail(inode, di_bh, pos);
else
ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos, len,
wc);
if (ret) {
mlog_errno(ret);
goto out;
......@@ -1789,7 +1787,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
* that we can zero and flush if we error after adding the
* extent.
*/
ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos,
ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len,
cluster_of_pages, mmap_page);
if (ret) {
mlog_errno(ret);
......
......@@ -1671,7 +1671,7 @@ struct dlm_ctxt * dlm_register_domain(const char *domain,
struct dlm_ctxt *dlm = NULL;
struct dlm_ctxt *new_ctxt = NULL;
if (strlen(domain) > O2NM_MAX_NAME_LEN) {
if (strlen(domain) >= O2NM_MAX_NAME_LEN) {
ret = -ENAMETOOLONG;
mlog(ML_ERROR, "domain name length too long\n");
goto leave;
......@@ -1709,6 +1709,7 @@ struct dlm_ctxt * dlm_register_domain(const char *domain,
}
if (dlm_protocol_compare(&dlm->fs_locking_proto, fs_proto)) {
spin_unlock(&dlm_domain_lock);
mlog(ML_ERROR,
"Requested locking protocol version is not "
"compatible with already registered domain "
......
......@@ -2808,14 +2808,8 @@ static int dlm_mark_lockres_migrating(struct dlm_ctxt *dlm,
mlog(0, "trying again...\n");
goto again;
}
/* now that we are sure the MIGRATING state is there, drop
* the unneded state which blocked threads trying to DIRTY */
spin_lock(&res->spinlock);
BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING));
res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY;
spin_unlock(&res->spinlock);
ret = 0;
/* did the target go down or die? */
spin_lock(&dlm->spinlock);
if (!test_bit(target, dlm->domain_map)) {
......@@ -2825,10 +2819,22 @@ static int dlm_mark_lockres_migrating(struct dlm_ctxt *dlm,
}
spin_unlock(&dlm->spinlock);
/*
* if target is down, we need to clear DLM_LOCK_RES_BLOCK_DIRTY for
* another try; otherwise, we are sure the MIGRATING state is there,
* drop the unneeded state which blocked threads trying to DIRTY
*/
spin_lock(&res->spinlock);
BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY;
if (!ret)
BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING));
spin_unlock(&res->spinlock);
/*
* at this point:
*
* o the DLM_LOCK_RES_MIGRATING flag is set
* o the DLM_LOCK_RES_MIGRATING flag is set if target not down
* o there are no pending asts on this lockres
* o all processes trying to reserve an ast on this
* lockres must wait for the MIGRATING flag to clear
......
......@@ -463,7 +463,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
int bit;
bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES+1, 0);
bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES, 0);
if (bit >= O2NM_MAX_NODES || bit < 0)
dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
else
......
......@@ -724,28 +724,55 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
return status;
}
/*
 * A write already orders its data, but a truncate does not, so pages
 * zeroed on behalf of a truncate have to be ordered explicitly.
 *
 * Returns NULL when the inode does not need ordered data, a started
 * transaction handle with the inode filed on it otherwise, or an
 * ERR_PTR() value on failure.
 */
static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode)
{
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	handle_t *handle;
	int ret;

	/* No ordering required: the caller proceeds without a handle. */
	if (!ocfs2_should_order_data(inode))
		return NULL;

	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		/* Any failure to start the transaction is reported as -ENOMEM. */
		ret = -ENOMEM;
		mlog_errno(ret);
		return ERR_PTR(ret);
	}

	ret = ocfs2_jbd2_file_inode(handle, inode);
	if (ret < 0)
		mlog_errno(ret);

	/* On any non-zero status, close the transaction we just opened. */
	if (ret) {
		ocfs2_commit_trans(osb, handle);
		return ERR_PTR(ret);
	}

	return handle;
}
/* Some parts of this taken from generic_cont_expand, which turned out
* to be too fragile to do exactly what we need without us having to
* worry about recursive locking in ->write_begin() and ->write_end(). */
static int ocfs2_write_zero_page(struct inode *inode,
u64 size)
static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
u64 abs_to)
{
struct address_space *mapping = inode->i_mapping;
struct page *page;
unsigned long index;
unsigned int offset;
unsigned long index = abs_from >> PAGE_CACHE_SHIFT;
handle_t *handle = NULL;
int ret;
int ret = 0;
unsigned zero_from, zero_to, block_start, block_end;
offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */
/* ugh. in prepare/commit_write, if from==to==start of block, we
** skip the prepare. make sure we never send an offset for the start
** of a block
*/
if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
offset++;
}
index = size >> PAGE_CACHE_SHIFT;
BUG_ON(abs_from >= abs_to);
BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
BUG_ON(abs_from & (inode->i_blkbits - 1));
page = grab_cache_page(mapping, index);
if (!page) {
......@@ -754,31 +781,56 @@ static int ocfs2_write_zero_page(struct inode *inode,
goto out;
}
ret = ocfs2_prepare_write_nolock(inode, page, offset, offset);
if (ret < 0) {
mlog_errno(ret);
goto out_unlock;
}
/* Get the offsets within the page that we want to zero */
zero_from = abs_from & (PAGE_CACHE_SIZE - 1);
zero_to = abs_to & (PAGE_CACHE_SIZE - 1);
if (!zero_to)
zero_to = PAGE_CACHE_SIZE;
if (ocfs2_should_order_data(inode)) {
handle = ocfs2_start_walk_page_trans(inode, page, offset,
offset);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
handle = NULL;
mlog(0,
"abs_from = %llu, abs_to = %llu, index = %lu, zero_from = %u, zero_to = %u\n",
(unsigned long long)abs_from, (unsigned long long)abs_to,
index, zero_from, zero_to);
/* We know that zero_from is block aligned */
for (block_start = zero_from; block_start < zero_to;
block_start = block_end) {
block_end = block_start + (1 << inode->i_blkbits);
/*
* block_start is block-aligned. Bump it by one to
* force ocfs2_{prepare,commit}_write() to zero the
* whole block.
*/
ret = ocfs2_prepare_write_nolock(inode, page,
block_start + 1,
block_start + 1);
if (ret < 0) {
mlog_errno(ret);
goto out_unlock;
}
}
/* must not update i_size! */
ret = block_commit_write(page, offset, offset);
if (ret < 0)
mlog_errno(ret);
else
ret = 0;
if (!handle) {
handle = ocfs2_zero_start_ordered_transaction(inode);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
handle = NULL;
break;
}
}
/* must not update i_size! */
ret = block_commit_write(page, block_start + 1,
block_start + 1);
if (ret < 0)
mlog_errno(ret);
else
ret = 0;
}
if (handle)
ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out_unlock:
unlock_page(page);
page_cache_release(page);
......@@ -786,22 +838,114 @@ static int ocfs2_write_zero_page(struct inode *inode,
return ret;
}
static int ocfs2_zero_extend(struct inode *inode,
u64 zero_to_size)
/*
* Find the next range to zero. We do this in terms of bytes because
* that's what ocfs2_zero_extend() wants, and it is dealing with the
* pagecache. We may return multiple extents.
*
* zero_start and zero_end are ocfs2_zero_extend()s current idea of what
* needs to be zeroed. range_start and range_end return the next zeroing
* range. A subsequent call should pass the previous range_end as its
* zero_start. If range_end is 0, there's nothing to do.
*
* Unwritten extents are skipped over. Refcounted extents are CoWd.
*/
static int ocfs2_zero_extend_get_range(struct inode *inode,
struct buffer_head *di_bh,
u64 zero_start, u64 zero_end,
u64 *range_start, u64 *range_end)
{
int ret = 0;
u64 start_off;
struct super_block *sb = inode->i_sb;
int rc = 0, needs_cow = 0;
u32 p_cpos, zero_clusters = 0;
u32 zero_cpos =
zero_start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
u32 last_cpos = ocfs2_clusters_for_bytes(inode->i_sb, zero_end);
unsigned int num_clusters = 0;
unsigned int ext_flags = 0;
start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
while (start_off < zero_to_size) {
ret = ocfs2_write_zero_page(inode, start_off);
if (ret < 0) {
mlog_errno(ret);
while (zero_cpos < last_cpos) {
rc = ocfs2_get_clusters(inode, zero_cpos, &p_cpos,
&num_clusters, &ext_flags);
if (rc) {
mlog_errno(rc);
goto out;
}
if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
zero_clusters = num_clusters;
if (ext_flags & OCFS2_EXT_REFCOUNTED)
needs_cow = 1;
break;
}
zero_cpos += num_clusters;
}
if (!zero_clusters) {
*range_end = 0;
goto out;
}
while ((zero_cpos + zero_clusters) < last_cpos) {
rc = ocfs2_get_clusters(inode, zero_cpos + zero_clusters,
&p_cpos, &num_clusters,
&ext_flags);
if (rc) {
mlog_errno(rc);
goto out;
}
start_off += sb->s_blocksize;
if (!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN))
break;
if (ext_flags & OCFS2_EXT_REFCOUNTED)
needs_cow = 1;
zero_clusters += num_clusters;
}
if ((zero_cpos + zero_clusters) > last_cpos)
zero_clusters = last_cpos - zero_cpos;
if (needs_cow) {
rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters,
UINT_MAX);
if (rc) {
mlog_errno(rc);
goto out;
}
}
*range_start = ocfs2_clusters_to_bytes(inode->i_sb, zero_cpos);
*range_end = ocfs2_clusters_to_bytes(inode->i_sb,
zero_cpos + zero_clusters);
out:
return rc;
}
/*
* Zero one range returned from ocfs2_zero_extend_get_range(). The caller
* has made sure that the entire range needs zeroing.
*/
static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
u64 range_end)
{
int rc = 0;
u64 next_pos;
u64 zero_pos = range_start;
mlog(0, "range_start = %llu, range_end = %llu\n",
(unsigned long long)range_start,
(unsigned long long)range_end);
BUG_ON(range_start >= range_end);
while (zero_pos < range_end) {
next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
if (next_pos > range_end)
next_pos = range_end;
rc = ocfs2_write_zero_page(inode, zero_pos, next_pos);
if (rc < 0) {
mlog_errno(rc);
break;
}
zero_pos = next_pos;
/*
* Very large extends have the potential to lock up
......@@ -810,16 +954,63 @@ static int ocfs2_zero_extend(struct inode *inode,
cond_resched();
}
out:
return rc;
}
/*
 * Zero the byte range between the block-aligned current i_size and
 * @zero_to_size.
 *
 * The range is walked one chunk at a time: ocfs2_zero_extend_get_range()
 * reports the next chunk that needs zeroing, the chunk is clipped to
 * [cursor, zero_to_size], and ocfs2_zero_extend_range() zeroes it. The
 * walk stops when the cursor reaches @zero_to_size, when no further
 * chunk is reported (chunk_end == 0), or on the first error.
 *
 * Returns 0 on success or the first non-zero status from either helper.
 */
int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
		      loff_t zero_to_size)
{
	u64 cursor, chunk_start = 0, chunk_end = 0;
	int status = 0;

	cursor = ocfs2_align_bytes_to_blocks(inode->i_sb, i_size_read(inode));
	mlog(0, "zero_start %llu for i_size %llu\n",
	     (unsigned long long)cursor,
	     (unsigned long long)i_size_read(inode));

	/* Each completed pass resumes from the end of the chunk just zeroed. */
	for (; cursor < zero_to_size; cursor = chunk_end) {
		status = ocfs2_zero_extend_get_range(inode, di_bh, cursor,
						     zero_to_size,
						     &chunk_start,
						     &chunk_end);
		if (status) {
			mlog_errno(status);
			break;
		}

		/* A zero end offset means nothing is left to zero. */
		if (!chunk_end)
			break;

		/* Clip the reported chunk to the window we were asked for. */
		if (chunk_start < cursor)
			chunk_start = cursor;
		if (chunk_end > zero_to_size)
			chunk_end = zero_to_size;

		status = ocfs2_zero_extend_range(inode, chunk_start,
						 chunk_end);
		if (status) {
			mlog_errno(status);
			break;
		}
	}

	return status;
}
int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
u64 new_i_size, u64 zero_to)
{
int ret;
u32 clusters_to_add;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
/*
* Only quota files call this without a bh, and they can't be
* refcounted.
*/
BUG_ON(!di_bh && (oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE));
clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
if (clusters_to_add < oi->ip_clusters)
clusters_to_add = 0;
......@@ -840,7 +1031,7 @@ int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
* still need to zero the area between the old i_size and the
* new i_size.
*/
ret = ocfs2_zero_extend(inode, zero_to);
ret = ocfs2_zero_extend(inode, di_bh, zero_to);
if (ret < 0)
mlog_errno(ret);
......@@ -862,27 +1053,15 @@ static int ocfs2_extend_file(struct inode *inode,
goto out;
if (i_size_read(inode) == new_i_size)
goto out;
goto out;
BUG_ON(new_i_size < i_size_read(inode));
/*
* Fall through for converting inline data, even if the fs
* supports sparse files.
*
* The check for inline data here is legal - nobody can add
* the feature since we have i_mutex. We must check it again
* after acquiring ip_alloc_sem though, as paths like mmap
* might have raced us to converting the inode to extents.
*/
if (!(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
&& ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
goto out_update_size;
/*
* The alloc sem blocks people in read/write from reading our
* allocation until we're done changing it. We depend on
* i_mutex to block other extend/truncate calls while we're
* here.
* here. We even have to hold it for sparse files because there
* might be some tail zeroing.
*/
down_write(&oi->ip_alloc_sem);
......@@ -899,14 +1078,16 @@ static int ocfs2_extend_file(struct inode *inode,
ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
if (ret) {
up_write(&oi->ip_alloc_sem);
mlog_errno(ret);
goto out;
}
}
if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size);
if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
ret = ocfs2_zero_extend(inode, di_bh, new_i_size);
else
ret = ocfs2_extend_no_holes(inode, di_bh, new_i_size,
new_i_size);
up_write(&oi->ip_alloc_sem);
......
......@@ -54,8 +54,10 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
int ocfs2_simple_size_update(struct inode *inode,
struct buffer_head *di_bh,
u64 new_i_size);
int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size,
u64 zero_to);
int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
u64 new_i_size, u64 zero_to);
int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
loff_t zero_to);
int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
......
......@@ -472,7 +472,7 @@ static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger
return container_of(triggers, struct ocfs2_triggers, ot_triggers);
}
static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
static void ocfs2_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
struct buffer_head *bh,
void *data, size_t size)
{
......@@ -491,7 +491,7 @@ static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
* Quota blocks have their own trigger because the struct ocfs2_block_check
* offset depends on the blocksize.
*/
static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
static void ocfs2_dq_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
struct buffer_head *bh,
void *data, size_t size)
{
......@@ -511,7 +511,7 @@ static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
* Directory blocks also have their own trigger because the
* struct ocfs2_block_check offset depends on the blocksize.
*/
static void ocfs2_db_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
struct buffer_head *bh,
void *data, size_t size)
{
......@@ -544,7 +544,7 @@ static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
static struct ocfs2_triggers di_triggers = {
.ot_triggers = {
.t_commit = ocfs2_commit_trigger,
.t_frozen = ocfs2_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_dinode, i_check),
......@@ -552,7 +552,7 @@ static struct ocfs2_triggers di_triggers = {
static struct ocfs2_triggers eb_triggers = {
.ot_triggers = {
.t_commit = ocfs2_commit_trigger,
.t_frozen = ocfs2_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_extent_block, h_check),
......@@ -560,7 +560,7 @@ static struct ocfs2_triggers eb_triggers = {
static struct ocfs2_triggers rb_triggers = {
.ot_triggers = {
.t_commit = ocfs2_commit_trigger,
.t_frozen = ocfs2_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_refcount_block, rf_check),
......@@ -568,7 +568,7 @@ static struct ocfs2_triggers rb_triggers = {
static struct ocfs2_triggers gd_triggers = {
.ot_triggers = {
.t_commit = ocfs2_commit_trigger,
.t_frozen = ocfs2_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_group_desc, bg_check),
......@@ -576,14 +576,14 @@ static struct ocfs2_triggers gd_triggers = {
static struct ocfs2_triggers db_triggers = {
.ot_triggers = {
.t_commit = ocfs2_db_commit_trigger,
.t_frozen = ocfs2_db_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
};
static struct ocfs2_triggers xb_triggers = {
.ot_triggers = {
.t_commit = ocfs2_commit_trigger,
.t_frozen = ocfs2_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_xattr_block, xb_check),
......@@ -591,14 +591,14 @@ static struct ocfs2_triggers xb_triggers = {
static struct ocfs2_triggers dq_triggers = {
.ot_triggers = {
.t_commit = ocfs2_dq_commit_trigger,
.t_frozen = ocfs2_dq_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
};
static struct ocfs2_triggers dr_triggers = {
.ot_triggers = {
.t_commit = ocfs2_commit_trigger,
.t_frozen = ocfs2_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check),
......@@ -606,7 +606,7 @@ static struct ocfs2_triggers dr_triggers = {
static struct ocfs2_triggers dl_triggers = {
.ot_triggers = {
.t_commit = ocfs2_commit_trigger,
.t_frozen = ocfs2_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check),
......@@ -1936,7 +1936,7 @@ void ocfs2_orphan_scan_work(struct work_struct *work)
mutex_lock(&os->os_lock);
ocfs2_queue_orphan_scan(osb);
if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
schedule_delayed_work(&os->os_orphan_scan_work,
queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
ocfs2_orphan_scan_timeout());
mutex_unlock(&os->os_lock);
}
......@@ -1976,8 +1976,8 @@ void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
else {
atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
schedule_delayed_work(&os->os_orphan_scan_work,
ocfs2_orphan_scan_timeout());
queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
ocfs2_orphan_scan_timeout());
}
}
......
......@@ -118,6 +118,7 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
{
unsigned int la_mb;
unsigned int gd_mb;
unsigned int la_max_mb;
unsigned int megs_per_slot;
struct super_block *sb = osb->sb;
......@@ -182,6 +183,12 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
if (megs_per_slot < la_mb)
la_mb = megs_per_slot;
/* We can't store more bits than we can in a block. */
la_max_mb = ocfs2_clusters_to_megabytes(osb->sb,
ocfs2_local_alloc_size(sb) * 8);
if (la_mb > la_max_mb)
la_mb = la_max_mb;
return la_mb;
}
......
......@@ -775,7 +775,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
* locking allocators ranks above a transaction start
*/
WARN_ON(journal_current_handle());
status = ocfs2_extend_no_holes(gqinode,
status = ocfs2_extend_no_holes(gqinode, NULL,
gqinode->i_size + (need_alloc << sb->s_blocksize_bits),
gqinode->i_size);
if (status < 0)
......
......@@ -971,7 +971,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
u64 p_blkno;
/* We are protected by dqio_sem so no locking needed */
status = ocfs2_extend_no_holes(lqinode,
status = ocfs2_extend_no_holes(lqinode, NULL,
lqinode->i_size + 2 * sb->s_blocksize,
lqinode->i_size);
if (status < 0) {
......@@ -1114,7 +1114,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
return ocfs2_local_quota_add_chunk(sb, type, offset);
/* We are protected by dqio_sem so no locking needed */
status = ocfs2_extend_no_holes(lqinode,
status = ocfs2_extend_no_holes(lqinode, NULL,
lqinode->i_size + sb->s_blocksize,
lqinode->i_size);
if (status < 0) {
......
......@@ -2931,6 +2931,12 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits);
/*
* We only duplicate pages until we reach the page that contains i_size - 1.
* So trim 'end' to i_size.
*/
if (end > i_size_read(context->inode))
end = i_size_read(context->inode);
while (offset < end) {
page_index = offset >> PAGE_CACHE_SHIFT;
......@@ -4166,6 +4172,12 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
struct inode *inode = old_dentry->d_inode;
struct buffer_head *new_bh = NULL;
if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
ret = -EINVAL;
mlog_errno(ret);
goto out;
}
ret = filemap_fdatawrite(inode->i_mapping);
if (ret) {
mlog_errno(ret);
......
......@@ -741,7 +741,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
le16_to_cpu(bg->bg_free_bits_count));
le32_add_cpu(&cl->cl_recs[alloc_rec].c_total,
le16_to_cpu(bg->bg_bits));
cl->cl_recs[alloc_rec].c_blkno = cpu_to_le64(bg->bg_blkno);
cl->cl_recs[alloc_rec].c_blkno = bg->bg_blkno;
if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count))
le16_add_cpu(&cl->cl_next_free_rec, 1);
......
......@@ -709,7 +709,7 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
struct ocfs2_xattr_value_buf *vb,
struct ocfs2_xattr_set_ctxt *ctxt)
{
int status = 0;
int status = 0, credits;
handle_t *handle = ctxt->handle;
enum ocfs2_alloc_restarted why;
u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
......@@ -719,38 +719,54 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto leave;
}
while (clusters_to_add) {
status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
break;
}
prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
status = ocfs2_add_clusters_in_btree(handle,
&et,
&logical_start,
clusters_to_add,
0,
ctxt->data_ac,
ctxt->meta_ac,
&why);
if (status < 0) {
mlog_errno(status);
goto leave;
}
prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
status = ocfs2_add_clusters_in_btree(handle,
&et,
&logical_start,
clusters_to_add,
0,
ctxt->data_ac,
ctxt->meta_ac,
&why);
if ((status < 0) && (status != -EAGAIN)) {
if (status != -ENOSPC)
mlog_errno(status);
break;
}
ocfs2_journal_dirty(handle, vb->vb_bh);
ocfs2_journal_dirty(handle, vb->vb_bh);
clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters;
clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) -
prev_clusters;
/*
* We should have already allocated enough space before the transaction,
* so no need to restart.
*/
BUG_ON(why != RESTART_NONE || clusters_to_add);
leave:
if (why != RESTART_NONE && clusters_to_add) {
/*
* We can only fail in case the alloc file doesn't give
* up enough clusters.
*/
BUG_ON(why == RESTART_META);
mlog(0, "restarting xattr value extension for %u"
" clusters,.\n", clusters_to_add);
credits = ocfs2_calc_extend_credits(inode->i_sb,
&vb->vb_xv->xr_list,
clusters_to_add);
status = ocfs2_extend_trans(handle, credits);
if (status < 0) {
status = -ENOMEM;
mlog_errno(status);
break;
}
}
}
return status;
}
......@@ -6788,16 +6804,15 @@ static int ocfs2_lock_reflink_xattr_rec_allocators(
return ret;
}
static int ocfs2_reflink_xattr_buckets(handle_t *handle,
static int ocfs2_reflink_xattr_bucket(handle_t *handle,
u64 blkno, u64 new_blkno, u32 clusters,
u32 *cpos, int num_buckets,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_alloc_context *data_ac,
struct ocfs2_reflink_xattr_tree_args *args)
{
int i, j, ret = 0;
struct super_block *sb = args->reflink->old_inode->i_sb;
u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
u32 num_buckets = clusters * bpc;
int bpb = args->old_bucket->bu_blocks;
struct ocfs2_xattr_value_buf vb = {
.vb_access = ocfs2_journal_access,
......@@ -6816,14 +6831,6 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
break;
}
/*
* The real bucket num in this series of blocks is stored
* in the 1st bucket.
*/
if (i == 0)
num_buckets = le16_to_cpu(
bucket_xh(args->old_bucket)->xh_num_buckets);
ret = ocfs2_xattr_bucket_journal_access(handle,
args->new_bucket,
OCFS2_JOURNAL_ACCESS_CREATE);
......@@ -6837,6 +6844,18 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
bucket_block(args->old_bucket, j),
sb->s_blocksize);
/*
* Record the start cpos so that we can use it to initialize
* our xattr tree we also set the xh_num_bucket for the new
* bucket.
*/
if (i == 0) {
*cpos = le32_to_cpu(bucket_xh(args->new_bucket)->
xh_entries[0].xe_name_hash);
bucket_xh(args->new_bucket)->xh_num_buckets =
cpu_to_le16(num_buckets);
}
ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
ret = ocfs2_reflink_xattr_header(handle, args->reflink,
......@@ -6866,6 +6885,7 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
}
ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
ocfs2_xattr_bucket_relse(args->old_bucket);
ocfs2_xattr_bucket_relse(args->new_bucket);
}
......@@ -6874,6 +6894,75 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
ocfs2_xattr_bucket_relse(args->new_bucket);
return ret;
}
/*
 * Reflink the xattr buckets that start at blkno into newly claimed
 * clusters and insert the matching extent records into the tree
 * described by et.
 *
 * The number of buckets to copy is read from xh_num_buckets of the bucket
 * at blkno.  Clusters are claimed from data_ac one request at a time; the
 * allocator reports how many it handed out in num_clusters, and that many
 * clusters' worth of buckets (capped at the buckets still left) is copied
 * by ocfs2_reflink_xattr_bucket() before the new extent is inserted.  The
 * loop stops once either len clusters or all buckets have been handled.
 *
 * The first extent is inserted at the caller's cpos; later extents use the
 * cpos that ocfs2_reflink_xattr_bucket() hands back through reflink_cpos.
 *
 * Returns 0 on success or the first error reported by a callee.
 */
static int ocfs2_reflink_xattr_buckets(handle_t *handle,
				struct inode *inode,
				struct ocfs2_reflink_xattr_tree_args *args,
				struct ocfs2_extent_tree *et,
				struct ocfs2_alloc_context *meta_ac,
				struct ocfs2_alloc_context *data_ac,
				u64 blkno, u32 cpos, u32 len)
{
	int ret, first_inserted = 0;
	u32 p_cluster, num_clusters, reflink_cpos = 0;
	u64 new_blkno;
	unsigned int num_buckets, reflink_buckets;
	unsigned int bpc =
		ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));

	/* Only the bucket count is needed here, so release right away. */
	ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}
	num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets);
	ocfs2_xattr_bucket_relse(args->old_bucket);

	while (len && num_buckets) {
		ret = ocfs2_claim_clusters(handle, data_ac,
					   1, &p_cluster, &num_clusters);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
		reflink_buckets = min(num_buckets, bpc * num_clusters);

		ret = ocfs2_reflink_xattr_bucket(handle, blkno,
						 new_blkno, num_clusters,
						 &reflink_cpos, reflink_buckets,
						 meta_ac, data_ac, args);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/*
		 * For the 1st allocated cluster, we make it use the same cpos
		 * so that the xattr tree looks the same as the original one
		 * in most cases.
		 */
		if (!first_inserted) {
			reflink_cpos = cpos;
			first_inserted = 1;
		}

		ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno,
					  num_clusters, 0, meta_ac);
		if (ret) {
			/*
			 * Stop here: carrying on would let a later successful
			 * iteration overwrite ret and hide this failure from
			 * the caller.
			 */
			mlog_errno(ret);
			goto out;
		}

		mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
		     (unsigned long long)new_blkno, num_clusters, reflink_cpos);

		len -= num_clusters;
		blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
		num_buckets -= reflink_buckets;
	}
out:
	return ret;
}
/*
* Create the same xattr extent record in the new inode's xattr tree.
*/
......@@ -6885,8 +6974,6 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
void *para)
{
int ret, credits = 0;
u32 p_cluster, num_clusters;
u64 new_blkno;
handle_t *handle;
struct ocfs2_reflink_xattr_tree_args *args =
(struct ocfs2_reflink_xattr_tree_args *)para;
......@@ -6895,6 +6982,9 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
struct ocfs2_alloc_context *data_ac = NULL;
struct ocfs2_extent_tree et;
mlog(0, "reflink xattr buckets %llu len %u\n",
(unsigned long long)blkno, len);
ocfs2_init_xattr_tree_extent_tree(&et,
INODE_CACHE(args->reflink->new_inode),
args->new_blk_bh);
......@@ -6914,32 +7004,12 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
goto out;
}
ret = ocfs2_claim_clusters(handle, data_ac,
len, &p_cluster, &num_clusters);
if (ret) {
mlog_errno(ret);
goto out_commit;
}
new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster);
mlog(0, "reflink xattr buckets %llu to %llu, len %u\n",
(unsigned long long)blkno, (unsigned long long)new_blkno, len);
ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len,
meta_ac, data_ac, args);
if (ret) {
mlog_errno(ret);
goto out_commit;
}
mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
(unsigned long long)new_blkno, len, cpos);
ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno,
len, 0, meta_ac);
ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et,
meta_ac, data_ac,
blkno, cpos, len);
if (ret)
mlog_errno(ret);
out_commit:
ocfs2_commit_trans(osb, handle);
out:
......
......@@ -1026,11 +1026,12 @@ void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
struct jbd2_buffer_trigger_type {
/*
* Fired just before a buffer is written to the journal.
* mapped_data is a mapped buffer that is the frozen data for
* commit.
* Fired at the moment data to write to the journal are known to be
* stable - so either at the moment b_frozen_data is created or just
* before a buffer is written to the journal. mapped_data is a mapped
* buffer that is the frozen data for commit.
*/
void (*t_commit)(struct jbd2_buffer_trigger_type *type,
void (*t_frozen)(struct jbd2_buffer_trigger_type *type,
struct buffer_head *bh, void *mapped_data,
size_t size);
......@@ -1042,7 +1043,7 @@ struct jbd2_buffer_trigger_type {
struct buffer_head *bh);
};
extern void jbd2_buffer_commit_trigger(struct journal_head *jh,
extern void jbd2_buffer_frozen_trigger(struct journal_head *jh,
void *mapped_data,
struct jbd2_buffer_trigger_type *triggers);
extern void jbd2_buffer_abort_trigger(struct journal_head *jh,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册