提交 03e62303 编写于 作者: L Linus Torvalds

Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2

* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2: (47 commits)
  ocfs2: Silence a gcc warning.
  ocfs2: Don't retry xattr set in case value extension fails.
  ocfs2:dlm: avoid dlm->ast_lock lockres->spinlock dependency break
  ocfs2: Reset xattr value size after xa_cleanup_value_truncate().
  fs/ocfs2/dlm: Use kstrdup
  fs/ocfs2/dlm: Drop memory allocation cast
  Ocfs2: Optimize punching-hole code.
  Ocfs2: Make ocfs2_find_cpos_for_left_leaf() public.
  Ocfs2: Fix hole punching to correctly do CoW during cluster zeroing.
  Ocfs2: Optimize ocfs2 truncate to use ocfs2_remove_btree_range() instead.
  ocfs2: Block signals for mkdir/link/symlink/O_CREAT.
  ocfs2: Wrap signal blocking in void functions.
  ocfs2/dlm: Increase o2dlm lockres hash size
  ocfs2: Make ocfs2_extend_trans() really extend.
  ocfs2/trivial: Code cleanup for allocation reservation.
  ocfs2: make ocfs2_adjust_resv_from_alloc simple.
  ocfs2: Make nointr a default mount option
  ocfs2/dlm: Make o2dlm domain join/leave messages KERN_NOTICE
  o2net: log socket state changes
  ocfs2: print node # when tcp fails
  ...
......@@ -80,3 +80,10 @@ user_xattr (*) Enables Extended User Attributes.
nouser_xattr Disables Extended User Attributes.
acl Enables POSIX Access Control Lists support.
noacl (*) Disables POSIX Access Control Lists support.
resv_level=2 (*) Set how agressive allocation reservations will be.
Valid values are between 0 (reservations off) to 8
(maximum space for reservations).
dir_resv_level= (*) By default, directory reservations will scale with file
reservations - users should rarely need to change this
value. If allocation reservations are turned off, this
option will have no effect.
......@@ -29,6 +29,7 @@ ocfs2-objs := \
mmap.o \
namei.o \
refcounttree.o \
reservations.o \
resize.o \
slot_map.o \
suballoc.o \
......
此差异已折叠。
......@@ -140,8 +140,9 @@ int ocfs2_remove_extent(handle_t *handle, struct ocfs2_extent_tree *et,
struct ocfs2_cached_dealloc_ctxt *dealloc);
int ocfs2_remove_btree_range(struct inode *inode,
struct ocfs2_extent_tree *et,
u32 cpos, u32 phys_cpos, u32 len,
struct ocfs2_cached_dealloc_ctxt *dealloc);
u32 cpos, u32 phys_cpos, u32 len, int flags,
struct ocfs2_cached_dealloc_ctxt *dealloc,
u64 refcount_loc);
int ocfs2_num_free_extents(struct ocfs2_super *osb,
struct ocfs2_extent_tree *et);
......@@ -209,7 +210,7 @@ static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c)
int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
u64 blkno, unsigned int bit);
int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
int type, int slot, u64 blkno,
int type, int slot, u64 suballoc, u64 blkno,
unsigned int bit);
static inline int ocfs2_dealloc_has_cluster(struct ocfs2_cached_dealloc_ctxt *c)
{
......@@ -233,8 +234,7 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
struct ocfs2_truncate_context **tc);
int ocfs2_commit_truncate(struct ocfs2_super *osb,
struct inode *inode,
struct buffer_head *fe_bh,
struct ocfs2_truncate_context *tc);
struct buffer_head *di_bh);
int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
unsigned int start, unsigned int end, int trunc);
......@@ -319,6 +319,8 @@ int ocfs2_journal_access_path(struct ocfs2_caching_info *ci,
struct ocfs2_path *path);
int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
struct ocfs2_path *path, u32 *cpos);
int ocfs2_find_cpos_for_left_leaf(struct super_block *sb,
struct ocfs2_path *path, u32 *cpos);
int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
struct ocfs2_path *left,
struct ocfs2_path *right);
......
......@@ -1735,6 +1735,9 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
goto out;
}
if (data_ac)
data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv;
credits = ocfs2_calc_extend_credits(inode->i_sb,
&di->id2.i_list,
clusters_to_alloc);
......
......@@ -116,6 +116,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
define_mask(ERROR),
define_mask(NOTICE),
define_mask(KTHREAD),
define_mask(RESERVATIONS),
};
static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, };
......
......@@ -119,6 +119,7 @@
#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */
#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */
#define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */
#define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */
#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE)
#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT)
......
......@@ -583,6 +583,9 @@ static void o2net_state_change(struct sock *sk)
o2net_sc_queue_work(sc, &sc->sc_connect_work);
break;
default:
printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT
" shutdown, state %d\n",
SC_NODEF_ARGS(sc), sk->sk_state);
o2net_sc_queue_work(sc, &sc->sc_shutdown_work);
break;
}
......
......@@ -1194,7 +1194,7 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
else
de->inode = 0;
dir->i_version++;
status = ocfs2_journal_dirty(handle, bh);
ocfs2_journal_dirty(handle, bh);
goto bail;
}
i += le16_to_cpu(de->rec_len);
......@@ -1752,7 +1752,7 @@ int __ocfs2_add_entry(handle_t *handle,
ocfs2_recalc_free_list(dir, handle, lookup);
dir->i_version++;
status = ocfs2_journal_dirty(handle, insert_bh);
ocfs2_journal_dirty(handle, insert_bh);
retval = 0;
goto bail;
}
......@@ -2297,12 +2297,7 @@ static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb,
}
ocfs2_fill_initial_dirents(inode, parent, data->id_data, size);
ocfs2_journal_dirty(handle, di_bh);
if (ret) {
mlog_errno(ret);
goto out;
}
i_size_write(inode, size);
inode->i_nlink = 2;
......@@ -2366,11 +2361,7 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
ocfs2_init_dir_trailer(inode, new_bh, size);
}
status = ocfs2_journal_dirty(handle, new_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
ocfs2_journal_dirty(handle, new_bh);
i_size_write(inode, inode->i_sb->s_blocksize);
inode->i_nlink = 2;
......@@ -2404,15 +2395,15 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
int ret;
struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
u16 dr_suballoc_bit;
u64 dr_blkno;
u64 suballoc_loc, dr_blkno;
unsigned int num_bits;
struct buffer_head *dx_root_bh = NULL;
struct ocfs2_dx_root_block *dx_root;
struct ocfs2_dir_block_trailer *trailer =
ocfs2_trailer_from_bh(dirdata_bh, dir->i_sb);
ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, &dr_suballoc_bit,
&num_bits, &dr_blkno);
ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
&dr_suballoc_bit, &num_bits, &dr_blkno);
if (ret) {
mlog_errno(ret);
goto out;
......@@ -2440,6 +2431,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
memset(dx_root, 0, osb->sb->s_blocksize);
strcpy(dx_root->dr_signature, OCFS2_DX_ROOT_SIGNATURE);
dx_root->dr_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
dx_root->dr_suballoc_loc = cpu_to_le64(suballoc_loc);
dx_root->dr_suballoc_bit = cpu_to_le16(dr_suballoc_bit);
dx_root->dr_fs_generation = cpu_to_le32(osb->fs_generation);
dx_root->dr_blkno = cpu_to_le64(dr_blkno);
......@@ -2458,10 +2450,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
dx_root->dr_list.l_count =
cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb));
}
ret = ocfs2_journal_dirty(handle, dx_root_bh);
if (ret)
mlog_errno(ret);
ocfs2_journal_dirty(handle, dx_root_bh);
ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh,
OCFS2_JOURNAL_ACCESS_CREATE);
......@@ -2475,9 +2464,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL;
di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
ret = ocfs2_journal_dirty(handle, di_bh);
if (ret)
mlog_errno(ret);
ocfs2_journal_dirty(handle, di_bh);
*ret_dx_root_bh = dx_root_bh;
dx_root_bh = NULL;
......@@ -2558,7 +2545,7 @@ static int __ocfs2_dx_dir_new_cluster(struct inode *dir,
* chance of contiguousness as the directory grows in number
* of entries.
*/
ret = __ocfs2_claim_clusters(osb, handle, data_ac, 1, 1, &phys, &num);
ret = __ocfs2_claim_clusters(handle, data_ac, 1, 1, &phys, &num);
if (ret) {
mlog_errno(ret);
goto out;
......@@ -2991,7 +2978,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
* if we only get one now, that's enough to continue. The rest
* will be claimed after the conversion to extents.
*/
ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len);
if (ocfs2_dir_resv_allowed(osb))
data_ac->ac_resv = &oi->ip_la_data_resv;
ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off, &len);
if (ret) {
mlog_errno(ret);
goto out_commit;
......@@ -3034,11 +3023,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
ocfs2_init_dir_trailer(dir, dirdata_bh, i);
}
ret = ocfs2_journal_dirty(handle, dirdata_bh);
if (ret) {
mlog_errno(ret);
goto out_commit;
}
ocfs2_journal_dirty(handle, dirdata_bh);
if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) {
/*
......@@ -3104,11 +3089,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
*/
dir->i_blocks = ocfs2_inode_sector_count(dir);
ret = ocfs2_journal_dirty(handle, di_bh);
if (ret) {
mlog_errno(ret);
goto out_commit;
}
ocfs2_journal_dirty(handle, di_bh);
if (ocfs2_supports_indexed_dirs(osb)) {
ret = ocfs2_dx_dir_attach_index(osb, handle, dir, di_bh,
......@@ -3138,7 +3119,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
* pass. Claim the 2nd cluster as a separate extent.
*/
if (alloc > len) {
ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off,
ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off,
&len);
if (ret) {
mlog_errno(ret);
......@@ -3369,6 +3350,9 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
goto bail;
}
if (ocfs2_dir_resv_allowed(osb))
data_ac->ac_resv = &OCFS2_I(dir)->ip_la_data_resv;
credits = ocfs2_calc_extend_credits(sb, el, 1);
} else {
spin_unlock(&OCFS2_I(dir)->ip_lock);
......@@ -3423,11 +3407,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
} else {
de->rec_len = cpu_to_le16(sb->s_blocksize);
}
status = ocfs2_journal_dirty(handle, new_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
ocfs2_journal_dirty(handle, new_bh);
dir_i_size += dir->i_sb->s_blocksize;
i_size_write(dir, dir_i_size);
......@@ -3906,11 +3886,7 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
sizeof(struct ocfs2_dx_entry), dx_leaf_sort_cmp,
dx_leaf_sort_swap);
ret = ocfs2_journal_dirty(handle, dx_leaf_bh);
if (ret) {
mlog_errno(ret);
goto out_commit;
}
ocfs2_journal_dirty(handle, dx_leaf_bh);
ret = ocfs2_dx_dir_find_leaf_split(dx_leaf, leaf_cpos, insert_hash,
&split_hash);
......@@ -4490,7 +4466,10 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir,
blk = le64_to_cpu(dx_root->dr_blkno);
bit = le16_to_cpu(dx_root->dr_suballoc_bit);
bg_blkno = ocfs2_which_suballoc_group(blk, bit);
if (dx_root->dr_suballoc_loc)
bg_blkno = le64_to_cpu(dx_root->dr_suballoc_loc);
else
bg_blkno = ocfs2_which_suballoc_group(blk, bit);
ret = ocfs2_free_suballoc_bits(handle, dx_alloc_inode, dx_alloc_bh,
bit, bg_blkno, 1);
if (ret)
......@@ -4551,8 +4530,8 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh)
p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno);
ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen,
&dealloc);
ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, 0,
&dealloc, 0);
if (ret) {
mlog_errno(ret);
goto out;
......
......@@ -88,7 +88,7 @@ static int dlm_should_cancel_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
return 0;
}
static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
{
mlog_entry_void();
......@@ -145,7 +145,7 @@ void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
}
static void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
{
mlog_entry_void();
......@@ -451,7 +451,9 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
ret = o2net_send_message_vec(DLM_PROXY_AST_MSG, dlm->key, vec, veclen,
lock->ml.node, &status);
if (ret < 0)
mlog_errno(ret);
mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
"node %u\n", ret, DLM_PROXY_AST_MSG, dlm->key,
lock->ml.node);
else {
if (status == DLM_RECOVERING) {
mlog(ML_ERROR, "sent AST to node %u, it thinks this "
......
......@@ -37,7 +37,7 @@
#define DLM_THREAD_SHUFFLE_INTERVAL 5 // flush everything every 5 passes
#define DLM_THREAD_MS 200 // flush at least every 200 ms
#define DLM_HASH_SIZE_DEFAULT (1 << 14)
#define DLM_HASH_SIZE_DEFAULT (1 << 17)
#if DLM_HASH_SIZE_DEFAULT < PAGE_SIZE
# define DLM_HASH_PAGES 1
#else
......@@ -904,6 +904,8 @@ void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
void dlm_do_local_ast(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res,
struct dlm_lock *lock);
......
......@@ -390,7 +390,9 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm,
} else if (ret != DLM_NORMAL && ret != DLM_NOTQUEUED)
dlm_error(ret);
} else {
mlog_errno(tmpret);
mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
"node %u\n", tmpret, DLM_CONVERT_LOCK_MSG, dlm->key,
res->owner);
if (dlm_is_host_down(tmpret)) {
/* instead of logging the same network error over
* and over, sleep here and wait for the heartbeat
......
......@@ -511,7 +511,7 @@ static void __dlm_print_nodes(struct dlm_ctxt *dlm)
assert_spin_locked(&dlm->spinlock);
printk(KERN_INFO "ocfs2_dlm: Nodes in domain (\"%s\"): ", dlm->name);
printk(KERN_NOTICE "o2dlm: Nodes in domain %s: ", dlm->name);
while ((node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES,
node + 1)) < O2NM_MAX_NODES) {
......@@ -534,7 +534,7 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data,
node = exit_msg->node_idx;
printk(KERN_INFO "ocfs2_dlm: Node %u leaves domain %s\n", node, dlm->name);
printk(KERN_NOTICE "o2dlm: Node %u leaves domain %s\n", node, dlm->name);
spin_lock(&dlm->spinlock);
clear_bit(node, dlm->domain_map);
......@@ -565,7 +565,9 @@ static int dlm_send_one_domain_exit(struct dlm_ctxt *dlm,
status = o2net_send_message(DLM_EXIT_DOMAIN_MSG, dlm->key,
&leave_msg, sizeof(leave_msg), node,
NULL);
if (status < 0)
mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
"node %u\n", status, DLM_EXIT_DOMAIN_MSG, dlm->key, node);
mlog(0, "status return %d from o2net_send_message\n", status);
return status;
......@@ -904,7 +906,7 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
set_bit(assert->node_idx, dlm->domain_map);
__dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN);
printk(KERN_INFO "ocfs2_dlm: Node %u joins domain %s\n",
printk(KERN_NOTICE "o2dlm: Node %u joins domain %s\n",
assert->node_idx, dlm->name);
__dlm_print_nodes(dlm);
......@@ -962,7 +964,9 @@ static int dlm_send_one_join_cancel(struct dlm_ctxt *dlm,
&cancel_msg, sizeof(cancel_msg), node,
NULL);
if (status < 0) {
mlog_errno(status);
mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
"node %u\n", status, DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY,
node);
goto bail;
}
......@@ -1029,10 +1033,11 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
byte_copymap(join_msg.node_map, dlm->live_nodes_map, O2NM_MAX_NODES);
status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg,
sizeof(join_msg), node,
&join_resp);
sizeof(join_msg), node, &join_resp);
if (status < 0 && status != -ENOPROTOOPT) {
mlog_errno(status);
mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
"node %u\n", status, DLM_QUERY_JOIN_MSG, DLM_MOD_KEY,
node);
goto bail;
}
dlm_query_join_wire_to_packet(join_resp, &packet);
......@@ -1103,7 +1108,9 @@ static int dlm_send_one_join_assert(struct dlm_ctxt *dlm,
&assert_msg, sizeof(assert_msg), node,
NULL);
if (status < 0)
mlog_errno(status);
mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
"node %u\n", status, DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY,
node);
return status;
}
......@@ -1516,7 +1523,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
goto leave;
}
dlm->name = kmalloc(strlen(domain) + 1, GFP_KERNEL);
dlm->name = kstrdup(domain, GFP_KERNEL);
if (dlm->name == NULL) {
mlog_errno(-ENOMEM);
kfree(dlm);
......@@ -1550,7 +1557,6 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
for (i = 0; i < DLM_HASH_BUCKETS; i++)
INIT_HLIST_HEAD(dlm_master_hash(dlm, i));
strcpy(dlm->name, domain);
dlm->key = key;
dlm->node_num = o2nm_this_node();
......
......@@ -329,7 +329,9 @@ static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm,
BUG();
}
} else {
mlog_errno(tmpret);
mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
"node %u\n", tmpret, DLM_CREATE_LOCK_MSG, dlm->key,
res->owner);
if (dlm_is_host_down(tmpret)) {
ret = DLM_RECOVERING;
mlog(0, "node %u died so returning DLM_RECOVERING "
......@@ -429,7 +431,7 @@ struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie,
struct dlm_lock *lock;
int kernel_allocated = 0;
lock = (struct dlm_lock *) kmem_cache_zalloc(dlm_lock_cache, GFP_NOFS);
lock = kmem_cache_zalloc(dlm_lock_cache, GFP_NOFS);
if (!lock)
return NULL;
......
......@@ -617,13 +617,11 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
{
struct dlm_lock_resource *res = NULL;
res = (struct dlm_lock_resource *)
kmem_cache_zalloc(dlm_lockres_cache, GFP_NOFS);
res = kmem_cache_zalloc(dlm_lockres_cache, GFP_NOFS);
if (!res)
goto error;
res->lockname.name = (char *)
kmem_cache_zalloc(dlm_lockname_cache, GFP_NOFS);
res->lockname.name = kmem_cache_zalloc(dlm_lockname_cache, GFP_NOFS);
if (!res->lockname.name)
goto error;
......@@ -757,8 +755,7 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
spin_unlock(&dlm->spinlock);
mlog(0, "allocating a new resource\n");
/* nothing found and we need to allocate one. */
alloc_mle = (struct dlm_master_list_entry *)
kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
alloc_mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
if (!alloc_mle)
goto leave;
res = dlm_new_lockres(dlm, lockid, namelen);
......@@ -1542,8 +1539,7 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data,
spin_unlock(&dlm->master_lock);
spin_unlock(&dlm->spinlock);
mle = (struct dlm_master_list_entry *)
kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
if (!mle) {
response = DLM_MASTER_RESP_ERROR;
mlog_errno(-ENOMEM);
......@@ -1666,7 +1662,9 @@ static int dlm_do_assert_master(struct dlm_ctxt *dlm,
tmpret = o2net_send_message(DLM_ASSERT_MASTER_MSG, dlm->key,
&assert, sizeof(assert), to, &r);
if (tmpret < 0) {
mlog(0, "assert_master returned %d!\n", tmpret);
mlog(ML_ERROR, "Error %d when sending message %u (key "
"0x%x) to node %u\n", tmpret,
DLM_ASSERT_MASTER_MSG, dlm->key, to);
if (!dlm_is_host_down(tmpret)) {
mlog(ML_ERROR, "unhandled error=%d!\n", tmpret);
BUG();
......@@ -2205,7 +2203,9 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
ret = o2net_send_message(DLM_DEREF_LOCKRES_MSG, dlm->key,
&deref, sizeof(deref), res->owner, &r);
if (ret < 0)
mlog_errno(ret);
mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
"node %u\n", ret, DLM_DEREF_LOCKRES_MSG, dlm->key,
res->owner);
else if (r < 0) {
/* BAD. other node says I did not have a ref. */
mlog(ML_ERROR,"while dropping ref on %s:%.*s "
......@@ -2452,8 +2452,7 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
goto leave;
}
mle = (struct dlm_master_list_entry *) kmem_cache_alloc(dlm_mle_cache,
GFP_NOFS);
mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
if (!mle) {
mlog_errno(ret);
goto leave;
......@@ -2975,7 +2974,9 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm,
&migrate, sizeof(migrate), nodenum,
&status);
if (ret < 0) {
mlog(0, "migrate_request returned %d!\n", ret);
mlog(ML_ERROR, "Error %d when sending message %u (key "
"0x%x) to node %u\n", ret, DLM_MIGRATE_REQUEST_MSG,
dlm->key, nodenum);
if (!dlm_is_host_down(ret)) {
mlog(ML_ERROR, "unhandled error=%d!\n", ret);
BUG();
......@@ -3033,8 +3034,7 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data,
hash = dlm_lockid_hash(name, namelen);
/* preallocate.. if this fails, abort */
mle = (struct dlm_master_list_entry *) kmem_cache_alloc(dlm_mle_cache,
GFP_NOFS);
mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
if (!mle) {
ret = -ENOMEM;
......
......@@ -803,7 +803,9 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from,
/* negative status is handled by caller */
if (ret < 0)
mlog_errno(ret);
mlog(ML_ERROR, "Error %d when sending message %u (key "
"0x%x) to node %u\n", ret, DLM_LOCK_REQUEST_MSG,
dlm->key, request_from);
// return from here, then
// sleep until all received or error
......@@ -955,10 +957,10 @@ static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, u8 dead_node, u8 send_to)
ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg,
sizeof(done_msg), send_to, &tmpret);
if (ret < 0) {
mlog(ML_ERROR, "Error %d when sending message %u (key "
"0x%x) to node %u\n", ret, DLM_RECO_DATA_DONE_MSG,
dlm->key, send_to);
if (!dlm_is_host_down(ret)) {
mlog_errno(ret);
mlog(ML_ERROR, "%s: unknown error sending data-done "
"to %u\n", dlm->name, send_to);
BUG();
}
} else
......@@ -1126,7 +1128,9 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
if (ret < 0) {
/* XXX: negative status is not handled.
* this will end up killing this node. */
mlog_errno(ret);
mlog(ML_ERROR, "Error %d when sending message %u (key "
"0x%x) to node %u\n", ret, DLM_MIG_LOCKRES_MSG,
dlm->key, send_to);
} else {
/* might get an -ENOMEM back here */
ret = status;
......@@ -1642,7 +1646,9 @@ int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
&req, sizeof(req), nodenum, &status);
/* XXX: negative status not handled properly here. */
if (ret < 0)
mlog_errno(ret);
mlog(ML_ERROR, "Error %d when sending message %u (key "
"0x%x) to node %u\n", ret, DLM_MASTER_REQUERY_MSG,
dlm->key, nodenum);
else {
BUG_ON(status < 0);
BUG_ON(status > DLM_LOCK_RES_OWNER_UNKNOWN);
......@@ -2640,7 +2646,7 @@ static int dlm_send_begin_reco_message(struct dlm_ctxt *dlm, u8 dead_node)
if (dlm_is_host_down(ret)) {
/* node is down. not involved in recovery
* so just keep going */
mlog(0, "%s: node %u was down when sending "
mlog(ML_NOTICE, "%s: node %u was down when sending "
"begin reco msg (%d)\n", dlm->name, nodenum, ret);
ret = 0;
}
......@@ -2660,11 +2666,12 @@ static int dlm_send_begin_reco_message(struct dlm_ctxt *dlm, u8 dead_node)
}
if (ret < 0) {
struct dlm_lock_resource *res;
/* this is now a serious problem, possibly ENOMEM
* in the network stack. must retry */
mlog_errno(ret);
mlog(ML_ERROR, "begin reco of dlm %s to node %u "
" returned %d\n", dlm->name, nodenum, ret);
"returned %d\n", dlm->name, nodenum, ret);
res = dlm_lookup_lockres(dlm, DLM_RECOVERY_LOCK_NAME,
DLM_RECOVERY_LOCK_NAME_LEN);
if (res) {
......@@ -2789,7 +2796,9 @@ static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm)
if (ret >= 0)
ret = status;
if (ret < 0) {
mlog_errno(ret);
mlog(ML_ERROR, "Error %d when sending message %u (key "
"0x%x) to node %u\n", ret, DLM_FINALIZE_RECO_MSG,
dlm->key, nodenum);
if (dlm_is_host_down(ret)) {
/* this has no effect on this recovery
* session, so set the status to zero to
......
......@@ -309,6 +309,7 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
* spinlock, and because we know that it is not migrating/
* recovering/in-progress, it is fine to reserve asts and
* basts right before queueing them all throughout */
assert_spin_locked(&dlm->ast_lock);
assert_spin_locked(&res->spinlock);
BUG_ON((res->state & (DLM_LOCK_RES_MIGRATING|
DLM_LOCK_RES_RECOVERING|
......@@ -337,7 +338,7 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
/* queue the BAST if not already */
if (lock->ml.highest_blocked == LKM_IVMODE) {
__dlm_lockres_reserve_ast(res);
dlm_queue_bast(dlm, lock);
__dlm_queue_bast(dlm, lock);
}
/* update the highest_blocked if needed */
if (lock->ml.highest_blocked < target->ml.convert_type)
......@@ -355,7 +356,7 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
can_grant = 0;
if (lock->ml.highest_blocked == LKM_IVMODE) {
__dlm_lockres_reserve_ast(res);
dlm_queue_bast(dlm, lock);
__dlm_queue_bast(dlm, lock);
}
if (lock->ml.highest_blocked < target->ml.convert_type)
lock->ml.highest_blocked =
......@@ -383,7 +384,7 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
spin_unlock(&target->spinlock);
__dlm_lockres_reserve_ast(res);
dlm_queue_ast(dlm, target);
__dlm_queue_ast(dlm, target);
/* go back and check for more */
goto converting;
}
......@@ -402,7 +403,7 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
can_grant = 0;
if (lock->ml.highest_blocked == LKM_IVMODE) {
__dlm_lockres_reserve_ast(res);
dlm_queue_bast(dlm, lock);
__dlm_queue_bast(dlm, lock);
}
if (lock->ml.highest_blocked < target->ml.type)
lock->ml.highest_blocked = target->ml.type;
......@@ -418,7 +419,7 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
can_grant = 0;
if (lock->ml.highest_blocked == LKM_IVMODE) {
__dlm_lockres_reserve_ast(res);
dlm_queue_bast(dlm, lock);
__dlm_queue_bast(dlm, lock);
}
if (lock->ml.highest_blocked < target->ml.type)
lock->ml.highest_blocked = target->ml.type;
......@@ -444,7 +445,7 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
spin_unlock(&target->spinlock);
__dlm_lockres_reserve_ast(res);
dlm_queue_ast(dlm, target);
__dlm_queue_ast(dlm, target);
/* go back and check for more */
goto converting;
}
......@@ -674,6 +675,7 @@ static int dlm_thread(void *data)
/* lockres can be re-dirtied/re-added to the
* dirty_list in this gap, but that is ok */
spin_lock(&dlm->ast_lock);
spin_lock(&res->spinlock);
if (res->owner != dlm->node_num) {
__dlm_print_one_lock_resource(res);
......@@ -694,6 +696,7 @@ static int dlm_thread(void *data)
/* move it to the tail and keep going */
res->state &= ~DLM_LOCK_RES_DIRTY;
spin_unlock(&res->spinlock);
spin_unlock(&dlm->ast_lock);
mlog(0, "delaying list shuffling for in-"
"progress lockres %.*s, state=%d\n",
res->lockname.len, res->lockname.name,
......@@ -715,6 +718,7 @@ static int dlm_thread(void *data)
dlm_shuffle_lists(dlm, res);
res->state &= ~DLM_LOCK_RES_DIRTY;
spin_unlock(&res->spinlock);
spin_unlock(&dlm->ast_lock);
dlm_lockres_calc_usage(dlm, res);
......
......@@ -354,7 +354,8 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
mlog(0, "master was in-progress. retry\n");
ret = status;
} else {
mlog_errno(tmpret);
mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
"node %u\n", tmpret, DLM_UNLOCK_LOCK_MSG, dlm->key, owner);
if (dlm_is_host_down(tmpret)) {
/* NOTE: this seems strange, but it is what we want.
* when the master goes down during a cancel or
......
......@@ -278,10 +278,7 @@ int ocfs2_update_inode_atime(struct inode *inode,
inode->i_atime = CURRENT_TIME;
di->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
ret = ocfs2_journal_dirty(handle, bh);
if (ret < 0)
mlog_errno(ret);
ocfs2_journal_dirty(handle, bh);
out_commit:
ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
......@@ -430,9 +427,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
status = ocfs2_journal_dirty(handle, fe_bh);
if (status < 0)
mlog_errno(status);
ocfs2_journal_dirty(handle, fe_bh);
out_commit:
ocfs2_commit_trans(osb, handle);
......@@ -449,7 +444,6 @@ static int ocfs2_truncate_file(struct inode *inode,
int status = 0;
struct ocfs2_dinode *fe = NULL;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_truncate_context *tc = NULL;
mlog_entry("(inode = %llu, new_i_size = %llu\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno,
......@@ -488,6 +482,9 @@ static int ocfs2_truncate_file(struct inode *inode,
down_write(&OCFS2_I(inode)->ip_alloc_sem);
ocfs2_resv_discard(&osb->osb_la_resmap,
&OCFS2_I(inode)->ip_la_data_resv);
/*
* The inode lock forced other nodes to sync and drop their
* pages, which (correctly) happens even if we have a truncate
......@@ -517,13 +514,7 @@ static int ocfs2_truncate_file(struct inode *inode,
goto bail_unlock_sem;
}
status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc);
if (status < 0) {
mlog_errno(status);
goto bail_unlock_sem;
}
status = ocfs2_commit_truncate(osb, inode, di_bh, tc);
status = ocfs2_commit_truncate(osb, inode, di_bh);
if (status < 0) {
mlog_errno(status);
goto bail_unlock_sem;
......@@ -666,11 +657,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
goto leave;
}
status = ocfs2_journal_dirty(handle, bh);
if (status < 0) {
mlog_errno(status);
goto leave;
}
ocfs2_journal_dirty(handle, bh);
spin_lock(&OCFS2_I(inode)->ip_lock);
clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters);
......@@ -1195,9 +1182,7 @@ static int __ocfs2_write_remove_suid(struct inode *inode,
di = (struct ocfs2_dinode *) bh->b_data;
di->i_mode = cpu_to_le16(inode->i_mode);
ret = ocfs2_journal_dirty(handle, bh);
if (ret < 0)
mlog_errno(ret);
ocfs2_journal_dirty(handle, bh);
out_trans:
ocfs2_commit_trans(osb, handle);
......@@ -1434,16 +1419,90 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
return ret;
}
static int ocfs2_find_rec(struct ocfs2_extent_list *el, u32 pos)
{
int i;
struct ocfs2_extent_rec *rec = NULL;
for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
rec = &el->l_recs[i];
if (le32_to_cpu(rec->e_cpos) < pos)
break;
}
return i;
}
/*
* Helper to calculate the punching pos and length in one run, we handle the
* following three cases in order:
*
* - remove the entire record
* - remove a partial record
* - no record needs to be removed (hole-punching completed)
*/
static void ocfs2_calc_trunc_pos(struct inode *inode,
struct ocfs2_extent_list *el,
struct ocfs2_extent_rec *rec,
u32 trunc_start, u32 *trunc_cpos,
u32 *trunc_len, u32 *trunc_end,
u64 *blkno, int *done)
{
int ret = 0;
u32 coff, range;
range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
if (le32_to_cpu(rec->e_cpos) >= trunc_start) {
*trunc_cpos = le32_to_cpu(rec->e_cpos);
/*
* Skip holes if any.
*/
if (range < *trunc_end)
*trunc_end = range;
*trunc_len = *trunc_end - le32_to_cpu(rec->e_cpos);
*blkno = le64_to_cpu(rec->e_blkno);
*trunc_end = le32_to_cpu(rec->e_cpos);
} else if (range > trunc_start) {
*trunc_cpos = trunc_start;
*trunc_len = *trunc_end - trunc_start;
coff = trunc_start - le32_to_cpu(rec->e_cpos);
*blkno = le64_to_cpu(rec->e_blkno) +
ocfs2_clusters_to_blocks(inode->i_sb, coff);
*trunc_end = trunc_start;
} else {
/*
* It may have two following possibilities:
*
* - last record has been removed
* - trunc_start was within a hole
*
* both two cases mean the completion of hole punching.
*/
ret = 1;
}
*done = ret;
}
static int ocfs2_remove_inode_range(struct inode *inode,
struct buffer_head *di_bh, u64 byte_start,
u64 byte_len)
{
int ret = 0;
u32 trunc_start, trunc_len, cpos, phys_cpos, alloc_size;
int ret = 0, flags = 0, done = 0, i;
u32 trunc_start, trunc_len, trunc_end, trunc_cpos, phys_cpos;
u32 cluster_in_el;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_cached_dealloc_ctxt dealloc;
struct address_space *mapping = inode->i_mapping;
struct ocfs2_extent_tree et;
struct ocfs2_path *path = NULL;
struct ocfs2_extent_list *el = NULL;
struct ocfs2_extent_rec *rec = NULL;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
u64 blkno, refcount_loc = le64_to_cpu(di->i_refcount_loc);
ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
ocfs2_init_dealloc_ctxt(&dealloc);
......@@ -1469,17 +1528,35 @@ static int ocfs2_remove_inode_range(struct inode *inode,
goto out;
}
/*
* For reflinks, we may need to CoW 2 clusters which might be
* partially zero'd later, if hole's start and end offset were
* within one cluster(means is not exactly aligned to clustersize).
*/
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
ret = ocfs2_cow_file_pos(inode, di_bh, byte_start);
if (ret) {
mlog_errno(ret);
goto out;
}
ret = ocfs2_cow_file_pos(inode, di_bh, byte_start + byte_len);
if (ret) {
mlog_errno(ret);
goto out;
}
}
trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start);
trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits;
if (trunc_len >= trunc_start)
trunc_len -= trunc_start;
else
trunc_len = 0;
trunc_end = (byte_start + byte_len) >> osb->s_clustersize_bits;
cluster_in_el = trunc_end;
mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u\n",
mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, cend: %u\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno,
(unsigned long long)byte_start,
(unsigned long long)byte_len, trunc_start, trunc_len);
(unsigned long long)byte_len, trunc_start, trunc_end);
ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len);
if (ret) {
......@@ -1487,31 +1564,79 @@ static int ocfs2_remove_inode_range(struct inode *inode,
goto out;
}
cpos = trunc_start;
while (trunc_len) {
ret = ocfs2_get_clusters(inode, cpos, &phys_cpos,
&alloc_size, NULL);
path = ocfs2_new_path_from_et(&et);
if (!path) {
ret = -ENOMEM;
mlog_errno(ret);
goto out;
}
while (trunc_end > trunc_start) {
ret = ocfs2_find_path(INODE_CACHE(inode), path,
cluster_in_el);
if (ret) {
mlog_errno(ret);
goto out;
}
if (alloc_size > trunc_len)
alloc_size = trunc_len;
el = path_leaf_el(path);
/* Only do work for non-holes */
if (phys_cpos != 0) {
ret = ocfs2_remove_btree_range(inode, &et, cpos,
phys_cpos, alloc_size,
&dealloc);
i = ocfs2_find_rec(el, trunc_end);
/*
* Need to go to previous extent block.
*/
if (i < 0) {
if (path->p_tree_depth == 0)
break;
ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb,
path,
&cluster_in_el);
if (ret) {
mlog_errno(ret);
goto out;
}
/*
* We've reached the leftmost extent block,
* it's safe to leave.
*/
if (cluster_in_el == 0)
break;
/*
* The 'pos' searched for previous extent block is
* always one cluster less than actual trunc_end.
*/
trunc_end = cluster_in_el + 1;
ocfs2_reinit_path(path, 1);
continue;
} else
rec = &el->l_recs[i];
ocfs2_calc_trunc_pos(inode, el, rec, trunc_start, &trunc_cpos,
&trunc_len, &trunc_end, &blkno, &done);
if (done)
break;
flags = rec->e_flags;
phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
ret = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
phys_cpos, trunc_len, flags,
&dealloc, refcount_loc);
if (ret < 0) {
mlog_errno(ret);
goto out;
}
cpos += alloc_size;
trunc_len -= alloc_size;
cluster_in_el = trunc_end;
ocfs2_reinit_path(path, 1);
}
ocfs2_truncate_cluster_pages(inode, byte_start, byte_len);
......
......@@ -376,6 +376,10 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
OCFS2_I(inode)->ip_last_used_slot = 0;
OCFS2_I(inode)->ip_last_used_group = 0;
if (S_ISDIR(inode->i_mode))
ocfs2_resv_set_type(&OCFS2_I(inode)->ip_la_data_resv,
OCFS2_RESV_FLAG_DIR);
mlog_exit_void();
}
......@@ -539,7 +543,6 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
struct buffer_head *fe_bh)
{
int status = 0;
struct ocfs2_truncate_context *tc = NULL;
struct ocfs2_dinode *fe;
handle_t *handle = NULL;
......@@ -582,13 +585,7 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
ocfs2_commit_trans(osb, handle);
handle = NULL;
status = ocfs2_prepare_truncate(osb, inode, fe_bh, &tc);
if (status < 0) {
mlog_errno(status);
goto out;
}
status = ocfs2_commit_truncate(osb, inode, fe_bh, tc);
status = ocfs2_commit_truncate(osb, inode, fe_bh);
if (status < 0) {
mlog_errno(status);
goto out;
......@@ -659,12 +656,7 @@ static int ocfs2_remove_inode(struct inode *inode,
di->i_dtime = cpu_to_le64(CURRENT_TIME.tv_sec);
di->i_flags &= cpu_to_le32(~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL));
status = ocfs2_journal_dirty(handle, di_bh);
if (status < 0) {
mlog_errno(status);
goto bail_commit;
}
ocfs2_journal_dirty(handle, di_bh);
ocfs2_remove_from_cache(INODE_CACHE(inode), di_bh);
dquot_free_inode(inode);
......@@ -980,7 +972,7 @@ static void ocfs2_cleanup_delete_inode(struct inode *inode,
void ocfs2_delete_inode(struct inode *inode)
{
int wipe, status;
sigset_t blocked, oldset;
sigset_t oldset;
struct buffer_head *di_bh = NULL;
mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
......@@ -1007,13 +999,7 @@ void ocfs2_delete_inode(struct inode *inode)
* messaging paths may return us -ERESTARTSYS. Which would
* cause us to exit early, resulting in inodes being orphaned
* forever. */
sigfillset(&blocked);
status = sigprocmask(SIG_BLOCK, &blocked, &oldset);
if (status < 0) {
mlog_errno(status);
ocfs2_cleanup_delete_inode(inode, 1);
goto bail;
}
ocfs2_block_signals(&oldset);
/*
* Synchronize us against ocfs2_get_dentry. We take this in
......@@ -1087,9 +1073,7 @@ void ocfs2_delete_inode(struct inode *inode)
ocfs2_nfs_sync_unlock(OCFS2_SB(inode->i_sb), 0);
bail_unblock:
status = sigprocmask(SIG_SETMASK, &oldset, NULL);
if (status < 0)
mlog_errno(status);
ocfs2_unblock_signals(&oldset);
bail:
clear_inode(inode);
mlog_exit_void();
......@@ -1123,6 +1107,10 @@ void ocfs2_clear_inode(struct inode *inode)
ocfs2_mark_lockres_freeing(&oi->ip_inode_lockres);
ocfs2_mark_lockres_freeing(&oi->ip_open_lockres);
ocfs2_resv_discard(&OCFS2_SB(inode->i_sb)->osb_la_resmap,
&oi->ip_la_data_resv);
ocfs2_resv_init_once(&oi->ip_la_data_resv);
/* We very well may get a clear_inode before all an inodes
* metadata has hit disk. Of course, we can't drop any cluster
* locks until the journal has finished with it. The only
......@@ -1298,13 +1286,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
fe->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
fe->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
status = ocfs2_journal_dirty(handle, bh);
if (status < 0)
mlog_errno(status);
status = 0;
ocfs2_journal_dirty(handle, bh);
leave:
mlog_exit(status);
return status;
}
......
......@@ -70,6 +70,8 @@ struct ocfs2_inode_info
/* Only valid if the inode is the dir. */
u32 ip_last_used_slot;
u64 ip_last_used_group;
struct ocfs2_alloc_reservation ip_la_data_resv;
};
/*
......
......@@ -402,9 +402,7 @@ int ocfs2_commit_trans(struct ocfs2_super *osb,
}
/*
* 'nblocks' is what you want to add to the current
* transaction. extend_trans will either extend the current handle by
* nblocks, or commit it and start a new one with nblocks credits.
* 'nblocks' is what you want to add to the current transaction.
*
* This might call jbd2_journal_restart() which will commit dirty buffers
* and then restart the transaction. Before calling
......@@ -422,11 +420,15 @@ int ocfs2_commit_trans(struct ocfs2_super *osb,
*/
int ocfs2_extend_trans(handle_t *handle, int nblocks)
{
int status;
int status, old_nblocks;
BUG_ON(!handle);
BUG_ON(!nblocks);
BUG_ON(nblocks < 0);
if (!nblocks)
return 0;
old_nblocks = handle->h_buffer_credits;
mlog_entry_void();
mlog(0, "Trying to extend transaction by %d blocks\n", nblocks);
......@@ -445,7 +447,8 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks)
mlog(0,
"jbd2_journal_extend failed, trying "
"jbd2_journal_restart\n");
status = jbd2_journal_restart(handle, nblocks);
status = jbd2_journal_restart(handle,
old_nblocks + nblocks);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -734,8 +737,7 @@ int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
return __ocfs2_journal_access(handle, ci, bh, NULL, type);
}
int ocfs2_journal_dirty(handle_t *handle,
struct buffer_head *bh)
void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh)
{
int status;
......@@ -743,13 +745,9 @@ int ocfs2_journal_dirty(handle_t *handle,
(unsigned long long)bh->b_blocknr);
status = jbd2_journal_dirty_metadata(handle, bh);
if (status < 0)
mlog(ML_ERROR, "Could not dirty metadata buffer. "
"(bh->b_blocknr=%llu)\n",
(unsigned long long)bh->b_blocknr);
BUG_ON(status);
mlog_exit(status);
return status;
mlog_exit_void();
}
#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)
......
......@@ -325,8 +325,7 @@ int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
* <modify the bh>
* ocfs2_journal_dirty(handle, bh);
*/
int ocfs2_journal_dirty(handle_t *handle,
struct buffer_head *bh);
void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh);
/*
* Credit Macros:
......@@ -562,6 +561,18 @@ static inline int ocfs2_calc_group_alloc_credits(struct super_block *sb,
return blocks;
}
/*
* Allocating a discontiguous block group requires the credits from
* ocfs2_calc_group_alloc_credits() as well as enough credits to fill
* the group descriptor's extent list. The caller already has started
* the transaction with ocfs2_calc_group_alloc_credits(). They extend
* it with these credits.
*/
static inline int ocfs2_calc_bg_discontig_credits(struct super_block *sb)
{
return ocfs2_extent_recs_per_gd(sb);
}
static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb,
unsigned int clusters_to_del,
struct ocfs2_dinode *fe,
......
......@@ -52,7 +52,8 @@ static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc);
static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
struct ocfs2_dinode *alloc,
u32 numbits);
u32 *numbits,
struct ocfs2_alloc_reservation *resv);
static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc);
......@@ -74,6 +75,144 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
struct inode *local_alloc_inode);
/*
* ocfs2_la_default_mb() - determine a default size, in megabytes of
* the local alloc.
*
* Generally, we'd like to pick as large a local alloc as
* possible. Performance on large workloads tends to scale
* proportionally to la size. In addition to that, the reservations
* code functions more efficiently as it can reserve more windows for
* write.
*
* Some things work against us when trying to choose a large local alloc:
*
* - We need to ensure our sizing is picked to leave enough space in
* group descriptors for other allocations (such as block groups,
* etc). Picking default sizes which are a multiple of 4 could help
* - block groups are allocated in 2mb and 4mb chunks.
*
* - Likewise, we don't want to starve other nodes of bits on small
* file systems. This can easily be taken care of by limiting our
* default to a reasonable size (256M) on larger cluster sizes.
*
* - Some file systems can't support very large sizes - 4k and 8k in
* particular are limited to less than 128 and 256 megabytes respectively.
*
* The following reference table shows group descriptor and local
* alloc maximums at various cluster sizes (4k blocksize)
*
* csize: 4K group: 126M la: 121M
* csize: 8K group: 252M la: 243M
* csize: 16K group: 504M la: 486M
* csize: 32K group: 1008M la: 972M
* csize: 64K group: 2016M la: 1944M
* csize: 128K group: 4032M la: 3888M
* csize: 256K group: 8064M la: 7776M
* csize: 512K group: 16128M la: 15552M
* csize: 1024K group: 32256M la: 31104M
*/
#define OCFS2_LA_MAX_DEFAULT_MB 256
#define OCFS2_LA_OLD_DEFAULT 8
unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
{
unsigned int la_mb;
unsigned int gd_mb;
unsigned int megs_per_slot;
struct super_block *sb = osb->sb;
gd_mb = ocfs2_clusters_to_megabytes(osb->sb,
8 * ocfs2_group_bitmap_size(sb, 0, osb->s_feature_incompat));
/*
* This takes care of files systems with very small group
* descriptors - 512 byte blocksize at cluster sizes lower
* than 16K and also 1k blocksize with 4k cluster size.
*/
if ((sb->s_blocksize == 512 && osb->s_clustersize <= 8192)
|| (sb->s_blocksize == 1024 && osb->s_clustersize == 4096))
return OCFS2_LA_OLD_DEFAULT;
/*
* Leave enough room for some block groups and make the final
* value we work from a multiple of 4.
*/
gd_mb -= 16;
gd_mb &= 0xFFFFFFFB;
la_mb = gd_mb;
/*
* Keep window sizes down to a reasonable default
*/
if (la_mb > OCFS2_LA_MAX_DEFAULT_MB) {
/*
* Some clustersize / blocksize combinations will have
* given us a larger than OCFS2_LA_MAX_DEFAULT_MB
* default size, but get poor distribution when
* limited to exactly 256 megabytes.
*
* As an example, 16K clustersize at 4K blocksize
* gives us a cluster group size of 504M. Paring the
* local alloc size down to 256 however, would give us
* only one window and around 200MB left in the
* cluster group. Instead, find the first size below
* 256 which would give us an even distribution.
*
* Larger cluster group sizes actually work out pretty
* well when pared to 256, so we don't have to do this
* for any group that fits more than two
* OCFS2_LA_MAX_DEFAULT_MB windows.
*/
if (gd_mb > (2 * OCFS2_LA_MAX_DEFAULT_MB))
la_mb = 256;
else {
unsigned int gd_mult = gd_mb;
while (gd_mult > 256)
gd_mult = gd_mult >> 1;
la_mb = gd_mult;
}
}
megs_per_slot = osb->osb_clusters_at_boot / osb->max_slots;
megs_per_slot = ocfs2_clusters_to_megabytes(osb->sb, megs_per_slot);
/* Too many nodes, too few disk clusters. */
if (megs_per_slot < la_mb)
la_mb = megs_per_slot;
return la_mb;
}
void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb)
{
struct super_block *sb = osb->sb;
unsigned int la_default_mb = ocfs2_la_default_mb(osb);
unsigned int la_max_mb;
la_max_mb = ocfs2_clusters_to_megabytes(sb,
ocfs2_local_alloc_size(sb) * 8);
mlog(0, "requested: %dM, max: %uM, default: %uM\n",
requested_mb, la_max_mb, la_default_mb);
if (requested_mb == -1) {
/* No user request - use defaults */
osb->local_alloc_default_bits =
ocfs2_megabytes_to_clusters(sb, la_default_mb);
} else if (requested_mb > la_max_mb) {
/* Request is too big, we give the maximum available */
osb->local_alloc_default_bits =
ocfs2_megabytes_to_clusters(sb, la_max_mb);
} else {
osb->local_alloc_default_bits =
ocfs2_megabytes_to_clusters(sb, requested_mb);
}
osb->local_alloc_bits = osb->local_alloc_default_bits;
}
static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
{
return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
......@@ -156,7 +295,7 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
osb->local_alloc_bits, (osb->bitmap_cpg - 1));
osb->local_alloc_bits =
ocfs2_megabytes_to_clusters(osb->sb,
OCFS2_DEFAULT_LOCAL_ALLOC_SIZE);
ocfs2_la_default_mb(osb));
}
/* read the alloc off disk */
......@@ -262,6 +401,8 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
osb->local_alloc_state = OCFS2_LA_DISABLED;
ocfs2_resmap_uninit(&osb->osb_la_resmap);
main_bm_inode = ocfs2_get_system_file_inode(osb,
GLOBAL_BITMAP_SYSTEM_INODE,
OCFS2_INVALID_SLOT);
......@@ -305,12 +446,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
}
ocfs2_clear_local_alloc(alloc);
status = ocfs2_journal_dirty(handle, bh);
if (status < 0) {
mlog_errno(status);
goto out_commit;
}
ocfs2_journal_dirty(handle, bh);
brelse(bh);
osb->local_alloc_bh = NULL;
......@@ -481,46 +617,6 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
return status;
}
/* Check to see if the local alloc window is within ac->ac_max_block */
static int ocfs2_local_alloc_in_range(struct inode *inode,
struct ocfs2_alloc_context *ac,
u32 bits_wanted)
{
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_dinode *alloc;
struct ocfs2_local_alloc *la;
int start;
u64 block_off;
if (!ac->ac_max_block)
return 1;
alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
la = OCFS2_LOCAL_ALLOC(alloc);
start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
if (start == -1) {
mlog_errno(-ENOSPC);
return 0;
}
/*
* Converting (bm_off + start + bits_wanted) to blocks gives us
* the blkno just past our actual allocation. This is perfect
* to compare with ac_max_block.
*/
block_off = ocfs2_clusters_to_blocks(inode->i_sb,
le32_to_cpu(la->la_bm_off) +
start + bits_wanted);
mlog(0, "Checking %llu against %llu\n",
(unsigned long long)block_off,
(unsigned long long)ac->ac_max_block);
if (block_off > ac->ac_max_block)
return 0;
return 1;
}
/*
* make sure we've got at least bits_wanted contiguous bits in the
* local alloc. You lose them when you drop i_mutex.
......@@ -613,17 +709,6 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
mlog(0, "Calling in_range for max block %llu\n",
(unsigned long long)ac->ac_max_block);
if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac,
bits_wanted)) {
/*
* The window is outside ac->ac_max_block.
* This errno tells the caller to keep localalloc enabled
* but to get the allocation from the main bitmap.
*/
status = -EFBIG;
goto bail;
}
ac->ac_inode = local_alloc_inode;
/* We should never use localalloc from another slot */
ac->ac_alloc_slot = osb->slot_num;
......@@ -664,7 +749,8 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
la = OCFS2_LOCAL_ALLOC(alloc);
start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
start = ocfs2_local_alloc_find_clear_bits(osb, alloc, &bits_wanted,
ac->ac_resv);
if (start == -1) {
/* TODO: Shouldn't we just BUG here? */
status = -ENOSPC;
......@@ -674,8 +760,6 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
bitmap = la->la_bitmap;
*bit_off = le32_to_cpu(la->la_bm_off) + start;
/* local alloc is always contiguous by nature -- we never
* delete bits from it! */
*num_bits = bits_wanted;
status = ocfs2_journal_access_di(handle,
......@@ -687,18 +771,15 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
goto bail;
}
ocfs2_resmap_claimed_bits(&osb->osb_la_resmap, ac->ac_resv, start,
bits_wanted);
while(bits_wanted--)
ocfs2_set_bit(start++, bitmap);
le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits);
ocfs2_journal_dirty(handle, osb->local_alloc_bh);
status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
status = 0;
bail:
mlog_exit(status);
return status;
......@@ -722,13 +803,17 @@ static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
}
static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
struct ocfs2_dinode *alloc,
u32 numbits)
struct ocfs2_dinode *alloc,
u32 *numbits,
struct ocfs2_alloc_reservation *resv)
{
int numfound, bitoff, left, startoff, lastzero;
int local_resv = 0;
struct ocfs2_alloc_reservation r;
void *bitmap = NULL;
struct ocfs2_reservation_map *resmap = &osb->osb_la_resmap;
mlog_entry("(numbits wanted = %u)\n", numbits);
mlog_entry("(numbits wanted = %u)\n", *numbits);
if (!alloc->id1.bitmap1.i_total) {
mlog(0, "No bits in my window!\n");
......@@ -736,6 +821,30 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
goto bail;
}
if (!resv) {
local_resv = 1;
ocfs2_resv_init_once(&r);
ocfs2_resv_set_type(&r, OCFS2_RESV_FLAG_TMP);
resv = &r;
}
numfound = *numbits;
if (ocfs2_resmap_resv_bits(resmap, resv, &bitoff, &numfound) == 0) {
if (numfound < *numbits)
*numbits = numfound;
goto bail;
}
/*
* Code error. While reservations are enabled, local
* allocation should _always_ go through them.
*/
BUG_ON(osb->osb_resv_level != 0);
/*
* Reservations are disabled. Handle this the old way.
*/
bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap;
numfound = bitoff = startoff = 0;
......@@ -761,7 +870,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
startoff = bitoff+1;
}
/* we got everything we needed */
if (numfound == numbits) {
if (numfound == *numbits) {
/* mlog(0, "Found it all!\n"); */
break;
}
......@@ -770,12 +879,15 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff,
numfound);
if (numfound == numbits)
if (numfound == *numbits)
bitoff = startoff - numfound;
else
bitoff = -1;
bail:
if (local_resv)
ocfs2_resv_discard(resmap, resv);
mlog_exit(bitoff);
return bitoff;
}
......@@ -1049,7 +1161,7 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
/* we used the generic suballoc reserve function, but we set
* everything up nicely, so there's no reason why we can't use
* the more specific cluster api to claim bits. */
status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits,
status = ocfs2_claim_clusters(handle, ac, osb->local_alloc_bits,
&cluster_off, &cluster_count);
if (status == -ENOSPC) {
retry_enospc:
......@@ -1063,7 +1175,7 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
goto bail;
ac->ac_bits_wanted = osb->local_alloc_default_bits;
status = ocfs2_claim_clusters(osb, handle, ac,
status = ocfs2_claim_clusters(handle, ac,
osb->local_alloc_bits,
&cluster_off,
&cluster_count);
......@@ -1098,6 +1210,9 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0,
le16_to_cpu(la->la_size));
ocfs2_resmap_restart(&osb->osb_la_resmap, cluster_count,
OCFS2_LOCAL_ALLOC(alloc)->la_bitmap);
mlog(0, "New window allocated:\n");
mlog(0, "window la_bm_off = %u\n",
OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
......@@ -1169,12 +1284,7 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
}
ocfs2_clear_local_alloc(alloc);
status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
ocfs2_journal_dirty(handle, osb->local_alloc_bh);
status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
main_bm_inode, main_bm_bh);
......@@ -1192,7 +1302,6 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
atomic_inc(&osb->alloc_stats.moves);
status = 0;
bail:
if (handle)
ocfs2_commit_trans(osb, handle);
......
......@@ -30,6 +30,9 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb);
void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb);
void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb);
unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb);
int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
int node_num,
struct ocfs2_dinode **alloc_copy);
......
......@@ -41,44 +41,20 @@
#include "file.h"
#include "inode.h"
#include "mmap.h"
#include "super.h"
static inline int ocfs2_vm_op_block_sigs(sigset_t *blocked, sigset_t *oldset)
{
/* The best way to deal with signals in the vm path is
* to block them upfront, rather than allowing the
* locking paths to return -ERESTARTSYS. */
sigfillset(blocked);
/* We should technically never get a bad return value
* from sigprocmask */
return sigprocmask(SIG_BLOCK, blocked, oldset);
}
static inline int ocfs2_vm_op_unblock_sigs(sigset_t *oldset)
{
return sigprocmask(SIG_SETMASK, oldset, NULL);
}
static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf)
{
sigset_t blocked, oldset;
int error, ret;
sigset_t oldset;
int ret;
mlog_entry("(area=%p, page offset=%lu)\n", area, vmf->pgoff);
error = ocfs2_vm_op_block_sigs(&blocked, &oldset);
if (error < 0) {
mlog_errno(error);
ret = VM_FAULT_SIGBUS;
goto out;
}
ocfs2_block_signals(&oldset);
ret = filemap_fault(area, vmf);
ocfs2_unblock_signals(&oldset);
error = ocfs2_vm_op_unblock_sigs(&oldset);
if (error < 0)
mlog_errno(error);
out:
mlog_exit_ptr(vmf->page);
return ret;
}
......@@ -158,14 +134,10 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
struct page *page = vmf->page;
struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
struct buffer_head *di_bh = NULL;
sigset_t blocked, oldset;
int ret, ret2;
sigset_t oldset;
int ret;
ret = ocfs2_vm_op_block_sigs(&blocked, &oldset);
if (ret < 0) {
mlog_errno(ret);
return ret;
}
ocfs2_block_signals(&oldset);
/*
* The cluster locks taken will block a truncate from another
......@@ -193,9 +165,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
ocfs2_inode_unlock(inode, 1);
out:
ret2 = ocfs2_vm_op_unblock_sigs(&oldset);
if (ret2 < 0)
mlog_errno(ret2);
ocfs2_unblock_signals(&oldset);
if (ret)
ret = VM_FAULT_SIGBUS;
return ret;
......
......@@ -239,6 +239,8 @@ static int ocfs2_mknod(struct inode *dir,
};
int did_quota_inode = 0;
struct ocfs2_dir_lookup_result lookup = { NULL, };
sigset_t oldset;
int did_block_signals = 0;
mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
(unsigned long)dev, dentry->d_name.len,
......@@ -350,6 +352,10 @@ static int ocfs2_mknod(struct inode *dir,
goto leave;
}
/* Starting to change things, restart is no longer possible. */
ocfs2_block_signals(&oldset);
did_block_signals = 1;
status = dquot_alloc_inode(inode);
if (status)
goto leave;
......@@ -384,11 +390,7 @@ static int ocfs2_mknod(struct inode *dir,
goto leave;
}
ocfs2_add_links_count(dirfe, 1);
status = ocfs2_journal_dirty(handle, parent_fe_bh);
if (status < 0) {
mlog_errno(status);
goto leave;
}
ocfs2_journal_dirty(handle, parent_fe_bh);
inc_nlink(dir);
}
......@@ -439,6 +441,8 @@ static int ocfs2_mknod(struct inode *dir,
ocfs2_commit_trans(osb, handle);
ocfs2_inode_unlock(dir, 1);
if (did_block_signals)
ocfs2_unblock_signals(&oldset);
if (status == -ENOSPC)
mlog(0, "Disk is full\n");
......@@ -487,14 +491,15 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
int status = 0;
struct ocfs2_dinode *fe = NULL;
struct ocfs2_extent_list *fel;
u64 fe_blkno = 0;
u64 suballoc_loc, fe_blkno = 0;
u16 suballoc_bit;
u16 feat;
*new_fe_bh = NULL;
status = ocfs2_claim_new_inode(osb, handle, dir, parent_fe_bh,
inode_ac, &suballoc_bit, &fe_blkno);
status = ocfs2_claim_new_inode(handle, dir, parent_fe_bh,
inode_ac, &suballoc_loc,
&suballoc_bit, &fe_blkno);
if (status < 0) {
mlog_errno(status);
goto leave;
......@@ -531,6 +536,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
fe->i_generation = cpu_to_le32(inode->i_generation);
fe->i_fs_generation = cpu_to_le32(osb->fs_generation);
fe->i_blkno = cpu_to_le64(fe_blkno);
fe->i_suballoc_loc = cpu_to_le64(suballoc_loc);
fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
fe->i_uid = cpu_to_le32(inode->i_uid);
......@@ -567,11 +573,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
fel->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(osb->sb));
}
status = ocfs2_journal_dirty(handle, *new_fe_bh);
if (status < 0) {
mlog_errno(status);
goto leave;
}
ocfs2_journal_dirty(handle, *new_fe_bh);
ocfs2_populate_inode(inode, fe, 1);
ocfs2_ci_set_new(osb, INODE_CACHE(inode));
......@@ -637,6 +639,7 @@ static int ocfs2_link(struct dentry *old_dentry,
struct ocfs2_dinode *fe = NULL;
struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
struct ocfs2_dir_lookup_result lookup = { NULL, };
sigset_t oldset;
mlog_entry("(inode=%lu, old='%.*s' new='%.*s')\n", inode->i_ino,
old_dentry->d_name.len, old_dentry->d_name.name,
......@@ -693,6 +696,9 @@ static int ocfs2_link(struct dentry *old_dentry,
goto out_unlock_inode;
}
/* Starting to change things, restart is no longer possible. */
ocfs2_block_signals(&oldset);
err = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (err < 0) {
......@@ -705,14 +711,7 @@ static int ocfs2_link(struct dentry *old_dentry,
ocfs2_set_links_count(fe, inode->i_nlink);
fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
err = ocfs2_journal_dirty(handle, fe_bh);
if (err < 0) {
ocfs2_add_links_count(fe, -1);
drop_nlink(inode);
mlog_errno(err);
goto out_commit;
}
ocfs2_journal_dirty(handle, fe_bh);
err = ocfs2_add_entry(handle, dentry, inode,
OCFS2_I(inode)->ip_blkno,
......@@ -736,6 +735,7 @@ static int ocfs2_link(struct dentry *old_dentry,
out_commit:
ocfs2_commit_trans(osb, handle);
ocfs2_unblock_signals(&oldset);
out_unlock_inode:
ocfs2_inode_unlock(inode, 1);
......@@ -909,12 +909,7 @@ static int ocfs2_unlink(struct inode *dir,
drop_nlink(inode);
drop_nlink(inode);
ocfs2_set_links_count(fe, inode->i_nlink);
status = ocfs2_journal_dirty(handle, fe_bh);
if (status < 0) {
mlog_errno(status);
goto leave;
}
ocfs2_journal_dirty(handle, fe_bh);
dir->i_ctime = dir->i_mtime = CURRENT_TIME;
if (S_ISDIR(inode->i_mode))
......@@ -1332,12 +1327,7 @@ static int ocfs2_rename(struct inode *old_dir,
ocfs2_set_links_count(newfe, 0);
else
ocfs2_add_links_count(newfe, -1);
status = ocfs2_journal_dirty(handle, newfe_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
ocfs2_journal_dirty(handle, newfe_bh);
} else {
/* if the name was not found in new_dir, add it now */
status = ocfs2_add_entry(handle, new_dentry, old_inode,
......@@ -1356,10 +1346,7 @@ static int ocfs2_rename(struct inode *old_dir,
old_di->i_ctime = cpu_to_le64(old_inode->i_ctime.tv_sec);
old_di->i_ctime_nsec = cpu_to_le32(old_inode->i_ctime.tv_nsec);
status = ocfs2_journal_dirty(handle, old_inode_bh);
if (status < 0)
mlog_errno(status);
ocfs2_journal_dirty(handle, old_inode_bh);
} else
mlog_errno(status);
......@@ -1431,7 +1418,7 @@ static int ocfs2_rename(struct inode *old_dir,
OCFS2_JOURNAL_ACCESS_WRITE);
fe = (struct ocfs2_dinode *) old_dir_bh->b_data;
ocfs2_set_links_count(fe, old_dir->i_nlink);
status = ocfs2_journal_dirty(handle, old_dir_bh);
ocfs2_journal_dirty(handle, old_dir_bh);
}
}
ocfs2_dentry_move(old_dentry, new_dentry, old_dir, new_dir);
......@@ -1563,11 +1550,7 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
(bytes_left > sb->s_blocksize) ? sb->s_blocksize :
bytes_left);
status = ocfs2_journal_dirty(handle, bhs[virtual]);
if (status < 0) {
mlog_errno(status);
goto bail;
}
ocfs2_journal_dirty(handle, bhs[virtual]);
virtual++;
p_blkno++;
......@@ -1611,6 +1594,8 @@ static int ocfs2_symlink(struct inode *dir,
};
int did_quota = 0, did_quota_inode = 0;
struct ocfs2_dir_lookup_result lookup = { NULL, };
sigset_t oldset;
int did_block_signals = 0;
mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir,
dentry, symname, dentry->d_name.len, dentry->d_name.name);
......@@ -1706,6 +1691,10 @@ static int ocfs2_symlink(struct inode *dir,
goto bail;
}
/* Starting to change things, restart is no longer possible. */
ocfs2_block_signals(&oldset);
did_block_signals = 1;
status = dquot_alloc_inode(inode);
if (status)
goto bail;
......@@ -1814,6 +1803,8 @@ static int ocfs2_symlink(struct inode *dir,
ocfs2_commit_trans(osb, handle);
ocfs2_inode_unlock(dir, 1);
if (did_block_signals)
ocfs2_unblock_signals(&oldset);
brelse(new_fe_bh);
brelse(parent_fe_bh);
......@@ -1961,12 +1952,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
if (S_ISDIR(inode->i_mode))
ocfs2_add_links_count(orphan_fe, 1);
orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe);
status = ocfs2_journal_dirty(handle, orphan_dir_bh);
if (status < 0) {
mlog_errno(status);
goto leave;
}
ocfs2_journal_dirty(handle, orphan_dir_bh);
status = __ocfs2_add_entry(handle, orphan_dir_inode, name,
OCFS2_ORPHAN_NAMELEN, inode,
......@@ -2065,12 +2051,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
if (S_ISDIR(inode->i_mode))
ocfs2_add_links_count(orphan_fe, -1);
orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe);
status = ocfs2_journal_dirty(handle, orphan_dir_bh);
if (status < 0) {
mlog_errno(status);
goto leave;
}
ocfs2_journal_dirty(handle, orphan_dir_bh);
leave:
ocfs2_free_dir_lookup_result(&lookup);
......
......@@ -47,6 +47,7 @@
/* For struct ocfs2_blockcheck_stats */
#include "blockcheck.h"
#include "reservations.h"
/* Caching of metadata buffers */
......@@ -341,6 +342,9 @@ struct ocfs2_super
*/
unsigned int local_alloc_bits;
unsigned int local_alloc_default_bits;
/* osb_clusters_at_boot can become stale! Do not trust it to
* be up to date. */
unsigned int osb_clusters_at_boot;
enum ocfs2_local_alloc_state local_alloc_state; /* protected
* by osb_lock */
......@@ -349,6 +353,11 @@ struct ocfs2_super
u64 la_last_gd;
struct ocfs2_reservation_map osb_la_resmap;
unsigned int osb_resv_level;
unsigned int osb_dir_resv_level;
/* Next three fields are for local node slot recovery during
* mount. */
int dirty;
......@@ -482,6 +491,13 @@ static inline int ocfs2_supports_indexed_dirs(struct ocfs2_super *osb)
return 0;
}
static inline int ocfs2_supports_discontig_bg(struct ocfs2_super *osb)
{
if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG)
return 1;
return 0;
}
static inline unsigned int ocfs2_link_max(struct ocfs2_super *osb)
{
if (ocfs2_supports_indexed_dirs(osb))
......@@ -763,6 +779,12 @@ static inline unsigned int ocfs2_megabytes_to_clusters(struct super_block *sb,
return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits);
}
static inline unsigned int ocfs2_clusters_to_megabytes(struct super_block *sb,
unsigned int clusters)
{
return clusters >> (20 - OCFS2_SB(sb)->s_clustersize_bits);
}
static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap)
{
ext2_set_bit(bit, bitmap);
......
......@@ -100,7 +100,8 @@
| OCFS2_FEATURE_INCOMPAT_XATTR \
| OCFS2_FEATURE_INCOMPAT_META_ECC \
| OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \
| OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE)
| OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \
| OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG)
#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
| OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
| OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
......@@ -165,6 +166,9 @@
/* Refcount tree support */
#define OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE 0x1000
/* Discontigous block groups */
#define OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG 0x2000
/*
* backup superblock flag is used to indicate that this volume
* has backup superblocks.
......@@ -282,14 +286,6 @@
/* Journal limits (in bytes) */
#define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024)
/*
* Default local alloc size (in megabytes)
*
* The value chosen should be such that most allocations, including new
* block groups, use local alloc.
*/
#define OCFS2_DEFAULT_LOCAL_ALLOC_SIZE 8
/*
* Inline extended attribute size (in bytes)
* The value chosen should be aligned to 16 byte boundaries.
......@@ -512,7 +508,10 @@ struct ocfs2_extent_block
block group */
__le32 h_fs_generation; /* Must match super block */
__le64 h_blkno; /* Offset on disk, in blocks */
/*20*/ __le64 h_reserved3;
/*20*/ __le64 h_suballoc_loc; /* Suballocator block group this
eb belongs to. Only valid
if allocated from a
discontiguous block group */
__le64 h_next_leaf_blk; /* Offset on disk, in blocks,
of next leaf header pointing
to data */
......@@ -679,7 +678,11 @@ struct ocfs2_dinode {
/*80*/ struct ocfs2_block_check i_check; /* Error checking */
/*88*/ __le64 i_dx_root; /* Pointer to dir index root block */
/*90*/ __le64 i_refcount_loc;
__le64 i_reserved2[4];
__le64 i_suballoc_loc; /* Suballocator block group this
inode belongs to. Only valid
if allocated from a
discontiguous block group */
/*A0*/ __le64 i_reserved2[3];
/*B8*/ union {
__le64 i_pad1; /* Generic way to refer to this
64bit union */
......@@ -814,7 +817,12 @@ struct ocfs2_dx_root_block {
__le32 dr_reserved2;
__le64 dr_free_blk; /* Pointer to head of free
* unindexed block list. */
__le64 dr_reserved3[15];
__le64 dr_suballoc_loc; /* Suballocator block group
this root belongs to.
Only valid if allocated
from a discontiguous
block group */
__le64 dr_reserved3[14];
union {
struct ocfs2_extent_list dr_list; /* Keep this aligned to 128
* bits for maximum space
......@@ -839,6 +847,13 @@ struct ocfs2_dx_leaf {
struct ocfs2_dx_entry_list dl_list;
};
/*
* Largest bitmap for a block (suballocator) group in bytes. This limit
* does not affect cluster groups (global allocator). Cluster group
* bitmaps run to the end of the block.
*/
#define OCFS2_MAX_BG_BITMAP_SIZE 256
/*
* On disk allocator group structure for OCFS2
*/
......@@ -860,7 +875,29 @@ struct ocfs2_group_desc
__le64 bg_blkno; /* Offset on disk, in blocks */
/*30*/ struct ocfs2_block_check bg_check; /* Error checking */
__le64 bg_reserved2;
/*40*/ __u8 bg_bitmap[0];
/*40*/ union {
__u8 bg_bitmap[0];
struct {
/*
* Block groups may be discontiguous when
* OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG is set.
* The extents of a discontigous block group are
* stored in bg_list. It is a flat list.
* l_tree_depth must always be zero. A
* discontiguous group is signified by a non-zero
* bg_list->l_next_free_rec. Only block groups
* can be discontiguous; Cluster groups cannot.
* We've never made a block group with more than
* 2048 blocks (256 bytes of bg_bitmap). This
* codifies that limit so that we can fit bg_list.
* bg_size of a discontiguous block group will
* be 256 to match bg_bitmap_filler.
*/
__u8 bg_bitmap_filler[OCFS2_MAX_BG_BITMAP_SIZE];
/*140*/ struct ocfs2_extent_list bg_list;
};
};
/* Actual on-disk size is one block */
};
struct ocfs2_refcount_rec {
......@@ -905,7 +942,11 @@ struct ocfs2_refcount_block {
/*40*/ __le32 rf_generation; /* generation number. all be the same
* for the same refcount tree. */
__le32 rf_reserved0;
__le64 rf_reserved1[7];
__le64 rf_suballoc_loc; /* Suballocator block group this
refcount block belongs to. Only
valid if allocated from a
discontiguous block group */
/*50*/ __le64 rf_reserved1[6];
/*80*/ union {
struct ocfs2_refcount_list rf_records; /* List of refcount
records */
......@@ -1017,7 +1058,10 @@ struct ocfs2_xattr_block {
real xattr or a xattr tree. */
__le16 xb_reserved0;
__le32 xb_reserved1;
__le64 xb_reserved2;
__le64 xb_suballoc_loc; /* Suballocator block group this
xattr block belongs to. Only
valid if allocated from a
discontiguous block group */
/*30*/ union {
struct ocfs2_xattr_header xb_header; /* xattr header if this
block contains xattr */
......@@ -1254,6 +1298,16 @@ static inline u16 ocfs2_extent_recs_per_eb(struct super_block *sb)
return size / sizeof(struct ocfs2_extent_rec);
}
static inline u16 ocfs2_extent_recs_per_gd(struct super_block *sb)
{
int size;
size = sb->s_blocksize -
offsetof(struct ocfs2_group_desc, bg_list.l_recs);
return size / sizeof(struct ocfs2_extent_rec);
}
static inline int ocfs2_dx_entries_per_leaf(struct super_block *sb)
{
int size;
......@@ -1284,13 +1338,23 @@ static inline u16 ocfs2_local_alloc_size(struct super_block *sb)
return size;
}
static inline int ocfs2_group_bitmap_size(struct super_block *sb)
static inline int ocfs2_group_bitmap_size(struct super_block *sb,
int suballocator,
u32 feature_incompat)
{
int size;
size = sb->s_blocksize -
int size = sb->s_blocksize -
offsetof(struct ocfs2_group_desc, bg_bitmap);
/*
* The cluster allocator uses the entire block. Suballocators have
* never used more than OCFS2_MAX_BG_BITMAP_SIZE. Unfortunately, older
* code expects bg_size set to the maximum. Thus we must keep
* bg_size as-is unless discontig_bg is enabled.
*/
if (suballocator &&
(feature_incompat & OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG))
size = OCFS2_MAX_BG_BITMAP_SIZE;
return size;
}
......@@ -1402,23 +1466,43 @@ static inline int ocfs2_extent_recs_per_eb(int blocksize)
return size / sizeof(struct ocfs2_extent_rec);
}
static inline int ocfs2_local_alloc_size(int blocksize)
static inline int ocfs2_extent_recs_per_gd(int blocksize)
{
int size;
size = blocksize -
offsetof(struct ocfs2_dinode, id2.i_lab.la_bitmap);
offsetof(struct ocfs2_group_desc, bg_list.l_recs);
return size;
return size / sizeof(struct ocfs2_extent_rec);
}
static inline int ocfs2_group_bitmap_size(int blocksize)
static inline int ocfs2_local_alloc_size(int blocksize)
{
int size;
size = blocksize -
offsetof(struct ocfs2_dinode, id2.i_lab.la_bitmap);
return size;
}
static inline int ocfs2_group_bitmap_size(int blocksize,
int suballocator,
uint32_t feature_incompat)
{
int size = sb->s_blocksize -
offsetof(struct ocfs2_group_desc, bg_bitmap);
/*
* The cluster allocator uses the entire block. Suballocators have
* never used more than OCFS2_MAX_BG_BITMAP_SIZE. Unfortunately, older
* code expects bg_size set to the maximum. Thus we must keep
* bg_size as-is unless discontig_bg is enabled.
*/
if (suballocator &&
(feature_incompat & OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG))
size = OCFS2_MAX_BG_BITMAP_SIZE;
return size;
}
......@@ -1491,5 +1575,19 @@ static inline void ocfs2_set_de_type(struct ocfs2_dir_entry *de,
de->file_type = ocfs2_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
}
static inline int ocfs2_gd_is_discontig(struct ocfs2_group_desc *gd)
{
if ((offsetof(struct ocfs2_group_desc, bg_bitmap) +
le16_to_cpu(gd->bg_size)) !=
offsetof(struct ocfs2_group_desc, bg_list))
return 0;
/*
* Only valid to check l_next_free_rec if
* bg_bitmap + bg_size == bg_list.
*/
if (!gd->bg_list.l_next_free_rec)
return 0;
return 1;
}
#endif /* _OCFS2_FS_H */
......@@ -261,10 +261,8 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
brelse(bh);
goto out;
}
err = ocfs2_journal_dirty(handle, bh);
ocfs2_journal_dirty(handle, bh);
brelse(bh);
if (err < 0)
goto out;
out:
if (err) {
mutex_unlock(&gqinode->i_mutex);
......
......@@ -119,12 +119,8 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
lock_buffer(bh);
modify(bh, private);
unlock_buffer(bh);
status = ocfs2_journal_dirty(handle, bh);
if (status < 0) {
mlog_errno(status);
ocfs2_commit_trans(OCFS2_SB(sb), handle);
return status;
}
ocfs2_journal_dirty(handle, bh);
status = ocfs2_commit_trans(OCFS2_SB(sb), handle);
if (status < 0) {
mlog_errno(status);
......@@ -523,9 +519,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
ocfs2_clear_bit(bit, dchunk->dqc_bitmap);
le32_add_cpu(&dchunk->dqc_free, 1);
unlock_buffer(qbh);
status = ocfs2_journal_dirty(handle, qbh);
if (status < 0)
mlog_errno(status);
ocfs2_journal_dirty(handle, qbh);
out_commit:
mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
ocfs2_commit_trans(OCFS2_SB(sb), handle);
......@@ -631,9 +625,7 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
lock_buffer(bh);
ldinfo->dqi_flags = cpu_to_le32(flags | OLQF_CLEAN);
unlock_buffer(bh);
status = ocfs2_journal_dirty(handle, bh);
if (status < 0)
mlog_errno(status);
ocfs2_journal_dirty(handle, bh);
out_trans:
ocfs2_commit_trans(osb, handle);
out_bh:
......@@ -1009,11 +1001,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) -
OCFS2_QBLK_RESERVED_SPACE);
unlock_buffer(bh);
status = ocfs2_journal_dirty(handle, bh);
if (status < 0) {
mlog_errno(status);
goto out_trans;
}
ocfs2_journal_dirty(handle, bh);
/* Initialize new block with structures */
down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
......@@ -1040,11 +1028,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
lock_buffer(dbh);
memset(dbh->b_data, 0, sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE);
unlock_buffer(dbh);
status = ocfs2_journal_dirty(handle, dbh);
if (status < 0) {
mlog_errno(status);
goto out_trans;
}
ocfs2_journal_dirty(handle, dbh);
/* Update local quotafile info */
oinfo->dqi_blocks += 2;
......@@ -1155,11 +1139,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
lock_buffer(bh);
memset(bh->b_data, 0, sb->s_blocksize);
unlock_buffer(bh);
status = ocfs2_journal_dirty(handle, bh);
if (status < 0) {
mlog_errno(status);
goto out_trans;
}
ocfs2_journal_dirty(handle, bh);
/* Update chunk header */
status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode),
chunk->qc_headerbh,
......@@ -1173,11 +1154,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
lock_buffer(chunk->qc_headerbh);
le32_add_cpu(&dchunk->dqc_free, ol_quota_entries_per_block(sb));
unlock_buffer(chunk->qc_headerbh);
status = ocfs2_journal_dirty(handle, chunk->qc_headerbh);
if (status < 0) {
mlog_errno(status);
goto out_trans;
}
ocfs2_journal_dirty(handle, chunk->qc_headerbh);
/* Update file header */
oinfo->dqi_blocks++;
status = ocfs2_local_write_info(sb, type);
......@@ -1312,12 +1290,8 @@ static int ocfs2_local_release_dquot(struct dquot *dquot)
ocfs2_clear_bit(offset, dchunk->dqc_bitmap);
le32_add_cpu(&dchunk->dqc_free, 1);
unlock_buffer(od->dq_chunk->qc_headerbh);
status = ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh);
if (status < 0) {
mlog_errno(status);
goto out;
}
status = 0;
ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh);
out:
/* Clear the read bit so that next time someone uses this
* dquot he reads fresh info from disk and allocates local
......
......@@ -570,7 +570,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
struct ocfs2_refcount_tree *new_tree = NULL, *tree = NULL;
u16 suballoc_bit_start;
u32 num_got;
u64 first_blkno;
u64 suballoc_loc, first_blkno;
BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
......@@ -596,7 +596,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
goto out_commit;
}
ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
&suballoc_bit_start, &num_got,
&first_blkno);
if (ret) {
......@@ -626,6 +626,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
memset(rb, 0, inode->i_sb->s_blocksize);
strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc);
rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
rb->rf_fs_generation = cpu_to_le32(osb->fs_generation);
rb->rf_blkno = cpu_to_le64(first_blkno);
......@@ -790,7 +791,10 @@ int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh)
if (le32_to_cpu(rb->rf_count) == 1) {
blk = le64_to_cpu(rb->rf_blkno);
bit = le16_to_cpu(rb->rf_suballoc_bit);
bg_blkno = ocfs2_which_suballoc_group(blk, bit);
if (rb->rf_suballoc_loc)
bg_blkno = le64_to_cpu(rb->rf_suballoc_loc);
else
bg_blkno = ocfs2_which_suballoc_group(blk, bit);
alloc_inode = ocfs2_get_system_file_inode(osb,
EXTENT_ALLOC_SYSTEM_INODE,
......@@ -1268,9 +1272,7 @@ static int ocfs2_change_refcount_rec(handle_t *handle,
} else if (merge)
ocfs2_refcount_rec_merge(rb, index);
ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
if (ret)
mlog_errno(ret);
ocfs2_journal_dirty(handle, ref_leaf_bh);
out:
return ret;
}
......@@ -1284,7 +1286,7 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle,
int ret;
u16 suballoc_bit_start;
u32 num_got;
u64 blkno;
u64 suballoc_loc, blkno;
struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
struct buffer_head *new_bh = NULL;
struct ocfs2_refcount_block *new_rb;
......@@ -1298,7 +1300,7 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle,
goto out;
}
ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1,
ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
&suballoc_bit_start, &num_got,
&blkno);
if (ret) {
......@@ -1330,6 +1332,7 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle,
new_rb = (struct ocfs2_refcount_block *)new_bh->b_data;
new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
new_rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc);
new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
new_rb->rf_blkno = cpu_to_le64(blkno);
new_rb->rf_cpos = cpu_to_le32(0);
......@@ -1524,7 +1527,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle,
int ret;
u16 suballoc_bit_start;
u32 num_got, new_cpos;
u64 blkno;
u64 suballoc_loc, blkno;
struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
struct ocfs2_refcount_block *root_rb =
(struct ocfs2_refcount_block *)ref_root_bh->b_data;
......@@ -1548,7 +1551,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle,
goto out;
}
ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1,
ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
&suballoc_bit_start, &num_got,
&blkno);
if (ret) {
......@@ -1576,6 +1579,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle,
memset(new_rb, 0, sb->s_blocksize);
strcpy((void *)new_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
new_rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc);
new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
new_rb->rf_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
new_rb->rf_blkno = cpu_to_le64(blkno);
......@@ -1694,7 +1698,7 @@ static int ocfs2_adjust_refcount_rec(handle_t *handle,
* 2 more credits, one for the leaf refcount block, one for
* the extent block contains the extent rec.
*/
ret = ocfs2_extend_trans(handle, handle->h_buffer_credits + 2);
ret = ocfs2_extend_trans(handle, 2);
if (ret < 0) {
mlog_errno(ret);
goto out;
......@@ -1802,11 +1806,7 @@ static int ocfs2_insert_refcount_rec(handle_t *handle,
if (merge)
ocfs2_refcount_rec_merge(rb, index);
ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
if (ret) {
mlog_errno(ret);
goto out;
}
ocfs2_journal_dirty(handle, ref_leaf_bh);
if (index == 0) {
ret = ocfs2_adjust_refcount_rec(handle, ci,
......@@ -1977,9 +1977,7 @@ static int ocfs2_split_refcount_rec(handle_t *handle,
ocfs2_refcount_rec_merge(rb, index);
}
ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
if (ret)
mlog_errno(ret);
ocfs2_journal_dirty(handle, ref_leaf_bh);
out:
brelse(new_bh);
......@@ -2112,6 +2110,7 @@ static int ocfs2_remove_refcount_extent(handle_t *handle,
*/
ret = ocfs2_cache_block_dealloc(dealloc, EXTENT_ALLOC_SYSTEM_INODE,
le16_to_cpu(rb->rf_suballoc_slot),
le64_to_cpu(rb->rf_suballoc_loc),
le64_to_cpu(rb->rf_blkno),
le16_to_cpu(rb->rf_suballoc_bit));
if (ret) {
......@@ -2516,20 +2515,19 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
*
* Normally the refcount blocks store these refcount should be
* contiguous also, so that we can get the number easily.
* As for meta_ac, we will at most add split 2 refcount record and
* 2 more refcount block, so just check it in a rough way.
* We will at most add split 2 refcount records and 2 more
* refcount blocks, so just check it in a rough way.
*
* Caller must hold refcount tree lock.
*/
int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
struct buffer_head *di_bh,
u64 refcount_loc,
u64 phys_blkno,
u32 clusters,
int *credits,
struct ocfs2_alloc_context **meta_ac)
int *ref_blocks)
{
int ret, ref_blocks = 0;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
int ret;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct buffer_head *ref_root_bh = NULL;
struct ocfs2_refcount_tree *tree;
......@@ -2546,14 +2544,13 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb),
le64_to_cpu(di->i_refcount_loc), &tree);
refcount_loc, &tree);
if (ret) {
mlog_errno(ret);
goto out;
}
ret = ocfs2_read_refcount_block(&tree->rf_ci,
le64_to_cpu(di->i_refcount_loc),
ret = ocfs2_read_refcount_block(&tree->rf_ci, refcount_loc,
&ref_root_bh);
if (ret) {
mlog_errno(ret);
......@@ -2564,21 +2561,14 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
&tree->rf_ci,
ref_root_bh,
start_cpos, clusters,
&ref_blocks, credits);
ref_blocks, credits);
if (ret) {
mlog_errno(ret);
goto out;
}
mlog(0, "reserve new metadata %d, credits = %d\n",
ref_blocks, *credits);
if (ref_blocks) {
ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
ref_blocks, meta_ac);
if (ret)
mlog_errno(ret);
}
mlog(0, "reserve new metadata %d blocks, credits = %d\n",
*ref_blocks, *credits);
out:
brelse(ref_root_bh);
......@@ -3040,11 +3030,7 @@ static int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
}
memcpy(new_bh->b_data, old_bh->b_data, sb->s_blocksize);
ret = ocfs2_journal_dirty(handle, new_bh);
if (ret) {
mlog_errno(ret);
break;
}
ocfs2_journal_dirty(handle, new_bh);
brelse(new_bh);
brelse(old_bh);
......@@ -3282,7 +3268,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
} else {
delete = 1;
ret = __ocfs2_claim_clusters(osb, handle,
ret = __ocfs2_claim_clusters(handle,
context->data_ac,
1, set_len,
&new_bit, &new_len);
......
......@@ -47,11 +47,11 @@ int ocfs2_decrease_refcount(struct inode *inode,
struct ocfs2_cached_dealloc_ctxt *dealloc,
int delete);
int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
struct buffer_head *di_bh,
u64 refcount_loc,
u64 phys_blkno,
u32 clusters,
int *credits,
struct ocfs2_alloc_context **meta_ac);
int *ref_blocks);
int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh,
u32 cpos, u32 write_len, u32 max_cpos);
......
此差异已折叠。
/* -*- mode: c; c-basic-offset: 8; -*-
* vim: noexpandtab sw=8 ts=8 sts=0:
*
* reservations.h
*
* Allocation reservations function prototypes and structures.
*
* Copyright (C) 2010 Novell. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#ifndef OCFS2_RESERVATIONS_H
#define OCFS2_RESERVATIONS_H
#include <linux/rbtree.h>
#define OCFS2_DEFAULT_RESV_LEVEL 2
#define OCFS2_MAX_RESV_LEVEL 9
#define OCFS2_MIN_RESV_LEVEL 0
struct ocfs2_alloc_reservation {
struct rb_node r_node;
unsigned int r_start; /* Begining of current window */
unsigned int r_len; /* Length of the window */
unsigned int r_last_len; /* Length of most recent alloc */
unsigned int r_last_start; /* Start of most recent alloc */
struct list_head r_lru; /* LRU list head */
unsigned int r_flags;
};
#define OCFS2_RESV_FLAG_INUSE 0x01 /* Set when r_node is part of a btree */
#define OCFS2_RESV_FLAG_TMP 0x02 /* Temporary reservation, will be
* destroyed immedately after use */
#define OCFS2_RESV_FLAG_DIR 0x04 /* Reservation is for an unindexed
* directory btree */
struct ocfs2_reservation_map {
struct rb_root m_reservations;
char *m_disk_bitmap;
struct ocfs2_super *m_osb;
/* The following are not initialized to meaningful values until a disk
* bitmap is provided. */
u32 m_bitmap_len; /* Number of valid
* bits available */
struct list_head m_lru; /* LRU of reservations
* structures. */
};
void ocfs2_resv_init_once(struct ocfs2_alloc_reservation *resv);
#define OCFS2_RESV_TYPES (OCFS2_RESV_FLAG_TMP|OCFS2_RESV_FLAG_DIR)
void ocfs2_resv_set_type(struct ocfs2_alloc_reservation *resv,
unsigned int flags);
int ocfs2_dir_resv_allowed(struct ocfs2_super *osb);
/**
* ocfs2_resv_discard() - truncate a reservation
* @resmap:
* @resv: the reservation to truncate.
*
* After this function is called, the reservation will be empty, and
* unlinked from the rbtree.
*/
void ocfs2_resv_discard(struct ocfs2_reservation_map *resmap,
struct ocfs2_alloc_reservation *resv);
/**
* ocfs2_resmap_init() - Initialize fields of a reservations bitmap
* @resmap: struct ocfs2_reservation_map to initialize
* @obj: unused for now
* @ops: unused for now
* @max_bitmap_bytes: Maximum size of the bitmap (typically blocksize)
*
* Only possible return value other than '0' is -ENOMEM for failure to
* allocation mirror bitmap.
*/
int ocfs2_resmap_init(struct ocfs2_super *osb,
struct ocfs2_reservation_map *resmap);
/**
* ocfs2_resmap_restart() - "restart" a reservation bitmap
* @resmap: reservations bitmap
* @clen: Number of valid bits in the bitmap
* @disk_bitmap: the disk bitmap this resmap should refer to.
*
* Re-initialize the parameters of a reservation bitmap. This is
* useful for local alloc window slides.
*
* This function will call ocfs2_trunc_resv against all existing
* reservations. A future version will recalculate existing
* reservations based on the new bitmap.
*/
void ocfs2_resmap_restart(struct ocfs2_reservation_map *resmap,
unsigned int clen, char *disk_bitmap);
/**
* ocfs2_resmap_uninit() - uninitialize a reservation bitmap structure
* @resmap: the struct ocfs2_reservation_map to uninitialize
*/
void ocfs2_resmap_uninit(struct ocfs2_reservation_map *resmap);
/**
* ocfs2_resmap_resv_bits() - Return still-valid reservation bits
* @resmap: reservations bitmap
* @resv: reservation to base search from
* @cstart: start of proposed allocation
* @clen: length (in clusters) of proposed allocation
*
* Using the reservation data from resv, this function will compare
* resmap and resmap->m_disk_bitmap to determine what part (if any) of
* the reservation window is still clear to use. If resv is empty,
* this function will try to allocate a window for it.
*
* On success, zero is returned and the valid allocation area is set in cstart
* and clen.
*
* Returns -ENOSPC if reservations are disabled.
*/
int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap,
struct ocfs2_alloc_reservation *resv,
int *cstart, int *clen);
/**
* ocfs2_resmap_claimed_bits() - Tell the reservation code that bits were used.
* @resmap: reservations bitmap
* @resv: optional reservation to recalulate based on new bitmap
* @cstart: start of allocation in clusters
* @clen: end of allocation in clusters.
*
* Tell the reservation code that bits were used to fulfill allocation in
* resmap. The bits don't have to have been part of any existing
* reservation. But we must always call this function when bits are claimed.
* Internally, the reservations code will use this information to mark the
* reservations bitmap. If resv is passed, it's next allocation window will be
* calculated. It also expects that 'cstart' is the same as we passed back
* from ocfs2_resmap_resv_bits().
*/
void ocfs2_resmap_claimed_bits(struct ocfs2_reservation_map *resmap,
struct ocfs2_alloc_reservation *resv,
u32 cstart, u32 clen);
#endif /* OCFS2_RESERVATIONS_H */
......@@ -134,11 +134,7 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
le16_add_cpu(&group->bg_free_bits_count, -1 * backups);
}
ret = ocfs2_journal_dirty(handle, group_bh);
if (ret < 0) {
mlog_errno(ret);
goto out_rollback;
}
ocfs2_journal_dirty(handle, group_bh);
/* update the inode accordingly. */
ret = ocfs2_journal_access_di(handle, INODE_CACHE(bm_inode), bm_bh,
......@@ -319,7 +315,8 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
if (le16_to_cpu(fe->id2.i_chain.cl_cpg) !=
ocfs2_group_bitmap_size(osb->sb) * 8) {
ocfs2_group_bitmap_size(osb->sb, 0,
osb->s_feature_incompat) * 8) {
mlog(ML_ERROR, "The disk is too old and small. "
"Force to do offline resize.");
ret = -EINVAL;
......@@ -500,7 +497,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
fe = (struct ocfs2_dinode *)main_bm_bh->b_data;
if (le16_to_cpu(fe->id2.i_chain.cl_cpg) !=
ocfs2_group_bitmap_size(osb->sb) * 8) {
ocfs2_group_bitmap_size(osb->sb, 0,
osb->s_feature_incompat) * 8) {
mlog(ML_ERROR, "The disk is too old and small."
" Force to do offline resize.");
ret = -EINVAL;
......@@ -545,12 +543,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
group = (struct ocfs2_group_desc *)group_bh->b_data;
group->bg_next_group = cr->c_blkno;
ret = ocfs2_journal_dirty(handle, group_bh);
if (ret < 0) {
mlog_errno(ret);
goto out_commit;
}
ocfs2_journal_dirty(handle, group_bh);
ret = ocfs2_journal_access_di(handle, INODE_CACHE(main_bm_inode),
main_bm_bh, OCFS2_JOURNAL_ACCESS_WRITE);
......
此差异已折叠。
......@@ -26,13 +26,14 @@
#ifndef _CHAINALLOC_H_
#define _CHAINALLOC_H_
struct ocfs2_suballoc_result;
typedef int (group_search_t)(struct inode *,
struct buffer_head *,
u32, /* bits_wanted */
u32, /* min_bits */
u64, /* max_block */
u16 *, /* *bit_off */
u16 *); /* *bits_found */
struct ocfs2_suballoc_result *);
/* found bits */
struct ocfs2_alloc_context {
struct inode *ac_inode; /* which bitmap are we allocating from? */
......@@ -54,6 +55,8 @@ struct ocfs2_alloc_context {
u64 ac_last_group;
u64 ac_max_block; /* Highest block number to allocate. 0 is
is the same as ~0 - unlimited */
struct ocfs2_alloc_reservation *ac_resv;
};
void ocfs2_init_steal_slots(struct ocfs2_super *osb);
......@@ -80,22 +83,21 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb,
u32 bits_wanted,
struct ocfs2_alloc_context **ac);
int ocfs2_claim_metadata(struct ocfs2_super *osb,
handle_t *handle,
int ocfs2_claim_metadata(handle_t *handle,
struct ocfs2_alloc_context *ac,
u32 bits_wanted,
u64 *suballoc_loc,
u16 *suballoc_bit_start,
u32 *num_bits,
u64 *blkno_start);
int ocfs2_claim_new_inode(struct ocfs2_super *osb,
handle_t *handle,
int ocfs2_claim_new_inode(handle_t *handle,
struct inode *dir,
struct buffer_head *parent_fe_bh,
struct ocfs2_alloc_context *ac,
u64 *suballoc_loc,
u16 *suballoc_bit,
u64 *fe_blkno);
int ocfs2_claim_clusters(struct ocfs2_super *osb,
handle_t *handle,
int ocfs2_claim_clusters(handle_t *handle,
struct ocfs2_alloc_context *ac,
u32 min_clusters,
u32 *cluster_start,
......@@ -104,8 +106,7 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
* Use this variant of ocfs2_claim_clusters to specify a maxiumum
* number of clusters smaller than the allocation reserved.
*/
int __ocfs2_claim_clusters(struct ocfs2_super *osb,
handle_t *handle,
int __ocfs2_claim_clusters(handle_t *handle,
struct ocfs2_alloc_context *ac,
u32 min_clusters,
u32 max_clusters,
......
此差异已折叠。
......@@ -45,4 +45,11 @@ void __ocfs2_abort(struct super_block *sb,
#define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args)
/*
* Void signal blockers, because in-kernel sigprocmask() only fails
* when SIG_* is wrong.
*/
void ocfs2_block_signals(sigset_t *oldset);
void ocfs2_unblock_signals(sigset_t *oldset);
#endif /* OCFS2_SUPER_H */
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册