提交 f3270b16 编写于 作者: L Linus Torvalds

Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2

* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2: (48 commits)
  ocfs2: Avoid to evaluate xattr block flags again.
  ocfs2/cluster: Release debugfs file elapsed_time_in_ms
  ocfs2: Add a mount option "coherency=*" to handle cluster coherency for O_DIRECT writes.
  Initialize max_slots early
  When I tried to compile I got the following warning: fs/ocfs2/slot_map.c: In function ‘ocfs2_init_slot_info’: fs/ocfs2/slot_map.c:360: warning: ‘bytes’ may be used uninitialized in this function fs/ocfs2/slot_map.c:360: note: ‘bytes’ was declared here Compiler: gcc version 4.4.3 (GCC) on Mandriva. I'm not sure why this warning occurs; I think the compiler doesn't know that the variable "bytes" is initialized when it is passed by reference to ocfs2_slot_map_physical_size, so it emits that warning. However, simply initializing the "bytes" variable to 0 fixes it.
  ocfs2: validate bg_free_bits_count after update
  ocfs2/cluster: Bump up dlm protocol to version 1.1
  ocfs2/cluster: Show per region heartbeat elapsed time
  ocfs2/cluster: Add mlogs for heartbeat up/down events
  ocfs2/cluster: Create debugfs dir/files for each region
  ocfs2/cluster: Create debugfs files for live, quorum and failed region bitmaps
  ocfs2/cluster: Maintain bitmap of failed regions
  ocfs2/cluster: Maintain bitmap of quorum regions
  ocfs2/cluster: Track bitmap of live heartbeat regions
  ocfs2/cluster: Track number of global heartbeat regions
  ocfs2/cluster: Maintain live node bitmap per heartbeat region
  ocfs2/cluster: Reorganize o2hb debugfs init
  ocfs2/cluster: Check slots for unconfigured live nodes
  ocfs2/cluster: Print messages when adding/removing nodes
  ocfs2/cluster: Print messages when adding/removing heartbeat regions
  ...
...@@ -87,3 +87,10 @@ dir_resv_level= (*) By default, directory reservations will scale with file ...@@ -87,3 +87,10 @@ dir_resv_level= (*) By default, directory reservations will scale with file
reservations - users should rarely need to change this reservations - users should rarely need to change this
value. If allocation reservations are turned off, this value. If allocation reservations are turned off, this
option will have no effect. option will have no effect.
coherency=full (*) Disallow concurrent O_DIRECT writes. The cluster inode
lock is taken to force other nodes to drop their cache,
so full cluster coherency is guaranteed even
for O_DIRECT writes.
coherency=buffered Allow concurrent O_DIRECT writes without taking the EX
lock among nodes, which gains performance at the risk of
other nodes reading stale data.
...@@ -1849,8 +1849,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) ...@@ -1849,8 +1849,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
goto failed_mount; goto failed_mount;
} }
if (le32_to_cpu(es->s_blocks_count) > if (generic_check_addressable(sb->s_blocksize_bits,
(sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { le32_to_cpu(es->s_blocks_count))) {
ext3_msg(sb, KERN_ERR, ext3_msg(sb, KERN_ERR,
"error: filesystem is too large to mount safely"); "error: filesystem is too large to mount safely");
if (sizeof(sector_t) < 8) if (sizeof(sector_t) < 8)
......
...@@ -2831,15 +2831,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ...@@ -2831,15 +2831,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
* Test whether we have more sectors than will fit in sector_t, * Test whether we have more sectors than will fit in sector_t,
* and whether the max offset is addressable by the page cache. * and whether the max offset is addressable by the page cache.
*/ */
if ((ext4_blocks_count(es) > ret = generic_check_addressable(sb->s_blocksize_bits,
(sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) || ext4_blocks_count(es));
(ext4_blocks_count(es) > if (ret) {
(pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) {
ext4_msg(sb, KERN_ERR, "filesystem" ext4_msg(sb, KERN_ERR, "filesystem"
" too large to mount safely on this system"); " too large to mount safely on this system");
if (sizeof(sector_t) < 8) if (sizeof(sector_t) < 8)
ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
ret = -EFBIG;
goto failed_mount; goto failed_mount;
} }
......
...@@ -1371,6 +1371,10 @@ int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat, ...@@ -1371,6 +1371,10 @@ int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat,
if (!compat && !ro && !incompat) if (!compat && !ro && !incompat)
return 1; return 1;
/* Load journal superblock if it is not loaded yet. */
if (journal->j_format_version == 0 &&
journal_get_superblock(journal) != 0)
return 0;
if (journal->j_format_version == 1) if (journal->j_format_version == 1)
return 0; return 0;
......
...@@ -913,6 +913,35 @@ int generic_file_fsync(struct file *file, int datasync) ...@@ -913,6 +913,35 @@ int generic_file_fsync(struct file *file, int datasync)
} }
EXPORT_SYMBOL(generic_file_fsync); EXPORT_SYMBOL(generic_file_fsync);
/**
 * generic_check_addressable - Check addressability of file system
 * @blocksize_bits:	log of file system block size
 * @num_blocks:		number of blocks in file system
 *
 * Determine whether a file system with @num_blocks blocks (and a
 * block size of 2**@blocksize_bits) is addressable by the sector_t
 * and page cache of the system.
 *
 * Return: 0 if the file system is addressable (or @num_blocks is 0),
 * -EINVAL if @blocksize_bits is smaller than 9 or larger than
 * PAGE_CACHE_SHIFT, and -EFBIG if the last block does not fit in
 * sector_t or the last page does not fit in pgoff_t.
 */
int generic_check_addressable(unsigned blocksize_bits, u64 num_blocks)
{
	u64 last_fs_block;
	u64 last_fs_page;

	if (unlikely(num_blocks == 0))
		return 0;

	if ((blocksize_bits < 9) || (blocksize_bits > PAGE_CACHE_SHIFT))
		return -EINVAL;

	/*
	 * Derive the shifted values only after @blocksize_bits is known to
	 * be in range: PAGE_CACHE_SHIFT - blocksize_bits would otherwise
	 * wrap and produce an undefined shift count.
	 */
	last_fs_block = num_blocks - 1;
	last_fs_page = last_fs_block >> (PAGE_CACHE_SHIFT - blocksize_bits);

	if ((last_fs_block > (sector_t)(~0ULL) >> (blocksize_bits - 9)) ||
	    (last_fs_page > (pgoff_t)(~0ULL)))
		return -EFBIG;

	return 0;
}
EXPORT_SYMBOL(generic_check_addressable);
/* /*
* No-op implementation of ->fsync for in-memory filesystems. * No-op implementation of ->fsync for in-memory filesystems.
*/ */
......
...@@ -883,8 +883,8 @@ struct ocfs2_write_ctxt { ...@@ -883,8 +883,8 @@ struct ocfs2_write_ctxt {
* out in so that future reads from that region will get * out in so that future reads from that region will get
* zero's. * zero's.
*/ */
struct page *w_pages[OCFS2_MAX_CTXT_PAGES];
unsigned int w_num_pages; unsigned int w_num_pages;
struct page *w_pages[OCFS2_MAX_CTXT_PAGES];
struct page *w_target_page; struct page *w_target_page;
/* /*
...@@ -1642,7 +1642,8 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh, ...@@ -1642,7 +1642,8 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
return ret; return ret;
} }
int ocfs2_write_begin_nolock(struct address_space *mapping, int ocfs2_write_begin_nolock(struct file *filp,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags, loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata, struct page **pagep, void **fsdata,
struct buffer_head *di_bh, struct page *mmap_page) struct buffer_head *di_bh, struct page *mmap_page)
...@@ -1692,7 +1693,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, ...@@ -1692,7 +1693,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
mlog_errno(ret); mlog_errno(ret);
goto out; goto out;
} else if (ret == 1) { } else if (ret == 1) {
ret = ocfs2_refcount_cow(inode, di_bh, ret = ocfs2_refcount_cow(inode, filp, di_bh,
wc->w_cpos, wc->w_clen, UINT_MAX); wc->w_cpos, wc->w_clen, UINT_MAX);
if (ret) { if (ret) {
mlog_errno(ret); mlog_errno(ret);
...@@ -1854,7 +1855,7 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping, ...@@ -1854,7 +1855,7 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping,
*/ */
down_write(&OCFS2_I(inode)->ip_alloc_sem); down_write(&OCFS2_I(inode)->ip_alloc_sem);
ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep, ret = ocfs2_write_begin_nolock(file, mapping, pos, len, flags, pagep,
fsdata, di_bh, NULL); fsdata, di_bh, NULL);
if (ret) { if (ret) {
mlog_errno(ret); mlog_errno(ret);
......
...@@ -48,7 +48,8 @@ int ocfs2_write_end_nolock(struct address_space *mapping, ...@@ -48,7 +48,8 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied, loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata); struct page *page, void *fsdata);
int ocfs2_write_begin_nolock(struct address_space *mapping, int ocfs2_write_begin_nolock(struct file *filp,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags, loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata, struct page **pagep, void **fsdata,
struct buffer_head *di_bh, struct page *mmap_page); struct buffer_head *di_bh, struct page *mmap_page);
......
此差异已折叠。
...@@ -31,6 +31,8 @@ ...@@ -31,6 +31,8 @@
#define O2HB_REGION_TIMEOUT_MS 2000 #define O2HB_REGION_TIMEOUT_MS 2000
#define O2HB_MAX_REGION_NAME_LEN 32
/* number of changes to be seen as live */ /* number of changes to be seen as live */
#define O2HB_LIVE_THRESHOLD 2 #define O2HB_LIVE_THRESHOLD 2
/* number of equal samples to be seen as dead */ /* number of equal samples to be seen as dead */
...@@ -81,5 +83,7 @@ int o2hb_check_node_heartbeating(u8 node_num); ...@@ -81,5 +83,7 @@ int o2hb_check_node_heartbeating(u8 node_num);
int o2hb_check_node_heartbeating_from_callback(u8 node_num); int o2hb_check_node_heartbeating_from_callback(u8 node_num);
int o2hb_check_local_node_heartbeating(void); int o2hb_check_local_node_heartbeating(void);
void o2hb_stop_all_regions(void); void o2hb_stop_all_regions(void);
int o2hb_get_all_regions(char *region_uuids, u8 numregions);
int o2hb_global_heartbeat_active(void);
#endif /* O2CLUSTER_HEARTBEAT_H */ #endif /* O2CLUSTER_HEARTBEAT_H */
...@@ -119,7 +119,8 @@ ...@@ -119,7 +119,8 @@
#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ #define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */
#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ #define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */
#define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */ #define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */
#define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */ #define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */
#define ML_CLUSTER 0x0000001000000000ULL /* cluster stack */
#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE) #define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE)
#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT) #define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT)
......
...@@ -711,6 +711,8 @@ static struct config_item *o2nm_node_group_make_item(struct config_group *group, ...@@ -711,6 +711,8 @@ static struct config_item *o2nm_node_group_make_item(struct config_group *group,
config_item_init_type_name(&node->nd_item, name, &o2nm_node_type); config_item_init_type_name(&node->nd_item, name, &o2nm_node_type);
spin_lock_init(&node->nd_lock); spin_lock_init(&node->nd_lock);
mlog(ML_CLUSTER, "o2nm: Registering node %s\n", name);
return &node->nd_item; return &node->nd_item;
} }
...@@ -744,6 +746,9 @@ static void o2nm_node_group_drop_item(struct config_group *group, ...@@ -744,6 +746,9 @@ static void o2nm_node_group_drop_item(struct config_group *group,
} }
write_unlock(&cluster->cl_nodes_lock); write_unlock(&cluster->cl_nodes_lock);
mlog(ML_CLUSTER, "o2nm: Unregistered node %s\n",
config_item_name(&node->nd_item));
config_item_put(item); config_item_put(item);
} }
......
...@@ -36,4 +36,10 @@ ...@@ -36,4 +36,10 @@
/* host name, group name, cluster name all 64 bytes */ /* host name, group name, cluster name all 64 bytes */
#define O2NM_MAX_NAME_LEN 64 // __NEW_UTS_LEN #define O2NM_MAX_NAME_LEN 64 // __NEW_UTS_LEN
/*
* Maximum number of global heartbeat regions allowed.
* **CAUTION** Changing this number will break dlm compatibility.
*/
#define O2NM_MAX_REGIONS 32
#endif /* _OCFS2_NODEMANAGER_H */ #endif /* _OCFS2_NODEMANAGER_H */
...@@ -1696,6 +1696,9 @@ static void o2net_hb_node_down_cb(struct o2nm_node *node, int node_num, ...@@ -1696,6 +1696,9 @@ static void o2net_hb_node_down_cb(struct o2nm_node *node, int node_num,
{ {
o2quo_hb_down(node_num); o2quo_hb_down(node_num);
if (!node)
return;
if (node_num != o2nm_this_node()) if (node_num != o2nm_this_node())
o2net_disconnect_node(node); o2net_disconnect_node(node);
...@@ -1709,6 +1712,8 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, ...@@ -1709,6 +1712,8 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num,
o2quo_hb_up(node_num); o2quo_hb_up(node_num);
BUG_ON(!node);
/* ensure an immediate connect attempt */ /* ensure an immediate connect attempt */
nn->nn_last_connect_attempt = jiffies - nn->nn_last_connect_attempt = jiffies -
(msecs_to_jiffies(o2net_reconnect_delay()) + 1); (msecs_to_jiffies(o2net_reconnect_delay()) + 1);
......
...@@ -40,6 +40,14 @@ ...@@ -40,6 +40,14 @@
#include "inode.h" #include "inode.h"
#include "super.h" #include "super.h"
void ocfs2_dentry_attach_gen(struct dentry *dentry)
{
unsigned long gen =
OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen;
BUG_ON(dentry->d_inode);
dentry->d_fsdata = (void *)gen;
}
static int ocfs2_dentry_revalidate(struct dentry *dentry, static int ocfs2_dentry_revalidate(struct dentry *dentry,
struct nameidata *nd) struct nameidata *nd)
...@@ -51,11 +59,20 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, ...@@ -51,11 +59,20 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
mlog_entry("(0x%p, '%.*s')\n", dentry, mlog_entry("(0x%p, '%.*s')\n", dentry,
dentry->d_name.len, dentry->d_name.name); dentry->d_name.len, dentry->d_name.name);
/* Never trust a negative dentry - force a new lookup. */ /* For a negative dentry -
* check the generation number of the parent and compare with the
* one stored in the inode.
*/
if (inode == NULL) { if (inode == NULL) {
mlog(0, "negative dentry: %.*s\n", dentry->d_name.len, unsigned long gen = (unsigned long) dentry->d_fsdata;
dentry->d_name.name); unsigned long pgen =
goto bail; OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen;
mlog(0, "negative dentry: %.*s parent gen: %lu "
"dentry gen: %lu\n",
dentry->d_name.len, dentry->d_name.name, pgen, gen);
if (gen != pgen)
goto bail;
goto valid;
} }
BUG_ON(!osb); BUG_ON(!osb);
...@@ -96,6 +113,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, ...@@ -96,6 +113,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
goto bail; goto bail;
} }
valid:
ret = 1; ret = 1;
bail: bail:
...@@ -227,6 +245,12 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, ...@@ -227,6 +245,12 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
if (!inode) if (!inode)
return 0; return 0;
if (!dentry->d_inode && dentry->d_fsdata) {
/* Converting a negative dentry to positive
Clear dentry->d_fsdata */
dentry->d_fsdata = dl = NULL;
}
if (dl) { if (dl) {
mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno, mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
" \"%.*s\": old parent: %llu, new: %llu\n", " \"%.*s\": old parent: %llu, new: %llu\n",
...@@ -452,6 +476,7 @@ static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode) ...@@ -452,6 +476,7 @@ static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode)
out: out:
iput(inode); iput(inode);
ocfs2_dentry_attach_gen(dentry);
} }
/* /*
......
...@@ -64,5 +64,6 @@ void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target, ...@@ -64,5 +64,6 @@ void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
struct inode *old_dir, struct inode *new_dir); struct inode *old_dir, struct inode *new_dir);
extern spinlock_t dentry_attach_lock; extern spinlock_t dentry_attach_lock;
void ocfs2_dentry_attach_gen(struct dentry *dentry);
#endif /* OCFS2_DCACHE_H */ #endif /* OCFS2_DCACHE_H */
...@@ -445,7 +445,9 @@ enum { ...@@ -445,7 +445,9 @@ enum {
DLM_LOCK_REQUEST_MSG, /* 515 */ DLM_LOCK_REQUEST_MSG, /* 515 */
DLM_RECO_DATA_DONE_MSG, /* 516 */ DLM_RECO_DATA_DONE_MSG, /* 516 */
DLM_BEGIN_RECO_MSG, /* 517 */ DLM_BEGIN_RECO_MSG, /* 517 */
DLM_FINALIZE_RECO_MSG /* 518 */ DLM_FINALIZE_RECO_MSG, /* 518 */
DLM_QUERY_REGION, /* 519 */
DLM_QUERY_NODEINFO, /* 520 */
}; };
struct dlm_reco_node_data struct dlm_reco_node_data
...@@ -727,6 +729,31 @@ struct dlm_cancel_join ...@@ -727,6 +729,31 @@ struct dlm_cancel_join
u8 domain[O2NM_MAX_NAME_LEN]; u8 domain[O2NM_MAX_NAME_LEN];
}; };
/*
 * Payload of a DLM_QUERY_REGION message: the heartbeat regions known to
 * the sending node, so the receiver can compare them with its own list.
 */
struct dlm_query_region {
/* node number of the sender */
u8 qr_node;
/* number of names in qr_regions; left at zero when the sender is not
 * using global heartbeat */
u8 qr_numregions;
/* length of the domain name in qr_domain */
u8 qr_namelen;
/* padding byte */
u8 pad1;
/* name of the dlm domain being joined */
u8 qr_domain[O2NM_MAX_NAME_LEN];
/* region names packed back to back, each in a fixed
 * O2HB_MAX_REGION_NAME_LEN-byte slot */
u8 qr_regions[O2HB_MAX_REGION_NAME_LEN * O2NM_MAX_REGIONS];
};
/*
 * Description of one configured node as carried in a DLM_QUERY_NODEINFO
 * message.
 */
struct dlm_node_info {
/* node number (copied from o2nm_node nd_num) */
u8 ni_nodenum;
/* padding byte */
u8 pad1;
/* copied verbatim from nd_ipv4_port; users convert with ntohs() before
 * printing */
u16 ni_ipv4_port;
/* copied verbatim from nd_ipv4_address; printed with %pI4 */
u32 ni_ipv4_address;
};
/*
 * Payload of a DLM_QUERY_NODEINFO message: every node configured on the
 * sender, so the receiver can compare them with its own configuration.
 */
struct dlm_query_nodeinfo {
/* node number of the sender */
u8 qn_nodenum;
/* number of valid entries in qn_nodes */
u8 qn_numnodes;
/* length of the domain name in qn_domain */
u8 qn_namelen;
/* padding byte */
u8 pad1;
/* name of the dlm domain being joined */
u8 qn_domain[O2NM_MAX_NAME_LEN];
/* the first qn_numnodes entries describe the sender's configured nodes */
struct dlm_node_info qn_nodes[O2NM_MAX_NODES];
};
struct dlm_exit_domain struct dlm_exit_domain
{ {
u8 node_idx; u8 node_idx;
......
...@@ -493,7 +493,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db) ...@@ -493,7 +493,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
struct hlist_head *bucket; struct hlist_head *bucket;
struct hlist_node *list; struct hlist_node *list;
int i, out = 0; int i, out = 0;
unsigned long total = 0, longest = 0, bktcnt; unsigned long total = 0, longest = 0, bucket_count = 0;
out += snprintf(db->buf + out, db->len - out, out += snprintf(db->buf + out, db->len - out,
"Dumping MLEs for Domain: %s\n", dlm->name); "Dumping MLEs for Domain: %s\n", dlm->name);
...@@ -505,13 +505,13 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db) ...@@ -505,13 +505,13 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
mle = hlist_entry(list, struct dlm_master_list_entry, mle = hlist_entry(list, struct dlm_master_list_entry,
master_hash_node); master_hash_node);
++total; ++total;
++bktcnt; ++bucket_count;
if (db->len - out < 200) if (db->len - out < 200)
continue; continue;
out += dump_mle(mle, db->buf + out, db->len - out); out += dump_mle(mle, db->buf + out, db->len - out);
} }
longest = max(longest, bktcnt); longest = max(longest, bucket_count);
bktcnt = 0; bucket_count = 0;
} }
spin_unlock(&dlm->master_lock); spin_unlock(&dlm->master_lock);
...@@ -782,7 +782,9 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db) ...@@ -782,7 +782,9 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
/* Domain: xxxxxxxxxx Key: 0xdfbac769 */ /* Domain: xxxxxxxxxx Key: 0xdfbac769 */
out += snprintf(db->buf + out, db->len - out, out += snprintf(db->buf + out, db->len - out,
"Domain: %s Key: 0x%08x\n", dlm->name, dlm->key); "Domain: %s Key: 0x%08x Protocol: %d.%d\n",
dlm->name, dlm->key, dlm->dlm_locking_proto.pv_major,
dlm->dlm_locking_proto.pv_minor);
/* Thread Pid: xxx Node: xxx State: xxxxx */ /* Thread Pid: xxx Node: xxx State: xxxxx */
out += snprintf(db->buf + out, db->len - out, out += snprintf(db->buf + out, db->len - out,
......
...@@ -128,10 +128,14 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events); ...@@ -128,10 +128,14 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events);
* will have a negotiated version with the same major number and a minor * will have a negotiated version with the same major number and a minor
* number equal or smaller. The dlm_ctxt->dlm_locking_proto field should * number equal or smaller. The dlm_ctxt->dlm_locking_proto field should
* be used to determine what a running domain is actually using. * be used to determine what a running domain is actually using.
*
* New in version 1.1:
* - Message DLM_QUERY_REGION added to support global heartbeat
* - Message DLM_QUERY_NODEINFO added to allow online node removes
*/ */
static const struct dlm_protocol_version dlm_protocol = { static const struct dlm_protocol_version dlm_protocol = {
.pv_major = 1, .pv_major = 1,
.pv_minor = 0, .pv_minor = 1,
}; };
#define DLM_DOMAIN_BACKOFF_MS 200 #define DLM_DOMAIN_BACKOFF_MS 200
...@@ -142,6 +146,8 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, ...@@ -142,6 +146,8 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
void **ret_data); void **ret_data);
static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data,
void **ret_data); void **ret_data);
static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
void *data, void **ret_data);
static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data,
void **ret_data); void **ret_data);
static int dlm_protocol_compare(struct dlm_protocol_version *existing, static int dlm_protocol_compare(struct dlm_protocol_version *existing,
...@@ -921,6 +927,370 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, ...@@ -921,6 +927,370 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
return 0; return 0;
} }
static int dlm_match_regions(struct dlm_ctxt *dlm,
struct dlm_query_region *qr)
{
char *local = NULL, *remote = qr->qr_regions;
char *l, *r;
int localnr, i, j, foundit;
int status = 0;
if (!o2hb_global_heartbeat_active()) {
if (qr->qr_numregions) {
mlog(ML_ERROR, "Domain %s: Joining node %d has global "
"heartbeat enabled but local node %d does not\n",
qr->qr_domain, qr->qr_node, dlm->node_num);
status = -EINVAL;
}
goto bail;
}
if (o2hb_global_heartbeat_active() && !qr->qr_numregions) {
mlog(ML_ERROR, "Domain %s: Local node %d has global "
"heartbeat enabled but joining node %d does not\n",
qr->qr_domain, dlm->node_num, qr->qr_node);
status = -EINVAL;
goto bail;
}
r = remote;
for (i = 0; i < qr->qr_numregions; ++i) {
mlog(0, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, r);
r += O2HB_MAX_REGION_NAME_LEN;
}
local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL);
if (!local) {
status = -ENOMEM;
goto bail;
}
localnr = o2hb_get_all_regions(local, O2NM_MAX_REGIONS);
/* compare local regions with remote */
l = local;
for (i = 0; i < localnr; ++i) {
foundit = 0;
r = remote;
for (j = 0; j <= qr->qr_numregions; ++j) {
if (!memcmp(l, r, O2HB_MAX_REGION_NAME_LEN)) {
foundit = 1;
break;
}
r += O2HB_MAX_REGION_NAME_LEN;
}
if (!foundit) {
status = -EINVAL;
mlog(ML_ERROR, "Domain %s: Region '%.*s' registered "
"in local node %d but not in joining node %d\n",
qr->qr_domain, O2HB_MAX_REGION_NAME_LEN, l,
dlm->node_num, qr->qr_node);
goto bail;
}
l += O2HB_MAX_REGION_NAME_LEN;
}
/* compare remote with local regions */
r = remote;
for (i = 0; i < qr->qr_numregions; ++i) {
foundit = 0;
l = local;
for (j = 0; j < localnr; ++j) {
if (!memcmp(r, l, O2HB_MAX_REGION_NAME_LEN)) {
foundit = 1;
break;
}
l += O2HB_MAX_REGION_NAME_LEN;
}
if (!foundit) {
status = -EINVAL;
mlog(ML_ERROR, "Domain %s: Region '%.*s' registered "
"in joining node %d but not in local node %d\n",
qr->qr_domain, O2HB_MAX_REGION_NAME_LEN, r,
qr->qr_node, dlm->node_num);
goto bail;
}
r += O2HB_MAX_REGION_NAME_LEN;
}
bail:
kfree(local);
return status;
}
/*
 * Send the local heartbeat region list to every node set in @node_map,
 * skipping ourselves, as a DLM_QUERY_REGION message.
 *
 * Returns 0 when @node_map is empty or every queried node answered with a
 * zero status; -ENOMEM if the message cannot be allocated; otherwise the
 * first non-zero send result or remote status, at which point sending stops.
 */
static int dlm_send_regions(struct dlm_ctxt *dlm, unsigned long *node_map)
{
	struct dlm_query_region *qr;
	char *region;
	int remote_status, ret = 0;
	int node, n;

	/* No bits set: nobody to query. */
	if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES)
		return 0;

	qr = kzalloc(sizeof(*qr), GFP_KERNEL);
	if (!qr) {
		ret = -ENOMEM;
		mlog_errno(ret);
		return ret;
	}

	qr->qr_node = dlm->node_num;
	qr->qr_namelen = strlen(dlm->name);
	memcpy(qr->qr_domain, dlm->name, qr->qr_namelen);

	/* With local heartbeat the region count stays at zero. */
	if (o2hb_global_heartbeat_active())
		qr->qr_numregions = o2hb_get_all_regions(qr->qr_regions,
							 O2NM_MAX_REGIONS);

	region = qr->qr_regions;
	for (n = 0; n < qr->qr_numregions; ++n) {
		mlog(0, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, region);
		region += O2HB_MAX_REGION_NAME_LEN;
	}

	for (node = find_next_bit(node_map, O2NM_MAX_NODES, 0);
	     node < O2NM_MAX_NODES;
	     node = find_next_bit(node_map, O2NM_MAX_NODES, node + 1)) {
		if (node == dlm->node_num)
			continue;

		mlog(0, "Sending regions to node %d\n", node);

		ret = o2net_send_message(DLM_QUERY_REGION, DLM_MOD_KEY, qr,
					 sizeof(struct dlm_query_region),
					 node, &remote_status);
		/* A successful send reports the remote handler's verdict. */
		if (ret >= 0)
			ret = remote_status;
		if (ret) {
			mlog(ML_ERROR, "Region mismatch %d, node %d\n",
			     ret, node);
			break;
		}
	}

	kfree(qr);
	return ret;
}
/*
 * o2net handler for DLM_QUERY_REGION.
 *
 * Looks up the domain named in the request and, provided the sender is the
 * node currently joining and the active protocol is not 1.0, compares the
 * sender's heartbeat regions with the local ones.
 *
 * Returns the result of dlm_match_regions(), or -EINVAL if any of the
 * preliminary checks fail.
 */
static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
				    void *data, void **ret_data)
{
	struct dlm_query_region *qr = (struct dlm_query_region *) msg->buf;
	struct dlm_ctxt *dlm;
	int status = -EINVAL;

	mlog(0, "Node %u queries hb regions on domain %s\n", qr->qr_node,
	     qr->qr_domain);

	spin_lock(&dlm_domain_lock);

	dlm = __dlm_lookup_domain_full(qr->qr_domain, qr->qr_namelen);
	if (!dlm) {
		mlog(ML_ERROR, "Node %d queried hb regions on domain %s "
		     "before join domain\n", qr->qr_node, qr->qr_domain);
		goto unlock_domain;
	}

	spin_lock(&dlm->spinlock);

	if (dlm->joining_node != qr->qr_node) {
		mlog(ML_ERROR, "Node %d queried hb regions on domain %s "
		     "but joining node is %d\n", qr->qr_node, qr->qr_domain,
		     dlm->joining_node);
		goto unlock_dlm;
	}

	/* Support for global heartbeat was added in 1.1 */
	if (dlm->dlm_locking_proto.pv_major == 1 &&
	    dlm->dlm_locking_proto.pv_minor == 0) {
		mlog(ML_ERROR, "Node %d queried hb regions on domain %s "
		     "but active dlm protocol is %d.%d\n", qr->qr_node,
		     qr->qr_domain, dlm->dlm_locking_proto.pv_major,
		     dlm->dlm_locking_proto.pv_minor);
		goto unlock_dlm;
	}

	status = dlm_match_regions(dlm, qr);

unlock_dlm:
	spin_unlock(&dlm->spinlock);
unlock_domain:
	spin_unlock(&dlm_domain_lock);

	return status;
}
static int dlm_match_nodes(struct dlm_ctxt *dlm, struct dlm_query_nodeinfo *qn)
{
struct o2nm_node *local;
struct dlm_node_info *remote;
int i, j;
int status = 0;
for (j = 0; j < qn->qn_numnodes; ++j)
mlog(0, "Node %3d, %pI4:%u\n", qn->qn_nodes[j].ni_nodenum,
&(qn->qn_nodes[j].ni_ipv4_address),
ntohs(qn->qn_nodes[j].ni_ipv4_port));
for (i = 0; i < O2NM_MAX_NODES && !status; ++i) {
local = o2nm_get_node_by_num(i);
remote = NULL;
for (j = 0; j < qn->qn_numnodes; ++j) {
if (qn->qn_nodes[j].ni_nodenum == i) {
remote = &(qn->qn_nodes[j]);
break;
}
}
if (!local && !remote)
continue;
if ((local && !remote) || (!local && remote))
status = -EINVAL;
if (!status &&
((remote->ni_nodenum != local->nd_num) ||
(remote->ni_ipv4_port != local->nd_ipv4_port) ||
(remote->ni_ipv4_address != local->nd_ipv4_address)))
status = -EINVAL;
if (status) {
if (remote && !local)
mlog(ML_ERROR, "Domain %s: Node %d (%pI4:%u) "
"registered in joining node %d but not in "
"local node %d\n", qn->qn_domain,
remote->ni_nodenum,
&(remote->ni_ipv4_address),
ntohs(remote->ni_ipv4_port),
qn->qn_nodenum, dlm->node_num);
if (local && !remote)
mlog(ML_ERROR, "Domain %s: Node %d (%pI4:%u) "
"registered in local node %d but not in "
"joining node %d\n", qn->qn_domain,
local->nd_num, &(local->nd_ipv4_address),
ntohs(local->nd_ipv4_port),
dlm->node_num, qn->qn_nodenum);
BUG_ON((!local && !remote));
}
if (local)
o2nm_node_put(local);
}
return status;
}
static int dlm_send_nodeinfo(struct dlm_ctxt *dlm, unsigned long *node_map)
{
struct dlm_query_nodeinfo *qn = NULL;
struct o2nm_node *node;
int ret = 0, status, count, i;
if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES)
goto bail;
qn = kzalloc(sizeof(struct dlm_query_nodeinfo), GFP_KERNEL);
if (!qn) {
ret = -ENOMEM;
mlog_errno(ret);
goto bail;
}
for (i = 0, count = 0; i < O2NM_MAX_NODES; ++i) {
node = o2nm_get_node_by_num(i);
if (!node)
continue;
qn->qn_nodes[count].ni_nodenum = node->nd_num;
qn->qn_nodes[count].ni_ipv4_port = node->nd_ipv4_port;
qn->qn_nodes[count].ni_ipv4_address = node->nd_ipv4_address;
mlog(0, "Node %3d, %pI4:%u\n", node->nd_num,
&(node->nd_ipv4_address), ntohs(node->nd_ipv4_port));
++count;
o2nm_node_put(node);
}
qn->qn_nodenum = dlm->node_num;
qn->qn_numnodes = count;
qn->qn_namelen = strlen(dlm->name);
memcpy(qn->qn_domain, dlm->name, qn->qn_namelen);
i = -1;
while ((i = find_next_bit(node_map, O2NM_MAX_NODES,
i + 1)) < O2NM_MAX_NODES) {
if (i == dlm->node_num)
continue;
mlog(0, "Sending nodeinfo to node %d\n", i);
ret = o2net_send_message(DLM_QUERY_NODEINFO, DLM_MOD_KEY,
qn, sizeof(struct dlm_query_nodeinfo),
i, &status);
if (ret >= 0)
ret = status;
if (ret) {
mlog(ML_ERROR, "node mismatch %d, node %d\n", ret, i);
break;
}
}
bail:
kfree(qn);
return ret;
}
/*
 * o2net handler for DLM_QUERY_NODEINFO.
 *
 * Looks up the domain named in the request and, provided the sender is the
 * node currently joining and the active protocol is not 1.0, compares the
 * sender's node configuration with the local one.
 *
 * Returns the result of dlm_match_nodes(), or -EINVAL if any of the
 * preliminary checks fail.
 */
static int dlm_query_nodeinfo_handler(struct o2net_msg *msg, u32 len,
				      void *data, void **ret_data)
{
	struct dlm_query_nodeinfo *qn = (struct dlm_query_nodeinfo *) msg->buf;
	struct dlm_ctxt *dlm;
	int status = -EINVAL;

	mlog(0, "Node %u queries nodes on domain %s\n", qn->qn_nodenum,
	     qn->qn_domain);

	spin_lock(&dlm_domain_lock);

	dlm = __dlm_lookup_domain_full(qn->qn_domain, qn->qn_namelen);
	if (!dlm) {
		mlog(ML_ERROR, "Node %d queried nodes on domain %s before "
		     "join domain\n", qn->qn_nodenum, qn->qn_domain);
		goto unlock_domain;
	}

	spin_lock(&dlm->spinlock);

	if (dlm->joining_node != qn->qn_nodenum) {
		mlog(ML_ERROR, "Node %d queried nodes on domain %s but "
		     "joining node is %d\n", qn->qn_nodenum, qn->qn_domain,
		     dlm->joining_node);
		goto unlock_dlm;
	}

	/* Support for node query was added in 1.1 */
	if (dlm->dlm_locking_proto.pv_major == 1 &&
	    dlm->dlm_locking_proto.pv_minor == 0) {
		mlog(ML_ERROR, "Node %d queried nodes on domain %s "
		     "but active dlm protocol is %d.%d\n", qn->qn_nodenum,
		     qn->qn_domain, dlm->dlm_locking_proto.pv_major,
		     dlm->dlm_locking_proto.pv_minor);
		goto unlock_dlm;
	}

	status = dlm_match_nodes(dlm, qn);

unlock_dlm:
	spin_unlock(&dlm->spinlock);
unlock_domain:
	spin_unlock(&dlm_domain_lock);

	return status;
}
static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data,
void **ret_data) void **ret_data)
{ {
...@@ -1241,6 +1611,20 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm) ...@@ -1241,6 +1611,20 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm)
set_bit(dlm->node_num, dlm->domain_map); set_bit(dlm->node_num, dlm->domain_map);
spin_unlock(&dlm->spinlock); spin_unlock(&dlm->spinlock);
/* Support for global heartbeat and node info was added in 1.1 */
if (dlm_protocol.pv_major > 1 || dlm_protocol.pv_minor > 0) {
status = dlm_send_nodeinfo(dlm, ctxt->yes_resp_map);
if (status) {
mlog_errno(status);
goto bail;
}
status = dlm_send_regions(dlm, ctxt->yes_resp_map);
if (status) {
mlog_errno(status);
goto bail;
}
}
dlm_send_join_asserts(dlm, ctxt->yes_resp_map); dlm_send_join_asserts(dlm, ctxt->yes_resp_map);
/* Joined state *must* be set before the joining node /* Joined state *must* be set before the joining node
...@@ -1807,7 +2191,21 @@ static int dlm_register_net_handlers(void) ...@@ -1807,7 +2191,21 @@ static int dlm_register_net_handlers(void)
sizeof(struct dlm_cancel_join), sizeof(struct dlm_cancel_join),
dlm_cancel_join_handler, dlm_cancel_join_handler,
NULL, NULL, &dlm_join_handlers); NULL, NULL, &dlm_join_handlers);
if (status)
goto bail;
status = o2net_register_handler(DLM_QUERY_REGION, DLM_MOD_KEY,
sizeof(struct dlm_query_region),
dlm_query_region_handler,
NULL, NULL, &dlm_join_handlers);
if (status)
goto bail;
status = o2net_register_handler(DLM_QUERY_NODEINFO, DLM_MOD_KEY,
sizeof(struct dlm_query_nodeinfo),
dlm_query_nodeinfo_handler,
NULL, NULL, &dlm_join_handlers);
bail: bail:
if (status < 0) if (status < 0)
dlm_unregister_net_handlers(); dlm_unregister_net_handlers();
......
...@@ -3635,10 +3635,18 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, ...@@ -3635,10 +3635,18 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
{ {
struct inode *inode; struct inode *inode;
struct address_space *mapping; struct address_space *mapping;
struct ocfs2_inode_info *oi;
inode = ocfs2_lock_res_inode(lockres); inode = ocfs2_lock_res_inode(lockres);
mapping = inode->i_mapping; mapping = inode->i_mapping;
if (S_ISDIR(inode->i_mode)) {
oi = OCFS2_I(inode);
oi->ip_dir_lock_gen++;
mlog(0, "generation: %u\n", oi->ip_dir_lock_gen);
goto out;
}
if (!S_ISREG(inode->i_mode)) if (!S_ISREG(inode->i_mode))
goto out; goto out;
......
...@@ -64,12 +64,6 @@ ...@@ -64,12 +64,6 @@
#include "buffer_head_io.h" #include "buffer_head_io.h"
static int ocfs2_sync_inode(struct inode *inode)
{
filemap_fdatawrite(inode->i_mapping);
return sync_mapping_buffers(inode->i_mapping);
}
static int ocfs2_init_file_private(struct inode *inode, struct file *file) static int ocfs2_init_file_private(struct inode *inode, struct file *file)
{ {
struct ocfs2_file_private *fp; struct ocfs2_file_private *fp;
...@@ -180,16 +174,12 @@ static int ocfs2_sync_file(struct file *file, int datasync) ...@@ -180,16 +174,12 @@ static int ocfs2_sync_file(struct file *file, int datasync)
{ {
int err = 0; int err = 0;
journal_t *journal; journal_t *journal;
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = file->f_mapping->host; struct inode *inode = file->f_mapping->host;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync, mlog_entry("(0x%p, %d, 0x%p, '%.*s')\n", file, datasync,
dentry->d_name.len, dentry->d_name.name); file->f_path.dentry, file->f_path.dentry->d_name.len,
file->f_path.dentry->d_name.name);
err = ocfs2_sync_inode(dentry->d_inode);
if (err)
goto bail;
if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) { if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) {
/* /*
...@@ -370,7 +360,7 @@ static int ocfs2_cow_file_pos(struct inode *inode, ...@@ -370,7 +360,7 @@ static int ocfs2_cow_file_pos(struct inode *inode,
if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
goto out; goto out;
return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1); return ocfs2_refcount_cow(inode, NULL, fe_bh, cpos, 1, cpos+1);
out: out:
return status; return status;
...@@ -913,8 +903,8 @@ static int ocfs2_zero_extend_get_range(struct inode *inode, ...@@ -913,8 +903,8 @@ static int ocfs2_zero_extend_get_range(struct inode *inode,
zero_clusters = last_cpos - zero_cpos; zero_clusters = last_cpos - zero_cpos;
if (needs_cow) { if (needs_cow) {
rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters, rc = ocfs2_refcount_cow(inode, NULL, di_bh, zero_cpos,
UINT_MAX); zero_clusters, UINT_MAX);
if (rc) { if (rc) {
mlog_errno(rc); mlog_errno(rc);
goto out; goto out;
...@@ -2062,6 +2052,7 @@ int ocfs2_check_range_for_refcount(struct inode *inode, loff_t pos, ...@@ -2062,6 +2052,7 @@ int ocfs2_check_range_for_refcount(struct inode *inode, loff_t pos,
} }
static int ocfs2_prepare_inode_for_refcount(struct inode *inode, static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
struct file *file,
loff_t pos, size_t count, loff_t pos, size_t count,
int *meta_level) int *meta_level)
{ {
...@@ -2079,7 +2070,7 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode, ...@@ -2079,7 +2070,7 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
*meta_level = 1; *meta_level = 1;
ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX); ret = ocfs2_refcount_cow(inode, file, di_bh, cpos, clusters, UINT_MAX);
if (ret) if (ret)
mlog_errno(ret); mlog_errno(ret);
out: out:
...@@ -2087,7 +2078,7 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode, ...@@ -2087,7 +2078,7 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
return ret; return ret;
} }
static int ocfs2_prepare_inode_for_write(struct dentry *dentry, static int ocfs2_prepare_inode_for_write(struct file *file,
loff_t *ppos, loff_t *ppos,
size_t count, size_t count,
int appending, int appending,
...@@ -2095,6 +2086,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, ...@@ -2095,6 +2086,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
int *has_refcount) int *has_refcount)
{ {
int ret = 0, meta_level = 0; int ret = 0, meta_level = 0;
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = dentry->d_inode; struct inode *inode = dentry->d_inode;
loff_t saved_pos, end; loff_t saved_pos, end;
...@@ -2150,6 +2142,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, ...@@ -2150,6 +2142,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
meta_level = -1; meta_level = -1;
ret = ocfs2_prepare_inode_for_refcount(inode, ret = ocfs2_prepare_inode_for_refcount(inode,
file,
saved_pos, saved_pos,
count, count,
&meta_level); &meta_level);
...@@ -2232,6 +2225,8 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, ...@@ -2232,6 +2225,8 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
struct inode *inode = file->f_path.dentry->d_inode; struct inode *inode = file->f_path.dentry->d_inode;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
int full_coherency = !(osb->s_mount_opt &
OCFS2_MOUNT_COHERENCY_BUFFERED);
mlog_entry("(0x%p, %u, '%.*s')\n", file, mlog_entry("(0x%p, %u, '%.*s')\n", file,
(unsigned int)nr_segs, (unsigned int)nr_segs,
...@@ -2255,16 +2250,39 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, ...@@ -2255,16 +2250,39 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
have_alloc_sem = 1; have_alloc_sem = 1;
} }
/* concurrent O_DIRECT writes are allowed */ /*
rw_level = !direct_io; * Concurrent O_DIRECT writes are allowed with
* mount_option "coherency=buffered".
*/
rw_level = (!direct_io || full_coherency);
ret = ocfs2_rw_lock(inode, rw_level); ret = ocfs2_rw_lock(inode, rw_level);
if (ret < 0) { if (ret < 0) {
mlog_errno(ret); mlog_errno(ret);
goto out_sems; goto out_sems;
} }
/*
* O_DIRECT writes with "coherency=full" need to take EX cluster
* inode_lock to guarantee coherency.
*/
if (direct_io && full_coherency) {
/*
* We need to take and drop the inode lock to force
* other nodes to drop their caches. Buffered I/O
* already does this in write_begin().
*/
ret = ocfs2_inode_lock(inode, NULL, 1);
if (ret < 0) {
mlog_errno(ret);
goto out_sems;
}
ocfs2_inode_unlock(inode, 1);
}
can_do_direct = direct_io; can_do_direct = direct_io;
ret = ocfs2_prepare_inode_for_write(file->f_path.dentry, ppos, ret = ocfs2_prepare_inode_for_write(file, ppos,
iocb->ki_left, appending, iocb->ki_left, appending,
&can_do_direct, &has_refcount); &can_do_direct, &has_refcount);
if (ret < 0) { if (ret < 0) {
...@@ -2312,17 +2330,6 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, ...@@ -2312,17 +2330,6 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
ppos, count, ocount); ppos, count, ocount);
if (written < 0) { if (written < 0) {
/*
* direct write may have instantiated a few
* blocks outside i_size. Trim these off again.
* Don't need i_size_read because we hold i_mutex.
*
* XXX(truncate): this looks buggy because ocfs2 did not
* actually implement ->truncate. Take a look at
* the new truncate sequence and update this accordingly
*/
if (*ppos + count > inode->i_size)
truncate_setsize(inode, inode->i_size);
ret = written; ret = written;
goto out_dio; goto out_dio;
} }
...@@ -2394,7 +2401,7 @@ static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, ...@@ -2394,7 +2401,7 @@ static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
{ {
int ret; int ret;
ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos, ret = ocfs2_prepare_inode_for_write(out, &sd->pos,
sd->total_len, 0, NULL, NULL); sd->total_len, 0, NULL, NULL);
if (ret < 0) { if (ret < 0) {
mlog_errno(ret); mlog_errno(ret);
......
...@@ -335,6 +335,7 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, ...@@ -335,6 +335,7 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
else else
inode->i_fop = &ocfs2_dops_no_plocks; inode->i_fop = &ocfs2_dops_no_plocks;
i_size_write(inode, le64_to_cpu(fe->i_size)); i_size_write(inode, le64_to_cpu(fe->i_size));
OCFS2_I(inode)->ip_dir_lock_gen = 1;
break; break;
case S_IFLNK: case S_IFLNK:
if (ocfs2_inode_is_fast_symlink(inode)) if (ocfs2_inode_is_fast_symlink(inode))
......
...@@ -46,30 +46,28 @@ struct ocfs2_inode_info ...@@ -46,30 +46,28 @@ struct ocfs2_inode_info
/* These fields are protected by ip_lock */ /* These fields are protected by ip_lock */
spinlock_t ip_lock; spinlock_t ip_lock;
u32 ip_open_count; u32 ip_open_count;
u32 ip_clusters;
struct list_head ip_io_markers; struct list_head ip_io_markers;
u32 ip_clusters;
u16 ip_dyn_features;
struct mutex ip_io_mutex; struct mutex ip_io_mutex;
u32 ip_flags; /* see below */ u32 ip_flags; /* see below */
u32 ip_attr; /* inode attributes */ u32 ip_attr; /* inode attributes */
u16 ip_dyn_features;
/* protected by recovery_lock. */ /* protected by recovery_lock. */
struct inode *ip_next_orphan; struct inode *ip_next_orphan;
u32 ip_dir_start_lookup;
struct ocfs2_caching_info ip_metadata_cache; struct ocfs2_caching_info ip_metadata_cache;
struct ocfs2_extent_map ip_extent_map; struct ocfs2_extent_map ip_extent_map;
struct inode vfs_inode; struct inode vfs_inode;
struct jbd2_inode ip_jinode; struct jbd2_inode ip_jinode;
u32 ip_dir_start_lookup;
/* Only valid if the inode is the dir. */ /* Only valid if the inode is the dir. */
u32 ip_last_used_slot; u32 ip_last_used_slot;
u64 ip_last_used_group; u64 ip_last_used_group;
u32 ip_dir_lock_gen;
struct ocfs2_alloc_reservation ip_la_data_resv; struct ocfs2_alloc_reservation ip_la_data_resv;
}; };
......
...@@ -26,6 +26,26 @@ ...@@ -26,6 +26,26 @@
#include <linux/ext2_fs.h> #include <linux/ext2_fs.h>
/*
 * Copy a whole request structure between the kernel object 'a' and the
 * user pointer 'b'.  The transfer size is always sizeof(a), and each macro
 * evaluates to the result of the underlying copy_{from,to}_user() call.
 */
#define o2info_from_user(a, b)	\
		copy_from_user(&(a), (b), sizeof(a))
/*
 * 'b' is parenthesized so that the cast applies to the complete pointer
 * expression rather than only to its first operand.
 */
#define o2info_to_user(a, b)	\
		copy_to_user((typeof(a) __user *)(b), &(a), sizeof(a))
/*
 * Flag a request as failed, both in the kernel copy and in the user buffer.
 *
 * Nothing is returned: the caller is already reporting an error (which may
 * itself be -EFAULT) and that error is what ioctl(2) returns.  Writing the
 * flag back is only a best-effort hint telling userspace which request
 * caused the failure, so the put_user() result is deliberately ignored.
 */
static inline void __o2info_set_request_error(struct ocfs2_info_request *kreq,
					struct ocfs2_info_request __user *req)
{
	__u32 __user *uflags = (__u32 __user *)&(req->ir_flags);

	kreq->ir_flags |= OCFS2_INFO_FL_ERROR;
	(void)put_user(kreq->ir_flags, uflags);
}

/*
 * Convenience wrapper: takes a request structure 'a' by name and passes its
 * address, cast to the generic request header type, to the helper above.
 */
#define o2info_set_request_error(a, b) \
		__o2info_set_request_error((struct ocfs2_info_request *)&(a), b)
static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags)
{ {
int status; int status;
...@@ -109,6 +129,328 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, ...@@ -109,6 +129,328 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
return status; return status;
} }
/*
 * OCFS2_INFO_BLOCKSIZE handler: read the request from userspace, fill in the
 * superblock's block size, mark the request FILLED and copy it back.
 *
 * Returns 0 on success or -EFAULT if either user copy fails.  The ERROR
 * flag is only pushed back to userspace after the request was read
 * successfully: if the copy-in fails, 'oib' does not hold a valid request
 * and its ir_flags must not be written into the user buffer.
 */
int ocfs2_info_handle_blocksize(struct inode *inode,
				struct ocfs2_info_request __user *req)
{
	struct ocfs2_info_blocksize oib;

	if (o2info_from_user(oib, req))
		return -EFAULT;

	oib.ib_blocksize = inode->i_sb->s_blocksize;
	oib.ib_req.ir_flags |= OCFS2_INFO_FL_FILLED;

	if (o2info_to_user(oib, req)) {
		/* Best effort: tell userspace which request failed. */
		o2info_set_request_error(oib, req);
		return -EFAULT;
	}

	return 0;
}
/*
 * OCFS2_INFO_CLUSTERSIZE handler: read the request from userspace, fill in
 * osb->s_clustersize, mark the request FILLED and copy it back.
 *
 * Returns 0 on success or -EFAULT if either user copy fails.  The ERROR
 * flag is only pushed back to userspace after the request was read
 * successfully: if the copy-in fails, 'oic' does not hold a valid request
 * and its ir_flags must not be written into the user buffer.
 */
int ocfs2_info_handle_clustersize(struct inode *inode,
				  struct ocfs2_info_request __user *req)
{
	struct ocfs2_info_clustersize oic;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	if (o2info_from_user(oic, req))
		return -EFAULT;

	oic.ic_clustersize = osb->s_clustersize;
	oic.ic_req.ir_flags |= OCFS2_INFO_FL_FILLED;

	if (o2info_to_user(oic, req)) {
		/* Best effort: tell userspace which request failed. */
		o2info_set_request_error(oic, req);
		return -EFAULT;
	}

	return 0;
}
/*
 * OCFS2_INFO_MAXSLOTS handler: read the request from userspace, fill in
 * osb->max_slots, mark the request FILLED and copy it back.
 *
 * Returns 0 on success or -EFAULT if either user copy fails.  The ERROR
 * flag is only pushed back to userspace after the request was read
 * successfully: if the copy-in fails, 'oim' does not hold a valid request
 * and its ir_flags must not be written into the user buffer.
 */
int ocfs2_info_handle_maxslots(struct inode *inode,
			       struct ocfs2_info_request __user *req)
{
	struct ocfs2_info_maxslots oim;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	if (o2info_from_user(oim, req))
		return -EFAULT;

	oim.im_max_slots = osb->max_slots;
	oim.im_req.ir_flags |= OCFS2_INFO_FL_FILLED;

	if (o2info_to_user(oim, req)) {
		/* Best effort: tell userspace which request failed. */
		o2info_set_request_error(oim, req);
		return -EFAULT;
	}

	return 0;
}
/*
 * OCFS2_INFO_LABEL handler: read the request from userspace, copy
 * OCFS2_MAX_VOL_LABEL_LEN bytes of osb->vol_label into it, mark the request
 * FILLED and copy it back.
 *
 * Returns 0 on success or -EFAULT if either user copy fails.  The ERROR
 * flag is only pushed back to userspace after the request was read
 * successfully: if the copy-in fails, 'oil' does not hold a valid request
 * and its ir_flags must not be written into the user buffer.
 */
int ocfs2_info_handle_label(struct inode *inode,
			    struct ocfs2_info_request __user *req)
{
	struct ocfs2_info_label oil;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	if (o2info_from_user(oil, req))
		return -EFAULT;

	memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN);
	oil.il_req.ir_flags |= OCFS2_INFO_FL_FILLED;

	if (o2info_to_user(oil, req)) {
		/* Best effort: tell userspace which request failed. */
		o2info_set_request_error(oil, req);
		return -EFAULT;
	}

	return 0;
}
/*
 * OCFS2_INFO_UUID handler: read the request from userspace, copy
 * OCFS2_TEXT_UUID_LEN + 1 bytes of osb->uuid_str into it, mark the request
 * FILLED and copy it back.
 *
 * Returns 0 on success or -EFAULT if either user copy fails.  The ERROR
 * flag is only pushed back to userspace after the request was read
 * successfully: if the copy-in fails, 'oiu' does not hold a valid request
 * and its ir_flags must not be written into the user buffer.
 */
int ocfs2_info_handle_uuid(struct inode *inode,
			   struct ocfs2_info_request __user *req)
{
	struct ocfs2_info_uuid oiu;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	if (o2info_from_user(oiu, req))
		return -EFAULT;

	memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1);
	oiu.iu_req.ir_flags |= OCFS2_INFO_FL_FILLED;

	if (o2info_to_user(oiu, req)) {
		/* Best effort: tell userspace which request failed. */
		o2info_set_request_error(oiu, req);
		return -EFAULT;
	}

	return 0;
}
/*
 * OCFS2_INFO_FS_FEATURES handler: read the request from userspace, fill in
 * the compat, incompat and ro_compat feature words from the ocfs2
 * superblock, mark the request FILLED and copy it back.
 *
 * Returns 0 on success or -EFAULT if either user copy fails.  The ERROR
 * flag is only pushed back to userspace after the request was read
 * successfully: if the copy-in fails, 'oif' does not hold a valid request
 * and its ir_flags must not be written into the user buffer.
 */
int ocfs2_info_handle_fs_features(struct inode *inode,
				  struct ocfs2_info_request __user *req)
{
	struct ocfs2_info_fs_features oif;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	if (o2info_from_user(oif, req))
		return -EFAULT;

	oif.if_compat_features = osb->s_feature_compat;
	oif.if_incompat_features = osb->s_feature_incompat;
	oif.if_ro_compat_features = osb->s_feature_ro_compat;
	oif.if_req.ir_flags |= OCFS2_INFO_FL_FILLED;

	if (o2info_to_user(oif, req)) {
		/* Best effort: tell userspace which request failed. */
		o2info_set_request_error(oif, req);
		return -EFAULT;
	}

	return 0;
}
/*
 * OCFS2_INFO_JOURNAL_SIZE handler: read the request from userspace, fill in
 * the i_size of the journal inode (osb->journal->j_inode), mark the request
 * FILLED and copy it back.
 *
 * Returns 0 on success or -EFAULT if either user copy fails.  The ERROR
 * flag is only pushed back to userspace after the request was read
 * successfully: if the copy-in fails, 'oij' does not hold a valid request
 * and its ir_flags must not be written into the user buffer.
 */
int ocfs2_info_handle_journal_size(struct inode *inode,
				   struct ocfs2_info_request __user *req)
{
	struct ocfs2_info_journal_size oij;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	if (o2info_from_user(oij, req))
		return -EFAULT;

	oij.ij_journal_size = osb->journal->j_inode->i_size;
	oij.ij_req.ir_flags |= OCFS2_INFO_FL_FILLED;

	if (o2info_to_user(oij, req)) {
		/* Best effort: tell userspace which request failed. */
		o2info_set_request_error(oij, req);
		return -EFAULT;
	}

	return 0;
}
/*
 * Handler for request codes this kernel does not recognize: read the generic
 * request header, clear OCFS2_INFO_FL_FILLED so userspace can see that
 * nothing was filled in, and copy the header back.
 *
 * Returns 0 on success or -EFAULT if either user copy fails.  The ERROR
 * flag is only pushed back to userspace after the header was read
 * successfully: if the copy-in fails, 'oir' does not hold a valid request
 * and its ir_flags must not be written into the user buffer.
 */
int ocfs2_info_handle_unknown(struct inode *inode,
			      struct ocfs2_info_request __user *req)
{
	struct ocfs2_info_request oir;

	if (o2info_from_user(oir, req))
		return -EFAULT;

	oir.ir_flags &= ~OCFS2_INFO_FL_FILLED;

	if (o2info_to_user(oir, req)) {
		/* Best effort: tell userspace which request failed. */
		o2info_set_request_error(oir, req);
		return -EFAULT;
	}

	return 0;
}
/*
 * Validate one OCFS2_IOC_INFO request and route it to its handler.
 *
 * Steps:
 *  - read the generic request header from userspace (-EFAULT on failure);
 *  - check the magic number (-EINVAL on mismatch);
 *  - for a known request code, require ir_size to equal the size of that
 *    code's request structure (-EINVAL otherwise) and call its handler;
 *  - hand any other code to ocfs2_info_handle_unknown().
 *
 * Returns the handler's status, or one of the errors above.
 */
int ocfs2_info_handle_request(struct inode *inode,
			      struct ocfs2_info_request __user *req)
{
	int status;
	struct ocfs2_info_request hdr;

	if (o2info_from_user(hdr, req))
		return -EFAULT;

	if (hdr.ir_magic != OCFS2_INFO_MAGIC)
		return -EINVAL;

	/* Stays -EINVAL when a known code arrives with the wrong size. */
	status = -EINVAL;

	switch (hdr.ir_code) {
	case OCFS2_INFO_BLOCKSIZE:
		if (hdr.ir_size != sizeof(struct ocfs2_info_blocksize))
			break;
		status = ocfs2_info_handle_blocksize(inode, req);
		break;
	case OCFS2_INFO_CLUSTERSIZE:
		if (hdr.ir_size != sizeof(struct ocfs2_info_clustersize))
			break;
		status = ocfs2_info_handle_clustersize(inode, req);
		break;
	case OCFS2_INFO_MAXSLOTS:
		if (hdr.ir_size != sizeof(struct ocfs2_info_maxslots))
			break;
		status = ocfs2_info_handle_maxslots(inode, req);
		break;
	case OCFS2_INFO_LABEL:
		if (hdr.ir_size != sizeof(struct ocfs2_info_label))
			break;
		status = ocfs2_info_handle_label(inode, req);
		break;
	case OCFS2_INFO_UUID:
		if (hdr.ir_size != sizeof(struct ocfs2_info_uuid))
			break;
		status = ocfs2_info_handle_uuid(inode, req);
		break;
	case OCFS2_INFO_FS_FEATURES:
		if (hdr.ir_size != sizeof(struct ocfs2_info_fs_features))
			break;
		status = ocfs2_info_handle_fs_features(inode, req);
		break;
	case OCFS2_INFO_JOURNAL_SIZE:
		if (hdr.ir_size != sizeof(struct ocfs2_info_journal_size))
			break;
		status = ocfs2_info_handle_journal_size(inode, req);
		break;
	default:
		status = ocfs2_info_handle_unknown(inode, req);
		break;
	}

	return status;
}
/*
 * Fetch the idx-th entry of the userspace request-pointer array.
 *
 * info->oi_requests holds the base address of an array of u64 values, each
 * of which is the address of one request.  With compat_flag set, the base
 * address is converted with compat_ptr(); that path is only built when
 * CONFIG_COMPAT is enabled and hits BUG() otherwise.
 *
 * On success the entry is stored in *req_addr and 0 is returned; -EFAULT is
 * returned if the entry cannot be read from userspace.
 */
int ocfs2_get_request_ptr(struct ocfs2_info *info, int idx,
			  u64 *req_addr, int compat_flag)
{
	u64 __user *bp = NULL;

	if (!compat_flag) {
		bp = (u64 __user *)(unsigned long)(info->oi_requests);
	} else {
#ifdef CONFIG_COMPAT
		/* Base address of the array of per-request addresses. */
		bp = (u64 __user *)(unsigned long)compat_ptr(info->oi_requests);
#else
		BUG();
#endif
	}

	return o2info_from_user(*req_addr, bp + idx) ? -EFAULT : 0;
}
/*
* OCFS2_IOC_INFO handles an array of requests passed from userspace.
*
* ocfs2_info_handle() recevies a large info aggregation, grab and
* validate the request count from header, then break it into small
* pieces, later specific handlers can handle them one by one.
*
* Idea here is to make each separate request small enough to ensure
* a better backward&forward compatibility, since a small piece of
* request will be less likely to be broken if disk layout get changed.
*/
int ocfs2_info_handle(struct inode *inode, struct ocfs2_info *info,
int compat_flag)
{
int i, status = 0;
u64 req_addr;
struct ocfs2_info_request __user *reqp;
if ((info->oi_count > OCFS2_INFO_MAX_REQUEST) ||
(!info->oi_requests)) {
status = -EINVAL;
goto bail;
}
for (i = 0; i < info->oi_count; i++) {
status = ocfs2_get_request_ptr(info, i, &req_addr, compat_flag);
if (status)
break;
reqp = (struct ocfs2_info_request *)(unsigned long)req_addr;
if (!reqp) {
status = -EINVAL;
goto bail;
}
status = ocfs2_info_handle_request(inode, reqp);
if (status)
break;
}
bail:
return status;
}
long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{ {
struct inode *inode = filp->f_path.dentry->d_inode; struct inode *inode = filp->f_path.dentry->d_inode;
...@@ -120,6 +462,7 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) ...@@ -120,6 +462,7 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
struct reflink_arguments args; struct reflink_arguments args;
const char *old_path, *new_path; const char *old_path, *new_path;
bool preserve; bool preserve;
struct ocfs2_info info;
switch (cmd) { switch (cmd) {
case OCFS2_IOC_GETFLAGS: case OCFS2_IOC_GETFLAGS:
...@@ -174,6 +517,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) ...@@ -174,6 +517,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
preserve = (args.preserve != 0); preserve = (args.preserve != 0);
return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve); return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve);
case OCFS2_IOC_INFO:
if (copy_from_user(&info, (struct ocfs2_info __user *)arg,
sizeof(struct ocfs2_info)))
return -EFAULT;
return ocfs2_info_handle(inode, &info, 0);
default: default:
return -ENOTTY; return -ENOTTY;
} }
...@@ -185,6 +534,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) ...@@ -185,6 +534,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
bool preserve; bool preserve;
struct reflink_arguments args; struct reflink_arguments args;
struct inode *inode = file->f_path.dentry->d_inode; struct inode *inode = file->f_path.dentry->d_inode;
struct ocfs2_info info;
switch (cmd) { switch (cmd) {
case OCFS2_IOC32_GETFLAGS: case OCFS2_IOC32_GETFLAGS:
...@@ -209,6 +559,12 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) ...@@ -209,6 +559,12 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path), return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path),
compat_ptr(args.new_path), preserve); compat_ptr(args.new_path), preserve);
case OCFS2_IOC_INFO:
if (copy_from_user(&info, (struct ocfs2_info __user *)arg,
sizeof(struct ocfs2_info)))
return -EFAULT;
return ocfs2_info_handle(inode, &info, 1);
default: default:
return -ENOIOCTLCMD; return -ENOIOCTLCMD;
} }
......
...@@ -301,7 +301,6 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb) ...@@ -301,7 +301,6 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb)
{ {
int status = 0; int status = 0;
unsigned int flushed; unsigned int flushed;
unsigned long old_id;
struct ocfs2_journal *journal = NULL; struct ocfs2_journal *journal = NULL;
mlog_entry_void(); mlog_entry_void();
...@@ -326,7 +325,7 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb) ...@@ -326,7 +325,7 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb)
goto finally; goto finally;
} }
old_id = ocfs2_inc_trans_id(journal); ocfs2_inc_trans_id(journal);
flushed = atomic_read(&journal->j_num_trans); flushed = atomic_read(&journal->j_num_trans);
atomic_set(&journal->j_num_trans, 0); atomic_set(&journal->j_num_trans, 0);
...@@ -342,9 +341,6 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb) ...@@ -342,9 +341,6 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb)
return status; return status;
} }
/* pass it NULL and it will allocate a new handle object for you. If
* you pass it a handle however, it may still return error, in which
* case it has free'd the passed handle for you. */
handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
{ {
journal_t *journal = osb->journal->j_journal; journal_t *journal = osb->journal->j_journal;
...@@ -1888,6 +1884,8 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) ...@@ -1888,6 +1884,8 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
os = &osb->osb_orphan_scan; os = &osb->osb_orphan_scan;
mlog(0, "Begin orphan scan\n");
if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
goto out; goto out;
...@@ -1920,6 +1918,7 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) ...@@ -1920,6 +1918,7 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
unlock: unlock:
ocfs2_orphan_scan_unlock(osb, seqno); ocfs2_orphan_scan_unlock(osb, seqno);
out: out:
mlog(0, "Orphan scan completed\n");
return; return;
} }
......
...@@ -67,11 +67,12 @@ struct ocfs2_journal { ...@@ -67,11 +67,12 @@ struct ocfs2_journal {
struct buffer_head *j_bh; /* Journal disk inode block */ struct buffer_head *j_bh; /* Journal disk inode block */
atomic_t j_num_trans; /* Number of transactions atomic_t j_num_trans; /* Number of transactions
* currently in the system. */ * currently in the system. */
spinlock_t j_lock;
unsigned long j_trans_id; unsigned long j_trans_id;
struct rw_semaphore j_trans_barrier; struct rw_semaphore j_trans_barrier;
wait_queue_head_t j_checkpointed; wait_queue_head_t j_checkpointed;
spinlock_t j_lock; /* both fields protected by j_lock*/
struct list_head j_la_cleanups; struct list_head j_la_cleanups;
struct work_struct j_recovery_work; struct work_struct j_recovery_work;
}; };
......
...@@ -59,10 +59,11 @@ static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf) ...@@ -59,10 +59,11 @@ static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf)
return ret; return ret;
} }
static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
struct page *page) struct page *page)
{ {
int ret; int ret;
struct inode *inode = file->f_path.dentry->d_inode;
struct address_space *mapping = inode->i_mapping; struct address_space *mapping = inode->i_mapping;
loff_t pos = page_offset(page); loff_t pos = page_offset(page);
unsigned int len = PAGE_CACHE_SIZE; unsigned int len = PAGE_CACHE_SIZE;
...@@ -111,7 +112,7 @@ static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, ...@@ -111,7 +112,7 @@ static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh,
if (page->index == last_index) if (page->index == last_index)
len = ((size - 1) & ~PAGE_CACHE_MASK) + 1; len = ((size - 1) & ~PAGE_CACHE_MASK) + 1;
ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page, ret = ocfs2_write_begin_nolock(file, mapping, pos, len, 0, &locked_page,
&fsdata, di_bh, page); &fsdata, di_bh, page);
if (ret) { if (ret) {
if (ret != -ENOSPC) if (ret != -ENOSPC)
...@@ -159,7 +160,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ...@@ -159,7 +160,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
*/ */
down_write(&OCFS2_I(inode)->ip_alloc_sem); down_write(&OCFS2_I(inode)->ip_alloc_sem);
ret = __ocfs2_page_mkwrite(inode, di_bh, page); ret = __ocfs2_page_mkwrite(vma->vm_file, di_bh, page);
up_write(&OCFS2_I(inode)->ip_alloc_sem); up_write(&OCFS2_I(inode)->ip_alloc_sem);
......
...@@ -171,7 +171,8 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, ...@@ -171,7 +171,8 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
ret = ERR_PTR(status); ret = ERR_PTR(status);
goto bail_unlock; goto bail_unlock;
} }
} } else
ocfs2_dentry_attach_gen(dentry);
bail_unlock: bail_unlock:
/* Don't drop the cluster lock until *after* the d_add -- /* Don't drop the cluster lock until *after* the d_add --
......
...@@ -150,26 +150,33 @@ typedef void (*ocfs2_lock_callback)(int status, unsigned long data); ...@@ -150,26 +150,33 @@ typedef void (*ocfs2_lock_callback)(int status, unsigned long data);
struct ocfs2_lock_res { struct ocfs2_lock_res {
void *l_priv; void *l_priv;
struct ocfs2_lock_res_ops *l_ops; struct ocfs2_lock_res_ops *l_ops;
spinlock_t l_lock;
struct list_head l_blocked_list; struct list_head l_blocked_list;
struct list_head l_mask_waiters; struct list_head l_mask_waiters;
enum ocfs2_lock_type l_type;
unsigned long l_flags; unsigned long l_flags;
char l_name[OCFS2_LOCK_ID_MAX_LEN]; char l_name[OCFS2_LOCK_ID_MAX_LEN];
int l_level;
unsigned int l_ro_holders; unsigned int l_ro_holders;
unsigned int l_ex_holders; unsigned int l_ex_holders;
struct ocfs2_dlm_lksb l_lksb; unsigned char l_level;
/* Data packed - type enum ocfs2_lock_type */
unsigned char l_type;
/* used from AST/BAST funcs. */ /* used from AST/BAST funcs. */
enum ocfs2_ast_action l_action; /* Data packed - enum type ocfs2_ast_action */
enum ocfs2_unlock_action l_unlock_action; unsigned char l_action;
int l_requested; /* Data packed - enum type ocfs2_unlock_action */
int l_blocking; unsigned char l_unlock_action;
unsigned char l_requested;
unsigned char l_blocking;
unsigned int l_pending_gen; unsigned int l_pending_gen;
spinlock_t l_lock;
struct ocfs2_dlm_lksb l_lksb;
wait_queue_head_t l_event; wait_queue_head_t l_event;
struct list_head l_debug_list; struct list_head l_debug_list;
...@@ -243,7 +250,7 @@ enum ocfs2_local_alloc_state ...@@ -243,7 +250,7 @@ enum ocfs2_local_alloc_state
enum ocfs2_mount_options enum ocfs2_mount_options
{ {
OCFS2_MOUNT_HB_LOCAL = 1 << 0, /* Heartbeat started in local mode */ OCFS2_MOUNT_HB_LOCAL = 1 << 0, /* Local heartbeat */
OCFS2_MOUNT_BARRIER = 1 << 1, /* Use block barriers */ OCFS2_MOUNT_BARRIER = 1 << 1, /* Use block barriers */
OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */ OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */
OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */
...@@ -256,6 +263,10 @@ enum ocfs2_mount_options ...@@ -256,6 +263,10 @@ enum ocfs2_mount_options
control lists */ control lists */
OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */ OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */
OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */ OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */
OCFS2_MOUNT_COHERENCY_BUFFERED = 1 << 12, /* Allow concurrent O_DIRECT
writes */
OCFS2_MOUNT_HB_NONE = 1 << 13, /* No heartbeat */
OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */
}; };
#define OCFS2_OSB_SOFT_RO 0x0001 #define OCFS2_OSB_SOFT_RO 0x0001
...@@ -277,7 +288,8 @@ struct ocfs2_super ...@@ -277,7 +288,8 @@ struct ocfs2_super
struct super_block *sb; struct super_block *sb;
struct inode *root_inode; struct inode *root_inode;
struct inode *sys_root_inode; struct inode *sys_root_inode;
struct inode *system_inodes[NUM_SYSTEM_INODES]; struct inode *global_system_inodes[NUM_GLOBAL_SYSTEM_INODES];
struct inode **local_system_inodes;
struct ocfs2_slot_info *slot_info; struct ocfs2_slot_info *slot_info;
...@@ -368,6 +380,8 @@ struct ocfs2_super ...@@ -368,6 +380,8 @@ struct ocfs2_super
struct ocfs2_alloc_stats alloc_stats; struct ocfs2_alloc_stats alloc_stats;
char dev_str[20]; /* "major,minor" of the device */ char dev_str[20]; /* "major,minor" of the device */
u8 osb_stackflags;
char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1];
struct ocfs2_cluster_connection *cconn; struct ocfs2_cluster_connection *cconn;
struct ocfs2_lock_res osb_super_lockres; struct ocfs2_lock_res osb_super_lockres;
...@@ -601,10 +615,35 @@ static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb) ...@@ -601,10 +615,35 @@ static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb)
return ret; return ret;
} }
static inline int ocfs2_userspace_stack(struct ocfs2_super *osb) static inline int ocfs2_clusterinfo_valid(struct ocfs2_super *osb)
{ {
return (osb->s_feature_incompat & return (osb->s_feature_incompat &
OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK); (OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK |
OCFS2_FEATURE_INCOMPAT_CLUSTERINFO));
}
/*
 * Returns 1 when the volume carries valid cluster info and the first
 * OCFS2_STACK_LABEL_LEN bytes of its stack label differ from the classic
 * stack label (OCFS2_CLASSIC_CLUSTER_STACK); returns 0 otherwise.
 */
static inline int ocfs2_userspace_stack(struct ocfs2_super *osb)
{
	return ocfs2_clusterinfo_valid(osb) &&
	       memcmp(osb->osb_cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK,
		      OCFS2_STACK_LABEL_LEN) != 0;
}
/*
 * Returns 1 when the volume carries valid cluster info and the first
 * OCFS2_STACK_LABEL_LEN bytes of its stack label equal the classic stack
 * label (OCFS2_CLASSIC_CLUSTER_STACK); returns 0 otherwise.
 */
static inline int ocfs2_o2cb_stack(struct ocfs2_super *osb)
{
	return ocfs2_clusterinfo_valid(osb) &&
	       memcmp(osb->osb_cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK,
		      OCFS2_STACK_LABEL_LEN) == 0;
}
static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb)
{
return ocfs2_o2cb_stack(osb) &&
(osb->osb_stackflags & OCFS2_CLUSTER_O2CB_GLOBAL_HEARTBEAT);
} }
static inline int ocfs2_mount_local(struct ocfs2_super *osb) static inline int ocfs2_mount_local(struct ocfs2_super *osb)
......
...@@ -101,7 +101,8 @@ ...@@ -101,7 +101,8 @@
| OCFS2_FEATURE_INCOMPAT_META_ECC \ | OCFS2_FEATURE_INCOMPAT_META_ECC \
| OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \ | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \
| OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \ | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \
| OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG) | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG \
| OCFS2_FEATURE_INCOMPAT_CLUSTERINFO)
#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ #define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
| OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
| OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
...@@ -169,6 +170,13 @@ ...@@ -169,6 +170,13 @@
/* Discontigous block groups */ /* Discontigous block groups */
#define OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG 0x2000 #define OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG 0x2000
/*
* Incompat bit to indicate useable clusterinfo with stackflags for all
* cluster stacks (userspace and o2cb). If this bit is set,
* INCOMPAT_USERSPACE_STACK becomes superfluous and thus should not be set.
*/
#define OCFS2_FEATURE_INCOMPAT_CLUSTERINFO 0x4000
/* /*
* backup superblock flag is used to indicate that this volume * backup superblock flag is used to indicate that this volume
* has backup superblocks. * has backup superblocks.
...@@ -292,10 +300,13 @@ ...@@ -292,10 +300,13 @@
#define OCFS2_VOL_UUID_LEN 16 #define OCFS2_VOL_UUID_LEN 16
#define OCFS2_MAX_VOL_LABEL_LEN 64 #define OCFS2_MAX_VOL_LABEL_LEN 64
/* The alternate, userspace stack fields */ /* The cluster stack fields */
#define OCFS2_STACK_LABEL_LEN 4 #define OCFS2_STACK_LABEL_LEN 4
#define OCFS2_CLUSTER_NAME_LEN 16 #define OCFS2_CLUSTER_NAME_LEN 16
/* Classic (historically speaking) cluster stack */
#define OCFS2_CLASSIC_CLUSTER_STACK "o2cb"
/* Journal limits (in bytes) */ /* Journal limits (in bytes) */
#define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024)
...@@ -305,6 +316,11 @@ ...@@ -305,6 +316,11 @@
*/ */
#define OCFS2_MIN_XATTR_INLINE_SIZE 256 #define OCFS2_MIN_XATTR_INLINE_SIZE 256
/*
* Cluster info flags (ocfs2_cluster_info.ci_stackflags)
*/
#define OCFS2_CLUSTER_O2CB_GLOBAL_HEARTBEAT (0x01)
struct ocfs2_system_inode_info { struct ocfs2_system_inode_info {
char *si_name; char *si_name;
int si_iflags; int si_iflags;
...@@ -322,6 +338,7 @@ enum { ...@@ -322,6 +338,7 @@ enum {
USER_QUOTA_SYSTEM_INODE, USER_QUOTA_SYSTEM_INODE,
GROUP_QUOTA_SYSTEM_INODE, GROUP_QUOTA_SYSTEM_INODE,
#define OCFS2_LAST_GLOBAL_SYSTEM_INODE GROUP_QUOTA_SYSTEM_INODE #define OCFS2_LAST_GLOBAL_SYSTEM_INODE GROUP_QUOTA_SYSTEM_INODE
#define OCFS2_FIRST_LOCAL_SYSTEM_INODE ORPHAN_DIR_SYSTEM_INODE
ORPHAN_DIR_SYSTEM_INODE, ORPHAN_DIR_SYSTEM_INODE,
EXTENT_ALLOC_SYSTEM_INODE, EXTENT_ALLOC_SYSTEM_INODE,
INODE_ALLOC_SYSTEM_INODE, INODE_ALLOC_SYSTEM_INODE,
...@@ -330,8 +347,12 @@ enum { ...@@ -330,8 +347,12 @@ enum {
TRUNCATE_LOG_SYSTEM_INODE, TRUNCATE_LOG_SYSTEM_INODE,
LOCAL_USER_QUOTA_SYSTEM_INODE, LOCAL_USER_QUOTA_SYSTEM_INODE,
LOCAL_GROUP_QUOTA_SYSTEM_INODE, LOCAL_GROUP_QUOTA_SYSTEM_INODE,
#define OCFS2_LAST_LOCAL_SYSTEM_INODE LOCAL_GROUP_QUOTA_SYSTEM_INODE
NUM_SYSTEM_INODES NUM_SYSTEM_INODES
}; };
/*
 * Number of global (single copy) system inode types. Global types are used
 * directly as indexes into the global system inode cache, up to and
 * including OCFS2_LAST_GLOBAL_SYSTEM_INODE, so the count is the index of
 * the first local type -- one more than the index of the last global type.
 */
#define NUM_GLOBAL_SYSTEM_INODES OCFS2_FIRST_LOCAL_SYSTEM_INODE
/* Number of local system inode types (first local type up to the enum end). */
#define NUM_LOCAL_SYSTEM_INODES \
(NUM_SYSTEM_INODES - OCFS2_FIRST_LOCAL_SYSTEM_INODE)
static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
/* Global system inodes (single copy) */ /* Global system inodes (single copy) */
...@@ -360,6 +381,7 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { ...@@ -360,6 +381,7 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
/* Parameter passed from mount.ocfs2 to module */ /* Parameter passed from mount.ocfs2 to module */
#define OCFS2_HB_NONE "heartbeat=none" #define OCFS2_HB_NONE "heartbeat=none"
#define OCFS2_HB_LOCAL "heartbeat=local" #define OCFS2_HB_LOCAL "heartbeat=local"
#define OCFS2_HB_GLOBAL "heartbeat=global"
/* /*
* OCFS2 directory file types. Only the low 3 bits are used. The * OCFS2 directory file types. Only the low 3 bits are used. The
...@@ -566,9 +588,21 @@ struct ocfs2_slot_map_extended { ...@@ -566,9 +588,21 @@ struct ocfs2_slot_map_extended {
*/ */
}; };
/*
* ci_stackflags is only valid if the incompat bit
* OCFS2_FEATURE_INCOMPAT_CLUSTERINFO is set.
*/
struct ocfs2_cluster_info { struct ocfs2_cluster_info {
/*00*/ __u8 ci_stack[OCFS2_STACK_LABEL_LEN]; /*00*/ __u8 ci_stack[OCFS2_STACK_LABEL_LEN];
__le32 ci_reserved; union {
__le32 ci_reserved;
struct {
__u8 ci_stackflags;
__u8 ci_reserved1;
__u8 ci_reserved2;
__u8 ci_reserved3;
};
};
/*08*/ __u8 ci_cluster[OCFS2_CLUSTER_NAME_LEN]; /*08*/ __u8 ci_cluster[OCFS2_CLUSTER_NAME_LEN];
/*18*/ /*18*/
}; };
...@@ -605,9 +639,9 @@ struct ocfs2_super_block { ...@@ -605,9 +639,9 @@ struct ocfs2_super_block {
* group header */ * group header */
/*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ /*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */
/*90*/ __u8 s_uuid[OCFS2_VOL_UUID_LEN]; /* 128-bit uuid */ /*90*/ __u8 s_uuid[OCFS2_VOL_UUID_LEN]; /* 128-bit uuid */
/*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Selected userspace /*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Only valid if either
stack. Only valid userspace or clusterinfo
with INCOMPAT flag. */ INCOMPAT flag set. */
/*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size /*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size
for this fs*/ for this fs*/
__le16 s_reserved0; __le16 s_reserved0;
......
...@@ -76,4 +76,99 @@ struct reflink_arguments { ...@@ -76,4 +76,99 @@ struct reflink_arguments {
}; };
#define OCFS2_IOC_REFLINK _IOW('o', 4, struct reflink_arguments) #define OCFS2_IOC_REFLINK _IOW('o', 4, struct reflink_arguments)
/* Following definitions dedicated for ocfs2_info_request ioctls. */
#define OCFS2_INFO_MAX_REQUEST (50)
#define OCFS2_TEXT_UUID_LEN (OCFS2_VOL_UUID_LEN * 2)
/* Magic number of all requests */
#define OCFS2_INFO_MAGIC (0x4F32494E)
/*
 * Argument of OCFS2_IOC_INFO: a counted array of pointers to requests.
 *
 * Always try to split info requests into small pieces to guarantee
 * backward and forward compatibility.
 */
struct ocfs2_info {
__u64 oi_requests; /* Array of __u64 pointers to requests */
__u32 oi_count; /* Number of requests in oi_requests */
__u32 oi_pad; /* Explicit padding; brings the struct to 16 bytes */
};
/*
 * Common 16-byte header of an info request. The specific request structs
 * embed it as their first member and append their own fields after it.
 */
struct ocfs2_info_request {
/*00*/ __u32 ir_magic; /* Magic number (OCFS2_INFO_MAGIC) */
__u32 ir_code; /* Info request code (enum ocfs2_info_type) */
__u32 ir_size; /* Size of request */
__u32 ir_flags; /* Request flags (OCFS2_INFO_FL_*) */
/*10*/ /* Request specific fields */
};
/* Request header plus a 32-bit cluster size; presumably for OCFS2_INFO_CLUSTERSIZE. */
struct ocfs2_info_clustersize {
struct ocfs2_info_request ic_req;
__u32 ic_clustersize;
__u32 ic_pad; /* Explicit padding after the 32-bit payload */
};
/* Request header plus a 32-bit block size; presumably for OCFS2_INFO_BLOCKSIZE. */
struct ocfs2_info_blocksize {
struct ocfs2_info_request ib_req;
__u32 ib_blocksize;
__u32 ib_pad; /* Explicit padding after the 32-bit payload */
};
/* Request header plus a 32-bit slot count; presumably for OCFS2_INFO_MAXSLOTS. */
struct ocfs2_info_maxslots {
struct ocfs2_info_request im_req;
__u32 im_max_slots;
__u32 im_pad; /* Explicit padding after the 32-bit payload */
};
/*
 * Request header plus a label buffer of OCFS2_MAX_VOL_LABEL_LEN bytes;
 * presumably for OCFS2_INFO_LABEL. Packed, so no padding is inserted.
 */
struct ocfs2_info_label {
struct ocfs2_info_request il_req;
__u8 il_label[OCFS2_MAX_VOL_LABEL_LEN];
} __attribute__ ((packed));
/*
 * Request header plus a textual UUID buffer; presumably for OCFS2_INFO_UUID.
 * The buffer holds OCFS2_TEXT_UUID_LEN characters plus one extra byte
 * (presumably the terminating NUL). Packed, so no padding is inserted.
 */
struct ocfs2_info_uuid {
struct ocfs2_info_request iu_req;
__u8 iu_uuid_str[OCFS2_TEXT_UUID_LEN + 1];
} __attribute__ ((packed));
/*
 * Request header plus three 32-bit feature words (compat, incompat,
 * ro_compat); presumably for OCFS2_INFO_FS_FEATURES.
 */
struct ocfs2_info_fs_features {
struct ocfs2_info_request if_req;
__u32 if_compat_features;
__u32 if_incompat_features;
__u32 if_ro_compat_features;
__u32 if_pad; /* Explicit padding after the three 32-bit words */
};
/* Request header plus a 64-bit journal size; presumably for OCFS2_INFO_JOURNAL_SIZE. */
struct ocfs2_info_journal_size {
struct ocfs2_info_request ij_req;
__u64 ij_journal_size;
};
/*
 * Codes for ocfs2_info_request. Valid codes start at 1 and run
 * consecutively; OCFS2_INFO_NUM_TYPES is one past the last valid code.
 */
enum ocfs2_info_type {
OCFS2_INFO_CLUSTERSIZE = 1,
OCFS2_INFO_BLOCKSIZE,
OCFS2_INFO_MAXSLOTS,
OCFS2_INFO_LABEL,
OCFS2_INFO_UUID,
OCFS2_INFO_FS_FEATURES,
OCFS2_INFO_JOURNAL_SIZE,
OCFS2_INFO_NUM_TYPES
};
/* Flags for struct ocfs2_info_request */
/* Filled by the caller */
#define OCFS2_INFO_FL_NON_COHERENT (0x00000001) /* Cluster coherency not
required. This is a hint.
It is up to ocfs2 whether
the request can be fulfilled
without locking. */
/* Filled by ocfs2 */
#define OCFS2_INFO_FL_FILLED (0x40000000) /* Filesystem understood
this request and
filled in the answer */
#define OCFS2_INFO_FL_ERROR (0x80000000) /* Error happened during
request handling. */
#define OCFS2_IOC_INFO _IOR('o', 5, struct ocfs2_info)
#endif /* OCFS2_IOCTL_H */ #endif /* OCFS2_IOCTL_H */
...@@ -49,6 +49,7 @@ ...@@ -49,6 +49,7 @@
struct ocfs2_cow_context { struct ocfs2_cow_context {
struct inode *inode; struct inode *inode;
struct file *file;
u32 cow_start; u32 cow_start;
u32 cow_len; u32 cow_len;
struct ocfs2_extent_tree data_et; struct ocfs2_extent_tree data_et;
...@@ -2932,13 +2933,16 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, ...@@ -2932,13 +2933,16 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
struct page *page; struct page *page;
pgoff_t page_index; pgoff_t page_index;
unsigned int from, to; unsigned int from, to, readahead_pages;
loff_t offset, end, map_end; loff_t offset, end, map_end;
struct address_space *mapping = context->inode->i_mapping; struct address_space *mapping = context->inode->i_mapping;
mlog(0, "old_cluster %u, new %u, len %u at offset %u\n", old_cluster, mlog(0, "old_cluster %u, new %u, len %u at offset %u\n", old_cluster,
new_cluster, new_len, cpos); new_cluster, new_len, cpos);
readahead_pages =
(ocfs2_cow_contig_clusters(sb) <<
OCFS2_SB(sb)->s_clustersize_bits) >> PAGE_CACHE_SHIFT;
offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits); end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits);
/* /*
...@@ -2969,6 +2973,14 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, ...@@ -2969,6 +2973,14 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize)
BUG_ON(PageDirty(page)); BUG_ON(PageDirty(page));
if (PageReadahead(page) && context->file) {
page_cache_async_readahead(mapping,
&context->file->f_ra,
context->file,
page, page_index,
readahead_pages);
}
if (!PageUptodate(page)) { if (!PageUptodate(page)) {
ret = block_read_full_page(page, ocfs2_get_block); ret = block_read_full_page(page, ocfs2_get_block);
if (ret) { if (ret) {
...@@ -3409,12 +3421,35 @@ static int ocfs2_replace_cow(struct ocfs2_cow_context *context) ...@@ -3409,12 +3421,35 @@ static int ocfs2_replace_cow(struct ocfs2_cow_context *context)
return ret; return ret;
} }
/*
 * Start synchronous readahead over a cluster range that is about to be CoWed.
 *
 * @inode: inode being CoWed; only used to look up the cluster size bits.
 * @file:  file whose mapping and readahead state are used. May be NULL,
 *         in which case no readahead is issued.
 * @start: first cluster of the range.
 * @len:   length of the range in clusters.
 */
static void ocfs2_readahead_for_cow(struct inode *inode,
struct file *file,
u32 start, u32 len)
{
	struct address_space *mapping;
	pgoff_t index;
	unsigned long num_pages;
	int cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;

	if (!file)
		return;

	mapping = file->f_mapping;

	/*
	 * Widen before shifting, as done for index below: len is a 32-bit
	 * cluster count, and converting it to bytes in 32-bit arithmetic
	 * silently drops the high bits once the range reaches 4GB.
	 */
	num_pages = ((loff_t)len << cs_bits) >> PAGE_CACHE_SHIFT;
	/* A range smaller than one page still needs that page read. */
	if (!num_pages)
		num_pages = 1;

	index = ((loff_t)start << cs_bits) >> PAGE_CACHE_SHIFT;
	page_cache_sync_readahead(mapping, &file->f_ra, file,
				  index, num_pages);
}
/* /*
* Starting at cpos, try to CoW write_len clusters. Don't CoW * Starting at cpos, try to CoW write_len clusters. Don't CoW
* past max_cpos. This will stop when it runs into a hole or an * past max_cpos. This will stop when it runs into a hole or an
* unrefcounted extent. * unrefcounted extent.
*/ */
static int ocfs2_refcount_cow_hunk(struct inode *inode, static int ocfs2_refcount_cow_hunk(struct inode *inode,
struct file *file,
struct buffer_head *di_bh, struct buffer_head *di_bh,
u32 cpos, u32 write_len, u32 max_cpos) u32 cpos, u32 write_len, u32 max_cpos)
{ {
...@@ -3443,6 +3478,8 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode, ...@@ -3443,6 +3478,8 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
BUG_ON(cow_len == 0); BUG_ON(cow_len == 0);
ocfs2_readahead_for_cow(inode, file, cow_start, cow_len);
context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
if (!context) { if (!context) {
ret = -ENOMEM; ret = -ENOMEM;
...@@ -3464,6 +3501,7 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode, ...@@ -3464,6 +3501,7 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
context->ref_root_bh = ref_root_bh; context->ref_root_bh = ref_root_bh;
context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page; context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page;
context->get_clusters = ocfs2_di_get_clusters; context->get_clusters = ocfs2_di_get_clusters;
context->file = file;
ocfs2_init_dinode_extent_tree(&context->data_et, ocfs2_init_dinode_extent_tree(&context->data_et,
INODE_CACHE(inode), di_bh); INODE_CACHE(inode), di_bh);
...@@ -3492,6 +3530,7 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode, ...@@ -3492,6 +3530,7 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
* clusters between cpos and cpos+write_len are safe to modify. * clusters between cpos and cpos+write_len are safe to modify.
*/ */
int ocfs2_refcount_cow(struct inode *inode, int ocfs2_refcount_cow(struct inode *inode,
struct file *file,
struct buffer_head *di_bh, struct buffer_head *di_bh,
u32 cpos, u32 write_len, u32 max_cpos) u32 cpos, u32 write_len, u32 max_cpos)
{ {
...@@ -3511,7 +3550,7 @@ int ocfs2_refcount_cow(struct inode *inode, ...@@ -3511,7 +3550,7 @@ int ocfs2_refcount_cow(struct inode *inode,
num_clusters = write_len; num_clusters = write_len;
if (ext_flags & OCFS2_EXT_REFCOUNTED) { if (ext_flags & OCFS2_EXT_REFCOUNTED) {
ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos, ret = ocfs2_refcount_cow_hunk(inode, file, di_bh, cpos,
num_clusters, max_cpos); num_clusters, max_cpos);
if (ret) { if (ret) {
mlog_errno(ret); mlog_errno(ret);
......
...@@ -21,14 +21,14 @@ struct ocfs2_refcount_tree { ...@@ -21,14 +21,14 @@ struct ocfs2_refcount_tree {
struct rb_node rf_node; struct rb_node rf_node;
u64 rf_blkno; u64 rf_blkno;
u32 rf_generation; u32 rf_generation;
struct kref rf_getcnt;
struct rw_semaphore rf_sem; struct rw_semaphore rf_sem;
struct ocfs2_lock_res rf_lockres; struct ocfs2_lock_res rf_lockres;
struct kref rf_getcnt;
int rf_removed; int rf_removed;
/* the following 4 fields are used by caching_info. */ /* the following 4 fields are used by caching_info. */
struct ocfs2_caching_info rf_ci;
spinlock_t rf_lock; spinlock_t rf_lock;
struct ocfs2_caching_info rf_ci;
struct mutex rf_io_mutex; struct mutex rf_io_mutex;
struct super_block *rf_sb; struct super_block *rf_sb;
}; };
...@@ -52,7 +52,8 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode, ...@@ -52,7 +52,8 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
u32 clusters, u32 clusters,
int *credits, int *credits,
int *ref_blocks); int *ref_blocks);
int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh, int ocfs2_refcount_cow(struct inode *inode,
struct file *filep, struct buffer_head *di_bh,
u32 cpos, u32 write_len, u32 max_cpos); u32 cpos, u32 write_len, u32 max_cpos);
typedef int (ocfs2_post_refcount_func)(struct inode *inode, typedef int (ocfs2_post_refcount_func)(struct inode *inode,
......
...@@ -357,7 +357,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb, ...@@ -357,7 +357,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
{ {
int status = 0; int status = 0;
u64 blkno; u64 blkno;
unsigned long long blocks, bytes; unsigned long long blocks, bytes = 0;
unsigned int i; unsigned int i;
struct buffer_head *bh; struct buffer_head *bh;
......
...@@ -283,6 +283,8 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn) ...@@ -283,6 +283,8 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn)
/* for now we only have one cluster/node, make sure we see it /* for now we only have one cluster/node, make sure we see it
* in the heartbeat universe */ * in the heartbeat universe */
if (!o2hb_check_local_node_heartbeating()) { if (!o2hb_check_local_node_heartbeating()) {
if (o2hb_global_heartbeat_active())
mlog(ML_ERROR, "Global heartbeat not started\n");
rc = -EINVAL; rc = -EINVAL;
goto out; goto out;
} }
......
...@@ -1380,6 +1380,14 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle, ...@@ -1380,6 +1380,14 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
} }
le16_add_cpu(&bg->bg_free_bits_count, -num_bits); le16_add_cpu(&bg->bg_free_bits_count, -num_bits);
if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) {
ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit"
" count %u but claims %u are freed. num_bits %d",
(unsigned long long)le64_to_cpu(bg->bg_blkno),
le16_to_cpu(bg->bg_bits),
le16_to_cpu(bg->bg_free_bits_count), num_bits);
return -EROFS;
}
while(num_bits--) while(num_bits--)
ocfs2_set_bit(bit_off++, bitmap); ocfs2_set_bit(bit_off++, bitmap);
...@@ -2419,6 +2427,14 @@ static int ocfs2_block_group_clear_bits(handle_t *handle, ...@@ -2419,6 +2427,14 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
(unsigned long *) undo_bg->bg_bitmap); (unsigned long *) undo_bg->bg_bitmap);
} }
le16_add_cpu(&bg->bg_free_bits_count, num_bits); le16_add_cpu(&bg->bg_free_bits_count, num_bits);
if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) {
ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit"
" count %u but claims %u are freed. num_bits %d",
(unsigned long long)le64_to_cpu(bg->bg_blkno),
le16_to_cpu(bg->bg_bits),
le16_to_cpu(bg->bg_free_bits_count), num_bits);
return -EROFS;
}
if (undo_fn) if (undo_fn)
jbd_unlock_bh_state(group_bh); jbd_unlock_bh_state(group_bh);
......
...@@ -162,6 +162,7 @@ enum { ...@@ -162,6 +162,7 @@ enum {
Opt_nointr, Opt_nointr,
Opt_hb_none, Opt_hb_none,
Opt_hb_local, Opt_hb_local,
Opt_hb_global,
Opt_data_ordered, Opt_data_ordered,
Opt_data_writeback, Opt_data_writeback,
Opt_atime_quantum, Opt_atime_quantum,
...@@ -177,6 +178,8 @@ enum { ...@@ -177,6 +178,8 @@ enum {
Opt_noacl, Opt_noacl,
Opt_usrquota, Opt_usrquota,
Opt_grpquota, Opt_grpquota,
Opt_coherency_buffered,
Opt_coherency_full,
Opt_resv_level, Opt_resv_level,
Opt_dir_resv_level, Opt_dir_resv_level,
Opt_err, Opt_err,
...@@ -190,6 +193,7 @@ static const match_table_t tokens = { ...@@ -190,6 +193,7 @@ static const match_table_t tokens = {
{Opt_nointr, "nointr"}, {Opt_nointr, "nointr"},
{Opt_hb_none, OCFS2_HB_NONE}, {Opt_hb_none, OCFS2_HB_NONE},
{Opt_hb_local, OCFS2_HB_LOCAL}, {Opt_hb_local, OCFS2_HB_LOCAL},
{Opt_hb_global, OCFS2_HB_GLOBAL},
{Opt_data_ordered, "data=ordered"}, {Opt_data_ordered, "data=ordered"},
{Opt_data_writeback, "data=writeback"}, {Opt_data_writeback, "data=writeback"},
{Opt_atime_quantum, "atime_quantum=%u"}, {Opt_atime_quantum, "atime_quantum=%u"},
...@@ -205,6 +209,8 @@ static const match_table_t tokens = { ...@@ -205,6 +209,8 @@ static const match_table_t tokens = {
{Opt_noacl, "noacl"}, {Opt_noacl, "noacl"},
{Opt_usrquota, "usrquota"}, {Opt_usrquota, "usrquota"},
{Opt_grpquota, "grpquota"}, {Opt_grpquota, "grpquota"},
{Opt_coherency_buffered, "coherency=buffered"},
{Opt_coherency_full, "coherency=full"},
{Opt_resv_level, "resv_level=%u"}, {Opt_resv_level, "resv_level=%u"},
{Opt_dir_resv_level, "dir_resv_level=%u"}, {Opt_dir_resv_level, "dir_resv_level=%u"},
{Opt_err, NULL} {Opt_err, NULL}
...@@ -514,11 +520,11 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb) ...@@ -514,11 +520,11 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb)
mlog_entry_void(); mlog_entry_void();
for (i = 0; i < NUM_SYSTEM_INODES; i++) { for (i = 0; i < NUM_GLOBAL_SYSTEM_INODES; i++) {
inode = osb->system_inodes[i]; inode = osb->global_system_inodes[i];
if (inode) { if (inode) {
iput(inode); iput(inode);
osb->system_inodes[i] = NULL; osb->global_system_inodes[i] = NULL;
} }
} }
...@@ -534,6 +540,20 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb) ...@@ -534,6 +540,20 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb)
osb->root_inode = NULL; osb->root_inode = NULL;
} }
if (!osb->local_system_inodes)
goto out;
for (i = 0; i < NUM_LOCAL_SYSTEM_INODES * osb->max_slots; i++) {
if (osb->local_system_inodes[i]) {
iput(osb->local_system_inodes[i]);
osb->local_system_inodes[i] = NULL;
}
}
kfree(osb->local_system_inodes);
osb->local_system_inodes = NULL;
out:
mlog_exit(0); mlog_exit(0);
} }
...@@ -608,6 +628,7 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) ...@@ -608,6 +628,7 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
int ret = 0; int ret = 0;
struct mount_options parsed_options; struct mount_options parsed_options;
struct ocfs2_super *osb = OCFS2_SB(sb); struct ocfs2_super *osb = OCFS2_SB(sb);
u32 tmp;
lock_kernel(); lock_kernel();
...@@ -617,8 +638,9 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) ...@@ -617,8 +638,9 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
goto out; goto out;
} }
if ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) != tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
(parsed_options.mount_opt & OCFS2_MOUNT_HB_LOCAL)) { OCFS2_MOUNT_HB_NONE;
if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
ret = -EINVAL; ret = -EINVAL;
mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n"); mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n");
goto out; goto out;
...@@ -809,23 +831,29 @@ static int ocfs2_sb_probe(struct super_block *sb, ...@@ -809,23 +831,29 @@ static int ocfs2_sb_probe(struct super_block *sb,
static int ocfs2_verify_heartbeat(struct ocfs2_super *osb) static int ocfs2_verify_heartbeat(struct ocfs2_super *osb)
{ {
if (ocfs2_mount_local(osb)) { u32 hb_enabled = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL;
if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) {
if (osb->s_mount_opt & hb_enabled) {
if (ocfs2_mount_local(osb)) {
mlog(ML_ERROR, "Cannot heartbeat on a locally " mlog(ML_ERROR, "Cannot heartbeat on a locally "
"mounted device.\n"); "mounted device.\n");
return -EINVAL; return -EINVAL;
} }
} if (ocfs2_userspace_stack(osb)) {
if (ocfs2_userspace_stack(osb)) {
if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) {
mlog(ML_ERROR, "Userspace stack expected, but " mlog(ML_ERROR, "Userspace stack expected, but "
"o2cb heartbeat arguments passed to mount\n"); "o2cb heartbeat arguments passed to mount\n");
return -EINVAL; return -EINVAL;
} }
if (((osb->s_mount_opt & OCFS2_MOUNT_HB_GLOBAL) &&
!ocfs2_cluster_o2cb_global_heartbeat(osb)) ||
((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) &&
ocfs2_cluster_o2cb_global_heartbeat(osb))) {
mlog(ML_ERROR, "Mismatching o2cb heartbeat modes\n");
return -EINVAL;
}
} }
if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) { if (!(osb->s_mount_opt & hb_enabled)) {
if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb) && if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb) &&
!ocfs2_userspace_stack(osb)) { !ocfs2_userspace_stack(osb)) {
mlog(ML_ERROR, "Heartbeat has to be started to mount " mlog(ML_ERROR, "Heartbeat has to be started to mount "
...@@ -1291,6 +1319,7 @@ static int ocfs2_parse_options(struct super_block *sb, ...@@ -1291,6 +1319,7 @@ static int ocfs2_parse_options(struct super_block *sb,
{ {
int status; int status;
char *p; char *p;
u32 tmp;
mlog_entry("remount: %d, options: \"%s\"\n", is_remount, mlog_entry("remount: %d, options: \"%s\"\n", is_remount,
options ? options : "(none)"); options ? options : "(none)");
...@@ -1322,7 +1351,10 @@ static int ocfs2_parse_options(struct super_block *sb, ...@@ -1322,7 +1351,10 @@ static int ocfs2_parse_options(struct super_block *sb,
mopt->mount_opt |= OCFS2_MOUNT_HB_LOCAL; mopt->mount_opt |= OCFS2_MOUNT_HB_LOCAL;
break; break;
case Opt_hb_none: case Opt_hb_none:
mopt->mount_opt &= ~OCFS2_MOUNT_HB_LOCAL; mopt->mount_opt |= OCFS2_MOUNT_HB_NONE;
break;
case Opt_hb_global:
mopt->mount_opt |= OCFS2_MOUNT_HB_GLOBAL;
break; break;
case Opt_barrier: case Opt_barrier:
if (match_int(&args[0], &option)) { if (match_int(&args[0], &option)) {
...@@ -1438,6 +1470,12 @@ static int ocfs2_parse_options(struct super_block *sb, ...@@ -1438,6 +1470,12 @@ static int ocfs2_parse_options(struct super_block *sb,
case Opt_grpquota: case Opt_grpquota:
mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA; mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA;
break; break;
case Opt_coherency_buffered:
mopt->mount_opt |= OCFS2_MOUNT_COHERENCY_BUFFERED;
break;
case Opt_coherency_full:
mopt->mount_opt &= ~OCFS2_MOUNT_COHERENCY_BUFFERED;
break;
case Opt_acl: case Opt_acl:
mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL;
mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL; mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL;
...@@ -1477,6 +1515,15 @@ static int ocfs2_parse_options(struct super_block *sb, ...@@ -1477,6 +1515,15 @@ static int ocfs2_parse_options(struct super_block *sb,
} }
} }
/* Ensure only one heartbeat mode */
tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
OCFS2_MOUNT_HB_NONE);
if (hweight32(tmp) != 1) {
mlog(ML_ERROR, "Invalid heartbeat mount options\n");
status = 0;
goto bail;
}
status = 1; status = 1;
bail: bail:
...@@ -1490,10 +1537,14 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) ...@@ -1490,10 +1537,14 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
unsigned long opts = osb->s_mount_opt; unsigned long opts = osb->s_mount_opt;
unsigned int local_alloc_megs; unsigned int local_alloc_megs;
if (opts & OCFS2_MOUNT_HB_LOCAL) if (opts & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL)) {
seq_printf(s, ",_netdev,heartbeat=local"); seq_printf(s, ",_netdev");
else if (opts & OCFS2_MOUNT_HB_LOCAL)
seq_printf(s, ",heartbeat=none"); seq_printf(s, ",%s", OCFS2_HB_LOCAL);
else
seq_printf(s, ",%s", OCFS2_HB_GLOBAL);
} else
seq_printf(s, ",%s", OCFS2_HB_NONE);
if (opts & OCFS2_MOUNT_NOINTR) if (opts & OCFS2_MOUNT_NOINTR)
seq_printf(s, ",nointr"); seq_printf(s, ",nointr");
...@@ -1536,6 +1587,11 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) ...@@ -1536,6 +1587,11 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
if (opts & OCFS2_MOUNT_GRPQUOTA) if (opts & OCFS2_MOUNT_GRPQUOTA)
seq_printf(s, ",grpquota"); seq_printf(s, ",grpquota");
if (opts & OCFS2_MOUNT_COHERENCY_BUFFERED)
seq_printf(s, ",coherency=buffered");
else
seq_printf(s, ",coherency=full");
if (opts & OCFS2_MOUNT_NOUSERXATTR) if (opts & OCFS2_MOUNT_NOUSERXATTR)
seq_printf(s, ",nouser_xattr"); seq_printf(s, ",nouser_xattr");
else else
...@@ -1990,6 +2046,36 @@ static int ocfs2_setup_osb_uuid(struct ocfs2_super *osb, const unsigned char *uu ...@@ -1990,6 +2046,36 @@ static int ocfs2_setup_osb_uuid(struct ocfs2_super *osb, const unsigned char *uu
return 0; return 0;
} }
/*
 * Make sure the entire volume is addressable by our journal. Requires
 * osb_clusters_at_boot to be valid and the journal to have been
 * initialized by ocfs2_journal_init().
 *
 * Returns 0 when the highest block number fits in 32 bits, or when the
 * JBD2_SB compat feature is set and the journal passes the 64BIT incompat
 * feature check; returns -EFBIG otherwise.
 */
static int ocfs2_journal_addressable(struct ocfs2_super *osb)
{
	int status = 0;
	u64 max_block =
		ocfs2_clusters_to_blocks(osb->sb,
					 osb->osb_clusters_at_boot) - 1;

	/* 32-bit block number is always OK. */
	if (max_block <= (u32)~0ULL)
		goto out;

	/* Volume is "huge", so see if our journal is new enough to
	   support it. */
	if (!(OCFS2_HAS_COMPAT_FEATURE(osb->sb,
				       OCFS2_FEATURE_COMPAT_JBD2_SB) &&
	      jbd2_journal_check_used_features(osb->journal->j_journal, 0, 0,
					       JBD2_FEATURE_INCOMPAT_64BIT))) {
		/* Terminate the line so the next log message is not appended. */
		mlog(ML_ERROR, "The journal cannot address the entire volume. "
		     "Enable the 'block64' journal option with tunefs.ocfs2\n");
		status = -EFBIG;
		goto out;
	}

out:
	return status;
}
static int ocfs2_initialize_super(struct super_block *sb, static int ocfs2_initialize_super(struct super_block *sb,
struct buffer_head *bh, struct buffer_head *bh,
int sector_size, int sector_size,
...@@ -2002,6 +2088,7 @@ static int ocfs2_initialize_super(struct super_block *sb, ...@@ -2002,6 +2088,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
struct ocfs2_journal *journal; struct ocfs2_journal *journal;
__le32 uuid_net_key; __le32 uuid_net_key;
struct ocfs2_super *osb; struct ocfs2_super *osb;
u64 total_blocks;
mlog_entry_void(); mlog_entry_void();
...@@ -2060,6 +2147,15 @@ static int ocfs2_initialize_super(struct super_block *sb, ...@@ -2060,6 +2147,15 @@ static int ocfs2_initialize_super(struct super_block *sb,
snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u",
MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots);
if (osb->max_slots > OCFS2_MAX_SLOTS || osb->max_slots == 0) {
mlog(ML_ERROR, "Invalid number of node slots (%u)\n",
osb->max_slots);
status = -EINVAL;
goto bail;
}
mlog(0, "max_slots for this device: %u\n", osb->max_slots);
ocfs2_orphan_scan_init(osb); ocfs2_orphan_scan_init(osb);
status = ocfs2_recovery_init(osb); status = ocfs2_recovery_init(osb);
...@@ -2098,15 +2194,6 @@ static int ocfs2_initialize_super(struct super_block *sb, ...@@ -2098,15 +2194,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
goto bail; goto bail;
} }
osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots);
if (osb->max_slots > OCFS2_MAX_SLOTS || osb->max_slots == 0) {
mlog(ML_ERROR, "Invalid number of node slots (%u)\n",
osb->max_slots);
status = -EINVAL;
goto bail;
}
mlog(0, "max_slots for this device: %u\n", osb->max_slots);
osb->slot_recovery_generations = osb->slot_recovery_generations =
kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations), kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations),
GFP_KERNEL); GFP_KERNEL);
...@@ -2149,7 +2236,9 @@ static int ocfs2_initialize_super(struct super_block *sb, ...@@ -2149,7 +2236,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
goto bail; goto bail;
} }
if (ocfs2_userspace_stack(osb)) { if (ocfs2_clusterinfo_valid(osb)) {
osb->osb_stackflags =
OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags;
memcpy(osb->osb_cluster_stack, memcpy(osb->osb_cluster_stack,
OCFS2_RAW_SB(di)->s_cluster_info.ci_stack, OCFS2_RAW_SB(di)->s_cluster_info.ci_stack,
OCFS2_STACK_LABEL_LEN); OCFS2_STACK_LABEL_LEN);
...@@ -2214,11 +2303,15 @@ static int ocfs2_initialize_super(struct super_block *sb, ...@@ -2214,11 +2303,15 @@ static int ocfs2_initialize_super(struct super_block *sb,
goto bail; goto bail;
} }
if (ocfs2_clusters_to_blocks(osb->sb, le32_to_cpu(di->i_clusters) - 1) total_blocks = ocfs2_clusters_to_blocks(osb->sb,
> (u32)~0UL) { le32_to_cpu(di->i_clusters));
mlog(ML_ERROR, "Volume might try to write to blocks beyond "
"what jbd can address in 32 bits.\n"); status = generic_check_addressable(osb->sb->s_blocksize_bits,
status = -EINVAL; total_blocks);
if (status) {
mlog(ML_ERROR, "Volume too large "
"to mount safely on this system");
status = -EFBIG;
goto bail; goto bail;
} }
...@@ -2380,6 +2473,12 @@ static int ocfs2_check_volume(struct ocfs2_super *osb) ...@@ -2380,6 +2473,12 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
goto finally; goto finally;
} }
/* Now that journal has been initialized, check to make sure
entire volume is addressable. */
status = ocfs2_journal_addressable(osb);
if (status)
goto finally;
/* If the journal was unmounted cleanly then we don't want to /* If the journal was unmounted cleanly then we don't want to
* recover anything. Otherwise, journal_load will do that * recover anything. Otherwise, journal_load will do that
* dirty work for us :) */ * dirty work for us :) */
......
...@@ -44,11 +44,6 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb, ...@@ -44,11 +44,6 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
int type, int type,
u32 slot); u32 slot);
static inline int is_global_system_inode(int type);
static inline int is_in_system_inode_array(struct ocfs2_super *osb,
int type,
u32 slot);
#ifdef CONFIG_DEBUG_LOCK_ALLOC #ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key ocfs2_sysfile_cluster_lock_key[NUM_SYSTEM_INODES]; static struct lock_class_key ocfs2_sysfile_cluster_lock_key[NUM_SYSTEM_INODES];
#endif #endif
...@@ -59,11 +54,52 @@ static inline int is_global_system_inode(int type) ...@@ -59,11 +54,52 @@ static inline int is_global_system_inode(int type)
type <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; type <= OCFS2_LAST_GLOBAL_SYSTEM_INODE;
} }
static inline int is_in_system_inode_array(struct ocfs2_super *osb, static struct inode **get_local_system_inode(struct ocfs2_super *osb,
int type, int type,
u32 slot) u32 slot)
{ {
return slot == osb->slot_num || is_global_system_inode(type); int index;
struct inode **local_system_inodes, **free = NULL;
BUG_ON(slot == OCFS2_INVALID_SLOT);
BUG_ON(type < OCFS2_FIRST_LOCAL_SYSTEM_INODE ||
type > OCFS2_LAST_LOCAL_SYSTEM_INODE);
spin_lock(&osb->osb_lock);
local_system_inodes = osb->local_system_inodes;
spin_unlock(&osb->osb_lock);
if (unlikely(!local_system_inodes)) {
local_system_inodes = kzalloc(sizeof(struct inode *) *
NUM_LOCAL_SYSTEM_INODES *
osb->max_slots,
GFP_NOFS);
if (!local_system_inodes) {
mlog_errno(-ENOMEM);
/*
* return NULL here so that ocfs2_get_system_file_inode
* will try to create an inode and use it. We will try
* to initialize local_system_inodes next time.
*/
return NULL;
}
spin_lock(&osb->osb_lock);
if (osb->local_system_inodes) {
/* Someone has initialized it for us. */
free = local_system_inodes;
local_system_inodes = osb->local_system_inodes;
} else
osb->local_system_inodes = local_system_inodes;
spin_unlock(&osb->osb_lock);
if (unlikely(free))
kfree(free);
}
index = (slot * NUM_LOCAL_SYSTEM_INODES) +
(type - OCFS2_FIRST_LOCAL_SYSTEM_INODE);
return &local_system_inodes[index];
} }
struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb, struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb,
...@@ -74,8 +110,10 @@ struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb, ...@@ -74,8 +110,10 @@ struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb,
struct inode **arr = NULL; struct inode **arr = NULL;
/* avoid the lookup if cached in local system file array */ /* avoid the lookup if cached in local system file array */
if (is_in_system_inode_array(osb, type, slot)) if (is_global_system_inode(type)) {
arr = &(osb->system_inodes[type]); arr = &(osb->global_system_inodes[type]);
} else
arr = get_local_system_inode(osb, type, slot);
if (arr && ((inode = *arr) != NULL)) { if (arr && ((inode = *arr) != NULL)) {
/* get a ref in addition to the array ref */ /* get a ref in addition to the array ref */
......
...@@ -7081,7 +7081,7 @@ static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args, ...@@ -7081,7 +7081,7 @@ static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args,
goto out; goto out;
} }
if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) if (!indexed)
ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh); ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh);
else else
ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh); ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh);
......
...@@ -2378,6 +2378,8 @@ extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, ...@@ -2378,6 +2378,8 @@ extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
extern int generic_file_fsync(struct file *, int); extern int generic_file_fsync(struct file *, int);
extern int generic_check_addressable(unsigned, u64);
#ifdef CONFIG_MIGRATION #ifdef CONFIG_MIGRATION
extern int buffer_migrate_page(struct address_space *, extern int buffer_migrate_page(struct address_space *,
struct page *, struct page *); struct page *, struct page *);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册