提交 ea6db58f 编写于 作者: L Linus Torvalds

Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2

* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: (27 commits)
  ocfs2: Cache extent records
  ocfs2: Remember rw lock level during direct io
  ocfs2: Fix up i_blocks calculation to know about holes
  ocfs2: Fix extent lookup to return true size of holes
  ocfs2: Read from an unwritten extent returns zeros
  ocfs2: make room for unwritten extents flag
  ocfs2: Use own splice write actor
  ocfs2: Use do_sync_mapping_range() in ocfs2_zero_tail_for_truncate()
  [PATCH] Turn do_sync_file_range() into do_sync_mapping_range()
  ocfs2: zero tail of sparse files on truncate
  ocfs2: Teach ocfs2_get_block() about holes
  ocfs2: remove ocfs2_prepare_write() and ocfs2_commit_write()
  ocfs2: teach ocfs2_file_aio_write() about sparse files
  ocfs2: Turn off shared writeable mmap for local files systems with holes.
  ocfs2: abstract out allocation locking
  ocfs2: teach extend/truncate about sparse files
  ocfs2: temporarily remove extent map caching
  ocfs2: sparse b-tree support
  ocfs2: small cleanup of ocfs2_request_delete()
  ocfs2: remove unused code
  ...
此差异已折叠。
......@@ -31,7 +31,8 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
handle_t *handle,
struct inode *inode,
struct buffer_head *fe_bh,
u64 blkno,
u32 cpos,
u64 start_blk,
u32 new_clusters,
struct ocfs2_alloc_context *meta_ac);
int ocfs2_num_free_extents(struct ocfs2_super *osb,
......@@ -70,6 +71,8 @@ struct ocfs2_truncate_context {
struct buffer_head *tc_last_eb_bh;
};
int ocfs2_zero_tail_for_truncate(struct inode *inode, handle_t *handle,
u64 new_i_size);
int ocfs2_prepare_truncate(struct ocfs2_super *osb,
struct inode *inode,
struct buffer_head *fe_bh,
......@@ -79,4 +82,26 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
struct buffer_head *fe_bh,
struct ocfs2_truncate_context *tc);
int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el,
u32 cpos, struct buffer_head **leaf_bh);
/*
* Helper function to look at the # of clusters in an extent record.
*/
static inline unsigned int ocfs2_rec_clusters(struct ocfs2_extent_list *el,
struct ocfs2_extent_rec *rec)
{
/*
* Cluster count in extent records is slightly different
* between interior nodes and leaf nodes. This is to support
* unwritten extents which need a flags field in leaf node
* records, thus shrinking the available space for a clusters
* field.
*/
if (el->l_tree_depth)
return le32_to_cpu(rec->e_int_clusters);
else
return le16_to_cpu(rec->e_leaf_clusters);
}
#endif /* OCFS2_ALLOC_H */
此差异已折叠。
......@@ -30,12 +30,83 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
unsigned from,
unsigned to);
int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
struct inode *inode, unsigned int from,
unsigned int to, int new);
int walk_page_buffers( handle_t *handle,
struct buffer_head *head,
unsigned from,
unsigned to,
int *partial,
int (*fn)( handle_t *handle,
struct buffer_head *bh));
struct ocfs2_write_ctxt;
typedef int (ocfs2_page_writer)(struct inode *, struct ocfs2_write_ctxt *,
u64 *, unsigned int *, unsigned int *);
ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos,
size_t count, ocfs2_page_writer *actor,
void *priv);
struct ocfs2_write_ctxt {
size_t w_count;
loff_t w_pos;
u32 w_cpos;
unsigned int w_finished_copy;
/* This is true if page_size > cluster_size */
unsigned int w_large_pages;
/* Filler callback and private data */
ocfs2_page_writer *w_write_data_page;
void *w_private;
/* Only valid for the filler callback */
struct page *w_this_page;
unsigned int w_this_page_new;
};
struct ocfs2_buffered_write_priv {
char *b_src_buf;
const struct iovec *b_cur_iov; /* Current iovec */
size_t b_cur_off; /* Offset in the
* current iovec */
};
int ocfs2_map_and_write_user_data(struct inode *inode,
struct ocfs2_write_ctxt *wc,
u64 *p_blkno,
unsigned int *ret_from,
unsigned int *ret_to);
struct ocfs2_splice_write_priv {
struct splice_desc *s_sd;
struct pipe_buffer *s_buf;
struct pipe_inode_info *s_pipe;
/* Neither offset value is ever larger than one page */
unsigned int s_offset;
unsigned int s_buf_offset;
};
int ocfs2_map_and_write_splice_data(struct inode *inode,
struct ocfs2_write_ctxt *wc,
u64 *p_blkno,
unsigned int *ret_from,
unsigned int *ret_to);
/* all ocfs2_dio_end_io()'s fault */
#define ocfs2_iocb_is_rw_locked(iocb) \
test_bit(0, (unsigned long *)&iocb->private)
#define ocfs2_iocb_set_rw_locked(iocb) \
set_bit(0, (unsigned long *)&iocb->private)
static inline void ocfs2_iocb_set_rw_locked(struct kiocb *iocb, int level)
{
set_bit(0, (unsigned long *)&iocb->private);
if (level)
set_bit(1, (unsigned long *)&iocb->private);
else
clear_bit(1, (unsigned long *)&iocb->private);
}
#define ocfs2_iocb_clear_rw_locked(iocb) \
clear_bit(0, (unsigned long *)&iocb->private)
#define ocfs2_iocb_rw_locked_level(iocb) \
test_bit(1, (unsigned long *)&iocb->private)
#endif /* OCFS2_FILE_H */
......@@ -46,6 +46,7 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/reboot.h>
#include "heartbeat.h"
#include "nodemanager.h"
......@@ -72,7 +73,9 @@ static void o2quo_fence_self(void)
/* panic spins with interrupts enabled. with preempt
* threads can still schedule, etc, etc */
o2hb_stop_all_regions();
panic("ocfs2 is very sorry to be fencing this system by panicing\n");
printk("ocfs2 is very sorry to be fencing this system by restarting\n");
emergency_restart();
}
/* Indicate that a timeout occured on a hearbeat region write. The
......
......@@ -38,6 +38,9 @@
* locking semantics of the file system using the protocol. It should
* be somewhere else, I'm sure, but right now it isn't.
*
* New in version 8:
* - Replace delete inode votes with a cluster lock
*
* New in version 7:
* - DLM join domain includes the live nodemap
*
......@@ -57,7 +60,7 @@
* - full 64 bit i_size in the metadata lock lvbs
* - introduction of "rw" lock and pushing meta/data locking down
*/
#define O2NET_PROTOCOL_VERSION 7ULL
#define O2NET_PROTOCOL_VERSION 8ULL
struct o2net_handshake {
__be64 protocol_version;
__be64 connector_id;
......
......@@ -358,15 +358,17 @@ int ocfs2_do_extend_dir(struct super_block *sb,
{
int status;
int extend;
u64 p_blkno;
u64 p_blkno, v_blkno;
spin_lock(&OCFS2_I(dir)->ip_lock);
extend = (i_size_read(dir) == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters));
spin_unlock(&OCFS2_I(dir)->ip_lock);
if (extend) {
status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, 1,
parent_fe_bh, handle,
u32 offset = OCFS2_I(dir)->ip_clusters;
status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, &offset,
1, parent_fe_bh, handle,
data_ac, meta_ac, NULL);
BUG_ON(status == -EAGAIN);
if (status < 0) {
......@@ -375,9 +377,8 @@ int ocfs2_do_extend_dir(struct super_block *sb,
}
}
status = ocfs2_extent_map_get_blocks(dir, (dir->i_blocks >>
(sb->s_blocksize_bits - 9)),
1, &p_blkno, NULL);
v_blkno = ocfs2_blocks_for_bytes(sb, i_size_read(dir));
status = ocfs2_extent_map_get_blocks(dir, v_blkno, &p_blkno, NULL, NULL);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -486,7 +487,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
dir_i_size += dir->i_sb->s_blocksize;
i_size_write(dir, dir_i_size);
dir->i_blocks = ocfs2_align_bytes_to_sectors(dir_i_size);
dir->i_blocks = ocfs2_inode_sector_count(dir);
status = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
if (status < 0) {
mlog_errno(status);
......
......@@ -430,11 +430,10 @@ static int dlm_migrate_all_locks(struct dlm_ctxt *dlm)
dlm_lockres_put(res);
cond_resched_lock(&dlm->spinlock);
if (dropped)
goto redo_bucket;
}
cond_resched_lock(&dlm->spinlock);
num += n;
mlog(0, "%s: touched %d lockreses in bucket %d "
"(tot=%d)\n", dlm->name, n, i, num);
......@@ -1035,7 +1034,7 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm)
{
int status = 0, tmpstat, node;
struct domain_join_ctxt *ctxt;
enum dlm_query_join_response response;
enum dlm_query_join_response response = JOIN_DISALLOW;
mlog_entry("%p", dlm);
......
......@@ -611,6 +611,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
}
} while (status != 0);
spin_lock(&dlm_reco_state_lock);
switch (ndata->state) {
case DLM_RECO_NODE_DATA_INIT:
case DLM_RECO_NODE_DATA_FINALIZE_SENT:
......@@ -641,6 +642,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
ndata->node_num, dead_node);
break;
}
spin_unlock(&dlm_reco_state_lock);
}
mlog(0, "done requesting all lock info\n");
......
......@@ -225,11 +225,17 @@ static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
.flags = 0,
};
static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
.get_osb = ocfs2_get_inode_osb,
.flags = 0,
};
static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
{
return lockres->l_type == OCFS2_LOCK_TYPE_META ||
lockres->l_type == OCFS2_LOCK_TYPE_DATA ||
lockres->l_type == OCFS2_LOCK_TYPE_RW;
lockres->l_type == OCFS2_LOCK_TYPE_RW ||
lockres->l_type == OCFS2_LOCK_TYPE_OPEN;
}
static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres)
......@@ -373,6 +379,9 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
case OCFS2_LOCK_TYPE_DATA:
ops = &ocfs2_inode_data_lops;
break;
case OCFS2_LOCK_TYPE_OPEN:
ops = &ocfs2_inode_open_lops;
break;
default:
mlog_bug_on_msg(1, "type: %d\n", type);
ops = NULL; /* thanks, gcc */
......@@ -1129,6 +1138,12 @@ int ocfs2_create_new_inode_locks(struct inode *inode)
goto bail;
}
ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0);
if (ret) {
mlog_errno(ret);
goto bail;
}
bail:
mlog_exit(ret);
return ret;
......@@ -1182,6 +1197,99 @@ void ocfs2_rw_unlock(struct inode *inode, int write)
mlog_exit_void();
}
/*
* ocfs2_open_lock always get PR mode lock.
*/
int ocfs2_open_lock(struct inode *inode)
{
int status = 0;
struct ocfs2_lock_res *lockres;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
BUG_ON(!inode);
mlog_entry_void();
mlog(0, "inode %llu take PRMODE open lock\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno);
if (ocfs2_mount_local(osb))
goto out;
lockres = &OCFS2_I(inode)->ip_open_lockres;
status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
LKM_PRMODE, 0, 0);
if (status < 0)
mlog_errno(status);
out:
mlog_exit(status);
return status;
}
int ocfs2_try_open_lock(struct inode *inode, int write)
{
int status = 0, level;
struct ocfs2_lock_res *lockres;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
BUG_ON(!inode);
mlog_entry_void();
mlog(0, "inode %llu try to take %s open lock\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno,
write ? "EXMODE" : "PRMODE");
if (ocfs2_mount_local(osb))
goto out;
lockres = &OCFS2_I(inode)->ip_open_lockres;
level = write ? LKM_EXMODE : LKM_PRMODE;
/*
* The file system may already holding a PRMODE/EXMODE open lock.
* Since we pass LKM_NOQUEUE, the request won't block waiting on
* other nodes and the -EAGAIN will indicate to the caller that
* this inode is still in use.
*/
status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
level, LKM_NOQUEUE, 0);
out:
mlog_exit(status);
return status;
}
/*
* ocfs2_open_unlock unlock PR and EX mode open locks.
*/
void ocfs2_open_unlock(struct inode *inode)
{
struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
mlog_entry_void();
mlog(0, "inode %llu drop open lock\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno);
if (ocfs2_mount_local(osb))
goto out;
if(lockres->l_ro_holders)
ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
LKM_PRMODE);
if(lockres->l_ex_holders)
ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
LKM_EXMODE);
out:
mlog_exit_void();
}
int ocfs2_data_lock_full(struct inode *inode,
int write,
int arg_flags)
......@@ -1387,8 +1495,7 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
if (S_ISLNK(inode->i_mode) && !oi->ip_clusters)
inode->i_blocks = 0;
else
inode->i_blocks =
ocfs2_align_bytes_to_sectors(i_size_read(inode));
inode->i_blocks = ocfs2_inode_sector_count(inode);
inode->i_uid = be32_to_cpu(lvb->lvb_iuid);
inode->i_gid = be32_to_cpu(lvb->lvb_igid);
......@@ -1479,12 +1586,15 @@ static int ocfs2_meta_lock_update(struct inode *inode,
{
int status = 0;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct ocfs2_lock_res *lockres = NULL;
struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres;
struct ocfs2_dinode *fe;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
mlog_entry_void();
if (ocfs2_mount_local(osb))
goto bail;
spin_lock(&oi->ip_lock);
if (oi->ip_flags & OCFS2_INODE_DELETED) {
mlog(0, "Orphaned inode %llu was deleted while we "
......@@ -1496,22 +1606,16 @@ static int ocfs2_meta_lock_update(struct inode *inode,
}
spin_unlock(&oi->ip_lock);
if (!ocfs2_mount_local(osb)) {
lockres = &oi->ip_meta_lockres;
if (!ocfs2_should_refresh_lock_res(lockres))
goto bail;
}
if (!ocfs2_should_refresh_lock_res(lockres))
goto bail;
/* This will discard any caching information we might have had
* for the inode metadata. */
ocfs2_metadata_cache_purge(inode);
/* will do nothing for inode types that don't use the extent
* map (directories, bitmap files, etc) */
ocfs2_extent_map_trunc(inode, 0);
if (lockres && ocfs2_meta_lvb_is_trustable(inode, lockres)) {
if (ocfs2_meta_lvb_is_trustable(inode, lockres)) {
mlog(0, "Trusting LVB on inode %llu\n",
(unsigned long long)oi->ip_blkno);
ocfs2_refresh_inode_from_lvb(inode);
......@@ -1558,8 +1662,7 @@ static int ocfs2_meta_lock_update(struct inode *inode,
status = 0;
bail_refresh:
if (lockres)
ocfs2_complete_lock_res_refresh(lockres, status);
ocfs2_complete_lock_res_refresh(lockres, status);
bail:
mlog_exit(status);
return status;
......@@ -1630,7 +1733,6 @@ int ocfs2_meta_lock_full(struct inode *inode,
wait_event(osb->recovery_event,
ocfs2_node_map_is_empty(osb, &osb->recovery_map));
acquired = 0;
lockres = &OCFS2_I(inode)->ip_meta_lockres;
level = ex ? LKM_EXMODE : LKM_PRMODE;
dlm_flags = 0;
......@@ -2458,12 +2560,19 @@ int ocfs2_drop_inode_locks(struct inode *inode)
* ocfs2_clear_inode has done it for us. */
err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
&OCFS2_I(inode)->ip_data_lockres);
&OCFS2_I(inode)->ip_open_lockres);
if (err < 0)
mlog_errno(err);
status = err;
err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
&OCFS2_I(inode)->ip_data_lockres);
if (err < 0)
mlog_errno(err);
if (err < 0 && !status)
status = err;
err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
&OCFS2_I(inode)->ip_meta_lockres);
if (err < 0)
......
......@@ -80,6 +80,9 @@ void ocfs2_data_unlock(struct inode *inode,
int write);
int ocfs2_rw_lock(struct inode *inode, int write);
void ocfs2_rw_unlock(struct inode *inode, int write);
int ocfs2_open_lock(struct inode *inode);
int ocfs2_try_open_lock(struct inode *inode, int write);
void ocfs2_open_unlock(struct inode *inode);
int ocfs2_meta_lock_atime(struct inode *inode,
struct vfsmount *vfsmnt,
int *level);
......
此差异已折叠。
......@@ -25,22 +25,29 @@
#ifndef _EXTENT_MAP_H
#define _EXTENT_MAP_H
int init_ocfs2_extent_maps(void);
void exit_ocfs2_extent_maps(void);
struct ocfs2_extent_map_item {
unsigned int ei_cpos;
unsigned int ei_phys;
unsigned int ei_clusters;
unsigned int ei_flags;
/*
* EVERY CALL here except _init, _trunc, and _drop expects alloc_sem
* to be held. The allocation cannot change at all while the map is
* in the process of being updated.
*/
int ocfs2_extent_map_init(struct inode *inode);
int ocfs2_extent_map_append(struct inode *inode,
struct ocfs2_extent_rec *rec,
u32 new_clusters);
int ocfs2_extent_map_get_blocks(struct inode *inode,
u64 v_blkno, int count,
u64 *p_blkno, int *ret_count);
int ocfs2_extent_map_drop(struct inode *inode, u32 new_clusters);
int ocfs2_extent_map_trunc(struct inode *inode, u32 new_clusters);
struct list_head ei_list;
};
#define OCFS2_MAX_EXTENT_MAP_ITEMS 3
struct ocfs2_extent_map {
unsigned int em_num_items;
struct list_head em_list;
};
void ocfs2_extent_map_init(struct inode *inode);
void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cluster);
void ocfs2_extent_map_insert_rec(struct inode *inode,
struct ocfs2_extent_rec *rec);
int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, u32 *p_cluster,
u32 *num_clusters, unsigned int *extent_flags);
int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
u64 *ret_count, unsigned int *extent_flags);
#endif /* _EXTENT_MAP_H */
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册