Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
openeuler
Kernel
提交
88e88374
K
Kernel
项目概览
openeuler
/
Kernel
2 年多 前同步成功
通知
10
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
K
Kernel
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
88e88374
编写于
5月 24, 2010
作者:
A
Alex Elder
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'delayed-logging-for-2.6.35' into for-linus
上级
7e125f7b
ccf7c23f
变更
27
展开全部
隐藏空白更改
内联
并排
Showing
27 changed file
with
2382 addition
and
513 deletion
+2382
-513
Documentation/filesystems/xfs-delayed-logging-design.txt
Documentation/filesystems/xfs-delayed-logging-design.txt
+816
-0
fs/xfs/Makefile
fs/xfs/Makefile
+1
-0
fs/xfs/linux-2.6/xfs_buf.c
fs/xfs/linux-2.6/xfs_buf.c
+9
-0
fs/xfs/linux-2.6/xfs_quotaops.c
fs/xfs/linux-2.6/xfs_quotaops.c
+1
-0
fs/xfs/linux-2.6/xfs_super.c
fs/xfs/linux-2.6/xfs_super.c
+11
-1
fs/xfs/linux-2.6/xfs_trace.h
fs/xfs/linux-2.6/xfs_trace.h
+56
-27
fs/xfs/quota/xfs_dquot.c
fs/xfs/quota/xfs_dquot.c
+3
-3
fs/xfs/xfs_ag.h
fs/xfs/xfs_ag.h
+15
-9
fs/xfs/xfs_alloc.c
fs/xfs/xfs_alloc.c
+252
-105
fs/xfs/xfs_alloc.h
fs/xfs/xfs_alloc.h
+3
-4
fs/xfs/xfs_alloc_btree.c
fs/xfs/xfs_alloc_btree.c
+1
-1
fs/xfs/xfs_buf_item.c
fs/xfs/xfs_buf_item.c
+85
-81
fs/xfs/xfs_buf_item.h
fs/xfs/xfs_buf_item.h
+10
-8
fs/xfs/xfs_error.c
fs/xfs/xfs_error.c
+1
-1
fs/xfs/xfs_log.c
fs/xfs/xfs_log.c
+88
-32
fs/xfs/xfs_log.h
fs/xfs/xfs_log.h
+12
-2
fs/xfs/xfs_log_cil.c
fs/xfs/xfs_log_cil.c
+725
-0
fs/xfs/xfs_log_priv.h
fs/xfs/xfs_log_priv.h
+114
-4
fs/xfs/xfs_log_recover.c
fs/xfs/xfs_log_recover.c
+23
-23
fs/xfs/xfs_log_recover.h
fs/xfs/xfs_log_recover.h
+1
-1
fs/xfs/xfs_mount.h
fs/xfs/xfs_mount.h
+1
-0
fs/xfs/xfs_trans.c
fs/xfs/xfs_trans.c
+108
-36
fs/xfs/xfs_trans.h
fs/xfs/xfs_trans.h
+10
-34
fs/xfs/xfs_trans_buf.c
fs/xfs/xfs_trans_buf.c
+23
-23
fs/xfs/xfs_trans_item.c
fs/xfs/xfs_trans_item.c
+3
-111
fs/xfs/xfs_trans_priv.h
fs/xfs/xfs_trans_priv.h
+8
-7
fs/xfs/xfs_types.h
fs/xfs/xfs_types.h
+2
-0
未找到文件。
Documentation/filesystems/xfs-delayed-logging-design.txt
0 → 100644
浏览文件 @
88e88374
此差异已折叠。
点击以展开。
fs/xfs/Makefile
浏览文件 @
88e88374
...
...
@@ -77,6 +77,7 @@ xfs-y += xfs_alloc.o \
xfs_itable.o
\
xfs_dfrag.o
\
xfs_log.o
\
xfs_log_cil.o
\
xfs_log_recover.o
\
xfs_mount.o
\
xfs_mru_cache.o
\
...
...
fs/xfs/linux-2.6/xfs_buf.c
浏览文件 @
88e88374
...
...
@@ -37,6 +37,7 @@
#include "xfs_sb.h"
#include "xfs_inum.h"
#include "xfs_log.h"
#include "xfs_ag.h"
#include "xfs_dmapi.h"
#include "xfs_mount.h"
...
...
@@ -850,6 +851,12 @@ xfs_buf_lock_value(
* Note that this in no way locks the underlying pages, so it is only
* useful for synchronizing concurrent use of buffer objects, not for
* synchronizing independent access to the underlying pages.
*
* If we come across a stale, pinned, locked buffer, we know that we
* are being asked to lock a buffer that has been reallocated. Because
* it is pinned, we know that the log has not been pushed to disk and
* hence it will still be locked. Rather than sleeping until someone
* else pushes the log, push it ourselves before trying to get the lock.
*/
void
xfs_buf_lock
(
...
...
@@ -857,6 +864,8 @@ xfs_buf_lock(
{
trace_xfs_buf_lock
(
bp
,
_RET_IP_
);
if
(
atomic_read
(
&
bp
->
b_pin_count
)
&&
(
bp
->
b_flags
&
XBF_STALE
))
xfs_log_force
(
bp
->
b_mount
,
0
);
if
(
atomic_read
(
&
bp
->
b_io_remaining
))
blk_run_address_space
(
bp
->
b_target
->
bt_mapping
);
down
(
&
bp
->
b_sema
);
...
...
fs/xfs/linux-2.6/xfs_quotaops.c
浏览文件 @
88e88374
...
...
@@ -19,6 +19,7 @@
#include "xfs_dmapi.h"
#include "xfs_sb.h"
#include "xfs_inum.h"
#include "xfs_log.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_quota.h"
...
...
fs/xfs/linux-2.6/xfs_super.c
浏览文件 @
88e88374
...
...
@@ -119,6 +119,8 @@ mempool_t *xfs_ioend_pool;
#define MNTOPT_DMAPI "dmapi"
/* DMI enabled (DMAPI / XDSM) */
#define MNTOPT_XDSM "xdsm"
/* DMI enabled (DMAPI / XDSM) */
#define MNTOPT_DMI "dmi"
/* DMI enabled (DMAPI / XDSM) */
#define MNTOPT_DELAYLOG "delaylog"
/* Delayed logging enabled */
#define MNTOPT_NODELAYLOG "nodelaylog"
/* Delayed logging disabled */
/*
* Table driven mount option parser.
...
...
@@ -374,6 +376,13 @@ xfs_parseargs(
mp
->
m_flags
|=
XFS_MOUNT_DMAPI
;
}
else
if
(
!
strcmp
(
this_char
,
MNTOPT_DMI
))
{
mp
->
m_flags
|=
XFS_MOUNT_DMAPI
;
}
else
if
(
!
strcmp
(
this_char
,
MNTOPT_DELAYLOG
))
{
mp
->
m_flags
|=
XFS_MOUNT_DELAYLOG
;
cmn_err
(
CE_WARN
,
"Enabling EXPERIMENTAL delayed logging feature "
"- use at your own risk.
\n
"
);
}
else
if
(
!
strcmp
(
this_char
,
MNTOPT_NODELAYLOG
))
{
mp
->
m_flags
&=
~
XFS_MOUNT_DELAYLOG
;
}
else
if
(
!
strcmp
(
this_char
,
"ihashsize"
))
{
cmn_err
(
CE_WARN
,
"XFS: ihashsize no longer used, option is deprecated."
);
...
...
@@ -535,6 +544,7 @@ xfs_showargs(
{
XFS_MOUNT_FILESTREAMS
,
","
MNTOPT_FILESTREAM
},
{
XFS_MOUNT_DMAPI
,
","
MNTOPT_DMAPI
},
{
XFS_MOUNT_GRPID
,
","
MNTOPT_GRPID
},
{
XFS_MOUNT_DELAYLOG
,
","
MNTOPT_DELAYLOG
},
{
0
,
NULL
}
};
static
struct
proc_xfs_info
xfs_info_unset
[]
=
{
...
...
@@ -1755,7 +1765,7 @@ xfs_init_zones(void)
* but it is much faster.
*/
xfs_buf_item_zone
=
kmem_zone_init
((
sizeof
(
xfs_buf_log_item_t
)
+
(((
XFS_MAX_BLOCKSIZE
/
XFS_BL
I
_CHUNK
)
/
(((
XFS_MAX_BLOCKSIZE
/
XFS_BL
F
_CHUNK
)
/
NBWORD
)
*
sizeof
(
int
))),
"xfs_buf_item"
);
if
(
!
xfs_buf_item_zone
)
goto
out_destroy_trans_zone
;
...
...
fs/xfs/linux-2.6/xfs_trace.h
浏览文件 @
88e88374
...
...
@@ -1059,83 +1059,112 @@ TRACE_EVENT(xfs_bunmap,
);
#define XFS_BUSY_SYNC \
{ 0, "async" }, \
{ 1, "sync" }
TRACE_EVENT
(
xfs_alloc_busy
,
TP_PROTO
(
struct
xfs_
mount
*
mp
,
xfs_agnumber_t
agno
,
xfs_agblock_t
agb
no
,
xfs_
extlen_t
len
,
int
slot
),
TP_ARGS
(
mp
,
agno
,
agbno
,
len
,
slot
),
TP_PROTO
(
struct
xfs_
trans
*
trans
,
xfs_agnumber_t
ag
no
,
xfs_
agblock_t
agbno
,
xfs_extlen_t
len
,
int
sync
),
TP_ARGS
(
trans
,
agno
,
agbno
,
len
,
sync
),
TP_STRUCT__entry
(
__field
(
dev_t
,
dev
)
__field
(
struct
xfs_trans
*
,
tp
)
__field
(
int
,
tid
)
__field
(
xfs_agnumber_t
,
agno
)
__field
(
xfs_agblock_t
,
agbno
)
__field
(
xfs_extlen_t
,
len
)
__field
(
int
,
s
lot
)
__field
(
int
,
s
ync
)
),
TP_fast_assign
(
__entry
->
dev
=
mp
->
m_super
->
s_dev
;
__entry
->
dev
=
trans
->
t_mountp
->
m_super
->
s_dev
;
__entry
->
tp
=
trans
;
__entry
->
tid
=
trans
->
t_ticket
->
t_tid
;
__entry
->
agno
=
agno
;
__entry
->
agbno
=
agbno
;
__entry
->
len
=
len
;
__entry
->
s
lot
=
slot
;
__entry
->
s
ync
=
sync
;
),
TP_printk
(
"dev %d:%d
agno %u agbno %u len %u slot %d
"
,
TP_printk
(
"dev %d:%d
trans 0x%p tid 0x%x agno %u agbno %u len %u %s
"
,
MAJOR
(
__entry
->
dev
),
MINOR
(
__entry
->
dev
),
__entry
->
tp
,
__entry
->
tid
,
__entry
->
agno
,
__entry
->
agbno
,
__entry
->
len
,
__
entry
->
slot
)
__
print_symbolic
(
__entry
->
sync
,
XFS_BUSY_SYNC
)
)
);
#define XFS_BUSY_STATES \
{ 0, "found" }, \
{ 1, "missing" }
TRACE_EVENT
(
xfs_alloc_unbusy
,
TP_PROTO
(
struct
xfs_mount
*
mp
,
xfs_agnumber_t
agno
,
int
slot
,
int
found
),
TP_ARGS
(
mp
,
agno
,
slot
,
found
),
xfs_agblock_t
agbno
,
xfs_extlen_t
len
),
TP_ARGS
(
mp
,
agno
,
agbno
,
len
),
TP_STRUCT__entry
(
__field
(
dev_t
,
dev
)
__field
(
xfs_agnumber_t
,
agno
)
__field
(
int
,
slot
)
__field
(
int
,
found
)
__field
(
xfs_agblock_t
,
agbno
)
__field
(
xfs_extlen_t
,
len
)
),
TP_fast_assign
(
__entry
->
dev
=
mp
->
m_super
->
s_dev
;
__entry
->
agno
=
agno
;
__entry
->
slot
=
slot
;
__entry
->
found
=
found
;
__entry
->
agbno
=
agbno
;
__entry
->
len
=
len
;
),
TP_printk
(
"dev %d:%d agno %u
slot %d %s
"
,
TP_printk
(
"dev %d:%d agno %u
agbno %u len %u
"
,
MAJOR
(
__entry
->
dev
),
MINOR
(
__entry
->
dev
),
__entry
->
agno
,
__entry
->
slot
,
__
print_symbolic
(
__entry
->
found
,
XFS_BUSY_STATES
)
)
__entry
->
agbno
,
__
entry
->
len
)
);
#define XFS_BUSY_STATES \
{ 0, "missing" }, \
{ 1, "found" }
TRACE_EVENT
(
xfs_alloc_busysearch
,
TP_PROTO
(
struct
xfs_mount
*
mp
,
xfs_agnumber_t
agno
,
xfs_agblock_t
agbno
,
xfs_
extlen_t
len
,
xfs_lsn_t
lsn
),
TP_ARGS
(
mp
,
agno
,
agbno
,
len
,
lsn
),
TP_PROTO
(
struct
xfs_mount
*
mp
,
xfs_agnumber_t
agno
,
xfs_
agblock_t
agbno
,
xfs_extlen_t
len
,
int
found
),
TP_ARGS
(
mp
,
agno
,
agbno
,
len
,
found
),
TP_STRUCT__entry
(
__field
(
dev_t
,
dev
)
__field
(
xfs_agnumber_t
,
agno
)
__field
(
xfs_agblock_t
,
agbno
)
__field
(
xfs_extlen_t
,
len
)
__field
(
xfs_lsn_t
,
lsn
)
__field
(
int
,
found
)
),
TP_fast_assign
(
__entry
->
dev
=
mp
->
m_super
->
s_dev
;
__entry
->
agno
=
agno
;
__entry
->
agbno
=
agbno
;
__entry
->
len
=
len
;
__entry
->
lsn
=
lsn
;
__entry
->
found
=
found
;
),
TP_printk
(
"dev %d:%d agno %u agbno %u len %u
force lsn 0x%llx
"
,
TP_printk
(
"dev %d:%d agno %u agbno %u len %u
%s
"
,
MAJOR
(
__entry
->
dev
),
MINOR
(
__entry
->
dev
),
__entry
->
agno
,
__entry
->
agbno
,
__entry
->
len
,
__print_symbolic
(
__entry
->
found
,
XFS_BUSY_STATES
))
);
TRACE_EVENT
(
xfs_trans_commit_lsn
,
TP_PROTO
(
struct
xfs_trans
*
trans
),
TP_ARGS
(
trans
),
TP_STRUCT__entry
(
__field
(
dev_t
,
dev
)
__field
(
struct
xfs_trans
*
,
tp
)
__field
(
xfs_lsn_t
,
lsn
)
),
TP_fast_assign
(
__entry
->
dev
=
trans
->
t_mountp
->
m_super
->
s_dev
;
__entry
->
tp
=
trans
;
__entry
->
lsn
=
trans
->
t_commit_lsn
;
),
TP_printk
(
"dev %d:%d trans 0x%p commit_lsn 0x%llx"
,
MAJOR
(
__entry
->
dev
),
MINOR
(
__entry
->
dev
),
__entry
->
tp
,
__entry
->
lsn
)
);
...
...
fs/xfs/quota/xfs_dquot.c
浏览文件 @
88e88374
...
...
@@ -344,9 +344,9 @@ xfs_qm_init_dquot_blk(
for
(
i
=
0
;
i
<
q
->
qi_dqperchunk
;
i
++
,
d
++
,
curid
++
)
xfs_qm_dqinit_core
(
curid
,
type
,
d
);
xfs_trans_dquot_buf
(
tp
,
bp
,
(
type
&
XFS_DQ_USER
?
XFS_BL
I
_UDQUOT_BUF
:
((
type
&
XFS_DQ_PROJ
)
?
XFS_BL
I
_PDQUOT_BUF
:
XFS_BL
I
_GDQUOT_BUF
)));
(
type
&
XFS_DQ_USER
?
XFS_BL
F
_UDQUOT_BUF
:
((
type
&
XFS_DQ_PROJ
)
?
XFS_BL
F
_PDQUOT_BUF
:
XFS_BL
F
_GDQUOT_BUF
)));
xfs_trans_log_buf
(
tp
,
bp
,
0
,
BBTOB
(
q
->
qi_dqchunklen
)
-
1
);
}
...
...
fs/xfs/xfs_ag.h
浏览文件 @
88e88374
...
...
@@ -175,14 +175,20 @@ typedef struct xfs_agfl {
}
xfs_agfl_t
;
/*
* Busy block/extent entry. Used in perag to mark blocks that have been freed
* but whose transactions aren't committed to disk yet.
* Busy block/extent entry. Indexed by a rbtree in perag to mark blocks that
* have been freed but whose transactions aren't committed to disk yet.
*
* Note that we use the transaction ID to record the transaction, not the
* transaction structure itself. See xfs_alloc_busy_insert() for details.
*/
typedef
struct
xfs_perag_busy
{
xfs_agblock_t
busy_start
;
xfs_extlen_t
busy_length
;
struct
xfs_trans
*
busy_tp
;
/* transaction that did the free */
}
xfs_perag_busy_t
;
struct
xfs_busy_extent
{
struct
rb_node
rb_node
;
/* ag by-bno indexed search tree */
struct
list_head
list
;
/* transaction busy extent list */
xfs_agnumber_t
agno
;
xfs_agblock_t
bno
;
xfs_extlen_t
length
;
xlog_tid_t
tid
;
/* transaction that created this */
};
/*
* Per-ag incore structure, copies of information in agf and agi,
...
...
@@ -216,7 +222,8 @@ typedef struct xfs_perag {
xfs_agino_t
pagl_leftrec
;
xfs_agino_t
pagl_rightrec
;
#ifdef __KERNEL__
spinlock_t
pagb_lock
;
/* lock for pagb_list */
spinlock_t
pagb_lock
;
/* lock for pagb_tree */
struct
rb_root
pagb_tree
;
/* ordered tree of busy extents */
atomic_t
pagf_fstrms
;
/* # of filestreams active in this AG */
...
...
@@ -226,7 +233,6 @@ typedef struct xfs_perag {
int
pag_ici_reclaimable
;
/* reclaimable inodes */
#endif
int
pagb_count
;
/* pagb slots in use */
xfs_perag_busy_t
pagb_list
[
XFS_PAGB_NUM_SLOTS
];
/* unstable blocks */
}
xfs_perag_t
;
/*
...
...
fs/xfs/xfs_alloc.c
浏览文件 @
88e88374
...
...
@@ -46,11 +46,9 @@
#define XFSA_FIXUP_BNO_OK 1
#define XFSA_FIXUP_CNT_OK 2
STATIC
void
xfs_alloc_search_busy
(
xfs_trans_t
*
tp
,
xfs_agnumber_t
agno
,
xfs_agblock_t
bno
,
xfs_extlen_t
len
);
static
int
xfs_alloc_busy_search
(
struct
xfs_mount
*
mp
,
xfs_agnumber_t
agno
,
xfs_agblock_t
bno
,
xfs_extlen_t
len
);
/*
* Prototypes for per-ag allocation routines
...
...
@@ -540,9 +538,16 @@ xfs_alloc_ag_vextent(
be32_to_cpu
(
agf
->
agf_length
));
xfs_alloc_log_agf
(
args
->
tp
,
args
->
agbp
,
XFS_AGF_FREEBLKS
);
/* search the busylist for these blocks */
xfs_alloc_search_busy
(
args
->
tp
,
args
->
agno
,
args
->
agbno
,
args
->
len
);
/*
* Search the busylist for these blocks and mark the
* transaction as synchronous if blocks are found. This
* avoids the need to block due to a synchronous log
* force to ensure correct ordering as the synchronous
* transaction will guarantee that for us.
*/
if
(
xfs_alloc_busy_search
(
args
->
mp
,
args
->
agno
,
args
->
agbno
,
args
->
len
))
xfs_trans_set_sync
(
args
->
tp
);
}
if
(
!
args
->
isfl
)
xfs_trans_mod_sb
(
args
->
tp
,
...
...
@@ -1693,7 +1698,7 @@ xfs_free_ag_extent(
* when the iclog commits to disk. If a busy block is allocated,
* the iclog is pushed up to the LSN that freed the block.
*/
xfs_alloc_
mark_busy
(
tp
,
agno
,
bno
,
len
);
xfs_alloc_
busy_insert
(
tp
,
agno
,
bno
,
len
);
return
0
;
error0:
...
...
@@ -1989,14 +1994,20 @@ xfs_alloc_get_freelist(
*
bnop
=
bno
;
/*
* As blocks are freed, they are added to the per-ag busy list
* and remain there until the freeing transaction is committed to
* disk. Now that we have allocated blocks, this list must be
* searched to see if a block is being reused. If one is, then
* the freeing transaction must be pushed to disk NOW by forcing
* to disk all iclogs up to that transaction's LSN.
* As blocks are freed, they are added to the per-ag busy list and
* remain there until the freeing transaction is committed to disk.
* Now that we have allocated blocks, this list must be searched to see
* if a block is being reused. If one is, then the freeing transaction
* must be pushed to disk before this transaction.
*
* We do this by setting the current transaction to a sync transaction
* which guarantees that the freeing transaction is on disk before this
* transaction. This is done instead of a synchronous log force here so
* that we don't sit and wait with the AGF locked in the transaction
* during the log force.
*/
xfs_alloc_search_busy
(
tp
,
be32_to_cpu
(
agf
->
agf_seqno
),
bno
,
1
);
if
(
xfs_alloc_busy_search
(
mp
,
be32_to_cpu
(
agf
->
agf_seqno
),
bno
,
1
))
xfs_trans_set_sync
(
tp
);
return
0
;
}
...
...
@@ -2201,7 +2212,7 @@ xfs_alloc_read_agf(
be32_to_cpu
(
agf
->
agf_levels
[
XFS_BTNUM_CNTi
]);
spin_lock_init
(
&
pag
->
pagb_lock
);
pag
->
pagb_count
=
0
;
memset
(
pag
->
pagb_list
,
0
,
sizeof
(
pag
->
pagb_list
))
;
pag
->
pagb_tree
=
RB_ROOT
;
pag
->
pagf_init
=
1
;
}
#ifdef DEBUG
...
...
@@ -2479,127 +2490,263 @@ xfs_free_extent(
* list is reused, the transaction that freed it must be forced to disk
* before continuing to use the block.
*
* xfs_alloc_mark_busy - add to the per-ag busy list
* xfs_alloc_clear_busy - remove an item from the per-ag busy list
* xfs_alloc_busy_insert - add to the per-ag busy list
* xfs_alloc_busy_clear - remove an item from the per-ag busy list
* xfs_alloc_busy_search - search for a busy extent
*/
/*
* Insert a new extent into the busy tree.
*
* The busy extent tree is indexed by the start block of the busy extent.
* There can be multiple overlapping ranges in the busy extent tree but only
* ever one entry at a given start block. The reason for this is that
* multi-block extents can be freed, then smaller chunks of that extent
* allocated and freed again before the first transaction commit is on disk.
* If the exact same start block is freed a second time, we have to wait for
* that busy extent to pass out of the tree before the new extent is inserted.
* There are two main cases we have to handle here.
*
* The first case is a transaction that triggers a "free - allocate - free"
* cycle. This can occur during btree manipulations as a btree block is freed
* to the freelist, then allocated from the free list, then freed again. In
* this case, the second extent free is what triggers the duplicate and as
* such the transaction IDs should match. Because the extent was allocated in
* this transaction, the transaction must be marked as synchronous. This is
* true for all cases where the free/alloc/free occurs in the one transaction,
* hence the addition of the ASSERT(tp->t_flags & XFS_TRANS_SYNC) to this case.
* This serves to catch violations of the second case quite effectively.
*
* The second case is where the free/alloc/free occur in different
* transactions. In this case, the thread freeing the extent the second time
* can't mark the extent busy immediately because it is already tracked in a
* transaction that may be committing. When the log commit for the existing
* busy extent completes, the busy extent will be removed from the tree. If we
* allow the second busy insert to continue using that busy extent structure,
* it can be freed before this transaction is safely in the log. Hence our
* only option in this case is to force the log to remove the existing busy
* extent from the list before we insert the new one with the current
* transaction ID.
*
* The problem we are trying to avoid in the free-alloc-free in separate
* transactions is most easily described with a timeline:
*
* Thread 1 Thread 2 Thread 3 xfslogd
* xact alloc
* free X
* mark busy
* commit xact
* free xact
* xact alloc
* alloc X
* busy search
* mark xact sync
* commit xact
* free xact
* force log
* checkpoint starts
* ....
* xact alloc
* free X
* mark busy
* finds match
* *** KABOOM! ***
* ....
* log IO completes
* unbusy X
* checkpoint completes
*
* By issuing a log force in thread 3 @ "KABOOM", the thread will block until
* the checkpoint completes, and the busy extent it matched will have been
* removed from the tree when it is woken. Hence it can then continue safely.
*
* However, to ensure this matching process is robust, we need to use the
* transaction ID for identifying transactions, as delayed logging results in
* the busy extent and transaction lifecycles being different. i.e. the busy
* extent is active for a lot longer than the transaction. Hence the
* transaction structure can be freed and reallocated, then mark the same
* extent busy again in the new transaction. In this case the new transaction
* will have a different tid but can have the same address, and hence we need
* to check against the tid.
*
* Future: for delayed logging, we could avoid the log force if the extent was
* first freed in the current checkpoint sequence. This, however, requires the
* ability to pin the current checkpoint in memory until this transaction
* commits to ensure that both the original free and the current one combine
* logically into the one checkpoint. If the checkpoint sequences are
* different, however, we still need to wait on a log force.
*/
void
xfs_alloc_mark_busy
(
xfs_trans_t
*
tp
,
xfs_agnumber_t
agno
,
xfs_agblock_t
bno
,
xfs_extlen_t
len
)
xfs_alloc_busy_insert
(
struct
xfs_trans
*
tp
,
xfs_agnumber_t
agno
,
xfs_agblock_t
bno
,
xfs_extlen_t
len
)
{
xfs_perag_busy_t
*
bsy
;
struct
xfs_busy_extent
*
new
;
struct
xfs_busy_extent
*
busyp
;
struct
xfs_perag
*
pag
;
int
n
;
struct
rb_node
**
rbp
;
struct
rb_node
*
parent
;
int
match
;
pag
=
xfs_perag_get
(
tp
->
t_mountp
,
agno
);
spin_lock
(
&
pag
->
pagb_lock
);
/* search pagb_list for an open slot */
for
(
bsy
=
pag
->
pagb_list
,
n
=
0
;
n
<
XFS_PAGB_NUM_SLOTS
;
bsy
++
,
n
++
)
{
if
(
bsy
->
busy_tp
==
NULL
)
{
break
;
}
new
=
kmem_zalloc
(
sizeof
(
struct
xfs_busy_extent
),
KM_MAYFAIL
);
if
(
!
new
)
{
/*
* No Memory! Since it is now not possible to track the free
* block, make this a synchronous transaction to ensure that
* the block is not reused before this transaction commits.
*/
trace_xfs_alloc_busy
(
tp
,
agno
,
bno
,
len
,
1
);
xfs_trans_set_sync
(
tp
);
return
;
}
trace_xfs_alloc_busy
(
tp
->
t_mountp
,
agno
,
bno
,
len
,
n
);
new
->
agno
=
agno
;
new
->
bno
=
bno
;
new
->
length
=
len
;
new
->
tid
=
xfs_log_get_trans_ident
(
tp
);
if
(
n
<
XFS_PAGB_NUM_SLOTS
)
{
bsy
=
&
pag
->
pagb_list
[
n
];
pag
->
pagb_count
++
;
bsy
->
busy_start
=
bno
;
bsy
->
busy_length
=
len
;
bsy
->
busy_tp
=
tp
;
xfs_trans_add_busy
(
tp
,
agno
,
n
);
}
else
{
INIT_LIST_HEAD
(
&
new
->
list
);
/* trace before insert to be able to see failed inserts */
trace_xfs_alloc_busy
(
tp
,
agno
,
bno
,
len
,
0
);
pag
=
xfs_perag_get
(
tp
->
t_mountp
,
new
->
agno
);
restart:
spin_lock
(
&
pag
->
pagb_lock
);
rbp
=
&
pag
->
pagb_tree
.
rb_node
;
parent
=
NULL
;
busyp
=
NULL
;
match
=
0
;
while
(
*
rbp
&&
match
>=
0
)
{
parent
=
*
rbp
;
busyp
=
rb_entry
(
parent
,
struct
xfs_busy_extent
,
rb_node
);
if
(
new
->
bno
<
busyp
->
bno
)
{
/* may overlap, but exact start block is lower */
rbp
=
&
(
*
rbp
)
->
rb_left
;
if
(
new
->
bno
+
new
->
length
>
busyp
->
bno
)
match
=
busyp
->
tid
==
new
->
tid
?
1
:
-
1
;
}
else
if
(
new
->
bno
>
busyp
->
bno
)
{
/* may overlap, but exact start block is higher */
rbp
=
&
(
*
rbp
)
->
rb_right
;
if
(
bno
<
busyp
->
bno
+
busyp
->
length
)
match
=
busyp
->
tid
==
new
->
tid
?
1
:
-
1
;
}
else
{
match
=
busyp
->
tid
==
new
->
tid
?
1
:
-
1
;
break
;
}
}
if
(
match
<
0
)
{
/* overlap marked busy in different transaction */
spin_unlock
(
&
pag
->
pagb_lock
);
xfs_log_force
(
tp
->
t_mountp
,
XFS_LOG_SYNC
);
goto
restart
;
}
if
(
match
>
0
)
{
/*
* The busy list is full! Since it is now not possible to
* track the free block, make this a synchronous transaction
* to ensure that the block is not reused before this
* transaction commits.
* overlap marked busy in same transaction. Update if exact
* start block match, otherwise combine the busy extents into
* a single range.
*/
xfs_trans_set_sync
(
tp
);
}
if
(
busyp
->
bno
==
new
->
bno
)
{
busyp
->
length
=
max
(
busyp
->
length
,
new
->
length
);
spin_unlock
(
&
pag
->
pagb_lock
);
ASSERT
(
tp
->
t_flags
&
XFS_TRANS_SYNC
);
xfs_perag_put
(
pag
);
kmem_free
(
new
);
return
;
}
rb_erase
(
&
busyp
->
rb_node
,
&
pag
->
pagb_tree
);
new
->
length
=
max
(
busyp
->
bno
+
busyp
->
length
,
new
->
bno
+
new
->
length
)
-
min
(
busyp
->
bno
,
new
->
bno
);
new
->
bno
=
min
(
busyp
->
bno
,
new
->
bno
);
}
else
busyp
=
NULL
;
rb_link_node
(
&
new
->
rb_node
,
parent
,
rbp
);
rb_insert_color
(
&
new
->
rb_node
,
&
pag
->
pagb_tree
);
list_add
(
&
new
->
list
,
&
tp
->
t_busy
);
spin_unlock
(
&
pag
->
pagb_lock
);
xfs_perag_put
(
pag
);
kmem_free
(
busyp
);
}
void
xfs_alloc_clear_busy
(
xfs_trans_t
*
tp
,
xfs_agnumber_t
agno
,
int
idx
)
/*
* Search for a busy extent within the range of the extent we are about to
* allocate. This function takes the busy extent tree lock (pagb_lock) itself,
* so the caller must not hold it. It returns 0 for no overlapping busy
* extent, -1 for an overlapping but not exact busy extent, and 1 for an exact
* match. This is done so that a non-zero return indicates an overlap that
* will require a synchronous transaction, but it can still be
* used to distinguish between a partial or exact match.
*/
static
int
xfs_alloc_busy_search
(
struct
xfs_mount
*
mp
,
xfs_agnumber_t
agno
,
xfs_agblock_t
bno
,
xfs_extlen_t
len
)
{
struct
xfs_perag
*
pag
;
xfs_perag_busy_t
*
list
;
struct
rb_node
*
rbp
;
struct
xfs_busy_extent
*
busyp
;
int
match
=
0
;
ASSERT
(
idx
<
XFS_PAGB_NUM_SLOTS
);
pag
=
xfs_perag_get
(
tp
->
t_mountp
,
agno
);
pag
=
xfs_perag_get
(
mp
,
agno
);
spin_lock
(
&
pag
->
pagb_lock
);
list
=
pag
->
pagb_list
;
trace_xfs_alloc_unbusy
(
tp
->
t_mountp
,
agno
,
idx
,
list
[
idx
].
busy_tp
==
tp
);
if
(
list
[
idx
].
busy_tp
==
tp
)
{
list
[
idx
].
busy_tp
=
NULL
;
pag
->
pagb_count
--
;
rbp
=
pag
->
pagb_tree
.
rb_node
;
/* find closest start bno overlap */
while
(
rbp
)
{
busyp
=
rb_entry
(
rbp
,
struct
xfs_busy_extent
,
rb_node
);
if
(
bno
<
busyp
->
bno
)
{
/* may overlap, but exact start block is lower */
if
(
bno
+
len
>
busyp
->
bno
)
match
=
-
1
;
rbp
=
rbp
->
rb_left
;
}
else
if
(
bno
>
busyp
->
bno
)
{
/* may overlap, but exact start block is higher */
if
(
bno
<
busyp
->
bno
+
busyp
->
length
)
match
=
-
1
;
rbp
=
rbp
->
rb_right
;
}
else
{
/* bno matches busyp, length determines exact match */
match
=
(
busyp
->
length
==
len
)
?
1
:
-
1
;
break
;
}
}
spin_unlock
(
&
pag
->
pagb_lock
);
trace_xfs_alloc_busysearch
(
mp
,
agno
,
bno
,
len
,
!!
match
);
xfs_perag_put
(
pag
);
return
match
;
}
/*
* If we find the extent in the busy list, force the log out to get the
* extent out of the busy list so the caller can use it straight away.
*/
STATIC
void
xfs_alloc_search_busy
(
xfs_trans_t
*
tp
,
xfs_agnumber_t
agno
,
xfs_agblock_t
bno
,
xfs_extlen_t
len
)
void
xfs_alloc_busy_clear
(
struct
xfs_mount
*
mp
,
struct
xfs_busy_extent
*
busyp
)
{
struct
xfs_perag
*
pag
;
xfs_perag_busy_t
*
bsy
;
xfs_agblock_t
uend
,
bend
;
xfs_lsn_t
lsn
=
0
;
int
cnt
;
pag
=
xfs_perag_get
(
tp
->
t_mountp
,
agno
);
spin_lock
(
&
pag
->
pagb_lock
);
cnt
=
pag
->
pagb_count
;
trace_xfs_alloc_unbusy
(
mp
,
busyp
->
agno
,
busyp
->
bno
,
busyp
->
length
);
/*
* search pagb_list for this slot, skipping open slots. We have to
* search the entire array as there may be multiple overlaps and
* we have to get the most recent LSN for the log force to push out
* all the transactions that span the range.
*/
uend
=
bno
+
len
-
1
;
for
(
cnt
=
0
;
cnt
<
pag
->
pagb_count
;
cnt
++
)
{
bsy
=
&
pag
->
pagb_list
[
cnt
];
if
(
!
bsy
->
busy_tp
)
continue
;
ASSERT
(
xfs_alloc_busy_search
(
mp
,
busyp
->
agno
,
busyp
->
bno
,
busyp
->
length
)
==
1
);
bend
=
bsy
->
busy_start
+
bsy
->
busy_length
-
1
;
if
(
bno
>
bend
||
uend
<
bsy
->
busy_start
)
continue
;
list_del_init
(
&
busyp
->
list
);
/* (start1,length1) within (start2, length2) */
if
(
XFS_LSN_CMP
(
bsy
->
busy_tp
->
t_commit_lsn
,
lsn
)
>
0
)
lsn
=
bsy
->
busy_tp
->
t_commit_lsn
;
}
pag
=
xfs_perag_get
(
mp
,
busyp
->
agno
);
spin_lock
(
&
pag
->
pagb_lock
);
rb_erase
(
&
busyp
->
rb_node
,
&
pag
->
pagb_tree
);
spin_unlock
(
&
pag
->
pagb_lock
);
xfs_perag_put
(
pag
);
trace_xfs_alloc_busysearch
(
tp
->
t_mountp
,
agno
,
bno
,
len
,
lsn
);
/*
* If a block was found, force the log through the LSN of the
* transaction that freed the block
*/
if
(
lsn
)
xfs_log_force_lsn
(
tp
->
t_mountp
,
lsn
,
XFS_LOG_SYNC
);
kmem_free
(
busyp
);
}
fs/xfs/xfs_alloc.h
浏览文件 @
88e88374
...
...
@@ -22,6 +22,7 @@ struct xfs_buf;
struct
xfs_mount
;
struct
xfs_perag
;
struct
xfs_trans
;
struct
xfs_busy_extent
;
/*
* Freespace allocation types. Argument to xfs_alloc_[v]extent.
...
...
@@ -119,15 +120,13 @@ xfs_alloc_longest_free_extent(struct xfs_mount *mp,
#ifdef __KERNEL__
void
xfs_alloc_
mark_busy
(
xfs_trans_t
*
tp
,
xfs_alloc_
busy_insert
(
xfs_trans_t
*
tp
,
xfs_agnumber_t
agno
,
xfs_agblock_t
bno
,
xfs_extlen_t
len
);
void
xfs_alloc_clear_busy
(
xfs_trans_t
*
tp
,
xfs_agnumber_t
ag
,
int
idx
);
xfs_alloc_busy_clear
(
struct
xfs_mount
*
mp
,
struct
xfs_busy_extent
*
busyp
);
#endif
/* __KERNEL__ */
...
...
fs/xfs/xfs_alloc_btree.c
浏览文件 @
88e88374
...
...
@@ -134,7 +134,7 @@ xfs_allocbt_free_block(
* disk. If a busy block is allocated, the iclog is pushed up to the
* LSN that freed the block.
*/
xfs_alloc_
mark_busy
(
cur
->
bc_tp
,
be32_to_cpu
(
agf
->
agf_seqno
),
bno
,
1
);
xfs_alloc_
busy_insert
(
cur
->
bc_tp
,
be32_to_cpu
(
agf
->
agf_seqno
),
bno
,
1
);
xfs_trans_agbtree_delta
(
cur
->
bc_tp
,
-
1
);
return
0
;
}
...
...
fs/xfs/xfs_buf_item.c
浏览文件 @
88e88374
...
...
@@ -64,7 +64,7 @@ xfs_buf_item_log_debug(
nbytes
=
last
-
first
+
1
;
bfset
(
bip
->
bli_logged
,
first
,
nbytes
);
for
(
x
=
0
;
x
<
nbytes
;
x
++
)
{
chunk_num
=
byte
>>
XFS_BL
I
_SHIFT
;
chunk_num
=
byte
>>
XFS_BL
F
_SHIFT
;
word_num
=
chunk_num
>>
BIT_TO_WORD_SHIFT
;
bit_num
=
chunk_num
&
(
NBWORD
-
1
);
wordp
=
&
(
bip
->
bli_format
.
blf_data_map
[
word_num
]);
...
...
@@ -166,7 +166,7 @@ xfs_buf_item_size(
* cancel flag in it.
*/
trace_xfs_buf_item_size_stale
(
bip
);
ASSERT
(
bip
->
bli_format
.
blf_flags
&
XFS_BL
I
_CANCEL
);
ASSERT
(
bip
->
bli_format
.
blf_flags
&
XFS_BL
F
_CANCEL
);
return
1
;
}
...
...
@@ -197,9 +197,9 @@ xfs_buf_item_size(
}
else
if
(
next_bit
!=
last_bit
+
1
)
{
last_bit
=
next_bit
;
nvecs
++
;
}
else
if
(
xfs_buf_offset
(
bp
,
next_bit
*
XFS_BL
I
_CHUNK
)
!=
(
xfs_buf_offset
(
bp
,
last_bit
*
XFS_BL
I
_CHUNK
)
+
XFS_BL
I
_CHUNK
))
{
}
else
if
(
xfs_buf_offset
(
bp
,
next_bit
*
XFS_BL
F
_CHUNK
)
!=
(
xfs_buf_offset
(
bp
,
last_bit
*
XFS_BL
F
_CHUNK
)
+
XFS_BL
F
_CHUNK
))
{
last_bit
=
next_bit
;
nvecs
++
;
}
else
{
...
...
@@ -254,6 +254,20 @@ xfs_buf_item_format(
vecp
++
;
nvecs
=
1
;
/*
* If it is an inode buffer, transfer the in-memory state to the
* format flags and clear the in-memory state. We do not transfer
* this state if the inode buffer allocation has not yet been committed
* to the log as setting the XFS_BLI_INODE_BUF flag will prevent
* correct replay of the inode allocation.
*/
if
(
bip
->
bli_flags
&
XFS_BLI_INODE_BUF
)
{
if
(
!
((
bip
->
bli_flags
&
XFS_BLI_INODE_ALLOC_BUF
)
&&
xfs_log_item_in_current_chkpt
(
&
bip
->
bli_item
)))
bip
->
bli_format
.
blf_flags
|=
XFS_BLF_INODE_BUF
;
bip
->
bli_flags
&=
~
XFS_BLI_INODE_BUF
;
}
if
(
bip
->
bli_flags
&
XFS_BLI_STALE
)
{
/*
* The buffer is stale, so all we need to log
...
...
@@ -261,7 +275,7 @@ xfs_buf_item_format(
* cancel flag in it.
*/
trace_xfs_buf_item_format_stale
(
bip
);
ASSERT
(
bip
->
bli_format
.
blf_flags
&
XFS_BL
I
_CANCEL
);
ASSERT
(
bip
->
bli_format
.
blf_flags
&
XFS_BL
F
_CANCEL
);
bip
->
bli_format
.
blf_size
=
nvecs
;
return
;
}
...
...
@@ -294,28 +308,28 @@ xfs_buf_item_format(
* keep counting and scanning.
*/
if
(
next_bit
==
-
1
)
{
buffer_offset
=
first_bit
*
XFS_BL
I
_CHUNK
;
buffer_offset
=
first_bit
*
XFS_BL
F
_CHUNK
;
vecp
->
i_addr
=
xfs_buf_offset
(
bp
,
buffer_offset
);
vecp
->
i_len
=
nbits
*
XFS_BL
I
_CHUNK
;
vecp
->
i_len
=
nbits
*
XFS_BL
F
_CHUNK
;
vecp
->
i_type
=
XLOG_REG_TYPE_BCHUNK
;
nvecs
++
;
break
;
}
else
if
(
next_bit
!=
last_bit
+
1
)
{
buffer_offset
=
first_bit
*
XFS_BL
I
_CHUNK
;
buffer_offset
=
first_bit
*
XFS_BL
F
_CHUNK
;
vecp
->
i_addr
=
xfs_buf_offset
(
bp
,
buffer_offset
);
vecp
->
i_len
=
nbits
*
XFS_BL
I
_CHUNK
;
vecp
->
i_len
=
nbits
*
XFS_BL
F
_CHUNK
;
vecp
->
i_type
=
XLOG_REG_TYPE_BCHUNK
;
nvecs
++
;
vecp
++
;
first_bit
=
next_bit
;
last_bit
=
next_bit
;
nbits
=
1
;
}
else
if
(
xfs_buf_offset
(
bp
,
next_bit
<<
XFS_BL
I
_SHIFT
)
!=
(
xfs_buf_offset
(
bp
,
last_bit
<<
XFS_BL
I
_SHIFT
)
+
XFS_BL
I
_CHUNK
))
{
buffer_offset
=
first_bit
*
XFS_BL
I
_CHUNK
;
}
else
if
(
xfs_buf_offset
(
bp
,
next_bit
<<
XFS_BL
F
_SHIFT
)
!=
(
xfs_buf_offset
(
bp
,
last_bit
<<
XFS_BL
F
_SHIFT
)
+
XFS_BL
F
_CHUNK
))
{
buffer_offset
=
first_bit
*
XFS_BL
F
_CHUNK
;
vecp
->
i_addr
=
xfs_buf_offset
(
bp
,
buffer_offset
);
vecp
->
i_len
=
nbits
*
XFS_BL
I
_CHUNK
;
vecp
->
i_len
=
nbits
*
XFS_BL
F
_CHUNK
;
vecp
->
i_type
=
XLOG_REG_TYPE_BCHUNK
;
/* You would think we need to bump the nvecs here too, but we do not
* this number is used by recovery, and it gets confused by the boundary
...
...
@@ -341,10 +355,15 @@ xfs_buf_item_format(
}
/*
* This is called to pin the buffer associated with the buf log
* item in memory so it cannot be written out. Simply call bpin()
* on the buffer to do this.
* This is called to pin the buffer associated with the buf log item in memory
* so it cannot be written out. Simply call bpin() on the buffer to do this.
*
* We also always take a reference to the buffer log item here so that the bli
* is held while the item is pinned in memory. This means that we can
* unconditionally drop the reference count a transaction holds when the
* transaction is completed.
*/
STATIC
void
xfs_buf_item_pin
(
xfs_buf_log_item_t
*
bip
)
...
...
@@ -356,6 +375,7 @@ xfs_buf_item_pin(
ASSERT
(
atomic_read
(
&
bip
->
bli_refcount
)
>
0
);
ASSERT
((
bip
->
bli_flags
&
XFS_BLI_LOGGED
)
||
(
bip
->
bli_flags
&
XFS_BLI_STALE
));
atomic_inc
(
&
bip
->
bli_refcount
);
trace_xfs_buf_item_pin
(
bip
);
xfs_bpin
(
bp
);
}
...
...
@@ -393,7 +413,7 @@ xfs_buf_item_unpin(
ASSERT
(
XFS_BUF_VALUSEMA
(
bp
)
<=
0
);
ASSERT
(
!
(
XFS_BUF_ISDELAYWRITE
(
bp
)));
ASSERT
(
XFS_BUF_ISSTALE
(
bp
));
ASSERT
(
bip
->
bli_format
.
blf_flags
&
XFS_BL
I
_CANCEL
);
ASSERT
(
bip
->
bli_format
.
blf_flags
&
XFS_BL
F
_CANCEL
);
trace_xfs_buf_item_unpin_stale
(
bip
);
/*
...
...
@@ -489,20 +509,23 @@ xfs_buf_item_trylock(
}
/*
* Release the buffer associated with the buf log item.
* If there is no dirty logged data associated with the
* buffer recorded in the buf log item, then free the
* buf log item and remove the reference to it in the
* buffer.
* Release the buffer associated with the buf log item. If there is no dirty
* logged data associated with the buffer recorded in the buf log item, then
* free the buf log item and remove the reference to it in the buffer.
*
* This call ignores the recursion count. It is only called when the buffer
* should REALLY be unlocked, regardless of the recursion count.
*
* This call ignores the recursion count. It is only called
* when the buffer should REALLY be unlocked, regardless
* of the recursion count.
* We unconditionally drop the transaction's reference to the log item. If the
* item was logged, then another reference was taken when it was pinned, so we
* can safely drop the transaction reference now. This also allows us to avoid
* potential races with the unpin code freeing the bli by not referencing the
* bli after we've dropped the reference count.
*
* If the XFS_BLI_HOLD flag is set in the buf log item, then
*
free the log item if necessary but do not unlock the buffer.
*
This is for support of xfs_trans_bhold(). Make sure the
*
XFS_BLI_HOLD field is cleared if we don't
free the item.
* If the XFS_BLI_HOLD flag is set in the buf log item, then
free the log item
*
if necessary but do not unlock the buffer. This is for support of
*
xfs_trans_bhold(). Make sure the XFS_BLI_HOLD field is cleared if we don't
* free the item.
*/
STATIC
void
xfs_buf_item_unlock
(
...
...
@@ -514,73 +537,54 @@ xfs_buf_item_unlock(
bp
=
bip
->
bli_buf
;
/*
* Clear the buffer's association with this transaction.
*/
/* Clear the buffer's association with this transaction. */
XFS_BUF_SET_FSPRIVATE2
(
bp
,
NULL
);
/*
* If this is a transaction abort, don't return early.
* Instead, allow the brelse to happen.
* Normally it would be done for stale (cancelled) buffers
* at unpin time, but we'll never go through the pin/unpin
* cycle if we abort inside commit.
* If this is a transaction abort, don't return early. Instead, allow
* the brelse to happen. Normally it would be done for stale
* (cancelled) buffers at unpin time, but we'll never go through the
* pin/unpin cycle if we abort inside commit.
*/
aborted
=
(
bip
->
bli_item
.
li_flags
&
XFS_LI_ABORTED
)
!=
0
;
/*
* If the buf item is marked stale, then don't do anything.
* We'll unlock the buffer and free the buf item when the
* buffer is unpinned for the last time.
* Before possibly freeing the buf item, determine if we should
* release the buffer at the end of this routine.
*/
if
(
bip
->
bli_flags
&
XFS_BLI_STALE
)
{
bip
->
bli_flags
&=
~
XFS_BLI_LOGGED
;
trace_xfs_buf_item_unlock_stale
(
bip
);
ASSERT
(
bip
->
bli_format
.
blf_flags
&
XFS_BLI_CANCEL
);
if
(
!
aborted
)
return
;
}
hold
=
bip
->
bli_flags
&
XFS_BLI_HOLD
;
/* Clear the per transaction state. */
bip
->
bli_flags
&=
~
(
XFS_BLI_LOGGED
|
XFS_BLI_HOLD
);
/*
* Drop the transaction's reference to the log item if
* it was not logged as part of the transaction. Otherwise
* we'll drop the reference in xfs_buf_item_unpin() when
* the transaction is really through with the buffer.
* If the buf item is marked stale, then don't do anything. We'll
* unlock the buffer and free the buf item when the buffer is unpinned
* for the last time.
*/
if
(
!
(
bip
->
bli_flags
&
XFS_BLI_LOGGED
))
{
atomic_dec
(
&
bip
->
bli_refcount
);
}
else
{
/*
* Clear the logged flag since this is per
* transaction state.
*/
bip
->
bli_flags
&=
~
XFS_BLI_LOGGED
;
if
(
bip
->
bli_flags
&
XFS_BLI_STALE
)
{
trace_xfs_buf_item_unlock_stale
(
bip
);
ASSERT
(
bip
->
bli_format
.
blf_flags
&
XFS_BLF_CANCEL
);
if
(
!
aborted
)
{
atomic_dec
(
&
bip
->
bli_refcount
);
return
;
}
}
/*
* Before possibly freeing the buf item, determine if we should
* release the buffer at the end of this routine.
*/
hold
=
bip
->
bli_flags
&
XFS_BLI_HOLD
;
trace_xfs_buf_item_unlock
(
bip
);
/*
* If the buf item isn't tracking any data, free it
.
*
Otherwise, if XFS_BLI_HOLD is set clear
it.
* If the buf item isn't tracking any data, free it
, otherwise drop the
*
reference we hold to
it.
*/
if
(
xfs_bitmap_empty
(
bip
->
bli_format
.
blf_data_map
,
bip
->
bli_format
.
blf_map_size
))
{
bip
->
bli_format
.
blf_map_size
))
xfs_buf_item_relse
(
bp
);
}
else
if
(
hold
)
{
bip
->
bli_flags
&=
~
XFS_BLI_HOLD
;
}
else
atomic_dec
(
&
bip
->
bli_refcount
);
/*
* Release the buffer if XFS_BLI_HOLD was not set.
*/
if
(
!
hold
)
{
if
(
!
hold
)
xfs_buf_relse
(
bp
);
}
}
/*
...
...
@@ -717,12 +721,12 @@ xfs_buf_item_init(
}
/*
* chunks is the number of XFS_BL
I
_CHUNK size pieces
* chunks is the number of XFS_BL
F
_CHUNK size pieces
* the buffer can be divided into. Make sure not to
* truncate any pieces. map_size is the size of the
* bitmap needed to describe the chunks of the buffer.
*/
chunks
=
(
int
)((
XFS_BUF_COUNT
(
bp
)
+
(
XFS_BL
I_CHUNK
-
1
))
>>
XFS_BLI
_SHIFT
);
chunks
=
(
int
)((
XFS_BUF_COUNT
(
bp
)
+
(
XFS_BL
F_CHUNK
-
1
))
>>
XFS_BLF
_SHIFT
);
map_size
=
(
int
)((
chunks
+
NBWORD
)
>>
BIT_TO_WORD_SHIFT
);
bip
=
(
xfs_buf_log_item_t
*
)
kmem_zone_zalloc
(
xfs_buf_item_zone
,
...
...
@@ -790,8 +794,8 @@ xfs_buf_item_log(
/*
* Convert byte offsets to bit numbers.
*/
first_bit
=
first
>>
XFS_BL
I
_SHIFT
;
last_bit
=
last
>>
XFS_BL
I
_SHIFT
;
first_bit
=
first
>>
XFS_BL
F
_SHIFT
;
last_bit
=
last
>>
XFS_BL
F
_SHIFT
;
/*
* Calculate the total number of bits to be set.
...
...
fs/xfs/xfs_buf_item.h
浏览文件 @
88e88374
...
...
@@ -41,22 +41,22 @@ typedef struct xfs_buf_log_format {
* This flag indicates that the buffer contains on disk inodes
* and requires special recovery handling.
*/
#define XFS_BL
I
_INODE_BUF 0x1
#define XFS_BL
F
_INODE_BUF 0x1
/*
* This flag indicates that the buffer should not be replayed
* during recovery because its blocks are being freed.
*/
#define XFS_BL
I
_CANCEL 0x2
#define XFS_BL
F
_CANCEL 0x2
/*
* This flag indicates that the buffer contains on disk
* user or group dquots and may require special recovery handling.
*/
#define XFS_BL
I
_UDQUOT_BUF 0x4
#define XFS_BL
I
_PDQUOT_BUF 0x8
#define XFS_BL
I
_GDQUOT_BUF 0x10
#define XFS_BL
F
_UDQUOT_BUF 0x4
#define XFS_BL
F
_PDQUOT_BUF 0x8
#define XFS_BL
F
_GDQUOT_BUF 0x10
#define XFS_BL
I
_CHUNK 128
#define XFS_BL
I
_SHIFT 7
#define XFS_BL
F
_CHUNK 128
#define XFS_BL
F
_SHIFT 7
#define BIT_TO_WORD_SHIFT 5
#define NBWORD (NBBY * sizeof(unsigned int))
...
...
@@ -69,6 +69,7 @@ typedef struct xfs_buf_log_format {
#define XFS_BLI_LOGGED 0x08
#define XFS_BLI_INODE_ALLOC_BUF 0x10
#define XFS_BLI_STALE_INODE 0x20
#define XFS_BLI_INODE_BUF 0x40
#define XFS_BLI_FLAGS \
{ XFS_BLI_HOLD, "HOLD" }, \
...
...
@@ -76,7 +77,8 @@ typedef struct xfs_buf_log_format {
{ XFS_BLI_STALE, "STALE" }, \
{ XFS_BLI_LOGGED, "LOGGED" }, \
{ XFS_BLI_INODE_ALLOC_BUF, "INODE_ALLOC" }, \
{ XFS_BLI_STALE_INODE, "STALE_INODE" }
{ XFS_BLI_STALE_INODE, "STALE_INODE" }, \
{ XFS_BLI_INODE_BUF, "INODE_BUF" }
#ifdef __KERNEL__
...
...
fs/xfs/xfs_error.c
浏览文件 @
88e88374
...
...
@@ -170,7 +170,7 @@ xfs_cmn_err(int panic_tag, int level, xfs_mount_t *mp, char *fmt, ...)
va_list
ap
;
#ifdef DEBUG
xfs_panic_mask
|=
XFS_PTAG_SHUTDOWN_CORRUPT
;
xfs_panic_mask
|=
(
XFS_PTAG_SHUTDOWN_CORRUPT
|
XFS_PTAG_LOGRES
)
;
#endif
if
(
xfs_panic_mask
&&
(
xfs_panic_mask
&
panic_tag
)
...
...
fs/xfs/xfs_log.c
浏览文件 @
88e88374
...
...
@@ -54,9 +54,6 @@ STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp,
STATIC
int
xlog_space_left
(
xlog_t
*
log
,
int
cycle
,
int
bytes
);
STATIC
int
xlog_sync
(
xlog_t
*
log
,
xlog_in_core_t
*
iclog
);
STATIC
void
xlog_dealloc_log
(
xlog_t
*
log
);
STATIC
int
xlog_write
(
struct
log
*
log
,
struct
xfs_log_vec
*
log_vector
,
struct
xlog_ticket
*
tic
,
xfs_lsn_t
*
start_lsn
,
xlog_in_core_t
**
commit_iclog
,
uint
flags
);
/* local state machine functions */
STATIC
void
xlog_state_done_syncing
(
xlog_in_core_t
*
iclog
,
int
);
...
...
@@ -86,14 +83,6 @@ STATIC int xlog_regrant_write_log_space(xlog_t *log,
STATIC
void
xlog_ungrant_log_space
(
xlog_t
*
log
,
xlog_ticket_t
*
ticket
);
/* local ticket functions */
STATIC
xlog_ticket_t
*
xlog_ticket_alloc
(
xlog_t
*
log
,
int
unit_bytes
,
int
count
,
char
clientid
,
uint
flags
);
#if defined(DEBUG)
STATIC
void
xlog_verify_dest_ptr
(
xlog_t
*
log
,
char
*
ptr
);
STATIC
void
xlog_verify_grant_head
(
xlog_t
*
log
,
int
equals
);
...
...
@@ -360,6 +349,15 @@ xfs_log_reserve(
ASSERT
(
flags
&
XFS_LOG_PERM_RESERV
);
internal_ticket
=
*
ticket
;
/*
* this is a new transaction on the ticket, so we need to
* change the transaction ID so that the next transaction has a
* different TID in the log. Just add one to the existing tid
* so that we can see chains of rolling transactions in the log
* easily.
*/
internal_ticket
->
t_tid
++
;
trace_xfs_log_reserve
(
log
,
internal_ticket
);
xlog_grant_push_ail
(
mp
,
internal_ticket
->
t_unit_res
);
...
...
@@ -367,7 +365,8 @@ xfs_log_reserve(
}
else
{
/* may sleep if need to allocate more tickets */
internal_ticket
=
xlog_ticket_alloc
(
log
,
unit_bytes
,
cnt
,
client
,
flags
);
client
,
flags
,
KM_SLEEP
|
KM_MAYFAIL
);
if
(
!
internal_ticket
)
return
XFS_ERROR
(
ENOMEM
);
internal_ticket
->
t_trans_type
=
t_type
;
...
...
@@ -452,6 +451,13 @@ xfs_log_mount(
/* Normal transactions can now occur */
mp
->
m_log
->
l_flags
&=
~
XLOG_ACTIVE_RECOVERY
;
/*
* Now the log has been fully initialised and we know where our
* space grant counters are, we can initialise the permanent ticket
* needed for delayed logging to work.
*/
xlog_cil_init_post_recovery
(
mp
->
m_log
);
return
0
;
out_destroy_ail:
...
...
@@ -658,6 +664,10 @@ xfs_log_item_init(
item
->
li_ailp
=
mp
->
m_ail
;
item
->
li_type
=
type
;
item
->
li_ops
=
ops
;
item
->
li_lv
=
NULL
;
INIT_LIST_HEAD
(
&
item
->
li_ail
);
INIT_LIST_HEAD
(
&
item
->
li_cil
);
}
/*
...
...
@@ -1168,6 +1178,9 @@ xlog_alloc_log(xfs_mount_t *mp,
*
iclogp
=
log
->
l_iclog
;
/* complete ring */
log
->
l_iclog
->
ic_prev
=
prev_iclog
;
/* re-write 1st prev ptr */
error
=
xlog_cil_init
(
log
);
if
(
error
)
goto
out_free_iclog
;
return
log
;
out_free_iclog:
...
...
@@ -1494,6 +1507,8 @@ xlog_dealloc_log(xlog_t *log)
xlog_in_core_t
*
iclog
,
*
next_iclog
;
int
i
;
xlog_cil_destroy
(
log
);
iclog
=
log
->
l_iclog
;
for
(
i
=
0
;
i
<
log
->
l_iclog_bufs
;
i
++
)
{
sv_destroy
(
&
iclog
->
ic_force_wait
);
...
...
@@ -1536,8 +1551,10 @@ xlog_state_finish_copy(xlog_t *log,
* print out info relating to regions written which consume
* the reservation
*/
STATIC
void
xlog_print_tic_res
(
xfs_mount_t
*
mp
,
xlog_ticket_t
*
ticket
)
void
xlog_print_tic_res
(
struct
xfs_mount
*
mp
,
struct
xlog_ticket
*
ticket
)
{
uint
i
;
uint
ophdr_spc
=
ticket
->
t_res_num_ophdrs
*
(
uint
)
sizeof
(
xlog_op_header_t
);
...
...
@@ -1637,6 +1654,10 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
"bad-rtype"
:
res_type_str
[
r_type
-
1
]),
ticket
->
t_res_arr
[
i
].
r_len
);
}
xfs_cmn_err
(
XFS_PTAG_LOGRES
,
CE_ALERT
,
mp
,
"xfs_log_write: reservation ran out. Need to up reservation"
);
xfs_force_shutdown
(
mp
,
SHUTDOWN_CORRUPT_INCORE
);
}
/*
...
...
@@ -1865,7 +1886,7 @@ xlog_write_copy_finish(
* we don't update ic_offset until the end when we know exactly how many
* bytes have been written out.
*/
STATIC
int
int
xlog_write
(
struct
log
*
log
,
struct
xfs_log_vec
*
log_vector
,
...
...
@@ -1889,22 +1910,26 @@ xlog_write(
*
start_lsn
=
0
;
len
=
xlog_write_calc_vec_length
(
ticket
,
log_vector
);
if
(
ticket
->
t_curr_res
<
len
)
{
xlog_print_tic_res
(
log
->
l_mp
,
ticket
);
#ifdef DEBUG
xlog_panic
(
"xfs_log_write: reservation ran out. Need to up reservation"
);
#else
/* Customer configurable panic */
xfs_cmn_err
(
XFS_PTAG_LOGRES
,
CE_ALERT
,
log
->
l_mp
,
"xfs_log_write: reservation ran out. Need to up reservation"
);
if
(
log
->
l_cilp
)
{
/*
* Region headers and bytes are already accounted for.
* We only need to take into account start records and
* split regions in this function.
*/
if
(
ticket
->
t_flags
&
XLOG_TIC_INITED
)
ticket
->
t_curr_res
-=
sizeof
(
xlog_op_header_t
);
/* If we did not panic, shutdown the filesystem */
xfs_force_shutdown
(
log
->
l_mp
,
SHUTDOWN_CORRUPT_INCORE
);
#endif
}
/*
* Commit record headers need to be accounted for. These
* come in as separate writes so are easy to detect.
*/
if
(
flags
&
(
XLOG_COMMIT_TRANS
|
XLOG_UNMOUNT_TRANS
))
ticket
->
t_curr_res
-=
sizeof
(
xlog_op_header_t
);
}
else
ticket
->
t_curr_res
-=
len
;
ticket
->
t_curr_res
-=
len
;
if
(
ticket
->
t_curr_res
<
0
)
xlog_print_tic_res
(
log
->
l_mp
,
ticket
);
index
=
0
;
lv
=
log_vector
;
...
...
@@ -3000,6 +3025,8 @@ _xfs_log_force(
XFS_STATS_INC
(
xs_log_force
);
xlog_cil_push
(
log
,
1
);
spin_lock
(
&
log
->
l_icloglock
);
iclog
=
log
->
l_iclog
;
...
...
@@ -3149,6 +3176,12 @@ _xfs_log_force_lsn(
XFS_STATS_INC
(
xs_log_force
);
if
(
log
->
l_cilp
)
{
lsn
=
xlog_cil_push_lsn
(
log
,
lsn
);
if
(
lsn
==
NULLCOMMITLSN
)
return
0
;
}
try_again:
spin_lock
(
&
log
->
l_icloglock
);
iclog
=
log
->
l_iclog
;
...
...
@@ -3313,22 +3346,30 @@ xfs_log_ticket_get(
return
ticket
;
}
/*
 * Return the transaction identifier stored in the log ticket that is
 * attached to the given transaction.
 */
xlog_tid_t
xfs_log_get_trans_ident(
	struct xfs_trans	*tp)
{
	xlog_tid_t		tid = tp->t_ticket->t_tid;

	return tid;
}
/*
* Allocate and initialise a new log ticket.
*/
STATIC
xlog_ticket_t
*
xlog_ticket_t
*
xlog_ticket_alloc
(
struct
log
*
log
,
int
unit_bytes
,
int
cnt
,
char
client
,
uint
xflags
)
uint
xflags
,
int
alloc_flags
)
{
struct
xlog_ticket
*
tic
;
uint
num_headers
;
int
iclog_space
;
tic
=
kmem_zone_zalloc
(
xfs_log_ticket_zone
,
KM_SLEEP
|
KM_MAYFAIL
);
tic
=
kmem_zone_zalloc
(
xfs_log_ticket_zone
,
alloc_flags
);
if
(
!
tic
)
return
NULL
;
...
...
@@ -3647,6 +3688,11 @@ xlog_state_ioerror(
* c. nothing new gets queued up after (a) and (b) are done.
* d. if !logerror, flush the iclogs to disk, then seal them off
* for business.
*
* Note: for delayed logging the !logerror case needs to flush the regions
* held in memory out to the iclogs before flushing them to disk. This needs
* to be done before the log is marked as shutdown, otherwise the flush to the
* iclogs will fail.
*/
int
xfs_log_force_umount
(
...
...
@@ -3680,6 +3726,16 @@ xfs_log_force_umount(
return
1
;
}
retval
=
0
;
/*
* Flush the in memory commit item list before marking the log as
* being shut down. We need to do it in this order to ensure all the
* completed transactions are flushed to disk with the xfs_log_force()
* call below.
*/
if
(
!
logerror
&&
(
mp
->
m_flags
&
XFS_MOUNT_DELAYLOG
))
xlog_cil_push
(
log
,
1
);
/*
* We must hold both the GRANT lock and the LOG lock,
* before we mark the filesystem SHUTDOWN and wake
...
...
fs/xfs/xfs_log.h
浏览文件 @
88e88374
...
...
@@ -19,7 +19,6 @@
#define __XFS_LOG_H__
/* get lsn fields */
#define CYCLE_LSN(lsn) ((uint)((lsn)>>32))
#define BLOCK_LSN(lsn) ((uint)(lsn))
...
...
@@ -114,6 +113,9 @@ struct xfs_log_vec {
struct
xfs_log_vec
*
lv_next
;
/* next lv in build list */
int
lv_niovecs
;
/* number of iovecs in lv */
struct
xfs_log_iovec
*
lv_iovecp
;
/* iovec array */
struct
xfs_log_item
*
lv_item
;
/* owner */
char
*
lv_buf
;
/* formatted buffer */
int
lv_buf_len
;
/* size of formatted buffer */
};
/*
...
...
@@ -134,6 +136,7 @@ struct xlog_in_core;
struct
xlog_ticket
;
struct
xfs_log_item
;
struct
xfs_item_ops
;
struct
xfs_trans
;
void
xfs_log_item_init
(
struct
xfs_mount
*
mp
,
struct
xfs_log_item
*
item
,
...
...
@@ -187,9 +190,16 @@ int xfs_log_need_covered(struct xfs_mount *mp);
void
xlog_iodone
(
struct
xfs_buf
*
);
struct
xlog_ticket
*
xfs_log_ticket_get
(
struct
xlog_ticket
*
ticket
);
struct
xlog_ticket
*
xfs_log_ticket_get
(
struct
xlog_ticket
*
ticket
);
void
xfs_log_ticket_put
(
struct
xlog_ticket
*
ticket
);
xlog_tid_t
xfs_log_get_trans_ident
(
struct
xfs_trans
*
tp
);
int
xfs_log_commit_cil
(
struct
xfs_mount
*
mp
,
struct
xfs_trans
*
tp
,
struct
xfs_log_vec
*
log_vector
,
xfs_lsn_t
*
commit_lsn
,
int
flags
);
bool
xfs_log_item_in_current_chkpt
(
struct
xfs_log_item
*
lip
);
#endif
...
...
fs/xfs/xfs_log_cil.c
0 → 100644
浏览文件 @
88e88374
此差异已折叠。
点击以展开。
fs/xfs/xfs_log_priv.h
浏览文件 @
88e88374
...
...
@@ -152,8 +152,6 @@ static inline uint xlog_get_client_id(__be32 i)
#define XLOG_RECOVERY_NEEDED 0x4
/* log was recovered */
#define XLOG_IO_ERROR 0x8
/* log hit an I/O error, and being
shutdown */
typedef
__uint32_t
xlog_tid_t
;
#ifdef __KERNEL__
/*
...
...
@@ -378,6 +376,99 @@ typedef struct xlog_in_core {
#define ic_header ic_data->hic_header
}
xlog_in_core_t
;
/*
* The CIL context is used to aggregate per-transaction details as well as to be
* passed to the iclog for checkpoint post-commit processing. After being
* passed to the iclog, another context needs to be allocated for tracking the
* next set of transactions to be aggregated into a checkpoint.
*/
struct
xfs_cil
;
struct xfs_cil_ctx {
	struct xfs_cil		*cil;		/* owning CIL (presumably; confirm) */
	xfs_lsn_t		sequence;	/* checkpoint sequence number */
	xfs_lsn_t		start_lsn;	/* first LSN of the checkpoint commit */
	xfs_lsn_t		commit_lsn;	/* LSN of the checkpoint commit record */
	struct xlog_ticket	*ticket;	/* checkpoint ticket */
	int			nvecs;		/* number of regions */
	int			space_used;	/* aggregate size of the regions */
	struct list_head	busy_extents;	/* busy extents in the checkpoint */
	struct xfs_log_vec	*lv_chain;	/* log vectors being pushed */
	xfs_log_callback_t	log_cb;		/* completion callback hook */
	struct list_head	committing;	/* ctx committing list */
};
/*
* Committed Item List structure
*
* This structure is used to track log items that have been committed but not
* yet written into the log. It is used only when the delayed logging mount
* option is enabled.
*
* This structure tracks the list of committing checkpoint contexts so
* we can avoid the problem of having to hold out new transactions during a
* flush until we have the commit record LSN of the checkpoint. We can
* traverse the list of committing contexts in xlog_cil_push_lsn() to find a
* sequence match and extract the commit LSN directly from there. If the
* checkpoint is still in the process of committing, we can block waiting for
* the commit LSN to be determined as well. This should make synchronous
* operations almost as efficient as the old logging methods.
*/
struct xfs_cil {
	struct log		*xc_log;	/* log this CIL is attached to (presumably; confirm) */
	struct list_head	xc_cil;		/* list head; presumably the committed items - confirm */
	spinlock_t		xc_cil_lock;	/* presumably protects xc_cil - confirm */
	struct xfs_cil_ctx	*xc_ctx;	/* a checkpoint context; presumably the current one */
	struct rw_semaphore	xc_ctx_lock;	/* presumably serialises xc_ctx changes - confirm */
	struct list_head	xc_committing;	/* list of committing checkpoint contexts */
	sv_t			xc_commit_wait;	/* presumably waited on for a commit LSN - confirm */
};
/*
* The amount of log space we should allow the CIL to aggregate is difficult to
* size. Whatever we choose, we have to make sure we can get a reservation for
* the log space
* effectively, that it is large enough to capture sufficient relogging to
* reduce log buffer IO significantly, but it is not too large for the log or
* induces too much latency when writing out through the iclogs. We track both
* space consumed and the number of vectors in the checkpoint context, so we
* need to decide which to use for limiting.
*
* Every log buffer we write out during a push needs a header reserved, which
* is at least one sector and more for v2 logs. Hence we need a reservation of
* at least 512 bytes per 32k of log space just for the LR headers. That means
* 16KB of reservation per megabyte of delayed logging space we will consume,
* plus various headers. The number of headers will vary based on the num of
* io vectors, so limiting on a specific number of vectors is going to result
* in transactions of varying size. IOWs, it is more consistent to track and
* limit space consumed in the log rather than by the number of objects being
* logged in order to prevent checkpoint ticket overruns.
*
* Further, use of static reservations through the log grant mechanism is
* problematic. It introduces a lot of complexity (e.g. reserve grant vs write
* grant) and a significant deadlock potential because regranting write space
* can block on log pushes. Hence if we have to regrant log space during a log
* push, we can deadlock.
*
* However, we can avoid this by use of a dynamic "reservation stealing"
* technique during transaction commit whereby unused reservation space in the
* transaction ticket is transferred to the CIL ctx commit ticket to cover the
* space needed by the checkpoint transaction. This means that we never need to
* specifically reserve space for the CIL checkpoint transaction, nor do we
* need to regrant space once the checkpoint completes. This also means the
* checkpoint transaction ticket is specific to the checkpoint context, rather
* than the CIL itself.
*
* With dynamic reservations, we can basically make up arbitrary limits for the
* checkpoint size so long as they don't violate any other size rules. Hence
* the initial maximum size for the checkpoint transaction will be set to a
* quarter of the log or 8MB, whichever is smaller. 8MB is an arbitrary limit
* right now based on the latency of writing out a large amount of data through
* the circular iclog buffers.
*/
/*
 * Maximum log space a CIL checkpoint may aggregate: a quarter of the log
 * size, capped at 8MB. The macro argument is parenthesized so that any
 * pointer-valued expression, not just a plain identifier, expands correctly.
 */
#define XLOG_CIL_SPACE_LIMIT(log)	\
	(min(((log)->l_logsize >> 2), (8 * 1024 * 1024)))
/*
* The reservation head lsn is not made up of a cycle number and block number.
* Instead, it uses a cycle number and byte number. Logs don't expect to
...
...
@@ -388,6 +479,7 @@ typedef struct log {
/* The following fields don't need locking */
struct
xfs_mount
*
l_mp
;
/* mount point */
struct
xfs_ail
*
l_ailp
;
/* AIL log is working with */
struct
xfs_cil
*
l_cilp
;
/* CIL log is working with */
struct
xfs_buf
*
l_xbuf
;
/* extra buffer for log
* wrapping */
struct
xfs_buftarg
*
l_targ
;
/* buftarg of log */
...
...
@@ -438,14 +530,17 @@ typedef struct log {
#define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR)
/* common routines */
extern
xfs_lsn_t
xlog_assign_tail_lsn
(
struct
xfs_mount
*
mp
);
extern
int
xlog_recover
(
xlog_t
*
log
);
extern
int
xlog_recover_finish
(
xlog_t
*
log
);
extern
void
xlog_pack_data
(
xlog_t
*
log
,
xlog_in_core_t
*
iclog
,
int
);
extern
kmem_zone_t
*
xfs_log_ticket_zone
;
extern
kmem_zone_t
*
xfs_log_ticket_zone
;
struct
xlog_ticket
*
xlog_ticket_alloc
(
struct
log
*
log
,
int
unit_bytes
,
int
count
,
char
client
,
uint
xflags
,
int
alloc_flags
);
static
inline
void
xlog_write_adv_cnt
(
void
**
ptr
,
int
*
len
,
int
*
off
,
size_t
bytes
)
...
...
@@ -455,6 +550,21 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
*
off
+=
bytes
;
}
void
xlog_print_tic_res
(
struct
xfs_mount
*
mp
,
struct
xlog_ticket
*
ticket
);
int
xlog_write
(
struct
log
*
log
,
struct
xfs_log_vec
*
log_vector
,
struct
xlog_ticket
*
tic
,
xfs_lsn_t
*
start_lsn
,
xlog_in_core_t
**
commit_iclog
,
uint
flags
);
/*
* Committed Item List interfaces
*/
int
xlog_cil_init
(
struct
log
*
log
);
void
xlog_cil_init_post_recovery
(
struct
log
*
log
);
void
xlog_cil_destroy
(
struct
log
*
log
);
int
xlog_cil_push
(
struct
log
*
log
,
int
push_now
);
xfs_lsn_t
xlog_cil_push_lsn
(
struct
log
*
log
,
xfs_lsn_t
push_sequence
);
/*
* Unmount record type is used as a pseudo transaction type for the ticket.
* Its value must be outside the range of XFS_TRANS_* values.
...
...
fs/xfs/xfs_log_recover.c
浏览文件 @
88e88374
...
...
@@ -1576,7 +1576,7 @@ xlog_recover_reorder_trans(
switch
(
ITEM_TYPE
(
item
))
{
case
XFS_LI_BUF
:
if
(
!
(
buf_f
->
blf_flags
&
XFS_BL
I
_CANCEL
))
{
if
(
!
(
buf_f
->
blf_flags
&
XFS_BL
F
_CANCEL
))
{
trace_xfs_log_recover_item_reorder_head
(
log
,
trans
,
item
,
pass
);
list_move
(
&
item
->
ri_list
,
&
trans
->
r_itemq
);
...
...
@@ -1638,7 +1638,7 @@ xlog_recover_do_buffer_pass1(
/*
* If this isn't a cancel buffer item, then just return.
*/
if
(
!
(
flags
&
XFS_BL
I
_CANCEL
))
{
if
(
!
(
flags
&
XFS_BL
F
_CANCEL
))
{
trace_xfs_log_recover_buf_not_cancel
(
log
,
buf_f
);
return
;
}
...
...
@@ -1696,7 +1696,7 @@ xlog_recover_do_buffer_pass1(
* Check to see whether the buffer being recovered has a corresponding
* entry in the buffer cancel record table. If it does then return 1
* so that it will be cancelled, otherwise return 0. If the buffer is
* actually a buffer cancel item (XFS_BL
I
_CANCEL is set), then decrement
* actually a buffer cancel item (XFS_BL
F
_CANCEL is set), then decrement
* the refcount on the entry in the table and remove it from the table
* if this is the last reference.
*
...
...
@@ -1721,7 +1721,7 @@ xlog_check_buffer_cancelled(
* There is nothing in the table built in pass one,
* so this buffer must not be cancelled.
*/
ASSERT
(
!
(
flags
&
XFS_BL
I
_CANCEL
));
ASSERT
(
!
(
flags
&
XFS_BL
F
_CANCEL
));
return
0
;
}
...
...
@@ -1733,7 +1733,7 @@ xlog_check_buffer_cancelled(
* There is no corresponding entry in the table built
* in pass one, so this buffer has not been cancelled.
*/
ASSERT
(
!
(
flags
&
XFS_BL
I
_CANCEL
));
ASSERT
(
!
(
flags
&
XFS_BL
F
_CANCEL
));
return
0
;
}
...
...
@@ -1752,7 +1752,7 @@ xlog_check_buffer_cancelled(
* one in the table and remove it if this is the
* last reference.
*/
if
(
flags
&
XFS_BL
I
_CANCEL
)
{
if
(
flags
&
XFS_BL
F
_CANCEL
)
{
bcp
->
bc_refcount
--
;
if
(
bcp
->
bc_refcount
==
0
)
{
if
(
prevp
==
NULL
)
{
...
...
@@ -1772,7 +1772,7 @@ xlog_check_buffer_cancelled(
* We didn't find a corresponding entry in the table, so
* return 0 so that the buffer is NOT cancelled.
*/
ASSERT
(
!
(
flags
&
XFS_BL
I
_CANCEL
));
ASSERT
(
!
(
flags
&
XFS_BL
F
_CANCEL
));
return
0
;
}
...
...
@@ -1874,8 +1874,8 @@ xlog_recover_do_inode_buffer(
nbits
=
xfs_contig_bits
(
data_map
,
map_size
,
bit
);
ASSERT
(
nbits
>
0
);
reg_buf_offset
=
bit
<<
XFS_BL
I
_SHIFT
;
reg_buf_bytes
=
nbits
<<
XFS_BL
I
_SHIFT
;
reg_buf_offset
=
bit
<<
XFS_BL
F
_SHIFT
;
reg_buf_bytes
=
nbits
<<
XFS_BL
F
_SHIFT
;
item_index
++
;
}
...
...
@@ -1889,7 +1889,7 @@ xlog_recover_do_inode_buffer(
}
ASSERT
(
item
->
ri_buf
[
item_index
].
i_addr
!=
NULL
);
ASSERT
((
item
->
ri_buf
[
item_index
].
i_len
%
XFS_BL
I
_CHUNK
)
==
0
);
ASSERT
((
item
->
ri_buf
[
item_index
].
i_len
%
XFS_BL
F
_CHUNK
)
==
0
);
ASSERT
((
reg_buf_offset
+
reg_buf_bytes
)
<=
XFS_BUF_COUNT
(
bp
));
/*
...
...
@@ -1955,9 +1955,9 @@ xlog_recover_do_reg_buffer(
nbits
=
xfs_contig_bits
(
data_map
,
map_size
,
bit
);
ASSERT
(
nbits
>
0
);
ASSERT
(
item
->
ri_buf
[
i
].
i_addr
!=
NULL
);
ASSERT
(
item
->
ri_buf
[
i
].
i_len
%
XFS_BL
I
_CHUNK
==
0
);
ASSERT
(
item
->
ri_buf
[
i
].
i_len
%
XFS_BL
F
_CHUNK
==
0
);
ASSERT
(
XFS_BUF_COUNT
(
bp
)
>=
((
uint
)
bit
<<
XFS_BL
I_SHIFT
)
+
(
nbits
<<
XFS_BLI
_SHIFT
));
((
uint
)
bit
<<
XFS_BL
F_SHIFT
)
+
(
nbits
<<
XFS_BLF
_SHIFT
));
/*
* Do a sanity check if this is a dquot buffer. Just checking
...
...
@@ -1966,7 +1966,7 @@ xlog_recover_do_reg_buffer(
*/
error
=
0
;
if
(
buf_f
->
blf_flags
&
(
XFS_BL
I_UDQUOT_BUF
|
XFS_BLI_PDQUOT_BUF
|
XFS_BLI
_GDQUOT_BUF
))
{
(
XFS_BL
F_UDQUOT_BUF
|
XFS_BLF_PDQUOT_BUF
|
XFS_BLF
_GDQUOT_BUF
))
{
if
(
item
->
ri_buf
[
i
].
i_addr
==
NULL
)
{
cmn_err
(
CE_ALERT
,
"XFS: NULL dquot in %s."
,
__func__
);
...
...
@@ -1987,9 +1987,9 @@ xlog_recover_do_reg_buffer(
}
memcpy
(
xfs_buf_offset
(
bp
,
(
uint
)
bit
<<
XFS_BL
I
_SHIFT
),
/* dest */
(
uint
)
bit
<<
XFS_BL
F
_SHIFT
),
/* dest */
item
->
ri_buf
[
i
].
i_addr
,
/* source */
nbits
<<
XFS_BL
I
_SHIFT
);
/* length */
nbits
<<
XFS_BL
F
_SHIFT
);
/* length */
next:
i
++
;
bit
+=
nbits
;
...
...
@@ -2148,11 +2148,11 @@ xlog_recover_do_dquot_buffer(
}
type
=
0
;
if
(
buf_f
->
blf_flags
&
XFS_BL
I
_UDQUOT_BUF
)
if
(
buf_f
->
blf_flags
&
XFS_BL
F
_UDQUOT_BUF
)
type
|=
XFS_DQ_USER
;
if
(
buf_f
->
blf_flags
&
XFS_BL
I
_PDQUOT_BUF
)
if
(
buf_f
->
blf_flags
&
XFS_BL
F
_PDQUOT_BUF
)
type
|=
XFS_DQ_PROJ
;
if
(
buf_f
->
blf_flags
&
XFS_BL
I
_GDQUOT_BUF
)
if
(
buf_f
->
blf_flags
&
XFS_BL
F
_GDQUOT_BUF
)
type
|=
XFS_DQ_GROUP
;
/*
* This type of quotas was turned off, so ignore this buffer
...
...
@@ -2173,7 +2173,7 @@ xlog_recover_do_dquot_buffer(
* here which overlaps that may be stale.
*
* When meta-data buffers are freed at run time we log a buffer item
* with the XFS_BL
I
_CANCEL bit set to indicate that previous copies
* with the XFS_BL
F
_CANCEL bit set to indicate that previous copies
* of the buffer in the log should not be replayed at recovery time.
* This is so that if the blocks covered by the buffer are reused for
* file data before we crash we don't end up replaying old, freed
...
...
@@ -2207,7 +2207,7 @@ xlog_recover_do_buffer_trans(
if
(
pass
==
XLOG_RECOVER_PASS1
)
{
/*
* In this pass we're only looking for buf items
* with the XFS_BL
I
_CANCEL bit set.
* with the XFS_BL
F
_CANCEL bit set.
*/
xlog_recover_do_buffer_pass1
(
log
,
buf_f
);
return
0
;
...
...
@@ -2244,7 +2244,7 @@ xlog_recover_do_buffer_trans(
mp
=
log
->
l_mp
;
buf_flags
=
XBF_LOCK
;
if
(
!
(
flags
&
XFS_BL
I
_INODE_BUF
))
if
(
!
(
flags
&
XFS_BL
F
_INODE_BUF
))
buf_flags
|=
XBF_MAPPED
;
bp
=
xfs_buf_read
(
mp
->
m_ddev_targp
,
blkno
,
len
,
buf_flags
);
...
...
@@ -2257,10 +2257,10 @@ xlog_recover_do_buffer_trans(
}
error
=
0
;
if
(
flags
&
XFS_BL
I
_INODE_BUF
)
{
if
(
flags
&
XFS_BL
F
_INODE_BUF
)
{
error
=
xlog_recover_do_inode_buffer
(
mp
,
item
,
bp
,
buf_f
);
}
else
if
(
flags
&
(
XFS_BL
I_UDQUOT_BUF
|
XFS_BLI_PDQUOT_BUF
|
XFS_BLI
_GDQUOT_BUF
))
{
(
XFS_BL
F_UDQUOT_BUF
|
XFS_BLF_PDQUOT_BUF
|
XFS_BLF
_GDQUOT_BUF
))
{
xlog_recover_do_dquot_buffer
(
mp
,
log
,
item
,
bp
,
buf_f
);
}
else
{
xlog_recover_do_reg_buffer
(
mp
,
item
,
bp
,
buf_f
);
...
...
fs/xfs/xfs_log_recover.h
浏览文件 @
88e88374
...
...
@@ -28,7 +28,7 @@
#define XLOG_RHASH(tid) \
((((__uint32_t)tid)>>XLOG_RHASH_SHIFT) & (XLOG_RHASH_SIZE-1))
#define XLOG_MAX_REGIONS_IN_ITEM (XFS_MAX_BLOCKSIZE / XFS_BL
I
_CHUNK / 2 + 1)
#define XLOG_MAX_REGIONS_IN_ITEM (XFS_MAX_BLOCKSIZE / XFS_BL
F
_CHUNK / 2 + 1)
/*
...
...
fs/xfs/xfs_mount.h
浏览文件 @
88e88374
...
...
@@ -268,6 +268,7 @@ typedef struct xfs_mount {
#define XFS_MOUNT_WSYNC (1ULL << 0)
/* for nfs - all metadata ops
must be synchronous except
for space allocations */
#define XFS_MOUNT_DELAYLOG (1ULL << 1)
/* delayed logging is enabled */
#define XFS_MOUNT_DMAPI (1ULL << 2)
/* dmapi is enabled */
#define XFS_MOUNT_WAS_CLEAN (1ULL << 3)
#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4)
/* atomic stop of all filesystem
...
...
fs/xfs/xfs_trans.c
浏览文件 @
88e88374
...
...
@@ -44,6 +44,7 @@
#include "xfs_trans_priv.h"
#include "xfs_trans_space.h"
#include "xfs_inode_item.h"
#include "xfs_trace.h"
kmem_zone_t
*
xfs_trans_zone
;
...
...
@@ -243,9 +244,8 @@ _xfs_trans_alloc(
tp
->
t_type
=
type
;
tp
->
t_mountp
=
mp
;
tp
->
t_items_free
=
XFS_LIC_NUM_SLOTS
;
tp
->
t_busy_free
=
XFS_LBC_NUM_SLOTS
;
xfs_lic_init
(
&
(
tp
->
t_items
));
XFS_LBC_INIT
(
&
(
tp
->
t_busy
)
);
INIT_LIST_HEAD
(
&
tp
->
t_busy
);
return
tp
;
}
...
...
@@ -255,8 +255,13 @@ _xfs_trans_alloc(
*/
STATIC
void
xfs_trans_free
(
xfs_trans_t
*
tp
)
struct
xfs_trans
*
tp
)
{
struct
xfs_busy_extent
*
busyp
,
*
n
;
list_for_each_entry_safe
(
busyp
,
n
,
&
tp
->
t_busy
,
list
)
xfs_alloc_busy_clear
(
tp
->
t_mountp
,
busyp
);
atomic_dec
(
&
tp
->
t_mountp
->
m_active_trans
);
xfs_trans_free_dqinfo
(
tp
);
kmem_zone_free
(
xfs_trans_zone
,
tp
);
...
...
@@ -285,9 +290,8 @@ xfs_trans_dup(
ntp
->
t_type
=
tp
->
t_type
;
ntp
->
t_mountp
=
tp
->
t_mountp
;
ntp
->
t_items_free
=
XFS_LIC_NUM_SLOTS
;
ntp
->
t_busy_free
=
XFS_LBC_NUM_SLOTS
;
xfs_lic_init
(
&
(
ntp
->
t_items
));
XFS_LBC_INIT
(
&
(
ntp
->
t_busy
)
);
INIT_LIST_HEAD
(
&
ntp
->
t_busy
);
ASSERT
(
tp
->
t_flags
&
XFS_TRANS_PERM_LOG_RES
);
ASSERT
(
tp
->
t_ticket
!=
NULL
);
...
...
@@ -423,7 +427,6 @@ xfs_trans_reserve(
return
error
;
}
/*
* Record the indicated change to the given field for application
* to the file system's superblock when the transaction commits.
...
...
@@ -652,7 +655,7 @@ xfs_trans_apply_sb_deltas(
* XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we
* still need to update the incore superblock with the changes.
*/
STATIC
void
void
xfs_trans_unreserve_and_mod_sb
(
xfs_trans_t
*
tp
)
{
...
...
@@ -880,7 +883,7 @@ xfs_trans_fill_vecs(
* they could be immediately flushed and we'd have to race with the flusher
* trying to pull the item from the AIL as we add it.
*/
static
void
void
xfs_trans_item_committed
(
struct
xfs_log_item
*
lip
,
xfs_lsn_t
commit_lsn
,
...
...
@@ -930,26 +933,6 @@ xfs_trans_item_committed(
IOP_UNPIN
(
lip
);
}
/* Clear all the per-AG busy list items listed in this transaction */
static
void
xfs_trans_clear_busy_extents
(
struct
xfs_trans
*
tp
)
{
xfs_log_busy_chunk_t
*
lbcp
;
xfs_log_busy_slot_t
*
lbsp
;
int
i
;
for
(
lbcp
=
&
tp
->
t_busy
;
lbcp
!=
NULL
;
lbcp
=
lbcp
->
lbc_next
)
{
i
=
0
;
for
(
lbsp
=
lbcp
->
lbc_busy
;
i
<
lbcp
->
lbc_unused
;
i
++
,
lbsp
++
)
{
if
(
XFS_LBC_ISFREE
(
lbcp
,
i
))
continue
;
xfs_alloc_clear_busy
(
tp
,
lbsp
->
lbc_ag
,
lbsp
->
lbc_idx
);
}
}
xfs_trans_free_busy
(
tp
);
}
/*
* This is typically called by the LM when a transaction has been fully
* committed to disk. It needs to unpin the items which have
...
...
@@ -984,7 +967,6 @@ xfs_trans_committed(
kmem_free
(
licp
);
}
xfs_trans_clear_busy_extents
(
tp
);
xfs_trans_free
(
tp
);
}
...
...
@@ -1012,8 +994,7 @@ xfs_trans_uncommit(
xfs_trans_unreserve_and_mod_sb
(
tp
);
xfs_trans_unreserve_and_mod_dquots
(
tp
);
xfs_trans_free_items
(
tp
,
flags
);
xfs_trans_free_busy
(
tp
);
xfs_trans_free_items
(
tp
,
NULLCOMMITLSN
,
flags
);
xfs_trans_free
(
tp
);
}
...
...
@@ -1075,6 +1056,8 @@ xfs_trans_commit_iclog(
*
commit_lsn
=
xfs_log_done
(
mp
,
tp
->
t_ticket
,
&
commit_iclog
,
log_flags
);
tp
->
t_commit_lsn
=
*
commit_lsn
;
trace_xfs_trans_commit_lsn
(
tp
);
if
(
nvec
>
XFS_TRANS_LOGVEC_COUNT
)
kmem_free
(
log_vector
);
...
...
@@ -1161,6 +1144,93 @@ xfs_trans_commit_iclog(
return
xfs_log_release_iclog
(
mp
,
commit_iclog
);
}
/*
* Walk the log items and allocate log vector structures for
* each item large enough to fit all the vectors they require.
* Note that this format differs from the old log vector format in
* that there is no transaction header in these log vectors.
*/
STATIC
struct
xfs_log_vec
*
xfs_trans_alloc_log_vecs
(
xfs_trans_t
*
tp
)
{
xfs_log_item_desc_t
*
lidp
;
struct
xfs_log_vec
*
lv
=
NULL
;
struct
xfs_log_vec
*
ret_lv
=
NULL
;
lidp
=
xfs_trans_first_item
(
tp
);
/* Bail out if we didn't find a log item. */
if
(
!
lidp
)
{
ASSERT
(
0
);
return
NULL
;
}
while
(
lidp
!=
NULL
)
{
struct
xfs_log_vec
*
new_lv
;
/* Skip items which aren't dirty in this transaction. */
if
(
!
(
lidp
->
lid_flags
&
XFS_LID_DIRTY
))
{
lidp
=
xfs_trans_next_item
(
tp
,
lidp
);
continue
;
}
/* Skip items that do not have any vectors for writing */
lidp
->
lid_size
=
IOP_SIZE
(
lidp
->
lid_item
);
if
(
!
lidp
->
lid_size
)
{
lidp
=
xfs_trans_next_item
(
tp
,
lidp
);
continue
;
}
new_lv
=
kmem_zalloc
(
sizeof
(
*
new_lv
)
+
lidp
->
lid_size
*
sizeof
(
struct
xfs_log_iovec
),
KM_SLEEP
);
/* The allocated iovec region lies beyond the log vector. */
new_lv
->
lv_iovecp
=
(
struct
xfs_log_iovec
*
)
&
new_lv
[
1
];
new_lv
->
lv_niovecs
=
lidp
->
lid_size
;
new_lv
->
lv_item
=
lidp
->
lid_item
;
if
(
!
ret_lv
)
ret_lv
=
new_lv
;
else
lv
->
lv_next
=
new_lv
;
lv
=
new_lv
;
lidp
=
xfs_trans_next_item
(
tp
,
lidp
);
}
return
ret_lv
;
}
static
int
xfs_trans_commit_cil
(
struct
xfs_mount
*
mp
,
struct
xfs_trans
*
tp
,
xfs_lsn_t
*
commit_lsn
,
int
flags
)
{
struct
xfs_log_vec
*
log_vector
;
int
error
;
/*
* Get each log item to allocate a vector structure for
* the log item to to pass to the log write code. The
* CIL commit code will format the vector and save it away.
*/
log_vector
=
xfs_trans_alloc_log_vecs
(
tp
);
if
(
!
log_vector
)
return
ENOMEM
;
error
=
xfs_log_commit_cil
(
mp
,
tp
,
log_vector
,
commit_lsn
,
flags
);
if
(
error
)
return
error
;
current_restore_flags_nested
(
&
tp
->
t_pflags
,
PF_FSTRANS
);
/* xfs_trans_free_items() unlocks them first */
xfs_trans_free_items
(
tp
,
*
commit_lsn
,
0
);
xfs_trans_free
(
tp
);
return
0
;
}
/*
* xfs_trans_commit
...
...
@@ -1221,7 +1291,11 @@ _xfs_trans_commit(
xfs_trans_apply_sb_deltas
(
tp
);
xfs_trans_apply_dquot_deltas
(
tp
);
error
=
xfs_trans_commit_iclog
(
mp
,
tp
,
&
commit_lsn
,
flags
);
if
(
mp
->
m_flags
&
XFS_MOUNT_DELAYLOG
)
error
=
xfs_trans_commit_cil
(
mp
,
tp
,
&
commit_lsn
,
flags
);
else
error
=
xfs_trans_commit_iclog
(
mp
,
tp
,
&
commit_lsn
,
flags
);
if
(
error
==
ENOMEM
)
{
xfs_force_shutdown
(
mp
,
SHUTDOWN_LOG_IO_ERROR
);
error
=
XFS_ERROR
(
EIO
);
...
...
@@ -1259,8 +1333,7 @@ _xfs_trans_commit(
error
=
XFS_ERROR
(
EIO
);
}
current_restore_flags_nested
(
&
tp
->
t_pflags
,
PF_FSTRANS
);
xfs_trans_free_items
(
tp
,
error
?
XFS_TRANS_ABORT
:
0
);
xfs_trans_free_busy
(
tp
);
xfs_trans_free_items
(
tp
,
NULLCOMMITLSN
,
error
?
XFS_TRANS_ABORT
:
0
);
xfs_trans_free
(
tp
);
XFS_STATS_INC
(
xs_trans_empty
);
...
...
@@ -1338,8 +1411,7 @@ xfs_trans_cancel(
/* mark this thread as no longer being in a transaction */
current_restore_flags_nested
(
&
tp
->
t_pflags
,
PF_FSTRANS
);
xfs_trans_free_items
(
tp
,
flags
);
xfs_trans_free_busy
(
tp
);
xfs_trans_free_items
(
tp
,
NULLCOMMITLSN
,
flags
);
xfs_trans_free
(
tp
);
}
...
...
fs/xfs/xfs_trans.h
浏览文件 @
88e88374
...
...
@@ -106,7 +106,8 @@ typedef struct xfs_trans_header {
#define XFS_TRANS_GROWFSRT_FREE 39
#define XFS_TRANS_SWAPEXT 40
#define XFS_TRANS_SB_COUNT 41
#define XFS_TRANS_TYPE_MAX 41
#define XFS_TRANS_CHECKPOINT 42
#define XFS_TRANS_TYPE_MAX 42
/* new transaction types need to be reflected in xfs_logprint(8) */
#define XFS_TRANS_TYPES \
...
...
@@ -148,6 +149,7 @@ typedef struct xfs_trans_header {
{ XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \
{ XFS_TRANS_SWAPEXT, "SWAPEXT" }, \
{ XFS_TRANS_SB_COUNT, "SB_COUNT" }, \
{ XFS_TRANS_CHECKPOINT, "CHECKPOINT" }, \
{ XFS_TRANS_DUMMY1, "DUMMY1" }, \
{ XFS_TRANS_DUMMY2, "DUMMY2" }, \
{ XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" }
...
...
@@ -813,6 +815,7 @@ struct xfs_log_item_desc;
struct
xfs_mount
;
struct
xfs_trans
;
struct
xfs_dquot_acct
;
struct
xfs_busy_extent
;
typedef
struct
xfs_log_item
{
struct
list_head
li_ail
;
/* AIL pointers */
...
...
@@ -828,6 +831,11 @@ typedef struct xfs_log_item {
/* buffer item iodone */
/* callback func */
struct
xfs_item_ops
*
li_ops
;
/* function list */
/* delayed logging */
struct
list_head
li_cil
;
/* CIL pointers */
struct
xfs_log_vec
*
li_lv
;
/* active log vector */
xfs_lsn_t
li_seq
;
/* CIL commit seq */
}
xfs_log_item_t
;
#define XFS_LI_IN_AIL 0x1
...
...
@@ -871,34 +879,6 @@ typedef struct xfs_item_ops {
#define XFS_ITEM_LOCKED 2
#define XFS_ITEM_PUSHBUF 3
/*
* This structure is used to maintain a list of block ranges that have been
* freed in the transaction. The ranges are listed in the perag[] busy list
* between when they're freed and the transaction is committed to disk.
*/
typedef
struct
xfs_log_busy_slot
{
xfs_agnumber_t
lbc_ag
;
ushort
lbc_idx
;
/* index in perag.busy[] */
}
xfs_log_busy_slot_t
;
#define XFS_LBC_NUM_SLOTS 31
typedef
struct
xfs_log_busy_chunk
{
struct
xfs_log_busy_chunk
*
lbc_next
;
uint
lbc_free
;
/* free slots bitmask */
ushort
lbc_unused
;
/* first unused */
xfs_log_busy_slot_t
lbc_busy
[
XFS_LBC_NUM_SLOTS
];
}
xfs_log_busy_chunk_t
;
#define XFS_LBC_MAX_SLOT (XFS_LBC_NUM_SLOTS - 1)
#define XFS_LBC_FREEMASK ((1U << XFS_LBC_NUM_SLOTS) - 1)
#define XFS_LBC_INIT(cp) ((cp)->lbc_free = XFS_LBC_FREEMASK)
#define XFS_LBC_CLAIM(cp, slot) ((cp)->lbc_free &= ~(1 << (slot)))
#define XFS_LBC_SLOT(cp, slot) (&((cp)->lbc_busy[(slot)]))
#define XFS_LBC_VACANCY(cp) (((cp)->lbc_free) & XFS_LBC_FREEMASK)
#define XFS_LBC_ISFREE(cp, slot) ((cp)->lbc_free & (1 << (slot)))
/*
* This is the type of function which can be given to xfs_trans_callback()
* to be called upon the transaction's commit to disk.
...
...
@@ -950,8 +930,7 @@ typedef struct xfs_trans {
unsigned
int
t_items_free
;
/* log item descs free */
xfs_log_item_chunk_t
t_items
;
/* first log item desc chunk */
xfs_trans_header_t
t_header
;
/* header for in-log trans */
unsigned
int
t_busy_free
;
/* busy descs free */
xfs_log_busy_chunk_t
t_busy
;
/* busy/async free blocks */
struct
list_head
t_busy
;
/* list of busy extents */
unsigned
long
t_pflags
;
/* saved process flags state */
}
xfs_trans_t
;
...
...
@@ -1025,9 +1004,6 @@ int _xfs_trans_commit(xfs_trans_t *,
void
xfs_trans_cancel
(
xfs_trans_t
*
,
int
);
int
xfs_trans_ail_init
(
struct
xfs_mount
*
);
void
xfs_trans_ail_destroy
(
struct
xfs_mount
*
);
xfs_log_busy_slot_t
*
xfs_trans_add_busy
(
xfs_trans_t
*
tp
,
xfs_agnumber_t
ag
,
xfs_extlen_t
idx
);
extern
kmem_zone_t
*
xfs_trans_zone
;
...
...
fs/xfs/xfs_trans_buf.c
浏览文件 @
88e88374
...
...
@@ -114,7 +114,7 @@ _xfs_trans_bjoin(
xfs_buf_item_init
(
bp
,
tp
->
t_mountp
);
bip
=
XFS_BUF_FSPRIVATE
(
bp
,
xfs_buf_log_item_t
*
);
ASSERT
(
!
(
bip
->
bli_flags
&
XFS_BLI_STALE
));
ASSERT
(
!
(
bip
->
bli_format
.
blf_flags
&
XFS_BL
I
_CANCEL
));
ASSERT
(
!
(
bip
->
bli_format
.
blf_flags
&
XFS_BL
F
_CANCEL
));
ASSERT
(
!
(
bip
->
bli_flags
&
XFS_BLI_LOGGED
));
if
(
reset_recur
)
bip
->
bli_recur
=
0
;
...
...
@@ -511,7 +511,7 @@ xfs_trans_brelse(xfs_trans_t *tp,
bip
=
XFS_BUF_FSPRIVATE
(
bp
,
xfs_buf_log_item_t
*
);
ASSERT
(
bip
->
bli_item
.
li_type
==
XFS_LI_BUF
);
ASSERT
(
!
(
bip
->
bli_flags
&
XFS_BLI_STALE
));
ASSERT
(
!
(
bip
->
bli_format
.
blf_flags
&
XFS_BL
I
_CANCEL
));
ASSERT
(
!
(
bip
->
bli_format
.
blf_flags
&
XFS_BL
F
_CANCEL
));
ASSERT
(
atomic_read
(
&
bip
->
bli_refcount
)
>
0
);
/*
...
...
@@ -619,7 +619,7 @@ xfs_trans_bhold(xfs_trans_t *tp,
bip
=
XFS_BUF_FSPRIVATE
(
bp
,
xfs_buf_log_item_t
*
);
ASSERT
(
!
(
bip
->
bli_flags
&
XFS_BLI_STALE
));
ASSERT
(
!
(
bip
->
bli_format
.
blf_flags
&
XFS_BL
I
_CANCEL
));
ASSERT
(
!
(
bip
->
bli_format
.
blf_flags
&
XFS_BL
F
_CANCEL
));
ASSERT
(
atomic_read
(
&
bip
->
bli_refcount
)
>
0
);
bip
->
bli_flags
|=
XFS_BLI_HOLD
;
trace_xfs_trans_bhold
(
bip
);
...
...
@@ -641,7 +641,7 @@ xfs_trans_bhold_release(xfs_trans_t *tp,
bip
=
XFS_BUF_FSPRIVATE
(
bp
,
xfs_buf_log_item_t
*
);
ASSERT
(
!
(
bip
->
bli_flags
&
XFS_BLI_STALE
));
ASSERT
(
!
(
bip
->
bli_format
.
blf_flags
&
XFS_BL
I
_CANCEL
));
ASSERT
(
!
(
bip
->
bli_format
.
blf_flags
&
XFS_BL
F
_CANCEL
));
ASSERT
(
atomic_read
(
&
bip
->
bli_refcount
)
>
0
);
ASSERT
(
bip
->
bli_flags
&
XFS_BLI_HOLD
);
bip
->
bli_flags
&=
~
XFS_BLI_HOLD
;
...
...
@@ -704,7 +704,7 @@ xfs_trans_log_buf(xfs_trans_t *tp,
bip
->
bli_flags
&=
~
XFS_BLI_STALE
;
ASSERT
(
XFS_BUF_ISSTALE
(
bp
));
XFS_BUF_UNSTALE
(
bp
);
bip
->
bli_format
.
blf_flags
&=
~
XFS_BL
I
_CANCEL
;
bip
->
bli_format
.
blf_flags
&=
~
XFS_BL
F
_CANCEL
;
}
lidp
=
xfs_trans_find_item
(
tp
,
(
xfs_log_item_t
*
)
bip
);
...
...
@@ -762,8 +762,8 @@ xfs_trans_binval(
ASSERT
(
!
(
XFS_BUF_ISDELAYWRITE
(
bp
)));
ASSERT
(
XFS_BUF_ISSTALE
(
bp
));
ASSERT
(
!
(
bip
->
bli_flags
&
(
XFS_BLI_LOGGED
|
XFS_BLI_DIRTY
)));
ASSERT
(
!
(
bip
->
bli_format
.
blf_flags
&
XFS_BL
I
_INODE_BUF
));
ASSERT
(
bip
->
bli_format
.
blf_flags
&
XFS_BL
I
_CANCEL
);
ASSERT
(
!
(
bip
->
bli_format
.
blf_flags
&
XFS_BL
F
_INODE_BUF
));
ASSERT
(
bip
->
bli_format
.
blf_flags
&
XFS_BL
F
_CANCEL
);
ASSERT
(
lidp
->
lid_flags
&
XFS_LID_DIRTY
);
ASSERT
(
tp
->
t_flags
&
XFS_TRANS_DIRTY
);
return
;
...
...
@@ -774,7 +774,7 @@ xfs_trans_binval(
* in the buf log item. The STALE flag will be used in
* xfs_buf_item_unpin() to determine if it should clean up
* when the last reference to the buf item is given up.
* We set the XFS_BL
I
_CANCEL flag in the buf log format structure
* We set the XFS_BL
F
_CANCEL flag in the buf log format structure
* and log the buf item. This will be used at recovery time
* to determine that copies of the buffer in the log before
* this should not be replayed.
...
...
@@ -792,9 +792,9 @@ xfs_trans_binval(
XFS_BUF_UNDELAYWRITE
(
bp
);
XFS_BUF_STALE
(
bp
);
bip
->
bli_flags
|=
XFS_BLI_STALE
;
bip
->
bli_flags
&=
~
(
XFS_BLI_LOGGED
|
XFS_BLI_DIRTY
);
bip
->
bli_format
.
blf_flags
&=
~
XFS_BL
I
_INODE_BUF
;
bip
->
bli_format
.
blf_flags
|=
XFS_BL
I
_CANCEL
;
bip
->
bli_flags
&=
~
(
XFS_BLI_
INODE_BUF
|
XFS_BLI_
LOGGED
|
XFS_BLI_DIRTY
);
bip
->
bli_format
.
blf_flags
&=
~
XFS_BL
F
_INODE_BUF
;
bip
->
bli_format
.
blf_flags
|=
XFS_BL
F
_CANCEL
;
memset
((
char
*
)(
bip
->
bli_format
.
blf_data_map
),
0
,
(
bip
->
bli_format
.
blf_map_size
*
sizeof
(
uint
)));
lidp
->
lid_flags
|=
XFS_LID_DIRTY
;
...
...
@@ -802,16 +802,16 @@ xfs_trans_binval(
}
/*
* This call is used to indicate that the buffer contains on-disk
*
inodes which must be handled specially during recovery. They
*
require special handling because only the di_next_unlinked from
*
the inodes in the buffer should be recovered. The rest of the
*
data in the buffer is logged via the inodes
themselves.
* This call is used to indicate that the buffer contains on-disk
inodes which
*
must be handled specially during recovery. They require special handling
*
because only the di_next_unlinked from the inodes in the buffer should be
*
recovered. The rest of the data in the buffer is logged via the inodes
* themselves.
*
* All we do is set the XFS_BLI_INODE_BUF flag in the buffer's log
* format structure so that we'll know what to do at recovery time.
* All we do is set the XFS_BLI_INODE_BUF flag in the items flags so it can be
* transferred to the buffer's log format structure so that we'll know what to
* do at recovery time.
*/
/* ARGSUSED */
void
xfs_trans_inode_buf
(
xfs_trans_t
*
tp
,
...
...
@@ -826,7 +826,7 @@ xfs_trans_inode_buf(
bip
=
XFS_BUF_FSPRIVATE
(
bp
,
xfs_buf_log_item_t
*
);
ASSERT
(
atomic_read
(
&
bip
->
bli_refcount
)
>
0
);
bip
->
bli_f
ormat
.
blf_f
lags
|=
XFS_BLI_INODE_BUF
;
bip
->
bli_flags
|=
XFS_BLI_INODE_BUF
;
}
/*
...
...
@@ -908,9 +908,9 @@ xfs_trans_dquot_buf(
ASSERT
(
XFS_BUF_ISBUSY
(
bp
));
ASSERT
(
XFS_BUF_FSPRIVATE2
(
bp
,
xfs_trans_t
*
)
==
tp
);
ASSERT
(
XFS_BUF_FSPRIVATE
(
bp
,
void
*
)
!=
NULL
);
ASSERT
(
type
==
XFS_BL
I
_UDQUOT_BUF
||
type
==
XFS_BL
I
_PDQUOT_BUF
||
type
==
XFS_BL
I
_GDQUOT_BUF
);
ASSERT
(
type
==
XFS_BL
F
_UDQUOT_BUF
||
type
==
XFS_BL
F
_PDQUOT_BUF
||
type
==
XFS_BL
F
_GDQUOT_BUF
);
bip
=
XFS_BUF_FSPRIVATE
(
bp
,
xfs_buf_log_item_t
*
);
ASSERT
(
atomic_read
(
&
bip
->
bli_refcount
)
>
0
);
...
...
fs/xfs/xfs_trans_item.c
浏览文件 @
88e88374
...
...
@@ -299,6 +299,7 @@ xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
void
xfs_trans_free_items
(
xfs_trans_t
*
tp
,
xfs_lsn_t
commit_lsn
,
int
flags
)
{
xfs_log_item_chunk_t
*
licp
;
...
...
@@ -311,7 +312,7 @@ xfs_trans_free_items(
* Special case the embedded chunk so we don't free it below.
*/
if
(
!
xfs_lic_are_all_free
(
licp
))
{
(
void
)
xfs_trans_unlock_chunk
(
licp
,
1
,
abort
,
NULLCOMMITLSN
);
(
void
)
xfs_trans_unlock_chunk
(
licp
,
1
,
abort
,
commit_lsn
);
xfs_lic_all_free
(
licp
);
licp
->
lic_unused
=
0
;
}
...
...
@@ -322,7 +323,7 @@ xfs_trans_free_items(
*/
while
(
licp
!=
NULL
)
{
ASSERT
(
!
xfs_lic_are_all_free
(
licp
));
(
void
)
xfs_trans_unlock_chunk
(
licp
,
1
,
abort
,
NULLCOMMITLSN
);
(
void
)
xfs_trans_unlock_chunk
(
licp
,
1
,
abort
,
commit_lsn
);
next_licp
=
licp
->
lic_next
;
kmem_free
(
licp
);
licp
=
next_licp
;
...
...
@@ -438,112 +439,3 @@ xfs_trans_unlock_chunk(
return
freed
;
}
/*
* This is called to add the given busy item to the transaction's
* list of busy items. It must find a free busy item descriptor
* or allocate a new one and add the item to that descriptor.
* The function returns a pointer to busy descriptor used to point
* to the new busy entry. The log busy entry will now point to its new
* descriptor with its ???? field.
*/
xfs_log_busy_slot_t
*
xfs_trans_add_busy
(
xfs_trans_t
*
tp
,
xfs_agnumber_t
ag
,
xfs_extlen_t
idx
)
{
xfs_log_busy_chunk_t
*
lbcp
;
xfs_log_busy_slot_t
*
lbsp
;
int
i
=
0
;
/*
* If there are no free descriptors, allocate a new chunk
* of them and put it at the front of the chunk list.
*/
if
(
tp
->
t_busy_free
==
0
)
{
lbcp
=
(
xfs_log_busy_chunk_t
*
)
kmem_alloc
(
sizeof
(
xfs_log_busy_chunk_t
),
KM_SLEEP
);
ASSERT
(
lbcp
!=
NULL
);
/*
* Initialize the chunk, and then
* claim the first slot in the newly allocated chunk.
*/
XFS_LBC_INIT
(
lbcp
);
XFS_LBC_CLAIM
(
lbcp
,
0
);
lbcp
->
lbc_unused
=
1
;
lbsp
=
XFS_LBC_SLOT
(
lbcp
,
0
);
/*
* Link in the new chunk and update the free count.
*/
lbcp
->
lbc_next
=
tp
->
t_busy
.
lbc_next
;
tp
->
t_busy
.
lbc_next
=
lbcp
;
tp
->
t_busy_free
=
XFS_LIC_NUM_SLOTS
-
1
;
/*
* Initialize the descriptor and the generic portion
* of the log item.
*
* Point the new slot at this item and return it.
* Also point the log item at its currently active
* descriptor and set the item's mount pointer.
*/
lbsp
->
lbc_ag
=
ag
;
lbsp
->
lbc_idx
=
idx
;
return
lbsp
;
}
/*
* Find the free descriptor. It is somewhere in the chunklist
* of descriptors.
*/
lbcp
=
&
tp
->
t_busy
;
while
(
lbcp
!=
NULL
)
{
if
(
XFS_LBC_VACANCY
(
lbcp
))
{
if
(
lbcp
->
lbc_unused
<=
XFS_LBC_MAX_SLOT
)
{
i
=
lbcp
->
lbc_unused
;
break
;
}
else
{
/* out-of-order vacancy */
cmn_err
(
CE_DEBUG
,
"OOO vacancy lbcp 0x%p
\n
"
,
lbcp
);
ASSERT
(
0
);
}
}
lbcp
=
lbcp
->
lbc_next
;
}
ASSERT
(
lbcp
!=
NULL
);
/*
* If we find a free descriptor, claim it,
* initialize it, and return it.
*/
XFS_LBC_CLAIM
(
lbcp
,
i
);
if
(
lbcp
->
lbc_unused
<=
i
)
{
lbcp
->
lbc_unused
=
i
+
1
;
}
lbsp
=
XFS_LBC_SLOT
(
lbcp
,
i
);
tp
->
t_busy_free
--
;
lbsp
->
lbc_ag
=
ag
;
lbsp
->
lbc_idx
=
idx
;
return
lbsp
;
}
/*
* xfs_trans_free_busy
* Free all of the busy lists from a transaction
*/
void
xfs_trans_free_busy
(
xfs_trans_t
*
tp
)
{
xfs_log_busy_chunk_t
*
lbcp
;
xfs_log_busy_chunk_t
*
lbcq
;
lbcp
=
tp
->
t_busy
.
lbc_next
;
while
(
lbcp
!=
NULL
)
{
lbcq
=
lbcp
->
lbc_next
;
kmem_free
(
lbcp
);
lbcp
=
lbcq
;
}
XFS_LBC_INIT
(
&
tp
->
t_busy
);
tp
->
t_busy
.
lbc_unused
=
0
;
}
fs/xfs/xfs_trans_priv.h
浏览文件 @
88e88374
...
...
@@ -35,13 +35,14 @@ struct xfs_log_item_desc *xfs_trans_find_item(struct xfs_trans *,
struct
xfs_log_item_desc
*
xfs_trans_first_item
(
struct
xfs_trans
*
);
struct
xfs_log_item_desc
*
xfs_trans_next_item
(
struct
xfs_trans
*
,
struct
xfs_log_item_desc
*
);
void
xfs_trans_free_items
(
struct
xfs_trans
*
,
int
);
void
xfs_trans_unlock_items
(
struct
xfs_trans
*
,
xfs_lsn_t
);
void
xfs_trans_free_busy
(
xfs_trans_t
*
tp
);
xfs_log_busy_slot_t
*
xfs_trans_add_busy
(
xfs_trans_t
*
tp
,
xfs_agnumber_t
ag
,
xfs_extlen_t
idx
);
void
xfs_trans_unlock_items
(
struct
xfs_trans
*
tp
,
xfs_lsn_t
commit_lsn
);
void
xfs_trans_free_items
(
struct
xfs_trans
*
tp
,
xfs_lsn_t
commit_lsn
,
int
flags
);
void
xfs_trans_item_committed
(
struct
xfs_log_item
*
lip
,
xfs_lsn_t
commit_lsn
,
int
aborted
);
void
xfs_trans_unreserve_and_mod_sb
(
struct
xfs_trans
*
tp
);
/*
* AIL traversal cursor.
...
...
fs/xfs/xfs_types.h
浏览文件 @
88e88374
...
...
@@ -75,6 +75,8 @@ typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */
typedef
__uint16_t
xfs_prid_t
;
/* prid_t truncated to 16bits in XFS */
typedef
__uint32_t
xlog_tid_t
;
/* transaction ID type */
/*
* These types are 64 bits on disk but are either 32 or 64 bits in memory.
* Disk based types:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录