Linux-御风守护者 / linux
Commit 20737738
Authored Dec 13, 2016 by Shaohua Li

Merge branch 'md-next' into md-linus

Parents: b78b499a, 2953079c

16 changed files, with 3429 additions and 1262 deletions (+3429 −1262)
drivers/md/bitmap.c              +98    −68
drivers/md/dm-raid.c             +2     −2
drivers/md/linear.c              +17    −14
drivers/md/md.c                  +364   −337
drivers/md/md.h                  +67    −41
drivers/md/multipath.c           +34    −58
drivers/md/raid0.c               +59    −48
drivers/md/raid1.c               +165   −82
drivers/md/raid1.h               +11    −8
drivers/md/raid10.c              +189   −106
drivers/md/raid10.h              +2     −0
drivers/md/raid5-cache.c         +1645  −240
drivers/md/raid5.c               +381   −242
drivers/md/raid5.h               +160   −12
include/uapi/linux/raid/md_p.h   +6     −1
lib/raid6/avx2.c                 +229   −3
drivers/md/bitmap.c

@@ -27,6 +27,7 @@
 #include <linux/mount.h>
 #include <linux/buffer_head.h>
 #include <linux/seq_file.h>
+#include <trace/events/block.h>
 #include "md.h"
 #include "bitmap.h"

@@ -208,11 +209,13 @@ static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mddev)
 static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
 {
-        struct md_rdev *rdev = NULL;
+        struct md_rdev *rdev;
         struct block_device *bdev;
         struct mddev *mddev = bitmap->mddev;
         struct bitmap_storage *store = &bitmap->storage;

+restart:
+        rdev = NULL;
         while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
                 int size = PAGE_SIZE;
                 loff_t offset = mddev->bitmap_info.offset;

@@ -268,8 +271,8 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
                                page);
         }

-        if (wait)
-                md_super_wait(mddev);
+        if (wait && md_super_wait(mddev) < 0)
+                goto restart;
         return 0;

  bad_alignment:

@@ -405,10 +408,10 @@ static int read_page(struct file *file, unsigned long index,

         ret = -EIO;
 out:
         if (ret)
-                printk(KERN_ALERT "md: bitmap read error: (%dB @ %llu): %d\n",
-                        (int)PAGE_SIZE,
-                        (unsigned long long)index << PAGE_SHIFT,
-                        ret);
+                pr_err("md: bitmap read error: (%dB @ %llu): %d\n",
+                       (int)PAGE_SIZE,
+                       (unsigned long long)index << PAGE_SHIFT,
+                       ret);
         return ret;
 }

@@ -416,6 +419,28 @@ static int read_page(struct file *file, unsigned long index,
  * bitmap file superblock operations
  */

+/*
+ * bitmap_wait_writes() should be called before writing any bitmap
+ * blocks, to ensure previous writes, particularly from
+ * bitmap_daemon_work(), have completed.
+ */
+static void bitmap_wait_writes(struct bitmap *bitmap)
+{
+        if (bitmap->storage.file)
+                wait_event(bitmap->write_wait,
+                           atomic_read(&bitmap->pending_writes) == 0);
+        else
+                /* Note that we ignore the return value.  The writes
+                 * might have failed, but that would just mean that
+                 * some bits which should be cleared haven't been,
+                 * which is safe.  The relevant bitmap blocks will
+                 * probably get written again, but there is no great
+                 * loss if they aren't.
+                 */
+                md_super_wait(bitmap->mddev);
+}
+
 /* update the event counter and sync the superblock to disk */
 void bitmap_update_sb(struct bitmap *bitmap)
 {

@@ -455,24 +480,24 @@ void bitmap_print_sb(struct bitmap *bitmap)
         if (!bitmap || !bitmap->storage.sb_page)
                 return;
         sb = kmap_atomic(bitmap->storage.sb_page);
-        printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap));
-        printk(KERN_DEBUG "         magic: %08x\n", le32_to_cpu(sb->magic));
-        printk(KERN_DEBUG "       version: %d\n", le32_to_cpu(sb->version));
-        printk(KERN_DEBUG "          uuid: %08x.%08x.%08x.%08x\n",
-               *(__u32 *)(sb->uuid+0),
-               *(__u32 *)(sb->uuid+4),
-               *(__u32 *)(sb->uuid+8),
-               *(__u32 *)(sb->uuid+12));
-        printk(KERN_DEBUG "        events: %llu\n",
-               (unsigned long long) le64_to_cpu(sb->events));
-        printk(KERN_DEBUG "events cleared: %llu\n",
-               (unsigned long long) le64_to_cpu(sb->events_cleared));
-        printk(KERN_DEBUG "         state: %08x\n", le32_to_cpu(sb->state));
-        printk(KERN_DEBUG "     chunksize: %d B\n", le32_to_cpu(sb->chunksize));
-        printk(KERN_DEBUG "  daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep));
-        printk(KERN_DEBUG "     sync size: %llu KB\n",
-               (unsigned long long)le64_to_cpu(sb->sync_size)/2);
-        printk(KERN_DEBUG "max write behind: %d\n", le32_to_cpu(sb->write_behind));
+        pr_debug("%s: bitmap file superblock:\n", bmname(bitmap));
+        pr_debug("         magic: %08x\n", le32_to_cpu(sb->magic));
+        pr_debug("       version: %d\n", le32_to_cpu(sb->version));
+        pr_debug("          uuid: %08x.%08x.%08x.%08x\n",
+                 *(__u32 *)(sb->uuid+0),
+                 *(__u32 *)(sb->uuid+4),
+                 *(__u32 *)(sb->uuid+8),
+                 *(__u32 *)(sb->uuid+12));
+        pr_debug("        events: %llu\n",
+                 (unsigned long long) le64_to_cpu(sb->events));
+        pr_debug("events cleared: %llu\n",
+                 (unsigned long long) le64_to_cpu(sb->events_cleared));
+        pr_debug("         state: %08x\n", le32_to_cpu(sb->state));
+        pr_debug("     chunksize: %d B\n", le32_to_cpu(sb->chunksize));
+        pr_debug("  daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep));
+        pr_debug("     sync size: %llu KB\n",
+                 (unsigned long long)le64_to_cpu(sb->sync_size)/2);
+        pr_debug("max write behind: %d\n", le32_to_cpu(sb->write_behind));
         kunmap_atomic(sb);
 }

@@ -506,14 +531,14 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap)
         BUG_ON(!chunksize);
         if (!is_power_of_2(chunksize)) {
                 kunmap_atomic(sb);
-                printk(KERN_ERR "bitmap chunksize not a power of 2\n");
+                pr_warn("bitmap chunksize not a power of 2\n");
                 return -EINVAL;
         }
         sb->chunksize = cpu_to_le32(chunksize);

         daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep;
         if (!daemon_sleep || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
-                printk(KERN_INFO "Choosing daemon_sleep default (5 sec)\n");
+                pr_debug("Choosing daemon_sleep default (5 sec)\n");
                 daemon_sleep = 5 * HZ;
         }
         sb->daemon_sleep = cpu_to_le32(daemon_sleep);

@@ -584,7 +609,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
                 /* to 4k blocks */
                 bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096);
                 offset = bitmap->mddev->bitmap_info.offset + (bitmap->cluster_slot * (bm_blocks << 3));
-                pr_info("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__,
+                pr_debug("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__,
                         bitmap->cluster_slot, offset);
         }

@@ -634,7 +659,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
         else if (write_behind > COUNTER_MAX)
                 reason = "write-behind limit out of range (0 - 16383)";
         if (reason) {
-                printk(KERN_INFO "%s: invalid bitmap file superblock: %s\n",
+                pr_warn("%s: invalid bitmap file superblock: %s\n",
                         bmname(bitmap), reason);
                 goto out;
         }

@@ -648,18 +673,15 @@ static int bitmap_read_sb(struct bitmap *bitmap)
          * bitmap's UUID and event counter to the mddev's
          */
         if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) {
-                printk(KERN_INFO
-                       "%s: bitmap superblock UUID mismatch\n",
-                       bmname(bitmap));
+                pr_warn("%s: bitmap superblock UUID mismatch\n",
+                        bmname(bitmap));
                 goto out;
         }
         events = le64_to_cpu(sb->events);
         if (!nodes && (events < bitmap->mddev->events)) {
-                printk(KERN_INFO
-                       "%s: bitmap file is out of date (%llu < %llu) "
-                       "-- forcing full recovery\n",
-                       bmname(bitmap), events,
-                       (unsigned long long) bitmap->mddev->events);
+                pr_warn("%s: bitmap file is out of date (%llu < %llu) -- forcing full recovery\n",
+                        bmname(bitmap), events,
+                        (unsigned long long) bitmap->mddev->events);
                 set_bit(BITMAP_STALE, &bitmap->flags);
         }
 }

@@ -679,8 +701,8 @@ static int bitmap_read_sb(struct bitmap *bitmap)
         if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
                 err = md_setup_cluster(bitmap->mddev, nodes);
                 if (err) {
-                        pr_err("%s: Could not setup cluster service (%d)\n",
+                        pr_warn("%s: Could not setup cluster service (%d)\n",
                                bmname(bitmap), err);
                         goto out_no_sb;
                 }
                 bitmap->cluster_slot = md_cluster_ops->slot_number(bitmap->mddev);

@@ -847,15 +869,13 @@ static void bitmap_file_kick(struct bitmap *bitmap)
                         ptr = file_path(bitmap->storage.file,
                                         path, PAGE_SIZE);

-                        printk(KERN_ALERT
-                               "%s: kicking failed bitmap file %s from array!\n",
-                               bmname(bitmap), IS_ERR(ptr) ? "" : ptr);
+                        pr_warn("%s: kicking failed bitmap file %s from array!\n",
+                                bmname(bitmap), IS_ERR(ptr) ? "" : ptr);

                         kfree(path);
                 } else
-                        printk(KERN_ALERT
-                               "%s: disabling internal bitmap due to errors\n",
-                               bmname(bitmap));
+                        pr_warn("%s: disabling internal bitmap due to errors\n",
+                                bmname(bitmap));
         }
 }

@@ -983,6 +1003,7 @@ void bitmap_unplug(struct bitmap *bitmap)
 {
         unsigned long i;
         int dirty, need_write;
+        int writing = 0;

         if (!bitmap || !bitmap->storage.filemap ||
             test_bit(BITMAP_STALE, &bitmap->flags))

@@ -997,15 +1018,19 @@ void bitmap_unplug(struct bitmap *bitmap)
                 need_write = test_and_clear_page_attr(bitmap, i,
                                                       BITMAP_PAGE_NEEDWRITE);
                 if (dirty || need_write) {
+                        if (!writing) {
+                                bitmap_wait_writes(bitmap);
+                                if (bitmap->mddev->queue)
+                                        blk_add_trace_msg(bitmap->mddev->queue,
+                                                          "md bitmap_unplug");
+                        }
                         clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING);
                         write_page(bitmap, bitmap->storage.filemap[i], 0);
+                        writing = 1;
                 }
         }
-        if (bitmap->storage.file)
-                wait_event(bitmap->write_wait,
-                           atomic_read(&bitmap->pending_writes) == 0);
-        else
-                md_super_wait(bitmap->mddev);
+        if (writing)
+                bitmap_wait_writes(bitmap);

         if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
                 bitmap_file_kick(bitmap);

@@ -1056,14 +1081,13 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)

         outofdate = test_bit(BITMAP_STALE, &bitmap->flags);
         if (outofdate)
-                printk(KERN_INFO "%s: bitmap file is out of date, doing full "
-                       "recovery\n", bmname(bitmap));
+                pr_warn("%s: bitmap file is out of date, doing full recovery\n",
+                        bmname(bitmap));

         if (file && i_size_read(file->f_mapping->host) < store->bytes) {
-                printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
-                       bmname(bitmap),
-                       (unsigned long) i_size_read(file->f_mapping->host),
-                       store->bytes);
+                pr_warn("%s: bitmap file too short %lu < %lu\n",
+                        bmname(bitmap),
+                        (unsigned long) i_size_read(file->f_mapping->host),
+                        store->bytes);
                 goto err;
         }

@@ -1137,16 +1161,15 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
                 offset = 0;
         }

-        printk(KERN_INFO "%s: bitmap initialized from disk: "
-               "read %lu pages, set %lu of %lu bits\n",
-               bmname(bitmap), store->file_pages,
-               bit_cnt, chunks);
+        pr_debug("%s: bitmap initialized from disk: read %lu pages, set %lu of %lu bits\n",
+                 bmname(bitmap), store->file_pages,
+                 bit_cnt, chunks);

         return 0;

  err:
-        printk(KERN_INFO "%s: bitmap initialisation failed: %d\n",
-               bmname(bitmap), ret);
+        pr_warn("%s: bitmap initialisation failed: %d\n",
+                bmname(bitmap), ret);
         return ret;
 }

@@ -1225,6 +1248,10 @@ void bitmap_daemon_work(struct mddev *mddev)
         }
         bitmap->allclean = 1;

+        if (bitmap->mddev->queue)
+                blk_add_trace_msg(bitmap->mddev->queue,
+                                  "md bitmap_daemon_work");
+
         /* Any file-page which is PENDING now needs to be written.
          * So set NEEDWRITE now, then after we make any last-minute changes
          * we will write it.

@@ -1289,6 +1316,7 @@ void bitmap_daemon_work(struct mddev *mddev)
         }
         spin_unlock_irq(&counts->lock);

+        bitmap_wait_writes(bitmap);
         /* Now start writeout on any page in NEEDWRITE that isn't DIRTY.
          * DIRTY pages need to be written by bitmap_unplug so it can wait
          * for them.

@@ -1595,7 +1623,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force)
                    atomic_read(&bitmap->mddev->recovery_active) == 0);

         bitmap->mddev->curr_resync_completed = sector;
-        set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags);
+        set_bit(MD_SB_CHANGE_CLEAN, &bitmap->mddev->sb_flags);
         sector &= ~((1ULL << bitmap->counts.chunkshift) - 1);
         s = 0;
         while (s < sector && s < bitmap->mddev->resync_max_sectors) {

@@ -1825,8 +1853,8 @@ struct bitmap *bitmap_create(struct mddev *mddev, int slot)
         if (err)
                 goto error;

-        printk(KERN_INFO "created bitmap (%lu pages) for device %s\n",
-               bitmap->counts.pages, bmname(bitmap));
+        pr_debug("created bitmap (%lu pages) for device %s\n",
+                 bitmap->counts.pages, bmname(bitmap));

         err = test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0;
         if (err)

@@ -2029,8 +2057,10 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
                                    !bitmap->mddev->bitmap_info.external,
                                    mddev_is_clustered(bitmap->mddev)
                                    ? bitmap->cluster_slot : 0);
-        if (ret)
+        if (ret) {
+                bitmap_file_unmap(&store);
                 goto err;
+        }

         pages = DIV_ROUND_UP(chunks, PAGE_COUNTER_RATIO);

@@ -2089,7 +2119,7 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
                                 bitmap->mddev->bitmap_info.chunksize =
                                         1 << (old_counts.chunkshift + BITMAP_BLOCK_SHIFT);
                                 blocks = old_counts.chunks << old_counts.chunkshift;
-                                pr_err("Could not pre-allocate in-memory bitmap for cluster raid\n");
+                                pr_warn("Could not pre-allocate in-memory bitmap for cluster raid\n");
                                 break;
                         } else
                                 bitmap->counts.bp[page].count += 1;

@@ -2266,7 +2296,7 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
                                 /* Ensure new bitmap info is stored in
                                  * metadata promptly.
                                  */
-                                set_bit(MD_CHANGE_DEVS, &mddev->flags);
+                                set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
                                 md_wakeup_thread(mddev->thread);
                         }
                         rv = 0;
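For context on the bitmap_wait_writes() helper added above: for a file-backed bitmap it sleeps on bitmap->write_wait until the atomic pending_writes counter drains to zero, otherwise it falls back to md_super_wait(). Below is a minimal user-space analogue of that counter-plus-waitqueue pattern, using C11 atomics and a pthread condition variable; the names (write_begin, write_end, wait_writes) are illustrative stand-ins, not kernel APIs.

/* User-space sketch of the pending-writes/wait_event pattern behind
 * bitmap_wait_writes(). Illustrative only; not kernel code. */
#include <pthread.h>
#include <stdatomic.h>

static atomic_int pending_writes;            /* like bitmap->pending_writes */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t write_wait = PTHREAD_COND_INITIALIZER;

static void write_begin(void)                /* before each async write */
{
        atomic_fetch_add(&pending_writes, 1);
}

static void write_end(void)                  /* from each write completion */
{
        if (atomic_fetch_sub(&pending_writes, 1) == 1) {
                /* broadcast under the mutex so a waiter that just checked
                 * the counter cannot miss the wakeup */
                pthread_mutex_lock(&lock);
                pthread_cond_broadcast(&write_wait);
                pthread_mutex_unlock(&lock);
        }
}

static void wait_writes(void)                /* like bitmap_wait_writes() */
{
        pthread_mutex_lock(&lock);
        while (atomic_load(&pending_writes) != 0)
                pthread_cond_wait(&write_wait, &lock);
        pthread_mutex_unlock(&lock);
}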
drivers/md/dm-raid.c

@@ -2011,7 +2011,7 @@ static int super_load(struct md_rdev *rdev, struct md_rdev *refdev)
                 sb->compat_features = cpu_to_le32(FEATURE_FLAG_SUPPORTS_V190);

                 /* Force writing of superblocks to disk */
-                set_bit(MD_CHANGE_DEVS, &rdev->mddev->flags);
+                set_bit(MD_SB_CHANGE_DEVS, &rdev->mddev->sb_flags);

         /* Any superblock is better than none, choose that if given */
         return refdev ? 0 : 1;

@@ -3497,7 +3497,7 @@ static void rs_update_sbs(struct raid_set *rs)
         struct mddev *mddev = &rs->md;
         int ro = mddev->ro;

-        set_bit(MD_CHANGE_DEVS, &mddev->flags);
+        set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
         mddev->ro = 0;
         md_update_sb(mddev, 1);
         mddev->ro = ro;
drivers/md/linear.c

@@ -21,6 +21,7 @@
 #include <linux/seq_file.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <trace/events/block.h>
 #include "md.h"
 #include "linear.h"

@@ -101,8 +102,8 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
                 sector_t sectors;

                 if (j < 0 || j >= raid_disks || disk->rdev) {
-                        printk(KERN_ERR "md/linear:%s: disk numbering problem. Aborting!\n",
-                               mdname(mddev));
+                        pr_warn("md/linear:%s: disk numbering problem. Aborting!\n",
+                                mdname(mddev));
                         goto out;
                 }

@@ -123,8 +124,8 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
                         discard_supported = true;
         }
         if (cnt != raid_disks) {
-                printk(KERN_ERR "md/linear:%s: not enough drives present. Aborting!\n",
-                       mdname(mddev));
+                pr_warn("md/linear:%s: not enough drives present. Aborting!\n",
+                        mdname(mddev));
                 goto out;
         }

@@ -227,22 +228,22 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
         }

         do {
-                tmp_dev = which_dev(mddev, bio->bi_iter.bi_sector);
+                sector_t bio_sector = bio->bi_iter.bi_sector;
+                tmp_dev = which_dev(mddev, bio_sector);
                 start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
                 end_sector = tmp_dev->end_sector;
                 data_offset = tmp_dev->rdev->data_offset;
                 bio->bi_bdev = tmp_dev->rdev->bdev;

-                if (unlikely(bio->bi_iter.bi_sector >= end_sector ||
-                             bio->bi_iter.bi_sector < start_sector))
+                if (unlikely(bio_sector >= end_sector ||
+                             bio_sector < start_sector))
                         goto out_of_bounds;

                 if (unlikely(bio_end_sector(bio) > end_sector)) {
                         /* This bio crosses a device boundary, so we have to
                          * split it.
                          */
-                        split = bio_split(bio, end_sector -
-                                          bio->bi_iter.bi_sector,
+                        split = bio_split(bio, end_sector - bio_sector,
                                           GFP_NOIO, fs_bio_set);
                         bio_chain(split, bio);
                 } else {

@@ -256,15 +257,18 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
                              !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) {
                         /* Just ignore it */
                         bio_endio(split);
-                } else
+                } else {
+                        if (mddev->gendisk)
+                                trace_block_bio_remap(bdev_get_queue(split->bi_bdev),
+                                                      split, disk_devt(mddev->gendisk),
+                                                      bio_sector);
                         generic_make_request(split);
+                }
         } while (split != bio);
         return;

 out_of_bounds:
-        printk(KERN_ERR
-               "md/linear:%s: make_request: Sector %llu out of bounds on "
-               "dev %s: %llu sectors, offset %llu\n",
+        pr_err("md/linear:%s: make_request: Sector %llu out of bounds on dev %s: %llu sectors, offset %llu\n",
                mdname(mddev),
                (unsigned long long)bio->bi_iter.bi_sector,
                bdevname(tmp_dev->rdev->bdev, b),

@@ -275,7 +279,6 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)

 static void linear_status (struct seq_file *seq, struct mddev *mddev)
 {
-
         seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2);
 }
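The reworked linear_make_request() loop above caches bi_sector in a local bio_sector before splitting, because bio_split()/bio_chain() advance the parent bio's iterator; the saved value is what the new trace_block_bio_remap() call needs. As a plain illustration of the same boundary-splitting walk, here is a hypothetical user-space sketch that cuts a sector range wherever it crosses the end of a member device; all names are illustrative, not kernel APIs.

/* Hypothetical sketch of the device-boundary split walk in
 * linear_make_request(). Not kernel code. */
#include <stdio.h>

struct dev_range { unsigned long long end_sector; }; /* cumulative end */

static void submit(int dev, unsigned long long sector, unsigned long long len)
{
        printf("dev%d: sector %llu, %llu sectors\n", dev, sector, len);
}

static void make_request(const struct dev_range *devs, int ndevs,
                         unsigned long long sector, unsigned long long len)
{
        int i = 0;

        while (len) {
                while (i < ndevs && sector >= devs[i].end_sector)
                        i++;                  /* find the device holding 'sector' */
                if (i == ndevs)
                        return;               /* out of bounds */
                unsigned long long chunk = devs[i].end_sector - sector;
                if (chunk > len)
                        chunk = len;          /* request fits inside this device */
                submit(i, sector, chunk);
                sector += chunk;              /* like bio_split() + bio_chain() */
                len -= chunk;
        }
}

int main(void)
{
        struct dev_range devs[] = { { 100 }, { 250 }, { 400 } };
        make_request(devs, 3, 90, 200);       /* splits at sectors 100 and 250 */
        return 0;
}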
drivers/md/md.c

(Diff collapsed in this capture; not expanded.)
drivers/md/md.h

@@ -29,6 +29,16 @@

 #define MaxSector (~(sector_t)0)

+/*
+ * These flags should really be called "NO_RETRY" rather than
+ * "FAILFAST" because they don't make any promise about time lapse,
+ * only about the number of retries, which will be zero.
+ * REQ_FAILFAST_DRIVER is not included because
+ * Commit: 4a27446f3e39 ("[SCSI] modify scsi to handle new fail fast flags.")
+ * seems to suggest that the errors it avoids retrying should usually
+ * be retried.
+ */
+#define MD_FAILFAST        (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT)
 /*
  * MD's 'extended' device
  */

@@ -168,6 +178,19 @@ enum flag_bits {
                                  * so it is safe to remove without
                                  * another synchronize_rcu() call.
                                  */
+        ExternalBbl,            /* External metadata provides bad
+                                 * block management for a disk
+                                 */
+        FailFast,               /* Minimal retries should be attempted on
+                                 * this device, so use REQ_FAILFAST_DEV.
+                                 * Also don't try to repair failed reads.
+                                 * It is expects that no bad block log
+                                 * is present.
+                                 */
+        LastDev,                /* Seems to be the last working dev as
+                                 * it didn't fail, so don't use FailFast
+                                 * any more for metadata
+                                 */
 };

 static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,

@@ -189,6 +212,31 @@ extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
                                 int is_new);
 struct md_cluster_info;

+enum mddev_flags {
+        MD_ARRAY_FIRST_USE,     /* First use of array, needs initialization */
+        MD_CLOSING,             /* If set, we are closing the array, do not open
+                                 * it then */
+        MD_JOURNAL_CLEAN,       /* A raid with journal is already clean */
+        MD_HAS_JOURNAL,         /* The raid array has journal feature set */
+        MD_RELOAD_SB,           /* Reload the superblock because another node
+                                 * updated it.
+                                 */
+        MD_CLUSTER_RESYNC_LOCKED, /* cluster raid only, which means node
+                                   * already took resync lock, need to
+                                   * release the lock */
+        MD_FAILFAST_SUPPORTED,  /* Using MD_FAILFAST on metadata writes is
+                                 * supported as calls to md_error() will
+                                 * never cause the array to become failed.
+                                 */
+};
+
+enum mddev_sb_flags {
+        MD_SB_CHANGE_DEVS,      /* Some device status has changed */
+        MD_SB_CHANGE_CLEAN,     /* transition to or from 'clean' */
+        MD_SB_CHANGE_PENDING,   /* switch from 'clean' to 'active' in progress */
+        MD_SB_NEED_REWRITE,     /* metadata write needs to be repeated */
+};
+
 struct mddev {
         void                            *private;
         struct md_personality           *pers;

@@ -196,21 +244,7 @@ struct mddev {
         int                             md_minor;
         struct list_head                disks;
         unsigned long                   flags;
-#define MD_CHANGE_DEVS  0       /* Some device status has changed */
-#define MD_CHANGE_CLEAN 1       /* transition to or from 'clean' */
-#define MD_CHANGE_PENDING 2     /* switch from 'clean' to 'active' in progress */
-#define MD_UPDATE_SB_FLAGS (1 | 2 | 4)  /* If these are set, md_update_sb needed */
-#define MD_ARRAY_FIRST_USE 3    /* First use of array, needs initialization */
-#define MD_CLOSING      4       /* If set, we are closing the array, do not open
-                                 * it then */
-#define MD_JOURNAL_CLEAN 5      /* A raid with journal is already clean */
-#define MD_HAS_JOURNAL  6       /* The raid array has journal feature set */
-#define MD_RELOAD_SB    7       /* Reload the superblock because another node
-                                 * updated it.
-                                 */
-#define MD_CLUSTER_RESYNC_LOCKED 8 /* cluster raid only, which means node
-                                    * already took resync lock, need to
-                                    * release the lock */
+        unsigned long                   sb_flags;

         int                             suspended;
         atomic_t                        active_io;

@@ -304,31 +338,6 @@ struct mddev {
         int                             parallel_resync;

         int                             ok_start_degraded;
-        /* recovery/resync flags
-         * NEEDED:   we might need to start a resync/recover
-         * RUNNING:  a thread is running, or about to be started
-         * SYNC:     actually doing a resync, not a recovery
-         * RECOVER:  doing recovery, or need to try it.
-         * INTR:     resync needs to be aborted for some reason
-         * DONE:     thread is done and is waiting to be reaped
-         * REQUEST:  user-space has requested a sync (used with SYNC)
-         * CHECK:    user-space request for check-only, no repair
-         * RESHAPE:  A reshape is happening
-         * ERROR:    sync-action interrupted because io-error
-         *
-         * If neither SYNC or RESHAPE are set, then it is a recovery.
-         */
-#define MD_RECOVERY_RUNNING     0
-#define MD_RECOVERY_SYNC        1
-#define MD_RECOVERY_RECOVER     2
-#define MD_RECOVERY_INTR        3
-#define MD_RECOVERY_DONE        4
-#define MD_RECOVERY_NEEDED      5
-#define MD_RECOVERY_REQUESTED   6
-#define MD_RECOVERY_CHECK       7
-#define MD_RECOVERY_RESHAPE     8
-#define MD_RECOVERY_FROZEN      9
-#define MD_RECOVERY_ERROR       10

         unsigned long                   recovery;
         /* If a RAID personality determines that recovery (of a particular

@@ -442,6 +451,23 @@ struct mddev {
         unsigned int                    good_device_nr; /* good device num within cluster raid */
 };

+enum recovery_flags {
+        /*
+         * If neither SYNC or RESHAPE are set, then it is a recovery.
+         */
+        MD_RECOVERY_RUNNING,    /* a thread is running, or about to be started */
+        MD_RECOVERY_SYNC,       /* actually doing a resync, not a recovery */
+        MD_RECOVERY_RECOVER,    /* doing recovery, or need to try it. */
+        MD_RECOVERY_INTR,       /* resync needs to be aborted for some reason */
+        MD_RECOVERY_DONE,       /* thread is done and is waiting to be reaped */
+        MD_RECOVERY_NEEDED,     /* we might need to start a resync/recover */
+        MD_RECOVERY_REQUESTED,  /* user-space has requested a sync (used with SYNC) */
+        MD_RECOVERY_CHECK,      /* user-space request for check-only, no repair */
+        MD_RECOVERY_RESHAPE,    /* A reshape is happening */
+        MD_RECOVERY_FROZEN,     /* User request to abort, and not restart, any action */
+        MD_RECOVERY_ERROR,      /* sync-action interrupted because io-error */
+};
+
 static inline int __must_check mddev_lock(struct mddev *mddev)
 {
         return mutex_lock_interruptible(&mddev->reconfig_mutex);

@@ -623,7 +649,7 @@ extern int mddev_congested(struct mddev *mddev, int bits);
 extern void md_flush_request(struct mddev *mddev, struct bio *bio);
 extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
                            sector_t sector, int size, struct page *page);
-extern void md_super_wait(struct mddev *mddev);
+extern int md_super_wait(struct mddev *mddev);
 extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
                         struct page *page, int op, int op_flags,
                         bool metadata_op);
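The md.h hunks above replace ad-hoc #define bit numbers with enums (mddev_flags, mddev_sb_flags, recovery_flags) while keeping the set_bit()/test_bit() call style, and split the superblock-update bits out into the new sb_flags word. A minimal user-space sketch of that flag-word idiom follows; set_bit()/test_and_clear_bit() here are simplified, non-atomic stand-ins for the kernel helpers, and the flag names are illustrative.

/* Minimal sketch of the enum-bit + unsigned-long flag-word idiom. */
#include <stdio.h>

enum sb_flags {                 /* bit numbers, as in enum mddev_sb_flags */
        SB_CHANGE_DEVS,
        SB_CHANGE_CLEAN,
        SB_CHANGE_PENDING,
        SB_NEED_REWRITE,
};

static void set_bit(int nr, unsigned long *addr)
{
        *addr |= 1UL << nr;
}

static int test_and_clear_bit(int nr, unsigned long *addr)
{
        int was_set = (*addr >> nr) & 1;

        *addr &= ~(1UL << nr);
        return was_set;
}

int main(void)
{
        unsigned long sb_flags = 0;     /* like mddev->sb_flags */

        set_bit(SB_CHANGE_DEVS, &sb_flags);
        set_bit(SB_CHANGE_CLEAN, &sb_flags);
        if (test_and_clear_bit(SB_CHANGE_DEVS, &sb_flags))
                printf("superblock update needed: device status changed\n");
        printf("remaining flags: %#lx\n", sb_flags);    /* prints 0x2 */
        return 0;
}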
drivers/md/multipath.c

@@ -52,7 +52,7 @@ static int multipath_map (struct mpconf *conf)
         }
         rcu_read_unlock();

-        printk(KERN_ERR "multipath_map(): no more operational IO paths?\n");
+        pr_crit_ratelimited("multipath_map(): no more operational IO paths?\n");
         return (-1);
 }

@@ -97,9 +97,9 @@ static void multipath_end_request(struct bio *bio)
                  */
                 char b[BDEVNAME_SIZE];
                 md_error (mp_bh->mddev, rdev);
-                printk(KERN_ERR "multipath: %s: rescheduling sector %llu\n",
-                       bdevname(rdev->bdev,b),
-                       (unsigned long long)bio->bi_iter.bi_sector);
+                pr_info("multipath: %s: rescheduling sector %llu\n",
+                        bdevname(rdev->bdev,b),
+                        (unsigned long long)bio->bi_iter.bi_sector);
                 multipath_reschedule_retry(mp_bh);
         } else
                 multipath_end_bh_io(mp_bh, bio->bi_error);

@@ -194,8 +194,7 @@ static void multipath_error (struct mddev *mddev, struct md_rdev *rdev)
                  * first check if this is a queued request for a device
                  * which has just failed.
                  */
-                printk(KERN_ALERT
-                       "multipath: only one IO path left and IO error.\n");
+                pr_warn("multipath: only one IO path left and IO error.\n");
                 /* leave it active... it's all we have */
                 return;
         }

@@ -209,11 +208,9 @@ static void multipath_error (struct mddev *mddev, struct md_rdev *rdev)
                 spin_unlock_irqrestore(&conf->device_lock, flags);
         }
         set_bit(Faulty, &rdev->flags);
-        set_bit(MD_CHANGE_DEVS, &mddev->flags);
-        printk(KERN_ALERT "multipath: IO failure on %s,"
-               " disabling IO path.\n"
-               "multipath: Operation continuing"
-               " on %d IO paths.\n",
+        set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
+        pr_err("multipath: IO failure on %s, disabling IO path.\n"
+               "multipath: Operation continuing on %d IO paths.\n",
                bdevname(rdev->bdev, b),
                conf->raid_disks - mddev->degraded);
 }

@@ -223,21 +220,21 @@ static void print_multipath_conf (struct mpconf *conf)
         int i;
         struct multipath_info *tmp;

-        printk("MULTIPATH conf printout:\n");
+        pr_debug("MULTIPATH conf printout:\n");
         if (!conf) {
-                printk("(conf==NULL)\n");
+                pr_debug("(conf==NULL)\n");
                 return;
         }
-        printk(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
-               conf->raid_disks);
+        pr_debug(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
+                 conf->raid_disks);

         for (i = 0; i < conf->raid_disks; i++) {
                 char b[BDEVNAME_SIZE];
                 tmp = conf->multipaths + i;
                 if (tmp->rdev)
-                        printk(" disk%d, o:%d, dev:%s\n",
-                               i, !test_bit(Faulty, &tmp->rdev->flags),
-                               bdevname(tmp->rdev->bdev,b));
+                        pr_debug(" disk%d, o:%d, dev:%s\n",
+                                 i, !test_bit(Faulty, &tmp->rdev->flags),
+                                 bdevname(tmp->rdev->bdev,b));
         }
 }

@@ -292,8 +289,7 @@ static int multipath_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
         if (rdev == p->rdev) {
                 if (test_bit(In_sync, &rdev->flags) ||
                     atomic_read(&rdev->nr_pending)) {
-                        printk(KERN_ERR "hot-remove-disk, slot %d is identified"
-                               " but is still operational!\n", number);
+                        pr_warn("hot-remove-disk, slot %d is identified but is still operational!\n", number);
                         err = -EBUSY;
                         goto abort;
                 }

@@ -346,16 +342,14 @@ static void multipathd(struct md_thread *thread)
                 bio->bi_iter.bi_sector = mp_bh->master_bio->bi_iter.bi_sector;

                 if ((mp_bh->path = multipath_map (conf))<0) {
-                        printk(KERN_ALERT "multipath: %s: unrecoverable IO read"
-                               " error for block %llu\n",
-                               bdevname(bio->bi_bdev,b),
-                               (unsigned long long)bio->bi_iter.bi_sector);
+                        pr_err("multipath: %s: unrecoverable IO read error for block %llu\n",
+                               bdevname(bio->bi_bdev,b),
+                               (unsigned long long)bio->bi_iter.bi_sector);
                         multipath_end_bh_io(mp_bh, -EIO);
                 } else {
-                        printk(KERN_ERR "multipath: %s: redirecting sector %llu"
-                               " to another IO path\n",
-                               bdevname(bio->bi_bdev,b),
-                               (unsigned long long)bio->bi_iter.bi_sector);
+                        pr_err("multipath: %s: redirecting sector %llu to another IO path\n",
+                               bdevname(bio->bi_bdev,b),
+                               (unsigned long long)bio->bi_iter.bi_sector);
                         *bio = *(mp_bh->master_bio);
                         bio->bi_iter.bi_sector +=
                                 conf->multipaths[mp_bh->path].rdev->data_offset;

@@ -389,8 +383,8 @@ static int multipath_run (struct mddev *mddev)
                 return -EINVAL;

         if (mddev->level != LEVEL_MULTIPATH) {
-                printk("multipath: %s: raid level not set to multipath IO (%d)\n",
-                       mdname(mddev), mddev->level);
+                pr_warn("multipath: %s: raid level not set to multipath IO (%d)\n",
+                        mdname(mddev), mddev->level);
                 goto out;
         }
         /*

@@ -401,21 +395,13 @@ static int multipath_run (struct mddev *mddev)
         conf = kzalloc(sizeof(struct mpconf), GFP_KERNEL);
         mddev->private = conf;
-        if (!conf) {
-                printk(KERN_ERR
-                       "multipath: couldn't allocate memory for %s\n",
-                       mdname(mddev));
+        if (!conf)
                 goto out;
-        }

         conf->multipaths = kzalloc(sizeof(struct multipath_info)*mddev->raid_disks,
                                    GFP_KERNEL);
-        if (!conf->multipaths) {
-                printk(KERN_ERR
-                       "multipath: couldn't allocate memory for %s\n",
-                       mdname(mddev));
+        if (!conf->multipaths)
                 goto out_free_conf;
-        }

         working_disks = 0;
         rdev_for_each(rdev, mddev) {

@@ -439,7 +425,7 @@ static int multipath_run (struct mddev *mddev)
         INIT_LIST_HEAD(&conf->retry_list);

         if (!working_disks) {
-                printk(KERN_ERR "multipath: no operational IO paths for %s\n",
+                pr_warn("multipath: no operational IO paths for %s\n",
                        mdname(mddev));
                 goto out_free_conf;
         }

@@ -447,27 +433,17 @@ static int multipath_run (struct mddev *mddev)
         conf->pool = mempool_create_kmalloc_pool(NR_RESERVED_BUFS,
                                                  sizeof(struct multipath_bh));
-        if (conf->pool == NULL) {
-                printk(KERN_ERR
-                       "multipath: couldn't allocate memory for %s\n",
-                       mdname(mddev));
+        if (conf->pool == NULL)
                 goto out_free_conf;
-        }

-        {
-                mddev->thread = md_register_thread(multipathd, mddev,
-                                                   "multipath");
-                if (!mddev->thread) {
-                        printk(KERN_ERR "multipath: couldn't allocate thread"
-                               " for %s\n", mdname(mddev));
-                        goto out_free_conf;
-                }
-        }
+        mddev->thread = md_register_thread(multipathd, mddev, "multipath");
+        if (!mddev->thread)
+                goto out_free_conf;

-        printk(KERN_INFO
-               "multipath: array %s active with %d out of %d IO paths\n",
+        pr_info("multipath: array %s active with %d out of %d IO paths\n",
                 mdname(mddev), conf->raid_disks - mddev->degraded,
-               mddev->raid_disks);
+                mddev->raid_disks);
         /*
          * Ok, everything is just fine now
          */
drivers/md/raid0.c

@@ -21,6 +21,7 @@
 #include <linux/seq_file.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <trace/events/block.h>
 #include "md.h"
 #include "raid0.h"
 #include "raid5.h"

@@ -51,20 +52,21 @@ static void dump_zones(struct mddev *mddev)
         char b[BDEVNAME_SIZE];
         struct r0conf *conf = mddev->private;
         int raid_disks = conf->strip_zone[0].nb_dev;
-        printk(KERN_INFO "md: RAID0 configuration for %s - %d zone%s\n",
-               mdname(mddev),
-               conf->nr_strip_zones, conf->nr_strip_zones==1?"":"s");
+        pr_debug("md: RAID0 configuration for %s - %d zone%s\n",
+                 mdname(mddev),
+                 conf->nr_strip_zones, conf->nr_strip_zones==1?"":"s");
         for (j = 0; j < conf->nr_strip_zones; j++) {
-                printk(KERN_INFO "md: zone%d=[", j);
+                char line[200];
+                int len = 0;
+
                 for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
-                        printk(KERN_CONT "%s%s", k?"/":"",
-                        bdevname(conf->devlist[j*raid_disks
-                                                + k]->bdev, b));
-                printk(KERN_CONT "]\n");
+                        len += snprintf(line+len, 200-len, "%s%s", k?"/":"",
+                                        bdevname(conf->devlist[j*raid_disks
+                                                               + k]->bdev, b));
+                pr_debug("md: zone%d=[%s]\n", j, line);

                 zone_size  = conf->strip_zone[j].zone_end - zone_start;
-                printk(KERN_INFO "      zone-offset=%10lluKB, "
-                                "device-offset=%10lluKB, size=%10lluKB\n",
+                pr_debug("      zone-offset=%10lluKB, device-offset=%10lluKB, size=%10lluKB\n",
                         (unsigned long long)zone_start>>1,
                         (unsigned long long)conf->strip_zone[j].dev_start>>1,
                         (unsigned long long)zone_size>>1);

@@ -142,9 +144,9 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
          * chunk size is a multiple of that sector size
          */
         if ((mddev->chunk_sectors << 9) % blksize) {
-                printk(KERN_ERR "md/raid0:%s: chunk_size of %d not multiple of block size %d\n",
-                       mdname(mddev),
-                       mddev->chunk_sectors << 9, blksize);
+                pr_warn("md/raid0:%s: chunk_size of %d not multiple of block size %d\n",
+                        mdname(mddev),
+                        mddev->chunk_sectors << 9, blksize);
                 err = -EINVAL;
                 goto abort;
         }

@@ -186,19 +188,18 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
         }
         if (j < 0) {
-                printk(KERN_ERR
-                       "md/raid0:%s: remove inactive devices before converting to RAID0\n",
-                       mdname(mddev));
+                pr_warn("md/raid0:%s: remove inactive devices before converting to RAID0\n",
+                        mdname(mddev));
                 goto abort;
         }
         if (j >= mddev->raid_disks) {
-                printk(KERN_ERR "md/raid0:%s: bad disk number %d - "
-                       "aborting!\n", mdname(mddev), j);
+                pr_warn("md/raid0:%s: bad disk number %d - aborting!\n",
+                        mdname(mddev), j);
                 goto abort;
         }
         if (dev[j]) {
-                printk(KERN_ERR "md/raid0:%s: multiple devices for %d - "
-                       "aborting!\n", mdname(mddev), j);
+                pr_warn("md/raid0:%s: multiple devices for %d - aborting!\n",
+                        mdname(mddev), j);
                 goto abort;
         }
         dev[j] = rdev1;

@@ -208,8 +209,8 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
                 cnt++;
         }
         if (cnt != mddev->raid_disks) {
-                printk(KERN_ERR "md/raid0:%s: too few disks (%d of %d) - "
-                       "aborting!\n", mdname(mddev), cnt, mddev->raid_disks);
+                pr_warn("md/raid0:%s: too few disks (%d of %d) - aborting!\n",
+                        mdname(mddev), cnt, mddev->raid_disks);
                 goto abort;
         }
         zone->nb_dev = cnt;

@@ -357,8 +358,7 @@ static int raid0_run(struct mddev *mddev)
         int ret;

         if (mddev->chunk_sectors == 0) {
-                printk(KERN_ERR "md/raid0:%s: chunk size must be set.\n",
-                       mdname(mddev));
+                pr_warn("md/raid0:%s: chunk size must be set.\n", mdname(mddev));
                 return -EINVAL;
         }
         if (md_check_no_bitmap(mddev))

@@ -399,9 +399,9 @@ static int raid0_run(struct mddev *mddev)
         /* calculate array device size */
         md_set_array_sectors(mddev, raid0_size(mddev, 0, 0));

-        printk(KERN_INFO "md/raid0:%s: md_size is %llu sectors.\n",
-               mdname(mddev),
-               (unsigned long long)mddev->array_sectors);
+        pr_debug("md/raid0:%s: md_size is %llu sectors.\n",
+                 mdname(mddev),
+                 (unsigned long long)mddev->array_sectors);

         if (mddev->queue) {
                 /* calculate the max read-ahead size.

@@ -464,7 +464,8 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
         }

         do {
-                sector_t sector = bio->bi_iter.bi_sector;
+                sector_t bio_sector = bio->bi_iter.bi_sector;
+                sector_t sector = bio_sector;
                 unsigned chunk_sects = mddev->chunk_sectors;

                 unsigned sectors = chunk_sects -

@@ -473,7 +474,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
                         : sector_div(sector, chunk_sects));

                 /* Restore due to sector_div */
-                sector = bio->bi_iter.bi_sector;
+                sector = bio_sector;

                 if (sectors < bio_sectors(bio)) {
                         split = bio_split(bio, sectors, GFP_NOIO, fs_bio_set);

@@ -492,8 +493,13 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
                              !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) {
                         /* Just ignore it */
                         bio_endio(split);
-                } else
+                } else {
+                        if (mddev->gendisk)
+                                trace_block_bio_remap(bdev_get_queue(split->bi_bdev),
+                                                      split, disk_devt(mddev->gendisk),
+                                                      bio_sector);
                         generic_make_request(split);
+                }
         } while (split != bio);
 }

@@ -509,17 +515,17 @@ static void *raid0_takeover_raid45(struct mddev *mddev)
         struct r0conf *priv_conf;

         if (mddev->degraded != 1) {
-                printk(KERN_ERR "md/raid0:%s: raid5 must be degraded! Degraded disks: %d\n",
-                       mdname(mddev),
-                       mddev->degraded);
+                pr_warn("md/raid0:%s: raid5 must be degraded! Degraded disks: %d\n",
+                        mdname(mddev),
+                        mddev->degraded);
                 return ERR_PTR(-EINVAL);
         }

         rdev_for_each(rdev, mddev) {
                 /* check slot number for a disk */
                 if (rdev->raid_disk == mddev->raid_disks-1) {
-                        printk(KERN_ERR "md/raid0:%s: raid5 must have missing parity disk!\n",
-                               mdname(mddev));
+                        pr_warn("md/raid0:%s: raid5 must have missing parity disk!\n",
+                                mdname(mddev));
                         return ERR_PTR(-EINVAL);
                 }
                 rdev->sectors = mddev->dev_sectors;

@@ -533,8 +539,11 @@ static void *raid0_takeover_raid45(struct mddev *mddev)
         mddev->delta_disks = -1;
         /* make sure it will be not marked as dirty */
         mddev->recovery_cp = MaxSector;
+        clear_bit(MD_HAS_JOURNAL, &mddev->flags);
+        clear_bit(MD_JOURNAL_CLEAN, &mddev->flags);

         create_strip_zones(mddev, &priv_conf);
+
         return priv_conf;
 }

@@ -549,19 +558,19 @@ static void *raid0_takeover_raid10(struct mddev *mddev)
          *  - all mirrors must be already degraded
          */
         if (mddev->layout != ((1 << 8) + 2)) {
-                printk(KERN_ERR "md/raid0:%s:: Raid0 cannot takeover layout: 0x%x\n",
-                       mdname(mddev),
-                       mddev->layout);
+                pr_warn("md/raid0:%s:: Raid0 cannot takeover layout: 0x%x\n",
+                        mdname(mddev),
+                        mddev->layout);
                 return ERR_PTR(-EINVAL);
         }
         if (mddev->raid_disks & 1) {
-                printk(KERN_ERR "md/raid0:%s: Raid0 cannot takeover Raid10 with odd disk number.\n",
-                       mdname(mddev));
+                pr_warn("md/raid0:%s: Raid0 cannot takeover Raid10 with odd disk number.\n",
+                        mdname(mddev));
                 return ERR_PTR(-EINVAL);
         }
         if (mddev->degraded != (mddev->raid_disks>>1)) {
-                printk(KERN_ERR "md/raid0:%s: All mirrors must be already degraded!\n",
-                       mdname(mddev));
+                pr_warn("md/raid0:%s: All mirrors must be already degraded!\n",
+                        mdname(mddev));
                 return ERR_PTR(-EINVAL);
         }

@@ -574,6 +583,7 @@ static void *raid0_takeover_raid10(struct mddev *mddev)
         mddev->degraded = 0;
         /* make sure it will be not marked as dirty */
         mddev->recovery_cp = MaxSector;
+        clear_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);

         create_strip_zones(mddev, &priv_conf);
         return priv_conf;

@@ -588,7 +598,7 @@ static void *raid0_takeover_raid1(struct mddev *mddev)
          *  - (N - 1) mirror drives must be already faulty
          */
         if ((mddev->raid_disks - 1) != mddev->degraded) {
-                printk(KERN_ERR "md/raid0:%s: (N - 1) mirrors drives must be already faulty!\n",
+                pr_err("md/raid0:%s: (N - 1) mirrors drives must be already faulty!\n",
                        mdname(mddev));
                 return ERR_PTR(-EINVAL);
         }

@@ -616,6 +626,7 @@ static void *raid0_takeover_raid1(struct mddev *mddev)
         mddev->raid_disks = 1;
         /* make sure it will be not marked as dirty */
         mddev->recovery_cp = MaxSector;
+        clear_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);

         create_strip_zones(mddev, &priv_conf);
         return priv_conf;

@@ -631,8 +642,8 @@ static void *raid0_takeover(struct mddev *mddev)
          */

         if (mddev->bitmap) {
-                printk(KERN_ERR "md/raid0: %s: cannot takeover array with bitmap\n",
-                       mdname(mddev));
+                pr_warn("md/raid0: %s: cannot takeover array with bitmap\n",
+                        mdname(mddev));
                 return ERR_PTR(-EBUSY);
         }
         if (mddev->level == 4)

@@ -642,8 +653,8 @@ static void *raid0_takeover(struct mddev *mddev)
                 if (mddev->layout == ALGORITHM_PARITY_N)
                         return raid0_takeover_raid45(mddev);

-                printk(KERN_ERR "md/raid0:%s: Raid can only takeover Raid5 with layout: %d\n",
-                       mdname(mddev), ALGORITHM_PARITY_N);
+                pr_warn("md/raid0:%s: Raid can only takeover Raid5 with layout: %d\n",
+                        mdname(mddev), ALGORITHM_PARITY_N);
         }

         if (mddev->level == 10)

@@ -652,7 +663,7 @@ static void *raid0_takeover(struct mddev *mddev)
         if (mddev->level == 1)
                 return raid0_takeover_raid1(mddev);

-        printk(KERN_ERR "Takeover from raid%i to raid0 not supported\n",
+        pr_warn("Takeover from raid%i to raid0 not supported\n",
                 mddev->level);

         return ERR_PTR(-EINVAL);
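dump_zones() above now assembles each zone's device list into a local buffer with snprintf() and emits it with a single pr_debug() call, instead of a chain of KERN_CONT continuations that could interleave with other messages. A stand-alone sketch of that accumulation idiom, with illustrative data:

/* Stand-alone sketch of the len += snprintf(line+len, size-len, ...)
 * accumulation idiom used in dump_zones(). */
#include <stdio.h>

int main(void)
{
        const char *devs[] = { "sda1", "sdb1", "sdc1" };
        char line[200];
        int len = 0;
        unsigned k;

        /* Guard on len so the remaining-size argument can never go
         * negative (snprintf returns the would-be length, so len can
         * grow past the buffer size on truncation). */
        for (k = 0; k < sizeof(devs)/sizeof(devs[0]) &&
                    len < (int)sizeof(line); k++)
                len += snprintf(line + len, sizeof(line) - len, "%s%s",
                                k ? "/" : "", devs[k]);
        printf("md: zone0=[%s]\n", line);     /* md: zone0=[sda1/sdb1/sdc1] */
        return 0;
}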
drivers/md/raid1.c

(Diff collapsed in this capture; not expanded.)
drivers/md/raid1.h

@@ -161,14 +161,15 @@ struct r1bio {
 };

 /* bits for r1bio.state */
-#define R1BIO_Uptodate  0
-#define R1BIO_IsSync    1
-#define R1BIO_Degraded  2
-#define R1BIO_BehindIO  3
+enum r1bio_state {
+        R1BIO_Uptodate,
+        R1BIO_IsSync,
+        R1BIO_Degraded,
+        R1BIO_BehindIO,
 /* Set ReadError on bios that experience a readerror so that
  * raid1d knows what to do with them.
  */
-#define R1BIO_ReadError 4
+        R1BIO_ReadError,
 /* For write-behind requests, we call bi_end_io when
  * the last non-write-behind device completes, providing
  * any write was successful.  Otherwise we call when

@@ -176,10 +177,12 @@ struct r1bio {
  * with failure when last write completes (and all failed).
  * Record that bi_end_io was called with this flag...
  */
-#define R1BIO_Returned 6
+        R1BIO_Returned,
 /* If a write for this request means we can clear some
  * known-bad-block records, we set this flag
  */
-#define R1BIO_MadeGood 7
-#define R1BIO_WriteError 8
+        R1BIO_MadeGood,
+        R1BIO_WriteError,
+        R1BIO_FailFast,
+};
 #endif
drivers/md/raid10.c

(Diff collapsed in this capture; not expanded.)
drivers/md/raid10.h

@@ -156,5 +156,7 @@ enum r10bio_state {
          * flag is set
          */
         R10BIO_Previous,
+/* failfast devices did receive failfast requests. */
+        R10BIO_FailFast,
 };
 #endif
drivers/md/raid5-cache.c

(Diff collapsed in this capture; not expanded.)
drivers/md/raid5.c

(Diff collapsed in this capture; not expanded.)
drivers/md/raid5.h

@@ -226,6 +226,8 @@ struct stripe_head {
         struct r5l_io_unit      *log_io;
         struct list_head        log_list;
+        sector_t                log_start; /* first meta block on the journal */
+        struct list_head        r5c; /* for r5c_cache->stripe_in_journal */
         /**
          * struct stripe_operations
          * @target - STRIPE_OP_COMPUTE_BLK target

@@ -264,6 +266,7 @@ struct stripe_head_state {
         int syncing, expanding, expanded, replacing;
         int locked, uptodate, to_read, to_write, failed, written;
         int to_fill, compute, req_compute, non_overwrite;
+        int injournal, just_cached;
         int failed_num[2];
         int p_failed, q_failed;
         int dec_preread_active;

@@ -273,6 +276,7 @@ struct stripe_head_state {
         struct md_rdev *blocked_rdev;
         int handle_bad_blocks;
         int log_failed;
+        int waiting_extra_page;
 };

 /* Flags for struct r5dev.flags */

@@ -313,6 +317,11 @@ enum r5dev_flags {
                          */
         R5_Discard,     /* Discard the stripe */
         R5_SkipCopy,    /* Don't copy data from bio to stripe cache */
+        R5_InJournal,   /* data being written is in the journal device.
+                         * if R5_InJournal is set for parity pd_idx, all the
+                         * data and parity being written are in the journal
+                         * device
+                         */
 };

 /*

@@ -345,7 +354,30 @@ enum {
         STRIPE_BITMAP_PENDING,  /* Being added to bitmap, don't add
                                  * to batch yet.
                                  */
-        STRIPE_LOG_TRAPPED, /* trapped into log */
+        STRIPE_LOG_TRAPPED,     /* trapped into log (see raid5-cache.c)
+                                 * this bit is used in two scenarios:
+                                 *
+                                 * 1. write-out phase
+                                 *  set in first entry of r5l_write_stripe
+                                 *  clear in second entry of r5l_write_stripe
+                                 *  used to bypass logic in handle_stripe
+                                 *
+                                 * 2. caching phase
+                                 *  set in r5c_try_caching_write()
+                                 *  clear when journal write is done
+                                 *  used to initiate r5c_cache_data()
+                                 *  also used to bypass logic in handle_stripe
+                                 */
+        STRIPE_R5C_CACHING,     /* the stripe is in caching phase
+                                 * see more detail in the raid5-cache.c
+                                 */
+        STRIPE_R5C_PARTIAL_STRIPE,      /* in r5c cache (to-be/being handled or
+                                         * in conf->r5c_partial_stripe_list)
+                                         */
+        STRIPE_R5C_FULL_STRIPE, /* in r5c cache (to-be/being handled or
+                                 * in conf->r5c_full_stripe_list)
+                                 */
+        STRIPE_R5C_PREFLUSH,    /* need to flush journal device */
 };

 #define STRIPE_EXPAND_SYNC_FLAGS \

@@ -408,8 +440,86 @@ enum {
 struct disk_info {
         struct md_rdev  *rdev, *replacement;
+        struct page     *extra_page; /* extra page to use in prexor */
 };

+/*
+ * Stripe cache
+ */
+
+#define NR_STRIPES              256
+#define STRIPE_SIZE             PAGE_SIZE
+#define STRIPE_SHIFT            (PAGE_SHIFT - 9)
+#define STRIPE_SECTORS          (STRIPE_SIZE>>9)
+#define IO_THRESHOLD            1
+#define BYPASS_THRESHOLD        1
+#define NR_HASH                 (PAGE_SIZE / sizeof(struct hlist_head))
+#define HASH_MASK               (NR_HASH - 1)
+#define MAX_STRIPE_BATCH        8
+
+/* bio's attached to a stripe+device for I/O are linked together in bi_sector
+ * order without overlap.  There may be several bio's per stripe+device, and
+ * a bio could span several devices.
+ * When walking this list for a particular stripe+device, we must never proceed
+ * beyond a bio that extends past this device, as the next bio might no longer
+ * be valid.
+ * This function is used to determine the 'next' bio in the list, given the
+ * sector of the current stripe+device
+ */
+static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector)
+{
+        int sectors = bio_sectors(bio);
+        if (bio->bi_iter.bi_sector + sectors < sector + STRIPE_SECTORS)
+                return bio->bi_next;
+        else
+                return NULL;
+}
+
+/*
+ * We maintain a biased count of active stripes in the bottom 16 bits of
+ * bi_phys_segments, and a count of processed stripes in the upper 16 bits
+ */
+static inline int raid5_bi_processed_stripes(struct bio *bio)
+{
+        atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
+        return (atomic_read(segments) >> 16) & 0xffff;
+}
+
+static inline int raid5_dec_bi_active_stripes(struct bio *bio)
+{
+        atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
+        return atomic_sub_return(1, segments) & 0xffff;
+}
+
+static inline void raid5_inc_bi_active_stripes(struct bio *bio)
+{
+        atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
+        atomic_inc(segments);
+}
+
+static inline void raid5_set_bi_processed_stripes(struct bio *bio,
+                                                  unsigned int cnt)
+{
+        atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
+        int old, new;
+
+        do {
+                old = atomic_read(segments);
+                new = (old & 0xffff) | (cnt << 16);
+        } while (atomic_cmpxchg(segments, old, new) != old);
+}
+
+static inline void raid5_set_bi_stripes(struct bio *bio, unsigned int cnt)
+{
+        atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
+        atomic_set(segments, cnt);
+}
+
 /* NOTE NR_STRIPE_HASH_LOCKS must remain below 64.
  * This is because we sometimes take all the spinlocks
  * and creating that much locking depth can cause

@@ -432,6 +542,30 @@ struct r5worker_group {
         int stripes_cnt;
 };

+enum r5_cache_state {
+        R5_INACTIVE_BLOCKED,    /* release of inactive stripes blocked,
+                                 * waiting for 25% to be free
+                                 */
+        R5_ALLOC_MORE,          /* It might help to allocate another
+                                 * stripe.
+                                 */
+        R5_DID_ALLOC,           /* A stripe was allocated, don't allocate
+                                 * more until at least one has been
+                                 * released.  This avoids flooding
+                                 * the cache.
+                                 */
+        R5C_LOG_TIGHT,          /* log device space tight, need to
+                                 * prioritize stripes at last_checkpoint
+                                 */
+        R5C_LOG_CRITICAL,       /* log device is running out of space,
+                                 * only process stripes that are already
+                                 * occupying the log
+                                 */
+        R5C_EXTRA_PAGE_IN_USE,  /* a stripe is using disk_info.extra_page
+                                 * for prexor
+                                 */
+};
+
 struct r5conf {
         struct hlist_head       *stripe_hashtbl;
         /* only protect corresponding hash list and inactive_list */

@@ -519,23 +653,18 @@ struct r5conf {
                                           */
         atomic_t                active_stripes;
         struct list_head        inactive_list[NR_STRIPE_HASH_LOCKS];
+
+        atomic_t                r5c_cached_full_stripes;
+        struct list_head        r5c_full_stripe_list;
+        atomic_t                r5c_cached_partial_stripes;
+        struct list_head        r5c_partial_stripe_list;
+
         atomic_t                empty_inactive_list_nr;
         struct llist_head       released_stripes;
         wait_queue_head_t       wait_for_quiescent;
         wait_queue_head_t       wait_for_stripe;
         wait_queue_head_t       wait_for_overlap;
         unsigned long           cache_state;
-#define R5_INACTIVE_BLOCKED     1       /* release of inactive stripes blocked,
-                                         * waiting for 25% to be free
-                                         */
-#define R5_ALLOC_MORE           2       /* It might help to allocate another
-                                         * stripe.
-                                         */
-#define R5_DID_ALLOC            4       /* A stripe was allocated, don't allocate
-                                         * more until at least one has been
-                                         * released.  This avoids flooding
-                                         * the cache.
-                                         */
         struct shrinker         shrinker;
         int                     pool_size; /* number of disks in stripeheads in pool */
         spinlock_t              device_lock;

@@ -633,4 +762,23 @@ extern void r5l_stripe_write_finished(struct stripe_head *sh);
 extern int r5l_handle_flush_request(struct r5l_log *log, struct bio *bio);
 extern void r5l_quiesce(struct r5l_log *log, int state);
 extern bool r5l_log_disk_error(struct r5conf *conf);
+extern bool r5c_is_writeback(struct r5l_log *log);
+extern int
+r5c_try_caching_write(struct r5conf *conf, struct stripe_head *sh,
+                      struct stripe_head_state *s, int disks);
+extern void
+r5c_finish_stripe_write_out(struct r5conf *conf, struct stripe_head *sh,
+                            struct stripe_head_state *s);
+extern void r5c_release_extra_page(struct stripe_head *sh);
+extern void r5c_use_extra_page(struct stripe_head *sh);
+extern void r5l_wake_reclaim(struct r5l_log *log, sector_t space);
+extern void r5c_handle_cached_data_endio(struct r5conf *conf,
+                struct stripe_head *sh, int disks, struct bio_list *return_bi);
+extern int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh,
+                          struct stripe_head_state *s);
+extern void r5c_make_stripe_write_out(struct stripe_head *sh);
+extern void r5c_flush_cache(struct r5conf *conf, int num);
+extern void r5c_check_stripe_cache_usage(struct r5conf *conf);
+extern void r5c_check_cached_full_stripe(struct r5conf *conf);
+extern struct md_sysfs_entry r5c_journal_mode;
 #endif
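The raid5_*_stripes() helpers moved into raid5.h above pack two 16-bit counters into the one 32-bit bi_phys_segments field: active stripes in the low half, processed stripes in the high half, updated through atomic operations. A user-space re-implementation of the same packing with C11 atomics, illustrative only; the function names mirror the kernel helpers but are not them.

/* User-space sketch of the two-counters-in-one-word scheme behind
 * the raid5 bi_phys_segments helpers. Illustrative only. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_uint segments;    /* low 16 bits: active, high 16: processed */

static unsigned processed_stripes(void)
{
        return (atomic_load(&segments) >> 16) & 0xffff;
}

static unsigned dec_active_stripes(void)
{
        /* subtracting 1 only touches the low half while it is non-zero */
        return (atomic_fetch_sub(&segments, 1) - 1) & 0xffff;
}

static void set_processed_stripes(unsigned cnt)
{
        unsigned old = atomic_load(&segments), val;

        do {    /* CAS loop: replace the high half, keep the low half */
                val = (old & 0xffff) | (cnt << 16);
        } while (!atomic_compare_exchange_weak(&segments, &old, val));
}

int main(void)
{
        atomic_store(&segments, 3);     /* three active stripes */
        set_processed_stripes(2);
        printf("processed=%u active-after-dec=%u\n",
               processed_stripes(), dec_active_stripes());  /* 2 and 2 */
        return 0;
}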
include/uapi/linux/raid/md_p.h

@@ -84,6 +84,10 @@
 #define MD_DISK_CANDIDATE       5 /* disk is added as spare (local) until confirmed
                                    * For clustered enviroments only.
                                    */
+#define MD_DISK_FAILFAST        10 /* Send REQ_FAILFAST if there are multiple
+                                    * devices available - and don't try to
+                                    * correct read errors.
+                                    */

 #define MD_DISK_WRITEMOSTLY     9 /* disk is "write-mostly" is RAID1 config.
                                    * read requests will only be sent here in

@@ -265,8 +269,9 @@ struct mdp_superblock_1 {
         __le32  dev_number;     /* permanent identifier of this  device - not role in raid */
         __le32  cnt_corrected_read; /* number of read errors that were corrected by re-writing */
         __u8    device_uuid[16]; /* user-space setable, ignored by kernel */
-        __u8    devflags;       /* per-device flags.  Only one defined...*/
+        __u8    devflags;       /* per-device flags.  Only two defined...*/
 #define WriteMostly1    1       /* mask for writemostly flag in above */
+#define FailFast1       2       /* Should avoid retries and fixups and just fail */

         /* Bad block log. If there are any bad blocks the feature flag is set.
          * If offset and size are non-zero, that space is reserved and available
          */
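Note that FailFast1 above is a mask within the single devflags byte of the v1 superblock, whereas MD_DISK_FAILFAST is a bit number in the disk state word. A hypothetical snippet showing how a tool might test the devflags mask; sb_stub is a stand-in, not the full mdp_superblock_1 layout.

/* Hypothetical check of the FailFast1 devflags mask. */
#include <stdio.h>

#define WriteMostly1    1       /* mask for writemostly flag */
#define FailFast1       2       /* avoid retries and fixups, just fail */

struct sb_stub { unsigned char devflags; };     /* stand-in struct */

int main(void)
{
        struct sb_stub sb = { .devflags = FailFast1 };

        if (sb.devflags & FailFast1)
                printf("device marked failfast\n");
        if (!(sb.devflags & WriteMostly1))
                printf("device is not write-mostly\n");
        return 0;
}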
lib/raid6/avx2.c

@@ -87,9 +87,57 @@ static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
         kernel_fpu_end();
 }

+static void raid6_avx21_xor_syndrome(int disks, int start, int stop,
+                                     size_t bytes, void **ptrs)
+{
+        u8 **dptr = (u8 **)ptrs;
+        u8 *p, *q;
+        int d, z, z0;
+
+        z0 = stop;              /* P/Q right side optimization */
+        p = dptr[disks-2];      /* XOR parity */
+        q = dptr[disks-1];      /* RS syndrome */
+
+        kernel_fpu_begin();
+
+        asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
+
+        for (d = 0 ; d < bytes ; d += 32) {
+                asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
+                asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
+                asm volatile("vpxor %ymm4,%ymm2,%ymm2");
+                /* P/Q data pages */
+                for (z = z0-1 ; z >= start ; z--) {
+                        asm volatile("vpxor %ymm5,%ymm5,%ymm5");
+                        asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
+                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
+                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
+                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+                        asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
+                        asm volatile("vpxor %ymm5,%ymm2,%ymm2");
+                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+                }
+                /* P/Q left side optimization */
+                for (z = start-1 ; z >= 0 ; z--) {
+                        asm volatile("vpxor %ymm5,%ymm5,%ymm5");
+                        asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
+                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
+                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
+                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+                }
+                asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
+                /* Don't use movntdq for r/w memory area < cache line */
+                asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d]));
+                asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d]));
+        }
+
+        asm volatile("sfence" : : : "memory");
+        kernel_fpu_end();
+}
+
 const struct raid6_calls raid6_avx2x1 = {
         raid6_avx21_gen_syndrome,
-        NULL,                   /* XOR not yet implemented */
+        raid6_avx21_xor_syndrome,
         raid6_have_avx2,
         "avx2x1",
         1                       /* Has cache hints */

@@ -149,9 +197,77 @@ static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)
         kernel_fpu_end();
 }

+static void raid6_avx22_xor_syndrome(int disks, int start, int stop,
+                                     size_t bytes, void **ptrs)
+{
+        u8 **dptr = (u8 **)ptrs;
+        u8 *p, *q;
+        int d, z, z0;
+
+        z0 = stop;              /* P/Q right side optimization */
+        p = dptr[disks-2];      /* XOR parity */
+        q = dptr[disks-1];      /* RS syndrome */
+
+        kernel_fpu_begin();
+
+        asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
+
+        for (d = 0 ; d < bytes ; d += 64) {
+                asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
+                asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32]));
+                asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
+                asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32]));
+                asm volatile("vpxor %ymm4,%ymm2,%ymm2");
+                asm volatile("vpxor %ymm6,%ymm3,%ymm3");
+                /* P/Q data pages */
+                for (z = z0-1 ; z >= start ; z--) {
+                        asm volatile("vpxor %ymm5,%ymm5,%ymm5");
+                        asm volatile("vpxor %ymm7,%ymm7,%ymm7");
+                        asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
+                        asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
+                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
+                        asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
+                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
+                        asm volatile("vpand %ymm0,%ymm7,%ymm7");
+                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
+                        asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
+                        asm volatile("vmovdqa %0,%%ymm7" :: "m" (dptr[z][d+32]));
+                        asm volatile("vpxor %ymm5,%ymm2,%ymm2");
+                        asm volatile("vpxor %ymm7,%ymm3,%ymm3");
+                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
+                }
+                /* P/Q left side optimization */
+                for (z = start-1 ; z >= 0 ; z--) {
+                        asm volatile("vpxor %ymm5,%ymm5,%ymm5");
+                        asm volatile("vpxor %ymm7,%ymm7,%ymm7");
+                        asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
+                        asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
+                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
+                        asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
+                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
+                        asm volatile("vpand %ymm0,%ymm7,%ymm7");
+                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
+                }
+                asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
+                asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32]));
+                /* Don't use movntdq for r/w memory area < cache line */
+                asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d]));
+                asm volatile("vmovdqa %%ymm6,%0" : "=m" (q[d+32]));
+                asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d]));
+                asm volatile("vmovdqa %%ymm3,%0" : "=m" (p[d+32]));
+        }
+
+        asm volatile("sfence" : : : "memory");
+        kernel_fpu_end();
+}
+
 const struct raid6_calls raid6_avx2x2 = {
         raid6_avx22_gen_syndrome,
-        NULL,                   /* XOR not yet implemented */
+        raid6_avx22_xor_syndrome,
         raid6_have_avx2,
         "avx2x2",
         1                       /* Has cache hints */

@@ -242,9 +358,119 @@ static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)
         kernel_fpu_end();
 }

+static void raid6_avx24_xor_syndrome(int disks, int start, int stop,
+                                     size_t bytes, void **ptrs)
+{
+        u8 **dptr = (u8 **)ptrs;
+        u8 *p, *q;
+        int d, z, z0;
+
+        z0 = stop;              /* P/Q right side optimization */
+        p = dptr[disks-2];      /* XOR parity */
+        q = dptr[disks-1];      /* RS syndrome */
+
+        kernel_fpu_begin();
+
+        asm volatile("vmovdqa %0,%%ymm0" :: "m" (raid6_avx2_constants.x1d[0]));
+
+        for (d = 0 ; d < bytes ; d += 128) {
+                asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
+                asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32]));
+                asm volatile("vmovdqa %0,%%ymm12" :: "m" (dptr[z0][d+64]));
+                asm volatile("vmovdqa %0,%%ymm14" :: "m" (dptr[z0][d+96]));
+                asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
+                asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32]));
+                asm volatile("vmovdqa %0,%%ymm10" : : "m" (p[d+64]));
+                asm volatile("vmovdqa %0,%%ymm11" : : "m" (p[d+96]));
+                asm volatile("vpxor %ymm4,%ymm2,%ymm2");
+                asm volatile("vpxor %ymm6,%ymm3,%ymm3");
+                asm volatile("vpxor %ymm12,%ymm10,%ymm10");
+                asm volatile("vpxor %ymm14,%ymm11,%ymm11");
+                /* P/Q data pages */
+                for (z = z0-1 ; z >= start ; z--) {
+                        asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
+                        asm volatile("prefetchnta %0" :: "m" (dptr[z][d+64]));
+                        asm volatile("vpxor %ymm5,%ymm5,%ymm5");
+                        asm volatile("vpxor %ymm7,%ymm7,%ymm7");
+                        asm volatile("vpxor %ymm13,%ymm13,%ymm13");
+                        asm volatile("vpxor %ymm15,%ymm15,%ymm15");
+                        asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
+                        asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
+                        asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13");
+                        asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15");
+                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
+                        asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
+                        asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
+                        asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
+                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
+                        asm volatile("vpand %ymm0,%ymm7,%ymm7");
+                        asm volatile("vpand %ymm0,%ymm13,%ymm13");
+                        asm volatile("vpand %ymm0,%ymm15,%ymm15");
+                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
+                        asm volatile("vpxor %ymm13,%ymm12,%ymm12");
+                        asm volatile("vpxor %ymm15,%ymm14,%ymm14");
+                        asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
+                        asm volatile("vmovdqa %0,%%ymm7" :: "m" (dptr[z][d+32]));
+                        asm volatile("vmovdqa %0,%%ymm13" :: "m" (dptr[z][d+64]));
+                        asm volatile("vmovdqa %0,%%ymm15" :: "m" (dptr[z][d+96]));
+                        asm volatile("vpxor %ymm5,%ymm2,%ymm2");
+                        asm volatile("vpxor %ymm7,%ymm3,%ymm3");
+                        asm volatile("vpxor %ymm13,%ymm10,%ymm10");
+                        asm volatile("vpxor %ymm15,%ymm11,%ymm11");
+                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
+                        asm volatile("vpxor %ymm13,%ymm12,%ymm12");
+                        asm volatile("vpxor %ymm15,%ymm14,%ymm14");
+                }
+                asm volatile("prefetchnta %0" :: "m" (q[d]));
+                asm volatile("prefetchnta %0" :: "m" (q[d+64]));
+                /* P/Q left side optimization */
+                for (z = start-1 ; z >= 0 ; z--) {
+                        asm volatile("vpxor %ymm5,%ymm5,%ymm5");
+                        asm volatile("vpxor %ymm7,%ymm7,%ymm7");
+                        asm volatile("vpxor %ymm13,%ymm13,%ymm13");
+                        asm volatile("vpxor %ymm15,%ymm15,%ymm15");
+                        asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
+                        asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
+                        asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13");
+                        asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15");
+                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
+                        asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
+                        asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
+                        asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
+                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
+                        asm volatile("vpand %ymm0,%ymm7,%ymm7");
+                        asm volatile("vpand %ymm0,%ymm13,%ymm13");
+                        asm volatile("vpand %ymm0,%ymm15,%ymm15");
+                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
+                        asm volatile("vpxor %ymm13,%ymm12,%ymm12");
+                        asm volatile("vpxor %ymm15,%ymm14,%ymm14");
+                }
+                asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
+                asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
+                asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64]));
+                asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96]));
+                asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
+                asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32]));
+                asm volatile("vpxor %0,%%ymm12,%%ymm12" : : "m" (q[d+64]));
+                asm volatile("vpxor %0,%%ymm14,%%ymm14" : : "m" (q[d+96]));
+                asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
+                asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
+                asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64]));
+                asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96]));
+        }
+
+        asm volatile("sfence" : : : "memory");
+        kernel_fpu_end();
+}
+
 const struct raid6_calls raid6_avx2x4 = {
         raid6_avx24_gen_syndrome,
-        NULL,                   /* XOR not yet implemented */
+        raid6_avx24_xor_syndrome,
         raid6_have_avx2,
         "avx2x4",
         1                       /* Has cache hints */
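In the xor_syndrome routines above, the vpcmpgtb/vpaddb/vpand/vpxor sequence multiplies each byte of the Q accumulator by 2 in GF(2^8) with the RAID-6 polynomial 0x1d: vpcmpgtb against zero yields 0xff for bytes whose top bit is set (bytes compare as signed), vpaddb doubles each byte, and the masked XOR folds in the reduction constant. A scalar C sketch of the same step, with an illustrative one-byte example:

/* Scalar equivalent of the AVX2 GF(2^8) multiply-by-2 step used in
 * the xor_syndrome loops (RAID-6 polynomial 0x1d). */
#include <stdint.h>
#include <stdio.h>

static uint8_t gf2_mul2(uint8_t x)
{
        uint8_t mask = (x & 0x80) ? 0xff : 0x00;  /* vpcmpgtb: byte sign */
        return (uint8_t)(x << 1) ^ (mask & 0x1d); /* vpaddb + vpand + vpxor */
}

int main(void)
{
        /* One Q-syndrome update: Q = 2*Q ^ data, as in the inner loop. */
        uint8_t q = 0x90, data = 0x0f;

        q = gf2_mul2(q) ^ data;
        printf("q = 0x%02x\n", q);   /* 0x90 -> 0x3d, ^0x0f -> 0x32 */
        return 0;
}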