Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
openeuler
Kernel
提交
4c7a08c8
K
Kernel
项目概览
openeuler
/
Kernel
接近 2 年 前同步成功
通知
8
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
K
Kernel
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
4c7a08c8
编写于
2月 19, 2013
作者:
A
Alex Elder
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'testing' of github.com:ceph/ceph-client into into linux-3.8-ceph
上级
19f949f5
903bb32e
变更
26
隐藏空白更改
内联
并排
Showing
26 changed file
with
1756 addition
and
1125 deletion
+1756
-1125
drivers/block/rbd.c
drivers/block/rbd.c
+1087
-686
fs/ceph/addr.c
fs/ceph/addr.c
+3
-4
fs/ceph/caps.c
fs/ceph/caps.c
+25
-7
fs/ceph/file.c
fs/ceph/file.c
+7
-1
fs/ceph/ioctl.c
fs/ceph/ioctl.c
+1
-1
fs/ceph/mds_client.c
fs/ceph/mds_client.c
+31
-2
fs/ceph/mds_client.h
fs/ceph/mds_client.h
+6
-0
fs/ceph/strings.c
fs/ceph/strings.c
+4
-0
fs/ceph/super.h
fs/ceph/super.h
+1
-7
fs/ceph/xattr.c
fs/ceph/xattr.c
+163
-47
include/linux/ceph/ceph_features.h
include/linux/ceph/ceph_features.h
+7
-1
include/linux/ceph/ceph_fs.h
include/linux/ceph/ceph_fs.h
+24
-8
include/linux/ceph/decode.h
include/linux/ceph/decode.h
+17
-12
include/linux/ceph/libceph.h
include/linux/ceph/libceph.h
+9
-7
include/linux/ceph/messenger.h
include/linux/ceph/messenger.h
+2
-0
include/linux/ceph/osd_client.h
include/linux/ceph/osd_client.h
+19
-35
include/linux/ceph/osdmap.h
include/linux/ceph/osdmap.h
+1
-1
include/linux/ceph/rados.h
include/linux/ceph/rados.h
+73
-20
include/linux/crush/crush.h
include/linux/crush/crush.h
+2
-0
net/ceph/ceph_common.c
net/ceph/ceph_common.c
+16
-0
net/ceph/ceph_strings.c
net/ceph/ceph_strings.c
+39
-0
net/ceph/crush/mapper.c
net/ceph/crush/mapper.c
+11
-4
net/ceph/messenger.c
net/ceph/messenger.c
+4
-1
net/ceph/osd_client.c
net/ceph/osd_client.c
+173
-245
net/ceph/osdmap.c
net/ceph/osdmap.c
+20
-23
net/ceph/pagevec.c
net/ceph/pagevec.c
+11
-13
未找到文件。
drivers/block/rbd.c
浏览文件 @
4c7a08c8
...
@@ -52,9 +52,12 @@
...
@@ -52,9 +52,12 @@
#define SECTOR_SHIFT 9
#define SECTOR_SHIFT 9
#define SECTOR_SIZE (1ULL << SECTOR_SHIFT)
#define SECTOR_SIZE (1ULL << SECTOR_SHIFT)
/* It might be useful to have th
is defined elsewhere too
*/
/* It might be useful to have th
ese defined elsewhere
*/
#define U64_MAX ((u64) (~0ULL))
#define U8_MAX ((u8) (~0U))
#define U16_MAX ((u16) (~0U))
#define U32_MAX ((u32) (~0U))
#define U64_MAX ((u64) (~0ULL))
#define RBD_DRV_NAME "rbd"
#define RBD_DRV_NAME "rbd"
#define RBD_DRV_NAME_LONG "rbd (rados block device)"
#define RBD_DRV_NAME_LONG "rbd (rados block device)"
...
@@ -66,7 +69,6 @@
...
@@ -66,7 +69,6 @@
(NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1))
(NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1))
#define RBD_MAX_SNAP_COUNT 510
/* allows max snapc to fit in 4KB */
#define RBD_MAX_SNAP_COUNT 510
/* allows max snapc to fit in 4KB */
#define RBD_MAX_OPT_LEN 1024
#define RBD_SNAP_HEAD_NAME "-"
#define RBD_SNAP_HEAD_NAME "-"
...
@@ -93,8 +95,6 @@
...
@@ -93,8 +95,6 @@
#define DEV_NAME_LEN 32
#define DEV_NAME_LEN 32
#define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1)
#define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1)
#define RBD_READ_ONLY_DEFAULT false
/*
/*
* block device image metadata (in-memory version)
* block device image metadata (in-memory version)
*/
*/
...
@@ -119,16 +119,33 @@ struct rbd_image_header {
...
@@ -119,16 +119,33 @@ struct rbd_image_header {
* An rbd image specification.
* An rbd image specification.
*
*
* The tuple (pool_id, image_id, snap_id) is sufficient to uniquely
* The tuple (pool_id, image_id, snap_id) is sufficient to uniquely
* identify an image.
* identify an image. Each rbd_dev structure includes a pointer to
* an rbd_spec structure that encapsulates this identity.
*
* Each of the id's in an rbd_spec has an associated name. For a
* user-mapped image, the names are supplied and the id's associated
* with them are looked up. For a layered image, a parent image is
* defined by the tuple, and the names are looked up.
*
* An rbd_dev structure contains a parent_spec pointer which is
* non-null if the image it represents is a child in a layered
* image. This pointer will refer to the rbd_spec structure used
* by the parent rbd_dev for its own identity (i.e., the structure
* is shared between the parent and child).
*
* Since these structures are populated once, during the discovery
* phase of image construction, they are effectively immutable so
* we make no effort to synchronize access to them.
*
* Note that code herein does not assume the image name is known (it
* could be a null pointer).
*/
*/
struct
rbd_spec
{
struct
rbd_spec
{
u64
pool_id
;
u64
pool_id
;
char
*
pool_name
;
char
*
pool_name
;
char
*
image_id
;
char
*
image_id
;
size_t
image_id_len
;
char
*
image_name
;
char
*
image_name
;
size_t
image_name_len
;
u64
snap_id
;
u64
snap_id
;
char
*
snap_name
;
char
*
snap_name
;
...
@@ -136,10 +153,6 @@ struct rbd_spec {
...
@@ -136,10 +153,6 @@ struct rbd_spec {
struct
kref
kref
;
struct
kref
kref
;
};
};
struct
rbd_options
{
bool
read_only
;
};
/*
/*
* an instance of the client. multiple devices may share an rbd client.
* an instance of the client. multiple devices may share an rbd client.
*/
*/
...
@@ -149,37 +162,76 @@ struct rbd_client {
...
@@ -149,37 +162,76 @@ struct rbd_client {
struct
list_head
node
;
struct
list_head
node
;
};
};
/*
struct
rbd_img_request
;
* a request completion status
typedef
void
(
*
rbd_img_callback_t
)(
struct
rbd_img_request
*
);
*/
struct
rbd_req_status
{
#define BAD_WHICH U32_MAX
/* Good which or bad which, which? */
int
done
;
int
rc
;
struct
rbd_obj_request
;
u64
bytes
;
typedef
void
(
*
rbd_obj_callback_t
)(
struct
rbd_obj_request
*
);
enum
obj_request_type
{
OBJ_REQUEST_NODATA
,
OBJ_REQUEST_BIO
,
OBJ_REQUEST_PAGES
};
};
/*
struct
rbd_obj_request
{
* a collection of requests
const
char
*
object_name
;
*/
u64
offset
;
/* object start byte */
struct
rbd_req_coll
{
u64
length
;
/* bytes from offset */
int
total
;
int
num_done
;
struct
rbd_img_request
*
img_request
;
struct
list_head
links
;
/* img_request->obj_requests */
u32
which
;
/* posn image request list */
enum
obj_request_type
type
;
union
{
struct
bio
*
bio_list
;
struct
{
struct
page
**
pages
;
u32
page_count
;
};
};
struct
ceph_osd_request
*
osd_req
;
u64
xferred
;
/* bytes transferred */
u64
version
;
s32
result
;
atomic_t
done
;
rbd_obj_callback_t
callback
;
struct
completion
completion
;
struct
kref
kref
;
struct
kref
kref
;
struct
rbd_req_status
status
[
0
];
};
};
/*
struct
rbd_img_request
{
* a single io request
struct
request
*
rq
;
*/
struct
rbd_device
*
rbd_dev
;
struct
rbd_request
{
u64
offset
;
/* starting image byte offset */
struct
request
*
rq
;
/* blk layer request */
u64
length
;
/* byte count from offset */
struct
bio
*
bio
;
/* cloned bio */
bool
write_request
;
/* false for read */
struct
page
**
pages
;
/* list of used pages */
union
{
u64
len
;
struct
ceph_snap_context
*
snapc
;
/* for writes */
int
coll_index
;
u64
snap_id
;
/* for reads */
struct
rbd_req_coll
*
coll
;
};
spinlock_t
completion_lock
;
/* protects next_completion */
u32
next_completion
;
rbd_img_callback_t
callback
;
u32
obj_request_count
;
struct
list_head
obj_requests
;
/* rbd_obj_request structs */
struct
kref
kref
;
};
};
#define for_each_obj_request(ireq, oreq) \
list_for_each_entry(oreq, &(ireq)->obj_requests, links)
#define for_each_obj_request_from(ireq, oreq) \
list_for_each_entry_from(oreq, &(ireq)->obj_requests, links)
#define for_each_obj_request_safe(ireq, oreq, n) \
list_for_each_entry_safe_reverse(oreq, n, &(ireq)->obj_requests, links)
struct
rbd_snap
{
struct
rbd_snap
{
struct
device
dev
;
struct
device
dev
;
const
char
*
name
;
const
char
*
name
;
...
@@ -209,16 +261,18 @@ struct rbd_device {
...
@@ -209,16 +261,18 @@ struct rbd_device {
char
name
[
DEV_NAME_LEN
];
/* blkdev name, e.g. rbd3 */
char
name
[
DEV_NAME_LEN
];
/* blkdev name, e.g. rbd3 */
spinlock_t
lock
;
/* queue
lock
*/
spinlock_t
lock
;
/* queue
, flags, open_count
*/
struct
rbd_image_header
header
;
struct
rbd_image_header
header
;
bool
exists
;
unsigned
long
flags
;
/* possibly lock protected */
struct
rbd_spec
*
spec
;
struct
rbd_spec
*
spec
;
char
*
header_name
;
char
*
header_name
;
struct
ceph_file_layout
layout
;
struct
ceph_osd_event
*
watch_event
;
struct
ceph_osd_event
*
watch_event
;
struct
ceph_osd_request
*
watch_request
;
struct
rbd_obj_request
*
watch_request
;
struct
rbd_spec
*
parent_spec
;
struct
rbd_spec
*
parent_spec
;
u64
parent_overlap
;
u64
parent_overlap
;
...
@@ -235,7 +289,19 @@ struct rbd_device {
...
@@ -235,7 +289,19 @@ struct rbd_device {
/* sysfs related */
/* sysfs related */
struct
device
dev
;
struct
device
dev
;
unsigned
long
open_count
;
unsigned
long
open_count
;
/* protected by lock */
};
/*
* Flag bits for rbd_dev->flags. If atomicity is required,
* rbd_dev->lock is used to protect access.
*
* Currently, only the "removing" flag (which is coupled with the
* "open_count" field) requires atomic access.
*/
enum
rbd_dev_flags
{
RBD_DEV_FLAG_EXISTS
,
/* mapped snapshot has not been deleted */
RBD_DEV_FLAG_REMOVING
,
/* this mapping is being removed */
};
};
static
DEFINE_MUTEX
(
ctl_mutex
);
/* Serialize open/close/setup/teardown */
static
DEFINE_MUTEX
(
ctl_mutex
);
/* Serialize open/close/setup/teardown */
...
@@ -277,6 +343,33 @@ static struct device rbd_root_dev = {
...
@@ -277,6 +343,33 @@ static struct device rbd_root_dev = {
.
release
=
rbd_root_dev_release
,
.
release
=
rbd_root_dev_release
,
};
};
static
__printf
(
2
,
3
)
void
rbd_warn
(
struct
rbd_device
*
rbd_dev
,
const
char
*
fmt
,
...)
{
struct
va_format
vaf
;
va_list
args
;
va_start
(
args
,
fmt
);
vaf
.
fmt
=
fmt
;
vaf
.
va
=
&
args
;
if
(
!
rbd_dev
)
printk
(
KERN_WARNING
"%s: %pV
\n
"
,
RBD_DRV_NAME
,
&
vaf
);
else
if
(
rbd_dev
->
disk
)
printk
(
KERN_WARNING
"%s: %s: %pV
\n
"
,
RBD_DRV_NAME
,
rbd_dev
->
disk
->
disk_name
,
&
vaf
);
else
if
(
rbd_dev
->
spec
&&
rbd_dev
->
spec
->
image_name
)
printk
(
KERN_WARNING
"%s: image %s: %pV
\n
"
,
RBD_DRV_NAME
,
rbd_dev
->
spec
->
image_name
,
&
vaf
);
else
if
(
rbd_dev
->
spec
&&
rbd_dev
->
spec
->
image_id
)
printk
(
KERN_WARNING
"%s: id %s: %pV
\n
"
,
RBD_DRV_NAME
,
rbd_dev
->
spec
->
image_id
,
&
vaf
);
else
/* punt */
printk
(
KERN_WARNING
"%s: rbd_dev %p: %pV
\n
"
,
RBD_DRV_NAME
,
rbd_dev
,
&
vaf
);
va_end
(
args
);
}
#ifdef RBD_DEBUG
#ifdef RBD_DEBUG
#define rbd_assert(expr) \
#define rbd_assert(expr) \
if (unlikely(!(expr))) { \
if (unlikely(!(expr))) { \
...
@@ -296,14 +389,23 @@ static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver);
...
@@ -296,14 +389,23 @@ static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver);
static
int
rbd_open
(
struct
block_device
*
bdev
,
fmode_t
mode
)
static
int
rbd_open
(
struct
block_device
*
bdev
,
fmode_t
mode
)
{
{
struct
rbd_device
*
rbd_dev
=
bdev
->
bd_disk
->
private_data
;
struct
rbd_device
*
rbd_dev
=
bdev
->
bd_disk
->
private_data
;
bool
removing
=
false
;
if
((
mode
&
FMODE_WRITE
)
&&
rbd_dev
->
mapping
.
read_only
)
if
((
mode
&
FMODE_WRITE
)
&&
rbd_dev
->
mapping
.
read_only
)
return
-
EROFS
;
return
-
EROFS
;
spin_lock_irq
(
&
rbd_dev
->
lock
);
if
(
test_bit
(
RBD_DEV_FLAG_REMOVING
,
&
rbd_dev
->
flags
))
removing
=
true
;
else
rbd_dev
->
open_count
++
;
spin_unlock_irq
(
&
rbd_dev
->
lock
);
if
(
removing
)
return
-
ENOENT
;
mutex_lock_nested
(
&
ctl_mutex
,
SINGLE_DEPTH_NESTING
);
mutex_lock_nested
(
&
ctl_mutex
,
SINGLE_DEPTH_NESTING
);
(
void
)
get_device
(
&
rbd_dev
->
dev
);
(
void
)
get_device
(
&
rbd_dev
->
dev
);
set_device_ro
(
bdev
,
rbd_dev
->
mapping
.
read_only
);
set_device_ro
(
bdev
,
rbd_dev
->
mapping
.
read_only
);
rbd_dev
->
open_count
++
;
mutex_unlock
(
&
ctl_mutex
);
mutex_unlock
(
&
ctl_mutex
);
return
0
;
return
0
;
...
@@ -312,10 +414,14 @@ static int rbd_open(struct block_device *bdev, fmode_t mode)
...
@@ -312,10 +414,14 @@ static int rbd_open(struct block_device *bdev, fmode_t mode)
static
int
rbd_release
(
struct
gendisk
*
disk
,
fmode_t
mode
)
static
int
rbd_release
(
struct
gendisk
*
disk
,
fmode_t
mode
)
{
{
struct
rbd_device
*
rbd_dev
=
disk
->
private_data
;
struct
rbd_device
*
rbd_dev
=
disk
->
private_data
;
unsigned
long
open_count_before
;
spin_lock_irq
(
&
rbd_dev
->
lock
);
open_count_before
=
rbd_dev
->
open_count
--
;
spin_unlock_irq
(
&
rbd_dev
->
lock
);
rbd_assert
(
open_count_before
>
0
);
mutex_lock_nested
(
&
ctl_mutex
,
SINGLE_DEPTH_NESTING
);
mutex_lock_nested
(
&
ctl_mutex
,
SINGLE_DEPTH_NESTING
);
rbd_assert
(
rbd_dev
->
open_count
>
0
);
rbd_dev
->
open_count
--
;
put_device
(
&
rbd_dev
->
dev
);
put_device
(
&
rbd_dev
->
dev
);
mutex_unlock
(
&
ctl_mutex
);
mutex_unlock
(
&
ctl_mutex
);
...
@@ -426,6 +532,12 @@ static match_table_t rbd_opts_tokens = {
...
@@ -426,6 +532,12 @@ static match_table_t rbd_opts_tokens = {
{
-
1
,
NULL
}
{
-
1
,
NULL
}
};
};
struct
rbd_options
{
bool
read_only
;
};
#define RBD_READ_ONLY_DEFAULT false
static
int
parse_rbd_opts_token
(
char
*
c
,
void
*
private
)
static
int
parse_rbd_opts_token
(
char
*
c
,
void
*
private
)
{
{
struct
rbd_options
*
rbd_opts
=
private
;
struct
rbd_options
*
rbd_opts
=
private
;
...
@@ -512,18 +624,6 @@ static void rbd_put_client(struct rbd_client *rbdc)
...
@@ -512,18 +624,6 @@ static void rbd_put_client(struct rbd_client *rbdc)
kref_put
(
&
rbdc
->
kref
,
rbd_client_release
);
kref_put
(
&
rbdc
->
kref
,
rbd_client_release
);
}
}
/*
* Destroy requests collection
*/
static
void
rbd_coll_release
(
struct
kref
*
kref
)
{
struct
rbd_req_coll
*
coll
=
container_of
(
kref
,
struct
rbd_req_coll
,
kref
);
dout
(
"rbd_coll_release %p
\n
"
,
coll
);
kfree
(
coll
);
}
static
bool
rbd_image_format_valid
(
u32
image_format
)
static
bool
rbd_image_format_valid
(
u32
image_format
)
{
{
return
image_format
==
1
||
image_format
==
2
;
return
image_format
==
1
||
image_format
==
2
;
...
@@ -707,7 +807,8 @@ static int rbd_dev_set_mapping(struct rbd_device *rbd_dev)
...
@@ -707,7 +807,8 @@ static int rbd_dev_set_mapping(struct rbd_device *rbd_dev)
goto
done
;
goto
done
;
rbd_dev
->
mapping
.
read_only
=
true
;
rbd_dev
->
mapping
.
read_only
=
true
;
}
}
rbd_dev
->
exists
=
true
;
set_bit
(
RBD_DEV_FLAG_EXISTS
,
&
rbd_dev
->
flags
);
done:
done:
return
ret
;
return
ret
;
}
}
...
@@ -724,7 +825,7 @@ static void rbd_header_free(struct rbd_image_header *header)
...
@@ -724,7 +825,7 @@ static void rbd_header_free(struct rbd_image_header *header)
header
->
snapc
=
NULL
;
header
->
snapc
=
NULL
;
}
}
static
char
*
rbd_segment_name
(
struct
rbd_device
*
rbd_dev
,
u64
offset
)
static
c
onst
c
har
*
rbd_segment_name
(
struct
rbd_device
*
rbd_dev
,
u64
offset
)
{
{
char
*
name
;
char
*
name
;
u64
segment
;
u64
segment
;
...
@@ -767,23 +868,6 @@ static u64 rbd_segment_length(struct rbd_device *rbd_dev,
...
@@ -767,23 +868,6 @@ static u64 rbd_segment_length(struct rbd_device *rbd_dev,
return
length
;
return
length
;
}
}
static
int
rbd_get_num_segments
(
struct
rbd_image_header
*
header
,
u64
ofs
,
u64
len
)
{
u64
start_seg
;
u64
end_seg
;
if
(
!
len
)
return
0
;
if
(
len
-
1
>
U64_MAX
-
ofs
)
return
-
ERANGE
;
start_seg
=
ofs
>>
header
->
obj_order
;
end_seg
=
(
ofs
+
len
-
1
)
>>
header
->
obj_order
;
return
end_seg
-
start_seg
+
1
;
}
/*
/*
* returns the size of an object in the image
* returns the size of an object in the image
*/
*/
...
@@ -949,8 +1033,10 @@ static struct bio *bio_chain_clone_range(struct bio **bio_src,
...
@@ -949,8 +1033,10 @@ static struct bio *bio_chain_clone_range(struct bio **bio_src,
unsigned
int
bi_size
;
unsigned
int
bi_size
;
struct
bio
*
bio
;
struct
bio
*
bio
;
if
(
!
bi
)
if
(
!
bi
)
{
rbd_warn
(
NULL
,
"bio_chain exhausted with %u left"
,
len
);
goto
out_err
;
/* EINVAL; ran out of bio's */
goto
out_err
;
/* EINVAL; ran out of bio's */
}
bi_size
=
min_t
(
unsigned
int
,
bi
->
bi_size
-
off
,
len
);
bi_size
=
min_t
(
unsigned
int
,
bi
->
bi_size
-
off
,
len
);
bio
=
bio_clone_range
(
bi
,
off
,
bi_size
,
gfpmask
);
bio
=
bio_clone_range
(
bi
,
off
,
bi_size
,
gfpmask
);
if
(
!
bio
)
if
(
!
bio
)
...
@@ -976,399 +1062,665 @@ static struct bio *bio_chain_clone_range(struct bio **bio_src,
...
@@ -976,399 +1062,665 @@ static struct bio *bio_chain_clone_range(struct bio **bio_src,
return
NULL
;
return
NULL
;
}
}
/*
static
void
rbd_obj_request_get
(
struct
rbd_obj_request
*
obj_request
)
* helpers for osd request op vectors.
*/
static
struct
ceph_osd_req_op
*
rbd_create_rw_ops
(
int
num_ops
,
int
opcode
,
u32
payload_len
)
{
{
struct
ceph_osd_req_op
*
ops
;
kref_get
(
&
obj_request
->
kref
);
}
ops
=
kzalloc
(
sizeof
(
*
ops
)
*
(
num_ops
+
1
),
GFP_NOIO
);
static
void
rbd_obj_request_destroy
(
struct
kref
*
kref
);
if
(
!
ops
)
static
void
rbd_obj_request_put
(
struct
rbd_obj_request
*
obj_request
)
{
rbd_assert
(
obj_request
!=
NULL
);
kref_put
(
&
obj_request
->
kref
,
rbd_obj_request_destroy
);
}
static
void
rbd_img_request_get
(
struct
rbd_img_request
*
img_request
)
{
kref_get
(
&
img_request
->
kref
);
}
static
void
rbd_img_request_destroy
(
struct
kref
*
kref
);
static
void
rbd_img_request_put
(
struct
rbd_img_request
*
img_request
)
{
rbd_assert
(
img_request
!=
NULL
);
kref_put
(
&
img_request
->
kref
,
rbd_img_request_destroy
);
}
static
inline
void
rbd_img_obj_request_add
(
struct
rbd_img_request
*
img_request
,
struct
rbd_obj_request
*
obj_request
)
{
rbd_assert
(
obj_request
->
img_request
==
NULL
);
rbd_obj_request_get
(
obj_request
);
obj_request
->
img_request
=
img_request
;
obj_request
->
which
=
img_request
->
obj_request_count
;
rbd_assert
(
obj_request
->
which
!=
BAD_WHICH
);
img_request
->
obj_request_count
++
;
list_add_tail
(
&
obj_request
->
links
,
&
img_request
->
obj_requests
);
}
static
inline
void
rbd_img_obj_request_del
(
struct
rbd_img_request
*
img_request
,
struct
rbd_obj_request
*
obj_request
)
{
rbd_assert
(
obj_request
->
which
!=
BAD_WHICH
);
list_del
(
&
obj_request
->
links
);
rbd_assert
(
img_request
->
obj_request_count
>
0
);
img_request
->
obj_request_count
--
;
rbd_assert
(
obj_request
->
which
==
img_request
->
obj_request_count
);
obj_request
->
which
=
BAD_WHICH
;
rbd_assert
(
obj_request
->
img_request
==
img_request
);
obj_request
->
img_request
=
NULL
;
obj_request
->
callback
=
NULL
;
rbd_obj_request_put
(
obj_request
);
}
static
bool
obj_request_type_valid
(
enum
obj_request_type
type
)
{
switch
(
type
)
{
case
OBJ_REQUEST_NODATA
:
case
OBJ_REQUEST_BIO
:
case
OBJ_REQUEST_PAGES
:
return
true
;
default:
return
false
;
}
}
struct
ceph_osd_req_op
*
rbd_osd_req_op_create
(
u16
opcode
,
...)
{
struct
ceph_osd_req_op
*
op
;
va_list
args
;
size_t
size
;
op
=
kzalloc
(
sizeof
(
*
op
),
GFP_NOIO
);
if
(
!
op
)
return
NULL
;
return
NULL
;
op
->
op
=
opcode
;
va_start
(
args
,
opcode
);
switch
(
opcode
)
{
case
CEPH_OSD_OP_READ
:
case
CEPH_OSD_OP_WRITE
:
/* rbd_osd_req_op_create(READ, offset, length) */
/* rbd_osd_req_op_create(WRITE, offset, length) */
op
->
extent
.
offset
=
va_arg
(
args
,
u64
);
op
->
extent
.
length
=
va_arg
(
args
,
u64
);
if
(
opcode
==
CEPH_OSD_OP_WRITE
)
op
->
payload_len
=
op
->
extent
.
length
;
break
;
case
CEPH_OSD_OP_STAT
:
break
;
case
CEPH_OSD_OP_CALL
:
/* rbd_osd_req_op_create(CALL, class, method, data, datalen) */
op
->
cls
.
class_name
=
va_arg
(
args
,
char
*
);
size
=
strlen
(
op
->
cls
.
class_name
);
rbd_assert
(
size
<=
(
size_t
)
U8_MAX
);
op
->
cls
.
class_len
=
size
;
op
->
payload_len
=
size
;
op
->
cls
.
method_name
=
va_arg
(
args
,
char
*
);
size
=
strlen
(
op
->
cls
.
method_name
);
rbd_assert
(
size
<=
(
size_t
)
U8_MAX
);
op
->
cls
.
method_len
=
size
;
op
->
payload_len
+=
size
;
op
->
cls
.
argc
=
0
;
op
->
cls
.
indata
=
va_arg
(
args
,
void
*
);
size
=
va_arg
(
args
,
size_t
);
rbd_assert
(
size
<=
(
size_t
)
U32_MAX
);
op
->
cls
.
indata_len
=
(
u32
)
size
;
op
->
payload_len
+=
size
;
break
;
case
CEPH_OSD_OP_NOTIFY_ACK
:
case
CEPH_OSD_OP_WATCH
:
/* rbd_osd_req_op_create(NOTIFY_ACK, cookie, version) */
/* rbd_osd_req_op_create(WATCH, cookie, version, flag) */
op
->
watch
.
cookie
=
va_arg
(
args
,
u64
);
op
->
watch
.
ver
=
va_arg
(
args
,
u64
);
op
->
watch
.
ver
=
cpu_to_le64
(
op
->
watch
.
ver
);
if
(
opcode
==
CEPH_OSD_OP_WATCH
&&
va_arg
(
args
,
int
))
op
->
watch
.
flag
=
(
u8
)
1
;
break
;
default:
rbd_warn
(
NULL
,
"unsupported opcode %hu
\n
"
,
opcode
);
kfree
(
op
);
op
=
NULL
;
break
;
}
va_end
(
args
);
ops
[
0
].
op
=
opcode
;
return
op
;
}
/*
static
void
rbd_osd_req_op_destroy
(
struct
ceph_osd_req_op
*
op
)
* op extent offset and length will be set later on
{
* in calc_raw_layout()
kfree
(
op
);
*/
}
ops
[
0
].
payload_len
=
payload_len
;
return
ops
;
static
int
rbd_obj_request_submit
(
struct
ceph_osd_client
*
osdc
,
struct
rbd_obj_request
*
obj_request
)
{
return
ceph_osdc_start_request
(
osdc
,
obj_request
->
osd_req
,
false
);
}
}
static
void
rbd_
destroy_ops
(
struct
ceph_osd_req_op
*
ops
)
static
void
rbd_
img_request_complete
(
struct
rbd_img_request
*
img_request
)
{
{
kfree
(
ops
);
if
(
img_request
->
callback
)
img_request
->
callback
(
img_request
);
else
rbd_img_request_put
(
img_request
);
}
}
static
void
rbd_coll_end_req_index
(
struct
request
*
rq
,
/* Caller is responsible for rbd_obj_request_destroy(obj_request) */
struct
rbd_req_coll
*
coll
,
int
index
,
static
int
rbd_obj_request_wait
(
struct
rbd_obj_request
*
obj_request
)
int
ret
,
u64
len
)
{
{
struct
request_queue
*
q
;
return
wait_for_completion_interruptible
(
&
obj_request
->
completion
)
;
int
min
,
max
,
i
;
}
dout
(
"rbd_coll_end_req_index %p index %d ret %d len %llu
\n
"
,
static
void
obj_request_done_init
(
struct
rbd_obj_request
*
obj_request
)
coll
,
index
,
ret
,
(
unsigned
long
long
)
len
);
{
atomic_set
(
&
obj_request
->
done
,
0
);
smp_wmb
();
}
if
(
!
rq
)
static
void
obj_request_done_set
(
struct
rbd_obj_request
*
obj_request
)
return
;
{
atomic_set
(
&
obj_request
->
done
,
1
);
smp_wmb
();
}
if
(
!
coll
)
{
static
bool
obj_request_done_test
(
struct
rbd_obj_request
*
obj_request
)
blk_end_request
(
rq
,
ret
,
len
);
{
return
;
smp_rmb
();
}
return
atomic_read
(
&
obj_request
->
done
)
!=
0
;
}
static
void
rbd_osd_trivial_callback
(
struct
rbd_obj_request
*
obj_request
,
struct
ceph_osd_op
*
op
)
{
obj_request_done_set
(
obj_request
);
}
q
=
rq
->
q
;
static
void
rbd_obj_request_complete
(
struct
rbd_obj_request
*
obj_request
)
{
spin_lock_irq
(
q
->
queue_lock
);
if
(
obj_request
->
callback
)
coll
->
status
[
index
].
done
=
1
;
obj_request
->
callback
(
obj_request
);
coll
->
status
[
index
].
rc
=
ret
;
else
coll
->
status
[
index
].
bytes
=
len
;
complete_all
(
&
obj_request
->
completion
);
max
=
min
=
coll
->
num_done
;
}
while
(
max
<
coll
->
total
&&
coll
->
status
[
max
].
done
)
max
++
;
static
void
rbd_osd_read_callback
(
struct
rbd_obj_request
*
obj_request
,
struct
ceph_osd_op
*
op
)
for
(
i
=
min
;
i
<
max
;
i
++
)
{
{
__blk_end_request
(
rq
,
coll
->
status
[
i
].
rc
,
u64
xferred
;
coll
->
status
[
i
].
bytes
);
coll
->
num_done
++
;
/*
kref_put
(
&
coll
->
kref
,
rbd_coll_release
);
* We support a 64-bit length, but ultimately it has to be
* passed to blk_end_request(), which takes an unsigned int.
*/
xferred
=
le64_to_cpu
(
op
->
extent
.
length
);
rbd_assert
(
xferred
<
(
u64
)
UINT_MAX
);
if
(
obj_request
->
result
==
(
s32
)
-
ENOENT
)
{
zero_bio_chain
(
obj_request
->
bio_list
,
0
);
obj_request
->
result
=
0
;
}
else
if
(
xferred
<
obj_request
->
length
&&
!
obj_request
->
result
)
{
zero_bio_chain
(
obj_request
->
bio_list
,
xferred
);
xferred
=
obj_request
->
length
;
}
}
spin_unlock_irq
(
q
->
queue_lock
);
obj_request
->
xferred
=
xferred
;
obj_request_done_set
(
obj_request
);
}
}
static
void
rbd_
coll_end_req
(
struct
rbd_request
*
req
,
static
void
rbd_
osd_write_callback
(
struct
rbd_obj_request
*
obj_request
,
int
ret
,
u64
len
)
struct
ceph_osd_op
*
op
)
{
{
rbd_coll_end_req_index
(
req
->
rq
,
req
->
coll
,
req
->
coll_index
,
ret
,
len
);
obj_request
->
xferred
=
le64_to_cpu
(
op
->
extent
.
length
);
obj_request_done_set
(
obj_request
);
}
}
/*
/*
* Send ceph osd request
* For a simple stat call there's nothing to do. We'll do more if
* this is part of a write sequence for a layered image.
*/
*/
static
int
rbd_do_request
(
struct
request
*
rq
,
static
void
rbd_osd_stat_callback
(
struct
rbd_obj_request
*
obj_request
,
struct
rbd_device
*
rbd_dev
,
struct
ceph_osd_op
*
op
)
struct
ceph_snap_context
*
snapc
,
{
u64
snapid
,
obj_request_done_set
(
obj_request
);
const
char
*
object_name
,
u64
ofs
,
u64
len
,
}
struct
bio
*
bio
,
struct
page
**
pages
,
int
num_pages
,
int
flags
,
struct
ceph_osd_req_op
*
ops
,
struct
rbd_req_coll
*
coll
,
int
coll_index
,
void
(
*
rbd_cb
)(
struct
ceph_osd_request
*
req
,
struct
ceph_msg
*
msg
),
struct
ceph_osd_request
**
linger_req
,
u64
*
ver
)
{
struct
ceph_osd_request
*
req
;
struct
ceph_file_layout
*
layout
;
int
ret
;
u64
bno
;
struct
timespec
mtime
=
CURRENT_TIME
;
struct
rbd_request
*
req_data
;
struct
ceph_osd_request_head
*
reqhead
;
struct
ceph_osd_client
*
osdc
;
req_data
=
kzalloc
(
sizeof
(
*
req_data
),
GFP_NOIO
);
static
void
rbd_osd_req_callback
(
struct
ceph_osd_request
*
osd_req
,
if
(
!
req_data
)
{
struct
ceph_msg
*
msg
)
if
(
coll
)
{
rbd_coll_end_req_index
(
rq
,
coll
,
coll_index
,
struct
rbd_obj_request
*
obj_request
=
osd_req
->
r_priv
;
-
ENOMEM
,
len
);
struct
ceph_osd_reply_head
*
reply_head
;
return
-
ENOMEM
;
struct
ceph_osd_op
*
op
;
u32
num_ops
;
u16
opcode
;
rbd_assert
(
osd_req
==
obj_request
->
osd_req
);
rbd_assert
(
!!
obj_request
->
img_request
^
(
obj_request
->
which
==
BAD_WHICH
));
obj_request
->
xferred
=
le32_to_cpu
(
msg
->
hdr
.
data_len
);
reply_head
=
msg
->
front
.
iov_base
;
obj_request
->
result
=
(
s32
)
le32_to_cpu
(
reply_head
->
result
);
obj_request
->
version
=
le64_to_cpu
(
osd_req
->
r_reassert_version
.
version
);
num_ops
=
le32_to_cpu
(
reply_head
->
num_ops
);
WARN_ON
(
num_ops
!=
1
);
/* For now */
op
=
&
reply_head
->
ops
[
0
];
opcode
=
le16_to_cpu
(
op
->
op
);
switch
(
opcode
)
{
case
CEPH_OSD_OP_READ
:
rbd_osd_read_callback
(
obj_request
,
op
);
break
;
case
CEPH_OSD_OP_WRITE
:
rbd_osd_write_callback
(
obj_request
,
op
);
break
;
case
CEPH_OSD_OP_STAT
:
rbd_osd_stat_callback
(
obj_request
,
op
);
break
;
case
CEPH_OSD_OP_CALL
:
case
CEPH_OSD_OP_NOTIFY_ACK
:
case
CEPH_OSD_OP_WATCH
:
rbd_osd_trivial_callback
(
obj_request
,
op
);
break
;
default:
rbd_warn
(
NULL
,
"%s: unsupported op %hu
\n
"
,
obj_request
->
object_name
,
(
unsigned
short
)
opcode
);
break
;
}
}
if
(
coll
)
{
if
(
obj_request_done_test
(
obj_request
))
req_data
->
coll
=
coll
;
rbd_obj_request_complete
(
obj_request
);
req_data
->
coll_index
=
coll_index
;
}
static
struct
ceph_osd_request
*
rbd_osd_req_create
(
struct
rbd_device
*
rbd_dev
,
bool
write_request
,
struct
rbd_obj_request
*
obj_request
,
struct
ceph_osd_req_op
*
op
)
{
struct
rbd_img_request
*
img_request
=
obj_request
->
img_request
;
struct
ceph_snap_context
*
snapc
=
NULL
;
struct
ceph_osd_client
*
osdc
;
struct
ceph_osd_request
*
osd_req
;
struct
timespec
now
;
struct
timespec
*
mtime
;
u64
snap_id
=
CEPH_NOSNAP
;
u64
offset
=
obj_request
->
offset
;
u64
length
=
obj_request
->
length
;
if
(
img_request
)
{
rbd_assert
(
img_request
->
write_request
==
write_request
);
if
(
img_request
->
write_request
)
snapc
=
img_request
->
snapc
;
else
snap_id
=
img_request
->
snap_id
;
}
}
dout
(
"rbd_do_request object_name=%s ofs=%llu len=%llu coll=%p[%d]
\n
"
,
/* Allocate and initialize the request, for the single op */
object_name
,
(
unsigned
long
long
)
ofs
,
(
unsigned
long
long
)
len
,
coll
,
coll_index
);
osdc
=
&
rbd_dev
->
rbd_client
->
client
->
osdc
;
osdc
=
&
rbd_dev
->
rbd_client
->
client
->
osdc
;
req
=
ceph_osdc_alloc_request
(
osdc
,
flags
,
snapc
,
ops
,
osd_req
=
ceph_osdc_alloc_request
(
osdc
,
snapc
,
1
,
false
,
GFP_ATOMIC
);
false
,
GFP_NOIO
,
pages
,
bio
);
if
(
!
osd_req
)
if
(
!
req
)
{
return
NULL
;
/* ENOMEM */
ret
=
-
ENOMEM
;
goto
done_pages
;
rbd_assert
(
obj_request_type_valid
(
obj_request
->
type
));
switch
(
obj_request
->
type
)
{
case
OBJ_REQUEST_NODATA
:
break
;
/* Nothing to do */
case
OBJ_REQUEST_BIO
:
rbd_assert
(
obj_request
->
bio_list
!=
NULL
);
osd_req
->
r_bio
=
obj_request
->
bio_list
;
break
;
case
OBJ_REQUEST_PAGES
:
osd_req
->
r_pages
=
obj_request
->
pages
;
osd_req
->
r_num_pages
=
obj_request
->
page_count
;
osd_req
->
r_page_alignment
=
offset
&
~
PAGE_MASK
;
break
;
}
}
req
->
r_callback
=
rbd_cb
;
if
(
write_request
)
{
osd_req
->
r_flags
=
CEPH_OSD_FLAG_WRITE
|
CEPH_OSD_FLAG_ONDISK
;
now
=
CURRENT_TIME
;
mtime
=
&
now
;
}
else
{
osd_req
->
r_flags
=
CEPH_OSD_FLAG_READ
;
mtime
=
NULL
;
/* not needed for reads */
offset
=
0
;
/* These are not used... */
length
=
0
;
/* ...for osd read requests */
}
req_data
->
rq
=
rq
;
osd_req
->
r_callback
=
rbd_osd_req_callback
;
req_data
->
bio
=
bio
;
osd_req
->
r_priv
=
obj_request
;
req_data
->
pages
=
pages
;
req_data
->
len
=
len
;
req
->
r_priv
=
req_data
;
osd_req
->
r_oid_len
=
strlen
(
obj_request
->
object_name
);
rbd_assert
(
osd_req
->
r_oid_len
<
sizeof
(
osd_req
->
r_oid
));
memcpy
(
osd_req
->
r_oid
,
obj_request
->
object_name
,
osd_req
->
r_oid_len
);
reqhead
=
req
->
r_request
->
front
.
iov_base
;
osd_req
->
r_file_layout
=
rbd_dev
->
layout
;
/* struct */
reqhead
->
snapid
=
cpu_to_le64
(
CEPH_NOSNAP
);
strncpy
(
req
->
r_oid
,
object_name
,
sizeof
(
req
->
r_oid
));
/* osd_req will get its own reference to snapc (if non-null) */
req
->
r_oid_len
=
strlen
(
req
->
r_oid
);
layout
=
&
req
->
r_file_layout
;
ceph_osdc_build_request
(
osd_req
,
offset
,
length
,
1
,
op
,
memset
(
layout
,
0
,
sizeof
(
*
layout
));
snapc
,
snap_id
,
mtime
);
layout
->
fl_stripe_unit
=
cpu_to_le32
(
1
<<
RBD_MAX_OBJ_ORDER
);
layout
->
fl_stripe_count
=
cpu_to_le32
(
1
);
layout
->
fl_object_size
=
cpu_to_le32
(
1
<<
RBD_MAX_OBJ_ORDER
);
layout
->
fl_pg_pool
=
cpu_to_le32
((
int
)
rbd_dev
->
spec
->
pool_id
);
ret
=
ceph_calc_raw_layout
(
osdc
,
layout
,
snapid
,
ofs
,
&
len
,
&
bno
,
req
,
ops
);
rbd_assert
(
ret
==
0
);
ceph_osdc_build_request
(
req
,
ofs
,
&
len
,
return
osd_req
;
ops
,
}
snapc
,
&
mtime
,
req
->
r_oid
,
req
->
r_oid_len
);
if
(
linger_req
)
{
static
void
rbd_osd_req_destroy
(
struct
ceph_osd_request
*
osd_req
)
ceph_osdc_set_request_linger
(
osdc
,
req
);
{
*
linger_req
=
req
;
ceph_osdc_put_request
(
osd_req
)
;
}
}
ret
=
ceph_osdc_start_request
(
osdc
,
req
,
false
);
/* object_name is assumed to be a non-null pointer and NUL-terminated */
if
(
ret
<
0
)
goto
done_err
;
static
struct
rbd_obj_request
*
rbd_obj_request_create
(
const
char
*
object_name
,
u64
offset
,
u64
length
,
if
(
!
rbd_cb
)
{
enum
obj_request_type
type
)
ret
=
ceph_osdc_wait_request
(
osdc
,
req
);
{
if
(
ver
)
struct
rbd_obj_request
*
obj_request
;
*
ver
=
le64_to_cpu
(
req
->
r_reassert_version
.
version
);
size_t
size
;
dout
(
"reassert_ver=%llu
\n
"
,
char
*
name
;
(
unsigned
long
long
)
le64_to_cpu
(
req
->
r_reassert_version
.
version
));
rbd_assert
(
obj_request_type_valid
(
type
));
ceph_osdc_put_request
(
req
);
size
=
strlen
(
object_name
)
+
1
;
obj_request
=
kzalloc
(
sizeof
(
*
obj_request
)
+
size
,
GFP_KERNEL
);
if
(
!
obj_request
)
return
NULL
;
name
=
(
char
*
)(
obj_request
+
1
);
obj_request
->
object_name
=
memcpy
(
name
,
object_name
,
size
);
obj_request
->
offset
=
offset
;
obj_request
->
length
=
length
;
obj_request
->
which
=
BAD_WHICH
;
obj_request
->
type
=
type
;
INIT_LIST_HEAD
(
&
obj_request
->
links
);
obj_request_done_init
(
obj_request
);
init_completion
(
&
obj_request
->
completion
);
kref_init
(
&
obj_request
->
kref
);
return
obj_request
;
}
static
void
rbd_obj_request_destroy
(
struct
kref
*
kref
)
{
struct
rbd_obj_request
*
obj_request
;
obj_request
=
container_of
(
kref
,
struct
rbd_obj_request
,
kref
);
rbd_assert
(
obj_request
->
img_request
==
NULL
);
rbd_assert
(
obj_request
->
which
==
BAD_WHICH
);
if
(
obj_request
->
osd_req
)
rbd_osd_req_destroy
(
obj_request
->
osd_req
);
rbd_assert
(
obj_request_type_valid
(
obj_request
->
type
));
switch
(
obj_request
->
type
)
{
case
OBJ_REQUEST_NODATA
:
break
;
/* Nothing to do */
case
OBJ_REQUEST_BIO
:
if
(
obj_request
->
bio_list
)
bio_chain_put
(
obj_request
->
bio_list
);
break
;
case
OBJ_REQUEST_PAGES
:
if
(
obj_request
->
pages
)
ceph_release_page_vector
(
obj_request
->
pages
,
obj_request
->
page_count
);
break
;
}
}
return
ret
;
done_err:
kfree
(
obj_request
);
bio_chain_put
(
req_data
->
bio
);
ceph_osdc_put_request
(
req
);
done_pages:
rbd_coll_end_req
(
req_data
,
ret
,
len
);
kfree
(
req_data
);
return
ret
;
}
}
/*
/*
* Ceph osd op callback
* Caller is responsible for filling in the list of object requests
* that comprises the image request, and the Linux request pointer
* (if there is one).
*/
*/
static
void
rbd_req_cb
(
struct
ceph_osd_request
*
req
,
struct
ceph_msg
*
msg
)
struct
rbd_img_request
*
rbd_img_request_create
(
struct
rbd_device
*
rbd_dev
,
u64
offset
,
u64
length
,
bool
write_request
)
{
{
struct
rbd_request
*
req_data
=
req
->
r_priv
;
struct
rbd_img_request
*
img_request
;
struct
ceph_osd_reply_head
*
replyhead
;
struct
ceph_snap_context
*
snapc
=
NULL
;
struct
ceph_osd_op
*
op
;
__s32
rc
;
u64
bytes
;
int
read_op
;
/* parse reply */
replyhead
=
msg
->
front
.
iov_base
;
WARN_ON
(
le32_to_cpu
(
replyhead
->
num_ops
)
==
0
);
op
=
(
void
*
)(
replyhead
+
1
);
rc
=
le32_to_cpu
(
replyhead
->
result
);
bytes
=
le64_to_cpu
(
op
->
extent
.
length
);
read_op
=
(
le16_to_cpu
(
op
->
op
)
==
CEPH_OSD_OP_READ
);
dout
(
"rbd_req_cb bytes=%llu readop=%d rc=%d
\n
"
,
(
unsigned
long
long
)
bytes
,
read_op
,
(
int
)
rc
);
if
(
rc
==
-
ENOENT
&&
read_op
)
{
zero_bio_chain
(
req_data
->
bio
,
0
);
rc
=
0
;
}
else
if
(
rc
==
0
&&
read_op
&&
bytes
<
req_data
->
len
)
{
zero_bio_chain
(
req_data
->
bio
,
bytes
);
bytes
=
req_data
->
len
;
}
rbd_coll_end_req
(
req_data
,
rc
,
bytes
);
img_request
=
kmalloc
(
sizeof
(
*
img_request
),
GFP_ATOMIC
);
if
(
!
img_request
)
return
NULL
;
if
(
req_data
->
bio
)
if
(
write_request
)
{
bio_chain_put
(
req_data
->
bio
);
down_read
(
&
rbd_dev
->
header_rwsem
);
snapc
=
ceph_get_snap_context
(
rbd_dev
->
header
.
snapc
);
up_read
(
&
rbd_dev
->
header_rwsem
);
if
(
WARN_ON
(
!
snapc
))
{
kfree
(
img_request
);
return
NULL
;
/* Shouldn't happen */
}
}
ceph_osdc_put_request
(
req
);
img_request
->
rq
=
NULL
;
kfree
(
req_data
);
img_request
->
rbd_dev
=
rbd_dev
;
img_request
->
offset
=
offset
;
img_request
->
length
=
length
;
img_request
->
write_request
=
write_request
;
if
(
write_request
)
img_request
->
snapc
=
snapc
;
else
img_request
->
snap_id
=
rbd_dev
->
spec
->
snap_id
;
spin_lock_init
(
&
img_request
->
completion_lock
);
img_request
->
next_completion
=
0
;
img_request
->
callback
=
NULL
;
img_request
->
obj_request_count
=
0
;
INIT_LIST_HEAD
(
&
img_request
->
obj_requests
);
kref_init
(
&
img_request
->
kref
);
rbd_img_request_get
(
img_request
);
/* Avoid a warning */
rbd_img_request_put
(
img_request
);
/* TEMPORARY */
return
img_request
;
}
}
static
void
rbd_
simple_req_cb
(
struct
ceph_osd_request
*
req
,
struct
ceph_msg
*
msg
)
static
void
rbd_
img_request_destroy
(
struct
kref
*
kref
)
{
{
ceph_osdc_put_request
(
req
);
struct
rbd_img_request
*
img_request
;
struct
rbd_obj_request
*
obj_request
;
struct
rbd_obj_request
*
next_obj_request
;
img_request
=
container_of
(
kref
,
struct
rbd_img_request
,
kref
);
for_each_obj_request_safe
(
img_request
,
obj_request
,
next_obj_request
)
rbd_img_obj_request_del
(
img_request
,
obj_request
);
rbd_assert
(
img_request
->
obj_request_count
==
0
);
if
(
img_request
->
write_request
)
ceph_put_snap_context
(
img_request
->
snapc
);
kfree
(
img_request
);
}
}
/*
static
int
rbd_img_request_fill_bio
(
struct
rbd_img_request
*
img_request
,
* Do a synchronous ceph osd operation
struct
bio
*
bio_list
)
*/
static
int
rbd_req_sync_op
(
struct
rbd_device
*
rbd_dev
,
struct
ceph_snap_context
*
snapc
,
u64
snapid
,
int
flags
,
struct
ceph_osd_req_op
*
ops
,
const
char
*
object_name
,
u64
ofs
,
u64
inbound_size
,
char
*
inbound
,
struct
ceph_osd_request
**
linger_req
,
u64
*
ver
)
{
{
int
ret
;
struct
rbd_device
*
rbd_dev
=
img_request
->
rbd_dev
;
struct
page
**
pages
;
struct
rbd_obj_request
*
obj_request
=
NULL
;
int
num_pages
;
struct
rbd_obj_request
*
next_obj_request
;
unsigned
int
bio_offset
;
rbd_assert
(
ops
!=
NULL
);
u64
image_offset
;
u64
resid
;
u16
opcode
;
opcode
=
img_request
->
write_request
?
CEPH_OSD_OP_WRITE
:
CEPH_OSD_OP_READ
;
bio_offset
=
0
;
image_offset
=
img_request
->
offset
;
rbd_assert
(
image_offset
==
bio_list
->
bi_sector
<<
SECTOR_SHIFT
);
resid
=
img_request
->
length
;
while
(
resid
)
{
const
char
*
object_name
;
unsigned
int
clone_size
;
struct
ceph_osd_req_op
*
op
;
u64
offset
;
u64
length
;
object_name
=
rbd_segment_name
(
rbd_dev
,
image_offset
);
if
(
!
object_name
)
goto
out_unwind
;
offset
=
rbd_segment_offset
(
rbd_dev
,
image_offset
);
length
=
rbd_segment_length
(
rbd_dev
,
image_offset
,
resid
);
obj_request
=
rbd_obj_request_create
(
object_name
,
offset
,
length
,
OBJ_REQUEST_BIO
);
kfree
(
object_name
);
/* object request has its own copy */
if
(
!
obj_request
)
goto
out_unwind
;
rbd_assert
(
length
<=
(
u64
)
UINT_MAX
);
clone_size
=
(
unsigned
int
)
length
;
obj_request
->
bio_list
=
bio_chain_clone_range
(
&
bio_list
,
&
bio_offset
,
clone_size
,
GFP_ATOMIC
);
if
(
!
obj_request
->
bio_list
)
goto
out_partial
;
num_pages
=
calc_pages_for
(
ofs
,
inbound_size
);
/*
pages
=
ceph_alloc_page_vector
(
num_pages
,
GFP_KERNEL
);
* Build up the op to use in building the osd
if
(
IS_ERR
(
pages
))
* request. Note that the contents of the op are
return
PTR_ERR
(
pages
);
* copied by rbd_osd_req_create().
*/
op
=
rbd_osd_req_op_create
(
opcode
,
offset
,
length
);
if
(
!
op
)
goto
out_partial
;
obj_request
->
osd_req
=
rbd_osd_req_create
(
rbd_dev
,
img_request
->
write_request
,
obj_request
,
op
);
rbd_osd_req_op_destroy
(
op
);
if
(
!
obj_request
->
osd_req
)
goto
out_partial
;
/* status and version are initially zero-filled */
rbd_img_obj_request_add
(
img_request
,
obj_request
);
image_offset
+=
length
;
resid
-=
length
;
}
ret
=
rbd_do_request
(
NULL
,
rbd_dev
,
snapc
,
snapid
,
return
0
;
object_name
,
ofs
,
inbound_size
,
NULL
,
pages
,
num_pages
,
flags
,
ops
,
NULL
,
0
,
NULL
,
linger_req
,
ver
);
if
(
ret
<
0
)
goto
done
;
if
((
flags
&
CEPH_OSD_FLAG_READ
)
&&
inbound
)
out_partial:
ret
=
ceph_copy_from_page_vector
(
pages
,
inbound
,
ofs
,
ret
);
rbd_obj_request_put
(
obj_request
);
out_unwind:
for_each_obj_request_safe
(
img_request
,
obj_request
,
next_obj_request
)
rbd_obj_request_put
(
obj_request
);
done:
return
-
ENOMEM
;
ceph_release_page_vector
(
pages
,
num_pages
);
return
ret
;
}
}
/*
static
void
rbd_img_obj_callback
(
struct
rbd_obj_request
*
obj_request
)
* Do an asynchronous ceph osd operation
{
*/
struct
rbd_img_request
*
img_request
;
static
int
rbd_do_op
(
struct
request
*
rq
,
u32
which
=
obj_request
->
which
;
struct
rbd_device
*
rbd_dev
,
bool
more
=
true
;
struct
ceph_snap_context
*
snapc
,
u64
ofs
,
u64
len
,
img_request
=
obj_request
->
img_request
;
struct
bio
*
bio
,
rbd_assert
(
img_request
!=
NULL
);
struct
rbd_req_coll
*
coll
,
rbd_assert
(
img_request
->
rq
!=
NULL
);
int
coll_index
)
rbd_assert
(
which
!=
BAD_WHICH
);
{
rbd_assert
(
which
<
img_request
->
obj_request_count
);
char
*
seg_name
;
rbd_assert
(
which
>=
img_request
->
next_completion
);
u64
seg_ofs
;
u64
seg_len
;
spin_lock_irq
(
&
img_request
->
completion_lock
);
int
ret
;
if
(
which
!=
img_request
->
next_completion
)
struct
ceph_osd_req_op
*
ops
;
goto
out
;
u32
payload_len
;
int
opcode
;
int
flags
;
u64
snapid
;
seg_name
=
rbd_segment_name
(
rbd_dev
,
ofs
);
if
(
!
seg_name
)
return
-
ENOMEM
;
seg_len
=
rbd_segment_length
(
rbd_dev
,
ofs
,
len
);
seg_ofs
=
rbd_segment_offset
(
rbd_dev
,
ofs
);
if
(
rq_data_dir
(
rq
)
==
WRITE
)
{
opcode
=
CEPH_OSD_OP_WRITE
;
flags
=
CEPH_OSD_FLAG_WRITE
|
CEPH_OSD_FLAG_ONDISK
;
snapid
=
CEPH_NOSNAP
;
payload_len
=
seg_len
;
}
else
{
opcode
=
CEPH_OSD_OP_READ
;
flags
=
CEPH_OSD_FLAG_READ
;
snapc
=
NULL
;
snapid
=
rbd_dev
->
spec
->
snap_id
;
payload_len
=
0
;
}
ret
=
-
ENOMEM
;
for_each_obj_request_from
(
img_request
,
obj_request
)
{
ops
=
rbd_create_rw_ops
(
1
,
opcode
,
payload_len
);
unsigned
int
xferred
;
if
(
!
ops
)
int
result
;
goto
done
;
/* we've taken care of segment sizes earlier when we
rbd_assert
(
more
);
cloned the bios. We should never have a segment
rbd_assert
(
which
<
img_request
->
obj_request_count
);
truncated at this point */
rbd_assert
(
seg_len
==
len
);
if
(
!
obj_request_done_test
(
obj_request
))
break
;
ret
=
rbd_do_request
(
rq
,
rbd_dev
,
snapc
,
snapid
,
seg_name
,
seg_ofs
,
seg_len
,
rbd_assert
(
obj_request
->
xferred
<=
(
u64
)
UINT_MAX
);
bio
,
xferred
=
(
unsigned
int
)
obj_request
->
xferred
;
NULL
,
0
,
result
=
(
int
)
obj_request
->
result
;
flags
,
if
(
result
)
ops
,
rbd_warn
(
NULL
,
"obj_request %s result %d xferred %u
\n
"
,
coll
,
coll_index
,
img_request
->
write_request
?
"write"
:
"read"
,
rbd_req_cb
,
0
,
NULL
);
result
,
xferred
);
rbd_destroy_ops
(
ops
);
more
=
blk_end_request
(
img_request
->
rq
,
result
,
xferred
);
done:
which
++
;
kfree
(
seg_name
);
}
return
ret
;
rbd_assert
(
more
^
(
which
==
img_request
->
obj_request_count
));
img_request
->
next_completion
=
which
;
out:
spin_unlock_irq
(
&
img_request
->
completion_lock
);
if
(
!
more
)
rbd_img_request_complete
(
img_request
);
}
}
/*
static
int
rbd_img_request_submit
(
struct
rbd_img_request
*
img_request
)
* Request sync osd read
{
*/
struct
rbd_device
*
rbd_dev
=
img_request
->
rbd_dev
;
static
int
rbd_req_sync_read
(
struct
rbd_device
*
rbd_dev
,
struct
ceph_osd_client
*
osdc
=
&
rbd_dev
->
rbd_client
->
client
->
osdc
;
u64
snapid
,
struct
rbd_obj_request
*
obj_request
;
const
char
*
object_name
,
u64
ofs
,
u64
len
,
char
*
buf
,
u64
*
ver
)
{
struct
ceph_osd_req_op
*
ops
;
int
ret
;
ops
=
rbd_create_rw_ops
(
1
,
CEPH_OSD_OP_READ
,
0
);
for_each_obj_request
(
img_request
,
obj_request
)
{
if
(
!
ops
)
int
ret
;
return
-
ENOMEM
;
ret
=
rbd_req_sync_op
(
rbd_dev
,
NULL
,
obj_request
->
callback
=
rbd_img_obj_callback
;
snapid
,
ret
=
rbd_obj_request_submit
(
osdc
,
obj_request
);
CEPH_OSD_FLAG_READ
,
if
(
ret
)
ops
,
object_name
,
ofs
,
len
,
buf
,
NULL
,
ver
);
return
ret
;
rbd_destroy_ops
(
ops
);
/*
* The image request has its own reference to each
* of its object requests, so we can safely drop the
* initial one here.
*/
rbd_obj_request_put
(
obj_request
);
}
return
ret
;
return
0
;
}
}
/*
static
int
rbd_obj_notify_ack
(
struct
rbd_device
*
rbd_dev
,
* Request sync osd watch
u64
ver
,
u64
notify_id
)
*/
static
int
rbd_req_sync_notify_ack
(
struct
rbd_device
*
rbd_dev
,
u64
ver
,
u64
notify_id
)
{
{
struct
ceph_osd_req_op
*
ops
;
struct
rbd_obj_request
*
obj_request
;
struct
ceph_osd_req_op
*
op
;
struct
ceph_osd_client
*
osdc
;
int
ret
;
int
ret
;
ops
=
rbd_create_rw_ops
(
1
,
CEPH_OSD_OP_NOTIFY_ACK
,
0
);
obj_request
=
rbd_obj_request_create
(
rbd_dev
->
header_name
,
0
,
0
,
if
(
!
ops
)
OBJ_REQUEST_NODATA
);
if
(
!
obj_request
)
return
-
ENOMEM
;
return
-
ENOMEM
;
ops
[
0
].
watch
.
ver
=
cpu_to_le64
(
ver
);
ret
=
-
ENOMEM
;
ops
[
0
].
watch
.
cookie
=
notify_id
;
op
=
rbd_osd_req_op_create
(
CEPH_OSD_OP_NOTIFY_ACK
,
notify_id
,
ver
);
ops
[
0
].
watch
.
flag
=
0
;
if
(
!
op
)
goto
out
;
obj_request
->
osd_req
=
rbd_osd_req_create
(
rbd_dev
,
false
,
obj_request
,
op
);
rbd_osd_req_op_destroy
(
op
);
if
(
!
obj_request
->
osd_req
)
goto
out
;
ret
=
rbd_do_request
(
NULL
,
rbd_dev
,
NULL
,
CEPH_NOSNAP
,
osdc
=
&
rbd_dev
->
rbd_client
->
client
->
osdc
;
rbd_dev
->
header_name
,
0
,
0
,
NULL
,
obj_request
->
callback
=
rbd_obj_request_put
;
NULL
,
0
,
ret
=
rbd_obj_request_submit
(
osdc
,
obj_request
);
CEPH_OSD_FLAG_READ
,
out:
ops
,
if
(
ret
)
NULL
,
0
,
rbd_obj_request_put
(
obj_request
);
rbd_simple_req_cb
,
0
,
NULL
);
rbd_destroy_ops
(
ops
);
return
ret
;
return
ret
;
}
}
...
@@ -1386,90 +1738,98 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
...
@@ -1386,90 +1738,98 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
(
unsigned
int
)
opcode
);
(
unsigned
int
)
opcode
);
rc
=
rbd_dev_refresh
(
rbd_dev
,
&
hver
);
rc
=
rbd_dev_refresh
(
rbd_dev
,
&
hver
);
if
(
rc
)
if
(
rc
)
pr_warning
(
RBD_DRV_NAME
"%d
got notification but failed to "
rbd_warn
(
rbd_dev
,
"
got notification but failed to "
" update snaps: %d
\n
"
,
r
bd_dev
->
major
,
r
c
);
" update snaps: %d
\n
"
,
rc
);
rbd_
req_sync
_notify_ack
(
rbd_dev
,
hver
,
notify_id
);
rbd_
obj
_notify_ack
(
rbd_dev
,
hver
,
notify_id
);
}
}
/*
/*
* Request sync osd watch
* Request sync osd watch/unwatch. The value of "start" determines
* whether a watch request is being initiated or torn down.
*/
*/
static
int
rbd_
req_sync_watch
(
struct
rbd_device
*
rbd_dev
)
static
int
rbd_
dev_header_watch_sync
(
struct
rbd_device
*
rbd_dev
,
int
start
)
{
{
struct
ceph_osd_req_op
*
ops
;
struct
ceph_osd_client
*
osdc
=
&
rbd_dev
->
rbd_client
->
client
->
osdc
;
struct
ceph_osd_client
*
osdc
=
&
rbd_dev
->
rbd_client
->
client
->
osdc
;
struct
rbd_obj_request
*
obj_request
;
struct
ceph_osd_req_op
*
op
;
int
ret
;
int
ret
;
ops
=
rbd_create_rw_ops
(
1
,
CEPH_OSD_OP_WATCH
,
0
);
rbd_assert
(
start
^
!!
rbd_dev
->
watch_event
);
if
(
!
ops
)
rbd_assert
(
start
^
!!
rbd_dev
->
watch_request
);
return
-
ENOMEM
;
ret
=
ceph_osdc_create_event
(
osdc
,
rbd_watch_cb
,
0
,
if
(
start
)
{
(
void
*
)
rbd_dev
,
&
rbd_dev
->
watch_event
);
ret
=
ceph_osdc_create_event
(
osdc
,
rbd_watch_cb
,
rbd_dev
,
if
(
ret
<
0
)
&
rbd_dev
->
watch_event
);
goto
fail
;
if
(
ret
<
0
)
return
ret
;
rbd_assert
(
rbd_dev
->
watch_event
!=
NULL
);
}
ops
[
0
].
watch
.
ver
=
cpu_to_le64
(
rbd_dev
->
header
.
obj_version
);
ret
=
-
ENOMEM
;
ops
[
0
].
watch
.
cookie
=
cpu_to_le64
(
rbd_dev
->
watch_event
->
cookie
);
obj_request
=
rbd_obj_request_create
(
rbd_dev
->
header_name
,
0
,
0
,
ops
[
0
].
watch
.
flag
=
1
;
OBJ_REQUEST_NODATA
);
if
(
!
obj_request
)
goto
out_cancel
;
op
=
rbd_osd_req_op_create
(
CEPH_OSD_OP_WATCH
,
rbd_dev
->
watch_event
->
cookie
,
rbd_dev
->
header
.
obj_version
,
start
);
if
(
!
op
)
goto
out_cancel
;
obj_request
->
osd_req
=
rbd_osd_req_create
(
rbd_dev
,
true
,
obj_request
,
op
);
rbd_osd_req_op_destroy
(
op
);
if
(
!
obj_request
->
osd_req
)
goto
out_cancel
;
if
(
start
)
ceph_osdc_set_request_linger
(
osdc
,
obj_request
->
osd_req
);
else
ceph_osdc_unregister_linger_request
(
osdc
,
rbd_dev
->
watch_request
->
osd_req
);
ret
=
rbd_obj_request_submit
(
osdc
,
obj_request
);
if
(
ret
)
goto
out_cancel
;
ret
=
rbd_obj_request_wait
(
obj_request
);
if
(
ret
)
goto
out_cancel
;
ret
=
obj_request
->
result
;
if
(
ret
)
goto
out_cancel
;
ret
=
rbd_req_sync_op
(
rbd_dev
,
NULL
,
/*
CEPH_NOSNAP
,
* A watch request is set to linger, so the underlying osd
CEPH_OSD_FLAG_WRITE
|
CEPH_OSD_FLAG_ONDISK
,
* request won't go away until we unregister it. We retain
ops
,
* a pointer to the object request during that time (in
rbd_dev
->
header_name
,
* rbd_dev->watch_request), so we'll keep a reference to
0
,
0
,
NULL
,
* it. We'll drop that reference (below) after we've
&
rbd_dev
->
watch_request
,
NULL
);
* unregistered it.
*/
if
(
start
)
{
rbd_dev
->
watch_request
=
obj_request
;
if
(
ret
<
0
)
return
0
;
goto
fail_event
;
}
rbd_destroy_ops
(
ops
);
/* We have successfully torn down the watch request */
return
0
;
fail_event:
rbd_obj_request_put
(
rbd_dev
->
watch_request
);
rbd_dev
->
watch_request
=
NULL
;
out_cancel:
/* Cancel the event if we're tearing down, or on error */
ceph_osdc_cancel_event
(
rbd_dev
->
watch_event
);
ceph_osdc_cancel_event
(
rbd_dev
->
watch_event
);
rbd_dev
->
watch_event
=
NULL
;
rbd_dev
->
watch_event
=
NULL
;
fail:
if
(
obj_request
)
rbd_destroy_ops
(
ops
);
rbd_obj_request_put
(
obj_request
);
return
ret
;
}
/*
* Request sync osd unwatch
*/
static
int
rbd_req_sync_unwatch
(
struct
rbd_device
*
rbd_dev
)
{
struct
ceph_osd_req_op
*
ops
;
int
ret
;
ops
=
rbd_create_rw_ops
(
1
,
CEPH_OSD_OP_WATCH
,
0
);
if
(
!
ops
)
return
-
ENOMEM
;
ops
[
0
].
watch
.
ver
=
0
;
ops
[
0
].
watch
.
cookie
=
cpu_to_le64
(
rbd_dev
->
watch_event
->
cookie
);
ops
[
0
].
watch
.
flag
=
0
;
ret
=
rbd_req_sync_op
(
rbd_dev
,
NULL
,
CEPH_NOSNAP
,
CEPH_OSD_FLAG_WRITE
|
CEPH_OSD_FLAG_ONDISK
,
ops
,
rbd_dev
->
header_name
,
0
,
0
,
NULL
,
NULL
,
NULL
);
rbd_destroy_ops
(
ops
);
ceph_osdc_cancel_event
(
rbd_dev
->
watch_event
);
rbd_dev
->
watch_event
=
NULL
;
return
ret
;
return
ret
;
}
}
/*
/*
* Synchronous osd object method call
* Synchronous osd object method call
*/
*/
static
int
rbd_
req_sync_exe
c
(
struct
rbd_device
*
rbd_dev
,
static
int
rbd_
obj_method_syn
c
(
struct
rbd_device
*
rbd_dev
,
const
char
*
object_name
,
const
char
*
object_name
,
const
char
*
class_name
,
const
char
*
class_name
,
const
char
*
method_name
,
const
char
*
method_name
,
...
@@ -1477,169 +1837,143 @@ static int rbd_req_sync_exec(struct rbd_device *rbd_dev,
...
@@ -1477,169 +1837,143 @@ static int rbd_req_sync_exec(struct rbd_device *rbd_dev,
size_t
outbound_size
,
size_t
outbound_size
,
char
*
inbound
,
char
*
inbound
,
size_t
inbound_size
,
size_t
inbound_size
,
int
flags
,
u64
*
version
)
u64
*
ver
)
{
{
struct
ceph_osd_req_op
*
ops
;
struct
rbd_obj_request
*
obj_request
;
int
class_name_len
=
strlen
(
class_name
);
struct
ceph_osd_client
*
osdc
;
int
method_name_len
=
strlen
(
method_name
);
struct
ceph_osd_req_op
*
op
;
int
payload_size
;
struct
page
**
pages
;
u32
page_count
;
int
ret
;
int
ret
;
/*
/*
*
Any input parameters required by the method we're calling
*
Method calls are ultimately read operations but they
*
will be sent along with the class and method names as
*
don't involve object data (so no offset or length).
*
part of the message payload. That data and its size are
*
The result should placed into the inbound buffer
*
supplied via the indata and indata_len fields (named from
*
provided. They also supply outbound data--parameters for
* the
perspective of the server side) in the OSD request
* the
object method. Currently if this is present it will
*
operation
.
*
be a snapshot id
.
*/
*/
pa
yload_size
=
class_name_len
+
method_name_len
+
outbound_size
;
pa
ge_count
=
(
u32
)
calc_pages_for
(
0
,
inbound_size
)
;
ops
=
rbd_create_rw_ops
(
1
,
CEPH_OSD_OP_CALL
,
payload_size
);
pages
=
ceph_alloc_page_vector
(
page_count
,
GFP_KERNEL
);
if
(
!
ops
)
if
(
IS_ERR
(
pages
)
)
return
-
ENOMEM
;
return
PTR_ERR
(
pages
)
;
ops
[
0
].
cls
.
class_name
=
class_name
;
ret
=
-
ENOMEM
;
ops
[
0
].
cls
.
class_len
=
(
__u8
)
class_name_len
;
obj_request
=
rbd_obj_request_create
(
object_name
,
0
,
0
,
ops
[
0
].
cls
.
method_name
=
method_name
;
OBJ_REQUEST_PAGES
);
ops
[
0
].
cls
.
method_len
=
(
__u8
)
method_name_len
;
if
(
!
obj_request
)
ops
[
0
].
cls
.
argc
=
0
;
goto
out
;
ops
[
0
].
cls
.
indata
=
outbound
;
ops
[
0
].
cls
.
indata_len
=
outbound_size
;
ret
=
rbd_req_sync_op
(
rbd_dev
,
NULL
,
obj_request
->
pages
=
pages
;
CEPH_NOSNAP
,
obj_request
->
page_count
=
page_count
;
flags
,
ops
,
object_name
,
0
,
inbound_size
,
inbound
,
NULL
,
ver
);
rbd_destroy_ops
(
ops
);
op
=
rbd_osd_req_op_create
(
CEPH_OSD_OP_CALL
,
class_name
,
method_name
,
outbound
,
outbound_size
);
if
(
!
op
)
goto
out
;
obj_request
->
osd_req
=
rbd_osd_req_create
(
rbd_dev
,
false
,
obj_request
,
op
);
rbd_osd_req_op_destroy
(
op
);
if
(
!
obj_request
->
osd_req
)
goto
out
;
dout
(
"cls_exec returned %d
\n
"
,
ret
);
osdc
=
&
rbd_dev
->
rbd_client
->
client
->
osdc
;
return
ret
;
ret
=
rbd_obj_request_submit
(
osdc
,
obj_request
);
}
if
(
ret
)
goto
out
;
ret
=
rbd_obj_request_wait
(
obj_request
);
if
(
ret
)
goto
out
;
static
struct
rbd_req_coll
*
rbd_alloc_coll
(
int
num_reqs
)
ret
=
obj_request
->
result
;
{
if
(
ret
<
0
)
struct
rbd_req_coll
*
coll
=
goto
out
;
kzalloc
(
sizeof
(
struct
rbd_req_coll
)
+
ret
=
0
;
sizeof
(
struct
rbd_req_status
)
*
num_reqs
,
ceph_copy_from_page_vector
(
pages
,
inbound
,
0
,
obj_request
->
xferred
);
GFP_ATOMIC
);
if
(
version
)
*
version
=
obj_request
->
version
;
out:
if
(
obj_request
)
rbd_obj_request_put
(
obj_request
);
else
ceph_release_page_vector
(
pages
,
page_count
);
if
(
!
coll
)
return
ret
;
return
NULL
;
coll
->
total
=
num_reqs
;
kref_init
(
&
coll
->
kref
);
return
coll
;
}
}
/*
static
void
rbd_request_fn
(
struct
request_queue
*
q
)
* block device queue callback
*/
static
void
rbd_rq_fn
(
struct
request_queue
*
q
)
{
{
struct
rbd_device
*
rbd_dev
=
q
->
queuedata
;
struct
rbd_device
*
rbd_dev
=
q
->
queuedata
;
bool
read_only
=
rbd_dev
->
mapping
.
read_only
;
struct
request
*
rq
;
struct
request
*
rq
;
int
result
;
while
((
rq
=
blk_fetch_request
(
q
)))
{
while
((
rq
=
blk_fetch_request
(
q
)))
{
struct
bio
*
bio
;
bool
write_request
=
rq_data_dir
(
rq
)
==
WRITE
;
bool
do_write
;
struct
rbd_img_request
*
img_request
;
unsigned
int
size
;
u64
offset
;
u64
ofs
;
u64
length
;
int
num_segs
,
cur_seg
=
0
;
struct
rbd_req_coll
*
coll
;
/* Ignore any non-FS requests that filter through. */
struct
ceph_snap_context
*
snapc
;
unsigned
int
bio_offset
;
dout
(
"fetched request
\n
"
);
/* filter out block requests we don't understand */
if
((
rq
->
cmd_type
!=
REQ_TYPE_FS
))
{
__blk_end_request_all
(
rq
,
0
);
continue
;
}
/* deduce our operation (read, write) */
if
(
rq
->
cmd_type
!=
REQ_TYPE_FS
)
{
do_write
=
(
rq_data_dir
(
rq
)
==
WRITE
);
__blk_end_request_all
(
rq
,
0
);
if
(
do_write
&&
rbd_dev
->
mapping
.
read_only
)
{
__blk_end_request_all
(
rq
,
-
EROFS
);
continue
;
continue
;
}
}
spin_unlock_irq
(
q
->
queue_lock
);
spin_unlock_irq
(
q
->
queue_lock
);
down_read
(
&
rbd_dev
->
header_rwsem
);
/* Disallow writes to a read-only device */
if
(
!
rbd_dev
->
exists
)
{
if
(
write_request
)
{
rbd_assert
(
rbd_dev
->
spec
->
snap_id
!=
CEPH_NOSNAP
);
result
=
-
EROFS
;
up_read
(
&
rbd_dev
->
header_rwsem
);
if
(
read_only
)
dout
(
"request for non-existent snapshot"
);
goto
end_request
;
spin_lock_irq
(
q
->
queue_lock
);
rbd_assert
(
rbd_dev
->
spec
->
snap_id
==
CEPH_NOSNAP
);
__blk_end_request_all
(
rq
,
-
ENXIO
);
continue
;
}
}
snapc
=
ceph_get_snap_context
(
rbd_dev
->
header
.
snapc
);
/*
* Quit early if the mapped snapshot no longer
up_read
(
&
rbd_dev
->
header_rwsem
);
* exists. It's still possible the snapshot will
* have disappeared by the time our request arrives
size
=
blk_rq_bytes
(
rq
);
* at the osd, but there's no sense in sending it if
ofs
=
blk_rq_pos
(
rq
)
*
SECTOR_SIZE
;
* we already know.
bio
=
rq
->
bio
;
*/
if
(
!
test_bit
(
RBD_DEV_FLAG_EXISTS
,
&
rbd_dev
->
flags
))
{
dout
(
"%s 0x%x bytes at 0x%llx
\n
"
,
dout
(
"request for non-existent snapshot"
);
do_write
?
"write"
:
"read"
,
rbd_assert
(
rbd_dev
->
spec
->
snap_id
!=
CEPH_NOSNAP
);
size
,
(
unsigned
long
long
)
blk_rq_pos
(
rq
)
*
SECTOR_SIZE
);
result
=
-
ENXIO
;
goto
end_request
;
num_segs
=
rbd_get_num_segments
(
&
rbd_dev
->
header
,
ofs
,
size
);
if
(
num_segs
<=
0
)
{
spin_lock_irq
(
q
->
queue_lock
);
__blk_end_request_all
(
rq
,
num_segs
);
ceph_put_snap_context
(
snapc
);
continue
;
}
coll
=
rbd_alloc_coll
(
num_segs
);
if
(
!
coll
)
{
spin_lock_irq
(
q
->
queue_lock
);
__blk_end_request_all
(
rq
,
-
ENOMEM
);
ceph_put_snap_context
(
snapc
);
continue
;
}
}
bio_offset
=
0
;
offset
=
(
u64
)
blk_rq_pos
(
rq
)
<<
SECTOR_SHIFT
;
do
{
length
=
(
u64
)
blk_rq_bytes
(
rq
);
u64
limit
=
rbd_segment_length
(
rbd_dev
,
ofs
,
size
);
unsigned
int
chain_size
;
struct
bio
*
bio_chain
;
BUG_ON
(
limit
>
(
u64
)
UINT_MAX
)
;
result
=
-
EINVAL
;
chain_size
=
(
unsigned
int
)
limit
;
if
(
WARN_ON
(
offset
&&
length
>
U64_MAX
-
offset
+
1
))
dout
(
"rq->bio->bi_vcnt=%hu
\n
"
,
rq
->
bio
->
bi_vcnt
);
goto
end_request
;
/* Shouldn't happen */
kref_get
(
&
coll
->
kref
);
result
=
-
ENOMEM
;
img_request
=
rbd_img_request_create
(
rbd_dev
,
offset
,
length
,
write_request
);
if
(
!
img_request
)
goto
end_request
;
/* Pass a cloned bio chain via an osd request */
img_request
->
rq
=
rq
;
bio_chain
=
bio_chain_clone_range
(
&
bio
,
&
bio_offset
,
chain_size
,
GFP_ATOMIC
);
if
(
bio_chain
)
(
void
)
rbd_do_op
(
rq
,
rbd_dev
,
snapc
,
ofs
,
chain_size
,
bio_chain
,
coll
,
cur_seg
);
else
rbd_coll_end_req_index
(
rq
,
coll
,
cur_seg
,
-
ENOMEM
,
chain_size
);
size
-=
chain_size
;
ofs
+=
chain_size
;
cur_seg
++
;
}
while
(
size
>
0
);
kref_put
(
&
coll
->
kref
,
rbd_coll_release
);
result
=
rbd_img_request_fill_bio
(
img_request
,
rq
->
bio
);
if
(
!
result
)
result
=
rbd_img_request_submit
(
img_request
);
if
(
result
)
rbd_img_request_put
(
img_request
);
end_request:
spin_lock_irq
(
q
->
queue_lock
);
spin_lock_irq
(
q
->
queue_lock
);
if
(
result
<
0
)
{
ceph_put_snap_context
(
snapc
);
rbd_warn
(
rbd_dev
,
"obj_request %s result %d
\n
"
,
write_request
?
"write"
:
"read"
,
result
);
__blk_end_request_all
(
rq
,
result
);
}
}
}
}
}
...
@@ -1703,6 +2037,71 @@ static void rbd_free_disk(struct rbd_device *rbd_dev)
...
@@ -1703,6 +2037,71 @@ static void rbd_free_disk(struct rbd_device *rbd_dev)
put_disk
(
disk
);
put_disk
(
disk
);
}
}
/*
 * Synchronously read a range of a single object into a caller buffer.
 *
 * @rbd_dev:     device whose osd client is used to issue the request
 * @object_name: name of the object to read
 * @offset:      byte offset within the object at which to start
 * @length:      number of bytes requested
 * @buf:         destination; filled with the bytes actually transferred
 * @version:     if non-NULL, receives the object request's version
 *
 * Returns the number of bytes transferred on success, or a negative
 * errno if allocation, submission, waiting, or the osd operation fails.
 */
static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
				const char *object_name,
				u64 offset, u64 length,
				char *buf, u64 *version)
{
	struct ceph_osd_req_op *op;
	struct rbd_obj_request *obj_request;
	struct ceph_osd_client *osdc;
	struct page **pages = NULL;
	u32 page_count;
	size_t size;
	int ret;

	page_count = (u32) calc_pages_for(offset, length);
	pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
	/*
	 * Bail out right away on failure.  Falling through would lose
	 * the error code and hand an ERR_PTR to the object request,
	 * which would later try to release it as a page vector.
	 */
	if (IS_ERR(pages))
		return PTR_ERR(pages);

	ret = -ENOMEM;
	obj_request = rbd_obj_request_create(object_name, offset, length,
						OBJ_REQUEST_PAGES);
	if (!obj_request)
		goto out;

	/* From here on the object request is what releases the pages */
	obj_request->pages = pages;
	obj_request->page_count = page_count;

	op = rbd_osd_req_op_create(CEPH_OSD_OP_READ, offset, length);
	if (!op)
		goto out;
	obj_request->osd_req = rbd_osd_req_create(rbd_dev, false,
						obj_request, op);
	/* The op is not needed once the osd request has been built */
	rbd_osd_req_op_destroy(op);
	if (!obj_request->osd_req)
		goto out;

	osdc = &rbd_dev->rbd_client->client->osdc;
	ret = rbd_obj_request_submit(osdc, obj_request);
	if (ret)
		goto out;
	ret = rbd_obj_request_wait(obj_request);
	if (ret)
		goto out;

	ret = obj_request->result;
	if (ret < 0)
		goto out;

	rbd_assert(obj_request->xferred <= (u64) SIZE_MAX);
	size = (size_t) obj_request->xferred;
	ceph_copy_from_page_vector(pages, buf, 0, size);
	/* The byte count is reported through an int return value */
	rbd_assert(size <= (size_t) INT_MAX);
	ret = (int) size;
	if (version)
		*version = obj_request->version;
out:
	if (obj_request)
		rbd_obj_request_put(obj_request);
	else
		ceph_release_page_vector(pages, page_count);

	return ret;
}
/*
/*
* Read the complete header for the given rbd device.
* Read the complete header for the given rbd device.
*
*
...
@@ -1741,24 +2140,20 @@ rbd_dev_v1_header_read(struct rbd_device *rbd_dev, u64 *version)
...
@@ -1741,24 +2140,20 @@ rbd_dev_v1_header_read(struct rbd_device *rbd_dev, u64 *version)
if
(
!
ondisk
)
if
(
!
ondisk
)
return
ERR_PTR
(
-
ENOMEM
);
return
ERR_PTR
(
-
ENOMEM
);
ret
=
rbd_req_sync_read
(
rbd_dev
,
CEPH_NOSNAP
,
ret
=
rbd_obj_read_sync
(
rbd_dev
,
rbd_dev
->
header_name
,
rbd_dev
->
header_name
,
0
,
size
,
0
,
size
,
(
char
*
)
ondisk
,
version
);
(
char
*
)
ondisk
,
version
);
if
(
ret
<
0
)
if
(
ret
<
0
)
goto
out_err
;
goto
out_err
;
if
(
WARN_ON
((
size_t
)
ret
<
size
))
{
if
(
WARN_ON
((
size_t
)
ret
<
size
))
{
ret
=
-
ENXIO
;
ret
=
-
ENXIO
;
pr_warning
(
"short header read for image %s"
rbd_warn
(
rbd_dev
,
"short header read (want %zd got %d)"
,
" (want %zd got %d)
\n
"
,
size
,
ret
);
rbd_dev
->
spec
->
image_name
,
size
,
ret
);
goto
out_err
;
goto
out_err
;
}
}
if
(
!
rbd_dev_ondisk_valid
(
ondisk
))
{
if
(
!
rbd_dev_ondisk_valid
(
ondisk
))
{
ret
=
-
ENXIO
;
ret
=
-
ENXIO
;
pr_warning
(
"invalid header for image %s
\n
"
,
rbd_warn
(
rbd_dev
,
"invalid header"
);
rbd_dev
->
spec
->
image_name
);
goto
out_err
;
goto
out_err
;
}
}
...
@@ -1895,8 +2290,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
...
@@ -1895,8 +2290,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
disk
->
fops
=
&
rbd_bd_ops
;
disk
->
fops
=
&
rbd_bd_ops
;
disk
->
private_data
=
rbd_dev
;
disk
->
private_data
=
rbd_dev
;
/* init rq */
q
=
blk_init_queue
(
rbd_request_fn
,
&
rbd_dev
->
lock
);
q
=
blk_init_queue
(
rbd_rq_fn
,
&
rbd_dev
->
lock
);
if
(
!
q
)
if
(
!
q
)
goto
out_disk
;
goto
out_disk
;
...
@@ -2243,6 +2637,7 @@ struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
...
@@ -2243,6 +2637,7 @@ struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
return
NULL
;
return
NULL
;
spin_lock_init
(
&
rbd_dev
->
lock
);
spin_lock_init
(
&
rbd_dev
->
lock
);
rbd_dev
->
flags
=
0
;
INIT_LIST_HEAD
(
&
rbd_dev
->
node
);
INIT_LIST_HEAD
(
&
rbd_dev
->
node
);
INIT_LIST_HEAD
(
&
rbd_dev
->
snaps
);
INIT_LIST_HEAD
(
&
rbd_dev
->
snaps
);
init_rwsem
(
&
rbd_dev
->
header_rwsem
);
init_rwsem
(
&
rbd_dev
->
header_rwsem
);
...
@@ -2250,6 +2645,13 @@ struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
...
@@ -2250,6 +2645,13 @@ struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
rbd_dev
->
spec
=
spec
;
rbd_dev
->
spec
=
spec
;
rbd_dev
->
rbd_client
=
rbdc
;
rbd_dev
->
rbd_client
=
rbdc
;
/* Initialize the layout used for all rbd requests */
rbd_dev
->
layout
.
fl_stripe_unit
=
cpu_to_le32
(
1
<<
RBD_MAX_OBJ_ORDER
);
rbd_dev
->
layout
.
fl_stripe_count
=
cpu_to_le32
(
1
);
rbd_dev
->
layout
.
fl_object_size
=
cpu_to_le32
(
1
<<
RBD_MAX_OBJ_ORDER
);
rbd_dev
->
layout
.
fl_pg_pool
=
cpu_to_le32
((
u32
)
spec
->
pool_id
);
return
rbd_dev
;
return
rbd_dev
;
}
}
...
@@ -2360,12 +2762,11 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
...
@@ -2360,12 +2762,11 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
__le64
size
;
__le64
size
;
}
__attribute__
((
packed
))
size_buf
=
{
0
};
}
__attribute__
((
packed
))
size_buf
=
{
0
};
ret
=
rbd_
req_sync_exe
c
(
rbd_dev
,
rbd_dev
->
header_name
,
ret
=
rbd_
obj_method_syn
c
(
rbd_dev
,
rbd_dev
->
header_name
,
"rbd"
,
"get_size"
,
"rbd"
,
"get_size"
,
(
char
*
)
&
snapid
,
sizeof
(
snapid
),
(
char
*
)
&
snapid
,
sizeof
(
snapid
),
(
char
*
)
&
size_buf
,
sizeof
(
size_buf
),
(
char
*
)
&
size_buf
,
sizeof
(
size_buf
),
NULL
);
CEPH_OSD_FLAG_READ
,
NULL
);
dout
(
"%s: rbd_obj_method_sync returned %d
\n
"
,
__func__
,
ret
);
dout
(
"%s: rbd_req_sync_exec returned %d
\n
"
,
__func__
,
ret
);
if
(
ret
<
0
)
if
(
ret
<
0
)
return
ret
;
return
ret
;
...
@@ -2396,15 +2797,13 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
...
@@ -2396,15 +2797,13 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
if
(
!
reply_buf
)
if
(
!
reply_buf
)
return
-
ENOMEM
;
return
-
ENOMEM
;
ret
=
rbd_
req_sync_exe
c
(
rbd_dev
,
rbd_dev
->
header_name
,
ret
=
rbd_
obj_method_syn
c
(
rbd_dev
,
rbd_dev
->
header_name
,
"rbd"
,
"get_object_prefix"
,
"rbd"
,
"get_object_prefix"
,
NULL
,
0
,
NULL
,
0
,
reply_buf
,
RBD_OBJ_PREFIX_LEN_MAX
,
reply_buf
,
RBD_OBJ_PREFIX_LEN_MAX
,
NULL
);
CEPH_OSD_FLAG_READ
,
NULL
);
dout
(
"%s: rbd_obj_method_sync returned %d
\n
"
,
__func__
,
ret
);
dout
(
"%s: rbd_req_sync_exec returned %d
\n
"
,
__func__
,
ret
);
if
(
ret
<
0
)
if
(
ret
<
0
)
goto
out
;
goto
out
;
ret
=
0
;
/* rbd_req_sync_exec() can return positive */
p
=
reply_buf
;
p
=
reply_buf
;
rbd_dev
->
header
.
object_prefix
=
ceph_extract_encoded_string
(
&
p
,
rbd_dev
->
header
.
object_prefix
=
ceph_extract_encoded_string
(
&
p
,
...
@@ -2435,12 +2834,12 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
...
@@ -2435,12 +2834,12 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
u64
incompat
;
u64
incompat
;
int
ret
;
int
ret
;
ret
=
rbd_
req_sync_exe
c
(
rbd_dev
,
rbd_dev
->
header_name
,
ret
=
rbd_
obj_method_syn
c
(
rbd_dev
,
rbd_dev
->
header_name
,
"rbd"
,
"get_features"
,
"rbd"
,
"get_features"
,
(
char
*
)
&
snapid
,
sizeof
(
snapid
),
(
char
*
)
&
snapid
,
sizeof
(
snapid
),
(
char
*
)
&
features_buf
,
sizeof
(
features_buf
),
(
char
*
)
&
features_buf
,
sizeof
(
features_buf
),
CEPH_OSD_FLAG_READ
,
NULL
);
NULL
);
dout
(
"%s: rbd_
req_sync_exe
c returned %d
\n
"
,
__func__
,
ret
);
dout
(
"%s: rbd_
obj_method_syn
c returned %d
\n
"
,
__func__
,
ret
);
if
(
ret
<
0
)
if
(
ret
<
0
)
return
ret
;
return
ret
;
...
@@ -2474,7 +2873,6 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
...
@@ -2474,7 +2873,6 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
void
*
end
;
void
*
end
;
char
*
image_id
;
char
*
image_id
;
u64
overlap
;
u64
overlap
;
size_t
len
=
0
;
int
ret
;
int
ret
;
parent_spec
=
rbd_spec_alloc
();
parent_spec
=
rbd_spec_alloc
();
...
@@ -2492,12 +2890,11 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
...
@@ -2492,12 +2890,11 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
}
}
snapid
=
cpu_to_le64
(
CEPH_NOSNAP
);
snapid
=
cpu_to_le64
(
CEPH_NOSNAP
);
ret
=
rbd_
req_sync_exe
c
(
rbd_dev
,
rbd_dev
->
header_name
,
ret
=
rbd_
obj_method_syn
c
(
rbd_dev
,
rbd_dev
->
header_name
,
"rbd"
,
"get_parent"
,
"rbd"
,
"get_parent"
,
(
char
*
)
&
snapid
,
sizeof
(
snapid
),
(
char
*
)
&
snapid
,
sizeof
(
snapid
),
(
char
*
)
reply_buf
,
size
,
(
char
*
)
reply_buf
,
size
,
NULL
);
CEPH_OSD_FLAG_READ
,
NULL
);
dout
(
"%s: rbd_obj_method_sync returned %d
\n
"
,
__func__
,
ret
);
dout
(
"%s: rbd_req_sync_exec returned %d
\n
"
,
__func__
,
ret
);
if
(
ret
<
0
)
if
(
ret
<
0
)
goto
out_err
;
goto
out_err
;
...
@@ -2508,13 +2905,18 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
...
@@ -2508,13 +2905,18 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
if
(
parent_spec
->
pool_id
==
CEPH_NOPOOL
)
if
(
parent_spec
->
pool_id
==
CEPH_NOPOOL
)
goto
out
;
/* No parent? No problem. */
goto
out
;
/* No parent? No problem. */
image_id
=
ceph_extract_encoded_string
(
&
p
,
end
,
&
len
,
GFP_KERNEL
);
/* The ceph file layout needs to fit pool id in 32 bits */
ret
=
-
EIO
;
if
(
WARN_ON
(
parent_spec
->
pool_id
>
(
u64
)
U32_MAX
))
goto
out
;
image_id
=
ceph_extract_encoded_string
(
&
p
,
end
,
NULL
,
GFP_KERNEL
);
if
(
IS_ERR
(
image_id
))
{
if
(
IS_ERR
(
image_id
))
{
ret
=
PTR_ERR
(
image_id
);
ret
=
PTR_ERR
(
image_id
);
goto
out_err
;
goto
out_err
;
}
}
parent_spec
->
image_id
=
image_id
;
parent_spec
->
image_id
=
image_id
;
parent_spec
->
image_id_len
=
len
;
ceph_decode_64_safe
(
&
p
,
end
,
parent_spec
->
snap_id
,
out_err
);
ceph_decode_64_safe
(
&
p
,
end
,
parent_spec
->
snap_id
,
out_err
);
ceph_decode_64_safe
(
&
p
,
end
,
overlap
,
out_err
);
ceph_decode_64_safe
(
&
p
,
end
,
overlap
,
out_err
);
...
@@ -2544,26 +2946,25 @@ static char *rbd_dev_image_name(struct rbd_device *rbd_dev)
...
@@ -2544,26 +2946,25 @@ static char *rbd_dev_image_name(struct rbd_device *rbd_dev)
rbd_assert
(
!
rbd_dev
->
spec
->
image_name
);
rbd_assert
(
!
rbd_dev
->
spec
->
image_name
);
image_id_size
=
sizeof
(
__le32
)
+
rbd_dev
->
spec
->
image_id_len
;
len
=
strlen
(
rbd_dev
->
spec
->
image_id
);
image_id_size
=
sizeof
(
__le32
)
+
len
;
image_id
=
kmalloc
(
image_id_size
,
GFP_KERNEL
);
image_id
=
kmalloc
(
image_id_size
,
GFP_KERNEL
);
if
(
!
image_id
)
if
(
!
image_id
)
return
NULL
;
return
NULL
;
p
=
image_id
;
p
=
image_id
;
end
=
(
char
*
)
image_id
+
image_id_size
;
end
=
(
char
*
)
image_id
+
image_id_size
;
ceph_encode_string
(
&
p
,
end
,
rbd_dev
->
spec
->
image_id
,
ceph_encode_string
(
&
p
,
end
,
rbd_dev
->
spec
->
image_id
,
(
u32
)
len
);
(
u32
)
rbd_dev
->
spec
->
image_id_len
);
size
=
sizeof
(
__le32
)
+
RBD_IMAGE_NAME_LEN_MAX
;
size
=
sizeof
(
__le32
)
+
RBD_IMAGE_NAME_LEN_MAX
;
reply_buf
=
kmalloc
(
size
,
GFP_KERNEL
);
reply_buf
=
kmalloc
(
size
,
GFP_KERNEL
);
if
(
!
reply_buf
)
if
(
!
reply_buf
)
goto
out
;
goto
out
;
ret
=
rbd_
req_sync_exe
c
(
rbd_dev
,
RBD_DIRECTORY
,
ret
=
rbd_
obj_method_syn
c
(
rbd_dev
,
RBD_DIRECTORY
,
"rbd"
,
"dir_get_name"
,
"rbd"
,
"dir_get_name"
,
image_id
,
image_id_size
,
image_id
,
image_id_size
,
(
char
*
)
reply_buf
,
size
,
(
char
*
)
reply_buf
,
size
,
NULL
);
CEPH_OSD_FLAG_READ
,
NULL
);
if
(
ret
<
0
)
if
(
ret
<
0
)
goto
out
;
goto
out
;
p
=
reply_buf
;
p
=
reply_buf
;
...
@@ -2602,8 +3003,11 @@ static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev)
...
@@ -2602,8 +3003,11 @@ static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev)
osdc
=
&
rbd_dev
->
rbd_client
->
client
->
osdc
;
osdc
=
&
rbd_dev
->
rbd_client
->
client
->
osdc
;
name
=
ceph_pg_pool_name_by_id
(
osdc
->
osdmap
,
rbd_dev
->
spec
->
pool_id
);
name
=
ceph_pg_pool_name_by_id
(
osdc
->
osdmap
,
rbd_dev
->
spec
->
pool_id
);
if
(
!
name
)
if
(
!
name
)
{
return
-
EIO
;
/* pool id too large (>= 2^31) */
rbd_warn
(
rbd_dev
,
"there is no pool with id %llu"
,
rbd_dev
->
spec
->
pool_id
);
/* Really a BUG() */
return
-
EIO
;
}
rbd_dev
->
spec
->
pool_name
=
kstrdup
(
name
,
GFP_KERNEL
);
rbd_dev
->
spec
->
pool_name
=
kstrdup
(
name
,
GFP_KERNEL
);
if
(
!
rbd_dev
->
spec
->
pool_name
)
if
(
!
rbd_dev
->
spec
->
pool_name
)
...
@@ -2612,19 +3016,17 @@ static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev)
...
@@ -2612,19 +3016,17 @@ static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev)
/* Fetch the image name; tolerate failure here */
/* Fetch the image name; tolerate failure here */
name
=
rbd_dev_image_name
(
rbd_dev
);
name
=
rbd_dev_image_name
(
rbd_dev
);
if
(
name
)
{
if
(
name
)
rbd_dev
->
spec
->
image_name_len
=
strlen
(
name
);
rbd_dev
->
spec
->
image_name
=
(
char
*
)
name
;
rbd_dev
->
spec
->
image_name
=
(
char
*
)
name
;
}
else
{
else
pr_warning
(
RBD_DRV_NAME
"%d "
rbd_warn
(
rbd_dev
,
"unable to get image name"
);
"unable to get image name for image id %s
\n
"
,
rbd_dev
->
major
,
rbd_dev
->
spec
->
image_id
);
}
/* Look up the snapshot name. */
/* Look up the snapshot name. */
name
=
rbd_snap_name
(
rbd_dev
,
rbd_dev
->
spec
->
snap_id
);
name
=
rbd_snap_name
(
rbd_dev
,
rbd_dev
->
spec
->
snap_id
);
if
(
!
name
)
{
if
(
!
name
)
{
rbd_warn
(
rbd_dev
,
"no snapshot with id %llu"
,
rbd_dev
->
spec
->
snap_id
);
/* Really a BUG() */
ret
=
-
EIO
;
ret
=
-
EIO
;
goto
out_err
;
goto
out_err
;
}
}
...
@@ -2665,12 +3067,11 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver)
...
@@ -2665,12 +3067,11 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver)
if
(
!
reply_buf
)
if
(
!
reply_buf
)
return
-
ENOMEM
;
return
-
ENOMEM
;
ret
=
rbd_
req_sync_exe
c
(
rbd_dev
,
rbd_dev
->
header_name
,
ret
=
rbd_
obj_method_syn
c
(
rbd_dev
,
rbd_dev
->
header_name
,
"rbd"
,
"get_snapcontext"
,
"rbd"
,
"get_snapcontext"
,
NULL
,
0
,
NULL
,
0
,
reply_buf
,
size
,
reply_buf
,
size
,
ver
);
CEPH_OSD_FLAG_READ
,
ver
);
dout
(
"%s: rbd_obj_method_sync returned %d
\n
"
,
__func__
,
ret
);
dout
(
"%s: rbd_req_sync_exec returned %d
\n
"
,
__func__
,
ret
);
if
(
ret
<
0
)
if
(
ret
<
0
)
goto
out
;
goto
out
;
...
@@ -2735,12 +3136,11 @@ static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which)
...
@@ -2735,12 +3136,11 @@ static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which)
return
ERR_PTR
(
-
ENOMEM
);
return
ERR_PTR
(
-
ENOMEM
);
snap_id
=
cpu_to_le64
(
rbd_dev
->
header
.
snapc
->
snaps
[
which
]);
snap_id
=
cpu_to_le64
(
rbd_dev
->
header
.
snapc
->
snaps
[
which
]);
ret
=
rbd_
req_sync_exe
c
(
rbd_dev
,
rbd_dev
->
header_name
,
ret
=
rbd_
obj_method_syn
c
(
rbd_dev
,
rbd_dev
->
header_name
,
"rbd"
,
"get_snapshot_name"
,
"rbd"
,
"get_snapshot_name"
,
(
char
*
)
&
snap_id
,
sizeof
(
snap_id
),
(
char
*
)
&
snap_id
,
sizeof
(
snap_id
),
reply_buf
,
size
,
reply_buf
,
size
,
NULL
);
CEPH_OSD_FLAG_READ
,
NULL
);
dout
(
"%s: rbd_obj_method_sync returned %d
\n
"
,
__func__
,
ret
);
dout
(
"%s: rbd_req_sync_exec returned %d
\n
"
,
__func__
,
ret
);
if
(
ret
<
0
)
if
(
ret
<
0
)
goto
out
;
goto
out
;
...
@@ -2766,7 +3166,7 @@ static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which)
...
@@ -2766,7 +3166,7 @@ static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which)
static
char
*
rbd_dev_v2_snap_info
(
struct
rbd_device
*
rbd_dev
,
u32
which
,
static
char
*
rbd_dev_v2_snap_info
(
struct
rbd_device
*
rbd_dev
,
u32
which
,
u64
*
snap_size
,
u64
*
snap_features
)
u64
*
snap_size
,
u64
*
snap_features
)
{
{
__le
64
snap_id
;
u
64
snap_id
;
u8
order
;
u8
order
;
int
ret
;
int
ret
;
...
@@ -2865,10 +3265,17 @@ static int rbd_dev_snaps_update(struct rbd_device *rbd_dev)
...
@@ -2865,10 +3265,17 @@ static int rbd_dev_snaps_update(struct rbd_device *rbd_dev)
if
(
snap_id
==
CEPH_NOSNAP
||
(
snap
&&
snap
->
id
>
snap_id
))
{
if
(
snap_id
==
CEPH_NOSNAP
||
(
snap
&&
snap
->
id
>
snap_id
))
{
struct
list_head
*
next
=
links
->
next
;
struct
list_head
*
next
=
links
->
next
;
/* Existing snapshot not in the new snap context */
/*
* A previously-existing snapshot is not in
* the new snap context.
*
* If the now missing snapshot is the one the
* image is mapped to, clear its exists flag
* so we can avoid sending any more requests
* to it.
*/
if
(
rbd_dev
->
spec
->
snap_id
==
snap
->
id
)
if
(
rbd_dev
->
spec
->
snap_id
==
snap
->
id
)
rbd_dev
->
exists
=
false
;
clear_bit
(
RBD_DEV_FLAG_EXISTS
,
&
rbd_dev
->
flags
)
;
rbd_remove_snap_dev
(
snap
);
rbd_remove_snap_dev
(
snap
);
dout
(
"%ssnap id %llu has been removed
\n
"
,
dout
(
"%ssnap id %llu has been removed
\n
"
,
rbd_dev
->
spec
->
snap_id
==
snap
->
id
?
rbd_dev
->
spec
->
snap_id
==
snap
->
id
?
...
@@ -2983,22 +3390,6 @@ static void rbd_bus_del_dev(struct rbd_device *rbd_dev)
...
@@ -2983,22 +3390,6 @@ static void rbd_bus_del_dev(struct rbd_device *rbd_dev)
device_unregister
(
&
rbd_dev
->
dev
);
device_unregister
(
&
rbd_dev
->
dev
);
}
}
static
int
rbd_init_watch_dev
(
struct
rbd_device
*
rbd_dev
)
{
int
ret
,
rc
;
do
{
ret
=
rbd_req_sync_watch
(
rbd_dev
);
if
(
ret
==
-
ERANGE
)
{
rc
=
rbd_dev_refresh
(
rbd_dev
,
NULL
);
if
(
rc
<
0
)
return
rc
;
}
}
while
(
ret
==
-
ERANGE
);
return
ret
;
}
static
atomic64_t
rbd_dev_id_max
=
ATOMIC64_INIT
(
0
);
static
atomic64_t
rbd_dev_id_max
=
ATOMIC64_INIT
(
0
);
/*
/*
...
@@ -3138,11 +3529,9 @@ static inline char *dup_token(const char **buf, size_t *lenp)
...
@@ -3138,11 +3529,9 @@ static inline char *dup_token(const char **buf, size_t *lenp)
size_t
len
;
size_t
len
;
len
=
next_token
(
buf
);
len
=
next_token
(
buf
);
dup
=
km
alloc
(
len
+
1
,
GFP_KERNEL
);
dup
=
km
emdup
(
*
buf
,
len
+
1
,
GFP_KERNEL
);
if
(
!
dup
)
if
(
!
dup
)
return
NULL
;
return
NULL
;
memcpy
(
dup
,
*
buf
,
len
);
*
(
dup
+
len
)
=
'\0'
;
*
(
dup
+
len
)
=
'\0'
;
*
buf
+=
len
;
*
buf
+=
len
;
...
@@ -3210,8 +3599,10 @@ static int rbd_add_parse_args(const char *buf,
...
@@ -3210,8 +3599,10 @@ static int rbd_add_parse_args(const char *buf,
/* The first four tokens are required */
/* The first four tokens are required */
len
=
next_token
(
&
buf
);
len
=
next_token
(
&
buf
);
if
(
!
len
)
if
(
!
len
)
{
return
-
EINVAL
;
/* Missing monitor address(es) */
rbd_warn
(
NULL
,
"no monitor address(es) provided"
);
return
-
EINVAL
;
}
mon_addrs
=
buf
;
mon_addrs
=
buf
;
mon_addrs_size
=
len
+
1
;
mon_addrs_size
=
len
+
1
;
buf
+=
len
;
buf
+=
len
;
...
@@ -3220,8 +3611,10 @@ static int rbd_add_parse_args(const char *buf,
...
@@ -3220,8 +3611,10 @@ static int rbd_add_parse_args(const char *buf,
options
=
dup_token
(
&
buf
,
NULL
);
options
=
dup_token
(
&
buf
,
NULL
);
if
(
!
options
)
if
(
!
options
)
return
-
ENOMEM
;
return
-
ENOMEM
;
if
(
!*
options
)
if
(
!*
options
)
{
goto
out_err
;
/* Missing options */
rbd_warn
(
NULL
,
"no options provided"
);
goto
out_err
;
}
spec
=
rbd_spec_alloc
();
spec
=
rbd_spec_alloc
();
if
(
!
spec
)
if
(
!
spec
)
...
@@ -3230,14 +3623,18 @@ static int rbd_add_parse_args(const char *buf,
...
@@ -3230,14 +3623,18 @@ static int rbd_add_parse_args(const char *buf,
spec
->
pool_name
=
dup_token
(
&
buf
,
NULL
);
spec
->
pool_name
=
dup_token
(
&
buf
,
NULL
);
if
(
!
spec
->
pool_name
)
if
(
!
spec
->
pool_name
)
goto
out_mem
;
goto
out_mem
;
if
(
!*
spec
->
pool_name
)
if
(
!*
spec
->
pool_name
)
{
goto
out_err
;
/* Missing pool name */
rbd_warn
(
NULL
,
"no pool name provided"
);
goto
out_err
;
}
spec
->
image_name
=
dup_token
(
&
buf
,
&
spec
->
image_name_len
);
spec
->
image_name
=
dup_token
(
&
buf
,
NULL
);
if
(
!
spec
->
image_name
)
if
(
!
spec
->
image_name
)
goto
out_mem
;
goto
out_mem
;
if
(
!*
spec
->
image_name
)
if
(
!*
spec
->
image_name
)
{
goto
out_err
;
/* Missing image name */
rbd_warn
(
NULL
,
"no image name provided"
);
goto
out_err
;
}
/*
/*
* Snapshot name is optional; default is to use "-"
* Snapshot name is optional; default is to use "-"
...
@@ -3251,10 +3648,9 @@ static int rbd_add_parse_args(const char *buf,
...
@@ -3251,10 +3648,9 @@ static int rbd_add_parse_args(const char *buf,
ret
=
-
ENAMETOOLONG
;
ret
=
-
ENAMETOOLONG
;
goto
out_err
;
goto
out_err
;
}
}
spec
->
snap_name
=
km
alloc
(
len
+
1
,
GFP_KERNEL
);
spec
->
snap_name
=
km
emdup
(
buf
,
len
+
1
,
GFP_KERNEL
);
if
(
!
spec
->
snap_name
)
if
(
!
spec
->
snap_name
)
goto
out_mem
;
goto
out_mem
;
memcpy
(
spec
->
snap_name
,
buf
,
len
);
*
(
spec
->
snap_name
+
len
)
=
'\0'
;
*
(
spec
->
snap_name
+
len
)
=
'\0'
;
/* Initialize all rbd options to the defaults */
/* Initialize all rbd options to the defaults */
...
@@ -3323,7 +3719,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev)
...
@@ -3323,7 +3719,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev)
* First, see if the format 2 image id file exists, and if
* First, see if the format 2 image id file exists, and if
* so, get the image's persistent id from it.
* so, get the image's persistent id from it.
*/
*/
size
=
sizeof
(
RBD_ID_PREFIX
)
+
rbd_dev
->
spec
->
image_name_len
;
size
=
sizeof
(
RBD_ID_PREFIX
)
+
strlen
(
rbd_dev
->
spec
->
image_name
)
;
object_name
=
kmalloc
(
size
,
GFP_NOIO
);
object_name
=
kmalloc
(
size
,
GFP_NOIO
);
if
(
!
object_name
)
if
(
!
object_name
)
return
-
ENOMEM
;
return
-
ENOMEM
;
...
@@ -3339,21 +3735,18 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev)
...
@@ -3339,21 +3735,18 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev)
goto
out
;
goto
out
;
}
}
ret
=
rbd_
req_sync_exe
c
(
rbd_dev
,
object_name
,
ret
=
rbd_
obj_method_syn
c
(
rbd_dev
,
object_name
,
"rbd"
,
"get_id"
,
"rbd"
,
"get_id"
,
NULL
,
0
,
NULL
,
0
,
response
,
RBD_IMAGE_ID_LEN_MAX
,
response
,
RBD_IMAGE_ID_LEN_MAX
,
NULL
);
CEPH_OSD_FLAG_READ
,
NULL
);
dout
(
"%s: rbd_obj_method_sync returned %d
\n
"
,
__func__
,
ret
);
dout
(
"%s: rbd_req_sync_exec returned %d
\n
"
,
__func__
,
ret
);
if
(
ret
<
0
)
if
(
ret
<
0
)
goto
out
;
goto
out
;
ret
=
0
;
/* rbd_req_sync_exec() can return positive */
p
=
response
;
p
=
response
;
rbd_dev
->
spec
->
image_id
=
ceph_extract_encoded_string
(
&
p
,
rbd_dev
->
spec
->
image_id
=
ceph_extract_encoded_string
(
&
p
,
p
+
RBD_IMAGE_ID_LEN_MAX
,
p
+
RBD_IMAGE_ID_LEN_MAX
,
&
rbd_dev
->
spec
->
image_id_len
,
NULL
,
GFP_NOIO
);
GFP_NOIO
);
if
(
IS_ERR
(
rbd_dev
->
spec
->
image_id
))
{
if
(
IS_ERR
(
rbd_dev
->
spec
->
image_id
))
{
ret
=
PTR_ERR
(
rbd_dev
->
spec
->
image_id
);
ret
=
PTR_ERR
(
rbd_dev
->
spec
->
image_id
);
rbd_dev
->
spec
->
image_id
=
NULL
;
rbd_dev
->
spec
->
image_id
=
NULL
;
...
@@ -3377,11 +3770,10 @@ static int rbd_dev_v1_probe(struct rbd_device *rbd_dev)
...
@@ -3377,11 +3770,10 @@ static int rbd_dev_v1_probe(struct rbd_device *rbd_dev)
rbd_dev
->
spec
->
image_id
=
kstrdup
(
""
,
GFP_KERNEL
);
rbd_dev
->
spec
->
image_id
=
kstrdup
(
""
,
GFP_KERNEL
);
if
(
!
rbd_dev
->
spec
->
image_id
)
if
(
!
rbd_dev
->
spec
->
image_id
)
return
-
ENOMEM
;
return
-
ENOMEM
;
rbd_dev
->
spec
->
image_id_len
=
0
;
/* Record the header object name for this rbd image. */
/* Record the header object name for this rbd image. */
size
=
rbd_dev
->
spec
->
image_name_len
+
sizeof
(
RBD_SUFFIX
);
size
=
strlen
(
rbd_dev
->
spec
->
image_name
)
+
sizeof
(
RBD_SUFFIX
);
rbd_dev
->
header_name
=
kmalloc
(
size
,
GFP_KERNEL
);
rbd_dev
->
header_name
=
kmalloc
(
size
,
GFP_KERNEL
);
if
(
!
rbd_dev
->
header_name
)
{
if
(
!
rbd_dev
->
header_name
)
{
ret
=
-
ENOMEM
;
ret
=
-
ENOMEM
;
...
@@ -3427,7 +3819,7 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev)
...
@@ -3427,7 +3819,7 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev)
* Image id was filled in by the caller. Record the header
* Image id was filled in by the caller. Record the header
* object name for this rbd image.
* object name for this rbd image.
*/
*/
size
=
sizeof
(
RBD_HEADER_PREFIX
)
+
rbd_dev
->
spec
->
image_id_len
;
size
=
sizeof
(
RBD_HEADER_PREFIX
)
+
strlen
(
rbd_dev
->
spec
->
image_id
)
;
rbd_dev
->
header_name
=
kmalloc
(
size
,
GFP_KERNEL
);
rbd_dev
->
header_name
=
kmalloc
(
size
,
GFP_KERNEL
);
if
(
!
rbd_dev
->
header_name
)
if
(
!
rbd_dev
->
header_name
)
return
-
ENOMEM
;
return
-
ENOMEM
;
...
@@ -3542,7 +3934,7 @@ static int rbd_dev_probe_finish(struct rbd_device *rbd_dev)
...
@@ -3542,7 +3934,7 @@ static int rbd_dev_probe_finish(struct rbd_device *rbd_dev)
if
(
ret
)
if
(
ret
)
goto
err_out_bus
;
goto
err_out_bus
;
ret
=
rbd_
init_watch_dev
(
rbd_dev
);
ret
=
rbd_
dev_header_watch_sync
(
rbd_dev
,
1
);
if
(
ret
)
if
(
ret
)
goto
err_out_bus
;
goto
err_out_bus
;
...
@@ -3638,6 +4030,13 @@ static ssize_t rbd_add(struct bus_type *bus,
...
@@ -3638,6 +4030,13 @@ static ssize_t rbd_add(struct bus_type *bus,
goto
err_out_client
;
goto
err_out_client
;
spec
->
pool_id
=
(
u64
)
rc
;
spec
->
pool_id
=
(
u64
)
rc
;
/* The ceph file layout needs to fit pool id in 32 bits */
if
(
WARN_ON
(
spec
->
pool_id
>
(
u64
)
U32_MAX
))
{
rc
=
-
EIO
;
goto
err_out_client
;
}
rbd_dev
=
rbd_dev_create
(
rbdc
,
spec
);
rbd_dev
=
rbd_dev_create
(
rbdc
,
spec
);
if
(
!
rbd_dev
)
if
(
!
rbd_dev
)
goto
err_out_client
;
goto
err_out_client
;
...
@@ -3691,15 +4090,8 @@ static void rbd_dev_release(struct device *dev)
...
@@ -3691,15 +4090,8 @@ static void rbd_dev_release(struct device *dev)
{
{
struct
rbd_device
*
rbd_dev
=
dev_to_rbd_dev
(
dev
);
struct
rbd_device
*
rbd_dev
=
dev_to_rbd_dev
(
dev
);
if
(
rbd_dev
->
watch_request
)
{
struct
ceph_client
*
client
=
rbd_dev
->
rbd_client
->
client
;
ceph_osdc_unregister_linger_request
(
&
client
->
osdc
,
rbd_dev
->
watch_request
);
}
if
(
rbd_dev
->
watch_event
)
if
(
rbd_dev
->
watch_event
)
rbd_req_sync_unwatch
(
rbd_dev
);
rbd_dev_header_watch_sync
(
rbd_dev
,
0
);
/* clean up and free blkdev */
/* clean up and free blkdev */
rbd_free_disk
(
rbd_dev
);
rbd_free_disk
(
rbd_dev
);
...
@@ -3743,10 +4135,14 @@ static ssize_t rbd_remove(struct bus_type *bus,
...
@@ -3743,10 +4135,14 @@ static ssize_t rbd_remove(struct bus_type *bus,
goto
done
;
goto
done
;
}
}
if
(
rbd_dev
->
open_count
)
{
spin_lock_irq
(
&
rbd_dev
->
lock
);
if
(
rbd_dev
->
open_count
)
ret
=
-
EBUSY
;
ret
=
-
EBUSY
;
else
set_bit
(
RBD_DEV_FLAG_REMOVING
,
&
rbd_dev
->
flags
);
spin_unlock_irq
(
&
rbd_dev
->
lock
);
if
(
ret
<
0
)
goto
done
;
goto
done
;
}
rbd_remove_all_snaps
(
rbd_dev
);
rbd_remove_all_snaps
(
rbd_dev
);
rbd_bus_del_dev
(
rbd_dev
);
rbd_bus_del_dev
(
rbd_dev
);
...
@@ -3786,6 +4182,11 @@ int __init rbd_init(void)
...
@@ -3786,6 +4182,11 @@ int __init rbd_init(void)
{
{
int
rc
;
int
rc
;
if
(
!
libceph_compatible
(
NULL
))
{
rbd_warn
(
NULL
,
"libceph incompatibility (quitting)"
);
return
-
EINVAL
;
}
rc
=
rbd_sysfs_init
();
rc
=
rbd_sysfs_init
();
if
(
rc
)
if
(
rc
)
return
rc
;
return
rc
;
...
...
fs/ceph/addr.c
浏览文件 @
4c7a08c8
...
@@ -315,7 +315,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
...
@@ -315,7 +315,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
CEPH_OSD_OP_READ
,
CEPH_OSD_FLAG_READ
,
CEPH_OSD_OP_READ
,
CEPH_OSD_FLAG_READ
,
NULL
,
0
,
NULL
,
0
,
ci
->
i_truncate_seq
,
ci
->
i_truncate_size
,
ci
->
i_truncate_seq
,
ci
->
i_truncate_size
,
NULL
,
false
,
1
,
0
);
NULL
,
false
,
0
);
if
(
IS_ERR
(
req
))
if
(
IS_ERR
(
req
))
return
PTR_ERR
(
req
);
return
PTR_ERR
(
req
);
...
@@ -492,8 +492,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
...
@@ -492,8 +492,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
&
ci
->
i_layout
,
snapc
,
&
ci
->
i_layout
,
snapc
,
page_off
,
len
,
page_off
,
len
,
ci
->
i_truncate_seq
,
ci
->
i_truncate_size
,
ci
->
i_truncate_seq
,
ci
->
i_truncate_size
,
&
inode
->
i_mtime
,
&
inode
->
i_mtime
,
&
page
,
1
);
&
page
,
1
,
0
,
0
,
true
);
if
(
err
<
0
)
{
if
(
err
<
0
)
{
dout
(
"writepage setting page/mapping error %d %p
\n
"
,
err
,
page
);
dout
(
"writepage setting page/mapping error %d %p
\n
"
,
err
,
page
);
SetPageError
(
page
);
SetPageError
(
page
);
...
@@ -838,7 +837,7 @@ static int ceph_writepages_start(struct address_space *mapping,
...
@@ -838,7 +837,7 @@ static int ceph_writepages_start(struct address_space *mapping,
snapc
,
do_sync
,
snapc
,
do_sync
,
ci
->
i_truncate_seq
,
ci
->
i_truncate_seq
,
ci
->
i_truncate_size
,
ci
->
i_truncate_size
,
&
inode
->
i_mtime
,
true
,
1
,
0
);
&
inode
->
i_mtime
,
true
,
0
);
if
(
IS_ERR
(
req
))
{
if
(
IS_ERR
(
req
))
{
rc
=
PTR_ERR
(
req
);
rc
=
PTR_ERR
(
req
);
...
...
fs/ceph/caps.c
浏览文件 @
4c7a08c8
...
@@ -611,8 +611,16 @@ int ceph_add_cap(struct inode *inode,
...
@@ -611,8 +611,16 @@ int ceph_add_cap(struct inode *inode,
if
(
flags
&
CEPH_CAP_FLAG_AUTH
)
if
(
flags
&
CEPH_CAP_FLAG_AUTH
)
ci
->
i_auth_cap
=
cap
;
ci
->
i_auth_cap
=
cap
;
else
if
(
ci
->
i_auth_cap
==
cap
)
else
if
(
ci
->
i_auth_cap
==
cap
)
{
ci
->
i_auth_cap
=
NULL
;
ci
->
i_auth_cap
=
NULL
;
spin_lock
(
&
mdsc
->
cap_dirty_lock
);
if
(
!
list_empty
(
&
ci
->
i_dirty_item
))
{
dout
(
" moving %p to cap_dirty_migrating
\n
"
,
inode
);
list_move
(
&
ci
->
i_dirty_item
,
&
mdsc
->
cap_dirty_migrating
);
}
spin_unlock
(
&
mdsc
->
cap_dirty_lock
);
}
dout
(
"add_cap inode %p (%llx.%llx) cap %p %s now %s seq %d mds%d
\n
"
,
dout
(
"add_cap inode %p (%llx.%llx) cap %p %s now %s seq %d mds%d
\n
"
,
inode
,
ceph_vinop
(
inode
),
cap
,
ceph_cap_string
(
issued
),
inode
,
ceph_vinop
(
inode
),
cap
,
ceph_cap_string
(
issued
),
...
@@ -1460,7 +1468,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
...
@@ -1460,7 +1468,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
struct
ceph_mds_client
*
mdsc
=
fsc
->
mdsc
;
struct
ceph_mds_client
*
mdsc
=
fsc
->
mdsc
;
struct
inode
*
inode
=
&
ci
->
vfs_inode
;
struct
inode
*
inode
=
&
ci
->
vfs_inode
;
struct
ceph_cap
*
cap
;
struct
ceph_cap
*
cap
;
int
file_wanted
,
used
;
int
file_wanted
,
used
,
cap_used
;
int
took_snap_rwsem
=
0
;
/* true if mdsc->snap_rwsem held */
int
took_snap_rwsem
=
0
;
/* true if mdsc->snap_rwsem held */
int
issued
,
implemented
,
want
,
retain
,
revoking
,
flushing
=
0
;
int
issued
,
implemented
,
want
,
retain
,
revoking
,
flushing
=
0
;
int
mds
=
-
1
;
/* keep track of how far we've gone through i_caps list
int
mds
=
-
1
;
/* keep track of how far we've gone through i_caps list
...
@@ -1563,9 +1571,14 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
...
@@ -1563,9 +1571,14 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
/* NOTE: no side-effects allowed, until we take s_mutex */
/* NOTE: no side-effects allowed, until we take s_mutex */
cap_used
=
used
;
if
(
ci
->
i_auth_cap
&&
cap
!=
ci
->
i_auth_cap
)
cap_used
&=
~
ci
->
i_auth_cap
->
issued
;
revoking
=
cap
->
implemented
&
~
cap
->
issued
;
revoking
=
cap
->
implemented
&
~
cap
->
issued
;
dout
(
" mds%d cap %p issued %s implemented %s revoking %s
\n
"
,
dout
(
" mds%d cap %p
used %s
issued %s implemented %s revoking %s
\n
"
,
cap
->
mds
,
cap
,
ceph_cap_string
(
cap
->
issued
),
cap
->
mds
,
cap
,
ceph_cap_string
(
cap
->
issued
),
ceph_cap_string
(
cap_used
),
ceph_cap_string
(
cap
->
implemented
),
ceph_cap_string
(
cap
->
implemented
),
ceph_cap_string
(
revoking
));
ceph_cap_string
(
revoking
));
...
@@ -1593,7 +1606,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
...
@@ -1593,7 +1606,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
}
}
/* completed revocation? going down and there are no caps? */
/* completed revocation? going down and there are no caps? */
if
(
revoking
&&
(
revoking
&
used
)
==
0
)
{
if
(
revoking
&&
(
revoking
&
cap_
used
)
==
0
)
{
dout
(
"completed revocation of %s
\n
"
,
dout
(
"completed revocation of %s
\n
"
,
ceph_cap_string
(
cap
->
implemented
&
~
cap
->
issued
));
ceph_cap_string
(
cap
->
implemented
&
~
cap
->
issued
));
goto
ack
;
goto
ack
;
...
@@ -1670,8 +1683,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
...
@@ -1670,8 +1683,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
sent
++
;
sent
++
;
/* __send_cap drops i_ceph_lock */
/* __send_cap drops i_ceph_lock */
delayed
+=
__send_cap
(
mdsc
,
cap
,
CEPH_CAP_OP_UPDATE
,
used
,
want
,
delayed
+=
__send_cap
(
mdsc
,
cap
,
CEPH_CAP_OP_UPDATE
,
cap_used
,
retain
,
flushing
,
NULL
);
want
,
retain
,
flushing
,
NULL
);
goto
retry
;
/* retake i_ceph_lock and restart our cap scan. */
goto
retry
;
/* retake i_ceph_lock and restart our cap scan. */
}
}
...
@@ -2416,7 +2429,9 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
...
@@ -2416,7 +2429,9 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
dout
(
"mds wanted %s -> %s
\n
"
,
dout
(
"mds wanted %s -> %s
\n
"
,
ceph_cap_string
(
le32_to_cpu
(
grant
->
wanted
)),
ceph_cap_string
(
le32_to_cpu
(
grant
->
wanted
)),
ceph_cap_string
(
wanted
));
ceph_cap_string
(
wanted
));
grant
->
wanted
=
cpu_to_le32
(
wanted
);
/* imported cap may not have correct mds_wanted */
if
(
le32_to_cpu
(
grant
->
op
)
==
CEPH_CAP_OP_IMPORT
)
check_caps
=
1
;
}
}
cap
->
seq
=
seq
;
cap
->
seq
=
seq
;
...
@@ -2820,6 +2835,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
...
@@ -2820,6 +2835,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
dout
(
" mds%d seq %lld cap seq %u
\n
"
,
session
->
s_mds
,
session
->
s_seq
,
dout
(
" mds%d seq %lld cap seq %u
\n
"
,
session
->
s_mds
,
session
->
s_seq
,
(
unsigned
)
seq
);
(
unsigned
)
seq
);
if
(
op
==
CEPH_CAP_OP_IMPORT
)
ceph_add_cap_releases
(
mdsc
,
session
);
/* lookup ino */
/* lookup ino */
inode
=
ceph_find_inode
(
sb
,
vino
);
inode
=
ceph_find_inode
(
sb
,
vino
);
ci
=
ceph_inode
(
inode
);
ci
=
ceph_inode
(
inode
);
...
...
fs/ceph/file.c
浏览文件 @
4c7a08c8
...
@@ -243,6 +243,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
...
@@ -243,6 +243,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
err
=
ceph_mdsc_do_request
(
mdsc
,
err
=
ceph_mdsc_do_request
(
mdsc
,
(
flags
&
(
O_CREAT
|
O_TRUNC
))
?
dir
:
NULL
,
(
flags
&
(
O_CREAT
|
O_TRUNC
))
?
dir
:
NULL
,
req
);
req
);
if
(
err
)
goto
out_err
;
err
=
ceph_handle_snapdir
(
req
,
dentry
,
err
);
err
=
ceph_handle_snapdir
(
req
,
dentry
,
err
);
if
(
err
==
0
&&
(
flags
&
O_CREAT
)
&&
!
req
->
r_reply_info
.
head
->
is_dentry
)
if
(
err
==
0
&&
(
flags
&
O_CREAT
)
&&
!
req
->
r_reply_info
.
head
->
is_dentry
)
err
=
ceph_handle_notrace_create
(
dir
,
dentry
);
err
=
ceph_handle_notrace_create
(
dir
,
dentry
);
...
@@ -263,6 +266,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
...
@@ -263,6 +266,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
err
=
finish_no_open
(
file
,
dn
);
err
=
finish_no_open
(
file
,
dn
);
}
else
{
}
else
{
dout
(
"atomic_open finish_open on dn %p
\n
"
,
dn
);
dout
(
"atomic_open finish_open on dn %p
\n
"
,
dn
);
if
(
req
->
r_op
==
CEPH_MDS_OP_CREATE
&&
req
->
r_reply_info
.
has_create_ino
)
{
*
opened
|=
FILE_CREATED
;
}
err
=
finish_open
(
file
,
dentry
,
ceph_open
,
opened
);
err
=
finish_open
(
file
,
dentry
,
ceph_open
,
opened
);
}
}
...
@@ -535,7 +541,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
...
@@ -535,7 +541,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
ci
->
i_snap_realm
->
cached_context
,
ci
->
i_snap_realm
->
cached_context
,
do_sync
,
do_sync
,
ci
->
i_truncate_seq
,
ci
->
i_truncate_size
,
ci
->
i_truncate_seq
,
ci
->
i_truncate_size
,
&
mtime
,
false
,
2
,
page_align
);
&
mtime
,
false
,
page_align
);
if
(
IS_ERR
(
req
))
if
(
IS_ERR
(
req
))
return
PTR_ERR
(
req
);
return
PTR_ERR
(
req
);
...
...
fs/ceph/ioctl.c
浏览文件 @
4c7a08c8
...
@@ -194,7 +194,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
...
@@ -194,7 +194,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
return
-
EFAULT
;
return
-
EFAULT
;
down_read
(
&
osdc
->
map_sem
);
down_read
(
&
osdc
->
map_sem
);
r
=
ceph_calc_file_object_mapping
(
&
ci
->
i_layout
,
dl
.
file_offset
,
&
len
,
r
=
ceph_calc_file_object_mapping
(
&
ci
->
i_layout
,
dl
.
file_offset
,
len
,
&
dl
.
object_no
,
&
dl
.
object_offset
,
&
dl
.
object_no
,
&
dl
.
object_offset
,
&
olen
);
&
olen
);
if
(
r
<
0
)
if
(
r
<
0
)
...
...
fs/ceph/mds_client.c
浏览文件 @
4c7a08c8
...
@@ -232,6 +232,30 @@ static int parse_reply_info_filelock(void **p, void *end,
...
@@ -232,6 +232,30 @@ static int parse_reply_info_filelock(void **p, void *end,
return
-
EIO
;
return
-
EIO
;
}
}
/*
* parse create results
*/
static
int
parse_reply_info_create
(
void
**
p
,
void
*
end
,
struct
ceph_mds_reply_info_parsed
*
info
,
int
features
)
{
if
(
features
&
CEPH_FEATURE_REPLY_CREATE_INODE
)
{
if
(
*
p
==
end
)
{
info
->
has_create_ino
=
false
;
}
else
{
info
->
has_create_ino
=
true
;
info
->
ino
=
ceph_decode_64
(
p
);
}
}
if
(
unlikely
(
*
p
!=
end
))
goto
bad
;
return
0
;
bad:
return
-
EIO
;
}
/*
/*
* parse extra results
* parse extra results
*/
*/
...
@@ -241,8 +265,12 @@ static int parse_reply_info_extra(void **p, void *end,
...
@@ -241,8 +265,12 @@ static int parse_reply_info_extra(void **p, void *end,
{
{
if
(
info
->
head
->
op
==
CEPH_MDS_OP_GETFILELOCK
)
if
(
info
->
head
->
op
==
CEPH_MDS_OP_GETFILELOCK
)
return
parse_reply_info_filelock
(
p
,
end
,
info
,
features
);
return
parse_reply_info_filelock
(
p
,
end
,
info
,
features
);
else
else
if
(
info
->
head
->
op
==
CEPH_MDS_OP_READDIR
)
return
parse_reply_info_dir
(
p
,
end
,
info
,
features
);
return
parse_reply_info_dir
(
p
,
end
,
info
,
features
);
else
if
(
info
->
head
->
op
==
CEPH_MDS_OP_CREATE
)
return
parse_reply_info_create
(
p
,
end
,
info
,
features
);
else
return
-
EIO
;
}
}
/*
/*
...
@@ -2170,7 +2198,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
...
@@ -2170,7 +2198,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
mutex_lock
(
&
req
->
r_fill_mutex
);
mutex_lock
(
&
req
->
r_fill_mutex
);
err
=
ceph_fill_trace
(
mdsc
->
fsc
->
sb
,
req
,
req
->
r_session
);
err
=
ceph_fill_trace
(
mdsc
->
fsc
->
sb
,
req
,
req
->
r_session
);
if
(
err
==
0
)
{
if
(
err
==
0
)
{
if
(
result
==
0
&&
req
->
r_op
!=
CEPH_MDS_OP_GETFILELOCK
&&
if
(
result
==
0
&&
(
req
->
r_op
==
CEPH_MDS_OP_READDIR
||
req
->
r_op
==
CEPH_MDS_OP_LSSNAP
)
&&
rinfo
->
dir_nr
)
rinfo
->
dir_nr
)
ceph_readdir_prepopulate
(
req
,
req
->
r_session
);
ceph_readdir_prepopulate
(
req
,
req
->
r_session
);
ceph_unreserve_caps
(
mdsc
,
&
req
->
r_caps_reservation
);
ceph_unreserve_caps
(
mdsc
,
&
req
->
r_caps_reservation
);
...
...
fs/ceph/mds_client.h
浏览文件 @
4c7a08c8
...
@@ -74,6 +74,12 @@ struct ceph_mds_reply_info_parsed {
...
@@ -74,6 +74,12 @@ struct ceph_mds_reply_info_parsed {
struct
ceph_mds_reply_info_in
*
dir_in
;
struct
ceph_mds_reply_info_in
*
dir_in
;
u8
dir_complete
,
dir_end
;
u8
dir_complete
,
dir_end
;
};
};
/* for create results */
struct
{
bool
has_create_ino
;
u64
ino
;
};
};
};
/* encoded blob describing snapshot contexts for certain
/* encoded blob describing snapshot contexts for certain
...
...
fs/ceph/strings.c
浏览文件 @
4c7a08c8
...
@@ -15,6 +15,7 @@ const char *ceph_mds_state_name(int s)
...
@@ -15,6 +15,7 @@ const char *ceph_mds_state_name(int s)
case
CEPH_MDS_STATE_BOOT
:
return
"up:boot"
;
case
CEPH_MDS_STATE_BOOT
:
return
"up:boot"
;
case
CEPH_MDS_STATE_STANDBY
:
return
"up:standby"
;
case
CEPH_MDS_STATE_STANDBY
:
return
"up:standby"
;
case
CEPH_MDS_STATE_STANDBY_REPLAY
:
return
"up:standby-replay"
;
case
CEPH_MDS_STATE_STANDBY_REPLAY
:
return
"up:standby-replay"
;
case
CEPH_MDS_STATE_REPLAYONCE
:
return
"up:oneshot-replay"
;
case
CEPH_MDS_STATE_CREATING
:
return
"up:creating"
;
case
CEPH_MDS_STATE_CREATING
:
return
"up:creating"
;
case
CEPH_MDS_STATE_STARTING
:
return
"up:starting"
;
case
CEPH_MDS_STATE_STARTING
:
return
"up:starting"
;
/* up and in */
/* up and in */
...
@@ -50,10 +51,13 @@ const char *ceph_mds_op_name(int op)
...
@@ -50,10 +51,13 @@ const char *ceph_mds_op_name(int op)
case
CEPH_MDS_OP_LOOKUP
:
return
"lookup"
;
case
CEPH_MDS_OP_LOOKUP
:
return
"lookup"
;
case
CEPH_MDS_OP_LOOKUPHASH
:
return
"lookuphash"
;
case
CEPH_MDS_OP_LOOKUPHASH
:
return
"lookuphash"
;
case
CEPH_MDS_OP_LOOKUPPARENT
:
return
"lookupparent"
;
case
CEPH_MDS_OP_LOOKUPPARENT
:
return
"lookupparent"
;
case
CEPH_MDS_OP_LOOKUPINO
:
return
"lookupino"
;
case
CEPH_MDS_OP_GETATTR
:
return
"getattr"
;
case
CEPH_MDS_OP_GETATTR
:
return
"getattr"
;
case
CEPH_MDS_OP_SETXATTR
:
return
"setxattr"
;
case
CEPH_MDS_OP_SETXATTR
:
return
"setxattr"
;
case
CEPH_MDS_OP_SETATTR
:
return
"setattr"
;
case
CEPH_MDS_OP_SETATTR
:
return
"setattr"
;
case
CEPH_MDS_OP_RMXATTR
:
return
"rmxattr"
;
case
CEPH_MDS_OP_RMXATTR
:
return
"rmxattr"
;
case
CEPH_MDS_OP_SETLAYOUT
:
return
"setlayou"
;
case
CEPH_MDS_OP_SETDIRLAYOUT
:
return
"setdirlayout"
;
case
CEPH_MDS_OP_READDIR
:
return
"readdir"
;
case
CEPH_MDS_OP_READDIR
:
return
"readdir"
;
case
CEPH_MDS_OP_MKNOD
:
return
"mknod"
;
case
CEPH_MDS_OP_MKNOD
:
return
"mknod"
;
case
CEPH_MDS_OP_LINK
:
return
"link"
;
case
CEPH_MDS_OP_LINK
:
return
"link"
;
...
...
fs/ceph/super.h
浏览文件 @
4c7a08c8
...
@@ -798,13 +798,7 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
...
@@ -798,13 +798,7 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
/* file.c */
/* file.c */
extern
const
struct
file_operations
ceph_file_fops
;
extern
const
struct
file_operations
ceph_file_fops
;
extern
const
struct
address_space_operations
ceph_aops
;
extern
const
struct
address_space_operations
ceph_aops
;
extern
int
ceph_copy_to_page_vector
(
struct
page
**
pages
,
const
char
*
data
,
loff_t
off
,
size_t
len
);
extern
int
ceph_copy_from_page_vector
(
struct
page
**
pages
,
char
*
data
,
loff_t
off
,
size_t
len
);
extern
struct
page
**
ceph_alloc_page_vector
(
int
num_pages
,
gfp_t
flags
);
extern
int
ceph_open
(
struct
inode
*
inode
,
struct
file
*
file
);
extern
int
ceph_open
(
struct
inode
*
inode
,
struct
file
*
file
);
extern
int
ceph_atomic_open
(
struct
inode
*
dir
,
struct
dentry
*
dentry
,
extern
int
ceph_atomic_open
(
struct
inode
*
dir
,
struct
dentry
*
dentry
,
struct
file
*
file
,
unsigned
flags
,
umode_t
mode
,
struct
file
*
file
,
unsigned
flags
,
umode_t
mode
,
...
...
fs/ceph/xattr.c
浏览文件 @
4c7a08c8
...
@@ -29,9 +29,94 @@ struct ceph_vxattr {
...
@@ -29,9 +29,94 @@ struct ceph_vxattr {
size_t
name_size
;
/* strlen(name) + 1 (for '\0') */
size_t
name_size
;
/* strlen(name) + 1 (for '\0') */
size_t
(
*
getxattr_cb
)(
struct
ceph_inode_info
*
ci
,
char
*
val
,
size_t
(
*
getxattr_cb
)(
struct
ceph_inode_info
*
ci
,
char
*
val
,
size_t
size
);
size_t
size
);
bool
readonly
;
bool
readonly
,
hidden
;
bool
(
*
exists_cb
)(
struct
ceph_inode_info
*
ci
);
};
};
/* layouts */
/*
 * Tell whether @ci has a layout worth reporting.
 *
 * Walks ci->i_layout one byte at a time and returns true at the first
 * non-zero byte; returns false when every byte of the layout is zero.
 */
static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
{
	char *byte = (char *)&ci->i_layout;
	size_t remaining = sizeof(ci->i_layout);

	while (remaining--) {
		if (*byte++)
			return true;
	}
	return false;
}
/*
 * Format the complete layout of @ci into @val (at most @size bytes) as
 *   "stripe_unit=N stripe_count=N object_size=N pool=P"
 *
 * P is the pool name when ceph_pg_pool_name_by_id() returns one for the
 * current osdmap, otherwise the numeric pool id.  The name lookup and the
 * formatting are both done with osdc->map_sem held for reading.
 *
 * Returns whatever snprintf() returned for the chosen format.
 */
static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
				   size_t size)
{
	int ret;
	struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
	struct ceph_osd_client *osdc = &fsc->client->osdc;
	s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
	const char *pool_name;

	dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
	down_read(&osdc->map_sem);
	pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
	/*
	 * "%lld" is a signed conversion, so every argument is cast to
	 * (long long) rather than (unsigned long long) to keep the format
	 * string and the argument types in agreement.
	 */
	if (pool_name)
		ret = snprintf(val, size,
		"stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%s",
		(long long)ceph_file_layout_su(ci->i_layout),
		(long long)ceph_file_layout_stripe_count(ci->i_layout),
		(long long)ceph_file_layout_object_size(ci->i_layout),
		pool_name);
	else
		ret = snprintf(val, size,
		"stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld",
		(long long)ceph_file_layout_su(ci->i_layout),
		(long long)ceph_file_layout_stripe_count(ci->i_layout),
		(long long)ceph_file_layout_object_size(ci->i_layout),
		(long long)pool);

	up_read(&osdc->map_sem);
	return ret;
}
/*
 * Format the stripe unit of @ci's layout into @val (at most @size bytes)
 * as a decimal number and return the snprintf() result.
 *
 * The argument is cast to (long long) so that it matches the signed
 * "%lld" conversion.
 */
static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci,
					       char *val, size_t size)
{
	return snprintf(val, size, "%lld",
			(long long)ceph_file_layout_su(ci->i_layout));
}
/*
 * Format the stripe count of @ci's layout into @val (at most @size bytes)
 * as a decimal number and return the snprintf() result.
 *
 * The argument is cast to (long long) so that it matches the signed
 * "%lld" conversion.
 */
static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci,
						char *val, size_t size)
{
	return snprintf(val, size, "%lld",
			(long long)ceph_file_layout_stripe_count(ci->i_layout));
}
/*
 * Format the object size of @ci's layout into @val (at most @size bytes)
 * as a decimal number and return the snprintf() result.
 *
 * The argument is cast to (long long) so that it matches the signed
 * "%lld" conversion.
 */
static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci,
					       char *val, size_t size)
{
	return snprintf(val, size, "%lld",
			(long long)ceph_file_layout_object_size(ci->i_layout));
}
/*
 * Format the pool of @ci's layout into @val (at most @size bytes).
 *
 * The pool is written by name when ceph_pg_pool_name_by_id() returns one
 * for the current osdmap, otherwise as its numeric id.  The lookup and the
 * formatting are both done with osdc->map_sem held for reading.
 *
 * Returns whatever snprintf() returned for the chosen format.
 */
static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci,
					char *val, size_t size)
{
	int ret;
	struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
	struct ceph_osd_client *osdc = &fsc->client->osdc;
	s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
	const char *pool_name;

	down_read(&osdc->map_sem);
	pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
	if (pool_name)
		ret = snprintf(val, size, "%s", pool_name);
	else
		/* pool is signed (s64); cast to the type "%lld" expects */
		ret = snprintf(val, size, "%lld", (long long)pool);
	up_read(&osdc->map_sem);
	return ret;
}
/* directories */
/* directories */
static
size_t
ceph_vxattrcb_dir_entries
(
struct
ceph_inode_info
*
ci
,
char
*
val
,
static
size_t
ceph_vxattrcb_dir_entries
(
struct
ceph_inode_info
*
ci
,
char
*
val
,
...
@@ -83,17 +168,43 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
...
@@ -83,17 +168,43 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
(
long
)
ci
->
i_rctime
.
tv_nsec
);
(
long
)
ci
->
i_rctime
.
tv_nsec
);
}
}
#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name
#define XATTR_NAME_CEPH(_type, _name) \
#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name
{ \
#define CEPH_XATTR_NAME2(_type, _name, _name2) \
.name = CEPH_XATTR_NAME(_type, _name), \
XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
.name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \
.getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
#define XATTR_NAME_CEPH(_type, _name) \
.readonly = true, \
{ \
}
.name = CEPH_XATTR_NAME(_type, _name), \
.name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \
.getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
.readonly = true, \
.hidden = false, \
.exists_cb = NULL, \
}
#define XATTR_LAYOUT_FIELD(_type, _name, _field) \
{ \
.name = CEPH_XATTR_NAME2(_type, _name, _field), \
.name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \
.getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field, \
.readonly = false, \
.hidden = true, \
.exists_cb = ceph_vxattrcb_layout_exists, \
}
static
struct
ceph_vxattr
ceph_dir_vxattrs
[]
=
{
static
struct
ceph_vxattr
ceph_dir_vxattrs
[]
=
{
{
.
name
=
"ceph.dir.layout"
,
.
name_size
=
sizeof
(
"ceph.dir.layout"
),
.
getxattr_cb
=
ceph_vxattrcb_layout
,
.
readonly
=
false
,
.
hidden
=
false
,
.
exists_cb
=
ceph_vxattrcb_layout_exists
,
},
XATTR_LAYOUT_FIELD
(
dir
,
layout
,
stripe_unit
),
XATTR_LAYOUT_FIELD
(
dir
,
layout
,
stripe_count
),
XATTR_LAYOUT_FIELD
(
dir
,
layout
,
object_size
),
XATTR_LAYOUT_FIELD
(
dir
,
layout
,
pool
),
XATTR_NAME_CEPH
(
dir
,
entries
),
XATTR_NAME_CEPH
(
dir
,
entries
),
XATTR_NAME_CEPH
(
dir
,
files
),
XATTR_NAME_CEPH
(
dir
,
files
),
XATTR_NAME_CEPH
(
dir
,
subdirs
),
XATTR_NAME_CEPH
(
dir
,
subdirs
),
...
@@ -108,28 +219,19 @@ static size_t ceph_dir_vxattrs_name_size; /* total size of all names */
...
@@ -108,28 +219,19 @@ static size_t ceph_dir_vxattrs_name_size; /* total size of all names */
/* files */
/* files */
static
size_t
ceph_vxattrcb_file_layout
(
struct
ceph_inode_info
*
ci
,
char
*
val
,
size_t
size
)
{
int
ret
;
ret
=
snprintf
(
val
,
size
,
"chunk_bytes=%lld
\n
stripe_count=%lld
\n
object_size=%lld
\n
"
,
(
unsigned
long
long
)
ceph_file_layout_su
(
ci
->
i_layout
),
(
unsigned
long
long
)
ceph_file_layout_stripe_count
(
ci
->
i_layout
),
(
unsigned
long
long
)
ceph_file_layout_object_size
(
ci
->
i_layout
));
return
ret
;
}
static
struct
ceph_vxattr
ceph_file_vxattrs
[]
=
{
static
struct
ceph_vxattr
ceph_file_vxattrs
[]
=
{
XATTR_NAME_CEPH
(
file
,
layout
),
/* The following extended attribute name is deprecated */
{
{
.
name
=
XATTR_CEPH_PREFIX
"layout"
,
.
name
=
"ceph.file.layout"
,
.
name_size
=
sizeof
(
XATTR_CEPH_PREFIX
"layout"
),
.
name_size
=
sizeof
(
"ceph.file.layout"
),
.
getxattr_cb
=
ceph_vxattrcb_file_layout
,
.
getxattr_cb
=
ceph_vxattrcb_layout
,
.
readonly
=
true
,
.
readonly
=
false
,
.
hidden
=
false
,
.
exists_cb
=
ceph_vxattrcb_layout_exists
,
},
},
XATTR_LAYOUT_FIELD
(
file
,
layout
,
stripe_unit
),
XATTR_LAYOUT_FIELD
(
file
,
layout
,
stripe_count
),
XATTR_LAYOUT_FIELD
(
file
,
layout
,
object_size
),
XATTR_LAYOUT_FIELD
(
file
,
layout
,
pool
),
{
0
}
/* Required table terminator */
{
0
}
/* Required table terminator */
};
};
static
size_t
ceph_file_vxattrs_name_size
;
/* total size of all names */
static
size_t
ceph_file_vxattrs_name_size
;
/* total size of all names */
...
@@ -164,7 +266,8 @@ static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs)
...
@@ -164,7 +266,8 @@ static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs)
size_t
size
=
0
;
size_t
size
=
0
;
for
(
vxattr
=
vxattrs
;
vxattr
->
name
;
vxattr
++
)
for
(
vxattr
=
vxattrs
;
vxattr
->
name
;
vxattr
++
)
size
+=
vxattr
->
name_size
;
if
(
!
vxattr
->
hidden
)
size
+=
vxattr
->
name_size
;
return
size
;
return
size
;
}
}
...
@@ -572,13 +675,17 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
...
@@ -572,13 +675,17 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
if
(
!
ceph_is_valid_xattr
(
name
))
if
(
!
ceph_is_valid_xattr
(
name
))
return
-
ENODATA
;
return
-
ENODATA
;
/* let's see if a virtual xattr was requested */
vxattr
=
ceph_match_vxattr
(
inode
,
name
);
spin_lock
(
&
ci
->
i_ceph_lock
);
spin_lock
(
&
ci
->
i_ceph_lock
);
dout
(
"getxattr %p ver=%lld index_ver=%lld
\n
"
,
inode
,
dout
(
"getxattr %p ver=%lld index_ver=%lld
\n
"
,
inode
,
ci
->
i_xattrs
.
version
,
ci
->
i_xattrs
.
index_version
);
ci
->
i_xattrs
.
version
,
ci
->
i_xattrs
.
index_version
);
/* let's see if a virtual xattr was requested */
vxattr
=
ceph_match_vxattr
(
inode
,
name
);
if
(
vxattr
&&
!
(
vxattr
->
exists_cb
&&
!
vxattr
->
exists_cb
(
ci
)))
{
err
=
vxattr
->
getxattr_cb
(
ci
,
value
,
size
);
goto
out
;
}
if
(
__ceph_caps_issued_mask
(
ci
,
CEPH_CAP_XATTR_SHARED
,
1
)
&&
if
(
__ceph_caps_issued_mask
(
ci
,
CEPH_CAP_XATTR_SHARED
,
1
)
&&
(
ci
->
i_xattrs
.
index_version
>=
ci
->
i_xattrs
.
version
))
{
(
ci
->
i_xattrs
.
index_version
>=
ci
->
i_xattrs
.
version
))
{
goto
get_xattr
;
goto
get_xattr
;
...
@@ -592,11 +699,6 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
...
@@ -592,11 +699,6 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
spin_lock
(
&
ci
->
i_ceph_lock
);
spin_lock
(
&
ci
->
i_ceph_lock
);
if
(
vxattr
&&
vxattr
->
readonly
)
{
err
=
vxattr
->
getxattr_cb
(
ci
,
value
,
size
);
goto
out
;
}
err
=
__build_xattrs
(
inode
);
err
=
__build_xattrs
(
inode
);
if
(
err
<
0
)
if
(
err
<
0
)
goto
out
;
goto
out
;
...
@@ -604,11 +706,8 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
...
@@ -604,11 +706,8 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
get_xattr:
get_xattr:
err
=
-
ENODATA
;
/* == ENOATTR */
err
=
-
ENODATA
;
/* == ENOATTR */
xattr
=
__get_xattr
(
ci
,
name
);
xattr
=
__get_xattr
(
ci
,
name
);
if
(
!
xattr
)
{
if
(
!
xattr
)
if
(
vxattr
)
err
=
vxattr
->
getxattr_cb
(
ci
,
value
,
size
);
goto
out
;
goto
out
;
}
err
=
-
ERANGE
;
err
=
-
ERANGE
;
if
(
size
&&
size
<
xattr
->
val_len
)
if
(
size
&&
size
<
xattr
->
val_len
)
...
@@ -664,23 +763,30 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
...
@@ -664,23 +763,30 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
vir_namelen
=
ceph_vxattrs_name_size
(
vxattrs
);
vir_namelen
=
ceph_vxattrs_name_size
(
vxattrs
);
/* adding 1 byte per each variable due to the null termination */
/* adding 1 byte per each variable due to the null termination */
namelen
=
vir_namelen
+
ci
->
i_xattrs
.
names_size
+
ci
->
i_xattrs
.
count
;
namelen
=
ci
->
i_xattrs
.
names_size
+
ci
->
i_xattrs
.
count
;
err
=
-
ERANGE
;
err
=
-
ERANGE
;
if
(
size
&&
namelen
>
size
)
if
(
size
&&
vir_namelen
+
namelen
>
size
)
goto
out
;
goto
out
;
err
=
namelen
;
err
=
namelen
+
vir_namelen
;
if
(
size
==
0
)
if
(
size
==
0
)
goto
out
;
goto
out
;
names
=
__copy_xattr_names
(
ci
,
names
);
names
=
__copy_xattr_names
(
ci
,
names
);
/* virtual xattr names, too */
/* virtual xattr names, too */
if
(
vxattrs
)
err
=
namelen
;
if
(
vxattrs
)
{
for
(
i
=
0
;
vxattrs
[
i
].
name
;
i
++
)
{
for
(
i
=
0
;
vxattrs
[
i
].
name
;
i
++
)
{
len
=
sprintf
(
names
,
"%s"
,
vxattrs
[
i
].
name
);
if
(
!
vxattrs
[
i
].
hidden
&&
names
+=
len
+
1
;
!
(
vxattrs
[
i
].
exists_cb
&&
!
vxattrs
[
i
].
exists_cb
(
ci
)))
{
len
=
sprintf
(
names
,
"%s"
,
vxattrs
[
i
].
name
);
names
+=
len
+
1
;
err
+=
len
+
1
;
}
}
}
}
out:
out:
spin_unlock
(
&
ci
->
i_ceph_lock
);
spin_unlock
(
&
ci
->
i_ceph_lock
);
...
@@ -782,6 +888,10 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
...
@@ -782,6 +888,10 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
if
(
vxattr
&&
vxattr
->
readonly
)
if
(
vxattr
&&
vxattr
->
readonly
)
return
-
EOPNOTSUPP
;
return
-
EOPNOTSUPP
;
/* pass any unhandled ceph.* xattrs through to the MDS */
if
(
!
strncmp
(
name
,
XATTR_CEPH_PREFIX
,
XATTR_CEPH_PREFIX_LEN
))
goto
do_sync_unlocked
;
/* preallocate memory for xattr name, value, index node */
/* preallocate memory for xattr name, value, index node */
err
=
-
ENOMEM
;
err
=
-
ENOMEM
;
newname
=
kmemdup
(
name
,
name_len
+
1
,
GFP_NOFS
);
newname
=
kmemdup
(
name
,
name_len
+
1
,
GFP_NOFS
);
...
@@ -838,6 +948,7 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
...
@@ -838,6 +948,7 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
do_sync:
do_sync:
spin_unlock
(
&
ci
->
i_ceph_lock
);
spin_unlock
(
&
ci
->
i_ceph_lock
);
do_sync_unlocked:
err
=
ceph_sync_setxattr
(
dentry
,
name
,
value
,
size
,
flags
);
err
=
ceph_sync_setxattr
(
dentry
,
name
,
value
,
size
,
flags
);
out:
out:
kfree
(
newname
);
kfree
(
newname
);
...
@@ -892,6 +1003,10 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
...
@@ -892,6 +1003,10 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
if
(
vxattr
&&
vxattr
->
readonly
)
if
(
vxattr
&&
vxattr
->
readonly
)
return
-
EOPNOTSUPP
;
return
-
EOPNOTSUPP
;
/* pass any unhandled ceph.* xattrs through to the MDS */
if
(
!
strncmp
(
name
,
XATTR_CEPH_PREFIX
,
XATTR_CEPH_PREFIX_LEN
))
goto
do_sync_unlocked
;
err
=
-
ENOMEM
;
err
=
-
ENOMEM
;
spin_lock
(
&
ci
->
i_ceph_lock
);
spin_lock
(
&
ci
->
i_ceph_lock
);
retry:
retry:
...
@@ -931,6 +1046,7 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
...
@@ -931,6 +1046,7 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
return
err
;
return
err
;
do_sync:
do_sync:
spin_unlock
(
&
ci
->
i_ceph_lock
);
spin_unlock
(
&
ci
->
i_ceph_lock
);
do_sync_unlocked:
err
=
ceph_send_removexattr
(
dentry
,
name
);
err
=
ceph_send_removexattr
(
dentry
,
name
);
out:
out:
return
err
;
return
err
;
...
...
include/linux/ceph/ceph_features.h
浏览文件 @
4c7a08c8
...
@@ -14,13 +14,19 @@
...
@@ -14,13 +14,19 @@
#define CEPH_FEATURE_DIRLAYOUTHASH (1<<7)
#define CEPH_FEATURE_DIRLAYOUTHASH (1<<7)
/* bits 8-17 defined by user-space; not supported yet here */
/* bits 8-17 defined by user-space; not supported yet here */
#define CEPH_FEATURE_CRUSH_TUNABLES (1<<18)
#define CEPH_FEATURE_CRUSH_TUNABLES (1<<18)
/* bits 19-24 defined by user-space; not supported yet here */
#define CEPH_FEATURE_CRUSH_TUNABLES2 (1<<25)
/* bit 26 defined by user-space; not supported yet here */
#define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27)
/*
/*
* Features supported.
* Features supported.
*/
*/
#define CEPH_FEATURES_SUPPORTED_DEFAULT \
#define CEPH_FEATURES_SUPPORTED_DEFAULT \
(CEPH_FEATURE_NOSRCADDR | \
(CEPH_FEATURE_NOSRCADDR | \
CEPH_FEATURE_CRUSH_TUNABLES)
CEPH_FEATURE_CRUSH_TUNABLES | \
CEPH_FEATURE_CRUSH_TUNABLES2 | \
CEPH_FEATURE_REPLY_CREATE_INODE)
#define CEPH_FEATURES_REQUIRED_DEFAULT \
#define CEPH_FEATURES_REQUIRED_DEFAULT \
(CEPH_FEATURE_NOSRCADDR)
(CEPH_FEATURE_NOSRCADDR)
...
...
include/linux/ceph/ceph_fs.h
浏览文件 @
4c7a08c8
...
@@ -21,16 +21,14 @@
...
@@ -21,16 +21,14 @@
* internal cluster protocols separately from the public,
* internal cluster protocols separately from the public,
* client-facing protocol.
* client-facing protocol.
*/
*/
#define CEPH_OSD_PROTOCOL 8
/* cluster internal */
#define CEPH_MDS_PROTOCOL 12
/* cluster internal */
#define CEPH_MON_PROTOCOL 5
/* cluster internal */
#define CEPH_OSDC_PROTOCOL 24
/* server/client */
#define CEPH_OSDC_PROTOCOL 24
/* server/client */
#define CEPH_MDSC_PROTOCOL 32
/* server/client */
#define CEPH_MDSC_PROTOCOL 32
/* server/client */
#define CEPH_MONC_PROTOCOL 15
/* server/client */
#define CEPH_MONC_PROTOCOL 15
/* server/client */
#define CEPH_INO_ROOT 1
#define CEPH_INO_ROOT 1
#define CEPH_INO_CEPH 2
/* hidden .ceph dir */
#define CEPH_INO_CEPH 2
/* hidden .ceph dir */
#define CEPH_INO_DOTDOT 3
/* used by ceph fuse for parent (..) */
/* arbitrary limit on max # of monitors (cluster of 3 is typical) */
/* arbitrary limit on max # of monitors (cluster of 3 is typical) */
#define CEPH_MAX_MON 31
#define CEPH_MAX_MON 31
...
@@ -51,7 +49,7 @@ struct ceph_file_layout {
...
@@ -51,7 +49,7 @@ struct ceph_file_layout {
__le32
fl_object_stripe_unit
;
/* UNUSED. for per-object parity, if any */
__le32
fl_object_stripe_unit
;
/* UNUSED. for per-object parity, if any */
/* object -> pg layout */
/* object -> pg layout */
__le32
fl_unused
;
/* unused; used to be preferred primary
(-1
) */
__le32
fl_unused
;
/* unused; used to be preferred primary
for pg (-1 for none
) */
__le32
fl_pg_pool
;
/* namespace, crush ruleset, rep level */
__le32
fl_pg_pool
;
/* namespace, crush ruleset, rep level */
}
__attribute__
((
packed
));
}
__attribute__
((
packed
));
...
@@ -101,6 +99,8 @@ struct ceph_dir_layout {
...
@@ -101,6 +99,8 @@ struct ceph_dir_layout {
#define CEPH_MSG_MON_SUBSCRIBE_ACK 16
#define CEPH_MSG_MON_SUBSCRIBE_ACK 16
#define CEPH_MSG_AUTH 17
#define CEPH_MSG_AUTH 17
#define CEPH_MSG_AUTH_REPLY 18
#define CEPH_MSG_AUTH_REPLY 18
#define CEPH_MSG_MON_GET_VERSION 19
#define CEPH_MSG_MON_GET_VERSION_REPLY 20
/* client <-> mds */
/* client <-> mds */
#define CEPH_MSG_MDS_MAP 21
#define CEPH_MSG_MDS_MAP 21
...
@@ -220,6 +220,11 @@ struct ceph_mon_subscribe_ack {
...
@@ -220,6 +220,11 @@ struct ceph_mon_subscribe_ack {
struct
ceph_fsid
fsid
;
struct
ceph_fsid
fsid
;
}
__attribute__
((
packed
));
}
__attribute__
((
packed
));
/*
* mdsmap flags
*/
#define CEPH_MDSMAP_DOWN (1<<0)
/* cluster deliberately down */
/*
/*
* mds states
* mds states
* > 0 -> in
* > 0 -> in
...
@@ -233,6 +238,7 @@ struct ceph_mon_subscribe_ack {
...
@@ -233,6 +238,7 @@ struct ceph_mon_subscribe_ack {
#define CEPH_MDS_STATE_CREATING -6
/* up, creating MDS instance. */
#define CEPH_MDS_STATE_CREATING -6
/* up, creating MDS instance. */
#define CEPH_MDS_STATE_STARTING -7
/* up, starting previously stopped mds */
#define CEPH_MDS_STATE_STARTING -7
/* up, starting previously stopped mds */
#define CEPH_MDS_STATE_STANDBY_REPLAY -8
/* up, tailing active node's journal */
#define CEPH_MDS_STATE_STANDBY_REPLAY -8
/* up, tailing active node's journal */
#define CEPH_MDS_STATE_REPLAYONCE -9
/* up, replaying an active node's journal */
#define CEPH_MDS_STATE_REPLAY 8
/* up, replaying journal. */
#define CEPH_MDS_STATE_REPLAY 8
/* up, replaying journal. */
#define CEPH_MDS_STATE_RESOLVE 9
/* up, disambiguating distributed
#define CEPH_MDS_STATE_RESOLVE 9
/* up, disambiguating distributed
...
@@ -264,6 +270,7 @@ extern const char *ceph_mds_state_name(int s);
...
@@ -264,6 +270,7 @@ extern const char *ceph_mds_state_name(int s);
#define CEPH_LOCK_IXATTR 2048
#define CEPH_LOCK_IXATTR 2048
#define CEPH_LOCK_IFLOCK 4096
/* advisory file locks */
#define CEPH_LOCK_IFLOCK 4096
/* advisory file locks */
#define CEPH_LOCK_INO 8192
/* immutable inode bits; not a lock */
#define CEPH_LOCK_INO 8192
/* immutable inode bits; not a lock */
#define CEPH_LOCK_IPOLICY 16384
/* policy lock on dirs. MDS internal */
/* client_session ops */
/* client_session ops */
enum
{
enum
{
...
@@ -338,6 +345,12 @@ extern const char *ceph_mds_op_name(int op);
...
@@ -338,6 +345,12 @@ extern const char *ceph_mds_op_name(int op);
#define CEPH_SETATTR_SIZE 32
#define CEPH_SETATTR_SIZE 32
#define CEPH_SETATTR_CTIME 64
#define CEPH_SETATTR_CTIME 64
/*
* Ceph setxattr request flags.
*/
#define CEPH_XATTR_CREATE 1
#define CEPH_XATTR_REPLACE 2
union
ceph_mds_request_args
{
union
ceph_mds_request_args
{
struct
{
struct
{
__le32
mask
;
/* CEPH_CAP_* */
__le32
mask
;
/* CEPH_CAP_* */
...
@@ -522,14 +535,17 @@ int ceph_flags_to_mode(int flags);
...
@@ -522,14 +535,17 @@ int ceph_flags_to_mode(int flags);
#define CEPH_CAP_GWREXTEND 64
/* (file) client can extend EOF */
#define CEPH_CAP_GWREXTEND 64
/* (file) client can extend EOF */
#define CEPH_CAP_GLAZYIO 128
/* (file) client can perform lazy io */
#define CEPH_CAP_GLAZYIO 128
/* (file) client can perform lazy io */
#define CEPH_CAP_SIMPLE_BITS 2
#define CEPH_CAP_FILE_BITS 8
/* per-lock shift */
/* per-lock shift */
#define CEPH_CAP_SAUTH 2
#define CEPH_CAP_SAUTH 2
#define CEPH_CAP_SLINK 4
#define CEPH_CAP_SLINK 4
#define CEPH_CAP_SXATTR 6
#define CEPH_CAP_SXATTR 6
#define CEPH_CAP_SFILE 8
#define CEPH_CAP_SFILE 8
#define CEPH_CAP_SFLOCK 20
#define CEPH_CAP_SFLOCK 20
#define CEPH_CAP_BITS
22
#define CEPH_CAP_BITS 22
/* composed values */
/* composed values */
#define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH)
#define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH)
...
...
include/linux/ceph/decode.h
浏览文件 @
4c7a08c8
...
@@ -52,10 +52,10 @@ static inline int ceph_has_room(void **p, void *end, size_t n)
...
@@ -52,10 +52,10 @@ static inline int ceph_has_room(void **p, void *end, size_t n)
return
end
>=
*
p
&&
n
<=
end
-
*
p
;
return
end
>=
*
p
&&
n
<=
end
-
*
p
;
}
}
#define ceph_decode_need(p, end, n, bad) \
#define ceph_decode_need(p, end, n, bad)
\
do { \
do {
\
if (!likely(ceph_has_room(p, end, n))) \
if (!likely(ceph_has_room(p, end, n)))
\
goto bad; \
goto bad;
\
} while (0)
} while (0)
#define ceph_decode_64_safe(p, end, v, bad) \
#define ceph_decode_64_safe(p, end, v, bad) \
...
@@ -99,8 +99,8 @@ static inline int ceph_has_room(void **p, void *end, size_t n)
...
@@ -99,8 +99,8 @@ static inline int ceph_has_room(void **p, void *end, size_t n)
*
*
* There are two possible failures:
* There are two possible failures:
* - converting the string would require accessing memory at or
* - converting the string would require accessing memory at or
* beyond the "end" pointer provided (-E
* beyond the "end" pointer provided (-E
RANGE)
* - memory could not be allocated for the result
* - memory could not be allocated for the result
(-ENOMEM)
*/
*/
static
inline
char
*
ceph_extract_encoded_string
(
void
**
p
,
void
*
end
,
static
inline
char
*
ceph_extract_encoded_string
(
void
**
p
,
void
*
end
,
size_t
*
lenp
,
gfp_t
gfp
)
size_t
*
lenp
,
gfp_t
gfp
)
...
@@ -217,10 +217,10 @@ static inline void ceph_encode_string(void **p, void *end,
...
@@ -217,10 +217,10 @@ static inline void ceph_encode_string(void **p, void *end,
*
p
+=
len
;
*
p
+=
len
;
}
}
#define ceph_encode_need(p, end, n, bad) \
#define ceph_encode_need(p, end, n, bad)
\
do { \
do {
\
if (!likely(ceph_has_room(p, end, n))) \
if (!likely(ceph_has_room(p, end, n)))
\
goto bad; \
goto bad;
\
} while (0)
} while (0)
#define ceph_encode_64_safe(p, end, v, bad) \
#define ceph_encode_64_safe(p, end, v, bad) \
...
@@ -231,12 +231,17 @@ static inline void ceph_encode_string(void **p, void *end,
...
@@ -231,12 +231,17 @@ static inline void ceph_encode_string(void **p, void *end,
#define ceph_encode_32_safe(p, end, v, bad) \
#define ceph_encode_32_safe(p, end, v, bad) \
do { \
do { \
ceph_encode_need(p, end, sizeof(u32), bad); \
ceph_encode_need(p, end, sizeof(u32), bad); \
ceph_encode_32(p, v); \
ceph_encode_32(p, v);
\
} while (0)
} while (0)
#define ceph_encode_16_safe(p, end, v, bad) \
#define ceph_encode_16_safe(p, end, v, bad) \
do { \
do { \
ceph_encode_need(p, end, sizeof(u16), bad); \
ceph_encode_need(p, end, sizeof(u16), bad); \
ceph_encode_16(p, v); \
ceph_encode_16(p, v); \
} while (0)
#define ceph_encode_8_safe(p, end, v, bad) \
do { \
ceph_encode_need(p, end, sizeof(u8), bad); \
ceph_encode_8(p, v); \
} while (0)
} while (0)
#define ceph_encode_copy_safe(p, end, pv, n, bad) \
#define ceph_encode_copy_safe(p, end, pv, n, bad) \
...
...
include/linux/ceph/libceph.h
浏览文件 @
4c7a08c8
...
@@ -193,6 +193,8 @@ static inline int calc_pages_for(u64 off, u64 len)
...
@@ -193,6 +193,8 @@ static inline int calc_pages_for(u64 off, u64 len)
}
}
/* ceph_common.c */
/* ceph_common.c */
extern
bool
libceph_compatible
(
void
*
data
);
extern
const
char
*
ceph_msg_type_name
(
int
type
);
extern
const
char
*
ceph_msg_type_name
(
int
type
);
extern
int
ceph_check_fsid
(
struct
ceph_client
*
client
,
struct
ceph_fsid
*
fsid
);
extern
int
ceph_check_fsid
(
struct
ceph_client
*
client
,
struct
ceph_fsid
*
fsid
);
extern
struct
kmem_cache
*
ceph_inode_cachep
;
extern
struct
kmem_cache
*
ceph_inode_cachep
;
...
@@ -220,7 +222,7 @@ extern int ceph_open_session(struct ceph_client *client);
...
@@ -220,7 +222,7 @@ extern int ceph_open_session(struct ceph_client *client);
/* pagevec.c */
/* pagevec.c */
extern
void
ceph_release_page_vector
(
struct
page
**
pages
,
int
num_pages
);
extern
void
ceph_release_page_vector
(
struct
page
**
pages
,
int
num_pages
);
extern
struct
page
**
ceph_get_direct_page_vector
(
const
char
__user
*
data
,
extern
struct
page
**
ceph_get_direct_page_vector
(
const
void
__user
*
data
,
int
num_pages
,
int
num_pages
,
bool
write_page
);
bool
write_page
);
extern
void
ceph_put_page_vector
(
struct
page
**
pages
,
int
num_pages
,
extern
void
ceph_put_page_vector
(
struct
page
**
pages
,
int
num_pages
,
...
@@ -228,15 +230,15 @@ extern void ceph_put_page_vector(struct page **pages, int num_pages,
...
@@ -228,15 +230,15 @@ extern void ceph_put_page_vector(struct page **pages, int num_pages,
extern
void
ceph_release_page_vector
(
struct
page
**
pages
,
int
num_pages
);
extern
void
ceph_release_page_vector
(
struct
page
**
pages
,
int
num_pages
);
extern
struct
page
**
ceph_alloc_page_vector
(
int
num_pages
,
gfp_t
flags
);
extern
struct
page
**
ceph_alloc_page_vector
(
int
num_pages
,
gfp_t
flags
);
extern
int
ceph_copy_user_to_page_vector
(
struct
page
**
pages
,
extern
int
ceph_copy_user_to_page_vector
(
struct
page
**
pages
,
const
char
__user
*
data
,
const
void
__user
*
data
,
loff_t
off
,
size_t
len
);
loff_t
off
,
size_t
len
);
extern
int
ceph_copy_to_page_vector
(
struct
page
**
pages
,
extern
void
ceph_copy_to_page_vector
(
struct
page
**
pages
,
const
char
*
data
,
const
void
*
data
,
loff_t
off
,
size_t
len
);
loff_t
off
,
size_t
len
);
extern
int
ceph_copy_from_page_vector
(
struct
page
**
pages
,
extern
void
ceph_copy_from_page_vector
(
struct
page
**
pages
,
char
*
data
,
void
*
data
,
loff_t
off
,
size_t
len
);
loff_t
off
,
size_t
len
);
extern
int
ceph_copy_page_vector_to_user
(
struct
page
**
pages
,
char
__user
*
data
,
extern
int
ceph_copy_page_vector_to_user
(
struct
page
**
pages
,
void
__user
*
data
,
loff_t
off
,
size_t
len
);
loff_t
off
,
size_t
len
);
extern
void
ceph_zero_page_vector_range
(
int
off
,
int
len
,
struct
page
**
pages
);
extern
void
ceph_zero_page_vector_range
(
int
off
,
int
len
,
struct
page
**
pages
);
...
...
include/linux/ceph/messenger.h
浏览文件 @
4c7a08c8
...
@@ -83,9 +83,11 @@ struct ceph_msg {
...
@@ -83,9 +83,11 @@ struct ceph_msg {
struct
list_head
list_head
;
struct
list_head
list_head
;
struct
kref
kref
;
struct
kref
kref
;
#ifdef CONFIG_BLOCK
struct
bio
*
bio
;
/* instead of pages/pagelist */
struct
bio
*
bio
;
/* instead of pages/pagelist */
struct
bio
*
bio_iter
;
/* bio iterator */
struct
bio
*
bio_iter
;
/* bio iterator */
int
bio_seg
;
/* current bio segment */
int
bio_seg
;
/* current bio segment */
#endif
/* CONFIG_BLOCK */
struct
ceph_pagelist
*
trail
;
/* the trailing part of the data */
struct
ceph_pagelist
*
trail
;
/* the trailing part of the data */
bool
front_is_vmalloc
;
bool
front_is_vmalloc
;
bool
more_to_follow
;
bool
more_to_follow
;
...
...
include/linux/ceph/osd_client.h
浏览文件 @
4c7a08c8
...
@@ -10,6 +10,7 @@
...
@@ -10,6 +10,7 @@
#include <linux/ceph/osdmap.h>
#include <linux/ceph/osdmap.h>
#include <linux/ceph/messenger.h>
#include <linux/ceph/messenger.h>
#include <linux/ceph/auth.h>
#include <linux/ceph/auth.h>
#include <linux/ceph/pagelist.h>
/*
/*
* Maximum object name size
* Maximum object name size
...
@@ -22,7 +23,6 @@ struct ceph_snap_context;
...
@@ -22,7 +23,6 @@ struct ceph_snap_context;
struct
ceph_osd_request
;
struct
ceph_osd_request
;
struct
ceph_osd_client
;
struct
ceph_osd_client
;
struct
ceph_authorizer
;
struct
ceph_authorizer
;
struct
ceph_pagelist
;
/*
/*
* completion callback for async writepages
* completion callback for async writepages
...
@@ -95,7 +95,7 @@ struct ceph_osd_request {
...
@@ -95,7 +95,7 @@ struct ceph_osd_request {
struct
bio
*
r_bio
;
/* instead of pages */
struct
bio
*
r_bio
;
/* instead of pages */
#endif
#endif
struct
ceph_pagelist
*
r_trail
;
/* trailing part of the data */
struct
ceph_pagelist
r_trail
;
/* trailing part of the data */
};
};
struct
ceph_osd_event
{
struct
ceph_osd_event
{
...
@@ -107,7 +107,6 @@ struct ceph_osd_event {
...
@@ -107,7 +107,6 @@ struct ceph_osd_event {
struct
rb_node
node
;
struct
rb_node
node
;
struct
list_head
osd_node
;
struct
list_head
osd_node
;
struct
kref
kref
;
struct
kref
kref
;
struct
completion
completion
;
};
};
struct
ceph_osd_event_work
{
struct
ceph_osd_event_work
{
...
@@ -157,7 +156,7 @@ struct ceph_osd_client {
...
@@ -157,7 +156,7 @@ struct ceph_osd_client {
struct
ceph_osd_req_op
{
struct
ceph_osd_req_op
{
u16
op
;
/* CEPH_OSD_OP_* */
u16
op
;
/* CEPH_OSD_OP_* */
u32
flags
;
/* CEPH_OSD_FLAG_* */
u32
payload_len
;
union
{
union
{
struct
{
struct
{
u64
offset
,
length
;
u64
offset
,
length
;
...
@@ -166,23 +165,24 @@ struct ceph_osd_req_op {
...
@@ -166,23 +165,24 @@ struct ceph_osd_req_op {
}
extent
;
}
extent
;
struct
{
struct
{
const
char
*
name
;
const
char
*
name
;
u32
name_len
;
const
char
*
val
;
const
char
*
val
;
u32
name_len
;
u32
value_len
;
u32
value_len
;
__u8
cmp_op
;
/* CEPH_OSD_CMPXATTR_OP_* */
__u8
cmp_op
;
/* CEPH_OSD_CMPXATTR_OP_* */
__u8
cmp_mode
;
/* CEPH_OSD_CMPXATTR_MODE_* */
__u8
cmp_mode
;
/* CEPH_OSD_CMPXATTR_MODE_* */
}
xattr
;
}
xattr
;
struct
{
struct
{
const
char
*
class_name
;
const
char
*
class_name
;
__u8
class_len
;
const
char
*
method_name
;
const
char
*
method_name
;
__u8
method_len
;
__u8
argc
;
const
char
*
indata
;
const
char
*
indata
;
u32
indata_len
;
u32
indata_len
;
__u8
class_len
;
__u8
method_len
;
__u8
argc
;
}
cls
;
}
cls
;
struct
{
struct
{
u64
cookie
,
count
;
u64
cookie
;
u64
count
;
}
pgls
;
}
pgls
;
struct
{
struct
{
u64
snapid
;
u64
snapid
;
...
@@ -190,12 +190,11 @@ struct ceph_osd_req_op {
...
@@ -190,12 +190,11 @@ struct ceph_osd_req_op {
struct
{
struct
{
u64
cookie
;
u64
cookie
;
u64
ver
;
u64
ver
;
__u8
flag
;
u32
prot_ver
;
u32
prot_ver
;
u32
timeout
;
u32
timeout
;
__u8
flag
;
}
watch
;
}
watch
;
};
};
u32
payload_len
;
};
};
extern
int
ceph_osdc_init
(
struct
ceph_osd_client
*
osdc
,
extern
int
ceph_osdc_init
(
struct
ceph_osd_client
*
osdc
,
...
@@ -207,29 +206,19 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
...
@@ -207,29 +206,19 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
extern
void
ceph_osdc_handle_map
(
struct
ceph_osd_client
*
osdc
,
extern
void
ceph_osdc_handle_map
(
struct
ceph_osd_client
*
osdc
,
struct
ceph_msg
*
msg
);
struct
ceph_msg
*
msg
);
extern
int
ceph_calc_raw_layout
(
struct
ceph_osd_client
*
osdc
,
struct
ceph_file_layout
*
layout
,
u64
snapid
,
u64
off
,
u64
*
plen
,
u64
*
bno
,
struct
ceph_osd_request
*
req
,
struct
ceph_osd_req_op
*
op
);
extern
struct
ceph_osd_request
*
ceph_osdc_alloc_request
(
struct
ceph_osd_client
*
osdc
,
extern
struct
ceph_osd_request
*
ceph_osdc_alloc_request
(
struct
ceph_osd_client
*
osdc
,
int
flags
,
struct
ceph_snap_context
*
snapc
,
struct
ceph_snap_context
*
snapc
,
struct
ceph_osd_req_op
*
ops
,
unsigned
int
num_op
,
bool
use_mempool
,
bool
use_mempool
,
gfp_t
gfp_flags
,
gfp_t
gfp_flags
);
struct
page
**
pages
,
struct
bio
*
bio
);
extern
void
ceph_osdc_build_request
(
struct
ceph_osd_request
*
req
,
extern
void
ceph_osdc_build_request
(
struct
ceph_osd_request
*
req
,
u64
off
,
u64
*
plen
,
u64
off
,
u64
len
,
unsigned
int
num_op
,
struct
ceph_osd_req_op
*
src_ops
,
struct
ceph_osd_req_op
*
src_ops
,
struct
ceph_snap_context
*
snapc
,
struct
ceph_snap_context
*
snapc
,
struct
timespec
*
mtime
,
u64
snap_id
,
const
char
*
oid
,
struct
timespec
*
mtime
);
int
oid_len
);
extern
struct
ceph_osd_request
*
ceph_osdc_new_request
(
struct
ceph_osd_client
*
,
extern
struct
ceph_osd_request
*
ceph_osdc_new_request
(
struct
ceph_osd_client
*
,
struct
ceph_file_layout
*
layout
,
struct
ceph_file_layout
*
layout
,
...
@@ -239,8 +228,7 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
...
@@ -239,8 +228,7 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
int
do_sync
,
u32
truncate_seq
,
int
do_sync
,
u32
truncate_seq
,
u64
truncate_size
,
u64
truncate_size
,
struct
timespec
*
mtime
,
struct
timespec
*
mtime
,
bool
use_mempool
,
int
num_reply
,
bool
use_mempool
,
int
page_align
);
int
page_align
);
extern
void
ceph_osdc_set_request_linger
(
struct
ceph_osd_client
*
osdc
,
extern
void
ceph_osdc_set_request_linger
(
struct
ceph_osd_client
*
osdc
,
struct
ceph_osd_request
*
req
);
struct
ceph_osd_request
*
req
);
...
@@ -279,17 +267,13 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
...
@@ -279,17 +267,13 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
u64
off
,
u64
len
,
u64
off
,
u64
len
,
u32
truncate_seq
,
u64
truncate_size
,
u32
truncate_seq
,
u64
truncate_size
,
struct
timespec
*
mtime
,
struct
timespec
*
mtime
,
struct
page
**
pages
,
int
nr_pages
,
struct
page
**
pages
,
int
nr_pages
);
int
flags
,
int
do_sync
,
bool
nofail
);
/* watch/notify events */
/* watch/notify events */
extern
int
ceph_osdc_create_event
(
struct
ceph_osd_client
*
osdc
,
extern
int
ceph_osdc_create_event
(
struct
ceph_osd_client
*
osdc
,
void
(
*
event_cb
)(
u64
,
u64
,
u8
,
void
*
),
void
(
*
event_cb
)(
u64
,
u64
,
u8
,
void
*
),
int
one_shot
,
void
*
data
,
void
*
data
,
struct
ceph_osd_event
**
pevent
);
struct
ceph_osd_event
**
pevent
);
extern
void
ceph_osdc_cancel_event
(
struct
ceph_osd_event
*
event
);
extern
void
ceph_osdc_cancel_event
(
struct
ceph_osd_event
*
event
);
extern
int
ceph_osdc_wait_event
(
struct
ceph_osd_event
*
event
,
unsigned
long
timeout
);
extern
void
ceph_osdc_put_event
(
struct
ceph_osd_event
*
event
);
extern
void
ceph_osdc_put_event
(
struct
ceph_osd_event
*
event
);
#endif
#endif
include/linux/ceph/osdmap.h
浏览文件 @
4c7a08c8
...
@@ -110,7 +110,7 @@ extern void ceph_osdmap_destroy(struct ceph_osdmap *map);
...
@@ -110,7 +110,7 @@ extern void ceph_osdmap_destroy(struct ceph_osdmap *map);
/* calculate mapping of a file extent to an object */
/* calculate mapping of a file extent to an object */
extern
int
ceph_calc_file_object_mapping
(
struct
ceph_file_layout
*
layout
,
extern
int
ceph_calc_file_object_mapping
(
struct
ceph_file_layout
*
layout
,
u64
off
,
u64
*
p
len
,
u64
off
,
u64
len
,
u64
*
bno
,
u64
*
oxoff
,
u64
*
oxlen
);
u64
*
bno
,
u64
*
oxoff
,
u64
*
oxlen
);
/* calculate mapping of object to a placement group */
/* calculate mapping of object to a placement group */
...
...
include/linux/ceph/rados.h
浏览文件 @
4c7a08c8
...
@@ -145,8 +145,12 @@ struct ceph_eversion {
...
@@ -145,8 +145,12 @@ struct ceph_eversion {
*/
*/
/* status bits */
/* status bits */
#define CEPH_OSD_EXISTS 1
#define CEPH_OSD_EXISTS (1<<0)
#define CEPH_OSD_UP 2
#define CEPH_OSD_UP (1<<1)
#define CEPH_OSD_AUTOOUT (1<<2)
/* osd was automatically marked out */
#define CEPH_OSD_NEW (1<<3)
/* osd is new, never marked in */
extern
const
char
*
ceph_osd_state_name
(
int
s
);
/* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */
/* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */
#define CEPH_OSD_IN 0x10000
#define CEPH_OSD_IN 0x10000
...
@@ -161,9 +165,25 @@ struct ceph_eversion {
...
@@ -161,9 +165,25 @@ struct ceph_eversion {
#define CEPH_OSDMAP_PAUSERD (1<<2)
/* pause all reads */
#define CEPH_OSDMAP_PAUSERD (1<<2)
/* pause all reads */
#define CEPH_OSDMAP_PAUSEWR (1<<3)
/* pause all writes */
#define CEPH_OSDMAP_PAUSEWR (1<<3)
/* pause all writes */
#define CEPH_OSDMAP_PAUSEREC (1<<4)
/* pause recovery */
#define CEPH_OSDMAP_PAUSEREC (1<<4)
/* pause recovery */
#define CEPH_OSDMAP_NOUP (1<<5)
/* block osd boot */
#define CEPH_OSDMAP_NODOWN (1<<6)
/* block osd mark-down/failure */
#define CEPH_OSDMAP_NOOUT (1<<7)
/* block osd auto mark-out */
#define CEPH_OSDMAP_NOIN (1<<8)
/* block osd auto mark-in */
#define CEPH_OSDMAP_NOBACKFILL (1<<9)
/* block osd backfill */
#define CEPH_OSDMAP_NORECOVER (1<<10)
/* block osd recovery and backfill */
/*
* The error code to return when an OSD can't handle a write
* because it is too large.
*/
#define OSD_WRITETOOBIG EMSGSIZE
/*
/*
* osd ops
* osd ops
*
* WARNING: do not use these op codes directly. Use the helpers
* defined below instead. In certain cases, op code behavior was
* redefined, resulting in special-cases in the helpers.
*/
*/
#define CEPH_OSD_OP_MODE 0xf000
#define CEPH_OSD_OP_MODE 0xf000
#define CEPH_OSD_OP_MODE_RD 0x1000
#define CEPH_OSD_OP_MODE_RD 0x1000
...
@@ -177,6 +197,7 @@ struct ceph_eversion {
...
@@ -177,6 +197,7 @@ struct ceph_eversion {
#define CEPH_OSD_OP_TYPE_ATTR 0x0300
#define CEPH_OSD_OP_TYPE_ATTR 0x0300
#define CEPH_OSD_OP_TYPE_EXEC 0x0400
#define CEPH_OSD_OP_TYPE_EXEC 0x0400
#define CEPH_OSD_OP_TYPE_PG 0x0500
#define CEPH_OSD_OP_TYPE_PG 0x0500
#define CEPH_OSD_OP_TYPE_MULTI 0x0600
/* multiobject */
enum
{
enum
{
/** data **/
/** data **/
...
@@ -217,6 +238,23 @@ enum {
...
@@ -217,6 +238,23 @@ enum {
CEPH_OSD_OP_WATCH
=
CEPH_OSD_OP_MODE_WR
|
CEPH_OSD_OP_TYPE_DATA
|
15
,
CEPH_OSD_OP_WATCH
=
CEPH_OSD_OP_MODE_WR
|
CEPH_OSD_OP_TYPE_DATA
|
15
,
/* omap */
CEPH_OSD_OP_OMAPGETKEYS
=
CEPH_OSD_OP_MODE_RD
|
CEPH_OSD_OP_TYPE_DATA
|
17
,
CEPH_OSD_OP_OMAPGETVALS
=
CEPH_OSD_OP_MODE_RD
|
CEPH_OSD_OP_TYPE_DATA
|
18
,
CEPH_OSD_OP_OMAPGETHEADER
=
CEPH_OSD_OP_MODE_RD
|
CEPH_OSD_OP_TYPE_DATA
|
19
,
CEPH_OSD_OP_OMAPGETVALSBYKEYS
=
CEPH_OSD_OP_MODE_RD
|
CEPH_OSD_OP_TYPE_DATA
|
20
,
CEPH_OSD_OP_OMAPSETVALS
=
CEPH_OSD_OP_MODE_WR
|
CEPH_OSD_OP_TYPE_DATA
|
21
,
CEPH_OSD_OP_OMAPSETHEADER
=
CEPH_OSD_OP_MODE_WR
|
CEPH_OSD_OP_TYPE_DATA
|
22
,
CEPH_OSD_OP_OMAPCLEAR
=
CEPH_OSD_OP_MODE_WR
|
CEPH_OSD_OP_TYPE_DATA
|
23
,
CEPH_OSD_OP_OMAPRMKEYS
=
CEPH_OSD_OP_MODE_WR
|
CEPH_OSD_OP_TYPE_DATA
|
24
,
CEPH_OSD_OP_OMAP_CMP
=
CEPH_OSD_OP_MODE_RD
|
CEPH_OSD_OP_TYPE_DATA
|
25
,
/** multi **/
CEPH_OSD_OP_CLONERANGE
=
CEPH_OSD_OP_MODE_WR
|
CEPH_OSD_OP_TYPE_MULTI
|
1
,
CEPH_OSD_OP_ASSERT_SRC_VERSION
=
CEPH_OSD_OP_MODE_RD
|
CEPH_OSD_OP_TYPE_MULTI
|
2
,
CEPH_OSD_OP_SRC_CMPXATTR
=
CEPH_OSD_OP_MODE_RD
|
CEPH_OSD_OP_TYPE_MULTI
|
3
,
/** attrs **/
/** attrs **/
/* read */
/* read */
CEPH_OSD_OP_GETXATTR
=
CEPH_OSD_OP_MODE_RD
|
CEPH_OSD_OP_TYPE_ATTR
|
1
,
CEPH_OSD_OP_GETXATTR
=
CEPH_OSD_OP_MODE_RD
|
CEPH_OSD_OP_TYPE_ATTR
|
1
,
...
@@ -238,6 +276,7 @@ enum {
...
@@ -238,6 +276,7 @@ enum {
CEPH_OSD_OP_SCRUB_RESERVE
=
CEPH_OSD_OP_MODE_SUB
|
6
,
CEPH_OSD_OP_SCRUB_RESERVE
=
CEPH_OSD_OP_MODE_SUB
|
6
,
CEPH_OSD_OP_SCRUB_UNRESERVE
=
CEPH_OSD_OP_MODE_SUB
|
7
,
CEPH_OSD_OP_SCRUB_UNRESERVE
=
CEPH_OSD_OP_MODE_SUB
|
7
,
CEPH_OSD_OP_SCRUB_STOP
=
CEPH_OSD_OP_MODE_SUB
|
8
,
CEPH_OSD_OP_SCRUB_STOP
=
CEPH_OSD_OP_MODE_SUB
|
8
,
CEPH_OSD_OP_SCRUB_MAP
=
CEPH_OSD_OP_MODE_SUB
|
9
,
/** lock **/
/** lock **/
CEPH_OSD_OP_WRLOCK
=
CEPH_OSD_OP_MODE_WR
|
CEPH_OSD_OP_TYPE_LOCK
|
1
,
CEPH_OSD_OP_WRLOCK
=
CEPH_OSD_OP_MODE_WR
|
CEPH_OSD_OP_TYPE_LOCK
|
1
,
...
@@ -248,10 +287,12 @@ enum {
...
@@ -248,10 +287,12 @@ enum {
CEPH_OSD_OP_DNLOCK
=
CEPH_OSD_OP_MODE_WR
|
CEPH_OSD_OP_TYPE_LOCK
|
6
,
CEPH_OSD_OP_DNLOCK
=
CEPH_OSD_OP_MODE_WR
|
CEPH_OSD_OP_TYPE_LOCK
|
6
,
/** exec **/
/** exec **/
/* note: the RD bit here is wrong; see special-case below in helper */
CEPH_OSD_OP_CALL
=
CEPH_OSD_OP_MODE_RD
|
CEPH_OSD_OP_TYPE_EXEC
|
1
,
CEPH_OSD_OP_CALL
=
CEPH_OSD_OP_MODE_RD
|
CEPH_OSD_OP_TYPE_EXEC
|
1
,
/** pg **/
/** pg **/
CEPH_OSD_OP_PGLS
=
CEPH_OSD_OP_MODE_RD
|
CEPH_OSD_OP_TYPE_PG
|
1
,
CEPH_OSD_OP_PGLS
=
CEPH_OSD_OP_MODE_RD
|
CEPH_OSD_OP_TYPE_PG
|
1
,
CEPH_OSD_OP_PGLS_FILTER
=
CEPH_OSD_OP_MODE_RD
|
CEPH_OSD_OP_TYPE_PG
|
2
,
};
};
static
inline
int
ceph_osd_op_type_lock
(
int
op
)
static
inline
int
ceph_osd_op_type_lock
(
int
op
)
...
@@ -274,6 +315,10 @@ static inline int ceph_osd_op_type_pg(int op)
...
@@ -274,6 +315,10 @@ static inline int ceph_osd_op_type_pg(int op)
{
{
return
(
op
&
CEPH_OSD_OP_TYPE
)
==
CEPH_OSD_OP_TYPE_PG
;
return
(
op
&
CEPH_OSD_OP_TYPE
)
==
CEPH_OSD_OP_TYPE_PG
;
}
}
static
inline
int
ceph_osd_op_type_multi
(
int
op
)
{
return
(
op
&
CEPH_OSD_OP_TYPE
)
==
CEPH_OSD_OP_TYPE_MULTI
;
}
static
inline
int
ceph_osd_op_mode_subop
(
int
op
)
static
inline
int
ceph_osd_op_mode_subop
(
int
op
)
{
{
...
@@ -281,11 +326,12 @@ static inline int ceph_osd_op_mode_subop(int op)
...
@@ -281,11 +326,12 @@ static inline int ceph_osd_op_mode_subop(int op)
}
}
static
inline
int
ceph_osd_op_mode_read
(
int
op
)
static
inline
int
ceph_osd_op_mode_read
(
int
op
)
{
{
return
(
op
&
CEPH_OSD_OP_MODE
)
==
CEPH_OSD_OP_MODE_RD
;
return
(
op
&
CEPH_OSD_OP_MODE_RD
)
&&
op
!=
CEPH_OSD_OP_CALL
;
}
}
static
inline
int
ceph_osd_op_mode_modify
(
int
op
)
static
inline
int
ceph_osd_op_mode_modify
(
int
op
)
{
{
return
(
op
&
CEPH_OSD_OP_MODE
)
==
CEPH_OSD_OP_MODE_WR
;
return
op
&
CEPH_OSD_OP_MODE_WR
;
}
}
/*
/*
...
@@ -294,34 +340,38 @@ static inline int ceph_osd_op_mode_modify(int op)
...
@@ -294,34 +340,38 @@ static inline int ceph_osd_op_mode_modify(int op)
*/
*/
#define CEPH_OSD_TMAP_HDR 'h'
#define CEPH_OSD_TMAP_HDR 'h'
#define CEPH_OSD_TMAP_SET 's'
#define CEPH_OSD_TMAP_SET 's'
#define CEPH_OSD_TMAP_CREATE 'c'
/* create key */
#define CEPH_OSD_TMAP_RM 'r'
#define CEPH_OSD_TMAP_RM 'r'
#define CEPH_OSD_TMAP_RMSLOPPY 'R'
extern
const
char
*
ceph_osd_op_name
(
int
op
);
extern
const
char
*
ceph_osd_op_name
(
int
op
);
/*
/*
* osd op flags
* osd op flags
*
*
* An op may be READ, WRITE, or READ|WRITE.
* An op may be READ, WRITE, or READ|WRITE.
*/
*/
enum
{
enum
{
CEPH_OSD_FLAG_ACK
=
1
,
/* want (or is) "ack" ack */
CEPH_OSD_FLAG_ACK
=
0x0001
,
/* want (or is) "ack" ack */
CEPH_OSD_FLAG_ONNVRAM
=
2
,
/* want (or is) "onnvram" ack */
CEPH_OSD_FLAG_ONNVRAM
=
0x0002
,
/* want (or is) "onnvram" ack */
CEPH_OSD_FLAG_ONDISK
=
4
,
/* want (or is) "ondisk" ack */
CEPH_OSD_FLAG_ONDISK
=
0x0004
,
/* want (or is) "ondisk" ack */
CEPH_OSD_FLAG_RETRY
=
8
,
/* resend attempt */
CEPH_OSD_FLAG_RETRY
=
0x0008
,
/* resend attempt */
CEPH_OSD_FLAG_READ
=
16
,
/* op may read */
CEPH_OSD_FLAG_READ
=
0x0010
,
/* op may read */
CEPH_OSD_FLAG_WRITE
=
32
,
/* op may write */
CEPH_OSD_FLAG_WRITE
=
0x0020
,
/* op may write */
CEPH_OSD_FLAG_ORDERSNAP
=
64
,
/* EOLDSNAP if snapc is out of order */
CEPH_OSD_FLAG_ORDERSNAP
=
0x0040
,
/* EOLDSNAP if snapc is out of order */
CEPH_OSD_FLAG_PEERSTAT
=
128
,
/* msg includes osd_peer_stat */
CEPH_OSD_FLAG_PEERSTAT_OLD
=
0x0080
,
/* DEPRECATED msg includes osd_peer_stat */
CEPH_OSD_FLAG_BALANCE_READS
=
256
,
CEPH_OSD_FLAG_BALANCE_READS
=
0x0100
,
CEPH_OSD_FLAG_PARALLELEXEC
=
512
,
/* execute op in parallel */
CEPH_OSD_FLAG_PARALLELEXEC
=
0x0200
,
/* execute op in parallel */
CEPH_OSD_FLAG_PGOP
=
1024
,
/* pg op, no object */
CEPH_OSD_FLAG_PGOP
=
0x0400
,
/* pg op, no object */
CEPH_OSD_FLAG_EXEC
=
2048
,
/* op may exec */
CEPH_OSD_FLAG_EXEC
=
0x0800
,
/* op may exec */
CEPH_OSD_FLAG_EXEC_PUBLIC
=
4096
,
/* op may exec (public) */
CEPH_OSD_FLAG_EXEC_PUBLIC
=
0x1000
,
/* DEPRECATED op may exec (public) */
CEPH_OSD_FLAG_LOCALIZE_READS
=
0x2000
,
/* read from nearby replica, if any */
CEPH_OSD_FLAG_RWORDERED
=
0x4000
,
/* order wrt concurrent reads */
};
};
enum
{
enum
{
CEPH_OSD_OP_FLAG_EXCL
=
1
,
/* EXCL object create */
CEPH_OSD_OP_FLAG_EXCL
=
1
,
/* EXCL object create */
CEPH_OSD_OP_FLAG_FAILOK
=
2
,
/* continue despite failure */
};
};
#define EOLDSNAPC ERESTART
/* ORDERSNAP flag set; writer has old snapc*/
#define EOLDSNAPC ERESTART
/* ORDERSNAP flag set; writer has old snapc*/
...
@@ -381,7 +431,11 @@ struct ceph_osd_op {
...
@@ -381,7 +431,11 @@ struct ceph_osd_op {
__le64
ver
;
__le64
ver
;
__u8
flag
;
/* 0 = unwatch, 1 = watch */
__u8
flag
;
/* 0 = unwatch, 1 = watch */
}
__attribute__
((
packed
))
watch
;
}
__attribute__
((
packed
))
watch
;
};
struct
{
__le64
offset
,
length
;
__le64
src_offset
;
}
__attribute__
((
packed
))
clonerange
;
};
__le32
payload_len
;
__le32
payload_len
;
}
__attribute__
((
packed
));
}
__attribute__
((
packed
));
...
@@ -424,5 +478,4 @@ struct ceph_osd_reply_head {
...
@@ -424,5 +478,4 @@ struct ceph_osd_reply_head {
}
__attribute__
((
packed
));
}
__attribute__
((
packed
));
#endif
#endif
include/linux/crush/crush.h
浏览文件 @
4c7a08c8
...
@@ -162,6 +162,8 @@ struct crush_map {
...
@@ -162,6 +162,8 @@ struct crush_map {
__u32
choose_local_fallback_tries
;
__u32
choose_local_fallback_tries
;
/* choose attempts before giving up */
/* choose attempts before giving up */
__u32
choose_total_tries
;
__u32
choose_total_tries
;
/* attempt chooseleaf inner descent once; on failure retry outer descent */
__u32
chooseleaf_descend_once
;
};
};
...
...
net/ceph/ceph_common.c
浏览文件 @
4c7a08c8
...
@@ -26,6 +26,22 @@
...
@@ -26,6 +26,22 @@
#include "crypto.h"
#include "crypto.h"
/*
* Module compatibility interface. For now it doesn't do anything,
* but its existence signals a certain level of functionality.
*
* The data buffer is used to pass information both to and from
* libceph. The return value indicates whether libceph determines
* it is compatible with the caller (from another kernel module),
* given the provided data.
*
* The data pointer can be null.
*/
bool
libceph_compatible
(
void
*
data
)
{
return
true
;
}
EXPORT_SYMBOL
(
libceph_compatible
);
/*
/*
* find filename portion of a path (/foo/bar/baz -> baz)
* find filename portion of a path (/foo/bar/baz -> baz)
...
...
net/ceph/ceph_strings.c
浏览文件 @
4c7a08c8
...
@@ -21,9 +21,15 @@ const char *ceph_osd_op_name(int op)
...
@@ -21,9 +21,15 @@ const char *ceph_osd_op_name(int op)
switch
(
op
)
{
switch
(
op
)
{
case
CEPH_OSD_OP_READ
:
return
"read"
;
case
CEPH_OSD_OP_READ
:
return
"read"
;
case
CEPH_OSD_OP_STAT
:
return
"stat"
;
case
CEPH_OSD_OP_STAT
:
return
"stat"
;
case
CEPH_OSD_OP_MAPEXT
:
return
"mapext"
;
case
CEPH_OSD_OP_SPARSE_READ
:
return
"sparse-read"
;
case
CEPH_OSD_OP_NOTIFY
:
return
"notify"
;
case
CEPH_OSD_OP_NOTIFY_ACK
:
return
"notify-ack"
;
case
CEPH_OSD_OP_ASSERT_VER
:
return
"assert-version"
;
case
CEPH_OSD_OP_MASKTRUNC
:
return
"masktrunc"
;
case
CEPH_OSD_OP_MASKTRUNC
:
return
"masktrunc"
;
case
CEPH_OSD_OP_CREATE
:
return
"create"
;
case
CEPH_OSD_OP_WRITE
:
return
"write"
;
case
CEPH_OSD_OP_WRITE
:
return
"write"
;
case
CEPH_OSD_OP_DELETE
:
return
"delete"
;
case
CEPH_OSD_OP_DELETE
:
return
"delete"
;
case
CEPH_OSD_OP_TRUNCATE
:
return
"truncate"
;
case
CEPH_OSD_OP_TRUNCATE
:
return
"truncate"
;
...
@@ -39,6 +45,11 @@ const char *ceph_osd_op_name(int op)
...
@@ -39,6 +45,11 @@ const char *ceph_osd_op_name(int op)
case
CEPH_OSD_OP_TMAPUP
:
return
"tmapup"
;
case
CEPH_OSD_OP_TMAPUP
:
return
"tmapup"
;
case
CEPH_OSD_OP_TMAPGET
:
return
"tmapget"
;
case
CEPH_OSD_OP_TMAPGET
:
return
"tmapget"
;
case
CEPH_OSD_OP_TMAPPUT
:
return
"tmapput"
;
case
CEPH_OSD_OP_TMAPPUT
:
return
"tmapput"
;
case
CEPH_OSD_OP_WATCH
:
return
"watch"
;
case
CEPH_OSD_OP_CLONERANGE
:
return
"clonerange"
;
case
CEPH_OSD_OP_ASSERT_SRC_VERSION
:
return
"assert-src-version"
;
case
CEPH_OSD_OP_SRC_CMPXATTR
:
return
"src-cmpxattr"
;
case
CEPH_OSD_OP_GETXATTR
:
return
"getxattr"
;
case
CEPH_OSD_OP_GETXATTR
:
return
"getxattr"
;
case
CEPH_OSD_OP_GETXATTRS
:
return
"getxattrs"
;
case
CEPH_OSD_OP_GETXATTRS
:
return
"getxattrs"
;
...
@@ -53,6 +64,10 @@ const char *ceph_osd_op_name(int op)
...
@@ -53,6 +64,10 @@ const char *ceph_osd_op_name(int op)
case
CEPH_OSD_OP_BALANCEREADS
:
return
"balance-reads"
;
case
CEPH_OSD_OP_BALANCEREADS
:
return
"balance-reads"
;
case
CEPH_OSD_OP_UNBALANCEREADS
:
return
"unbalance-reads"
;
case
CEPH_OSD_OP_UNBALANCEREADS
:
return
"unbalance-reads"
;
case
CEPH_OSD_OP_SCRUB
:
return
"scrub"
;
case
CEPH_OSD_OP_SCRUB
:
return
"scrub"
;
case
CEPH_OSD_OP_SCRUB_RESERVE
:
return
"scrub-reserve"
;
case
CEPH_OSD_OP_SCRUB_UNRESERVE
:
return
"scrub-unreserve"
;
case
CEPH_OSD_OP_SCRUB_STOP
:
return
"scrub-stop"
;
case
CEPH_OSD_OP_SCRUB_MAP
:
return
"scrub-map"
;
case
CEPH_OSD_OP_WRLOCK
:
return
"wrlock"
;
case
CEPH_OSD_OP_WRLOCK
:
return
"wrlock"
;
case
CEPH_OSD_OP_WRUNLOCK
:
return
"wrunlock"
;
case
CEPH_OSD_OP_WRUNLOCK
:
return
"wrunlock"
;
...
@@ -64,10 +79,34 @@ const char *ceph_osd_op_name(int op)
...
@@ -64,10 +79,34 @@ const char *ceph_osd_op_name(int op)
case
CEPH_OSD_OP_CALL
:
return
"call"
;
case
CEPH_OSD_OP_CALL
:
return
"call"
;
case
CEPH_OSD_OP_PGLS
:
return
"pgls"
;
case
CEPH_OSD_OP_PGLS
:
return
"pgls"
;
case
CEPH_OSD_OP_PGLS_FILTER
:
return
"pgls-filter"
;
case
CEPH_OSD_OP_OMAPGETKEYS
:
return
"omap-get-keys"
;
case
CEPH_OSD_OP_OMAPGETVALS
:
return
"omap-get-vals"
;
case
CEPH_OSD_OP_OMAPGETHEADER
:
return
"omap-get-header"
;
case
CEPH_OSD_OP_OMAPGETVALSBYKEYS
:
return
"omap-get-vals-by-keys"
;
case
CEPH_OSD_OP_OMAPSETVALS
:
return
"omap-set-vals"
;
case
CEPH_OSD_OP_OMAPSETHEADER
:
return
"omap-set-header"
;
case
CEPH_OSD_OP_OMAPCLEAR
:
return
"omap-clear"
;
case
CEPH_OSD_OP_OMAPRMKEYS
:
return
"omap-rm-keys"
;
}
}
return
"???"
;
return
"???"
;
}
}
const
char
*
ceph_osd_state_name
(
int
s
)
{
switch
(
s
)
{
case
CEPH_OSD_EXISTS
:
return
"exists"
;
case
CEPH_OSD_UP
:
return
"up"
;
case
CEPH_OSD_AUTOOUT
:
return
"autoout"
;
case
CEPH_OSD_NEW
:
return
"new"
;
default:
return
"???"
;
}
}
const
char
*
ceph_pool_op_name
(
int
op
)
const
char
*
ceph_pool_op_name
(
int
op
)
{
{
...
...
net/ceph/crush/mapper.c
浏览文件 @
4c7a08c8
...
@@ -287,6 +287,7 @@ static int is_out(const struct crush_map *map, const __u32 *weight, int item, in
...
@@ -287,6 +287,7 @@ static int is_out(const struct crush_map *map, const __u32 *weight, int item, in
* @outpos: our position in that vector
* @outpos: our position in that vector
* @firstn: true if choosing "first n" items, false if choosing "indep"
* @firstn: true if choosing "first n" items, false if choosing "indep"
* @recurse_to_leaf: true if we want one device under each item of given type
* @recurse_to_leaf: true if we want one device under each item of given type
* @descend_once: true if we should only try one descent before giving up
* @out2: second output vector for leaf items (if @recurse_to_leaf)
* @out2: second output vector for leaf items (if @recurse_to_leaf)
*/
*/
static
int
crush_choose
(
const
struct
crush_map
*
map
,
static
int
crush_choose
(
const
struct
crush_map
*
map
,
...
@@ -295,7 +296,7 @@ static int crush_choose(const struct crush_map *map,
...
@@ -295,7 +296,7 @@ static int crush_choose(const struct crush_map *map,
int
x
,
int
numrep
,
int
type
,
int
x
,
int
numrep
,
int
type
,
int
*
out
,
int
outpos
,
int
*
out
,
int
outpos
,
int
firstn
,
int
recurse_to_leaf
,
int
firstn
,
int
recurse_to_leaf
,
int
*
out2
)
int
descend_once
,
int
*
out2
)
{
{
int
rep
;
int
rep
;
unsigned
int
ftotal
,
flocal
;
unsigned
int
ftotal
,
flocal
;
...
@@ -391,7 +392,7 @@ static int crush_choose(const struct crush_map *map,
...
@@ -391,7 +392,7 @@ static int crush_choose(const struct crush_map *map,
}
}
reject
=
0
;
reject
=
0
;
if
(
recurse_to_leaf
)
{
if
(
!
collide
&&
recurse_to_leaf
)
{
if
(
item
<
0
)
{
if
(
item
<
0
)
{
if
(
crush_choose
(
map
,
if
(
crush_choose
(
map
,
map
->
buckets
[
-
1
-
item
],
map
->
buckets
[
-
1
-
item
],
...
@@ -399,6 +400,7 @@ static int crush_choose(const struct crush_map *map,
...
@@ -399,6 +400,7 @@ static int crush_choose(const struct crush_map *map,
x
,
outpos
+
1
,
0
,
x
,
outpos
+
1
,
0
,
out2
,
outpos
,
out2
,
outpos
,
firstn
,
0
,
firstn
,
0
,
map
->
chooseleaf_descend_once
,
NULL
)
<=
outpos
)
NULL
)
<=
outpos
)
/* didn't get leaf */
/* didn't get leaf */
reject
=
1
;
reject
=
1
;
...
@@ -422,7 +424,10 @@ static int crush_choose(const struct crush_map *map,
...
@@ -422,7 +424,10 @@ static int crush_choose(const struct crush_map *map,
ftotal
++
;
ftotal
++
;
flocal
++
;
flocal
++
;
if
(
collide
&&
flocal
<=
map
->
choose_local_tries
)
if
(
reject
&&
descend_once
)
/* let outer call try again */
skip_rep
=
1
;
else
if
(
collide
&&
flocal
<=
map
->
choose_local_tries
)
/* retry locally a few times */
/* retry locally a few times */
retry_bucket
=
1
;
retry_bucket
=
1
;
else
if
(
map
->
choose_local_fallback_tries
>
0
&&
else
if
(
map
->
choose_local_fallback_tries
>
0
&&
...
@@ -485,6 +490,7 @@ int crush_do_rule(const struct crush_map *map,
...
@@ -485,6 +490,7 @@ int crush_do_rule(const struct crush_map *map,
int
i
,
j
;
int
i
,
j
;
int
numrep
;
int
numrep
;
int
firstn
;
int
firstn
;
const
int
descend_once
=
0
;
if
((
__u32
)
ruleno
>=
map
->
max_rules
)
{
if
((
__u32
)
ruleno
>=
map
->
max_rules
)
{
dprintk
(
" bad ruleno %d
\n
"
,
ruleno
);
dprintk
(
" bad ruleno %d
\n
"
,
ruleno
);
...
@@ -544,7 +550,8 @@ int crush_do_rule(const struct crush_map *map,
...
@@ -544,7 +550,8 @@ int crush_do_rule(const struct crush_map *map,
curstep
->
arg2
,
curstep
->
arg2
,
o
+
osize
,
j
,
o
+
osize
,
j
,
firstn
,
firstn
,
recurse_to_leaf
,
c
+
osize
);
recurse_to_leaf
,
descend_once
,
c
+
osize
);
}
}
if
(
recurse_to_leaf
)
if
(
recurse_to_leaf
)
...
...
net/ceph/messenger.c
浏览文件 @
4c7a08c8
...
@@ -9,8 +9,9 @@
...
@@ -9,8 +9,9 @@
#include <linux/slab.h>
#include <linux/slab.h>
#include <linux/socket.h>
#include <linux/socket.h>
#include <linux/string.h>
#include <linux/string.h>
#ifdef CONFIG_BLOCK
#include <linux/bio.h>
#include <linux/bio.h>
#
include <linux/blkdev.h>
#
endif
/* CONFIG_BLOCK */
#include <linux/dns_resolver.h>
#include <linux/dns_resolver.h>
#include <net/tcp.h>
#include <net/tcp.h>
...
@@ -2651,9 +2652,11 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
...
@@ -2651,9 +2652,11 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
m
->
page_alignment
=
0
;
m
->
page_alignment
=
0
;
m
->
pages
=
NULL
;
m
->
pages
=
NULL
;
m
->
pagelist
=
NULL
;
m
->
pagelist
=
NULL
;
#ifdef CONFIG_BLOCK
m
->
bio
=
NULL
;
m
->
bio
=
NULL
;
m
->
bio_iter
=
NULL
;
m
->
bio_iter
=
NULL
;
m
->
bio_seg
=
0
;
m
->
bio_seg
=
0
;
#endif
/* CONFIG_BLOCK */
m
->
trail
=
NULL
;
m
->
trail
=
NULL
;
/* front */
/* front */
...
...
net/ceph/osd_client.c
浏览文件 @
4c7a08c8
...
@@ -23,7 +23,7 @@
...
@@ -23,7 +23,7 @@
static
const
struct
ceph_connection_operations
osd_con_ops
;
static
const
struct
ceph_connection_operations
osd_con_ops
;
static
void
send_queued
(
struct
ceph_osd_client
*
osdc
);
static
void
__
send_queued
(
struct
ceph_osd_client
*
osdc
);
static
int
__reset_osd
(
struct
ceph_osd_client
*
osdc
,
struct
ceph_osd
*
osd
);
static
int
__reset_osd
(
struct
ceph_osd_client
*
osdc
,
struct
ceph_osd
*
osd
);
static
void
__register_request
(
struct
ceph_osd_client
*
osdc
,
static
void
__register_request
(
struct
ceph_osd_client
*
osdc
,
struct
ceph_osd_request
*
req
);
struct
ceph_osd_request
*
req
);
...
@@ -32,64 +32,12 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc,
...
@@ -32,64 +32,12 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc,
static
void
__send_request
(
struct
ceph_osd_client
*
osdc
,
static
void
__send_request
(
struct
ceph_osd_client
*
osdc
,
struct
ceph_osd_request
*
req
);
struct
ceph_osd_request
*
req
);
static
int
op_needs_trail
(
int
op
)
{
switch
(
op
)
{
case
CEPH_OSD_OP_GETXATTR
:
case
CEPH_OSD_OP_SETXATTR
:
case
CEPH_OSD_OP_CMPXATTR
:
case
CEPH_OSD_OP_CALL
:
case
CEPH_OSD_OP_NOTIFY
:
return
1
;
default:
return
0
;
}
}
static
int
op_has_extent
(
int
op
)
static
int
op_has_extent
(
int
op
)
{
{
return
(
op
==
CEPH_OSD_OP_READ
||
return
(
op
==
CEPH_OSD_OP_READ
||
op
==
CEPH_OSD_OP_WRITE
);
op
==
CEPH_OSD_OP_WRITE
);
}
}
int
ceph_calc_raw_layout
(
struct
ceph_osd_client
*
osdc
,
struct
ceph_file_layout
*
layout
,
u64
snapid
,
u64
off
,
u64
*
plen
,
u64
*
bno
,
struct
ceph_osd_request
*
req
,
struct
ceph_osd_req_op
*
op
)
{
struct
ceph_osd_request_head
*
reqhead
=
req
->
r_request
->
front
.
iov_base
;
u64
orig_len
=
*
plen
;
u64
objoff
,
objlen
;
/* extent in object */
int
r
;
reqhead
->
snapid
=
cpu_to_le64
(
snapid
);
/* object extent? */
r
=
ceph_calc_file_object_mapping
(
layout
,
off
,
plen
,
bno
,
&
objoff
,
&
objlen
);
if
(
r
<
0
)
return
r
;
if
(
*
plen
<
orig_len
)
dout
(
" skipping last %llu, final file extent %llu~%llu
\n
"
,
orig_len
-
*
plen
,
off
,
*
plen
);
if
(
op_has_extent
(
op
->
op
))
{
op
->
extent
.
offset
=
objoff
;
op
->
extent
.
length
=
objlen
;
}
req
->
r_num_pages
=
calc_pages_for
(
off
,
*
plen
);
req
->
r_page_alignment
=
off
&
~
PAGE_MASK
;
if
(
op
->
op
==
CEPH_OSD_OP_WRITE
)
op
->
payload_len
=
*
plen
;
dout
(
"calc_layout bno=%llx %llu~%llu (%d pages)
\n
"
,
*
bno
,
objoff
,
objlen
,
req
->
r_num_pages
);
return
0
;
}
EXPORT_SYMBOL
(
ceph_calc_raw_layout
);
/*
/*
* Implement client access to distributed object storage cluster.
* Implement client access to distributed object storage cluster.
*
*
...
@@ -115,20 +63,48 @@ EXPORT_SYMBOL(ceph_calc_raw_layout);
...
@@ -115,20 +63,48 @@ EXPORT_SYMBOL(ceph_calc_raw_layout);
*
*
* fill osd op in request message.
* fill osd op in request message.
*/
*/
static
int
calc_layout
(
struct
ceph_osd_client
*
osdc
,
static
int
calc_layout
(
struct
ceph_vino
vino
,
struct
ceph_vino
vino
,
struct
ceph_file_layout
*
layout
,
struct
ceph_file_layout
*
layout
,
u64
off
,
u64
*
plen
,
u64
off
,
u64
*
plen
,
struct
ceph_osd_request
*
req
,
struct
ceph_osd_request
*
req
,
struct
ceph_osd_req_op
*
op
)
struct
ceph_osd_req_op
*
op
)
{
{
u64
bno
;
u64
orig_len
=
*
plen
;
u64
bno
=
0
;
u64
objoff
=
0
;
u64
objlen
=
0
;
int
r
;
int
r
;
r
=
ceph_calc_raw_layout
(
osdc
,
layout
,
vino
.
snap
,
off
,
/* object extent? */
plen
,
&
bno
,
req
,
op
);
r
=
ceph_calc_file_object_mapping
(
layout
,
off
,
orig_len
,
&
bno
,
&
objoff
,
&
objlen
);
if
(
r
<
0
)
if
(
r
<
0
)
return
r
;
return
r
;
if
(
objlen
<
orig_len
)
{
*
plen
=
objlen
;
dout
(
" skipping last %llu, final file extent %llu~%llu
\n
"
,
orig_len
-
*
plen
,
off
,
*
plen
);
}
if
(
op_has_extent
(
op
->
op
))
{
u32
osize
=
le32_to_cpu
(
layout
->
fl_object_size
);
op
->
extent
.
offset
=
objoff
;
op
->
extent
.
length
=
objlen
;
if
(
op
->
extent
.
truncate_size
<=
off
-
objoff
)
{
op
->
extent
.
truncate_size
=
0
;
}
else
{
op
->
extent
.
truncate_size
-=
off
-
objoff
;
if
(
op
->
extent
.
truncate_size
>
osize
)
op
->
extent
.
truncate_size
=
osize
;
}
}
req
->
r_num_pages
=
calc_pages_for
(
off
,
*
plen
);
req
->
r_page_alignment
=
off
&
~
PAGE_MASK
;
if
(
op
->
op
==
CEPH_OSD_OP_WRITE
)
op
->
payload_len
=
*
plen
;
dout
(
"calc_layout bno=%llx %llu~%llu (%d pages)
\n
"
,
bno
,
objoff
,
objlen
,
req
->
r_num_pages
);
snprintf
(
req
->
r_oid
,
sizeof
(
req
->
r_oid
),
"%llx.%08llx"
,
vino
.
ino
,
bno
);
snprintf
(
req
->
r_oid
,
sizeof
(
req
->
r_oid
),
"%llx.%08llx"
,
vino
.
ino
,
bno
);
req
->
r_oid_len
=
strlen
(
req
->
r_oid
);
req
->
r_oid_len
=
strlen
(
req
->
r_oid
);
...
@@ -148,25 +124,19 @@ void ceph_osdc_release_request(struct kref *kref)
...
@@ -148,25 +124,19 @@ void ceph_osdc_release_request(struct kref *kref)
if
(
req
->
r_request
)
if
(
req
->
r_request
)
ceph_msg_put
(
req
->
r_request
);
ceph_msg_put
(
req
->
r_request
);
if
(
req
->
r_con_filling_msg
)
{
if
(
req
->
r_con_filling_msg
)
{
dout
(
"%s revoking
pages
%p from con %p
\n
"
,
__func__
,
dout
(
"%s revoking
msg
%p from con %p
\n
"
,
__func__
,
req
->
r_
pages
,
req
->
r_con_filling_msg
);
req
->
r_
reply
,
req
->
r_con_filling_msg
);
ceph_msg_revoke_incoming
(
req
->
r_reply
);
ceph_msg_revoke_incoming
(
req
->
r_reply
);
req
->
r_con_filling_msg
->
ops
->
put
(
req
->
r_con_filling_msg
);
req
->
r_con_filling_msg
->
ops
->
put
(
req
->
r_con_filling_msg
);
req
->
r_con_filling_msg
=
NULL
;
}
}
if
(
req
->
r_reply
)
if
(
req
->
r_reply
)
ceph_msg_put
(
req
->
r_reply
);
ceph_msg_put
(
req
->
r_reply
);
if
(
req
->
r_own_pages
)
if
(
req
->
r_own_pages
)
ceph_release_page_vector
(
req
->
r_pages
,
ceph_release_page_vector
(
req
->
r_pages
,
req
->
r_num_pages
);
req
->
r_num_pages
);
#ifdef CONFIG_BLOCK
if
(
req
->
r_bio
)
bio_put
(
req
->
r_bio
);
#endif
ceph_put_snap_context
(
req
->
r_snapc
);
ceph_put_snap_context
(
req
->
r_snapc
);
if
(
req
->
r_trail
)
{
ceph_pagelist_release
(
&
req
->
r_trail
);
ceph_pagelist_release
(
req
->
r_trail
);
kfree
(
req
->
r_trail
);
}
if
(
req
->
r_mempool
)
if
(
req
->
r_mempool
)
mempool_free
(
req
,
req
->
r_osdc
->
req_mempool
);
mempool_free
(
req
,
req
->
r_osdc
->
req_mempool
);
else
else
...
@@ -174,34 +144,14 @@ void ceph_osdc_release_request(struct kref *kref)
...
@@ -174,34 +144,14 @@ void ceph_osdc_release_request(struct kref *kref)
}
}
EXPORT_SYMBOL
(
ceph_osdc_release_request
);
EXPORT_SYMBOL
(
ceph_osdc_release_request
);
static
int
get_num_ops
(
struct
ceph_osd_req_op
*
ops
,
int
*
needs_trail
)
{
int
i
=
0
;
if
(
needs_trail
)
*
needs_trail
=
0
;
while
(
ops
[
i
].
op
)
{
if
(
needs_trail
&&
op_needs_trail
(
ops
[
i
].
op
))
*
needs_trail
=
1
;
i
++
;
}
return
i
;
}
struct
ceph_osd_request
*
ceph_osdc_alloc_request
(
struct
ceph_osd_client
*
osdc
,
struct
ceph_osd_request
*
ceph_osdc_alloc_request
(
struct
ceph_osd_client
*
osdc
,
int
flags
,
struct
ceph_snap_context
*
snapc
,
struct
ceph_snap_context
*
snapc
,
struct
ceph_osd_req_op
*
ops
,
unsigned
int
num_op
,
bool
use_mempool
,
bool
use_mempool
,
gfp_t
gfp_flags
,
gfp_t
gfp_flags
)
struct
page
**
pages
,
struct
bio
*
bio
)
{
{
struct
ceph_osd_request
*
req
;
struct
ceph_osd_request
*
req
;
struct
ceph_msg
*
msg
;
struct
ceph_msg
*
msg
;
int
needs_trail
;
int
num_op
=
get_num_ops
(
ops
,
&
needs_trail
);
size_t
msg_size
=
sizeof
(
struct
ceph_osd_request_head
);
size_t
msg_size
=
sizeof
(
struct
ceph_osd_request_head
);
msg_size
+=
num_op
*
sizeof
(
struct
ceph_osd_op
);
msg_size
+=
num_op
*
sizeof
(
struct
ceph_osd_op
);
...
@@ -228,10 +178,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
...
@@ -228,10 +178,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
INIT_LIST_HEAD
(
&
req
->
r_req_lru_item
);
INIT_LIST_HEAD
(
&
req
->
r_req_lru_item
);
INIT_LIST_HEAD
(
&
req
->
r_osd_item
);
INIT_LIST_HEAD
(
&
req
->
r_osd_item
);
req
->
r_flags
=
flags
;
WARN_ON
((
flags
&
(
CEPH_OSD_FLAG_READ
|
CEPH_OSD_FLAG_WRITE
))
==
0
);
/* create reply message */
/* create reply message */
if
(
use_mempool
)
if
(
use_mempool
)
msg
=
ceph_msgpool_get
(
&
osdc
->
msgpool_op_reply
,
0
);
msg
=
ceph_msgpool_get
(
&
osdc
->
msgpool_op_reply
,
0
);
...
@@ -244,15 +190,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
...
@@ -244,15 +190,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
}
}
req
->
r_reply
=
msg
;
req
->
r_reply
=
msg
;
/* allocate space for the trailing data */
ceph_pagelist_init
(
&
req
->
r_trail
);
if
(
needs_trail
)
{
req
->
r_trail
=
kmalloc
(
sizeof
(
struct
ceph_pagelist
),
gfp_flags
);
if
(
!
req
->
r_trail
)
{
ceph_osdc_put_request
(
req
);
return
NULL
;
}
ceph_pagelist_init
(
req
->
r_trail
);
}
/* create request message; allow space for oid */
/* create request message; allow space for oid */
msg_size
+=
MAX_OBJ_NAME_SIZE
;
msg_size
+=
MAX_OBJ_NAME_SIZE
;
...
@@ -270,13 +208,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
...
@@ -270,13 +208,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
memset
(
msg
->
front
.
iov_base
,
0
,
msg
->
front
.
iov_len
);
memset
(
msg
->
front
.
iov_base
,
0
,
msg
->
front
.
iov_len
);
req
->
r_request
=
msg
;
req
->
r_request
=
msg
;
req
->
r_pages
=
pages
;
#ifdef CONFIG_BLOCK
if
(
bio
)
{
req
->
r_bio
=
bio
;
bio_get
(
req
->
r_bio
);
}
#endif
return
req
;
return
req
;
}
}
...
@@ -289,6 +220,8 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
...
@@ -289,6 +220,8 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
dst
->
op
=
cpu_to_le16
(
src
->
op
);
dst
->
op
=
cpu_to_le16
(
src
->
op
);
switch
(
src
->
op
)
{
switch
(
src
->
op
)
{
case
CEPH_OSD_OP_STAT
:
break
;
case
CEPH_OSD_OP_READ
:
case
CEPH_OSD_OP_READ
:
case
CEPH_OSD_OP_WRITE
:
case
CEPH_OSD_OP_WRITE
:
dst
->
extent
.
offset
=
dst
->
extent
.
offset
=
...
@@ -300,52 +233,20 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
...
@@ -300,52 +233,20 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
dst
->
extent
.
truncate_seq
=
dst
->
extent
.
truncate_seq
=
cpu_to_le32
(
src
->
extent
.
truncate_seq
);
cpu_to_le32
(
src
->
extent
.
truncate_seq
);
break
;
break
;
case
CEPH_OSD_OP_GETXATTR
:
case
CEPH_OSD_OP_SETXATTR
:
case
CEPH_OSD_OP_CMPXATTR
:
BUG_ON
(
!
req
->
r_trail
);
dst
->
xattr
.
name_len
=
cpu_to_le32
(
src
->
xattr
.
name_len
);
dst
->
xattr
.
value_len
=
cpu_to_le32
(
src
->
xattr
.
value_len
);
dst
->
xattr
.
cmp_op
=
src
->
xattr
.
cmp_op
;
dst
->
xattr
.
cmp_mode
=
src
->
xattr
.
cmp_mode
;
ceph_pagelist_append
(
req
->
r_trail
,
src
->
xattr
.
name
,
src
->
xattr
.
name_len
);
ceph_pagelist_append
(
req
->
r_trail
,
src
->
xattr
.
val
,
src
->
xattr
.
value_len
);
break
;
case
CEPH_OSD_OP_CALL
:
case
CEPH_OSD_OP_CALL
:
BUG_ON
(
!
req
->
r_trail
);
dst
->
cls
.
class_len
=
src
->
cls
.
class_len
;
dst
->
cls
.
class_len
=
src
->
cls
.
class_len
;
dst
->
cls
.
method_len
=
src
->
cls
.
method_len
;
dst
->
cls
.
method_len
=
src
->
cls
.
method_len
;
dst
->
cls
.
indata_len
=
cpu_to_le32
(
src
->
cls
.
indata_len
);
dst
->
cls
.
indata_len
=
cpu_to_le32
(
src
->
cls
.
indata_len
);
ceph_pagelist_append
(
req
->
r_trail
,
src
->
cls
.
class_name
,
ceph_pagelist_append
(
&
req
->
r_trail
,
src
->
cls
.
class_name
,
src
->
cls
.
class_len
);
src
->
cls
.
class_len
);
ceph_pagelist_append
(
req
->
r_trail
,
src
->
cls
.
method_name
,
ceph_pagelist_append
(
&
req
->
r_trail
,
src
->
cls
.
method_name
,
src
->
cls
.
method_len
);
src
->
cls
.
method_len
);
ceph_pagelist_append
(
req
->
r_trail
,
src
->
cls
.
indata
,
ceph_pagelist_append
(
&
req
->
r_trail
,
src
->
cls
.
indata
,
src
->
cls
.
indata_len
);
src
->
cls
.
indata_len
);
break
;
break
;
case
CEPH_OSD_OP_ROLLBACK
:
dst
->
snap
.
snapid
=
cpu_to_le64
(
src
->
snap
.
snapid
);
break
;
case
CEPH_OSD_OP_STARTSYNC
:
case
CEPH_OSD_OP_STARTSYNC
:
break
;
break
;
case
CEPH_OSD_OP_NOTIFY
:
{
__le32
prot_ver
=
cpu_to_le32
(
src
->
watch
.
prot_ver
);
__le32
timeout
=
cpu_to_le32
(
src
->
watch
.
timeout
);
BUG_ON
(
!
req
->
r_trail
);
ceph_pagelist_append
(
req
->
r_trail
,
&
prot_ver
,
sizeof
(
prot_ver
));
ceph_pagelist_append
(
req
->
r_trail
,
&
timeout
,
sizeof
(
timeout
));
}
case
CEPH_OSD_OP_NOTIFY_ACK
:
case
CEPH_OSD_OP_NOTIFY_ACK
:
case
CEPH_OSD_OP_WATCH
:
case
CEPH_OSD_OP_WATCH
:
dst
->
watch
.
cookie
=
cpu_to_le64
(
src
->
watch
.
cookie
);
dst
->
watch
.
cookie
=
cpu_to_le64
(
src
->
watch
.
cookie
);
...
@@ -356,6 +257,64 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
...
@@ -356,6 +257,64 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
pr_err
(
"unrecognized osd opcode %d
\n
"
,
dst
->
op
);
pr_err
(
"unrecognized osd opcode %d
\n
"
,
dst
->
op
);
WARN_ON
(
1
);
WARN_ON
(
1
);
break
;
break
;
case
CEPH_OSD_OP_MAPEXT
:
case
CEPH_OSD_OP_MASKTRUNC
:
case
CEPH_OSD_OP_SPARSE_READ
:
case
CEPH_OSD_OP_NOTIFY
:
case
CEPH_OSD_OP_ASSERT_VER
:
case
CEPH_OSD_OP_WRITEFULL
:
case
CEPH_OSD_OP_TRUNCATE
:
case
CEPH_OSD_OP_ZERO
:
case
CEPH_OSD_OP_DELETE
:
case
CEPH_OSD_OP_APPEND
:
case
CEPH_OSD_OP_SETTRUNC
:
case
CEPH_OSD_OP_TRIMTRUNC
:
case
CEPH_OSD_OP_TMAPUP
:
case
CEPH_OSD_OP_TMAPPUT
:
case
CEPH_OSD_OP_TMAPGET
:
case
CEPH_OSD_OP_CREATE
:
case
CEPH_OSD_OP_ROLLBACK
:
case
CEPH_OSD_OP_OMAPGETKEYS
:
case
CEPH_OSD_OP_OMAPGETVALS
:
case
CEPH_OSD_OP_OMAPGETHEADER
:
case
CEPH_OSD_OP_OMAPGETVALSBYKEYS
:
case
CEPH_OSD_OP_MODE_RD
:
case
CEPH_OSD_OP_OMAPSETVALS
:
case
CEPH_OSD_OP_OMAPSETHEADER
:
case
CEPH_OSD_OP_OMAPCLEAR
:
case
CEPH_OSD_OP_OMAPRMKEYS
:
case
CEPH_OSD_OP_OMAP_CMP
:
case
CEPH_OSD_OP_CLONERANGE
:
case
CEPH_OSD_OP_ASSERT_SRC_VERSION
:
case
CEPH_OSD_OP_SRC_CMPXATTR
:
case
CEPH_OSD_OP_GETXATTR
:
case
CEPH_OSD_OP_GETXATTRS
:
case
CEPH_OSD_OP_CMPXATTR
:
case
CEPH_OSD_OP_SETXATTR
:
case
CEPH_OSD_OP_SETXATTRS
:
case
CEPH_OSD_OP_RESETXATTRS
:
case
CEPH_OSD_OP_RMXATTR
:
case
CEPH_OSD_OP_PULL
:
case
CEPH_OSD_OP_PUSH
:
case
CEPH_OSD_OP_BALANCEREADS
:
case
CEPH_OSD_OP_UNBALANCEREADS
:
case
CEPH_OSD_OP_SCRUB
:
case
CEPH_OSD_OP_SCRUB_RESERVE
:
case
CEPH_OSD_OP_SCRUB_UNRESERVE
:
case
CEPH_OSD_OP_SCRUB_STOP
:
case
CEPH_OSD_OP_SCRUB_MAP
:
case
CEPH_OSD_OP_WRLOCK
:
case
CEPH_OSD_OP_WRUNLOCK
:
case
CEPH_OSD_OP_RDLOCK
:
case
CEPH_OSD_OP_RDUNLOCK
:
case
CEPH_OSD_OP_UPLOCK
:
case
CEPH_OSD_OP_DNLOCK
:
case
CEPH_OSD_OP_PGLS
:
case
CEPH_OSD_OP_PGLS_FILTER
:
pr_err
(
"unsupported osd opcode %s
\n
"
,
ceph_osd_op_name
(
dst
->
op
));
WARN_ON
(
1
);
break
;
}
}
dst
->
payload_len
=
cpu_to_le32
(
src
->
payload_len
);
dst
->
payload_len
=
cpu_to_le32
(
src
->
payload_len
);
}
}
...
@@ -365,25 +324,25 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
...
@@ -365,25 +324,25 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
*
*
*/
*/
void
ceph_osdc_build_request
(
struct
ceph_osd_request
*
req
,
void
ceph_osdc_build_request
(
struct
ceph_osd_request
*
req
,
u64
off
,
u64
*
plen
,
u64
off
,
u64
len
,
unsigned
int
num_op
,
struct
ceph_osd_req_op
*
src_ops
,
struct
ceph_osd_req_op
*
src_ops
,
struct
ceph_snap_context
*
snapc
,
struct
ceph_snap_context
*
snapc
,
u64
snap_id
,
struct
timespec
*
mtime
,
struct
timespec
*
mtime
)
const
char
*
oid
,
int
oid_len
)
{
{
struct
ceph_msg
*
msg
=
req
->
r_request
;
struct
ceph_msg
*
msg
=
req
->
r_request
;
struct
ceph_osd_request_head
*
head
;
struct
ceph_osd_request_head
*
head
;
struct
ceph_osd_req_op
*
src_op
;
struct
ceph_osd_req_op
*
src_op
;
struct
ceph_osd_op
*
op
;
struct
ceph_osd_op
*
op
;
void
*
p
;
void
*
p
;
int
num_op
=
get_num_ops
(
src_ops
,
NULL
);
size_t
msg_size
=
sizeof
(
*
head
)
+
num_op
*
sizeof
(
*
op
);
size_t
msg_size
=
sizeof
(
*
head
)
+
num_op
*
sizeof
(
*
op
);
int
flags
=
req
->
r_flags
;
int
flags
=
req
->
r_flags
;
u64
data_len
=
0
;
u64
data_len
;
int
i
;
int
i
;
WARN_ON
((
flags
&
(
CEPH_OSD_FLAG_READ
|
CEPH_OSD_FLAG_WRITE
))
==
0
);
head
=
msg
->
front
.
iov_base
;
head
=
msg
->
front
.
iov_base
;
head
->
snapid
=
cpu_to_le64
(
snap_id
);
op
=
(
void
*
)(
head
+
1
);
op
=
(
void
*
)(
head
+
1
);
p
=
(
void
*
)(
op
+
num_op
);
p
=
(
void
*
)(
op
+
num_op
);
...
@@ -393,23 +352,17 @@ void ceph_osdc_build_request(struct ceph_osd_request *req,
...
@@ -393,23 +352,17 @@ void ceph_osdc_build_request(struct ceph_osd_request *req,
head
->
flags
=
cpu_to_le32
(
flags
);
head
->
flags
=
cpu_to_le32
(
flags
);
if
(
flags
&
CEPH_OSD_FLAG_WRITE
)
if
(
flags
&
CEPH_OSD_FLAG_WRITE
)
ceph_encode_timespec
(
&
head
->
mtime
,
mtime
);
ceph_encode_timespec
(
&
head
->
mtime
,
mtime
);
BUG_ON
(
num_op
>
(
unsigned
int
)
((
u16
)
-
1
));
head
->
num_ops
=
cpu_to_le16
(
num_op
);
head
->
num_ops
=
cpu_to_le16
(
num_op
);
/* fill in oid */
/* fill in oid */
head
->
object_len
=
cpu_to_le32
(
oid_len
);
head
->
object_len
=
cpu_to_le32
(
req
->
r_
oid_len
);
memcpy
(
p
,
oid
,
oid_len
);
memcpy
(
p
,
req
->
r_oid
,
req
->
r_
oid_len
);
p
+=
oid_len
;
p
+=
req
->
r_
oid_len
;
src_op
=
src_ops
;
src_op
=
src_ops
;
while
(
src_op
->
op
)
{
while
(
num_op
--
)
osd_req_encode_op
(
req
,
op
,
src_op
);
osd_req_encode_op
(
req
,
op
++
,
src_op
++
);
src_op
++
;
op
++
;
}
if
(
req
->
r_trail
)
data_len
+=
req
->
r_trail
->
length
;
if
(
snapc
)
{
if
(
snapc
)
{
head
->
snap_seq
=
cpu_to_le64
(
snapc
->
seq
);
head
->
snap_seq
=
cpu_to_le64
(
snapc
->
seq
);
...
@@ -420,14 +373,12 @@ void ceph_osdc_build_request(struct ceph_osd_request *req,
...
@@ -420,14 +373,12 @@ void ceph_osdc_build_request(struct ceph_osd_request *req,
}
}
}
}
data_len
=
req
->
r_trail
.
length
;
if
(
flags
&
CEPH_OSD_FLAG_WRITE
)
{
if
(
flags
&
CEPH_OSD_FLAG_WRITE
)
{
req
->
r_request
->
hdr
.
data_off
=
cpu_to_le16
(
off
);
req
->
r_request
->
hdr
.
data_off
=
cpu_to_le16
(
off
);
req
->
r_request
->
hdr
.
data_len
=
cpu_to_le32
(
*
plen
+
data_len
);
data_len
+=
len
;
}
else
if
(
data_len
)
{
req
->
r_request
->
hdr
.
data_off
=
0
;
req
->
r_request
->
hdr
.
data_len
=
cpu_to_le32
(
data_len
);
}
}
req
->
r_request
->
hdr
.
data_len
=
cpu_to_le32
(
data_len
);
req
->
r_request
->
page_alignment
=
req
->
r_page_alignment
;
req
->
r_request
->
page_alignment
=
req
->
r_page_alignment
;
BUG_ON
(
p
>
msg
->
front
.
iov_base
+
msg
->
front
.
iov_len
);
BUG_ON
(
p
>
msg
->
front
.
iov_base
+
msg
->
front
.
iov_len
);
...
@@ -459,34 +410,33 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
...
@@ -459,34 +410,33 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
u32
truncate_seq
,
u32
truncate_seq
,
u64
truncate_size
,
u64
truncate_size
,
struct
timespec
*
mtime
,
struct
timespec
*
mtime
,
bool
use_mempool
,
int
num_reply
,
bool
use_mempool
,
int
page_align
)
int
page_align
)
{
{
struct
ceph_osd_req_op
ops
[
3
];
struct
ceph_osd_req_op
ops
[
2
];
struct
ceph_osd_request
*
req
;
struct
ceph_osd_request
*
req
;
unsigned
int
num_op
=
1
;
int
r
;
int
r
;
memset
(
&
ops
,
0
,
sizeof
ops
);
ops
[
0
].
op
=
opcode
;
ops
[
0
].
op
=
opcode
;
ops
[
0
].
extent
.
truncate_seq
=
truncate_seq
;
ops
[
0
].
extent
.
truncate_seq
=
truncate_seq
;
ops
[
0
].
extent
.
truncate_size
=
truncate_size
;
ops
[
0
].
extent
.
truncate_size
=
truncate_size
;
ops
[
0
].
payload_len
=
0
;
if
(
do_sync
)
{
if
(
do_sync
)
{
ops
[
1
].
op
=
CEPH_OSD_OP_STARTSYNC
;
ops
[
1
].
op
=
CEPH_OSD_OP_STARTSYNC
;
ops
[
1
].
payload_len
=
0
;
num_op
++
;
ops
[
2
].
op
=
0
;
}
}
else
ops
[
1
].
op
=
0
;
req
=
ceph_osdc_alloc_request
(
osdc
,
snapc
,
num_op
,
use_mempool
,
GFP_NOFS
);
req
=
ceph_osdc_alloc_request
(
osdc
,
flags
,
snapc
,
ops
,
use_mempool
,
GFP_NOFS
,
NULL
,
NULL
);
if
(
!
req
)
if
(
!
req
)
return
ERR_PTR
(
-
ENOMEM
);
return
ERR_PTR
(
-
ENOMEM
);
req
->
r_flags
=
flags
;
/* calculate max write size */
/* calculate max write size */
r
=
calc_layout
(
osdc
,
vino
,
layout
,
off
,
plen
,
req
,
ops
);
r
=
calc_layout
(
vino
,
layout
,
off
,
plen
,
req
,
ops
);
if
(
r
<
0
)
if
(
r
<
0
)
return
ERR_PTR
(
r
);
return
ERR_PTR
(
r
);
req
->
r_file_layout
=
*
layout
;
/* keep a copy */
req
->
r_file_layout
=
*
layout
;
/* keep a copy */
...
@@ -496,10 +446,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
...
@@ -496,10 +446,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
req
->
r_num_pages
=
calc_pages_for
(
page_align
,
*
plen
);
req
->
r_num_pages
=
calc_pages_for
(
page_align
,
*
plen
);
req
->
r_page_alignment
=
page_align
;
req
->
r_page_alignment
=
page_align
;
ceph_osdc_build_request
(
req
,
off
,
plen
,
ops
,
ceph_osdc_build_request
(
req
,
off
,
*
plen
,
num_op
,
ops
,
snapc
,
snapc
,
vino
.
snap
,
mtime
);
mtime
,
req
->
r_oid
,
req
->
r_oid_len
);
return
req
;
return
req
;
}
}
...
@@ -623,8 +571,8 @@ static void osd_reset(struct ceph_connection *con)
...
@@ -623,8 +571,8 @@ static void osd_reset(struct ceph_connection *con)
down_read
(
&
osdc
->
map_sem
);
down_read
(
&
osdc
->
map_sem
);
mutex_lock
(
&
osdc
->
request_mutex
);
mutex_lock
(
&
osdc
->
request_mutex
);
__kick_osd_requests
(
osdc
,
osd
);
__kick_osd_requests
(
osdc
,
osd
);
__send_queued
(
osdc
);
mutex_unlock
(
&
osdc
->
request_mutex
);
mutex_unlock
(
&
osdc
->
request_mutex
);
send_queued
(
osdc
);
up_read
(
&
osdc
->
map_sem
);
up_read
(
&
osdc
->
map_sem
);
}
}
...
@@ -739,31 +687,35 @@ static void remove_old_osds(struct ceph_osd_client *osdc)
...
@@ -739,31 +687,35 @@ static void remove_old_osds(struct ceph_osd_client *osdc)
*/
*/
static
int
__reset_osd
(
struct
ceph_osd_client
*
osdc
,
struct
ceph_osd
*
osd
)
static
int
__reset_osd
(
struct
ceph_osd_client
*
osdc
,
struct
ceph_osd
*
osd
)
{
{
struct
ceph_osd_request
*
req
;
struct
ceph_entity_addr
*
peer_addr
;
int
ret
=
0
;
dout
(
"__reset_osd %p osd%d
\n
"
,
osd
,
osd
->
o_osd
);
dout
(
"__reset_osd %p osd%d
\n
"
,
osd
,
osd
->
o_osd
);
if
(
list_empty
(
&
osd
->
o_requests
)
&&
if
(
list_empty
(
&
osd
->
o_requests
)
&&
list_empty
(
&
osd
->
o_linger_requests
))
{
list_empty
(
&
osd
->
o_linger_requests
))
{
__remove_osd
(
osdc
,
osd
);
__remove_osd
(
osdc
,
osd
);
ret
=
-
ENODEV
;
}
else
if
(
memcmp
(
&
osdc
->
osdmap
->
osd_addr
[
osd
->
o_osd
],
return
-
ENODEV
;
&
osd
->
o_con
.
peer_addr
,
}
sizeof
(
osd
->
o_con
.
peer_addr
))
==
0
&&
!
ceph_con_opened
(
&
osd
->
o_con
))
{
peer_addr
=
&
osdc
->
osdmap
->
osd_addr
[
osd
->
o_osd
];
if
(
!
memcmp
(
peer_addr
,
&
osd
->
o_con
.
peer_addr
,
sizeof
(
*
peer_addr
))
&&
!
ceph_con_opened
(
&
osd
->
o_con
))
{
struct
ceph_osd_request
*
req
;
dout
(
" osd addr hasn't changed and connection never opened,"
dout
(
" osd addr hasn't changed and connection never opened,"
" letting msgr retry"
);
" letting msgr retry"
);
/* touch each r_stamp for handle_timeout()'s benfit */
/* touch each r_stamp for handle_timeout()'s benfit */
list_for_each_entry
(
req
,
&
osd
->
o_requests
,
r_osd_item
)
list_for_each_entry
(
req
,
&
osd
->
o_requests
,
r_osd_item
)
req
->
r_stamp
=
jiffies
;
req
->
r_stamp
=
jiffies
;
ret
=
-
EAGAIN
;
}
else
{
return
-
EAGAIN
;
ceph_con_close
(
&
osd
->
o_con
);
ceph_con_open
(
&
osd
->
o_con
,
CEPH_ENTITY_TYPE_OSD
,
osd
->
o_osd
,
&
osdc
->
osdmap
->
osd_addr
[
osd
->
o_osd
]);
osd
->
o_incarnation
++
;
}
}
return
ret
;
ceph_con_close
(
&
osd
->
o_con
);
ceph_con_open
(
&
osd
->
o_con
,
CEPH_ENTITY_TYPE_OSD
,
osd
->
o_osd
,
peer_addr
);
osd
->
o_incarnation
++
;
return
0
;
}
}
static
void
__insert_osd
(
struct
ceph_osd_client
*
osdc
,
struct
ceph_osd
*
new
)
static
void
__insert_osd
(
struct
ceph_osd_client
*
osdc
,
struct
ceph_osd
*
new
)
...
@@ -1062,16 +1014,13 @@ static void __send_request(struct ceph_osd_client *osdc,
...
@@ -1062,16 +1014,13 @@ static void __send_request(struct ceph_osd_client *osdc,
/*
/*
* Send any requests in the queue (req_unsent).
* Send any requests in the queue (req_unsent).
*/
*/
static
void
send_queued
(
struct
ceph_osd_client
*
osdc
)
static
void
__
send_queued
(
struct
ceph_osd_client
*
osdc
)
{
{
struct
ceph_osd_request
*
req
,
*
tmp
;
struct
ceph_osd_request
*
req
,
*
tmp
;
dout
(
"send_queued
\n
"
);
dout
(
"__send_queued
\n
"
);
mutex_lock
(
&
osdc
->
request_mutex
);
list_for_each_entry_safe
(
req
,
tmp
,
&
osdc
->
req_unsent
,
r_req_lru_item
)
list_for_each_entry_safe
(
req
,
tmp
,
&
osdc
->
req_unsent
,
r_req_lru_item
)
{
__send_request
(
osdc
,
req
);
__send_request
(
osdc
,
req
);
}
mutex_unlock
(
&
osdc
->
request_mutex
);
}
}
/*
/*
...
@@ -1123,8 +1072,8 @@ static void handle_timeout(struct work_struct *work)
...
@@ -1123,8 +1072,8 @@ static void handle_timeout(struct work_struct *work)
}
}
__schedule_osd_timeout
(
osdc
);
__schedule_osd_timeout
(
osdc
);
__send_queued
(
osdc
);
mutex_unlock
(
&
osdc
->
request_mutex
);
mutex_unlock
(
&
osdc
->
request_mutex
);
send_queued
(
osdc
);
up_read
(
&
osdc
->
map_sem
);
up_read
(
&
osdc
->
map_sem
);
}
}
...
@@ -1462,7 +1411,9 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
...
@@ -1462,7 +1411,9 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
if
(
ceph_osdmap_flag
(
osdc
->
osdmap
,
CEPH_OSDMAP_FULL
))
if
(
ceph_osdmap_flag
(
osdc
->
osdmap
,
CEPH_OSDMAP_FULL
))
ceph_monc_request_next_osdmap
(
&
osdc
->
client
->
monc
);
ceph_monc_request_next_osdmap
(
&
osdc
->
client
->
monc
);
send_queued
(
osdc
);
mutex_lock
(
&
osdc
->
request_mutex
);
__send_queued
(
osdc
);
mutex_unlock
(
&
osdc
->
request_mutex
);
up_read
(
&
osdc
->
map_sem
);
up_read
(
&
osdc
->
map_sem
);
wake_up_all
(
&
osdc
->
client
->
auth_wq
);
wake_up_all
(
&
osdc
->
client
->
auth_wq
);
return
;
return
;
...
@@ -1556,8 +1507,7 @@ static void __remove_event(struct ceph_osd_event *event)
...
@@ -1556,8 +1507,7 @@ static void __remove_event(struct ceph_osd_event *event)
int
ceph_osdc_create_event
(
struct
ceph_osd_client
*
osdc
,
int
ceph_osdc_create_event
(
struct
ceph_osd_client
*
osdc
,
void
(
*
event_cb
)(
u64
,
u64
,
u8
,
void
*
),
void
(
*
event_cb
)(
u64
,
u64
,
u8
,
void
*
),
int
one_shot
,
void
*
data
,
void
*
data
,
struct
ceph_osd_event
**
pevent
)
struct
ceph_osd_event
**
pevent
)
{
{
struct
ceph_osd_event
*
event
;
struct
ceph_osd_event
*
event
;
...
@@ -1567,14 +1517,13 @@ int ceph_osdc_create_event(struct ceph_osd_client *osdc,
...
@@ -1567,14 +1517,13 @@ int ceph_osdc_create_event(struct ceph_osd_client *osdc,
dout
(
"create_event %p
\n
"
,
event
);
dout
(
"create_event %p
\n
"
,
event
);
event
->
cb
=
event_cb
;
event
->
cb
=
event_cb
;
event
->
one_shot
=
one_shot
;
event
->
one_shot
=
0
;
event
->
data
=
data
;
event
->
data
=
data
;
event
->
osdc
=
osdc
;
event
->
osdc
=
osdc
;
INIT_LIST_HEAD
(
&
event
->
osd_node
);
INIT_LIST_HEAD
(
&
event
->
osd_node
);
RB_CLEAR_NODE
(
&
event
->
node
);
RB_CLEAR_NODE
(
&
event
->
node
);
kref_init
(
&
event
->
kref
);
/* one ref for us */
kref_init
(
&
event
->
kref
);
/* one ref for us */
kref_get
(
&
event
->
kref
);
/* one ref for the caller */
kref_get
(
&
event
->
kref
);
/* one ref for the caller */
init_completion
(
&
event
->
completion
);
spin_lock
(
&
osdc
->
event_lock
);
spin_lock
(
&
osdc
->
event_lock
);
event
->
cookie
=
++
osdc
->
event_count
;
event
->
cookie
=
++
osdc
->
event_count
;
...
@@ -1610,7 +1559,6 @@ static void do_event_work(struct work_struct *work)
...
@@ -1610,7 +1559,6 @@ static void do_event_work(struct work_struct *work)
dout
(
"do_event_work completing %p
\n
"
,
event
);
dout
(
"do_event_work completing %p
\n
"
,
event
);
event
->
cb
(
ver
,
notify_id
,
opcode
,
event
->
data
);
event
->
cb
(
ver
,
notify_id
,
opcode
,
event
->
data
);
complete
(
&
event
->
completion
);
dout
(
"do_event_work completed %p
\n
"
,
event
);
dout
(
"do_event_work completed %p
\n
"
,
event
);
ceph_osdc_put_event
(
event
);
ceph_osdc_put_event
(
event
);
kfree
(
event_work
);
kfree
(
event_work
);
...
@@ -1620,7 +1568,8 @@ static void do_event_work(struct work_struct *work)
...
@@ -1620,7 +1568,8 @@ static void do_event_work(struct work_struct *work)
/*
/*
* Process osd watch notifications
* Process osd watch notifications
*/
*/
void
handle_watch_notify
(
struct
ceph_osd_client
*
osdc
,
struct
ceph_msg
*
msg
)
static
void
handle_watch_notify
(
struct
ceph_osd_client
*
osdc
,
struct
ceph_msg
*
msg
)
{
{
void
*
p
,
*
end
;
void
*
p
,
*
end
;
u8
proto_ver
;
u8
proto_ver
;
...
@@ -1641,9 +1590,8 @@ void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg)
...
@@ -1641,9 +1590,8 @@ void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg)
spin_lock
(
&
osdc
->
event_lock
);
spin_lock
(
&
osdc
->
event_lock
);
event
=
__find_event
(
osdc
,
cookie
);
event
=
__find_event
(
osdc
,
cookie
);
if
(
event
)
{
if
(
event
)
{
BUG_ON
(
event
->
one_shot
);
get_event
(
event
);
get_event
(
event
);
if
(
event
->
one_shot
)
__remove_event
(
event
);
}
}
spin_unlock
(
&
osdc
->
event_lock
);
spin_unlock
(
&
osdc
->
event_lock
);
dout
(
"handle_watch_notify cookie %lld ver %lld event %p
\n
"
,
dout
(
"handle_watch_notify cookie %lld ver %lld event %p
\n
"
,
...
@@ -1668,7 +1616,6 @@ void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg)
...
@@ -1668,7 +1616,6 @@ void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg)
return
;
return
;
done_err:
done_err:
complete
(
&
event
->
completion
);
ceph_osdc_put_event
(
event
);
ceph_osdc_put_event
(
event
);
return
;
return
;
...
@@ -1677,21 +1624,6 @@ void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg)
...
@@ -1677,21 +1624,6 @@ void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg)
return
;
return
;
}
}
int
ceph_osdc_wait_event
(
struct
ceph_osd_event
*
event
,
unsigned
long
timeout
)
{
int
err
;
dout
(
"wait_event %p
\n
"
,
event
);
err
=
wait_for_completion_interruptible_timeout
(
&
event
->
completion
,
timeout
*
HZ
);
ceph_osdc_put_event
(
event
);
if
(
err
>
0
)
err
=
0
;
dout
(
"wait_event %p returns %d
\n
"
,
event
,
err
);
return
err
;
}
EXPORT_SYMBOL
(
ceph_osdc_wait_event
);
/*
/*
* Register request, send initial attempt.
* Register request, send initial attempt.
*/
*/
...
@@ -1706,7 +1638,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
...
@@ -1706,7 +1638,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
#ifdef CONFIG_BLOCK
#ifdef CONFIG_BLOCK
req
->
r_request
->
bio
=
req
->
r_bio
;
req
->
r_request
->
bio
=
req
->
r_bio
;
#endif
#endif
req
->
r_request
->
trail
=
req
->
r_trail
;
req
->
r_request
->
trail
=
&
req
->
r_trail
;
register_request
(
osdc
,
req
);
register_request
(
osdc
,
req
);
...
@@ -1865,7 +1797,6 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
...
@@ -1865,7 +1797,6 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
out:
out:
return
err
;
return
err
;
}
}
EXPORT_SYMBOL
(
ceph_osdc_init
);
void
ceph_osdc_stop
(
struct
ceph_osd_client
*
osdc
)
void
ceph_osdc_stop
(
struct
ceph_osd_client
*
osdc
)
{
{
...
@@ -1882,7 +1813,6 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc)
...
@@ -1882,7 +1813,6 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc)
ceph_msgpool_destroy
(
&
osdc
->
msgpool_op
);
ceph_msgpool_destroy
(
&
osdc
->
msgpool_op
);
ceph_msgpool_destroy
(
&
osdc
->
msgpool_op_reply
);
ceph_msgpool_destroy
(
&
osdc
->
msgpool_op_reply
);
}
}
EXPORT_SYMBOL
(
ceph_osdc_stop
);
/*
/*
* Read some contiguous pages. If we cross a stripe boundary, shorten
* Read some contiguous pages. If we cross a stripe boundary, shorten
...
@@ -1902,7 +1832,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
...
@@ -1902,7 +1832,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
req
=
ceph_osdc_new_request
(
osdc
,
layout
,
vino
,
off
,
plen
,
req
=
ceph_osdc_new_request
(
osdc
,
layout
,
vino
,
off
,
plen
,
CEPH_OSD_OP_READ
,
CEPH_OSD_FLAG_READ
,
CEPH_OSD_OP_READ
,
CEPH_OSD_FLAG_READ
,
NULL
,
0
,
truncate_seq
,
truncate_size
,
NULL
,
NULL
,
0
,
truncate_seq
,
truncate_size
,
NULL
,
false
,
1
,
page_align
);
false
,
page_align
);
if
(
IS_ERR
(
req
))
if
(
IS_ERR
(
req
))
return
PTR_ERR
(
req
);
return
PTR_ERR
(
req
);
...
@@ -1931,8 +1861,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
...
@@ -1931,8 +1861,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
u64
off
,
u64
len
,
u64
off
,
u64
len
,
u32
truncate_seq
,
u64
truncate_size
,
u32
truncate_seq
,
u64
truncate_size
,
struct
timespec
*
mtime
,
struct
timespec
*
mtime
,
struct
page
**
pages
,
int
num_pages
,
struct
page
**
pages
,
int
num_pages
)
int
flags
,
int
do_sync
,
bool
nofail
)
{
{
struct
ceph_osd_request
*
req
;
struct
ceph_osd_request
*
req
;
int
rc
=
0
;
int
rc
=
0
;
...
@@ -1941,11 +1870,10 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
...
@@ -1941,11 +1870,10 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
BUG_ON
(
vino
.
snap
!=
CEPH_NOSNAP
);
BUG_ON
(
vino
.
snap
!=
CEPH_NOSNAP
);
req
=
ceph_osdc_new_request
(
osdc
,
layout
,
vino
,
off
,
&
len
,
req
=
ceph_osdc_new_request
(
osdc
,
layout
,
vino
,
off
,
&
len
,
CEPH_OSD_OP_WRITE
,
CEPH_OSD_OP_WRITE
,
flags
|
CEPH_OSD_FLAG_ONDISK
|
CEPH_OSD_FLAG_ONDISK
|
CEPH_OSD_FLAG_WRITE
,
CEPH_OSD_FLAG_WRITE
,
snapc
,
0
,
snapc
,
do_sync
,
truncate_seq
,
truncate_size
,
mtime
,
truncate_seq
,
truncate_size
,
mtime
,
nofail
,
1
,
page_align
);
true
,
page_align
);
if
(
IS_ERR
(
req
))
if
(
IS_ERR
(
req
))
return
PTR_ERR
(
req
);
return
PTR_ERR
(
req
);
...
@@ -1954,7 +1882,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
...
@@ -1954,7 +1882,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
dout
(
"writepages %llu~%llu (%d pages)
\n
"
,
off
,
len
,
dout
(
"writepages %llu~%llu (%d pages)
\n
"
,
off
,
len
,
req
->
r_num_pages
);
req
->
r_num_pages
);
rc
=
ceph_osdc_start_request
(
osdc
,
req
,
nofail
);
rc
=
ceph_osdc_start_request
(
osdc
,
req
,
true
);
if
(
!
rc
)
if
(
!
rc
)
rc
=
ceph_osdc_wait_request
(
osdc
,
req
);
rc
=
ceph_osdc_wait_request
(
osdc
,
req
);
...
@@ -2047,7 +1975,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
...
@@ -2047,7 +1975,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
if
(
data_len
>
0
)
{
if
(
data_len
>
0
)
{
int
want
=
calc_pages_for
(
req
->
r_page_alignment
,
data_len
);
int
want
=
calc_pages_for
(
req
->
r_page_alignment
,
data_len
);
if
(
unlikely
(
req
->
r_num_pages
<
want
))
{
if
(
req
->
r_pages
&&
unlikely
(
req
->
r_num_pages
<
want
))
{
pr_warning
(
"tid %lld reply has %d bytes %d pages, we"
pr_warning
(
"tid %lld reply has %d bytes %d pages, we"
" had only %d pages ready
\n
"
,
tid
,
data_len
,
" had only %d pages ready
\n
"
,
tid
,
data_len
,
want
,
req
->
r_num_pages
);
want
,
req
->
r_num_pages
);
...
...
net/ceph/osdmap.c
浏览文件 @
4c7a08c8
...
@@ -13,26 +13,18 @@
...
@@ -13,26 +13,18 @@
char
*
ceph_osdmap_state_str
(
char
*
str
,
int
len
,
int
state
)
char
*
ceph_osdmap_state_str
(
char
*
str
,
int
len
,
int
state
)
{
{
int
flag
=
0
;
if
(
!
len
)
if
(
!
len
)
goto
done
;
return
str
;
*
str
=
'\0'
;
if
((
state
&
CEPH_OSD_EXISTS
)
&&
(
state
&
CEPH_OSD_UP
))
if
(
state
)
{
snprintf
(
str
,
len
,
"exists, up"
);
if
(
state
&
CEPH_OSD_EXISTS
)
{
else
if
(
state
&
CEPH_OSD_EXISTS
)
snprintf
(
str
,
len
,
"exists"
);
snprintf
(
str
,
len
,
"exists"
);
flag
=
1
;
else
if
(
state
&
CEPH_OSD_UP
)
}
snprintf
(
str
,
len
,
"up"
);
if
(
state
&
CEPH_OSD_UP
)
{
else
snprintf
(
str
,
len
,
"%s%s%s"
,
str
,
(
flag
?
", "
:
""
),
"up"
);
flag
=
1
;
}
}
else
{
snprintf
(
str
,
len
,
"doesn't exist"
);
snprintf
(
str
,
len
,
"doesn't exist"
);
}
done:
return
str
;
return
str
;
}
}
...
@@ -170,6 +162,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
...
@@ -170,6 +162,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
c
->
choose_local_tries
=
2
;
c
->
choose_local_tries
=
2
;
c
->
choose_local_fallback_tries
=
5
;
c
->
choose_local_fallback_tries
=
5
;
c
->
choose_total_tries
=
19
;
c
->
choose_total_tries
=
19
;
c
->
chooseleaf_descend_once
=
0
;
ceph_decode_need
(
p
,
end
,
4
*
sizeof
(
u32
),
bad
);
ceph_decode_need
(
p
,
end
,
4
*
sizeof
(
u32
),
bad
);
magic
=
ceph_decode_32
(
p
);
magic
=
ceph_decode_32
(
p
);
...
@@ -336,6 +329,11 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
...
@@ -336,6 +329,11 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
dout
(
"crush decode tunable choose_total_tries = %d"
,
dout
(
"crush decode tunable choose_total_tries = %d"
,
c
->
choose_total_tries
);
c
->
choose_total_tries
);
ceph_decode_need
(
p
,
end
,
sizeof
(
u32
),
done
);
c
->
chooseleaf_descend_once
=
ceph_decode_32
(
p
);
dout
(
"crush decode tunable chooseleaf_descend_once = %d"
,
c
->
chooseleaf_descend_once
);
done:
done:
dout
(
"crush_decode success
\n
"
);
dout
(
"crush_decode success
\n
"
);
return
c
;
return
c
;
...
@@ -1010,7 +1008,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
...
@@ -1010,7 +1008,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
* pass a stride back to the caller.
* pass a stride back to the caller.
*/
*/
int
ceph_calc_file_object_mapping
(
struct
ceph_file_layout
*
layout
,
int
ceph_calc_file_object_mapping
(
struct
ceph_file_layout
*
layout
,
u64
off
,
u64
*
p
len
,
u64
off
,
u64
len
,
u64
*
ono
,
u64
*
ono
,
u64
*
oxoff
,
u64
*
oxlen
)
u64
*
oxoff
,
u64
*
oxlen
)
{
{
...
@@ -1021,7 +1019,7 @@ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
...
@@ -1021,7 +1019,7 @@ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
u32
su_per_object
;
u32
su_per_object
;
u64
t
,
su_offset
;
u64
t
,
su_offset
;
dout
(
"mapping %llu~%llu osize %u fl_su %u
\n
"
,
off
,
*
p
len
,
dout
(
"mapping %llu~%llu osize %u fl_su %u
\n
"
,
off
,
len
,
osize
,
su
);
osize
,
su
);
if
(
su
==
0
||
sc
==
0
)
if
(
su
==
0
||
sc
==
0
)
goto
invalid
;
goto
invalid
;
...
@@ -1054,11 +1052,10 @@ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
...
@@ -1054,11 +1052,10 @@ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
/*
/*
* Calculate the length of the extent being written to the selected
* Calculate the length of the extent being written to the selected
* object. This is the minimum of the full length requested (
p
len) or
* object. This is the minimum of the full length requested (len) or
* the remainder of the current stripe being written to.
* the remainder of the current stripe being written to.
*/
*/
*
oxlen
=
min_t
(
u64
,
*
plen
,
su
-
su_offset
);
*
oxlen
=
min_t
(
u64
,
len
,
su
-
su_offset
);
*
plen
=
*
oxlen
;
dout
(
" obj extent %llu~%llu
\n
"
,
*
oxoff
,
*
oxlen
);
dout
(
" obj extent %llu~%llu
\n
"
,
*
oxoff
,
*
oxlen
);
return
0
;
return
0
;
...
...
net/ceph/pagevec.c
浏览文件 @
4c7a08c8
...
@@ -12,7 +12,7 @@
...
@@ -12,7 +12,7 @@
/*
/*
* build a vector of user pages
* build a vector of user pages
*/
*/
struct
page
**
ceph_get_direct_page_vector
(
const
char
__user
*
data
,
struct
page
**
ceph_get_direct_page_vector
(
const
void
__user
*
data
,
int
num_pages
,
bool
write_page
)
int
num_pages
,
bool
write_page
)
{
{
struct
page
**
pages
;
struct
page
**
pages
;
...
@@ -93,7 +93,7 @@ EXPORT_SYMBOL(ceph_alloc_page_vector);
...
@@ -93,7 +93,7 @@ EXPORT_SYMBOL(ceph_alloc_page_vector);
* copy user data into a page vector
* copy user data into a page vector
*/
*/
int
ceph_copy_user_to_page_vector
(
struct
page
**
pages
,
int
ceph_copy_user_to_page_vector
(
struct
page
**
pages
,
const
char
__user
*
data
,
const
void
__user
*
data
,
loff_t
off
,
size_t
len
)
loff_t
off
,
size_t
len
)
{
{
int
i
=
0
;
int
i
=
0
;
...
@@ -118,17 +118,17 @@ int ceph_copy_user_to_page_vector(struct page **pages,
...
@@ -118,17 +118,17 @@ int ceph_copy_user_to_page_vector(struct page **pages,
}
}
EXPORT_SYMBOL
(
ceph_copy_user_to_page_vector
);
EXPORT_SYMBOL
(
ceph_copy_user_to_page_vector
);
int
ceph_copy_to_page_vector
(
struct
page
**
pages
,
void
ceph_copy_to_page_vector
(
struct
page
**
pages
,
const
char
*
data
,
const
void
*
data
,
loff_t
off
,
size_t
len
)
loff_t
off
,
size_t
len
)
{
{
int
i
=
0
;
int
i
=
0
;
size_t
po
=
off
&
~
PAGE_CACHE_MASK
;
size_t
po
=
off
&
~
PAGE_CACHE_MASK
;
size_t
left
=
len
;
size_t
left
=
len
;
size_t
l
;
while
(
left
>
0
)
{
while
(
left
>
0
)
{
l
=
min_t
(
size_t
,
PAGE_CACHE_SIZE
-
po
,
left
);
size_t
l
=
min_t
(
size_t
,
PAGE_CACHE_SIZE
-
po
,
left
);
memcpy
(
page_address
(
pages
[
i
])
+
po
,
data
,
l
);
memcpy
(
page_address
(
pages
[
i
])
+
po
,
data
,
l
);
data
+=
l
;
data
+=
l
;
left
-=
l
;
left
-=
l
;
...
@@ -138,21 +138,20 @@ int ceph_copy_to_page_vector(struct page **pages,
...
@@ -138,21 +138,20 @@ int ceph_copy_to_page_vector(struct page **pages,
i
++
;
i
++
;
}
}
}
}
return
len
;
}
}
EXPORT_SYMBOL
(
ceph_copy_to_page_vector
);
EXPORT_SYMBOL
(
ceph_copy_to_page_vector
);
int
ceph_copy_from_page_vector
(
struct
page
**
pages
,
void
ceph_copy_from_page_vector
(
struct
page
**
pages
,
char
*
data
,
void
*
data
,
loff_t
off
,
size_t
len
)
loff_t
off
,
size_t
len
)
{
{
int
i
=
0
;
int
i
=
0
;
size_t
po
=
off
&
~
PAGE_CACHE_MASK
;
size_t
po
=
off
&
~
PAGE_CACHE_MASK
;
size_t
left
=
len
;
size_t
left
=
len
;
size_t
l
;
while
(
left
>
0
)
{
while
(
left
>
0
)
{
l
=
min_t
(
size_t
,
PAGE_CACHE_SIZE
-
po
,
left
);
size_t
l
=
min_t
(
size_t
,
PAGE_CACHE_SIZE
-
po
,
left
);
memcpy
(
data
,
page_address
(
pages
[
i
])
+
po
,
l
);
memcpy
(
data
,
page_address
(
pages
[
i
])
+
po
,
l
);
data
+=
l
;
data
+=
l
;
left
-=
l
;
left
-=
l
;
...
@@ -162,7 +161,6 @@ int ceph_copy_from_page_vector(struct page **pages,
...
@@ -162,7 +161,6 @@ int ceph_copy_from_page_vector(struct page **pages,
i
++
;
i
++
;
}
}
}
}
return
len
;
}
}
EXPORT_SYMBOL
(
ceph_copy_from_page_vector
);
EXPORT_SYMBOL
(
ceph_copy_from_page_vector
);
...
@@ -170,7 +168,7 @@ EXPORT_SYMBOL(ceph_copy_from_page_vector);
...
@@ -170,7 +168,7 @@ EXPORT_SYMBOL(ceph_copy_from_page_vector);
* copy user data from a page vector into a user pointer
* copy user data from a page vector into a user pointer
*/
*/
int
ceph_copy_page_vector_to_user
(
struct
page
**
pages
,
int
ceph_copy_page_vector_to_user
(
struct
page
**
pages
,
char
__user
*
data
,
void
__user
*
data
,
loff_t
off
,
size_t
len
)
loff_t
off
,
size_t
len
)
{
{
int
i
=
0
;
int
i
=
0
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录