osd_client.h 14.7 KB
Newer Older
1
/* SPDX-License-Identifier: GPL-2.0 */
S
Sage Weil 已提交
2 3 4
#ifndef _FS_CEPH_OSD_CLIENT_H
#define _FS_CEPH_OSD_CLIENT_H

5
#include <linux/bitrev.h>
S
Sage Weil 已提交
6
#include <linux/completion.h>
S
Sage Weil 已提交
7
#include <linux/kref.h>
S
Sage Weil 已提交
8 9
#include <linux/mempool.h>
#include <linux/rbtree.h>
10
#include <linux/refcount.h>
S
Sage Weil 已提交
11

12 13 14
#include <linux/ceph/types.h>
#include <linux/ceph/osdmap.h>
#include <linux/ceph/messenger.h>
15
#include <linux/ceph/msgpool.h>
16
#include <linux/ceph/auth.h>
17
#include <linux/ceph/pagelist.h>
S
Sage Weil 已提交
18 19 20 21 22 23 24 25 26

struct ceph_msg;
struct ceph_snap_context;
struct ceph_osd_request;
struct ceph_osd_client;

/*
 * Completion callback type for async writepages.  The callback takes
 * the osd request as its only argument and returns nothing.
 */
typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *);
S
Sage Weil 已提交
28

29 30
/*
 * Sentinel osd number.  The expansion is parenthesized so the negative
 * literal cannot combine with neighbouring operators at the use site.
 * NOTE(review): presumably the o_osd value of osdc->homeless_osd -- confirm.
 */
#define CEPH_HOMELESS_OSD	(-1)

S
Sage Weil 已提交
31 32
/* a given osd we're communicating with */
struct ceph_osd {
33
	refcount_t o_ref;
S
Sage Weil 已提交
34 35 36 37 38
	struct ceph_osd_client *o_osdc;
	int o_osd;
	int o_incarnation;
	struct rb_node o_node;
	struct ceph_connection o_con;
39
	struct rb_root o_requests;
40
	struct rb_root o_linger_requests;
41 42
	struct rb_root o_backoff_mappings;
	struct rb_root o_backoffs_by_id;
43
	struct list_head o_osd_lru;
44
	struct ceph_auth_handshake o_auth;
45
	unsigned long lru_ttl;
46
	struct list_head o_keepalive_item;
47
	struct mutex lock;
S
Sage Weil 已提交
48 49
};

50 51
/*
 * NOTE(review): judging by the names, CEPH_OSD_SLAB_OPS is presumably
 * the number of ops that slab-allocated requests are sized for and
 * CEPH_OSD_MAX_OPS the upper bound on ops per request -- confirm
 * against osd_client.c.
 */
#define CEPH_OSD_SLAB_OPS	2
#define CEPH_OSD_MAX_OPS	16
52

53
/*
 * Type tag stored in struct ceph_osd_data.type, next to a union of
 * page-vector, pagelist, bio and bvec descriptors.
 *
 * CEPH_OSD_DATA_TYPE_BIO only exists when CONFIG_BLOCK is set, so the
 * numeric value of CEPH_OSD_DATA_TYPE_BVECS depends on that option.
 */
enum ceph_osd_data_type {
	CEPH_OSD_DATA_TYPE_NONE = 0,
	CEPH_OSD_DATA_TYPE_PAGES,
	CEPH_OSD_DATA_TYPE_PAGELIST,
#ifdef CONFIG_BLOCK
	CEPH_OSD_DATA_TYPE_BIO,
#endif /* CONFIG_BLOCK */
	CEPH_OSD_DATA_TYPE_BVECS,
};

63
struct ceph_osd_data {
64 65
	enum ceph_osd_data_type	type;
	union {
66 67
		struct {
			struct page	**pages;
68
			u64		length;
69 70 71 72
			u32		alignment;
			bool		pages_from_pool;
			bool		own_pages;
		};
73
		struct ceph_pagelist	*pagelist;
74
#ifdef CONFIG_BLOCK
75
		struct {
76 77
			struct ceph_bio_iter	bio_pos;
			u32			bio_length;
78
		};
79
#endif /* CONFIG_BLOCK */
80
		struct ceph_bvec_iter	bvec_pos;
81 82 83
	};
};

84 85
struct ceph_osd_req_op {
	u16 op;           /* CEPH_OSD_OP_* */
86
	u32 flags;        /* CEPH_OSD_OP_FLAG_* */
87
	u32 indata_len;   /* request */
88 89 90
	u32 outdata_len;  /* reply */
	s32 rval;

91
	union {
A
Alex Elder 已提交
92
		struct ceph_osd_data raw_data_in;
93 94 95 96
		struct {
			u64 offset, length;
			u64 truncate_size;
			u32 truncate_seq;
97
			struct ceph_osd_data osd_data;
98
		} extent;
99
		struct {
100 101
			u32 name_len;
			u32 value_len;
102 103 104 105
			__u8 cmp_op;       /* CEPH_OSD_CMPXATTR_OP_* */
			__u8 cmp_mode;     /* CEPH_OSD_CMPXATTR_MODE_* */
			struct ceph_osd_data osd_data;
		} xattr;
106 107 108
		struct {
			const char *class_name;
			const char *method_name;
109
			struct ceph_osd_data request_info;
110
			struct ceph_osd_data request_data;
111
			struct ceph_osd_data response_data;
112 113
			__u8 class_len;
			__u8 method_len;
114
			u32 indata_len;
115 116 117
		} cls;
		struct {
			u64 cookie;
118 119
			__u8 op;           /* CEPH_OSD_WATCH_OP_ */
			u32 gen;
120
		} watch;
121 122 123
		struct {
			struct ceph_osd_data request_data;
		} notify_ack;
124 125 126 127 128
		struct {
			u64 cookie;
			struct ceph_osd_data request_data;
			struct ceph_osd_data response_data;
		} notify;
129 130 131
		struct {
			struct ceph_osd_data response_data;
		} list_watchers;
132 133 134 135
		struct {
			u64 expected_object_size;
			u64 expected_write_size;
		} alloc_hint;
136 137 138
	};
};

139 140 141 142 143 144
struct ceph_osd_request_target {
	struct ceph_object_id base_oid;
	struct ceph_object_locator base_oloc;
	struct ceph_object_id target_oid;
	struct ceph_object_locator target_oloc;

145 146
	struct ceph_pg pgid;               /* last raw pg we mapped to */
	struct ceph_spg spgid;             /* last actual spg we mapped to */
147 148 149 150 151 152 153
	u32 pg_num;
	u32 pg_num_mask;
	struct ceph_osds acting;
	struct ceph_osds up;
	int size;
	int min_size;
	bool sort_bitwise;
154
	bool recovery_deletes;
155 156 157 158

	unsigned int flags;                /* CEPH_OSD_FLAG_* */
	bool paused;

159
	u32 epoch;
160 161
	u32 last_force_resend;

162 163 164
	int osd;
};

S
Sage Weil 已提交
165 166 167 168
/* an in-flight request */
struct ceph_osd_request {
	u64             r_tid;              /* unique for this client */
	struct rb_node  r_node;
I
Ilya Dryomov 已提交
169
	struct rb_node  r_mc_node;          /* map check */
S
Sage Weil 已提交
170
	struct ceph_osd *r_osd;
171 172 173 174 175

	struct ceph_osd_request_target r_t;
#define r_base_oid	r_t.base_oid
#define r_base_oloc	r_t.base_oloc
#define r_flags		r_t.flags
S
Sage Weil 已提交
176 177 178

	struct ceph_msg  *r_request, *r_reply;
	u32               r_sent;      /* >0 if r_request is sending/sent */
179

180 181 182
	/* request osd ops array  */
	unsigned int		r_num_ops;

183
	int               r_result;
S
Sage Weil 已提交
184 185

	struct ceph_osd_client *r_osdc;
S
Sage Weil 已提交
186
	struct kref       r_kref;
S
Sage Weil 已提交
187
	bool              r_mempool;
I
Ilya Dryomov 已提交
188
	struct completion r_completion;       /* private to osd_client.c */
189
	ceph_osdc_callback_t r_callback;
S
Sage Weil 已提交
190 191 192
	struct list_head  r_unsafe_item;

	struct inode *r_inode;         	      /* for use by callbacks */
193
	void *r_priv;			      /* ditto */
S
Sage Weil 已提交
194

195 196 197 198 199
	/* set by submitter */
	u64 r_snapid;                         /* for reads, CEPH_NOSNAP o/w */
	struct ceph_snap_context *r_snapc;    /* for writes */
	struct timespec r_mtime;              /* ditto */
	u64 r_data_offset;                    /* ditto */
200
	bool r_linger;                        /* don't resend on failure */
201
	bool r_abort_on_full;		      /* return ENOSPC when full */
S
Sage Weil 已提交
202

203 204
	/* internal */
	unsigned long r_stamp;                /* jiffies, send or check time */
205
	unsigned long r_start_stamp;          /* jiffies */
206
	int r_attempts;
I
Ilya Dryomov 已提交
207
	u32 r_map_dne_bound;
208 209

	struct ceph_osd_req_op r_ops[];
S
Sage Weil 已提交
210 211
};

212 213 214 215
/* Request redirect: wraps a single object locator. */
struct ceph_request_redirect {
	struct ceph_object_locator oloc;
};

216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232
/*
 * osd request identifier
 *
 * caller name + incarnation# + tid to uniquely identify this request
 *
 * The struct is packed and tid/inc are stored little-endian.
 */
struct ceph_osd_reqid {
	struct ceph_entity_name name;
	__le64 tid;
	__le32 inc;
} __packed;

/*
 * blkin trace info: trace, span and parent-span ids.  Packed, with all
 * three fields stored as little-endian 64-bit values.
 */
struct ceph_blkin_trace_info {
	__le64 trace_id;
	__le64 span_id;
	__le64 parent_span_id;
} __packed;

233 234 235 236 237
/*
 * Callback types taken by ceph_osdc_watch() (its wcb and errcb
 * parameters).  Both receive an opaque arg pointer first.
 * NOTE(review): arg is presumably the data pointer that was passed to
 * ceph_osdc_watch() -- confirm in osd_client.c.
 */
typedef void (*rados_watchcb2_t)(void *arg, u64 notify_id, u64 cookie,
				 u64 notifier_id, void *data, size_t data_len);
typedef void (*rados_watcherrcb_t)(void *arg, u64 cookie, int err);

struct ceph_osd_linger_request {
238
	struct ceph_osd_client *osdc;
239 240
	u64 linger_id;
	bool committed;
241
	bool is_watch;                  /* watch or notify */
242 243 244 245 246

	struct ceph_osd *osd;
	struct ceph_osd_request *reg_req;
	struct ceph_osd_request *ping_req;
	unsigned long ping_sent;
247 248
	unsigned long watch_valid_thru;
	struct list_head pending_lworks;
249 250

	struct ceph_osd_request_target t;
I
Ilya Dryomov 已提交
251
	u32 map_dne_bound;
252 253 254

	struct timespec mtime;

255
	struct kref kref;
256 257 258
	struct mutex lock;
	struct rb_node node;            /* osd */
	struct rb_node osdc_node;       /* osdc */
I
Ilya Dryomov 已提交
259
	struct rb_node mc_node;         /* map check */
260 261 262
	struct list_head scan_item;

	struct completion reg_commit_wait;
263
	struct completion notify_finish_wait;
264
	int reg_commit_error;
265
	int notify_finish_error;
266 267 268
	int last_error;

	u32 register_gen;
269
	u64 notify_id;
270

271 272 273
	rados_watchcb2_t wcb;
	rados_watcherrcb_t errcb;
	void *data;
274 275 276

	struct page ***preply_pages;
	size_t *preply_len;
277 278
};

279 280 281 282 283 284
/*
 * One watcher entry: entity name, cookie and address.  This is the
 * element type of the array handed back by ceph_osdc_list_watchers().
 */
struct ceph_watch_item {
	struct ceph_entity_name name;
	u64 cookie;
	struct ceph_entity_addr addr;
};

285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326
/*
 * Per-spg entry holding an rb-tree of backoffs.
 * NOTE(review): node is presumably linked into
 * ceph_osd.o_backoff_mappings and backoffs presumably holds
 * ceph_osd_backoff.spg_node entries -- confirm in osd_client.c.
 */
struct ceph_spg_mapping {
	struct rb_node node;
	struct ceph_spg spgid;

	struct rb_root backoffs;
};

/*
 * Hashed object id.  key, oid and nspace are each a pointer paired
 * with an explicit length field.
 */
struct ceph_hobject_id {
	void *key;
	size_t key_len;
	void *oid;
	size_t oid_len;
	u64 snapid;
	u32 hash;
	u8 is_max;
	void *nspace;
	size_t nspace_len;
	s64 pool;

	/* cache: bitrev32(hash), filled in by ceph_hoid_build_hash_cache() */
	u32 hash_reverse_bits;
};

/*
 * Refresh the cached bit-reversed hash of @hoid: stores
 * bitrev32(hoid->hash) in hoid->hash_reverse_bits.
 */
static inline void ceph_hoid_build_hash_cache(struct ceph_hobject_id *hoid)
{
	struct ceph_hobject_id *const id = hoid;

	id->hash_reverse_bits = bitrev32(id->hash);
}

/*
 * PG-wide backoff: [begin, end)
 * per-object backoff: begin == end
 *
 * Carries two rb-tree nodes (spg_node and id_node), so one backoff can
 * be linked into two trees at once.
 * NOTE(review): presumably ceph_spg_mapping.backoffs and
 * ceph_osd.o_backoffs_by_id respectively -- confirm in osd_client.c.
 */
struct ceph_osd_backoff {
	struct rb_node spg_node;
	struct rb_node id_node;

	struct ceph_spg spgid;
	u64 id;
	struct ceph_hobject_id *begin;
	struct ceph_hobject_id *end;
};

327 328
/*
 * 64-bit constant with the top 16 bits set.
 * NOTE(review): presumably the starting value for linger ids (see
 * ceph_osd_client.last_linger_id) -- confirm in osd_client.c.
 */
#define CEPH_LINGER_ID_START	0xffff000000000000ULL

S
Sage Weil 已提交
329 330 331 332
struct ceph_osd_client {
	struct ceph_client     *client;

	struct ceph_osdmap     *osdmap;       /* current map */
333
	struct rw_semaphore    lock;
S
Sage Weil 已提交
334 335

	struct rb_root         osds;          /* osds */
336
	struct list_head       osd_lru;       /* idle osds */
337
	spinlock_t             osd_lru_lock;
338
	u32		       epoch_barrier;
339 340
	struct ceph_osd        homeless_osd;
	atomic64_t             last_tid;      /* tid of last request */
341 342
	u64                    last_linger_id;
	struct rb_root         linger_requests; /* lingering requests */
I
Ilya Dryomov 已提交
343 344
	struct rb_root         map_checks;
	struct rb_root         linger_map_checks;
345 346
	atomic_t               num_requests;
	atomic_t               num_homeless;
S
Sage Weil 已提交
347
	struct delayed_work    timeout_work;
348
	struct delayed_work    osds_timeout_work;
349
#ifdef CONFIG_DEBUG_FS
S
Sage Weil 已提交
350
	struct dentry 	       *debugfs_file;
351
#endif
S
Sage Weil 已提交
352 353 354

	mempool_t              *req_mempool;

355
	struct ceph_msgpool	msgpool_op;
S
Sage Weil 已提交
356
	struct ceph_msgpool	msgpool_op_reply;
357 358

	struct workqueue_struct	*notify_wq;
S
Sage Weil 已提交
359 360
};

361 362 363 364 365
/*
 * Report whether any bit of @flag is set in the flags word of the
 * client's current osdmap.  Dereferences osdc->osdmap unconditionally.
 */
static inline bool ceph_osdmap_flag(struct ceph_osd_client *osdc, int flag)
{
	return (osdc->osdmap->flags & flag) != 0;
}

366 367 368
/*
 * Global setup/cleanup pair; neither takes a client argument.
 * ceph_osdc_setup() reports its outcome as an int.
 */
int ceph_osdc_setup(void);
void ceph_osdc_cleanup(void);

S
Sage Weil 已提交
369 370 371 372 373 374 375 376
extern int ceph_osdc_init(struct ceph_osd_client *osdc,
			  struct ceph_client *client);
extern void ceph_osdc_stop(struct ceph_osd_client *osdc);

extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
				   struct ceph_msg *msg);
extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
				 struct ceph_msg *msg);
377
void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb);
S
Sage Weil 已提交
378

A
Alex Elder 已提交
379
/*
 * Set up one op of @osd_req with @opcode and @flags.
 * NOTE(review): @which presumably indexes osd_req->r_ops[] -- confirm.
 */
extern void osd_req_op_init(struct ceph_osd_request *osd_req,
			    unsigned int which, u16 opcode, u32 flags);
A
Alex Elder 已提交
381 382 383 384 385 386 387

/*
 * Page-vector setter for the raw_data_in of op @which.  The trailing
 * arguments carry the same names as the page-vector fields of
 * struct ceph_osd_data.
 * NOTE(review): presumably they are stored there as-is -- confirm.
 */
void osd_req_op_raw_data_in_pages(struct ceph_osd_request *osd_req,
				  unsigned int which,
				  struct page **pages, u64 length,
				  u32 alignment, bool pages_from_pool,
				  bool own_pages);

388 389
/*
 * Extent op helpers.  The init arguments carry the same names as the
 * fields of the extent member of struct ceph_osd_req_op.
 * NOTE(review): @which presumably indexes osd_req->r_ops[] -- confirm.
 */
extern void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
					unsigned int which, u16 opcode,
					u64 offset, u64 length,
					u64 truncate_size, u32 truncate_seq);
extern void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
					unsigned int which, u64 length);
extern void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req,
				       unsigned int which, u64 offset_inc);

/* Returns a pointer to a struct ceph_osd_data for extent op @which. */
extern struct ceph_osd_data *osd_req_op_extent_osd_data(
					struct ceph_osd_request *osd_req,
					unsigned int which);
400 401

/*
 * Setters for the data of extent op @which, one per data
 * representation (page vector, pagelist, bio, bvec iterator).  The
 * bio variant is only declared when CONFIG_BLOCK is set.
 */
extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *,
					unsigned int which,
					struct page **pages, u64 length,
					u32 alignment, bool pages_from_pool,
					bool own_pages);
extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *,
					unsigned int which,
					struct ceph_pagelist *pagelist);
#ifdef CONFIG_BLOCK
void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
				    unsigned int which,
				    struct ceph_bio_iter *bio_pos,
				    u32 bio_length);
#endif /* CONFIG_BLOCK */
void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
					 unsigned int which,
					 struct ceph_bvec_iter *bvec_pos);
418

419 420 421
extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *,
					unsigned int which,
					struct ceph_pagelist *pagelist);
422 423 424 425 426
extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *,
					unsigned int which,
					struct page **pages, u64 length,
					u32 alignment, bool pages_from_pool,
					bool own_pages);
427 428 429
void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req,
				       unsigned int which,
				       struct bio_vec *bvecs, u32 bytes);
430
extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *,
431
					unsigned int which,
432 433 434
					struct page **pages, u64 length,
					u32 alignment, bool pages_from_pool,
					bool own_pages);
435 436
extern void osd_req_op_cls_init(struct ceph_osd_request *osd_req,
					unsigned int which, u16 opcode,
437
					const char *class, const char *method);
438 439 440
/*
 * Xattr op initialiser for op @which; reports its outcome as an int.
 * @value and @size describe the attribute value, and @cmp_op and
 * @cmp_mode are CEPH_OSD_CMPXATTR_* values.
 */
int osd_req_op_xattr_init(struct ceph_osd_request *osd_req,
			  unsigned int which, u16 opcode,
			  const char *name, const void *value, size_t size,
			  u8 cmp_op, u8 cmp_mode);
441 442 443 444
/*
 * Alloc-hint op initialiser for op @which.  The two size arguments
 * carry the same names as the fields of the alloc_hint member of
 * struct ceph_osd_req_op.
 */
void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
				unsigned int which,
				u64 expected_object_size,
				u64 expected_write_size);
445

446 447
extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
					       struct ceph_snap_context *snapc,
448
					       unsigned int num_ops,
449
					       bool use_mempool,
450
					       gfp_t gfp_flags);
451
int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp);
452

S
Sage Weil 已提交
453 454 455
extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
				      struct ceph_file_layout *layout,
				      struct ceph_vino vino,
456
				      u64 offset, u64 *len,
457 458
				      unsigned int which, int num_ops,
				      int opcode, int flags,
S
Sage Weil 已提交
459
				      struct ceph_snap_context *snapc,
460
				      u32 truncate_seq, u64 truncate_size,
461
				      bool use_mempool);
S
Sage Weil 已提交
462

463 464
/*
 * Get/put pair for a request.
 * NOTE(review): presumably these take and drop a reference on
 * req->r_kref -- confirm in osd_client.c.
 */
void ceph_osdc_get_request(struct ceph_osd_request *req);
void ceph_osdc_put_request(struct ceph_osd_request *req);
S
Sage Weil 已提交
465 466 467 468

/*
 * Request submission and control: start, cancel, wait for one request,
 * and sync on the whole client.  The start and wait calls return an
 * int status.
 */
extern int ceph_osdc_start_request(struct ceph_osd_client *osdc,
				   struct ceph_osd_request *req,
				   bool nofail);
extern void ceph_osdc_cancel_request(struct ceph_osd_request *req);
extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
				  struct ceph_osd_request *req);
extern void ceph_osdc_sync(struct ceph_osd_client *osdc);

474
/*
 * Client-wide helpers taking only @osdc.
 * NOTE(review): flush_notifies presumably drains osdc->notify_wq --
 * confirm in osd_client.c.
 */
extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc);
void ceph_osdc_maybe_request_map(struct ceph_osd_client *osdc);
476

477 478 479 480 481 482 483 484
/*
 * Call @method of object class @class on the object named by
 * @oid/@oloc; returns an int status.  Request and response data are
 * each passed as a single struct page pointer with a length; the
 * response length is passed by pointer.
 * NOTE(review): presumably *resp_len is updated with the reply size --
 * confirm in osd_client.c.
 */
int ceph_osdc_call(struct ceph_osd_client *osdc,
		   struct ceph_object_id *oid,
		   struct ceph_object_locator *oloc,
		   const char *class, const char *method,
		   unsigned int flags,
		   struct page *req_page, size_t req_len,
		   struct page *resp_page, size_t *resp_len);

S
Sage Weil 已提交
485 486 487 488 489
/*
 * Read into a page vector; returns an int.  @plen is passed by pointer.
 * NOTE(review): presumably *plen may be adjusted by the callee --
 * confirm in osd_client.c.
 */
extern int ceph_osdc_readpages(struct ceph_osd_client *osdc,
			       struct ceph_vino vino,
			       struct ceph_file_layout *layout,
			       u64 off, u64 *plen,
			       u32 truncate_seq, u64 truncate_size,
			       struct page **pages, int nr_pages,
			       int page_align);
S
Sage Weil 已提交
492 493 494 495 496 497 498 499

/*
 * Write from a page vector; returns an int.  Unlike
 * ceph_osdc_readpages(), the length is passed by value, and a snap
 * context and mtime are supplied.
 */
extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
				struct ceph_vino vino,
				struct ceph_file_layout *layout,
				struct ceph_snap_context *sc,
				u64 off, u64 len,
				u32 truncate_seq, u64 truncate_size,
				struct timespec *mtime,
				struct page **pages, int nr_pages);
S
Sage Weil 已提交
501

502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519
/*
 * watch/notify
 *
 * ceph_osdc_watch() takes the two watch callbacks plus an opaque data
 * pointer and returns a linger request; ceph_osdc_unwatch() takes that
 * linger request back.  ceph_osdc_notify_ack() identifies the notify
 * by notify_id and cookie and carries an optional payload buffer.
 */
struct ceph_osd_linger_request *
ceph_osdc_watch(struct ceph_osd_client *osdc,
		struct ceph_object_id *oid,
		struct ceph_object_locator *oloc,
		rados_watchcb2_t wcb,
		rados_watcherrcb_t errcb,
		void *data);
int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
		      struct ceph_osd_linger_request *lreq);

int ceph_osdc_notify_ack(struct ceph_osd_client *osdc,
			 struct ceph_object_id *oid,
			 struct ceph_object_locator *oloc,
			 u64 notify_id,
			 u64 cookie,
			 void *payload,
			 size_t payload_len);
520 521 522 523 524 525 526 527
/*
 * Send a notify on @oid/@oloc with an optional payload.  The reply is
 * handed back through the preply_pages/preply_len out-pointers.
 * NOTE(review): units of @timeout are not visible here -- confirm.
 */
int ceph_osdc_notify(struct ceph_osd_client *osdc,
		     struct ceph_object_id *oid,
		     struct ceph_object_locator *oloc,
		     void *payload,
		     size_t payload_len,
		     u32 timeout,
		     struct page ***preply_pages,
		     size_t *preply_len);
/* Check on an existing watch; returns an int. */
int ceph_osdc_watch_check(struct ceph_osd_client *osdc,
			  struct ceph_osd_linger_request *lreq);
/*
 * List the watchers of @oid/@oloc.  The array of struct ceph_watch_item
 * and its element count are returned through the two out-pointers.
 * NOTE(review): ownership of *watchers is not visible here -- confirm.
 */
int ceph_osdc_list_watchers(struct ceph_osd_client *osdc,
			    struct ceph_object_id *oid,
			    struct ceph_object_locator *oloc,
			    struct ceph_watch_item **watchers,
			    u32 *num_watchers);
S
Sage Weil 已提交
535 536
#endif