osd_client.h 15.0 KB
Newer Older
1
/* SPDX-License-Identifier: GPL-2.0 */
S
Sage Weil 已提交
2 3 4
#ifndef _FS_CEPH_OSD_CLIENT_H
#define _FS_CEPH_OSD_CLIENT_H

5
#include <linux/bitrev.h>
S
Sage Weil 已提交
6
#include <linux/completion.h>
S
Sage Weil 已提交
7
#include <linux/kref.h>
S
Sage Weil 已提交
8 9
#include <linux/mempool.h>
#include <linux/rbtree.h>
10
#include <linux/refcount.h>
S
Sage Weil 已提交
11

12 13 14
#include <linux/ceph/types.h>
#include <linux/ceph/osdmap.h>
#include <linux/ceph/messenger.h>
15
#include <linux/ceph/msgpool.h>
16
#include <linux/ceph/auth.h>
17
#include <linux/ceph/pagelist.h>
S
Sage Weil 已提交
18 19 20 21 22 23 24 25 26

struct ceph_msg;
struct ceph_snap_context;
struct ceph_osd_request;
struct ceph_osd_client;

/*
 * Completion callback for async writepages.
 *
 * Receives the request it was registered on (see the r_callback member of
 * struct ceph_osd_request).
 */
typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *);
S
Sage Weil 已提交
28

29 30
/*
 * Parenthesized so the negative literal cannot bind to a neighbouring
 * operator at the expansion site.
 * NOTE(review): presumably the o_osd value of the client's homeless_osd
 * placeholder -- confirm against osd_client.c.
 */
#define CEPH_HOMELESS_OSD	(-1)

S
Sage Weil 已提交
31 32
/* a given osd we're communicating with */
struct ceph_osd {
33
	refcount_t o_ref;
S
Sage Weil 已提交
34 35 36 37 38
	struct ceph_osd_client *o_osdc;
	int o_osd;
	int o_incarnation;
	struct rb_node o_node;
	struct ceph_connection o_con;
39
	struct rb_root o_requests;
40
	struct rb_root o_linger_requests;
41 42
	struct rb_root o_backoff_mappings;
	struct rb_root o_backoffs_by_id;
43
	struct list_head o_osd_lru;
44
	struct ceph_auth_handshake o_auth;
45
	unsigned long lru_ttl;
46
	struct list_head o_keepalive_item;
47
	struct mutex lock;
S
Sage Weil 已提交
48 49
};

50 51
/*
 * Op-count constants for OSD requests.
 * NOTE(review): presumably SLAB_OPS is the largest op count served from a
 * dedicated slab cache and MAX_OPS the hard per-request limit -- confirm
 * against osd_client.c.
 */
#define CEPH_OSD_SLAB_OPS	2
#define CEPH_OSD_MAX_OPS	16
52

53
/*
 * Discriminator stored in ceph_osd_data.type.
 *
 * NONE is explicitly 0.  The BIO value only exists when CONFIG_BLOCK is
 * set, so the numeric value of BVECS depends on that option.
 */
enum ceph_osd_data_type {
	CEPH_OSD_DATA_TYPE_NONE = 0,
	CEPH_OSD_DATA_TYPE_PAGES,
	CEPH_OSD_DATA_TYPE_PAGELIST,
#ifdef CONFIG_BLOCK
	CEPH_OSD_DATA_TYPE_BIO,
#endif /* CONFIG_BLOCK */
	CEPH_OSD_DATA_TYPE_BVECS,
};

63
struct ceph_osd_data {
64 65
	enum ceph_osd_data_type	type;
	union {
66 67
		struct {
			struct page	**pages;
68
			u64		length;
69 70 71 72
			u32		alignment;
			bool		pages_from_pool;
			bool		own_pages;
		};
73
		struct ceph_pagelist	*pagelist;
74
#ifdef CONFIG_BLOCK
75
		struct {
76 77
			struct ceph_bio_iter	bio_pos;
			u32			bio_length;
78
		};
79
#endif /* CONFIG_BLOCK */
80 81 82 83
		struct {
			struct ceph_bvec_iter	bvec_pos;
			u32			num_bvecs;
		};
84 85 86
	};
};

87 88
struct ceph_osd_req_op {
	u16 op;           /* CEPH_OSD_OP_* */
89
	u32 flags;        /* CEPH_OSD_OP_FLAG_* */
90
	u32 indata_len;   /* request */
91 92 93
	u32 outdata_len;  /* reply */
	s32 rval;

94
	union {
A
Alex Elder 已提交
95
		struct ceph_osd_data raw_data_in;
96 97 98 99
		struct {
			u64 offset, length;
			u64 truncate_size;
			u32 truncate_seq;
100
			struct ceph_osd_data osd_data;
101
		} extent;
102
		struct {
103 104
			u32 name_len;
			u32 value_len;
105 106 107 108
			__u8 cmp_op;       /* CEPH_OSD_CMPXATTR_OP_* */
			__u8 cmp_mode;     /* CEPH_OSD_CMPXATTR_MODE_* */
			struct ceph_osd_data osd_data;
		} xattr;
109 110 111
		struct {
			const char *class_name;
			const char *method_name;
112
			struct ceph_osd_data request_info;
113
			struct ceph_osd_data request_data;
114
			struct ceph_osd_data response_data;
115 116
			__u8 class_len;
			__u8 method_len;
117
			u32 indata_len;
118 119 120
		} cls;
		struct {
			u64 cookie;
121 122
			__u8 op;           /* CEPH_OSD_WATCH_OP_ */
			u32 gen;
123
		} watch;
124 125 126
		struct {
			struct ceph_osd_data request_data;
		} notify_ack;
127 128 129 130 131
		struct {
			u64 cookie;
			struct ceph_osd_data request_data;
			struct ceph_osd_data response_data;
		} notify;
132 133 134
		struct {
			struct ceph_osd_data response_data;
		} list_watchers;
135 136 137 138
		struct {
			u64 expected_object_size;
			u64 expected_write_size;
		} alloc_hint;
139 140 141
	};
};

142 143 144 145 146 147
struct ceph_osd_request_target {
	struct ceph_object_id base_oid;
	struct ceph_object_locator base_oloc;
	struct ceph_object_id target_oid;
	struct ceph_object_locator target_oloc;

148 149
	struct ceph_pg pgid;               /* last raw pg we mapped to */
	struct ceph_spg spgid;             /* last actual spg we mapped to */
150 151 152 153 154 155 156
	u32 pg_num;
	u32 pg_num_mask;
	struct ceph_osds acting;
	struct ceph_osds up;
	int size;
	int min_size;
	bool sort_bitwise;
157
	bool recovery_deletes;
158 159 160 161

	unsigned int flags;                /* CEPH_OSD_FLAG_* */
	bool paused;

162
	u32 epoch;
163 164
	u32 last_force_resend;

165 166 167
	int osd;
};

S
Sage Weil 已提交
168 169 170 171
/* an in-flight request */
struct ceph_osd_request {
	u64             r_tid;              /* unique for this client */
	struct rb_node  r_node;
I
Ilya Dryomov 已提交
172
	struct rb_node  r_mc_node;          /* map check */
S
Sage Weil 已提交
173
	struct ceph_osd *r_osd;
174 175 176 177 178

	struct ceph_osd_request_target r_t;
#define r_base_oid	r_t.base_oid
#define r_base_oloc	r_t.base_oloc
#define r_flags		r_t.flags
S
Sage Weil 已提交
179 180 181

	struct ceph_msg  *r_request, *r_reply;
	u32               r_sent;      /* >0 if r_request is sending/sent */
182

183 184 185
	/* request osd ops array  */
	unsigned int		r_num_ops;

186
	int               r_result;
S
Sage Weil 已提交
187 188

	struct ceph_osd_client *r_osdc;
S
Sage Weil 已提交
189
	struct kref       r_kref;
S
Sage Weil 已提交
190
	bool              r_mempool;
I
Ilya Dryomov 已提交
191
	struct completion r_completion;       /* private to osd_client.c */
192
	ceph_osdc_callback_t r_callback;
S
Sage Weil 已提交
193 194 195
	struct list_head  r_unsafe_item;

	struct inode *r_inode;         	      /* for use by callbacks */
196
	void *r_priv;			      /* ditto */
S
Sage Weil 已提交
197

198 199 200 201 202
	/* set by submitter */
	u64 r_snapid;                         /* for reads, CEPH_NOSNAP o/w */
	struct ceph_snap_context *r_snapc;    /* for writes */
	struct timespec r_mtime;              /* ditto */
	u64 r_data_offset;                    /* ditto */
203
	bool r_linger;                        /* don't resend on failure */
204
	bool r_abort_on_full;		      /* return ENOSPC when full */
S
Sage Weil 已提交
205

206 207
	/* internal */
	unsigned long r_stamp;                /* jiffies, send or check time */
208
	unsigned long r_start_stamp;          /* jiffies */
209
	int r_attempts;
I
Ilya Dryomov 已提交
210
	u32 r_map_dne_bound;
211 212

	struct ceph_osd_req_op r_ops[];
S
Sage Weil 已提交
213 214
};

215 216 217 218
/*
 * Redirect information; currently carries only an object locator.
 * NOTE(review): presumably decoded from an OSD reply that redirects the
 * request -- confirm against the reply handler.
 */
struct ceph_request_redirect {
	struct ceph_object_locator oloc;
};

219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235
/*
 * osd request identifier
 *
 * caller name + incarnation# + tid uniquely identify a request.
 * Packed, with little-endian tid and inc fields.
 */
struct ceph_osd_reqid {
	struct ceph_entity_name name;
	__le64 tid;
	__le32 inc;
} __packed;

/*
 * Three little-endian 64-bit trace identifiers, packed.
 * NOTE(review): presumably mirrors the blkin tracing blob carried in OSD
 * op messages -- confirm against the message encoder.
 */
struct ceph_blkin_trace_info {
	__le64 trace_id;
	__le64 span_id;
	__le64 parent_span_id;
} __packed;

236 237 238 239 240
/*
 * Watch callback types, stored in the wcb / errcb members of
 * struct ceph_osd_linger_request.
 *
 * rados_watchcb2_t:   takes @arg, the notify id, the cookie, the notifier
 *                     id and a data buffer with its length.
 * rados_watcherrcb_t: takes @arg, the cookie and an error code.
 *
 * NOTE(review): @arg is presumably the opaque pointer handed to
 * ceph_osdc_watch() -- confirm.
 */
typedef void (*rados_watchcb2_t)(void *arg, u64 notify_id, u64 cookie,
				 u64 notifier_id, void *data, size_t data_len);
typedef void (*rados_watcherrcb_t)(void *arg, u64 cookie, int err);

struct ceph_osd_linger_request {
241
	struct ceph_osd_client *osdc;
242 243
	u64 linger_id;
	bool committed;
244
	bool is_watch;                  /* watch or notify */
245 246 247 248 249

	struct ceph_osd *osd;
	struct ceph_osd_request *reg_req;
	struct ceph_osd_request *ping_req;
	unsigned long ping_sent;
250 251
	unsigned long watch_valid_thru;
	struct list_head pending_lworks;
252 253

	struct ceph_osd_request_target t;
I
Ilya Dryomov 已提交
254
	u32 map_dne_bound;
255 256 257

	struct timespec mtime;

258
	struct kref kref;
259 260 261
	struct mutex lock;
	struct rb_node node;            /* osd */
	struct rb_node osdc_node;       /* osdc */
I
Ilya Dryomov 已提交
262
	struct rb_node mc_node;         /* map check */
263 264 265
	struct list_head scan_item;

	struct completion reg_commit_wait;
266
	struct completion notify_finish_wait;
267
	int reg_commit_error;
268
	int notify_finish_error;
269 270 271
	int last_error;

	u32 register_gen;
272
	u64 notify_id;
273

274 275 276
	rados_watchcb2_t wcb;
	rados_watcherrcb_t errcb;
	void *data;
277 278 279

	struct page ***preply_pages;
	size_t *preply_len;
280 281
};

282 283 284 285 286 287
/*
 * One watcher entry: entity name, cookie and address.
 * ceph_osdc_list_watchers() hands back an array of these.
 */
struct ceph_watch_item {
	struct ceph_entity_name name;
	u64 cookie;
	struct ceph_entity_addr addr;
};

288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329
/*
 * Tree node keyed by spgid that owns a tree of backoffs.
 * NOTE(review): presumably linked into ceph_osd.o_backoff_mappings via
 * @node, with @backoffs holding ceph_osd_backoff.spg_node entries --
 * confirm against osd_client.c.
 */
struct ceph_spg_mapping {
	struct rb_node node;
	struct ceph_spg spgid;

	struct rb_root backoffs;
};

/*
 * Hashed object id.  key, oid and nspace are pointer + length pairs.
 *
 * hash_reverse_bits caches the bit-reversed value of @hash and is filled
 * in by ceph_hoid_build_hash_cache().
 */
struct ceph_hobject_id {
	void *key;
	size_t key_len;
	void *oid;
	size_t oid_len;
	u64 snapid;
	u32 hash;
	u8 is_max;
	void *nspace;
	size_t nspace_len;
	s64 pool;

	/* cache */
	u32 hash_reverse_bits;
};

/* Refresh the cached bit-reversed copy of @hoid's hash. */
static inline void ceph_hoid_build_hash_cache(struct ceph_hobject_id *hoid)
{
	struct ceph_hobject_id *const h = hoid;

	h->hash_reverse_bits = bitrev32(h->hash);
}

/*
 * A backoff covering a range of hashed object ids.
 *
 * PG-wide backoff: [begin, end)
 * per-object backoff: begin == end
 *
 * NOTE(review): spg_node / id_node presumably link into
 * ceph_spg_mapping.backoffs and ceph_osd.o_backoffs_by_id respectively --
 * confirm against osd_client.c.
 */
struct ceph_osd_backoff {
	struct rb_node spg_node;
	struct rb_node id_node;

	struct ceph_spg spgid;
	u64 id;
	struct ceph_hobject_id *begin;
	struct ceph_hobject_id *end;
};

330 331
/*
 * NOTE(review): presumably the initial value of
 * ceph_osd_client.last_linger_id, keeping linger ids in the top of the
 * 64-bit space -- confirm against ceph_osdc_init().
 */
#define CEPH_LINGER_ID_START	0xffff000000000000ULL

S
Sage Weil 已提交
332 333 334 335
struct ceph_osd_client {
	struct ceph_client     *client;

	struct ceph_osdmap     *osdmap;       /* current map */
336
	struct rw_semaphore    lock;
S
Sage Weil 已提交
337 338

	struct rb_root         osds;          /* osds */
339
	struct list_head       osd_lru;       /* idle osds */
340
	spinlock_t             osd_lru_lock;
341
	u32		       epoch_barrier;
342 343
	struct ceph_osd        homeless_osd;
	atomic64_t             last_tid;      /* tid of last request */
344 345
	u64                    last_linger_id;
	struct rb_root         linger_requests; /* lingering requests */
I
Ilya Dryomov 已提交
346 347
	struct rb_root         map_checks;
	struct rb_root         linger_map_checks;
348 349
	atomic_t               num_requests;
	atomic_t               num_homeless;
350
	int                    abort_err;
S
Sage Weil 已提交
351
	struct delayed_work    timeout_work;
352
	struct delayed_work    osds_timeout_work;
353
#ifdef CONFIG_DEBUG_FS
S
Sage Weil 已提交
354
	struct dentry 	       *debugfs_file;
355
#endif
S
Sage Weil 已提交
356 357 358

	mempool_t              *req_mempool;

359
	struct ceph_msgpool	msgpool_op;
S
Sage Weil 已提交
360
	struct ceph_msgpool	msgpool_op_reply;
361 362

	struct workqueue_struct	*notify_wq;
S
Sage Weil 已提交
363 364
};

365 366 367 368 369
/*
 * Test the current osdmap's flags against @flag.
 *
 * Returns true when at least one bit of @flag is set in
 * osdc->osdmap->flags, false otherwise.
 */
static inline bool ceph_osdmap_flag(struct ceph_osd_client *osdc, int flag)
{
	if (osdc->osdmap->flags & flag)
		return true;

	return false;
}

370 371 372
/*
 * Setup / cleanup pair taking no arguments.
 * NOTE(review): presumably creates and destroys module-wide resources such
 * as slab caches, with setup returning 0 or a negative errno -- confirm
 * against osd_client.c.
 */
extern int ceph_osdc_setup(void);
extern void ceph_osdc_cleanup(void);

S
Sage Weil 已提交
373 374 375 376 377 378 379 380
extern int ceph_osdc_init(struct ceph_osd_client *osdc,
			  struct ceph_client *client);
extern void ceph_osdc_stop(struct ceph_osd_client *osdc);

extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
				   struct ceph_msg *msg);
extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
				 struct ceph_msg *msg);
381
void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb);
382
void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err);
S
Sage Weil 已提交
383

A
Alex Elder 已提交
384
extern void osd_req_op_init(struct ceph_osd_request *osd_req,
385
			    unsigned int which, u16 opcode, u32 flags);
A
Alex Elder 已提交
386 387 388 389 390 391 392

extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *,
					unsigned int which,
					struct page **pages, u64 length,
					u32 alignment, bool pages_from_pool,
					bool own_pages);

393 394
extern void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
					unsigned int which, u16 opcode,
395 396
					u64 offset, u64 length,
					u64 truncate_size, u32 truncate_seq);
397 398
extern void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
					unsigned int which, u64 length);
399 400
extern void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req,
				       unsigned int which, u64 offset_inc);
401 402 403

extern struct ceph_osd_data *osd_req_op_extent_osd_data(
					struct ceph_osd_request *osd_req,
404
					unsigned int which);
405 406

extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *,
407
					unsigned int which,
408 409 410 411
					struct page **pages, u64 length,
					u32 alignment, bool pages_from_pool,
					bool own_pages);
extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *,
412
					unsigned int which,
413 414
					struct ceph_pagelist *pagelist);
#ifdef CONFIG_BLOCK
415 416 417 418
void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
				    unsigned int which,
				    struct ceph_bio_iter *bio_pos,
				    u32 bio_length);
419
#endif /* CONFIG_BLOCK */
420 421 422 423
void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req,
				      unsigned int which,
				      struct bio_vec *bvecs, u32 num_bvecs,
				      u32 bytes);
424 425 426
void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
					 unsigned int which,
					 struct ceph_bvec_iter *bvec_pos);
427

428 429 430
extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *,
					unsigned int which,
					struct ceph_pagelist *pagelist);
431 432 433 434 435
extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *,
					unsigned int which,
					struct page **pages, u64 length,
					u32 alignment, bool pages_from_pool,
					bool own_pages);
436 437
void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req,
				       unsigned int which,
438 439
				       struct bio_vec *bvecs, u32 num_bvecs,
				       u32 bytes);
440
extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *,
441
					unsigned int which,
442 443 444
					struct page **pages, u64 length,
					u32 alignment, bool pages_from_pool,
					bool own_pages);
445
extern int osd_req_op_cls_init(struct ceph_osd_request *osd_req,
446
					unsigned int which, u16 opcode,
447
					const char *class, const char *method);
448 449 450
extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
				 u16 opcode, const char *name, const void *value,
				 size_t size, u8 cmp_op, u8 cmp_mode);
451 452 453 454
extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
				       unsigned int which,
				       u64 expected_object_size,
				       u64 expected_write_size);
455

456 457
extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
					       struct ceph_snap_context *snapc,
458
					       unsigned int num_ops,
459
					       bool use_mempool,
460
					       gfp_t gfp_flags);
461
int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp);
462

S
Sage Weil 已提交
463 464 465
extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
				      struct ceph_file_layout *layout,
				      struct ceph_vino vino,
466
				      u64 offset, u64 *len,
467 468
				      unsigned int which, int num_ops,
				      int opcode, int flags,
S
Sage Weil 已提交
469
				      struct ceph_snap_context *snapc,
470
				      u32 truncate_seq, u64 truncate_size,
471
				      bool use_mempool);
S
Sage Weil 已提交
472

473 474
extern void ceph_osdc_get_request(struct ceph_osd_request *req);
extern void ceph_osdc_put_request(struct ceph_osd_request *req);
S
Sage Weil 已提交
475 476 477 478

extern int ceph_osdc_start_request(struct ceph_osd_client *osdc,
				   struct ceph_osd_request *req,
				   bool nofail);
479
extern void ceph_osdc_cancel_request(struct ceph_osd_request *req);
S
Sage Weil 已提交
480 481 482 483
extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
				  struct ceph_osd_request *req);
extern void ceph_osdc_sync(struct ceph_osd_client *osdc);

484
extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc);
485
void ceph_osdc_maybe_request_map(struct ceph_osd_client *osdc);
486

487 488 489 490 491 492 493 494
/*
 * Takes an object id/locator, class and method names, op flags, a request
 * page with its length, and a response page with a pointer to its length.
 * NOTE(review): presumably invokes the named object-class method
 * synchronously, updates *resp_len with the reply size and returns a
 * negative errno on failure -- confirm against osd_client.c.
 */
int ceph_osdc_call(struct ceph_osd_client *osdc,
		   struct ceph_object_id *oid,
		   struct ceph_object_locator *oloc,
		   const char *class, const char *method,
		   unsigned int flags,
		   struct page *req_page, size_t req_len,
		   struct page *resp_page, size_t *resp_len);

S
Sage Weil 已提交
495 496 497 498 499
/*
 * Page-vector read and write helpers addressed by vino + file layout.
 * NOTE(review): both presumably block until the request completes and
 * return a byte count or a negative errno; @plen of readpages is an
 * in/out length -- confirm against osd_client.c.
 */
extern int ceph_osdc_readpages(struct ceph_osd_client *osdc,
			       struct ceph_vino vino,
			       struct ceph_file_layout *layout,
			       u64 off, u64 *plen,
			       u32 truncate_seq, u64 truncate_size,
			       struct page **pages, int nr_pages,
			       int page_align);

extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
				struct ceph_vino vino,
				struct ceph_file_layout *layout,
				struct ceph_snap_context *sc,
				u64 off, u64 len,
				u32 truncate_seq, u64 truncate_size,
				struct timespec *mtime,
				struct page **pages, int nr_pages);
S
Sage Weil 已提交
511

512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529
/* watch/notify */
struct ceph_osd_linger_request *
ceph_osdc_watch(struct ceph_osd_client *osdc,
		struct ceph_object_id *oid,
		struct ceph_object_locator *oloc,
		rados_watchcb2_t wcb,
		rados_watcherrcb_t errcb,
		void *data);
int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
		      struct ceph_osd_linger_request *lreq);

int ceph_osdc_notify_ack(struct ceph_osd_client *osdc,
			 struct ceph_object_id *oid,
			 struct ceph_object_locator *oloc,
			 u64 notify_id,
			 u64 cookie,
			 void *payload,
			 size_t payload_len);
530 531 532 533 534 535 536 537
int ceph_osdc_notify(struct ceph_osd_client *osdc,
		     struct ceph_object_id *oid,
		     struct ceph_object_locator *oloc,
		     void *payload,
		     size_t payload_len,
		     u32 timeout,
		     struct page ***preply_pages,
		     size_t *preply_len);
538 539
int ceph_osdc_watch_check(struct ceph_osd_client *osdc,
			  struct ceph_osd_linger_request *lreq);
540 541 542 543 544
int ceph_osdc_list_watchers(struct ceph_osd_client *osdc,
			    struct ceph_object_id *oid,
			    struct ceph_object_locator *oloc,
			    struct ceph_watch_item **watchers,
			    u32 *num_watchers);
S
Sage Weil 已提交
545 546
#endif