osd_client.h 15.1 KB
Newer Older
1
/* SPDX-License-Identifier: GPL-2.0 */
S
Sage Weil 已提交
2 3 4
#ifndef _FS_CEPH_OSD_CLIENT_H
#define _FS_CEPH_OSD_CLIENT_H

5
#include <linux/bitrev.h>
S
Sage Weil 已提交
6
#include <linux/completion.h>
S
Sage Weil 已提交
7
#include <linux/kref.h>
S
Sage Weil 已提交
8 9
#include <linux/mempool.h>
#include <linux/rbtree.h>
10
#include <linux/refcount.h>
S
Sage Weil 已提交
11

12 13 14
#include <linux/ceph/types.h>
#include <linux/ceph/osdmap.h>
#include <linux/ceph/messenger.h>
15
#include <linux/ceph/msgpool.h>
16
#include <linux/ceph/auth.h>
17
#include <linux/ceph/pagelist.h>
S
Sage Weil 已提交
18 19 20 21 22 23 24 25 26

struct ceph_msg;
struct ceph_snap_context;
struct ceph_osd_request;
struct ceph_osd_client;

/*
 * Completion callback for async writepages.
 *
 * The callback takes a pointer to a struct ceph_osd_request and returns
 * nothing.
 */
typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *);
S
Sage Weil 已提交
28

29 30
/*
 * Sentinel OSD number (-1).
 * NOTE(review): presumably the o_osd value used for requests that have
 * no OSD to go to (see homeless_osd) -- confirm in osd_client.c.
 *
 * Parenthesized so the expansion stays a single operand wherever the
 * macro is used.
 */
#define CEPH_HOMELESS_OSD	(-1)

S
Sage Weil 已提交
31 32
/* a given osd we're communicating with */
struct ceph_osd {
33
	refcount_t o_ref;
S
Sage Weil 已提交
34 35 36 37 38
	struct ceph_osd_client *o_osdc;
	int o_osd;
	int o_incarnation;
	struct rb_node o_node;
	struct ceph_connection o_con;
39
	struct rb_root o_requests;
40
	struct rb_root o_linger_requests;
41 42
	struct rb_root o_backoff_mappings;
	struct rb_root o_backoffs_by_id;
43
	struct list_head o_osd_lru;
44
	struct ceph_auth_handshake o_auth;
45
	unsigned long lru_ttl;
46
	struct list_head o_keepalive_item;
47
	struct mutex lock;
S
Sage Weil 已提交
48 49
};

50 51
/*
 * Op-count constants for OSD requests.
 * NOTE(review): presumably CEPH_OSD_SLAB_OPS is the number of ops a
 * slab-allocated request can hold and CEPH_OSD_MAX_OPS the upper bound
 * on r_num_ops -- confirm in osd_client.c.
 */
#define CEPH_OSD_SLAB_OPS	2
#define CEPH_OSD_MAX_OPS	16
52

53
/*
 * Discriminator stored in struct ceph_osd_data.type.
 *
 * CEPH_OSD_DATA_TYPE_BIO only exists when CONFIG_BLOCK is set, so the
 * numeric value of CEPH_OSD_DATA_TYPE_BVECS depends on the kernel
 * configuration; do not rely on it.
 */
enum ceph_osd_data_type {
	CEPH_OSD_DATA_TYPE_NONE = 0,
	CEPH_OSD_DATA_TYPE_PAGES,
	CEPH_OSD_DATA_TYPE_PAGELIST,
#ifdef CONFIG_BLOCK
	CEPH_OSD_DATA_TYPE_BIO,
#endif /* CONFIG_BLOCK */
	CEPH_OSD_DATA_TYPE_BVECS,
};

63
struct ceph_osd_data {
64 65
	enum ceph_osd_data_type	type;
	union {
66 67
		struct {
			struct page	**pages;
68
			u64		length;
69 70 71 72
			u32		alignment;
			bool		pages_from_pool;
			bool		own_pages;
		};
73
		struct ceph_pagelist	*pagelist;
74
#ifdef CONFIG_BLOCK
75
		struct {
76 77
			struct ceph_bio_iter	bio_pos;
			u32			bio_length;
78
		};
79
#endif /* CONFIG_BLOCK */
80 81 82 83
		struct {
			struct ceph_bvec_iter	bvec_pos;
			u32			num_bvecs;
		};
84 85 86
	};
};

87 88
struct ceph_osd_req_op {
	u16 op;           /* CEPH_OSD_OP_* */
89
	u32 flags;        /* CEPH_OSD_OP_FLAG_* */
90
	u32 indata_len;   /* request */
91 92 93
	u32 outdata_len;  /* reply */
	s32 rval;

94
	union {
A
Alex Elder 已提交
95
		struct ceph_osd_data raw_data_in;
96 97 98 99
		struct {
			u64 offset, length;
			u64 truncate_size;
			u32 truncate_seq;
100
			struct ceph_osd_data osd_data;
101
		} extent;
102
		struct {
103 104
			u32 name_len;
			u32 value_len;
105 106 107 108
			__u8 cmp_op;       /* CEPH_OSD_CMPXATTR_OP_* */
			__u8 cmp_mode;     /* CEPH_OSD_CMPXATTR_MODE_* */
			struct ceph_osd_data osd_data;
		} xattr;
109 110 111
		struct {
			const char *class_name;
			const char *method_name;
112
			struct ceph_osd_data request_info;
113
			struct ceph_osd_data request_data;
114
			struct ceph_osd_data response_data;
115 116
			__u8 class_len;
			__u8 method_len;
117
			u32 indata_len;
118 119 120
		} cls;
		struct {
			u64 cookie;
121 122
			__u8 op;           /* CEPH_OSD_WATCH_OP_ */
			u32 gen;
123
		} watch;
124 125 126
		struct {
			struct ceph_osd_data request_data;
		} notify_ack;
127 128 129 130 131
		struct {
			u64 cookie;
			struct ceph_osd_data request_data;
			struct ceph_osd_data response_data;
		} notify;
132 133 134
		struct {
			struct ceph_osd_data response_data;
		} list_watchers;
135 136 137 138
		struct {
			u64 expected_object_size;
			u64 expected_write_size;
		} alloc_hint;
139 140 141
	};
};

142 143 144 145 146 147
struct ceph_osd_request_target {
	struct ceph_object_id base_oid;
	struct ceph_object_locator base_oloc;
	struct ceph_object_id target_oid;
	struct ceph_object_locator target_oloc;

148 149
	struct ceph_pg pgid;               /* last raw pg we mapped to */
	struct ceph_spg spgid;             /* last actual spg we mapped to */
150 151 152 153 154 155 156
	u32 pg_num;
	u32 pg_num_mask;
	struct ceph_osds acting;
	struct ceph_osds up;
	int size;
	int min_size;
	bool sort_bitwise;
157
	bool recovery_deletes;
158 159 160 161

	unsigned int flags;                /* CEPH_OSD_FLAG_* */
	bool paused;

162
	u32 epoch;
163 164
	u32 last_force_resend;

165 166 167
	int osd;
};

S
Sage Weil 已提交
168 169 170 171
/* an in-flight request */
struct ceph_osd_request {
	u64             r_tid;              /* unique for this client */
	struct rb_node  r_node;
I
Ilya Dryomov 已提交
172
	struct rb_node  r_mc_node;          /* map check */
173
	struct work_struct r_complete_work;
S
Sage Weil 已提交
174
	struct ceph_osd *r_osd;
175 176 177 178 179

	struct ceph_osd_request_target r_t;
#define r_base_oid	r_t.base_oid
#define r_base_oloc	r_t.base_oloc
#define r_flags		r_t.flags
S
Sage Weil 已提交
180 181 182

	struct ceph_msg  *r_request, *r_reply;
	u32               r_sent;      /* >0 if r_request is sending/sent */
183

184 185 186
	/* request osd ops array  */
	unsigned int		r_num_ops;

187
	int               r_result;
S
Sage Weil 已提交
188 189

	struct ceph_osd_client *r_osdc;
S
Sage Weil 已提交
190
	struct kref       r_kref;
S
Sage Weil 已提交
191
	bool              r_mempool;
I
Ilya Dryomov 已提交
192
	struct completion r_completion;       /* private to osd_client.c */
193
	ceph_osdc_callback_t r_callback;
S
Sage Weil 已提交
194 195 196
	struct list_head  r_unsafe_item;

	struct inode *r_inode;         	      /* for use by callbacks */
197
	void *r_priv;			      /* ditto */
S
Sage Weil 已提交
198

199 200 201 202 203
	/* set by submitter */
	u64 r_snapid;                         /* for reads, CEPH_NOSNAP o/w */
	struct ceph_snap_context *r_snapc;    /* for writes */
	struct timespec r_mtime;              /* ditto */
	u64 r_data_offset;                    /* ditto */
204
	bool r_linger;                        /* don't resend on failure */
205
	bool r_abort_on_full;		      /* return ENOSPC when full */
S
Sage Weil 已提交
206

207 208
	/* internal */
	unsigned long r_stamp;                /* jiffies, send or check time */
209
	unsigned long r_start_stamp;          /* jiffies */
210
	int r_attempts;
I
Ilya Dryomov 已提交
211
	u32 r_map_dne_bound;
212 213

	struct ceph_osd_req_op r_ops[];
S
Sage Weil 已提交
214 215
};

216 217 218 219
/*
 * Redirect information for a request; as declared here it carries only
 * an object locator.
 * NOTE(review): presumably decoded from an OSD reply that redirects the
 * request -- confirm in osd_client.c.
 */
struct ceph_request_redirect {
	struct ceph_object_locator oloc;
};

220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236
/*
 * osd request identifier
 *
 * caller name + incarnation# + tid uniquely identify this request.
 * Packed, with little-endian tid and inc fields.
 */
struct ceph_osd_reqid {
	struct ceph_entity_name name;
	__le64 tid;
	__le32 inc;
} __packed;

/*
 * Trace identifiers (trace, span, parent span) as three packed
 * little-endian 64-bit fields.
 * NOTE(review): presumably part of the encoded request message --
 * confirm in osd_client.c.
 */
struct ceph_blkin_trace_info {
	__le64 trace_id;
	__le64 span_id;
	__le64 parent_span_id;
} __packed;

237 238 239 240 241
/*
 * Watch callback types, stored in struct ceph_osd_linger_request as
 * wcb (notification) and errcb (error).
 * NOTE(review): 'arg' is presumably the opaque 'data' pointer given to
 * ceph_osdc_watch() -- confirm in osd_client.c.
 */
typedef void (*rados_watchcb2_t)(void *arg, u64 notify_id, u64 cookie,
				 u64 notifier_id, void *data, size_t data_len);
typedef void (*rados_watcherrcb_t)(void *arg, u64 cookie, int err);

struct ceph_osd_linger_request {
242
	struct ceph_osd_client *osdc;
243 244
	u64 linger_id;
	bool committed;
245
	bool is_watch;                  /* watch or notify */
246 247 248 249 250

	struct ceph_osd *osd;
	struct ceph_osd_request *reg_req;
	struct ceph_osd_request *ping_req;
	unsigned long ping_sent;
251 252
	unsigned long watch_valid_thru;
	struct list_head pending_lworks;
253 254

	struct ceph_osd_request_target t;
I
Ilya Dryomov 已提交
255
	u32 map_dne_bound;
256 257 258

	struct timespec mtime;

259
	struct kref kref;
260 261 262
	struct mutex lock;
	struct rb_node node;            /* osd */
	struct rb_node osdc_node;       /* osdc */
I
Ilya Dryomov 已提交
263
	struct rb_node mc_node;         /* map check */
264 265 266
	struct list_head scan_item;

	struct completion reg_commit_wait;
267
	struct completion notify_finish_wait;
268
	int reg_commit_error;
269
	int notify_finish_error;
270 271 272
	int last_error;

	u32 register_gen;
273
	u64 notify_id;
274

275 276 277
	rados_watchcb2_t wcb;
	rados_watcherrcb_t errcb;
	void *data;
278 279 280

	struct page ***preply_pages;
	size_t *preply_len;
281 282
};

283 284 285 286 287 288
/*
 * Description of one watcher: entity name, watch cookie and address.
 * ceph_osdc_list_watchers() hands back an array of these.
 */
struct ceph_watch_item {
	struct ceph_entity_name name;
	u64 cookie;
	struct ceph_entity_addr addr;
};

289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330
/*
 * rb-tree node keyed by spgid that owns a tree of backoffs.
 * NOTE(review): presumably linked into ceph_osd.o_backoff_mappings, with
 * 'backoffs' holding struct ceph_osd_backoff entries via spg_node --
 * confirm in osd_client.c.
 */
struct ceph_spg_mapping {
	struct rb_node node;
	struct ceph_spg spgid;

	struct rb_root backoffs;
};

/*
 * Hashed object id: key, oid and namespace as (pointer, length) pairs,
 * plus snapid, hash, is_max flag and pool.
 *
 * hash_reverse_bits caches the bit-reversed hash; it is filled in by
 * ceph_hoid_build_hash_cache().
 */
struct ceph_hobject_id {
	void *key;
	size_t key_len;
	void *oid;
	size_t oid_len;
	u64 snapid;
	u32 hash;
	u8 is_max;
	void *nspace;
	size_t nspace_len;
	s64 pool;

	/* cache */
	u32 hash_reverse_bits;
};

static inline void ceph_hoid_build_hash_cache(struct ceph_hobject_id *hoid)
{
	hoid->hash_reverse_bits = bitrev32(hoid->hash);
}

/*
 * PG-wide backoff: [begin, end)
 * per-object backoff: begin == end
 *
 * Carries two rb-tree nodes so the same backoff can sit in two trees.
 * NOTE(review): presumably spg_node links into ceph_spg_mapping.backoffs
 * and id_node into ceph_osd.o_backoffs_by_id -- confirm in osd_client.c.
 */
struct ceph_osd_backoff {
	struct rb_node spg_node;
	struct rb_node id_node;

	struct ceph_spg spgid;
	u64 id;
	struct ceph_hobject_id *begin;
	struct ceph_hobject_id *end;
};

331 332
/*
 * 64-bit constant with the top 16 bits set (0xffff000000000000).
 * NOTE(review): presumably the first value handed out for
 * ceph_osd_client.last_linger_id -- confirm in osd_client.c.
 */
#define CEPH_LINGER_ID_START	(0xffffULL << 48)

S
Sage Weil 已提交
333 334 335 336
struct ceph_osd_client {
	struct ceph_client     *client;

	struct ceph_osdmap     *osdmap;       /* current map */
337
	struct rw_semaphore    lock;
S
Sage Weil 已提交
338 339

	struct rb_root         osds;          /* osds */
340
	struct list_head       osd_lru;       /* idle osds */
341
	spinlock_t             osd_lru_lock;
342
	u32		       epoch_barrier;
343 344
	struct ceph_osd        homeless_osd;
	atomic64_t             last_tid;      /* tid of last request */
345 346
	u64                    last_linger_id;
	struct rb_root         linger_requests; /* lingering requests */
I
Ilya Dryomov 已提交
347 348
	struct rb_root         map_checks;
	struct rb_root         linger_map_checks;
349 350
	atomic_t               num_requests;
	atomic_t               num_homeless;
351
	int                    abort_err;
S
Sage Weil 已提交
352
	struct delayed_work    timeout_work;
353
	struct delayed_work    osds_timeout_work;
354
#ifdef CONFIG_DEBUG_FS
S
Sage Weil 已提交
355
	struct dentry 	       *debugfs_file;
356
#endif
S
Sage Weil 已提交
357 358 359

	mempool_t              *req_mempool;

360
	struct ceph_msgpool	msgpool_op;
S
Sage Weil 已提交
361
	struct ceph_msgpool	msgpool_op_reply;
362 363

	struct workqueue_struct	*notify_wq;
364
	struct workqueue_struct	*completion_wq;
S
Sage Weil 已提交
365 366
};

367 368 369 370 371
static inline bool ceph_osdmap_flag(struct ceph_osd_client *osdc, int flag)
{
	return osdc->osdmap->flags & flag;
}

372 373 374
/*
 * Global setup/teardown pair for the osd client code; setup reports
 * its status as an int.
 */
int ceph_osdc_setup(void);
void ceph_osdc_cleanup(void);

S
Sage Weil 已提交
375 376 377 378 379 380 381 382
extern int ceph_osdc_init(struct ceph_osd_client *osdc,
			  struct ceph_client *client);
extern void ceph_osdc_stop(struct ceph_osd_client *osdc);

extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
				   struct ceph_msg *msg);
extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
				 struct ceph_msg *msg);
383
void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb);
384
void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err);
S
Sage Weil 已提交
385

A
Alex Elder 已提交
386
extern void osd_req_op_init(struct ceph_osd_request *osd_req,
387
			    unsigned int which, u16 opcode, u32 flags);
A
Alex Elder 已提交
388 389 390 391 392 393 394

extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *,
					unsigned int which,
					struct page **pages, u64 length,
					u32 alignment, bool pages_from_pool,
					bool own_pages);

395 396
extern void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
					unsigned int which, u16 opcode,
397 398
					u64 offset, u64 length,
					u64 truncate_size, u32 truncate_seq);
399 400
extern void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
					unsigned int which, u64 length);
401 402
extern void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req,
				       unsigned int which, u64 offset_inc);
403 404 405

extern struct ceph_osd_data *osd_req_op_extent_osd_data(
					struct ceph_osd_request *osd_req,
406
					unsigned int which);
407 408

/*
 * Setters for the data of an extent op, one per payload representation
 * (page vector, pagelist, bio, bio_vec array, bio_vec iterator).
 * The bio variant is only declared with CONFIG_BLOCK.
 */
void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *osd_req,
				      unsigned int which,
				      struct page **pages, u64 length,
				      u32 alignment, bool pages_from_pool,
				      bool own_pages);
void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *osd_req,
					 unsigned int which,
					 struct ceph_pagelist *pagelist);
#ifdef CONFIG_BLOCK
void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
				    unsigned int which,
				    struct ceph_bio_iter *bio_pos,
				    u32 bio_length);
#endif /* CONFIG_BLOCK */
void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req,
				      unsigned int which,
				      struct bio_vec *bvecs, u32 num_bvecs,
				      u32 bytes);
void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
					 unsigned int which,
					 struct ceph_bvec_iter *bvec_pos);
429

430 431 432
extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *,
					unsigned int which,
					struct ceph_pagelist *pagelist);
433 434 435 436 437
extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *,
					unsigned int which,
					struct page **pages, u64 length,
					u32 alignment, bool pages_from_pool,
					bool own_pages);
438 439
void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req,
				       unsigned int which,
440 441
				       struct bio_vec *bvecs, u32 num_bvecs,
				       u32 bytes);
442
extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *,
443
					unsigned int which,
444 445 446
					struct page **pages, u64 length,
					u32 alignment, bool pages_from_pool,
					bool own_pages);
447
extern int osd_req_op_cls_init(struct ceph_osd_request *osd_req,
448
					unsigned int which, u16 opcode,
449
					const char *class, const char *method);
450 451 452
extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
				 u16 opcode, const char *name, const void *value,
				 size_t size, u8 cmp_op, u8 cmp_mode);
453 454 455 456
extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
				       unsigned int which,
				       u64 expected_object_size,
				       u64 expected_write_size);
457

458 459
extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
					       struct ceph_snap_context *snapc,
460
					       unsigned int num_ops,
461
					       bool use_mempool,
462
					       gfp_t gfp_flags);
463
int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp);
464

S
Sage Weil 已提交
465 466 467
extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
				      struct ceph_file_layout *layout,
				      struct ceph_vino vino,
468
				      u64 offset, u64 *len,
469 470
				      unsigned int which, int num_ops,
				      int opcode, int flags,
S
Sage Weil 已提交
471
				      struct ceph_snap_context *snapc,
472
				      u32 truncate_seq, u64 truncate_size,
473
				      bool use_mempool);
S
Sage Weil 已提交
474

475 476
/* request reference get/put */
void ceph_osdc_get_request(struct ceph_osd_request *req);
void ceph_osdc_put_request(struct ceph_osd_request *req);

/* request submission, cancellation and waiting */
int ceph_osdc_start_request(struct ceph_osd_client *osdc,
			    struct ceph_osd_request *req,
			    bool nofail);
void ceph_osdc_cancel_request(struct ceph_osd_request *req);
int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
			   struct ceph_osd_request *req);
void ceph_osdc_sync(struct ceph_osd_client *osdc);

void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc);
void ceph_osdc_maybe_request_map(struct ceph_osd_client *osdc);
488

489 490 491 492 493 494 495 496
/*
 * Takes an object (oid + oloc), a class/method name pair, flags, a
 * request page with its length and a response page with a pointer to
 * its length; returns an int status.
 * NOTE(review): presumably executes the named class method on the
 * object and updates *resp_len -- confirm in osd_client.c.
 */
int ceph_osdc_call(struct ceph_osd_client *osdc, struct ceph_object_id *oid,
		   struct ceph_object_locator *oloc, const char *class,
		   const char *method, unsigned int flags,
		   struct page *req_page, size_t req_len,
		   struct page *resp_page, size_t *resp_len);

S
Sage Weil 已提交
497 498 499 500 501
/*
 * Page-vector read and write entry points; both take a vino, a file
 * layout, an offset/length, truncate_seq/truncate_size and a page
 * array, and return an int.
 * NOTE(review): whether the return value is a byte count or only a
 * status is not visible here -- confirm in osd_client.c.
 */
int ceph_osdc_readpages(struct ceph_osd_client *osdc,
			struct ceph_vino vino,
			struct ceph_file_layout *layout,
			u64 off, u64 *plen,
			u32 truncate_seq, u64 truncate_size,
			struct page **pages, int nr_pages,
			int page_align);

int ceph_osdc_writepages(struct ceph_osd_client *osdc,
			 struct ceph_vino vino,
			 struct ceph_file_layout *layout,
			 struct ceph_snap_context *sc,
			 u64 off, u64 len,
			 u32 truncate_seq, u64 truncate_size,
			 struct timespec *mtime,
			 struct page **pages, int nr_pages);
S
Sage Weil 已提交
513

514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531
/* watch/notify */
struct ceph_osd_linger_request *
ceph_osdc_watch(struct ceph_osd_client *osdc,
		struct ceph_object_id *oid,
		struct ceph_object_locator *oloc,
		rados_watchcb2_t wcb,
		rados_watcherrcb_t errcb,
		void *data);
int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
		      struct ceph_osd_linger_request *lreq);

int ceph_osdc_notify_ack(struct ceph_osd_client *osdc,
			 struct ceph_object_id *oid,
			 struct ceph_object_locator *oloc,
			 u64 notify_id,
			 u64 cookie,
			 void *payload,
			 size_t payload_len);
532 533 534 535 536 537 538 539
int ceph_osdc_notify(struct ceph_osd_client *osdc,
		     struct ceph_object_id *oid,
		     struct ceph_object_locator *oloc,
		     void *payload,
		     size_t payload_len,
		     u32 timeout,
		     struct page ***preply_pages,
		     size_t *preply_len);
540 541
int ceph_osdc_watch_check(struct ceph_osd_client *osdc,
			  struct ceph_osd_linger_request *lreq);
542 543 544 545 546
int ceph_osdc_list_watchers(struct ceph_osd_client *osdc,
			    struct ceph_object_id *oid,
			    struct ceph_object_locator *oloc,
			    struct ceph_watch_item **watchers,
			    u32 *num_watchers);
S
Sage Weil 已提交
547 548
#endif