osd_client.h 14.3 KB
Newer Older
S
Sage Weil 已提交
1 2 3
#ifndef _FS_CEPH_OSD_CLIENT_H
#define _FS_CEPH_OSD_CLIENT_H

4
#include <linux/bitrev.h>
S
Sage Weil 已提交
5
#include <linux/completion.h>
S
Sage Weil 已提交
6
#include <linux/kref.h>
S
Sage Weil 已提交
7 8
#include <linux/mempool.h>
#include <linux/rbtree.h>
9
#include <linux/refcount.h>
S
Sage Weil 已提交
10

11 12 13
#include <linux/ceph/types.h>
#include <linux/ceph/osdmap.h>
#include <linux/ceph/messenger.h>
14
#include <linux/ceph/msgpool.h>
15
#include <linux/ceph/auth.h>
16
#include <linux/ceph/pagelist.h>
S
Sage Weil 已提交
17 18 19 20 21 22 23 24 25

struct ceph_msg;
struct ceph_snap_context;
struct ceph_osd_request;
struct ceph_osd_client;

/*
 * completion callback for async writepages
 *
 * This is the type of ceph_osd_request.r_callback; the callback is
 * handed a pointer to an OSD request.
 */
typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *req);
S
Sage Weil 已提交
27

28 29
/*
 * NOTE(review): presumably the o_osd value of the placeholder
 * ceph_osd_client.homeless_osd -- confirm in osd_client.c.
 * Parenthesized so the negative literal stays a single operand
 * wherever the macro is expanded.
 */
#define CEPH_HOMELESS_OSD	(-1)

S
Sage Weil 已提交
30 31
/* a given osd we're communicating with */
struct ceph_osd {
32
	refcount_t o_ref;
S
Sage Weil 已提交
33 34 35 36 37
	struct ceph_osd_client *o_osdc;
	int o_osd;
	int o_incarnation;
	struct rb_node o_node;
	struct ceph_connection o_con;
38
	struct rb_root o_requests;
39
	struct rb_root o_linger_requests;
40 41
	struct rb_root o_backoff_mappings;
	struct rb_root o_backoffs_by_id;
42
	struct list_head o_osd_lru;
43
	struct ceph_auth_handshake o_auth;
44
	unsigned long lru_ttl;
45
	struct list_head o_keepalive_item;
46
	struct mutex lock;
S
Sage Weil 已提交
47 48
};

49 50
/*
 * Op-count constants for OSD requests.
 * NOTE(review): presumably CEPH_OSD_SLAB_OPS is the op count that
 * slab-allocated requests are sized for and CEPH_OSD_MAX_OPS bounds
 * ceph_osd_request.r_num_ops -- neither use is visible in this header,
 * confirm in osd_client.c.
 */
#define CEPH_OSD_SLAB_OPS	2
#define CEPH_OSD_MAX_OPS	16
51

52
/*
 * Kinds of data a struct ceph_osd_data can refer to.  NONE is pinned
 * to 0; the BIO kind exists only when CONFIG_BLOCK is defined.
 */
enum ceph_osd_data_type {
	CEPH_OSD_DATA_TYPE_NONE = 0,
	CEPH_OSD_DATA_TYPE_PAGES,
	CEPH_OSD_DATA_TYPE_PAGELIST,
#ifdef CONFIG_BLOCK
	CEPH_OSD_DATA_TYPE_BIO,
#endif /* CONFIG_BLOCK */
};

61
struct ceph_osd_data {
62 63
	enum ceph_osd_data_type	type;
	union {
64 65
		struct {
			struct page	**pages;
66
			u64		length;
67 68 69 70
			u32		alignment;
			bool		pages_from_pool;
			bool		own_pages;
		};
71
		struct ceph_pagelist	*pagelist;
72
#ifdef CONFIG_BLOCK
73 74 75 76
		struct {
			struct bio	*bio;		/* list of bios */
			size_t		bio_length;	/* total in list */
		};
77 78 79 80
#endif /* CONFIG_BLOCK */
	};
};

81 82
struct ceph_osd_req_op {
	u16 op;           /* CEPH_OSD_OP_* */
83
	u32 flags;        /* CEPH_OSD_OP_FLAG_* */
84
	u32 indata_len;   /* request */
85 86 87
	u32 outdata_len;  /* reply */
	s32 rval;

88
	union {
A
Alex Elder 已提交
89
		struct ceph_osd_data raw_data_in;
90 91 92 93
		struct {
			u64 offset, length;
			u64 truncate_size;
			u32 truncate_seq;
94
			struct ceph_osd_data osd_data;
95
		} extent;
96
		struct {
97 98
			u32 name_len;
			u32 value_len;
99 100 101 102
			__u8 cmp_op;       /* CEPH_OSD_CMPXATTR_OP_* */
			__u8 cmp_mode;     /* CEPH_OSD_CMPXATTR_MODE_* */
			struct ceph_osd_data osd_data;
		} xattr;
103 104 105
		struct {
			const char *class_name;
			const char *method_name;
106
			struct ceph_osd_data request_info;
107
			struct ceph_osd_data request_data;
108
			struct ceph_osd_data response_data;
109 110
			__u8 class_len;
			__u8 method_len;
111
			u32 indata_len;
112 113 114
		} cls;
		struct {
			u64 cookie;
115 116
			__u8 op;           /* CEPH_OSD_WATCH_OP_ */
			u32 gen;
117
		} watch;
118 119 120
		struct {
			struct ceph_osd_data request_data;
		} notify_ack;
121 122 123 124 125
		struct {
			u64 cookie;
			struct ceph_osd_data request_data;
			struct ceph_osd_data response_data;
		} notify;
126 127 128
		struct {
			struct ceph_osd_data response_data;
		} list_watchers;
129 130 131 132
		struct {
			u64 expected_object_size;
			u64 expected_write_size;
		} alloc_hint;
133 134 135
	};
};

136 137 138 139 140 141
struct ceph_osd_request_target {
	struct ceph_object_id base_oid;
	struct ceph_object_locator base_oloc;
	struct ceph_object_id target_oid;
	struct ceph_object_locator target_oloc;

142 143
	struct ceph_pg pgid;               /* last raw pg we mapped to */
	struct ceph_spg spgid;             /* last actual spg we mapped to */
144 145 146 147 148 149 150
	u32 pg_num;
	u32 pg_num_mask;
	struct ceph_osds acting;
	struct ceph_osds up;
	int size;
	int min_size;
	bool sort_bitwise;
151
	bool recovery_deletes;
152 153 154 155

	unsigned int flags;                /* CEPH_OSD_FLAG_* */
	bool paused;

156
	u32 epoch;
157 158
	u32 last_force_resend;

159 160 161
	int osd;
};

S
Sage Weil 已提交
162 163 164 165
/* an in-flight request */
struct ceph_osd_request {
	u64             r_tid;              /* unique for this client */
	struct rb_node  r_node;
I
Ilya Dryomov 已提交
166
	struct rb_node  r_mc_node;          /* map check */
S
Sage Weil 已提交
167
	struct ceph_osd *r_osd;
168 169 170 171 172

	struct ceph_osd_request_target r_t;
#define r_base_oid	r_t.base_oid
#define r_base_oloc	r_t.base_oloc
#define r_flags		r_t.flags
S
Sage Weil 已提交
173 174 175

	struct ceph_msg  *r_request, *r_reply;
	u32               r_sent;      /* >0 if r_request is sending/sent */
176

177 178 179
	/* request osd ops array  */
	unsigned int		r_num_ops;

180
	int               r_result;
S
Sage Weil 已提交
181 182

	struct ceph_osd_client *r_osdc;
S
Sage Weil 已提交
183
	struct kref       r_kref;
S
Sage Weil 已提交
184
	bool              r_mempool;
I
Ilya Dryomov 已提交
185
	struct completion r_completion;       /* private to osd_client.c */
186
	ceph_osdc_callback_t r_callback;
S
Sage Weil 已提交
187 188 189
	struct list_head  r_unsafe_item;

	struct inode *r_inode;         	      /* for use by callbacks */
190
	void *r_priv;			      /* ditto */
S
Sage Weil 已提交
191

192 193 194 195 196
	/* set by submitter */
	u64 r_snapid;                         /* for reads, CEPH_NOSNAP o/w */
	struct ceph_snap_context *r_snapc;    /* for writes */
	struct timespec r_mtime;              /* ditto */
	u64 r_data_offset;                    /* ditto */
197
	bool r_linger;                        /* don't resend on failure */
198
	bool r_abort_on_full;		      /* return ENOSPC when full */
S
Sage Weil 已提交
199

200 201
	/* internal */
	unsigned long r_stamp;                /* jiffies, send or check time */
202
	unsigned long r_start_stamp;          /* jiffies */
203
	int r_attempts;
I
Ilya Dryomov 已提交
204
	u32 r_map_dne_bound;
205 206

	struct ceph_osd_req_op r_ops[];
S
Sage Weil 已提交
207 208
};

209 210 211 212
/*
 * Request redirect descriptor; its only member is an object locator.
 * NOTE(review): how and where it is filled in is not visible in this
 * header.
 */
struct ceph_request_redirect {
	struct ceph_object_locator oloc;
};

213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229
/*
 * osd request identifier
 *
 * caller name + incarnation# + tid to uniquely identify this request
 *
 * Packed, with little-endian integer fields.
 */
struct ceph_osd_reqid {
	struct ceph_entity_name name;	/* caller name */
	__le64 tid;			/* tid */
	__le32 inc;			/* incarnation# */
} __packed;

/*
 * Three little-endian 64-bit trace identifiers, packed.
 * NOTE(review): presumably blkin tracing ids laid out as they appear
 * on the wire -- confirm against the message encoding code.
 */
struct ceph_blkin_trace_info {
	__le64 trace_id;
	__le64 span_id;
	__le64 parent_span_id;
} __packed;

230 231 232 233 234
/*
 * Watch callback types; these are the types of the wcb and errcb
 * members of struct ceph_osd_linger_request and of the matching
 * ceph_osdc_watch() parameters.
 *
 * rados_watchcb2_t receives an opaque @arg, the notify and watch
 * identifiers, and a @data buffer of @data_len bytes.
 * rados_watcherrcb_t receives an opaque @arg, a @cookie and an error
 * code.
 * NOTE(review): presumably @arg is the data pointer given to
 * ceph_osdc_watch() -- confirm in osd_client.c.
 */
typedef void (*rados_watchcb2_t)(void *arg, u64 notify_id, u64 cookie,
				 u64 notifier_id, void *data, size_t data_len);
typedef void (*rados_watcherrcb_t)(void *arg, u64 cookie, int err);

struct ceph_osd_linger_request {
235
	struct ceph_osd_client *osdc;
236 237
	u64 linger_id;
	bool committed;
238
	bool is_watch;                  /* watch or notify */
239 240 241 242 243

	struct ceph_osd *osd;
	struct ceph_osd_request *reg_req;
	struct ceph_osd_request *ping_req;
	unsigned long ping_sent;
244 245
	unsigned long watch_valid_thru;
	struct list_head pending_lworks;
246 247

	struct ceph_osd_request_target t;
I
Ilya Dryomov 已提交
248
	u32 map_dne_bound;
249 250 251

	struct timespec mtime;

252
	struct kref kref;
253 254 255
	struct mutex lock;
	struct rb_node node;            /* osd */
	struct rb_node osdc_node;       /* osdc */
I
Ilya Dryomov 已提交
256
	struct rb_node mc_node;         /* map check */
257 258 259
	struct list_head scan_item;

	struct completion reg_commit_wait;
260
	struct completion notify_finish_wait;
261
	int reg_commit_error;
262
	int notify_finish_error;
263 264 265
	int last_error;

	u32 register_gen;
266
	u64 notify_id;
267

268 269 270
	rados_watchcb2_t wcb;
	rados_watcherrcb_t errcb;
	void *data;
271 272 273

	struct page ***preply_pages;
	size_t *preply_len;
274 275
};

276 277 278 279 280 281
/*
 * Description of one watcher: entity name, cookie and address.
 * ceph_osdc_list_watchers() takes a struct ceph_watch_item ** argument.
 * NOTE(review): presumably it hands back an array of these -- confirm.
 */
struct ceph_watch_item {
	struct ceph_entity_name name;
	u64 cookie;
	struct ceph_entity_addr addr;
};

282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323
/*
 * Per-spg entry that roots an rb-tree of backoffs.  The embedded
 * rb_node lets the entry itself be linked into an rb-tree.
 * NOTE(review): presumably ceph_osd.o_backoff_mappings -- confirm in
 * osd_client.c.
 */
struct ceph_spg_mapping {
	struct rb_node node;
	struct ceph_spg spgid;

	struct rb_root backoffs;
};

/*
 * Hashed object id.  key, oid and nspace are (pointer, length) pairs.
 * NOTE(review): ownership of those buffers is not visible in this
 * header.
 */
struct ceph_hobject_id {
	void *key;
	size_t key_len;
	void *oid;
	size_t oid_len;
	u64 snapid;
	u32 hash;
	u8 is_max;
	void *nspace;
	size_t nspace_len;
	s64 pool;

	/* cache: bitrev32(hash), set by ceph_hoid_build_hash_cache() */
	u32 hash_reverse_bits;
};

/*
 * Recompute the cached bit-reversed hash of @hoid: stores
 * bitrev32(hoid->hash) in hoid->hash_reverse_bits.  The cached value is
 * only as current as the most recent call.
 */
static inline void ceph_hoid_build_hash_cache(struct ceph_hobject_id *hoid)
{
	struct ceph_hobject_id *const target = hoid;

	target->hash_reverse_bits = bitrev32(target->hash);
}

/*
 * PG-wide backoff: [begin, end)
 * per-object backoff: begin == end
 *
 * Carries two rb-tree linkages (spg_node and id_node), so one backoff
 * can sit in two trees at once.
 * NOTE(review): presumably ceph_spg_mapping.backoffs and
 * ceph_osd.o_backoffs_by_id -- confirm in osd_client.c.
 */
struct ceph_osd_backoff {
	struct rb_node spg_node;
	struct rb_node id_node;

	struct ceph_spg spgid;
	u64 id;
	struct ceph_hobject_id *begin;
	struct ceph_hobject_id *end;
};

324 325
/*
 * NOTE(review): presumably the base value from which linger ids
 * (ceph_osd_client.last_linger_id, ceph_osd_linger_request.linger_id)
 * are handed out -- confirm in osd_client.c.
 */
#define CEPH_LINGER_ID_START	0xffff000000000000ULL

S
Sage Weil 已提交
326 327 328 329
struct ceph_osd_client {
	struct ceph_client     *client;

	struct ceph_osdmap     *osdmap;       /* current map */
330
	struct rw_semaphore    lock;
S
Sage Weil 已提交
331 332

	struct rb_root         osds;          /* osds */
333
	struct list_head       osd_lru;       /* idle osds */
334
	spinlock_t             osd_lru_lock;
335
	u32		       epoch_barrier;
336 337
	struct ceph_osd        homeless_osd;
	atomic64_t             last_tid;      /* tid of last request */
338 339
	u64                    last_linger_id;
	struct rb_root         linger_requests; /* lingering requests */
I
Ilya Dryomov 已提交
340 341
	struct rb_root         map_checks;
	struct rb_root         linger_map_checks;
342 343
	atomic_t               num_requests;
	atomic_t               num_homeless;
S
Sage Weil 已提交
344
	struct delayed_work    timeout_work;
345
	struct delayed_work    osds_timeout_work;
346
#ifdef CONFIG_DEBUG_FS
S
Sage Weil 已提交
347
	struct dentry 	       *debugfs_file;
348
#endif
S
Sage Weil 已提交
349 350 351

	mempool_t              *req_mempool;

352
	struct ceph_msgpool	msgpool_op;
S
Sage Weil 已提交
353
	struct ceph_msgpool	msgpool_op_reply;
354 355

	struct workqueue_struct	*notify_wq;
S
Sage Weil 已提交
356 357
};

358 359 360 361 362
/*
 * Test the current osdmap's flags against @flag.
 *
 * Returns true when at least one bit of @flag is set in
 * osdc->osdmap->flags, false otherwise.  osdc->osdmap is dereferenced
 * unconditionally.
 */
static inline bool ceph_osdmap_flag(struct ceph_osd_client *osdc, int flag)
{
	return (osdc->osdmap->flags & flag) != 0;
}

363 364 365
/*
 * Declarations only; the definitions are not part of this header.
 * NOTE(review): presumably one-time setup/teardown of the OSD client
 * code -- confirm in osd_client.c.
 */
int ceph_osdc_setup(void);
void ceph_osdc_cleanup(void);

S
Sage Weil 已提交
366 367 368 369 370 371 372 373
/*
 * Lifetime and message-handling entry points of an OSD client.
 * Declarations only; the definitions are not part of this header.
 */
int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client);
void ceph_osdc_stop(struct ceph_osd_client *osdc);

void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
			    struct ceph_msg *msg);
void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
			  struct ceph_msg *msg);
374
/*
 * Declaration only.
 * NOTE(review): presumably updates osdc->epoch_barrier from @eb --
 * confirm in osd_client.c.
 */
void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb);
S
Sage Weil 已提交
375

A
Alex Elder 已提交
376
/*
 * Declaration only.  @which is an op index into @osd_req.
 * NOTE(review): presumably an index into osd_req->r_ops[], with
 * @opcode and @flags ending up in that op's op/flags fields -- confirm
 * in osd_client.c.
 */
void osd_req_op_init(struct ceph_osd_request *osd_req,
		     unsigned int which, u16 opcode, u32 flags);
A
Alex Elder 已提交
378 379 380 381 382 383 384

/*
 * Declaration only.  The page-vector arguments mirror the page-vector
 * member of struct ceph_osd_data (pages, length, alignment,
 * pages_from_pool, own_pages).
 * NOTE(review): presumably fills the raw_data_in member of op @which --
 * confirm in osd_client.c.
 */
void osd_req_op_raw_data_in_pages(struct ceph_osd_request *osd_req,
				  unsigned int which,
				  struct page **pages, u64 length,
				  u32 alignment, bool pages_from_pool,
				  bool own_pages);

385 386
/*
 * Extent op helpers.  Declarations only; @which is an op index into
 * @osd_req.  The offset/length/truncate_* arguments have the same
 * names as the fields of ceph_osd_req_op.extent.
 */
void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
			    unsigned int which, u16 opcode,
			    u64 offset, u64 length,
			    u64 truncate_size, u32 truncate_seq);
void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
			      unsigned int which, u64 length);
void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req,
				unsigned int which, u64 offset_inc);
393 394 395

/*
 * Declaration only.
 * NOTE(review): presumably returns the extent.osd_data member of op
 * @which in @osd_req -- confirm in osd_client.c.
 */
struct ceph_osd_data *osd_req_op_extent_osd_data(
					struct ceph_osd_request *osd_req,
					unsigned int which);
397 398

/*
 * Attach data to an extent op, one variant per struct ceph_osd_data
 * payload kind (page vector, pagelist, and -- with CONFIG_BLOCK -- bio
 * list).  Declarations only; @which is an op index into @osd_req.
 */
void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *osd_req,
				      unsigned int which,
				      struct page **pages, u64 length,
				      u32 alignment, bool pages_from_pool,
				      bool own_pages);
void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *osd_req,
					 unsigned int which,
					 struct ceph_pagelist *pagelist);
#ifdef CONFIG_BLOCK
void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
				    unsigned int which,
				    struct bio *bio, size_t bio_length);
#endif /* CONFIG_BLOCK */

412 413 414
extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *,
					unsigned int which,
					struct ceph_pagelist *pagelist);
415 416 417 418 419
extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *,
					unsigned int which,
					struct page **pages, u64 length,
					u32 alignment, bool pages_from_pool,
					bool own_pages);
420
/*
 * Declaration only; @which is an op index into @osd_req.
 * NOTE(review): presumably fills ceph_osd_req_op.cls.response_data
 * with the given page vector -- confirm in osd_client.c.
 */
void osd_req_op_cls_response_data_pages(struct ceph_osd_request *osd_req,
					unsigned int which,
					struct page **pages, u64 length,
					u32 alignment, bool pages_from_pool,
					bool own_pages);
425 426
/*
 * Declaration only; @which is an op index into @osd_req, @class and
 * @method are C strings.
 * NOTE(review): presumably they end up in cls.class_name and
 * cls.method_name; whether they are copied or referenced is not
 * visible here -- confirm in osd_client.c.
 */
void osd_req_op_cls_init(struct ceph_osd_request *osd_req,
			 unsigned int which, u16 opcode,
			 const char *class, const char *method);
428 429 430
/*
 * Declaration only; @which is an op index into @osd_req.  Unlike the
 * other op init helpers in this header this one returns int.
 * NOTE(review): presumably 0 or a negative errno -- confirm in
 * osd_client.c.
 */
int osd_req_op_xattr_init(struct ceph_osd_request *osd_req,
			  unsigned int which, u16 opcode,
			  const char *name, const void *value,
			  size_t size, u8 cmp_op, u8 cmp_mode);
431 432 433 434
/*
 * Declaration only; @which is an op index into @osd_req.  The two size
 * arguments carry the same names as the fields of
 * ceph_osd_req_op.alloc_hint.
 */
void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
				unsigned int which,
				u64 expected_object_size,
				u64 expected_write_size);
435

436 437
extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
					       struct ceph_snap_context *snapc,
438
					       unsigned int num_ops,
439
					       bool use_mempool,
440
					       gfp_t gfp_flags);
441
int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp);
442

S
Sage Weil 已提交
443 444 445
extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
				      struct ceph_file_layout *layout,
				      struct ceph_vino vino,
446
				      u64 offset, u64 *len,
447 448
				      unsigned int which, int num_ops,
				      int opcode, int flags,
S
Sage Weil 已提交
449
				      struct ceph_snap_context *snapc,
450
				      u32 truncate_seq, u64 truncate_size,
451
				      bool use_mempool);
S
Sage Weil 已提交
452

453 454
/*
 * Request reference helpers.  Declarations only.
 * NOTE(review): presumably take and drop a reference on req->r_kref --
 * confirm in osd_client.c.
 */
void ceph_osdc_get_request(struct ceph_osd_request *req);
void ceph_osdc_put_request(struct ceph_osd_request *req);
S
Sage Weil 已提交
455 456 457 458

/*
 * Submitting, cancelling and waiting for requests.  Declarations only;
 * the definitions are not part of this header.
 */
int ceph_osdc_start_request(struct ceph_osd_client *osdc,
			    struct ceph_osd_request *req,
			    bool nofail);
void ceph_osdc_cancel_request(struct ceph_osd_request *req);
int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
			   struct ceph_osd_request *req);
void ceph_osdc_sync(struct ceph_osd_client *osdc);

464
/*
 * Declarations only.
 * NOTE(review): ceph_osdc_flush_notifies() presumably flushes
 * osdc->notify_wq -- confirm in osd_client.c.
 */
void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc);
void ceph_osdc_maybe_request_map(struct ceph_osd_client *osdc);
466

467 468 469 470 471 472 473 474
/*
 * Declaration only; the definition is not part of this header.
 * NOTE(review): presumably invokes @method of object class @class on
 * the object named by @oid/@oloc, sending @req_len bytes from
 * @req_page and receiving the reply into @resp_page with its length
 * stored through @resp_len -- confirm in osd_client.c.
 */
int ceph_osdc_call(struct ceph_osd_client *osdc,
		   struct ceph_object_id *oid,
		   struct ceph_object_locator *oloc,
		   const char *class, const char *method,
		   unsigned int flags,
		   struct page *req_page, size_t req_len,
		   struct page *resp_page, size_t *resp_len);

S
Sage Weil 已提交
475 476 477 478 479
/*
 * Declaration only.  @plen is passed by pointer.
 * NOTE(review): presumably so the length can be adjusted by the
 * callee; the meaning of the return value is not visible here --
 * confirm both in osd_client.c.
 */
int ceph_osdc_readpages(struct ceph_osd_client *osdc,
			struct ceph_vino vino,
			struct ceph_file_layout *layout,
			u64 off, u64 *plen,
			u32 truncate_seq, u64 truncate_size,
			struct page **pages, int nr_pages,
			int page_align);
S
Sage Weil 已提交
482 483 484 485 486 487 488 489

/*
 * Declaration only.
 * NOTE(review): the meaning of the return value is not visible here --
 * confirm in osd_client.c.
 */
int ceph_osdc_writepages(struct ceph_osd_client *osdc,
			 struct ceph_vino vino,
			 struct ceph_file_layout *layout,
			 struct ceph_snap_context *sc,
			 u64 off, u64 len,
			 u32 truncate_seq, u64 truncate_size,
			 struct timespec *mtime,
			 struct page **pages, int nr_pages);
S
Sage Weil 已提交
491

492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509
/* watch/notify */
struct ceph_osd_linger_request *
ceph_osdc_watch(struct ceph_osd_client *osdc,
		struct ceph_object_id *oid,
		struct ceph_object_locator *oloc,
		rados_watchcb2_t wcb,
		rados_watcherrcb_t errcb,
		void *data);
int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
		      struct ceph_osd_linger_request *lreq);

int ceph_osdc_notify_ack(struct ceph_osd_client *osdc,
			 struct ceph_object_id *oid,
			 struct ceph_object_locator *oloc,
			 u64 notify_id,
			 u64 cookie,
			 void *payload,
			 size_t payload_len);
510 511 512 513 514 515 516 517
int ceph_osdc_notify(struct ceph_osd_client *osdc,
		     struct ceph_object_id *oid,
		     struct ceph_object_locator *oloc,
		     void *payload,
		     size_t payload_len,
		     u32 timeout,
		     struct page ***preply_pages,
		     size_t *preply_len);
518 519
int ceph_osdc_watch_check(struct ceph_osd_client *osdc,
			  struct ceph_osd_linger_request *lreq);
520 521 522 523 524
int ceph_osdc_list_watchers(struct ceph_osd_client *osdc,
			    struct ceph_object_id *oid,
			    struct ceph_object_locator *oloc,
			    struct ceph_watch_item **watchers,
			    u32 *num_watchers);
S
Sage Weil 已提交
525 526
#endif