dlm_internal.h 15.8 KB
Newer Older
1 2 3 4
/******************************************************************************
*******************************************************************************
**
**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
**
**  This copyrighted material is made available to anyone wishing to use,
**  modify, copy, or redistribute it subject to the terms and conditions
**  of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/

#ifndef __DLM_INTERNAL_DOT_H__
#define __DLM_INTERNAL_DOT_H__

/*
 * This is the main header file to be included in each DLM source file.
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/ctype.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/list.h>
#include <linux/errno.h>
#include <linux/random.h>
#include <linux/delay.h>
#include <linux/socket.h>
#include <linux/kthread.h>
#include <linux/kobject.h>
#include <linux/kref.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
D
David Teigland 已提交
38
#include <linux/miscdevice.h>
39
#include <linux/mutex.h>
40 41 42 43
#include <asm/semaphore.h>
#include <asm/uaccess.h>

#include <linux/dlm.h>
44
#include "config.h"
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72

/* NOTE(review): presumably the maximum length of a lockspace name; whether
   a terminating NUL is included is not visible here -- confirm at users. */
#define DLM_LOCKSPACE_LEN	64

/* Size of the temp buffer midcomms allocates on the stack.
   We try to make this large enough so most messages fit.
   FIXME: should sctp make this unnecessary? */

#define DLM_INBUF_LEN		148

/* Forward declarations of the DLM struct tags, so pointers to these types
   can be used ahead of (or without) the full definitions. */
struct dlm_ls;
struct dlm_lkb;
struct dlm_rsb;
struct dlm_member;
struct dlm_lkbtable;
struct dlm_rsbtable;
struct dlm_dirtable;
struct dlm_direntry;
struct dlm_recover;
struct dlm_header;
struct dlm_message;
struct dlm_rcom;
struct dlm_mhandle;

/* Print a KERN_ERR message prefixed with "dlm: "; a newline is appended.
   fmt must be a string literal because it is concatenated with the prefix. */
#define log_print(fmt, args...) \
	printk(KERN_ERR "dlm: "fmt"\n" , ##args)
/* Same as log_print, but the prefix also carries the lockspace name taken
   from (ls)->ls_name. */
#define log_error(ls, fmt, args...) \
	printk(KERN_ERR "dlm: %s: " fmt "\n", (ls)->ls_name , ##args)

73 74 75 76 77 78
/* Print a KERN_DEBUG message prefixed with "dlm: <ls_name>: ", but only
   when dlm_config.ci_log_debug is non-zero.  fmt must be a string literal.
   Wrapped in do { } while (0) so it behaves as a single statement. */
#define log_debug(ls, fmt, args...) \
do { \
	if (dlm_config.ci_log_debug) \
		printk(KERN_DEBUG "dlm: %s: " fmt "\n", \
		       (ls)->ls_name , ##args); \
} while (0)
79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94

/*
 * Fatal assertion.  When x evaluates false this prints the line, file,
 * stringified expression and current jiffies at KERN_ERR, runs the
 * caller-supplied statement(s) passed as the second argument (presumably
 * extra debug output), prints a blank line, then calls BUG() and panic().
 *
 * NOTE(review): the expansion is a bare { } block, not do { } while (0),
 * so "if (c) DLM_ASSERT(...); else ..." does not parse.  Switching to the
 * do/while form would also require renaming the parameter called "do".
 */
#define DLM_ASSERT(x, do) \
{ \
  if (!(x)) \
  { \
    printk(KERN_ERR "\nDLM:  Assertion failed on line %d of file %s\n" \
               "DLM:  assertion:  \"%s\"\n" \
               "DLM:  time = %lu\n", \
               __LINE__, __FILE__, #x, jiffies); \
    {do} \
    printk("\n"); \
    BUG(); \
    panic("DLM:  Record message above and reboot.\n"); \
  } \
}

95 96
/* Sentinel pointer value built with ERR_PTR(-EINVAL).
   NOTE(review): presumably stored in place of a real ast address for
   user-space locks -- confirm against the users of this macro. */
#define DLM_FAKE_USER_AST ERR_PTR(-EINVAL)

97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153

/*
 * One entry on a dlm_dirtable list.
 * NOTE(review): name[] is declared with a single byte; presumably the entry
 * is over-allocated and "length" gives the real name size -- confirm at the
 * allocation site.
 */
struct dlm_direntry {
	struct list_head	list;
	uint32_t		master_nodeid;
	uint16_t		length;
	char			name[1];
};

/* One slot of the directory table (struct dlm_ls points at an array of
   these through ls_dirtbl): a list head plus the rwlock stored with it. */
struct dlm_dirtable {
	struct list_head	list;
	rwlock_t		lock;
};

/* One slot of the rsb table (ls_rsbtbl in struct dlm_ls): two list heads
   and an rwlock.
   NOTE(review): "toss" presumably holds rsbs that are no longer in use
   (cf. res_toss_time in struct dlm_rsb) -- confirm. */
struct dlm_rsbtable {
	struct list_head	list;
	struct list_head	toss;
	rwlock_t		lock;
};

/* One slot of the lkb table (ls_lkbtbl in struct dlm_ls): a list head, an
   rwlock and a 16-bit counter.
   NOTE(review): the counter is presumably used when generating lock ids
   for this slot -- confirm. */
struct dlm_lkbtable {
	struct list_head	list;
	rwlock_t		lock;
	uint16_t		counter;
};

/*
 * Lockspace member (per node in a ls)
 */

struct dlm_member {
	struct list_head	list;		/* list linkage */
	int			nodeid;		/* node id of this member */
	int			weight;		/* NOTE(review): presumably summed
						   into ls_total_weight */
};

/*
 * Save and manage recovery state for a lockspace.
 */

struct dlm_recover {
	struct list_head	list;		/* list linkage */
	int			*nodeids;	/* array of node ids; presumably
						   node_count entries long */
	int			node_count;
	uint64_t		seq;		/* NOTE(review): presumably a
						   recovery sequence number, cf.
						   ls_recover_seq */
};

/*
 * Pass input args to second stage locking function.
 */

struct dlm_args {
	uint32_t		flags;		/* presumably DLM_LKF_ flags from
						   the caller -- confirm */
	void			*astaddr;	/* ast function (cf. lkb_astaddr) */
	long			astparam;	/* ast argument (cf. lkb_astparam) */
	void			*bastaddr;	/* bast function (cf. lkb_bastaddr) */
	int			mode;		/* lock mode */
	struct dlm_lksb		*lksb;		/* status block (cf. lkb_lksb) */
	unsigned long		timeout;	/* units not visible here; cf.
						   lkb_timeout_cs */
};


/*
 * Lock block
 *
 * A lock can be one of three types:
 *
 * local copy      lock is mastered locally
 *                 (lkb_nodeid is zero and DLM_IFL_MSTCPY is not set)
 * process copy    lock is mastered on a remote node
 *                 (lkb_nodeid is non-zero and DLM_IFL_MSTCPY is not set)
 * master copy     master node's copy of a lock owned by remote node
 *                 (lkb_nodeid is non-zero and DLM_IFL_MSTCPY is set)
 *
 * lkb_exflags: a copy of the most recent flags arg provided to dlm_lock or
 * dlm_unlock.  The dlm does not modify these or use any private flags in
 * this field; it only contains DLM_LKF_ flags from dlm.h.  These flags
 * are sent as-is to the remote master when the lock is remote.
 *
 * lkb_flags: internal dlm flags (DLM_IFL_ prefix) from dlm_internal.h.
 * Some internal flags are shared between the master and process nodes;
 * these shared flags are kept in the lower two bytes.  One of these
 * flags set on the master copy will be propagated to the process copy
 * and v.v.  Other internal flags are private to the master or process
 * node (e.g. DLM_IFL_MSTCPY).  These are kept in the high two bytes.
 *
 * lkb_sbflags: status block flags.  These flags are copied directly into
 * the caller's lksb.sb_flags prior to the dlm_lock/dlm_unlock completion
 * ast.  All defined in dlm.h with DLM_SBF_ prefix.
 *
 * lkb_status: the lock status indicates which rsb queue the lock is
 * on, grant, convert, or wait.  DLM_LKSTS_ WAITING/GRANTED/CONVERT
 *
 * lkb_wait_type: the dlm message type (DLM_MSG_ prefix) for which a
 * reply is needed.  Only set when the lkb is on the lockspace waiters
 * list awaiting a reply from a remote node.
 *
 * lkb_nodeid: when the lkb is a local copy, nodeid is 0; when the lkb
 * is a master copy, nodeid specifies the remote lock holder, when the
 * lkb is a process copy, the nodeid specifies the lock master.
 */

/* lkb_ast_type */

/* Values are distinct single bits (1 and 2).
   NOTE(review): presumably OR-ed together when both a completion ast and a
   blocking ast are queued -- confirm. */
#define AST_COMP		1
#define AST_BAST		2

/* lkb_status */

/* Which rsb queue the lkb is on: wait, grant or convert. */
#define DLM_LKSTS_WAITING	1
#define DLM_LKSTS_GRANTED	2
#define DLM_LKSTS_CONVERT	3

/* lkb_flags */

/* Internal flags private to the master or process node; these occupy the
   high two bytes of lkb_flags. */
#define DLM_IFL_MSTCPY		0x00010000
#define DLM_IFL_RESEND		0x00020000
#define DLM_IFL_DEAD		0x00040000
#define DLM_IFL_OVERLAP_UNLOCK	0x00080000
#define DLM_IFL_OVERLAP_CANCEL	0x00100000
#define DLM_IFL_ENDOFLIFE	0x00200000
#define DLM_IFL_WATCH_TIMEWARN	0x00400000
#define DLM_IFL_TIMEOUT_CANCEL	0x00800000
#define DLM_IFL_DEADLOCK_CANCEL	0x01000000
/* Internal flags shared between the master and process copies; these occupy
   the low two bytes of lkb_flags. */
#define DLM_IFL_USER		0x00000001
#define DLM_IFL_ORPHAN		0x00000002
222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240

struct dlm_lkb {
	struct dlm_rsb		*lkb_resource;	/* the rsb */
	struct kref		lkb_ref;
	int			lkb_nodeid;	/* copied from rsb */
	int			lkb_ownpid;	/* pid of lock owner */
	uint32_t		lkb_id;		/* our lock ID */
	uint32_t		lkb_remid;	/* lock ID on remote partner */
	uint32_t		lkb_exflags;	/* external flags from caller */
	uint32_t		lkb_sbflags;	/* lksb flags */
	uint32_t		lkb_flags;	/* internal flags */
	uint32_t		lkb_lvbseq;	/* lvb sequence number */

	int8_t			lkb_status;     /* granted, waiting, convert */
	int8_t			lkb_rqmode;	/* requested lock mode */
	int8_t			lkb_grmode;	/* granted lock mode */
	int8_t			lkb_bastmode;	/* requested mode */
	int8_t			lkb_highbast;	/* highest mode bast sent for */
	int8_t			lkb_wait_type;	/* type of reply waiting for */
241
	int8_t			lkb_wait_count;
242 243 244 245 246 247 248
	int8_t			lkb_ast_type;	/* type of ast queued for */

	struct list_head	lkb_idtbl_list;	/* lockspace lkbtbl */
	struct list_head	lkb_statequeue;	/* rsb g/c/w list */
	struct list_head	lkb_rsb_lookup;	/* waiting for rsb lookup */
	struct list_head	lkb_wait_reply;	/* waiting for remote reply */
	struct list_head	lkb_astqueue;	/* need ast to be sent */
D
David Teigland 已提交
249
	struct list_head	lkb_ownqueue;	/* list of locks for a process */
250 251 252
	struct list_head	lkb_time_list;
	unsigned long		lkb_timestamp;
	unsigned long		lkb_timeout_cs;
253 254 255 256 257 258 259 260 261 262 263 264

	char			*lkb_lvbptr;
	struct dlm_lksb		*lkb_lksb;      /* caller's status block */
	void			*lkb_astaddr;	/* caller's ast function */
	void			*lkb_bastaddr;	/* caller's bast function */
	long			lkb_astparam;	/* caller's ast arg */
};


struct dlm_rsb {
	struct dlm_ls		*res_ls;	/* the lockspace */
	struct kref		res_ref;
265
	struct mutex		res_mutex;
266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301
	unsigned long		res_flags;
	int			res_length;	/* length of rsb name */
	int			res_nodeid;
	uint32_t                res_lvbseq;
	uint32_t		res_hash;
	uint32_t		res_bucket;	/* rsbtbl */
	unsigned long		res_toss_time;
	uint32_t		res_first_lkid;
	struct list_head	res_lookup;	/* lkbs waiting on first */
	struct list_head	res_hashchain;	/* rsbtbl */
	struct list_head	res_grantqueue;
	struct list_head	res_convertqueue;
	struct list_head	res_waitqueue;

	struct list_head	res_root_list;	    /* used for recovery */
	struct list_head	res_recover_list;   /* used for recovery */
	int			res_recover_locks_count;

	char			*res_lvbptr;
	char			res_name[1];
};

/* find_rsb() flags */

/* Distinct single-bit values (1 and 2). */
#define R_MASTER		1	/* only return rsb if it's a master */
#define R_CREATE		2	/* create/add rsb if not found */

/* rsb_flags */

/*
 * Bit numbers (not masks) within dlm_rsb.res_flags.  They are passed as the
 * bit index to __set_bit()/__clear_bit()/test_bit() by rsb_set_flag(),
 * rsb_clear_flag() and rsb_flag().
 */
enum rsb_flags {
	RSB_MASTER_UNCERTAIN,
	RSB_VALNOTVALID,
	RSB_VALNOTVALID_PREV,
	RSB_NEW_MASTER,
	RSB_NEW_MASTER2,
	RSB_RECOVER_CONVERT,
	RSB_LOCKS_PURGED,
};

/* Set bit number "flag" in r->res_flags using __set_bit().
   NOTE(review): __set_bit() is the non-atomic kernel bitop, so callers
   presumably serialize access to the rsb -- confirm. */
static inline void rsb_set_flag(struct dlm_rsb *r, enum rsb_flags flag)
{
	unsigned long *word = &r->res_flags;

	__set_bit(flag, word);
}

/* Clear bit number "flag" in r->res_flags using __clear_bit().
   NOTE(review): __clear_bit() is the non-atomic kernel bitop, so callers
   presumably serialize access to the rsb -- confirm. */
static inline void rsb_clear_flag(struct dlm_rsb *r, enum rsb_flags flag)
{
	unsigned long *word = &r->res_flags;

	__clear_bit(flag, word);
}

/* Return the test_bit() result for bit number "flag" in r->res_flags:
   non-zero when the flag is set, zero otherwise. */
static inline int rsb_flag(struct dlm_rsb *r, enum rsb_flags flag)
{
	unsigned long *word = &r->res_flags;

	return test_bit(flag, word);
}


/* dlm_header is first element of all structs sent between nodes */

D
David Teigland 已提交
323 324
/* Protocol version constants; the major number (3) sits in the upper
   16 bits of the value.
   NOTE(review): presumably combined as MAJOR | MINOR into
   dlm_header.h_version -- confirm at the users. */
#define DLM_HEADER_MAJOR	0x00030000
#define DLM_HEADER_MINOR	0x00000000
325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351

/* Values for dlm_header.h_cmd. */
#define DLM_MSG			1
#define DLM_RCOM		2

/*
 * Common header embedded as the first member of struct dlm_message
 * (m_header) and struct dlm_rcom (rc_header).
 */
struct dlm_header {
	uint32_t		h_version;
	uint32_t		h_lockspace;
	uint32_t		h_nodeid;	/* nodeid of sender */
	uint16_t		h_length;
	uint8_t			h_cmd;		/* DLM_MSG, DLM_RCOM */
	uint8_t			h_pad;
};


/*
 * Message types carried in dlm_message.m_type.  Each of REQUEST, CONVERT,
 * UNLOCK and CANCEL (1-4) has a matching *_REPLY value four higher (5-8).
 */
#define DLM_MSG_REQUEST		1
#define DLM_MSG_CONVERT		2
#define DLM_MSG_UNLOCK		3
#define DLM_MSG_CANCEL		4
#define DLM_MSG_REQUEST_REPLY	5
#define DLM_MSG_CONVERT_REPLY	6
#define DLM_MSG_UNLOCK_REPLY	7
#define DLM_MSG_CANCEL_REPLY	8
#define DLM_MSG_GRANT		9
#define DLM_MSG_BAST		10
#define DLM_MSG_LOOKUP		11
#define DLM_MSG_REMOVE		12
#define DLM_MSG_LOOKUP_REPLY	13
#define DLM_MSG_PURGE		14
353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400

/*
 * Lock message.  Begins with the common dlm_header; m_extra is a
 * zero-length trailing array marking where the variable-size name or lvb
 * follows the fixed fields.
 */
struct dlm_message {
	struct dlm_header	m_header;
	uint32_t		m_type;		/* DLM_MSG_ */
	uint32_t		m_nodeid;
	uint32_t		m_pid;
	uint32_t		m_lkid;		/* lkid on sender */
	uint32_t		m_remid;	/* lkid on receiver */
	uint32_t		m_parent_lkid;
	uint32_t		m_parent_remid;
	uint32_t		m_exflags;
	uint32_t		m_sbflags;
	uint32_t		m_flags;
	uint32_t		m_lvbseq;
	uint32_t		m_hash;
	int			m_status;
	int			m_grmode;
	int			m_rqmode;
	int			m_bastmode;
	int			m_asts;
	int			m_result;	/* 0 or -EXXX */
	char			m_extra[0];	/* name or lvb */
};


/* Recovery status bits (ls_recover_status in struct dlm_ls is annotated
   DLM_RS_).  Each stage bit is followed by its *_ALL counterpart at the
   next bit position. */
#define DLM_RS_NODES		0x00000001
#define DLM_RS_NODES_ALL	0x00000002
#define DLM_RS_DIR		0x00000004
#define DLM_RS_DIR_ALL		0x00000008
#define DLM_RS_LOCKS		0x00000010
#define DLM_RS_LOCKS_ALL	0x00000020
#define DLM_RS_DONE		0x00000040
#define DLM_RS_DONE_ALL		0x00000080

/* Values for dlm_rcom.rc_type.  Each request type (1-4) has a matching
   *_REPLY value four higher (5-8). */
#define DLM_RCOM_STATUS		1
#define DLM_RCOM_NAMES		2
#define DLM_RCOM_LOOKUP		3
#define DLM_RCOM_LOCK		4
#define DLM_RCOM_STATUS_REPLY	5
#define DLM_RCOM_NAMES_REPLY	6
#define DLM_RCOM_LOOKUP_REPLY	7
#define DLM_RCOM_LOCK_REPLY	8

/*
 * rcom message.  Begins with the common dlm_header; rc_buf is a zero-length
 * trailing array marking where a variable-size payload follows.
 * NOTE(review): presumably the payload is a struct rcom_config or
 * struct rcom_lock depending on rc_type -- confirm at the users.
 */
struct dlm_rcom {
	struct dlm_header	rc_header;
	uint32_t		rc_type;	/* DLM_RCOM_ */
	int			rc_result;	/* multi-purpose */
	uint64_t		rc_id;		/* match reply with request */
	uint64_t		rc_seq;		/* sender's ls_recover_seq */
	uint64_t		rc_seq_reply;	/* remote ls_recover_seq */
	char			rc_buf[0];
};

/* NOTE(review): presumably carried in dlm_rcom.rc_buf so nodes can compare
   lockspace settings (cf. ls_lvblen and ls_exflags in struct dlm_ls) --
   confirm at the users. */
struct rcom_config {
	uint32_t		rf_lvblen;
	uint32_t		rf_lsflags;
	uint64_t		rf_unused;
};

/*
 * Description of a single lock; the rl_ fields mirror many of the lkb_
 * fields of struct dlm_lkb.  rl_name is a fixed DLM_RESNAME_MAXLEN-byte
 * buffer (rl_namelen presumably gives the bytes in use) and rl_lvb is a
 * zero-length trailing array marking where an lvb may follow.
 * NOTE(review): presumably carried in dlm_rcom.rc_buf for DLM_RCOM_LOCK --
 * confirm at the users.
 */
struct rcom_lock {
	uint32_t		rl_ownpid;
	uint32_t		rl_lkid;
	uint32_t		rl_remid;
	uint32_t		rl_parent_lkid;
	uint32_t		rl_parent_remid;
	uint32_t		rl_exflags;
	uint32_t		rl_flags;
	uint32_t		rl_lvbseq;
	int			rl_result;
	int8_t			rl_rqmode;
	int8_t			rl_grmode;
	int8_t			rl_status;
	int8_t			rl_asts;
	uint16_t		rl_wait_type;
	uint16_t		rl_namelen;
	char			rl_name[DLM_RESNAME_MAXLEN];
	char			rl_lvb[0];
};

struct dlm_ls {
	struct list_head	ls_list;	/* list of lockspaces */
D
David Teigland 已提交
434
	dlm_lockspace_t		*ls_local_handle;
435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450
	uint32_t		ls_global_id;	/* global unique lockspace ID */
	uint32_t		ls_exflags;
	int			ls_lvblen;
	int			ls_count;	/* reference count */
	unsigned long		ls_flags;	/* LSFL_ */
	struct kobject		ls_kobj;

	struct dlm_rsbtable	*ls_rsbtbl;
	uint32_t		ls_rsbtbl_size;

	struct dlm_lkbtable	*ls_lkbtbl;
	uint32_t		ls_lkbtbl_size;

	struct dlm_dirtable	*ls_dirtbl;
	uint32_t		ls_dirtbl_size;

451
	struct mutex		ls_waiters_mutex;
452 453
	struct list_head	ls_waiters;	/* lkbs needing a reply */

454 455 456
	struct mutex		ls_orphans_mutex;
	struct list_head	ls_orphans;

457 458 459
	struct mutex		ls_timeout_mutex;
	struct list_head	ls_timeout;

460 461 462 463 464 465
	struct list_head	ls_nodes;	/* current nodes in ls */
	struct list_head	ls_nodes_gone;	/* dead node list, recovery */
	int			ls_num_nodes;	/* number of nodes in ls */
	int			ls_low_nodeid;
	int			ls_total_weight;
	int			*ls_node_array;
P
Patrick Caulfield 已提交
466
	gfp_t			ls_allocation;
467 468 469 470 471

	struct dlm_rsb		ls_stub_rsb;	/* for returning errors */
	struct dlm_lkb		ls_stub_lkb;	/* for returning errors */
	struct dlm_message	ls_stub_ms;	/* for faking a reply */

D
David Teigland 已提交
472 473
	struct dentry		*ls_debug_rsb_dentry; /* debugfs */
	struct dentry		*ls_debug_waiters_dentry; /* debugfs */
D
David Teigland 已提交
474
	struct dentry		*ls_debug_locks_dentry; /* debugfs */
475 476 477

	wait_queue_head_t	ls_uevent_wait;	/* user part of join/leave */
	int			ls_uevent_result;
478 479
	struct completion	ls_members_done;
	int			ls_members_result;
480

D
David Teigland 已提交
481 482
	struct miscdevice       ls_device;

483 484 485 486
	/* recovery related */

	struct timer_list	ls_timer;
	struct task_struct	*ls_recoverd_task;
487
	struct mutex		ls_recoverd_active;
488
	spinlock_t		ls_recover_lock;
489
	unsigned long		ls_recover_begin; /* jiffies timestamp */
490 491 492 493
	uint32_t		ls_recover_status; /* DLM_RS_ */
	uint64_t		ls_recover_seq;
	struct dlm_recover	*ls_recover_args;
	struct rw_semaphore	ls_in_recovery;	/* block local requests */
494
	struct rw_semaphore	ls_recv_active;	/* block dlm_recv */
495
	struct list_head	ls_requestqueue;/* queue remote requests */
496
	struct mutex		ls_requestqueue_mutex;
497
	char			*ls_recover_buf;
498
	int			ls_recover_nodeid; /* for debugging */
499
	uint64_t		ls_rcom_seq;
500
	spinlock_t		ls_rcom_spin;
501 502 503 504
	struct list_head	ls_recover_list;
	spinlock_t		ls_recover_list_lock;
	int			ls_recover_list_count;
	wait_queue_head_t	ls_wait_general;
D
David Teigland 已提交
505
	struct mutex		ls_clear_proc_locks;
506 507 508 509 510 511 512 513 514 515 516 517

	struct list_head	ls_root_list;	/* root resources */
	struct rw_semaphore	ls_root_sem;	/* protect root_list */

	int			ls_namelen;
	char			ls_name[1];
};

/* Bit numbers (not masks) within dlm_ls.ls_flags; tested with test_bit()
   by dlm_locking_stopped() and dlm_recovery_stopped(). */
#define LSFL_WORK		0
#define LSFL_RUNNING		1
#define LSFL_RECOVERY_STOP	2
#define LSFL_RCOM_READY		3
#define LSFL_RCOM_WAIT		4
#define LSFL_UEVENT_WAIT	5
#define LSFL_TIMEWARN		6
521

D
David Teigland 已提交
522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537
/* much of this is just saving user space pointers associated with the
   lock that we pass back to the user lib with an ast */

struct dlm_user_args {
	struct dlm_user_proc	*proc; /* each process that opens the lockspace
					  device has private data
					  (dlm_user_proc) on the struct file,
					  the process's locks point back to it*/
	struct dlm_lksb		lksb;	/* kernel-side status block, held by
					   value */
	int			old_mode;
	int			update_user_lvb;
	struct dlm_lksb __user	*user_lksb;	/* user-space status block */
	void __user		*castparam;
	void __user		*castaddr;
	void __user		*bastparam;
	void __user		*bastaddr;
	uint64_t		xid;
};

/* Values for dlm_user_proc.flags (annotated DLM_PROC_FLAGS there).
   NOTE(review): whether these are bit numbers or masks is not visible in
   this header -- confirm at the users. */
#define DLM_PROC_FLAGS_CLOSING 1
#define DLM_PROC_FLAGS_COMPAT  2

/* locks list is kept so we can remove all a process's locks when it
   exits (or orphan those that are persistent) */

struct dlm_user_proc {
	dlm_lockspace_t		*lockspace;
	unsigned long		flags; /* DLM_PROC_FLAGS */
	struct list_head	asts;
	spinlock_t		asts_spin;
	struct list_head	locks;
	spinlock_t		locks_spin;
D
David Teigland 已提交
554
	struct list_head	unlocking;
D
David Teigland 已提交
555 556 557
	wait_queue_head_t	wait;
};

558 559 560 561 562 563 564 565 566 567 568 569 570 571 572
/* Return 1 when the LSFL_RUNNING bit is clear in ls->ls_flags, 0 when it
   is set. */
static inline int dlm_locking_stopped(struct dlm_ls *ls)
{
	if (test_bit(LSFL_RUNNING, &ls->ls_flags))
		return 0;
	return 1;
}

/* Return the test_bit() result for LSFL_RECOVERY_STOP in ls->ls_flags:
   non-zero when the bit is set, zero otherwise. */
static inline int dlm_recovery_stopped(struct dlm_ls *ls)
{
	unsigned long *flags = &ls->ls_flags;

	return test_bit(LSFL_RECOVERY_STOP, flags);
}

/* Return 1 when DLM_LSFL_NODIR is set in ls->ls_exflags, otherwise 0. */
static inline int dlm_no_directory(struct dlm_ls *ls)
{
	return !!(ls->ls_exflags & DLM_LSFL_NODIR);
}

A
Adrian Bunk 已提交
573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588
/* Prototypes only; the definitions are not in this header.
   NOTE(review): presumably dlm_netlink_init() returns 0 or a negative errno
   -- confirm at the definition. */
int dlm_netlink_init(void);
void dlm_netlink_exit(void);
void dlm_timeout_warn(struct dlm_lkb *lkb);

/* debugfs hooks.  With CONFIG_DLM_DEBUG the functions are only declared
   here; without it they are empty inline stubs (the int-returning ones
   return 0), so callers need no #ifdef of their own. */
#ifdef CONFIG_DLM_DEBUG
int dlm_register_debugfs(void);
void dlm_unregister_debugfs(void);
int dlm_create_debug_file(struct dlm_ls *ls);
void dlm_delete_debug_file(struct dlm_ls *ls);
#else
static inline int dlm_register_debugfs(void) { return 0; }
static inline void dlm_unregister_debugfs(void) { }
static inline int dlm_create_debug_file(struct dlm_ls *ls) { return 0; }
static inline void dlm_delete_debug_file(struct dlm_ls *ls) { }
#endif

589 590
#endif				/* __DLM_INTERNAL_DOT_H__ */