/******************************************************************************
*******************************************************************************
**
**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
**
**  This copyrighted material is made available to anyone wishing to use,
**  modify, copy, or redistribute it subject to the terms and conditions
**  of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/

#ifndef __DLM_INTERNAL_DOT_H__
#define __DLM_INTERNAL_DOT_H__

/*
 * This is the main header file to be included in each DLM source file.
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/ctype.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/list.h>
#include <linux/errno.h>
#include <linux/random.h>
#include <linux/delay.h>
#include <linux/socket.h>
#include <linux/kthread.h>
#include <linux/kobject.h>
#include <linux/kref.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <linux/miscdevice.h>
#include <linux/mutex.h>
#include <asm/semaphore.h>
#include <asm/uaccess.h>

#include <linux/dlm.h>
#include "config.h"

/* NOTE(review): presumably the maximum length of a lockspace name, in
   bytes -- confirm against the users of this constant. */
#define DLM_LOCKSPACE_LEN	64

/* Size of the temp buffer midcomms allocates on the stack.
   We try to make this large enough so most messages fit.
   FIXME: should sctp make this unnecessary? */

#define DLM_INBUF_LEN		148

/* Forward declarations, so these types can be referred to by pointer
   before (or without) their full definitions being visible. */
struct dlm_ls;
struct dlm_lkb;
struct dlm_rsb;
struct dlm_member;
struct dlm_lkbtable;
struct dlm_rsbtable;
struct dlm_dirtable;
struct dlm_direntry;
struct dlm_recover;
struct dlm_header;
struct dlm_message;
struct dlm_rcom;
struct dlm_mhandle;

/*
 * Error-level logging helpers.  Both emit at KERN_ERR, prefix the message
 * with "dlm: " and append a newline; log_error() additionally prints the
 * lockspace name read from (ls)->ls_name.
 */
#define log_print(fmt, ...) \
	printk(KERN_ERR "dlm: "fmt"\n" , ##__VA_ARGS__)

#define log_error(ls, fmt, ...) \
	printk(KERN_ERR "dlm: %s: " fmt "\n", (ls)->ls_name , ##__VA_ARGS__)

/*
 * Debug-level logging: prints "dlm: <ls_name>: <message>\n" at KERN_DEBUG,
 * but only when dlm_config.ci_log_debug is non-zero.  Wrapped in
 * do { } while (0) so it behaves as a single statement.
 */
#define log_debug(ls, fmt, ...) \
do { \
	if (dlm_config.ci_log_debug) { \
		printk(KERN_DEBUG "dlm: %s: " fmt "\n", (ls)->ls_name , \
		       ##__VA_ARGS__); \
	} \
} while (0)

/*
 * DLM_ASSERT(x, do): if condition x is false, print the line, file,
 * stringified expression and current jiffies, execute the caller-supplied
 * statement(s) `do`, print a blank line, then call BUG() followed by
 * panic().
 *
 * NOTE(review): the expansion is a bare { } block rather than
 * do { } while (0), so "if (c) DLM_ASSERT(...); else ..." would not parse.
 * Left unchanged because the call sites are not visible from this header.
 */
#define DLM_ASSERT(x, do) \
{ \
  if (!(x)) \
  { \
    printk(KERN_ERR "\nDLM:  Assertion failed on line %d of file %s\n" \
               "DLM:  assertion:  \"%s\"\n" \
               "DLM:  time = %lu\n", \
               __LINE__, __FILE__, #x, jiffies); \
    {do} \
    printk("\n"); \
    BUG(); \
    panic("DLM:  Record message above and reboot.\n"); \
  } \
}

/* Sentinel pointer value built with ERR_PTR(-EINVAL).
   NOTE(review): presumably stored as the ast address of locks taken from
   user space, where there is no kernel callback -- confirm in the users. */
#define DLM_FAKE_USER_AST ERR_PTR(-EINVAL)


/*
 * Directory entry: a named record carrying a master node id.
 *
 * NOTE(review): name[1] looks like the pre-C99 variable-length-tail idiom
 * (entry allocated with room for `length` bytes of name) -- confirm against
 * the code that allocates these entries.
 */
struct dlm_direntry {
	struct list_head	list;		/* list linkage */
	uint32_t		master_nodeid;
	uint16_t		length;		/* presumably length of name[] -- confirm */
	char			name[1];
};

/*
 * A list head plus the rwlock stored alongside it.  struct dlm_ls keeps a
 * pointer to these (ls_dirtbl) together with a size (ls_dirtbl_size).
 * NOTE(review): presumably one hash bucket of the directory -- confirm.
 */
struct dlm_dirtable {
	struct list_head	list;
	rwlock_t		lock;
};

/*
 * Two list heads (list and toss) plus one rwlock.  struct dlm_ls keeps a
 * pointer to these (ls_rsbtbl) together with a size (ls_rsbtbl_size).
 * NOTE(review): presumably one rsb hash bucket, with `toss` holding unused
 * rsbs awaiting removal (cf. dlm_rsb.res_toss_time) -- confirm.
 */
struct dlm_rsbtable {
	struct list_head	list;
	struct list_head	toss;
	rwlock_t		lock;
};

/*
 * A list head, an rwlock and a 16-bit counter.  struct dlm_ls keeps a
 * pointer to these (ls_lkbtbl) together with a size (ls_lkbtbl_size).
 * NOTE(review): presumably one lkb id-table bucket, with `counter` used
 * when generating lock ids -- confirm.
 */
struct dlm_lkbtable {
	struct list_head	list;
	rwlock_t		lock;
	uint16_t		counter;
};

/*
 * Lockspace member (per node in a ls)
 */

struct dlm_member {
	struct list_head	list;		/* list linkage */
	int			nodeid;		/* id of the member node */
	int			weight;		/* NOTE(review): presumably summed into
						   dlm_ls.ls_total_weight -- confirm */
};

/*
 * Save and manage recovery state for a lockspace.
 */

struct dlm_recover {
	struct list_head	list;		/* list linkage */
	int			*nodeids;	/* array of node ids */
	int			node_count;	/* presumably number of entries in
						   nodeids -- confirm */
	uint64_t		seq;		/* sequence number; cf.
						   dlm_ls.ls_recover_seq */
};

/*
 * Pass input args to second stage locking function.
 */

/*
 * Field names mirror the corresponding lkb_* members of struct dlm_lkb
 * (astaddr/bastaddr/astparam/lksb).  Stray web-viewer line-number residue
 * that had been pasted between the members has been removed.
 */
struct dlm_args {
	uint32_t		flags;
	void			*astaddr;	/* cf. lkb_astaddr */
	long			astparam;	/* cf. lkb_astparam */
	void			*bastaddr;	/* cf. lkb_bastaddr */
	int			mode;
	struct dlm_lksb		*lksb;		/* cf. lkb_lksb */
	unsigned long		timeout;
};


/*
 * Lock block
 *
 * A lock can be one of three types:
 *
 * local copy      lock is mastered locally
 *                 (lkb_nodeid is zero and DLM_IFL_MSTCPY is not set)
 * process copy    lock is mastered on a remote node
 *                 (lkb_nodeid is non-zero and DLM_IFL_MSTCPY is not set)
 * master copy     master node's copy of a lock owned by remote node
 *                 (lkb_nodeid is non-zero and DLM_IFL_MSTCPY is set)
 *
 * lkb_exflags: a copy of the most recent flags arg provided to dlm_lock or
 * dlm_unlock.  The dlm does not modify these or use any private flags in
 * this field; it only contains DLM_LKF_ flags from dlm.h.  These flags
 * are sent as-is to the remote master when the lock is remote.
 *
 * lkb_flags: internal dlm flags (DLM_IFL_ prefix) from dlm_internal.h.
 * Some internal flags are shared between the master and process nodes;
 * these shared flags are kept in the lower two bytes.  One of these
 * flags set on the master copy will be propagated to the process copy
 * and v.v.  Other internal flags are private to the master or process
 * node (e.g. DLM_IFL_MSTCPY).  These are kept in the high two bytes.
 *
 * lkb_sbflags: status block flags.  These flags are copied directly into
 * the caller's lksb.sb_flags prior to the dlm_lock/dlm_unlock completion
 * ast.  All defined in dlm.h with DLM_SBF_ prefix.
 *
 * lkb_status: the lock status indicates which rsb queue the lock is
 * on, grant, convert, or wait.  DLM_LKSTS_ WAITING/GRANTED/CONVERT
 *
 * lkb_wait_type: the dlm message type (DLM_MSG_ prefix) for which a
 * reply is needed.  Only set when the lkb is on the lockspace waiters
 * list awaiting a reply from a remote node.
 *
 * lkb_nodeid: when the lkb is a local copy, nodeid is 0; when the lkb
 * is a master copy, nodeid specifies the remote lock holder, when the
 * lkb is a process copy, the nodeid specifies the lock master.
 */

/* lkb_ast_type: kind of ast queued for an lkb (dlm_lkb.lkb_ast_type).
   NOTE(review): 1 and 2 look like independent bits that may be OR-ed
   together -- confirm. */

#define AST_COMP		1
#define AST_BAST		2

/* lkb_status: which rsb queue the lkb is on (dlm_lkb.lkb_status) */

#define DLM_LKSTS_WAITING	1
#define DLM_LKSTS_GRANTED	2
#define DLM_LKSTS_CONVERT	3

/*
 * lkb_flags (internal, DLM_IFL_ prefix)
 *
 * Flags private to the master or process node are kept in the high two
 * bytes; flags shared between the master and process copies are kept in
 * the low two bytes.
 */

/* private to one node (high two bytes) */
#define DLM_IFL_MSTCPY		0x00010000
#define DLM_IFL_RESEND		0x00020000
#define DLM_IFL_DEAD		0x00040000
#define DLM_IFL_OVERLAP_UNLOCK	0x00080000
#define DLM_IFL_OVERLAP_CANCEL	0x00100000
#define DLM_IFL_ENDOFLIFE	0x00200000
#define DLM_IFL_WATCH_TIMEWARN	0x00400000
#define DLM_IFL_TIMEOUT_CANCEL	0x00800000

/* shared between master and process copies (low two bytes) */
#define DLM_IFL_USER		0x00000001
#define DLM_IFL_ORPHAN		0x00000002

struct dlm_lkb {
	struct dlm_rsb		*lkb_resource;	/* the rsb */
	struct kref		lkb_ref;
	int			lkb_nodeid;	/* copied from rsb */
	int			lkb_ownpid;	/* pid of lock owner */
	uint32_t		lkb_id;		/* our lock ID */
	uint32_t		lkb_remid;	/* lock ID on remote partner */
	uint32_t		lkb_exflags;	/* external flags from caller */
	uint32_t		lkb_sbflags;	/* lksb flags */
	uint32_t		lkb_flags;	/* internal flags */
	uint32_t		lkb_lvbseq;	/* lvb sequence number */

	int8_t			lkb_status;     /* granted, waiting, convert */
	int8_t			lkb_rqmode;	/* requested lock mode */
	int8_t			lkb_grmode;	/* granted lock mode */
	int8_t			lkb_bastmode;	/* requested mode */
	int8_t			lkb_highbast;	/* highest mode bast sent for */
	int8_t			lkb_wait_type;	/* type of reply waiting for */
240
	int8_t			lkb_wait_count;
241 242 243 244 245 246 247
	int8_t			lkb_ast_type;	/* type of ast queued for */

	struct list_head	lkb_idtbl_list;	/* lockspace lkbtbl */
	struct list_head	lkb_statequeue;	/* rsb g/c/w list */
	struct list_head	lkb_rsb_lookup;	/* waiting for rsb lookup */
	struct list_head	lkb_wait_reply;	/* waiting for remote reply */
	struct list_head	lkb_astqueue;	/* need ast to be sent */
D
David Teigland 已提交
248
	struct list_head	lkb_ownqueue;	/* list of locks for a process */
249 250 251
	struct list_head	lkb_time_list;
	unsigned long		lkb_timestamp;
	unsigned long		lkb_timeout_cs;
252 253 254 255 256 257 258 259 260 261 262 263

	char			*lkb_lvbptr;
	struct dlm_lksb		*lkb_lksb;      /* caller's status block */
	void			*lkb_astaddr;	/* caller's ast function */
	void			*lkb_bastaddr;	/* caller's bast function */
	long			lkb_astparam;	/* caller's ast arg */
};


struct dlm_rsb {
	struct dlm_ls		*res_ls;	/* the lockspace */
	struct kref		res_ref;
264
	struct mutex		res_mutex;
265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
	unsigned long		res_flags;
	int			res_length;	/* length of rsb name */
	int			res_nodeid;
	uint32_t                res_lvbseq;
	uint32_t		res_hash;
	uint32_t		res_bucket;	/* rsbtbl */
	unsigned long		res_toss_time;
	uint32_t		res_first_lkid;
	struct list_head	res_lookup;	/* lkbs waiting on first */
	struct list_head	res_hashchain;	/* rsbtbl */
	struct list_head	res_grantqueue;
	struct list_head	res_convertqueue;
	struct list_head	res_waitqueue;

	struct list_head	res_root_list;	    /* used for recovery */
	struct list_head	res_recover_list;   /* used for recovery */
	int			res_recover_locks_count;

	char			*res_lvbptr;
	char			res_name[1];
};

/* find_rsb() flags (find_rsb() itself is defined outside this header) */

#define R_MASTER		1	/* only return rsb if it's a master */
#define R_CREATE		2	/* create/add rsb if not found */

/* rsb_flags */

/*
 * Bit numbers within dlm_rsb.res_flags; set, cleared and tested through
 * rsb_set_flag(), rsb_clear_flag() and rsb_flag().  The enumerators are
 * implicitly numbered from 0, so their order must not change.
 * (Web-viewer line-number residue inside the enum has been removed.)
 */
enum rsb_flags {
	RSB_MASTER_UNCERTAIN,
	RSB_VALNOTVALID,
	RSB_VALNOTVALID_PREV,
	RSB_NEW_MASTER,
	RSB_NEW_MASTER2,
	RSB_RECOVER_CONVERT,
	RSB_LOCKS_PURGED,
};

/*
 * Set bit @flag in @r->res_flags via __set_bit().
 * NOTE(review): __set_bit() is the non-atomic kernel bitop; callers
 * presumably serialize access to the rsb themselves -- confirm.
 */
static inline void rsb_set_flag(struct dlm_rsb *r, enum rsb_flags flag)
{
	unsigned long *bits = &r->res_flags;

	__set_bit(flag, bits);
}

/*
 * Clear bit @flag in @r->res_flags via __clear_bit().
 * NOTE(review): __clear_bit() is the non-atomic kernel bitop; callers
 * presumably serialize access to the rsb themselves -- confirm.
 */
static inline void rsb_clear_flag(struct dlm_rsb *r, enum rsb_flags flag)
{
	unsigned long *bits = &r->res_flags;

	__clear_bit(flag, bits);
}

/* Return the result of test_bit() for bit @flag of @r->res_flags. */
static inline int rsb_flag(struct dlm_rsb *r, enum rsb_flags flag)
{
	int is_set = test_bit(flag, &r->res_flags);

	return is_set;
}


/* dlm_header is first element of all structs sent between nodes */

/* NOTE(review): presumably combined into dlm_header.h_version -- confirm. */
#define DLM_HEADER_MAJOR	0x00030000
#define DLM_HEADER_MINOR	0x00000000

/* dlm_header.h_cmd values */
#define DLM_MSG			1
#define DLM_RCOM		2

/*
 * Common header placed first in struct dlm_message (m_header) and
 * struct dlm_rcom (rc_header).
 */
struct dlm_header {
	uint32_t		h_version;
	uint32_t		h_lockspace;
	uint32_t		h_nodeid;	/* nodeid of sender */
	uint16_t		h_length;
	uint8_t			h_cmd;		/* DLM_MSG, DLM_RCOM */
	uint8_t			h_pad;
};


/* dlm_message.m_type values; also the values held in lkb_wait_type */
#define DLM_MSG_REQUEST		1
#define DLM_MSG_CONVERT		2
#define DLM_MSG_UNLOCK		3
#define DLM_MSG_CANCEL		4
#define DLM_MSG_REQUEST_REPLY	5
#define DLM_MSG_CONVERT_REPLY	6
#define DLM_MSG_UNLOCK_REPLY	7
#define DLM_MSG_CANCEL_REPLY	8
#define DLM_MSG_GRANT		9
#define DLM_MSG_BAST		10
#define DLM_MSG_LOOKUP		11
#define DLM_MSG_REMOVE		12
#define DLM_MSG_LOOKUP_REPLY	13
#define DLM_MSG_PURGE		14

/*
 * Lock message: starts with the common dlm_header and ends with a
 * zero-length tail (m_extra) that carries a name or an lvb.
 * struct dlm_ls embeds one of these by value (ls_stub_ms).
 */
struct dlm_message {
	struct dlm_header	m_header;
	uint32_t		m_type;		/* DLM_MSG_ */
	uint32_t		m_nodeid;
	uint32_t		m_pid;
	uint32_t		m_lkid;		/* lkid on sender */
	uint32_t		m_remid;	/* lkid on receiver */
	uint32_t		m_parent_lkid;
	uint32_t		m_parent_remid;
	uint32_t		m_exflags;
	uint32_t		m_sbflags;
	uint32_t		m_flags;
	uint32_t		m_lvbseq;
	uint32_t		m_hash;
	int			m_status;
	int			m_grmode;
	int			m_rqmode;
	int			m_bastmode;
	int			m_asts;
	int			m_result;	/* 0 or -EXXX */
	char			m_extra[0];	/* name or lvb */
};


/* Recovery status bits, as held in dlm_ls.ls_recover_status.  Each stage
   has a plain bit and an _ALL bit.
   NOTE(review): _ALL presumably means every node has reached that stage --
   confirm. */
#define DLM_RS_NODES		0x00000001
#define DLM_RS_NODES_ALL	0x00000002
#define DLM_RS_DIR		0x00000004
#define DLM_RS_DIR_ALL		0x00000008
#define DLM_RS_LOCKS		0x00000010
#define DLM_RS_LOCKS_ALL	0x00000020
#define DLM_RS_DONE		0x00000040
#define DLM_RS_DONE_ALL		0x00000080

/* dlm_rcom.rc_type values; each request type N has its reply at N + 4 */
#define DLM_RCOM_STATUS		1
#define DLM_RCOM_NAMES		2
#define DLM_RCOM_LOOKUP		3
#define DLM_RCOM_LOCK		4
#define DLM_RCOM_STATUS_REPLY	5
#define DLM_RCOM_NAMES_REPLY	6
#define DLM_RCOM_LOOKUP_REPLY	7
#define DLM_RCOM_LOCK_REPLY	8

/*
 * Recovery message: starts with the common dlm_header and ends with a
 * zero-length payload tail (rc_buf).  Web-viewer residue that had been
 * pasted between the members has been removed; member order and types are
 * unchanged.
 */
struct dlm_rcom {
	struct dlm_header	rc_header;
	uint32_t		rc_type;	/* DLM_RCOM_ */
	int			rc_result;	/* multi-purpose */
	uint64_t		rc_id;		/* match reply with request */
	uint64_t		rc_seq;		/* sender's ls_recover_seq */
	uint64_t		rc_seq_reply;	/* remote ls_recover_seq */
	char			rc_buf[0];
};

/* NOTE(review): presumably carried in dlm_rcom.rc_buf so nodes can compare
   lockspace settings (cf. dlm_ls.ls_lvblen / ls_exflags) -- confirm. */
struct rcom_config {
	uint32_t		rf_lvblen;
	uint32_t		rf_lsflags;
	uint64_t		rf_unused;
};

/*
 * Description of a single lock; the rl_* fields mirror the lkb_* members
 * of struct dlm_lkb.  The resource name is held in a fixed-size array and
 * is followed by a zero-length tail, rl_lvb.
 * NOTE(review): presumably carried in dlm_rcom.rc_buf for DLM_RCOM_LOCK --
 * confirm.
 */
struct rcom_lock {
	uint32_t		rl_ownpid;
	uint32_t		rl_lkid;
	uint32_t		rl_remid;
	uint32_t		rl_parent_lkid;
	uint32_t		rl_parent_remid;
	uint32_t		rl_exflags;
	uint32_t		rl_flags;
	uint32_t		rl_lvbseq;
	int			rl_result;
	int8_t			rl_rqmode;
	int8_t			rl_grmode;
	int8_t			rl_status;
	int8_t			rl_asts;
	uint16_t		rl_wait_type;
	uint16_t		rl_namelen;
	char			rl_name[DLM_RESNAME_MAXLEN];
	char			rl_lvb[0];
};

struct dlm_ls {
	struct list_head	ls_list;	/* list of lockspaces */
D
David Teigland 已提交
433
	dlm_lockspace_t		*ls_local_handle;
434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449
	uint32_t		ls_global_id;	/* global unique lockspace ID */
	uint32_t		ls_exflags;
	int			ls_lvblen;
	int			ls_count;	/* reference count */
	unsigned long		ls_flags;	/* LSFL_ */
	struct kobject		ls_kobj;

	struct dlm_rsbtable	*ls_rsbtbl;
	uint32_t		ls_rsbtbl_size;

	struct dlm_lkbtable	*ls_lkbtbl;
	uint32_t		ls_lkbtbl_size;

	struct dlm_dirtable	*ls_dirtbl;
	uint32_t		ls_dirtbl_size;

450
	struct mutex		ls_waiters_mutex;
451 452
	struct list_head	ls_waiters;	/* lkbs needing a reply */

453 454 455
	struct mutex		ls_orphans_mutex;
	struct list_head	ls_orphans;

456 457 458
	struct mutex		ls_timeout_mutex;
	struct list_head	ls_timeout;

459 460 461 462 463 464 465 466 467 468 469
	struct list_head	ls_nodes;	/* current nodes in ls */
	struct list_head	ls_nodes_gone;	/* dead node list, recovery */
	int			ls_num_nodes;	/* number of nodes in ls */
	int			ls_low_nodeid;
	int			ls_total_weight;
	int			*ls_node_array;

	struct dlm_rsb		ls_stub_rsb;	/* for returning errors */
	struct dlm_lkb		ls_stub_lkb;	/* for returning errors */
	struct dlm_message	ls_stub_ms;	/* for faking a reply */

D
David Teigland 已提交
470 471
	struct dentry		*ls_debug_rsb_dentry; /* debugfs */
	struct dentry		*ls_debug_waiters_dentry; /* debugfs */
472 473 474

	wait_queue_head_t	ls_uevent_wait;	/* user part of join/leave */
	int			ls_uevent_result;
475 476
	struct completion	ls_members_done;
	int			ls_members_result;
477

D
David Teigland 已提交
478 479
	struct miscdevice       ls_device;

480 481 482 483
	/* recovery related */

	struct timer_list	ls_timer;
	struct task_struct	*ls_recoverd_task;
484
	struct mutex		ls_recoverd_active;
485
	spinlock_t		ls_recover_lock;
486
	unsigned long		ls_recover_begin; /* jiffies timestamp */
487 488 489 490 491
	uint32_t		ls_recover_status; /* DLM_RS_ */
	uint64_t		ls_recover_seq;
	struct dlm_recover	*ls_recover_args;
	struct rw_semaphore	ls_in_recovery;	/* block local requests */
	struct list_head	ls_requestqueue;/* queue remote requests */
492
	struct mutex		ls_requestqueue_mutex;
493
	char			*ls_recover_buf;
494
	int			ls_recover_nodeid; /* for debugging */
495
	uint64_t		ls_rcom_seq;
496
	spinlock_t		ls_rcom_spin;
497 498 499 500
	struct list_head	ls_recover_list;
	spinlock_t		ls_recover_list_lock;
	int			ls_recover_list_count;
	wait_queue_head_t	ls_wait_general;
D
David Teigland 已提交
501
	struct mutex		ls_clear_proc_locks;
502 503 504 505 506 507 508 509 510 511 512 513

	struct list_head	ls_root_list;	/* root resources */
	struct rw_semaphore	ls_root_sem;	/* protect root_list */

	int			ls_namelen;
	char			ls_name[1];
};

/* Bit numbers within dlm_ls.ls_flags (tested with test_bit() below) */
#define LSFL_WORK		0
#define LSFL_RUNNING		1
#define LSFL_RECOVERY_STOP	2
#define LSFL_RCOM_READY		3
#define LSFL_RCOM_WAIT		4
#define LSFL_UEVENT_WAIT	5
#define LSFL_TIMEWARN		6

/* much of this is just saving user space pointers associated with the
   lock that we pass back to the user lib with an ast */

/*
 * Per-lock state for locks taken through the user-space interface: a
 * kernel-side lksb plus the user-space pointers that are handed back with
 * an ast.  Web-viewer residue that had been pasted between the members
 * has been removed; member order and types are unchanged.
 */
struct dlm_user_args {
	struct dlm_user_proc	*proc; /* each process that opens the lockspace
					  device has private data
					  (dlm_user_proc) on the struct file,
					  the process's locks point back to it*/
	struct dlm_lksb		lksb;
	int			old_mode;
	int			update_user_lvb;
	struct dlm_lksb __user	*user_lksb;
	void __user		*castparam;
	void __user		*castaddr;
	void __user		*bastparam;
	void __user		*bastaddr;
	uint64_t		xid;
};

/* Values for dlm_user_proc.flags.
   NOTE(review): whether these are bit numbers or bit masks is not visible
   from this header -- confirm against the users. */
#define DLM_PROC_FLAGS_CLOSING 1
#define DLM_PROC_FLAGS_COMPAT  2

/* locks list is kept so we can remove all a process's locks when it
   exits (or orphan those that are persistent) */

struct dlm_user_proc {
	dlm_lockspace_t		*lockspace;
	unsigned long		flags; /* DLM_PROC_FLAGS */
	struct list_head	asts;
	spinlock_t		asts_spin;
	struct list_head	locks;
	spinlock_t		locks_spin;
D
David Teigland 已提交
550
	struct list_head	unlocking;
D
David Teigland 已提交
551 552 553
	wait_queue_head_t	wait;
};

554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570
/* Return non-zero when the LSFL_RUNNING bit of @ls->ls_flags is clear. */
static inline int dlm_locking_stopped(struct dlm_ls *ls)
{
	int running = test_bit(LSFL_RUNNING, &ls->ls_flags);

	return !running;
}

/* Return the result of test_bit() for LSFL_RECOVERY_STOP in @ls->ls_flags. */
static inline int dlm_recovery_stopped(struct dlm_ls *ls)
{
	int stop_requested = test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);

	return stop_requested;
}

/* Return 1 if DLM_LSFL_NODIR is set in @ls->ls_exflags, otherwise 0. */
static inline int dlm_no_directory(struct dlm_ls *ls)
{
	if (ls->ls_exflags & DLM_LSFL_NODIR)
		return 1;

	return 0;
}

#endif				/* __DLM_INTERNAL_DOT_H__ */