stackglue.c 15.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * stackglue.c
 *
 * Code which implements an OCFS2 specific interface to underlying
 * cluster stacks.
 *
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 */

21 22 23
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/module.h>
24
#include <linux/slab.h>
25
#include <linux/kmod.h>
26 27 28
#include <linux/fs.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>
29
#include <linux/sysctl.h>
30

31 32
#include "ocfs2_fs.h"

33
#include "stackglue.h"
34

35 36
/* Plugin names: the classic stack and the plugin used for every other stack */
#define OCFS2_STACK_PLUGIN_O2CB		"o2cb"
#define OCFS2_STACK_PLUGIN_USER		"user"

/* Size, in bytes, of the ocfs2_hb_ctl_path buffer */
#define OCFS2_MAX_HB_CTL_PATH		256
38

39 40 41
static struct ocfs2_locking_protocol *lproto;
static DEFINE_SPINLOCK(ocfs2_stack_lock);
static LIST_HEAD(ocfs2_stack_list);
42
static char cluster_stack_name[OCFS2_STACK_LABEL_LEN + 1];
43
static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl";
44

45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
/*
 * The stack currently in use.  If not null, active_stack->sp_count > 0,
 * the module is pinned, and the locking protocol cannot be changed.
 *
 * Assigned only with ocfs2_stack_lock held (see
 * ocfs2_stack_driver_request() and ocfs2_stack_driver_put()).
 */
static struct ocfs2_stack_plugin *active_stack;

/*
 * Search the list of registered plugins for one whose sp_name equals
 * @name.  The caller must hold ocfs2_stack_lock.
 *
 * Returns the matching plugin, or NULL when none is registered under
 * that name.
 */
static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name)
{
	struct ocfs2_stack_plugin *plugin;
	struct ocfs2_stack_plugin *found = NULL;

	assert_spin_locked(&ocfs2_stack_lock);

	list_for_each_entry(plugin, &ocfs2_stack_list, sp_list) {
		if (strcmp(plugin->sp_name, name) == 0) {
			found = plugin;
			break;
		}
	}

	return found;
}

65 66
static int ocfs2_stack_driver_request(const char *stack_name,
				      const char *plugin_name)
67 68 69 70 71 72
{
	int rc;
	struct ocfs2_stack_plugin *p;

	spin_lock(&ocfs2_stack_lock);

73 74 75 76 77 78 79 80 81
	/*
	 * If the stack passed by the filesystem isn't the selected one,
	 * we can't continue.
	 */
	if (strcmp(stack_name, cluster_stack_name)) {
		rc = -EBUSY;
		goto out;
	}

82 83 84 85 86
	if (active_stack) {
		/*
		 * If the active stack isn't the one we want, it cannot
		 * be selected right now.
		 */
87
		if (!strcmp(active_stack->sp_name, plugin_name))
88 89 90 91 92 93
			rc = 0;
		else
			rc = -EBUSY;
		goto out;
	}

94
	p = ocfs2_stack_lookup(plugin_name);
95 96 97 98 99 100 101 102 103
	if (!p || !try_module_get(p->sp_owner)) {
		rc = -ENOENT;
		goto out;
	}

	active_stack = p;
	rc = 0;

out:
104 105 106 107
	/* If we found it, pin it */
	if (!rc)
		active_stack->sp_count++;

108 109 110 111 112 113 114 115 116
	spin_unlock(&ocfs2_stack_lock);
	return rc;
}

/*
 * This function looks up the appropriate stack and makes it active.  If
 * there is no stack, it tries to load it.  It will fail if the stack still
 * cannot be found.  It will also fail if a different stack is in use.
 */
117
static int ocfs2_stack_driver_get(const char *stack_name)
118 119
{
	int rc;
120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
	char *plugin_name = OCFS2_STACK_PLUGIN_O2CB;

	/*
	 * Classic stack does not pass in a stack name.  This is
	 * compatible with older tools as well.
	 */
	if (!stack_name || !*stack_name)
		stack_name = OCFS2_STACK_PLUGIN_O2CB;

	if (strlen(stack_name) != OCFS2_STACK_LABEL_LEN) {
		printk(KERN_ERR
		       "ocfs2 passed an invalid cluster stack label: \"%s\"\n",
		       stack_name);
		return -EINVAL;
	}
135

136 137 138 139 140
	/* Anything that isn't the classic stack is a user stack */
	if (strcmp(stack_name, OCFS2_STACK_PLUGIN_O2CB))
		plugin_name = OCFS2_STACK_PLUGIN_USER;

	rc = ocfs2_stack_driver_request(stack_name, plugin_name);
141
	if (rc == -ENOENT) {
142 143
		request_module("ocfs2_stack_%s", plugin_name);
		rc = ocfs2_stack_driver_request(stack_name, plugin_name);
144 145 146 147 148
	}

	if (rc == -ENOENT) {
		printk(KERN_ERR
		       "ocfs2: Cluster stack driver \"%s\" cannot be found\n",
149
		       plugin_name);
150 151
	} else if (rc == -EBUSY) {
		printk(KERN_ERR
152
		       "ocfs2: A different cluster stack is in use\n");
153 154 155 156
	}

	return rc;
}
157

158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232
/*
 * Drop one reference on the active stack.  When the last reference
 * goes away the plugin's module is released and active_stack is
 * cleared.  Calling this with no active stack, or with a zero count,
 * is a BUG.
 */
static void ocfs2_stack_driver_put(void)
{
	spin_lock(&ocfs2_stack_lock);

	BUG_ON(!active_stack);
	BUG_ON(!active_stack->sp_count);

	if (--active_stack->sp_count == 0) {
		module_put(active_stack->sp_owner);
		active_stack = NULL;
	}

	spin_unlock(&ocfs2_stack_lock);
}

int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin)
{
	int rc;

	spin_lock(&ocfs2_stack_lock);
	if (!ocfs2_stack_lookup(plugin->sp_name)) {
		plugin->sp_count = 0;
		plugin->sp_proto = lproto;
		list_add(&plugin->sp_list, &ocfs2_stack_list);
		printk(KERN_INFO "ocfs2: Registered cluster interface %s\n",
		       plugin->sp_name);
		rc = 0;
	} else {
		printk(KERN_ERR "ocfs2: Stack \"%s\" already registered\n",
		       plugin->sp_name);
		rc = -EEXIST;
	}
	spin_unlock(&ocfs2_stack_lock);

	return rc;
}
EXPORT_SYMBOL_GPL(ocfs2_stack_glue_register);

/*
 * Remove @plugin from the list of known stack plugins.  An error is
 * logged if no plugin is registered under its name.  It is a BUG to
 * unregister a plugin that is active, still has users, or is not the
 * same object that was registered under that name.
 */
void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin)
{
	struct ocfs2_stack_plugin *registered;

	spin_lock(&ocfs2_stack_lock);

	registered = ocfs2_stack_lookup(plugin->sp_name);
	if (!registered) {
		printk(KERN_ERR "Stack \"%s\" is not registered\n",
		       plugin->sp_name);
		goto out;
	}

	BUG_ON(registered != plugin);
	BUG_ON(plugin == active_stack);
	BUG_ON(plugin->sp_count != 0);

	list_del_init(&plugin->sp_list);
	printk(KERN_INFO "ocfs2: Unregistered cluster interface %s\n",
	       plugin->sp_name);

out:
	spin_unlock(&ocfs2_stack_lock);
}
EXPORT_SYMBOL_GPL(ocfs2_stack_glue_unregister);

void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto)
{
	struct ocfs2_stack_plugin *p;

	BUG_ON(proto == NULL);

	spin_lock(&ocfs2_stack_lock);
	BUG_ON(active_stack != NULL);

	lproto = proto;
	list_for_each_entry(p, &ocfs2_stack_list, sp_list) {
		p->sp_proto = lproto;
	}

	spin_unlock(&ocfs2_stack_lock);
}
EXPORT_SYMBOL_GPL(ocfs2_stack_glue_set_locking_protocol);
233

234

D
David Teigland 已提交
235 236 237 238 239 240 241
/*
 * The ocfs2_dlm_lock() and ocfs2_dlm_unlock() functions take
 * "struct ocfs2_lock_res *astarg" instead of "void *astarg" because the
 * underlying stack plugins need to pilfer the lksb off of the lock_res.
 * If some other structure needs to be passed as an astarg, the plugins
 * will need to be given a different avenue to the lksb.
 */
242 243 244 245 246 247
int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn,
		   int mode,
		   union ocfs2_dlm_lksb *lksb,
		   u32 flags,
		   void *name,
		   unsigned int namelen,
D
David Teigland 已提交
248
		   struct ocfs2_lock_res *astarg)
249
{
250
	BUG_ON(lproto == NULL);
251

252 253
	return active_stack->sp_ops->dlm_lock(conn, mode, lksb, flags,
					      name, namelen, astarg);
254
}
255
EXPORT_SYMBOL_GPL(ocfs2_dlm_lock);
256

257 258 259
/*
 * Hand an unlock request to the active stack's dlm_unlock operation and
 * return its result.  It is a BUG to call this before a locking
 * protocol has been set.
 */
int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn,
		     union ocfs2_dlm_lksb *lksb,
		     u32 flags,
		     struct ocfs2_lock_res *astarg)
{
	int status;

	BUG_ON(!lproto);

	status = active_stack->sp_ops->dlm_unlock(conn, lksb, flags, astarg);
	return status;
}
EXPORT_SYMBOL_GPL(ocfs2_dlm_unlock);
267

268 269
int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
{
270
	return active_stack->sp_ops->lock_status(lksb);
271
}
272
EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status);
273

274 275 276 277 278
/*
 * Why don't we cast to ocfs2_meta_lvb?  The "clean" answer is that we
 * don't cast at the glue level.  The real answer is that the header
 * ordering is nigh impossible.
 */
279 280
void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb)
{
281
	return active_stack->sp_ops->lock_lvb(lksb);
282
}
283
EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb);
284

285 286
void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb)
{
287
	active_stack->sp_ops->dump_lksb(lksb);
288
}
289
EXPORT_SYMBOL_GPL(ocfs2_dlm_dump_lksb);
290

291 292
int ocfs2_cluster_connect(const char *stack_name,
			  const char *group,
293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323
			  int grouplen,
			  void (*recovery_handler)(int node_num,
						   void *recovery_data),
			  void *recovery_data,
			  struct ocfs2_cluster_connection **conn)
{
	int rc = 0;
	struct ocfs2_cluster_connection *new_conn;

	BUG_ON(group == NULL);
	BUG_ON(conn == NULL);
	BUG_ON(recovery_handler == NULL);

	if (grouplen > GROUP_NAME_MAX) {
		rc = -EINVAL;
		goto out;
	}

	new_conn = kzalloc(sizeof(struct ocfs2_cluster_connection),
			   GFP_KERNEL);
	if (!new_conn) {
		rc = -ENOMEM;
		goto out;
	}

	memcpy(new_conn->cc_name, group, grouplen);
	new_conn->cc_namelen = grouplen;
	new_conn->cc_recovery_handler = recovery_handler;
	new_conn->cc_recovery_data = recovery_data;

	/* Start the new connection at our maximum compatibility level */
324 325 326
	new_conn->cc_version = lproto->lp_max_version;

	/* This will pin the stack driver if successful */
327
	rc = ocfs2_stack_driver_get(stack_name);
328 329
	if (rc)
		goto out_free;
330

331
	rc = active_stack->sp_ops->connect(new_conn);
332
	if (rc) {
333
		ocfs2_stack_driver_put();
334 335 336 337 338 339
		goto out_free;
	}

	*conn = new_conn;

out_free:
340
	if (rc)
341 342 343 344 345
		kfree(new_conn);

out:
	return rc;
}
346
EXPORT_SYMBOL_GPL(ocfs2_cluster_connect);
347

348 349 350
/* If hangup_pending is 0, the stack driver will be dropped */
int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn,
			     int hangup_pending)
351 352 353 354 355
{
	int ret;

	BUG_ON(conn == NULL);

356
	ret = active_stack->sp_ops->disconnect(conn);
357 358

	/* XXX Should we free it anyway? */
359
	if (!ret) {
360
		kfree(conn);
361 362 363
		if (!hangup_pending)
			ocfs2_stack_driver_put();
	}
364 365 366

	return ret;
}
367
EXPORT_SYMBOL_GPL(ocfs2_cluster_disconnect);
368

369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404
/*
 * Leave the group for this filesystem by running the userspace helper
 * whose path is kept in ocfs2_hb_ctl_path, as "<helper> -K -u <group>",
 * and waiting for it to finish.  A negative result from
 * call_usermodehelper() is logged; nothing is returned to the caller.
 */
static void ocfs2_leave_group(const char *group)
{
	/* minimal command environment taken from cpu_run_sbin_hotplug */
	char *envp[] = {
		"HOME=/",
		"PATH=/sbin:/bin:/usr/sbin:/usr/bin",
		NULL,
	};
	char *argv[] = {
		ocfs2_hb_ctl_path,
		"-K",
		"-u",
		(char *)group,
		NULL,
	};
	int err;

	err = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
	if (err >= 0)
		return;

	printk(KERN_ERR
	       "ocfs2: Error %d running user helper "
	       "\"%s %s %s %s\"\n",
	       err, argv[0], argv[1], argv[2], argv[3]);
}

/*
 * Hangup is a required post-umount step.  The ocfs2-tools software
 * expects the filesystem to call "ocfs2_hb_ctl" during unmount, and
 * that has to happen whether or not the DLM was ever started, which is
 * why it cannot live in ocfs2_cluster_disconnect().  The real work is
 * done by ocfs2_leave_group().
 *
 * @group must be non-NULL and NUL-terminated at index @grouplen;
 * anything else is a BUG.
 */
void ocfs2_cluster_hangup(const char *group, int grouplen)
{
	BUG_ON(!group);
	BUG_ON(group[grouplen] != '\0');

	ocfs2_leave_group(group);

	/* cluster_disconnect() was called with hangup_pending==1 */
	ocfs2_stack_driver_put();
}
EXPORT_SYMBOL_GPL(ocfs2_cluster_hangup);
416

417 418
int ocfs2_cluster_this_node(unsigned int *node)
{
419
	return active_stack->sp_ops->this_node(node);
420
}
421
EXPORT_SYMBOL_GPL(ocfs2_cluster_this_node);
422

423

424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450
/*
 * Sysfs bits
 */

/*
 * sysfs show handler: print the maximum locking protocol version as
 * "<major>.<minor>\n".  Nothing is written, and 0 is returned, while no
 * locking protocol is set.
 */
static ssize_t ocfs2_max_locking_protocol_show(struct kobject *kobj,
					       struct kobj_attribute *attr,
					       char *buf)
{
	ssize_t len = 0;

	spin_lock(&ocfs2_stack_lock);
	if (lproto != NULL) {
		len = snprintf(buf, PAGE_SIZE, "%u.%u\n",
			       lproto->lp_max_version.pv_major,
			       lproto->lp_max_version.pv_minor);
	}
	spin_unlock(&ocfs2_stack_lock);

	return len;
}

/*
 * Read-only "max_locking_protocol" attribute, served by
 * ocfs2_max_locking_protocol_show(); it has no store handler.
 */
static struct kobj_attribute ocfs2_attr_max_locking_protocol =
	__ATTR(max_locking_protocol, S_IFREG | S_IRUGO,
	       ocfs2_max_locking_protocol_show, NULL);

static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj,
						 struct kobj_attribute *attr,
						 char *buf)
451
{
452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501
	ssize_t ret = 0, total = 0, remain = PAGE_SIZE;
	struct ocfs2_stack_plugin *p;

	spin_lock(&ocfs2_stack_lock);
	list_for_each_entry(p, &ocfs2_stack_list, sp_list) {
		ret = snprintf(buf, remain, "%s\n",
			       p->sp_name);
		if (ret < 0) {
			total = ret;
			break;
		}
		if (ret == remain) {
			/* snprintf() didn't fit */
			total = -E2BIG;
			break;
		}
		total += ret;
		remain -= ret;
	}
	spin_unlock(&ocfs2_stack_lock);

	return total;
}

/*
 * Read-only "loaded_cluster_plugins" attribute, served by
 * ocfs2_loaded_cluster_plugins_show(); it has no store handler.
 */
static struct kobj_attribute ocfs2_attr_loaded_cluster_plugins =
	__ATTR(loaded_cluster_plugins, S_IFREG | S_IRUGO,
	       ocfs2_loaded_cluster_plugins_show, NULL);

/*
 * sysfs show handler: print the name of the active stack plugin
 * followed by a newline.  Nothing is written, and 0 is returned, while
 * no stack is active.
 *
 * Returns the number of bytes placed in @buf, or -E2BIG if the name
 * does not fit in PAGE_SIZE bytes.
 */
static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj,
						struct kobj_attribute *attr,
						char *buf)
{
	ssize_t ret = 0;

	spin_lock(&ocfs2_stack_lock);
	if (active_stack) {
		ret = snprintf(buf, PAGE_SIZE, "%s\n",
			       active_stack->sp_name);
		/*
		 * snprintf() reports the untruncated length, so any
		 * value >= PAGE_SIZE means the output was cut short.
		 */
		if (ret >= PAGE_SIZE)
			ret = -E2BIG;
	}
	spin_unlock(&ocfs2_stack_lock);

	return ret;
}

/*
 * Read-only "active_cluster_plugin" attribute, served by
 * ocfs2_active_cluster_plugin_show(); it has no store handler.
 */
static struct kobj_attribute ocfs2_attr_active_cluster_plugin =
	__ATTR(active_cluster_plugin, S_IFREG | S_IRUGO,
	       ocfs2_active_cluster_plugin_show, NULL);

502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551
/*
 * sysfs show handler: print the selected cluster stack label
 * (cluster_stack_name) followed by a newline and return snprintf()'s
 * result.
 */
static ssize_t ocfs2_cluster_stack_show(struct kobject *kobj,
					struct kobj_attribute *attr,
					char *buf)
{
	ssize_t len;

	spin_lock(&ocfs2_stack_lock);
	len = snprintf(buf, PAGE_SIZE, "%s\n", cluster_stack_name);
	spin_unlock(&ocfs2_stack_lock);

	return len;
}

/*
 * sysfs store handler: select the cluster stack label.
 *
 * One trailing newline is ignored.  What remains must be exactly
 * OCFS2_STACK_LABEL_LEN bytes with no embedded NUL, otherwise -EINVAL
 * is returned.  While a stack is active the label cannot be changed:
 * writing the current label succeeds, anything else yields -EBUSY.
 * An empty write returns 0.  On success @count is returned.
 */
static ssize_t ocfs2_cluster_stack_store(struct kobject *kobj,
					 struct kobj_attribute *attr,
					 const char *buf, size_t count)
{
	size_t label_len = count;
	ssize_t ret = count;

	if (count == 0)
		return 0;

	/* Tolerate the newline that "echo" appends */
	if (buf[label_len - 1] == '\n')
		label_len--;

	if (label_len != OCFS2_STACK_LABEL_LEN)
		return -EINVAL;
	if (strnlen(buf, label_len) != label_len)
		return -EINVAL;

	spin_lock(&ocfs2_stack_lock);
	if (!active_stack)
		memcpy(cluster_stack_name, buf, label_len);
	else if (strncmp(buf, cluster_stack_name, label_len) != 0)
		ret = -EBUSY;
	spin_unlock(&ocfs2_stack_lock);

	return ret;
}


/*
 * "cluster_stack" attribute: readable through
 * ocfs2_cluster_stack_show() and, with S_IWUSR set, writable through
 * ocfs2_cluster_stack_store().
 */
static struct kobj_attribute ocfs2_attr_cluster_stack =
	__ATTR(cluster_stack, S_IFREG | S_IRUGO | S_IWUSR,
	       ocfs2_cluster_stack_show,
	       ocfs2_cluster_stack_store);

552 553 554 555
static struct attribute *ocfs2_attrs[] = {
	&ocfs2_attr_max_locking_protocol.attr,
	&ocfs2_attr_loaded_cluster_plugins.attr,
	&ocfs2_attr_active_cluster_plugin.attr,
556
	&ocfs2_attr_cluster_stack.attr,
557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582
	NULL,
};

/* Attribute group that ocfs2_sysfs_init() creates on the ocfs2 kset */
static struct attribute_group ocfs2_attr_group = {
	.attrs = ocfs2_attrs,
};

/* The "ocfs2" kset; created in ocfs2_sysfs_init() under fs_kobj */
static struct kset *ocfs2_kset;

/* Unregister the kset that ocfs2_sysfs_init() created. */
static void ocfs2_sysfs_exit(void)
{
	kset_unregister(ocfs2_kset);
}

/*
 * Create the "ocfs2" kset under fs_kobj and populate it with
 * ocfs2_attr_group.
 *
 * Returns 0 on success, -ENOMEM if the kset cannot be created, or the
 * error from sysfs_create_group(), in which case the kset is
 * unregistered again.
 */
static int ocfs2_sysfs_init(void)
{
	int err;

	ocfs2_kset = kset_create_and_add("ocfs2", NULL, fs_kobj);
	if (ocfs2_kset == NULL)
		return -ENOMEM;

	err = sysfs_create_group(&ocfs2_kset->kobj, &ocfs2_attr_group);
	if (err != 0)
		kset_unregister(ocfs2_kset);

	return err;
}

590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655
/*
 * Sysctl bits
 *
 * The sysctl lives at /proc/sys/fs/ocfs2/nm/hb_ctl_path.  The 'nm' doesn't
 * make as much sense in a multiple cluster stack world, but it's safer
 * and easier to preserve the name.
 */

/* ctl_name given to the "nm" directory entry in ocfs2_mod_table */
#define FS_OCFS2_NM		1

/*
 * Leaf table: the "hb_ctl_path" string sysctl (mode 0644), backed by
 * the ocfs2_hb_ctl_path buffer of OCFS2_MAX_HB_CTL_PATH bytes.
 */
static ctl_table ocfs2_nm_table[] = {
	{
		.ctl_name	= 1,
		.procname	= "hb_ctl_path",
		.data		= ocfs2_hb_ctl_path,
		.maxlen		= OCFS2_MAX_HB_CTL_PATH,
		.mode		= 0644,
		.proc_handler	= &proc_dostring,
		.strategy	= &sysctl_string,
	},
	{ .ctl_name = 0 }
};

/* "nm" directory (mode 0555) whose child is ocfs2_nm_table */
static ctl_table ocfs2_mod_table[] = {
	{
		.ctl_name	= FS_OCFS2_NM,
		.procname	= "nm",
		.data		= NULL,
		.maxlen		= 0,
		.mode		= 0555,
		.child		= ocfs2_nm_table
	},
	{ .ctl_name = 0}
};

/* "ocfs2" directory (mode 0555) whose child is ocfs2_mod_table */
static ctl_table ocfs2_kern_table[] = {
	{
		.ctl_name	= FS_OCFS2,
		.procname	= "ocfs2",
		.data		= NULL,
		.maxlen		= 0,
		.mode		= 0555,
		.child		= ocfs2_mod_table
	},
	{ .ctl_name = 0}
};

/*
 * Root of the tree handed to register_sysctl_table(): the "fs"
 * directory (mode 0555) whose child is ocfs2_kern_table.
 */
static ctl_table ocfs2_root_table[] = {
	{
		.ctl_name	= CTL_FS,
		.procname	= "fs",
		.data		= NULL,
		.maxlen		= 0,
		.mode		= 0555,
		.child		= ocfs2_kern_table
	},
	{ .ctl_name = 0 }
};

/*
 * Handle returned by register_sysctl_table() in ocfs2_stack_glue_init();
 * stays NULL until that registration succeeds.
 */
static struct ctl_table_header *ocfs2_table_header = NULL;


/*
 * Initialization
 */

656 657
/*
 * Module init: default the cluster stack label to the classic o2cb
 * stack, register the sysctl tree, then create the sysfs entries.
 *
 * Returns 0 on success, -ENOMEM if the sysctl table cannot be
 * registered, or the error from ocfs2_sysfs_init().  A sysfs failure
 * undoes the sysctl registration so that a failed load leaves nothing
 * behind.
 */
static int __init ocfs2_stack_glue_init(void)
{
	int ret;

	strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB);

	ocfs2_table_header = register_sysctl_table(ocfs2_root_table);
	if (!ocfs2_table_header) {
		printk(KERN_ERR
		       "ocfs2 stack glue: unable to register sysctl\n");
		return -ENOMEM; /* or something. */
	}

	ret = ocfs2_sysfs_init();
	if (ret) {
		/* The exit handler never runs after a failed init */
		unregister_sysctl_table(ocfs2_table_header);
		ocfs2_table_header = NULL;
	}

	return ret;
}
669

670 671 672
/*
 * Module exit: forget the locking protocol, tear down the sysfs
 * entries and unregister the sysctl tree if it was registered.
 */
static void __exit ocfs2_stack_glue_exit(void)
{
	lproto = NULL;

	ocfs2_sysfs_exit();

	if (ocfs2_table_header != NULL)
		unregister_sysctl_table(ocfs2_table_header);
}

678 679 680 681 682
/* Module metadata and entry points */
MODULE_AUTHOR("Oracle");
MODULE_DESCRIPTION("ocfs2 cluster stack glue layer");
MODULE_LICENSE("GPL");
module_init(ocfs2_stack_glue_init);
module_exit(ocfs2_stack_glue_exit);