md-cluster.c 6.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12
/*
 * Copyright (C) 2015, SUSE
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 */


#include <linux/module.h>
13 14 15
#include <linux/dlm.h>
#include <linux/sched.h>
#include "md.h"
16
#include "md-cluster.h"
17 18 19 20 21 22 23 24 25

/*
 * Size in bytes of the lock value block: used as the allocation size for
 * lksb.sb_lvbptr in lockres_init() and passed to dlm_new_lockspace() in join().
 */
#define LVB_SIZE	64

/*
 * State kept for one DLM lock used by md-cluster. Instances are created by
 * lockres_init() and destroyed by lockres_free().
 */
struct dlm_lock_resource {
	dlm_lockspace_t *ls;		/* lockspace the lock is requested in */
	struct dlm_lksb lksb;		/* status block handed to dlm_lock()/dlm_unlock() */
	char *name;			/* lock name. */
	uint32_t flags;			/* flags to pass to dlm_lock() */
	struct completion completion;	/* completion for synchronized locking */
	void (*bast)(void *arg, int mode); /* blocking AST function pointer */
	struct mddev *mddev;		/* pointing back to mddev. */
};

struct md_cluster_info {
	/* dlm lock space and resources for clustered raid. */
	dlm_lockspace_t *lockspace;
G
Goldwyn Rodrigues 已提交
33 34
	int slot_number;
	struct completion completion;
G
Goldwyn Rodrigues 已提交
35 36
	struct dlm_lock_resource *sb_lock;
	struct mutex sb_mutex;
37
	struct dlm_lock_resource *bitmap_lockres;
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
};

/*
 * AST callback passed to dlm_lock() by dlm_lock_sync(): @arg is the
 * dlm_lock_resource whose completion is signalled.
 */
static void sync_ast(void *arg)
{
	struct dlm_lock_resource *lockres = arg;

	complete(&lockres->completion);
}

/*
 * Request @mode on @res and block until the request completes.
 *
 * Returns the error from dlm_lock() if the request could not be submitted,
 * otherwise the sb_status value found in the lock status block after
 * sync_ast() has signalled the completion.
 */
static int dlm_lock_sync(struct dlm_lock_resource *res, int mode)
{
	int err;

	/* Reset the completion before submitting; sync_ast() signals it. */
	init_completion(&res->completion);

	err = dlm_lock(res->ls, mode, &res->lksb, res->flags,
		       res->name, strlen(res->name), 0,
		       sync_ast, res, res->bast);
	if (err != 0)
		return err;

	wait_for_completion(&res->completion);
	return res->lksb.sb_status;
}

/*
 * "Unlock" @res by synchronously requesting the NL mode on it; the lock
 * itself is not released here (see lockres_free() for that).
 *
 * Returns whatever dlm_lock_sync() returns.
 */
static int dlm_unlock_sync(struct dlm_lock_resource *res)
{
	int status = dlm_lock_sync(res, DLM_LOCK_NL);

	return status;
}

G
Goldwyn Rodrigues 已提交
67
static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
68 69 70 71
		char *name, void (*bastfn)(void *arg, int mode), int with_lvb)
{
	struct dlm_lock_resource *res = NULL;
	int ret, namelen;
G
Goldwyn Rodrigues 已提交
72
	struct md_cluster_info *cinfo = mddev->cluster_info;
73 74 75 76

	res = kzalloc(sizeof(struct dlm_lock_resource), GFP_KERNEL);
	if (!res)
		return NULL;
G
Goldwyn Rodrigues 已提交
77 78
	res->ls = cinfo->lockspace;
	res->mddev = mddev;
79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
	namelen = strlen(name);
	res->name = kzalloc(namelen + 1, GFP_KERNEL);
	if (!res->name) {
		pr_err("md-cluster: Unable to allocate resource name for resource %s\n", name);
		goto out_err;
	}
	strlcpy(res->name, name, namelen + 1);
	if (with_lvb) {
		res->lksb.sb_lvbptr = kzalloc(LVB_SIZE, GFP_KERNEL);
		if (!res->lksb.sb_lvbptr) {
			pr_err("md-cluster: Unable to allocate LVB for resource %s\n", name);
			goto out_err;
		}
		res->flags = DLM_LKF_VALBLK;
	}

	if (bastfn)
		res->bast = bastfn;

	res->flags |= DLM_LKF_EXPEDITE;

	ret = dlm_lock_sync(res, DLM_LOCK_NL);
	if (ret) {
		pr_err("md-cluster: Unable to lock NL on new lock resource %s\n", name);
		goto out_err;
	}
	res->flags &= ~DLM_LKF_EXPEDITE;
	res->flags |= DLM_LKF_CONVERT;

	return res;
out_err:
	kfree(res->lksb.sb_lvbptr);
	kfree(res->name);
	kfree(res);
	return NULL;
}

static void lockres_free(struct dlm_lock_resource *res)
{
	if (!res)
		return;

	init_completion(&res->completion);
	dlm_unlock(res->ls, res->lksb.sb_lkid, 0, &res->lksb, res);
	wait_for_completion(&res->completion);

	kfree(res->name);
	kfree(res->lksb.sb_lvbptr);
	kfree(res);
}
129

G
Goldwyn Rodrigues 已提交
130 131 132 133 134 135 136 137 138 139 140 141
/*
 * Format the 16 bytes at @src as lowercase hex in 8-4-4-4-12 grouping and
 * write the NUL-terminated text to @dest.
 *
 * @dest must have room for 37 bytes (36 characters plus the terminator).
 * Returns @dest.
 */
static char *pretty_uuid(char *dest, char *src)
{
	char *out = dest;
	int idx;

	for (idx = 0; idx < 16; idx++) {
		/* A dash precedes bytes 4, 6, 8 and 10. */
		switch (idx) {
		case 4:
		case 6:
		case 8:
		case 10:
			out += sprintf(out, "-");
			break;
		default:
			break;
		}
		out += sprintf(out, "%02x", (__u8)src[idx]);
	}
	return dest;
}

G
Goldwyn Rodrigues 已提交
142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
/*
 * Installed as the .recover_prep lockspace callback in md_ls_ops;
 * it deliberately does nothing.
 */
static void recover_prep(void *arg)
{
	/* Nothing to do. */
}

/*
 * .recover_slot lockspace callback: @arg is the mddev registered in join(),
 * @slot describes the node being reported. Currently this only logs the
 * event together with our own slot number.
 */
static void recover_slot(void *arg, struct dlm_slot *slot)
{
	struct mddev *mddev = arg;
	struct md_cluster_info *info = mddev->cluster_info;
	int my_slot = info->slot_number;

	pr_info("md-cluster: %s Node %d/%d down. My slot: %d. Initiating recovery.\n",
		mddev->bitmap_info.cluster_name,
		slot->nodeid, slot->slot, my_slot);
}

/*
 * .recover_done lockspace callback: record @our_slot in the cluster info of
 * the mddev passed as @arg, then wake whoever waits on its completion
 * (join() does so after creating the lockspace).
 *
 * @slots, @num_slots and @generation are not used.
 */
static void recover_done(void *arg, struct dlm_slot *slots,
		int num_slots, int our_slot,
		uint32_t generation)
{
	struct mddev *mddev = arg;
	struct md_cluster_info *info = mddev->cluster_info;

	info->slot_number = our_slot;
	complete(&info->completion);
}

/* Lockspace callbacks handed to dlm_new_lockspace() in join(). */
static const struct dlm_lockspace_ops md_ls_ops = {
	.recover_prep	= recover_prep,
	.recover_slot	= recover_slot,
	.recover_done	= recover_done,
};

174 175
static int join(struct mddev *mddev, int nodes)
{
G
Goldwyn Rodrigues 已提交
176
	struct md_cluster_info *cinfo;
G
Goldwyn Rodrigues 已提交
177
	int ret, ops_rv;
G
Goldwyn Rodrigues 已提交
178 179 180 181 182 183 184 185 186
	char str[64];

	if (!try_module_get(THIS_MODULE))
		return -ENOENT;

	cinfo = kzalloc(sizeof(struct md_cluster_info), GFP_KERNEL);
	if (!cinfo)
		return -ENOMEM;

G
Goldwyn Rodrigues 已提交
187 188 189 190 191
	init_completion(&cinfo->completion);

	mutex_init(&cinfo->sb_mutex);
	mddev->cluster_info = cinfo;

G
Goldwyn Rodrigues 已提交
192 193
	memset(str, 0, 64);
	pretty_uuid(str, mddev->uuid);
G
Goldwyn Rodrigues 已提交
194 195 196
	ret = dlm_new_lockspace(str, mddev->bitmap_info.cluster_name,
				DLM_LSFL_FS, LVB_SIZE,
				&md_ls_ops, mddev, &ops_rv, &cinfo->lockspace);
G
Goldwyn Rodrigues 已提交
197 198
	if (ret)
		goto err;
G
Goldwyn Rodrigues 已提交
199
	wait_for_completion(&cinfo->completion);
200 201 202 203 204 205
	if (nodes <= cinfo->slot_number) {
		pr_err("md-cluster: Slot allotted(%d) greater than available slots(%d)", cinfo->slot_number - 1,
			nodes);
		ret = -ERANGE;
		goto err;
	}
G
Goldwyn Rodrigues 已提交
206 207 208 209 210 211
	cinfo->sb_lock = lockres_init(mddev, "cmd-super",
					NULL, 0);
	if (!cinfo->sb_lock) {
		ret = -ENOMEM;
		goto err;
	}
212 213 214 215 216 217 218 219 220 221 222 223

	pr_info("md-cluster: Joined cluster %s slot %d\n", str, cinfo->slot_number);
	snprintf(str, 64, "bitmap%04d", cinfo->slot_number - 1);
	cinfo->bitmap_lockres = lockres_init(mddev, str, NULL, 1);
	if (!cinfo->bitmap_lockres)
		goto err;
	if (dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW)) {
		pr_err("Failed to get bitmap lock\n");
		ret = -EINVAL;
		goto err;
	}

224
	return 0;
G
Goldwyn Rodrigues 已提交
225 226 227
err:
	if (cinfo->lockspace)
		dlm_release_lockspace(cinfo->lockspace, 2);
G
Goldwyn Rodrigues 已提交
228
	mddev->cluster_info = NULL;
G
Goldwyn Rodrigues 已提交
229 230 231
	kfree(cinfo);
	module_put(THIS_MODULE);
	return ret;
232 233 234 235
}

static int leave(struct mddev *mddev)
{
G
Goldwyn Rodrigues 已提交
236 237 238 239 240
	struct md_cluster_info *cinfo = mddev->cluster_info;

	if (!cinfo)
		return 0;
	lockres_free(cinfo->sb_lock);
241
	lockres_free(cinfo->bitmap_lockres);
G
Goldwyn Rodrigues 已提交
242
	dlm_release_lockspace(cinfo->lockspace, 2);
243 244 245
	return 0;
}

G
Goldwyn Rodrigues 已提交
246 247 248 249 250 251 252 253 254 255 256
/*
 * slot_number(): Returns the MD slot number to use.
 * DLM starts the slot numbers from 1, whereas cluster-md
 * wants the number to be from zero, so we deduct one.
 */
static int slot_number(struct mddev *mddev)
{
	struct md_cluster_info *info = mddev->cluster_info;
	int dlm_slot = info->slot_number;

	return dlm_slot - 1;
}

257 258 259
static struct md_cluster_operations cluster_ops = {
	.join   = join,
	.leave  = leave,
G
Goldwyn Rodrigues 已提交
260
	.slot_number = slot_number,
261 262
};

263 264 265 266
/*
 * Module init: log the experimental warning and register cluster_ops
 * with md. Always returns 0.
 */
static int __init cluster_init(void)
{
	pr_warn("md-cluster: EXPERIMENTAL. Use with caution\n");
	pr_info("Registering Cluster MD functions\n");
	register_md_cluster_operations(&cluster_ops, THIS_MODULE);
	return 0;
}

static void cluster_exit(void)
{
273
	unregister_md_cluster_operations();
274 275 276 277 278 279
}

/* Module entry/exit points and metadata. */
module_init(cluster_init);
module_exit(cluster_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Clustering support for MD");