plock.c 8.8 KB
Newer Older
1 2 3 4 5
/*
 * Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#include <linux/miscdevice.h>
#include <linux/lock_dlm_plock.h>
A
Al Viro 已提交
11
#include <linux/poll.h>
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27

#include "lock_dlm.h"


/* Taken around every manipulation of send_list and recv_list below. */
static spinlock_t ops_lock;

/*
 * Ops queued by send_op() that have not yet been read through the misc
 * device; send_wq is woken whenever an op is added here.
 */
static struct list_head send_list;
static wait_queue_head_t send_wq;

/*
 * Ops that dev_read() has handed out and that are waiting for dev_write()
 * to deliver a result; synchronous callers sleep on recv_wq.
 */
static struct list_head recv_list;
static wait_queue_head_t recv_wq;

/*
 * One posix-lock request exchanged through the misc device.
 */
struct plock_op {
	struct list_head list;		/* links the op on send_list or recv_list */
	int done;			/* set to 1 by dev_write() once a result arrived */
	struct gdlm_plock_info info;	/* request copied out by dev_read(); overwritten
					   with the result by dev_write() */
};

M
Marc Eshel 已提交
28 29 30 31 32 33 34 35 36
/*
 * A plock_op extended with the state needed to complete a lock request
 * asynchronously.  The embedded plock_op must stay the first member:
 * gdlm_plock_callback() and dev_write() cast a struct plock_op pointer
 * back to a struct plock_xop pointer.
 */
struct plock_xop {
	struct plock_op xop;
	void *callback;		/* fl_lmops->fl_grant of the request, or NULL */
	void *fl;		/* caller's file_lock (only logged by the callback) */
	void *file;		/* file the lock applies to */
	struct file_lock flc;	/* private copy of the caller's file_lock */
};


37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70
/*
 * Stamp the kernel's plock protocol version (major, minor, patch) into
 * an info structure that is about to be queued for userspace.
 */
static inline void set_version(struct gdlm_plock_info *info)
{
	info->version[2] = GDLM_PLOCK_VERSION_PATCH;
	info->version[1] = GDLM_PLOCK_VERSION_MINOR;
	info->version[0] = GDLM_PLOCK_VERSION_MAJOR;
}

/*
 * Validate the version carried in an info structure written by userspace.
 *
 * The version is accepted when the major number equals the kernel's and
 * the minor number is not newer than the kernel's; the patch level is
 * only reported, never compared.
 *
 * Returns 0 when acceptable, -EINVAL (after logging both versions)
 * otherwise.
 */
static int check_version(struct gdlm_plock_info *info)
{
	int same_major = (GDLM_PLOCK_VERSION_MAJOR == info->version[0]);
	int minor_ok = (GDLM_PLOCK_VERSION_MINOR >= info->version[1]);

	if (same_major && minor_ok)
		return 0;

	log_error("plock device version mismatch: "
		  "kernel (%u.%u.%u), user (%u.%u.%u)",
		  GDLM_PLOCK_VERSION_MAJOR,
		  GDLM_PLOCK_VERSION_MINOR,
		  GDLM_PLOCK_VERSION_PATCH,
		  info->version[0],
		  info->version[1],
		  info->version[2]);
	return -EINVAL;
}

/*
 * Queue an op for userspace: stamp it with the kernel's protocol version,
 * append it to send_list under ops_lock, and wake anything sleeping on
 * send_wq (dev_poll() registers pollers there).
 */
static void send_op(struct plock_op *op)
{
	INIT_LIST_HEAD(&op->list);
	set_version(&op->info);

	spin_lock(&ops_lock);
	list_add_tail(&op->list, &send_list);
	spin_unlock(&ops_lock);

	wake_up(&send_wq);
}

71
int gdlm_plock(void *lockspace, struct lm_lockname *name,
72 73
	       struct file *file, int cmd, struct file_lock *fl)
{
74
	struct gdlm_ls *ls = lockspace;
75
	struct plock_op *op;
M
Marc Eshel 已提交
76
	struct plock_xop *xop;
77 78
	int rv;

M
Marc Eshel 已提交
79 80
	xop = kzalloc(sizeof(*xop), GFP_KERNEL);
	if (!xop)
81 82
		return -ENOMEM;

M
Marc Eshel 已提交
83
	op = &xop->xop;
84
	op->info.optype		= GDLM_PLOCK_OP_LOCK;
85
	op->info.pid		= fl->fl_pid;
86 87 88 89 90 91
	op->info.ex		= (fl->fl_type == F_WRLCK);
	op->info.wait		= IS_SETLKW(cmd);
	op->info.fsid		= ls->id;
	op->info.number		= name->ln_number;
	op->info.start		= fl->fl_start;
	op->info.end		= fl->fl_end;
D
David Teigland 已提交
92
	op->info.owner		= (__u64)(long) fl->fl_owner;
M
Marc Eshel 已提交
93 94 95 96 97 98 99 100
	if (fl->fl_lmops && fl->fl_lmops->fl_grant) {
		xop->callback	= fl->fl_lmops->fl_grant;
		locks_init_lock(&xop->flc);
		locks_copy_lock(&xop->flc, fl);
		xop->fl		= fl;
		xop->file	= file;
	} else
		xop->callback	= NULL;
101 102

	send_op(op);
M
Marc Eshel 已提交
103 104 105 106 107

	if (xop->callback == NULL)
		wait_event(recv_wq, (op->done != 0));
	else
		return -EINPROGRESS;
108 109 110

	spin_lock(&ops_lock);
	if (!list_empty(&op->list)) {
111
		printk(KERN_INFO "plock op on list\n");
112 113 114 115 116 117 118 119 120
		list_del(&op->list);
	}
	spin_unlock(&ops_lock);

	rv = op->info.rv;

	if (!rv) {
		if (posix_lock_file_wait(file, fl) < 0)
			log_error("gdlm_plock: vfs lock error %x,%llx",
121 122
				  name->ln_type,
				  (unsigned long long)name->ln_number);
123 124
	}

M
Marc Eshel 已提交
125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181
	kfree(xop);
	return rv;
}

/*
 * Complete an asynchronous lock request whose result has arrived.
 *
 * The op is unlinked if it is still on a list, the result is reported
 * through the saved grant callback, and the op is freed.  Returns
 * failure iff a successful lock operation should be canceled, i.e. the
 * non-zero value the grant callback returned for a granted lock;
 * otherwise returns 0.
 */
static int gdlm_plock_callback(struct plock_op *op)
{
	struct plock_xop *xop = (struct plock_xop *)op;
	int (*notify)(void *, void *, int);
	struct file_lock *flc;
	struct file_lock *fl;
	struct file *file;
	int rv = 0;

	spin_lock(&ops_lock);
	if (!list_empty(&op->list)) {
		printk(KERN_INFO "plock op on list\n");
		list_del(&op->list);
	}
	spin_unlock(&ops_lock);

	notify = xop->callback;
	flc = &xop->flc;
	/* check if the following 2 are still valid or make a copy */
	file = xop->file;
	fl = xop->fl;

	if (op->info.rv) {
		/* request failed: pass the error on, nothing to cancel */
		notify(flc, NULL, op->info.rv);
	} else {
		/* got fs lock; bookkeep locally as well: */
		flc->fl_flags &= ~FL_SLEEP;
		if (posix_lock_file(file, flc, NULL)) {
			/*
			 * This can only happen in the case of kmalloc()
			 * failure.  The filesystem's own lock is the
			 * authoritative lock, so a failure to get the lock
			 * locally is not a disaster.  As long as GFS cannot
			 * reliably cancel locks (especially in a low-memory
			 * situation), we're better off ignoring this failure
			 * than trying to recover.
			 */
			log_error("gdlm_plock: vfs lock error file %p fl %p",
					file, fl);
		}

		rv = notify(flc, NULL, 0);
		if (rv) {
			/* XXX: We need to cancel the fs lock here: */
			printk("gfs2 lock granted after lock request failed;"
							" dangling lock!\n");
		}
	}

	kfree(xop);
	return rv;
}

185
int gdlm_punlock(void *lockspace, struct lm_lockname *name,
186 187
		 struct file *file, struct file_lock *fl)
{
188
	struct gdlm_ls *ls = lockspace;
189 190 191 192 193 194 195 196 197
	struct plock_op *op;
	int rv;

	op = kzalloc(sizeof(*op), GFP_KERNEL);
	if (!op)
		return -ENOMEM;

	if (posix_lock_file_wait(file, fl) < 0)
		log_error("gdlm_punlock: vfs unlock error %x,%llx",
198
			  name->ln_type, (unsigned long long)name->ln_number);
199 200

	op->info.optype		= GDLM_PLOCK_OP_UNLOCK;
201
	op->info.pid		= fl->fl_pid;
202 203 204 205
	op->info.fsid		= ls->id;
	op->info.number		= name->ln_number;
	op->info.start		= fl->fl_start;
	op->info.end		= fl->fl_end;
D
David Teigland 已提交
206
	op->info.owner		= (__u64)(long) fl->fl_owner;
207 208 209 210 211 212

	send_op(op);
	wait_event(recv_wq, (op->done != 0));

	spin_lock(&ops_lock);
	if (!list_empty(&op->list)) {
213
		printk(KERN_INFO "punlock op on list\n");
214 215 216 217 218 219
		list_del(&op->list);
	}
	spin_unlock(&ops_lock);

	rv = op->info.rv;

M
Marc Eshel 已提交
220 221 222
	if (rv == -ENOENT)
		rv = 0;

223 224 225 226
	kfree(op);
	return rv;
}

227
int gdlm_plock_get(void *lockspace, struct lm_lockname *name,
228 229
		   struct file *file, struct file_lock *fl)
{
230
	struct gdlm_ls *ls = lockspace;
231 232 233 234 235 236 237 238
	struct plock_op *op;
	int rv;

	op = kzalloc(sizeof(*op), GFP_KERNEL);
	if (!op)
		return -ENOMEM;

	op->info.optype		= GDLM_PLOCK_OP_GET;
239
	op->info.pid		= fl->fl_pid;
240 241 242 243 244
	op->info.ex		= (fl->fl_type == F_WRLCK);
	op->info.fsid		= ls->id;
	op->info.number		= name->ln_number;
	op->info.start		= fl->fl_start;
	op->info.end		= fl->fl_end;
245
	op->info.owner		= (__u64)(long) fl->fl_owner;
M
Marc Eshel 已提交
246

247 248 249 250 251
	send_op(op);
	wait_event(recv_wq, (op->done != 0));

	spin_lock(&ops_lock);
	if (!list_empty(&op->list)) {
252
		printk(KERN_INFO "plock_get op on list\n");
253 254 255 256
		list_del(&op->list);
	}
	spin_unlock(&ops_lock);

257 258 259
	/* info.rv from userspace is 1 for conflict, 0 for no-conflict,
	   -ENOENT if there are no locks on the file */

260 261
	rv = op->info.rv;

M
Marc Eshel 已提交
262 263 264
	fl->fl_type = F_UNLCK;
	if (rv == -ENOENT)
		rv = 0;
265
	else if (rv > 0) {
266 267 268 269
		fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
		fl->fl_pid = op->info.pid;
		fl->fl_start = op->info.start;
		fl->fl_end = op->info.end;
270
		rv = 0;
271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322
	}

	kfree(op);
	return rv;
}

/*
 * A read copies out one plock request from the send list.
 *
 * The oldest queued op is moved to recv_list, where dev_write() will look
 * for it, and its info is copied to the user buffer.
 *
 * Returns sizeof(struct gdlm_plock_info) on success, -EINVAL if the
 * buffer is too small, -EAGAIN if nothing is queued, or -EFAULT if the
 * copy to userspace fails (the op has already been moved by then).
 */
static ssize_t dev_read(struct file *file, char __user *u, size_t count,
			loff_t *ppos)
{
	struct gdlm_plock_info info;
	struct plock_op *op;

	if (count < sizeof(info))
		return -EINVAL;

	spin_lock(&ops_lock);
	if (list_empty(&send_list)) {
		spin_unlock(&ops_lock);
		return -EAGAIN;
	}
	op = list_entry(send_list.next, struct plock_op, list);
	list_move(&op->list, &recv_list);
	/* snapshot under the lock; the user copy happens after unlocking */
	memcpy(&info, &op->info, sizeof(info));
	spin_unlock(&ops_lock);

	if (copy_to_user(u, &info, sizeof(info)))
		return -EFAULT;

	return sizeof(info);
}

/*
 * A write copies in one plock result that should match a plock_op
 * on the recv list.
 *
 * The op is matched on fsid, number and owner.  A matching op is removed
 * from recv_list, its info is replaced by the result and it is marked
 * done.  Asynchronous lock requests (those carrying a grant callback) are
 * completed through gdlm_plock_callback(); for all others the sleepers on
 * recv_wq are woken.
 *
 * Returns -EINVAL for a wrong size or incompatible version, -EFAULT if
 * the user buffer cannot be read, otherwise @count -- except in the
 * callback case, where the value returned by gdlm_plock_callback() is
 * returned instead.
 * NOTE(review): that means write() returns 0 when an asynchronous grant
 * succeeds; confirm this is what the userspace daemon expects.
 */
static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
			 loff_t *ppos)
{
	struct gdlm_plock_info info;
	struct plock_op *op;
	int found = 0;
	int do_callback = 0;

	if (count != sizeof(info))
		return -EINVAL;

	if (copy_from_user(&info, u, sizeof(info)))
		return -EFAULT;

	if (check_version(&info))
		return -EINVAL;

	spin_lock(&ops_lock);
	list_for_each_entry(op, &recv_list, list) {
		if (op->info.fsid == info.fsid && op->info.number == info.number &&
		    op->info.owner == info.owner) {
			list_del_init(&op->list);
			found = 1;
			/*
			 * Decide how to complete the op while still holding
			 * the lock: once done is set and the lock dropped, a
			 * synchronous waiter may free the op at any time.
			 * Only GDLM_PLOCK_OP_LOCK ops are known to be backed
			 * by a struct plock_xop, so only those may be cast.
			 * This has to be checked before the result overwrites
			 * op->info.
			 */
			if (op->info.optype == GDLM_PLOCK_OP_LOCK &&
			    ((struct plock_xop *)op)->callback)
				do_callback = 1;
			memcpy(&op->info, &info, sizeof(info));
			op->done = 1;
			break;
		}
	}
	spin_unlock(&ops_lock);

	if (!found)
		printk(KERN_INFO "gdlm dev_write no op %x %llx\n", info.fsid,
			(unsigned long long)info.number);
	else if (do_callback)
		/* no waiter exists for a callback op, so it is still ours */
		count = gdlm_plock_callback(op);
	else
		/* op may already be gone; do not touch it from here on */
		wake_up(&recv_wq);

	return count;
}

/*
 * Poll support for the plock device: the device is readable
 * (POLLIN | POLLRDNORM) whenever send_list holds at least one op.
 * Pollers are registered on send_wq, which send_op() wakes.
 */
static unsigned int dev_poll(struct file *file, poll_table *wait)
{
	unsigned int events;

	poll_wait(file, &send_wq, wait);

	spin_lock(&ops_lock);
	events = list_empty(&send_list) ? 0 : (POLLIN | POLLRDNORM);
	spin_unlock(&ops_lock);

	return events;
}

361
static const struct file_operations dev_fops = {
362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385
	.read    = dev_read,
	.write   = dev_write,
	.poll    = dev_poll,
	.owner   = THIS_MODULE
};

/*
 * Misc device through which plock requests are read and results written;
 * the minor number is assigned dynamically.
 */
static struct miscdevice plock_dev_misc = {
	.name	= GDLM_PLOCK_MISC_NAME,
	.minor	= MISC_DYNAMIC_MINOR,
	.fops	= &dev_fops
};

int gdlm_plock_init(void)
{
	int rv;

	spin_lock_init(&ops_lock);
	INIT_LIST_HEAD(&send_list);
	INIT_LIST_HEAD(&recv_list);
	init_waitqueue_head(&send_wq);
	init_waitqueue_head(&recv_wq);

	rv = misc_register(&plock_dev_misc);
	if (rv)
386 387
		printk(KERN_INFO "gdlm_plock_init: misc_register failed %d",
		       rv);
388 389 390 391 392 393
	return rv;
}

void gdlm_plock_exit(void)
{
	if (misc_deregister(&plock_dev_misc) < 0)
394
		printk(KERN_INFO "gdlm_plock_exit: misc_deregister failed");
395 396
}