/*
 * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
 *
 * bitmap_create  - sets up the bitmap structure
 * bitmap_destroy - destroys the bitmap structure
 *
 * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.:
 * - added disk storage for bitmap
 * - changes to allow various bitmap chunk sizes
 */

/*
 * Still to do:
 *
 * flush after percent set rather than just time based. (maybe both).
 */

#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/timer.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/buffer_head.h>
#include <linux/seq_file.h>
#include "md.h"
#include "bitmap.h"

static inline char *bmname(struct bitmap *bitmap)
{
	return bitmap->mddev ? mdname(bitmap->mddev) : "mdX";
}

/*
 * just a placeholder - calls kmalloc for bitmap pages
 */
static unsigned char *bitmap_alloc_page(struct bitmap *bitmap)
{
	unsigned char *page;

	page = kzalloc(PAGE_SIZE, GFP_NOIO);
	if (!page)
		printk("%s: bitmap_alloc_page FAILED\n", bmname(bitmap));
	else
		pr_debug("%s: bitmap_alloc_page: allocated page at %p\n",
			 bmname(bitmap), page);
	return page;
}

/*
 * for now just a placeholder -- just calls kfree for bitmap pages
 */
static void bitmap_free_page(struct bitmap *bitmap, unsigned char *page)
{
	pr_debug("%s: bitmap_free_page: free page %p\n", bmname(bitmap), page);
	kfree(page);
}

/*
 * check a page and, if necessary, allocate it (or hijack it if the alloc fails)
 *
 * 1) check to see if this page is allocated, if it's not then try to alloc
 * 2) if the alloc fails, set the page's hijacked flag so we'll use the
 *    page pointer directly as a counter
 *
 * if we find our page, we increment the page's refcount so that it stays
 * allocated while we're using it
 */
static int bitmap_checkpage(struct bitmap *bitmap,
			    unsigned long page, int create)
__releases(bitmap->lock)
__acquires(bitmap->lock)
{
	unsigned char *mappage;

	if (page >= bitmap->pages) {
		/* This can happen if bitmap_start_sync goes beyond
		 * End-of-device while looking for a whole page.
		 * It is harmless.
		 */
		return -EINVAL;
	}

	if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */
		return 0;

	if (bitmap->bp[page].map) /* page is already allocated, just return */
		return 0;

	if (!create)
		return -ENOENT;

	/* this page has not been allocated yet */

	spin_unlock_irq(&bitmap->lock);
	mappage = bitmap_alloc_page(bitmap);
	spin_lock_irq(&bitmap->lock);

	if (mappage == NULL) {
		pr_debug("%s: bitmap map page allocation failed, hijacking\n",
			 bmname(bitmap));
		/* failed - set the hijacked flag so that we can use the
		 * pointer as a counter */
		if (!bitmap->bp[page].map)
			bitmap->bp[page].hijacked = 1;
	} else if (bitmap->bp[page].map ||
		   bitmap->bp[page].hijacked) {
		/* somebody beat us to getting the page */
		bitmap_free_page(bitmap, mappage);
		return 0;
	} else {

		/* no page was in place and we have one, so install it */

		bitmap->bp[page].map = mappage;
		bitmap->missing_pages--;
	}
	return 0;
}
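/*
 * Illustrative sketch (an explanatory note, not upstream text): when a
 * page is hijacked, the storage of the struct page pointer itself is
 * reused as two bitmap_counter_t values, one per half of the range the
 * page would have covered.  bitmap_get_counter() below recovers them
 * roughly like this:
 *
 *	int hi = (pageoff > PAGE_COUNTER_MASK);
 *	bitmap_counter_t *bmc = &((bitmap_counter_t *)
 *				  &bitmap->bp[page].map)[hi];
 *
 * so a failed allocation degrades to two coarse counters instead of a
 * full page of fine-grained ones.
 */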

/* if page is completely empty, put it back on the free list, or dealloc it */
/* if page was hijacked, unmark the flag so it might get alloced next time */
/* Note: lock should be held when calling this */
static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page)
{
	char *ptr;

	if (bitmap->bp[page].count) /* page is still busy */
		return;

	/* page is no longer in use, it can be released */

	if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */
		bitmap->bp[page].hijacked = 0;
		bitmap->bp[page].map = NULL;
	} else {
		/* normal case, free the page */
		ptr = bitmap->bp[page].map;
		bitmap->bp[page].map = NULL;
		bitmap->missing_pages++;
		bitmap_free_page(bitmap, ptr);
	}
}

/*
 * bitmap file handling - read and write the bitmap file and its superblock
 */

/*
 * basic page I/O operations
 */

/* IO operations when bitmap is stored near all superblocks */
static struct page *read_sb_page(struct mddev *mddev, loff_t offset,
				 struct page *page,
				 unsigned long index, int size)
{
	/* choose a good rdev and read the page from there */

	struct md_rdev *rdev;
	sector_t target;
	int did_alloc = 0;

	if (!page) {
		page = alloc_page(GFP_KERNEL);
		if (!page)
			return ERR_PTR(-ENOMEM);
		did_alloc = 1;
	}

	rdev_for_each(rdev, mddev) {
		if (! test_bit(In_sync, &rdev->flags)
		    || test_bit(Faulty, &rdev->flags))
			continue;

		target = offset + index * (PAGE_SIZE/512);

		if (sync_page_io(rdev, target,
				 roundup(size, bdev_logical_block_size(rdev->bdev)),
				 page, READ, true)) {
			page->index = index;
			attach_page_buffers(page, NULL); /* so that free_buffer will
							  * quietly no-op */
			return page;
		}
	}
	if (did_alloc)
		put_page(page);
	return ERR_PTR(-EIO);

}

static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mddev)
{
	/* Iterate the disks of an mddev, using rcu to protect access to the
	 * linked list, and raising the refcount of devices we return to ensure
	 * they don't disappear while in use.
	 * As devices are only added or removed when raid_disk is < 0 and
	 * nr_pending is 0 and In_sync is clear, the entries we return will
	 * still be in the same position on the list when we re-enter
	 * list_for_each_continue_rcu.
	 */
	struct list_head *pos;
	rcu_read_lock();
	if (rdev == NULL)
		/* start at the beginning */
		pos = &mddev->disks;
	else {
		/* release the previous rdev and start from there. */
		rdev_dec_pending(rdev, mddev);
		pos = &rdev->same_set;
	}
	list_for_each_continue_rcu(pos, &mddev->disks) {
		rdev = list_entry(pos, struct md_rdev, same_set);
		if (rdev->raid_disk >= 0 &&
		    !test_bit(Faulty, &rdev->flags)) {
			/* this is a usable device */
			atomic_inc(&rdev->nr_pending);
			rcu_read_unlock();
			return rdev;
		}
	}
	rcu_read_unlock();
	return NULL;
}
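/*
 * Typical usage, as in write_sb_page() below: seed the iterator with
 * NULL and loop until it returns NULL, so each usable device is visited
 * once and its nr_pending reference is dropped on the next call:
 *
 *	struct md_rdev *rdev = NULL;
 *
 *	while ((rdev = next_active_rdev(rdev, mddev)) != NULL)
 *		...write one copy of the bitmap page to rdev...
 */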

static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
{
	struct md_rdev *rdev = NULL;
	struct block_device *bdev;
	struct mddev *mddev = bitmap->mddev;

	while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
		int size = PAGE_SIZE;
		loff_t offset = mddev->bitmap_info.offset;

		bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;

		if (page->index == bitmap->file_pages-1)
			size = roundup(bitmap->last_page_size,
				       bdev_logical_block_size(bdev));
		/* Just make sure we aren't corrupting data or
		 * metadata
		 */
		if (mddev->external) {
			/* Bitmap could be anywhere. */
			if (rdev->sb_start + offset + (page->index
						       * (PAGE_SIZE/512))
			    > rdev->data_offset
			    &&
			    rdev->sb_start + offset
			    < (rdev->data_offset + mddev->dev_sectors
			     + (PAGE_SIZE/512)))
				goto bad_alignment;
		} else if (offset < 0) {
			/* DATA  BITMAP METADATA  */
			if (offset
			    + (long)(page->index * (PAGE_SIZE/512))
			    + size/512 > 0)
				/* bitmap runs in to metadata */
				goto bad_alignment;
			if (rdev->data_offset + mddev->dev_sectors
			    > rdev->sb_start + offset)
				/* data runs in to bitmap */
				goto bad_alignment;
		} else if (rdev->sb_start < rdev->data_offset) {
			/* METADATA BITMAP DATA */
			if (rdev->sb_start
			    + offset
			    + page->index*(PAGE_SIZE/512) + size/512
			    > rdev->data_offset)
				/* bitmap runs in to data */
				goto bad_alignment;
		} else {
			/* DATA METADATA BITMAP - no problems */
		}
		md_super_write(mddev, rdev,
			       rdev->sb_start + offset
			       + page->index * (PAGE_SIZE/512),
			       size,
			       page);
	}

	if (wait)
		md_super_wait(mddev);
	return 0;

 bad_alignment:
	return -EINVAL;
}
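/*
 * Summary of the checks above (a reading aid, not upstream text): the
 * page being written covers sectors starting at
 * sb_start + offset + page->index * (PAGE_SIZE/512), and the write is
 * refused whenever that span could intersect the data region
 * [data_offset, data_offset + dev_sectors); only the relative order of
 * data, superblock and bitmap differs between the cases.
 */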

static void bitmap_file_kick(struct bitmap *bitmap);
/*
 * write out a page to a file
 */
static void write_page(struct bitmap *bitmap, struct page *page, int wait)
{
	struct buffer_head *bh;

	if (bitmap->file == NULL) {
		switch (write_sb_page(bitmap, page, wait)) {
		case -EINVAL:
			bitmap->flags |= BITMAP_WRITE_ERROR;
		}
	} else {

		bh = page_buffers(page);

		while (bh && bh->b_blocknr) {
			atomic_inc(&bitmap->pending_writes);
			set_buffer_locked(bh);
			set_buffer_mapped(bh);
			submit_bh(WRITE | REQ_SYNC, bh);
			bh = bh->b_this_page;
		}

		if (wait)
			wait_event(bitmap->write_wait,
				   atomic_read(&bitmap->pending_writes)==0);
	}
	if (bitmap->flags & BITMAP_WRITE_ERROR)
		bitmap_file_kick(bitmap);
}

static void end_bitmap_write(struct buffer_head *bh, int uptodate)
{
	struct bitmap *bitmap = bh->b_private;
	unsigned long flags;

	if (!uptodate) {
		spin_lock_irqsave(&bitmap->lock, flags);
		bitmap->flags |= BITMAP_WRITE_ERROR;
		spin_unlock_irqrestore(&bitmap->lock, flags);
	}
	if (atomic_dec_and_test(&bitmap->pending_writes))
		wake_up(&bitmap->write_wait);
}

/* copied from buffer.c */
static void
__clear_page_buffers(struct page *page)
{
	ClearPagePrivate(page);
	set_page_private(page, 0);
	page_cache_release(page);
}
static void free_buffers(struct page *page)
{
	struct buffer_head *bh = page_buffers(page);

	while (bh) {
		struct buffer_head *next = bh->b_this_page;
		free_buffer_head(bh);
		bh = next;
	}
	__clear_page_buffers(page);
	put_page(page);
}

/* read a page from a file.
 * We both read the page, and attach buffers to the page to record the
 * address of each block (using bmap).  These addresses will be used
 * to write the block later, completely bypassing the filesystem.
 * This usage is similar to how swap files are handled, and allows us
 * to write to a file with no concerns of memory allocation failing.
 */
static struct page *read_page(struct file *file, unsigned long index,
			      struct bitmap *bitmap,
			      unsigned long count)
{
	struct page *page = NULL;
	struct inode *inode = file->f_path.dentry->d_inode;
	struct buffer_head *bh;
	sector_t block;

	pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE,
		 (unsigned long long)index << PAGE_SHIFT);

	page = alloc_page(GFP_KERNEL);
	if (!page)
		page = ERR_PTR(-ENOMEM);
	if (IS_ERR(page))
		goto out;

	bh = alloc_page_buffers(page, 1<<inode->i_blkbits, 0);
	if (!bh) {
		put_page(page);
		page = ERR_PTR(-ENOMEM);
		goto out;
	}
	attach_page_buffers(page, bh);
	block = index << (PAGE_SHIFT - inode->i_blkbits);
	while (bh) {
		if (count == 0)
			bh->b_blocknr = 0;
		else {
			bh->b_blocknr = bmap(inode, block);
			if (bh->b_blocknr == 0) {
				/* Cannot use this file! */
				free_buffers(page);
				page = ERR_PTR(-EINVAL);
				goto out;
			}
			bh->b_bdev = inode->i_sb->s_bdev;
			if (count < (1<<inode->i_blkbits))
				count = 0;
			else
				count -= (1<<inode->i_blkbits);

			bh->b_end_io = end_bitmap_write;
			bh->b_private = bitmap;
			atomic_inc(&bitmap->pending_writes);
			set_buffer_locked(bh);
			set_buffer_mapped(bh);
			submit_bh(READ, bh);
		}
		block++;
		bh = bh->b_this_page;
	}
	page->index = index;

	wait_event(bitmap->write_wait,
		   atomic_read(&bitmap->pending_writes)==0);
	if (bitmap->flags & BITMAP_WRITE_ERROR) {
		free_buffers(page);
		page = ERR_PTR(-EIO);
	}
out:
	if (IS_ERR(page))
		printk(KERN_ALERT "md: bitmap read error: (%dB @ %llu): %ld\n",
			(int)PAGE_SIZE,
			(unsigned long long)index << PAGE_SHIFT,
			PTR_ERR(page));
	return page;
}

/*
 * bitmap file superblock operations
 */

/* update the event counter and sync the superblock to disk */
void bitmap_update_sb(struct bitmap *bitmap)
{
	bitmap_super_t *sb;
	unsigned long flags;

	if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
		return;
	if (bitmap->mddev->bitmap_info.external)
		return;
	spin_lock_irqsave(&bitmap->lock, flags);
	if (!bitmap->sb_page) { /* no superblock */
		spin_unlock_irqrestore(&bitmap->lock, flags);
		return;
	}
	spin_unlock_irqrestore(&bitmap->lock, flags);
	sb = kmap_atomic(bitmap->sb_page, KM_USER0);
	sb->events = cpu_to_le64(bitmap->mddev->events);
	if (bitmap->mddev->events < bitmap->events_cleared)
		/* rocking back to read-only */
		bitmap->events_cleared = bitmap->mddev->events;
	sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
	sb->state = cpu_to_le32(bitmap->flags);
	/* Just in case these have been changed via sysfs: */
	sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ);
	sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind);
	kunmap_atomic(sb, KM_USER0);
	write_page(bitmap, bitmap->sb_page, 1);
}

/* print out the bitmap file superblock */
void bitmap_print_sb(struct bitmap *bitmap)
{
	bitmap_super_t *sb;

	if (!bitmap || !bitmap->sb_page)
		return;
	sb = kmap_atomic(bitmap->sb_page, KM_USER0);
	printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap));
	printk(KERN_DEBUG "         magic: %08x\n", le32_to_cpu(sb->magic));
	printk(KERN_DEBUG "       version: %d\n", le32_to_cpu(sb->version));
	printk(KERN_DEBUG "          uuid: %08x.%08x.%08x.%08x\n",
					*(__u32 *)(sb->uuid+0),
					*(__u32 *)(sb->uuid+4),
					*(__u32 *)(sb->uuid+8),
					*(__u32 *)(sb->uuid+12));
	printk(KERN_DEBUG "        events: %llu\n",
			(unsigned long long) le64_to_cpu(sb->events));
	printk(KERN_DEBUG "events cleared: %llu\n",
			(unsigned long long) le64_to_cpu(sb->events_cleared));
	printk(KERN_DEBUG "         state: %08x\n", le32_to_cpu(sb->state));
	printk(KERN_DEBUG "     chunksize: %d B\n", le32_to_cpu(sb->chunksize));
	printk(KERN_DEBUG "  daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep));
	printk(KERN_DEBUG "     sync size: %llu KB\n",
			(unsigned long long)le64_to_cpu(sb->sync_size)/2);
	printk(KERN_DEBUG "max write behind: %d\n", le32_to_cpu(sb->write_behind));
	kunmap_atomic(sb, KM_USER0);
}

/*
 * bitmap_new_disk_sb
 * @bitmap
 *
 * This function is somewhat the reverse of bitmap_read_sb.  bitmap_read_sb
 * reads and verifies the on-disk bitmap superblock and populates bitmap_info.
 * This function verifies 'bitmap_info' and populates the on-disk bitmap
 * structure, which is to be written to disk.
 *
 * Returns: 0 on success, -Exxx on error
 */
static int bitmap_new_disk_sb(struct bitmap *bitmap)
{
	bitmap_super_t *sb;
	unsigned long chunksize, daemon_sleep, write_behind;
	int err = -EINVAL;

	bitmap->sb_page = alloc_page(GFP_KERNEL);
	if (IS_ERR(bitmap->sb_page)) {
		err = PTR_ERR(bitmap->sb_page);
		bitmap->sb_page = NULL;
		return err;
	}
	bitmap->sb_page->index = 0;

	sb = kmap_atomic(bitmap->sb_page, KM_USER0);

	sb->magic = cpu_to_le32(BITMAP_MAGIC);
	sb->version = cpu_to_le32(BITMAP_MAJOR_HI);

	chunksize = bitmap->mddev->bitmap_info.chunksize;
	BUG_ON(!chunksize);
	if (!is_power_of_2(chunksize)) {
		kunmap_atomic(sb, KM_USER0);
		printk(KERN_ERR "bitmap chunksize not a power of 2\n");
		return -EINVAL;
	}
	sb->chunksize = cpu_to_le32(chunksize);

	daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep;
	if (!daemon_sleep ||
	    (daemon_sleep < 1) || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
		printk(KERN_INFO "Choosing daemon_sleep default (5 sec)\n");
		daemon_sleep = 5 * HZ;
	}
	sb->daemon_sleep = cpu_to_le32(daemon_sleep);
	bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;

	/*
	 * FIXME: write_behind for RAID1.  If not specified, what
	 * is a good choice?  We choose COUNTER_MAX / 2 arbitrarily.
	 */
	write_behind = bitmap->mddev->bitmap_info.max_write_behind;
	if (write_behind > COUNTER_MAX)
		write_behind = COUNTER_MAX / 2;
	sb->write_behind = cpu_to_le32(write_behind);
	bitmap->mddev->bitmap_info.max_write_behind = write_behind;

	/* keep the array size field of the bitmap superblock up to date */
	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);

	memcpy(sb->uuid, bitmap->mddev->uuid, 16);

	bitmap->flags |= BITMAP_STALE;
	sb->state |= cpu_to_le32(BITMAP_STALE);
	bitmap->events_cleared = bitmap->mddev->events;
	sb->events_cleared = cpu_to_le64(bitmap->mddev->events);

	bitmap->flags |= BITMAP_HOSTENDIAN;
	sb->version = cpu_to_le32(BITMAP_MAJOR_HOSTENDIAN);

	kunmap_atomic(sb, KM_USER0);

	return 0;
}

/* read the superblock from the bitmap file and initialize some bitmap fields */
static int bitmap_read_sb(struct bitmap *bitmap)
{
	char *reason = NULL;
	bitmap_super_t *sb;
	unsigned long chunksize, daemon_sleep, write_behind;
	unsigned long long events;
	int err = -EINVAL;

	/* page 0 is the superblock, read it... */
	if (bitmap->file) {
		loff_t isize = i_size_read(bitmap->file->f_mapping->host);
		int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize;

		bitmap->sb_page = read_page(bitmap->file, 0, bitmap, bytes);
	} else {
		bitmap->sb_page = read_sb_page(bitmap->mddev,
					       bitmap->mddev->bitmap_info.offset,
					       NULL,
					       0, sizeof(bitmap_super_t));
	}
	if (IS_ERR(bitmap->sb_page)) {
		err = PTR_ERR(bitmap->sb_page);
		bitmap->sb_page = NULL;
		return err;
	}

	sb = kmap_atomic(bitmap->sb_page, KM_USER0);

	chunksize = le32_to_cpu(sb->chunksize);
	daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
	write_behind = le32_to_cpu(sb->write_behind);

	/* verify that the bitmap-specific fields are valid */
	if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
		reason = "bad magic";
	else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
		 le32_to_cpu(sb->version) > BITMAP_MAJOR_HI)
		reason = "unrecognized superblock version";
	else if (chunksize < 512)
		reason = "bitmap chunksize too small";
	else if (!is_power_of_2(chunksize))
		reason = "bitmap chunksize not a power of 2";
	else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT)
		reason = "daemon sleep period out of range";
	else if (write_behind > COUNTER_MAX)
		reason = "write-behind limit out of range (0 - 16383)";
	if (reason) {
		printk(KERN_INFO "%s: invalid bitmap file superblock: %s\n",
			bmname(bitmap), reason);
		goto out;
	}

	/* keep the array size field of the bitmap superblock up to date */
	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);

	if (bitmap->mddev->persistent) {
		/*
		 * We have a persistent array superblock, so compare the
		 * bitmap's UUID and event counter to the mddev's
		 */
		if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) {
			printk(KERN_INFO
			       "%s: bitmap superblock UUID mismatch\n",
			       bmname(bitmap));
			goto out;
		}
		events = le64_to_cpu(sb->events);
		if (events < bitmap->mddev->events) {
			printk(KERN_INFO
			       "%s: bitmap file is out of date (%llu < %llu) "
			       "-- forcing full recovery\n",
			       bmname(bitmap), events,
			       (unsigned long long) bitmap->mddev->events);
			sb->state |= cpu_to_le32(BITMAP_STALE);
		}
	}

	/* assign fields using values from superblock */
	bitmap->mddev->bitmap_info.chunksize = chunksize;
	bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
	bitmap->mddev->bitmap_info.max_write_behind = write_behind;
	bitmap->flags |= le32_to_cpu(sb->state);
	if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
		bitmap->flags |= BITMAP_HOSTENDIAN;
	bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
	if (bitmap->flags & BITMAP_STALE)
		bitmap->events_cleared = bitmap->mddev->events;
	err = 0;
out:
	kunmap_atomic(sb, KM_USER0);
	if (err)
		bitmap_print_sb(bitmap);
	return err;
}

enum bitmap_mask_op {
	MASK_SET,
	MASK_UNSET
};

/* record the state of the bitmap in the superblock.  Return the old value */
static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
			     enum bitmap_mask_op op)
{
	bitmap_super_t *sb;
	unsigned long flags;
	int old;

	spin_lock_irqsave(&bitmap->lock, flags);
	if (!bitmap->sb_page) { /* can't set the state */
		spin_unlock_irqrestore(&bitmap->lock, flags);
		return 0;
	}
	spin_unlock_irqrestore(&bitmap->lock, flags);
	sb = kmap_atomic(bitmap->sb_page, KM_USER0);
	old = le32_to_cpu(sb->state) & bits;
	switch (op) {
	case MASK_SET:
		sb->state |= cpu_to_le32(bits);
		bitmap->flags |= bits;
		break;
	case MASK_UNSET:
		sb->state &= cpu_to_le32(~bits);
		bitmap->flags &= ~bits;
		break;
	default:
		BUG();
	}
	kunmap_atomic(sb, KM_USER0);
	return old;
}

/*
 * general bitmap file operations
 */

/*
 * on-disk bitmap:
 *
 * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
 * file a page at a time. There's a superblock at the start of the file.
 */
/* calculate the index of the page that contains this bit */
static inline unsigned long file_page_index(struct bitmap *bitmap, unsigned long chunk)
{
	if (!bitmap->mddev->bitmap_info.external)
		chunk += sizeof(bitmap_super_t) << 3;
	return chunk >> PAGE_BIT_SHIFT;
}

/* calculate the (bit) offset of this bit within a page */
static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned long chunk)
{
	if (!bitmap->mddev->bitmap_info.external)
		chunk += sizeof(bitmap_super_t) << 3;
	return chunk & (PAGE_BITS - 1);
}
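/*
 * Worked example, assuming 4KiB pages (PAGE_BITS == 32768,
 * PAGE_BIT_SHIFT == 15) and an internal superblock
 * (sizeof(bitmap_super_t) == 256, i.e. 2048 bits): chunk 31000 becomes
 * bit 31000 + 2048 = 33048, which lands in file page 33048 >> 15 = 1
 * at bit offset 33048 & 32767 = 280.
 */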

/*
 * return a pointer to the page in the filemap that contains the given bit
 *
 * this lookup is complicated by the fact that the bitmap sb might be exactly
 * 1 page (e.g., x86) or less than 1 page -- so the bitmap might start on page
 * 0 or page 1
 */
static inline struct page *filemap_get_page(struct bitmap *bitmap,
					    unsigned long chunk)
{
	if (file_page_index(bitmap, chunk) >= bitmap->file_pages)
		return NULL;
	return bitmap->filemap[file_page_index(bitmap, chunk)
			       - file_page_index(bitmap, 0)];
}

static void bitmap_file_unmap(struct bitmap *bitmap)
{
	struct page **map, *sb_page;
	unsigned long *attr;
	int pages;
	unsigned long flags;

	spin_lock_irqsave(&bitmap->lock, flags);
	map = bitmap->filemap;
	bitmap->filemap = NULL;
	attr = bitmap->filemap_attr;
	bitmap->filemap_attr = NULL;
	pages = bitmap->file_pages;
	bitmap->file_pages = 0;
	sb_page = bitmap->sb_page;
	bitmap->sb_page = NULL;
	spin_unlock_irqrestore(&bitmap->lock, flags);

	while (pages--)
		if (map[pages] != sb_page) /* 0 is sb_page, release it below */
			free_buffers(map[pages]);
	kfree(map);
	kfree(attr);

	if (sb_page)
		free_buffers(sb_page);
}

static void bitmap_file_put(struct bitmap *bitmap)
{
	struct file *file;
	unsigned long flags;

	spin_lock_irqsave(&bitmap->lock, flags);
	file = bitmap->file;
	bitmap->file = NULL;
	spin_unlock_irqrestore(&bitmap->lock, flags);

	if (file)
		wait_event(bitmap->write_wait,
			   atomic_read(&bitmap->pending_writes)==0);
	bitmap_file_unmap(bitmap);

	if (file) {
		struct inode *inode = file->f_path.dentry->d_inode;
		invalidate_mapping_pages(inode->i_mapping, 0, -1);
		fput(file);
	}
}

/*
 * bitmap_file_kick - if an error occurs while manipulating the bitmap file
 * then it is no longer reliable, so we stop using it and we mark the file
 * as failed in the superblock
 */
static void bitmap_file_kick(struct bitmap *bitmap)
{
	char *path, *ptr = NULL;

	if (bitmap_mask_state(bitmap, BITMAP_STALE, MASK_SET) == 0) {
		bitmap_update_sb(bitmap);

		if (bitmap->file) {
			path = kmalloc(PAGE_SIZE, GFP_KERNEL);
			if (path)
				ptr = d_path(&bitmap->file->f_path, path,
					     PAGE_SIZE);

			printk(KERN_ALERT
			      "%s: kicking failed bitmap file %s from array!\n",
			      bmname(bitmap), IS_ERR(ptr) ? "" : ptr);

			kfree(path);
		} else
			printk(KERN_ALERT
			       "%s: disabling internal bitmap due to errors\n",
			       bmname(bitmap));
	}

	bitmap_file_put(bitmap);

	return;
}

enum bitmap_page_attr {
	BITMAP_PAGE_DIRTY = 0,     /* there are set bits that need to be synced */
	BITMAP_PAGE_PENDING = 1,   /* there are bits that are being cleaned.
				    * i.e. counter is 1 or 2. */
	BITMAP_PAGE_NEEDWRITE = 2, /* there are cleared bits that need to be synced */
};

static inline void set_page_attr(struct bitmap *bitmap, struct page *page,
				enum bitmap_page_attr attr)
{
	__set_bit((page->index<<2) + attr, bitmap->filemap_attr);
}

static inline void clear_page_attr(struct bitmap *bitmap, struct page *page,
				enum bitmap_page_attr attr)
{
	__clear_bit((page->index<<2) + attr, bitmap->filemap_attr);
}

static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *page,
					   enum bitmap_page_attr attr)
{
	return test_bit((page->index<<2) + attr, bitmap->filemap_attr);
}
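/*
 * Note on the (page->index<<2) + attr index used above: the attributes
 * are packed four bits per bitmap file page (three used, one spare)
 * into the bitmap->filemap_attr array, so e.g. BITMAP_PAGE_NEEDWRITE
 * for file page 3 is bit 3*4 + 2 = 14.
 */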

/*
 * bitmap_file_set_bit -- called before performing a write to the md device
 * to set (and eventually sync) a particular bit in the bitmap file
 *
 * we set the bit immediately, then we record the page number so that
 * when an unplug occurs, we can flush the dirty pages out to disk
 */
static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
{
	unsigned long bit;
	struct page *page;
	void *kaddr;
	unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap);

	if (!bitmap->filemap)
		return;

	page = filemap_get_page(bitmap, chunk);
	if (!page)
		return;
	bit = file_page_offset(bitmap, chunk);

	/* set the bit */
	kaddr = kmap_atomic(page, KM_USER0);
	if (bitmap->flags & BITMAP_HOSTENDIAN)
		set_bit(bit, kaddr);
	else
		__set_bit_le(bit, kaddr);
	kunmap_atomic(kaddr, KM_USER0);
	pr_debug("set file bit %lu page %lu\n", bit, page->index);
	/* record page number so it gets flushed to disk when unplug occurs */
	set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);
}

/* this gets called when the md device is ready to unplug its underlying
 * (slave) device queues -- before we let any writes go down, we need to
 * sync the dirty pages of the bitmap file to disk */
void bitmap_unplug(struct bitmap *bitmap)
{
	unsigned long i, flags;
	int dirty, need_write;
	struct page *page;
	int wait = 0;

	if (!bitmap)
		return;

	/* look at each page to see if there are any set bits that need to be
	 * flushed out to disk */
	for (i = 0; i < bitmap->file_pages; i++) {
		spin_lock_irqsave(&bitmap->lock, flags);
		if (!bitmap->filemap) {
			spin_unlock_irqrestore(&bitmap->lock, flags);
			return;
		}
		page = bitmap->filemap[i];
		dirty = test_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);
		need_write = test_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
		clear_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);
		clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
		if (dirty)
			wait = 1;
		spin_unlock_irqrestore(&bitmap->lock, flags);

		if (dirty || need_write)
			write_page(bitmap, page, 0);
	}
	if (wait) { /* if any writes were performed, we need to wait on them */
		if (bitmap->file)
			wait_event(bitmap->write_wait,
				   atomic_read(&bitmap->pending_writes)==0);
		else
			md_super_wait(bitmap->mddev);
	}
	if (bitmap->flags & BITMAP_WRITE_ERROR)
		bitmap_file_kick(bitmap);
}
EXPORT_SYMBOL(bitmap_unplug);

static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
/*
 * bitmap_init_from_disk -- called at bitmap_create time to initialize
 * the in-memory bitmap from the on-disk bitmap -- also, sets up the
 * memory mapping of the bitmap file
 * Special cases:
 *   if there's no bitmap file, or if the bitmap file had been
 *   previously kicked from the array, we mark all the bits as
 *   1's in order to cause a full resync.
 *
 * We ignore all bits for sectors that end earlier than 'start'.
 * This is used when reading an out-of-date bitmap...
 */
static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
{
	unsigned long i, chunks, index, oldindex, bit;
	struct page *page = NULL, *oldpage = NULL;
	unsigned long num_pages, bit_cnt = 0;
	struct file *file;
	unsigned long bytes, offset;
	int outofdate;
	int ret = -ENOSPC;
	void *paddr;

	chunks = bitmap->chunks;
	file = bitmap->file;

	BUG_ON(!file && !bitmap->mddev->bitmap_info.offset);

	outofdate = bitmap->flags & BITMAP_STALE;
	if (outofdate)
		printk(KERN_INFO "%s: bitmap file is out of date, doing full "
			"recovery\n", bmname(bitmap));

	bytes = DIV_ROUND_UP(bitmap->chunks, 8);
	if (!bitmap->mddev->bitmap_info.external)
		bytes += sizeof(bitmap_super_t);

	num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);

	if (file && i_size_read(file->f_mapping->host) < bytes) {
		printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
			bmname(bitmap),
			(unsigned long) i_size_read(file->f_mapping->host),
			bytes);
		goto err;
	}

	ret = -ENOMEM;

	bitmap->filemap = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL);
	if (!bitmap->filemap)
		goto err;

	/* We need 4 bits per page, rounded up to a multiple of sizeof(unsigned long) */
	bitmap->filemap_attr = kzalloc(
		roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)),
		GFP_KERNEL);
	if (!bitmap->filemap_attr)
		goto err;

	oldindex = ~0L;

	for (i = 0; i < chunks; i++) {
		int b;
		index = file_page_index(bitmap, i);
		bit = file_page_offset(bitmap, i);
		if (index != oldindex) { /* this is a new page, read it in */
			int count;
			/* unmap the old page, we're done with it */
			if (index == num_pages-1)
				count = bytes - index * PAGE_SIZE;
			else
				count = PAGE_SIZE;
			if (index == 0 && bitmap->sb_page) {
				/*
				 * if we're here then the superblock page
				 * contains some bits (PAGE_SIZE != sizeof sb)
				 * we've already read it in, so just use it
				 */
				page = bitmap->sb_page;
				offset = sizeof(bitmap_super_t);
				if (!file)
					page = read_sb_page(
						bitmap->mddev,
						bitmap->mddev->bitmap_info.offset,
						page,
						index, count);
			} else if (file) {
				page = read_page(file, index, bitmap, count);
				offset = 0;
			} else {
				page = read_sb_page(bitmap->mddev,
						    bitmap->mddev->bitmap_info.offset,
						    NULL,
						    index, count);
				offset = 0;
			}
			if (IS_ERR(page)) { /* read error */
				ret = PTR_ERR(page);
				goto err;
			}

			oldindex = index;
			oldpage = page;

			bitmap->filemap[bitmap->file_pages++] = page;
			bitmap->last_page_size = count;

			if (outofdate) {
				/*
				 * if bitmap is out of date, dirty the
				 * whole page and write it out
				 */
				paddr = kmap_atomic(page, KM_USER0);
				memset(paddr + offset, 0xff,
				       PAGE_SIZE - offset);
				kunmap_atomic(paddr, KM_USER0);
				write_page(bitmap, page, 1);

				ret = -EIO;
				if (bitmap->flags & BITMAP_WRITE_ERROR)
					goto err;
			}
		}
		paddr = kmap_atomic(page, KM_USER0);
		if (bitmap->flags & BITMAP_HOSTENDIAN)
			b = test_bit(bit, paddr);
		else
			b = test_bit_le(bit, paddr);
		kunmap_atomic(paddr, KM_USER0);
		if (b) {
			/* if the disk bit is set, set the memory bit */
			int needed = ((sector_t)(i+1) << (CHUNK_BLOCK_SHIFT(bitmap))
				      >= start);
			bitmap_set_memory_bits(bitmap,
					       (sector_t)i << CHUNK_BLOCK_SHIFT(bitmap),
					       needed);
			bit_cnt++;
		}
	}

	/* everything went OK */
	ret = 0;
	bitmap_mask_state(bitmap, BITMAP_STALE, MASK_UNSET);

	if (bit_cnt) { /* Kick recovery if any bits were set */
		set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);
		md_wakeup_thread(bitmap->mddev->thread);
	}

	printk(KERN_INFO "%s: bitmap initialized from disk: "
	       "read %lu/%lu pages, set %lu of %lu bits\n",
	       bmname(bitmap), bitmap->file_pages, num_pages, bit_cnt, chunks);

	return 0;

 err:
	printk(KERN_INFO "%s: bitmap initialisation failed: %d\n",
	       bmname(bitmap), ret);
	return ret;
}

void bitmap_write_all(struct bitmap *bitmap)
{
	/* We don't actually write all bitmap blocks here,
	 * just flag them as needing to be written
	 */
	int i;

	spin_lock_irq(&bitmap->lock);
	for (i = 0; i < bitmap->file_pages; i++)
		set_page_attr(bitmap, bitmap->filemap[i],
			      BITMAP_PAGE_NEEDWRITE);
	bitmap->allclean = 0;
	spin_unlock_irq(&bitmap->lock);
}

static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc)
{
	sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap);
	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
	bitmap->bp[page].count += inc;
	bitmap_checkfree(bitmap, page);
}
static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
					    sector_t offset, sector_t *blocks,
					    int create);

/*
 * bitmap daemon -- periodically wakes up to clean bits and flush pages
 *			out to disk
 */

void bitmap_daemon_work(struct mddev *mddev)
{
	struct bitmap *bitmap;
	unsigned long j;
	unsigned long flags;
	struct page *page = NULL, *lastpage = NULL;
	sector_t blocks;
	void *paddr;

	/* Use a mutex to guard daemon_work against
	 * bitmap_destroy.
	 */
	mutex_lock(&mddev->bitmap_info.mutex);
	bitmap = mddev->bitmap;
	if (bitmap == NULL) {
		mutex_unlock(&mddev->bitmap_info.mutex);
		return;
	}
	if (time_before(jiffies, bitmap->daemon_lastrun
			+ mddev->bitmap_info.daemon_sleep))
		goto done;

	bitmap->daemon_lastrun = jiffies;
	if (bitmap->allclean) {
		mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
		goto done;
	}
	bitmap->allclean = 1;

	spin_lock_irqsave(&bitmap->lock, flags);
	for (j = 0; j < bitmap->chunks; j++) {
		bitmap_counter_t *bmc;
		if (!bitmap->filemap)
			/* error or shutdown */
			break;

		page = filemap_get_page(bitmap, j);

		if (page != lastpage) {
			/* skip this page unless it's marked as needing cleaning */
			if (!test_page_attr(bitmap, page, BITMAP_PAGE_PENDING)) {
				int need_write = test_page_attr(bitmap, page,
								BITMAP_PAGE_NEEDWRITE);
				if (need_write)
					clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);

				spin_unlock_irqrestore(&bitmap->lock, flags);
				if (need_write)
					write_page(bitmap, page, 0);
				spin_lock_irqsave(&bitmap->lock, flags);
				j |= (PAGE_BITS - 1);
				continue;
			}

			/* grab the new page, sync and release the old */
			if (lastpage != NULL) {
				if (test_page_attr(bitmap, lastpage,
						   BITMAP_PAGE_NEEDWRITE)) {
					clear_page_attr(bitmap, lastpage,
							BITMAP_PAGE_NEEDWRITE);
					spin_unlock_irqrestore(&bitmap->lock, flags);
					write_page(bitmap, lastpage, 0);
				} else {
					set_page_attr(bitmap, lastpage,
						      BITMAP_PAGE_NEEDWRITE);
					bitmap->allclean = 0;
					spin_unlock_irqrestore(&bitmap->lock, flags);
				}
			} else
				spin_unlock_irqrestore(&bitmap->lock, flags);
			lastpage = page;

			/* We are possibly going to clear some bits, so make
			 * sure that events_cleared is up-to-date.
			 */
			if (bitmap->need_sync &&
			    mddev->bitmap_info.external == 0) {
				bitmap_super_t *sb;
				bitmap->need_sync = 0;
				sb = kmap_atomic(bitmap->sb_page, KM_USER0);
				sb->events_cleared =
					cpu_to_le64(bitmap->events_cleared);
				kunmap_atomic(sb, KM_USER0);
				write_page(bitmap, bitmap->sb_page, 1);
			}
			spin_lock_irqsave(&bitmap->lock, flags);
			if (!bitmap->need_sync)
				clear_page_attr(bitmap, page, BITMAP_PAGE_PENDING);
			else
				bitmap->allclean = 0;
		}
		bmc = bitmap_get_counter(bitmap,
					 (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
					 &blocks, 0);
		if (!bmc)
			j |= PAGE_COUNTER_MASK;
		else if (*bmc) {
			if (*bmc == 1 && !bitmap->need_sync) {
				/* we can clear the bit */
				*bmc = 0;
				bitmap_count_page(bitmap,
						  (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
						  -1);

				/* clear the bit */
				paddr = kmap_atomic(page, KM_USER0);
				if (bitmap->flags & BITMAP_HOSTENDIAN)
					clear_bit(file_page_offset(bitmap, j),
						  paddr);
				else
					__clear_bit_le(
						file_page_offset(bitmap,
								 j),
						paddr);
				kunmap_atomic(paddr, KM_USER0);
			} else if (*bmc <= 2) {
				*bmc = 1; /* maybe clear the bit next time */
				set_page_attr(bitmap, page, BITMAP_PAGE_PENDING);
				bitmap->allclean = 0;
			}
		}
	}
	spin_unlock_irqrestore(&bitmap->lock, flags);

	/* now sync the final page */
	if (lastpage != NULL) {
		spin_lock_irqsave(&bitmap->lock, flags);
		if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) {
			clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
			spin_unlock_irqrestore(&bitmap->lock, flags);
			write_page(bitmap, lastpage, 0);
		} else {
			set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
			bitmap->allclean = 0;
			spin_unlock_irqrestore(&bitmap->lock, flags);
			spin_unlock_irqrestore(&bitmap->lock, flags);
		}
	}

 done:
	if (bitmap->allclean == 0)
		mddev->thread->timeout =
			mddev->bitmap_info.daemon_sleep;
	mutex_unlock(&mddev->bitmap_info.mutex);
}
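/*
 * To summarise the decay pipeline above: bitmap_startwrite() holds a
 * chunk's counter at 2 or more while writes are in flight; once writes
 * finish, successive daemon passes take it 2 -> 1 (page marked
 * BITMAP_PAGE_PENDING) and then 1 -> 0 (the on-disk bit is cleared and
 * the page marked BITMAP_PAGE_NEEDWRITE for a later flush).  A chunk
 * therefore only goes clean on disk after two quiet daemon periods.
 */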

static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
					    sector_t offset, sector_t *blocks,
					    int create)
__releases(bitmap->lock)
__acquires(bitmap->lock)
{
	/* If 'create', we might release the lock and reclaim it.
	 * The lock must have been taken with interrupts enabled.
	 * If !create, we don't release the lock.
	 */
	sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap);
	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
	unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT;
	sector_t csize;
	int err;

	err = bitmap_checkpage(bitmap, page, create);

	if (bitmap->bp[page].hijacked ||
	    bitmap->bp[page].map == NULL)
		csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap) +
					  PAGE_COUNTER_SHIFT - 1);
	else
		csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap));
	*blocks = csize - (offset & (csize - 1));

	if (err < 0)
		return NULL;

	/* now locked ... */

	if (bitmap->bp[page].hijacked) { /* hijacked pointer */
		/* should we use the first or second counter field
		 * of the hijacked pointer? */
		int hi = (pageoff > PAGE_COUNTER_MASK);
		return  &((bitmap_counter_t *)
			  &bitmap->bp[page].map)[hi];
	} else /* page is allocated */
		return (bitmap_counter_t *)
			&(bitmap->bp[page].map[pageoff]);
}

int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind)
{
	if (!bitmap)
		return 0;

	if (behind) {
		int bw;
		atomic_inc(&bitmap->behind_writes);
		bw = atomic_read(&bitmap->behind_writes);
		if (bw > bitmap->behind_writes_used)
			bitmap->behind_writes_used = bw;

		pr_debug("inc write-behind count %d/%lu\n",
			 bw, bitmap->mddev->bitmap_info.max_write_behind);
	}

	while (sectors) {
		sector_t blocks;
		bitmap_counter_t *bmc;

		spin_lock_irq(&bitmap->lock);
		bmc = bitmap_get_counter(bitmap, offset, &blocks, 1);
		if (!bmc) {
			spin_unlock_irq(&bitmap->lock);
			return 0;
		}

		if (unlikely(COUNTER(*bmc) == COUNTER_MAX)) {
			DEFINE_WAIT(__wait);
			/* note that it is safe to do the prepare_to_wait
			 * after the test as long as we do it before dropping
			 * the spinlock.
			 */
			prepare_to_wait(&bitmap->overflow_wait, &__wait,
					TASK_UNINTERRUPTIBLE);
			spin_unlock_irq(&bitmap->lock);
			io_schedule();
			finish_wait(&bitmap->overflow_wait, &__wait);
			continue;
		}

		switch (*bmc) {
		case 0:
			bitmap_file_set_bit(bitmap, offset);
			bitmap_count_page(bitmap, offset, 1);
			/* fall through */
		case 1:
			*bmc = 2;
		}

		(*bmc)++;

		spin_unlock_irq(&bitmap->lock);

		offset += blocks;
		if (sectors > blocks)
			sectors -= blocks;
		else
			sectors = 0;
	}
	return 0;
}
EXPORT_SYMBOL(bitmap_startwrite);
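/*
 * A sketch of the counter layout this relies on (as defined in
 * bitmap.h): the top bit of each 16-bit counter is NEEDED_MASK, the
 * next is RESYNC_MASK, and the low 14 bits count in-flight writes, so
 * COUNTER_MAX is 16383 - matching the write-behind limit message in
 * bitmap_read_sb() above.
 */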

void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors,
		     int success, int behind)
{
	if (!bitmap)
		return;
	if (behind) {
		if (atomic_dec_and_test(&bitmap->behind_writes))
			wake_up(&bitmap->behind_wait);
		pr_debug("dec write-behind count %d/%lu\n",
			 atomic_read(&bitmap->behind_writes),
			 bitmap->mddev->bitmap_info.max_write_behind);
	}

	while (sectors) {
		sector_t blocks;
		unsigned long flags;
		bitmap_counter_t *bmc;

		spin_lock_irqsave(&bitmap->lock, flags);
		bmc = bitmap_get_counter(bitmap, offset, &blocks, 0);
		if (!bmc) {
			spin_unlock_irqrestore(&bitmap->lock, flags);
			return;
		}

		if (success && !bitmap->mddev->degraded &&
		    bitmap->events_cleared < bitmap->mddev->events) {
			bitmap->events_cleared = bitmap->mddev->events;
			bitmap->need_sync = 1;
			sysfs_notify_dirent_safe(bitmap->sysfs_can_clear);
		}

		if (!success && !NEEDED(*bmc))
			*bmc |= NEEDED_MASK;

		if (COUNTER(*bmc) == COUNTER_MAX)
			wake_up(&bitmap->overflow_wait);

		(*bmc)--;
		if (*bmc <= 2) {
			set_page_attr(bitmap,
				      filemap_get_page(
					      bitmap,
					      offset >> CHUNK_BLOCK_SHIFT(bitmap)),
				      BITMAP_PAGE_PENDING);
			bitmap->allclean = 0;
		}
		spin_unlock_irqrestore(&bitmap->lock, flags);
		offset += blocks;
		if (sectors > blocks)
			sectors -= blocks;
		else
			sectors = 0;
	}
}
EXPORT_SYMBOL(bitmap_endwrite);

static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
			       int degraded)
{
	bitmap_counter_t *bmc;
	int rv;
	if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */
		*blocks = 1024;
		return 1; /* always resync if no bitmap */
	}
	spin_lock_irq(&bitmap->lock);
	bmc = bitmap_get_counter(bitmap, offset, blocks, 0);
	rv = 0;
	if (bmc) {
		/* locked */
		if (RESYNC(*bmc))
			rv = 1;
		else if (NEEDED(*bmc)) {
			rv = 1;
			if (!degraded) { /* don't set/clear bits if degraded */
				*bmc |= RESYNC_MASK;
				*bmc &= ~NEEDED_MASK;
			}
		}
	}
	spin_unlock_irq(&bitmap->lock);
	return rv;
}

int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
		      int degraded)
{
	/* bitmap_start_sync must always report on multiples of whole
	 * pages, otherwise resync (which is very PAGE_SIZE based) will
	 * get confused.
	 * So call __bitmap_start_sync repeatedly (if needed) until
	 * At least PAGE_SIZE>>9 blocks are covered.
	 * Return the 'or' of the result.
	 */
	int rv = 0;
	sector_t blocks1;

	*blocks = 0;
	while (*blocks < (PAGE_SIZE>>9)) {
		rv |= __bitmap_start_sync(bitmap, offset,
					  &blocks1, degraded);
		offset += blocks1;
		*blocks += blocks1;
	}
	return rv;
}
EXPORT_SYMBOL(bitmap_start_sync);

void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted)
{
	bitmap_counter_t *bmc;
	unsigned long flags;

	if (bitmap == NULL) {
		*blocks = 1024;
		return;
	}
	spin_lock_irqsave(&bitmap->lock, flags);
	bmc = bitmap_get_counter(bitmap, offset, blocks, 0);
	if (bmc == NULL)
		goto unlock;
	/* locked */
	if (RESYNC(*bmc)) {
		*bmc &= ~RESYNC_MASK;

		if (!NEEDED(*bmc) && aborted)
			*bmc |= NEEDED_MASK;
		else {
			if (*bmc <= 2) {
				set_page_attr(bitmap,
					      filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)),
					      BITMAP_PAGE_PENDING);
				bitmap->allclean = 0;
			}
		}
	}
 unlock:
	spin_unlock_irqrestore(&bitmap->lock, flags);
}
EXPORT_SYMBOL(bitmap_end_sync);

void bitmap_close_sync(struct bitmap *bitmap)
{
	/* Sync has finished, and any bitmap chunks that weren't synced
	 * properly have been aborted.  It remains to us to clear the
	 * RESYNC bit wherever it is still on
	 */
	sector_t sector = 0;
	sector_t blocks;
	if (!bitmap)
		return;
	while (sector < bitmap->mddev->resync_max_sectors) {
		bitmap_end_sync(bitmap, sector, &blocks, 0);
		sector += blocks;
	}
}
EXPORT_SYMBOL(bitmap_close_sync);

void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
{
	sector_t s = 0;
	sector_t blocks;

	if (!bitmap)
		return;
	if (sector == 0) {
		bitmap->last_end_sync = jiffies;
		return;
	}
	if (time_before(jiffies, (bitmap->last_end_sync
				  + bitmap->mddev->bitmap_info.daemon_sleep)))
		return;
	wait_event(bitmap->mddev->recovery_wait,
		   atomic_read(&bitmap->mddev->recovery_active) == 0);

	bitmap->mddev->curr_resync_completed = sector;
	set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags);
	sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1);
	s = 0;
	while (s < sector && s < bitmap->mddev->resync_max_sectors) {
		bitmap_end_sync(bitmap, s, &blocks, 0);
		s += blocks;
	}
	bitmap->last_end_sync = jiffies;
	sysfs_notify(&bitmap->mddev->kobj, NULL, "sync_completed");
}
EXPORT_SYMBOL(bitmap_cond_end_sync);

static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
{
	/* For each chunk covered by any of these sectors, set the
	 * counter to 1 and set resync_needed.  They should all
	 * be 0 at this point
	 */

	sector_t secs;
	bitmap_counter_t *bmc;
	spin_lock_irq(&bitmap->lock);
	bmc = bitmap_get_counter(bitmap, offset, &secs, 1);
	if (!bmc) {
		spin_unlock_irq(&bitmap->lock);
		return;
	}
	if (!*bmc) {
		struct page *page;
		*bmc = 2 | (needed ? NEEDED_MASK : 0);
		bitmap_count_page(bitmap, offset, 1);
		page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap));
		set_page_attr(bitmap, page, BITMAP_PAGE_PENDING);
		bitmap->allclean = 0;
	}
	spin_unlock_irq(&bitmap->lock);
}

/* dirty the memory and file bits for bitmap chunks "s" to "e" */
void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
{
	unsigned long chunk;

	for (chunk = s; chunk <= e; chunk++) {
		sector_t sec = (sector_t)chunk << CHUNK_BLOCK_SHIFT(bitmap);
		bitmap_set_memory_bits(bitmap, sec, 1);
		spin_lock_irq(&bitmap->lock);
		bitmap_file_set_bit(bitmap, sec);
		spin_unlock_irq(&bitmap->lock);
		if (sec < bitmap->mddev->recovery_cp)
			/* We are asserting that the array is dirty,
			 * so move the recovery_cp address back so
			 * that it is obvious that it is dirty
			 */
			bitmap->mddev->recovery_cp = sec;
	}
}

/*
 * flush out any pending updates
 */
void bitmap_flush(struct mddev *mddev)
{
	struct bitmap *bitmap = mddev->bitmap;
	long sleep;

	if (!bitmap) /* there was no bitmap */
		return;

	/* run the daemon_work three time to ensure everything is flushed
	 * that can be
	 */
	sleep = mddev->bitmap_info.daemon_sleep * 2;
	bitmap->daemon_lastrun -= sleep;
	bitmap_daemon_work(mddev);
	bitmap->daemon_lastrun -= sleep;
	bitmap_daemon_work(mddev);
	bitmap->daemon_lastrun -= sleep;
	bitmap_daemon_work(mddev);
	bitmap_update_sb(bitmap);
}

/*
 * free memory that was allocated
 */
static void bitmap_free(struct bitmap *bitmap)
{
	unsigned long k, pages;
	struct bitmap_page *bp;

	if (!bitmap) /* there was no bitmap */
		return;

	/* release the bitmap file and kill the daemon */
	bitmap_file_put(bitmap);

	bp = bitmap->bp;
	pages = bitmap->pages;

	/* free all allocated memory */

	if (bp) /* deallocate the page memory */
		for (k = 0; k < pages; k++)
			if (bp[k].map && !bp[k].hijacked)
				kfree(bp[k].map);
	kfree(bp);
	kfree(bitmap);
}

void bitmap_destroy(struct mddev *mddev)
{
	struct bitmap *bitmap = mddev->bitmap;

	if (!bitmap) /* there was no bitmap */
		return;

	mutex_lock(&mddev->bitmap_info.mutex);
	mddev->bitmap = NULL; /* disconnect from the md device */
	mutex_unlock(&mddev->bitmap_info.mutex);
	if (mddev->thread)
		mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;

	if (bitmap->sysfs_can_clear)
		sysfs_put(bitmap->sysfs_can_clear);

	bitmap_free(bitmap);
}

/*
 * initialize the bitmap structure
 * if this returns an error, bitmap_destroy must be called to do clean up
 */
int bitmap_create(struct mddev *mddev)
{
	struct bitmap *bitmap;
	sector_t blocks = mddev->resync_max_sectors;
	unsigned long chunks;
	unsigned long pages;
	struct file *file = mddev->bitmap_info.file;
	int err;
	struct sysfs_dirent *bm = NULL;

	BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);

	if (!file
	    && !mddev->bitmap_info.offset) /* bitmap disabled, nothing to do */
		return 0;

	BUG_ON(file && mddev->bitmap_info.offset);

	bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
	if (!bitmap)
		return -ENOMEM;

	spin_lock_init(&bitmap->lock);
	atomic_set(&bitmap->pending_writes, 0);
	init_waitqueue_head(&bitmap->write_wait);
	init_waitqueue_head(&bitmap->overflow_wait);
	init_waitqueue_head(&bitmap->behind_wait);

	bitmap->mddev = mddev;

	if (mddev->kobj.sd)
		bm = sysfs_get_dirent(mddev->kobj.sd, NULL, "bitmap");
	if (bm) {
		bitmap->sysfs_can_clear = sysfs_get_dirent(bm, NULL, "can_clear");
		sysfs_put(bm);
	} else
		bitmap->sysfs_can_clear = NULL;

	bitmap->file = file;
	if (file) {
		get_file(file);
		/* As future accesses to this file will use bmap,
		 * and bypass the page cache, we must sync the file
		 * first.
		 */
		vfs_fsync(file, 1);
	}
	/* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
	if (!mddev->bitmap_info.external) {
		/*
		 * If 'MD_ARRAY_FIRST_USE' is set, then device-mapper is
		 * instructing us to create a new on-disk bitmap instance.
		 */
		if (test_and_clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags))
			err = bitmap_new_disk_sb(bitmap);
		else
			err = bitmap_read_sb(bitmap);
	} else {
		err = 0;
		if (mddev->bitmap_info.chunksize == 0 ||
		    mddev->bitmap_info.daemon_sleep == 0)
			/* chunksize and time_base need to be
			 * set first. */
			err = -EINVAL;
	}
	if (err)
		goto error;

	bitmap->daemon_lastrun = jiffies;
	bitmap->chunkshift = ffz(~mddev->bitmap_info.chunksize);

	/* now that chunksize and chunkshift are set, we can use these macros */
	chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) >>
			CHUNK_BLOCK_SHIFT(bitmap);
	pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO;

	BUG_ON(!pages);

	bitmap->chunks = chunks;
	bitmap->pages = pages;
	bitmap->missing_pages = pages;

	bitmap->bp = kzalloc(pages * sizeof(*bitmap->bp), GFP_KERNEL);

	err = -ENOMEM;
	if (!bitmap->bp)
		goto error;

	printk(KERN_INFO "created bitmap (%lu pages) for device %s\n",
		pages, bmname(bitmap));

	mddev->bitmap = bitmap;


	return (bitmap->flags & BITMAP_WRITE_ERROR) ? -EIO : 0;

 error:
	bitmap_free(bitmap);
	return err;
}

int bitmap_load(struct mddev *mddev)
{
	int err = 0;
	sector_t start = 0;
	sector_t sector = 0;
	struct bitmap *bitmap = mddev->bitmap;

	if (!bitmap)
		goto out;

	/* Clear out old bitmap info first:  Either there is none, or we
	 * are resuming after someone else has possibly changed things,
	 * so we should forget old cached info.
	 * All chunks should be clean, but some might need_sync.
	 */
	while (sector < mddev->resync_max_sectors) {
		sector_t blocks;
		bitmap_start_sync(bitmap, sector, &blocks, 0);
		sector += blocks;
	}
	bitmap_close_sync(bitmap);

	if (mddev->degraded == 0
	    || bitmap->events_cleared == mddev->events)
		/* no need to keep dirty bits to optimise a
		 * re-add of a missing device */
		start = mddev->recovery_cp;

	err = bitmap_init_from_disk(bitmap, start);

	if (err)
		goto out;

	mddev->thread->timeout = mddev->bitmap_info.daemon_sleep;
	md_wakeup_thread(mddev->thread);

	bitmap_update_sb(bitmap);

	if (bitmap->flags & BITMAP_WRITE_ERROR)
		err = -EIO;
out:
	return err;
}
EXPORT_SYMBOL_GPL(bitmap_load);

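/*
 * Emit bitmap usage into a seq_file (md's /proc/mdstat output); the line
 * produced looks like this, with illustrative values:
 *   bitmap: 5/37 pages [20KB], 1024KB chunk, file: /bitmaps/md0.bmp
 */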
void bitmap_status(struct seq_file *seq, struct bitmap *bitmap)
{
	unsigned long chunk_kb;
	unsigned long flags;

	if (!bitmap)
		return;

	spin_lock_irqsave(&bitmap->lock, flags);
	chunk_kb = bitmap->mddev->bitmap_info.chunksize >> 10;
	seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
		   "%lu%s chunk",
		   bitmap->pages - bitmap->missing_pages,
		   bitmap->pages,
		   (bitmap->pages - bitmap->missing_pages)
		   << (PAGE_SHIFT - 10),
		   chunk_kb ? chunk_kb : bitmap->mddev->bitmap_info.chunksize,
		   chunk_kb ? "KB" : "B");
	if (bitmap->file) {
		seq_printf(seq, ", file: ");
		seq_path(seq, &bitmap->file->f_path, " \t\n");
	}

	seq_printf(seq, "\n");
	spin_unlock_irqrestore(&bitmap->lock, flags);
}

static ssize_t
location_show(struct mddev *mddev, char *page)
{
	ssize_t len;
	if (mddev->bitmap_info.file)
		len = sprintf(page, "file");
	else if (mddev->bitmap_info.offset)
		len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset);
	else
		len = sprintf(page, "none");
	len += sprintf(page+len, "\n");
	return len;
}

static ssize_t
location_store(struct mddev *mddev, const char *buf, size_t len)
{

	if (mddev->pers) {
		if (!mddev->pers->quiesce)
			return -EBUSY;
		if (mddev->recovery || mddev->sync_thread)
			return -EBUSY;
	}

	if (mddev->bitmap || mddev->bitmap_info.file ||
	    mddev->bitmap_info.offset) {
		/* bitmap already configured.  Only option is to clear it */
		if (strncmp(buf, "none", 4) != 0)
			return -EBUSY;
		if (mddev->pers) {
			mddev->pers->quiesce(mddev, 1);
			bitmap_destroy(mddev);
			mddev->pers->quiesce(mddev, 0);
		}
		mddev->bitmap_info.offset = 0;
		if (mddev->bitmap_info.file) {
			struct file *f = mddev->bitmap_info.file;
			mddev->bitmap_info.file = NULL;
			restore_bitmap_write_access(f);
			fput(f);
		}
	} else {
		/* No bitmap, OK to set a location */
		long long offset;
		if (strncmp(buf, "none", 4) == 0)
			/* nothing to be done */;
		else if (strncmp(buf, "file:", 5) == 0) {
			/* Not supported yet */
			return -EINVAL;
		} else {
			int rv;
			if (buf[0] == '+')
				rv = strict_strtoll(buf+1, 10, &offset);
			else
				rv = strict_strtoll(buf, 10, &offset);
			if (rv)
				return rv;
			if (offset == 0)
				return -EINVAL;
			if (mddev->bitmap_info.external == 0 &&
			    mddev->major_version == 0 &&
			    offset != mddev->bitmap_info.default_offset)
				return -EINVAL;
			mddev->bitmap_info.offset = offset;
			if (mddev->pers) {
				mddev->pers->quiesce(mddev, 1);
				rv = bitmap_create(mddev);
				if (!rv)
					rv = bitmap_load(mddev);
				if (rv) {
					bitmap_destroy(mddev);
					mddev->bitmap_info.offset = 0;
				}
				mddev->pers->quiesce(mddev, 0);
				if (rv)
					return rv;
			}
		}
	}
	if (!mddev->external) {
		/* Ensure new bitmap info is stored in
		 * metadata promptly.
		 */
		set_bit(MD_CHANGE_DEVS, &mddev->flags);
		md_wakeup_thread(mddev->thread);
	}
	return len;
}

static struct md_sysfs_entry bitmap_location =
__ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store);
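/*
 * Illustrative use from userspace (device name and offset assumed):
 *   echo +8 > /sys/block/md0/md/bitmap/location    # internal bitmap, 8 sectors past the superblock
 *   echo none > /sys/block/md0/md/bitmap/location  # tear the bitmap down
 */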

static ssize_t
timeout_show(struct mddev *mddev, char *page)
{
	ssize_t len;
	unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ;
	unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ;

	len = sprintf(page, "%lu", secs);
	if (jifs)
		len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs));
	len += sprintf(page+len, "\n");
	return len;
}

static ssize_t
timeout_store(struct mddev *mddev, const char *buf, size_t len)
{
	/* timeout can be set at any time */
	unsigned long timeout;
	int rv = strict_strtoul_scaled(buf, &timeout, 4);
	if (rv)
		return rv;

	/* just to make sure we don't overflow... */
	if (timeout >= LONG_MAX / HZ)
		return -EINVAL;

	timeout = timeout * HZ / 10000;
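	/*
	 * Worked example: writing "5.7" makes strict_strtoul_scaled() return
	 * 57000 (the value scaled by 10^4); the conversion above then yields
	 * 57000 * HZ / 10000, i.e. 5.7 seconds' worth of jiffies.
	 */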

	if (timeout >= MAX_SCHEDULE_TIMEOUT)
		timeout = MAX_SCHEDULE_TIMEOUT-1;
	if (timeout < 1)
		timeout = 1;
	mddev->bitmap_info.daemon_sleep = timeout;
	if (mddev->thread) {
		/* if thread->timeout is MAX_SCHEDULE_TIMEOUT, then
		 * the bitmap is all clean and we don't need to
		 * adjust the timeout right now
		 */
		if (mddev->thread->timeout < MAX_SCHEDULE_TIMEOUT) {
			mddev->thread->timeout = timeout;
			md_wakeup_thread(mddev->thread);
		}
	}
	return len;
}

static struct md_sysfs_entry bitmap_timeout =
__ATTR(time_base, S_IRUGO|S_IWUSR, timeout_show, timeout_store);

static ssize_t
backlog_show(struct mddev *mddev, char *page)
{
	return sprintf(page, "%lu\n", mddev->bitmap_info.max_write_behind);
}

static ssize_t
backlog_store(struct mddev *mddev, const char *buf, size_t len)
{
	unsigned long backlog;
	int rv = strict_strtoul(buf, 10, &backlog);
	if (rv)
		return rv;
	if (backlog > COUNTER_MAX)
		return -EINVAL;
	mddev->bitmap_info.max_write_behind = backlog;
	return len;
}

static struct md_sysfs_entry bitmap_backlog =
__ATTR(backlog, S_IRUGO|S_IWUSR, backlog_show, backlog_store);
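/*
 * Illustrative use (hedged; applies to write-behind on write-mostly
 * members, device name assumed):
 *   echo 256 > /sys/block/md0/md/bitmap/backlog
 */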

static ssize_t
chunksize_show(struct mddev *mddev, char *page)
{
	return sprintf(page, "%lu\n", mddev->bitmap_info.chunksize);
}

static ssize_t
chunksize_store(struct mddev *mddev, const char *buf, size_t len)
{
	/* Can only be changed when no bitmap is active */
	int rv;
	unsigned long csize;
	if (mddev->bitmap)
		return -EBUSY;
	rv = strict_strtoul(buf, 10, &csize);
	if (rv)
		return rv;
	if (csize < 512 ||
	    !is_power_of_2(csize))
		return -EINVAL;
	mddev->bitmap_info.chunksize = csize;
	return len;
}

static struct md_sysfs_entry bitmap_chunksize =
__ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);
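/*
 * Illustrative use (device name assumed): the value is in bytes and must
 * be a power of two of at least 512, set before the bitmap is created:
 *   echo 131072 > /sys/block/md0/md/bitmap/chunksize   # 128KiB chunks
 */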

static ssize_t metadata_show(struct mddev *mddev, char *page)
{
	return sprintf(page, "%s\n", (mddev->bitmap_info.external
				      ? "external" : "internal"));
}

static ssize_t metadata_store(struct mddev *mddev, const char *buf, size_t len)
{
	if (mddev->bitmap ||
	    mddev->bitmap_info.file ||
	    mddev->bitmap_info.offset)
		return -EBUSY;
	if (strncmp(buf, "external", 8) == 0)
		mddev->bitmap_info.external = 1;
	else if (strncmp(buf, "internal", 8) == 0)
		mddev->bitmap_info.external = 0;
	else
		return -EINVAL;
	return len;
}

static struct md_sysfs_entry bitmap_metadata =
__ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
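/*
 * Illustrative use (device name assumed), before a location is set:
 *   echo external > /sys/block/md0/md/bitmap/metadata
 */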

static ssize_t can_clear_show(struct mddev *mddev, char *page)
{
	int len;
	if (mddev->bitmap)
		len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ?
					     "false" : "true"));
	else
		len = sprintf(page, "\n");
	return len;
}

static ssize_t can_clear_store(struct mddev *mddev, const char *buf, size_t len)
{
	if (mddev->bitmap == NULL)
		return -ENOENT;
	if (strncmp(buf, "false", 5) == 0)
		mddev->bitmap->need_sync = 1;
	else if (strncmp(buf, "true", 4) == 0) {
		if (mddev->degraded)
			return -EBUSY;
		mddev->bitmap->need_sync = 0;
	} else
		return -EINVAL;
	return len;
}

static struct md_sysfs_entry bitmap_can_clear =
__ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);
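/*
 * Illustrative sequence (hedged; device name assumed): an external
 * metadata manager writes "false" to stop bits being cleared while a
 * device is missing, and "true" once the array is no longer degraded:
 *   echo false > /sys/block/md0/md/bitmap/can_clear
 *   echo true > /sys/block/md0/md/bitmap/can_clear
 */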

static ssize_t
behind_writes_used_show(struct mddev *mddev, char *page)
{
	if (mddev->bitmap == NULL)
		return sprintf(page, "0\n");
	return sprintf(page, "%lu\n",
		       mddev->bitmap->behind_writes_used);
}

static ssize_t
behind_writes_used_reset(struct mddev *mddev, const char *buf, size_t len)
{
	if (mddev->bitmap)
		mddev->bitmap->behind_writes_used = 0;
	return len;
}

static struct md_sysfs_entry max_backlog_used =
__ATTR(max_backlog_used, S_IRUGO | S_IWUSR,
       behind_writes_used_show, behind_writes_used_reset);
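/*
 * Illustrative use (device name assumed): read the peak count of
 * outstanding write-behind requests, then reset it (any write clears it):
 *   cat /sys/block/md0/md/bitmap/max_backlog_used
 *   echo 0 > /sys/block/md0/md/bitmap/max_backlog_used
 */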

static struct attribute *md_bitmap_attrs[] = {
	&bitmap_location.attr,
	&bitmap_timeout.attr,
	&bitmap_backlog.attr,
	&bitmap_chunksize.attr,
	&bitmap_metadata.attr,
	&bitmap_can_clear.attr,
	&max_backlog_used.attr,
	NULL
};
struct attribute_group md_bitmap_group = {
	.name = "bitmap",
	.attrs = md_bitmap_attrs,
};