bitmap.c 66.0 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/*
 * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
 *
 * bitmap_create  - sets up the bitmap structure
 * bitmap_destroy - destroys the bitmap structure
 *
 * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.:
 * - added disk storage for bitmap
 * - changes to allow various bitmap chunk sizes
 */

/*
 * Still to do:
 *
 * flush after percent set rather than just time based. (maybe both).
 */

18
#include <linux/blkdev.h>
19 20 21 22 23 24 25 26 27 28
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/timer.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/buffer_head.h>
29
#include <linux/seq_file.h>
30
#include "md.h"
31
#include "bitmap.h"
32

33
static inline char *bmname(struct bitmap *bitmap)
34 35 36 37 38 39 40 41 42 43 44 45 46 47
{
	return bitmap->mddev ? mdname(bitmap->mddev) : "mdX";
}

/*
 * check a page and, if necessary, allocate it (or hijack it if the alloc fails)
 *
 * 1) check to see if this page is allocated, if it's not then try to alloc
 * 2) if the alloc fails, set the page's hijacked flag so we'll use the
 *    page pointer directly as a counter
 *
 * if we find our page, we increment the page's refcount so that it stays
 * allocated while we're using it
 */
48
static int bitmap_checkpage(struct bitmap_counts *bitmap,
49
			    unsigned long page, int create, int no_hijack)
50 51
__releases(bitmap->lock)
__acquires(bitmap->lock)
52 53 54 55
{
	unsigned char *mappage;

	if (page >= bitmap->pages) {
56 57 58 59
		/* This can happen if bitmap_start_sync goes beyond
		 * End-of-device while looking for a whole page.
		 * It is harmless.
		 */
60 61 62 63 64 65 66 67 68 69 70 71 72 73
		return -EINVAL;
	}

	if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */
		return 0;

	if (bitmap->bp[page].map) /* page is already allocated, just return */
		return 0;

	if (!create)
		return -ENOENT;

	/* this page has not been allocated yet */

74
	spin_unlock_irq(&bitmap->lock);
75 76 77 78 79 80 81 82 83 84 85 86 87
	/* It is possible that this is being called inside a
	 * prepare_to_wait/finish_wait loop from raid5c:make_request().
	 * In general it is not permitted to sleep in that context as it
	 * can cause the loop to spin freely.
	 * That doesn't apply here as we can only reach this point
	 * once with any loop.
	 * When this function completes, either bp[page].map or
	 * bp[page].hijacked.  In either case, this function will
	 * abort before getting to this point again.  So there is
	 * no risk of a free-spin, and so it is safe to assert
	 * that sleeping here is allowed.
	 */
	sched_annotate_sleep();
88
	mappage = kzalloc(PAGE_SIZE, GFP_NOIO);
89 90 91
	spin_lock_irq(&bitmap->lock);

	if (mappage == NULL) {
92
		pr_debug("md/bitmap: map page allocation failed, hijacking\n");
93 94 95
		/* We don't support hijack for cluster raid */
		if (no_hijack)
			return -ENOMEM;
96 97 98 99
		/* failed - set the hijacked flag so that we can use the
		 * pointer as a counter */
		if (!bitmap->bp[page].map)
			bitmap->bp[page].hijacked = 1;
100 101
	} else if (bitmap->bp[page].map ||
		   bitmap->bp[page].hijacked) {
102
		/* somebody beat us to getting the page */
103
		kfree(mappage);
104
	} else {
105

106
		/* no page was in place and we have one, so install it */
107

108 109 110
		bitmap->bp[page].map = mappage;
		bitmap->missing_pages--;
	}
111 112 113 114 115 116
	return 0;
}

/* if page is completely empty, put it back on the free list, or dealloc it */
/* if page was hijacked, unmark the flag so it might get alloced next time */
/* Note: lock should be held when calling this */
117
static void bitmap_checkfree(struct bitmap_counts *bitmap, unsigned long page)
118 119 120 121 122 123 124 125 126 127 128
{
	char *ptr;

	if (bitmap->bp[page].count) /* page is still busy */
		return;

	/* page is no longer in use, it can be released */

	if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */
		bitmap->bp[page].hijacked = 0;
		bitmap->bp[page].map = NULL;
129 130 131 132 133
	} else {
		/* normal case, free the page */
		ptr = bitmap->bp[page].map;
		bitmap->bp[page].map = NULL;
		bitmap->missing_pages++;
134
		kfree(ptr);
135 136 137 138 139 140 141 142 143 144 145
	}
}

/*
 * bitmap file handling - read and write the bitmap file and its superblock
 */

/*
 * basic page I/O operations
 */

146
/* IO operations when bitmap is stored near all superblocks */
147 148 149
/*
 * Read one bitmap page from the first member device that is In_sync and
 * not Faulty and for which the read succeeds.
 *
 * Returns 0 (and records @index in page->index) on success, -EIO if no
 * device could supply the page.
 */
static int read_sb_page(struct mddev *mddev, loff_t offset,
			struct page *page,
			unsigned long index, int size)
{
	struct md_rdev *rdev;
	sector_t target;

	rdev_for_each(rdev, mddev) {
		if (!test_bit(In_sync, &rdev->flags))
			continue;
		if (test_bit(Faulty, &rdev->flags))
			continue;

		/* sector of the wanted page, relative to the bitmap offset */
		target = offset + index * (PAGE_SIZE/512);

		if (!sync_page_io(rdev, target,
				  roundup(size, bdev_logical_block_size(rdev->bdev)),
				  page, READ, true))
			continue;

		page->index = index;
		return 0;
	}
	return -EIO;
}

173
static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mddev)
174 175 176 177 178 179 180
{
	/* Iterate the disks of an mddev, using rcu to protect access to the
	 * linked list, and raising the refcount of devices we return to ensure
	 * they don't disappear while in use.
	 * As devices are only added or removed when raid_disk is < 0 and
	 * nr_pending is 0 and In_sync is clear, the entries we return will
	 * still be in the same position on the list when we re-enter
181
	 * list_for_each_entry_continue_rcu.
182 183 184 185 186
	 *
	 * Note that if entered with 'rdev == NULL' to start at the
	 * beginning, we temporarily assign 'rdev' to an address which
	 * isn't really an rdev, but which can be used by
	 * list_for_each_entry_continue_rcu() to find the first entry.
187 188 189 190
	 */
	rcu_read_lock();
	if (rdev == NULL)
		/* start at the beginning */
191
		rdev = list_entry(&mddev->disks, struct md_rdev, same_set);
192 193 194 195
	else {
		/* release the previous rdev and start from there. */
		rdev_dec_pending(rdev, mddev);
	}
196
	list_for_each_entry_continue_rcu(rdev, &mddev->disks, same_set) {
197 198 199 200 201 202 203 204 205 206 207 208
		if (rdev->raid_disk >= 0 &&
		    !test_bit(Faulty, &rdev->flags)) {
			/* this is a usable devices */
			atomic_inc(&rdev->nr_pending);
			rcu_read_unlock();
			return rdev;
		}
	}
	rcu_read_unlock();
	return NULL;
}

209
static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
210
{
211
	struct md_rdev *rdev = NULL;
212
	struct block_device *bdev;
213
	struct mddev *mddev = bitmap->mddev;
214
	struct bitmap_storage *store = &bitmap->storage;
215

216
	while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
217 218
		int size = PAGE_SIZE;
		loff_t offset = mddev->bitmap_info.offset;
219 220 221

		bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;

222 223 224 225 226
		if (page->index == store->file_pages-1) {
			int last_page_size = store->bytes & (PAGE_SIZE-1);
			if (last_page_size == 0)
				last_page_size = PAGE_SIZE;
			size = roundup(last_page_size,
227
				       bdev_logical_block_size(bdev));
228
		}
229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268
		/* Just make sure we aren't corrupting data or
		 * metadata
		 */
		if (mddev->external) {
			/* Bitmap could be anywhere. */
			if (rdev->sb_start + offset + (page->index
						       * (PAGE_SIZE/512))
			    > rdev->data_offset
			    &&
			    rdev->sb_start + offset
			    < (rdev->data_offset + mddev->dev_sectors
			     + (PAGE_SIZE/512)))
				goto bad_alignment;
		} else if (offset < 0) {
			/* DATA  BITMAP METADATA  */
			if (offset
			    + (long)(page->index * (PAGE_SIZE/512))
			    + size/512 > 0)
				/* bitmap runs in to metadata */
				goto bad_alignment;
			if (rdev->data_offset + mddev->dev_sectors
			    > rdev->sb_start + offset)
				/* data runs in to bitmap */
				goto bad_alignment;
		} else if (rdev->sb_start < rdev->data_offset) {
			/* METADATA BITMAP DATA */
			if (rdev->sb_start
			    + offset
			    + page->index*(PAGE_SIZE/512) + size/512
			    > rdev->data_offset)
				/* bitmap runs in to data */
				goto bad_alignment;
		} else {
			/* DATA METADATA BITMAP - no problems */
		}
		md_super_write(mddev, rdev,
			       rdev->sb_start + offset
			       + page->index * (PAGE_SIZE/512),
			       size,
			       page);
269
	}
270 271

	if (wait)
272
		md_super_wait(mddev);
273
	return 0;
274 275 276

 bad_alignment:
	return -EINVAL;
277 278
}

279
static void bitmap_file_kick(struct bitmap *bitmap);
280
/*
281
 * write out a page to a file
282
 */
283
static void write_page(struct bitmap *bitmap, struct page *page, int wait)
284
{
285
	struct buffer_head *bh;
286

287
	if (bitmap->storage.file == NULL) {
288 289
		switch (write_sb_page(bitmap, page, wait)) {
		case -EINVAL:
290
			set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
291
		}
292
	} else {
293

294
		bh = page_buffers(page);
295

296 297 298 299
		while (bh && bh->b_blocknr) {
			atomic_inc(&bitmap->pending_writes);
			set_buffer_locked(bh);
			set_buffer_mapped(bh);
J
Jens Axboe 已提交
300
			submit_bh(WRITE | REQ_SYNC, bh);
301 302
			bh = bh->b_this_page;
		}
303

304
		if (wait)
305 306
			wait_event(bitmap->write_wait,
				   atomic_read(&bitmap->pending_writes)==0);
307
	}
308
	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
309
		bitmap_file_kick(bitmap);
310 311 312 313 314
}

static void end_bitmap_write(struct buffer_head *bh, int uptodate)
{
	struct bitmap *bitmap = bh->b_private;
315

316 317
	if (!uptodate)
		set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
318 319 320
	if (atomic_dec_and_test(&bitmap->pending_writes))
		wake_up(&bitmap->write_wait);
}
321

322 323 324 325 326 327
/* copied from buffer.c */
static void __clear_page_buffers(struct page *page)
{
	/* detach the private data from the page ... */
	ClearPagePrivate(page);
	set_page_private(page, 0);
	/* ... and drop one page reference */
	put_page(page);
}
static void free_buffers(struct page *page)
{
332
	struct buffer_head *bh;
333

334 335 336 337
	if (!PagePrivate(page))
		return;

	bh = page_buffers(page);
338 339 340 341
	while (bh) {
		struct buffer_head *next = bh->b_this_page;
		free_buffer_head(bh);
		bh = next;
342
	}
343 344
	__clear_page_buffers(page);
	put_page(page);
345 346
}

347 348 349 350 351 352 353
/* read a page from a file.
 * We both read the page, and attach buffers to the page to record the
 * address of each block (using bmap).  These addresses will be used
 * to write the block later, completely bypassing the filesystem.
 * This usage is similar to how swap files are handled, and allows us
 * to write to a file with no concerns of memory allocation failing.
 */
354 355 356 357
static int read_page(struct file *file, unsigned long index,
		     struct bitmap *bitmap,
		     unsigned long count,
		     struct page *page)
358
{
359
	int ret = 0;
A
Al Viro 已提交
360
	struct inode *inode = file_inode(file);
361 362
	struct buffer_head *bh;
	sector_t block;
363

364 365
	pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE,
		 (unsigned long long)index << PAGE_SHIFT);
366

367 368
	bh = alloc_page_buffers(page, 1<<inode->i_blkbits, 0);
	if (!bh) {
369
		ret = -ENOMEM;
370 371
		goto out;
	}
372 373 374 375 376 377 378 379 380
	attach_page_buffers(page, bh);
	block = index << (PAGE_SHIFT - inode->i_blkbits);
	while (bh) {
		if (count == 0)
			bh->b_blocknr = 0;
		else {
			bh->b_blocknr = bmap(inode, block);
			if (bh->b_blocknr == 0) {
				/* Cannot use this file! */
381
				ret = -EINVAL;
382 383 384 385 386 387 388 389 390 391
				goto out;
			}
			bh->b_bdev = inode->i_sb->s_bdev;
			if (count < (1<<inode->i_blkbits))
				count = 0;
			else
				count -= (1<<inode->i_blkbits);

			bh->b_end_io = end_bitmap_write;
			bh->b_private = bitmap;
392 393 394 395
			atomic_inc(&bitmap->pending_writes);
			set_buffer_locked(bh);
			set_buffer_mapped(bh);
			submit_bh(READ, bh);
396 397 398 399 400
		}
		block++;
		bh = bh->b_this_page;
	}
	page->index = index;
401 402 403

	wait_event(bitmap->write_wait,
		   atomic_read(&bitmap->pending_writes)==0);
404
	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
405
		ret = -EIO;
406
out:
407 408
	if (ret)
		printk(KERN_ALERT "md: bitmap read error: (%dB @ %llu): %d\n",
409 410
			(int)PAGE_SIZE,
			(unsigned long long)index << PAGE_SHIFT,
411 412
			ret);
	return ret;
413 414 415 416 417 418 419
}

/*
 * bitmap file superblock operations
 */

/* update the event counter and sync the superblock to disk */
420
void bitmap_update_sb(struct bitmap *bitmap)
421 422 423 424
{
	bitmap_super_t *sb;

	if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
425
		return;
426 427
	if (bitmap->mddev->bitmap_info.external)
		return;
428
	if (!bitmap->storage.sb_page) /* no superblock */
429
		return;
430
	sb = kmap_atomic(bitmap->storage.sb_page);
431
	sb->events = cpu_to_le64(bitmap->mddev->events);
432
	if (bitmap->mddev->events < bitmap->events_cleared)
433 434
		/* rocking back to read-only */
		bitmap->events_cleared = bitmap->mddev->events;
435 436
	sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
	sb->state = cpu_to_le32(bitmap->flags);
437 438 439
	/* Just in case these have been changed via sysfs: */
	sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ);
	sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind);
440 441 442
	/* This might have been changed by a reshape */
	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
	sb->chunksize = cpu_to_le32(bitmap->mddev->bitmap_info.chunksize);
G
Goldwyn Rodrigues 已提交
443
	sb->nodes = cpu_to_le32(bitmap->mddev->bitmap_info.nodes);
444 445
	sb->sectors_reserved = cpu_to_le32(bitmap->mddev->
					   bitmap_info.space);
446
	kunmap_atomic(sb);
447
	write_page(bitmap, bitmap->storage.sb_page, 1);
448 449 450 451 452 453 454
}

/* print out the bitmap file superblock */
void bitmap_print_sb(struct bitmap *bitmap)
{
	bitmap_super_t *sb;

455
	if (!bitmap || !bitmap->storage.sb_page)
456
		return;
457
	sb = kmap_atomic(bitmap->storage.sb_page);
458
	printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap));
459 460 461
	printk(KERN_DEBUG "         magic: %08x\n", le32_to_cpu(sb->magic));
	printk(KERN_DEBUG "       version: %d\n", le32_to_cpu(sb->version));
	printk(KERN_DEBUG "          uuid: %08x.%08x.%08x.%08x\n",
462 463 464 465
					*(__u32 *)(sb->uuid+0),
					*(__u32 *)(sb->uuid+4),
					*(__u32 *)(sb->uuid+8),
					*(__u32 *)(sb->uuid+12));
466
	printk(KERN_DEBUG "        events: %llu\n",
467
			(unsigned long long) le64_to_cpu(sb->events));
468
	printk(KERN_DEBUG "events cleared: %llu\n",
469
			(unsigned long long) le64_to_cpu(sb->events_cleared));
470 471 472 473 474
	printk(KERN_DEBUG "         state: %08x\n", le32_to_cpu(sb->state));
	printk(KERN_DEBUG "     chunksize: %d B\n", le32_to_cpu(sb->chunksize));
	printk(KERN_DEBUG "  daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep));
	printk(KERN_DEBUG "     sync size: %llu KB\n",
			(unsigned long long)le64_to_cpu(sb->sync_size)/2);
475
	printk(KERN_DEBUG "max write behind: %d\n", le32_to_cpu(sb->write_behind));
476
	kunmap_atomic(sb);
477 478
}

479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494
/*
 * bitmap_new_disk_sb
 * @bitmap
 *
 * This function is somewhat the reverse of bitmap_read_sb.  bitmap_read_sb
 * reads and verifies the on-disk bitmap superblock and populates bitmap_info.
 * This function verifies 'bitmap_info' and populates the on-disk bitmap
 * structure, which is to be written to disk.
 *
 * Returns: 0 on success, -Exxx on error
 */
static int bitmap_new_disk_sb(struct bitmap *bitmap)
{
	bitmap_super_t *sb;
	unsigned long chunksize, daemon_sleep, write_behind;

495
	bitmap->storage.sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
496 497
	if (bitmap->storage.sb_page == NULL)
		return -ENOMEM;
498
	bitmap->storage.sb_page->index = 0;
499

500
	sb = kmap_atomic(bitmap->storage.sb_page);
501 502 503 504 505 506 507

	sb->magic = cpu_to_le32(BITMAP_MAGIC);
	sb->version = cpu_to_le32(BITMAP_MAJOR_HI);

	chunksize = bitmap->mddev->bitmap_info.chunksize;
	BUG_ON(!chunksize);
	if (!is_power_of_2(chunksize)) {
508
		kunmap_atomic(sb);
509 510 511 512 513 514
		printk(KERN_ERR "bitmap chunksize not a power of 2\n");
		return -EINVAL;
	}
	sb->chunksize = cpu_to_le32(chunksize);

	daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep;
515
	if (!daemon_sleep || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536
		printk(KERN_INFO "Choosing daemon_sleep default (5 sec)\n");
		daemon_sleep = 5 * HZ;
	}
	sb->daemon_sleep = cpu_to_le32(daemon_sleep);
	bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;

	/*
	 * FIXME: write_behind for RAID1.  If not specified, what
	 * is a good choice?  We choose COUNTER_MAX / 2 arbitrarily.
	 */
	write_behind = bitmap->mddev->bitmap_info.max_write_behind;
	if (write_behind > COUNTER_MAX)
		write_behind = COUNTER_MAX / 2;
	sb->write_behind = cpu_to_le32(write_behind);
	bitmap->mddev->bitmap_info.max_write_behind = write_behind;

	/* keep the array size field of the bitmap superblock up to date */
	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);

	memcpy(sb->uuid, bitmap->mddev->uuid, 16);

537
	set_bit(BITMAP_STALE, &bitmap->flags);
538
	sb->state = cpu_to_le32(bitmap->flags);
539 540
	bitmap->events_cleared = bitmap->mddev->events;
	sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
541
	bitmap->mddev->bitmap_info.nodes = 0;
542

543
	kunmap_atomic(sb);
544 545 546 547

	return 0;
}

548 549 550 551 552
/* read the superblock from the bitmap file and initialize some bitmap fields */
static int bitmap_read_sb(struct bitmap *bitmap)
{
	char *reason = NULL;
	bitmap_super_t *sb;
553
	unsigned long chunksize, daemon_sleep, write_behind;
554
	unsigned long long events;
G
Goldwyn Rodrigues 已提交
555
	int nodes = 0;
556
	unsigned long sectors_reserved = 0;
557
	int err = -EINVAL;
558
	struct page *sb_page;
559
	loff_t offset = bitmap->mddev->bitmap_info.offset;
560

561
	if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) {
562 563 564
		chunksize = 128 * 1024 * 1024;
		daemon_sleep = 5 * HZ;
		write_behind = 0;
565
		set_bit(BITMAP_STALE, &bitmap->flags);
566 567 568
		err = 0;
		goto out_no_sb;
	}
569
	/* page 0 is the superblock, read it... */
570 571 572
	sb_page = alloc_page(GFP_KERNEL);
	if (!sb_page)
		return -ENOMEM;
573
	bitmap->storage.sb_page = sb_page;
574

575
re_read:
576 577
	/* If cluster_slot is set, the cluster is setup */
	if (bitmap->cluster_slot >= 0) {
578
		sector_t bm_blocks = bitmap->mddev->resync_max_sectors;
579

580 581
		sector_div(bm_blocks,
			   bitmap->mddev->bitmap_info.chunksize >> 9);
582 583 584
		/* bits to bytes */
		bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t);
		/* to 4k blocks */
585
		bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096);
586
		offset = bitmap->mddev->bitmap_info.offset + (bitmap->cluster_slot * (bm_blocks << 3));
587
		pr_info("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__,
588
			bitmap->cluster_slot, offset);
589 590
	}

591 592
	if (bitmap->storage.file) {
		loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host);
593 594
		int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize;

595
		err = read_page(bitmap->storage.file, 0,
596
				bitmap, bytes, sb_page);
597
	} else {
598
		err = read_sb_page(bitmap->mddev,
599
				   offset,
600 601
				   sb_page,
				   0, sizeof(bitmap_super_t));
602
	}
603
	if (err)
604 605
		return err;

606
	err = -EINVAL;
607
	sb = kmap_atomic(sb_page);
608 609

	chunksize = le32_to_cpu(sb->chunksize);
610
	daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
611
	write_behind = le32_to_cpu(sb->write_behind);
612
	sectors_reserved = le32_to_cpu(sb->sectors_reserved);
613 614
	/* Setup nodes/clustername only if bitmap version is
	 * cluster-compatible
615
	 */
616
	if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
617 618 619 620
		nodes = le32_to_cpu(sb->nodes);
		strlcpy(bitmap->mddev->bitmap_info.cluster_name,
				sb->cluster_name, 64);
	}
621 622 623 624

	/* verify that the bitmap-specific fields are valid */
	if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
		reason = "bad magic";
625
	else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
626
		 le32_to_cpu(sb->version) > BITMAP_MAJOR_CLUSTERED)
627
		reason = "unrecognized superblock version";
628
	else if (chunksize < 512)
629
		reason = "bitmap chunksize too small";
J
Jonathan Brassow 已提交
630
	else if (!is_power_of_2(chunksize))
631
		reason = "bitmap chunksize not a power of 2";
632
	else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT)
633
		reason = "daemon sleep period out of range";
634 635
	else if (write_behind > COUNTER_MAX)
		reason = "write-behind limit out of range (0 - 16383)";
636 637 638 639 640 641 642 643 644
	if (reason) {
		printk(KERN_INFO "%s: invalid bitmap file superblock: %s\n",
			bmname(bitmap), reason);
		goto out;
	}

	/* keep the array size field of the bitmap superblock up to date */
	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);

645 646 647 648 649 650 651 652 653 654 655 656
	if (bitmap->mddev->persistent) {
		/*
		 * We have a persistent array superblock, so compare the
		 * bitmap's UUID and event counter to the mddev's
		 */
		if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) {
			printk(KERN_INFO
			       "%s: bitmap superblock UUID mismatch\n",
			       bmname(bitmap));
			goto out;
		}
		events = le64_to_cpu(sb->events);
657
		if (!nodes && (events < bitmap->mddev->events)) {
658 659 660 661 662
			printk(KERN_INFO
			       "%s: bitmap file is out of date (%llu < %llu) "
			       "-- forcing full recovery\n",
			       bmname(bitmap), events,
			       (unsigned long long) bitmap->mddev->events);
663
			set_bit(BITMAP_STALE, &bitmap->flags);
664
		}
665
	}
666

667
	/* assign fields using values from superblock */
668
	bitmap->flags |= le32_to_cpu(sb->state);
669
	if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
670
		set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
671
	bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
G
Goldwyn Rodrigues 已提交
672
	strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
673
	err = 0;
674

675
out:
676
	kunmap_atomic(sb);
677 678
	/* Assiging chunksize is required for "re_read" */
	bitmap->mddev->bitmap_info.chunksize = chunksize;
679
	if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
680 681 682 683 684 685 686 687 688 689 690
		err = md_setup_cluster(bitmap->mddev, nodes);
		if (err) {
			pr_err("%s: Could not setup cluster service (%d)\n",
					bmname(bitmap), err);
			goto out_no_sb;
		}
		bitmap->cluster_slot = md_cluster_ops->slot_number(bitmap->mddev);
		goto re_read;
	}


691
out_no_sb:
692
	if (test_bit(BITMAP_STALE, &bitmap->flags))
693 694 695 696
		bitmap->events_cleared = bitmap->mddev->events;
	bitmap->mddev->bitmap_info.chunksize = chunksize;
	bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
	bitmap->mddev->bitmap_info.max_write_behind = write_behind;
G
Goldwyn Rodrigues 已提交
697
	bitmap->mddev->bitmap_info.nodes = nodes;
698 699 700
	if (bitmap->mddev->bitmap_info.space == 0 ||
	    bitmap->mddev->bitmap_info.space > sectors_reserved)
		bitmap->mddev->bitmap_info.space = sectors_reserved;
701
	if (err) {
702
		bitmap_print_sb(bitmap);
703
		if (bitmap->cluster_slot < 0)
704 705
			md_cluster_stop(bitmap->mddev);
	}
706 707 708 709 710 711 712
	return err;
}

/*
 * general bitmap file operations
 */

713 714 715 716 717 718
/*
 * on-disk bitmap:
 *
 * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
 * file a page at a time. There's a superblock at the start of the file.
 */
719
/* calculate the index of the page that contains this bit */
720 721
static inline unsigned long file_page_index(struct bitmap_storage *store,
					    unsigned long chunk)
722
{
723
	if (store->sb_page)
724 725
		chunk += sizeof(bitmap_super_t) << 3;
	return chunk >> PAGE_BIT_SHIFT;
726 727 728
}

/* calculate the (bit) offset of this bit within a page */
729 730
static inline unsigned long file_page_offset(struct bitmap_storage *store,
					     unsigned long chunk)
731
{
732
	if (store->sb_page)
733 734
		chunk += sizeof(bitmap_super_t) << 3;
	return chunk & (PAGE_BITS - 1);
735 736 737 738 739 740
}

/*
 * return a pointer to the page in the filemap that contains the given bit
 *
 */
741
static inline struct page *filemap_get_page(struct bitmap_storage *store,
742
					    unsigned long chunk)
743
{
744
	if (file_page_index(store, chunk) >= store->file_pages)
745
		return NULL;
746
	return store->filemap[file_page_index(store, chunk)];
747 748
}

749
static int bitmap_storage_alloc(struct bitmap_storage *store,
750 751
				unsigned long chunks, int with_super,
				int slot_number)
752
{
753
	int pnum, offset = 0;
754 755 756 757 758 759 760 761
	unsigned long num_pages;
	unsigned long bytes;

	bytes = DIV_ROUND_UP(chunks, 8);
	if (with_super)
		bytes += sizeof(bitmap_super_t);

	num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
762
	offset = slot_number * num_pages;
763 764 765 766 767 768 769

	store->filemap = kmalloc(sizeof(struct page *)
				 * num_pages, GFP_KERNEL);
	if (!store->filemap)
		return -ENOMEM;

	if (with_super && !store->sb_page) {
770
		store->sb_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
771 772 773
		if (store->sb_page == NULL)
			return -ENOMEM;
	}
774

775 776 777 778
	pnum = 0;
	if (store->sb_page) {
		store->filemap[0] = store->sb_page;
		pnum = 1;
779
		store->sb_page->index = offset;
780
	}
781

782
	for ( ; pnum < num_pages; pnum++) {
783
		store->filemap[pnum] = alloc_page(GFP_KERNEL|__GFP_ZERO);
784 785 786 787
		if (!store->filemap[pnum]) {
			store->file_pages = pnum;
			return -ENOMEM;
		}
788
		store->filemap[pnum]->index = pnum + offset;
789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804
	}
	store->file_pages = pnum;

	/* We need 4 bits per page, rounded up to a multiple
	 * of sizeof(unsigned long) */
	store->filemap_attr = kzalloc(
		roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)),
		GFP_KERNEL);
	if (!store->filemap_attr)
		return -ENOMEM;

	store->bytes = bytes;

	return 0;
}

805
static void bitmap_file_unmap(struct bitmap_storage *store)
806 807 808
{
	struct page **map, *sb_page;
	int pages;
809
	struct file *file;
810

811
	file = store->file;
812 813 814
	map = store->filemap;
	pages = store->file_pages;
	sb_page = store->sb_page;
815 816

	while (pages--)
817
		if (map[pages] != sb_page) /* 0 is sb_page, release it below */
818
			free_buffers(map[pages]);
819
	kfree(map);
820
	kfree(store->filemap_attr);
821

822 823
	if (sb_page)
		free_buffers(sb_page);
824

825
	if (file) {
A
Al Viro 已提交
826
		struct inode *inode = file_inode(file);
827
		invalidate_mapping_pages(inode->i_mapping, 0, -1);
828
		fput(file);
829
	}
830 831 832 833 834 835 836 837 838 839 840
}

/*
 * bitmap_file_kick - if an error occurs while manipulating the bitmap file
 * then it is no longer reliable, so we stop using it and we mark the file
 * as failed in the superblock
 */
static void bitmap_file_kick(struct bitmap *bitmap)
{
	char *path, *ptr = NULL;

841
	if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) {
842
		bitmap_update_sb(bitmap);
843

844
		if (bitmap->storage.file) {
845 846
			path = kmalloc(PAGE_SIZE, GFP_KERNEL);
			if (path)
M
Miklos Szeredi 已提交
847
				ptr = file_path(bitmap->storage.file,
848
					     path, PAGE_SIZE);
C
Christoph Hellwig 已提交
849

850 851
			printk(KERN_ALERT
			      "%s: kicking failed bitmap file %s from array!\n",
C
Christoph Hellwig 已提交
852
			      bmname(bitmap), IS_ERR(ptr) ? "" : ptr);
853

854 855 856 857 858
			kfree(path);
		} else
			printk(KERN_ALERT
			       "%s: disabling internal bitmap due to errors\n",
			       bmname(bitmap));
859
	}
860 861 862
}

/*
 * Per-page attribute numbers.  Each value is added to (page number << 2)
 * to select a bit in the storage's filemap_attr array.
 */
enum bitmap_page_attr {
	BITMAP_PAGE_DIRTY = 0,     /* there are set bits that need to be synced */
	BITMAP_PAGE_PENDING = 1,   /* there are bits that are being cleaned.
				    * i.e. counter is 1 or 2. */
	BITMAP_PAGE_NEEDWRITE = 2, /* there are cleared bits that need to be synced */
};

869 870
static inline void set_page_attr(struct bitmap *bitmap, int pnum,
				 enum bitmap_page_attr attr)
871
{
872
	set_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
873 874
}

875 876
static inline void clear_page_attr(struct bitmap *bitmap, int pnum,
				   enum bitmap_page_attr attr)
877
{
878
	clear_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
879 880
}

881 882
static inline int test_page_attr(struct bitmap *bitmap, int pnum,
				 enum bitmap_page_attr attr)
883
{
884
	return test_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
885 886
}

887 888 889 890 891 892
/* Clear attribute @attr of page @pnum and return its previous state. */
static inline int test_and_clear_page_attr(struct bitmap *bitmap, int pnum,
					   enum bitmap_page_attr attr)
{
	unsigned long *attrs = bitmap->storage.filemap_attr;

	return test_and_clear_bit(pnum * 4 + attr, attrs);
}
893 894 895 896 897 898 899 900 901 902
/*
 * bitmap_file_set_bit -- called before performing a write to the md device
 * to set (and eventually sync) a particular bit in the bitmap file
 *
 * we set the bit immediately, then we record the page number so that
 * when an unplug occurs, we can flush the dirty pages out to disk
 */
static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
{
	unsigned long chunk = block >> bitmap->counts.chunkshift;
	struct page *page = filemap_get_page(&bitmap->storage, chunk);
	unsigned long bit;
	void *kaddr;

	if (!page)
		return;
	bit = file_page_offset(&bitmap->storage, chunk);

	/* set the bit, honouring the bitmap's byte order */
	kaddr = kmap_atomic(page);
	if (!test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
		set_bit_le(bit, kaddr);
	else
		set_bit(bit, kaddr);
	kunmap_atomic(kaddr);

	pr_debug("set file bit %lu page %lu\n", bit, page->index);
	/* record page number so it gets flushed to disk when unplug occurs */
	set_page_attr(bitmap, page->index, BITMAP_PAGE_DIRTY);
}

924 925 926 927 928
/*
 * Clear the bit for @block in the in-memory copy of the bitmap file and,
 * unless the page is already flagged NEEDWRITE, flag it PENDING and note
 * that the bitmap is no longer all clean.
 */
static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
{
	unsigned long chunk = block >> bitmap->counts.chunkshift;
	struct page *page = filemap_get_page(&bitmap->storage, chunk);
	unsigned long bit;
	void *paddr;

	if (!page)
		return;
	bit = file_page_offset(&bitmap->storage, chunk);

	paddr = kmap_atomic(page);
	if (!test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
		clear_bit_le(bit, paddr);
	else
		clear_bit(bit, paddr);
	kunmap_atomic(paddr);

	if (test_page_attr(bitmap, page->index, BITMAP_PAGE_NEEDWRITE))
		return;
	set_page_attr(bitmap, page->index, BITMAP_PAGE_PENDING);
	bitmap->allclean = 0;
}

947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968
/*
 * Report whether the bit for @block is set in the in-memory copy of the
 * bitmap file.  Returns -EINVAL if no page backs that chunk.
 */
static int bitmap_file_test_bit(struct bitmap *bitmap, sector_t block)
{
	unsigned long chunk = block >> bitmap->counts.chunkshift;
	struct page *page = filemap_get_page(&bitmap->storage, chunk);
	unsigned long bit;
	void *paddr;
	int set;

	if (!page)
		return -EINVAL;
	bit = file_page_offset(&bitmap->storage, chunk);

	paddr = kmap_atomic(page);
	if (!test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
		set = test_bit_le(bit, paddr);
	else
		set = test_bit(bit, paddr);
	kunmap_atomic(paddr);

	return set;
}


969 970 971
/* this gets called when the md device is ready to unplug its underlying
 * (slave) device queues -- before we let any writes go down, we need to
 * sync the dirty pages of the bitmap file to disk */
972
void bitmap_unplug(struct bitmap *bitmap)
973
{
974
	unsigned long i;
975
	int dirty, need_write;
976

977 978
	if (!bitmap || !bitmap->storage.filemap ||
	    test_bit(BITMAP_STALE, &bitmap->flags))
979
		return;
980 981 982

	/* look at each page to see if there are any set bits that need to be
	 * flushed out to disk */
983
	for (i = 0; i < bitmap->storage.file_pages; i++) {
984
		if (!bitmap->storage.filemap)
985
			return;
986 987 988 989
		dirty = test_and_clear_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
		need_write = test_and_clear_page_attr(bitmap, i,
						      BITMAP_PAGE_NEEDWRITE);
		if (dirty || need_write) {
990
			clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING);
991 992
			write_page(bitmap, bitmap->storage.filemap[i], 0);
		}
993
	}
994 995 996 997 998 999
	if (bitmap->storage.file)
		wait_event(bitmap->write_wait,
			   atomic_read(&bitmap->pending_writes)==0);
	else
		md_super_wait(bitmap->mddev);

1000
	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
1001
		bitmap_file_kick(bitmap);
1002
}
1003
EXPORT_SYMBOL(bitmap_unplug);
1004

1005
static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
1006 1007 1008 1009 1010 1011 1012
/* * bitmap_init_from_disk -- called at bitmap_create time to initialize
 * the in-memory bitmap from the on-disk bitmap -- also, sets up the
 * memory mapping of the bitmap file
 * Special cases:
 *   if there's no bitmap file, or if the bitmap file had been
 *   previously kicked from the array, we mark all the bits as
 *   1's in order to cause a full resync.
1013 1014 1015
 *
 * We ignore all bits for sectors that end earlier than 'start'.
 * This is used when reading an out-of-date bitmap...
1016
 */
1017
static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
1018
{
1019
	unsigned long i, chunks, index, oldindex, bit, node_offset = 0;
1020
	struct page *page = NULL;
1021
	unsigned long bit_cnt = 0;
1022
	struct file *file;
1023
	unsigned long offset;
1024 1025
	int outofdate;
	int ret = -ENOSPC;
1026
	void *paddr;
1027
	struct bitmap_storage *store = &bitmap->storage;
1028

1029
	chunks = bitmap->counts.chunks;
1030
	file = store->file;
1031

1032 1033
	if (!file && !bitmap->mddev->bitmap_info.offset) {
		/* No permanent bitmap - fill with '1s'. */
1034 1035
		store->filemap = NULL;
		store->file_pages = 0;
1036 1037
		for (i = 0; i < chunks ; i++) {
			/* if the disk bit is set, set the memory bit */
1038
			int needed = ((sector_t)(i+1) << (bitmap->counts.chunkshift)
1039 1040
				      >= start);
			bitmap_set_memory_bits(bitmap,
1041
					       (sector_t)i << bitmap->counts.chunkshift,
1042 1043 1044 1045
					       needed);
		}
		return 0;
	}
1046

1047
	outofdate = test_bit(BITMAP_STALE, &bitmap->flags);
1048 1049 1050 1051
	if (outofdate)
		printk(KERN_INFO "%s: bitmap file is out of date, doing full "
			"recovery\n", bmname(bitmap));

1052
	if (file && i_size_read(file->f_mapping->host) < store->bytes) {
1053
		printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
1054 1055 1056
		       bmname(bitmap),
		       (unsigned long) i_size_read(file->f_mapping->host),
		       store->bytes);
1057
		goto err;
1058
	}
1059

1060
	oldindex = ~0L;
1061
	offset = 0;
1062
	if (!bitmap->mddev->bitmap_info.external)
1063
		offset = sizeof(bitmap_super_t);
1064

1065 1066 1067
	if (mddev_is_clustered(bitmap->mddev))
		node_offset = bitmap->cluster_slot * (DIV_ROUND_UP(store->bytes, PAGE_SIZE));

1068
	for (i = 0; i < chunks; i++) {
1069
		int b;
1070 1071
		index = file_page_index(&bitmap->storage, i);
		bit = file_page_offset(&bitmap->storage, i);
1072
		if (index != oldindex) { /* this is a new page, read it in */
1073
			int count;
1074
			/* unmap the old page, we're done with it */
1075 1076
			if (index == store->file_pages-1)
				count = store->bytes - index * PAGE_SIZE;
1077 1078
			else
				count = PAGE_SIZE;
1079
			page = store->filemap[index];
1080 1081 1082 1083 1084 1085 1086 1087
			if (file)
				ret = read_page(file, index, bitmap,
						count, page);
			else
				ret = read_sb_page(
					bitmap->mddev,
					bitmap->mddev->bitmap_info.offset,
					page,
1088
					index + node_offset, count);
1089 1090

			if (ret)
1091
				goto err;
1092

1093 1094 1095 1096 1097
			oldindex = index;

			if (outofdate) {
				/*
				 * if bitmap is out of date, dirty the
1098
				 * whole page and write it out
1099
				 */
1100
				paddr = kmap_atomic(page);
1101
				memset(paddr + offset, 0xff,
1102
				       PAGE_SIZE - offset);
1103
				kunmap_atomic(paddr);
1104 1105 1106
				write_page(bitmap, page, 1);

				ret = -EIO;
1107 1108
				if (test_bit(BITMAP_WRITE_ERROR,
					     &bitmap->flags))
1109
					goto err;
1110 1111
			}
		}
1112
		paddr = kmap_atomic(page);
1113
		if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
1114
			b = test_bit(bit, paddr);
1115
		else
A
Akinobu Mita 已提交
1116
			b = test_bit_le(bit, paddr);
1117
		kunmap_atomic(paddr);
1118
		if (b) {
1119
			/* if the disk bit is set, set the memory bit */
1120
			int needed = ((sector_t)(i+1) << bitmap->counts.chunkshift
1121 1122
				      >= start);
			bitmap_set_memory_bits(bitmap,
1123
					       (sector_t)i << bitmap->counts.chunkshift,
1124
					       needed);
1125 1126
			bit_cnt++;
		}
1127
		offset = 0;
1128 1129 1130
	}

	printk(KERN_INFO "%s: bitmap initialized from disk: "
1131
	       "read %lu pages, set %lu of %lu bits\n",
1132
	       bmname(bitmap), store->file_pages,
1133
	       bit_cnt, chunks);
1134 1135

	return 0;
1136

1137 1138 1139
 err:
	printk(KERN_INFO "%s: bitmap initialisation failed: %d\n",
	       bmname(bitmap), ret);
1140 1141 1142
	return ret;
}

1143 1144 1145 1146 1147
void bitmap_write_all(struct bitmap *bitmap)
{
	/* We don't actually write all bitmap blocks here,
	 * just flag them as needing to be written
	 */
1148
	int i;
1149

1150
	if (!bitmap || !bitmap->storage.filemap)
1151
		return;
1152
	if (bitmap->storage.file)
1153 1154 1155
		/* Only one copy, so nothing needed */
		return;

1156
	for (i = 0; i < bitmap->storage.file_pages; i++)
1157
		set_page_attr(bitmap, i,
1158
			      BITMAP_PAGE_NEEDWRITE);
1159
	bitmap->allclean = 0;
1160 1161
}

1162 1163
/*
 * Adjust by @inc the usage count of the counter page covering sector
 * @offset, then let bitmap_checkfree() look at that page.
 */
static void bitmap_count_page(struct bitmap_counts *bitmap,
			      sector_t offset, int inc)
{
	unsigned long page =
		(offset >> bitmap->chunkshift) >> PAGE_COUNTER_SHIFT;

	bitmap->bp[page].count += inc;
	bitmap_checkfree(bitmap, page);
}
1170

1171
/*
 * Mark the counter page covering sector @offset as pending, so that
 * bitmap_daemon_work() will scan its counters on its next run.
 */
static void bitmap_set_pending(struct bitmap_counts *bitmap, sector_t offset)
{
	unsigned long page =
		(offset >> bitmap->chunkshift) >> PAGE_COUNTER_SHIFT;

	/* only write the flag when it actually changes */
	if (bitmap->bp[page].pending)
		return;
	bitmap->bp[page].pending = 1;
}

1181
static bitmap_counter_t *bitmap_get_counter(struct bitmap_counts *bitmap,
N
NeilBrown 已提交
1182
					    sector_t offset, sector_t *blocks,
1183 1184 1185 1186 1187 1188 1189
					    int create);

/*
 * bitmap daemon -- periodically wakes up to clean bits and flush pages
 *			out to disk
 */

1190
void bitmap_daemon_work(struct mddev *mddev)
1191
{
1192
	struct bitmap *bitmap;
1193
	unsigned long j;
1194
	unsigned long nextpage;
N
NeilBrown 已提交
1195
	sector_t blocks;
1196
	struct bitmap_counts *counts;
1197

1198 1199 1200
	/* Use a mutex to guard daemon_work against
	 * bitmap_destroy.
	 */
1201
	mutex_lock(&mddev->bitmap_info.mutex);
1202 1203
	bitmap = mddev->bitmap;
	if (bitmap == NULL) {
1204
		mutex_unlock(&mddev->bitmap_info.mutex);
1205
		return;
1206
	}
1207
	if (time_before(jiffies, bitmap->daemon_lastrun
N
NeilBrown 已提交
1208
			+ mddev->bitmap_info.daemon_sleep))
1209 1210
		goto done;

1211
	bitmap->daemon_lastrun = jiffies;
1212
	if (bitmap->allclean) {
N
NeilBrown 已提交
1213
		mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
1214
		goto done;
1215 1216
	}
	bitmap->allclean = 1;
1217

1218 1219 1220 1221
	/* Any file-page which is PENDING now needs to be written.
	 * So set NEEDWRITE now, then after we make any last-minute changes
	 * we will write it.
	 */
1222
	for (j = 0; j < bitmap->storage.file_pages; j++)
1223 1224
		if (test_and_clear_page_attr(bitmap, j,
					     BITMAP_PAGE_PENDING))
1225
			set_page_attr(bitmap, j,
1226 1227 1228 1229 1230 1231 1232 1233
				      BITMAP_PAGE_NEEDWRITE);

	if (bitmap->need_sync &&
	    mddev->bitmap_info.external == 0) {
		/* Arrange for superblock update as well as
		 * other changes */
		bitmap_super_t *sb;
		bitmap->need_sync = 0;
1234 1235
		if (bitmap->storage.filemap) {
			sb = kmap_atomic(bitmap->storage.sb_page);
1236 1237 1238
			sb->events_cleared =
				cpu_to_le64(bitmap->events_cleared);
			kunmap_atomic(sb);
1239
			set_page_attr(bitmap, 0,
1240 1241
				      BITMAP_PAGE_NEEDWRITE);
		}
1242 1243 1244 1245
	}
	/* Now look at the bitmap counters and if any are '2' or '1',
	 * decrement and handle accordingly.
	 */
1246 1247
	counts = &bitmap->counts;
	spin_lock_irq(&counts->lock);
1248
	nextpage = 0;
1249
	for (j = 0; j < counts->chunks; j++) {
1250
		bitmap_counter_t *bmc;
1251
		sector_t  block = (sector_t)j << counts->chunkshift;
1252

1253 1254
		if (j == nextpage) {
			nextpage += PAGE_COUNTER_RATIO;
1255
			if (!counts->bp[j >> PAGE_COUNTER_SHIFT].pending) {
1256
				j |= PAGE_COUNTER_MASK;
1257 1258
				continue;
			}
1259
			counts->bp[j >> PAGE_COUNTER_SHIFT].pending = 0;
1260
		}
1261
		bmc = bitmap_get_counter(counts,
1262
					 block,
1263
					 &blocks, 0);
1264 1265

		if (!bmc) {
1266
			j |= PAGE_COUNTER_MASK;
1267 1268 1269 1270 1271
			continue;
		}
		if (*bmc == 1 && !bitmap->need_sync) {
			/* We can clear the bit */
			*bmc = 0;
1272
			bitmap_count_page(counts, block, -1);
1273
			bitmap_file_clear_bit(bitmap, block);
1274 1275
		} else if (*bmc && *bmc <= 2) {
			*bmc = 1;
1276
			bitmap_set_pending(counts, block);
1277
			bitmap->allclean = 0;
1278
		}
1279
	}
1280
	spin_unlock_irq(&counts->lock);
1281

1282 1283 1284 1285 1286 1287 1288 1289
	/* Now start writeout on any page in NEEDWRITE that isn't DIRTY.
	 * DIRTY pages need to be written by bitmap_unplug so it can wait
	 * for them.
	 * If we find any DIRTY page we stop there and let bitmap_unplug
	 * handle all the rest.  This is important in the case where
	 * the first blocking holds the superblock and it has been updated.
	 * We mustn't write any other blocks before the superblock.
	 */
1290 1291 1292 1293
	for (j = 0;
	     j < bitmap->storage.file_pages
		     && !test_bit(BITMAP_STALE, &bitmap->flags);
	     j++) {
1294
		if (test_page_attr(bitmap, j,
1295 1296 1297
				   BITMAP_PAGE_DIRTY))
			/* bitmap_unplug will handle the rest */
			break;
1298 1299
		if (test_and_clear_page_attr(bitmap, j,
					     BITMAP_PAGE_NEEDWRITE)) {
1300
			write_page(bitmap, bitmap->storage.filemap[j], 0);
1301 1302 1303
		}
	}

1304
 done:
1305
	if (bitmap->allclean == 0)
N
NeilBrown 已提交
1306 1307
		mddev->thread->timeout =
			mddev->bitmap_info.daemon_sleep;
1308
	mutex_unlock(&mddev->bitmap_info.mutex);
1309 1310
}

1311
/*
 * Look up the counter for the chunk containing sector @offset.
 *
 * @bitmap: counter set; its lock must be held by the caller
 * @offset: sector to look up
 * @blocks: set to the number of sectors from @offset to the end of the
 *          region the returned counter covers; set even when NULL is
 *          returned
 * @create: passed to bitmap_checkpage() to request page allocation
 *
 * If 'create', we might release the lock and reclaim it.
 * The lock must have been taken with interrupts enabled.
 * If !create, we don't release the lock.
 *
 * Returns a pointer to the counter, or NULL when no counter is available
 * (page out of range, or bitmap_checkpage() failed).
 */
static bitmap_counter_t *bitmap_get_counter(struct bitmap_counts *bitmap,
					    sector_t offset, sector_t *blocks,
					    int create)
__releases(bitmap->lock)
__acquires(bitmap->lock)
{
	sector_t chunk = offset >> bitmap->chunkshift;
	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
	unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT;
	sector_t csize = ((sector_t)1) << bitmap->chunkshift;
	int err;

	if (page >= bitmap->pages) {
		/*
		 * Beyond the counter array (e.g. bitmap_start_sync running
		 * past end-of-device).  bitmap_checkpage() would reject
		 * this page too, so bail out before bp[page] is read out
		 * of bounds.  Callers still advance by *blocks.
		 */
		*blocks = csize - (offset & (csize - 1));
		return NULL;
	}

	err = bitmap_checkpage(bitmap, page, create, 0);

	/*
	 * A hijacked or unallocated page has only coarse counters, so
	 * each one covers a correspondingly larger region.
	 */
	if (bitmap->bp[page].hijacked ||
	    bitmap->bp[page].map == NULL)
		csize = ((sector_t)1) << (bitmap->chunkshift +
					  PAGE_COUNTER_SHIFT - 1);
	*blocks = csize - (offset & (csize - 1));

	if (err < 0)
		return NULL;

	/* now locked ... */

	if (bitmap->bp[page].hijacked) { /* hijacked pointer */
		/* should we use the first or second counter field
		 * of the hijacked pointer? */
		int hi = (pageoff > PAGE_COUNTER_MASK);

		return  &((bitmap_counter_t *)
			  &bitmap->bp[page].map)[hi];
	}

	/* page is allocated */
	return (bitmap_counter_t *)
		&(bitmap->bp[page].map[pageoff]);
}

1353
/*
 * Account for a write starting at @offset covering @sectors sectors.
 *
 * For each counter region touched: a counter at 0 gets its storage bit
 * set and its page counted; counters at 0 or 1 are raised to 2; then the
 * counter is incremented.  If a counter is already at COUNTER_MAX we
 * sleep on overflow_wait and retry.  When @behind is set, the
 * write-behind count and its high-water mark are updated first.
 *
 * Always returns 0; stops early if no counter can be obtained.
 */
int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind)
{
	struct bitmap_counts *counts;

	if (!bitmap)
		return 0;
	counts = &bitmap->counts;

	if (behind) {
		int bw;

		atomic_inc(&bitmap->behind_writes);
		bw = atomic_read(&bitmap->behind_writes);
		if (bw > bitmap->behind_writes_used)
			bitmap->behind_writes_used = bw;

		pr_debug("inc write-behind count %d/%lu\n",
			 bw, bitmap->mddev->bitmap_info.max_write_behind);
	}

	while (sectors) {
		bitmap_counter_t *bmc;
		bitmap_counter_t prev;
		sector_t blocks;

		spin_lock_irq(&counts->lock);
		bmc = bitmap_get_counter(counts, offset, &blocks, 1);
		if (!bmc) {
			spin_unlock_irq(&counts->lock);
			return 0;
		}

		if (unlikely(COUNTER(*bmc) == COUNTER_MAX)) {
			DEFINE_WAIT(wait);

			/* note that it is safe to do the prepare_to_wait
			 * after the test as long as we do it before dropping
			 * the spinlock.
			 */
			prepare_to_wait(&bitmap->overflow_wait, &wait,
					TASK_UNINTERRUPTIBLE);
			spin_unlock_irq(&counts->lock);
			schedule();
			finish_wait(&bitmap->overflow_wait, &wait);
			continue;
		}

		/* decide on the value the counter had on entry */
		prev = *bmc;
		if (prev == 0) {
			bitmap_file_set_bit(bitmap, offset);
			bitmap_count_page(counts, offset, 1);
		}
		if (prev <= 1)
			*bmc = 2;

		(*bmc)++;

		spin_unlock_irq(&counts->lock);

		offset += blocks;
		sectors = (sectors > blocks) ? sectors - blocks : 0;
	}
	return 0;
}
1415
EXPORT_SYMBOL(bitmap_startwrite);
1416 1417

/*
 * Account for the completion of a write started with bitmap_startwrite().
 *
 * @success: non-zero if the write succeeded; a failed write marks the
 *           counter NEEDED
 * @behind:  non-zero if this was a write-behind request
 *
 * Each touched counter is decremented.  Once a counter drops to 2 or
 * below its page is flagged pending so the daemon can age it.
 */
void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors,
		     int success, int behind)
{
	struct bitmap_counts *counts;
	struct mddev *mddev;

	if (!bitmap)
		return;
	counts = &bitmap->counts;
	mddev = bitmap->mddev;

	if (behind) {
		if (atomic_dec_and_test(&bitmap->behind_writes))
			wake_up(&bitmap->behind_wait);
		pr_debug("dec write-behind count %d/%lu\n",
			 atomic_read(&bitmap->behind_writes),
			 mddev->bitmap_info.max_write_behind);
	}

	while (sectors) {
		bitmap_counter_t *bmc;
		unsigned long flags;
		sector_t blocks;

		spin_lock_irqsave(&counts->lock, flags);
		bmc = bitmap_get_counter(counts, offset, &blocks, 0);
		if (!bmc) {
			spin_unlock_irqrestore(&counts->lock, flags);
			return;
		}

		/* record the newest event count at which bits may clear */
		if (success && !mddev->degraded &&
		    bitmap->events_cleared < mddev->events) {
			bitmap->events_cleared = mddev->events;
			bitmap->need_sync = 1;
			sysfs_notify_dirent_safe(bitmap->sysfs_can_clear);
		}

		if (!success && !NEEDED(*bmc))
			*bmc |= NEEDED_MASK;

		/* a writer may be blocked waiting for this counter */
		if (COUNTER(*bmc) == COUNTER_MAX)
			wake_up(&bitmap->overflow_wait);

		(*bmc)--;
		if (*bmc <= 2) {
			bitmap_set_pending(counts, offset);
			bitmap->allclean = 0;
		}
		spin_unlock_irqrestore(&counts->lock, flags);

		offset += blocks;
		sectors = (sectors > blocks) ? sectors - blocks : 0;
	}
}
1468
EXPORT_SYMBOL(bitmap_endwrite);
1469

N
NeilBrown 已提交
1470
/*
 * Decide whether the region at @offset needs to be resynced.
 *
 * Returns 1 if the counter is flagged RESYNC or NEEDED (or if there is no
 * bitmap at all), otherwise 0.  Unless @degraded, a NEEDED counter is
 * switched to RESYNC.  *blocks receives the size of the region reported
 * on (1024 when there is no bitmap).
 */
static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
			       int degraded)
{
	bitmap_counter_t *bmc;
	int rv = 0;

	if (!bitmap) { /* FIXME or bitmap set as 'failed' */
		*blocks = 1024;
		return 1; /* always resync if no bitmap */
	}

	spin_lock_irq(&bitmap->counts.lock);
	bmc = bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
	if (bmc && RESYNC(*bmc)) {
		rv = 1;
	} else if (bmc && NEEDED(*bmc)) {
		rv = 1;
		if (!degraded) { /* don't set/clear bits if degraded */
			*bmc |= RESYNC_MASK;
			*bmc &= ~NEEDED_MASK;
		}
	}
	spin_unlock_irq(&bitmap->counts.lock);

	return rv;
}

N
NeilBrown 已提交
1498
/*
 * bitmap_start_sync must always report on multiples of whole
 * pages, otherwise resync (which is very PAGE_SIZE based) will
 * get confused.
 * So call __bitmap_start_sync repeatedly (if needed) until
 * at least PAGE_SIZE>>9 blocks are covered.
 * Return the 'or' of the results; *blocks gets the total covered.
 */
int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
		      int degraded)
{
	sector_t covered = 0;
	sector_t step;
	int rv = 0;

	do {
		rv |= __bitmap_start_sync(bitmap, offset, &step, degraded);
		offset += step;
		covered += step;
	} while (covered < (PAGE_SIZE>>9));

	*blocks = covered;
	return rv;
}
1520
EXPORT_SYMBOL(bitmap_start_sync);
1521

N
NeilBrown 已提交
1522
/*
 * Finish syncing the region at @offset: drop its RESYNC flag.
 *
 * If the sync was @aborted and the counter is not already NEEDED, it is
 * marked NEEDED again.  Otherwise, if the counter is now 2 or below, its
 * page is flagged pending.  *blocks receives the size of the region
 * (1024 when there is no bitmap).
 */
void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted)
{
	bitmap_counter_t *bmc;
	unsigned long flags;

	if (!bitmap) {
		*blocks = 1024;
		return;
	}

	spin_lock_irqsave(&bitmap->counts.lock, flags);
	bmc = bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
	if (bmc && RESYNC(*bmc)) {
		*bmc &= ~RESYNC_MASK;

		if (!NEEDED(*bmc) && aborted) {
			*bmc |= NEEDED_MASK;
		} else if (*bmc <= 2) {
			bitmap_set_pending(&bitmap->counts, offset);
			bitmap->allclean = 0;
		}
	}
	spin_unlock_irqrestore(&bitmap->counts.lock, flags);
}
1551
EXPORT_SYMBOL(bitmap_end_sync);
1552 1553 1554 1555 1556 1557 1558 1559

void bitmap_close_sync(struct bitmap *bitmap)
{
	/* Sync has finished, and any bitmap chunks that weren't synced
	 * properly have been aborted.  It remains to us to clear the
	 * RESYNC bit wherever it is still on
	 */
	sector_t sector = 0;
N
NeilBrown 已提交
1560
	sector_t blocks;
N
NeilBrown 已提交
1561 1562
	if (!bitmap)
		return;
1563 1564
	while (sector < bitmap->mddev->resync_max_sectors) {
		bitmap_end_sync(bitmap, sector, &blocks, 0);
N
NeilBrown 已提交
1565 1566 1567
		sector += blocks;
	}
}
1568
EXPORT_SYMBOL(bitmap_close_sync);
N
NeilBrown 已提交
1569

1570
/*
 * Conditionally end the sync of everything below @sector.
 *
 * A call with @sector == 0 only records the current time.  Otherwise,
 * unless @force is set, nothing is done until daemon_sleep has elapsed
 * since the last end-sync.  We then wait for active recovery I/O to
 * drain, record @sector as completed, and clear RESYNC on every region
 * below @sector rounded down to a chunk boundary.
 */
void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force)
{
	struct mddev *mddev;
	sector_t pos;
	sector_t blocks;

	if (!bitmap)
		return;
	if (sector == 0) {
		bitmap->last_end_sync = jiffies;
		return;
	}

	mddev = bitmap->mddev;
	if (!force && time_before(jiffies, (bitmap->last_end_sync
				  + mddev->bitmap_info.daemon_sleep)))
		return;

	wait_event(mddev->recovery_wait,
		   atomic_read(&mddev->recovery_active) == 0);

	mddev->curr_resync_completed = sector;
	set_bit(MD_CHANGE_CLEAN, &mddev->flags);

	/* round down to the start of the chunk containing 'sector' */
	sector &= ~((1ULL << bitmap->counts.chunkshift) - 1);

	for (pos = 0;
	     pos < sector && pos < mddev->resync_max_sectors;
	     pos += blocks)
		bitmap_end_sync(bitmap, pos, &blocks, 0);

	bitmap->last_end_sync = jiffies;
	sysfs_notify(&mddev->kobj, NULL, "sync_completed");
}
1598
EXPORT_SYMBOL(bitmap_cond_end_sync);
1599

1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620
/*
 * Bring mddev's bitmap in line with a resync window that moved from
 * [old_lo, old_hi) to [new_lo, new_hi): end the sync on the regions in
 * [old_lo, new_lo) and start it on the regions in [old_hi, new_hi).
 * Warns if the last region stepped over looks misaligned.
 */
void bitmap_sync_with_cluster(struct mddev *mddev,
			      sector_t old_lo, sector_t old_hi,
			      sector_t new_lo, sector_t new_hi)
{
	struct bitmap *bitmap = mddev->bitmap;
	sector_t blocks = 0;
	sector_t sector;

	sector = old_lo;
	while (sector < new_lo) {
		bitmap_end_sync(bitmap, sector, &blocks, 0);
		sector += blocks;
	}
	WARN((blocks > new_lo) && old_lo, "alignment is not correct for lo\n");

	sector = old_hi;
	while (sector < new_hi) {
		bitmap_start_sync(bitmap, sector, &blocks, 0);
		sector += blocks;
	}
	WARN((blocks > new_hi) && old_hi, "alignment is not correct for hi\n");
}
EXPORT_SYMBOL(bitmap_sync_with_cluster);

1621
/*
 * Set the in-memory counter for the chunk containing sector @offset.
 *
 * A counter that is still 0 is set to 2, its page is counted and flagged
 * pending, and the bitmap is marked not-all-clean.  If @needed, the
 * counter is additionally flagged NEEDED.  Counters are expected to be 0
 * at this point.
 */
static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
{
	struct bitmap_counts *counts = &bitmap->counts;
	bitmap_counter_t *bmc;
	sector_t secs;

	spin_lock_irq(&counts->lock);
	bmc = bitmap_get_counter(counts, offset, &secs, 1);
	if (!bmc)
		goto unlock;

	if (*bmc == 0) {
		*bmc = 2;
		bitmap_count_page(counts, offset, 1);
		bitmap_set_pending(counts, offset);
		bitmap->allclean = 0;
	}
	if (needed)
		*bmc |= NEEDED_MASK;
 unlock:
	spin_unlock_irq(&counts->lock);
}

1647 1648 1649 1650 1651 1652
/* dirty the memory and file bits for bitmap chunks "s" to "e" */
void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
{
	unsigned long chunk;

	for (chunk = s; chunk <= e; chunk++) {
1653
		sector_t sec = (sector_t)chunk << bitmap->counts.chunkshift;
1654 1655
		bitmap_set_memory_bits(bitmap, sec, 1);
		bitmap_file_set_bit(bitmap, sec);
1656 1657 1658 1659 1660 1661
		if (sec < bitmap->mddev->recovery_cp)
			/* We are asserting that the array is dirty,
			 * so move the recovery_cp address back so
			 * that it is obvious that it is dirty
			 */
			bitmap->mddev->recovery_cp = sec;
1662 1663 1664
	}
}

1665 1666 1667
/*
 * flush out any pending updates
 */
1668
void bitmap_flush(struct mddev *mddev)
1669 1670
{
	struct bitmap *bitmap = mddev->bitmap;
1671
	long sleep;
1672 1673 1674 1675 1676 1677 1678

	if (!bitmap) /* there was no bitmap */
		return;

	/* run the daemon_work three time to ensure everything is flushed
	 * that can be
	 */
1679
	sleep = mddev->bitmap_info.daemon_sleep * 2;
1680
	bitmap->daemon_lastrun -= sleep;
1681
	bitmap_daemon_work(mddev);
1682
	bitmap->daemon_lastrun -= sleep;
1683
	bitmap_daemon_work(mddev);
1684
	bitmap->daemon_lastrun -= sleep;
1685
	bitmap_daemon_work(mddev);
1686 1687 1688
	bitmap_update_sb(bitmap);
}

1689 1690 1691
/*
 * free memory that was allocated
 */
1692
static void bitmap_free(struct bitmap *bitmap)
1693 1694 1695 1696 1697 1698 1699
{
	unsigned long k, pages;
	struct bitmap_page *bp;

	if (!bitmap) /* there was no bitmap */
		return;

1700 1701 1702
	if (bitmap->sysfs_can_clear)
		sysfs_put(bitmap->sysfs_can_clear);

1703 1704
	if (mddev_is_clustered(bitmap->mddev) && bitmap->mddev->cluster_info &&
		bitmap->cluster_slot == md_cluster_ops->slot_number(bitmap->mddev))
1705 1706
		md_cluster_stop(bitmap->mddev);

1707 1708 1709 1710 1711 1712
	/* Shouldn't be needed - but just in case.... */
	wait_event(bitmap->write_wait,
		   atomic_read(&bitmap->pending_writes) == 0);

	/* release the bitmap file  */
	bitmap_file_unmap(&bitmap->storage);
1713

1714 1715
	bp = bitmap->counts.bp;
	pages = bitmap->counts.pages;
1716 1717 1718 1719 1720 1721 1722 1723 1724 1725

	/* free all allocated memory */

	if (bp) /* deallocate the page memory */
		for (k = 0; k < pages; k++)
			if (bp[k].map && !bp[k].hijacked)
				kfree(bp[k].map);
	kfree(bp);
	kfree(bitmap);
}
1726

1727
void bitmap_destroy(struct mddev *mddev)
1728 1729 1730 1731 1732 1733
{
	struct bitmap *bitmap = mddev->bitmap;

	if (!bitmap) /* there was no bitmap */
		return;

1734
	mutex_lock(&mddev->bitmap_info.mutex);
1735
	spin_lock(&mddev->lock);
1736
	mddev->bitmap = NULL; /* disconnect from the md device */
1737
	spin_unlock(&mddev->lock);
1738
	mutex_unlock(&mddev->bitmap_info.mutex);
1739 1740
	if (mddev->thread)
		mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
1741 1742 1743

	bitmap_free(bitmap);
}
1744 1745 1746 1747

/*
 * initialize the bitmap structure
 * if this returns an error, bitmap_destroy must be called to do clean up
1748
 * once mddev->bitmap is set
1749
 */
1750
struct bitmap *bitmap_create(struct mddev *mddev, int slot)
1751 1752
{
	struct bitmap *bitmap;
1753
	sector_t blocks = mddev->resync_max_sectors;
1754
	struct file *file = mddev->bitmap_info.file;
1755
	int err;
1756
	struct kernfs_node *bm = NULL;
1757

A
Alexey Dobriyan 已提交
1758
	BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
1759

1760
	BUG_ON(file && mddev->bitmap_info.offset);
1761

1762
	bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
1763
	if (!bitmap)
1764
		return ERR_PTR(-ENOMEM);
1765

1766
	spin_lock_init(&bitmap->counts.lock);
1767 1768
	atomic_set(&bitmap->pending_writes, 0);
	init_waitqueue_head(&bitmap->write_wait);
1769
	init_waitqueue_head(&bitmap->overflow_wait);
1770
	init_waitqueue_head(&bitmap->behind_wait);
1771

1772
	bitmap->mddev = mddev;
1773
	bitmap->cluster_slot = slot;
1774

1775
	if (mddev->kobj.sd)
T
Tejun Heo 已提交
1776
		bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap");
1777
	if (bm) {
T
Tejun Heo 已提交
1778
		bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear");
1779 1780 1781 1782
		sysfs_put(bm);
	} else
		bitmap->sysfs_can_clear = NULL;

1783
	bitmap->storage.file = file;
1784 1785
	if (file) {
		get_file(file);
1786 1787 1788 1789
		/* As future accesses to this file will use bmap,
		 * and bypass the page cache, we must sync the file
		 * first.
		 */
1790
		vfs_fsync(file, 1);
1791
	}
1792
	/* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
1793 1794 1795 1796 1797 1798 1799 1800 1801 1802
	if (!mddev->bitmap_info.external) {
		/*
		 * If 'MD_ARRAY_FIRST_USE' is set, then device-mapper is
		 * instructing us to create a new on-disk bitmap instance.
		 */
		if (test_and_clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags))
			err = bitmap_new_disk_sb(bitmap);
		else
			err = bitmap_read_sb(bitmap);
	} else {
1803 1804 1805 1806 1807 1808 1809
		err = 0;
		if (mddev->bitmap_info.chunksize == 0 ||
		    mddev->bitmap_info.daemon_sleep == 0)
			/* chunksize and time_base need to be
			 * set first. */
			err = -EINVAL;
	}
1810
	if (err)
1811
		goto error;
1812

1813
	bitmap->daemon_lastrun = jiffies;
1814 1815
	err = bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize, 1);
	if (err)
1816
		goto error;
1817

1818
	printk(KERN_INFO "created bitmap (%lu pages) for device %s\n",
1819
	       bitmap->counts.pages, bmname(bitmap));
1820

1821 1822 1823
	err = test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0;
	if (err)
		goto error;
1824

1825
	return bitmap;
1826 1827
 error:
	bitmap_free(bitmap);
1828
	return ERR_PTR(err);
1829 1830
}

1831
int bitmap_load(struct mddev *mddev)
1832 1833
{
	int err = 0;
1834
	sector_t start = 0;
1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846
	sector_t sector = 0;
	struct bitmap *bitmap = mddev->bitmap;

	if (!bitmap)
		goto out;

	/* Clear out old bitmap info first:  Either there is none, or we
	 * are resuming after someone else has possibly changed things,
	 * so we should forget old cached info.
	 * All chunks should be clean, but some might need_sync.
	 */
	while (sector < mddev->resync_max_sectors) {
N
NeilBrown 已提交
1847
		sector_t blocks;
1848 1849 1850 1851 1852
		bitmap_start_sync(bitmap, sector, &blocks, 0);
		sector += blocks;
	}
	bitmap_close_sync(bitmap);

1853 1854 1855 1856 1857 1858
	if (mddev->degraded == 0
	    || bitmap->events_cleared == mddev->events)
		/* no need to keep dirty bits to optimise a
		 * re-add of a missing device */
		start = mddev->recovery_cp;

1859
	mutex_lock(&mddev->bitmap_info.mutex);
1860
	err = bitmap_init_from_disk(bitmap, start);
1861
	mutex_unlock(&mddev->bitmap_info.mutex);
1862

1863
	if (err)
1864
		goto out;
1865
	clear_bit(BITMAP_STALE, &bitmap->flags);
1866 1867 1868

	/* Kick recovery in case any bits were set */
	set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);
1869

1870
	mddev->thread->timeout = mddev->bitmap_info.daemon_sleep;
1871
	md_wakeup_thread(mddev->thread);
1872

1873 1874
	bitmap_update_sb(bitmap);

1875
	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
1876 1877
		err = -EIO;
out:
1878
	return err;
1879
}
1880
EXPORT_SYMBOL_GPL(bitmap_load);
1881

1882 1883 1884 1885
/*
 * Loads the bitmap associated with slot and copies the resync information
 * to our bitmap
 *
 * @mddev:      array whose own bitmap receives the dirty bits
 * @slot:       cluster slot whose bitmap is read
 * @low, @high: on success, set to the first and last dirty block found
 *              (both 0 if none were dirty)
 * @clear_bits: if true, write the now-cleared slot bitmap back to disk
 *
 * Returns 0 on success or a negative errno.  *low and *high are left
 * untouched on failure.
 */
int bitmap_copy_from_slot(struct mddev *mddev, int slot,
		sector_t *low, sector_t *high, bool clear_bits)
{
	int rv = 0, i, j;
	sector_t block, lo = 0, hi = 0;
	struct bitmap_counts *counts;
	struct bitmap *bitmap = bitmap_create(mddev, slot);

	/*
	 * bitmap_create() has already freed everything on failure and
	 * handed back an ERR_PTR.  Do not pass it to bitmap_free(), which
	 * only guards against NULL and would dereference the error value.
	 */
	if (IS_ERR(bitmap))
		return PTR_ERR(bitmap);

	rv = bitmap_init_from_disk(bitmap, 0);
	if (rv)
		goto err;

	counts = &bitmap->counts;
	for (j = 0; j < counts->chunks; j++) {
		block = (sector_t)j << counts->chunkshift;
		if (bitmap_file_test_bit(bitmap, block)) {
			/*
			 * NOTE(review): 'lo == 0' doubles as "not found
			 * yet", so a dirty block 0 is replaced by the next
			 * dirty block - confirm callers tolerate this.
			 */
			if (!lo)
				lo = block;
			hi = block;
			/* move the bit from the slot's bitmap to ours */
			bitmap_file_clear_bit(bitmap, block);
			bitmap_set_memory_bits(mddev->bitmap, block, 1);
			bitmap_file_set_bit(mddev->bitmap, block);
		}
	}

	if (clear_bits) {
		bitmap_update_sb(bitmap);
		/* Setting this for the ev_page should be enough.
		 * And we do not require both write_all and PAGE_DIRTY either
		 */
		for (i = 0; i < bitmap->storage.file_pages; i++)
			set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
		bitmap_write_all(bitmap);
		bitmap_unplug(bitmap);
	}
	*low = lo;
	*high = hi;
err:
	bitmap_free(bitmap);
	return rv;
}
EXPORT_SYMBOL_GPL(bitmap_copy_from_slot);


1934 1935 1936
void bitmap_status(struct seq_file *seq, struct bitmap *bitmap)
{
	unsigned long chunk_kb;
1937
	struct bitmap_counts *counts;
1938 1939 1940 1941

	if (!bitmap)
		return;

1942 1943
	counts = &bitmap->counts;

1944 1945 1946
	chunk_kb = bitmap->mddev->bitmap_info.chunksize >> 10;
	seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
		   "%lu%s chunk",
1947 1948 1949
		   counts->pages - counts->missing_pages,
		   counts->pages,
		   (counts->pages - counts->missing_pages)
1950 1951 1952
		   << (PAGE_SHIFT - 10),
		   chunk_kb ? chunk_kb : bitmap->mddev->bitmap_info.chunksize,
		   chunk_kb ? "KB" : "B");
1953
	if (bitmap->storage.file) {
1954
		seq_printf(seq, ", file: ");
M
Miklos Szeredi 已提交
1955
		seq_file_path(seq, bitmap->storage.file, " \t\n");
1956 1957 1958 1959 1960
	}

	seq_printf(seq, "\n");
}

1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017
/*
 * bitmap_resize - set up or resize the counters and storage of a bitmap
 * @bitmap:    bitmap to (re)build
 * @blocks:    size of the area to cover, in bitmap blocks
 *             (units of 1 << BITMAP_BLOCK_SHIFT bytes)
 * @chunksize: chunk size in bytes, or 0 to choose one that fits the
 *             space recorded in bitmap_info.space
 * @init:      non-zero for the initial set-up; zero when resizing a live
 *             array, in which case the personality is quiesced around
 *             the switch-over and the newly covered area is marked as
 *             needing resync
 *
 * Returns 0 on success, the error from bitmap_storage_alloc() if the new
 * storage could not be allocated, or -ENOMEM if the new counter page
 * array could not be allocated.  Note that a failure to pre-allocate
 * counter pages for a clustered array is only logged: the old counters
 * are put back and 0 is still returned.
 */
int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
		  int chunksize, int init)
{
	/* If chunk_size is 0, choose an appropriate chunk size.
	 * Then possibly allocate new storage space.
	 * Then quiesce, copy bits, replace bitmap, and re-start
	 *
	 * This function is called both to set up the initial bitmap
	 * and to resize the bitmap while the array is active.
	 * If this happens as a result of the array being resized,
	 * chunksize will be zero, and we need to choose a suitable
	 * chunksize, otherwise we use what we are given.
	 */
	struct bitmap_storage store;
	struct bitmap_counts old_counts;
	unsigned long chunks;
	sector_t block;
	sector_t old_blocks, new_blocks;
	int chunkshift;
	int ret = 0;
	long pages;
	struct bitmap_page *new_bp;

	if (chunksize == 0) {
		/* If there is enough space, leave the chunk size unchanged,
		 * else increase by factor of two until there is enough space.
		 */
		long bytes;
		long space = bitmap->mddev->bitmap_info.space;

		if (space == 0) {
			/* We don't know how much space there is, so limit
			 * to current size - in sectors.
			 */
			bytes = DIV_ROUND_UP(bitmap->counts.chunks, 8);
			if (!bitmap->mddev->bitmap_info.external)
				bytes += sizeof(bitmap_super_t);
			space = DIV_ROUND_UP(bytes, 512);
			bitmap->mddev->bitmap_info.space = space;
		}
		chunkshift = bitmap->counts.chunkshift;
		chunkshift--;
		do {
			/* 'chunkshift' is shift from block size to chunk size */
			chunkshift++;
			/* 1UL: an int shift would overflow for large shifts */
			chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1UL << chunkshift);
			bytes = DIV_ROUND_UP(chunks, 8);
			if (!bitmap->mddev->bitmap_info.external)
				bytes += sizeof(bitmap_super_t);
		} while (bytes > (space << 9));
	} else
		chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT;

	chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1UL << chunkshift);
	memset(&store, 0, sizeof(store));
	if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file)
		ret = bitmap_storage_alloc(&store, chunks,
					   !bitmap->mddev->bitmap_info.external,
					   mddev_is_clustered(bitmap->mddev)
					   ? bitmap->cluster_slot : 0);
	if (ret)
		goto err;

	pages = DIV_ROUND_UP(chunks, PAGE_COUNTER_RATIO);

	/* kcalloc() checks the count * size multiplication for overflow */
	new_bp = kcalloc(pages, sizeof(*new_bp), GFP_KERNEL);
	ret = -ENOMEM;
	if (!new_bp) {
		bitmap_file_unmap(&store);
		goto err;
	}

	if (!init)
		bitmap->mddev->pers->quiesce(bitmap->mddev, 1);

	/* The backing file (if any) is carried over to the new storage. */
	store.file = bitmap->storage.file;
	bitmap->storage.file = NULL;

	if (store.sb_page && bitmap->storage.sb_page)
		memcpy(page_address(store.sb_page),
		       page_address(bitmap->storage.sb_page),
		       sizeof(bitmap_super_t));
	bitmap_file_unmap(&bitmap->storage);
	bitmap->storage = store;

	/* Keep a copy of the old counters so their bits can be migrated. */
	old_counts = bitmap->counts;
	bitmap->counts.bp = new_bp;
	bitmap->counts.pages = pages;
	bitmap->counts.missing_pages = pages;
	bitmap->counts.chunkshift = chunkshift;
	bitmap->counts.chunks = chunks;
	bitmap->mddev->bitmap_info.chunksize = 1UL << (chunkshift +
						       BITMAP_BLOCK_SHIFT);

	/* Only the range covered by both layouts needs to be copied.
	 * Widen to sector_t before shifting so the product cannot wrap
	 * where unsigned long is narrower than sector_t.
	 */
	blocks = min((sector_t)old_counts.chunks << old_counts.chunkshift,
		     (sector_t)chunks << chunkshift);

	spin_lock_irq(&bitmap->counts.lock);
	/* For cluster raid, need to pre-allocate bitmap */
	if (mddev_is_clustered(bitmap->mddev)) {
		unsigned long page;
		for (page = 0; page < pages; page++) {
			ret = bitmap_checkpage(&bitmap->counts, page, 1, 1);
			if (ret) {
				unsigned long k;

				/* deallocate the page memory, and the page
				 * array itself: nothing refers to it once
				 * the old counters are put back below.
				 */
				for (k = 0; k < page; k++)
					kfree(new_bp[k].map);
				kfree(new_bp);

				/* restore some fields from old_counts */
				/* NOTE(review): missing_pages is reset to
				 * old_counts.pages rather than
				 * old_counts.missing_pages - confirm intent.
				 */
				bitmap->counts.bp = old_counts.bp;
				bitmap->counts.pages = old_counts.pages;
				bitmap->counts.missing_pages = old_counts.pages;
				bitmap->counts.chunkshift = old_counts.chunkshift;
				bitmap->counts.chunks = old_counts.chunks;
				bitmap->mddev->bitmap_info.chunksize =
					1UL << (old_counts.chunkshift +
						BITMAP_BLOCK_SHIFT);
				blocks = (sector_t)old_counts.chunks <<
					old_counts.chunkshift;
				pr_err("Could not pre-allocate in-memory bitmap for cluster raid\n");
				break;
			} else
				bitmap->counts.bp[page].count += 1;
		}
	}

	/* Carry every "needed" counter over from the old layout. */
	for (block = 0; block < blocks; ) {
		bitmap_counter_t *bmc_old, *bmc_new;
		int set;

		bmc_old = bitmap_get_counter(&old_counts, block,
					     &old_blocks, 0);
		set = bmc_old && NEEDED(*bmc_old);

		if (set) {
			bmc_new = bitmap_get_counter(&bitmap->counts, block,
						     &new_blocks, 1);
			/* bitmap_get_counter() may return NULL (the loop for
			 * newly added space below checks for it as well), so
			 * never dereference the result unchecked.
			 */
			if (bmc_new) {
				if (*bmc_new == 0) {
					/* need to set on-disk bits too. */
					sector_t end = block + new_blocks;
					sector_t start = block >> chunkshift;

					start <<= chunkshift;
					/* NOTE(review): every iteration sets
					 * the bit for 'block', not 'start' -
					 * confirm whether that is intended.
					 */
					while (start < end) {
						bitmap_file_set_bit(bitmap, block);
						start += (sector_t)1 << chunkshift;
					}
					*bmc_new = 2;
					bitmap_count_page(&bitmap->counts,
							  block, 1);
					bitmap_set_pending(&bitmap->counts,
							   block);
				}
				*bmc_new |= NEEDED_MASK;
			}
			if (new_blocks < old_blocks)
				old_blocks = new_blocks;
		}
		block += old_blocks;
	}

	/* The old counters are no longer referenced once a new set has been
	 * installed; release them.  Hijacked entries use the map pointer
	 * itself as a counter, so there is nothing to free for those.
	 */
	if (old_counts.bp && bitmap->counts.bp != old_counts.bp) {
		unsigned long k;

		for (k = 0; k < old_counts.pages; k++)
			if (!old_counts.bp[k].hijacked)
				kfree(old_counts.bp[k].map);
		kfree(old_counts.bp);
	}

	if (!init) {
		int i;
		while (block < ((sector_t)chunks << chunkshift)) {
			bitmap_counter_t *bmc;
			bmc = bitmap_get_counter(&bitmap->counts, block,
						 &new_blocks, 1);
			if (bmc) {
				/* new space.  It needs to be resynced, so
				 * we set NEEDED_MASK.
				 */
				if (*bmc == 0) {
					*bmc = NEEDED_MASK | 2;
					bitmap_count_page(&bitmap->counts,
							  block, 1);
					bitmap_set_pending(&bitmap->counts,
							   block);
				}
			}
			block += new_blocks;
		}
		/* Mark every storage page dirty so it gets written out. */
		for (i = 0; i < bitmap->storage.file_pages; i++)
			set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
	}
	spin_unlock_irq(&bitmap->counts.lock);

	if (!init) {
		bitmap_unplug(bitmap);
		bitmap->mddev->pers->quiesce(bitmap->mddev, 0);
	}
	ret = 0;
err:
	return ret;
}
EXPORT_SYMBOL_GPL(bitmap_resize);

2157
static ssize_t
2158
location_show(struct mddev *mddev, char *page)
2159 2160
{
	ssize_t len;
2161
	if (mddev->bitmap_info.file)
2162
		len = sprintf(page, "file");
2163
	else if (mddev->bitmap_info.offset)
2164
		len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset);
2165
	else
2166 2167 2168 2169 2170 2171
		len = sprintf(page, "none");
	len += sprintf(page+len, "\n");
	return len;
}

static ssize_t
2172
location_store(struct mddev *mddev, const char *buf, size_t len)
2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208
{

	if (mddev->pers) {
		if (!mddev->pers->quiesce)
			return -EBUSY;
		if (mddev->recovery || mddev->sync_thread)
			return -EBUSY;
	}

	if (mddev->bitmap || mddev->bitmap_info.file ||
	    mddev->bitmap_info.offset) {
		/* bitmap already configured.  Only option is to clear it */
		if (strncmp(buf, "none", 4) != 0)
			return -EBUSY;
		if (mddev->pers) {
			mddev->pers->quiesce(mddev, 1);
			bitmap_destroy(mddev);
			mddev->pers->quiesce(mddev, 0);
		}
		mddev->bitmap_info.offset = 0;
		if (mddev->bitmap_info.file) {
			struct file *f = mddev->bitmap_info.file;
			mddev->bitmap_info.file = NULL;
			fput(f);
		}
	} else {
		/* No bitmap, OK to set a location */
		long long offset;
		if (strncmp(buf, "none", 4) == 0)
			/* nothing to be done */;
		else if (strncmp(buf, "file:", 5) == 0) {
			/* Not supported yet */
			return -EINVAL;
		} else {
			int rv;
			if (buf[0] == '+')
2209
				rv = kstrtoll(buf+1, 10, &offset);
2210
			else
2211
				rv = kstrtoll(buf, 10, &offset);
2212 2213 2214 2215
			if (rv)
				return rv;
			if (offset == 0)
				return -EINVAL;
2216 2217
			if (mddev->bitmap_info.external == 0 &&
			    mddev->major_version == 0 &&
2218 2219 2220 2221
			    offset != mddev->bitmap_info.default_offset)
				return -EINVAL;
			mddev->bitmap_info.offset = offset;
			if (mddev->pers) {
2222
				struct bitmap *bitmap;
2223
				mddev->pers->quiesce(mddev, 1);
2224 2225 2226 2227 2228
				bitmap = bitmap_create(mddev, -1);
				if (IS_ERR(bitmap))
					rv = PTR_ERR(bitmap);
				else {
					mddev->bitmap = bitmap;
2229
					rv = bitmap_load(mddev);
2230
					if (rv)
2231
						mddev->bitmap_info.offset = 0;
2232 2233
				}
				mddev->pers->quiesce(mddev, 0);
2234 2235
				if (rv) {
					bitmap_destroy(mddev);
2236
					return rv;
2237
				}
2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253
			}
		}
	}
	if (!mddev->external) {
		/* Ensure new bitmap info is stored in
		 * metadata promptly.
		 */
		set_bit(MD_CHANGE_DEVS, &mddev->flags);
		md_wakeup_thread(mddev->thread);
	}
	return len;
}

/* 'location' attribute: readable by everyone, writable by the owner;
 * handled by location_show() and location_store().
 */
static struct md_sysfs_entry bitmap_location =
__ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store);

2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277
/* 'bitmap/space' is the space available at 'location' for the
 * bitmap.  This allows the kernel to know when it is safe to
 * resize the bitmap to match a resized array.
 */
/* Read handler for 'space': prints bitmap_info.space followed by a newline. */
static ssize_t
space_show(struct mddev *mddev, char *page)
{
	const unsigned long sectors = mddev->bitmap_info.space;

	return sprintf(page, "%lu\n", sectors);
}

/*
 * Write handler for 'space'.  Parses a decimal sector count, rejects 0
 * with -EINVAL, and rejects with -EFBIG a value too small to hold the
 * storage of an existing bitmap.  On success the value is recorded in
 * bitmap_info.space and @len is returned.
 */
static ssize_t
space_store(struct mddev *mddev, const char *buf, size_t len)
{
	unsigned long sectors;
	int rv;

	rv = kstrtoul(buf, 10, &sectors);
	if (rv)
		return rv;

	if (!sectors)
		return -EINVAL;

	if (mddev->bitmap) {
		/* Bitmap is too big for this small space */
		if (sectors < (mddev->bitmap->storage.bytes + 511) >> 9)
			return -EFBIG;
	}

	/* could make sure it isn't too big, but that isn't really
	 * needed - user-space should be careful.
	 */
	mddev->bitmap_info.space = sectors;
	return len;
}

/* 'space' attribute: readable by everyone, writable by the owner;
 * handled by space_show() and space_store().
 */
static struct md_sysfs_entry bitmap_space =
__ATTR(space, S_IRUGO|S_IWUSR, space_show, space_store);

2291
static ssize_t
2292
timeout_show(struct mddev *mddev, char *page)
2293 2294 2295 2296
{
	ssize_t len;
	unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ;
	unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ;
2297

2298 2299 2300 2301 2302 2303 2304 2305
	len = sprintf(page, "%lu", secs);
	if (jifs)
		len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs));
	len += sprintf(page+len, "\n");
	return len;
}

static ssize_t
2306
timeout_store(struct mddev *mddev, const char *buf, size_t len)
2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341
{
	/* timeout can be set at any time */
	unsigned long timeout;
	int rv = strict_strtoul_scaled(buf, &timeout, 4);
	if (rv)
		return rv;

	/* just to make sure we don't overflow... */
	if (timeout >= LONG_MAX / HZ)
		return -EINVAL;

	timeout = timeout * HZ / 10000;

	if (timeout >= MAX_SCHEDULE_TIMEOUT)
		timeout = MAX_SCHEDULE_TIMEOUT-1;
	if (timeout < 1)
		timeout = 1;
	mddev->bitmap_info.daemon_sleep = timeout;
	if (mddev->thread) {
		/* if thread->timeout is MAX_SCHEDULE_TIMEOUT, then
		 * the bitmap is all clean and we don't need to
		 * adjust the timeout right now
		 */
		if (mddev->thread->timeout < MAX_SCHEDULE_TIMEOUT) {
			mddev->thread->timeout = timeout;
			md_wakeup_thread(mddev->thread);
		}
	}
	return len;
}

/* 'time_base' attribute: readable by everyone, writable by the owner;
 * handled by timeout_show() and timeout_store().
 */
static struct md_sysfs_entry bitmap_timeout =
__ATTR(time_base, S_IRUGO|S_IWUSR, timeout_show, timeout_store);

static ssize_t
2342
backlog_show(struct mddev *mddev, char *page)
2343 2344 2345 2346 2347
{
	return sprintf(page, "%lu\n", mddev->bitmap_info.max_write_behind);
}

static ssize_t
2348
backlog_store(struct mddev *mddev, const char *buf, size_t len)
2349 2350
{
	unsigned long backlog;
2351
	int rv = kstrtoul(buf, 10, &backlog);
2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363
	if (rv)
		return rv;
	if (backlog > COUNTER_MAX)
		return -EINVAL;
	mddev->bitmap_info.max_write_behind = backlog;
	return len;
}

/* 'backlog' attribute: readable by everyone, writable by the owner;
 * handled by backlog_show() and backlog_store().
 */
static struct md_sysfs_entry bitmap_backlog =
__ATTR(backlog, S_IRUGO|S_IWUSR, backlog_show, backlog_store);

static ssize_t
2364
chunksize_show(struct mddev *mddev, char *page)
2365 2366 2367 2368 2369
{
	return sprintf(page, "%lu\n", mddev->bitmap_info.chunksize);
}

static ssize_t
2370
chunksize_store(struct mddev *mddev, const char *buf, size_t len)
2371 2372 2373 2374 2375 2376
{
	/* Can only be changed when no bitmap is active */
	int rv;
	unsigned long csize;
	if (mddev->bitmap)
		return -EBUSY;
2377
	rv = kstrtoul(buf, 10, &csize);
2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389
	if (rv)
		return rv;
	if (csize < 512 ||
	    !is_power_of_2(csize))
		return -EINVAL;
	mddev->bitmap_info.chunksize = csize;
	return len;
}

/* 'chunksize' attribute: readable by everyone, writable by the owner;
 * handled by chunksize_show() and chunksize_store().
 */
static struct md_sysfs_entry bitmap_chunksize =
__ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);

2390
static ssize_t metadata_show(struct mddev *mddev, char *page)
2391
{
G
Goldwyn Rodrigues 已提交
2392 2393
	if (mddev_is_clustered(mddev))
		return sprintf(page, "clustered\n");
2394 2395 2396 2397
	return sprintf(page, "%s\n", (mddev->bitmap_info.external
				      ? "external" : "internal"));
}

2398
/*
 * Write handler for 'metadata'.  Fails with -EBUSY once a bitmap, bitmap
 * file or bitmap offset is configured.  "external" sets
 * bitmap_info.external; "internal" and "clustered" both clear it; any
 * other input yields -EINVAL.  Returns @len on success.
 */
static ssize_t metadata_store(struct mddev *mddev, const char *buf, size_t len)
{
	if (mddev->bitmap ||
	    mddev->bitmap_info.file ||
	    mddev->bitmap_info.offset)
		return -EBUSY;

	if (strncmp(buf, "external", 8) == 0) {
		mddev->bitmap_info.external = 1;
		return len;
	}

	if (strncmp(buf, "internal", 8) != 0 &&
	    strncmp(buf, "clustered", 9) != 0)
		return -EINVAL;

	mddev->bitmap_info.external = 0;
	return len;
}

/* 'metadata' attribute: readable by everyone, writable by the owner;
 * handled by metadata_show() and metadata_store().
 */
static struct md_sysfs_entry bitmap_metadata =
__ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);

2417
static ssize_t can_clear_show(struct mddev *mddev, char *page)
2418 2419
{
	int len;
2420
	spin_lock(&mddev->lock);
2421 2422 2423 2424 2425
	if (mddev->bitmap)
		len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ?
					     "false" : "true"));
	else
		len = sprintf(page, "\n");
2426
	spin_unlock(&mddev->lock);
2427 2428 2429
	return len;
}

2430
/*
 * Write handler for 'can_clear'.  "false" sets need_sync on the bitmap;
 * "true" clears it, but is refused with -EBUSY while the array is
 * degraded.  Returns -ENOENT when there is no bitmap, -EINVAL for any
 * other input, and @len on success.
 */
static ssize_t can_clear_store(struct mddev *mddev, const char *buf, size_t len)
{
	if (mddev->bitmap == NULL)
		return -ENOENT;

	if (strncmp(buf, "false", 5) == 0) {
		mddev->bitmap->need_sync = 1;
		return len;
	}

	if (strncmp(buf, "true", 4) != 0)
		return -EINVAL;

	if (mddev->degraded)
		return -EBUSY;

	mddev->bitmap->need_sync = 0;
	return len;
}

/* 'can_clear' attribute: readable by everyone, writable by the owner;
 * handled by can_clear_show() and can_clear_store().
 */
static struct md_sysfs_entry bitmap_can_clear =
__ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);

2448
static ssize_t
2449
behind_writes_used_show(struct mddev *mddev, char *page)
2450
{
2451 2452
	ssize_t ret;
	spin_lock(&mddev->lock);
2453
	if (mddev->bitmap == NULL)
2454 2455 2456 2457 2458 2459
		ret = sprintf(page, "0\n");
	else
		ret = sprintf(page, "%lu\n",
			      mddev->bitmap->behind_writes_used);
	spin_unlock(&mddev->lock);
	return ret;
2460 2461 2462
}

static ssize_t
2463
behind_writes_used_reset(struct mddev *mddev, const char *buf, size_t len)
2464 2465 2466 2467 2468 2469 2470 2471 2472 2473
{
	if (mddev->bitmap)
		mddev->bitmap->behind_writes_used = 0;
	return len;
}

/* 'max_backlog_used' attribute: readable by everyone, writable by the
 * owner; reads go to behind_writes_used_show(), writes reset the value
 * through behind_writes_used_reset().
 */
static struct md_sysfs_entry max_backlog_used =
__ATTR(max_backlog_used, S_IRUGO | S_IWUSR,
       behind_writes_used_show, behind_writes_used_reset);

2474 2475
static struct attribute *md_bitmap_attrs[] = {
	&bitmap_location.attr,
2476
	&bitmap_space.attr,
2477 2478 2479
	&bitmap_timeout.attr,
	&bitmap_backlog.attr,
	&bitmap_chunksize.attr,
2480 2481
	&bitmap_metadata.attr,
	&bitmap_can_clear.attr,
2482
	&max_backlog_used.attr,
2483 2484 2485 2486 2487 2488 2489
	NULL
};
/* Attribute group named "bitmap" that bundles the entries listed in
 * md_bitmap_attrs.
 */
struct attribute_group md_bitmap_group = {
	.name = "bitmap",
	.attrs = md_bitmap_attrs,
};