提交 ee7fee0b 编写于 作者: L Linus Torvalds

Merge branch 'for-linus' of git://neil.brown.name/md

* 'for-linus' of git://neil.brown.name/md:
  md: remove rd%d links immediately after stopping an array.
  md: remove ability to explicit set an inactive array to 'clean'.
  md: constify VFTs
  md: tidy up status_resync to handle large arrays.
  md: fix some (more) errors with bitmaps on devices larger than 2TB.
  md/raid10: don't clear bitmap during recovery if array will still be degraded.
  md: fix loading of out-of-date bitmap.
......@@ -986,6 +986,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
oldindex = index;
oldpage = page;
bitmap->filemap[bitmap->file_pages++] = page;
bitmap->last_page_size = count;
if (outofdate) {
/*
* if bitmap is out of date, dirty the
......@@ -998,15 +1001,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
write_page(bitmap, page, 1);
ret = -EIO;
if (bitmap->flags & BITMAP_WRITE_ERROR) {
/* release, page not in filemap yet */
put_page(page);
if (bitmap->flags & BITMAP_WRITE_ERROR)
goto err;
}
}
bitmap->filemap[bitmap->file_pages++] = page;
bitmap->last_page_size = count;
}
paddr = kmap_atomic(page, KM_USER0);
if (bitmap->flags & BITMAP_HOSTENDIAN)
......@@ -1016,9 +1013,11 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
kunmap_atomic(paddr, KM_USER0);
if (b) {
/* if the disk bit is set, set the memory bit */
bitmap_set_memory_bits(bitmap, i << CHUNK_BLOCK_SHIFT(bitmap),
((i+1) << (CHUNK_BLOCK_SHIFT(bitmap)) >= start)
);
int needed = ((sector_t)(i+1) << (CHUNK_BLOCK_SHIFT(bitmap))
>= start);
bitmap_set_memory_bits(bitmap,
(sector_t)i << CHUNK_BLOCK_SHIFT(bitmap),
needed);
bit_cnt++;
set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
}
......@@ -1154,8 +1153,9 @@ void bitmap_daemon_work(struct bitmap *bitmap)
spin_lock_irqsave(&bitmap->lock, flags);
clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
}
bmc = bitmap_get_counter(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap),
&blocks, 0);
bmc = bitmap_get_counter(bitmap,
(sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
&blocks, 0);
if (bmc) {
/*
if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc);
......@@ -1169,7 +1169,8 @@ void bitmap_daemon_work(struct bitmap *bitmap)
} else if (*bmc == 1) {
/* we can clear the bit */
*bmc = 0;
bitmap_count_page(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap),
bitmap_count_page(bitmap,
(sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
-1);
/* clear the bit */
......@@ -1514,7 +1515,7 @@ void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
unsigned long chunk;
for (chunk = s; chunk <= e; chunk++) {
sector_t sec = chunk << CHUNK_BLOCK_SHIFT(bitmap);
sector_t sec = (sector_t)chunk << CHUNK_BLOCK_SHIFT(bitmap);
bitmap_set_memory_bits(bitmap, sec, 1);
bitmap_file_set_bit(bitmap, sec);
}
......
......@@ -3066,11 +3066,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
} else
err = -EBUSY;
spin_unlock_irq(&mddev->write_lock);
} else {
mddev->ro = 0;
mddev->recovery_cp = MaxSector;
err = do_md_run(mddev);
}
} else
err = -EINVAL;
break;
case active:
if (mddev->pers) {
......@@ -4297,6 +4294,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
{
int err = 0;
struct gendisk *disk = mddev->gendisk;
mdk_rdev_t *rdev;
if (atomic_read(&mddev->openers) > is_open) {
printk("md: %s still in use.\n",mdname(mddev));
......@@ -4339,6 +4337,13 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
/* tell userspace to handle 'inactive' */
sysfs_notify_dirent(mddev->sysfs_state);
list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk >= 0) {
char nm[20];
sprintf(nm, "rd%d", rdev->raid_disk);
sysfs_remove_link(&mddev->kobj, nm);
}
set_capacity(disk, 0);
mddev->changed = 1;
......@@ -4359,7 +4364,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
* Free resources if final stop
*/
if (mode == 0) {
mdk_rdev_t *rdev;
printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
......@@ -4371,13 +4375,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
}
mddev->bitmap_offset = 0;
list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk >= 0) {
char nm[20];
sprintf(nm, "rd%d", rdev->raid_disk);
sysfs_remove_link(&mddev->kobj, nm);
}
/* make sure all md_delayed_delete calls have finished */
flush_scheduled_work();
......@@ -5705,37 +5702,38 @@ static void status_unused(struct seq_file *seq)
static void status_resync(struct seq_file *seq, mddev_t * mddev)
{
sector_t max_blocks, resync, res;
unsigned long dt, db, rt;
sector_t max_sectors, resync, res;
unsigned long dt, db;
sector_t rt;
int scale;
unsigned int per_milli;
resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2;
resync = mddev->curr_resync - atomic_read(&mddev->recovery_active);
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
max_blocks = mddev->resync_max_sectors >> 1;
max_sectors = mddev->resync_max_sectors;
else
max_blocks = mddev->dev_sectors / 2;
max_sectors = mddev->dev_sectors;
/*
* Should not happen.
*/
if (!max_blocks) {
if (!max_sectors) {
MD_BUG();
return;
}
/* Pick 'scale' such that (resync>>scale)*1000 will fit
* in a sector_t, and (max_blocks>>scale) will fit in a
* in a sector_t, and (max_sectors>>scale) will fit in a
* u32, as those are the requirements for sector_div.
* Thus 'scale' must be at least 10
*/
scale = 10;
if (sizeof(sector_t) > sizeof(unsigned long)) {
while ( max_blocks/2 > (1ULL<<(scale+32)))
while ( max_sectors/2 > (1ULL<<(scale+32)))
scale++;
}
res = (resync>>scale)*1000;
sector_div(res, (u32)((max_blocks>>scale)+1));
sector_div(res, (u32)((max_sectors>>scale)+1));
per_milli = res;
{
......@@ -5756,25 +5754,35 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev)
(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
"resync" : "recovery"))),
per_milli/10, per_milli % 10,
(unsigned long long) resync,
(unsigned long long) max_blocks);
(unsigned long long) resync/2,
(unsigned long long) max_sectors/2);
/*
* We do not want to overflow, so the order of operands and
* the * 100 / 100 trick are important. We do a +1 to be
* safe against division by zero. We only estimate anyway.
*
* dt: time from mark until now
* db: blocks written from mark until now
* rt: remaining time
*
* rt is a sector_t, so could be 32bit or 64bit.
* So we divide before multiply in case it is 32bit and close
* to the limit.
* We scale the divisor (db) by 32 to avoid loosing precision
* near the end of resync when the number of remaining sectors
* is close to 'db'.
* We then divide rt by 32 after multiplying by db to compensate.
* The '+1' avoids division by zero if db is very small.
*/
dt = ((jiffies - mddev->resync_mark) / HZ);
if (!dt) dt++;
db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
- mddev->resync_mark_cnt;
rt = (dt * ((unsigned long)(max_blocks-resync) / (db/2/100+1)))/100;
seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6);
rt = max_sectors - resync; /* number of remaining sectors */
sector_div(rt, db/32+1);
rt *= dt;
rt >>= 5;
seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
((unsigned long)rt % 60)/6);
seq_printf(seq, " speed=%ldK/sec", db/2/dt);
}
......@@ -5965,7 +5973,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
return 0;
}
static struct seq_operations md_seq_ops = {
static const struct seq_operations md_seq_ops = {
.start = md_seq_start,
.next = md_seq_next,
.stop = md_seq_stop,
......
......@@ -1809,17 +1809,17 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
r10_bio->sector = sect;
raid10_find_phys(conf, r10_bio);
/* Need to check if this section will still be
/* Need to check if the array will still be
* degraded
*/
for (j=0; j<conf->copies;j++) {
int d = r10_bio->devs[j].devnum;
if (conf->mirrors[d].rdev == NULL ||
test_bit(Faulty, &conf->mirrors[d].rdev->flags)) {
for (j=0; j<conf->raid_disks; j++)
if (conf->mirrors[j].rdev == NULL ||
test_bit(Faulty, &conf->mirrors[j].rdev->flags)) {
still_degraded = 1;
break;
}
}
must_sync = bitmap_start_sync(mddev->bitmap, sect,
&sync_blocks, still_degraded);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册