diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index 84f7dc9fd3ac70f6a18bb5d6520225d62644d0f2..c94f475758de3b98be9b96a27b88bd90e8e9939d 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -259,6 +259,44 @@ static int leb_write_lock(struct ubi_device *ubi, int vol_id, int lnum)
 	return 0;
 }
 
+/**
+ * leb_write_trylock - try to lock logical eraseblock for writing.
+ * @ubi: UBI device description object
+ * @vol_id: volume ID
+ * @lnum: logical eraseblock number
+ *
+ * This function locks a logical eraseblock for writing if there is no
+ * contention and does nothing if there is contention. Returns %0 in case of
+ * success, %1 in case of contention, and a negative error code in case of
+ * failure.
+ */
+static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum)
+{
+	int free;
+	struct ubi_ltree_entry *le;
+
+	le = ltree_add_entry(ubi, vol_id, lnum);
+	if (IS_ERR(le))
+		return PTR_ERR(le);
+	if (down_write_trylock(&le->mutex))
+		return 0;
+
+	/* Contention, cancel */
+	spin_lock(&ubi->ltree_lock);
+	le->users -= 1;
+	ubi_assert(le->users >= 0);
+	if (le->users == 0) {
+		rb_erase(&le->rb, &ubi->ltree);
+		free = 1;
+	} else
+		free = 0;
+	spin_unlock(&ubi->ltree_lock);
+	if (free)
+		kmem_cache_free(ubi_ltree_slab, le);
+
+	return 1;
+}
+
 /**
  * leb_write_unlock - unlock logical eraseblock.
  * @ubi: UBI device description object
@@ -923,14 +961,16 @@ int ubi_eba_atomic_leb_change(struct ubi_device *ubi, struct ubi_volume *vol,
  *
  * This function copies logical eraseblock from physical eraseblock @from to
  * physical eraseblock @to. The @vid_hdr buffer may be changed by this
- * function. Returns zero in case of success, %UBI_IO_BITFLIPS if the operation
- * was canceled because bit-flips were detected at the target PEB, and a
- * negative error code in case of failure.
+ * function. Returns:
+ *   o %0 in case of success;
+ *   o %1 if the operation was canceled and should be tried later (e.g.,
+ *     because a bit-flip was detected at the target PEB);
+ *   o %2 if the volume is being deleted and this LEB should not be moved.
  */
 int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
 		     struct ubi_vid_hdr *vid_hdr)
 {
-	int err, vol_id, lnum, data_size, aldata_size, pnum, idx;
+	int err, vol_id, lnum, data_size, aldata_size, idx;
 	struct ubi_volume *vol;
 	uint32_t crc;
 
@@ -946,57 +986,67 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
 	data_size = aldata_size =
 		    ubi->leb_size - be32_to_cpu(vid_hdr->data_pad);
 
-	/*
-	 * We do not want anybody to write to this logical eraseblock while we
-	 * are moving it, so we lock it.
-	 */
-	err = leb_write_lock(ubi, vol_id, lnum);
-	if (err)
-		return err;
-
-	mutex_lock(&ubi->buf_mutex);
-
-	/*
-	 * But the logical eraseblock might have been put by this time.
-	 * Cancel if it is true.
-	 */
 	idx = vol_id2idx(ubi, vol_id);
-
+	spin_lock(&ubi->volumes_lock);
 	/*
-	 * We may race with volume deletion/re-size, so we have to hold
-	 * @ubi->volumes_lock.
-	 *
-	 * Note, it is not a problem if we race with volume deletion or re-size
-	 * here. If the volume is deleted or re-sized while we are moving an
-	 * eraseblock which belongs to this volume, we'll end up with finding
-	 * out that this LEB was unmapped at the end (see WL), and drop this
-	 * PEB.
+	 * Note, we may race with volume deletion, which means that the volume
+	 * this logical eraseblock belongs to might be being deleted. Since
+	 * volume deletion unmaps all the volume's logical eraseblocks, the
+	 * deletion task will block in 'ubi_wl_put_peb()' and wait for this WL
+	 * worker to finish.
	 */
-	spin_lock(&ubi->volumes_lock);
 	vol = ubi->volumes[idx];
 	if (!vol) {
-		dbg_eba("volume %d was removed meanwhile", vol_id);
+		/* No need to do further work, cancel */
+		dbg_eba("volume %d is being removed, cancel", vol_id);
 		spin_unlock(&ubi->volumes_lock);
-		goto out_unlock;
+		return 2;
 	}
+	spin_unlock(&ubi->volumes_lock);
 
-	pnum = vol->eba_tbl[lnum];
-	if (pnum != from) {
-		dbg_eba("LEB %d:%d is no longer mapped to PEB %d, mapped to "
-			"PEB %d, cancel", vol_id, lnum, from, pnum);
-		spin_unlock(&ubi->volumes_lock);
-		goto out_unlock;
+	/*
+	 * We do not want anybody to write to this logical eraseblock while we
+	 * are moving it, so lock it.
+	 *
+	 * Note, we are using non-waiting locking here, because we cannot sleep
+	 * on the LEB lock, since that may cause deadlocks. Indeed, imagine a
+	 * task is unmapping the LEB which is mapped to the PEB we are going to
+	 * move (@from). This task locks the LEB and goes to sleep in the
+	 * 'ubi_wl_put_peb()' function on the @ubi->move_mutex. In turn, we are
+	 * holding @ubi->move_mutex and would go to sleep on the LEB lock. So,
+	 * if the LEB is already locked, we just do not move it and return %1.
+	 */
+	err = leb_write_trylock(ubi, vol_id, lnum);
+	if (err) {
+		dbg_eba("contention on LEB %d:%d, cancel", vol_id, lnum);
+		return err;
 	}
-	spin_unlock(&ubi->volumes_lock);
 
-	/* OK, now the LEB is locked and we can safely start moving it */
+	/*
+	 * The LEB might have been put meanwhile, and the task which put it is
+	 * probably waiting on @ubi->move_mutex. No need to continue the work,
+	 * cancel it.
+	 */
+	if (vol->eba_tbl[lnum] != from) {
+		dbg_eba("LEB %d:%d is no longer mapped to PEB %d, mapped to "
+			"PEB %d, cancel", vol_id, lnum, from,
+			vol->eba_tbl[lnum]);
+		err = 1;
+		goto out_unlock_leb;
+	}
+
+	/*
+	 * OK, now the LEB is locked and we can safely start moving it. Since
+	 * this function utilizes the @ubi->peb_buf1 buffer, which is shared
+	 * with some other functions, we lock the buffer by taking the
+	 * @ubi->buf_mutex.
+	 */
+	mutex_lock(&ubi->buf_mutex);
 
 	dbg_eba("read %d bytes of data", aldata_size);
 	err = ubi_io_read_data(ubi, ubi->peb_buf1, from, 0, aldata_size);
 	if (err && err != UBI_IO_BITFLIPS) {
 		ubi_warn("error %d while reading data from PEB %d",
 			 err, from);
-		goto out_unlock;
+		goto out_unlock_buf;
 	}
 
 	/*
@@ -1032,7 +1082,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
 
 	err = ubi_io_write_vid_hdr(ubi, to, vid_hdr);
 	if (err)
-		goto out_unlock;
+		goto out_unlock_buf;
 
 	cond_resched();
 
@@ -1041,13 +1091,15 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
 	if (err) {
 		if (err != UBI_IO_BITFLIPS)
 			ubi_warn("cannot read VID header back from PEB %d", to);
-		goto out_unlock;
+		else
+			err = 1;
+		goto out_unlock_buf;
 	}
 
 	if (data_size > 0) {
 		err = ubi_io_write_data(ubi, ubi->peb_buf1, to, 0, aldata_size);
 		if (err)
-			goto out_unlock;
+			goto out_unlock_buf;
 
 		cond_resched();
 
@@ -1061,7 +1113,9 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
 			if (err != UBI_IO_BITFLIPS)
 				ubi_warn("cannot read data back from PEB %d",
 					 to);
-			goto out_unlock;
+			else
+				err = 1;
+			goto out_unlock_buf;
 		}
 
 		cond_resched();
@@ -1069,15 +1123,16 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
 
 		if (memcmp(ubi->peb_buf1, ubi->peb_buf2, aldata_size)) {
 			ubi_warn("read data back from PEB %d - it is different",
 				 to);
-			goto out_unlock;
+			goto out_unlock_buf;
 		}
 	}
 
 	ubi_assert(vol->eba_tbl[lnum] == from);
 	vol->eba_tbl[lnum] = to;
 
-out_unlock:
+out_unlock_buf:
 	mutex_unlock(&ubi->buf_mutex);
+out_unlock_leb:
 	leb_write_unlock(ubi, vol_id, lnum);
 	return err;
 }
diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index f782d5aa849aaa5d54536546f4a8f16a10f77a45..ea9a6990a4dccc788150fd68bcdbe7ad7694234d 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -275,13 +275,13 @@ struct ubi_wl_entry;
  * @wl_lock: protects the @used, @free, @prot, @lookuptbl, @abs_ec, @move_from,
  *           @move_to, @move_to_put @erase_pending, @wl_scheduled, and @works
  *           fields
+ * @move_mutex: serializes eraseblock moves
  * @wl_scheduled: non-zero if the wear-leveling was scheduled
  * @lookuptbl: a table to quickly find a &struct ubi_wl_entry object for any
  *             physical eraseblock
  * @abs_ec: absolute erase counter
  * @move_from: physical eraseblock from where the data is being moved
  * @move_to: physical eraseblock where the data is being moved to
- * @move_from_put: if the "from" PEB was put
  * @move_to_put: if the "to" PEB was put
  * @works: list of pending works
  * @works_count: count of pending works
@@ -354,12 +354,12 @@ struct ubi_device {
 		struct rb_root aec;
 	} prot;
 	spinlock_t wl_lock;
+	struct mutex move_mutex;
 	int wl_scheduled;
 	struct ubi_wl_entry **lookuptbl;
 	unsigned long long abs_ec;
 	struct ubi_wl_entry *move_from;
 	struct ubi_wl_entry *move_to;
-	int move_from_put;
 	int move_to_put;
 	struct list_head works;
 	int works_count;
@@ -561,8 +561,10 @@ static inline int ubi_io_write_data(struct ubi_device *ubi, const void *buf,
  */
 static inline void ubi_ro_mode(struct ubi_device *ubi)
 {
-	ubi->ro_mode = 1;
-	ubi_warn("switch to read-only mode");
+	if (!ubi->ro_mode) {
+		ubi->ro_mode = 1;
+		ubi_warn("switch to read-only mode");
+	}
 }
 
 /**
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index 36aa097203f9162a88f438886c9adbd2f45ba2e7..a60f9425ab137c90decdd05bc00cbb38948b416f 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -249,6 +249,8 @@ static int do_work(struct ubi_device *ubi)
 	int err;
 	struct ubi_work *wrk;
 
+	cond_resched();
+
 	spin_lock(&ubi->wl_lock);
 
 	if (list_empty(&ubi->works)) {
@@ -531,8 +533,12 @@ int ubi_wl_get_peb(struct ubi_device *ubi, int dtype)
  * prot_tree_del - remove a physical eraseblock from the protection trees
  * @ubi: UBI device description object
  * @pnum: the physical eraseblock to remove
+ *
+ * This function removes PEB @pnum from the protection trees and returns zero
+ * in case of success and %-ENODEV if the PEB was not found in the protection
+ * trees.
  */
-static void prot_tree_del(struct ubi_device *ubi, int pnum)
+static int prot_tree_del(struct ubi_device *ubi, int pnum)
 {
 	struct rb_node *p;
 	struct ubi_wl_prot_entry *pe = NULL;
@@ -543,7 +549,7 @@ static void prot_tree_del(struct ubi_device *ubi, int pnum)
 		pe = rb_entry(p, struct ubi_wl_prot_entry, rb_pnum);
 
 		if (pnum == pe->e->pnum)
-			break;
+			goto found;
 
 		if (pnum < pe->e->pnum)
 			p = p->rb_left;
@@ -551,10 +557,14 @@ static void prot_tree_del(struct ubi_device *ubi, int pnum)
 			p = p->rb_right;
 	}
 
+	return -ENODEV;
+
+found:
 	ubi_assert(pe->e->pnum == pnum);
 	rb_erase(&pe->rb_aec, &ubi->prot.aec);
 	rb_erase(&pe->rb_pnum, &ubi->prot.pnum);
 	kfree(pe);
+	return 0;
 }
 
 /**
@@ -726,7 +736,8 @@ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
 static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 				int cancel)
 {
-	int err, put = 0;
+	int err, put = 0, scrubbing = 0, protect = 0;
+	struct ubi_wl_prot_entry *pe;
 	struct ubi_wl_entry *e1, *e2;
 	struct ubi_vid_hdr *vid_hdr;
 
@@ -739,21 +750,17 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 	if (!vid_hdr)
 		return -ENOMEM;
 
+	mutex_lock(&ubi->move_mutex);
 	spin_lock(&ubi->wl_lock);
+	ubi_assert(!ubi->move_from && !ubi->move_to);
+	ubi_assert(!ubi->move_to_put);
 
-	/*
-	 * Only one WL worker at a time is supported at this implementation, so
-	 * make sure a PEB is not being moved already.
-	 */
-	if (ubi->move_to || !ubi->free.rb_node ||
+	if (!ubi->free.rb_node ||
 	    (!ubi->used.rb_node && !ubi->scrub.rb_node)) {
 		/*
-		 * Only one WL worker at a time is supported at this
-		 * implementation, so if a LEB is already being moved, cancel.
-		 *
-		 * No free physical eraseblocks? Well, we cancel wear-leveling
-		 * then. It will be triggered again when a free physical
-		 * eraseblock appears.
+		 * No free physical eraseblocks? Well, they must be waiting in
+		 * the queue to be erased. Cancel movement - it will be
+		 * triggered again when a free physical eraseblock appears.
 		 *
 		 * No used physical eraseblocks? They must be temporarily
 		 * protected from being moved. They will be moved to the
@@ -762,10 +769,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 		 */
 		dbg_wl("cancel WL, a list is empty: free %d, used %d",
 		       !ubi->free.rb_node, !ubi->used.rb_node);
-		ubi->wl_scheduled = 0;
-		spin_unlock(&ubi->wl_lock);
-		ubi_free_vid_hdr(ubi, vid_hdr);
-		return 0;
+		goto out_cancel;
 	}
 
 	if (!ubi->scrub.rb_node) {
@@ -780,16 +784,15 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 		if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) {
 			dbg_wl("no WL needed: min used EC %d, max free EC %d",
 			       e1->ec, e2->ec);
-			ubi->wl_scheduled = 0;
-			spin_unlock(&ubi->wl_lock);
-			ubi_free_vid_hdr(ubi, vid_hdr);
-			return 0;
+			goto out_cancel;
 		}
 		paranoid_check_in_wl_tree(e1, &ubi->used);
 		rb_erase(&e1->rb, &ubi->used);
 		dbg_wl("move PEB %d EC %d to PEB %d EC %d",
 		       e1->pnum, e1->ec, e2->pnum, e2->ec);
 	} else {
+		/* Perform scrubbing */
+		scrubbing = 1;
 		e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, rb);
 		e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
 		paranoid_check_in_wl_tree(e1, &ubi->scrub);
@@ -799,8 +802,6 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 	paranoid_check_in_wl_tree(e2, &ubi->free);
 	rb_erase(&e2->rb, &ubi->free);
 
-	ubi_assert(!ubi->move_from && !ubi->move_to);
-	ubi_assert(!ubi->move_to_put && !ubi->move_from_put);
 	ubi->move_from = e1;
 	ubi->move_to = e2;
 	spin_unlock(&ubi->wl_lock);
@@ -810,6 +811,10 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 	 * We so far do not know which logical eraseblock our physical
 	 * eraseblock (@e1) belongs to. We have to read the volume identifier
 	 * header first.
+	 *
+	 * Note, we are protected from this PEB being unmapped and erased. The
+	 * 'ubi_wl_put_peb()' would wait for moving to be finished if the PEB
+	 * which is being moved was unmapped.
 	 */
 
 	err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0);
@@ -824,32 +829,51 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 		 * likely have the VID header in place.
 		 */
 		dbg_wl("PEB %d has no VID header", e1->pnum);
-			err = 0;
-		} else {
-			ubi_err("error %d while reading VID header from PEB %d",
-				err, e1->pnum);
-			if (err > 0)
-				err = -EIO;
+			goto out_not_moved;
 		}
-		goto error;
+
+		ubi_err("error %d while reading VID header from PEB %d",
+			err, e1->pnum);
+		if (err > 0)
+			err = -EIO;
+		goto out_error;
 	}
 
 	err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr);
 	if (err) {
-		if (err == UBI_IO_BITFLIPS)
-			err = 0;
-		goto error;
+		if (err < 0)
+			goto out_error;
+		if (err == 1)
+			goto out_not_moved;
+
+		/*
+		 * For some reason the LEB was not moved - it might be because
+		 * the volume is being deleted. We should prevent this PEB from
+		 * being selected for wear-leveling movement for some time, so
+		 * put it into the protection trees.
+		 */
+
+		dbg_wl("canceled moving PEB %d", e1->pnum);
+		pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS);
+		if (!pe) {
+			err = -ENOMEM;
+			goto out_error;
+		}
+
+		protect = 1;
 	}
 
 	ubi_free_vid_hdr(ubi, vid_hdr);
 	spin_lock(&ubi->wl_lock);
+	if (protect)
+		prot_tree_add(ubi, e1, pe, protect);
 	if (!ubi->move_to_put)
 		wl_tree_add(e2, &ubi->used);
 	else
 		put = 1;
 	ubi->move_from = ubi->move_to = NULL;
-	ubi->move_from_put = ubi->move_to_put = 0;
-	ubi->wl_scheduled = 0;
+	ubi->move_to_put = ubi->wl_scheduled = 0;
 	spin_unlock(&ubi->wl_lock);
 
 	if (put) {
@@ -859,62 +883,67 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 		 */
 		dbg_wl("PEB %d was put meanwhile, erase", e2->pnum);
 		err = schedule_erase(ubi, e2, 0);
-		if (err) {
-			kmem_cache_free(ubi_wl_entry_slab, e2);
-			ubi_ro_mode(ubi);
-		}
+		if (err)
+			goto out_error;
 	}
 
-	err = schedule_erase(ubi, e1, 0);
-	if (err) {
-		kmem_cache_free(ubi_wl_entry_slab, e1);
-		ubi_ro_mode(ubi);
+	if (!protect) {
+		err = schedule_erase(ubi, e1, 0);
+		if (err)
+			goto out_error;
 	}
 
-	return err;
+	dbg_wl("done");
+	mutex_unlock(&ubi->move_mutex);
+	return 0;
 
 	/*
-	 * Some error occurred. @e1 was not changed, so return it back. @e2
-	 * might be changed, schedule it for erasure.
+	 * For some reason the LEB was not moved - it might be an error, or
+	 * something else. @e1 was not changed, so return it back. @e2 might
+	 * have been changed, so schedule it for erasure.
 	 */
-error:
-	if (err)
-		dbg_wl("error %d occurred, cancel operation", err);
-	ubi_assert(err <= 0);
-
+out_not_moved:
 	ubi_free_vid_hdr(ubi, vid_hdr);
 	spin_lock(&ubi->wl_lock);
-	ubi->wl_scheduled = 0;
-	if (ubi->move_from_put)
-		put = 1;
+	if (scrubbing)
+		wl_tree_add(e1, &ubi->scrub);
 	else
 		wl_tree_add(e1, &ubi->used);
 	ubi->move_from = ubi->move_to = NULL;
-	ubi->move_from_put = ubi->move_to_put = 0;
+	ubi->move_to_put = ubi->wl_scheduled = 0;
 	spin_unlock(&ubi->wl_lock);
 
-	if (put) {
-		/*
-		 * Well, the target PEB was put meanwhile, schedule it for
-		 * erasure.
-		 */
-		dbg_wl("PEB %d was put meanwhile, erase", e1->pnum);
-		err = schedule_erase(ubi, e1, 0);
-		if (err) {
-			kmem_cache_free(ubi_wl_entry_slab, e1);
-			ubi_ro_mode(ubi);
-		}
-	}
-
 	err = schedule_erase(ubi, e2, 0);
-	if (err) {
-		kmem_cache_free(ubi_wl_entry_slab, e2);
-		ubi_ro_mode(ubi);
-	}
+	if (err)
+		goto out_error;
+
+	mutex_unlock(&ubi->move_mutex);
+	return 0;
+
+out_error:
+	ubi_err("error %d while moving PEB %d to PEB %d",
+		err, e1->pnum, e2->pnum);
 
-	yield();
+	ubi_free_vid_hdr(ubi, vid_hdr);
+	spin_lock(&ubi->wl_lock);
+	ubi->move_from = ubi->move_to = NULL;
+	ubi->move_to_put = ubi->wl_scheduled = 0;
+	spin_unlock(&ubi->wl_lock);
+
+	kmem_cache_free(ubi_wl_entry_slab, e1);
+	kmem_cache_free(ubi_wl_entry_slab, e2);
+	ubi_ro_mode(ubi);
+
+	mutex_unlock(&ubi->move_mutex);
 	return err;
+
+out_cancel:
+	ubi->wl_scheduled = 0;
+	spin_unlock(&ubi->wl_lock);
+	mutex_unlock(&ubi->move_mutex);
+	ubi_free_vid_hdr(ubi, vid_hdr);
+	return 0;
 }
 
 /**
@@ -1101,8 +1130,7 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
 }
 
 /**
- * ubi_wl_put_peb - return a physical eraseblock to the wear-leveling
- * unit.
+ * ubi_wl_put_peb - return a physical eraseblock to the wear-leveling unit.
  * @ubi: UBI device description object
  * @pnum: physical eraseblock to return
  * @torture: if this physical eraseblock has to be tortured
 *
@@ -1110,7 +1138,7 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
 * This function is called to return physical eraseblock @pnum to the pool of
 * free physical eraseblocks. The @torture flag has to be set if an I/O error
 * occurred to this @pnum and it has to be tested. This function returns zero
- * in case of success and a negative error code in case of failure.
+ * in case of success, and a negative error code in case of failure.
 */
 int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture)
 {
@@ -1121,8 +1149,8 @@ int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture)
 	ubi_assert(pnum >= 0);
 	ubi_assert(pnum < ubi->peb_count);
 
+retry:
 	spin_lock(&ubi->wl_lock);
-
 	e = ubi->lookuptbl[pnum];
 	if (e == ubi->move_from) {
 		/*
@@ -1130,11 +1158,13 @@ int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture)
 		 * be moved. It will be scheduled for erasure in the
 		 * wear-leveling worker.
 		 */
-		dbg_wl("PEB %d is being moved", pnum);
-		ubi_assert(!ubi->move_from_put);
-		ubi->move_from_put = 1;
+		dbg_wl("PEB %d is being moved, wait", pnum);
 		spin_unlock(&ubi->wl_lock);
-		return 0;
+
+		/* Wait for the WL worker by taking the @ubi->move_mutex */
+		mutex_lock(&ubi->move_mutex);
+		mutex_unlock(&ubi->move_mutex);
+		goto retry;
 	} else if (e == ubi->move_to) {
 		/*
 		 * User is putting the physical eraseblock which was selected
@@ -1157,8 +1187,15 @@ int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture)
 		} else if (in_wl_tree(e, &ubi->scrub)) {
 			paranoid_check_in_wl_tree(e, &ubi->scrub);
 			rb_erase(&e->rb, &ubi->scrub);
-		} else
-			prot_tree_del(ubi, e->pnum);
+		} else {
+			err = prot_tree_del(ubi, e->pnum);
+			if (err) {
+				ubi_err("PEB %d not found", pnum);
+				ubi_ro_mode(ubi);
+				spin_unlock(&ubi->wl_lock);
+				return err;
+			}
+		}
 	}
 	spin_unlock(&ubi->wl_lock);
 
@@ -1212,8 +1249,17 @@ int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum)
 	if (in_wl_tree(e, &ubi->used)) {
 		paranoid_check_in_wl_tree(e, &ubi->used);
 		rb_erase(&e->rb, &ubi->used);
-	} else
-		prot_tree_del(ubi, pnum);
+	} else {
+		int err;
+
+		err = prot_tree_del(ubi, e->pnum);
+		if (err) {
+			ubi_err("PEB %d not found", pnum);
+			ubi_ro_mode(ubi);
+			spin_unlock(&ubi->wl_lock);
+			return err;
+		}
+	}
 
 	wl_tree_add(e, &ubi->scrub);
 	spin_unlock(&ubi->wl_lock);
@@ -1379,6 +1425,7 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
 	ubi->used = ubi->free = ubi->scrub = RB_ROOT;
 	ubi->prot.pnum = ubi->prot.aec = RB_ROOT;
 	spin_lock_init(&ubi->wl_lock);
+	mutex_init(&ubi->move_mutex);
 	ubi->max_ec = si->max_ec;
 	INIT_LIST_HEAD(&ubi->works);
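
The locking scheme is worth spelling out. 'wear_leveling_worker()' takes @ubi->move_mutex and then 'ubi_eba_copy_leb()' must take the per-LEB write lock; the unmap path enters 'ubi_wl_put_peb()' with the LEB already locked and then waits on @ubi->move_mutex. Taking the two locks in opposite orders is a classic ABBA deadlock, which is why the mover only tries to lock the LEB ('leb_write_trylock()') and cancels the move on contention. The stand-alone user-space sketch below models the two paths with POSIX threads; the names (mover, unmapper, leb_lock) are hypothetical stand-ins for the kernel objects, not actual UBI code:

/*
 * Stand-alone user-space model of the ABBA deadlock avoidance described
 * in the comments above. "mover" models wear_leveling_worker() ->
 * ubi_eba_copy_leb(); "unmapper" models the unmap path entering
 * ubi_wl_put_peb(). Build with: cc -pthread abba.c
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t move_mutex = PTHREAD_MUTEX_INITIALIZER; /* @ubi->move_mutex */
static pthread_mutex_t leb_lock   = PTHREAD_MUTEX_INITIALIZER; /* per-LEB write lock */

/* Mover: holds move_mutex, then only try-locks the LEB. Canceling on
 * contention, instead of sleeping, is what breaks the lock-order cycle. */
static void *mover(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&move_mutex);
	if (pthread_mutex_trylock(&leb_lock) != 0) {
		/* The unmapper holds the LEB lock and may already be
		 * sleeping on move_mutex; waiting here would deadlock. */
		printf("mover: contention on LEB, cancel move\n");
	} else {
		printf("mover: LEB locked, copying data\n");
		pthread_mutex_unlock(&leb_lock);
	}
	pthread_mutex_unlock(&move_mutex);
	return NULL;
}

/* Unmapper: takes the LEB lock first, then waits for any in-flight move
 * by locking and immediately unlocking move_mutex - the same "retry"
 * barrier that ubi_wl_put_peb() uses in the patch. */
static void *unmapper(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&leb_lock);
	printf("unmapper: LEB locked, waiting for the mover\n");
	pthread_mutex_lock(&move_mutex);	/* safe: mover never sleeps on leb_lock */
	pthread_mutex_unlock(&move_mutex);
	pthread_mutex_unlock(&leb_lock);
	printf("unmapper: done\n");
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	pthread_create(&t1, NULL, mover, NULL);
	pthread_create(&t2, NULL, unmapper, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	return 0;
}

Because the mover never sleeps on leb_lock while holding move_mutex, there is no cycle in the lock graph: the unmapper may block, but the mover always makes progress and releases move_mutex. That is exactly why 'leb_write_trylock()' returns %1 instead of waiting.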
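
Relatedly, the reworked 'ubi_eba_copy_leb()' return protocol - %0 on success, %1 for "canceled, retry later", %2 for "volume is being deleted", negative on hard error - is what drives the out_error, out_not_moved, and protection-tree paths in 'wear_leveling_worker()'. A minimal sketch of that dispatch; handle_copy() is a hypothetical illustration, not a kernel function:

#include <stdio.h>

/*
 * Hypothetical mirror of the ubi_eba_copy_leb() return protocol; it just
 * names the branches wear_leveling_worker() takes in the patch above.
 */
static const char *handle_copy(int err)
{
	if (err < 0)
		return "out_error: free e1/e2, switch to read-only mode";
	if (err == 0)
		return "moved: schedule the old PEB for erasure";
	if (err == 1)
		return "out_not_moved: return PEB to its tree, retry later";
	return "canceled: volume deleted, add PEB to the protection trees";
}

int main(void)
{
	const int codes[] = { -5 /* e.g. -EIO */, 0, 1, 2 };
	unsigned i;

	for (i = 0; i < sizeof(codes) / sizeof(codes[0]); i++)
		printf("%2d -> %s\n", codes[i], handle_copy(codes[i]));
	return 0;
}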