Commit 9ea9b9c4, authored by Christoph Hellwig, committed by Jens Axboe

remove the lightnvm subsystem

Lightnvm supports the OCSSD 1.x and 2.0 specs, which were early attempts
to produce Open Channel SSDs and never made it into the NVMe spec
proper.  They have since been superseded by NVMe enhancements such
as ZNS support.  Remove the support per the deprecation schedule.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20210812132308.38486-1-hch@lst.de
Reviewed-by: Matias Bjørling <mb@lightnvm.io>
Reviewed-by: Javier González <javier@javigon.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Parent 6e4df4c6
@@ -85,7 +85,6 @@ available subsections can be seen below.
io-mapping
io_ordering
generic-counter
lightnvm-pblk
memory-devices/index
men-chameleon-bus
ntb
pblk: Physical Block Device Target
==================================
pblk implements a fully associative, host-based FTL that exposes a traditional
block I/O interface. Its primary responsibilities are:
- Map logical addresses onto physical addresses (4KB granularity) in a
logical-to-physical (L2P) table.
- Maintain the integrity and consistency of the L2P table as well as its
recovery from normal tear down and power outage.
- Deal with controller- and media-specific constraints.
- Handle I/O errors.
- Implement garbage collection.
- Maintain consistency across the I/O stack during synchronization points.
For more information please refer to:
http://lightnvm.io
which maintains updated FAQs, manual pages, technical documentation, tools,
contacts, etc.
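As a concrete illustration of the first responsibility, the sketch below shows a minimal, self-contained user-space C version of such a logical-to-physical table: one entry per 4KB logical sector, with an empty marker for sectors that were never written. This is illustrative only and is not pblk's actual data structure, allocation scheme, or locking.

/*
 * Minimal user-space sketch of a host-side L2P table at 4KB granularity.
 * Illustrative only -- this is not pblk's implementation.
 */
#include <stdint.h>
#include <stdlib.h>

#define L2P_EMPTY ((uint64_t)~0ULL)	/* marker for an unmapped logical sector */

struct l2p_table {
	uint64_t nr_secs;	/* number of 4KB logical sectors */
	uint64_t *map;		/* map[lba] = physical address, or L2P_EMPTY */
};

static struct l2p_table *l2p_create(uint64_t nr_secs)
{
	struct l2p_table *t = malloc(sizeof(*t));
	uint64_t i;

	if (!t)
		return NULL;
	t->map = malloc(nr_secs * sizeof(*t->map));
	if (!t->map) {
		free(t);
		return NULL;
	}
	for (i = 0; i < nr_secs; i++)
		t->map[i] = L2P_EMPTY;	/* everything starts unmapped */
	t->nr_secs = nr_secs;
	return t;
}

/* Point an LBA at the physical address it was just written to. */
static void l2p_update(struct l2p_table *t, uint64_t lba, uint64_t ppa)
{
	if (lba < t->nr_secs)
		t->map[lba] = ppa;
}

/* Resolve an LBA for a read; L2P_EMPTY means the sector was never written. */
static uint64_t l2p_lookup(const struct l2p_table *t, uint64_t lba)
{
	return lba < t->nr_secs ? t->map[lba] : L2P_EMPTY;
}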
@@ -160,7 +160,6 @@ Code Seq# Include File Comments
'K' all linux/kd.h
'L' 00-1F linux/loop.h conflict!
'L' 10-1F drivers/scsi/mpt3sas/mpt3sas_ctl.h conflict!
'L' 20-2F linux/lightnvm.h
'L' E0-FF linux/ppdd.h encrypted disk device driver
<http://linux01.gwdg.de/~alatham/ppdd.html>
'M' all linux/soundcard.h conflict!
@@ -10609,15 +10609,6 @@ F: LICENSES/
F: scripts/spdxcheck-test.sh
F: scripts/spdxcheck.py
LIGHTNVM PLATFORM SUPPORT
M: Matias Bjorling <mb@lightnvm.io>
L: linux-block@vger.kernel.org
S: Maintained
W: http://github/OpenChannelSSD
F: drivers/lightnvm/
F: include/linux/lightnvm.h
F: include/uapi/linux/lightnvm.h
LINEAR RANGES HELPERS
M: Mark Brown <broonie@kernel.org>
R: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
@@ -51,8 +51,6 @@ source "drivers/net/Kconfig"
source "drivers/isdn/Kconfig"
source "drivers/lightnvm/Kconfig"
# input before char - char/joystick depends on it. As does USB.
source "drivers/input/Kconfig"
@@ -70,7 +70,6 @@ obj-$(CONFIG_FB_I810) += video/fbdev/i810/
obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/
obj-$(CONFIG_PARPORT) += parport/
obj-$(CONFIG_NVM) += lightnvm/
obj-y += base/ block/ misc/ mfd/ nfc/
obj-$(CONFIG_LIBNVDIMM) += nvdimm/
obj-$(CONFIG_DAX) += dax/
# SPDX-License-Identifier: GPL-2.0-only
#
# Open-Channel SSD NVM configuration
#
menuconfig NVM
bool "Open-Channel SSD target support (DEPRECATED)"
depends on BLOCK
help
Say Y here to enable Open-Channel SSDs.
Open-Channel SSDs implement a set of extensions to SSDs that expose
direct access to the underlying non-volatile memory.
If you say N, all options in this submenu will be skipped and disabled;
only do this if you know what you are doing.
This code is deprecated and will be removed in Linux 5.15.
if NVM
config NVM_PBLK
tristate "Physical Block Device Open-Channel SSD target"
select CRC32
help
Allows an open-channel SSD to be exposed as a block device to the
host. The target assumes the device exposes raw flash and must be
explicitly managed by the host.
Please note the disk format is considered EXPERIMENTAL for now.
if NVM_PBLK
config NVM_PBLK_DEBUG
bool "PBlk Debug Support"
default n
help
Enables debug support for pblk. This includes extra checks, more
vocal error messages, and extra tracking fields in the pblk sysfs
entries.
endif # NVM_PBLK
endif # NVM
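For reference, on a kernel tree from before this removal, building the subsystem and the pblk target described above would have come down to a configuration fragment along these lines (illustrative; NVM is a bool option and NVM_PBLK is tristate, so pblk could also be built in with =y):

CONFIG_NVM=y
CONFIG_NVM_PBLK=m
CONFIG_NVM_PBLK_DEBUG=y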
# SPDX-License-Identifier: GPL-2.0
#
# Makefile for Open-Channel SSDs.
#
obj-$(CONFIG_NVM) := core.o
obj-$(CONFIG_NVM_PBLK) += pblk.o
pblk-y := pblk-init.o pblk-core.o pblk-rb.o \
pblk-write.o pblk-cache.o pblk-read.o \
pblk-gc.o pblk-recovery.o pblk-map.o \
pblk-rl.o pblk-sysfs.o
This diff has been collapsed.
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2016 CNEX Labs
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
* Matias Bjorling <matias@cnexlabs.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* pblk-cache.c - pblk's write cache
*/
#include "pblk.h"
void pblk_write_to_cache(struct pblk *pblk, struct bio *bio,
unsigned long flags)
{
struct pblk_w_ctx w_ctx;
sector_t lba = pblk_get_lba(bio);
unsigned long start_time;
unsigned int bpos, pos;
int nr_entries = pblk_get_secs(bio);
int i, ret;
start_time = bio_start_io_acct(bio);
/* Update the write buffer head (mem) with the entries that we can
* write. The write in itself cannot fail, so there is no need to
* rollback from here on.
*/
retry:
ret = pblk_rb_may_write_user(&pblk->rwb, bio, nr_entries, &bpos);
switch (ret) {
case NVM_IO_REQUEUE:
io_schedule();
goto retry;
case NVM_IO_ERR:
pblk_pipeline_stop(pblk);
bio_io_error(bio);
goto out;
}
pblk_ppa_set_empty(&w_ctx.ppa);
w_ctx.flags = flags;
if (bio->bi_opf & REQ_PREFLUSH) {
w_ctx.flags |= PBLK_FLUSH_ENTRY;
pblk_write_kick(pblk);
}
if (unlikely(!bio_has_data(bio)))
goto out;
for (i = 0; i < nr_entries; i++) {
void *data = bio_data(bio);
w_ctx.lba = lba + i;
pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + i);
pblk_rb_write_entry_user(&pblk->rwb, data, w_ctx, pos);
bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
}
atomic64_add(nr_entries, &pblk->user_wa);
#ifdef CONFIG_NVM_PBLK_DEBUG
atomic_long_add(nr_entries, &pblk->inflight_writes);
atomic_long_add(nr_entries, &pblk->req_writes);
#endif
pblk_rl_inserted(&pblk->rl, nr_entries);
out:
bio_end_io_acct(bio, start_time);
pblk_write_should_kick(pblk);
if (ret == NVM_IO_DONE)
bio_endio(bio);
}
/*
* On GC the incoming lbas are not necessarily sequential. Also, some of the
* lbas might not be valid entries, which are marked as empty by the GC thread
*/
int pblk_write_gc_to_cache(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
{
struct pblk_w_ctx w_ctx;
unsigned int bpos, pos;
void *data = gc_rq->data;
int i, valid_entries;
/* Update the write buffer head (mem) with the entries that we can
* write. The write in itself cannot fail, so there is no need to
* rollback from here on.
*/
retry:
if (!pblk_rb_may_write_gc(&pblk->rwb, gc_rq->secs_to_gc, &bpos)) {
io_schedule();
goto retry;
}
w_ctx.flags = PBLK_IOTYPE_GC;
pblk_ppa_set_empty(&w_ctx.ppa);
for (i = 0, valid_entries = 0; i < gc_rq->nr_secs; i++) {
if (gc_rq->lba_list[i] == ADDR_EMPTY)
continue;
w_ctx.lba = gc_rq->lba_list[i];
pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + valid_entries);
pblk_rb_write_entry_gc(&pblk->rwb, data, w_ctx, gc_rq->line,
gc_rq->paddr_list[i], pos);
data += PBLK_EXPOSED_PAGE_SIZE;
valid_entries++;
}
WARN_ONCE(gc_rq->secs_to_gc != valid_entries,
"pblk: inconsistent GC write\n");
atomic64_add(valid_entries, &pblk->gc_wa);
#ifdef CONFIG_NVM_PBLK_DEBUG
atomic_long_add(valid_entries, &pblk->inflight_writes);
atomic_long_add(valid_entries, &pblk->recov_gc_writes);
#endif
pblk_write_should_kick(pblk);
return NVM_IO_OK;
}
This diff has been collapsed.
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2016 CNEX Labs
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
* Matias Bjorling <matias@cnexlabs.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* pblk-gc.c - pblk's garbage collector
*/
#include "pblk.h"
#include "pblk-trace.h"
#include <linux/delay.h>
static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
{
vfree(gc_rq->data);
kfree(gc_rq);
}
static int pblk_gc_write(struct pblk *pblk)
{
struct pblk_gc *gc = &pblk->gc;
struct pblk_gc_rq *gc_rq, *tgc_rq;
LIST_HEAD(w_list);
spin_lock(&gc->w_lock);
if (list_empty(&gc->w_list)) {
spin_unlock(&gc->w_lock);
return 1;
}
list_cut_position(&w_list, &gc->w_list, gc->w_list.prev);
gc->w_entries = 0;
spin_unlock(&gc->w_lock);
list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
pblk_write_gc_to_cache(pblk, gc_rq);
list_del(&gc_rq->list);
kref_put(&gc_rq->line->ref, pblk_line_put);
pblk_gc_free_gc_rq(gc_rq);
}
return 0;
}
static void pblk_gc_writer_kick(struct pblk_gc *gc)
{
wake_up_process(gc->gc_writer_ts);
}
void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct list_head *move_list;
spin_lock(&l_mg->gc_lock);
spin_lock(&line->lock);
WARN_ON(line->state != PBLK_LINESTATE_GC);
line->state = PBLK_LINESTATE_CLOSED;
trace_pblk_line_state(pblk_disk_name(pblk), line->id,
line->state);
/* We need to reset gc_group in order to ensure that
* pblk_line_gc_list will return proper move_list
* since right now current line is not on any of the
* gc lists.
*/
line->gc_group = PBLK_LINEGC_NONE;
move_list = pblk_line_gc_list(pblk, line);
spin_unlock(&line->lock);
list_add_tail(&line->list, move_list);
spin_unlock(&l_mg->gc_lock);
}
static void pblk_gc_line_ws(struct work_struct *work)
{
struct pblk_line_ws *gc_rq_ws = container_of(work,
struct pblk_line_ws, ws);
struct pblk *pblk = gc_rq_ws->pblk;
struct pblk_gc *gc = &pblk->gc;
struct pblk_line *line = gc_rq_ws->line;
struct pblk_gc_rq *gc_rq = gc_rq_ws->priv;
int ret;
up(&gc->gc_sem);
/* Read from GC victim block */
ret = pblk_submit_read_gc(pblk, gc_rq);
if (ret) {
line->w_err_gc->has_gc_err = 1;
goto out;
}
if (!gc_rq->secs_to_gc)
goto out;
retry:
spin_lock(&gc->w_lock);
if (gc->w_entries >= PBLK_GC_RQ_QD) {
spin_unlock(&gc->w_lock);
pblk_gc_writer_kick(&pblk->gc);
usleep_range(128, 256);
goto retry;
}
gc->w_entries++;
list_add_tail(&gc_rq->list, &gc->w_list);
spin_unlock(&gc->w_lock);
pblk_gc_writer_kick(&pblk->gc);
kfree(gc_rq_ws);
return;
out:
pblk_gc_free_gc_rq(gc_rq);
kref_put(&line->ref, pblk_line_put);
kfree(gc_rq_ws);
}
static __le64 *get_lba_list_from_emeta(struct pblk *pblk,
struct pblk_line *line)
{
struct line_emeta *emeta_buf;
struct pblk_line_meta *lm = &pblk->lm;
unsigned int lba_list_size = lm->emeta_len[2];
__le64 *lba_list;
int ret;
emeta_buf = kvmalloc(lm->emeta_len[0], GFP_KERNEL);
if (!emeta_buf)
return NULL;
ret = pblk_line_emeta_read(pblk, line, emeta_buf);
if (ret) {
pblk_err(pblk, "line %d read emeta failed (%d)\n",
line->id, ret);
kvfree(emeta_buf);
return NULL;
}
/* If this read fails, it means that emeta is corrupted.
* For now, leave the line untouched.
* TODO: Implement a recovery routine that scans and moves
* all sectors on the line.
*/
ret = pblk_recov_check_emeta(pblk, emeta_buf);
if (ret) {
pblk_err(pblk, "inconsistent emeta (line %d)\n",
line->id);
kvfree(emeta_buf);
return NULL;
}
lba_list = kvmalloc(lba_list_size, GFP_KERNEL);
if (lba_list)
memcpy(lba_list, emeta_to_lbas(pblk, emeta_buf), lba_list_size);
kvfree(emeta_buf);
return lba_list;
}
static void pblk_gc_line_prepare_ws(struct work_struct *work)
{
struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
ws);
struct pblk *pblk = line_ws->pblk;
struct pblk_line *line = line_ws->line;
struct pblk_line_meta *lm = &pblk->lm;
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_gc *gc = &pblk->gc;
struct pblk_line_ws *gc_rq_ws;
struct pblk_gc_rq *gc_rq;
__le64 *lba_list;
unsigned long *invalid_bitmap;
int sec_left, nr_secs, bit;
invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL);
if (!invalid_bitmap)
goto fail_free_ws;
if (line->w_err_gc->has_write_err) {
lba_list = line->w_err_gc->lba_list;
line->w_err_gc->lba_list = NULL;
} else {
lba_list = get_lba_list_from_emeta(pblk, line);
if (!lba_list) {
pblk_err(pblk, "could not interpret emeta (line %d)\n",
line->id);
goto fail_free_invalid_bitmap;
}
}
spin_lock(&line->lock);
bitmap_copy(invalid_bitmap, line->invalid_bitmap, lm->sec_per_line);
sec_left = pblk_line_vsc(line);
spin_unlock(&line->lock);
if (sec_left < 0) {
pblk_err(pblk, "corrupted GC line (%d)\n", line->id);
goto fail_free_lba_list;
}
bit = -1;
next_rq:
gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
if (!gc_rq)
goto fail_free_lba_list;
nr_secs = 0;
do {
bit = find_next_zero_bit(invalid_bitmap, lm->sec_per_line,
bit + 1);
if (bit > line->emeta_ssec)
break;
gc_rq->paddr_list[nr_secs] = bit;
gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]);
} while (nr_secs < pblk->max_write_pgs);
if (unlikely(!nr_secs)) {
kfree(gc_rq);
goto out;
}
gc_rq->nr_secs = nr_secs;
gc_rq->line = line;
gc_rq->data = vmalloc(array_size(gc_rq->nr_secs, geo->csecs));
if (!gc_rq->data)
goto fail_free_gc_rq;
gc_rq_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL);
if (!gc_rq_ws)
goto fail_free_gc_data;
gc_rq_ws->pblk = pblk;
gc_rq_ws->line = line;
gc_rq_ws->priv = gc_rq;
/* The write GC path can be much slower than the read GC one due to
* the budget imposed by the rate-limiter. Balance in case that we get
* back pressure from the write GC path.
*/
while (down_timeout(&gc->gc_sem, msecs_to_jiffies(30000)))
io_schedule();
kref_get(&line->ref);
INIT_WORK(&gc_rq_ws->ws, pblk_gc_line_ws);
queue_work(gc->gc_line_reader_wq, &gc_rq_ws->ws);
sec_left -= nr_secs;
if (sec_left > 0)
goto next_rq;
out:
kvfree(lba_list);
kfree(line_ws);
kfree(invalid_bitmap);
kref_put(&line->ref, pblk_line_put);
atomic_dec(&gc->read_inflight_gc);
return;
fail_free_gc_data:
vfree(gc_rq->data);
fail_free_gc_rq:
kfree(gc_rq);
fail_free_lba_list:
kvfree(lba_list);
fail_free_invalid_bitmap:
kfree(invalid_bitmap);
fail_free_ws:
kfree(line_ws);
/* The line goes back to the closed state, so we cannot release the
* additional line reference here; that is only done on the GC-to-free
* line state transition.
*/
pblk_put_line_back(pblk, line);
atomic_dec(&gc->read_inflight_gc);
pblk_err(pblk, "failed to GC line %d\n", line->id);
}
static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
{
struct pblk_gc *gc = &pblk->gc;
struct pblk_line_ws *line_ws;
pblk_debug(pblk, "line '%d' being reclaimed for GC\n", line->id);
line_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL);
if (!line_ws)
return -ENOMEM;
line_ws->pblk = pblk;
line_ws->line = line;
atomic_inc(&gc->pipeline_gc);
INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws);
queue_work(gc->gc_reader_wq, &line_ws->ws);
return 0;
}
static void pblk_gc_reader_kick(struct pblk_gc *gc)
{
wake_up_process(gc->gc_reader_ts);
}
static void pblk_gc_kick(struct pblk *pblk)
{
struct pblk_gc *gc = &pblk->gc;
pblk_gc_writer_kick(gc);
pblk_gc_reader_kick(gc);
/* If we're shutting down GC, let's not start it up again */
if (gc->gc_enabled) {
wake_up_process(gc->gc_ts);
mod_timer(&gc->gc_timer,
jiffies + msecs_to_jiffies(GC_TIME_MSECS));
}
}
static int pblk_gc_read(struct pblk *pblk)
{
struct pblk_gc *gc = &pblk->gc;
struct pblk_line *line;
spin_lock(&gc->r_lock);
if (list_empty(&gc->r_list)) {
spin_unlock(&gc->r_lock);
return 1;
}
line = list_first_entry(&gc->r_list, struct pblk_line, list);
list_del(&line->list);
spin_unlock(&gc->r_lock);
pblk_gc_kick(pblk);
if (pblk_gc_line(pblk, line)) {
pblk_err(pblk, "failed to GC line %d\n", line->id);
/* rollback */
spin_lock(&gc->r_lock);
list_add_tail(&line->list, &gc->r_list);
spin_unlock(&gc->r_lock);
}
return 0;
}
static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
struct list_head *group_list)
{
struct pblk_line *line, *victim;
unsigned int line_vsc = ~0x0L, victim_vsc = ~0x0L;
victim = list_first_entry(group_list, struct pblk_line, list);
list_for_each_entry(line, group_list, list) {
if (!atomic_read(&line->sec_to_update))
line_vsc = le32_to_cpu(*line->vsc);
if (line_vsc < victim_vsc) {
victim = line;
victim_vsc = le32_to_cpu(*victim->vsc);
}
}
if (victim_vsc == ~0x0)
return NULL;
return victim;
}
static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
{
unsigned int nr_blocks_free, nr_blocks_need;
unsigned int werr_lines = atomic_read(&rl->werr_lines);
nr_blocks_need = pblk_rl_high_thrs(rl);
nr_blocks_free = pblk_rl_nr_free_blks(rl);
/* This is not critical, no need to take lock here */
return ((werr_lines > 0) ||
((gc->gc_active) && (nr_blocks_need > nr_blocks_free)));
}
void pblk_gc_free_full_lines(struct pblk *pblk)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_gc *gc = &pblk->gc;
struct pblk_line *line;
do {
spin_lock(&l_mg->gc_lock);
if (list_empty(&l_mg->gc_full_list)) {
spin_unlock(&l_mg->gc_lock);
return;
}
line = list_first_entry(&l_mg->gc_full_list,
struct pblk_line, list);
spin_lock(&line->lock);
WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
line->state = PBLK_LINESTATE_GC;
trace_pblk_line_state(pblk_disk_name(pblk), line->id,
line->state);
spin_unlock(&line->lock);
list_del(&line->list);
spin_unlock(&l_mg->gc_lock);
atomic_inc(&gc->pipeline_gc);
kref_put(&line->ref, pblk_line_put);
} while (1);
}
/*
* Lines with no valid sectors will be returned to the free list immediately. If
* GC is activated - either because the free block count is under the determined
* threshold, or because it is being forced from user space - only lines with a
* high count of invalid sectors will be recycled.
*/
static void pblk_gc_run(struct pblk *pblk)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_gc *gc = &pblk->gc;
struct pblk_line *line;
struct list_head *group_list;
bool run_gc;
int read_inflight_gc, gc_group = 0, prev_group = 0;
pblk_gc_free_full_lines(pblk);
run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
if (!run_gc || (atomic_read(&gc->read_inflight_gc) >= PBLK_GC_L_QD))
return;
next_gc_group:
group_list = l_mg->gc_lists[gc_group++];
do {
spin_lock(&l_mg->gc_lock);
line = pblk_gc_get_victim_line(pblk, group_list);
if (!line) {
spin_unlock(&l_mg->gc_lock);
break;
}
spin_lock(&line->lock);
WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
line->state = PBLK_LINESTATE_GC;
trace_pblk_line_state(pblk_disk_name(pblk), line->id,
line->state);
spin_unlock(&line->lock);
list_del(&line->list);
spin_unlock(&l_mg->gc_lock);
spin_lock(&gc->r_lock);
list_add_tail(&line->list, &gc->r_list);
spin_unlock(&gc->r_lock);
read_inflight_gc = atomic_inc_return(&gc->read_inflight_gc);
pblk_gc_reader_kick(gc);
prev_group = 1;
/* No need to queue up more GC lines than we can handle */
run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
if (!run_gc || read_inflight_gc >= PBLK_GC_L_QD)
break;
} while (1);
if (!prev_group && pblk->rl.rb_state > gc_group &&
gc_group < PBLK_GC_NR_LISTS)
goto next_gc_group;
}
static void pblk_gc_timer(struct timer_list *t)
{
struct pblk *pblk = from_timer(pblk, t, gc.gc_timer);
pblk_gc_kick(pblk);
}
static int pblk_gc_ts(void *data)
{
struct pblk *pblk = data;
while (!kthread_should_stop()) {
pblk_gc_run(pblk);
set_current_state(TASK_INTERRUPTIBLE);
io_schedule();
}
return 0;
}
static int pblk_gc_writer_ts(void *data)
{
struct pblk *pblk = data;
while (!kthread_should_stop()) {
if (!pblk_gc_write(pblk))
continue;
set_current_state(TASK_INTERRUPTIBLE);
io_schedule();
}
return 0;
}
static int pblk_gc_reader_ts(void *data)
{
struct pblk *pblk = data;
struct pblk_gc *gc = &pblk->gc;
while (!kthread_should_stop()) {
if (!pblk_gc_read(pblk))
continue;
set_current_state(TASK_INTERRUPTIBLE);
io_schedule();
}
#ifdef CONFIG_NVM_PBLK_DEBUG
pblk_info(pblk, "flushing gc pipeline, %d lines left\n",
atomic_read(&gc->pipeline_gc));
#endif
do {
if (!atomic_read(&gc->pipeline_gc))
break;
schedule();
} while (1);
return 0;
}
static void pblk_gc_start(struct pblk *pblk)
{
pblk->gc.gc_active = 1;
pblk_debug(pblk, "gc start\n");
}
void pblk_gc_should_start(struct pblk *pblk)
{
struct pblk_gc *gc = &pblk->gc;
if (gc->gc_enabled && !gc->gc_active) {
pblk_gc_start(pblk);
pblk_gc_kick(pblk);
}
}
void pblk_gc_should_stop(struct pblk *pblk)
{
struct pblk_gc *gc = &pblk->gc;
if (gc->gc_active && !gc->gc_forced)
gc->gc_active = 0;
}
void pblk_gc_should_kick(struct pblk *pblk)
{
pblk_rl_update_rates(&pblk->rl);
}
void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
int *gc_active)
{
struct pblk_gc *gc = &pblk->gc;
spin_lock(&gc->lock);
*gc_enabled = gc->gc_enabled;
*gc_active = gc->gc_active;
spin_unlock(&gc->lock);
}
int pblk_gc_sysfs_force(struct pblk *pblk, int force)
{
struct pblk_gc *gc = &pblk->gc;
if (force < 0 || force > 1)
return -EINVAL;
spin_lock(&gc->lock);
gc->gc_forced = force;
if (force)
gc->gc_enabled = 1;
else
gc->gc_enabled = 0;
spin_unlock(&gc->lock);
pblk_gc_should_start(pblk);
return 0;
}
int pblk_gc_init(struct pblk *pblk)
{
struct pblk_gc *gc = &pblk->gc;
int ret;
gc->gc_ts = kthread_create(pblk_gc_ts, pblk, "pblk-gc-ts");
if (IS_ERR(gc->gc_ts)) {
pblk_err(pblk, "could not allocate GC main kthread\n");
return PTR_ERR(gc->gc_ts);
}
gc->gc_writer_ts = kthread_create(pblk_gc_writer_ts, pblk,
"pblk-gc-writer-ts");
if (IS_ERR(gc->gc_writer_ts)) {
pblk_err(pblk, "could not allocate GC writer kthread\n");
ret = PTR_ERR(gc->gc_writer_ts);
goto fail_free_main_kthread;
}
gc->gc_reader_ts = kthread_create(pblk_gc_reader_ts, pblk,
"pblk-gc-reader-ts");
if (IS_ERR(gc->gc_reader_ts)) {
pblk_err(pblk, "could not allocate GC reader kthread\n");
ret = PTR_ERR(gc->gc_reader_ts);
goto fail_free_writer_kthread;
}
timer_setup(&gc->gc_timer, pblk_gc_timer, 0);
mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
gc->gc_active = 0;
gc->gc_forced = 0;
gc->gc_enabled = 1;
gc->w_entries = 0;
atomic_set(&gc->read_inflight_gc, 0);
atomic_set(&gc->pipeline_gc, 0);
/* Workqueue that reads valid sectors from a line and submits them to the
* GC writer to be recycled.
*/
gc->gc_line_reader_wq = alloc_workqueue("pblk-gc-line-reader-wq",
WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_GC_MAX_READERS);
if (!gc->gc_line_reader_wq) {
pblk_err(pblk, "could not allocate GC line reader workqueue\n");
ret = -ENOMEM;
goto fail_free_reader_kthread;
}
/* Workqueue that prepares lines for GC */
gc->gc_reader_wq = alloc_workqueue("pblk-gc-line_wq",
WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
if (!gc->gc_reader_wq) {
pblk_err(pblk, "could not allocate GC reader workqueue\n");
ret = -ENOMEM;
goto fail_free_reader_line_wq;
}
spin_lock_init(&gc->lock);
spin_lock_init(&gc->w_lock);
spin_lock_init(&gc->r_lock);
sema_init(&gc->gc_sem, PBLK_GC_RQ_QD);
INIT_LIST_HEAD(&gc->w_list);
INIT_LIST_HEAD(&gc->r_list);
return 0;
fail_free_reader_line_wq:
destroy_workqueue(gc->gc_line_reader_wq);
fail_free_reader_kthread:
kthread_stop(gc->gc_reader_ts);
fail_free_writer_kthread:
kthread_stop(gc->gc_writer_ts);
fail_free_main_kthread:
kthread_stop(gc->gc_ts);
return ret;
}
void pblk_gc_exit(struct pblk *pblk, bool graceful)
{
struct pblk_gc *gc = &pblk->gc;
gc->gc_enabled = 0;
del_timer_sync(&gc->gc_timer);
gc->gc_active = 0;
if (gc->gc_ts)
kthread_stop(gc->gc_ts);
if (gc->gc_reader_ts)
kthread_stop(gc->gc_reader_ts);
if (graceful) {
flush_workqueue(gc->gc_reader_wq);
flush_workqueue(gc->gc_line_reader_wq);
}
destroy_workqueue(gc->gc_reader_wq);
destroy_workqueue(gc->gc_line_reader_wq);
if (gc->gc_writer_ts)
kthread_stop(gc->gc_writer_ts);
}
This diff has been collapsed.
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2016 CNEX Labs
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
* Matias Bjorling <matias@cnexlabs.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* pblk-map.c - pblk's lba-ppa mapping strategy
*
*/
#include "pblk.h"
static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
struct ppa_addr *ppa_list,
unsigned long *lun_bitmap,
void *meta_list,
unsigned int valid_secs)
{
struct pblk_line *line = pblk_line_get_data(pblk);
struct pblk_emeta *emeta;
struct pblk_w_ctx *w_ctx;
__le64 *lba_list;
u64 paddr;
int nr_secs = pblk->min_write_pgs;
int i;
if (!line)
return -ENOSPC;
if (pblk_line_is_full(line)) {
struct pblk_line *prev_line = line;
/* If we cannot allocate a new line, make sure to store metadata
* on current line and then fail
*/
line = pblk_line_replace_data(pblk);
pblk_line_close_meta(pblk, prev_line);
if (!line) {
pblk_pipeline_stop(pblk);
return -ENOSPC;
}
}
emeta = line->emeta;
lba_list = emeta_to_lbas(pblk, emeta->buf);
paddr = pblk_alloc_page(pblk, line, nr_secs);
for (i = 0; i < nr_secs; i++, paddr++) {
struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
/* ppa to be sent to the device */
ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
/* Write context for target bio completion on write buffer. Note
* that the write buffer is protected by the sync backpointer,
* and a single writer thread has access to each specific entry
* at a time. Thus, it is safe to modify the context for the
* entry we are setting up for submission without taking any
* lock or memory barrier.
*/
if (i < valid_secs) {
kref_get(&line->ref);
atomic_inc(&line->sec_to_update);
w_ctx = pblk_rb_w_ctx(&pblk->rwb, sentry + i);
w_ctx->ppa = ppa_list[i];
meta->lba = cpu_to_le64(w_ctx->lba);
lba_list[paddr] = cpu_to_le64(w_ctx->lba);
if (lba_list[paddr] != addr_empty)
line->nr_valid_lbas++;
else
atomic64_inc(&pblk->pad_wa);
} else {
lba_list[paddr] = addr_empty;
meta->lba = addr_empty;
__pblk_map_invalidate(pblk, line, paddr);
}
}
pblk_down_rq(pblk, ppa_list[0], lun_bitmap);
return 0;
}
int pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
unsigned long *lun_bitmap, unsigned int valid_secs,
unsigned int off)
{
void *meta_list = pblk_get_meta_for_writes(pblk, rqd);
void *meta_buffer;
struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
unsigned int map_secs;
int min = pblk->min_write_pgs;
int i;
int ret;
for (i = off; i < rqd->nr_ppas; i += min) {
map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
meta_buffer = pblk_get_meta(pblk, meta_list, i);
ret = pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
lun_bitmap, meta_buffer, map_secs);
if (ret)
return ret;
}
return 0;
}
/* only if erase_ppa is set, acquire erase semaphore */
int pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
unsigned int sentry, unsigned long *lun_bitmap,
unsigned int valid_secs, struct ppa_addr *erase_ppa)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_line_meta *lm = &pblk->lm;
void *meta_list = pblk_get_meta_for_writes(pblk, rqd);
void *meta_buffer;
struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
struct pblk_line *e_line, *d_line;
unsigned int map_secs;
int min = pblk->min_write_pgs;
int i, erase_lun;
int ret;
for (i = 0; i < rqd->nr_ppas; i += min) {
map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
meta_buffer = pblk_get_meta(pblk, meta_list, i);
ret = pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
lun_bitmap, meta_buffer, map_secs);
if (ret)
return ret;
erase_lun = pblk_ppa_to_pos(geo, ppa_list[i]);
/* line can change after page map. We might also be writing the
* last line.
*/
e_line = pblk_line_get_erase(pblk);
if (!e_line)
return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
valid_secs, i + min);
spin_lock(&e_line->lock);
if (!test_bit(erase_lun, e_line->erase_bitmap)) {
set_bit(erase_lun, e_line->erase_bitmap);
atomic_dec(&e_line->left_eblks);
*erase_ppa = ppa_list[i];
erase_ppa->a.blk = e_line->id;
erase_ppa->a.reserved = 0;
spin_unlock(&e_line->lock);
/* Avoid evaluating e_line->left_eblks */
return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
valid_secs, i + min);
}
spin_unlock(&e_line->lock);
}
d_line = pblk_line_get_data(pblk);
/* line can change after page map. We might also be writing the
* last line.
*/
e_line = pblk_line_get_erase(pblk);
if (!e_line)
return -ENOSPC;
/* Erase blocks that are bad in this line but might not be in next */
if (unlikely(pblk_ppa_empty(*erase_ppa)) &&
bitmap_weight(d_line->blk_bitmap, lm->blk_per_line)) {
int bit = -1;
retry:
bit = find_next_bit(d_line->blk_bitmap,
lm->blk_per_line, bit + 1);
if (bit >= lm->blk_per_line)
return 0;
spin_lock(&e_line->lock);
if (test_bit(bit, e_line->erase_bitmap)) {
spin_unlock(&e_line->lock);
goto retry;
}
spin_unlock(&e_line->lock);
set_bit(bit, e_line->erase_bitmap);
atomic_dec(&e_line->left_eblks);
*erase_ppa = pblk->luns[bit].bppa; /* set ch and lun */
erase_ppa->a.blk = e_line->id;
}
return 0;
}
This diff has been collapsed.
This diff has been collapsed.
This diff has been collapsed.
This diff has been collapsed.
This diff has been collapsed.
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM pblk
#if !defined(_TRACE_PBLK_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_PBLK_H
#include <linux/tracepoint.h>
struct ppa_addr;
#define show_chunk_flags(state) __print_flags(state, "", \
{ NVM_CHK_ST_FREE, "FREE", }, \
{ NVM_CHK_ST_CLOSED, "CLOSED", }, \
{ NVM_CHK_ST_OPEN, "OPEN", }, \
{ NVM_CHK_ST_OFFLINE, "OFFLINE", })
#define show_line_state(state) __print_symbolic(state, \
{ PBLK_LINESTATE_NEW, "NEW", }, \
{ PBLK_LINESTATE_FREE, "FREE", }, \
{ PBLK_LINESTATE_OPEN, "OPEN", }, \
{ PBLK_LINESTATE_CLOSED, "CLOSED", }, \
{ PBLK_LINESTATE_GC, "GC", }, \
{ PBLK_LINESTATE_BAD, "BAD", }, \
{ PBLK_LINESTATE_CORRUPT, "CORRUPT" })
#define show_pblk_state(state) __print_symbolic(state, \
{ PBLK_STATE_RUNNING, "RUNNING", }, \
{ PBLK_STATE_STOPPING, "STOPPING", }, \
{ PBLK_STATE_RECOVERING, "RECOVERING", }, \
{ PBLK_STATE_STOPPED, "STOPPED" })
#define show_chunk_erase_state(state) __print_symbolic(state, \
{ PBLK_CHUNK_RESET_START, "START", }, \
{ PBLK_CHUNK_RESET_DONE, "OK", }, \
{ PBLK_CHUNK_RESET_FAILED, "FAILED" })
TRACE_EVENT(pblk_chunk_reset,
TP_PROTO(const char *name, struct ppa_addr *ppa, int state),
TP_ARGS(name, ppa, state),
TP_STRUCT__entry(
__string(name, name)
__field(u64, ppa)
__field(int, state)
),
TP_fast_assign(
__assign_str(name, name);
__entry->ppa = ppa->ppa;
__entry->state = state;
),
TP_printk("dev=%s grp=%llu pu=%llu chk=%llu state=%s", __get_str(name),
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.grp),
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.pu),
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.chk),
show_chunk_erase_state((int)__entry->state))
);
TRACE_EVENT(pblk_chunk_state,
TP_PROTO(const char *name, struct ppa_addr *ppa, int state),
TP_ARGS(name, ppa, state),
TP_STRUCT__entry(
__string(name, name)
__field(u64, ppa)
__field(int, state)
),
TP_fast_assign(
__assign_str(name, name);
__entry->ppa = ppa->ppa;
__entry->state = state;
),
TP_printk("dev=%s grp=%llu pu=%llu chk=%llu state=%s", __get_str(name),
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.grp),
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.pu),
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.chk),
show_chunk_flags((int)__entry->state))
);
TRACE_EVENT(pblk_line_state,
TP_PROTO(const char *name, int line, int state),
TP_ARGS(name, line, state),
TP_STRUCT__entry(
__string(name, name)
__field(int, line)
__field(int, state)
),
TP_fast_assign(
__assign_str(name, name);
__entry->line = line;
__entry->state = state;
),
TP_printk("dev=%s line=%d state=%s", __get_str(name),
(int)__entry->line,
show_line_state((int)__entry->state))
);
TRACE_EVENT(pblk_state,
TP_PROTO(const char *name, int state),
TP_ARGS(name, state),
TP_STRUCT__entry(
__string(name, name)
__field(int, state)
),
TP_fast_assign(
__assign_str(name, name);
__entry->state = state;
),
TP_printk("dev=%s state=%s", __get_str(name),
show_pblk_state((int)__entry->state))
);
#endif /* !defined(_TRACE_PBLK_H) || defined(TRACE_HEADER_MULTI_READ) */
/* This part must be outside protection */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH ../../drivers/lightnvm
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE pblk-trace
#include <trace/define_trace.h>
This diff has been collapsed.
This diff has been collapsed.
@@ -12,7 +12,6 @@ obj-$(CONFIG_NVME_TCP) += nvme-tcp.o
nvme-core-y := core.o ioctl.o
nvme-core-$(CONFIG_TRACING) += trace.o
nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o
nvme-core-$(CONFIG_NVM) += lightnvm.o
nvme-core-$(CONFIG_BLK_DEV_ZONED) += zns.o
nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o
nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o
@@ -587,9 +587,6 @@ static void nvme_free_ns(struct kref *kref)
{
struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
if (ns->ndev)
nvme_nvm_unregister(ns);
put_disk(ns->disk);
nvme_put_ns_head(ns->head);
nvme_put_ctrl(ns->ctrl);
@@ -3218,9 +3215,6 @@ static const struct attribute_group nvme_ns_id_attr_group = {
const struct attribute_group *nvme_ns_id_attr_groups[] = {
&nvme_ns_id_attr_group,
#ifdef CONFIG_NVM
&nvme_nvm_attr_group,
#endif
NULL,
};
@@ -3767,13 +3761,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
if (nvme_update_ns_info(ns, id))
goto out_put_disk;
if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
if (nvme_nvm_register(ns, disk->disk_name, node)) {
dev_warn(ctrl->device, "LightNVM init failure\n");
goto out_put_disk;
}
}
down_write(&ctrl->namespaces_rwsem);
list_add_tail(&ns->list, &ctrl->namespaces);
up_write(&ctrl->namespaces_rwsem);
@@ -342,9 +342,7 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
case NVME_IOCTL_IO64_CMD:
return nvme_user_cmd64(ns->ctrl, ns, argp);
default:
if (!ns->ndev)
return -ENOTTY;
return nvme_nvm_ioctl(ns, cmd, argp);
return -ENOTTY;
}
}
This diff has been collapsed.
This diff has been collapsed.
@@ -3243,12 +3243,6 @@ static const struct pci_device_id nvme_id_table[] = {
{ PCI_DEVICE(0x1b4b, 0x1092), /* Lexar 256 GB SSD */
.driver_data = NVME_QUIRK_NO_NS_DESC_LIST |
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_DEVICE(0x1d1d, 0x1f1f), /* LightNVM qemu device */
.driver_data = NVME_QUIRK_LIGHTNVM, },
{ PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */
.driver_data = NVME_QUIRK_LIGHTNVM, },
{ PCI_DEVICE(0x1d1d, 0x2601), /* CNEX Granby */
.driver_data = NVME_QUIRK_LIGHTNVM, },
{ PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */
.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */
This diff has been collapsed.
This diff has been collapsed.