diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
index 1b87df6cd4761ab0e548d0cc0611c2c5b74725eb..05e2822a80b34d0f31649cfb486ade2b4679f9e8 100644
--- a/Documentation/block/bfq-iosched.txt
+++ b/Documentation/block/bfq-iosched.txt
@@ -11,6 +11,13 @@ controllers), BFQ's main features are:
   groups (switching back to time distribution when needed to keep
   throughput high).
 
+In its default configuration, BFQ privileges latency over
+throughput. So, when needed for achieving a lower latency, BFQ builds
+schedules that may lead to a lower throughput. If your main or only
+goal, for a given device, is to achieve the maximum-possible
+throughput at all times, then do switch off all low-latency heuristics
+for that device, by setting low_latency to 0. Full details in Section 3.
+
 On average CPUs, the current version of BFQ can handle devices
 performing at most ~30K IOPS; at most ~50 KIOPS on faster CPUs. As a
 reference, 30-50 KIOPS correspond to very high bandwidths with
@@ -375,11 +382,19 @@ default, low latency mode is enabled. If enabled, interactive and soft
 real-time applications are privileged and experience a lower latency,
 as explained in more detail in the description of how BFQ works.
 
-DO NOT enable this mode if you need full control on bandwidth
+DISABLE this mode if you need full control on bandwidth
 distribution. In fact, if it is enabled, then BFQ automatically
 increases the bandwidth share of privileged applications, as the main
 means to guarantee a lower latency to them.
 
+In addition, as already highlighted at the beginning of this document,
+DISABLE this mode if your only goal is to achieve a high throughput.
+In fact, privileging the I/O of some application over the rest may
+entail a lower throughput. To achieve the highest-possible throughput
+on a non-rotational device, setting slice_idle to 0 may be needed too
+(at the cost of giving up any strong guarantee on fairness and low
+latency).
+
 timeout_sync
 ------------
 
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index bd8499ef157ce8786c6eaa164448eb0bf9e84c6c..08ce45096350561896fb6c8959c5c04603e98555 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -56,6 +56,11 @@
  * rotational or flash-based devices, and to get the job done quickly
  * for applications consisting in many I/O-bound processes.
  *
+ * NOTE: if the main or only goal, with a given device, is to achieve
+ * the maximum-possible throughput at all times, then do switch off
+ * all low-latency heuristics for that device, by setting low_latency
+ * to 0.
+ *
  * BFQ is described in [1], where also a reference to the initial, more
  * theoretical paper on BFQ can be found. The interested reader can find
  * in the latter paper full details on the main algorithm, as well as
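[Reviewer note, not part of the patch] The tuning advice added above boils down to two per-device writes of the low_latency and slice_idle tunables named in the documentation. The C helper below is only a sketch of one way to apply them; the device name "sda", the helper name write_tunable, and the /sys/block/<dev>/queue/iosched/ path are illustrative assumptions, not something this patch introduces.

/* Minimal sketch: favor raw throughput on a device assumed to be "sda"
 * and assumed to already use the bfq scheduler. */
#include <stdio.h>

static int write_tunable(const char *path, const char *val)
{
        FILE *f = fopen(path, "w");

        if (!f) {
                perror(path);
                return -1;
        }
        fputs(val, f);
        fclose(f);
        return 0;
}

int main(void)
{
        /* Give up BFQ's low-latency heuristics, as Section 3 suggests. */
        write_tunable("/sys/block/sda/queue/iosched/low_latency", "0");
        /* On non-rotational devices, also give up idling (and with it any
         * strong fairness/latency guarantee), as the documentation warns. */
        write_tunable("/sys/block/sda/queue/iosched/slice_idle", "0");
        return 0;
}

Run it as root; on rotational devices the slice_idle write can simply be dropped.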
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index b4fc3e4260b71f91ee8e989ddf9b09b0d7986a13..8726ede19eef2c632a8e53d2bc340a7cae832e6e 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -1114,12 +1114,21 @@ static void bfq_activate_requeue_entity(struct bfq_entity *entity,
 bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
 {
         struct bfq_sched_data *sd = entity->sched_data;
-        struct bfq_service_tree *st = bfq_entity_service_tree(entity);
-        int is_in_service = entity == sd->in_service_entity;
+        struct bfq_service_tree *st;
+        bool is_in_service;
 
         if (!entity->on_st) /* entity never activated, or already inactive */
                 return false;
 
+        /*
+         * If we get here, then entity is active, which implies that
+         * bfq_group_set_parent has already been invoked for the group
+         * represented by entity. Therefore, the field
+         * entity->sched_data has been set, and we can safely use it.
+         */
+        st = bfq_entity_service_tree(entity);
+        is_in_service = entity == sd->in_service_entity;
+
         if (is_in_service)
                 bfq_calc_finish(entity, entity->service);
 
diff --git a/block/blk-core.c b/block/blk-core.c
index c580b0138a7f3713486dc1005d47a75a357a24d2..c7068520794bd0ba060b905f850efaae6a8cbd36 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2644,8 +2644,6 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
                 return false;
         }
 
-        WARN_ON_ONCE(req->rq_flags & RQF_SPECIAL_PAYLOAD);
-
         req->__data_len -= total_bytes;
 
         /* update sector only for requests with clear definition of sector */
@@ -2658,17 +2656,19 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
                 req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK;
         }
 
-        /*
-         * If total number of sectors is less than the first segment
-         * size, something has gone terribly wrong.
-         */
-        if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
-                blk_dump_rq_flags(req, "request botched");
-                req->__data_len = blk_rq_cur_bytes(req);
-        }
+        if (!(req->rq_flags & RQF_SPECIAL_PAYLOAD)) {
+                /*
+                 * If total number of sectors is less than the first segment
+                 * size, something has gone terribly wrong.
+                 */
+                if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
+                        blk_dump_rq_flags(req, "request botched");
+                        req->__data_len = blk_rq_cur_bytes(req);
+                }
 
-        /* recalculate the number of segments */
-        blk_recalc_rq_segments(req);
+                /* recalculate the number of segments */
+                blk_recalc_rq_segments(req);
+        }
 
         return true;
 }
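[Reviewer note, not part of the patch] The blk-core.c hunk above makes the tail of blk_update_request() skip both the "request botched" sanity check and the segment recount for requests flagged as carrying a special payload, since those have no bio-backed data segments to validate. A compressed userspace sketch of that control flow follows; the struct, field names, and flag value are illustrative stand-ins, not the kernel's.

/* Userspace-only sketch of the guarded tail of blk_update_request(). */
#include <stdio.h>

#define RQF_SPECIAL_PAYLOAD (1U << 0)   /* illustrative flag value */

struct fake_request {
        unsigned int rq_flags;
        unsigned int data_len;  /* stands in for req->__data_len */
        unsigned int cur_len;   /* stands in for blk_rq_cur_bytes(req) */
};

static void recalc_segments(struct fake_request *req)
{
        /* placeholder for blk_recalc_rq_segments() */
        printf("recalculating segments, %u bytes left\n", req->data_len);
}

static void update_request_tail(struct fake_request *req)
{
        if (!(req->rq_flags & RQF_SPECIAL_PAYLOAD)) {
                /* total size smaller than the first segment: clamp it */
                if (req->data_len < req->cur_len) {
                        fprintf(stderr, "request botched\n");
                        req->data_len = req->cur_len;
                }
                /* recalculate the number of segments */
                recalc_segments(req);
        }
}

int main(void)
{
        struct fake_request normal = { 0, 4096, 512 };
        struct fake_request special = { RQF_SPECIAL_PAYLOAD, 0, 0 };

        update_request_tail(&normal);   /* checked and re-segmented */
        update_request_tail(&special);  /* both steps skipped */
        return 0;
}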
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 5d4ce7eb8dbfb9100b7cfc7b673a64a1ab4822ad..a69ad122ed66c6b93385f1b3959a893b407b9d05 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1236,7 +1236,7 @@ void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
 }
 EXPORT_SYMBOL(blk_mq_stop_hw_queue);
 
-void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync)
+static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync)
 {
         struct blk_mq_hw_ctx *hctx;
         int i;
@@ -1554,13 +1554,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
         blk_queue_bounce(q, &bio);
 
+        blk_queue_split(q, &bio, q->bio_split);
+
         if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
                 bio_io_error(bio);
                 return BLK_QC_T_NONE;
         }
 
-        blk_queue_split(q, &bio, q->bio_split);
-
         if (!is_flush_fua && !blk_queue_nomerges(q) &&
             blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
                 return BLK_QC_T_NONE;
@@ -2341,15 +2341,15 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 
         blk_mq_init_cpu_queues(q, set->nr_hw_queues);
 
-        mutex_lock(&all_q_mutex);
         get_online_cpus();
+        mutex_lock(&all_q_mutex);
 
         list_add_tail(&q->all_q_node, &all_q_list);
         blk_mq_add_queue_tag_set(set, q);
         blk_mq_map_swqueue(q, cpu_online_mask);
 
-        put_online_cpus();
         mutex_unlock(&all_q_mutex);
+        put_online_cpus();
 
         if (!(set->flags & BLK_MQ_F_NO_SCHED)) {
                 int ret;
diff --git a/block/blk-stat.c b/block/blk-stat.c
index 6c2f40940439c5b50a6aad0c0a560e8a2b2bf08b..c52356d90fe3854f05faebda241707a1b112a543 100644
--- a/block/blk-stat.c
+++ b/block/blk-stat.c
@@ -96,13 +96,16 @@ void blk_stat_add(struct request *rq)
 
         rcu_read_lock();
         list_for_each_entry_rcu(cb, &q->stats->callbacks, list) {
-                if (blk_stat_is_active(cb)) {
-                        bucket = cb->bucket_fn(rq);
-                        if (bucket < 0)
-                                continue;
-                        stat = &this_cpu_ptr(cb->cpu_stat)[bucket];
-                        __blk_stat_add(stat, value);
-                }
+                if (!blk_stat_is_active(cb))
+                        continue;
+
+                bucket = cb->bucket_fn(rq);
+                if (bucket < 0)
+                        continue;
+
+                stat = &get_cpu_ptr(cb->cpu_stat)[bucket];
+                __blk_stat_add(stat, value);
+                put_cpu_ptr(cb->cpu_stat);
         }
         rcu_read_unlock();
 }
diff --git a/block/elevator.c b/block/elevator.c
index ab726a5c0bf6eaa3039a472a65dd8fb3435f606b..dac99fbfc273f36234b95f80feb711173fdf41e7 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -1062,10 +1062,8 @@ static int __elevator_change(struct request_queue *q, const char *name)
 
         strlcpy(elevator_name, name, sizeof(elevator_name));
         e = elevator_get(strstrip(elevator_name), true);
-        if (!e) {
-                printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
+        if (!e)
                 return -EINVAL;
-        }
 
         if (q->elevator &&
             !strcmp(elevator_name, q->elevator->type->elevator_name)) {
@@ -1105,7 +1103,6 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
         if (!ret)
                 return count;
 
-        printk(KERN_ERR "elevator: switch to %s failed\n", name);
         return ret;
 }
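[Reviewer note, not part of the patch] The blk_mq_init_allocated_queue() hunk above reorders the acquisitions so the CPU-hotplug lock is always taken before all_q_mutex, matching the order used on the hotplug path; taking the same two locks in opposite orders on two paths is the classic ABBA deadlock. The pthread sketch below is only a userspace analogy of that ordering rule; the mutex and function names (cpu_hotplug, all_q, init_queue, hotplug_event) are made up for illustration.

/* Two paths, two locks, one ordering: no ABBA deadlock is possible. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t cpu_hotplug = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t all_q = PTHREAD_MUTEX_INITIALIZER;

/* Mirrors the fixed init path: outer "hotplug" lock first. */
static void *init_queue(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&cpu_hotplug);       /* like get_online_cpus() */
        pthread_mutex_lock(&all_q);             /* like mutex_lock(&all_q_mutex) */
        printf("queue registered\n");
        pthread_mutex_unlock(&all_q);
        pthread_mutex_unlock(&cpu_hotplug);
        return NULL;
}

/* A hotplug-like path that also needs both locks, taken in the same order. */
static void *hotplug_event(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&cpu_hotplug);
        pthread_mutex_lock(&all_q);
        printf("queues remapped\n");
        pthread_mutex_unlock(&all_q);
        pthread_mutex_unlock(&cpu_hotplug);
        return NULL;
}

int main(void)
{
        pthread_t a, b;

        pthread_create(&a, NULL, init_queue, NULL);
        pthread_create(&b, NULL, hotplug_event, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        return 0;
}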
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 8c4adac6fafcc56dfe37d05a51f45798e5194652..f5df78ed1e10974ffb9e239b57ce260f0bb5bae9 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -367,7 +367,8 @@ static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb,
 
                 if (unlikely(elba > nvmdev->total_secs)) {
                         pr_err("nvm: L2P data from device is out of bounds!\n");
-                        return -EINVAL;
+                        ret = -EINVAL;
+                        goto out;
                 }
 
                 /* Transform physical address to target address space */
@@ -464,8 +465,8 @@ static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr *ppas,
         return ret;
 }
 
-static inline void nvme_nvm_rqtocmd(struct request *rq, struct nvm_rq *rqd,
-                                struct nvme_ns *ns, struct nvme_nvm_command *c)
+static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns,
+                                struct nvme_nvm_command *c)
 {
         c->ph_rw.opcode = rqd->opcode;
         c->ph_rw.nsid = cpu_to_le32(ns->ns_id);
@@ -503,7 +504,7 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
         if (!cmd)
                 return -ENOMEM;
 
-        nvme_nvm_rqtocmd(rq, rqd, ns, cmd);
+        nvme_nvm_rqtocmd(rqd, ns, cmd);
 
         rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY);
         if (IS_ERR(rq)) {
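[Reviewer note, not part of the patch] The first lightnvm.c hunk turns a direct return into a goto out, so the error path falls through the function's common cleanup instead of leaking what was allocated earlier. The sketch below restates that goto-cleanup pattern in standalone C; the names get_table, simulate_bad_data, and entries are illustrative, not the driver's.

/* Allocate, then branch every later error to the shared cleanup label. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int get_table(size_t len, int simulate_bad_data)
{
        int ret = 0;
        unsigned char *entries = malloc(len);   /* like the kmalloc'd buffer */

        if (!entries)
                return -1;      /* nothing allocated yet, plain return is fine */

        memset(entries, 0, len);        /* stands in for the device transfer */

        if (simulate_bad_data) {
                fprintf(stderr, "data from device is out of bounds\n");
                ret = -1;
                goto out;       /* a bare "return -1;" here would leak 'entries' */
        }

        printf("table of %zu bytes processed\n", len);
out:
        free(entries);
        return ret;
}

int main(void)
{
        get_table(4096, 0);
        get_table(4096, 1);
        return 0;
}

The remaining two hunks are mechanical: the rq parameter of nvme_nvm_rqtocmd() was unused, and at the call site it was passed before nvme_alloc_request() had even initialized it, so dropping the parameter also removes that uninitialized use.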