blk.h 13.2 KB
Newer Older
1
/* SPDX-License-Identifier: GPL-2.0 */
2 3 4
#ifndef BLK_INTERNAL_H
#define BLK_INTERNAL_H

5
#include <linux/idr.h>
6
#include <linux/blk-mq.h>
7
#include <linux/part_stat.h>
8
#include <linux/blk-crypto.h>
9
#include <xen/xen.h>
10
#include "blk-crypto-internal.h"
11
#include "blk-mq.h"
12
#include "blk-mq-sched.h"
13

14 15 16
/* Max future timer expiry for timeouts */
#define BLK_MAX_TIMEOUT		(5 * HZ)

17 18 19 20
#ifdef CONFIG_DEBUG_FS
extern struct dentry *blk_debugfs_root;
#endif

21 22 23
struct blk_flush_queue {
	unsigned int		flush_pending_idx:1;
	unsigned int		flush_running_idx:1;
24
	blk_status_t 		rq_status;
25 26 27 28
	unsigned long		flush_pending_since;
	struct list_head	flush_queue[2];
	struct list_head	flush_data_in_flight;
	struct request		*flush_rq;
29 30 31 32 33 34

	/*
	 * flush_rq shares tag with this rq, both can't be active
	 * at the same time
	 */
	struct request		*orig_rq;
35
	struct lock_class_key	key;
36 37 38
	spinlock_t		mq_flush_lock;
};

39 40
extern struct kmem_cache *blk_requestq_cachep;
extern struct kobj_type blk_queue_ktype;
41
extern struct ida blk_queue_ida;
42

43 44
static inline struct blk_flush_queue *
blk_get_flush_queue(struct request_queue *q, struct blk_mq_ctx *ctx)
45
{
46
	return blk_mq_map_queue(q, REQ_OP_FLUSH, ctx)->fq;
47 48
}

T
Tejun Heo 已提交
49 50 51 52 53
/* Call kobject_get() on the kobject embedded in @q. */
static inline void __blk_get_queue(struct request_queue *q)
{
	struct kobject *kobj = &q->kobj;

	kobject_get(kobj);
}

54 55 56 57 58 59
static inline bool
is_flush_rq(struct request *req, struct blk_mq_hw_ctx *hctx)
{
	return hctx->fq->flush_rq == req;
}

60 61
struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
					      gfp_t flags);
62
void blk_free_flush_queue(struct blk_flush_queue *q);
63

64 65
void blk_freeze_queue(struct request_queue *q);

66 67
static inline bool biovec_phys_mergeable(struct request_queue *q,
		struct bio_vec *vec1, struct bio_vec *vec2)
68
{
69
	unsigned long mask = queue_segment_boundary(q);
C
Christoph Hellwig 已提交
70 71
	phys_addr_t addr1 = page_to_phys(vec1->bv_page) + vec1->bv_offset;
	phys_addr_t addr2 = page_to_phys(vec2->bv_page) + vec2->bv_offset;
72 73

	if (addr1 + vec1->bv_len != addr2)
74
		return false;
75
	if (xen_domain() && !xen_biovec_phys_mergeable(vec1, vec2->bv_page))
76
		return false;
77 78
	if ((addr1 | mask) != ((addr2 + vec2->bv_len - 1) | mask))
		return false;
79 80 81
	return true;
}

82 83 84
static inline bool __bvec_gap_to_prev(struct request_queue *q,
		struct bio_vec *bprv, unsigned int offset)
{
85
	return (offset & queue_virt_boundary(q)) ||
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
		((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q));
}

/*
 * Would appending a bio_vec that starts at @offset directly after @bprv
 * leave a gap in the SG list?  Most drivers don't care, but some do.
 * A queue whose virt boundary is zero never reports a gap.
 */
static inline bool bvec_gap_to_prev(struct request_queue *q,
		struct bio_vec *bprv, unsigned int offset)
{
	return queue_virt_boundary(q) && __bvec_gap_to_prev(q, bprv, offset);
}

101 102 103 104 105 106 107 108 109 110 111 112
/*
 * Seed @rq from @bio: @bio becomes both head and tail of the request's bio
 * chain, and the segment count, data length and I/O priority are copied
 * over.  rq->rq_disk is only overwritten when @bio carries a disk.
 */
static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio,
		unsigned int nr_segs)
{
	rq->bio = bio;
	rq->biotail = bio;
	rq->__data_len = bio->bi_iter.bi_size;
	rq->nr_phys_segments = nr_segs;
	rq->ioprio = bio_prio(bio);

	if (!bio->bi_disk)
		return;
	rq->rq_disk = bio->bi_disk;
}

113 114
#ifdef CONFIG_BLK_DEV_INTEGRITY
void blk_flush_integrity(void);
115
bool __bio_integrity_endio(struct bio *);
116
void bio_integrity_free(struct bio *bio);
117 118 119 120 121 122
/*
 * Returns true when bio_integrity(@bio) is NULL; otherwise returns the
 * result of __bio_integrity_endio(@bio).
 */
static inline bool bio_integrity_endio(struct bio *bio)
{
	if (!bio_integrity(bio))
		return true;
	return __bio_integrity_endio(bio);
}
123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142

/*
 * Gap check for appending @next behind @req: compares the last integrity
 * vector of req->bio with the offset of the first integrity vector of
 * @next.  Both payloads are dereferenced without a NULL check.
 */
static inline bool integrity_req_gap_back_merge(struct request *req,
		struct bio *next)
{
	struct bio_integrity_payload *tail = bio_integrity(req->bio);
	struct bio_integrity_payload *head = bio_integrity(next);
	struct bio_vec *last = &tail->bip_vec[tail->bip_vcnt - 1];

	return bvec_gap_to_prev(req->q, last, head->bip_vec[0].bv_offset);
}

/*
 * Gap check for placing @bio in front of @req: compares the last integrity
 * vector of @bio with the offset of the first integrity vector of
 * req->bio.  Both payloads are dereferenced without a NULL check.
 */
static inline bool integrity_req_gap_front_merge(struct request *req,
		struct bio *bio)
{
	struct bio_integrity_payload *tail = bio_integrity(bio);
	struct bio_integrity_payload *head = bio_integrity(req->bio);
	struct bio_vec *last = &tail->bip_vec[tail->bip_vcnt - 1];

	return bvec_gap_to_prev(req->q, last, head->bip_vec[0].bv_offset);
}
143 144 145

void blk_integrity_add(struct gendisk *);
void blk_integrity_del(struct gendisk *);
146 147 148 149 150 151 152 153 154 155 156 157
#else /* CONFIG_BLK_DEV_INTEGRITY */
/* Without CONFIG_BLK_DEV_INTEGRITY this always reports no gap. */
static inline bool integrity_req_gap_back_merge(struct request *req,
		struct bio *next)
{
	return false;
}
/* Without CONFIG_BLK_DEV_INTEGRITY this always reports no gap. */
static inline bool integrity_req_gap_front_merge(struct request *req,
		struct bio *bio)
{
	return false;
}

158 159 160
/* Empty stand-in used when CONFIG_BLK_DEV_INTEGRITY is not defined. */
static inline void blk_flush_integrity(void)
{
}
161 162 163 164
/* Without CONFIG_BLK_DEV_INTEGRITY this unconditionally returns true. */
static inline bool bio_integrity_endio(struct bio *bio)
{
	return true;
}
165 166 167
/* Empty stand-in used when CONFIG_BLK_DEV_INTEGRITY is not defined. */
static inline void bio_integrity_free(struct bio *bio)
{
}
168 169 170 171 172 173
/* Empty stand-in used when CONFIG_BLK_DEV_INTEGRITY is not defined. */
static inline void blk_integrity_add(struct gendisk *disk)
{
}
/* Empty stand-in used when CONFIG_BLK_DEV_INTEGRITY is not defined. */
static inline void blk_integrity_del(struct gendisk *disk)
{
}
174
#endif /* CONFIG_BLK_DEV_INTEGRITY */
175

176
unsigned long blk_rq_timeout(unsigned long timeout);
177
void blk_add_timer(struct request *req);
178

179 180 181 182
bool bio_attempt_front_merge(struct request *req, struct bio *bio,
		unsigned int nr_segs);
bool bio_attempt_back_merge(struct request *req, struct bio *bio,
		unsigned int nr_segs);
183 184
bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
		struct bio *bio);
185
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
186
		unsigned int nr_segs, struct request **same_queue_rq);
187 188

void blk_account_io_start(struct request *req, bool new_io);
189
void blk_account_io_done(struct request *req, u64 now);
190

191 192 193
/*
 * Internal elevator interface
 */
194
/* Nonzero when RQF_HASHED is set in @rq's rq_flags. */
#define ELV_ON_HASH(rq) ((rq)->rq_flags & RQF_HASHED)
195

196
void blk_insert_flush(struct request *rq);
T
Tejun Heo 已提交
197

198
void elevator_init_mq(struct request_queue *q);
199 200
int elevator_switch_mq(struct request_queue *q,
			      struct elevator_type *new_e);
201
void __elevator_exit(struct request_queue *, struct elevator_queue *);
202
int elv_register_queue(struct request_queue *q, bool uevent);
203 204
void elv_unregister_queue(struct request_queue *q);

205 206 207
static inline void elevator_exit(struct request_queue *q,
		struct elevator_queue *e)
{
208 209
	lockdep_assert_held(&q->sysfs_lock);

210 211 212 213
	blk_mq_sched_free_requests(q);
	__elevator_exit(q, e);
}

214 215
struct hd_struct *__disk_get_part(struct gendisk *disk, int partno);

216 217 218 219 220 221 222 223 224 225 226
ssize_t part_size_show(struct device *dev, struct device_attribute *attr,
		char *buf);
ssize_t part_stat_show(struct device *dev, struct device_attribute *attr,
		char *buf);
ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
		char *buf);
ssize_t part_fail_show(struct device *dev, struct device_attribute *attr,
		char *buf);
ssize_t part_fail_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t count);

227 228 229 230 231 232 233 234 235 236 237 238
#ifdef CONFIG_FAIL_IO_TIMEOUT
int blk_should_fake_timeout(struct request_queue *);
ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
ssize_t part_timeout_store(struct device *, struct device_attribute *,
				const char *, size_t);
#else
/* Without CONFIG_FAIL_IO_TIMEOUT this always returns 0. */
static inline int blk_should_fake_timeout(struct request_queue *q)
{
	return 0;
}
#endif

239 240 241 242 243 244
void __blk_queue_split(struct request_queue *q, struct bio **bio,
		unsigned int *nr_segs);
int ll_back_merge_fn(struct request *req, struct bio *bio,
		unsigned int nr_segs);
int ll_front_merge_fn(struct request *req,  struct bio *bio,
		unsigned int nr_segs);
245 246
struct request *attempt_back_merge(struct request_queue *q, struct request *rq);
struct request *attempt_front_merge(struct request_queue *q, struct request *rq);
247 248
int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
				struct request *next);
249
unsigned int blk_recalc_rq_segments(struct request *rq);
250
void blk_rq_set_mixed_merge(struct request *rq);
251
bool blk_rq_merge_ok(struct request *rq, struct bio *bio);
252
enum elv_merge blk_try_merge(struct request *rq, struct bio *bio);
253

254 255
int blk_dev_init(void);

256 257 258 259
/*
 * Contribute to IO statistics IFF:
 *
 *	a) it's attached to a gendisk, and
260
 *	b) the queue had IO stats enabled when this request was started
261
 */
262
static inline bool blk_do_io_stat(struct request *rq)
263
{
264
	return rq->rq_disk && (rq->rq_flags & RQF_IO_STAT);
265 266
}

267 268 269 270 271 272 273
/*
 * Flag @req with REQ_NOMERGE and, if @q currently remembers it as
 * last_merge, clear that pointer.
 */
static inline void req_set_nomerge(struct request_queue *q, struct request *req)
{
	if (q->last_merge == req)
		q->last_merge = NULL;
	req->cmd_flags |= REQ_NOMERGE;
}

274 275 276 277 278 279 280 281 282 283
/*
 * A single bio can carry at most UINT_MAX bytes because bvec_iter.bi_size
 * is defined as 'unsigned int'.  The limit also has to be aligned to the
 * logical block size, which is the minimum unit accepted by hardware.
 *
 * Returns that byte limit converted to 512-byte sectors.
 */
static inline unsigned int bio_allowed_max_sectors(struct request_queue *q)
{
	unsigned int max_bytes;

	max_bytes = round_down(UINT_MAX, queue_logical_block_size(q));
	return max_bytes >> 9;
}

284 285 286 287
/*
 * Internal io_context interface
 */
void get_io_context(struct io_context *ioc);
288
struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q);
289 290
struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q,
			     gfp_t gfp_mask);
291
void ioc_clear_queue(struct request_queue *q);
292

293
int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node);
294 295 296 297

/*
 * Internal throttling interface
 */
298
#ifdef CONFIG_BLK_DEV_THROTTLING
299
extern void blk_throtl_drain(struct request_queue *q);
300 301
extern int blk_throtl_init(struct request_queue *q);
extern void blk_throtl_exit(struct request_queue *q);
302
extern void blk_throtl_register_queue(struct request_queue *q);
303
#else /* CONFIG_BLK_DEV_THROTTLING */
304
/* No-op when CONFIG_BLK_DEV_THROTTLING is not defined. */
static inline void blk_throtl_drain(struct request_queue *q) { }
305 306
/* Without CONFIG_BLK_DEV_THROTTLING: init returns 0, exit is a no-op. */
static inline int blk_throtl_init(struct request_queue *q) { return 0; }
static inline void blk_throtl_exit(struct request_queue *q) { }
307
/* No-op when CONFIG_BLK_DEV_THROTTLING is not defined. */
static inline void blk_throtl_register_queue(struct request_queue *q) { }
308
#endif /* CONFIG_BLK_DEV_THROTTLING */
309 310 311 312
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
extern ssize_t blk_throtl_sample_time_show(struct request_queue *q, char *page);
extern ssize_t blk_throtl_sample_time_store(struct request_queue *q,
	const char *page, size_t count);
313
extern void blk_throtl_bio_endio(struct bio *bio);
314
extern void blk_throtl_stat_add(struct request *rq, u64 time);
315 316
#else
/* No-op when CONFIG_BLK_DEV_THROTTLING_LOW is not defined. */
static inline void blk_throtl_bio_endio(struct bio *bio) { }
317
/* No-op when CONFIG_BLK_DEV_THROTTLING_LOW is not defined. */
static inline void blk_throtl_stat_add(struct request *rq, u64 time) { }
318
#endif
319

320 321 322 323 324 325 326 327 328 329 330 331 332
#ifdef CONFIG_BOUNCE
extern int init_emergency_isa_pool(void);
extern void blk_queue_bounce(struct request_queue *q, struct bio **bio);
#else
/* Without CONFIG_BOUNCE this always returns 0. */
static inline int init_emergency_isa_pool(void)
{
	return 0;
}
/* Without CONFIG_BOUNCE this does nothing; *bio is left untouched. */
static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
{
}
#endif /* CONFIG_BOUNCE */

333 334 335 336 337 338
#ifdef CONFIG_BLK_CGROUP_IOLATENCY
extern int blk_iolatency_init(struct request_queue *q);
#else
/* Without CONFIG_BLK_CGROUP_IOLATENCY this always returns 0. */
static inline int blk_iolatency_init(struct request_queue *q) { return 0; }
#endif

339 340
struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp);

341 342 343 344 345 346
#ifdef CONFIG_BLK_DEV_ZONED
void blk_queue_free_zone_bitmaps(struct request_queue *q);
#else
/* No-op when CONFIG_BLK_DEV_ZONED is not defined. */
static inline void blk_queue_free_zone_bitmaps(struct request_queue *q) {}
#endif

347 348 349 350 351 352 353 354 355
struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector);

int blk_alloc_devt(struct hd_struct *part, dev_t *devt);
void blk_free_devt(dev_t devt);
void blk_invalidate_devt(dev_t devt);
char *disk_name(struct gendisk *hd, int partno, char *buf);
#define ADDPART_FLAG_NONE	0
#define ADDPART_FLAG_RAID	1
#define ADDPART_FLAG_WHOLEDISK	2
356
void delete_partition(struct gendisk *disk, struct hd_struct *part);
C
Christoph Hellwig 已提交
357 358 359 360 361
int bdev_add_partition(struct block_device *bdev, int partno,
		sector_t start, sector_t length);
int bdev_del_partition(struct block_device *bdev, int partno);
int bdev_resize_partition(struct block_device *bdev, int partno,
		sector_t start, sector_t length);
362
int disk_expand_part_tbl(struct gendisk *disk, int target);
363
int hd_ref_init(struct hd_struct *part);
364

365
/* no need to get/put refcount of part0 */
366 367
static inline int hd_struct_try_get(struct hd_struct *part)
{
368 369 370
	if (part->partno)
		return percpu_ref_tryget_live(&part->ref);
	return 1;
371 372 373 374
}

static inline void hd_struct_put(struct hd_struct *part)
{
375 376
	if (part->partno)
		percpu_ref_put(&part->ref);
377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436
}

/*
 * Release what hangs off @part: calls free_part_stats(), kfree()s
 * part->info and calls percpu_ref_exit() on part->ref.  @part itself is
 * not freed here.
 */
static inline void hd_free_part(struct hd_struct *part)
{
	free_part_stats(part);
	kfree(part->info);
	percpu_ref_exit(&part->ref);
}

/*
 * Accessor for part->nr_sects.  Use it for every read that is not already
 * protected by the partition's bd_mutex or the gendisk bdev's bd_mutex.
 *
 * Modelled on i_size_read()/i_size_write():
 *  - 32-bit SMP: retry under the nr_sects_seq seqcount until the read is
 *    stable;
 *  - 32-bit UP with CONFIG_PREEMPTION: read with preemption disabled;
 *  - everything else: plain load.
 */
static inline sector_t part_nr_sects_read(struct hd_struct *part)
{
	sector_t sectors;
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
	unsigned int start;

	do {
		start = read_seqcount_begin(&part->nr_sects_seq);
		sectors = part->nr_sects;
	} while (read_seqcount_retry(&part->nr_sects_seq, start));
#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
	preempt_disable();
	sectors = part->nr_sects;
	preempt_enable();
#else
	sectors = part->nr_sects;
#endif
	return sectors;
}

/*
 * Store @size into part->nr_sects.
 *
 * Should be called with the partition's mutex held (typically bd_mutex)
 * to provide mutual exclusion among writers; otherwise the seqcount might
 * be left in a wrong state, leaving readers spinning indefinitely.
 *
 * The three variants mirror part_nr_sects_read(): seqcount write section
 * on 32-bit SMP, preemption disabled on 32-bit CONFIG_PREEMPTION, plain
 * store otherwise.
 */
static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
{
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
	write_seqcount_begin(&part->nr_sects_seq);
	part->nr_sects = size;
	write_seqcount_end(&part->nr_sects_seq);
#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
	preempt_disable();
	part->nr_sects = size;
	preempt_enable();
#else
	part->nr_sects = size;
#endif
}

437 438
struct request_queue *__blk_alloc_queue(int node_id);

439
int bio_add_hw_page(struct request_queue *q, struct bio *bio,
440
		struct page *page, unsigned int len, unsigned int offset,
441
		unsigned int max_sectors, bool *same_page);
442

443
#endif /* BLK_INTERNAL_H */