diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 53cea9e1e9b6117edf0e5f2c1d6a7072b3526171..24c3e80aff3e70d4928b36f3f876db12117dd0f6 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -79,6 +79,8 @@ struct throtl_service_queue {
 	 */
 	struct list_head	queued[2];	/* throtl_qnode [READ/WRITE] */
 	unsigned int		nr_queued[2];	/* number of queued bios */
+	long			nr_queued_bytes[2]; /* number of queued bytes */
+	wait_queue_head_t	wait[2];
 
 	/*
 	 * RB tree of active children throtl_grp's, which are sorted by
@@ -486,6 +488,10 @@ static void throtl_service_queue_init(struct throtl_service_queue *sq)
 {
 	INIT_LIST_HEAD(&sq->queued[0]);
 	INIT_LIST_HEAD(&sq->queued[1]);
+	sq->nr_queued_bytes[0] = 0;
+	sq->nr_queued_bytes[1] = 0;
+	init_waitqueue_head(&sq->wait[0]);
+	init_waitqueue_head(&sq->wait[1]);
 	sq->pending_tree = RB_ROOT;
 	timer_setup(&sq->pending_timer, throtl_pending_timer_fn, 0);
 }
@@ -1201,6 +1207,7 @@ static void throtl_add_bio_tg(struct bio *bio, struct throtl_qnode *qn,
 	throtl_qnode_add_bio(bio, qn, &sq->queued[rw]);
 
 	sq->nr_queued[rw]++;
+	sq->nr_queued_bytes[rw] += throtl_bio_data_size(bio);
 	blkg_rwstat_add(&tg->total_bytes_queued, bio_op(bio),
 			throtl_bio_data_size(bio));
 	blkg_rwstat_add(&tg->total_io_queued, bio_op(bio), 1);
@@ -1259,6 +1266,15 @@ static void tg_dispatch_one_bio(struct throtl_grp *tg, bool rw)
 	 */
 	bio = throtl_pop_queued(&sq->queued[rw], &tg_to_put);
 	sq->nr_queued[rw]--;
+	sq->nr_queued_bytes[rw] -= throtl_bio_data_size(bio);
+	WARN_ON_ONCE(sq->nr_queued_bytes[rw] < 0);
+
+	if (wq_has_sleeper(&sq->wait[rw])) {
+		if (sq->nr_queued_bytes[rw] > 0)
+			wake_up(&sq->wait[rw]);
+		else
+			wake_up_all(&sq->wait[rw]);
+	}
 
 	throtl_charge_bio(tg, bio);
 
@@ -2301,7 +2317,7 @@ static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
 }
 
 bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
-		    struct bio *bio)
+		    struct bio *bio, wait_queue_head_t **wait)
 {
 	struct throtl_qnode *qn = NULL;
 	struct throtl_grp *tg = blkg_to_tg(blkg ?: q->root_blkg);
@@ -2392,6 +2408,16 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 	tg->last_low_overflow_time[rw] = jiffies;
 
 	td->nr_queued[rw]++;
+
+	if (rw == WRITE) {
+		u64 bps_limit = tg_bps_limit(tg, rw);
+
+		if (bps_limit != U64_MAX &&
+		    (wq_has_sleeper(&sq->wait[rw]) ||
+		     sq->nr_queued_bytes[rw] > div_u64(bps_limit, 2)))
+			*wait = &sq->wait[rw];
+	}
+
 	throtl_add_bio_tg(bio, qn, tg);
 	throttled = true;
 
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index e00350b16b04bece291394f8500e30e9b9cd5512..ad61a9985e82cf264df5fb47aaf93c4c0b3e4b12 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -793,10 +793,13 @@ static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to,
 
 #ifdef CONFIG_BLK_DEV_THROTTLING
 extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
-			   struct bio *bio);
+			   struct bio *bio, wait_queue_head_t **wait);
 #else
 static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
-				  struct bio *bio) { return false; }
+				  struct bio *bio, wait_queue_head_t **wait)
+{
+	return false;
+}
 #endif
 
 static inline bool blkcg_bio_issue_check(struct request_queue *q,
@@ -805,6 +808,8 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
 	struct blkcg *blkcg;
 	struct blkcg_gq *blkg;
 	bool throtl = false;
+	DEFINE_WAIT(wait);
+	wait_queue_head_t *wait_head = NULL;
 
 	rcu_read_lock();
 	blkcg = bio_blkcg(bio);
@@ -821,7 +826,7 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
 		spin_unlock_irq(q->queue_lock);
 	}
 
-	throtl = blk_throtl_bio(q, blkg, bio);
+	throtl = blk_throtl_bio(q, blkg, bio, &wait_head);
 
 	if (!throtl) {
 		blkg = blkg ?: q->root_blkg;
@@ -837,6 +842,12 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
 	}
 
 	rcu_read_unlock();
+	if (wait_head) {
+		prepare_to_wait_exclusive(wait_head, &wait, TASK_UNINTERRUPTIBLE);
+		io_schedule();
+		finish_wait(wait_head, &wait);
+	}
+
 	return !throtl;
 }
 