diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 14367499cfedf8fe6954ea2b48d0e695afb3874b..3b6a0e1265aaff4eb3c98a9a3e026aa38874202c 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -463,6 +463,7 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
 	rcu_assign_pointer(blkg->q, q);
 	blkg->blkcg = blkcg;
 	blkg->plid = pol->plid;
+	blkg->refcnt = 1;
 	cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));
 
 	/* alloc per-policy data */
@@ -633,6 +634,29 @@ void blkg_destroy_all(struct request_queue *q)
 	}
 }
 
+static void blkg_rcu_free(struct rcu_head *rcu_head)
+{
+	blkg_free(container_of(rcu_head, struct blkio_group, rcu_head));
+}
+
+void __blkg_release(struct blkio_group *blkg)
+{
+	/* release the extra blkcg reference this blkg has been holding */
+	css_put(&blkg->blkcg->css);
+
+	/*
+	 * A group is freed in rcu manner. But having an rcu lock does not
+	 * mean that one can access all the fields of blkg and assume these
+	 * are valid. For example, don't try to follow throtl_data and
+	 * request queue links.
+	 *
+	 * Having a reference to blkg under an rcu allows acess to only
+	 * values local to groups like group stats and group rate limits
+	 */
+	call_rcu(&blkg->rcu_head, blkg_rcu_free);
+}
+EXPORT_SYMBOL_GPL(__blkg_release);
+
 static void blkio_reset_stats_cpu(struct blkio_group *blkg)
 {
 	struct blkio_group_stats_cpu *stats_cpu;
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 9537819c29c694daceed3b505eca31291f85541f..7da106843f01cd690783ed79cd90952fd755dc84 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -177,6 +177,8 @@ struct blkio_group {
 	char path[128];
 	/* policy which owns this blk group */
 	enum blkio_policy_id plid;
+	/* reference count */
+	int refcnt;
 
 	/* Configuration */
 	struct blkio_group_conf conf;
@@ -188,6 +190,8 @@ struct blkio_group {
 	struct blkio_group_stats_cpu __percpu *stats_cpu;
 
 	struct blkg_policy_data *pd;
+
+	struct rcu_head rcu_head;
 };
 
 typedef void (blkio_init_group_fn)(struct blkio_group *blkg);
@@ -272,6 +276,35 @@ static inline char *blkg_path(struct blkio_group *blkg)
 	return blkg->path;
 }
 
+/**
+ * blkg_get - get a blkg reference
+ * @blkg: blkg to get
+ *
+ * The caller should be holding queue_lock and an existing reference.
+ */
+static inline void blkg_get(struct blkio_group *blkg)
+{
+	lockdep_assert_held(blkg->q->queue_lock);
+	WARN_ON_ONCE(!blkg->refcnt);
+	blkg->refcnt++;
+}
+
+void __blkg_release(struct blkio_group *blkg);
+
+/**
+ * blkg_put - put a blkg reference
+ * @blkg: blkg to put
+ *
+ * The caller should be holding queue_lock.
+ */
+static inline void blkg_put(struct blkio_group *blkg)
+{
+	lockdep_assert_held(blkg->q->queue_lock);
+	WARN_ON_ONCE(blkg->refcnt <= 0);
+	if (!--blkg->refcnt)
+		__blkg_release(blkg);
+}
+
 #else
 
 struct blkio_group {
@@ -292,6 +325,8 @@ static inline void *blkg_to_pdata(struct blkio_group *blkg,
 static inline struct blkio_group *pdata_to_blkg(void *pdata,
 				struct blkio_policy_type *pol) { return NULL; }
 static inline char *blkg_path(struct blkio_group *blkg) { return NULL; }
+static inline void blkg_get(struct blkio_group *blkg) { }
+static inline void blkg_put(struct blkio_group *blkg) { }
 
 #endif
 
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 9c8a12477e13b15286cbdae6f8441c1f1182d5e7..153ba509446b1157342ab2c515537f8ac82c41bf 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -54,7 +54,6 @@ struct throtl_grp {
 	 */
 	unsigned long disptime;
 
-	atomic_t ref;
 	unsigned int flags;
 
 	/* Two lists for READ and WRITE */
@@ -80,8 +79,6 @@ struct throtl_grp {
 
 	/* Some throttle limits got updated for the group */
 	int limits_changed;
-
-	struct rcu_head rcu_head;
 };
 
 struct throtl_data
@@ -151,45 +148,6 @@ static inline unsigned int total_nr_queued(struct throtl_data *td)
 	return td->nr_queued[0] + td->nr_queued[1];
 }
 
-static inline struct throtl_grp *throtl_ref_get_tg(struct throtl_grp *tg)
-{
-	atomic_inc(&tg->ref);
-	return tg;
-}
-
-static void throtl_free_tg(struct rcu_head *head)
-{
-	struct throtl_grp *tg = container_of(head, struct throtl_grp, rcu_head);
-	struct blkio_group *blkg = tg_to_blkg(tg);
-
-	free_percpu(blkg->stats_cpu);
-	kfree(blkg->pd);
-	kfree(blkg);
-}
-
-static void throtl_put_tg(struct throtl_grp *tg)
-{
-	struct blkio_group *blkg = tg_to_blkg(tg);
-
-	BUG_ON(atomic_read(&tg->ref) <= 0);
-	if (!atomic_dec_and_test(&tg->ref))
-		return;
-
-	/* release the extra blkcg reference this blkg has been holding */
-	css_put(&blkg->blkcg->css);
-
-	/*
-	 * A group is freed in rcu manner. But having an rcu lock does not
-	 * mean that one can access all the fields of blkg and assume these
-	 * are valid. For example, don't try to follow throtl_data and
-	 * request queue links.
-	 *
-	 * Having a reference to blkg under an rcu allows acess to only
-	 * values local to groups like group stats and group rate limits
-	 */
-	call_rcu(&tg->rcu_head, throtl_free_tg);
-}
-
 static void throtl_init_blkio_group(struct blkio_group *blkg)
 {
 	struct throtl_grp *tg = blkg_to_tg(blkg);
@@ -204,14 +162,6 @@ static void throtl_init_blkio_group(struct blkio_group *blkg)
 	tg->bps[WRITE] = -1;
 	tg->iops[READ] = -1;
 	tg->iops[WRITE] = -1;
-
-	/*
-	 * Take the initial reference that will be released on destroy
-	 * This can be thought of a joint reference by cgroup and
-	 * request queue which will be dropped by either request queue
-	 * exit or cgroup deletion path depending on who is exiting first.
-	 */
-	atomic_set(&tg->ref, 1);
 }
 
 static void throtl_link_blkio_group(struct request_queue *q,
@@ -648,7 +598,7 @@ static void throtl_add_bio_tg(struct throtl_data *td, struct throtl_grp *tg,
 
 	bio_list_add(&tg->bio_lists[rw], bio);
 	/* Take a bio reference on tg */
-	throtl_ref_get_tg(tg);
+	blkg_get(tg_to_blkg(tg));
 	tg->nr_queued[rw]++;
 	td->nr_queued[rw]++;
 	throtl_enqueue_tg(td, tg);
@@ -681,8 +631,8 @@ static void tg_dispatch_one_bio(struct throtl_data *td, struct throtl_grp *tg,
 	bio = bio_list_pop(&tg->bio_lists[rw]);
 	tg->nr_queued[rw]--;
 
-	/* Drop bio reference on tg */
-	throtl_put_tg(tg);
+	/* Drop bio reference on blkg */
+	blkg_put(tg_to_blkg(tg));
 
 	BUG_ON(td->nr_queued[rw] <= 0);
 	td->nr_queued[rw]--;
@@ -880,7 +830,7 @@ throtl_destroy_tg(struct throtl_data *td, struct throtl_grp *tg)
 	 * Put the reference taken at the time of creation so that when all
 	 * queues are gone, group can be destroyed.
 	 */
-	throtl_put_tg(tg);
+	blkg_put(tg_to_blkg(tg));
 	td->nr_undestroyed_grps--;
 }
 
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index c7449db52a863c020925a5f91b0376814bae46e2..86980023339a7273fb9aaff9825dfb662b5b0e19 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -210,7 +210,6 @@ struct cfq_group {
 	enum wl_prio_t saved_serving_prio;
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
 	struct hlist_node cfqd_node;
-	int ref;
 #endif
 	/* number of requests that are on the dispatch list or inside driver */
 	int dispatched;
@@ -1071,14 +1070,6 @@ static void cfq_init_blkio_group(struct blkio_group *blkg)
 
 	cfq_init_cfqg_base(cfqg);
 	cfqg->weight = blkg->blkcg->weight;
-
-	/*
-	 * Take the initial reference that will be released on destroy
-	 * This can be thought of a joint reference by cgroup and
-	 * elevator which will be dropped by either elevator exit
-	 * or cgroup deletion path depending on who is exiting first.
-	 */
-	cfqg->ref = 1;
 }
 
 /*
@@ -1105,12 +1096,6 @@ static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd,
 	return cfqg;
 }
 
-static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
-{
-	cfqg->ref++;
-	return cfqg;
-}
-
 static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
 {
 	/* Currently, all async queues are mapped to root group */
@@ -1119,28 +1104,7 @@ static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
 
 	cfqq->cfqg = cfqg;
 	/* cfqq reference on cfqg */
-	cfqq->cfqg->ref++;
-}
-
-static void cfq_put_cfqg(struct cfq_group *cfqg)
-{
-	struct blkio_group *blkg = cfqg_to_blkg(cfqg);
-	struct cfq_rb_root *st;
-	int i, j;
-
-	BUG_ON(cfqg->ref <= 0);
-	cfqg->ref--;
-	if (cfqg->ref)
-		return;
-
-	/* release the extra blkcg reference this blkg has been holding */
-	css_put(&blkg->blkcg->css);
-
-	for_each_cfqg_st(cfqg, i, j, st)
-		BUG_ON(!RB_EMPTY_ROOT(&st->rb));
-	free_percpu(blkg->stats_cpu);
-	kfree(blkg->pd);
-	kfree(blkg);
+	blkg_get(cfqg_to_blkg(cfqg));
 }
 
 static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
@@ -1157,7 +1121,7 @@ static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
 	 * Put the reference taken at the time of creation so that when all
 	 * queues are gone, group can be destroyed.
 	 */
-	cfq_put_cfqg(cfqg);
+	blkg_put(cfqg_to_blkg(cfqg));
 }
 
 static bool cfq_release_cfq_groups(struct cfq_data *cfqd)
@@ -1225,18 +1189,12 @@ static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd,
 	return cfqd->root_group;
 }
 
-static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
-{
-	return cfqg;
-}
-
 static inline void
 cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) {
 	cfqq->cfqg = cfqg;
 }
 
 static void cfq_release_cfq_groups(struct cfq_data *cfqd) {}
-static inline void cfq_put_cfqg(struct cfq_group *cfqg) {}
 
 #endif /* GROUP_IOSCHED */
 
@@ -2630,7 +2588,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 	BUG_ON(cfq_cfqq_on_rr(cfqq));
 
 	kmem_cache_free(cfq_pool, cfqq);
-	cfq_put_cfqg(cfqg);
+	blkg_put(cfqg_to_blkg(cfqg));
 }
 
 static void cfq_put_cooperator(struct cfq_queue *cfqq)
@@ -3382,7 +3340,7 @@ static void cfq_put_request(struct request *rq)
 	cfqq->allocated[rw]--;
 
 	/* Put down rq reference on cfqg */
-	cfq_put_cfqg(RQ_CFQG(rq));
+	blkg_put(cfqg_to_blkg(RQ_CFQG(rq)));
 	rq->elv.priv[0] = NULL;
 	rq->elv.priv[1] = NULL;
 
@@ -3477,8 +3435,9 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 	cfqq->allocated[rw]++;
 
 	cfqq->ref++;
+	blkg_get(cfqg_to_blkg(cfqq->cfqg));
 	rq->elv.priv[0] = cfqq;
-	rq->elv.priv[1] = cfq_ref_get_cfqg(cfqq->cfqg);
+	rq->elv.priv[1] = cfqq->cfqg;
 	spin_unlock_irq(q->queue_lock);
 	return 0;
 }
@@ -3676,8 +3635,11 @@ static int cfq_init_queue(struct request_queue *q)
 	 */
 	cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
 	cfqd->oom_cfqq.ref++;
+
+	spin_lock_irq(q->queue_lock);
 	cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, cfqd->root_group);
-	cfq_put_cfqg(cfqd->root_group);
+	blkg_put(cfqg_to_blkg(cfqd->root_group));
+	spin_unlock_irq(q->queue_lock);
 
 	init_timer(&cfqd->idle_slice_timer);
 	cfqd->idle_slice_timer.function = cfq_idle_slice_timer;
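
For reference, the lifetime rule this patch establishes is: a blkg is born with refcnt == 1 (the joint reference held by the cgroup and the request_queue), blkg_get()/blkg_put() may only be called with queue_lock held, and the final put hands the group to call_rcu() via __blkg_release(). Below is a minimal userspace sketch of that pattern, not kernel code: a pthread mutex stands in for queue_lock, an immediate free() stands in for the RCU-deferred blkg_free(), and the fake_blkg_* names are purely illustrative.

#include <assert.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for struct blkio_group; only the refcount matters here. */
struct fake_blkg {
	int refcnt;			/* protected by "queue_lock" below */
};

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;

/* Models blkg_alloc(): a group starts with one joint reference. */
static struct fake_blkg *fake_blkg_alloc(void)
{
	struct fake_blkg *blkg = calloc(1, sizeof(*blkg));

	if (blkg)
		blkg->refcnt = 1;
	return blkg;
}

/* Models __blkg_release(); the kernel defers the free with call_rcu(). */
static void fake_blkg_release(struct fake_blkg *blkg)
{
	free(blkg);
}

/* Models blkg_get(): caller holds queue_lock and an existing reference. */
static void fake_blkg_get(struct fake_blkg *blkg)
{
	assert(blkg->refcnt > 0);
	blkg->refcnt++;
}

/* Models blkg_put(): the last put under queue_lock triggers the release. */
static void fake_blkg_put(struct fake_blkg *blkg)
{
	assert(blkg->refcnt > 0);
	if (!--blkg->refcnt)
		fake_blkg_release(blkg);
}

int main(void)
{
	struct fake_blkg *blkg = fake_blkg_alloc();

	if (!blkg)
		return 1;

	pthread_mutex_lock(&queue_lock);
	fake_blkg_get(blkg);	/* e.g. a queued bio takes a reference */
	fake_blkg_put(blkg);	/* bio dispatched, reference dropped */
	fake_blkg_put(blkg);	/* initial reference dropped, group is freed */
	pthread_mutex_unlock(&queue_lock);

	printf("refcount pattern exercised\n");
	return 0;
}

Because every get and put happens under the same queue_lock, the count can stay a plain int, which is why the patch can drop the atomic_t ref from throtl_grp and the bare int ref from cfq_group in favour of the shared, lock-protected blkg->refcnt.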