diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index b8dc141896b6ee036da70329295bcb9e2afc75cb..6935c8ecad7fc1f7ce723f660ce671c38c611dfe 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -426,6 +426,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
 					   GFP_KERNEL);
 		if (!d->rmid_busy_llc)
 			return -ENOMEM;
+		INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
 	}
 	if (is_mbm_total_enabled()) {
 		tsize = sizeof(*d->mbm_total);
@@ -536,11 +537,33 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 		list_del(&d->list);
 		if (is_mbm_enabled())
 			cancel_delayed_work(&d->mbm_over);
+		if (is_llc_occupancy_enabled() && has_busy_rmid(r, d)) {
+			/*
+			 * When a package is going down, forcefully
+			 * decrement rmid->ebusy. There is no way to know
+			 * that the L3 was flushed and hence may lead to
+			 * incorrect counts in rare scenarios, but leaving
+			 * the RMID as busy creates RMID leaks if the
+			 * package never comes back.
+			 */
+			__check_limbo(d, true);
+			cancel_delayed_work(&d->cqm_limbo);
+		}
+
 		kfree(d);
-	} else if (r == &rdt_resources_all[RDT_RESOURCE_L3] &&
-		   cpu == d->mbm_work_cpu && is_mbm_enabled()) {
-		cancel_delayed_work(&d->mbm_over);
-		mbm_setup_overflow_handler(d, 0);
+		return;
+	}
+
+	if (r == &rdt_resources_all[RDT_RESOURCE_L3]) {
+		if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
+			cancel_delayed_work(&d->mbm_over);
+			mbm_setup_overflow_handler(d, 0);
+		}
+		if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
+		    has_busy_rmid(r, d)) {
+			cancel_delayed_work(&d->cqm_limbo);
+			cqm_setup_limbo_handler(d, 0);
+		}
 	}
 }
 
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index 3e48693906031418223516aad9ca363f5f2585ef..ebaddaeef023f8625a36f1064425826dedb1a39d 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -20,6 +20,8 @@
 #define QOS_L3_MBM_TOTAL_EVENT_ID	0x02
 #define QOS_L3_MBM_LOCAL_EVENT_ID	0x03
 
+#define CQM_LIMBOCHECK_INTERVAL	1000
+
 #define MBM_CNTR_WIDTH			24
 #define MBM_OVERFLOW_INTERVAL		1000
 
@@ -187,8 +189,11 @@ struct mbm_state {
  * @mbm_total:	saved state for MBM total bandwidth
  * @mbm_local:	saved state for MBM local bandwidth
  * @mbm_over:	worker to periodically read MBM h/w counters
+ * @cqm_limbo:	worker to periodically read CQM h/w counters
  * @mbm_work_cpu:
  *		worker cpu for MBM h/w counters
+ * @cqm_work_cpu:
+ *		worker cpu for CQM h/w counters
  * @ctrl_val:	array of cache or mem ctrl values (indexed by CLOSID)
  * @new_ctrl:	new ctrl value to be loaded
  * @have_new_ctrl: did user provide new_ctrl for this domain
@@ -201,7 +206,9 @@ struct rdt_domain {
 	struct mbm_state	*mbm_total;
 	struct mbm_state	*mbm_local;
 	struct delayed_work	mbm_over;
+	struct delayed_work	cqm_limbo;
 	int			mbm_work_cpu;
+	int			cqm_work_cpu;
 	u32			*ctrl_val;
 	u32			new_ctrl;
 	bool			have_new_ctrl;
@@ -422,7 +429,12 @@ void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
 				    struct rdt_domain *d);
 void mon_event_read(struct rmid_read *rr, struct rdt_domain *d,
 		    struct rdtgroup *rdtgrp, int evtid, int first);
-void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms);
+void mbm_setup_overflow_handler(struct rdt_domain *dom,
+				unsigned long delay_ms);
 void mbm_handle_overflow(struct work_struct *work);
+void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
+void cqm_handle_limbo(struct work_struct *work);
+bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
+void __check_limbo(struct rdt_domain *d, bool force_free);
 
 #endif /* _ASM_X86_INTEL_RDT_H */
diff --git a/arch/x86/kernel/cpu/intel_rdt_monitor.c b/arch/x86/kernel/cpu/intel_rdt_monitor.c
index 8378785883dc2e03a638e4ed973cb7772aafe739..30827510094befb37aec9be1b356201a3644dd5e 100644
--- a/arch/x86/kernel/cpu/intel_rdt_monitor.c
+++ b/arch/x86/kernel/cpu/intel_rdt_monitor.c
@@ -33,7 +33,7 @@
 
 struct rmid_entry {
 	u32				rmid;
-	atomic_t			busy;
+	int				busy;
 	struct list_head		list;
 };
 
@@ -45,13 +45,13 @@ struct rmid_entry {
 static LIST_HEAD(rmid_free_lru);
 
 /**
- * @rmid_limbo_lru       list of currently unused but (potentially)
+ * @rmid_limbo_count     count of currently unused but (potentially)
  *     dirty RMIDs.
- *     This list contains RMIDs that no one is currently using but that
+ *     This counts RMIDs that no one is currently using but that
  *     may have a occupancy value > intel_cqm_threshold. User can change
  *     the threshold occupancy value.
  */
-static LIST_HEAD(rmid_limbo_lru);
+unsigned int rmid_limbo_count;
 
 /**
  * @rmid_entry - The entry in the limbo and free lists.
@@ -103,124 +103,53 @@ static u64 __rmid_read(u32 rmid, u32 eventid)
 	return val;
 }
 
-/*
- * Walk the limbo list looking at any RMIDs that are flagged in the
- * domain rmid_busy_llc bitmap as busy. If the reported LLC occupancy
- * is below the threshold clear the busy bit and decrement the count.
- * If the busy count gets to zero on an RMID we stop looking.
- * This can be called from an IPI.
- * We need an atomic for the busy count because multiple CPUs may check
- * the same RMID at the same time.
- */
-static bool __check_limbo(struct rdt_domain *d)
-{
-	struct rmid_entry *entry;
-	u64 val;
-
-	list_for_each_entry(entry, &rmid_limbo_lru, list) {
-		if (!test_bit(entry->rmid, d->rmid_busy_llc))
-			continue;
-		val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID);
-		if (val <= intel_cqm_threshold) {
-			clear_bit(entry->rmid, d->rmid_busy_llc);
-			if (atomic_dec_and_test(&entry->busy))
-				return true;
-		}
-	}
-	return false;
-}
-
-static void check_limbo(void *arg)
+static bool rmid_dirty(struct rmid_entry *entry)
 {
-	struct rdt_domain *d;
-
-	d = get_domain_from_cpu(smp_processor_id(),
-				&rdt_resources_all[RDT_RESOURCE_L3]);
-
-	if (d)
-		__check_limbo(d);
-}
+	u64 val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID);
 
-static bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d)
-{
-	return find_first_bit(d->rmid_busy_llc, r->num_rmid) != r->num_rmid;
+	return val >= intel_cqm_threshold;
 }
 
 /*
- * Scan the limbo list and move all entries that are below the
- * intel_cqm_threshold to the free list.
- * Return "true" if the limbo list is empty, "false" if there are
- * still some RMIDs there.
+ * Check the RMIDs that are marked as busy for this domain. If the
+ * reported LLC occupancy is below the threshold clear the busy bit and
+ * decrement the count. If the busy count gets to zero on an RMID, we
+ * free the RMID
  */
-static bool try_freeing_limbo_rmid(void)
+void __check_limbo(struct rdt_domain *d, bool force_free)
 {
-	struct rmid_entry *entry, *tmp;
+	struct rmid_entry *entry;
 	struct rdt_resource *r;
-	cpumask_var_t cpu_mask;
-	struct rdt_domain *d;
-	bool ret = true;
-	int cpu;
-
-	if (list_empty(&rmid_limbo_lru))
-		return ret;
+	u32 crmid = 1, nrmid;
 
 	r = &rdt_resources_all[RDT_RESOURCE_L3];
 
-	cpu = get_cpu();
-
 	/*
-	 * First see if we can free up an RMID by checking busy values
-	 * on the local package.
+	 * Skip RMID 0 and start from RMID 1 and check all the RMIDs that
+	 * are marked as busy for occupancy < threshold. If the occupancy
+	 * is less than the threshold decrement the busy counter of the
+	 * RMID and move it to the free list when the counter reaches 0.
	 */
-	d = get_domain_from_cpu(cpu, r);
-	if (d && has_busy_rmid(r, d) && __check_limbo(d)) {
-		list_for_each_entry_safe(entry, tmp, &rmid_limbo_lru, list) {
-			if (atomic_read(&entry->busy) == 0) {
-				list_del(&entry->list);
+	for (;;) {
+		nrmid = find_next_bit(d->rmid_busy_llc, r->num_rmid, crmid);
+		if (nrmid >= r->num_rmid)
+			break;
+
+		entry = __rmid_entry(nrmid);
+		if (force_free || !rmid_dirty(entry)) {
+			clear_bit(entry->rmid, d->rmid_busy_llc);
+			if (!--entry->busy) {
+				rmid_limbo_count--;
 				list_add_tail(&entry->list, &rmid_free_lru);
-				goto done;
 			}
 		}
+		crmid = nrmid + 1;
 	}
+}
 
-	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) {
-		ret = false;
-		goto done;
-	}
-
-	/*
-	 * Build a mask of other domains that have busy RMIDs
-	 */
-	list_for_each_entry(d, &r->domains, list) {
-		if (!cpumask_test_cpu(cpu, &d->cpu_mask) &&
-		    has_busy_rmid(r, d))
-			cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
-	}
-	if (cpumask_empty(cpu_mask)) {
-		ret = false;
-		goto free_mask;
-	}
-
-	/*
-	 * Scan domains with busy RMIDs to check if they still are busy
-	 */
-	on_each_cpu_mask(cpu_mask, check_limbo, NULL, true);
-
-	/* Walk limbo list moving all free RMIDs to the &rmid_free_lru list */
-	list_for_each_entry_safe(entry, tmp, &rmid_limbo_lru, list) {
-		if (atomic_read(&entry->busy) != 0) {
-			ret = false;
-			continue;
-		}
-		list_del(&entry->list);
-		list_add_tail(&entry->list, &rmid_free_lru);
-	}
-
-free_mask:
-	free_cpumask_var(cpu_mask);
-done:
-	put_cpu();
-	return ret;
+bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d)
+{
+	return find_first_bit(d->rmid_busy_llc, r->num_rmid) != r->num_rmid;
 }
 
 /*
@@ -231,15 +160,11 @@ static bool try_freeing_limbo_rmid(void)
 int alloc_rmid(void)
 {
 	struct rmid_entry *entry;
-	bool ret;
 
 	lockdep_assert_held(&rdtgroup_mutex);
 
-	if (list_empty(&rmid_free_lru)) {
-		ret = try_freeing_limbo_rmid();
-		if (list_empty(&rmid_free_lru))
-			return ret ? -ENOSPC : -EBUSY;
-	}
+	if (list_empty(&rmid_free_lru))
+		return rmid_limbo_count ? -EBUSY : -ENOSPC;
 
 	entry = list_first_entry(&rmid_free_lru,
 				 struct rmid_entry, list);
@@ -252,11 +177,12 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
 {
 	struct rdt_resource *r;
 	struct rdt_domain *d;
-	int cpu, nbusy = 0;
+	int cpu;
 	u64 val;
 
 	r = &rdt_resources_all[RDT_RESOURCE_L3];
 
+	entry->busy = 0;
 	cpu = get_cpu();
 	list_for_each_entry(d, &r->domains, list) {
 		if (cpumask_test_cpu(cpu, &d->cpu_mask)) {
@@ -264,17 +190,22 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
 			if (val <= intel_cqm_threshold)
 				continue;
 		}
+
+		/*
+		 * For the first limbo RMID in the domain,
+		 * setup up the limbo worker.
+		 */
+		if (!has_busy_rmid(r, d))
+			cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL);
 		set_bit(entry->rmid, d->rmid_busy_llc);
-		nbusy++;
+		entry->busy++;
 	}
 	put_cpu();
 
-	if (nbusy) {
-		atomic_set(&entry->busy, nbusy);
-		list_add_tail(&entry->list, &rmid_limbo_lru);
-	} else {
+	if (entry->busy)
+		rmid_limbo_count++;
+	else
 		list_add_tail(&entry->list, &rmid_free_lru);
-	}
 }
 
 void free_rmid(u32 rmid)
@@ -387,6 +318,50 @@ static void mbm_update(struct rdt_domain *d, int rmid)
 	}
 }
 
+/*
+ * Handler to scan the limbo list and move the RMIDs
+ * to free list whose occupancy < threshold_occupancy.
+ */
+void cqm_handle_limbo(struct work_struct *work)
+{
+	unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
+	int cpu = smp_processor_id();
+	struct rdt_resource *r;
+	struct rdt_domain *d;
+
+	mutex_lock(&rdtgroup_mutex);
+
+	r = &rdt_resources_all[RDT_RESOURCE_L3];
+	d = get_domain_from_cpu(cpu, r);
+
+	if (!d) {
+		pr_warn_once("Failure to get domain for limbo worker\n");
+		goto out_unlock;
+	}
+
+	__check_limbo(d, false);
+
+	if (has_busy_rmid(r, d))
+		schedule_delayed_work_on(cpu, &d->cqm_limbo, delay);
+
+out_unlock:
+	mutex_unlock(&rdtgroup_mutex);
+}
+
+void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms)
+{
+	unsigned long delay = msecs_to_jiffies(delay_ms);
+	struct rdt_resource *r;
+	int cpu;
+
+	r = &rdt_resources_all[RDT_RESOURCE_L3];
+
+	cpu = cpumask_any(&dom->cpu_mask);
+	dom->cqm_work_cpu = cpu;
+
+	schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
+}
+
 void mbm_handle_overflow(struct work_struct *work)
 {
 	unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
@@ -413,6 +388,7 @@ void mbm_handle_overflow(struct work_struct *work)
 	}
 
 	schedule_delayed_work_on(cpu, &d->mbm_over, delay);
+
 out_unlock:
 	mutex_unlock(&rdtgroup_mutex);
 }
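---

For readers less familiar with the accounting this patch switches to, here is a minimal, standalone userspace sketch (not kernel code, and not part of the patch) of the per-domain limbo bookkeeping: a retired RMID is marked busy in every domain whose LLC occupancy is still above the threshold, each domain's periodic worker clears its own bit as the occupancy drains, and the RMID returns to the free pool once its busy count reaches zero. All names here (retire_rmid, check_limbo, occupancy, THRESHOLD) are illustrative stand-ins for add_rmid_to_limbo(), __check_limbo(), __rmid_read() and intel_cqm_threshold.

/* Toy model of the RMID limbo scheme; occupancy values are faked. */
#include <stdbool.h>
#include <stdio.h>

#define NUM_DOMAINS	2
#define NUM_RMIDS	8
#define THRESHOLD	100

struct entry {
	int busy;	/* number of domains where the RMID still looks dirty */
	bool free;	/* true once the RMID is back on the free pool */
};

static struct entry entries[NUM_RMIDS];
static bool busy_llc[NUM_DOMAINS][NUM_RMIDS];		/* like d->rmid_busy_llc */
static unsigned int occupancy[NUM_DOMAINS][NUM_RMIDS];	/* fake LLC occupancy */

/* Roughly what add_rmid_to_limbo() does: count the dirty domains. */
static void retire_rmid(unsigned int rmid)
{
	entries[rmid].busy = 0;
	for (int d = 0; d < NUM_DOMAINS; d++) {
		if (occupancy[d][rmid] <= THRESHOLD)
			continue;
		busy_llc[d][rmid] = true;
		entries[rmid].busy++;
	}
	entries[rmid].free = (entries[rmid].busy == 0);
}

/* Roughly what __check_limbo() does for one domain on each worker run. */
static void check_limbo(int d)
{
	for (unsigned int rmid = 1; rmid < NUM_RMIDS; rmid++) {	/* skip RMID 0 */
		if (!busy_llc[d][rmid] || occupancy[d][rmid] > THRESHOLD)
			continue;
		busy_llc[d][rmid] = false;
		if (--entries[rmid].busy == 0)
			entries[rmid].free = true;	/* back to the free pool */
	}
}

int main(void)
{
	/* Give every RMID some fake occupancy, retire it, then let it decay. */
	for (unsigned int rmid = 1; rmid < NUM_RMIDS; rmid++) {
		for (int d = 0; d < NUM_DOMAINS; d++)
			occupancy[d][rmid] = rmid * 60;
		retire_rmid(rmid);
	}

	for (int pass = 0; pass < 4; pass++) {
		for (int d = 0; d < NUM_DOMAINS; d++) {
			for (unsigned int rmid = 1; rmid < NUM_RMIDS; rmid++)
				occupancy[d][rmid] /= 2;	/* cache lines age out */
			check_limbo(d);
		}
	}

	for (unsigned int rmid = 1; rmid < NUM_RMIDS; rmid++)
		printf("RMID %u: busy=%d free=%s\n", rmid, entries[rmid].busy,
		       entries[rmid].free ? "yes" : "no");
	return 0;
}

The design point the sketch illustrates is the same one the patch makes: with a per-entry busy count and a per-domain bitmap plus a per-domain delayed worker, no global limbo list walk or cross-package IPI is needed; each package only ever polls its own RMIDs.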