diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 0301896ac157899552855c34a8a8b64eb9282103..374f9a4d94ef34dacbb05d7e6f683033b161cee7 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -242,32 +242,6 @@ struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence);
 void radeon_fence_unref(struct radeon_fence **fence);
 int radeon_fence_count_emitted(struct radeon_device *rdev, int ring);
 
-/*
- * Semaphores.
- */
-struct radeon_ring;
-
-struct radeon_semaphore_driver {
-	rwlock_t		lock;
-	struct list_head	free;
-};
-
-struct radeon_semaphore {
-	struct radeon_bo	*robj;
-	struct list_head	list;
-	uint64_t		gpu_addr;
-};
-
-void radeon_semaphore_driver_fini(struct radeon_device *rdev);
-int radeon_semaphore_create(struct radeon_device *rdev,
-			    struct radeon_semaphore **semaphore);
-void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring,
-				  struct radeon_semaphore *semaphore);
-void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
-				struct radeon_semaphore *semaphore);
-void radeon_semaphore_free(struct radeon_device *rdev,
-			   struct radeon_semaphore *semaphore);
-
 /*
  * Tiling registers
  */
@@ -386,6 +360,46 @@ int radeon_mode_dumb_destroy(struct drm_file *file_priv,
 			     struct drm_device *dev,
 			     uint32_t handle);
 
+/*
+ * Semaphores.
+ */
+struct radeon_ring;
+
+#define	RADEON_SEMAPHORE_BO_SIZE	256
+
+struct radeon_semaphore_driver {
+	rwlock_t			lock;
+	struct list_head		bo;
+};
+
+struct radeon_semaphore_bo;
+
+/* everything here is constant */
+struct radeon_semaphore {
+	struct list_head		list;
+	uint64_t			gpu_addr;
+	uint32_t			*cpu_ptr;
+	struct radeon_semaphore_bo	*bo;
+};
+
+struct radeon_semaphore_bo {
+	struct list_head		list;
+	struct radeon_ib		*ib;
+	struct list_head		free;
+	struct radeon_semaphore		semaphores[RADEON_SEMAPHORE_BO_SIZE/8];
+	unsigned			nused;
+};
+
+void radeon_semaphore_driver_fini(struct radeon_device *rdev);
+int radeon_semaphore_create(struct radeon_device *rdev,
+			    struct radeon_semaphore **semaphore);
+void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring,
+				  struct radeon_semaphore *semaphore);
+void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
+				struct radeon_semaphore *semaphore);
+void radeon_semaphore_free(struct radeon_device *rdev,
+			   struct radeon_semaphore *semaphore);
+
 /*
  * GART structures, functions & helpers
  */
@@ -641,6 +655,7 @@ void r600_blit_suspend(struct radeon_device *rdev);
 int radeon_ib_get(struct radeon_device *rdev, int ring,
 		  struct radeon_ib **ib, unsigned size);
 void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib);
+bool radeon_ib_try_free(struct radeon_device *rdev, struct radeon_ib *ib);
 int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib);
 int radeon_ib_pool_init(struct radeon_device *rdev);
 void radeon_ib_pool_fini(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index e81c333e0f97fcfa48fc2a872c57af55c3863910..846eeb852e5990e2616eb3d86c7239260c4d9472 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -731,7 +731,7 @@ int radeon_device_init(struct radeon_device *rdev,
 	INIT_LIST_HEAD(&rdev->gem.objects);
 	init_waitqueue_head(&rdev->irq.vblank_queue);
 	init_waitqueue_head(&rdev->irq.idle_queue);
-	INIT_LIST_HEAD(&rdev->semaphore_drv.free);
+	INIT_LIST_HEAD(&rdev->semaphore_drv.bo);
 
 	/* Set asic functions */
 	r = radeon_asic_init(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index bc844fe86ae6f4d707e4f49a1efb58a22feaac87..465fb34c197bbd30da8fe90c4225286021b51704 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -77,8 +77,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v)
 /*
  * IB.
  */
-static bool radeon_ib_try_free(struct radeon_device *rdev,
-			       struct radeon_ib *ib)
+bool radeon_ib_try_free(struct radeon_device *rdev, struct radeon_ib *ib)
 {
 	bool done = false;
 
@@ -148,7 +147,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
 	/* this should be rare event, ie all ib scheduled none signaled yet.
 	 */
 	for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
-		if (rdev->ib_pool.ibs[idx].fence) {
+		if (rdev->ib_pool.ibs[idx].fence && rdev->ib_pool.ibs[idx].fence->emitted) {
 			r = radeon_fence_wait(rdev->ib_pool.ibs[idx].fence, false);
 			if (!r) {
 				goto retry;
diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c
index f8cf04499a5971b0ea57a22b2db12080c2429675..61dd4e3c9209692aaefbd6c4104235300a9d0314 100644
--- a/drivers/gpu/drm/radeon/radeon_semaphore.c
+++ b/drivers/gpu/drm/radeon/radeon_semaphore.c
@@ -31,90 +31,109 @@
 #include "drm.h"
 #include "radeon.h"
 
-static int allocate_semaphores(struct radeon_device *rdev)
+static int radeon_semaphore_add_bo(struct radeon_device *rdev)
 {
-	const unsigned long bo_size = PAGE_SIZE * 4;
-
-	struct radeon_bo *bo;
-	struct list_head new_entrys;
+	struct radeon_semaphore_bo *bo;
 	unsigned long irq_flags;
 	uint64_t gpu_addr;
-	void *map;
-	int i, r;
+	uint32_t *cpu_ptr;
+	int r, i;
 
-	r = radeon_bo_create(rdev, bo_size, RADEON_GPU_PAGE_SIZE, true,
-			     RADEON_GEM_DOMAIN_GTT, &bo);
-	if (r) {
-		dev_err(rdev->dev, "(%d) failed to allocate semaphore bo\n", r);
-		return r;
-	}
-
-	r = radeon_bo_reserve(bo, false);
-	if (r) {
-		radeon_bo_unref(&bo);
-		dev_err(rdev->dev, "(%d) failed to reserve semaphore bo\n", r);
-		return r;
-	}
 
-	r = radeon_bo_kmap(bo, &map);
-	if (r) {
-		radeon_bo_unreserve(bo);
-		radeon_bo_unref(&bo);
-		dev_err(rdev->dev, "(%d) semaphore map failed\n", r);
-		return r;
+	bo = kmalloc(sizeof(struct radeon_semaphore_bo), GFP_KERNEL);
+	if (bo == NULL) {
+		return -ENOMEM;
 	}
-	memset(map, 0, bo_size);
-	radeon_bo_kunmap(bo);
+	INIT_LIST_HEAD(&bo->free);
+	INIT_LIST_HEAD(&bo->list);
+	bo->nused = 0;
 
-	r = radeon_bo_pin(bo, RADEON_GEM_DOMAIN_VRAM, &gpu_addr);
+	r = radeon_ib_get(rdev, 0, &bo->ib, RADEON_SEMAPHORE_BO_SIZE);
 	if (r) {
-		radeon_bo_unreserve(bo);
-		radeon_bo_unref(&bo);
-		dev_err(rdev->dev, "(%d) semaphore pin failed\n", r);
+		dev_err(rdev->dev, "failed to get a bo after 5 retry\n");
+		kfree(bo);
 		return r;
 	}
-
-	INIT_LIST_HEAD(&new_entrys);
-	for (i = 0; i < bo_size/8; ++i) {
-		struct radeon_semaphore *sem = kmalloc(sizeof(struct radeon_semaphore), GFP_KERNEL);
-		ttm_bo_reference(&bo->tbo);
-		sem->robj = bo;
-		sem->gpu_addr = gpu_addr;
+	gpu_addr = rdev->ib_pool.sa_manager.gpu_addr;
+	gpu_addr += bo->ib->sa_bo.offset;
+	cpu_ptr = rdev->ib_pool.sa_manager.cpu_ptr;
+	cpu_ptr += (bo->ib->sa_bo.offset >> 2);
+	for (i = 0; i < (RADEON_SEMAPHORE_BO_SIZE/8); i++) {
+		bo->semaphores[i].gpu_addr = gpu_addr;
+		bo->semaphores[i].cpu_ptr = cpu_ptr;
+		bo->semaphores[i].bo = bo;
+		list_add_tail(&bo->semaphores[i].list, &bo->free);
 		gpu_addr += 8;
-		list_add_tail(&sem->list, &new_entrys);
+		cpu_ptr += 2;
 	}
-
-	radeon_bo_unreserve(bo);
-	radeon_bo_unref(&bo);
-
 	write_lock_irqsave(&rdev->semaphore_drv.lock, irq_flags);
-	list_splice_tail(&new_entrys, &rdev->semaphore_drv.free);
+	list_add_tail(&bo->list, &rdev->semaphore_drv.bo);
 	write_unlock_irqrestore(&rdev->semaphore_drv.lock, irq_flags);
+	return 0;
+}
 
-	DRM_INFO("%d new semaphores allocated\n", (int)(bo_size/8));
+static void radeon_semaphore_del_bo_locked(struct radeon_device *rdev,
+					   struct radeon_semaphore_bo *bo)
+{
+	radeon_sa_bo_free(rdev, &bo->ib->sa_bo);
+	radeon_fence_unref(&bo->ib->fence);
+	list_del(&bo->list);
+	kfree(bo);
+}
 
-	return 0;
+void radeon_semaphore_shrink_locked(struct radeon_device *rdev)
+{
+	struct radeon_semaphore_bo *bo, *n;
+
+	if (list_empty(&rdev->semaphore_drv.bo)) {
+		return;
+	}
+	/* only shrink if first bo has free semaphore */
+	bo = list_first_entry(&rdev->semaphore_drv.bo, struct radeon_semaphore_bo, list);
+	if (list_empty(&bo->free)) {
+		return;
+	}
+	list_for_each_entry_safe_continue(bo, n, &rdev->semaphore_drv.bo, list) {
+		if (bo->nused)
+			continue;
+		radeon_semaphore_del_bo_locked(rdev, bo);
+	}
 }
 
 int radeon_semaphore_create(struct radeon_device *rdev,
 			    struct radeon_semaphore **semaphore)
 {
+	struct radeon_semaphore_bo *bo;
 	unsigned long irq_flags;
+	bool do_retry = true;
+	int r;
 
+retry:
+	*semaphore = NULL;
 	write_lock_irqsave(&rdev->semaphore_drv.lock, irq_flags);
-	if (list_empty(&rdev->semaphore_drv.free)) {
-		int r;
-		write_unlock_irqrestore(&rdev->semaphore_drv.lock, irq_flags);
-		r = allocate_semaphores(rdev);
-		if (r)
-			return r;
-		write_lock_irqsave(&rdev->semaphore_drv.lock, irq_flags);
+	list_for_each_entry(bo, &rdev->semaphore_drv.bo, list) {
+		if (list_empty(&bo->free))
+			continue;
+		*semaphore = list_first_entry(&bo->free, struct radeon_semaphore, list);
+		(*semaphore)->cpu_ptr[0] = 0;
+		(*semaphore)->cpu_ptr[1] = 0;
+		list_del(&(*semaphore)->list);
+		bo->nused++;
+		break;
 	}
+	write_unlock_irqrestore(&rdev->semaphore_drv.lock, irq_flags);
 
-	*semaphore = list_first_entry(&rdev->semaphore_drv.free, struct radeon_semaphore, list);
-	list_del(&(*semaphore)->list);
+	if (*semaphore == NULL) {
+		if (do_retry) {
+			do_retry = false;
+			r = radeon_semaphore_add_bo(rdev);
+			if (r)
+				return r;
+			goto retry;
+		}
+		return -ENOMEM;
+	}
 
-	write_unlock_irqrestore(&rdev->semaphore_drv.lock, irq_flags);
 	return 0;
 }
 
@@ -131,31 +150,29 @@ void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
 }
 
 void radeon_semaphore_free(struct radeon_device *rdev,
-			  struct radeon_semaphore *semaphore)
+			   struct radeon_semaphore *semaphore)
 {
 	unsigned long irq_flags;
 
 	write_lock_irqsave(&rdev->semaphore_drv.lock, irq_flags);
-	list_add_tail(&semaphore->list, &rdev->semaphore_drv.free);
+	semaphore->bo->nused--;
+	list_add_tail(&semaphore->list, &semaphore->bo->free);
+	radeon_semaphore_shrink_locked(rdev);
 	write_unlock_irqrestore(&rdev->semaphore_drv.lock, irq_flags);
 }
 
 void radeon_semaphore_driver_fini(struct radeon_device *rdev)
 {
-	struct radeon_semaphore *i, *n;
-	struct list_head entrys;
+	struct radeon_semaphore_bo *bo, *n;
 	unsigned long irq_flags;
 
-	INIT_LIST_HEAD(&entrys);
 	write_lock_irqsave(&rdev->semaphore_drv.lock, irq_flags);
-	if (!list_empty(&rdev->semaphore_drv.free)) {
-		list_splice(&rdev->semaphore_drv.free, &entrys);
+	/* we force to free everything */
+	list_for_each_entry_safe(bo, n, &rdev->semaphore_drv.bo, list) {
+		if (!list_empty(&bo->free)) {
+			dev_err(rdev->dev, "still in use semaphore\n");
+		}
+		radeon_semaphore_del_bo_locked(rdev, bo);
 	}
-	INIT_LIST_HEAD(&rdev->semaphore_drv.free);
 	write_unlock_irqrestore(&rdev->semaphore_drv.lock, irq_flags);
-
-	list_for_each_entry_safe(i, n, &entrys, list) {
-		radeon_bo_unref(&i->robj);
-		kfree(i);
-	}
 }