drm/radeon: multiple ring allocator v3

A startover with a new idea for a multiple ring allocator. Should perform as well as a normal ring allocator as long as only one ring does somthing, but falls back to a more complex algorithm if more complex things start to happen. We store the last allocated bo in last, we always try to allocate after the last allocated bo. Principle is that in a linear GPU ring progression was is after last is the oldest bo we allocated and thus the first one that should no longer be in use by the GPU. If it's not the case we skip over the bo after last to the closest done bo if such one exist. If none exist and we are not asked to block we report failure to allocate. If we are asked to block we wait on all the oldest fence of all rings. We just wait for any of those fence to complete. v2: We need to be able to let hole point to the list_head, otherwise try free will never free the first allocation of the list. Also stop calling radeon_fence_signalled more than necessary. v3: Don't free allocations without considering them as a hole, otherwise we might lose holes. Also return ENOMEM instead of ENOENT when running out of fences to wait for. Limit the number of holes we try for each ring to 3. Signed-off-by: N Christian König <deathsimple@vodafone.de> Signed-off-by: N Jerome Glisse <jglisse@redhat.com> Signed-off-by: N Dave Airlie <airlied@redhat.com>

drm/radeon: multiple ring allocator v3
A startover with a new idea for a multiple ring allocator. Should perform as well as a normal ring allocator as long as only one ring does somthing, but falls back to a more complex algorithm if more complex things start to happen. We store the last allocated bo in last, we always try to allocate after the last allocated bo. Principle is that in a linear GPU ring progression was is after last is the oldest bo we allocated and thus the first one that should no longer be in use by the GPU. If it's not the case we skip over the bo after last to the closest done bo if such one exist. If none exist and we are not asked to block we report failure to allocate. If we are asked to block we wait on all the oldest fence of all rings. We just wait for any of those fence to complete. v2: We need to be able to let hole point to the list_head, otherwise try free will never free the first allocation of the list. Also stop calling radeon_fence_signalled more than necessary. v3: Don't free allocations without considering them as a hole, otherwise we might lose holes. Also return ENOMEM instead of ENOENT when running out of fences to wait for. Limit the number of holes we try for each ring to 3. Signed-off-by: N Christian König <deathsimple@vodafone.de> Signed-off-by: N Jerome Glisse <jglisse@redhat.com> Signed-off-by: N Dave Airlie <airlied@redhat.com>
c3b7fe8b · Christian König · Dave Airlie · 0085c950 · c3b7fe8b · c3b7fe8b
3 changed file
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -385,7 +385,9 @@ struct radeon_bo_list {
 struct radeon_sa_manager {
 	spinlock_t		lock;
 	struct radeon_bo	*bo;
-	struct list_head	sa_bo;
+	struct list_head	*hole;
+	struct list_head	flist[RADEON_NUM_RINGS];
+	struct list_head	olist;
 	unsigned		size;
 	uint64_t		gpu_addr;
 	void			*cpu_ptr;
@@ -396,7 +398,8 @@ struct radeon_sa_bo;
 /* sub-allocation buffer */
 struct radeon_sa_bo {
-	struct list_head		list;
+	struct list_head		olist;
+	struct list_head		flist;
 	struct radeon_sa_manager	*manager;
 	unsigned			soffset;
 	unsigned			eoffset;

--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -204,25 +204,22 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
 int radeon_ib_pool_init(struct radeon_device *rdev)
 {
-	struct radeon_sa_manager tmp;
 	int i, r;
-	r = radeon_sa_bo_manager_init(rdev, &tmp,
-				      RADEON_IB_POOL_SIZE*64*1024,
-				      RADEON_GEM_DOMAIN_GTT);
-	if (r) {
-		return r;
-	}
 	radeon_mutex_lock(&rdev->ib_pool.mutex);
 	if (rdev->ib_pool.ready) {
 		radeon_mutex_unlock(&rdev->ib_pool.mutex);
-		radeon_sa_bo_manager_fini(rdev, &tmp);
 		return 0;
 	}
-	rdev->ib_pool.sa_manager = tmp;
+	r = radeon_sa_bo_manager_init(rdev, &rdev->ib_pool.sa_manager,
-	INIT_LIST_HEAD(&rdev->ib_pool.sa_manager.sa_bo);
+				      RADEON_IB_POOL_SIZE*64*1024,
+				      RADEON_GEM_DOMAIN_GTT);
+	if (r) {
+		radeon_mutex_unlock(&rdev->ib_pool.mutex);
+		return r;
+	}
 	for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
 		rdev->ib_pool.ibs[i].fence = NULL;
 		rdev->ib_pool.ibs[i].idx = i;

--- a/drivers/gpu/drm/radeon/radeon_sa.c
+++ b/drivers/gpu/drm/radeon/radeon_sa.c
@@ -27,21 +27,42 @@
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
+/* Algorithm:
+ *
+ * We store the last allocated bo in "hole", we always try to allocate
+ * after the last allocated bo. Principle is that in a linear GPU ring
+ * progression was is after last is the oldest bo we allocated and thus
+ * the first one that should no longer be in use by the GPU.
+ *
+ * If it's not the case we skip over the bo after last to the closest
+ * done bo if such one exist. If none exist and we are not asked to
+ * block we report failure to allocate.
+ *
+ * If we are asked to block we wait on all the oldest fence of all
+ * rings. We just wait for any of those fence to complete.
+ */
 #include "drmP.h"
 #include "drm.h"
 #include "radeon.h"
+static void radeon_sa_bo_remove_locked(struct radeon_sa_bo *sa_bo);
+static void radeon_sa_bo_try_free(struct radeon_sa_manager *sa_manager);
 int radeon_sa_bo_manager_init(struct radeon_device *rdev,
 			      struct radeon_sa_manager *sa_manager,
 			      unsigned size, u32 domain)
 {
-	int r;
+	int i, r;
 	spin_lock_init(&sa_manager->lock);
 	sa_manager->bo = NULL;
 	sa_manager->size = size;
 	sa_manager->domain = domain;
-	INIT_LIST_HEAD(&sa_manager->sa_bo);
+	sa_manager->hole = &sa_manager->olist;
+	INIT_LIST_HEAD(&sa_manager->olist);
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		INIT_LIST_HEAD(&sa_manager->flist[i]);
+	}
 	r = radeon_bo_create(rdev, size, RADEON_GPU_PAGE_SIZE, true,
 			     RADEON_GEM_DOMAIN_CPU, &sa_manager->bo);
@@ -58,11 +79,15 @@ void radeon_sa_bo_manager_fini(struct radeon_device *rdev,
 {
 	struct radeon_sa_bo *sa_bo, *tmp;
-	if (!list_empty(&sa_manager->sa_bo)) {
+	if (!list_empty(&sa_manager->olist)) {
-		dev_err(rdev->dev, "sa_manager is not empty, clearing anyway\n");
+		sa_manager->hole = &sa_manager->olist,
+		radeon_sa_bo_try_free(sa_manager);
+		if (!list_empty(&sa_manager->olist)) {
+			dev_err(rdev->dev, "sa_manager is not empty, clearing anyway\n");
+		}
 	}
-	list_for_each_entry_safe(sa_bo, tmp, &sa_manager->sa_bo, list) {
+	list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) {
-		list_del_init(&sa_bo->list);
+		radeon_sa_bo_remove_locked(sa_bo);
 	}
 	radeon_bo_unref(&sa_manager->bo);
 	sa_manager->size = 0;
@@ -114,111 +139,203 @@ int radeon_sa_bo_manager_suspend(struct radeon_device *rdev,
 	return r;
 }
-/*
- * Principe is simple, we keep a list of sub allocation in offset
- * order (first entry has offset == 0, last entry has the highest
- * offset).
- *
- * When allocating new object we first check if there is room at
- * the end total_size - (last_object_offset + last_object_size) >=
- * alloc_size. If so we allocate new object there.
- *
- * When there is not enough room at the end, we start waiting for
- * each sub object until we reach object_offset+object_size >=
- * alloc_size, this object then become the sub object we return.
- *
- * Alignment can't be bigger than page size
- */
 static void radeon_sa_bo_remove_locked(struct radeon_sa_bo *sa_bo)
 {
-	list_del(&sa_bo->list);
+	struct radeon_sa_manager *sa_manager = sa_bo->manager;
+	if (sa_manager->hole == &sa_bo->olist) {
+		sa_manager->hole = sa_bo->olist.prev;
+	}
+	list_del_init(&sa_bo->olist);
+	list_del_init(&sa_bo->flist);
 	radeon_fence_unref(&sa_bo->fence);
 	kfree(sa_bo);
 }
+static void radeon_sa_bo_try_free(struct radeon_sa_manager *sa_manager)
+{
+	struct radeon_sa_bo *sa_bo, *tmp;
+	if (sa_manager->hole->next == &sa_manager->olist)
+		return;
+	sa_bo = list_entry(sa_manager->hole->next, struct radeon_sa_bo, olist);
+	list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) {
+		if (sa_bo->fence == NULL || !radeon_fence_signaled(sa_bo->fence)) {
+			return;
+		}
+		radeon_sa_bo_remove_locked(sa_bo);
+	}
+}
+static inline unsigned radeon_sa_bo_hole_soffset(struct radeon_sa_manager *sa_manager)
+{
+	struct list_head *hole = sa_manager->hole;
+	if (hole != &sa_manager->olist) {
+		return list_entry(hole, struct radeon_sa_bo, olist)->eoffset;
+	}
+	return 0;
+}
+static inline unsigned radeon_sa_bo_hole_eoffset(struct radeon_sa_manager *sa_manager)
+{
+	struct list_head *hole = sa_manager->hole;
+	if (hole->next != &sa_manager->olist) {
+		return list_entry(hole->next, struct radeon_sa_bo, olist)->soffset;
+	}
+	return sa_manager->size;
+}
+static bool radeon_sa_bo_try_alloc(struct radeon_sa_manager *sa_manager,
+				   struct radeon_sa_bo *sa_bo,
+				   unsigned size, unsigned align)
+{
+	unsigned soffset, eoffset, wasted;
+	soffset = radeon_sa_bo_hole_soffset(sa_manager);
+	eoffset = radeon_sa_bo_hole_eoffset(sa_manager);
+	wasted = (align - (soffset % align)) % align;
+	if ((eoffset - soffset) >= (size + wasted)) {
+		soffset += wasted;
+		sa_bo->manager = sa_manager;
+		sa_bo->soffset = soffset;
+		sa_bo->eoffset = soffset + size;
+		list_add(&sa_bo->olist, sa_manager->hole);
+		INIT_LIST_HEAD(&sa_bo->flist);
+		sa_manager->hole = &sa_bo->olist;
+		return true;
+	}
+	return false;
+}
+static bool radeon_sa_bo_next_hole(struct radeon_sa_manager *sa_manager,
+				   struct radeon_fence **fences,
+				   unsigned *tries)
+{
+	struct radeon_sa_bo *best_bo = NULL;
+	unsigned i, soffset, best, tmp;
+	/* if hole points to the end of the buffer */
+	if (sa_manager->hole->next == &sa_manager->olist) {
+		/* try again with its beginning */
+		sa_manager->hole = &sa_manager->olist;
+		return true;
+	}
+	soffset = radeon_sa_bo_hole_soffset(sa_manager);
+	/* to handle wrap around we add sa_manager->size */
+	best = sa_manager->size * 2;
+	/* go over all fence list and try to find the closest sa_bo
+	 * of the current last
+	 */
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		struct radeon_sa_bo *sa_bo;
+		if (list_empty(&sa_manager->flist[i])) {
+			continue;
+		}
+		sa_bo = list_first_entry(&sa_manager->flist[i],
+					 struct radeon_sa_bo, flist);
+		if (!radeon_fence_signaled(sa_bo->fence)) {
+			fences[i] = sa_bo->fence;
+			continue;
+		}
+		/* limit the number of tries each ring gets */
+		if (tries[i] > 2) {
+			continue;
+		}
+		tmp = sa_bo->soffset;
+		if (tmp < soffset) {
+			/* wrap around, pretend it's after */
+			tmp += sa_manager->size;
+		}
+		tmp -= soffset;
+		if (tmp < best) {
+			/* this sa bo is the closest one */
+			best = tmp;
+			best_bo = sa_bo;
+		}
+	}
+	if (best_bo) {
+		++tries[best_bo->fence->ring];
+		sa_manager->hole = best_bo->olist.prev;
+		/* we knew that this one is signaled,
+		   so it's save to remote it */
+		radeon_sa_bo_remove_locked(best_bo);
+		return true;
+	}
+	return false;
+}
 int radeon_sa_bo_new(struct radeon_device *rdev,
 		     struct radeon_sa_manager *sa_manager,
 		     struct radeon_sa_bo **sa_bo,
 		     unsigned size, unsigned align, bool block)
 {
-	struct radeon_fence *fence = NULL;
+	struct radeon_fence *fences[RADEON_NUM_RINGS];
-	struct radeon_sa_bo *tmp, *next;
+	unsigned tries[RADEON_NUM_RINGS];
-	struct list_head *head;
+	int i, r = -ENOMEM;
-	unsigned offset = 0, wasted = 0;
-	int r;
 	BUG_ON(align > RADEON_GPU_PAGE_SIZE);
 	BUG_ON(size > sa_manager->size);
 	*sa_bo = kmalloc(sizeof(struct radeon_sa_bo), GFP_KERNEL);
+	if ((*sa_bo) == NULL) {
-retry:
+		return -ENOMEM;
+	}
+	(*sa_bo)->manager = sa_manager;
+	(*sa_bo)->fence = NULL;
+	INIT_LIST_HEAD(&(*sa_bo)->olist);
+	INIT_LIST_HEAD(&(*sa_bo)->flist);
 	spin_lock(&sa_manager->lock);
+	do {
+		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+			fences[i] = NULL;
+			tries[i] = 0;
+		}
-	/* no one ? */
+		do {
-	head = sa_manager->sa_bo.prev;
+			radeon_sa_bo_try_free(sa_manager);
-	if (list_empty(&sa_manager->sa_bo)) {
-		goto out;
-	}
-	/* look for a hole big enough */
+			if (radeon_sa_bo_try_alloc(sa_manager, *sa_bo,
-	offset = 0;
+						   size, align)) {
-	list_for_each_entry_safe(tmp, next, &sa_manager->sa_bo, list) {
+				spin_unlock(&sa_manager->lock);
-		/* try to free this object */
+				return 0;
-		if (tmp->fence) {
-			if (radeon_fence_signaled(tmp->fence)) {
-				radeon_sa_bo_remove_locked(tmp);
-				continue;
-			} else {
-				fence = tmp->fence;
 			}
-		}
-		/* room before this object ? */
+			/* see if we can skip over some allocations */
-		if (offset < tmp->soffset && (tmp->soffset - offset) >= size) {
+		} while (radeon_sa_bo_next_hole(sa_manager, fences, tries));
-			head = tmp->list.prev;
-			goto out;
+		if (block) {
-		}
+			spin_unlock(&sa_manager->lock);
-		offset = tmp->eoffset;
+			r = radeon_fence_wait_any(rdev, fences, false);
-		wasted = offset % align;
+			spin_lock(&sa_manager->lock);
-		if (wasted) {
+			if (r) {
-			wasted = align - wasted;
+				/* if we have nothing to wait for we
-		}
+				   are practically out of memory */
-		offset += wasted;
+				if (r == -ENOENT) {
-	}
+					r = -ENOMEM;
-	/* room at the end ? */
+				}
-	head = sa_manager->sa_bo.prev;
+				goto out_err;
-	tmp = list_entry(head, struct radeon_sa_bo, list);
+			}
-	offset = tmp->eoffset;
-	wasted = offset % align;
-	if (wasted) {
-		wasted = align - wasted;
-	}
-	offset += wasted;
-	if ((sa_manager->size - offset) < size) {
-		/* failed to find somethings big enough */
-		spin_unlock(&sa_manager->lock);
-		if (block && fence) {
-			r = radeon_fence_wait(fence, false);
-			if (r)
-				return r;
-			goto retry;
 		}
-		kfree(*sa_bo);
+	} while (block);
-		*sa_bo = NULL;
-		return -ENOMEM;
-	}
-out:
+out_err:
-	(*sa_bo)->manager = sa_manager;
-	(*sa_bo)->soffset = offset;
-	(*sa_bo)->eoffset = offset + size;
-	list_add(&(*sa_bo)->list, head);
 	spin_unlock(&sa_manager->lock);
-	return 0;
+	kfree(*sa_bo);
+	*sa_bo = NULL;
+	return r;
 }
 void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo,
@@ -226,13 +343,16 @@ void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo,
 {
 	struct radeon_sa_manager *sa_manager;
-	if (!sa_bo || !*sa_bo)
+	if (sa_bo == NULL || *sa_bo == NULL) {
 		return;
+	}
 	sa_manager = (*sa_bo)->manager;
 	spin_lock(&sa_manager->lock);
 	if (fence && fence->seq && fence->seq < RADEON_FENCE_NOTEMITED_SEQ) {
 		(*sa_bo)->fence = radeon_fence_ref(fence);
+		list_add_tail(&(*sa_bo)->flist,
+			      &sa_manager->flist[fence->ring]);
 	} else {
 		radeon_sa_bo_remove_locked(*sa_bo);
 	}
@@ -247,15 +367,19 @@ void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager,
 	struct radeon_sa_bo *i;
 	spin_lock(&sa_manager->lock);
-	list_for_each_entry(i, &sa_manager->sa_bo, list) {
+	list_for_each_entry(i, &sa_manager->olist, olist) {
-		seq_printf(m, "[%08x %08x] size %4d (%p)",
+		if (&i->olist == sa_manager->hole) {
-			   i->soffset, i->eoffset, i->eoffset - i->soffset, i);
+			seq_printf(m, ">");
-		if (i->fence) {
-			seq_printf(m, " protected by %Ld (%p) on ring %d\n",
-				   i->fence->seq, i->fence, i->fence->ring);
 		} else {
-			seq_printf(m, "\n");
+			seq_printf(m, " ");
+		}
+		seq_printf(m, "[0x%08x 0x%08x] size %8d",
+			   i->soffset, i->eoffset, i->eoffset - i->soffset);
+		if (i->fence) {
+			seq_printf(m, " protected by 0x%016llx on ring %d",
+				   i->fence->seq, i->fence->ring);
 		}
+		seq_printf(m, "\n");
 	}
 	spin_unlock(&sa_manager->lock);
 }