diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index e99ea816d8c91487f28ffce6ac5dcd72e2ed842c..cdf46bc6dcc4f052ab4e30a88cd5f0a7915d2a01 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -100,28 +100,32 @@ extern int radeon_lockup_timeout;
  * Copy from radeon_drv.h so we don't have to include both and have conflicting
  * symbol;
  */
-#define RADEON_MAX_USEC_TIMEOUT		100000	/* 100 ms */
-#define RADEON_FENCE_JIFFIES_TIMEOUT	(HZ / 2)
+#define RADEON_MAX_USEC_TIMEOUT			100000	/* 100 ms */
+#define RADEON_FENCE_JIFFIES_TIMEOUT		(HZ / 2)
 /* RADEON_IB_POOL_SIZE must be a power of 2 */
-#define RADEON_IB_POOL_SIZE		16
-#define RADEON_DEBUGFS_MAX_COMPONENTS	32
-#define RADEONFB_CONN_LIMIT		4
-#define RADEON_BIOS_NUM_SCRATCH		8
+#define RADEON_IB_POOL_SIZE			16
+#define RADEON_DEBUGFS_MAX_COMPONENTS		32
+#define RADEONFB_CONN_LIMIT			4
+#define RADEON_BIOS_NUM_SCRATCH			8
 
 /* max number of rings */
-#define RADEON_NUM_RINGS 3
+#define RADEON_NUM_RINGS			3
+
+/* fence seq is set to this number when signaled */
+#define RADEON_FENCE_SIGNALED_SEQ		0LL
+#define RADEON_FENCE_NOTEMITED_SEQ		(~0LL)
 
 /* internal ring indices */
 /* r1xx+ has gfx CP ring */
-#define RADEON_RING_TYPE_GFX_INDEX  0
+#define RADEON_RING_TYPE_GFX_INDEX		0
 
 /* cayman has 2 compute CP rings */
-#define CAYMAN_RING_TYPE_CP1_INDEX 1
-#define CAYMAN_RING_TYPE_CP2_INDEX 2
+#define CAYMAN_RING_TYPE_CP1_INDEX		1
+#define CAYMAN_RING_TYPE_CP2_INDEX		2
 
 /* hardcode those limit for now */
-#define RADEON_VA_RESERVED_SIZE (8 << 20)
-#define RADEON_IB_VM_MAX_SIZE (64 << 10)
+#define RADEON_VA_RESERVED_SIZE			(8 << 20)
+#define RADEON_IB_VM_MAX_SIZE			(64 << 10)
 
 /*
  * Errata workarounds.
@@ -254,8 +258,9 @@ struct radeon_fence_driver {
 	uint32_t			scratch_reg;
 	uint64_t			gpu_addr;
 	volatile uint32_t		*cpu_addr;
-	atomic_t			seq;
-	uint32_t			last_seq;
+	/* seq is protected by ring emission lock */
+	uint64_t			seq;
+	atomic64_t			last_seq;
 	unsigned long			last_activity;
 	wait_queue_head_t		queue;
 	struct list_head		emitted;
@@ -268,11 +273,9 @@ struct radeon_fence {
 	struct kref			kref;
 	struct list_head		list;
 	/* protected by radeon_fence.lock */
-	uint32_t			seq;
-	bool				emitted;
-	bool				signaled;
+	uint64_t			seq;
 	/* RB, DMA, etc. */
-	int				ring;
+	unsigned			ring;
 	struct radeon_semaphore		*semaphore;
 };
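The two sentinels above replace the old emitted/signaled booleans: the fence lifecycle is now encoded in the 64-bit seq field itself. A fence carries ~0LL from creation until emission, a real sequence number while pending, and 0 once signaled; since the per-ring counter starts at 0 and is pre-incremented on emit, a live fence can never collide with either sentinel. A minimal userspace model of that encoding (fence_state() is a hypothetical helper, not part of the patch):

	#include <stdint.h>
	#include <stdio.h>

	#define RADEON_FENCE_SIGNALED_SEQ	0LL
	#define RADEON_FENCE_NOTEMITED_SEQ	(~0LL)

	/* hypothetical helper, not in the patch: recover the old
	 * emitted/signaled booleans from the seq encoding */
	static const char *fence_state(uint64_t seq)
	{
		if (seq == (uint64_t)RADEON_FENCE_NOTEMITED_SEQ)
			return "created, not yet emitted";
		if (seq == (uint64_t)RADEON_FENCE_SIGNALED_SEQ)
			return "signaled";
		return "emitted, pending";	/* any real sequence number */
	}

	int main(void)
	{
		uint64_t seq = RADEON_FENCE_NOTEMITED_SEQ;	/* radeon_fence_create() */
		printf("%s\n", fence_state(seq));
		seq = 1;					/* radeon_fence_emit(): ++seq */
		printf("%s\n", fence_state(seq));
		seq = RADEON_FENCE_SIGNALED_SEQ;		/* radeon_fence_poll_locked() */
		printf("%s\n", fence_state(seq));
		return 0;
	}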
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 5bb78bf547ea6eed5aa4d46543f9e524b0700631..feb2bbc6ef6d74525b425163a5b9d3c22a5b26b1 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -66,14 +66,14 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence)
 	unsigned long irq_flags;
 
 	write_lock_irqsave(&rdev->fence_lock, irq_flags);
-	if (fence->emitted) {
+	if (fence->seq && fence->seq < RADEON_FENCE_NOTEMITED_SEQ) {
 		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 		return 0;
 	}
-	fence->seq = atomic_add_return(1, &rdev->fence_drv[fence->ring].seq);
+	/* we are protected by the ring emission mutex */
+	fence->seq = ++rdev->fence_drv[fence->ring].seq;
 	radeon_fence_ring_emit(rdev, fence->ring, fence);
 	trace_radeon_fence_emit(rdev->ddev, fence->seq);
-	fence->emitted = true;
 	/* are we the first fence on a previusly idle ring?
 	 */
 	if (list_empty(&rdev->fence_drv[fence->ring].emitted)) {
 		rdev->fence_drv[fence->ring].last_activity = jiffies;
@@ -87,14 +87,60 @@ static bool radeon_fence_poll_locked(struct radeon_device *rdev, int ring)
 {
 	struct radeon_fence *fence;
 	struct list_head *i, *n;
-	uint32_t seq;
+	uint64_t seq, last_seq;
+	unsigned count_loop = 0;
 	bool wake = false;
 
-	seq = radeon_fence_read(rdev, ring);
-	if (seq == rdev->fence_drv[ring].last_seq)
-		return false;
+	/* Note: there is a scenario here for an infinite loop, but it is
+	 * very unlikely to happen. For it to happen, the current polling
+	 * process needs to be interrupted by another process, and that
+	 * other process needs to update last_seq between the atomic read
+	 * and the xchg of the current process.
+	 *
+	 * Moreover, for this to turn into an infinite loop, new fences
+	 * need to be signaled continuously, ie radeon_fence_read needs
+	 * to return a different value each time for both the currently
+	 * polling process and the other process updating last_seq. And
+	 * the value that other process sets as last_seq must be higher
+	 * than the seq value we just read, which means the current
+	 * process must be interrupted after radeon_fence_read and before
+	 * the atomic xchg.
+	 *
+	 * To be even safer, we count the number of times we loop and
+	 * bail after 10 loops, accepting the fact that we might have
+	 * temporarily set last_seq not to the true last seq but to an
+	 * older one.
+	 */
+	last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
+	do {
+		seq = radeon_fence_read(rdev, ring);
+		seq |= last_seq & 0xffffffff00000000LL;
+		if (seq < last_seq) {
+			seq += 0x100000000LL;
+		}
 
-	rdev->fence_drv[ring].last_seq = seq;
+		if (!wake && seq == last_seq) {
+			return false;
+		}
+		/* If we loop over, we don't want to return without
+		 * checking if a fence is signaled, as it means that the
+		 * seq we just read is different from the previous one.
+		 */
+		wake = true;
+		if ((count_loop++) > 10) {
+			/* We looped over too many times; leave with the
+			 * fact that we might have set an older fence
+			 * seq than the current real last seq as signaled
+			 * by the hw.
+			 */
+			break;
+		}
+		last_seq = seq;
+	} while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);
+
+	/* reset wake to false */
+	wake = false;
 	rdev->fence_drv[ring].last_activity = jiffies;
 
 	n = NULL;
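The do/while above is the heart of the conversion: the hardware still writes a 32-bit value (cpu_addr is a volatile uint32_t *), and the driver grafts it onto the upper 32 bits of the last seen 64-bit seq, carrying into the upper word when the low word wraps. A standalone, single-threaded model of that arithmetic (names are illustrative; the kernel additionally serializes concurrent pollers via the atomic64_xchg loop):

	#include <assert.h>
	#include <stdint.h>

	static uint64_t last_seq;	/* models fence_drv[ring].last_seq */

	/* Extend a 32-bit hw fence value to 64 bits. Assumes the hw never
	 * advances by 2^32 or more between two polls. */
	static uint64_t extend_seq(uint32_t hw_seq)
	{
		uint64_t seq = (uint64_t)hw_seq | (last_seq & 0xffffffff00000000ULL);

		if (seq < last_seq)		/* low 32 bits wrapped around */
			seq += 0x100000000ULL;	/* carry into the upper word */
		last_seq = seq;
		return seq;
	}

	int main(void)
	{
		last_seq = 0xfffffffeULL;
		assert(extend_seq(0x00000005) == 0x100000005ULL);	/* wrap handled */
		assert(extend_seq(0x00000007) == 0x100000007ULL);	/* no wrap */
		return 0;
	}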
@@ -112,7 +158,7 @@ static bool radeon_fence_poll_locked(struct radeon_device *rdev, int ring)
 			n = i->prev;
 			list_move_tail(i, &rdev->fence_drv[ring].signaled);
 			fence = list_entry(i, struct radeon_fence, list);
-			fence->signaled = true;
+			fence->seq = RADEON_FENCE_SIGNALED_SEQ;
 			i = n;
 		} while (i != &rdev->fence_drv[ring].emitted);
 		wake = true;
@@ -128,7 +174,7 @@ static void radeon_fence_destroy(struct kref *kref)
 	fence = container_of(kref, struct radeon_fence, kref);
 	write_lock_irqsave(&fence->rdev->fence_lock, irq_flags);
 	list_del(&fence->list);
-	fence->emitted = false;
+	fence->seq = RADEON_FENCE_NOTEMITED_SEQ;
 	write_unlock_irqrestore(&fence->rdev->fence_lock, irq_flags);
 	if (fence->semaphore)
 		radeon_semaphore_free(fence->rdev, fence->semaphore);
@@ -145,9 +191,7 @@ int radeon_fence_create(struct radeon_device *rdev,
 	}
 	kref_init(&((*fence)->kref));
 	(*fence)->rdev = rdev;
-	(*fence)->emitted = false;
-	(*fence)->signaled = false;
-	(*fence)->seq = 0;
+	(*fence)->seq = RADEON_FENCE_NOTEMITED_SEQ;
 	(*fence)->ring = ring;
 	(*fence)->semaphore = NULL;
 	INIT_LIST_HEAD(&(*fence)->list);
@@ -163,18 +207,18 @@ bool radeon_fence_signaled(struct radeon_fence *fence)
 		return true;
 	write_lock_irqsave(&fence->rdev->fence_lock, irq_flags);
-	signaled = fence->signaled;
+	signaled = (fence->seq == RADEON_FENCE_SIGNALED_SEQ);
 	/* if we are shuting down report all fence as signaled */
 	if (fence->rdev->shutdown) {
 		signaled = true;
 	}
-	if (!fence->emitted) {
+	if (fence->seq == RADEON_FENCE_NOTEMITED_SEQ) {
 		WARN(1, "Querying an unemitted fence : %p !\n", fence);
 		signaled = true;
 	}
 	if (!signaled) {
 		radeon_fence_poll_locked(fence->rdev, fence->ring);
-		signaled = fence->signaled;
+		signaled = (fence->seq == RADEON_FENCE_SIGNALED_SEQ);
 	}
 	write_unlock_irqrestore(&fence->rdev->fence_lock, irq_flags);
 	return signaled;
@@ -183,8 +227,8 @@ bool radeon_fence_signaled(struct radeon_fence *fence)
 int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 {
 	struct radeon_device *rdev;
-	unsigned long irq_flags, timeout;
-	u32 seq;
+	unsigned long irq_flags, timeout, last_activity;
+	uint64_t seq;
 	int i, r;
 	bool signaled;
@@ -207,7 +251,9 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 		timeout = 1;
 	}
 	/* save current sequence value used to check for GPU lockups */
-	seq = rdev->fence_drv[fence->ring].last_seq;
+	seq = atomic64_read(&rdev->fence_drv[fence->ring].last_seq);
+	/* Save current last activity value, used to check for GPU lockups */
+	last_activity = rdev->fence_drv[fence->ring].last_activity;
 	read_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 
 	trace_radeon_fence_wait_begin(rdev->ddev, seq);
@@ -235,24 +281,23 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 		}
 
 		write_lock_irqsave(&rdev->fence_lock, irq_flags);
-		/* check if sequence value has changed since last_activity */
-		if (seq != rdev->fence_drv[fence->ring].last_seq) {
+		/* test if somebody else has already decided that this is a lockup */
+		if (last_activity != rdev->fence_drv[fence->ring].last_activity) {
 			write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 			continue;
 		}
-		/* change sequence value on all rings, so nobody else things there is a lockup */
-		for (i = 0; i < RADEON_NUM_RINGS; ++i)
-			rdev->fence_drv[i].last_seq -= 0x10000;
-
-		rdev->fence_drv[fence->ring].last_activity = jiffies;
 		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 
 		if (radeon_ring_is_lockup(rdev, fence->ring, &rdev->ring[fence->ring])) {
-			/* good news we believe it's a lockup */
-			printk(KERN_WARNING "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n",
-			       fence->seq, seq);
+			dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016llx last fence id 0x%016llx)\n",
+				 fence->seq, seq);
+
+			/* change last activity so nobody else thinks there is a lockup */
+			for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+				rdev->fence_drv[i].last_activity = jiffies;
+			}
 
 			/* mark the ring as not ready any more */
 			rdev->ring[fence->ring].ready = false;
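With sequence fudging gone, last_activity becomes the sole lockup arbiter in the wait path: a waiter samples it before sleeping and only runs the lockup check if nobody refreshed it in the meantime; the waiter that does declare a lockup then refreshes it on all rings so concurrent waiters back off instead of piling on. A compilable sketch of that handshake (wait_timeout() and ring_is_locked_up() are hypothetical stubs standing in for the wait_event/radeon_ring_is_lockup machinery):

	#include <stdbool.h>

	#define NUM_RINGS 3

	struct fence_driver {
		unsigned long last_activity;
	};

	static struct fence_driver fence_drv[NUM_RINGS];
	static unsigned long jiffies;		/* stand-in for the kernel tick */

	bool wait_timeout(int ring);		/* stub: sleep until fence or timeout */
	bool ring_is_locked_up(int ring);	/* stub: models radeon_ring_is_lockup() */

	/* Model of the reworked radeon_fence_wait() lockup handshake. */
	int wait_for_fence(int ring)
	{
		for (;;) {
			unsigned long activity = fence_drv[ring].last_activity;

			if (wait_timeout(ring))		/* fence signaled in time */
				return 0;

			/* somebody else saw progress (or already handled a
			 * lockup) while we slept: our sample is stale, retry */
			if (activity != fence_drv[ring].last_activity)
				continue;

			if (ring_is_locked_up(ring)) {
				int i;

				/* refresh all rings so concurrent waiters do
				 * not also declare a lockup */
				for (i = 0; i < NUM_RINGS; ++i)
					fence_drv[i].last_activity = jiffies;
				return -1;	/* models the error path */
			}
		}
	}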
@@ -387,9 +432,9 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
 	}
 	rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
 	rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
-	radeon_fence_write(rdev, atomic_read(&rdev->fence_drv[ring].seq), ring);
+	radeon_fence_write(rdev, rdev->fence_drv[ring].seq, ring);
 	rdev->fence_drv[ring].initialized = true;
-	DRM_INFO("fence driver on ring %d use gpu addr 0x%08Lx and cpu addr 0x%p\n",
+	DRM_INFO("fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n",
 		 ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
 	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 	return 0;
@@ -400,7 +445,8 @@ static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
 	rdev->fence_drv[ring].scratch_reg = -1;
 	rdev->fence_drv[ring].cpu_addr = NULL;
 	rdev->fence_drv[ring].gpu_addr = 0;
-	atomic_set(&rdev->fence_drv[ring].seq, 0);
+	rdev->fence_drv[ring].seq = 0;
+	atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
 	INIT_LIST_HEAD(&rdev->fence_drv[ring].emitted);
 	INIT_LIST_HEAD(&rdev->fence_drv[ring].signaled);
 	init_waitqueue_head(&rdev->fence_drv[ring].queue);
@@ -458,12 +504,12 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
 			continue;
 
 		seq_printf(m, "--- ring %d ---\n", i);
-		seq_printf(m, "Last signaled fence 0x%08X\n",
-			   radeon_fence_read(rdev, i));
+		seq_printf(m, "Last signaled fence 0x%016lx\n",
+			   atomic64_read(&rdev->fence_drv[i].last_seq));
 		if (!list_empty(&rdev->fence_drv[i].emitted)) {
 			fence = list_entry(rdev->fence_drv[i].emitted.prev,
 					   struct radeon_fence, list);
-			seq_printf(m, "Last emitted fence %p with 0x%08X\n",
+			seq_printf(m, "Last emitted fence %p with 0x%016llx\n",
 				   fence, fence->seq);
 		}
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index a4d60ae524b16b447eef91cf6ee4164d63ddf38f..4ae222bb3ec53279d2b8e3d84ff4ea2091e1e0d6 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -82,7 +82,7 @@ bool radeon_ib_try_free(struct radeon_device *rdev, struct radeon_ib *ib)
 	bool done = false;
 
 	/* only free ib which have been emited */
-	if (ib->fence && ib->fence->emitted) {
+	if (ib->fence && ib->fence->seq < RADEON_FENCE_NOTEMITED_SEQ) {
 		if (radeon_fence_signaled(ib->fence)) {
 			radeon_fence_unref(&ib->fence);
 			radeon_sa_bo_free(rdev, &ib->sa_bo);
@@ -149,8 +149,9 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
 	/* this should be rare event, ie all ib scheduled none signaled yet.
 	 */
 	for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
-		if (rdev->ib_pool.ibs[idx].fence && rdev->ib_pool.ibs[idx].fence->emitted) {
-			r = radeon_fence_wait(rdev->ib_pool.ibs[idx].fence, false);
+		struct radeon_fence *fence = rdev->ib_pool.ibs[idx].fence;
+		if (fence && fence->seq < RADEON_FENCE_NOTEMITED_SEQ) {
+			r = radeon_fence_wait(fence, false);
 			if (!r) {
 				goto retry;
 			}
@@ -173,7 +174,7 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib)
 		return;
 	}
 	radeon_mutex_lock(&rdev->ib_pool.mutex);
-	if (tmp->fence && !tmp->fence->emitted) {
+	if (tmp->fence && tmp->fence->seq == RADEON_FENCE_NOTEMITED_SEQ) {
 		radeon_sa_bo_free(rdev, &tmp->sa_bo);
 		radeon_fence_unref(&tmp->fence);
 	}
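Because seq is a uint64_t, comparing it against (~0LL) promotes the sentinel to UINT64_MAX, so "seq < RADEON_FENCE_NOTEMITED_SEQ" in the hunks above reads as "has been emitted (possibly already signaled)" and the equality test as "never emitted". Hypothetical predicates, not part of the patch, that spell out the two tests:

	#include <stdbool.h>
	#include <stdint.h>

	#define RADEON_FENCE_SIGNALED_SEQ	0LL
	#define RADEON_FENCE_NOTEMITED_SEQ	(~0LL)

	/* hypothetical predicates, not in the patch */
	static inline bool fence_seq_emitted(uint64_t seq)
	{
		/* emitted at some point, possibly already signaled (seq == 0) */
		return seq < (uint64_t)RADEON_FENCE_NOTEMITED_SEQ;
	}

	static inline bool fence_seq_unemitted(uint64_t seq)
	{
		return seq == (uint64_t)RADEON_FENCE_NOTEMITED_SEQ;
	}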