diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c index d79db3698f16699e5edd3a9432357cb7b1cc10f5..89e36ee057c8b4421993f2fae50a9a1bf4efa9df 100644 --- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c +++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c @@ -47,12 +47,23 @@ nouveau_debugfs_channel_info(struct seq_file *m, void *data) seq_printf(m, " cur: 0x%08x\n", chan->dma.cur << 2); seq_printf(m, " put: 0x%08x\n", chan->dma.put << 2); seq_printf(m, " free: 0x%08x\n", chan->dma.free << 2); + if (chan->dma.ib_max) { + seq_printf(m, " ib max: 0x%08x\n", chan->dma.ib_max); + seq_printf(m, " ib put: 0x%08x\n", chan->dma.ib_put); + seq_printf(m, " ib free: 0x%08x\n", chan->dma.ib_free); + } seq_printf(m, "gpu fifo state:\n"); seq_printf(m, " get: 0x%08x\n", nvchan_rd32(chan, chan->user_get)); seq_printf(m, " put: 0x%08x\n", nvchan_rd32(chan, chan->user_put)); + if (chan->dma.ib_max) { + seq_printf(m, " ib get: 0x%08x\n", + nvchan_rd32(chan, 0x88)); + seq_printf(m, " ib put: 0x%08x\n", + nvchan_rd32(chan, 0x8c)); + } seq_printf(m, "last fence : %d\n", chan->fence.sequence); seq_printf(m, "last signalled: %d\n", chan->fence.sequence_ack); diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.c b/drivers/gpu/drm/nouveau/nouveau_dma.c index 50d9e67745af1e7d1c5872f39db736d2c76dfe05..b9c80bb1725032ea528b4c92db7fe1aeb294e70d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dma.c +++ b/drivers/gpu/drm/nouveau/nouveau_dma.c @@ -32,7 +32,22 @@ void nouveau_dma_pre_init(struct nouveau_channel *chan) { - chan->dma.max = (chan->pushbuf_bo->bo.mem.size >> 2) - 2; + struct drm_nouveau_private *dev_priv = chan->dev->dev_private; + struct nouveau_bo *pushbuf = chan->pushbuf_bo; + + if (dev_priv->card_type == NV_50) { + const int ib_size = pushbuf->bo.mem.size / 2; + + chan->dma.ib_base = (pushbuf->bo.mem.size - ib_size) >> 2; + chan->dma.ib_max = (ib_size / 8) - 1; + chan->dma.ib_put = 0; + chan->dma.ib_free = chan->dma.ib_max - chan->dma.ib_put; + + chan->dma.max = (pushbuf->bo.mem.size - ib_size) >> 2; + } else { + chan->dma.max = (pushbuf->bo.mem.size >> 2) - 2; + } + chan->dma.put = 0; chan->dma.cur = chan->dma.put; chan->dma.free = chan->dma.max - chan->dma.cur; @@ -162,12 +177,101 @@ READ_GET(struct nouveau_channel *chan, uint32_t *prev_get, uint32_t *timeout) return (val - chan->pushbuf_base) >> 2; } +void +nv50_dma_push(struct nouveau_channel *chan, struct nouveau_bo *bo, + int delta, int dwords) +{ + struct nouveau_bo *pb = chan->pushbuf_bo; + uint64_t offset = (bo->bo.mem.mm_node->start << PAGE_SHIFT) + delta; + int ip = (chan->dma.ib_put * 2) + chan->dma.ib_base; + + BUG_ON(chan->dma.ib_free < 1); + nouveau_bo_wr32(pb, ip++, offset); + nouveau_bo_wr32(pb, ip++, dwords << 10); + + chan->dma.ib_put = (chan->dma.ib_put + 1) & chan->dma.ib_max; + nvchan_wr32(chan, 0x8c, chan->dma.ib_put); + chan->dma.ib_free--; +} + +static int +nv50_dma_push_wait(struct nouveau_channel *chan, int count) +{ + uint32_t cnt = 0, prev_get = 0; + + while (chan->dma.ib_free < count) { + uint32_t get = nvchan_rd32(chan, 0x88); + if (get != prev_get) { + prev_get = get; + cnt = 0; + } + + if ((++cnt & 0xff) == 0) { + DRM_UDELAY(1); + if (cnt > 100000) + return -EBUSY; + } + + chan->dma.ib_free = get - chan->dma.ib_put; + if (chan->dma.ib_free <= 0) + chan->dma.ib_free += chan->dma.ib_max + 1; + } + + return 0; +} + +static int +nv50_dma_wait(struct nouveau_channel *chan, int slots, int count) +{ + uint32_t cnt = 0, prev_get = 0; + int ret; + + ret = nv50_dma_push_wait(chan, slots + 1); + if (unlikely(ret)) + return ret; + + while (chan->dma.free < count) { + int get = READ_GET(chan, &prev_get, &cnt); + if (unlikely(get < 0)) { + if (get == -EINVAL) + continue; + + return get; + } + + if (get <= chan->dma.cur) { + chan->dma.free = chan->dma.max - chan->dma.cur; + if (chan->dma.free >= count) + break; + + FIRE_RING(chan); + do { + get = READ_GET(chan, &prev_get, &cnt); + if (unlikely(get < 0)) { + if (get == -EINVAL) + continue; + return get; + } + } while (get == 0); + chan->dma.cur = 0; + chan->dma.put = 0; + } + + chan->dma.free = get - chan->dma.cur - 1; + } + + return 0; +} + int -nouveau_dma_wait(struct nouveau_channel *chan, int size) +nouveau_dma_wait(struct nouveau_channel *chan, int slots, int size) { uint32_t prev_get = 0, cnt = 0; int get; + if (chan->dma.ib_max) + return nv50_dma_wait(chan, slots, size); + while (chan->dma.free < size) { get = READ_GET(chan, &prev_get, &cnt); if (unlikely(get == -EBUSY)) diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.h b/drivers/gpu/drm/nouveau/nouveau_dma.h index dabfd655f93ec84e0a7f74a27f031c87c6234bfb..da6e16dafa4dfefee73a6d030071128941da6b37 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dma.h +++ b/drivers/gpu/drm/nouveau/nouveau_dma.h @@ -31,6 +31,9 @@ #define NOUVEAU_DMA_DEBUG 0 #endif +void nv50_dma_push(struct nouveau_channel *, struct nouveau_bo *, + int delta, int dwords); + /* * There's a hw race condition where you can't jump to your PUT offset, * to avoid this we jump to offset + SKIPS and fill the difference with @@ -96,13 +99,11 @@ enum { static __must_check inline int RING_SPACE(struct nouveau_channel *chan, int size) { - if (chan->dma.free < size) { - int ret; + int ret; - ret = nouveau_dma_wait(chan, size); - if (ret) - return ret; - } + ret = nouveau_dma_wait(chan, 1, size); + if (ret) + return ret; chan->dma.free -= size; return 0; @@ -146,7 +147,13 @@ FIRE_RING(struct nouveau_channel *chan) return; chan->accel_done = true; - WRITE_PUT(chan->dma.cur); + if (chan->dma.ib_max) { + nv50_dma_push(chan, chan->pushbuf_bo, chan->dma.put << 2, + chan->dma.cur - chan->dma.put); + } else { + WRITE_PUT(chan->dma.cur); + } + chan->dma.put = chan->dma.cur; } diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 52cc13bd02b9e6527fa3d4a2c14a9d0180f33c70..d221044e079321a7cb6d5e0ddf2ec8a0f1f7a26f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -239,6 +239,11 @@ struct nouveau_channel { int cur; int put; /* access via pushbuf_bo */ + + int ib_base; + int ib_max; + int ib_free; + int ib_put; } dma; uint32_t sw_subchannel[8]; @@ -848,7 +853,7 @@ nouveau_debugfs_channel_fini(struct nouveau_channel *chan) /* nouveau_dma.c */ extern void nouveau_dma_pre_init(struct nouveau_channel *); extern int nouveau_dma_init(struct nouveau_channel *); -extern int nouveau_dma_wait(struct nouveau_channel *, int size); +extern int nouveau_dma_wait(struct nouveau_channel *, int slots, int size); /* nouveau_acpi.c */ #ifdef CONFIG_ACPI diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 70cc30803e3bec1f28d4143faf1fc7754efb7986..986b67099f6c9d7072001b46675b001fc9c9d65d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -707,7 +707,7 @@ nouveau_gem_ioctl_pushbuf_call(struct drm_device *dev, void *data, uint32_t retaddy; if (chan->dma.free < 4 + NOUVEAU_DMA_SKIPS) { - ret = nouveau_dma_wait(chan, 4 + NOUVEAU_DMA_SKIPS); + ret = nouveau_dma_wait(chan, 0, 4 + NOUVEAU_DMA_SKIPS); if (ret) { NV_ERROR(dev, "jmp_space: %d\n", ret); goto out; @@ -754,6 +754,15 @@ nouveau_gem_ioctl_pushbuf_call(struct drm_device *dev, void *data, } } + if (chan->dma.ib_max) { + ret = nouveau_dma_wait(chan, 2, 6); + if (ret) { + NV_INFO(dev, "nv50cal_space: %d\n", ret); + goto out; + } + + nv50_dma_push(chan, pbbo, req->offset, req->nr_dwords); + } else if (PUSHBUF_CAL) { ret = RING_SPACE(chan, 2); if (ret) { @@ -792,6 +801,10 @@ nouveau_gem_ioctl_pushbuf_call(struct drm_device *dev, void *data, kfree(bo); out_next: + if (chan->dma.ib_max) { + req->suffix0 = 0x00000000; + req->suffix1 = 0x00000000; + } else if (PUSHBUF_CAL) { req->suffix0 = 0x00020000; req->suffix1 = 0x00000000; diff --git a/drivers/gpu/drm/nouveau/nv50_fifo.c b/drivers/gpu/drm/nouveau/nv50_fifo.c index 369ecb4cee57f84bd7a94555f5880862dd1ec878..e20c0e2474f3b56035b3c6391b86a17e4c693f0f 100644 --- a/drivers/gpu/drm/nouveau/nv50_fifo.c +++ b/drivers/gpu/drm/nouveau/nv50_fifo.c @@ -283,17 +283,17 @@ nv50_fifo_create_context(struct nouveau_channel *chan) dev_priv->engine.instmem.prepare_access(dev, true); - nv_wo32(dev, ramfc, 0x08/4, chan->pushbuf_base); - nv_wo32(dev, ramfc, 0x10/4, chan->pushbuf_base); nv_wo32(dev, ramfc, 0x48/4, chan->pushbuf->instance >> 4); nv_wo32(dev, ramfc, 0x80/4, (0xc << 24) | (chan->ramht->instance >> 4)); - nv_wo32(dev, ramfc, 0x3c/4, 0x00086078); nv_wo32(dev, ramfc, 0x44/4, 0x2101ffff); nv_wo32(dev, ramfc, 0x60/4, 0x7fffffff); nv_wo32(dev, ramfc, 0x40/4, 0x00000000); nv_wo32(dev, ramfc, 0x7c/4, 0x30000001); nv_wo32(dev, ramfc, 0x78/4, 0x00000000); - nv_wo32(dev, ramfc, 0x4c/4, 0xffffffff); + nv_wo32(dev, ramfc, 0x3c/4, 0x403f6078); + nv_wo32(dev, ramfc, 0x50/4, chan->pushbuf_base + + chan->dma.ib_base * 4); + nv_wo32(dev, ramfc, 0x54/4, drm_order(chan->dma.ib_max + 1) << 16); if (!IS_G80) { nv_wo32(dev, chan->ramin->gpuobj, 0, chan->id);