diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
index d79db3698f16699e5edd3a9432357cb7b1cc10f5..89e36ee057c8b4421993f2fae50a9a1bf4efa9df 100644
--- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c
+++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
@@ -47,12 +47,23 @@ nouveau_debugfs_channel_info(struct seq_file *m, void *data)
 	seq_printf(m, "           cur: 0x%08x\n", chan->dma.cur << 2);
 	seq_printf(m, "           put: 0x%08x\n", chan->dma.put << 2);
 	seq_printf(m, "          free: 0x%08x\n", chan->dma.free << 2);
+	if (chan->dma.ib_max) {
+		seq_printf(m, "        ib max: 0x%08x\n", chan->dma.ib_max);
+		seq_printf(m, "        ib put: 0x%08x\n", chan->dma.ib_put);
+		seq_printf(m, "       ib free: 0x%08x\n", chan->dma.ib_free);
+	}
 
 	seq_printf(m, "gpu fifo state:\n");
 	seq_printf(m, "           get: 0x%08x\n",
 					nvchan_rd32(chan, chan->user_get));
 	seq_printf(m, "           put: 0x%08x\n",
 					nvchan_rd32(chan, chan->user_put));
+	if (chan->dma.ib_max) {
+		seq_printf(m, "        ib get: 0x%08x\n",
+			   nvchan_rd32(chan, 0x88));
+		seq_printf(m, "        ib put: 0x%08x\n",
+			   nvchan_rd32(chan, 0x8c));
+	}
 
 	seq_printf(m, "last fence    : %d\n", chan->fence.sequence);
 	seq_printf(m, "last signalled: %d\n", chan->fence.sequence_ack);
diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.c b/drivers/gpu/drm/nouveau/nouveau_dma.c
index 50d9e67745af1e7d1c5872f39db736d2c76dfe05..b9c80bb1725032ea528b4c92db7fe1aeb294e70d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.c
@@ -32,7 +32,22 @@
 void
 nouveau_dma_pre_init(struct nouveau_channel *chan)
 {
-	chan->dma.max  = (chan->pushbuf_bo->bo.mem.size >> 2) - 2;
+	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
+	struct nouveau_bo *pushbuf = chan->pushbuf_bo;
+
+	if (dev_priv->card_type == NV_50) {
+		const int ib_size = pushbuf->bo.mem.size / 2;
+
+		chan->dma.ib_base = (pushbuf->bo.mem.size - ib_size) >> 2;
+		chan->dma.ib_max = (ib_size / 8) - 1;
+		chan->dma.ib_put = 0;
+		chan->dma.ib_free = chan->dma.ib_max - chan->dma.ib_put;
+
+		chan->dma.max = (pushbuf->bo.mem.size - ib_size) >> 2;
+	} else {
+		chan->dma.max  = (pushbuf->bo.mem.size >> 2) - 2;
+	}
+
 	chan->dma.put  = 0;
 	chan->dma.cur  = chan->dma.put;
 	chan->dma.free = chan->dma.max - chan->dma.cur;
@@ -162,12 +177,101 @@ READ_GET(struct nouveau_channel *chan, uint32_t *prev_get, uint32_t *timeout)
 	return (val - chan->pushbuf_base) >> 2;
 }
 
+void
+nv50_dma_push(struct nouveau_channel *chan, struct nouveau_bo *bo,
+	      int delta, int dwords)
+{
+	struct nouveau_bo *pb = chan->pushbuf_bo;
+	uint64_t offset = (bo->bo.mem.mm_node->start << PAGE_SHIFT) + delta;
+	int ip = (chan->dma.ib_put * 2) + chan->dma.ib_base;
+
+	BUG_ON(chan->dma.ib_free < 1);
+	nouveau_bo_wr32(pb, ip++, offset);
+	nouveau_bo_wr32(pb, ip++, dwords << 10);
+
+	chan->dma.ib_put = (chan->dma.ib_put + 1) & chan->dma.ib_max;
+	nvchan_wr32(chan, 0x8c, chan->dma.ib_put);
+	chan->dma.ib_free--;
+}
+
+static int
+nv50_dma_push_wait(struct nouveau_channel *chan, int count)
+{
+	uint32_t cnt = 0, prev_get = 0;
+
+	while (chan->dma.ib_free < count) {
+		uint32_t get = nvchan_rd32(chan, 0x88);
+		if (get != prev_get) {
+			prev_get = get;
+			cnt = 0;
+		}
+
+		if ((++cnt & 0xff) == 0) {
+			DRM_UDELAY(1);
+			if (cnt > 100000)
+				return -EBUSY;
+		}
+
+		chan->dma.ib_free = get - chan->dma.ib_put;
+		if (chan->dma.ib_free <= 0)
+			chan->dma.ib_free += chan->dma.ib_max + 1;
+	}
+
+	return 0;
+}
+
+static int
+nv50_dma_wait(struct nouveau_channel *chan, int slots, int count)
+{
+	uint32_t cnt = 0, prev_get = 0;
+	int ret;
+
+	ret = nv50_dma_push_wait(chan, slots + 1);
+	if (unlikely(ret))
+		return ret;
+
+	while (chan->dma.free < count) {
+		int get = READ_GET(chan, &prev_get, &cnt);
+		if (unlikely(get < 0)) {
+			if (get == -EINVAL)
+				continue;
+
+			return get;
+		}
+
+		if (get <= chan->dma.cur) {
+			chan->dma.free = chan->dma.max - chan->dma.cur;
+			if (chan->dma.free >= count)
+				break;
+
+			FIRE_RING(chan);
+			do {
+				get = READ_GET(chan, &prev_get, &cnt);
+				if (unlikely(get < 0)) {
+					if (get == -EINVAL)
+						continue;
+					return get;
+				}
+			} while (get == 0);
+			chan->dma.cur = 0;
+			chan->dma.put = 0;
+		}
+
+		chan->dma.free = get - chan->dma.cur - 1;
+	}
+
+	return 0;
+}
+
 int
-nouveau_dma_wait(struct nouveau_channel *chan, int size)
+nouveau_dma_wait(struct nouveau_channel *chan, int slots, int size)
 {
 	uint32_t prev_get = 0, cnt = 0;
 	int get;
 
+	if (chan->dma.ib_max)
+		return nv50_dma_wait(chan, slots, size);
+
 	while (chan->dma.free < size) {
 		get = READ_GET(chan, &prev_get, &cnt);
 		if (unlikely(get == -EBUSY))
diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.h b/drivers/gpu/drm/nouveau/nouveau_dma.h
index dabfd655f93ec84e0a7f74a27f031c87c6234bfb..da6e16dafa4dfefee73a6d030071128941da6b37 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.h
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.h
@@ -31,6 +31,9 @@
 #define NOUVEAU_DMA_DEBUG 0
 #endif
 
+void nv50_dma_push(struct nouveau_channel *, struct nouveau_bo *,
+		   int delta, int dwords);
+
 /*
  * There's a hw race condition where you can't jump to your PUT offset,
  * to avoid this we jump to offset + SKIPS and fill the difference with
@@ -96,13 +99,11 @@ enum {
 static __must_check inline int
 RING_SPACE(struct nouveau_channel *chan, int size)
 {
-	if (chan->dma.free < size) {
-		int ret;
+	int ret;
 
-		ret = nouveau_dma_wait(chan, size);
-		if (ret)
-			return ret;
-	}
+	ret = nouveau_dma_wait(chan, 1, size);
+	if (ret)
+		return ret;
 
 	chan->dma.free -= size;
 	return 0;
@@ -146,7 +147,13 @@ FIRE_RING(struct nouveau_channel *chan)
 		return;
 	chan->accel_done = true;
 
-	WRITE_PUT(chan->dma.cur);
+	if (chan->dma.ib_max) {
+		nv50_dma_push(chan, chan->pushbuf_bo, chan->dma.put << 2,
+			      chan->dma.cur - chan->dma.put);
+	} else {
+		WRITE_PUT(chan->dma.cur);
+	}
+
 	chan->dma.put = chan->dma.cur;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 52cc13bd02b9e6527fa3d4a2c14a9d0180f33c70..d221044e079321a7cb6d5e0ddf2ec8a0f1f7a26f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -239,6 +239,11 @@ struct nouveau_channel {
 		int cur;
 		int put;
 		/* access via pushbuf_bo */
+
+		int ib_base;
+		int ib_max;
+		int ib_free;
+		int ib_put;
 	} dma;
 
 	uint32_t sw_subchannel[8];
@@ -848,7 +853,7 @@ nouveau_debugfs_channel_fini(struct nouveau_channel *chan)
 /* nouveau_dma.c */
 extern void nouveau_dma_pre_init(struct nouveau_channel *);
 extern int  nouveau_dma_init(struct nouveau_channel *);
-extern int  nouveau_dma_wait(struct nouveau_channel *, int size);
+extern int  nouveau_dma_wait(struct nouveau_channel *, int slots, int size);
 
 /* nouveau_acpi.c */
 #ifdef CONFIG_ACPI
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 70cc30803e3bec1f28d4143faf1fc7754efb7986..986b67099f6c9d7072001b46675b001fc9c9d65d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -707,7 +707,7 @@ nouveau_gem_ioctl_pushbuf_call(struct drm_device *dev, void *data,
 		uint32_t retaddy;
 
 		if (chan->dma.free < 4 + NOUVEAU_DMA_SKIPS) {
-			ret = nouveau_dma_wait(chan, 4 + NOUVEAU_DMA_SKIPS);
+			ret = nouveau_dma_wait(chan, 0, 4 + NOUVEAU_DMA_SKIPS);
 			if (ret) {
 				NV_ERROR(dev, "jmp_space: %d\n", ret);
 				goto out;
@@ -754,6 +754,15 @@ nouveau_gem_ioctl_pushbuf_call(struct drm_device *dev, void *data,
 		}
 	}
 
+	if (chan->dma.ib_max) {
+		ret = nouveau_dma_wait(chan, 2, 6);
+		if (ret) {
+			NV_INFO(dev, "nv50cal_space: %d\n", ret);
+			goto out;
+		}
+
+		nv50_dma_push(chan, pbbo, req->offset, req->nr_dwords);
+	} else
 	if (PUSHBUF_CAL) {
 		ret = RING_SPACE(chan, 2);
 		if (ret) {
@@ -792,6 +801,10 @@ nouveau_gem_ioctl_pushbuf_call(struct drm_device *dev, void *data,
 	kfree(bo);
 
 out_next:
+	if (chan->dma.ib_max) {
+		req->suffix0 = 0x00000000;
+		req->suffix1 = 0x00000000;
+	} else
 	if (PUSHBUF_CAL) {
 		req->suffix0 = 0x00020000;
 		req->suffix1 = 0x00000000;
diff --git a/drivers/gpu/drm/nouveau/nv50_fifo.c b/drivers/gpu/drm/nouveau/nv50_fifo.c
index 369ecb4cee57f84bd7a94555f5880862dd1ec878..e20c0e2474f3b56035b3c6391b86a17e4c693f0f 100644
--- a/drivers/gpu/drm/nouveau/nv50_fifo.c
+++ b/drivers/gpu/drm/nouveau/nv50_fifo.c
@@ -283,17 +283,17 @@ nv50_fifo_create_context(struct nouveau_channel *chan)
 
 	dev_priv->engine.instmem.prepare_access(dev, true);
 
-	nv_wo32(dev, ramfc, 0x08/4, chan->pushbuf_base);
-	nv_wo32(dev, ramfc, 0x10/4, chan->pushbuf_base);
 	nv_wo32(dev, ramfc, 0x48/4, chan->pushbuf->instance >> 4);
 	nv_wo32(dev, ramfc, 0x80/4, (0xc << 24) | (chan->ramht->instance >> 4));
-	nv_wo32(dev, ramfc, 0x3c/4, 0x00086078);
 	nv_wo32(dev, ramfc, 0x44/4, 0x2101ffff);
 	nv_wo32(dev, ramfc, 0x60/4, 0x7fffffff);
 	nv_wo32(dev, ramfc, 0x40/4, 0x00000000);
 	nv_wo32(dev, ramfc, 0x7c/4, 0x30000001);
 	nv_wo32(dev, ramfc, 0x78/4, 0x00000000);
-	nv_wo32(dev, ramfc, 0x4c/4, 0xffffffff);
+	nv_wo32(dev, ramfc, 0x3c/4, 0x403f6078);
+	nv_wo32(dev, ramfc, 0x50/4, chan->pushbuf_base +
+				    chan->dma.ib_base * 4);
+	nv_wo32(dev, ramfc, 0x54/4, drm_order(chan->dma.ib_max + 1) << 16);
 
 	if (!IS_G80) {
 		nv_wo32(dev, chan->ramin->gpuobj, 0, chan->id);