diff --git a/drivers/gpu/drm/etnaviv/Makefile b/drivers/gpu/drm/etnaviv/Makefile
index 1086e9876f9118b10d341d511aa3629ce7a21356..4f76c992043f69fc2d5bd8a72b13c50aaa3986af 100644
--- a/drivers/gpu/drm/etnaviv/Makefile
+++ b/drivers/gpu/drm/etnaviv/Makefile
@@ -1,6 +1,7 @@
 etnaviv-y := \
 	etnaviv_buffer.o \
 	etnaviv_cmd_parser.o \
+	etnaviv_cmdbuf.o \
 	etnaviv_drv.o \
 	etnaviv_dump.o \
 	etnaviv_gem_prime.o \
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
index d9230132dfbcc51d1da070769617b1841ee3a248..ed9588f36bc9b4a6214eca1e2954f520a0ce8ef4 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
@@ -15,6 +15,7 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include "etnaviv_cmdbuf.h"
 #include "etnaviv_gpu.h"
 #include "etnaviv_gem.h"
 #include "etnaviv_mmu.h"
@@ -125,7 +126,7 @@ static void etnaviv_buffer_dump(struct etnaviv_gpu *gpu,
 	u32 *ptr = buf->vaddr + off;
 
 	dev_info(gpu->dev, "virt %p phys 0x%08x free 0x%08x\n",
-			ptr, etnaviv_iommu_get_cmdbuf_va(gpu, buf) + off, size - len * 4 - off);
+			ptr, etnaviv_cmdbuf_get_va(buf) + off, size - len * 4 - off);
 
 	print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,
 			ptr, len * 4, 0);
@@ -158,7 +159,7 @@ static u32 etnaviv_buffer_reserve(struct etnaviv_gpu *gpu,
 	if (buffer->user_size + cmd_dwords * sizeof(u64) > buffer->size)
 		buffer->user_size = 0;
 
-	return etnaviv_iommu_get_cmdbuf_va(gpu, buffer) + buffer->user_size;
+	return etnaviv_cmdbuf_get_va(buffer) + buffer->user_size;
 }
 
 u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu)
@@ -169,7 +170,7 @@ u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu)
 	buffer->user_size = 0;
 
 	CMD_WAIT(buffer);
-	CMD_LINK(buffer, 2, etnaviv_iommu_get_cmdbuf_va(gpu, buffer) +
+	CMD_LINK(buffer, 2, etnaviv_cmdbuf_get_va(buffer) +
 		 buffer->user_size - 4);
 
 	return buffer->user_size / 8;
@@ -261,7 +262,7 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
 	if (drm_debug & DRM_UT_DRIVER)
 		etnaviv_buffer_dump(gpu, buffer, 0, 0x50);
 
-	link_target = etnaviv_iommu_get_cmdbuf_va(gpu, cmdbuf);
+	link_target = etnaviv_cmdbuf_get_va(cmdbuf);
 	link_dwords = cmdbuf->size / 8;
 
 	/*
@@ -355,12 +356,13 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
 	CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) |
 		       VIVS_GL_EVENT_FROM_PE);
 	CMD_WAIT(buffer);
-	CMD_LINK(buffer, 2, etnaviv_iommu_get_cmdbuf_va(gpu, buffer) +
+	CMD_LINK(buffer, 2, etnaviv_cmdbuf_get_va(buffer) +
 			    buffer->user_size - 4);
 
 	if (drm_debug & DRM_UT_DRIVER)
 		pr_info("stream link to 0x%08x @ 0x%08x %p\n",
-			return_target, etnaviv_iommu_get_cmdbuf_va(gpu, cmdbuf), cmdbuf->vaddr);
+			return_target, etnaviv_cmdbuf_get_va(cmdbuf),
+			cmdbuf->vaddr);
 
 	if (drm_debug & DRM_UT_DRIVER) {
 		print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c b/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c
index 2a2e5e366ab7c6374459a0a02f6ea7328d70954e..6e3bbcf24160eb297d7269e79828a0bfdac7bdc5 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c
@@ -56,6 +56,8 @@ static const struct {
 	ST(0x0644, 1),
 	ST(0x064c, 1),
 	ST(0x0680, 8),
+	ST(0x086c, 1),
+	ST(0x1028, 1),
 	ST(0x1410, 1),
 	ST(0x1430, 1),
 	ST(0x1458, 1),
@@ -73,8 +75,12 @@ static const struct {
 	ST(0x16c0, 8),
 	ST(0x16e0, 8),
 	ST(0x1740, 8),
+	ST(0x17c0, 8),
+	ST(0x17e0, 8),
 	ST(0x2400, 14 * 16),
 	ST(0x10800, 32 * 16),
+	ST(0x14600, 16),
+	ST(0x14800, 8 * 8),
 #undef ST
 };
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c
new file mode 100644
index 0000000000000000000000000000000000000000..633e0f07cbac175df737e62113a6a351219ef899
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright (C) 2017 Etnaviv Project
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <drm/drm_mm.h>
+
+#include "etnaviv_cmdbuf.h"
+#include "etnaviv_gpu.h"
+#include "etnaviv_mmu.h"
+
+#define SUBALLOC_SIZE		SZ_256K
+#define SUBALLOC_GRANULE	SZ_4K
+#define SUBALLOC_GRANULES	(SUBALLOC_SIZE / SUBALLOC_GRANULE)
+
+/*
+ * Backing store for all command buffers of one GPU: a single DMA buffer of
+ * SUBALLOC_SIZE bytes that is handed out in power-of-two runs of
+ * SUBALLOC_GRANULE sized granules.
+ */
+struct etnaviv_cmdbuf_suballoc {
+	/* suballocated dma buffer properties */
+	struct etnaviv_gpu *gpu;
+	void *vaddr;
+	dma_addr_t paddr;
+
+	/* GPU mapping */
+	u32 iova;
+	struct drm_mm_node vram_node; /* only used on MMUv2 */
+
+	/* allocation management */
+	struct mutex lock;
+	DECLARE_BITMAP(granule_map, SUBALLOC_GRANULES);
+	/* wait condition of free_event: set on free, cleared when full */
+	int free_space;
+	wait_queue_head_t free_event;
+};
+
+/*
+ * Create the command buffer suballocator for @gpu: allocate a write-combined
+ * DMA buffer of SUBALLOC_SIZE bytes and obtain its GPU address through
+ * etnaviv_iommu_get_suballoc_va().
+ *
+ * Returns the suballocator, or an ERR_PTR() on failure.  NULL is never
+ * returned, matching the IS_ERR() check done by etnaviv_gpu_init().
+ */
+struct etnaviv_cmdbuf_suballoc *
+etnaviv_cmdbuf_suballoc_new(struct etnaviv_gpu *gpu)
+{
+	struct etnaviv_cmdbuf_suballoc *suballoc;
+	int ret;
+
+	suballoc = kzalloc(sizeof(*suballoc), GFP_KERNEL);
+	if (!suballoc)
+		return ERR_PTR(-ENOMEM);
+
+	suballoc->gpu = gpu;
+	mutex_init(&suballoc->lock);
+	init_waitqueue_head(&suballoc->free_event);
+
+	suballoc->vaddr = dma_alloc_wc(gpu->dev, SUBALLOC_SIZE,
+				       &suballoc->paddr, GFP_KERNEL);
+	if (!suballoc->vaddr) {
+		ret = -ENOMEM;
+		goto free_suballoc;
+	}
+
+	ret = etnaviv_iommu_get_suballoc_va(gpu, suballoc->paddr,
+					    &suballoc->vram_node, SUBALLOC_SIZE,
+					    &suballoc->iova);
+	if (ret)
+		goto free_dma;
+
+	return suballoc;
+
+free_dma:
+	dma_free_wc(gpu->dev, SUBALLOC_SIZE, suballoc->vaddr, suballoc->paddr);
+free_suballoc:
+	kfree(suballoc);
+
+	/* callers test the result with IS_ERR(), so never hand back NULL */
+	return ERR_PTR(ret);
+}
+
+/*
+ * Give back the GPU address, the DMA buffer and the bookkeeping structure of
+ * @suballoc.  Outstanding command buffers are not checked for here, so they
+ * are expected to have been freed already.
+ */
+void etnaviv_cmdbuf_suballoc_destroy(struct etnaviv_cmdbuf_suballoc *suballoc)
+{
+	etnaviv_iommu_put_suballoc_va(suballoc->gpu, &suballoc->vram_node,
+				      SUBALLOC_SIZE, suballoc->iova);
+	dma_free_wc(suballoc->gpu->dev, SUBALLOC_SIZE, suballoc->vaddr,
+		    suballoc->paddr);
+	kfree(suballoc);
+}
+
+/*
+ * Allocate a command buffer of @size bytes from @suballoc, with room for
+ * @nr_bos entries in its bo_map.
+ *
+ * The space is rounded up to a power-of-two number of granules.  If no such
+ * region is free, sleep (interruptible, 10 second timeout) until
+ * etnaviv_cmdbuf_free() signals free_event, then try again.
+ *
+ * Returns the command buffer, or NULL if the bookkeeping allocation fails or
+ * the wait for free space times out.
+ */
+struct etnaviv_cmdbuf *
+etnaviv_cmdbuf_new(struct etnaviv_cmdbuf_suballoc *suballoc, u32 size,
+		   size_t nr_bos)
+{
+	struct etnaviv_cmdbuf *cmdbuf;
+	size_t sz = size_vstruct(nr_bos, sizeof(cmdbuf->bo_map[0]),
+				 sizeof(*cmdbuf));
+	int granule_offs, order, ret;
+
+	cmdbuf = kzalloc(sz, GFP_KERNEL);
+	if (!cmdbuf)
+		return NULL;
+
+	cmdbuf->suballoc = suballoc;
+	cmdbuf->size = size;
+
+	order = order_base_2(ALIGN(size, SUBALLOC_GRANULE) / SUBALLOC_GRANULE);
+retry:
+	mutex_lock(&suballoc->lock);
+	granule_offs = bitmap_find_free_region(suballoc->granule_map,
+					SUBALLOC_GRANULES, order);
+	if (granule_offs < 0) {
+		/* clear the wait condition while still holding the lock */
+		suballoc->free_space = 0;
+		mutex_unlock(&suballoc->lock);
+		ret = wait_event_interruptible_timeout(suballoc->free_event,
+						       suballoc->free_space,
+						       msecs_to_jiffies(10 * 1000));
+		if (!ret) {
+			dev_err(suballoc->gpu->dev,
+				"Timeout waiting for cmdbuf space\n");
+			/* nothing else references cmdbuf yet, don't leak it */
+			kfree(cmdbuf);
+			return NULL;
+		}
+		goto retry;
+	}
+	mutex_unlock(&suballoc->lock);
+	cmdbuf->suballoc_offset = granule_offs * SUBALLOC_GRANULE;
+	cmdbuf->vaddr = suballoc->vaddr + cmdbuf->suballoc_offset;
+
+	return cmdbuf;
+}
+
+/*
+ * Hand the granules of @cmdbuf back to its suballocator, wake up everybody
+ * waiting for space in etnaviv_cmdbuf_new() and free the bookkeeping
+ * structure.
+ */
+void etnaviv_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf)
+{
+	struct etnaviv_cmdbuf_suballoc *suballoc = cmdbuf->suballoc;
+	int order = order_base_2(ALIGN(cmdbuf->size, SUBALLOC_GRANULE) /
+				 SUBALLOC_GRANULE);
+
+	mutex_lock(&suballoc->lock);
+	bitmap_release_region(suballoc->granule_map,
+			      cmdbuf->suballoc_offset / SUBALLOC_GRANULE,
+			      order);
+	suballoc->free_space = 1;
+	mutex_unlock(&suballoc->lock);
+	wake_up_all(&suballoc->free_event);
+	kfree(cmdbuf);
+}
+
+/* GPU address of @buf: the suballocator's iova plus the buffer's offset */
+u32 etnaviv_cmdbuf_get_va(struct etnaviv_cmdbuf *buf)
+{
+	return buf->suballoc->iova + buf->suballoc_offset;
+}
+
+/* DMA address of @buf: the suballocator's paddr plus the buffer's offset */
+dma_addr_t etnaviv_cmdbuf_get_pa(struct etnaviv_cmdbuf *buf)
+{
+	return buf->suballoc->paddr + buf->suballoc_offset;
+}
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h
new file mode 100644
index 0000000000000000000000000000000000000000..80d78076c679c041fd03c0ed5fad2518c1ed7d67
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2017 Etnaviv Project
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ETNAVIV_CMDBUF_H__
+#define __ETNAVIV_CMDBUF_H__
+
+#include <linux/types.h>
+
+struct etnaviv_gpu;
+struct etnaviv_cmdbuf_suballoc;
+
+/*
+ * A command buffer carved out of a suballocator.  etnaviv_cmdbuf_new()
+ * allocates it with room for the requested number of trailing bo_map entries.
+ */
+struct etnaviv_cmdbuf {
+	/* suballocator this cmdbuf is allocated from */
+	struct etnaviv_cmdbuf_suballoc *suballoc;
+	/* user context key, must be unique between all active users */
+	struct etnaviv_file_private *ctx;
+	/* cmdbuf properties */
+	int suballoc_offset;	/* byte offset inside the suballocator buffer */
+	void *vaddr;		/* CPU address of the buffer */
+	u32 size;		/* size in bytes, as requested at allocation */
+	u32 user_size;
+	/* fence after which this buffer is to be disposed */
+	struct dma_fence *fence;
+	/* target exec state */
+	u32 exec_state;
+	/* per GPU in-flight list */
+	struct list_head node;
+	/* BOs attached to this command buffer */
+	unsigned int nr_bos;
+	struct etnaviv_vram_mapping *bo_map[];
+};
+
+/* suballocator life cycle */
+struct etnaviv_cmdbuf_suballoc *
+etnaviv_cmdbuf_suballoc_new(struct etnaviv_gpu *gpu);
+void etnaviv_cmdbuf_suballoc_destroy(struct etnaviv_cmdbuf_suballoc *suballoc);
+
+/* command buffer allocation; etnaviv_cmdbuf_new() returns NULL on failure */
+struct etnaviv_cmdbuf *
+etnaviv_cmdbuf_new(struct etnaviv_cmdbuf_suballoc *suballoc, u32 size,
+		   size_t nr_bos);
+void etnaviv_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf);
+
+/* GPU address and DMA address of a command buffer */
+u32 etnaviv_cmdbuf_get_va(struct etnaviv_cmdbuf *buf);
+dma_addr_t etnaviv_cmdbuf_get_pa(struct etnaviv_cmdbuf *buf);
+
+#endif /* __ETNAVIV_CMDBUF_H__ */
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index 590be0d1dd95d9bd9e9725e531ed0d83e960371b..587e45043542b642fa519892d4ec3570b22e5e52 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -18,11 +18,11 @@
 #include <linux/of_platform.h>
 #include <drm/drm_of.h>
 
+#include "etnaviv_cmdbuf.h"
 #include "etnaviv_drv.h"
 #include "etnaviv_gpu.h"
 #include "etnaviv_gem.h"
 #include "etnaviv_mmu.h"
-#include "etnaviv_gem.h"
 
 #ifdef CONFIG_DRM_ETNAVIV_REGISTER_LOGGING
 static bool reglog;
@@ -177,7 +177,8 @@ static void etnaviv_buffer_dump(struct etnaviv_gpu *gpu, struct seq_file *m)
 	u32 i;
 
 	seq_printf(m, "virt %p - phys 0x%llx - free 0x%08x\n",
-			buf->vaddr, (u64)buf->paddr, size - buf->user_size);
+			buf->vaddr, (u64)etnaviv_cmdbuf_get_pa(buf),
+			size - buf->user_size);
 
 	for (i = 0; i < size / 4; i++) {
 		if (i && !(i % 4))
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
index af65491a78e204fa6f68ff2e409c151df5f7d8c9..d019b5e311cc5502e2d273525ea4144f5d0a796d 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
@@ -15,6 +15,7 @@
  */
 
 #include <linux/devcoredump.h>
+#include "etnaviv_cmdbuf.h"
 #include "etnaviv_dump.h"
 #include "etnaviv_gem.h"
 #include "etnaviv_gpu.h"
@@ -177,12 +178,11 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 	etnaviv_core_dump_mmu(&iter, gpu, mmu_size);
 	etnaviv_core_dump_mem(&iter, ETDUMP_BUF_RING, gpu->buffer->vaddr,
 			      gpu->buffer->size,
-			      etnaviv_iommu_get_cmdbuf_va(gpu, gpu->buffer));
+			      etnaviv_cmdbuf_get_va(gpu->buffer));
 
 	list_for_each_entry(cmd, &gpu->active_cmd_list, node)
 		etnaviv_core_dump_mem(&iter, ETDUMP_BUF_CMD, cmd->vaddr,
-				      cmd->size,
-				      etnaviv_iommu_get_cmdbuf_va(gpu, cmd));
+				      cmd->size, etnaviv_cmdbuf_get_va(cmd));
 
 	/* Reserve space for the bomap */
 	if (n_bomap_pages) {
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index afdd55ddf821c55c70f2bdb893abaa508d1497b0..726090d7a6acef60e2a30db90ff5a79a1cf40940 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -15,6 +15,7 @@
  */
 
 #include <linux/reservation.h>
+#include "etnaviv_cmdbuf.h"
 #include "etnaviv_drv.h"
 #include "etnaviv_gpu.h"
 #include "etnaviv_gem.h"
@@ -332,8 +333,9 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 	bos = drm_malloc_ab(args->nr_bos, sizeof(*bos));
 	relocs = drm_malloc_ab(args->nr_relocs, sizeof(*relocs));
 	stream = drm_malloc_ab(1, args->stream_size);
-	cmdbuf = etnaviv_gpu_cmdbuf_new(gpu, ALIGN(args->stream_size, 8) + 8,
-					args->nr_bos);
+	cmdbuf = etnaviv_cmdbuf_new(gpu->cmdbuf_suballoc,
+				    ALIGN(args->stream_size, 8) + 8,
+				    args->nr_bos);
 	if (!bos || !relocs || !stream || !cmdbuf) {
 		ret = -ENOMEM;
 		goto err_submit_cmds;
@@ -422,7 +424,7 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 err_submit_cmds:
 	/* if we still own the cmdbuf */
 	if (cmdbuf)
-		etnaviv_gpu_cmdbuf_free(cmdbuf);
+		etnaviv_cmdbuf_free(cmdbuf);
 	if (stream)
 		drm_free_large(stream);
 	if (bos)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index 0a67124bb2a421cbfa21ec726fedf287294f2e9c..130d7d517a19a180ca7f2e744131aeabc554564b 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -18,6 +18,8 @@
 #include <linux/dma-fence.h>
 #include <linux/moduleparam.h>
 #include <linux/of_device.h>
+
+#include "etnaviv_cmdbuf.h"
 #include "etnaviv_dump.h"
 #include "etnaviv_gpu.h"
 #include "etnaviv_gem.h"
@@ -546,6 +548,36 @@ void etnaviv_gpu_start_fe(struct etnaviv_gpu *gpu, u32 address, u16 prefetch)
 		  VIVS_FE_COMMAND_CONTROL_PREFETCH(prefetch));
 }
 
+/*
+ * Program VIVS_PM_PULSE_EATER.  Starts from a fixed base value, applies the
+ * per model/revision tweaks and, on 3D cores newer than revision 0x5420,
+ * uses the value read back from the register with bit 18 set instead.
+ */
+static void etnaviv_gpu_setup_pulse_eater(struct etnaviv_gpu *gpu)
+{
+	/*
+	 * Base value for VIVS_PM_PULSE_EATER register on models where it
+	 * cannot be read, extracted from vivante kernel driver.
+	 */
+	u32 val = 0x01590880;
+
+	if (etnaviv_is_model_rev(gpu, GC4000, 0x5208) ||
+	    etnaviv_is_model_rev(gpu, GC4000, 0x5222))
+		val |= BIT(23);
+
+	if (etnaviv_is_model_rev(gpu, GC1000, 0x5039) ||
+	    etnaviv_is_model_rev(gpu, GC1000, 0x5040))
+		val = (val & ~BIT(16)) | BIT(17);
+
+	if (gpu->identity.revision > 0x5420 &&
+	    (gpu->identity.features & chipFeatures_PIPE_3D)) {
+		/* Performance fix: disable internal DFS */
+		val = gpu_read(gpu, VIVS_PM_PULSE_EATER) | BIT(18);
+	}
+
+	gpu_write(gpu, VIVS_PM_PULSE_EATER, val);
+}
+
 static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
 {
 	u16 prefetch;
@@ -586,6 +619,9 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
 		gpu_write(gpu, VIVS_MC_BUS_CONFIG, bus_config);
 	}
 
+	/* setup the pulse eater */
+	etnaviv_gpu_setup_pulse_eater(gpu);
+
 	/* setup the MMU */
 	etnaviv_iommu_restore(gpu);
 
@@ -593,7 +629,7 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
 	prefetch = etnaviv_buffer_init(gpu);
 
 	gpu_write(gpu, VIVS_HI_INTR_ENBL, ~0U);
-	etnaviv_gpu_start_fe(gpu, etnaviv_iommu_get_cmdbuf_va(gpu, gpu->buffer),
+	etnaviv_gpu_start_fe(gpu, etnaviv_cmdbuf_get_va(gpu->buffer),
 			     prefetch);
 }
 
@@ -658,8 +694,17 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 		goto fail;
 	}
 
+	gpu->cmdbuf_suballoc = etnaviv_cmdbuf_suballoc_new(gpu);
+	if (IS_ERR(gpu->cmdbuf_suballoc)) {
+		dev_err(gpu->dev, "Failed to create cmdbuf suballocator\n");
+		ret = PTR_ERR(gpu->cmdbuf_suballoc);
+		/* unbind only tests for NULL, don't leave an ERR_PTR behind */
+		gpu->cmdbuf_suballoc = NULL;
+		goto fail;
+	}
+
 	/* Create buffer: */
-	gpu->buffer = etnaviv_gpu_cmdbuf_new(gpu, PAGE_SIZE, 0);
+	gpu->buffer = etnaviv_cmdbuf_new(gpu->cmdbuf_suballoc, PAGE_SIZE, 0);
 	if (!gpu->buffer) {
 		ret = -ENOMEM;
 		dev_err(gpu->dev, "could not create command buffer\n");
@@ -667,7 +710,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 	}
 
 	if (gpu->mmu->version == ETNAVIV_IOMMU_V1 &&
-	    gpu->buffer->paddr - gpu->memory_base > 0x80000000) {
+	    etnaviv_cmdbuf_get_va(gpu->buffer) > 0x80000000) {
 		ret = -EINVAL;
 		dev_err(gpu->dev,
 			"command buffer outside valid memory window\n");
@@ -694,7 +737,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 	return 0;
 
 free_buffer:
-	etnaviv_gpu_cmdbuf_free(gpu->buffer);
+	etnaviv_cmdbuf_free(gpu->buffer);
 	gpu->buffer = NULL;
 destroy_iommu:
 	etnaviv_iommu_destroy(gpu->mmu);
@@ -1117,41 +1160,6 @@ static void event_free(struct etnaviv_gpu *gpu, unsigned int event)
  * Cmdstream submission/retirement:
  */
 
-struct etnaviv_cmdbuf *etnaviv_gpu_cmdbuf_new(struct etnaviv_gpu *gpu, u32 size,
-	size_t nr_bos)
-{
-	struct etnaviv_cmdbuf *cmdbuf;
-	size_t sz = size_vstruct(nr_bos, sizeof(cmdbuf->bo_map[0]),
-				 sizeof(*cmdbuf));
-
-	cmdbuf = kzalloc(sz, GFP_KERNEL);
-	if (!cmdbuf)
-		return NULL;
-
-	if (gpu->mmu->version == ETNAVIV_IOMMU_V2)
-		size = ALIGN(size, SZ_4K);
-
-	cmdbuf->vaddr = dma_alloc_wc(gpu->dev, size, &cmdbuf->paddr,
-				     GFP_KERNEL);
-	if (!cmdbuf->vaddr) {
-		kfree(cmdbuf);
-		return NULL;
-	}
-
-	cmdbuf->gpu = gpu;
-	cmdbuf->size = size;
-
-	return cmdbuf;
-}
-
-void etnaviv_gpu_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf)
-{
-	etnaviv_iommu_put_cmdbuf_va(cmdbuf->gpu, cmdbuf);
-	dma_free_wc(cmdbuf->gpu->dev, cmdbuf->size, cmdbuf->vaddr,
-		    cmdbuf->paddr);
-	kfree(cmdbuf);
-}
-
 static void retire_worker(struct work_struct *work)
 {
 	struct etnaviv_gpu *gpu = container_of(work, struct etnaviv_gpu,
@@ -1177,7 +1185,7 @@ static void retire_worker(struct work_struct *work)
 			etnaviv_gem_mapping_unreference(mapping);
 		}
 
-		etnaviv_gpu_cmdbuf_free(cmdbuf);
+		etnaviv_cmdbuf_free(cmdbuf);
 		/*
 		 * We need to balance the runtime PM count caused by
 		 * each submission.  Upon submission, we increment
@@ -1593,10 +1601,15 @@ static void etnaviv_gpu_unbind(struct device *dev, struct device *master,
 #endif
 
 	if (gpu->buffer) {
-		etnaviv_gpu_cmdbuf_free(gpu->buffer);
+		etnaviv_cmdbuf_free(gpu->buffer);
 		gpu->buffer = NULL;
 	}
 
+	if (gpu->cmdbuf_suballoc) {
+		etnaviv_cmdbuf_suballoc_destroy(gpu->cmdbuf_suballoc);
+		gpu->cmdbuf_suballoc = NULL;
+	}
+
 	if (gpu->mmu) {
 		etnaviv_iommu_destroy(gpu->mmu);
 		gpu->mmu = NULL;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
index 8c6b824e9d0a5c36633ca9ee5e0742d1dcf855e5..1c0606ea7d5e8f055818a3c23b86a6774d0c0827 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -92,6 +92,7 @@ struct etnaviv_event {
 	struct dma_fence *fence;
 };
 
+struct etnaviv_cmdbuf_suballoc;
 struct etnaviv_cmdbuf;
 
 struct etnaviv_gpu {
@@ -135,6 +136,7 @@ struct etnaviv_gpu {
 	int irq;
 
 	struct etnaviv_iommu *mmu;
+	struct etnaviv_cmdbuf_suballoc *cmdbuf_suballoc;
 
 	/* Power Control: */
 	struct clk *clk_bus;
@@ -150,29 +152,6 @@ struct etnaviv_gpu {
 	struct work_struct recover_work;
 };
 
-struct etnaviv_cmdbuf {
-	/* device this cmdbuf is allocated for */
-	struct etnaviv_gpu *gpu;
-	/* user context key, must be unique between all active users */
-	struct etnaviv_file_private *ctx;
-	/* cmdbuf properties */
-	void *vaddr;
-	dma_addr_t paddr;
-	u32 size;
-	u32 user_size;
-	/* vram node used if the cmdbuf is mapped through the MMUv2 */
-	struct drm_mm_node vram_node;
-	/* fence after which this buffer is to be disposed */
-	struct dma_fence *fence;
-	/* target exec state */
-	u32 exec_state;
-	/* per GPU in-flight list */
-	struct list_head node;
-	/* BOs attached to this command buffer */
-	unsigned int nr_bos;
-	struct etnaviv_vram_mapping *bo_map[0];
-};
-
 static inline void gpu_write(struct etnaviv_gpu *gpu, u32 reg, u32 data)
 {
 	etnaviv_writel(data, gpu->mmio + reg);
@@ -211,9 +190,6 @@ int etnaviv_gpu_wait_obj_inactive(struct etnaviv_gpu *gpu,
 	struct etnaviv_gem_object *etnaviv_obj, struct timespec *timeout);
 int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
 	struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf);
-struct etnaviv_cmdbuf *etnaviv_gpu_cmdbuf_new(struct etnaviv_gpu *gpu,
-					      u32 size, size_t nr_bos);
-void etnaviv_gpu_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf);
 int etnaviv_gpu_pm_get_sync(struct etnaviv_gpu *gpu);
 void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu);
 int etnaviv_gpu_wait_idle(struct etnaviv_gpu *gpu, unsigned int timeout_ms);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
index 81f1583a79463374d2d36c5b4787e0e7bf32a56f..7a7c97f599d7a645c51c85febb9104ea4752181b 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
@@ -184,7 +184,7 @@ static void etnaviv_iommuv1_dump(struct iommu_domain *domain, void *buf)
 	memcpy(buf, etnaviv_domain->pgtable.pgtable, PT_SIZE);
 }
 
-static struct etnaviv_iommu_ops etnaviv_iommu_ops = {
+static const struct etnaviv_iommu_ops etnaviv_iommu_ops = {
 	.ops = {
 		.domain_free = etnaviv_domain_free,
 		.map = etnaviv_iommuv1_map,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
index 7e9c4d210a8486af9e779cb88b4da2fb88a2309b..cbe447ac59747c3a5a8e655308fc74e7721246a0 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
@@ -21,6 +21,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/bitops.h>
 
+#include "etnaviv_cmdbuf.h"
 #include "etnaviv_gpu.h"
 #include "etnaviv_mmu.h"
 #include "etnaviv_iommu.h"
@@ -229,7 +230,7 @@ static void etnaviv_iommuv2_dump(struct iommu_domain *domain, void *buf)
 			memcpy(buf, etnaviv_domain->stlb_cpu[i], SZ_4K);
 }
 
-static struct etnaviv_iommu_ops etnaviv_iommu_ops = {
+static const struct etnaviv_iommu_ops etnaviv_iommu_ops = {
 	.ops = {
 		.domain_free = etnaviv_iommuv2_domain_free,
 		.map = etnaviv_iommuv2_map,
@@ -254,7 +255,8 @@ void etnaviv_iommuv2_restore(struct etnaviv_gpu *gpu)
 	prefetch = etnaviv_buffer_config_mmuv2(gpu,
 				(u32)etnaviv_domain->mtlb_dma,
 				(u32)etnaviv_domain->bad_page_dma);
-	etnaviv_gpu_start_fe(gpu, gpu->buffer->paddr, prefetch);
+	etnaviv_gpu_start_fe(gpu, (u32)etnaviv_cmdbuf_get_pa(gpu->buffer),
+			     prefetch);
 	etnaviv_gpu_wait_idle(gpu, 100);
 
 	gpu_write(gpu, VIVS_MMUv2_CONTROL, VIVS_MMUv2_CONTROL_ENABLE);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
index f503af462dadd715afcca7df677fb93a92050161..ff826c16fb8912b05ff29eff669e38da420efd9b 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
@@ -15,6 +15,7 @@
  */
 
 #include "common.xml.h"
+#include "etnaviv_cmdbuf.h"
 #include "etnaviv_drv.h"
 #include "etnaviv_gem.h"
 #include "etnaviv_gpu.h"
@@ -117,14 +118,9 @@ static int etnaviv_iommu_find_iova(struct etnaviv_iommu *mmu,
 		struct list_head list;
 		bool found;
 
-		/*
-		 * XXX: The DRM_MM_SEARCH_BELOW is really a hack to trick
-		 * drm_mm into giving out a low IOVA after address space
-		 * rollover. This needs a proper fix.
-		 */
 		ret = drm_mm_insert_node_in_range(&mmu->mm, node,
 			size, 0, mmu->last_iova, ~0UL,
-			mmu->last_iova ? DRM_MM_SEARCH_DEFAULT : DRM_MM_SEARCH_BELOW);
+			DRM_MM_SEARCH_DEFAULT);
 
 		if (ret != -ENOSPC)
 			break;
@@ -194,11 +190,8 @@ static int etnaviv_iommu_find_iova(struct etnaviv_iommu *mmu,
 
 		/*
 		 * We removed enough mappings so that the new allocation will
-		 * succeed.  Ensure that the MMU will be flushed before the
-		 * associated commit requesting this mapping, and retry the
-		 * allocation one more time.
+		 * succeed, retry the allocation one more time.
 		 */
-		mmu->need_flush = true;
 	}
 
 	return ret;
@@ -250,6 +243,7 @@ int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu,
 	}
 
 	list_add_tail(&mapping->mmu_node, &mmu->mappings);
+	mmu->need_flush = true;
 	mutex_unlock(&mmu->lock);
 
 	return ret;
@@ -267,6 +261,7 @@ void etnaviv_iommu_unmap_gem(struct etnaviv_iommu *mmu,
 		etnaviv_iommu_remove_mapping(mmu, mapping);
 
 	list_del(&mapping->mmu_node);
+	mmu->need_flush = true;
 	mutex_unlock(&mmu->lock);
 }
 
@@ -322,55 +317,63 @@ void etnaviv_iommu_restore(struct etnaviv_gpu *gpu)
 		etnaviv_iommuv2_restore(gpu);
 }
 
-u32 etnaviv_iommu_get_cmdbuf_va(struct etnaviv_gpu *gpu,
-				struct etnaviv_cmdbuf *buf)
+/*
+ * Obtain the GPU address for the suballocator buffer at DMA address @paddr.
+ *
+ * On MMUv1 this is @paddr relative to gpu->memory_base and @vram_node is not
+ * touched.  Otherwise an address range of @size bytes is reserved through
+ * @vram_node and mapped with IOMMU_READ.
+ *
+ * Returns 0 with the address stored in @iova, or a negative error code.
+ */
+int etnaviv_iommu_get_suballoc_va(struct etnaviv_gpu *gpu, dma_addr_t paddr,
+				  struct drm_mm_node *vram_node, size_t size,
+				  u32 *iova)
 {
 	struct etnaviv_iommu *mmu = gpu->mmu;
 
 	if (mmu->version == ETNAVIV_IOMMU_V1) {
-		return buf->paddr - gpu->memory_base;
+		*iova = paddr - gpu->memory_base;
+		return 0;
 	} else {
 		int ret;
 
-		if (buf->vram_node.allocated)
-			return (u32)buf->vram_node.start;
-
 		mutex_lock(&mmu->lock);
-		ret = etnaviv_iommu_find_iova(mmu, &buf->vram_node,
-					      buf->size + SZ_64K);
+		ret = etnaviv_iommu_find_iova(mmu, vram_node, size);
 		if (ret < 0) {
 			mutex_unlock(&mmu->lock);
-			return 0;
+			return ret;
 		}
-		ret = iommu_map(mmu->domain, buf->vram_node.start, buf->paddr,
-				buf->size, IOMMU_READ);
+		ret = iommu_map(mmu->domain, vram_node->start, paddr, size,
+				IOMMU_READ);
 		if (ret < 0) {
-			drm_mm_remove_node(&buf->vram_node);
+			drm_mm_remove_node(vram_node);
 			mutex_unlock(&mmu->lock);
-			return 0;
+			return ret;
 		}
-		/*
-		 * At least on GC3000 the FE MMU doesn't properly flush old TLB
-		 * entries. Make sure to space the command buffers out in a way
-		 * that the FE MMU prefetch won't load invalid entries.
-		 */
-		mmu->last_iova = buf->vram_node.start + buf->size + SZ_64K;
+		mmu->last_iova = vram_node->start + size;
 		gpu->mmu->need_flush = true;
 		mutex_unlock(&mmu->lock);
 
-		return (u32)buf->vram_node.start;
+		*iova = (u32)vram_node->start;
+		return 0;
 	}
 }
 
-void etnaviv_iommu_put_cmdbuf_va(struct etnaviv_gpu *gpu,
-				 struct etnaviv_cmdbuf *buf)
+/*
+ * Undo etnaviv_iommu_get_suballoc_va(): on MMUv2 unmap @size bytes at @iova
+ * and release @vram_node.  Nothing to do for other MMU versions.
+ */
+void etnaviv_iommu_put_suballoc_va(struct etnaviv_gpu *gpu,
+				   struct drm_mm_node *vram_node, size_t size,
+				   u32 iova)
 {
 	struct etnaviv_iommu *mmu = gpu->mmu;
 
-	if (mmu->version == ETNAVIV_IOMMU_V2 && buf->vram_node.allocated) {
+	if (mmu->version == ETNAVIV_IOMMU_V2) {
 		mutex_lock(&mmu->lock);
-		iommu_unmap(mmu->domain, buf->vram_node.start, buf->size);
-		drm_mm_remove_node(&buf->vram_node);
+		iommu_unmap(mmu->domain, iova, size);
+		drm_mm_remove_node(vram_node);
 		mutex_unlock(&mmu->lock);
 	}
 }
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h
index e787e49c9693cf74f08177649e1495af8e212f87..54be289e5981c65a9fc2e5a0c11e49071918a21a 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h
@@ -62,10 +62,12 @@ void etnaviv_iommu_unmap_gem(struct etnaviv_iommu *mmu,
 	struct etnaviv_vram_mapping *mapping);
 void etnaviv_iommu_destroy(struct etnaviv_iommu *iommu);
 
-u32 etnaviv_iommu_get_cmdbuf_va(struct etnaviv_gpu *gpu,
-				struct etnaviv_cmdbuf *buf);
-void etnaviv_iommu_put_cmdbuf_va(struct etnaviv_gpu *gpu,
-				 struct etnaviv_cmdbuf *buf);
+int etnaviv_iommu_get_suballoc_va(struct etnaviv_gpu *gpu, dma_addr_t paddr,
+				  struct drm_mm_node *vram_node, size_t size,
+				  u32 *iova);
+void etnaviv_iommu_put_suballoc_va(struct etnaviv_gpu *gpu,
+				   struct drm_mm_node *vram_node, size_t size,
+				   u32 iova);
 
 size_t etnaviv_iommu_dump_size(struct etnaviv_iommu *iommu);
 void etnaviv_iommu_dump(struct etnaviv_iommu *iommu, void *buf);