diff --git a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt
index 74e1e8add5a1264bfc2ed8db1b26667ed8906903..844e0103fb0dac15220794cfde75f6d58b2c4d73 100644
--- a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt
+++ b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt
@@ -3,6 +3,10 @@ NVIDIA Tegra host1x
 Required properties:
 - compatible: "nvidia,tegra<chip>-host1x"
 - reg: Physical base address and length of the controller's registers.
+  For pre-Tegra186, one entry describing the whole register area.
+  For Tegra186, one entry for each entry in reg-names:
+    "vm" - VM region assigned to Linux
+    "hypervisor" - Hypervisor region (only if Linux acts as hypervisor)
 - interrupts: The interrupt outputs from the controller.
 - #address-cells: The number of cells used to represent physical base addresses
   in the host1x address space. Should be 1.
diff --git a/drivers/gpu/drm/tegra/Kconfig b/drivers/gpu/drm/tegra/Kconfig
index dc58ab140151f24bcde9baed20602428f6b37d83..cf54847a8bd1756411562447bb5665096bce356a 100644
--- a/drivers/gpu/drm/tegra/Kconfig
+++ b/drivers/gpu/drm/tegra/Kconfig
@@ -9,6 +9,7 @@ config DRM_TEGRA
 	select DRM_PANEL
 	select TEGRA_HOST1X
 	select IOMMU_IOVA if IOMMU_SUPPORT
+	select CEC_CORE if CEC_NOTIFIER
 	help
 	  Choose this option if you have an NVIDIA Tegra SoC.
 
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 4df39112e38ec6052beb2165e0550594922174f6..24a5ef4f5bb81e9bffccaf8a968147717c992a88 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -10,6 +10,7 @@
 #include <linux/clk.h>
 #include <linux/debugfs.h>
 #include <linux/iommu.h>
+#include <linux/of_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
 
@@ -23,16 +24,6 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_plane_helper.h>
 
-struct tegra_dc_soc_info {
-	bool supports_border_color;
-	bool supports_interlacing;
-	bool supports_cursor;
-	bool supports_block_linear;
-	unsigned int pitch_align;
-	bool has_powergate;
-	bool broken_reset;
-};
-
 struct tegra_plane {
 	struct drm_plane base;
 	unsigned int index;
@@ -559,14 +550,21 @@ static int tegra_plane_atomic_check(struct drm_plane *plane,
 	return 0;
 }
 
-static void tegra_dc_disable_window(struct tegra_dc *dc, int index)
+static void tegra_plane_atomic_disable(struct drm_plane *plane,
+				       struct drm_plane_state *old_state)
 {
+	struct tegra_dc *dc = to_tegra_dc(old_state->crtc);
+	struct tegra_plane *p = to_tegra_plane(plane);
 	unsigned long flags;
 	u32 value;
 
+	/* rien ne va plus */
+	if (!old_state || !old_state->crtc)
+		return;
+
 	spin_lock_irqsave(&dc->lock, flags);
 
-	value = WINDOW_A_SELECT << index;
+	value = WINDOW_A_SELECT << p->index;
 	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER);
 
 	value = tegra_dc_readl(dc, DC_WIN_WIN_OPTIONS);
@@ -591,7 +589,7 @@ static void tegra_plane_atomic_update(struct drm_plane *plane,
 		return;
 
 	if (!plane->state->visible)
-		return tegra_dc_disable_window(dc, p->index);
+		return tegra_plane_atomic_disable(plane, old_state);
 
 	memset(&window, 0, sizeof(window));
 	window.src.x = plane->state->src.x1 >> 16;
@@ -627,25 +625,10 @@ static void tegra_plane_atomic_update(struct drm_plane *plane,
 	tegra_dc_setup_window(dc, p->index, &window);
 }
 
-static void tegra_plane_atomic_disable(struct drm_plane *plane,
-				       struct drm_plane_state *old_state)
-{
-	struct tegra_plane *p = to_tegra_plane(plane);
-	struct tegra_dc *dc;
-
-	/* rien ne va plus */
-	if (!old_state || !old_state->crtc)
-		return;
-
-	dc = to_tegra_dc(old_state->crtc);
-
-	tegra_dc_disable_window(dc, p->index);
-}
-
-static const struct drm_plane_helper_funcs tegra_primary_plane_helper_funcs = {
+static const struct drm_plane_helper_funcs tegra_plane_helper_funcs = {
 	.atomic_check = tegra_plane_atomic_check,
-	.atomic_update = tegra_plane_atomic_update,
 	.atomic_disable = tegra_plane_atomic_disable,
+	.atomic_update = tegra_plane_atomic_update,
 };
 
 static struct drm_plane *tegra_dc_primary_plane_create(struct drm_device *drm,
@@ -685,7 +668,7 @@ static struct drm_plane *tegra_dc_primary_plane_create(struct drm_device *drm,
 		return ERR_PTR(err);
 	}
 
-	drm_plane_helper_add(&plane->base, &tegra_primary_plane_helper_funcs);
+	drm_plane_helper_add(&plane->base, &tegra_plane_helper_funcs);
 
 	return &plane->base;
 }
@@ -880,12 +863,6 @@ static const uint32_t tegra_overlay_plane_formats[] = {
 	DRM_FORMAT_YUV422,
 };
 
-static const struct drm_plane_helper_funcs tegra_overlay_plane_helper_funcs = {
-	.atomic_check = tegra_plane_atomic_check,
-	.atomic_update = tegra_plane_atomic_update,
-	.atomic_disable = tegra_plane_atomic_disable,
-};
-
 static struct drm_plane *tegra_dc_overlay_plane_create(struct drm_device *drm,
 						       struct tegra_dc *dc,
 						       unsigned int index)
@@ -913,7 +890,7 @@ static struct drm_plane *tegra_dc_overlay_plane_create(struct drm_device *drm,
 		return ERR_PTR(err);
 	}
 
-	drm_plane_helper_add(&plane->base, &tegra_overlay_plane_helper_funcs);
+	drm_plane_helper_add(&plane->base, &tegra_plane_helper_funcs);
 
 	return &plane->base;
 }
@@ -1161,6 +1138,11 @@ static void tegra_dc_commit_state(struct tegra_dc *dc,
 
 	value = SHIFT_CLK_DIVIDER(state->div) | PIXEL_CLK_DIVIDER_PCD1;
 	tegra_dc_writel(dc, value, DC_DISP_DISP_CLOCK_CONTROL);
+
+	err = clk_set_rate(dc->clk, state->pclk);
+	if (err < 0)
+		dev_err(dc->dev, "failed to set clock %pC to %lu Hz: %d\n",
+			dc->clk, state->pclk, err);
 }
 
 static void tegra_dc_stop(struct tegra_dc *dc)
@@ -1756,7 +1738,7 @@ static int tegra_dc_init(struct host1x_client *client)
 	struct drm_plane *cursor = NULL;
 	int err;
 
-	dc->syncpt = host1x_syncpt_request(dc->dev, flags);
+	dc->syncpt = host1x_syncpt_request(client, flags);
 	if (!dc->syncpt)
 		dev_warn(dc->dev, "failed to allocate syncpoint\n");
 
@@ -1985,7 +1967,6 @@ static int tegra_dc_parse_dt(struct tegra_dc *dc)
 
 static int tegra_dc_probe(struct platform_device *pdev)
 {
-	const struct of_device_id *id;
 	struct resource *regs;
 	struct tegra_dc *dc;
 	int err;
@@ -1994,14 +1975,11 @@ static int tegra_dc_probe(struct platform_device *pdev)
 	if (!dc)
 		return -ENOMEM;
 
-	id = of_match_node(tegra_dc_of_match, pdev->dev.of_node);
-	if (!id)
-		return -ENODEV;
+	dc->soc = of_device_get_match_data(&pdev->dev);
 
 	spin_lock_init(&dc->lock);
 	INIT_LIST_HEAD(&dc->list);
 	dc->dev = &pdev->dev;
-	dc->soc = id->data;
 
 	err = tegra_dc_parse_dt(dc);
 	if (err < 0)
@@ -2019,8 +1997,22 @@ static int tegra_dc_probe(struct platform_device *pdev)
 		return PTR_ERR(dc->rst);
 	}
 
-	if (!dc->soc->broken_reset)
-		reset_control_assert(dc->rst);
+	/* assert reset and disable clock */
+	if (!dc->soc->broken_reset) {
+		err = clk_prepare_enable(dc->clk);
+		if (err < 0)
+			return err;
+
+		usleep_range(2000, 4000);
+
+		err = reset_control_assert(dc->rst);
+		if (err < 0)
+			return err;
+
+		usleep_range(2000, 4000);
+
+		clk_disable_unprepare(dc->clk);
+	}
 
 	if (dc->soc->has_powergate) {
 		if (dc->pipe == 0)
diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h
index 4a268635749bdc667ef3ec86b3c7e3774f1faf09..cb100b6e32821efb5cf9d2242f4cb0021044b0ea 100644
--- a/drivers/gpu/drm/tegra/dc.h
+++ b/drivers/gpu/drm/tegra/dc.h
@@ -10,6 +10,126 @@
 #ifndef TEGRA_DC_H
 #define TEGRA_DC_H 1
 
+#include <linux/host1x.h>
+
+#include <drm/drm_crtc.h>
+
+#include "drm.h"
+
+struct tegra_output;
+
+struct tegra_dc_stats {
+	unsigned long frames;
+	unsigned long vblank;
+	unsigned long underflow;
+	unsigned long overflow;
+};
+
+struct tegra_dc_soc_info {
+	bool supports_border_color;
+	bool supports_interlacing;
+	bool supports_cursor;
+	bool supports_block_linear;
+	unsigned int pitch_align;
+	bool has_powergate;
+	bool broken_reset;
+};
+
+struct tegra_dc {
+	struct host1x_client client;
+	struct host1x_syncpt *syncpt;
+	struct device *dev;
+	spinlock_t lock;
+
+	struct drm_crtc base;
+	unsigned int powergate;
+	int pipe;
+
+	struct clk *clk;
+	struct reset_control *rst;
+	void __iomem *regs;
+	int irq;
+
+	struct tegra_output *rgb;
+
+	struct tegra_dc_stats stats;
+	struct list_head list;
+
+	struct drm_info_list *debugfs_files;
+	struct drm_minor *minor;
+	struct dentry *debugfs;
+
+	/* page-flip handling */
+	struct drm_pending_vblank_event *event;
+
+	const struct tegra_dc_soc_info *soc;
+
+	struct iommu_domain *domain;
+};
+
+static inline struct tegra_dc *
+host1x_client_to_dc(struct host1x_client *client)
+{
+	return container_of(client, struct tegra_dc, client);
+}
+
+static inline struct tegra_dc *to_tegra_dc(struct drm_crtc *crtc)
+{
+	return crtc ? container_of(crtc, struct tegra_dc, base) : NULL;
+}
+
+static inline void tegra_dc_writel(struct tegra_dc *dc, u32 value,
+				   unsigned int offset)
+{
+	trace_dc_writel(dc->dev, offset, value);
+	writel(value, dc->regs + (offset << 2));
+}
+
+static inline u32 tegra_dc_readl(struct tegra_dc *dc, unsigned int offset)
+{
+	u32 value = readl(dc->regs + (offset << 2));
+
+	trace_dc_readl(dc->dev, offset, value);
+
+	return value;
+}
+
+struct tegra_dc_window {
+	struct {
+		unsigned int x;
+		unsigned int y;
+		unsigned int w;
+		unsigned int h;
+	} src;
+	struct {
+		unsigned int x;
+		unsigned int y;
+		unsigned int w;
+		unsigned int h;
+	} dst;
+	unsigned int bits_per_pixel;
+	unsigned int stride[2];
+	unsigned long base[3];
+	bool bottom_up;
+
+	struct tegra_bo_tiling tiling;
+	u32 format;
+	u32 swap;
+};
+
+/* from dc.c */
+void tegra_dc_commit(struct tegra_dc *dc);
+int tegra_dc_state_setup_clock(struct tegra_dc *dc,
+			       struct drm_crtc_state *crtc_state,
+			       struct clk *clk, unsigned long pclk,
+			       unsigned int div);
+
+/* from rgb.c */
+int tegra_dc_rgb_probe(struct tegra_dc *dc);
+int tegra_dc_rgb_remove(struct tegra_dc *dc);
+int tegra_dc_rgb_init(struct drm_device *drm, struct tegra_dc *dc);
+int tegra_dc_rgb_exit(struct tegra_dc *dc);
+
 #define DC_CMD_GENERAL_INCR_SYNCPT		0x000
 #define DC_CMD_GENERAL_INCR_SYNCPT_CNTRL	0x001
 #define  SYNCPT_CNTRL_NO_STALL   (1 << 8)
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 597d563d636a189fa4bdb790ca842ec9a3740134..943bdf88c4a267214db5f01bda0216be2fb4d5c7 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -386,12 +386,10 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 	unsigned int num_cmdbufs = args->num_cmdbufs;
 	unsigned int num_relocs = args->num_relocs;
 	unsigned int num_waitchks = args->num_waitchks;
-	struct drm_tegra_cmdbuf __user *cmdbufs =
-		(void __user *)(uintptr_t)args->cmdbufs;
-	struct drm_tegra_reloc __user *relocs =
-		(void __user *)(uintptr_t)args->relocs;
-	struct drm_tegra_waitchk __user *waitchks =
-		(void __user *)(uintptr_t)args->waitchks;
+	struct drm_tegra_cmdbuf __user *user_cmdbufs;
+	struct drm_tegra_reloc __user *user_relocs;
+	struct drm_tegra_waitchk __user *user_waitchks;
+	struct drm_tegra_syncpt __user *user_syncpt;
 	struct drm_tegra_syncpt syncpt;
 	struct host1x *host1x = dev_get_drvdata(drm->dev->parent);
 	struct drm_gem_object **refs;
@@ -400,6 +398,11 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 	unsigned int num_refs;
 	int err;
 
+	user_cmdbufs = u64_to_user_ptr(args->cmdbufs);
+	user_relocs = u64_to_user_ptr(args->relocs);
+	user_waitchks = u64_to_user_ptr(args->waitchks);
+	user_syncpt = u64_to_user_ptr(args->syncpts);
+
 	/* We don't yet support other than one syncpt_incr struct per submit */
 	if (args->num_syncpts != 1)
 		return -EINVAL;
@@ -440,7 +443,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 		struct tegra_bo *obj;
 		u64 offset;
 
-		if (copy_from_user(&cmdbuf, cmdbufs, sizeof(cmdbuf))) {
+		if (copy_from_user(&cmdbuf, user_cmdbufs, sizeof(cmdbuf))) {
 			err = -EFAULT;
 			goto fail;
 		}
@@ -476,7 +479,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 
 		host1x_job_add_gather(job, bo, cmdbuf.words, cmdbuf.offset);
 		num_cmdbufs--;
-		cmdbufs++;
+		user_cmdbufs++;
 	}
 
 	/* copy and resolve relocations from submit */
@@ -485,7 +488,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 		struct tegra_bo *obj;
 
 		err = host1x_reloc_copy_from_user(&job->relocarray[num_relocs],
-						  &relocs[num_relocs], drm,
+						  &user_relocs[num_relocs], drm,
 						  file);
 		if (err < 0)
 			goto fail;
@@ -519,9 +522,8 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 		struct host1x_waitchk *wait = &job->waitchk[num_waitchks];
 		struct tegra_bo *obj;
 
-		err = host1x_waitchk_copy_from_user(wait,
-						    &waitchks[num_waitchks],
-						    file);
+		err = host1x_waitchk_copy_from_user(
+			wait, &user_waitchks[num_waitchks], file);
 		if (err < 0)
 			goto fail;
 
@@ -539,8 +541,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 		}
 	}
 
-	if (copy_from_user(&syncpt, (void __user *)(uintptr_t)args->syncpts,
-			   sizeof(syncpt))) {
+	if (copy_from_user(&syncpt, user_syncpt, sizeof(syncpt))) {
 		err = -EFAULT;
 		goto fail;
 	}
@@ -1317,6 +1318,7 @@ static const struct of_device_id host1x_drm_subdevs[] = {
 	{ .compatible = "nvidia,tegra210-sor", },
 	{ .compatible = "nvidia,tegra210-sor1", },
 	{ .compatible = "nvidia,tegra210-vic", },
+	{ .compatible = "nvidia,tegra186-vic", },
 	{ /* sentinel */ }
 };
 
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index 063f5d397526c10af3eb7f4d160f111a11a8124f..ddae331ad8b6b9369c5f5f9ddfb30b6d295ec06a 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -119,104 +119,7 @@ void *tegra_drm_alloc(struct tegra_drm *tegra, size_t size, dma_addr_t *iova);
 void tegra_drm_free(struct tegra_drm *tegra, size_t size, void *virt,
 		    dma_addr_t iova);
 
-struct tegra_dc_soc_info;
-struct tegra_output;
-
-struct tegra_dc_stats {
-	unsigned long frames;
-	unsigned long vblank;
-	unsigned long underflow;
-	unsigned long overflow;
-};
-
-struct tegra_dc {
-	struct host1x_client client;
-	struct host1x_syncpt *syncpt;
-	struct device *dev;
-	spinlock_t lock;
-
-	struct drm_crtc base;
-	unsigned int powergate;
-	int pipe;
-
-	struct clk *clk;
-	struct reset_control *rst;
-	void __iomem *regs;
-	int irq;
-
-	struct tegra_output *rgb;
-
-	struct tegra_dc_stats stats;
-	struct list_head list;
-
-	struct drm_info_list *debugfs_files;
-	struct drm_minor *minor;
-	struct dentry *debugfs;
-
-	/* page-flip handling */
-	struct drm_pending_vblank_event *event;
-
-	const struct tegra_dc_soc_info *soc;
-
-	struct iommu_domain *domain;
-};
-
-static inline struct tegra_dc *
-host1x_client_to_dc(struct host1x_client *client)
-{
-	return container_of(client, struct tegra_dc, client);
-}
-
-static inline struct tegra_dc *to_tegra_dc(struct drm_crtc *crtc)
-{
-	return crtc ? container_of(crtc, struct tegra_dc, base) : NULL;
-}
-
-static inline void tegra_dc_writel(struct tegra_dc *dc, u32 value,
-				   unsigned int offset)
-{
-	trace_dc_writel(dc->dev, offset, value);
-	writel(value, dc->regs + (offset << 2));
-}
-
-static inline u32 tegra_dc_readl(struct tegra_dc *dc, unsigned int offset)
-{
-	u32 value = readl(dc->regs + (offset << 2));
-
-	trace_dc_readl(dc->dev, offset, value);
-
-	return value;
-}
-
-struct tegra_dc_window {
-	struct {
-		unsigned int x;
-		unsigned int y;
-		unsigned int w;
-		unsigned int h;
-	} src;
-	struct {
-		unsigned int x;
-		unsigned int y;
-		unsigned int w;
-		unsigned int h;
-	} dst;
-	unsigned int bits_per_pixel;
-	unsigned int stride[2];
-	unsigned long base[3];
-	bool bottom_up;
-
-	struct tegra_bo_tiling tiling;
-	u32 format;
-	u32 swap;
-};
-
-/* from dc.c */
-void tegra_dc_commit(struct tegra_dc *dc);
-int tegra_dc_state_setup_clock(struct tegra_dc *dc,
-			       struct drm_crtc_state *crtc_state,
-			       struct clk *clk, unsigned long pclk,
-			       unsigned int div);
+struct cec_notifier;
 
 struct tegra_output {
 	struct device_node *of_node;
@@ -225,6 +128,7 @@ struct tegra_output {
 	struct drm_panel *panel;
 	struct i2c_adapter *ddc;
 	const struct edid *edid;
+	struct cec_notifier *notifier;
 	unsigned int hpd_irq;
 	int hpd_gpio;
 	enum of_gpio_flags hpd_gpio_flags;
@@ -243,12 +147,6 @@ static inline struct tegra_output *connector_to_output(struct drm_connector *c)
 	return container_of(c, struct tegra_output, connector);
 }
 
-/* from rgb.c */
-int tegra_dc_rgb_probe(struct tegra_dc *dc);
-int tegra_dc_rgb_remove(struct tegra_dc *dc);
-int tegra_dc_rgb_init(struct drm_device *drm, struct tegra_dc *dc);
-int tegra_dc_rgb_exit(struct tegra_dc *dc);
-
 /* from output.c */
 int tegra_output_probe(struct tegra_output *output);
 void tegra_output_remove(struct tegra_output *output);
diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c
index 6ea070da7718dbb68e4246544aa04b300a48002c..9a8ea93016a97bf03bb135fd4dfb6fa581054d6b 100644
--- a/drivers/gpu/drm/tegra/gr2d.c
+++ b/drivers/gpu/drm/tegra/gr2d.c
@@ -36,7 +36,7 @@ static int gr2d_init(struct host1x_client *client)
 	if (!gr2d->channel)
 		return -ENOMEM;
 
-	client->syncpts[0] = host1x_syncpt_request(client->dev, flags);
+	client->syncpts[0] = host1x_syncpt_request(client, flags);
 	if (!client->syncpts[0]) {
 		host1x_channel_put(gr2d->channel);
 		return -ENOMEM;
diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c
index cee2ab645cde93eba7dbbcd4cf6b83af9f7fb132..28c4ef63065bd170f03645e7523843c03c9310b8 100644
--- a/drivers/gpu/drm/tegra/gr3d.c
+++ b/drivers/gpu/drm/tegra/gr3d.c
@@ -46,7 +46,7 @@ static int gr3d_init(struct host1x_client *client)
 	if (!gr3d->channel)
 		return -ENOMEM;
 
-	client->syncpts[0] = host1x_syncpt_request(client->dev, flags);
+	client->syncpts[0] = host1x_syncpt_request(client, flags);
 	if (!client->syncpts[0]) {
 		host1x_channel_put(gr3d->channel);
 		return -ENOMEM;
diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c
index 5b9d83b719431d9f2dc9430c0c39657200a1b77c..6434b3d3d1ba4da8bf2b1838df90a89267838098 100644
--- a/drivers/gpu/drm/tegra/hdmi.c
+++ b/drivers/gpu/drm/tegra/hdmi.c
@@ -11,6 +11,7 @@
 #include <linux/debugfs.h>
 #include <linux/gpio.h>
 #include <linux/hdmi.h>
+#include <linux/of_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/regulator/consumer.h>
 #include <linux/reset.h>
@@ -21,6 +22,8 @@
 
 #include <sound/hda_verbs.h>
 
+#include <media/cec-notifier.h>
+
 #include "hdmi.h"
 #include "drm.h"
 #include "dc.h"
@@ -1663,20 +1666,15 @@ static irqreturn_t tegra_hdmi_irq(int irq, void *data)
 
 static int tegra_hdmi_probe(struct platform_device *pdev)
 {
-	const struct of_device_id *match;
 	struct tegra_hdmi *hdmi;
 	struct resource *regs;
 	int err;
 
-	match = of_match_node(tegra_hdmi_of_match, pdev->dev.of_node);
-	if (!match)
-		return -ENODEV;
-
 	hdmi = devm_kzalloc(&pdev->dev, sizeof(*hdmi), GFP_KERNEL);
 	if (!hdmi)
 		return -ENOMEM;
 
-	hdmi->config = match->data;
+	hdmi->config = of_device_get_match_data(&pdev->dev);
 	hdmi->dev = &pdev->dev;
 
 	hdmi->audio_source = AUTO;
@@ -1725,6 +1723,10 @@ static int tegra_hdmi_probe(struct platform_device *pdev)
 		return PTR_ERR(hdmi->vdd);
 	}
 
+	hdmi->output.notifier = cec_notifier_get(&pdev->dev);
+	if (hdmi->output.notifier == NULL)
+		return -ENOMEM;
+
 	hdmi->output.dev = &pdev->dev;
 
 	err = tegra_output_probe(&hdmi->output);
@@ -1783,6 +1785,9 @@ static int tegra_hdmi_remove(struct platform_device *pdev)
 
 	tegra_output_remove(&hdmi->output);
 
+	if (hdmi->output.notifier)
+		cec_notifier_put(hdmi->output.notifier);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/tegra/output.c b/drivers/gpu/drm/tegra/output.c
index 595d1ec3e02e8ff6e42c0574b0d1d1218bd8210f..1cfbacea811396c9db03927561e619d43da72b72 100644
--- a/drivers/gpu/drm/tegra/output.c
+++ b/drivers/gpu/drm/tegra/output.c
@@ -11,6 +11,8 @@
 #include <drm/drm_panel.h>
 #include "drm.h"
 
+#include <media/cec-notifier.h>
+
 int tegra_output_connector_get_modes(struct drm_connector *connector)
 {
 	struct tegra_output *output = connector_to_output(connector);
@@ -32,6 +34,7 @@ int tegra_output_connector_get_modes(struct drm_connector *connector)
 	else if (output->ddc)
 		edid = drm_get_edid(connector, output->ddc);
 
+	cec_notifier_set_phys_addr_from_edid(output->notifier, edid);
 	drm_mode_connector_update_edid_property(connector, edid);
 
 	if (edid) {
@@ -68,6 +71,9 @@ tegra_output_connector_detect(struct drm_connector *connector, bool force)
 			status = connector_status_connected;
 	}
 
+	if (status != connector_status_connected)
+		cec_notifier_phys_addr_invalidate(output->notifier);
+
 	return status;
 }
 
diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index 7ab1d1dc7cd7388491f71a17d0627db956d05fd3..4bcacd3f48613d416deea7a32cff7f58c97709b3 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -2536,20 +2536,17 @@ MODULE_DEVICE_TABLE(of, tegra_sor_of_match);
 
 static int tegra_sor_probe(struct platform_device *pdev)
 {
-	const struct of_device_id *match;
 	struct device_node *np;
 	struct tegra_sor *sor;
 	struct resource *regs;
 	int err;
 
-	match = of_match_device(tegra_sor_of_match, &pdev->dev);
-
 	sor = devm_kzalloc(&pdev->dev, sizeof(*sor), GFP_KERNEL);
 	if (!sor)
 		return -ENOMEM;
 
+	sor->soc = of_device_get_match_data(&pdev->dev);
 	sor->output.dev = sor->dev = &pdev->dev;
-	sor->soc = match->data;
 
 	sor->settings = devm_kmemdup(&pdev->dev, sor->soc->settings,
 				     sor->soc->num_settings *
diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index 2448229fa653b2b226fd6e9decb90f873dd10eed..18024183aa2b7af497aa1a487e5da2d27c7a38c6 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -167,7 +167,7 @@ static int vic_init(struct host1x_client *client)
 		goto detach_device;
 	}
 
-	client->syncpts[0] = host1x_syncpt_request(client->dev, 0);
+	client->syncpts[0] = host1x_syncpt_request(client, 0);
 	if (!client->syncpts[0]) {
 		err = -ENOMEM;
 		goto free_channel;
@@ -270,29 +270,33 @@ static const struct vic_config vic_t210_config = {
 	.firmware = NVIDIA_TEGRA_210_VIC_FIRMWARE,
 };
 
+#define NVIDIA_TEGRA_186_VIC_FIRMWARE "nvidia/tegra186/vic04_ucode.bin"
+
+static const struct vic_config vic_t186_config = {
+	.firmware = NVIDIA_TEGRA_186_VIC_FIRMWARE,
+};
+
 static const struct of_device_id vic_match[] = {
 	{ .compatible = "nvidia,tegra124-vic", .data = &vic_t124_config },
 	{ .compatible = "nvidia,tegra210-vic", .data = &vic_t210_config },
+	{ .compatible = "nvidia,tegra186-vic", .data = &vic_t186_config },
 	{ },
 };
 
 static int vic_probe(struct platform_device *pdev)
 {
-	struct vic_config *vic_config = NULL;
 	struct device *dev = &pdev->dev;
 	struct host1x_syncpt **syncpts;
 	struct resource *regs;
-	const struct of_device_id *match;
 	struct vic *vic;
 	int err;
 
-	match = of_match_device(vic_match, dev);
-	vic_config = (struct vic_config *)match->data;
-
 	vic = devm_kzalloc(dev, sizeof(*vic), GFP_KERNEL);
 	if (!vic)
 		return -ENOMEM;
 
+	vic->config = of_device_get_match_data(dev);
+
 	syncpts = devm_kzalloc(dev, sizeof(*syncpts), GFP_KERNEL);
 	if (!syncpts)
 		return -ENOMEM;
@@ -321,7 +325,7 @@ static int vic_probe(struct platform_device *pdev)
 	if (err < 0)
 		return err;
 
-	err = falcon_read_firmware(&vic->falcon, vic_config->firmware);
+	err = falcon_read_firmware(&vic->falcon, vic->config->firmware);
 	if (err < 0)
 		goto exit_falcon;
 
@@ -334,7 +338,6 @@ static int vic_probe(struct platform_device *pdev)
 	vic->client.base.syncpts = syncpts;
 	vic->client.base.num_syncpts = 1;
 	vic->dev = dev;
-	vic->config = vic_config;
 
 	INIT_LIST_HEAD(&vic->client.list);
 	vic->client.ops = &vic_ops;
@@ -405,3 +408,6 @@ MODULE_FIRMWARE(NVIDIA_TEGRA_124_VIC_FIRMWARE);
 #if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC)
 MODULE_FIRMWARE(NVIDIA_TEGRA_210_VIC_FIRMWARE);
 #endif
+#if IS_ENABLED(CONFIG_ARCH_TEGRA_186_SOC)
+MODULE_FIRMWARE(NVIDIA_TEGRA_186_VIC_FIRMWARE);
+#endif
diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
index a1d9974cfcb52424f09003e3b5d93c16f6db1ea1..4fb61bd57aeeea34b662e04fcbc1d0b7a4226959 100644
--- a/drivers/gpu/host1x/Makefile
+++ b/drivers/gpu/host1x/Makefile
@@ -11,6 +11,7 @@ host1x-y = \
 	hw/host1x01.o \
 	hw/host1x02.o \
 	hw/host1x04.o \
-	hw/host1x05.o
+	hw/host1x05.o \
+	hw/host1x06.o
 
 obj-$(CONFIG_TEGRA_HOST1X) += host1x.o
diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index f9cde03030fd9042bea5a4528854b786cdd0451a..66ea5acee820fa637883e635cef698804c800dc9 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -403,12 +403,13 @@ static int host1x_device_add(struct host1x *host1x,
 	device->dev.coherent_dma_mask = host1x->dev->coherent_dma_mask;
 	device->dev.dma_mask = &device->dev.coherent_dma_mask;
 	dev_set_name(&device->dev, "%s", driver->driver.name);
-	of_dma_configure(&device->dev, host1x->dev->of_node);
 	device->dev.release = host1x_device_release;
 	device->dev.of_node = host1x->dev->of_node;
 	device->dev.bus = &host1x_bus_type;
 	device->dev.parent = host1x->dev;
 
+	of_dma_configure(&device->dev, host1x->dev->of_node);
+
 	err = host1x_device_parse_dt(device, driver);
 	if (err < 0) {
 		kfree(device);
diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
index db9b91d1384c1f669e5921e65405a08b0a18d182..2fb93c27c1d9dc05802f4509caf2f952b2db0e97 100644
--- a/drivers/gpu/host1x/channel.c
+++ b/drivers/gpu/host1x/channel.c
@@ -128,8 +128,7 @@ static struct host1x_channel *acquire_unused_channel(struct host1x *host)
  * host1x_channel_request() - Allocate a channel
  * @device: Host1x unit this channel will be used to send commands to
  *
- * Allocates a new host1x channel for @device. If there are no free channels,
- * this will sleep until one becomes available. May return NULL if CDMA
+ * Allocates a new host1x channel for @device. May return NULL if CDMA
  * initialization fails.
  */
 struct host1x_channel *host1x_channel_request(struct device *dev)
diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c
index 2aae0e63214c26b59b26c4fa35227846f54aa136..dc77ec452ffc6f796b1984b61f5eaf1c05283193 100644
--- a/drivers/gpu/host1x/debug.c
+++ b/drivers/gpu/host1x/debug.c
@@ -40,7 +40,19 @@ void host1x_debug_output(struct output *o, const char *fmt, ...)
 	len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
 	va_end(args);
 
-	o->fn(o->ctx, o->buf, len);
+	o->fn(o->ctx, o->buf, len, false);
+}
+
+void host1x_debug_cont(struct output *o, const char *fmt, ...)
+{
+	va_list args;
+	int len;
+
+	va_start(args, fmt);
+	len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
+	va_end(args);
+
+	o->fn(o->ctx, o->buf, len, true);
 }
 
 static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo)
diff --git a/drivers/gpu/host1x/debug.h b/drivers/gpu/host1x/debug.h
index 4595b2e0799fc6331a13e8a3c235ee1a747e8427..990cce47e737b4fd1f1fa56f0b8dd07c108d86df 100644
--- a/drivers/gpu/host1x/debug.h
+++ b/drivers/gpu/host1x/debug.h
@@ -24,22 +24,28 @@
 struct host1x;
 
 struct output {
-	void (*fn)(void *ctx, const char *str, size_t len);
+	void (*fn)(void *ctx, const char *str, size_t len, bool cont);
 	void *ctx;
 	char buf[256];
 };
 
-static inline void write_to_seqfile(void *ctx, const char *str, size_t len)
+static inline void write_to_seqfile(void *ctx, const char *str, size_t len,
+				    bool cont)
 {
 	seq_write((struct seq_file *)ctx, str, len);
 }
 
-static inline void write_to_printk(void *ctx, const char *str, size_t len)
+static inline void write_to_printk(void *ctx, const char *str, size_t len,
+				   bool cont)
 {
-	pr_info("%s", str);
+	if (cont)
+		pr_cont("%s", str);
+	else
+		pr_info("%s", str);
 }
 
 void __printf(2, 3) host1x_debug_output(struct output *o, const char *fmt, ...);
+void __printf(2, 3) host1x_debug_cont(struct output *o, const char *fmt, ...);
 
 extern unsigned int host1x_debug_trace_cmdbuf;
 
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 7f22c5c37660831ec5f2078f4b0717e78947d861..773d6337aa300eb2b4dffbf8eaf646623f30c958 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -39,6 +39,17 @@
 #include "hw/host1x02.h"
 #include "hw/host1x04.h"
 #include "hw/host1x05.h"
+#include "hw/host1x06.h"
+
+void host1x_hypervisor_writel(struct host1x *host1x, u32 v, u32 r)
+{
+	writel(v, host1x->hv_regs + r);
+}
+
+u32 host1x_hypervisor_readl(struct host1x *host1x, u32 r)
+{
+	return readl(host1x->hv_regs + r);
+}
 
 void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r)
 {
@@ -104,7 +115,19 @@ static const struct host1x_info host1x05_info = {
 	.dma_mask = DMA_BIT_MASK(34),
 };
 
+static const struct host1x_info host1x06_info = {
+	.nb_channels = 63,
+	.nb_pts = 576,
+	.nb_mlocks = 24,
+	.nb_bases = 16,
+	.init = host1x06_init,
+	.sync_offset = 0x0,
+	.dma_mask = DMA_BIT_MASK(34),
+	.has_hypervisor = true,
+};
+
 static const struct of_device_id host1x_of_match[] = {
+	{ .compatible = "nvidia,tegra186-host1x", .data = &host1x06_info, },
 	{ .compatible = "nvidia,tegra210-host1x", .data = &host1x05_info, },
 	{ .compatible = "nvidia,tegra124-host1x", .data = &host1x04_info, },
 	{ .compatible = "nvidia,tegra114-host1x", .data = &host1x02_info, },
@@ -116,20 +139,37 @@ MODULE_DEVICE_TABLE(of, host1x_of_match);
 
 static int host1x_probe(struct platform_device *pdev)
 {
-	const struct of_device_id *id;
 	struct host1x *host;
-	struct resource *regs;
+	struct resource *regs, *hv_regs = NULL;
 	int syncpt_irq;
 	int err;
 
-	id = of_match_device(host1x_of_match, &pdev->dev);
-	if (!id)
-		return -EINVAL;
+	host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL);
+	if (!host)
+		return -ENOMEM;
 
-	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!regs) {
-		dev_err(&pdev->dev, "failed to get registers\n");
-		return -ENXIO;
+	host->info = of_device_get_match_data(&pdev->dev);
+
+	if (host->info->has_hypervisor) {
+		regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "vm");
+		if (!regs) {
+			dev_err(&pdev->dev, "failed to get vm registers\n");
+			return -ENXIO;
+		}
+
+		hv_regs = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+						       "hypervisor");
+		if (!hv_regs) {
+			dev_err(&pdev->dev,
+				"failed to get hypervisor registers\n");
+			return -ENXIO;
+		}
+	} else {
+		regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		if (!regs) {
+			dev_err(&pdev->dev, "failed to get registers\n");
+			return -ENXIO;
+		}
 	}
 
 	syncpt_irq = platform_get_irq(pdev, 0);
@@ -138,15 +178,10 @@ static int host1x_probe(struct platform_device *pdev)
 		return syncpt_irq;
 	}
 
-	host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL);
-	if (!host)
-		return -ENOMEM;
-
 	mutex_init(&host->devices_lock);
 	INIT_LIST_HEAD(&host->devices);
 	INIT_LIST_HEAD(&host->list);
 	host->dev = &pdev->dev;
-	host->info = id->data;
 
 	/* set common host1x device data */
 	platform_set_drvdata(pdev, host);
@@ -155,6 +190,12 @@ static int host1x_probe(struct platform_device *pdev)
 	if (IS_ERR(host->regs))
 		return PTR_ERR(host->regs);
 
+	if (host->info->has_hypervisor) {
+		host->hv_regs = devm_ioremap_resource(&pdev->dev, hv_regs);
+		if (IS_ERR(host->hv_regs))
+			return PTR_ERR(host->hv_regs);
+	}
+
 	dma_set_mask_and_coherent(host->dev, host->info->dma_mask);
 
 	if (host->info->init) {
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index ffdbc15b749b9b2b6e41332fed9c60bf276fb978..50276972648060cc81d6ba0a12c42998904c019d 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -79,6 +79,9 @@ struct host1x_syncpt_ops {
 	u32 (*load)(struct host1x_syncpt *syncpt);
 	int (*cpu_incr)(struct host1x_syncpt *syncpt);
 	int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr);
+	void (*assign_to_channel)(struct host1x_syncpt *syncpt,
+	                          struct host1x_channel *channel);
+	void (*enable_protection)(struct host1x *host);
 };
 
 struct host1x_intr_ops {
@@ -100,12 +103,14 @@ struct host1x_info {
 	int (*init)(struct host1x *host1x); /* initialize per SoC ops */
 	unsigned int sync_offset; /* offset of syncpoint registers */
 	u64 dma_mask; /* mask of addressable memory */
+	bool has_hypervisor; /* has hypervisor registers */
 };
 
 struct host1x {
 	const struct host1x_info *info;
 
 	void __iomem *regs;
+	void __iomem *hv_regs; /* hypervisor region */
 	struct host1x_syncpt *syncpt;
 	struct host1x_syncpt_base *bases;
 	struct device *dev;
@@ -140,6 +145,8 @@ struct host1x {
 	struct list_head list;
 };
 
+void host1x_hypervisor_writel(struct host1x *host1x, u32 r, u32 v);
+u32 host1x_hypervisor_readl(struct host1x *host1x, u32 r);
 void host1x_sync_writel(struct host1x *host1x, u32 r, u32 v);
 u32 host1x_sync_readl(struct host1x *host1x, u32 r);
 void host1x_ch_writel(struct host1x_channel *ch, u32 r, u32 v);
@@ -182,6 +189,18 @@ static inline int host1x_hw_syncpt_patch_wait(struct host1x *host,
 	return host->syncpt_op->patch_wait(sp, patch_addr);
 }
 
+static inline void host1x_hw_syncpt_assign_to_channel(
+	struct host1x *host, struct host1x_syncpt *sp,
+	struct host1x_channel *ch)
+{
+	return host->syncpt_op->assign_to_channel(sp, ch);
+}
+
+static inline void host1x_hw_syncpt_enable_protection(struct host1x *host)
+{
+	return host->syncpt_op->enable_protection(host);
+}
+
 static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
 			void (*syncpt_thresh_work)(struct work_struct *))
 {
diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c
index 6b231119193ee4025abf4a1fd8aa56ee045cc288..ce320534cbed39aa3fc06aa1473a9187ed671e62 100644
--- a/drivers/gpu/host1x/hw/cdma_hw.c
+++ b/drivers/gpu/host1x/hw/cdma_hw.c
@@ -172,6 +172,30 @@ static void cdma_stop(struct host1x_cdma *cdma)
 	mutex_unlock(&cdma->lock);
 }
 
+static void cdma_hw_cmdproc_stop(struct host1x *host, struct host1x_channel *ch,
+				 bool stop)
+{
+#if HOST1X_HW >= 6
+	host1x_ch_writel(ch, stop ? 0x1 : 0x0, HOST1X_CHANNEL_CMDPROC_STOP);
+#else
+	u32 cmdproc_stop = host1x_sync_readl(host, HOST1X_SYNC_CMDPROC_STOP);
+	if (stop)
+		cmdproc_stop |= BIT(ch->id);
+	else
+		cmdproc_stop &= ~BIT(ch->id);
+	host1x_sync_writel(host, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP);
+#endif
+}
+
+static void cdma_hw_teardown(struct host1x *host, struct host1x_channel *ch)
+{
+#if HOST1X_HW >= 6
+	host1x_ch_writel(ch, 0x1, HOST1X_CHANNEL_TEARDOWN);
+#else
+	host1x_sync_writel(host, BIT(ch->id), HOST1X_SYNC_CH_TEARDOWN);
+#endif
+}
+
 /*
  * Stops both channel's command processor and CDMA immediately.
  * Also, tears down the channel and resets corresponding module.
@@ -180,7 +204,6 @@ static void cdma_freeze(struct host1x_cdma *cdma)
 {
 	struct host1x *host = cdma_to_host1x(cdma);
 	struct host1x_channel *ch = cdma_to_channel(cdma);
-	u32 cmdproc_stop;
 
 	if (cdma->torndown && !cdma->running) {
 		dev_warn(host->dev, "Already torn down\n");
@@ -189,9 +212,7 @@ static void cdma_freeze(struct host1x_cdma *cdma)
 
 	dev_dbg(host->dev, "freezing channel (id %d)\n", ch->id);
 
-	cmdproc_stop = host1x_sync_readl(host, HOST1X_SYNC_CMDPROC_STOP);
-	cmdproc_stop |= BIT(ch->id);
-	host1x_sync_writel(host, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP);
+	cdma_hw_cmdproc_stop(host, ch, true);
 
 	dev_dbg(host->dev, "%s: DMA GET 0x%x, PUT HW 0x%x / shadow 0x%x\n",
 		__func__, host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET),
@@ -201,7 +222,7 @@ static void cdma_freeze(struct host1x_cdma *cdma)
 	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
 			 HOST1X_CHANNEL_DMACTRL);
 
-	host1x_sync_writel(host, BIT(ch->id), HOST1X_SYNC_CH_TEARDOWN);
+	cdma_hw_teardown(host, ch);
 
 	cdma->running = false;
 	cdma->torndown = true;
@@ -211,15 +232,12 @@ static void cdma_resume(struct host1x_cdma *cdma, u32 getptr)
 {
 	struct host1x *host1x = cdma_to_host1x(cdma);
 	struct host1x_channel *ch = cdma_to_channel(cdma);
-	u32 cmdproc_stop;
 
 	dev_dbg(host1x->dev,
 		"resuming channel (id %u, DMAGET restart = 0x%x)\n",
 		ch->id, getptr);
 
-	cmdproc_stop = host1x_sync_readl(host1x, HOST1X_SYNC_CMDPROC_STOP);
-	cmdproc_stop &= ~BIT(ch->id);
-	host1x_sync_writel(host1x, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP);
+	cdma_hw_cmdproc_stop(host1x, ch, false);
 
 	cdma->torndown = false;
 	cdma_timeout_restart(cdma, getptr);
@@ -232,7 +250,7 @@ static void cdma_resume(struct host1x_cdma *cdma, u32 getptr)
  */
 static void cdma_timeout_handler(struct work_struct *work)
 {
-	u32 prev_cmdproc, cmdproc_stop, syncpt_val;
+	u32 syncpt_val;
 	struct host1x_cdma *cdma;
 	struct host1x *host1x;
 	struct host1x_channel *ch;
@@ -254,12 +272,7 @@ static void cdma_timeout_handler(struct work_struct *work)
 	}
 
 	/* stop processing to get a clean snapshot */
-	prev_cmdproc = host1x_sync_readl(host1x, HOST1X_SYNC_CMDPROC_STOP);
-	cmdproc_stop = prev_cmdproc | BIT(ch->id);
-	host1x_sync_writel(host1x, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP);
-
-	dev_dbg(host1x->dev, "cdma_timeout: cmdproc was 0x%x is 0x%x\n",
-		prev_cmdproc, cmdproc_stop);
+	cdma_hw_cmdproc_stop(host1x, ch, true);
 
 	syncpt_val = host1x_syncpt_load(cdma->timeout.syncpt);
 
@@ -268,9 +281,7 @@ static void cdma_timeout_handler(struct work_struct *work)
 		dev_dbg(host1x->dev,
 			"cdma_timeout: expired, but buffer had completed\n");
 		/* restore */
-		cmdproc_stop = prev_cmdproc & ~(BIT(ch->id));
-		host1x_sync_writel(host1x, cmdproc_stop,
-				   HOST1X_SYNC_CMDPROC_STOP);
+		cdma_hw_cmdproc_stop(host1x, ch, false);
 		mutex_unlock(&cdma->lock);
 		return;
 	}
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 8447a56c41caebd6ea6bb8233b8b761e36e99eb7..9af758785a112733d122e7096a08d3733b5c8942 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -147,6 +147,8 @@ static int channel_submit(struct host1x_job *job)
 
 	syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
 
+	host1x_hw_syncpt_assign_to_channel(host, sp, ch);
+
 	job->syncpt_end = syncval;
 
 	/* add a setclass for modules that require it */
@@ -178,10 +180,32 @@ static int channel_submit(struct host1x_job *job)
 	return err;
 }
 
+static void enable_gather_filter(struct host1x *host,
+				 struct host1x_channel *ch)
+{
+#if HOST1X_HW >= 6
+	u32 val;
+
+	if (!host->hv_regs)
+		return;
+
+	val = host1x_hypervisor_readl(
+		host, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
+	val |= BIT(ch->id % 32);
+	host1x_hypervisor_writel(
+		host, val, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
+#elif HOST1X_HW >= 4
+	host1x_ch_writel(ch,
+			 HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(1),
+			 HOST1X_CHANNEL_CHANNELCTRL);
+#endif
+}
+
 static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev,
 			       unsigned int index)
 {
 	ch->regs = dev->regs + index * HOST1X_CHANNEL_SIZE;
+	enable_gather_filter(dev, ch);
 	return 0;
 }
 
diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c
index 7a4a3286e4a7c284e17e4940a846143730f2ad2d..989476801f9dde5595c86ca1812613f09e337dd4 100644
--- a/drivers/gpu/host1x/hw/debug_hw.c
+++ b/drivers/gpu/host1x/hw/debug_hw.c
@@ -30,6 +30,13 @@ enum {
 	HOST1X_OPCODE_IMM	= 0x04,
 	HOST1X_OPCODE_RESTART	= 0x05,
 	HOST1X_OPCODE_GATHER	= 0x06,
+	HOST1X_OPCODE_SETSTRMID = 0x07,
+	HOST1X_OPCODE_SETAPPID  = 0x08,
+	HOST1X_OPCODE_SETPYLD   = 0x09,
+	HOST1X_OPCODE_INCR_W    = 0x0a,
+	HOST1X_OPCODE_NONINCR_W = 0x0b,
+	HOST1X_OPCODE_GATHER_W  = 0x0c,
+	HOST1X_OPCODE_RESTART_W = 0x0d,
 	HOST1X_OPCODE_EXTEND	= 0x0e,
 };
 
@@ -38,67 +45,122 @@ enum {
 	HOST1X_OPCODE_EXTEND_RELEASE_MLOCK	= 0x01,
 };
 
-static unsigned int show_channel_command(struct output *o, u32 val)
+#define INVALID_PAYLOAD				0xffffffff
+
+static unsigned int show_channel_command(struct output *o, u32 val,
+					 u32 *payload)
 {
-	unsigned int mask, subop;
+	unsigned int mask, subop, num, opcode;
+
+	opcode = val >> 28;
 
-	switch (val >> 28) {
+	switch (opcode) {
 	case HOST1X_OPCODE_SETCLASS:
 		mask = val & 0x3f;
 		if (mask) {
-			host1x_debug_output(o, "SETCL(class=%03x, offset=%03x, mask=%02x, [",
+			host1x_debug_cont(o, "SETCL(class=%03x, offset=%03x, mask=%02x, [",
 					    val >> 6 & 0x3ff,
 					    val >> 16 & 0xfff, mask);
 			return hweight8(mask);
 		}
 
-		host1x_debug_output(o, "SETCL(class=%03x)\n", val >> 6 & 0x3ff);
+		host1x_debug_cont(o, "SETCL(class=%03x)\n", val >> 6 & 0x3ff);
 		return 0;
 
 	case HOST1X_OPCODE_INCR:
-		host1x_debug_output(o, "INCR(offset=%03x, [",
+		num = val & 0xffff;
+		host1x_debug_cont(o, "INCR(offset=%03x, [",
 				    val >> 16 & 0xfff);
-		return val & 0xffff;
+		if (!num)
+			host1x_debug_cont(o, "])\n");
+
+		return num;
 
 	case HOST1X_OPCODE_NONINCR:
-		host1x_debug_output(o, "NONINCR(offset=%03x, [",
+		num = val & 0xffff;
+		host1x_debug_cont(o, "NONINCR(offset=%03x, [",
 				    val >> 16 & 0xfff);
-		return val & 0xffff;
+		if (!num)
+			host1x_debug_cont(o, "])\n");
+
+		return num;
 
 	case HOST1X_OPCODE_MASK:
 		mask = val & 0xffff;
-		host1x_debug_output(o, "MASK(offset=%03x, mask=%03x, [",
+		host1x_debug_cont(o, "MASK(offset=%03x, mask=%03x, [",
 				    val >> 16 & 0xfff, mask);
+		if (!mask)
+			host1x_debug_cont(o, "])\n");
+
 		return hweight16(mask);
 
 	case HOST1X_OPCODE_IMM:
-		host1x_debug_output(o, "IMM(offset=%03x, data=%03x)\n",
+		host1x_debug_cont(o, "IMM(offset=%03x, data=%03x)\n",
 				    val >> 16 & 0xfff, val & 0xffff);
 		return 0;
 
 	case HOST1X_OPCODE_RESTART:
-		host1x_debug_output(o, "RESTART(offset=%08x)\n", val << 4);
+		host1x_debug_cont(o, "RESTART(offset=%08x)\n", val << 4);
 		return 0;
 
 	case HOST1X_OPCODE_GATHER:
-		host1x_debug_output(o, "GATHER(offset=%03x, insert=%d, type=%d, count=%04x, addr=[",
+		host1x_debug_cont(o, "GATHER(offset=%03x, insert=%d, type=%d, count=%04x, addr=[",
 				    val >> 16 & 0xfff, val >> 15 & 0x1,
 				    val >> 14 & 0x1, val & 0x3fff);
 		return 1;
 
+#if HOST1X_HW >= 6
+	case HOST1X_OPCODE_SETSTRMID:
+		host1x_debug_cont(o, "SETSTRMID(offset=%06x)\n",
+				  val & 0x3fffff);
+		return 0;
+
+	case HOST1X_OPCODE_SETAPPID:
+		host1x_debug_cont(o, "SETAPPID(appid=%02x)\n", val & 0xff);
+		return 0;
+
+	case HOST1X_OPCODE_SETPYLD:
+		*payload = val & 0xffff;
+		host1x_debug_cont(o, "SETPYLD(data=%04x)\n", *payload);
+		return 0;
+
+	case HOST1X_OPCODE_INCR_W:
+	case HOST1X_OPCODE_NONINCR_W:
+		host1x_debug_cont(o, "%s(offset=%06x, ",
+				  opcode == HOST1X_OPCODE_INCR_W ?
+					"INCR_W" : "NONINCR_W",
+				  val & 0x3fffff);
+		if (*payload == 0) {
+			host1x_debug_cont(o, "[])\n");
+			return 0;
+		} else if (*payload == INVALID_PAYLOAD) {
+			host1x_debug_cont(o, "unknown)\n");
+			return 0;
+		} else {
+			host1x_debug_cont(o, "[");
+			return *payload;
+		}
+
+	case HOST1X_OPCODE_GATHER_W:
+		host1x_debug_cont(o, "GATHER_W(count=%04x, addr=[",
+				  val & 0x3fff);
+		return 2;
+#endif
+
 	case HOST1X_OPCODE_EXTEND:
 		subop = val >> 24 & 0xf;
 		if (subop == HOST1X_OPCODE_EXTEND_ACQUIRE_MLOCK)
-			host1x_debug_output(o, "ACQUIRE_MLOCK(index=%d)\n",
+			host1x_debug_cont(o, "ACQUIRE_MLOCK(index=%d)\n",
 					    val & 0xff);
 		else if (subop == HOST1X_OPCODE_EXTEND_RELEASE_MLOCK)
-			host1x_debug_output(o, "RELEASE_MLOCK(index=%d)\n",
+			host1x_debug_cont(o, "RELEASE_MLOCK(index=%d)\n",
 					    val & 0xff);
 		else
-			host1x_debug_output(o, "EXTEND_UNKNOWN(%08x)\n", val);
+			host1x_debug_cont(o, "EXTEND_UNKNOWN(%08x)\n", val);
 		return 0;
 
 	default:
+		host1x_debug_cont(o, "UNKNOWN\n");
 		return 0;
 	}
 }
@@ -110,6 +172,7 @@ static void show_gather(struct output *o, phys_addr_t phys_addr,
 	/* Map dmaget cursor to corresponding mem handle */
 	u32 offset = phys_addr - pin_addr;
 	unsigned int data_count = 0, i;
+	u32 payload = INVALID_PAYLOAD;
 
 	/*
 	 * Sometimes we're given different hardware address to the same
@@ -126,11 +189,11 @@ static void show_gather(struct output *o, phys_addr_t phys_addr,
 		u32 val = *(map_addr + offset / 4 + i);
 
 		if (!data_count) {
-			host1x_debug_output(o, "%08x: %08x:", addr, val);
-			data_count = show_channel_command(o, val);
+			host1x_debug_output(o, "%08x: %08x: ", addr, val);
+			data_count = show_channel_command(o, val, &payload);
 		} else {
-			host1x_debug_output(o, "%08x%s", val,
-					    data_count > 0 ? ", " : "])\n");
+			host1x_debug_cont(o, "%08x%s", val,
+					    data_count > 1 ? ", " : "])\n");
 			data_count--;
 		}
 	}
@@ -174,138 +237,11 @@ static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma)
 	}
 }
 
-static void host1x_debug_show_channel_cdma(struct host1x *host,
-					   struct host1x_channel *ch,
-					   struct output *o)
-{
-	struct host1x_cdma *cdma = &ch->cdma;
-	u32 dmaput, dmaget, dmactrl;
-	u32 cbstat, cbread;
-	u32 val, base, baseval;
-
-	dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT);
-	dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET);
-	dmactrl = host1x_ch_readl(ch, HOST1X_CHANNEL_DMACTRL);
-	cbread = host1x_sync_readl(host, HOST1X_SYNC_CBREAD(ch->id));
-	cbstat = host1x_sync_readl(host, HOST1X_SYNC_CBSTAT(ch->id));
-
-	host1x_debug_output(o, "%u-%s: ", ch->id, dev_name(ch->dev));
-
-	if (HOST1X_CHANNEL_DMACTRL_DMASTOP_V(dmactrl) ||
-	    !ch->cdma.push_buffer.mapped) {
-		host1x_debug_output(o, "inactive\n\n");
-		return;
-	}
-
-	if (HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat) == HOST1X_CLASS_HOST1X &&
-	    HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat) ==
-			HOST1X_UCLASS_WAIT_SYNCPT)
-		host1x_debug_output(o, "waiting on syncpt %d val %d\n",
-				    cbread >> 24, cbread & 0xffffff);
-	else if (HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat) ==
-				HOST1X_CLASS_HOST1X &&
-		 HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat) ==
-				HOST1X_UCLASS_WAIT_SYNCPT_BASE) {
-		base = (cbread >> 16) & 0xff;
-		baseval =
-			host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_BASE(base));
-		val = cbread & 0xffff;
-		host1x_debug_output(o, "waiting on syncpt %d val %d (base %d = %d; offset = %d)\n",
-				    cbread >> 24, baseval + val, base,
-				    baseval, val);
-	} else
-		host1x_debug_output(o, "active class %02x, offset %04x, val %08x\n",
-				    HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat),
-				    HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat),
-				    cbread);
-
-	host1x_debug_output(o, "DMAPUT %08x, DMAGET %08x, DMACTL %08x\n",
-			    dmaput, dmaget, dmactrl);
-	host1x_debug_output(o, "CBREAD %08x, CBSTAT %08x\n", cbread, cbstat);
-
-	show_channel_gathers(o, cdma);
-	host1x_debug_output(o, "\n");
-}
-
-static void host1x_debug_show_channel_fifo(struct host1x *host,
-					   struct host1x_channel *ch,
-					   struct output *o)
-{
-	u32 val, rd_ptr, wr_ptr, start, end;
-	unsigned int data_count = 0;
-
-	host1x_debug_output(o, "%u: fifo:\n", ch->id);
-
-	val = host1x_ch_readl(ch, HOST1X_CHANNEL_FIFOSTAT);
-	host1x_debug_output(o, "FIFOSTAT %08x\n", val);
-	if (HOST1X_CHANNEL_FIFOSTAT_CFEMPTY_V(val)) {
-		host1x_debug_output(o, "[empty]\n");
-		return;
-	}
-
-	host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL);
-	host1x_sync_writel(host, HOST1X_SYNC_CFPEEK_CTRL_ENA_F(1) |
-			   HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(ch->id),
-			   HOST1X_SYNC_CFPEEK_CTRL);
-
-	val = host1x_sync_readl(host, HOST1X_SYNC_CFPEEK_PTRS);
-	rd_ptr = HOST1X_SYNC_CFPEEK_PTRS_CF_RD_PTR_V(val);
-	wr_ptr = HOST1X_SYNC_CFPEEK_PTRS_CF_WR_PTR_V(val);
-
-	val = host1x_sync_readl(host, HOST1X_SYNC_CF_SETUP(ch->id));
-	start = HOST1X_SYNC_CF_SETUP_BASE_V(val);
-	end = HOST1X_SYNC_CF_SETUP_LIMIT_V(val);
-
-	do {
-		host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL);
-		host1x_sync_writel(host, HOST1X_SYNC_CFPEEK_CTRL_ENA_F(1) |
-				   HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(ch->id) |
-				   HOST1X_SYNC_CFPEEK_CTRL_ADDR_F(rd_ptr),
-				   HOST1X_SYNC_CFPEEK_CTRL);
-		val = host1x_sync_readl(host, HOST1X_SYNC_CFPEEK_READ);
-
-		if (!data_count) {
-			host1x_debug_output(o, "%08x:", val);
-			data_count = show_channel_command(o, val);
-		} else {
-			host1x_debug_output(o, "%08x%s", val,
-					    data_count > 0 ? ", " : "])\n");
-			data_count--;
-		}
-
-		if (rd_ptr == end)
-			rd_ptr = start;
-		else
-			rd_ptr++;
-	} while (rd_ptr != wr_ptr);
-
-	if (data_count)
-		host1x_debug_output(o, ", ...])\n");
-	host1x_debug_output(o, "\n");
-
-	host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL);
-}
-
-static void host1x_debug_show_mlocks(struct host1x *host, struct output *o)
-{
-	unsigned int i;
-
-	host1x_debug_output(o, "---- mlocks ----\n");
-
-	for (i = 0; i < host1x_syncpt_nb_mlocks(host); i++) {
-		u32 owner =
-			host1x_sync_readl(host, HOST1X_SYNC_MLOCK_OWNER(i));
-		if (HOST1X_SYNC_MLOCK_OWNER_CH_OWNS_V(owner))
-			host1x_debug_output(o, "%u: locked by channel %u\n",
-				i, HOST1X_SYNC_MLOCK_OWNER_CHID_V(owner));
-		else if (HOST1X_SYNC_MLOCK_OWNER_CPU_OWNS_V(owner))
-			host1x_debug_output(o, "%u: locked by cpu\n", i);
-		else
-			host1x_debug_output(o, "%u: unlocked\n", i);
-	}
-
-	host1x_debug_output(o, "\n");
-}
+#if HOST1X_HW >= 6
+#include "debug_hw_1x06.c"
+#else
+#include "debug_hw_1x01.c"
+#endif
 
 static const struct host1x_debug_ops host1x_debug_ops = {
 	.show_channel_cdma = host1x_debug_show_channel_cdma,
diff --git a/drivers/gpu/host1x/hw/debug_hw_1x01.c b/drivers/gpu/host1x/hw/debug_hw_1x01.c
new file mode 100644
index 0000000000000000000000000000000000000000..8790d5fd5f209beed1e3232970647a4329146885
--- /dev/null
+++ b/drivers/gpu/host1x/hw/debug_hw_1x01.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2010 Google, Inc.
+ * Author: Erik Gilling <konkers@android.com>
+ *
+ * Copyright (C) 2011-2013 NVIDIA Corporation
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "../dev.h"
+#include "../debug.h"
+#include "../cdma.h"
+#include "../channel.h"
+
+static void host1x_debug_show_channel_cdma(struct host1x *host,
+					   struct host1x_channel *ch,
+					   struct output *o)
+{
+	struct host1x_cdma *cdma = &ch->cdma;
+	u32 dmaput, dmaget, dmactrl;
+	u32 cbstat, cbread;
+	u32 val, base, baseval;
+
+	dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT);
+	dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET);
+	dmactrl = host1x_ch_readl(ch, HOST1X_CHANNEL_DMACTRL);
+	cbread = host1x_sync_readl(host, HOST1X_SYNC_CBREAD(ch->id));
+	cbstat = host1x_sync_readl(host, HOST1X_SYNC_CBSTAT(ch->id));
+
+	host1x_debug_output(o, "%u-%s: ", ch->id, dev_name(ch->dev));
+
+	if (HOST1X_CHANNEL_DMACTRL_DMASTOP_V(dmactrl) ||
+	    !ch->cdma.push_buffer.mapped) {
+		host1x_debug_output(o, "inactive\n\n");
+		return;
+	}
+
+	if (HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat) == HOST1X_CLASS_HOST1X &&
+	    HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat) ==
+			HOST1X_UCLASS_WAIT_SYNCPT)
+		host1x_debug_output(o, "waiting on syncpt %d val %d\n",
+				    cbread >> 24, cbread & 0xffffff);
+	else if (HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat) ==
+				HOST1X_CLASS_HOST1X &&
+		 HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat) ==
+				HOST1X_UCLASS_WAIT_SYNCPT_BASE) {
+		base = (cbread >> 16) & 0xff;
+		baseval =
+			host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_BASE(base));
+		val = cbread & 0xffff;
+		host1x_debug_output(o, "waiting on syncpt %d val %d (base %d = %d; offset = %d)\n",
+				    cbread >> 24, baseval + val, base,
+				    baseval, val);
+	} else
+		host1x_debug_output(o, "active class %02x, offset %04x, val %08x\n",
+				    HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat),
+				    HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat),
+				    cbread);
+
+	host1x_debug_output(o, "DMAPUT %08x, DMAGET %08x, DMACTL %08x\n",
+			    dmaput, dmaget, dmactrl);
+	host1x_debug_output(o, "CBREAD %08x, CBSTAT %08x\n", cbread, cbstat);
+
+	show_channel_gathers(o, cdma);
+	host1x_debug_output(o, "\n");
+}
+
+static void host1x_debug_show_channel_fifo(struct host1x *host,
+					   struct host1x_channel *ch,
+					   struct output *o)
+{
+	u32 val, rd_ptr, wr_ptr, start, end;
+	unsigned int data_count = 0;
+
+	host1x_debug_output(o, "%u: fifo:\n", ch->id);
+
+	val = host1x_ch_readl(ch, HOST1X_CHANNEL_FIFOSTAT);
+	host1x_debug_output(o, "FIFOSTAT %08x\n", val);
+	if (HOST1X_CHANNEL_FIFOSTAT_CFEMPTY_V(val)) {
+		host1x_debug_output(o, "[empty]\n");
+		return;
+	}
+
+	host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL);
+	host1x_sync_writel(host, HOST1X_SYNC_CFPEEK_CTRL_ENA_F(1) |
+			   HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(ch->id),
+			   HOST1X_SYNC_CFPEEK_CTRL);
+
+	val = host1x_sync_readl(host, HOST1X_SYNC_CFPEEK_PTRS);
+	rd_ptr = HOST1X_SYNC_CFPEEK_PTRS_CF_RD_PTR_V(val);
+	wr_ptr = HOST1X_SYNC_CFPEEK_PTRS_CF_WR_PTR_V(val);
+
+	val = host1x_sync_readl(host, HOST1X_SYNC_CF_SETUP(ch->id));
+	start = HOST1X_SYNC_CF_SETUP_BASE_V(val);
+	end = HOST1X_SYNC_CF_SETUP_LIMIT_V(val);
+
+	do {
+		host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL);
+		host1x_sync_writel(host, HOST1X_SYNC_CFPEEK_CTRL_ENA_F(1) |
+				   HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(ch->id) |
+				   HOST1X_SYNC_CFPEEK_CTRL_ADDR_F(rd_ptr),
+				   HOST1X_SYNC_CFPEEK_CTRL);
+		val = host1x_sync_readl(host, HOST1X_SYNC_CFPEEK_READ);
+
+		if (!data_count) {
+			host1x_debug_output(o, "%08x: ", val);
+			data_count = show_channel_command(o, val, NULL);
+		} else {
+			host1x_debug_cont(o, "%08x%s", val,
+					  data_count > 1 ? ", " : "])\n");
+			data_count--;
+		}
+
+		if (rd_ptr == end)
+			rd_ptr = start;
+		else
+			rd_ptr++;
+	} while (rd_ptr != wr_ptr);
+
+	if (data_count)
+		host1x_debug_cont(o, ", ...])\n");
+	host1x_debug_output(o, "\n");
+
+	host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL);
+}
+
+static void host1x_debug_show_mlocks(struct host1x *host, struct output *o)
+{
+	unsigned int i;
+
+	host1x_debug_output(o, "---- mlocks ----\n");
+
+	for (i = 0; i < host1x_syncpt_nb_mlocks(host); i++) {
+		u32 owner =
+			host1x_sync_readl(host, HOST1X_SYNC_MLOCK_OWNER(i));
+		if (HOST1X_SYNC_MLOCK_OWNER_CH_OWNS_V(owner))
+			host1x_debug_output(o, "%u: locked by channel %u\n",
+				i, HOST1X_SYNC_MLOCK_OWNER_CHID_V(owner));
+		else if (HOST1X_SYNC_MLOCK_OWNER_CPU_OWNS_V(owner))
+			host1x_debug_output(o, "%u: locked by cpu\n", i);
+		else
+			host1x_debug_output(o, "%u: unlocked\n", i);
+	}
+
+	host1x_debug_output(o, "\n");
+}
diff --git a/drivers/gpu/host1x/hw/debug_hw_1x06.c b/drivers/gpu/host1x/hw/debug_hw_1x06.c
new file mode 100644
index 0000000000000000000000000000000000000000..b503c740c022dae1cb6d9d2bacb79cd9a045e393
--- /dev/null
+++ b/drivers/gpu/host1x/hw/debug_hw_1x06.c
@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2010 Google, Inc.
+ * Author: Erik Gilling <konkers@android.com>
+ *
+ * Copyright (C) 2011-2017 NVIDIA Corporation
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "../dev.h"
+#include "../debug.h"
+#include "../cdma.h"
+#include "../channel.h"
+
+static void host1x_debug_show_channel_cdma(struct host1x *host,
+					   struct host1x_channel *ch,
+					   struct output *o)
+{
+	struct host1x_cdma *cdma = &ch->cdma;
+	u32 dmaput, dmaget, dmactrl;
+	u32 offset, class;
+	u32 ch_stat;
+
+	dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT);
+	dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET);
+	dmactrl = host1x_ch_readl(ch, HOST1X_CHANNEL_DMACTRL);
+	offset = host1x_ch_readl(ch, HOST1X_CHANNEL_CMDP_OFFSET);
+	class = host1x_ch_readl(ch, HOST1X_CHANNEL_CMDP_CLASS);
+	ch_stat = host1x_ch_readl(ch, HOST1X_CHANNEL_CHANNELSTAT);
+
+	host1x_debug_output(o, "%u-%s: ", ch->id, dev_name(ch->dev));
+
+	if (dmactrl & HOST1X_CHANNEL_DMACTRL_DMASTOP ||
+	    !ch->cdma.push_buffer.mapped) {
+		host1x_debug_output(o, "inactive\n\n");
+		return;
+	}
+
+	if (class == HOST1X_CLASS_HOST1X && offset == HOST1X_UCLASS_WAIT_SYNCPT)
+		host1x_debug_output(o, "waiting on syncpt\n");
+	else
+		host1x_debug_output(o, "active class %02x, offset %04x\n",
+				    class, offset);
+
+	host1x_debug_output(o, "DMAPUT %08x, DMAGET %08x, DMACTL %08x\n",
+			    dmaput, dmaget, dmactrl);
+	host1x_debug_output(o, "CHANNELSTAT %02x\n", ch_stat);
+
+	show_channel_gathers(o, cdma);
+	host1x_debug_output(o, "\n");
+}
+
+static void host1x_debug_show_channel_fifo(struct host1x *host,
+					   struct host1x_channel *ch,
+					   struct output *o)
+{
+	u32 val, rd_ptr, wr_ptr, start, end;
+	u32 payload = INVALID_PAYLOAD;
+	unsigned int data_count = 0;
+
+	host1x_debug_output(o, "%u: fifo:\n", ch->id);
+
+	val = host1x_ch_readl(ch, HOST1X_CHANNEL_CMDFIFO_STAT);
+	host1x_debug_output(o, "CMDFIFO_STAT %08x\n", val);
+	if (val & HOST1X_CHANNEL_CMDFIFO_STAT_EMPTY) {
+		host1x_debug_output(o, "[empty]\n");
+		return;
+	}
+
+	val = host1x_ch_readl(ch, HOST1X_CHANNEL_CMDFIFO_RDATA);
+	host1x_debug_output(o, "CMDFIFO_RDATA %08x\n", val);
+
+	/* Peek pointer values are invalid during SLCG, so disable it */
+	host1x_hypervisor_writel(host, 0x1, HOST1X_HV_ICG_EN_OVERRIDE);
+
+	val = 0;
+	val |= HOST1X_HV_CMDFIFO_PEEK_CTRL_ENABLE;
+	val |= HOST1X_HV_CMDFIFO_PEEK_CTRL_CHANNEL(ch->id);
+	host1x_hypervisor_writel(host, val, HOST1X_HV_CMDFIFO_PEEK_CTRL);
+
+	val = host1x_hypervisor_readl(host, HOST1X_HV_CMDFIFO_PEEK_PTRS);
+	rd_ptr = HOST1X_HV_CMDFIFO_PEEK_PTRS_RD_PTR_V(val);
+	wr_ptr = HOST1X_HV_CMDFIFO_PEEK_PTRS_WR_PTR_V(val);
+
+	val = host1x_hypervisor_readl(host, HOST1X_HV_CMDFIFO_SETUP(ch->id));
+	start = HOST1X_HV_CMDFIFO_SETUP_BASE_V(val);
+	end = HOST1X_HV_CMDFIFO_SETUP_LIMIT_V(val);
+
+	do {
+		val = 0;
+		val |= HOST1X_HV_CMDFIFO_PEEK_CTRL_ENABLE;
+		val |= HOST1X_HV_CMDFIFO_PEEK_CTRL_CHANNEL(ch->id);
+		val |= HOST1X_HV_CMDFIFO_PEEK_CTRL_ADDR(rd_ptr);
+		host1x_hypervisor_writel(host, val,
+					 HOST1X_HV_CMDFIFO_PEEK_CTRL);
+
+		val = host1x_hypervisor_readl(host,
+					      HOST1X_HV_CMDFIFO_PEEK_READ);
+
+		if (!data_count) {
+			host1x_debug_output(o, "%03x 0x%08x: ",
+					    rd_ptr - start, val);
+			data_count = show_channel_command(o, val, &payload);
+		} else {
+			host1x_debug_cont(o, "%08x%s", val,
+					  data_count > 1 ? ", " : "])\n");
+			data_count--;
+		}
+
+		if (rd_ptr == end)
+			rd_ptr = start;
+		else
+			rd_ptr++;
+	} while (rd_ptr != wr_ptr);
+
+	if (data_count)
+		host1x_debug_cont(o, ", ...])\n");
+	host1x_debug_output(o, "\n");
+
+	host1x_hypervisor_writel(host, 0x0, HOST1X_HV_CMDFIFO_PEEK_CTRL);
+	host1x_hypervisor_writel(host, 0x0, HOST1X_HV_ICG_EN_OVERRIDE);
+}
+
+static void host1x_debug_show_mlocks(struct host1x *host, struct output *o)
+{
+	/* TODO */
+}
diff --git a/drivers/gpu/host1x/hw/host1x01.c b/drivers/gpu/host1x/hw/host1x01.c
index 859b73beb4d0c326fcaf24ae1086ff2bc876df09..bb124f8b4af8881dc549cab11d7e71a0d6470f34 100644
--- a/drivers/gpu/host1x/hw/host1x01.c
+++ b/drivers/gpu/host1x/hw/host1x01.c
@@ -21,6 +21,8 @@
 #include "host1x01_hardware.h"
 
 /* include code */
+#define HOST1X_HW 1
+
 #include "cdma_hw.c"
 #include "channel_hw.c"
 #include "debug_hw.c"
diff --git a/drivers/gpu/host1x/hw/host1x02.c b/drivers/gpu/host1x/hw/host1x02.c
index 928946c2144bd17eaa814e1ab9f5449cf17264d8..c5f85dbedb984d3d92113310bafc0ec924158fb8 100644
--- a/drivers/gpu/host1x/hw/host1x02.c
+++ b/drivers/gpu/host1x/hw/host1x02.c
@@ -21,6 +21,8 @@
 #include "host1x02_hardware.h"
 
 /* include code */
+#define HOST1X_HW 2
+
 #include "cdma_hw.c"
 #include "channel_hw.c"
 #include "debug_hw.c"
diff --git a/drivers/gpu/host1x/hw/host1x04.c b/drivers/gpu/host1x/hw/host1x04.c
index 8007c70fa9c402753b3a07569157f375282c7ec1..f102a1a7743f3f00ede873c0dc5afdb4e18d7904 100644
--- a/drivers/gpu/host1x/hw/host1x04.c
+++ b/drivers/gpu/host1x/hw/host1x04.c
@@ -21,6 +21,8 @@
 #include "host1x04_hardware.h"
 
 /* include code */
+#define HOST1X_HW 4
+
 #include "cdma_hw.c"
 #include "channel_hw.c"
 #include "debug_hw.c"
diff --git a/drivers/gpu/host1x/hw/host1x05.c b/drivers/gpu/host1x/hw/host1x05.c
index 047097ce3badfd7bb238aa2e3fa448df4a25d100..2b1239d6ec6746c2b3cc847d2c3430eb320ff8d8 100644
--- a/drivers/gpu/host1x/hw/host1x05.c
+++ b/drivers/gpu/host1x/hw/host1x05.c
@@ -21,6 +21,8 @@
 #include "host1x05_hardware.h"
 
 /* include code */
+#define HOST1X_HW 5
+
 #include "cdma_hw.c"
 #include "channel_hw.c"
 #include "debug_hw.c"
diff --git a/drivers/gpu/host1x/hw/host1x06.c b/drivers/gpu/host1x/hw/host1x06.c
new file mode 100644
index 0000000000000000000000000000000000000000..a66230827c598a7f2350f9cac261473d0e09e1cc
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x06.c
@@ -0,0 +1,44 @@
+/*
+ * Host1x init for Tegra186 SoCs
+ *
+ * Copyright (c) 2017 NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* include hw specification */
+#include "host1x06.h"
+#include "host1x06_hardware.h"
+
+/* include code */
+#define HOST1X_HW 6
+
+#include "cdma_hw.c"
+#include "channel_hw.c"
+#include "debug_hw.c"
+#include "intr_hw.c"
+#include "syncpt_hw.c"
+
+#include "../dev.h"
+
+int host1x06_init(struct host1x *host)
+{
+	host->channel_op = &host1x_channel_ops;
+	host->cdma_op = &host1x_cdma_ops;
+	host->cdma_pb_op = &host1x_pushbuffer_ops;
+	host->syncpt_op = &host1x_syncpt_ops;
+	host->intr_op = &host1x_intr_ops;
+	host->debug_op = &host1x_debug_ops;
+
+	return 0;
+}
diff --git a/drivers/gpu/host1x/hw/host1x06.h b/drivers/gpu/host1x/hw/host1x06.h
new file mode 100644
index 0000000000000000000000000000000000000000..d9abe14892417070bc1fcc10d0c781841195a50d
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x06.h
@@ -0,0 +1,26 @@
+/*
+ * Host1x init for Tegra186 SoCs
+ *
+ * Copyright (c) 2017 NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HOST1X_HOST1X06_H
+#define HOST1X_HOST1X06_H
+
+struct host1x;
+
+int host1x06_init(struct host1x *host);
+
+#endif
diff --git a/drivers/gpu/host1x/hw/host1x06_hardware.h b/drivers/gpu/host1x/hw/host1x06_hardware.h
new file mode 100644
index 0000000000000000000000000000000000000000..3039c92ea605ca8b2b864f63e6f39da1f70aef64
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x06_hardware.h
@@ -0,0 +1,142 @@
+/*
+ * Tegra host1x Register Offsets for Tegra186
+ *
+ * Copyright (c) 2017 NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __HOST1X_HOST1X06_HARDWARE_H
+#define __HOST1X_HOST1X06_HARDWARE_H
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+
+#include "hw_host1x06_uclass.h"
+#include "hw_host1x06_vm.h"
+#include "hw_host1x06_hypervisor.h"
+
+static inline u32 host1x_class_host_wait_syncpt(
+	unsigned indx, unsigned threshold)
+{
+	return host1x_uclass_wait_syncpt_indx_f(indx)
+		| host1x_uclass_wait_syncpt_thresh_f(threshold);
+}
+
+static inline u32 host1x_class_host_load_syncpt_base(
+	unsigned indx, unsigned threshold)
+{
+	return host1x_uclass_load_syncpt_base_base_indx_f(indx)
+		| host1x_uclass_load_syncpt_base_value_f(threshold);
+}
+
+static inline u32 host1x_class_host_wait_syncpt_base(
+	unsigned indx, unsigned base_indx, unsigned offset)
+{
+	return host1x_uclass_wait_syncpt_base_indx_f(indx)
+		| host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
+		| host1x_uclass_wait_syncpt_base_offset_f(offset);
+}
+
+static inline u32 host1x_class_host_incr_syncpt_base(
+	unsigned base_indx, unsigned offset)
+{
+	return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
+		| host1x_uclass_incr_syncpt_base_offset_f(offset);
+}
+
+static inline u32 host1x_class_host_incr_syncpt(
+	unsigned cond, unsigned indx)
+{
+	return host1x_uclass_incr_syncpt_cond_f(cond)
+		| host1x_uclass_incr_syncpt_indx_f(indx);
+}
+
+static inline u32 host1x_class_host_indoff_reg_write(
+	unsigned mod_id, unsigned offset, bool auto_inc)
+{
+	u32 v = host1x_uclass_indoff_indbe_f(0xf)
+		| host1x_uclass_indoff_indmodid_f(mod_id)
+		| host1x_uclass_indoff_indroffset_f(offset);
+	if (auto_inc)
+		v |= host1x_uclass_indoff_autoinc_f(1);
+	return v;
+}
+
+static inline u32 host1x_class_host_indoff_reg_read(
+	unsigned mod_id, unsigned offset, bool auto_inc)
+{
+	u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
+		| host1x_uclass_indoff_indroffset_f(offset)
+		| host1x_uclass_indoff_rwn_read_v();
+	if (auto_inc)
+		v |= host1x_uclass_indoff_autoinc_f(1);
+	return v;
+}
+
+/* cdma opcodes */
+static inline u32 host1x_opcode_setclass(
+	unsigned class_id, unsigned offset, unsigned mask)
+{
+	return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
+}
+
+static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
+{
+	return (1 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
+{
+	return (2 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
+{
+	return (3 << 28) | (offset << 16) | mask;
+}
+
+static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
+{
+	return (4 << 28) | (offset << 16) | value;
+}
+
+static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
+{
+	return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
+		host1x_class_host_incr_syncpt(cond, indx));
+}
+
+static inline u32 host1x_opcode_restart(unsigned address)
+{
+	return (5 << 28) | (address >> 4);
+}
+
+static inline u32 host1x_opcode_gather(unsigned count)
+{
+	return (6 << 28) | count;
+}
+
+static inline u32 host1x_opcode_gather_nonincr(unsigned offset,	unsigned count)
+{
+	return (6 << 28) | (offset << 16) | BIT(15) | count;
+}
+
+static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
+{
+	return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
+}
+
+#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x04_channel.h b/drivers/gpu/host1x/hw/hw_host1x04_channel.h
index 95e6f96142b9d7ec97eea04338258dd523964454..2e8b635aa6607aeab086c64c510a91e08eb29956 100644
--- a/drivers/gpu/host1x/hw/hw_host1x04_channel.h
+++ b/drivers/gpu/host1x/hw/hw_host1x04_channel.h
@@ -117,5 +117,17 @@ static inline u32 host1x_channel_dmactrl_dmainitget(void)
 }
 #define HOST1X_CHANNEL_DMACTRL_DMAINITGET \
 	host1x_channel_dmactrl_dmainitget()
+static inline u32 host1x_channel_channelctrl_r(void)
+{
+	return 0x98;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL \
+	host1x_channel_channelctrl_r()
+static inline u32 host1x_channel_channelctrl_kernel_filter_gbuffer_f(u32 v)
+{
+	return (v & 0x1) << 2;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(v) \
+	host1x_channel_channelctrl_kernel_filter_gbuffer_f(v)
 
 #endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x05_channel.h b/drivers/gpu/host1x/hw/hw_host1x05_channel.h
index fce6e2c1ff4c1af968fbf1fe5de9d6c337eb62a7..abbbc2641ce650f966959e1759ad585ba8d7acc9 100644
--- a/drivers/gpu/host1x/hw/hw_host1x05_channel.h
+++ b/drivers/gpu/host1x/hw/hw_host1x05_channel.h
@@ -117,5 +117,17 @@ static inline u32 host1x_channel_dmactrl_dmainitget(void)
 }
 #define HOST1X_CHANNEL_DMACTRL_DMAINITGET \
 	host1x_channel_dmactrl_dmainitget()
+static inline u32 host1x_channel_channelctrl_r(void)
+{
+	return 0x98;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL \
+	host1x_channel_channelctrl_r()
+static inline u32 host1x_channel_channelctrl_kernel_filter_gbuffer_f(u32 v)
+{
+	return (v & 0x1) << 2;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(v) \
+	host1x_channel_channelctrl_kernel_filter_gbuffer_f(v)
 
 #endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x06_hypervisor.h b/drivers/gpu/host1x/hw/hw_host1x06_hypervisor.h
new file mode 100644
index 0000000000000000000000000000000000000000..c05dab8a178bb457f661544a764af6758e64a922
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x06_hypervisor.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2017 NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#define HOST1X_HV_SYNCPT_PROT_EN			0x1ac4
+#define HOST1X_HV_SYNCPT_PROT_EN_CH_EN			BIT(1)
+#define HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(x)		(0x2020 + (x * 4))
+#define HOST1X_HV_CMDFIFO_PEEK_CTRL			0x233c
+#define HOST1X_HV_CMDFIFO_PEEK_CTRL_ADDR(x)		(x)
+#define HOST1X_HV_CMDFIFO_PEEK_CTRL_CHANNEL(x)		((x) << 16)
+#define HOST1X_HV_CMDFIFO_PEEK_CTRL_ENABLE		BIT(31)
+#define HOST1X_HV_CMDFIFO_PEEK_READ			0x2340
+#define HOST1X_HV_CMDFIFO_PEEK_PTRS			0x2344
+#define HOST1X_HV_CMDFIFO_PEEK_PTRS_WR_PTR_V(x)		(((x) >> 16) & 0xfff)
+#define HOST1X_HV_CMDFIFO_PEEK_PTRS_RD_PTR_V(x)		((x) & 0xfff)
+#define HOST1X_HV_CMDFIFO_SETUP(x)			(0x2588 + (x * 4))
+#define HOST1X_HV_CMDFIFO_SETUP_LIMIT_V(x)		(((x) >> 16) & 0xfff)
+#define HOST1X_HV_CMDFIFO_SETUP_BASE_V(x)		((x) & 0xfff)
+#define HOST1X_HV_ICG_EN_OVERRIDE			0x2aa8
diff --git a/drivers/gpu/host1x/hw/hw_host1x06_uclass.h b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h
new file mode 100644
index 0000000000000000000000000000000000000000..4457486c72b05e0dd5f2b3ddade15276e5fa98c7
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2017 NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef HOST1X_HW_HOST1X06_UCLASS_H
+#define HOST1X_HW_HOST1X06_UCLASS_H
+
+static inline u32 host1x_uclass_incr_syncpt_r(void)
+{
+	return 0x0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT \
+	host1x_uclass_incr_syncpt_r()
+static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
+{
+	return (v & 0xff) << 8;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \
+	host1x_uclass_incr_syncpt_cond_f(v)
+static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
+{
+	return (v & 0xff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
+	host1x_uclass_incr_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_r(void)
+{
+	return 0x8;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT \
+	host1x_uclass_wait_syncpt_r()
+static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \
+	host1x_uclass_wait_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \
+	host1x_uclass_wait_syncpt_thresh_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_r(void)
+{
+	return 0x9;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \
+	host1x_uclass_wait_syncpt_base_r()
+static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 16;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_wait_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_r(void)
+{
+	return 0xb;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \
+	host1x_uclass_load_syncpt_base_r()
+static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_load_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \
+	host1x_uclass_load_syncpt_base_value_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_incr_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_incr_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_indoff_r(void)
+{
+	return 0x2d;
+}
+#define HOST1X_UCLASS_INDOFF \
+	host1x_uclass_indoff_r()
+static inline u32 host1x_uclass_indoff_indbe_f(u32 v)
+{
+	return (v & 0xf) << 28;
+}
+#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \
+	host1x_uclass_indoff_indbe_f(v)
+static inline u32 host1x_uclass_indoff_autoinc_f(u32 v)
+{
+	return (v & 0x1) << 27;
+}
+#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \
+	host1x_uclass_indoff_autoinc_f(v)
+static inline u32 host1x_uclass_indoff_indmodid_f(u32 v)
+{
+	return (v & 0xff) << 18;
+}
+#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \
+	host1x_uclass_indoff_indmodid_f(v)
+static inline u32 host1x_uclass_indoff_indroffset_f(u32 v)
+{
+	return (v & 0xffff) << 2;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_indoff_rwn_read_v(void)
+{
+	return 1;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x06_vm.h b/drivers/gpu/host1x/hw/hw_host1x06_vm.h
new file mode 100644
index 0000000000000000000000000000000000000000..e54b33902332084dc3ace55801fbfd9c366f16a7
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x06_vm.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2017 NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#define HOST1X_CHANNEL_DMASTART				0x0000
+#define HOST1X_CHANNEL_DMASTART_HI			0x0004
+#define HOST1X_CHANNEL_DMAPUT				0x0008
+#define HOST1X_CHANNEL_DMAPUT_HI			0x000c
+#define HOST1X_CHANNEL_DMAGET				0x0010
+#define HOST1X_CHANNEL_DMAGET_HI			0x0014
+#define HOST1X_CHANNEL_DMAEND				0x0018
+#define HOST1X_CHANNEL_DMAEND_HI			0x001c
+#define HOST1X_CHANNEL_DMACTRL				0x0020
+#define HOST1X_CHANNEL_DMACTRL_DMASTOP			BIT(0)
+#define HOST1X_CHANNEL_DMACTRL_DMAGETRST		BIT(1)
+#define HOST1X_CHANNEL_DMACTRL_DMAINITGET		BIT(2)
+#define HOST1X_CHANNEL_CMDFIFO_STAT			0x0024
+#define HOST1X_CHANNEL_CMDFIFO_STAT_EMPTY		BIT(13)
+#define HOST1X_CHANNEL_CMDFIFO_RDATA			0x0028
+#define HOST1X_CHANNEL_CMDP_OFFSET			0x0030
+#define HOST1X_CHANNEL_CMDP_CLASS			0x0034
+#define HOST1X_CHANNEL_CHANNELSTAT			0x0038
+#define HOST1X_CHANNEL_CMDPROC_STOP			0x0048
+#define HOST1X_CHANNEL_TEARDOWN				0x004c
+
+#define HOST1X_SYNC_SYNCPT_CPU_INCR(x)			(0x6400 + 4*(x))
+#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(x)	(0x6464 + 4*(x))
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(x)	(0x652c + 4*(x))
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(x)	(0x6590 + 4*(x))
+#define HOST1X_SYNC_SYNCPT_BASE(x)			(0x8000 + 4*(x))
+#define HOST1X_SYNC_SYNCPT(x)				(0x8080 + 4*(x))
+#define HOST1X_SYNC_SYNCPT_INT_THRESH(x)		(0x8a00 + 4*(x))
+#define HOST1X_SYNC_SYNCPT_CH_APP(x)			(0x9384 + 4*(x))
+#define HOST1X_SYNC_SYNCPT_CH_APP_CH(v)			(((v) & 0x3f) << 8)
diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c
index 37ebb51703fa2da571495433eb9ab3cfa321ead1..3292392370900c95534489a6e39602e840ce10ae 100644
--- a/drivers/gpu/host1x/hw/intr_hw.c
+++ b/drivers/gpu/host1x/hw/intr_hw.c
@@ -72,6 +72,23 @@ static void _host1x_intr_disable_all_syncpt_intrs(struct host1x *host)
 	}
 }
 
+static void intr_hw_init(struct host1x *host, u32 cpm)
+{
+#if HOST1X_HW < 6
+	/* disable the ip_busy_timeout. this prevents write drops */
+	host1x_sync_writel(host, 0, HOST1X_SYNC_IP_BUSY_TIMEOUT);
+
+	/*
+	 * increase the auto-ack timout to the maximum value. 2d will hang
+	 * otherwise on Tegra2.
+	 */
+	host1x_sync_writel(host, 0xff, HOST1X_SYNC_CTXSW_TIMEOUT_CFG);
+
+	/* update host clocks per usec */
+	host1x_sync_writel(host, cpm, HOST1X_SYNC_USEC_CLK);
+#endif
+}
+
 static int
 _host1x_intr_init_host_sync(struct host1x *host, u32 cpm,
 			    void (*syncpt_thresh_work)(struct work_struct *))
@@ -92,17 +109,7 @@ _host1x_intr_init_host_sync(struct host1x *host, u32 cpm,
 		return err;
 	}
 
-	/* disable the ip_busy_timeout. this prevents write drops */
-	host1x_sync_writel(host, 0, HOST1X_SYNC_IP_BUSY_TIMEOUT);
-
-	/*
-	 * increase the auto-ack timout to the maximum value. 2d will hang
-	 * otherwise on Tegra2.
-	 */
-	host1x_sync_writel(host, 0xff, HOST1X_SYNC_CTXSW_TIMEOUT_CFG);
-
-	/* update host clocks per usec */
-	host1x_sync_writel(host, cpm, HOST1X_SYNC_USEC_CLK);
+	intr_hw_init(host, cpm);
 
 	return 0;
 }
diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c
index 7b0270d607429ee7d474add9a65738153c6458de..7dfd47d74f89ff94954c33b02b80765c0512cbc0 100644
--- a/drivers/gpu/host1x/hw/syncpt_hw.c
+++ b/drivers/gpu/host1x/hw/syncpt_hw.c
@@ -106,6 +106,50 @@ static int syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
 	return 0;
 }
 
+/**
+ * syncpt_assign_to_channel() - Assign syncpoint to channel
+ * @sp: syncpoint
+ * @ch: channel
+ *
+ * On chips with the syncpoint protection feature (Tegra186+), assign @sp to
+ * @ch, preventing other channels from incrementing the syncpoints. If @ch is
+ * NULL, unassigns the syncpoint.
+ *
+ * On older chips, do nothing.
+ */
+static void syncpt_assign_to_channel(struct host1x_syncpt *sp,
+				  struct host1x_channel *ch)
+{
+#if HOST1X_HW >= 6
+	struct host1x *host = sp->host;
+
+	if (!host->hv_regs)
+		return;
+
+	host1x_sync_writel(host,
+			   HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
+			   HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
+#endif
+}
+
+/**
+ * syncpt_enable_protection() - Enable syncpoint protection
+ * @host: host1x instance
+ *
+ * On chips with the syncpoint protection feature (Tegra186+), enable this
+ * feature. On older chips, do nothing.
+ */
+static void syncpt_enable_protection(struct host1x *host)
+{
+#if HOST1X_HW >= 6
+	if (!host->hv_regs)
+		return;
+
+	host1x_hypervisor_writel(host, HOST1X_HV_SYNCPT_PROT_EN_CH_EN,
+				 HOST1X_HV_SYNCPT_PROT_EN);
+#endif
+}
+
 static const struct host1x_syncpt_ops host1x_syncpt_ops = {
 	.restore = syncpt_restore,
 	.restore_wait_base = syncpt_restore_wait_base,
@@ -113,4 +157,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = {
 	.load = syncpt_load,
 	.cpu_incr = syncpt_cpu_incr,
 	.patch_wait = syncpt_patch_wait,
+	.assign_to_channel = syncpt_assign_to_channel,
+	.enable_protection = syncpt_enable_protection,
 };
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 048ac9e344ce270c6958275f4b65809ffeb8445e..a2a952adc136a42172d39b2866960211468afd00 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -54,7 +54,7 @@ static void host1x_syncpt_base_free(struct host1x_syncpt_base *base)
 }
 
 static struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host,
-						 struct device *dev,
+						 struct host1x_client *client,
 						 unsigned long flags)
 {
 	int i;
@@ -76,11 +76,11 @@ static struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host,
 	}
 
 	name = kasprintf(GFP_KERNEL, "%02u-%s", sp->id,
-			dev ? dev_name(dev) : NULL);
+			 client ? dev_name(client->dev) : NULL);
 	if (!name)
 		goto free_base;
 
-	sp->dev = dev;
+	sp->client = client;
 	sp->name = name;
 
 	if (flags & HOST1X_SYNCPT_CLIENT_MANAGED)
@@ -398,6 +398,13 @@ int host1x_syncpt_init(struct host1x *host)
 	for (i = 0; i < host->info->nb_pts; i++) {
 		syncpt[i].id = i;
 		syncpt[i].host = host;
+
+		/*
+		 * Unassign syncpt from channels for purposes of Tegra186
+		 * syncpoint protection. This prevents any channel from
+		 * accessing it until it is reassigned.
+		 */
+		host1x_hw_syncpt_assign_to_channel(host, &syncpt[i], NULL);
 	}
 
 	for (i = 0; i < host->info->nb_bases; i++)
@@ -408,6 +415,7 @@ int host1x_syncpt_init(struct host1x *host)
 	host->bases = bases;
 
 	host1x_syncpt_restore(host);
+	host1x_hw_syncpt_enable_protection(host);
 
 	/* Allocate sync point to use for clearing waits for expired fences */
 	host->nop_sp = host1x_syncpt_alloc(host, NULL, 0);
@@ -419,7 +427,7 @@ int host1x_syncpt_init(struct host1x *host)
 
 /**
  * host1x_syncpt_request() - request a syncpoint
- * @dev: device requesting the syncpoint
+ * @client: client requesting the syncpoint
  * @flags: flags
  *
  * host1x client drivers can use this function to allocate a syncpoint for
@@ -427,12 +435,12 @@ int host1x_syncpt_init(struct host1x *host)
  * use by the client exclusively. When no longer using a syncpoint, a host1x
  * client driver needs to release it using host1x_syncpt_free().
  */
-struct host1x_syncpt *host1x_syncpt_request(struct device *dev,
+struct host1x_syncpt *host1x_syncpt_request(struct host1x_client *client,
 					    unsigned long flags)
 {
-	struct host1x *host = dev_get_drvdata(dev->parent);
+	struct host1x *host = dev_get_drvdata(client->parent->parent);
 
-	return host1x_syncpt_alloc(host, dev, flags);
+	return host1x_syncpt_alloc(host, client, flags);
 }
 EXPORT_SYMBOL(host1x_syncpt_request);
 
@@ -456,7 +464,7 @@ void host1x_syncpt_free(struct host1x_syncpt *sp)
 	host1x_syncpt_base_free(sp->base);
 	kfree(sp->name);
 	sp->base = NULL;
-	sp->dev = NULL;
+	sp->client = NULL;
 	sp->name = NULL;
 	sp->client_managed = false;
 
diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h
index f719205105ac19192bd8b7881d17ab87810696eb..9d88d37c2397232cec563221c3fbc0244422e4ea 100644
--- a/drivers/gpu/host1x/syncpt.h
+++ b/drivers/gpu/host1x/syncpt.h
@@ -44,7 +44,7 @@ struct host1x_syncpt {
 	const char *name;
 	bool client_managed;
 	struct host1x *host;
-	struct device *dev;
+	struct host1x_client *client;
 	struct host1x_syncpt_base *base;
 
 	/* interrupt data */
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index 630b1a98ab58a91ce0554fef3bf5228f460cd11d..ddf7f9ca86ccd6fe3702a14b78322bae7161fe83 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -157,7 +157,7 @@ int host1x_syncpt_incr(struct host1x_syncpt *sp);
 u32 host1x_syncpt_incr_max(struct host1x_syncpt *sp, u32 incrs);
 int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
 		       u32 *value);
-struct host1x_syncpt *host1x_syncpt_request(struct device *dev,
+struct host1x_syncpt *host1x_syncpt_request(struct host1x_client *client,
 					    unsigned long flags);
 void host1x_syncpt_free(struct host1x_syncpt *sp);