diff --git a/drivers/gpu/drm/nouveau/Makefile b/drivers/gpu/drm/nouveau/Makefile
index 48c290b5da8c2743257cdfa9ca08db565dd7bd7b..32db806f3b5aa53e82a25797c271b8f4ca8fa2c0 100644
--- a/drivers/gpu/drm/nouveau/Makefile
+++ b/drivers/gpu/drm/nouveau/Makefile
@@ -16,7 +16,7 @@ nouveau-y := nouveau_drv.o nouveau_state.o nouveau_channel.o nouveau_mem.o \
              nv04_fifo.o nv10_fifo.o nv40_fifo.o nv50_fifo.o \
              nv04_graph.o nv10_graph.o nv20_graph.o \
              nv40_graph.o nv50_graph.o \
-             nv40_grctx.o \
+             nv40_grctx.o nv50_grctx.o \
              nv04_instmem.o nv50_instmem.o \
              nv50_crtc.o nv50_dac.o nv50_sor.o \
              nv50_cursor.o nv50_display.o nv50_fbcon.o \
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c
index 0e9cd1d49130657c4aee725b8b490e75f0b4ba19..71247da17da5cd4e820dbfe953b61f524cce52c4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.c
@@ -311,11 +311,11 @@ valid_reg(struct nvbios *bios, uint32_t reg)
 
 	/* C51 has misaligned regs on purpose. Marvellous */
 	if (reg & 0x2 ||
-	    (reg & 0x1 && dev_priv->VBIOS.pub.chip_version != 0x51))
+	    (reg & 0x1 && dev_priv->vbios.chip_version != 0x51))
 		NV_ERROR(dev, "======= misaligned reg 0x%08X =======\n", reg);
 
 	/* warn on C51 regs that haven't been verified accessible in tracing */
-	if (reg & 0x1 && dev_priv->VBIOS.pub.chip_version == 0x51 &&
+	if (reg & 0x1 && dev_priv->vbios.chip_version == 0x51 &&
 	    reg != 0x130d && reg != 0x1311 && reg != 0x60081d)
 		NV_WARN(dev, "=== C51 misaligned reg 0x%08X not verified ===\n",
 			reg);
@@ -420,7 +420,7 @@ bios_wr32(struct nvbios *bios, uint32_t reg, uint32_t data)
 	LOG_OLD_VALUE(bios_rd32(bios, reg));
 	BIOSLOG(bios, "	Write: Reg: 0x%08X, Data: 0x%08X\n", reg, data);
 
-	if (dev_priv->VBIOS.execute) {
+	if (dev_priv->vbios.execute) {
 		still_alive();
 		nv_wr32(bios->dev, reg, data);
 	}
@@ -647,7 +647,7 @@ nv50_pll_set(struct drm_device *dev, uint32_t reg, uint32_t clk)
 	reg0 = (reg0 & 0xfff8ffff) | (pll.log2P << 16);
 	reg1 = (reg1 & 0xffff0000) | (pll.N1 << 8) | pll.M1;
 
-	if (dev_priv->VBIOS.execute) {
+	if (dev_priv->vbios.execute) {
 		still_alive();
 		nv_wr32(dev, reg + 4, reg1);
 		nv_wr32(dev, reg + 0, reg0);
@@ -689,7 +689,7 @@ setPLL(struct nvbios *bios, uint32_t reg, uint32_t clk)
 static int dcb_entry_idx_from_crtchead(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 
 	/*
 	 * For the results of this function to be correct, CR44 must have been
@@ -700,7 +700,7 @@ static int dcb_entry_idx_from_crtchead(struct drm_device *dev)
 
 	uint8_t dcb_entry = NVReadVgaCrtc5758(dev, bios->state.crtchead, 0);
 
-	if (dcb_entry > bios->bdcb.dcb.entries) {
+	if (dcb_entry > bios->dcb.entries) {
 		NV_ERROR(dev, "CR58 doesn't have a valid DCB entry currently "
 				"(%02X)\n", dcb_entry);
 		dcb_entry = 0x7f;	/* unused / invalid marker */
@@ -713,25 +713,26 @@ static struct nouveau_i2c_chan *
 init_i2c_device_find(struct drm_device *dev, int i2c_index)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct bios_parsed_dcb *bdcb = &dev_priv->VBIOS.bdcb;
+	struct dcb_table *dcb = &dev_priv->vbios.dcb;
 
 	if (i2c_index == 0xff) {
 		/* note: dcb_entry_idx_from_crtchead needs pre-script set-up */
 		int idx = dcb_entry_idx_from_crtchead(dev), shift = 0;
-		int default_indices = bdcb->i2c_default_indices;
+		int default_indices = dcb->i2c_default_indices;
 
-		if (idx != 0x7f && bdcb->dcb.entry[idx].i2c_upper_default)
+		if (idx != 0x7f && dcb->entry[idx].i2c_upper_default)
 			shift = 4;
 
 		i2c_index = (default_indices >> shift) & 0xf;
 	}
 	if (i2c_index == 0x80)	/* g80+ */
-		i2c_index = bdcb->i2c_default_indices & 0xf;
+		i2c_index = dcb->i2c_default_indices & 0xf;
 
 	return nouveau_i2c_find(dev, i2c_index);
 }
 
-static uint32_t get_tmds_index_reg(struct drm_device *dev, uint8_t mlv)
+static uint32_t
+get_tmds_index_reg(struct drm_device *dev, uint8_t mlv)
 {
 	/*
 	 * For mlv < 0x80, it is an index into a table of TMDS base addresses.
@@ -744,6 +745,7 @@ static uint32_t get_tmds_index_reg(struct drm_device *dev, uint8_t mlv)
 	 */
 
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nvbios *bios = &dev_priv->vbios;
 	const int pramdac_offset[13] = {
 		0, 0, 0x8, 0, 0x2000, 0, 0, 0, 0x2008, 0, 0, 0, 0x2000 };
 	const uint32_t pramdac_table[4] = {
@@ -756,13 +758,12 @@ static uint32_t get_tmds_index_reg(struct drm_device *dev, uint8_t mlv)
 		dcb_entry = dcb_entry_idx_from_crtchead(dev);
 		if (dcb_entry == 0x7f)
 			return 0;
-		dacoffset = pramdac_offset[
-				dev_priv->VBIOS.bdcb.dcb.entry[dcb_entry].or];
+		dacoffset = pramdac_offset[bios->dcb.entry[dcb_entry].or];
 		if (mlv == 0x81)
 			dacoffset ^= 8;
 		return 0x6808b0 + dacoffset;
 	} else {
-		if (mlv > ARRAY_SIZE(pramdac_table)) {
+		if (mlv >= ARRAY_SIZE(pramdac_table)) {
 			NV_ERROR(dev, "Magic Lookup Value too big (%02X)\n",
 									mlv);
 			return 0;
@@ -2574,19 +2575,19 @@ init_gpio(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 
 	const uint32_t nv50_gpio_reg[4] = { 0xe104, 0xe108, 0xe280, 0xe284 };
 	const uint32_t nv50_gpio_ctl[2] = { 0xe100, 0xe28c };
-	const uint8_t *gpio_table = &bios->data[bios->bdcb.gpio_table_ptr];
+	const uint8_t *gpio_table = &bios->data[bios->dcb.gpio_table_ptr];
 	const uint8_t *gpio_entry;
 	int i;
 
 	if (!iexec->execute)
 		return 1;
 
-	if (bios->bdcb.version != 0x40) {
+	if (bios->dcb.version != 0x40) {
 		NV_ERROR(bios->dev, "DCB table not version 4.0\n");
 		return 0;
 	}
 
-	if (!bios->bdcb.gpio_table_ptr) {
+	if (!bios->dcb.gpio_table_ptr) {
 		NV_WARN(bios->dev, "Invalid pointer to INIT_8E table\n");
 		return 0;
 	}
@@ -3123,7 +3124,7 @@ run_digital_op_script(struct drm_device *dev, uint16_t scriptptr,
 		      struct dcb_entry *dcbent, int head, bool dl)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	struct init_exec iexec = {true, false};
 
 	NV_TRACE(dev, "0x%04X: Parsing digital output script table\n",
@@ -3140,7 +3141,7 @@ run_digital_op_script(struct drm_device *dev, uint16_t scriptptr,
 static int call_lvds_manufacturer_script(struct drm_device *dev, struct dcb_entry *dcbent, int head, enum LVDS_script script)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	uint8_t sub = bios->data[bios->fp.xlated_entry + script] + (bios->fp.link_c_increment && dcbent->or & OUTPUT_C ? 1 : 0);
 	uint16_t scriptofs = ROM16(bios->data[bios->init_script_tbls_ptr + sub * 2]);
 
@@ -3194,7 +3195,7 @@ static int run_lvds_table(struct drm_device *dev, struct dcb_entry *dcbent, int
 	 * of a list of pxclks and script pointers.
 	 */
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	unsigned int outputset = (dcbent->or == 4) ? 1 : 0;
 	uint16_t scriptptr = 0, clktable;
 	uint8_t clktableptr = 0;
@@ -3261,7 +3262,7 @@ int call_lvds_script(struct drm_device *dev, struct dcb_entry *dcbent, int head,
 	 */
 
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	uint8_t lvds_ver = bios->data[bios->fp.lvdsmanufacturerpointer];
 	uint32_t sel_clk_binding, sel_clk;
 	int ret;
@@ -3395,7 +3396,7 @@ static int parse_fp_mode_table(struct drm_device *dev, struct nvbios *bios)
 #ifndef __powerpc__
 		NV_ERROR(dev, "Pointer to flat panel table invalid\n");
 #endif
-		bios->pub.digital_min_front_porch = 0x4b;
+		bios->digital_min_front_porch = 0x4b;
 		return 0;
 	}
 
@@ -3428,7 +3429,7 @@ static int parse_fp_mode_table(struct drm_device *dev, struct nvbios *bios)
 		 * fptable[4] is the minimum
 		 * RAMDAC_FP_HCRTC -> RAMDAC_FP_HSYNC_START gap
 		 */
-		bios->pub.digital_min_front_porch = fptable[4];
+		bios->digital_min_front_porch = fptable[4];
 		ofs = -7;
 		break;
 	default:
@@ -3467,7 +3468,7 @@ static int parse_fp_mode_table(struct drm_device *dev, struct nvbios *bios)
 
 	/* nv4x cards need both a strap value and fpindex of 0xf to use DDC */
 	if (lth.lvds_ver > 0x10)
-		bios->pub.fp_no_ddc = fpstrapping != 0xf || fpindex != 0xf;
+		bios->fp_no_ddc = fpstrapping != 0xf || fpindex != 0xf;
 
 	/*
 	 * If either the strap or xlated fpindex value are 0xf there is no
@@ -3491,7 +3492,7 @@ static int parse_fp_mode_table(struct drm_device *dev, struct nvbios *bios)
 bool nouveau_bios_fp_mode(struct drm_device *dev, struct drm_display_mode *mode)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	uint8_t *mode_entry = &bios->data[bios->fp.mode_ptr];
 
 	if (!mode)	/* just checking whether we can produce a mode */
@@ -3562,11 +3563,11 @@ int nouveau_bios_parse_lvds_table(struct drm_device *dev, int pxclk, bool *dl, b
 	 * until later, when this function should be called with non-zero pxclk
 	 */
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	int fpstrapping = get_fp_strap(dev, bios), lvdsmanufacturerindex = 0;
 	struct lvdstableheader lth;
 	uint16_t lvdsofs;
-	int ret, chip_version = bios->pub.chip_version;
+	int ret, chip_version = bios->chip_version;
 
 	ret = parse_lvds_manufacturer_table_header(dev, bios, &lth);
 	if (ret)
@@ -3682,7 +3683,7 @@ bios_output_config_match(struct drm_device *dev, struct dcb_entry *dcbent,
 			 uint16_t record, int record_len, int record_nr)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	uint32_t entry;
 	uint16_t table;
 	int i, v;
@@ -3716,7 +3717,7 @@ nouveau_bios_dp_table(struct drm_device *dev, struct dcb_entry *dcbent,
 		      int *length)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	uint8_t *table;
 
 	if (!bios->display.dp_table_ptr) {
@@ -3725,7 +3726,7 @@ nouveau_bios_dp_table(struct drm_device *dev, struct dcb_entry *dcbent,
 	}
 	table = &bios->data[bios->display.dp_table_ptr];
 
-	if (table[0] != 0x21) {
+	if (table[0] != 0x20 && table[0] != 0x21) {
 		NV_ERROR(dev, "DisplayPort table version 0x%02x unknown\n",
 			 table[0]);
 		return NULL;
@@ -3765,7 +3766,7 @@ nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent,
 	 */
 
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	uint8_t *table = &bios->data[bios->display.script_table_ptr];
 	uint8_t *otable = NULL;
 	uint16_t script;
@@ -3918,8 +3919,8 @@ int run_tmds_table(struct drm_device *dev, struct dcb_entry *dcbent, int head, i
 	 */
 
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
-	int cv = bios->pub.chip_version;
+	struct nvbios *bios = &dev_priv->vbios;
+	int cv = bios->chip_version;
 	uint16_t clktable = 0, scriptptr;
 	uint32_t sel_clk_binding, sel_clk;
 
@@ -3978,8 +3979,8 @@ int get_pll_limits(struct drm_device *dev, uint32_t limit_match, struct pll_lims
 	 */
 
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
-	int cv = bios->pub.chip_version, pllindex = 0;
+	struct nvbios *bios = &dev_priv->vbios;
+	int cv = bios->chip_version, pllindex = 0;
 	uint8_t pll_lim_ver = 0, headerlen = 0, recordlen = 0, entries = 0;
 	uint32_t crystal_strap_mask, crystal_straps;
 
@@ -4332,7 +4333,7 @@ static void parse_bios_version(struct drm_device *dev, struct nvbios *bios, uint
 	 */
 
 	bios->major_version = bios->data[offset + 3];
-	bios->pub.chip_version = bios->data[offset + 2];
+	bios->chip_version = bios->data[offset + 2];
 	NV_TRACE(dev, "Bios version %02x.%02x.%02x.%02x\n",
 		 bios->data[offset + 3], bios->data[offset + 2],
 		 bios->data[offset + 1], bios->data[offset]);
@@ -4402,7 +4403,7 @@ static int parse_bit_A_tbl_entry(struct drm_device *dev, struct nvbios *bios, st
 	}
 
 	/* First entry is normal dac, 2nd tv-out perhaps? */
-	bios->pub.dactestval = ROM32(bios->data[load_table_ptr + headerlen]) & 0x3ff;
+	bios->dactestval = ROM32(bios->data[load_table_ptr + headerlen]) & 0x3ff;
 
 	return 0;
 }
@@ -4526,8 +4527,8 @@ static int parse_bit_i_tbl_entry(struct drm_device *dev, struct nvbios *bios, st
 		return -ENOSYS;
 	}
 
-	bios->pub.dactestval = ROM32(bios->data[daccmpoffset + dacheaderlen]);
-	bios->pub.tvdactestval = ROM32(bios->data[daccmpoffset + dacheaderlen + 4]);
+	bios->dactestval = ROM32(bios->data[daccmpoffset + dacheaderlen]);
+	bios->tvdactestval = ROM32(bios->data[daccmpoffset + dacheaderlen + 4]);
 
 	return 0;
 }
@@ -4796,11 +4797,11 @@ static int parse_bmp_structure(struct drm_device *dev, struct nvbios *bios, unsi
 	uint16_t legacy_scripts_offset, legacy_i2c_offset;
 
 	/* load needed defaults in case we can't parse this info */
-	bios->bdcb.dcb.i2c[0].write = NV_CIO_CRE_DDC_WR__INDEX;
-	bios->bdcb.dcb.i2c[0].read = NV_CIO_CRE_DDC_STATUS__INDEX;
-	bios->bdcb.dcb.i2c[1].write = NV_CIO_CRE_DDC0_WR__INDEX;
-	bios->bdcb.dcb.i2c[1].read = NV_CIO_CRE_DDC0_STATUS__INDEX;
-	bios->pub.digital_min_front_porch = 0x4b;
+	bios->dcb.i2c[0].write = NV_CIO_CRE_DDC_WR__INDEX;
+	bios->dcb.i2c[0].read = NV_CIO_CRE_DDC_STATUS__INDEX;
+	bios->dcb.i2c[1].write = NV_CIO_CRE_DDC0_WR__INDEX;
+	bios->dcb.i2c[1].read = NV_CIO_CRE_DDC0_STATUS__INDEX;
+	bios->digital_min_front_porch = 0x4b;
 	bios->fmaxvco = 256000;
 	bios->fminvco = 128000;
 	bios->fp.duallink_transition_clk = 90000;
@@ -4907,10 +4908,10 @@ static int parse_bmp_structure(struct drm_device *dev, struct nvbios *bios, unsi
 	bios->legacy.i2c_indices.crt = bios->data[legacy_i2c_offset];
 	bios->legacy.i2c_indices.tv = bios->data[legacy_i2c_offset + 1];
 	bios->legacy.i2c_indices.panel = bios->data[legacy_i2c_offset + 2];
-	bios->bdcb.dcb.i2c[0].write = bios->data[legacy_i2c_offset + 4];
-	bios->bdcb.dcb.i2c[0].read = bios->data[legacy_i2c_offset + 5];
-	bios->bdcb.dcb.i2c[1].write = bios->data[legacy_i2c_offset + 6];
-	bios->bdcb.dcb.i2c[1].read = bios->data[legacy_i2c_offset + 7];
+	bios->dcb.i2c[0].write = bios->data[legacy_i2c_offset + 4];
+	bios->dcb.i2c[0].read = bios->data[legacy_i2c_offset + 5];
+	bios->dcb.i2c[1].write = bios->data[legacy_i2c_offset + 6];
+	bios->dcb.i2c[1].read = bios->data[legacy_i2c_offset + 7];
 
 	if (bmplength > 74) {
 		bios->fmaxvco = ROM32(bmp[67]);
@@ -4984,7 +4985,8 @@ read_dcb_i2c_entry(struct drm_device *dev, int dcb_version, uint8_t *i2ctable, i
 		else
 			NV_WARN(dev,
 				"DCB I2C table has more entries than indexable "
-				"(%d entries, max index 15)\n", i2ctable[2]);
+				"(%d entries, max %d)\n", i2ctable[2],
+				DCB_MAX_NUM_I2C_ENTRIES);
 		entry_len = i2ctable[3];
 		/* [4] is i2c_default_indices, read in parse_dcb_table() */
 	}
@@ -5000,8 +5002,8 @@ read_dcb_i2c_entry(struct drm_device *dev, int dcb_version, uint8_t *i2ctable, i
 
 	if (index == 0xf)
 		return 0;
-	if (index > i2c_entries) {
-		NV_ERROR(dev, "DCB I2C index too big (%d > %d)\n",
+	if (index >= i2c_entries) {
+		NV_ERROR(dev, "DCB I2C index too big (%d >= %d)\n",
 			 index, i2ctable[2]);
 		return -ENOENT;
 	}
@@ -5036,7 +5038,7 @@ read_dcb_i2c_entry(struct drm_device *dev, int dcb_version, uint8_t *i2ctable, i
 static struct dcb_gpio_entry *
 new_gpio_entry(struct nvbios *bios)
 {
-	struct parsed_dcb_gpio *gpio = &bios->bdcb.gpio;
+	struct dcb_gpio_table *gpio = &bios->dcb.gpio;
 
 	return &gpio->entry[gpio->entries++];
 }
@@ -5045,14 +5047,14 @@ struct dcb_gpio_entry *
 nouveau_bios_gpio_entry(struct drm_device *dev, enum dcb_gpio_tag tag)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	int i;
 
-	for (i = 0; i < bios->bdcb.gpio.entries; i++) {
-		if (bios->bdcb.gpio.entry[i].tag != tag)
+	for (i = 0; i < bios->dcb.gpio.entries; i++) {
+		if (bios->dcb.gpio.entry[i].tag != tag)
 			continue;
 
-		return &bios->bdcb.gpio.entry[i];
+		return &bios->dcb.gpio.entry[i];
 	}
 
 	return NULL;
@@ -5100,7 +5102,7 @@ static void
 parse_dcb_gpio_table(struct nvbios *bios)
 {
 	struct drm_device *dev = bios->dev;
-	uint16_t gpio_table_ptr = bios->bdcb.gpio_table_ptr;
+	uint16_t gpio_table_ptr = bios->dcb.gpio_table_ptr;
 	uint8_t *gpio_table = &bios->data[gpio_table_ptr];
 	int header_len = gpio_table[1],
 	    entries = gpio_table[2],
@@ -5108,7 +5110,7 @@ parse_dcb_gpio_table(struct nvbios *bios)
 	void (*parse_entry)(struct nvbios *, uint16_t) = NULL;
 	int i;
 
-	if (bios->bdcb.version >= 0x40) {
+	if (bios->dcb.version >= 0x40) {
 		if (gpio_table_ptr && entry_len != 4) {
 			NV_WARN(dev, "Invalid DCB GPIO table entry length.\n");
 			return;
@@ -5116,7 +5118,7 @@ parse_dcb_gpio_table(struct nvbios *bios)
 
 		parse_entry = parse_dcb40_gpio_entry;
 
-	} else if (bios->bdcb.version >= 0x30) {
+	} else if (bios->dcb.version >= 0x30) {
 		if (gpio_table_ptr && entry_len != 2) {
 			NV_WARN(dev, "Invalid DCB GPIO table entry length.\n");
 			return;
@@ -5124,7 +5126,7 @@ parse_dcb_gpio_table(struct nvbios *bios)
 
 		parse_entry = parse_dcb30_gpio_entry;
 
-	} else if (bios->bdcb.version >= 0x22) {
+	} else if (bios->dcb.version >= 0x22) {
 		/*
 		 * DCBs older than v3.0 don't really have a GPIO
 		 * table, instead they keep some GPIO info at fixed
@@ -5158,30 +5160,67 @@ struct dcb_connector_table_entry *
 nouveau_bios_connector_entry(struct drm_device *dev, int index)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	struct dcb_connector_table_entry *cte;
 
-	if (index >= bios->bdcb.connector.entries)
+	if (index >= bios->dcb.connector.entries)
 		return NULL;
 
-	cte = &bios->bdcb.connector.entry[index];
+	cte = &bios->dcb.connector.entry[index];
 	if (cte->type == 0xff)
 		return NULL;
 
 	return cte;
 }
 
+static enum dcb_connector_type
+divine_connector_type(struct nvbios *bios, int index)
+{
+	struct dcb_table *dcb = &bios->dcb;
+	unsigned encoders = 0, type = DCB_CONNECTOR_NONE;
+	int i;
+
+	for (i = 0; i < dcb->entries; i++) {
+		if (dcb->entry[i].connector == index)
+			encoders |= (1 << dcb->entry[i].type);
+	}
+
+	if (encoders & (1 << OUTPUT_DP)) {
+		if (encoders & (1 << OUTPUT_TMDS))
+			type = DCB_CONNECTOR_DP;
+		else
+			type = DCB_CONNECTOR_eDP;
+	} else
+	if (encoders & (1 << OUTPUT_TMDS)) {
+		if (encoders & (1 << OUTPUT_ANALOG))
+			type = DCB_CONNECTOR_DVI_I;
+		else
+			type = DCB_CONNECTOR_DVI_D;
+	} else
+	if (encoders & (1 << OUTPUT_ANALOG)) {
+		type = DCB_CONNECTOR_VGA;
+	} else
+	if (encoders & (1 << OUTPUT_LVDS)) {
+		type = DCB_CONNECTOR_LVDS;
+	} else
+	if (encoders & (1 << OUTPUT_TV)) {
+		type = DCB_CONNECTOR_TV_0;
+	}
+
+	return type;
+}
+
 static void
 parse_dcb_connector_table(struct nvbios *bios)
 {
 	struct drm_device *dev = bios->dev;
-	struct dcb_connector_table *ct = &bios->bdcb.connector;
+	struct dcb_connector_table *ct = &bios->dcb.connector;
 	struct dcb_connector_table_entry *cte;
-	uint8_t *conntab = &bios->data[bios->bdcb.connector_table_ptr];
+	uint8_t *conntab = &bios->data[bios->dcb.connector_table_ptr];
 	uint8_t *entry;
 	int i;
 
-	if (!bios->bdcb.connector_table_ptr) {
+	if (!bios->dcb.connector_table_ptr) {
 		NV_DEBUG_KMS(dev, "No DCB connector table present\n");
 		return;
 	}
@@ -5203,6 +5242,7 @@ parse_dcb_connector_table(struct nvbios *bios)
 			cte->entry = ROM16(entry[0]);
 		else
 			cte->entry = ROM32(entry[0]);
+
 		cte->type  = (cte->entry & 0x000000ff) >> 0;
 		cte->index = (cte->entry & 0x00000f00) >> 8;
 		switch (cte->entry & 0x00033000) {
@@ -5228,10 +5268,33 @@ parse_dcb_connector_table(struct nvbios *bios)
 
 		NV_INFO(dev, "  %d: 0x%08x: type 0x%02x idx %d tag 0x%02x\n",
 			i, cte->entry, cte->type, cte->index, cte->gpio_tag);
+
+		/* check for known types, fallback to guessing the type
+		 * from attached encoders if we hit an unknown.
+		 */
+		switch (cte->type) {
+		case DCB_CONNECTOR_VGA:
+		case DCB_CONNECTOR_TV_0:
+		case DCB_CONNECTOR_TV_1:
+		case DCB_CONNECTOR_TV_3:
+		case DCB_CONNECTOR_DVI_I:
+		case DCB_CONNECTOR_DVI_D:
+		case DCB_CONNECTOR_LVDS:
+		case DCB_CONNECTOR_DP:
+		case DCB_CONNECTOR_eDP:
+		case DCB_CONNECTOR_HDMI_0:
+		case DCB_CONNECTOR_HDMI_1:
+			break;
+		default:
+			cte->type = divine_connector_type(bios, cte->index);
+			NV_WARN(dev, "unknown type, using 0x%02x", cte->type);
+			break;
+		}
+
 	}
 }
 
-static struct dcb_entry *new_dcb_entry(struct parsed_dcb *dcb)
+static struct dcb_entry *new_dcb_entry(struct dcb_table *dcb)
 {
 	struct dcb_entry *entry = &dcb->entry[dcb->entries];
 
@@ -5241,7 +5304,7 @@ static struct dcb_entry *new_dcb_entry(struct parsed_dcb *dcb)
 	return entry;
 }
 
-static void fabricate_vga_output(struct parsed_dcb *dcb, int i2c, int heads)
+static void fabricate_vga_output(struct dcb_table *dcb, int i2c, int heads)
 {
 	struct dcb_entry *entry = new_dcb_entry(dcb);
 
@@ -5252,7 +5315,7 @@ static void fabricate_vga_output(struct parsed_dcb *dcb, int i2c, int heads)
 	/* "or" mostly unused in early gen crt modesetting, 0 is fine */
 }
 
-static void fabricate_dvi_i_output(struct parsed_dcb *dcb, bool twoHeads)
+static void fabricate_dvi_i_output(struct dcb_table *dcb, bool twoHeads)
 {
 	struct dcb_entry *entry = new_dcb_entry(dcb);
 
@@ -5279,7 +5342,7 @@ static void fabricate_dvi_i_output(struct parsed_dcb *dcb, bool twoHeads)
 #endif
 }
 
-static void fabricate_tv_output(struct parsed_dcb *dcb, bool twoHeads)
+static void fabricate_tv_output(struct dcb_table *dcb, bool twoHeads)
 {
 	struct dcb_entry *entry = new_dcb_entry(dcb);
 
@@ -5290,13 +5353,13 @@ static void fabricate_tv_output(struct parsed_dcb *dcb, bool twoHeads)
 }
 
 static bool
-parse_dcb20_entry(struct drm_device *dev, struct bios_parsed_dcb *bdcb,
+parse_dcb20_entry(struct drm_device *dev, struct dcb_table *dcb,
 		  uint32_t conn, uint32_t conf, struct dcb_entry *entry)
 {
 	entry->type = conn & 0xf;
 	entry->i2c_index = (conn >> 4) & 0xf;
 	entry->heads = (conn >> 8) & 0xf;
-	if (bdcb->version >= 0x40)
+	if (dcb->version >= 0x40)
 		entry->connector = (conn >> 12) & 0xf;
 	entry->bus = (conn >> 16) & 0xf;
 	entry->location = (conn >> 20) & 0x3;
@@ -5314,7 +5377,7 @@ parse_dcb20_entry(struct drm_device *dev, struct bios_parsed_dcb *bdcb,
 		 * Although the rest of a CRT conf dword is usually
 		 * zeros, mac biosen have stuff there so we must mask
 		 */
-		entry->crtconf.maxfreq = (bdcb->version < 0x30) ?
+		entry->crtconf.maxfreq = (dcb->version < 0x30) ?
 					 (conf & 0xffff) * 10 :
 					 (conf & 0xff) * 10000;
 		break;
@@ -5323,7 +5386,7 @@ parse_dcb20_entry(struct drm_device *dev, struct bios_parsed_dcb *bdcb,
 		uint32_t mask;
 		if (conf & 0x1)
 			entry->lvdsconf.use_straps_for_mode = true;
-		if (bdcb->version < 0x22) {
+		if (dcb->version < 0x22) {
 			mask = ~0xd;
 			/*
 			 * The laptop in bug 14567 lies and claims to not use
@@ -5347,7 +5410,7 @@ parse_dcb20_entry(struct drm_device *dev, struct bios_parsed_dcb *bdcb,
 			 * Until we even try to use these on G8x, it's
 			 * useless reporting unknown bits.  They all are.
 			 */
-			if (bdcb->version >= 0x40)
+			if (dcb->version >= 0x40)
 				break;
 
 			NV_ERROR(dev, "Unknown LVDS configuration bits, "
@@ -5357,7 +5420,7 @@ parse_dcb20_entry(struct drm_device *dev, struct bios_parsed_dcb *bdcb,
 		}
 	case OUTPUT_TV:
 	{
-		if (bdcb->version >= 0x30)
+		if (dcb->version >= 0x30)
 			entry->tvconf.has_component_output = conf & (0x8 << 4);
 		else
 			entry->tvconf.has_component_output = false;
@@ -5384,8 +5447,10 @@ parse_dcb20_entry(struct drm_device *dev, struct bios_parsed_dcb *bdcb,
 		break;
 	case 0xe:
 		/* weird g80 mobile type that "nv" treats as a terminator */
-		bdcb->dcb.entries--;
+		dcb->entries--;
 		return false;
+	default:
+		break;
 	}
 
 	/* unsure what DCB version introduces this, 3.0? */
@@ -5396,7 +5461,7 @@ parse_dcb20_entry(struct drm_device *dev, struct bios_parsed_dcb *bdcb,
 }
 
 static bool
-parse_dcb15_entry(struct drm_device *dev, struct parsed_dcb *dcb,
+parse_dcb15_entry(struct drm_device *dev, struct dcb_table *dcb,
 		  uint32_t conn, uint32_t conf, struct dcb_entry *entry)
 {
 	switch (conn & 0x0000000f) {
@@ -5462,27 +5527,27 @@ parse_dcb15_entry(struct drm_device *dev, struct parsed_dcb *dcb,
 	return true;
 }
 
-static bool parse_dcb_entry(struct drm_device *dev, struct bios_parsed_dcb *bdcb,
+static bool parse_dcb_entry(struct drm_device *dev, struct dcb_table *dcb,
 			    uint32_t conn, uint32_t conf)
 {
-	struct dcb_entry *entry = new_dcb_entry(&bdcb->dcb);
+	struct dcb_entry *entry = new_dcb_entry(dcb);
 	bool ret;
 
-	if (bdcb->version >= 0x20)
-		ret = parse_dcb20_entry(dev, bdcb, conn, conf, entry);
+	if (dcb->version >= 0x20)
+		ret = parse_dcb20_entry(dev, dcb, conn, conf, entry);
 	else
-		ret = parse_dcb15_entry(dev, &bdcb->dcb, conn, conf, entry);
+		ret = parse_dcb15_entry(dev, dcb, conn, conf, entry);
 	if (!ret)
 		return ret;
 
-	read_dcb_i2c_entry(dev, bdcb->version, bdcb->i2c_table,
-			   entry->i2c_index, &bdcb->dcb.i2c[entry->i2c_index]);
+	read_dcb_i2c_entry(dev, dcb->version, dcb->i2c_table,
+			   entry->i2c_index, &dcb->i2c[entry->i2c_index]);
 
 	return true;
 }
 
 static
-void merge_like_dcb_entries(struct drm_device *dev, struct parsed_dcb *dcb)
+void merge_like_dcb_entries(struct drm_device *dev, struct dcb_table *dcb)
 {
 	/*
 	 * DCB v2.0 lists each output combination separately.
@@ -5534,8 +5599,7 @@ static int
 parse_dcb_table(struct drm_device *dev, struct nvbios *bios, bool twoHeads)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct bios_parsed_dcb *bdcb = &bios->bdcb;
-	struct parsed_dcb *dcb;
+	struct dcb_table *dcb = &bios->dcb;
 	uint16_t dcbptr = 0, i2ctabptr = 0;
 	uint8_t *dcbtable;
 	uint8_t headerlen = 0x4, entries = DCB_MAX_NUM_ENTRIES;
@@ -5543,9 +5607,6 @@ parse_dcb_table(struct drm_device *dev, struct nvbios *bios, bool twoHeads)
 	int recordlength = 8, confofs = 4;
 	int i;
 
-	dcb = bios->pub.dcb = &bdcb->dcb;
-	dcb->entries = 0;
-
 	/* get the offset from 0x36 */
 	if (dev_priv->card_type > NV_04) {
 		dcbptr = ROM16(bios->data[0x36]);
@@ -5567,21 +5628,21 @@ parse_dcb_table(struct drm_device *dev, struct nvbios *bios, bool twoHeads)
 	dcbtable = &bios->data[dcbptr];
 
 	/* get DCB version */
-	bdcb->version = dcbtable[0];
+	dcb->version = dcbtable[0];
 	NV_TRACE(dev, "Found Display Configuration Block version %d.%d\n",
-		 bdcb->version >> 4, bdcb->version & 0xf);
+		 dcb->version >> 4, dcb->version & 0xf);
 
-	if (bdcb->version >= 0x20) { /* NV17+ */
+	if (dcb->version >= 0x20) { /* NV17+ */
 		uint32_t sig;
 
-		if (bdcb->version >= 0x30) { /* NV40+ */
+		if (dcb->version >= 0x30) { /* NV40+ */
 			headerlen = dcbtable[1];
 			entries = dcbtable[2];
 			recordlength = dcbtable[3];
 			i2ctabptr = ROM16(dcbtable[4]);
 			sig = ROM32(dcbtable[6]);
-			bdcb->gpio_table_ptr = ROM16(dcbtable[10]);
-			bdcb->connector_table_ptr = ROM16(dcbtable[20]);
+			dcb->gpio_table_ptr = ROM16(dcbtable[10]);
+			dcb->connector_table_ptr = ROM16(dcbtable[20]);
 		} else {
 			i2ctabptr = ROM16(dcbtable[2]);
 			sig = ROM32(dcbtable[4]);
@@ -5593,7 +5654,7 @@ parse_dcb_table(struct drm_device *dev, struct nvbios *bios, bool twoHeads)
 					"signature (%08X)\n", sig);
 			return -EINVAL;
 		}
-	} else if (bdcb->version >= 0x15) { /* some NV11 and NV20 */
+	} else if (dcb->version >= 0x15) { /* some NV11 and NV20 */
 		char sig[8] = { 0 };
 
 		strncpy(sig, (char *)&dcbtable[-7], 7);
@@ -5641,14 +5702,11 @@ parse_dcb_table(struct drm_device *dev, struct nvbios *bios, bool twoHeads)
 	if (!i2ctabptr)
 		NV_WARN(dev, "No pointer to DCB I2C port table\n");
 	else {
-		bdcb->i2c_table = &bios->data[i2ctabptr];
-		if (bdcb->version >= 0x30)
-			bdcb->i2c_default_indices = bdcb->i2c_table[4];
+		dcb->i2c_table = &bios->data[i2ctabptr];
+		if (dcb->version >= 0x30)
+			dcb->i2c_default_indices = dcb->i2c_table[4];
 	}
 
-	parse_dcb_gpio_table(bios);
-	parse_dcb_connector_table(bios);
-
 	if (entries > DCB_MAX_NUM_ENTRIES)
 		entries = DCB_MAX_NUM_ENTRIES;
 
@@ -5673,7 +5731,7 @@ parse_dcb_table(struct drm_device *dev, struct nvbios *bios, bool twoHeads)
 		NV_TRACEWARN(dev, "Raw DCB entry %d: %08x %08x\n",
 			     dcb->entries, connection, config);
 
-		if (!parse_dcb_entry(dev, bdcb, connection, config))
+		if (!parse_dcb_entry(dev, dcb, connection, config))
 			break;
 	}
 
@@ -5681,18 +5739,22 @@ parse_dcb_table(struct drm_device *dev, struct nvbios *bios, bool twoHeads)
 	 * apart for v2.1+ not being known for requiring merging, this
 	 * guarantees dcbent->index is the index of the entry in the rom image
 	 */
-	if (bdcb->version < 0x21)
+	if (dcb->version < 0x21)
 		merge_like_dcb_entries(dev, dcb);
 
-	return dcb->entries ? 0 : -ENXIO;
+	if (!dcb->entries)
+		return -ENXIO;
+
+	parse_dcb_gpio_table(bios);
+	parse_dcb_connector_table(bios);
+	return 0;
 }
 
 static void
 fixup_legacy_connector(struct nvbios *bios)
 {
-	struct bios_parsed_dcb *bdcb = &bios->bdcb;
-	struct parsed_dcb *dcb = &bdcb->dcb;
-	int high = 0, i;
+	struct dcb_table *dcb = &bios->dcb;
+	int i, i2c, i2c_conn[DCB_MAX_NUM_I2C_ENTRIES] = { };
 
 	/*
 	 * DCB 3.0 also has the table in most cases, but there are some cards
@@ -5700,9 +5762,11 @@ fixup_legacy_connector(struct nvbios *bios)
 	 * indices are all 0.  We don't need the connector indices on pre-G80
 	 * chips (yet?) so limit the use to DCB 4.0 and above.
 	 */
-	if (bdcb->version >= 0x40)
+	if (dcb->version >= 0x40)
 		return;
 
+	dcb->connector.entries = 0;
+
 	/*
 	 * No known connector info before v3.0, so make it up.  the rule here
 	 * is: anything on the same i2c bus is considered to be on the same
@@ -5710,37 +5774,38 @@ fixup_legacy_connector(struct nvbios *bios)
 	 * its own unique connector index.
 	 */
 	for (i = 0; i < dcb->entries; i++) {
-		if (dcb->entry[i].i2c_index == 0xf)
-			continue;
-
 		/*
 		 * Ignore the I2C index for on-chip TV-out, as there
 		 * are cards with bogus values (nv31m in bug 23212),
 		 * and it's otherwise useless.
 		 */
 		if (dcb->entry[i].type == OUTPUT_TV &&
-		    dcb->entry[i].location == DCB_LOC_ON_CHIP) {
+		    dcb->entry[i].location == DCB_LOC_ON_CHIP)
 			dcb->entry[i].i2c_index = 0xf;
+		i2c = dcb->entry[i].i2c_index;
+
+		if (i2c_conn[i2c]) {
+			dcb->entry[i].connector = i2c_conn[i2c] - 1;
 			continue;
 		}
 
-		dcb->entry[i].connector = dcb->entry[i].i2c_index;
-		if (dcb->entry[i].connector > high)
-			high = dcb->entry[i].connector;
+		dcb->entry[i].connector = dcb->connector.entries++;
+		if (i2c != 0xf)
+			i2c_conn[i2c] = dcb->connector.entries;
 	}
 
-	for (i = 0; i < dcb->entries; i++) {
-		if (dcb->entry[i].i2c_index != 0xf)
-			continue;
-
-		dcb->entry[i].connector = ++high;
+	/* Fake the connector table as well as just connector indices */
+	for (i = 0; i < dcb->connector.entries; i++) {
+		dcb->connector.entry[i].index = i;
+		dcb->connector.entry[i].type = divine_connector_type(bios, i);
+		dcb->connector.entry[i].gpio_tag = 0xff;
 	}
 }
 
 static void
 fixup_legacy_i2c(struct nvbios *bios)
 {
-	struct parsed_dcb *dcb = &bios->bdcb.dcb;
+	struct dcb_table *dcb = &bios->dcb;
 	int i;
 
 	for (i = 0; i < dcb->entries; i++) {
@@ -5826,7 +5891,7 @@ static int load_nv17_hw_sequencer_ucode(struct drm_device *dev,
 uint8_t *nouveau_bios_embedded_edid(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	const uint8_t edid_sig[] = {
 			0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
 	uint16_t offset = 0;
@@ -5859,7 +5924,7 @@ nouveau_bios_run_init_table(struct drm_device *dev, uint16_t table,
 			    struct dcb_entry *dcbent)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	struct init_exec iexec = { true, false };
 
 	mutex_lock(&bios->lock);
@@ -5872,7 +5937,7 @@ nouveau_bios_run_init_table(struct drm_device *dev, uint16_t table,
 static bool NVInitVBIOS(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 
 	memset(bios, 0, sizeof(struct nvbios));
 	mutex_init(&bios->lock);
@@ -5888,7 +5953,7 @@ static bool NVInitVBIOS(struct drm_device *dev)
 static int nouveau_parse_vbios_struct(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	const uint8_t bit_signature[] = { 0xff, 0xb8, 'B', 'I', 'T' };
 	const uint8_t bmp_signature[] = { 0xff, 0x7f, 'N', 'V', 0x0 };
 	int offset;
@@ -5915,7 +5980,7 @@ int
 nouveau_run_vbios_init(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	int i, ret = 0;
 
 	NVLockVgaCrtcs(dev, false);
@@ -5946,9 +6011,9 @@ nouveau_run_vbios_init(struct drm_device *dev)
 	}
 
 	if (dev_priv->card_type >= NV_50) {
-		for (i = 0; i < bios->bdcb.dcb.entries; i++) {
+		for (i = 0; i < bios->dcb.entries; i++) {
 			nouveau_bios_run_display_table(dev,
-						       &bios->bdcb.dcb.entry[i],
+						       &bios->dcb.entry[i],
 						       0, 0);
 		}
 	}
@@ -5962,11 +6027,11 @@ static void
 nouveau_bios_i2c_devices_takedown(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	struct dcb_i2c_entry *entry;
 	int i;
 
-	entry = &bios->bdcb.dcb.i2c[0];
+	entry = &bios->dcb.i2c[0];
 	for (i = 0; i < DCB_MAX_NUM_I2C_ENTRIES; i++, entry++)
 		nouveau_i2c_fini(dev, entry);
 }
@@ -5975,13 +6040,11 @@ int
 nouveau_bios_init(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	uint32_t saved_nv_pextdev_boot_0;
 	bool was_locked;
 	int ret;
 
-	dev_priv->vbios = &bios->pub;
-
 	if (!NVInitVBIOS(dev))
 		return -ENODEV;
 
@@ -6023,10 +6086,8 @@ nouveau_bios_init(struct drm_device *dev)
 	bios_wr32(bios, NV_PEXTDEV_BOOT_0, saved_nv_pextdev_boot_0);
 
 	ret = nouveau_run_vbios_init(dev);
-	if (ret) {
-		dev_priv->vbios = NULL;
+	if (ret)
 		return ret;
-	}
 
 	/* feature_byte on BMP is poor, but init always sets CR4B */
 	was_locked = NVLockVgaCrtcs(dev, false);
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.h b/drivers/gpu/drm/nouveau/nouveau_bios.h
index fd94bd6dc2642221c572e698d0dab87402907b7d..9f688aa9a65559cb3c1febd4183e8c15b8032396 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.h
@@ -34,9 +34,67 @@
 
 #define DCB_LOC_ON_CHIP 0
 
+struct dcb_i2c_entry {
+	uint8_t port_type;
+	uint8_t read, write;
+	struct nouveau_i2c_chan *chan;
+};
+
+enum dcb_gpio_tag {
+	DCB_GPIO_TVDAC0 = 0xc,
+	DCB_GPIO_TVDAC1 = 0x2d,
+};
+
+struct dcb_gpio_entry {
+	enum dcb_gpio_tag tag;
+	int line;
+	bool invert;
+};
+
+struct dcb_gpio_table {
+	int entries;
+	struct dcb_gpio_entry entry[DCB_MAX_NUM_GPIO_ENTRIES];
+};
+
+enum dcb_connector_type {
+	DCB_CONNECTOR_VGA = 0x00,
+	DCB_CONNECTOR_TV_0 = 0x10,
+	DCB_CONNECTOR_TV_1 = 0x11,
+	DCB_CONNECTOR_TV_3 = 0x13,
+	DCB_CONNECTOR_DVI_I = 0x30,
+	DCB_CONNECTOR_DVI_D = 0x31,
+	DCB_CONNECTOR_LVDS = 0x40,
+	DCB_CONNECTOR_DP = 0x46,
+	DCB_CONNECTOR_eDP = 0x47,
+	DCB_CONNECTOR_HDMI_0 = 0x60,
+	DCB_CONNECTOR_HDMI_1 = 0x61,
+	DCB_CONNECTOR_NONE = 0xff
+};
+
+struct dcb_connector_table_entry {
+	uint32_t entry;
+	enum dcb_connector_type type;
+	uint8_t index;
+	uint8_t gpio_tag;
+};
+
+struct dcb_connector_table {
+	int entries;
+	struct dcb_connector_table_entry entry[DCB_MAX_NUM_CONNECTOR_ENTRIES];
+};
+
+enum dcb_type {
+	OUTPUT_ANALOG = 0,
+	OUTPUT_TV = 1,
+	OUTPUT_TMDS = 2,
+	OUTPUT_LVDS = 3,
+	OUTPUT_DP = 6,
+	OUTPUT_ANY = -1
+};
+
 struct dcb_entry {
 	int index;	/* may not be raw dcb index if merging has happened */
-	uint8_t type;
+	enum dcb_type type;
 	uint8_t i2c_index;
 	uint8_t heads;
 	uint8_t connector;
@@ -71,69 +129,22 @@ struct dcb_entry {
 	bool i2c_upper_default;
 };
 
-struct dcb_i2c_entry {
-	uint8_t port_type;
-	uint8_t read, write;
-	struct nouveau_i2c_chan *chan;
-};
+struct dcb_table {
+	uint8_t version;
 
-struct parsed_dcb {
 	int entries;
 	struct dcb_entry entry[DCB_MAX_NUM_ENTRIES];
-	struct dcb_i2c_entry i2c[DCB_MAX_NUM_I2C_ENTRIES];
-};
-
-enum dcb_gpio_tag {
-	DCB_GPIO_TVDAC0 = 0xc,
-	DCB_GPIO_TVDAC1 = 0x2d,
-};
-
-struct dcb_gpio_entry {
-	enum dcb_gpio_tag tag;
-	int line;
-	bool invert;
-};
-
-struct parsed_dcb_gpio {
-	int entries;
-	struct dcb_gpio_entry entry[DCB_MAX_NUM_GPIO_ENTRIES];
-};
-
-struct dcb_connector_table_entry {
-	uint32_t entry;
-	uint8_t type;
-	uint8_t index;
-	uint8_t gpio_tag;
-};
-
-struct dcb_connector_table {
-	int entries;
-	struct dcb_connector_table_entry entry[DCB_MAX_NUM_CONNECTOR_ENTRIES];
-};
-
-struct bios_parsed_dcb {
-	uint8_t version;
-
-	struct parsed_dcb dcb;
 
 	uint8_t *i2c_table;
 	uint8_t i2c_default_indices;
+	struct dcb_i2c_entry i2c[DCB_MAX_NUM_I2C_ENTRIES];
 
 	uint16_t gpio_table_ptr;
-	struct parsed_dcb_gpio gpio;
+	struct dcb_gpio_table gpio;
 	uint16_t connector_table_ptr;
 	struct dcb_connector_table connector;
 };
 
-enum nouveau_encoder_type {
-	OUTPUT_ANALOG = 0,
-	OUTPUT_TV = 1,
-	OUTPUT_TMDS = 2,
-	OUTPUT_LVDS = 3,
-	OUTPUT_DP = 6,
-	OUTPUT_ANY = -1
-};
-
 enum nouveau_or {
 	OUTPUT_A = (1 << 0),
 	OUTPUT_B = (1 << 1),
@@ -190,8 +201,8 @@ struct pll_lims {
 	int refclk;
 };
 
-struct nouveau_bios_info {
-	struct parsed_dcb *dcb;
+struct nvbios {
+	struct drm_device *dev;
 
 	uint8_t chip_version;
 
@@ -199,11 +210,6 @@ struct nouveau_bios_info {
 	uint32_t tvdactestval;
 	uint8_t digital_min_front_porch;
 	bool fp_no_ddc;
-};
-
-struct nvbios {
-	struct drm_device *dev;
-	struct nouveau_bios_info pub;
 
 	struct mutex lock;
 
@@ -234,7 +240,7 @@ struct nvbios {
 	uint16_t some_script_ptr; /* BIT I + 14 */
 	uint16_t init96_tbl_ptr; /* BIT I + 16 */
 
-	struct bios_parsed_dcb bdcb;
+	struct dcb_table dcb;
 
 	struct {
 		int crtchead;
diff --git a/drivers/gpu/drm/nouveau/nouveau_calc.c b/drivers/gpu/drm/nouveau/nouveau_calc.c
index ee2b84504d050f2fd68f66a8254c366485816405..88f9bc0941eb293a7cf727f261740900da5e14ba 100644
--- a/drivers/gpu/drm/nouveau/nouveau_calc.c
+++ b/drivers/gpu/drm/nouveau/nouveau_calc.c
@@ -274,7 +274,7 @@ getMNP_single(struct drm_device *dev, struct pll_lims *pll_lim, int clk,
 	 * returns calculated clock
 	 */
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	int cv = dev_priv->vbios->chip_version;
+	int cv = dev_priv->vbios.chip_version;
 	int minvco = pll_lim->vco1.minfreq, maxvco = pll_lim->vco1.maxfreq;
 	int minM = pll_lim->vco1.min_m, maxM = pll_lim->vco1.max_m;
 	int minN = pll_lim->vco1.min_n, maxN = pll_lim->vco1.max_n;
@@ -373,7 +373,7 @@ getMNP_double(struct drm_device *dev, struct pll_lims *pll_lim, int clk,
 	 * returns calculated clock
 	 */
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	int chip_version = dev_priv->vbios->chip_version;
+	int chip_version = dev_priv->vbios.chip_version;
 	int minvco1 = pll_lim->vco1.minfreq, maxvco1 = pll_lim->vco1.maxfreq;
 	int minvco2 = pll_lim->vco2.minfreq, maxvco2 = pll_lim->vco2.maxfreq;
 	int minU1 = pll_lim->vco1.min_inputfreq, minU2 = pll_lim->vco2.min_inputfreq;
diff --git a/drivers/gpu/drm/nouveau/nouveau_channel.c b/drivers/gpu/drm/nouveau/nouveau_channel.c
index 2281f99da7fcef31da401ea5d31811e78e93cb93..6dfb425cbae9432f4718ae33c77b754c4ac9a8dc 100644
--- a/drivers/gpu/drm/nouveau/nouveau_channel.c
+++ b/drivers/gpu/drm/nouveau/nouveau_channel.c
@@ -35,22 +35,27 @@ nouveau_channel_pushbuf_ctxdma_init(struct nouveau_channel *chan)
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_bo *pb = chan->pushbuf_bo;
 	struct nouveau_gpuobj *pushbuf = NULL;
-	uint32_t start = pb->bo.mem.mm_node->start << PAGE_SHIFT;
 	int ret;
 
+	if (dev_priv->card_type >= NV_50) {
+		ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY, 0,
+					     dev_priv->vm_end, NV_DMA_ACCESS_RO,
+					     NV_DMA_TARGET_AGP, &pushbuf);
+		chan->pushbuf_base = pb->bo.offset;
+	} else
 	if (pb->bo.mem.mem_type == TTM_PL_TT) {
 		ret = nouveau_gpuobj_gart_dma_new(chan, 0,
 						  dev_priv->gart_info.aper_size,
 						  NV_DMA_ACCESS_RO, &pushbuf,
 						  NULL);
-		chan->pushbuf_base = start;
+		chan->pushbuf_base = pb->bo.mem.mm_node->start << PAGE_SHIFT;
 	} else
 	if (dev_priv->card_type != NV_04) {
 		ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY, 0,
 					     dev_priv->fb_available_size,
 					     NV_DMA_ACCESS_RO,
 					     NV_DMA_TARGET_VIDMEM, &pushbuf);
-		chan->pushbuf_base = start;
+		chan->pushbuf_base = pb->bo.mem.mm_node->start << PAGE_SHIFT;
 	} else {
 		/* NV04 cmdbuf hack, from original ddx.. not sure of it's
 		 * exact reason for existing :)  PCI access to cmdbuf in
@@ -61,7 +66,7 @@ nouveau_channel_pushbuf_ctxdma_init(struct nouveau_channel *chan)
 					     dev_priv->fb_available_size,
 					     NV_DMA_ACCESS_RO,
 					     NV_DMA_TARGET_PCI, &pushbuf);
-		chan->pushbuf_base = start;
+		chan->pushbuf_base = pb->bo.mem.mm_node->start << PAGE_SHIFT;
 	}
 
 	ret = nouveau_gpuobj_ref_add(dev, chan, 0, pushbuf, &chan->pushbuf);
@@ -275,9 +280,18 @@ nouveau_channel_free(struct nouveau_channel *chan)
 	 */
 	nouveau_fence_fini(chan);
 
-	/* Ensure the channel is no longer active on the GPU */
+	/* This will prevent pfifo from switching channels. */
 	pfifo->reassign(dev, false);
 
+	/* We want to give pgraph a chance to idle and get rid of all potential
+	 * errors. We need to do this before the lock, otherwise the irq handler
+	 * is unable to process them.
+	 */
+	if (pgraph->channel(dev) == chan)
+		nouveau_wait_for_idle(dev);
+
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+
 	pgraph->fifo_access(dev, false);
 	if (pgraph->channel(dev) == chan)
 		pgraph->unload_context(dev);
@@ -293,6 +307,8 @@ nouveau_channel_free(struct nouveau_channel *chan)
 
 	pfifo->reassign(dev, true);
 
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
+
 	/* Release the channel's resources */
 	nouveau_gpuobj_ref_del(dev, &chan->pushbuf);
 	if (chan->pushbuf_bo) {
@@ -369,6 +385,14 @@ nouveau_ioctl_fifo_alloc(struct drm_device *dev, void *data,
 		return ret;
 	init->channel  = chan->id;
 
+	if (chan->dma.ib_max)
+		init->pushbuf_domains = NOUVEAU_GEM_DOMAIN_VRAM |
+					NOUVEAU_GEM_DOMAIN_GART;
+	else if (chan->pushbuf_bo->bo.mem.mem_type == TTM_PL_VRAM)
+		init->pushbuf_domains = NOUVEAU_GEM_DOMAIN_VRAM;
+	else
+		init->pushbuf_domains = NOUVEAU_GEM_DOMAIN_GART;
+
 	init->subchan[0].handle = NvM2MF;
 	if (dev_priv->card_type < NV_50)
 		init->subchan[0].grclass = 0x0039;
@@ -408,7 +432,6 @@ nouveau_ioctl_fifo_free(struct drm_device *dev, void *data,
  ***********************************/
 
 struct drm_ioctl_desc nouveau_ioctls[] = {
-	DRM_IOCTL_DEF(DRM_NOUVEAU_CARD_INIT, nouveau_ioctl_card_init, DRM_AUTH),
 	DRM_IOCTL_DEF(DRM_NOUVEAU_GETPARAM, nouveau_ioctl_getparam, DRM_AUTH),
 	DRM_IOCTL_DEF(DRM_NOUVEAU_SETPARAM, nouveau_ioctl_setparam, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF(DRM_NOUVEAU_CHANNEL_ALLOC, nouveau_ioctl_fifo_alloc, DRM_AUTH),
@@ -418,13 +441,9 @@ struct drm_ioctl_desc nouveau_ioctls[] = {
 	DRM_IOCTL_DEF(DRM_NOUVEAU_GPUOBJ_FREE, nouveau_ioctl_gpuobj_free, DRM_AUTH),
 	DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_NEW, nouveau_gem_ioctl_new, DRM_AUTH),
 	DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_PUSHBUF, nouveau_gem_ioctl_pushbuf, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_PUSHBUF_CALL, nouveau_gem_ioctl_pushbuf_call, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_PIN, nouveau_gem_ioctl_pin, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_UNPIN, nouveau_gem_ioctl_unpin, DRM_AUTH),
 	DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_CPU_PREP, nouveau_gem_ioctl_cpu_prep, DRM_AUTH),
 	DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_CPU_FINI, nouveau_gem_ioctl_cpu_fini, DRM_AUTH),
 	DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_INFO, nouveau_gem_ioctl_info, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_PUSHBUF_CALL2, nouveau_gem_ioctl_pushbuf_call2, DRM_AUTH),
 };
 
 int nouveau_max_ioctl = DRM_ARRAY_SIZE(nouveau_ioctls);
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index d2f63353ea9715f3be342d369e282beec55d86fb..24327f468c4b864b459fc0af3034c6548c2d3840 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -218,7 +218,7 @@ nouveau_connector_set_encoder(struct drm_connector *connector,
 			connector->interlace_allowed = true;
 	}
 
-	if (connector->connector_type == DRM_MODE_CONNECTOR_DVII) {
+	if (nv_connector->dcb->type == DCB_CONNECTOR_DVI_I) {
 		drm_connector_property_set_value(connector,
 			dev->mode_config.dvi_i_subconnector_property,
 			nv_encoder->dcb->type == OUTPUT_TMDS ?
@@ -236,15 +236,17 @@ nouveau_connector_detect(struct drm_connector *connector)
 	struct nouveau_i2c_chan *i2c;
 	int type, flags;
 
-	if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS)
+	if (nv_connector->dcb->type == DCB_CONNECTOR_LVDS)
 		nv_encoder = find_encoder_by_type(connector, OUTPUT_LVDS);
 	if (nv_encoder && nv_connector->native_mode) {
+		unsigned status = connector_status_connected;
+
 #ifdef CONFIG_ACPI
 		if (!nouveau_ignorelid && !acpi_lid_open())
-			return connector_status_disconnected;
+			status = connector_status_unknown;
 #endif
 		nouveau_connector_set_encoder(connector, nv_encoder);
-		return connector_status_connected;
+		return status;
 	}
 
 	/* Cleanup the previous EDID block. */
@@ -279,7 +281,7 @@ nouveau_connector_detect(struct drm_connector *connector)
 		 * same i2c channel so the value returned from ddc_detect
 		 * isn't necessarily correct.
 		 */
-		if (connector->connector_type == DRM_MODE_CONNECTOR_DVII) {
+		if (nv_connector->dcb->type == DCB_CONNECTOR_DVI_I) {
 			if (nv_connector->edid->input & DRM_EDID_INPUT_DIGITAL)
 				type = OUTPUT_TMDS;
 			else
@@ -321,11 +323,11 @@ nouveau_connector_detect(struct drm_connector *connector)
 static void
 nouveau_connector_force(struct drm_connector *connector)
 {
-	struct drm_device *dev = connector->dev;
+	struct nouveau_connector *nv_connector = nouveau_connector(connector);
 	struct nouveau_encoder *nv_encoder;
 	int type;
 
-	if (connector->connector_type == DRM_MODE_CONNECTOR_DVII) {
+	if (nv_connector->dcb->type == DCB_CONNECTOR_DVI_I) {
 		if (connector->force == DRM_FORCE_ON_DIGITAL)
 			type = OUTPUT_TMDS;
 		else
@@ -335,7 +337,7 @@ nouveau_connector_force(struct drm_connector *connector)
 
 	nv_encoder = find_encoder_by_type(connector, type);
 	if (!nv_encoder) {
-		NV_ERROR(dev, "can't find encoder to force %s on!\n",
+		NV_ERROR(connector->dev, "can't find encoder to force %s on!\n",
 			 drm_get_connector_name(connector));
 		connector->status = connector_status_disconnected;
 		return;
@@ -369,7 +371,7 @@ nouveau_connector_set_property(struct drm_connector *connector,
 		}
 
 		/* LVDS always needs gpu scaling */
-		if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS &&
+		if (nv_connector->dcb->type == DCB_CONNECTOR_LVDS &&
 		    value == DRM_MODE_SCALE_NONE)
 			return -EINVAL;
 
@@ -535,7 +537,7 @@ nouveau_connector_get_modes(struct drm_connector *connector)
 	/* If we're not LVDS, destroy the previous native mode, the attached
 	 * monitor could have changed.
 	 */
-	if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS &&
+	if (nv_connector->dcb->type != DCB_CONNECTOR_LVDS &&
 	    nv_connector->native_mode) {
 		drm_mode_destroy(dev, nv_connector->native_mode);
 		nv_connector->native_mode = NULL;
@@ -563,7 +565,7 @@ nouveau_connector_get_modes(struct drm_connector *connector)
 		ret = get_slave_funcs(nv_encoder)->
 			get_modes(to_drm_encoder(nv_encoder), connector);
 
-	if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS)
+	if (nv_encoder->dcb->type == OUTPUT_LVDS)
 		ret += nouveau_connector_scaler_modes_add(connector);
 
 	return ret;
@@ -613,6 +615,9 @@ nouveau_connector_mode_valid(struct drm_connector *connector,
 
 		clock *= 3;
 		break;
+	default:
+		BUG_ON(1);
+		return MODE_BAD;
 	}
 
 	if (clock < min_clock)
@@ -680,7 +685,7 @@ nouveau_connector_create_lvds(struct drm_device *dev,
 	/* Firstly try getting EDID over DDC, if allowed and I2C channel
 	 * is available.
 	 */
-	if (!dev_priv->VBIOS.pub.fp_no_ddc && nv_encoder->dcb->i2c_index < 0xf)
+	if (!dev_priv->vbios.fp_no_ddc && nv_encoder->dcb->i2c_index < 0xf)
 		i2c = nouveau_i2c_find(dev, nv_encoder->dcb->i2c_index);
 
 	if (i2c) {
@@ -695,7 +700,7 @@ nouveau_connector_create_lvds(struct drm_device *dev,
 	 */
 	if (!nv_connector->edid && nouveau_bios_fp_mode(dev, &native) &&
 	     (nv_encoder->dcb->lvdsconf.use_straps_for_mode ||
-	      dev_priv->VBIOS.pub.fp_no_ddc)) {
+	      dev_priv->vbios.fp_no_ddc)) {
 		nv_connector->native_mode = drm_mode_duplicate(dev, &native);
 		goto out;
 	}
@@ -704,7 +709,7 @@ nouveau_connector_create_lvds(struct drm_device *dev,
 	 * stored for the panel stored in them.
 	 */
 	if (!nv_connector->edid && !nv_connector->native_mode &&
-	    !dev_priv->VBIOS.pub.fp_no_ddc) {
+	    !dev_priv->vbios.fp_no_ddc) {
 		struct edid *edid =
 			(struct edid *)nouveau_bios_embedded_edid(dev);
 		if (edid) {
@@ -739,46 +744,66 @@ nouveau_connector_create_lvds(struct drm_device *dev,
 }
 
 int
-nouveau_connector_create(struct drm_device *dev, int index, int type)
+nouveau_connector_create(struct drm_device *dev,
+			 struct dcb_connector_table_entry *dcb)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_connector *nv_connector = NULL;
 	struct drm_connector *connector;
 	struct drm_encoder *encoder;
-	int ret;
+	int ret, type;
 
 	NV_DEBUG_KMS(dev, "\n");
 
-	nv_connector = kzalloc(sizeof(*nv_connector), GFP_KERNEL);
-	if (!nv_connector)
-		return -ENOMEM;
-	nv_connector->dcb = nouveau_bios_connector_entry(dev, index);
-	connector = &nv_connector->base;
-
-	switch (type) {
-	case DRM_MODE_CONNECTOR_VGA:
+	switch (dcb->type) {
+	case DCB_CONNECTOR_NONE:
+		return 0;
+	case DCB_CONNECTOR_VGA:
 		NV_INFO(dev, "Detected a VGA connector\n");
+		type = DRM_MODE_CONNECTOR_VGA;
 		break;
-	case DRM_MODE_CONNECTOR_DVID:
-		NV_INFO(dev, "Detected a DVI-D connector\n");
+	case DCB_CONNECTOR_TV_0:
+	case DCB_CONNECTOR_TV_1:
+	case DCB_CONNECTOR_TV_3:
+		NV_INFO(dev, "Detected a TV connector\n");
+		type = DRM_MODE_CONNECTOR_TV;
 		break;
-	case DRM_MODE_CONNECTOR_DVII:
+	case DCB_CONNECTOR_DVI_I:
 		NV_INFO(dev, "Detected a DVI-I connector\n");
+		type = DRM_MODE_CONNECTOR_DVII;
 		break;
-	case DRM_MODE_CONNECTOR_LVDS:
-		NV_INFO(dev, "Detected a LVDS connector\n");
+	case DCB_CONNECTOR_DVI_D:
+		NV_INFO(dev, "Detected a DVI-D connector\n");
+		type = DRM_MODE_CONNECTOR_DVID;
 		break;
-	case DRM_MODE_CONNECTOR_TV:
-		NV_INFO(dev, "Detected a TV connector\n");
+	case DCB_CONNECTOR_HDMI_0:
+	case DCB_CONNECTOR_HDMI_1:
+		NV_INFO(dev, "Detected a HDMI connector\n");
+		type = DRM_MODE_CONNECTOR_HDMIA;
+		break;
+	case DCB_CONNECTOR_LVDS:
+		NV_INFO(dev, "Detected a LVDS connector\n");
+		type = DRM_MODE_CONNECTOR_LVDS;
 		break;
-	case DRM_MODE_CONNECTOR_DisplayPort:
+	case DCB_CONNECTOR_DP:
 		NV_INFO(dev, "Detected a DisplayPort connector\n");
+		type = DRM_MODE_CONNECTOR_DisplayPort;
 		break;
-	default:
-		NV_ERROR(dev, "Unknown connector, this is not good.\n");
+	case DCB_CONNECTOR_eDP:
+		NV_INFO(dev, "Detected an eDP connector\n");
+		type = DRM_MODE_CONNECTOR_eDP;
 		break;
+	default:
+		NV_ERROR(dev, "unknown connector type: 0x%02x!!\n", dcb->type);
+		return -EINVAL;
 	}
 
+	nv_connector = kzalloc(sizeof(*nv_connector), GFP_KERNEL);
+	if (!nv_connector)
+		return -ENOMEM;
+	nv_connector->dcb = dcb;
+	connector = &nv_connector->base;
+
 	/* defaults, will get overridden in detect() */
 	connector->interlace_allowed = false;
 	connector->doublescan_allowed = false;
@@ -786,55 +811,65 @@ nouveau_connector_create(struct drm_device *dev, int index, int type)
 	drm_connector_init(dev, connector, &nouveau_connector_funcs, type);
 	drm_connector_helper_add(connector, &nouveau_connector_helper_funcs);
 
+	/* attach encoders */
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+		struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+
+		if (nv_encoder->dcb->connector != dcb->index)
+			continue;
+
+		if (get_slave_funcs(nv_encoder))
+			get_slave_funcs(nv_encoder)->create_resources(encoder, connector);
+
+		drm_mode_connector_attach_encoder(connector, encoder);
+	}
+
+	if (!connector->encoder_ids[0]) {
+		NV_WARN(dev, "  no encoders, ignoring\n");
+		drm_connector_cleanup(connector);
+		kfree(connector);
+		return 0;
+	}
+
 	/* Init DVI-I specific properties */
-	if (type == DRM_MODE_CONNECTOR_DVII) {
+	if (dcb->type == DCB_CONNECTOR_DVI_I) {
 		drm_mode_create_dvi_i_properties(dev);
 		drm_connector_attach_property(connector, dev->mode_config.dvi_i_subconnector_property, 0);
 		drm_connector_attach_property(connector, dev->mode_config.dvi_i_select_subconnector_property, 0);
 	}
 
-	if (type != DRM_MODE_CONNECTOR_LVDS)
+	if (dcb->type != DCB_CONNECTOR_LVDS)
 		nv_connector->use_dithering = false;
 
-	if (type == DRM_MODE_CONNECTOR_DVID ||
-	    type == DRM_MODE_CONNECTOR_DVII ||
-	    type == DRM_MODE_CONNECTOR_LVDS ||
-	    type == DRM_MODE_CONNECTOR_DisplayPort) {
-		nv_connector->scaling_mode = DRM_MODE_SCALE_FULLSCREEN;
-
-		drm_connector_attach_property(connector, dev->mode_config.scaling_mode_property,
-					      nv_connector->scaling_mode);
-		drm_connector_attach_property(connector, dev->mode_config.dithering_mode_property,
-					      nv_connector->use_dithering ? DRM_MODE_DITHERING_ON
-					      : DRM_MODE_DITHERING_OFF);
-
-	} else {
-		nv_connector->scaling_mode = DRM_MODE_SCALE_NONE;
-
-		if (type == DRM_MODE_CONNECTOR_VGA  &&
-				dev_priv->card_type >= NV_50) {
+	switch (dcb->type) {
+	case DCB_CONNECTOR_VGA:
+		if (dev_priv->card_type >= NV_50) {
 			drm_connector_attach_property(connector,
 					dev->mode_config.scaling_mode_property,
 					nv_connector->scaling_mode);
 		}
-	}
-
-	/* attach encoders */
-	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
-		struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-
-		if (nv_encoder->dcb->connector != index)
-			continue;
-
-		if (get_slave_funcs(nv_encoder))
-			get_slave_funcs(nv_encoder)->create_resources(encoder, connector);
+		/* fall-through */
+	case DCB_CONNECTOR_TV_0:
+	case DCB_CONNECTOR_TV_1:
+	case DCB_CONNECTOR_TV_3:
+		nv_connector->scaling_mode = DRM_MODE_SCALE_NONE;
+		break;
+	default:
+		nv_connector->scaling_mode = DRM_MODE_SCALE_FULLSCREEN;
 
-		drm_mode_connector_attach_encoder(connector, encoder);
+		drm_connector_attach_property(connector,
+				dev->mode_config.scaling_mode_property,
+				nv_connector->scaling_mode);
+		drm_connector_attach_property(connector,
+				dev->mode_config.dithering_mode_property,
+				nv_connector->use_dithering ?
+				DRM_MODE_DITHERING_ON : DRM_MODE_DITHERING_OFF);
+		break;
 	}
 
 	drm_sysfs_connector_add(connector);
 
-	if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS) {
+	if (dcb->type == DCB_CONNECTOR_LVDS) {
 		ret = nouveau_connector_create_lvds(dev, connector);
 		if (ret) {
 			connector->funcs->destroy(connector);
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.h b/drivers/gpu/drm/nouveau/nouveau_connector.h
index 728b8090e5ff87ed39f3f91337344dbbe727e011..4ef38abc2d9cfa85b2f569b22e01d4c3f596c753 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.h
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.h
@@ -49,6 +49,7 @@ static inline struct nouveau_connector *nouveau_connector(
 	return container_of(con, struct nouveau_connector, base);
 }
 
-int nouveau_connector_create(struct drm_device *dev, int i2c_index, int type);
+int nouveau_connector_create(struct drm_device *,
+			     struct dcb_connector_table_entry *);
 
 #endif /* __NOUVEAU_CONNECTOR_H__ */
diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
index d79db3698f16699e5edd3a9432357cb7b1cc10f5..8ff9ef5d4b47d1bd1df4287e0c4e109b4aaed52a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c
+++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
@@ -47,12 +47,23 @@ nouveau_debugfs_channel_info(struct seq_file *m, void *data)
 	seq_printf(m, "           cur: 0x%08x\n", chan->dma.cur << 2);
 	seq_printf(m, "           put: 0x%08x\n", chan->dma.put << 2);
 	seq_printf(m, "          free: 0x%08x\n", chan->dma.free << 2);
+	if (chan->dma.ib_max) {
+		seq_printf(m, "        ib max: 0x%08x\n", chan->dma.ib_max);
+		seq_printf(m, "        ib put: 0x%08x\n", chan->dma.ib_put);
+		seq_printf(m, "       ib free: 0x%08x\n", chan->dma.ib_free);
+	}
 
 	seq_printf(m, "gpu fifo state:\n");
 	seq_printf(m, "           get: 0x%08x\n",
 					nvchan_rd32(chan, chan->user_get));
 	seq_printf(m, "           put: 0x%08x\n",
 					nvchan_rd32(chan, chan->user_put));
+	if (chan->dma.ib_max) {
+		seq_printf(m, "        ib get: 0x%08x\n",
+			   nvchan_rd32(chan, 0x88));
+		seq_printf(m, "        ib put: 0x%08x\n",
+			   nvchan_rd32(chan, 0x8c));
+	}
 
 	seq_printf(m, "last fence    : %d\n", chan->fence.sequence);
 	seq_printf(m, "last signalled: %d\n", chan->fence.sequence_ack);
@@ -133,9 +144,22 @@ nouveau_debugfs_memory_info(struct seq_file *m, void *data)
 	return 0;
 }
 
+static int
+nouveau_debugfs_vbios_image(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_nouveau_private *dev_priv = node->minor->dev->dev_private;
+	int i;
+
+	for (i = 0; i < dev_priv->vbios.length; i++)
+		seq_printf(m, "%c", dev_priv->vbios.data[i]);
+	return 0;
+}
+
 static struct drm_info_list nouveau_debugfs_list[] = {
 	{ "chipset", nouveau_debugfs_chipset_info, 0, NULL },
 	{ "memory", nouveau_debugfs_memory_info, 0, NULL },
+	{ "vbios.rom", nouveau_debugfs_vbios_image, 0, NULL },
 };
 #define NOUVEAU_DEBUGFS_ENTRIES ARRAY_SIZE(nouveau_debugfs_list)
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.c b/drivers/gpu/drm/nouveau/nouveau_dma.c
index 50d9e67745af1e7d1c5872f39db736d2c76dfe05..c8482a108a7801c321875ecbf5ff4e041563d3e9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.c
@@ -32,7 +32,22 @@
 void
 nouveau_dma_pre_init(struct nouveau_channel *chan)
 {
-	chan->dma.max  = (chan->pushbuf_bo->bo.mem.size >> 2) - 2;
+	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
+	struct nouveau_bo *pushbuf = chan->pushbuf_bo;
+
+	if (dev_priv->card_type == NV_50) {
+		const int ib_size = pushbuf->bo.mem.size / 2;
+
+		chan->dma.ib_base = (pushbuf->bo.mem.size - ib_size) >> 2;
+		chan->dma.ib_max = (ib_size / 8) - 1;
+		chan->dma.ib_put = 0;
+		chan->dma.ib_free = chan->dma.ib_max - chan->dma.ib_put;
+
+		chan->dma.max = (pushbuf->bo.mem.size - ib_size) >> 2;
+	} else {
+		chan->dma.max  = (pushbuf->bo.mem.size >> 2) - 2;
+	}
+
 	chan->dma.put  = 0;
 	chan->dma.cur  = chan->dma.put;
 	chan->dma.free = chan->dma.max - chan->dma.cur;
@@ -162,12 +177,101 @@ READ_GET(struct nouveau_channel *chan, uint32_t *prev_get, uint32_t *timeout)
 	return (val - chan->pushbuf_base) >> 2;
 }
 
+void
+nv50_dma_push(struct nouveau_channel *chan, struct nouveau_bo *bo,
+	      int delta, int length)
+{
+	struct nouveau_bo *pb = chan->pushbuf_bo;
+	uint64_t offset = bo->bo.offset + delta;
+	int ip = (chan->dma.ib_put * 2) + chan->dma.ib_base;
+
+	BUG_ON(chan->dma.ib_free < 1);
+	nouveau_bo_wr32(pb, ip++, lower_32_bits(offset));
+	nouveau_bo_wr32(pb, ip++, upper_32_bits(offset) | length << 8);
+
+	chan->dma.ib_put = (chan->dma.ib_put + 1) & chan->dma.ib_max;
+	nvchan_wr32(chan, 0x8c, chan->dma.ib_put);
+	chan->dma.ib_free--;
+}
+
+static int
+nv50_dma_push_wait(struct nouveau_channel *chan, int count)
+{
+	uint32_t cnt = 0, prev_get = 0;
+
+	while (chan->dma.ib_free < count) {
+		uint32_t get = nvchan_rd32(chan, 0x88);
+		if (get != prev_get) {
+			prev_get = get;
+			cnt = 0;
+		}
+
+		if ((++cnt & 0xff) == 0) {
+			DRM_UDELAY(1);
+			if (cnt > 100000)
+				return -EBUSY;
+		}
+
+		chan->dma.ib_free = get - chan->dma.ib_put;
+		if (chan->dma.ib_free <= 0)
+			chan->dma.ib_free += chan->dma.ib_max + 1;
+	}
+
+	return 0;
+}
+
+static int
+nv50_dma_wait(struct nouveau_channel *chan, int slots, int count)
+{
+	uint32_t cnt = 0, prev_get = 0;
+	int ret;
+
+	ret = nv50_dma_push_wait(chan, slots + 1);
+	if (unlikely(ret))
+		return ret;
+
+	while (chan->dma.free < count) {
+		int get = READ_GET(chan, &prev_get, &cnt);
+		if (unlikely(get < 0)) {
+			if (get == -EINVAL)
+				continue;
+
+			return get;
+		}
+
+		if (get <= chan->dma.cur) {
+			chan->dma.free = chan->dma.max - chan->dma.cur;
+			if (chan->dma.free >= count)
+				break;
+
+			FIRE_RING(chan);
+			do {
+				get = READ_GET(chan, &prev_get, &cnt);
+				if (unlikely(get < 0)) {
+					if (get == -EINVAL)
+						continue;
+					return get;
+				}
+			} while (get == 0);
+			chan->dma.cur = 0;
+			chan->dma.put = 0;
+		}
+
+		chan->dma.free = get - chan->dma.cur - 1;
+	}
+
+	return 0;
+}
+
 int
-nouveau_dma_wait(struct nouveau_channel *chan, int size)
+nouveau_dma_wait(struct nouveau_channel *chan, int slots, int size)
 {
 	uint32_t prev_get = 0, cnt = 0;
 	int get;
 
+	if (chan->dma.ib_max)
+		return nv50_dma_wait(chan, slots, size);
+
 	while (chan->dma.free < size) {
 		get = READ_GET(chan, &prev_get, &cnt);
 		if (unlikely(get == -EBUSY))
diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.h b/drivers/gpu/drm/nouveau/nouveau_dma.h
index dabfd655f93ec84e0a7f74a27f031c87c6234bfb..8b05c15866d5b9ad83bc52f78378944f90412fe9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.h
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.h
@@ -31,6 +31,9 @@
 #define NOUVEAU_DMA_DEBUG 0
 #endif
 
+void nv50_dma_push(struct nouveau_channel *, struct nouveau_bo *,
+		   int delta, int length);
+
 /*
  * There's a hw race condition where you can't jump to your PUT offset,
  * to avoid this we jump to offset + SKIPS and fill the difference with
@@ -96,13 +99,11 @@ enum {
 static __must_check inline int
 RING_SPACE(struct nouveau_channel *chan, int size)
 {
-	if (chan->dma.free < size) {
-		int ret;
+	int ret;
 
-		ret = nouveau_dma_wait(chan, size);
-		if (ret)
-			return ret;
-	}
+	ret = nouveau_dma_wait(chan, 1, size);
+	if (ret)
+		return ret;
 
 	chan->dma.free -= size;
 	return 0;
@@ -146,7 +147,13 @@ FIRE_RING(struct nouveau_channel *chan)
 		return;
 	chan->accel_done = true;
 
-	WRITE_PUT(chan->dma.cur);
+	if (chan->dma.ib_max) {
+		nv50_dma_push(chan, chan->pushbuf_bo, chan->dma.put << 2,
+			      (chan->dma.cur - chan->dma.put) << 2);
+	} else {
+		WRITE_PUT(chan->dma.cur);
+	}
+
 	chan->dma.put = chan->dma.cur;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c
index da3b93b84502de4721f84193bbcbe1598c2b959b..874adf55a43f6778878fd15a65eca77440af0283 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.c
@@ -75,11 +75,11 @@ MODULE_PARM_DESC(ignorelid, "Ignore ACPI lid status");
 int nouveau_ignorelid = 0;
 module_param_named(ignorelid, nouveau_ignorelid, int, 0400);
 
-MODULE_PARM_DESC(noagp, "Disable all acceleration");
+MODULE_PARM_DESC(noaccel, "Disable all acceleration");
 int nouveau_noaccel = 0;
 module_param_named(noaccel, nouveau_noaccel, int, 0400);
 
-MODULE_PARM_DESC(noagp, "Disable fbcon acceleration");
+MODULE_PARM_DESC(nofbaccel, "Disable fbcon acceleration");
 int nouveau_nofbaccel = 0;
 module_param_named(nofbaccel, nouveau_nofbaccel, int, 0400);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 1c15ef37b71cffc9c234f08b0337e50e1af4ff6e..2f8ce42f07257a27e1a19e620430967e079615a7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -34,7 +34,7 @@
 
 #define DRIVER_MAJOR		0
 #define DRIVER_MINOR		0
-#define DRIVER_PATCHLEVEL	15
+#define DRIVER_PATCHLEVEL	16
 
 #define NOUVEAU_FAMILY   0x0000FFFF
 #define NOUVEAU_FLAGS    0xFFFF0000
@@ -83,6 +83,7 @@ struct nouveau_bo {
 	struct drm_file *reserved_by;
 	struct list_head entry;
 	int pbbo_index;
+	bool validate_mapped;
 
 	struct nouveau_channel *channel;
 
@@ -239,6 +240,11 @@ struct nouveau_channel {
 		int cur;
 		int put;
 		/* access via pushbuf_bo */
+
+		int ib_base;
+		int ib_max;
+		int ib_free;
+		int ib_put;
 	} dma;
 
 	uint32_t sw_subchannel[8];
@@ -533,6 +539,9 @@ struct drm_nouveau_private {
 	struct nouveau_engine engine;
 	struct nouveau_channel *channel;
 
+	/* For PFIFO and PGRAPH. */
+	spinlock_t context_switch_lock;
+
 	/* RAMIN configuration, RAMFC, RAMHT and RAMRO offsets */
 	struct nouveau_gpuobj *ramht;
 	uint32_t ramin_rsvd_vram;
@@ -596,8 +605,7 @@ struct drm_nouveau_private {
 
 	struct list_head gpuobj_list;
 
-	struct nvbios VBIOS;
-	struct nouveau_bios_info *vbios;
+	struct nvbios vbios;
 
 	struct nv04_mode_state mode_reg;
 	struct nv04_mode_state saved_reg;
@@ -696,12 +704,6 @@ extern bool nouveau_wait_until(struct drm_device *, uint64_t timeout,
 			       uint32_t reg, uint32_t mask, uint32_t val);
 extern bool nouveau_wait_for_idle(struct drm_device *);
 extern int  nouveau_card_init(struct drm_device *);
-extern int  nouveau_ioctl_card_init(struct drm_device *, void *data,
-				    struct drm_file *);
-extern int  nouveau_ioctl_suspend(struct drm_device *, void *data,
-				  struct drm_file *);
-extern int  nouveau_ioctl_resume(struct drm_device *, void *data,
-				 struct drm_file *);
 
 /* nouveau_mem.c */
 extern int  nouveau_mem_init_heap(struct mem_block **, uint64_t start,
@@ -845,7 +847,7 @@ nouveau_debugfs_channel_fini(struct nouveau_channel *chan)
 /* nouveau_dma.c */
 extern void nouveau_dma_pre_init(struct nouveau_channel *);
 extern int  nouveau_dma_init(struct nouveau_channel *);
-extern int  nouveau_dma_wait(struct nouveau_channel *, int size);
+extern int  nouveau_dma_wait(struct nouveau_channel *, int slots, int size);
 
 /* nouveau_acpi.c */
 #ifdef CONFIG_ACPI
@@ -1027,6 +1029,7 @@ extern void nv50_graph_destroy_context(struct nouveau_channel *);
 extern int  nv50_graph_load_context(struct nouveau_channel *);
 extern int  nv50_graph_unload_context(struct drm_device *);
 extern void nv50_graph_context_switch(struct drm_device *);
+extern int  nv50_grctx_init(struct nouveau_grctx *);
 
 /* nouveau_grctx.c */
 extern int  nouveau_grctx_prog_load(struct drm_device *);
@@ -1152,16 +1155,6 @@ extern int nouveau_gem_ioctl_new(struct drm_device *, void *,
 				 struct drm_file *);
 extern int nouveau_gem_ioctl_pushbuf(struct drm_device *, void *,
 				     struct drm_file *);
-extern int nouveau_gem_ioctl_pushbuf_call(struct drm_device *, void *,
-					  struct drm_file *);
-extern int nouveau_gem_ioctl_pushbuf_call2(struct drm_device *, void *,
-					   struct drm_file *);
-extern int nouveau_gem_ioctl_pin(struct drm_device *, void *,
-				 struct drm_file *);
-extern int nouveau_gem_ioctl_unpin(struct drm_device *, void *,
-				   struct drm_file *);
-extern int nouveau_gem_ioctl_tile(struct drm_device *, void *,
-				  struct drm_file *);
 extern int nouveau_gem_ioctl_cpu_prep(struct drm_device *, void *,
 				      struct drm_file *);
 extern int nouveau_gem_ioctl_cpu_fini(struct drm_device *, void *,
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 34063c5618999dedd131b19a582844852808ce6d..0d22f66f1c795c1784cd3078c8e87f88dd4b8044 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -241,6 +241,11 @@ validate_fini_list(struct list_head *list, struct nouveau_fence *fence)
 			nouveau_fence_unref((void *)&prev_fence);
 		}
 
+		if (unlikely(nvbo->validate_mapped)) {
+			ttm_bo_kunmap(&nvbo->kmap);
+			nvbo->validate_mapped = false;
+		}
+
 		list_del(&nvbo->entry);
 		nvbo->reserved_by = NULL;
 		ttm_bo_unreserve(&nvbo->bo);
@@ -300,11 +305,14 @@ validate_init(struct nouveau_channel *chan, struct drm_file *file_priv,
 			if (ret == -EAGAIN)
 				ret = ttm_bo_wait_unreserved(&nvbo->bo, false);
 			drm_gem_object_unreference(gem);
-			if (ret)
+			if (ret) {
+				NV_ERROR(dev, "fail reserve\n");
 				return ret;
+			}
 			goto retry;
 		}
 
+		b->user_priv = (uint64_t)(unsigned long)nvbo;
 		nvbo->reserved_by = file_priv;
 		nvbo->pbbo_index = i;
 		if ((b->valid_domains & NOUVEAU_GEM_DOMAIN_VRAM) &&
@@ -334,8 +342,10 @@ validate_init(struct nouveau_channel *chan, struct drm_file *file_priv,
 			}
 
 			ret = ttm_bo_wait_cpu(&nvbo->bo, false);
-			if (ret)
+			if (ret) {
+				NV_ERROR(dev, "fail wait_cpu\n");
 				return ret;
+			}
 			goto retry;
 		}
 	}
@@ -349,6 +359,7 @@ validate_list(struct nouveau_channel *chan, struct list_head *list,
 {
 	struct drm_nouveau_gem_pushbuf_bo __user *upbbo =
 				(void __force __user *)(uintptr_t)user_pbbo_ptr;
+	struct drm_device *dev = chan->dev;
 	struct nouveau_bo *nvbo;
 	int ret, relocs = 0;
 
@@ -360,39 +371,46 @@ validate_list(struct nouveau_channel *chan, struct list_head *list,
 			spin_lock(&nvbo->bo.lock);
 			ret = ttm_bo_wait(&nvbo->bo, false, false, false);
 			spin_unlock(&nvbo->bo.lock);
-			if (unlikely(ret))
+			if (unlikely(ret)) {
+				NV_ERROR(dev, "fail wait other chan\n");
 				return ret;
+			}
 		}
 
 		ret = nouveau_gem_set_domain(nvbo->gem, b->read_domains,
 					     b->write_domains,
 					     b->valid_domains);
-		if (unlikely(ret))
+		if (unlikely(ret)) {
+			NV_ERROR(dev, "fail set_domain\n");
 			return ret;
+		}
 
 		nvbo->channel = chan;
 		ret = ttm_bo_validate(&nvbo->bo, &nvbo->placement,
 				      false, false);
 		nvbo->channel = NULL;
-		if (unlikely(ret))
+		if (unlikely(ret)) {
+			NV_ERROR(dev, "fail ttm_validate\n");
 			return ret;
+		}
 
-		if (nvbo->bo.offset == b->presumed_offset &&
+		if (nvbo->bo.offset == b->presumed.offset &&
 		    ((nvbo->bo.mem.mem_type == TTM_PL_VRAM &&
-		      b->presumed_domain & NOUVEAU_GEM_DOMAIN_VRAM) ||
+		      b->presumed.domain & NOUVEAU_GEM_DOMAIN_VRAM) ||
 		     (nvbo->bo.mem.mem_type == TTM_PL_TT &&
-		      b->presumed_domain & NOUVEAU_GEM_DOMAIN_GART)))
+		      b->presumed.domain & NOUVEAU_GEM_DOMAIN_GART)))
 			continue;
 
 		if (nvbo->bo.mem.mem_type == TTM_PL_TT)
-			b->presumed_domain = NOUVEAU_GEM_DOMAIN_GART;
+			b->presumed.domain = NOUVEAU_GEM_DOMAIN_GART;
 		else
-			b->presumed_domain = NOUVEAU_GEM_DOMAIN_VRAM;
-		b->presumed_offset = nvbo->bo.offset;
-		b->presumed_ok = 0;
+			b->presumed.domain = NOUVEAU_GEM_DOMAIN_VRAM;
+		b->presumed.offset = nvbo->bo.offset;
+		b->presumed.valid = 0;
 		relocs++;
 
-		if (DRM_COPY_TO_USER(&upbbo[nvbo->pbbo_index], b, sizeof(*b)))
+		if (DRM_COPY_TO_USER(&upbbo[nvbo->pbbo_index].presumed,
+				     &b->presumed, sizeof(b->presumed)))
 			return -EFAULT;
 	}
 
@@ -406,6 +424,7 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan,
 			     uint64_t user_buffers, int nr_buffers,
 			     struct validate_op *op, int *apply_relocs)
 {
+	struct drm_device *dev = chan->dev;
 	int ret, relocs = 0;
 
 	INIT_LIST_HEAD(&op->vram_list);
@@ -416,11 +435,14 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan,
 		return 0;
 
 	ret = validate_init(chan, file_priv, pbbo, nr_buffers, op);
-	if (unlikely(ret))
+	if (unlikely(ret)) {
+		NV_ERROR(dev, "validate_init\n");
 		return ret;
+	}
 
 	ret = validate_list(chan, &op->vram_list, pbbo, user_buffers);
 	if (unlikely(ret < 0)) {
+		NV_ERROR(dev, "validate vram_list\n");
 		validate_fini(op, NULL);
 		return ret;
 	}
@@ -428,6 +450,7 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan,
 
 	ret = validate_list(chan, &op->gart_list, pbbo, user_buffers);
 	if (unlikely(ret < 0)) {
+		NV_ERROR(dev, "validate gart_list\n");
 		validate_fini(op, NULL);
 		return ret;
 	}
@@ -435,6 +458,7 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan,
 
 	ret = validate_list(chan, &op->both_list, pbbo, user_buffers);
 	if (unlikely(ret < 0)) {
+		NV_ERROR(dev, "validate both_list\n");
 		validate_fini(op, NULL);
 		return ret;
 	}
@@ -463,59 +487,82 @@ u_memcpya(uint64_t user, unsigned nmemb, unsigned size)
 }
 
 static int
-nouveau_gem_pushbuf_reloc_apply(struct nouveau_channel *chan, int nr_bo,
-				struct drm_nouveau_gem_pushbuf_bo *bo,
-				unsigned nr_relocs, uint64_t ptr_relocs,
-				unsigned nr_dwords, unsigned first_dword,
-				uint32_t *pushbuf, bool is_iomem)
+nouveau_gem_pushbuf_reloc_apply(struct drm_device *dev,
+				struct drm_nouveau_gem_pushbuf *req,
+				struct drm_nouveau_gem_pushbuf_bo *bo)
 {
 	struct drm_nouveau_gem_pushbuf_reloc *reloc = NULL;
-	struct drm_device *dev = chan->dev;
 	int ret = 0;
 	unsigned i;
 
-	reloc = u_memcpya(ptr_relocs, nr_relocs, sizeof(*reloc));
+	reloc = u_memcpya(req->relocs, req->nr_relocs, sizeof(*reloc));
 	if (IS_ERR(reloc))
 		return PTR_ERR(reloc);
 
-	for (i = 0; i < nr_relocs; i++) {
+	for (i = 0; i < req->nr_relocs; i++) {
 		struct drm_nouveau_gem_pushbuf_reloc *r = &reloc[i];
 		struct drm_nouveau_gem_pushbuf_bo *b;
+		struct nouveau_bo *nvbo;
 		uint32_t data;
 
-		if (r->bo_index >= nr_bo || r->reloc_index < first_dword ||
-		    r->reloc_index >= first_dword + nr_dwords) {
-			NV_ERROR(dev, "Bad relocation %d\n", i);
-			NV_ERROR(dev, "  bo: %d max %d\n", r->bo_index, nr_bo);
-			NV_ERROR(dev, "  id: %d max %d\n", r->reloc_index, nr_dwords);
+		if (unlikely(r->bo_index > req->nr_buffers)) {
+			NV_ERROR(dev, "reloc bo index invalid\n");
 			ret = -EINVAL;
 			break;
 		}
 
 		b = &bo[r->bo_index];
-		if (b->presumed_ok)
+		if (b->presumed.valid)
 			continue;
 
+		if (unlikely(r->reloc_bo_index > req->nr_buffers)) {
+			NV_ERROR(dev, "reloc container bo index invalid\n");
+			ret = -EINVAL;
+			break;
+		}
+		nvbo = (void *)(unsigned long)bo[r->reloc_bo_index].user_priv;
+
+		if (unlikely(r->reloc_bo_offset + 4 >
+			     nvbo->bo.mem.num_pages << PAGE_SHIFT)) {
+			NV_ERROR(dev, "reloc outside of bo\n");
+			ret = -EINVAL;
+			break;
+		}
+
+		if (!nvbo->kmap.virtual) {
+			ret = ttm_bo_kmap(&nvbo->bo, 0, nvbo->bo.mem.num_pages,
+					  &nvbo->kmap);
+			if (ret) {
+				NV_ERROR(dev, "failed kmap for reloc\n");
+				break;
+			}
+			nvbo->validate_mapped = true;
+		}
+
 		if (r->flags & NOUVEAU_GEM_RELOC_LOW)
-			data = b->presumed_offset + r->data;
+			data = b->presumed.offset + r->data;
 		else
 		if (r->flags & NOUVEAU_GEM_RELOC_HIGH)
-			data = (b->presumed_offset + r->data) >> 32;
+			data = (b->presumed.offset + r->data) >> 32;
 		else
 			data = r->data;
 
 		if (r->flags & NOUVEAU_GEM_RELOC_OR) {
-			if (b->presumed_domain == NOUVEAU_GEM_DOMAIN_GART)
+			if (b->presumed.domain == NOUVEAU_GEM_DOMAIN_GART)
 				data |= r->tor;
 			else
 				data |= r->vor;
 		}
 
-		if (is_iomem)
-			iowrite32_native(data, (void __force __iomem *)
-						&pushbuf[r->reloc_index]);
-		else
-			pushbuf[r->reloc_index] = data;
+		spin_lock(&nvbo->bo.lock);
+		ret = ttm_bo_wait(&nvbo->bo, false, false, false);
+		spin_unlock(&nvbo->bo.lock);
+		if (ret) {
+			NV_ERROR(dev, "reloc wait_idle failed: %d\n", ret);
+			break;
+		}
+
+		nouveau_bo_wr32(nvbo, r->reloc_bo_offset >> 2, data);
 	}
 
 	kfree(reloc);
@@ -526,127 +573,50 @@ int
 nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
 			  struct drm_file *file_priv)
 {
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct drm_nouveau_gem_pushbuf *req = data;
-	struct drm_nouveau_gem_pushbuf_bo *bo = NULL;
+	struct drm_nouveau_gem_pushbuf_push *push;
+	struct drm_nouveau_gem_pushbuf_bo *bo;
 	struct nouveau_channel *chan;
 	struct validate_op op;
-	struct nouveau_fence* fence = 0;
-	uint32_t *pushbuf = NULL;
-	int ret = 0, do_reloc = 0, i;
+	struct nouveau_fence *fence = 0;
+	int i, j, ret = 0, do_reloc = 0;
 
 	NOUVEAU_CHECK_INITIALISED_WITH_RETURN;
 	NOUVEAU_GET_USER_CHANNEL_WITH_RETURN(req->channel, file_priv, chan);
 
-	if (req->nr_dwords >= chan->dma.max ||
-	    req->nr_buffers > NOUVEAU_GEM_MAX_BUFFERS ||
-	    req->nr_relocs > NOUVEAU_GEM_MAX_RELOCS) {
-		NV_ERROR(dev, "Pushbuf config exceeds limits:\n");
-		NV_ERROR(dev, "  dwords : %d max %d\n", req->nr_dwords,
-			 chan->dma.max - 1);
-		NV_ERROR(dev, "  buffers: %d max %d\n", req->nr_buffers,
-			 NOUVEAU_GEM_MAX_BUFFERS);
-		NV_ERROR(dev, "  relocs : %d max %d\n", req->nr_relocs,
-			 NOUVEAU_GEM_MAX_RELOCS);
-		return -EINVAL;
-	}
-
-	pushbuf = u_memcpya(req->dwords, req->nr_dwords, sizeof(uint32_t));
-	if (IS_ERR(pushbuf))
-		return PTR_ERR(pushbuf);
-
-	bo = u_memcpya(req->buffers, req->nr_buffers, sizeof(*bo));
-	if (IS_ERR(bo)) {
-		kfree(pushbuf);
-		return PTR_ERR(bo);
-	}
-
-	mutex_lock(&dev->struct_mutex);
-
-	/* Validate buffer list */
-	ret = nouveau_gem_pushbuf_validate(chan, file_priv, bo, req->buffers,
-					   req->nr_buffers, &op, &do_reloc);
-	if (ret)
-		goto out;
-
-	/* Apply any relocations that are required */
-	if (do_reloc) {
-		ret = nouveau_gem_pushbuf_reloc_apply(chan, req->nr_buffers,
-						      bo, req->nr_relocs,
-						      req->relocs,
-						      req->nr_dwords, 0,
-						      pushbuf, false);
-		if (ret)
-			goto out;
-	}
-
-	/* Emit push buffer to the hw
-	 */
-	ret = RING_SPACE(chan, req->nr_dwords);
-	if (ret)
-		goto out;
-
-	OUT_RINGp(chan, pushbuf, req->nr_dwords);
+	req->vram_available = dev_priv->fb_aper_free;
+	req->gart_available = dev_priv->gart_info.aper_free;
+	if (unlikely(req->nr_push == 0))
+		goto out_next;
 
-	ret = nouveau_fence_new(chan, &fence, true);
-	if (ret) {
-		NV_ERROR(dev, "error fencing pushbuf: %d\n", ret);
-		WIND_RING(chan);
-		goto out;
+	if (unlikely(req->nr_push > NOUVEAU_GEM_MAX_PUSH)) {
+		NV_ERROR(dev, "pushbuf push count exceeds limit: %d max %d\n",
+			 req->nr_push, NOUVEAU_GEM_MAX_PUSH);
+		return -EINVAL;
 	}
 
-	if (nouveau_gem_pushbuf_sync(chan)) {
-		ret = nouveau_fence_wait(fence, NULL, false, false);
-		if (ret) {
-			for (i = 0; i < req->nr_dwords; i++)
-				NV_ERROR(dev, "0x%08x\n", pushbuf[i]);
-			NV_ERROR(dev, "^^ above push buffer is fail :(\n");
-		}
+	if (unlikely(req->nr_buffers > NOUVEAU_GEM_MAX_BUFFERS)) {
+		NV_ERROR(dev, "pushbuf bo count exceeds limit: %d max %d\n",
+			 req->nr_buffers, NOUVEAU_GEM_MAX_BUFFERS);
+		return -EINVAL;
 	}
 
-out:
-	validate_fini(&op, fence);
-	nouveau_fence_unref((void**)&fence);
-	mutex_unlock(&dev->struct_mutex);
-	kfree(pushbuf);
-	kfree(bo);
-	return ret;
-}
-
-#define PUSHBUF_CAL (dev_priv->card_type >= NV_20)
-
-int
-nouveau_gem_ioctl_pushbuf_call(struct drm_device *dev, void *data,
-			       struct drm_file *file_priv)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct drm_nouveau_gem_pushbuf_call *req = data;
-	struct drm_nouveau_gem_pushbuf_bo *bo = NULL;
-	struct nouveau_channel *chan;
-	struct drm_gem_object *gem;
-	struct nouveau_bo *pbbo;
-	struct validate_op op;
-	struct nouveau_fence* fence = 0;
-	int i, ret = 0, do_reloc = 0;
-
-	NOUVEAU_CHECK_INITIALISED_WITH_RETURN;
-	NOUVEAU_GET_USER_CHANNEL_WITH_RETURN(req->channel, file_priv, chan);
-
-	if (unlikely(req->handle == 0))
-		goto out_next;
-
-	if (req->nr_buffers > NOUVEAU_GEM_MAX_BUFFERS ||
-	    req->nr_relocs > NOUVEAU_GEM_MAX_RELOCS) {
-		NV_ERROR(dev, "Pushbuf config exceeds limits:\n");
-		NV_ERROR(dev, "  buffers: %d max %d\n", req->nr_buffers,
-			 NOUVEAU_GEM_MAX_BUFFERS);
-		NV_ERROR(dev, "  relocs : %d max %d\n", req->nr_relocs,
-			 NOUVEAU_GEM_MAX_RELOCS);
+	if (unlikely(req->nr_relocs > NOUVEAU_GEM_MAX_RELOCS)) {
+		NV_ERROR(dev, "pushbuf reloc count exceeds limit: %d max %d\n",
+			 req->nr_relocs, NOUVEAU_GEM_MAX_RELOCS);
 		return -EINVAL;
 	}
 
+	push = u_memcpya(req->push, req->nr_push, sizeof(*push));
+	if (IS_ERR(push))
+		return PTR_ERR(push);
+
 	bo = u_memcpya(req->buffers, req->nr_buffers, sizeof(*bo));
-	if (IS_ERR(bo))
+	if (IS_ERR(bo)) {
+		kfree(push);
 		return PTR_ERR(bo);
+	}
 
 	mutex_lock(&dev->struct_mutex);
 
@@ -658,122 +628,84 @@ nouveau_gem_ioctl_pushbuf_call(struct drm_device *dev, void *data,
 		goto out;
 	}
 
-	/* Validate DMA push buffer */
-	gem = drm_gem_object_lookup(dev, file_priv, req->handle);
-	if (!gem) {
-		NV_ERROR(dev, "Unknown pb handle 0x%08x\n", req->handle);
-		ret = -EINVAL;
-		goto out;
-	}
-	pbbo = nouveau_gem_object(gem);
-
-	if ((req->offset & 3) || req->nr_dwords < 2 ||
-	    (unsigned long)req->offset > (unsigned long)pbbo->bo.mem.size ||
-	    (unsigned long)req->nr_dwords >
-	     ((unsigned long)(pbbo->bo.mem.size - req->offset ) >> 2)) {
-		NV_ERROR(dev, "pb call misaligned or out of bounds: "
-			      "%d + %d * 4 > %ld\n",
-			 req->offset, req->nr_dwords, pbbo->bo.mem.size);
-		ret = -EINVAL;
-		drm_gem_object_unreference(gem);
-		goto out;
-	}
-
-	ret = ttm_bo_reserve(&pbbo->bo, false, false, true,
-			     chan->fence.sequence);
-	if (ret) {
-		NV_ERROR(dev, "resv pb: %d\n", ret);
-		drm_gem_object_unreference(gem);
-		goto out;
-	}
-
-	nouveau_bo_placement_set(pbbo, 1 << chan->pushbuf_bo->bo.mem.mem_type);
-	ret = ttm_bo_validate(&pbbo->bo, &pbbo->placement, false, false);
-	if (ret) {
-		NV_ERROR(dev, "validate pb: %d\n", ret);
-		ttm_bo_unreserve(&pbbo->bo);
-		drm_gem_object_unreference(gem);
-		goto out;
-	}
-
-	list_add_tail(&pbbo->entry, &op.both_list);
-
-	/* If presumed return address doesn't match, we need to map the
-	 * push buffer and fix it..
-	 */
-	if (!PUSHBUF_CAL) {
-		uint32_t retaddy;
-
-		if (chan->dma.free < 4 + NOUVEAU_DMA_SKIPS) {
-			ret = nouveau_dma_wait(chan, 4 + NOUVEAU_DMA_SKIPS);
-			if (ret) {
-				NV_ERROR(dev, "jmp_space: %d\n", ret);
-				goto out;
-			}
-		}
-
-		retaddy  = chan->pushbuf_base + ((chan->dma.cur + 2) << 2);
-		retaddy |= 0x20000000;
-		if (retaddy != req->suffix0) {
-			req->suffix0 = retaddy;
-			do_reloc = 1;
-		}
-	}
-
 	/* Apply any relocations that are required */
 	if (do_reloc) {
-		void *pbvirt;
-		bool is_iomem;
-		ret = ttm_bo_kmap(&pbbo->bo, 0, pbbo->bo.mem.num_pages,
-				  &pbbo->kmap);
+		ret = nouveau_gem_pushbuf_reloc_apply(dev, req, bo);
 		if (ret) {
-			NV_ERROR(dev, "kmap pb: %d\n", ret);
+			NV_ERROR(dev, "reloc apply: %d\n", ret);
 			goto out;
 		}
+	}
 
-		pbvirt = ttm_kmap_obj_virtual(&pbbo->kmap, &is_iomem);
-		ret = nouveau_gem_pushbuf_reloc_apply(chan, req->nr_buffers, bo,
-						      req->nr_relocs,
-						      req->relocs,
-						      req->nr_dwords,
-						      req->offset / 4,
-						      pbvirt, is_iomem);
-
-		if (!PUSHBUF_CAL) {
-			nouveau_bo_wr32(pbbo,
-					req->offset / 4 + req->nr_dwords - 2,
-					req->suffix0);
-		}
-
-		ttm_bo_kunmap(&pbbo->kmap);
+	if (chan->dma.ib_max) {
+		ret = nouveau_dma_wait(chan, req->nr_push + 1, 6);
 		if (ret) {
-			NV_ERROR(dev, "reloc apply: %d\n", ret);
+			NV_INFO(dev, "nv50cal_space: %d\n", ret);
 			goto out;
 		}
-	}
 
-	if (PUSHBUF_CAL) {
-		ret = RING_SPACE(chan, 2);
+		for (i = 0; i < req->nr_push; i++) {
+			struct nouveau_bo *nvbo = (void *)(unsigned long)
+				bo[push[i].bo_index].user_priv;
+
+			nv50_dma_push(chan, nvbo, push[i].offset,
+				      push[i].length);
+		}
+	} else
+	if (dev_priv->card_type >= NV_20) {
+		ret = RING_SPACE(chan, req->nr_push * 2);
 		if (ret) {
 			NV_ERROR(dev, "cal_space: %d\n", ret);
 			goto out;
 		}
-		OUT_RING(chan, ((pbbo->bo.mem.mm_node->start << PAGE_SHIFT) +
-				  req->offset) | 2);
-		OUT_RING(chan, 0);
+
+		for (i = 0; i < req->nr_push; i++) {
+			struct nouveau_bo *nvbo = (void *)(unsigned long)
+				bo[push[i].bo_index].user_priv;
+			struct drm_mm_node *mem = nvbo->bo.mem.mm_node;
+
+			OUT_RING(chan, ((mem->start << PAGE_SHIFT) +
+					push[i].offset) | 2);
+			OUT_RING(chan, 0);
+		}
 	} else {
-		ret = RING_SPACE(chan, 2 + NOUVEAU_DMA_SKIPS);
+		ret = RING_SPACE(chan, req->nr_push * (2 + NOUVEAU_DMA_SKIPS));
 		if (ret) {
 			NV_ERROR(dev, "jmp_space: %d\n", ret);
 			goto out;
 		}
-		OUT_RING(chan, ((pbbo->bo.mem.mm_node->start << PAGE_SHIFT) +
-				  req->offset) | 0x20000000);
-		OUT_RING(chan, 0);
 
-		/* Space the jumps apart with NOPs. */
-		for (i = 0; i < NOUVEAU_DMA_SKIPS; i++)
+		for (i = 0; i < req->nr_push; i++) {
+			struct nouveau_bo *nvbo = (void *)(unsigned long)
+				bo[push[i].bo_index].user_priv;
+			struct drm_mm_node *mem = nvbo->bo.mem.mm_node;
+			uint32_t cmd;
+
+			cmd = chan->pushbuf_base + ((chan->dma.cur + 2) << 2);
+			cmd |= 0x20000000;
+			if (unlikely(cmd != req->suffix0)) {
+				if (!nvbo->kmap.virtual) {
+					ret = ttm_bo_kmap(&nvbo->bo, 0,
+							  nvbo->bo.mem.
+							  num_pages,
+							  &nvbo->kmap);
+					if (ret) {
+						WIND_RING(chan);
+						goto out;
+					}
+					nvbo->validate_mapped = true;
+				}
+
+				nouveau_bo_wr32(nvbo, (push[i].offset +
+						push[i].length - 8) / 4, cmd);
+			}
+
+			OUT_RING(chan, ((mem->start << PAGE_SHIFT) +
+					push[i].offset) | 0x20000000);
 			OUT_RING(chan, 0);
+			for (j = 0; j < NOUVEAU_DMA_SKIPS; j++)
+				OUT_RING(chan, 0);
+		}
 	}
 
 	ret = nouveau_fence_new(chan, &fence, true);
@@ -788,9 +720,14 @@ nouveau_gem_ioctl_pushbuf_call(struct drm_device *dev, void *data,
 	nouveau_fence_unref((void**)&fence);
 	mutex_unlock(&dev->struct_mutex);
 	kfree(bo);
+	kfree(push);
 
 out_next:
-	if (PUSHBUF_CAL) {
+	if (chan->dma.ib_max) {
+		req->suffix0 = 0x00000000;
+		req->suffix1 = 0x00000000;
+	} else
+	if (dev_priv->card_type >= NV_20) {
 		req->suffix0 = 0x00020000;
 		req->suffix1 = 0x00000000;
 	} else {
@@ -802,19 +739,6 @@ nouveau_gem_ioctl_pushbuf_call(struct drm_device *dev, void *data,
 	return ret;
 }
 
-int
-nouveau_gem_ioctl_pushbuf_call2(struct drm_device *dev, void *data,
-				struct drm_file *file_priv)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct drm_nouveau_gem_pushbuf_call *req = data;
-
-	req->vram_available = dev_priv->fb_aper_free;
-	req->gart_available = dev_priv->gart_info.aper_free;
-
-	return nouveau_gem_ioctl_pushbuf_call(dev, data, file_priv);
-}
-
 static inline uint32_t
 domain_to_ttm(struct nouveau_bo *nvbo, uint32_t domain)
 {
@@ -828,70 +752,6 @@ domain_to_ttm(struct nouveau_bo *nvbo, uint32_t domain)
 	return flags;
 }
 
-int
-nouveau_gem_ioctl_pin(struct drm_device *dev, void *data,
-		      struct drm_file *file_priv)
-{
-	struct drm_nouveau_gem_pin *req = data;
-	struct drm_gem_object *gem;
-	struct nouveau_bo *nvbo;
-	int ret = 0;
-
-	NOUVEAU_CHECK_INITIALISED_WITH_RETURN;
-
-	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
-		NV_ERROR(dev, "pin only allowed without kernel modesetting\n");
-		return -EINVAL;
-	}
-
-	if (!DRM_SUSER(DRM_CURPROC))
-		return -EPERM;
-
-	gem = drm_gem_object_lookup(dev, file_priv, req->handle);
-	if (!gem)
-		return -EINVAL;
-	nvbo = nouveau_gem_object(gem);
-
-	ret = nouveau_bo_pin(nvbo, domain_to_ttm(nvbo, req->domain));
-	if (ret)
-		goto out;
-
-	req->offset = nvbo->bo.offset;
-	if (nvbo->bo.mem.mem_type == TTM_PL_TT)
-		req->domain = NOUVEAU_GEM_DOMAIN_GART;
-	else
-		req->domain = NOUVEAU_GEM_DOMAIN_VRAM;
-
-out:
-	drm_gem_object_unreference_unlocked(gem);
-
-	return ret;
-}
-
-int
-nouveau_gem_ioctl_unpin(struct drm_device *dev, void *data,
-			struct drm_file *file_priv)
-{
-	struct drm_nouveau_gem_pin *req = data;
-	struct drm_gem_object *gem;
-	int ret;
-
-	NOUVEAU_CHECK_INITIALISED_WITH_RETURN;
-
-	if (drm_core_check_feature(dev, DRIVER_MODESET))
-		return -EINVAL;
-
-	gem = drm_gem_object_lookup(dev, file_priv, req->handle);
-	if (!gem)
-		return -EINVAL;
-
-	ret = nouveau_bo_unpin(nouveau_gem_object(gem));
-
-	drm_gem_object_unreference_unlocked(gem);
-
-	return ret;
-}
-
 int
 nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data,
 			   struct drm_file *file_priv)
diff --git a/drivers/gpu/drm/nouveau/nouveau_hw.c b/drivers/gpu/drm/nouveau/nouveau_hw.c
index dc46792a5c96b9f10daf90d4ef83b17a5fdc3f2e..7855b35effc357e2a6db34db2f181a9ea5663f0f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_hw.c
+++ b/drivers/gpu/drm/nouveau/nouveau_hw.c
@@ -160,7 +160,7 @@ static void
 setPLL_single(struct drm_device *dev, uint32_t reg, struct nouveau_pll_vals *pv)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	int chip_version = dev_priv->vbios->chip_version;
+	int chip_version = dev_priv->vbios.chip_version;
 	uint32_t oldpll = NVReadRAMDAC(dev, 0, reg);
 	int oldN = (oldpll >> 8) & 0xff, oldM = oldpll & 0xff;
 	uint32_t pll = (oldpll & 0xfff80000) | pv->log2P << 16 | pv->NM1;
@@ -216,7 +216,7 @@ setPLL_double_highregs(struct drm_device *dev, uint32_t reg1,
 		       struct nouveau_pll_vals *pv)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	int chip_version = dev_priv->vbios->chip_version;
+	int chip_version = dev_priv->vbios.chip_version;
 	bool nv3035 = chip_version == 0x30 || chip_version == 0x35;
 	uint32_t reg2 = reg1 + ((reg1 == NV_RAMDAC_VPLL2) ? 0x5c : 0x70);
 	uint32_t oldpll1 = NVReadRAMDAC(dev, 0, reg1);
@@ -374,7 +374,7 @@ nouveau_hw_setpll(struct drm_device *dev, uint32_t reg1,
 		  struct nouveau_pll_vals *pv)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	int cv = dev_priv->vbios->chip_version;
+	int cv = dev_priv->vbios.chip_version;
 
 	if (cv == 0x30 || cv == 0x31 || cv == 0x35 || cv == 0x36 ||
 	    cv >= 0x40) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_i2c.c b/drivers/gpu/drm/nouveau/nouveau_i2c.c
index 70e994d281223ef9c69a563d14959c8959851bad..88583e7bf651383237187d73305222351ae7a26c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_i2c.c
+++ b/drivers/gpu/drm/nouveau/nouveau_i2c.c
@@ -254,16 +254,16 @@ struct nouveau_i2c_chan *
 nouveau_i2c_find(struct drm_device *dev, int index)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 
-	if (index > DCB_MAX_NUM_I2C_ENTRIES)
+	if (index >= DCB_MAX_NUM_I2C_ENTRIES)
 		return NULL;
 
-	if (!bios->bdcb.dcb.i2c[index].chan) {
-		if (nouveau_i2c_init(dev, &bios->bdcb.dcb.i2c[index], index))
+	if (!bios->dcb.i2c[index].chan) {
+		if (nouveau_i2c_init(dev, &bios->dcb.i2c[index], index))
 			return NULL;
 	}
 
-	return bios->bdcb.dcb.i2c[index].chan;
+	return bios->dcb.i2c[index].chan;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_irq.c b/drivers/gpu/drm/nouveau/nouveau_irq.c
index 447f9f69d6b14fd28b5d01de67f0771c9aefa913..95220ddebb45a0f1880e3b200e161480588a9133 100644
--- a/drivers/gpu/drm/nouveau/nouveau_irq.c
+++ b/drivers/gpu/drm/nouveau/nouveau_irq.c
@@ -691,11 +691,14 @@ nouveau_irq_handler(DRM_IRQ_ARGS)
 	struct drm_device *dev = (struct drm_device *)arg;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	uint32_t status, fbdev_flags = 0;
+	unsigned long flags;
 
 	status = nv_rd32(dev, NV03_PMC_INTR_0);
 	if (!status)
 		return IRQ_NONE;
 
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+
 	if (dev_priv->fbdev_info) {
 		fbdev_flags = dev_priv->fbdev_info->flags;
 		dev_priv->fbdev_info->flags |= FBINFO_HWACCEL_DISABLED;
@@ -733,5 +736,7 @@ nouveau_irq_handler(DRM_IRQ_ARGS)
 	if (dev_priv->fbdev_info)
 		dev_priv->fbdev_info->flags = fbdev_flags;
 
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
+
 	return IRQ_HANDLED;
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index a4851af5b05ec535816eeb98973675c6cbb03aa4..516a8d36cb10a638a701988a4b7f1ecd3f250318 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -391,6 +391,7 @@ nouveau_card_init(struct drm_device *dev)
 		goto out;
 	engine = &dev_priv->engine;
 	dev_priv->init_state = NOUVEAU_CARD_INIT_FAILED;
+	spin_lock_init(&dev_priv->context_switch_lock);
 
 	/* Parse BIOS tables / Run init tables if card not POSTed */
 	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
@@ -776,13 +777,6 @@ int nouveau_unload(struct drm_device *dev)
 	return 0;
 }
 
-int
-nouveau_ioctl_card_init(struct drm_device *dev, void *data,
-			struct drm_file *file_priv)
-{
-	return nouveau_card_init(dev);
-}
-
 int nouveau_ioctl_getparam(struct drm_device *dev, void *data,
 						struct drm_file *file_priv)
 {
diff --git a/drivers/gpu/drm/nouveau/nv04_dac.c b/drivers/gpu/drm/nouveau/nv04_dac.c
index 1d73b15d70daf52c52e83dcac5a9200f304c288a..1cb19e3acb554a998765c98a4aa31c708cd36caf 100644
--- a/drivers/gpu/drm/nouveau/nv04_dac.c
+++ b/drivers/gpu/drm/nouveau/nv04_dac.c
@@ -230,13 +230,13 @@ uint32_t nv17_dac_sample_load(struct drm_encoder *encoder)
 	if (dcb->type == OUTPUT_TV) {
 		testval = RGB_TEST_DATA(0xa0, 0xa0, 0xa0);
 
-		if (dev_priv->vbios->tvdactestval)
-			testval = dev_priv->vbios->tvdactestval;
+		if (dev_priv->vbios.tvdactestval)
+			testval = dev_priv->vbios.tvdactestval;
 	} else {
 		testval = RGB_TEST_DATA(0x140, 0x140, 0x140); /* 0x94050140 */
 
-		if (dev_priv->vbios->dactestval)
-			testval = dev_priv->vbios->dactestval;
+		if (dev_priv->vbios.dactestval)
+			testval = dev_priv->vbios.dactestval;
 	}
 
 	saved_rtest_ctrl = NVReadRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL + regoffset);
diff --git a/drivers/gpu/drm/nouveau/nv04_dfp.c b/drivers/gpu/drm/nouveau/nv04_dfp.c
index 483f875bdb6a479f04b5387b9d20d0e713915c74..41634d4752fe08f6309d81c0b81ff0b7a65cc64f 100644
--- a/drivers/gpu/drm/nouveau/nv04_dfp.c
+++ b/drivers/gpu/drm/nouveau/nv04_dfp.c
@@ -269,10 +269,10 @@ static void nv04_dfp_mode_set(struct drm_encoder *encoder,
 	regp->fp_horiz_regs[FP_TOTAL] = output_mode->htotal - 1;
 	if (!nv_gf4_disp_arch(dev) ||
 	    (output_mode->hsync_start - output_mode->hdisplay) >=
-					dev_priv->vbios->digital_min_front_porch)
+					dev_priv->vbios.digital_min_front_porch)
 		regp->fp_horiz_regs[FP_CRTC] = output_mode->hdisplay;
 	else
-		regp->fp_horiz_regs[FP_CRTC] = output_mode->hsync_start - dev_priv->vbios->digital_min_front_porch - 1;
+		regp->fp_horiz_regs[FP_CRTC] = output_mode->hsync_start - dev_priv->vbios.digital_min_front_porch - 1;
 	regp->fp_horiz_regs[FP_SYNC_START] = output_mode->hsync_start - 1;
 	regp->fp_horiz_regs[FP_SYNC_END] = output_mode->hsync_end - 1;
 	regp->fp_horiz_regs[FP_VALID_START] = output_mode->hskew;
diff --git a/drivers/gpu/drm/nouveau/nv04_display.c b/drivers/gpu/drm/nouveau/nv04_display.c
index ef77215fa5b98372f780911d2e8006fce458f471..c7898b4f6dfbe0e1fb440452368d22077a0b083d 100644
--- a/drivers/gpu/drm/nouveau/nv04_display.c
+++ b/drivers/gpu/drm/nouveau/nv04_display.c
@@ -93,10 +93,9 @@ int
 nv04_display_create(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct parsed_dcb *dcb = dev_priv->vbios->dcb;
+	struct dcb_table *dcb = &dev_priv->vbios.dcb;
 	struct drm_encoder *encoder;
 	struct drm_crtc *crtc;
-	uint16_t connector[16] = { 0 };
 	int i, ret;
 
 	NV_DEBUG_KMS(dev, "\n");
@@ -154,52 +153,10 @@ nv04_display_create(struct drm_device *dev)
 
 		if (ret)
 			continue;
-
-		connector[dcbent->connector] |= (1 << dcbent->type);
 	}
 
-	for (i = 0; i < dcb->entries; i++) {
-		struct dcb_entry *dcbent = &dcb->entry[i];
-		uint16_t encoders;
-		int type;
-
-		encoders = connector[dcbent->connector];
-		if (!(encoders & (1 << dcbent->type)))
-			continue;
-		connector[dcbent->connector] = 0;
-
-		switch (dcbent->type) {
-		case OUTPUT_ANALOG:
-			if (!MULTIPLE_ENCODERS(encoders))
-				type = DRM_MODE_CONNECTOR_VGA;
-			else
-				type = DRM_MODE_CONNECTOR_DVII;
-			break;
-		case OUTPUT_TMDS:
-			if (!MULTIPLE_ENCODERS(encoders))
-				type = DRM_MODE_CONNECTOR_DVID;
-			else
-				type = DRM_MODE_CONNECTOR_DVII;
-			break;
-		case OUTPUT_LVDS:
-			type = DRM_MODE_CONNECTOR_LVDS;
-#if 0
-			/* don't create i2c adapter when lvds ddc not allowed */
-			if (dcbent->lvdsconf.use_straps_for_mode ||
-			    dev_priv->vbios->fp_no_ddc)
-				i2c_index = 0xf;
-#endif
-			break;
-		case OUTPUT_TV:
-			type = DRM_MODE_CONNECTOR_TV;
-			break;
-		default:
-			type = DRM_MODE_CONNECTOR_Unknown;
-			continue;
-		}
-
-		nouveau_connector_create(dev, dcbent->connector, type);
-	}
+	for (i = 0; i < dcb->connector.entries; i++)
+		nouveau_connector_create(dev, &dcb->connector.entry[i]);
 
 	/* Save previous state */
 	NVLockVgaCrtcs(dev, false);
diff --git a/drivers/gpu/drm/nouveau/nv04_fbcon.c b/drivers/gpu/drm/nouveau/nv04_fbcon.c
index fd01caabd5c3aee9efbc93bda38b3707adb14a84..3da90c2c4e634910eba368bdb691bf9684cd5b95 100644
--- a/drivers/gpu/drm/nouveau/nv04_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nv04_fbcon.c
@@ -118,7 +118,7 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 		return;
 	}
 
-	width = (image->width + 31) & ~31;
+	width = ALIGN(image->width, 32);
 	dsize = (width * image->height) >> 5;
 
 	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
diff --git a/drivers/gpu/drm/nouveau/nv04_fifo.c b/drivers/gpu/drm/nouveau/nv04_fifo.c
index f31347b8c9b05da1977fcd00b5108c725434f689..66fe55983b6e3b7b9cf7b14700a7c1ba5b5681c9 100644
--- a/drivers/gpu/drm/nouveau/nv04_fifo.c
+++ b/drivers/gpu/drm/nouveau/nv04_fifo.c
@@ -117,6 +117,7 @@ nv04_fifo_create_context(struct nouveau_channel *chan)
 {
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	unsigned long flags;
 	int ret;
 
 	ret = nouveau_gpuobj_new_fake(dev, NV04_RAMFC(chan->id), ~0,
@@ -127,6 +128,8 @@ nv04_fifo_create_context(struct nouveau_channel *chan)
 	if (ret)
 		return ret;
 
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+
 	/* Setup initial state */
 	dev_priv->engine.instmem.prepare_access(dev, true);
 	RAMFC_WR(DMA_PUT, chan->pushbuf_base);
@@ -144,6 +147,8 @@ nv04_fifo_create_context(struct nouveau_channel *chan)
 	/* enable the fifo dma operation */
 	nv_wr32(dev, NV04_PFIFO_MODE,
 		nv_rd32(dev, NV04_PFIFO_MODE) | (1 << chan->id));
+
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nv04_tv.c b/drivers/gpu/drm/nouveau/nv04_tv.c
index 9c63099e9c428a74f04f23095950d181c631c142..c4e3404337d4708e7b60c57e538908455f2ac235 100644
--- a/drivers/gpu/drm/nouveau/nv04_tv.c
+++ b/drivers/gpu/drm/nouveau/nv04_tv.c
@@ -262,7 +262,7 @@ int nv04_tv_create(struct drm_device *dev, struct dcb_entry *entry)
 	nv_encoder->or = ffs(entry->or) - 1;
 
 	/* Run the slave-specific initialization */
-	adap = &dev_priv->vbios->dcb->i2c[i2c_index].chan->adapter;
+	adap = &dev_priv->vbios.dcb.i2c[i2c_index].chan->adapter;
 
 	was_locked = NVLockVgaCrtcs(dev, false);
 
diff --git a/drivers/gpu/drm/nouveau/nv17_tv.c b/drivers/gpu/drm/nouveau/nv17_tv.c
index 21ac6e49b6ee52bd4891f7710e94d9356595c546..74c880374fb92a5593583b4d077bac002ec76c3c 100644
--- a/drivers/gpu/drm/nouveau/nv17_tv.c
+++ b/drivers/gpu/drm/nouveau/nv17_tv.c
@@ -45,8 +45,8 @@ static uint32_t nv42_tv_sample_load(struct drm_encoder *encoder)
 
 #define RGB_TEST_DATA(r, g, b) (r << 0 | g << 10 | b << 20)
 	testval = RGB_TEST_DATA(0x82, 0xeb, 0x82);
-	if (dev_priv->vbios->tvdactestval)
-		testval = dev_priv->vbios->tvdactestval;
+	if (dev_priv->vbios.tvdactestval)
+		testval = dev_priv->vbios.tvdactestval;
 
 	dacclk = NVReadRAMDAC(dev, 0, NV_PRAMDAC_DACCLK + regoffset);
 	head = (dacclk & 0x100) >> 8;
@@ -367,7 +367,7 @@ static void nv17_tv_prepare(struct drm_encoder *encoder)
 			     !enc->crtc &&
 			     nv04_dfp_get_bound_head(dev, dcb) == head) {
 				nv04_dfp_bind_head(dev, dcb, head ^ 1,
-						dev_priv->VBIOS.fp.dual_link);
+						dev_priv->vbios.fp.dual_link);
 			}
 		}
 
diff --git a/drivers/gpu/drm/nouveau/nv40_fifo.c b/drivers/gpu/drm/nouveau/nv40_fifo.c
index b4f19ccb8b41a2c9f96f460b734aa2971bdd9ca8..6b2ef4a9fce17c135c17ebe410866ef8f60d4b30 100644
--- a/drivers/gpu/drm/nouveau/nv40_fifo.c
+++ b/drivers/gpu/drm/nouveau/nv40_fifo.c
@@ -37,6 +37,7 @@ nv40_fifo_create_context(struct nouveau_channel *chan)
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	uint32_t fc = NV40_RAMFC(chan->id);
+	unsigned long flags;
 	int ret;
 
 	ret = nouveau_gpuobj_new_fake(dev, NV40_RAMFC(chan->id), ~0,
@@ -45,6 +46,8 @@ nv40_fifo_create_context(struct nouveau_channel *chan)
 	if (ret)
 		return ret;
 
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+
 	dev_priv->engine.instmem.prepare_access(dev, true);
 	nv_wi32(dev, fc +  0, chan->pushbuf_base);
 	nv_wi32(dev, fc +  4, chan->pushbuf_base);
@@ -63,6 +66,8 @@ nv40_fifo_create_context(struct nouveau_channel *chan)
 	/* enable the fifo dma operation */
 	nv_wr32(dev, NV04_PFIFO_MODE,
 		nv_rd32(dev, NV04_PFIFO_MODE) | (1 << chan->id));
+
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nv50_dac.c b/drivers/gpu/drm/nouveau/nv50_dac.c
index f08f042a8e10de23a06c14615bac5bcaad4e6af5..1fd9537beff60c404ebded245764edb3e1845802 100644
--- a/drivers/gpu/drm/nouveau/nv50_dac.c
+++ b/drivers/gpu/drm/nouveau/nv50_dac.c
@@ -79,8 +79,8 @@ nv50_dac_detect(struct drm_encoder *encoder, struct drm_connector *connector)
 	}
 
 	/* Use bios provided value if possible. */
-	if (dev_priv->vbios->dactestval) {
-		load_pattern = dev_priv->vbios->dactestval;
+	if (dev_priv->vbios.dactestval) {
+		load_pattern = dev_priv->vbios.dactestval;
 		NV_DEBUG_KMS(dev, "Using bios provided load_pattern of %d\n",
 			  load_pattern);
 	} else {
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index 90f0bf59fbcd191dabcb5803f50537938a481405..61a89f2dc5535ecf1d74f08b9c342727997c1766 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -370,9 +370,7 @@ nv50_display_init(struct drm_device *dev)
 		struct nouveau_connector *conn = nouveau_connector(connector);
 		struct dcb_gpio_entry *gpio;
 
-		if (connector->connector_type != DRM_MODE_CONNECTOR_DVII &&
-		    connector->connector_type != DRM_MODE_CONNECTOR_DVID &&
-		    connector->connector_type != DRM_MODE_CONNECTOR_DisplayPort)
+		if (conn->dcb->gpio_tag == 0xff)
 			continue;
 
 		gpio = nouveau_bios_gpio_entry(dev, conn->dcb->gpio_tag);
@@ -465,8 +463,7 @@ static int nv50_display_disable(struct drm_device *dev)
 int nv50_display_create(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct parsed_dcb *dcb = dev_priv->vbios->dcb;
-	uint32_t connector[16] = {};
+	struct dcb_table *dcb = &dev_priv->vbios.dcb;
 	int ret, i;
 
 	NV_DEBUG_KMS(dev, "\n");
@@ -522,44 +519,13 @@ int nv50_display_create(struct drm_device *dev)
 			NV_WARN(dev, "DCB encoder %d unknown\n", entry->type);
 			continue;
 		}
-
-		connector[entry->connector] |= (1 << entry->type);
 	}
 
-	/* It appears that DCB 3.0+ VBIOS has a connector table, however,
-	 * I'm not 100% certain how to decode it correctly yet so just
-	 * look at what encoders are present on each connector index and
-	 * attempt to derive the connector type from that.
-	 */
-	for (i = 0 ; i < dcb->entries; i++) {
-		struct dcb_entry *entry = &dcb->entry[i];
-		uint16_t encoders;
-		int type;
-
-		encoders = connector[entry->connector];
-		if (!(encoders & (1 << entry->type)))
+	for (i = 0 ; i < dcb->connector.entries; i++) {
+		if (i != 0 && dcb->connector.entry[i].index ==
+			      dcb->connector.entry[i - 1].index)
 			continue;
-		connector[entry->connector] = 0;
-
-		if (encoders & (1 << OUTPUT_DP)) {
-			type = DRM_MODE_CONNECTOR_DisplayPort;
-		} else if (encoders & (1 << OUTPUT_TMDS)) {
-			if (encoders & (1 << OUTPUT_ANALOG))
-				type = DRM_MODE_CONNECTOR_DVII;
-			else
-				type = DRM_MODE_CONNECTOR_DVID;
-		} else if (encoders & (1 << OUTPUT_ANALOG)) {
-			type = DRM_MODE_CONNECTOR_VGA;
-		} else if (encoders & (1 << OUTPUT_LVDS)) {
-			type = DRM_MODE_CONNECTOR_LVDS;
-		} else {
-			type = DRM_MODE_CONNECTOR_Unknown;
-		}
-
-		if (type == DRM_MODE_CONNECTOR_Unknown)
-			continue;
-
-		nouveau_connector_create(dev, entry->connector, type);
+		nouveau_connector_create(dev, &dcb->connector.entry[i]);
 	}
 
 	ret = nv50_display_init(dev);
@@ -667,8 +633,8 @@ nv50_display_irq_head(struct drm_device *dev, int *phead,
 		return -1;
 	}
 
-	for (i = 0; i < dev_priv->vbios->dcb->entries; i++) {
-		struct dcb_entry *dcbent = &dev_priv->vbios->dcb->entry[i];
+	for (i = 0; i < dev_priv->vbios.dcb.entries; i++) {
+		struct dcb_entry *dcbent = &dev_priv->vbios.dcb.entry[i];
 
 		if (dcbent->type != type)
 			continue;
@@ -692,7 +658,7 @@ nv50_display_script_select(struct drm_device *dev, struct dcb_entry *dcbent,
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_connector *nv_connector = NULL;
 	struct drm_encoder *encoder;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	uint32_t mc, script = 0, or;
 
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
@@ -710,7 +676,7 @@ nv50_display_script_select(struct drm_device *dev, struct dcb_entry *dcbent,
 	switch (dcbent->type) {
 	case OUTPUT_LVDS:
 		script = (mc >> 8) & 0xf;
-		if (bios->pub.fp_no_ddc) {
+		if (bios->fp_no_ddc) {
 			if (bios->fp.dual_link)
 				script |= 0x0100;
 			if (bios->fp.if_is_24bit)
diff --git a/drivers/gpu/drm/nouveau/nv50_fbcon.c b/drivers/gpu/drm/nouveau/nv50_fbcon.c
index 0f57cdf7ccb23dd22ed70d581247f23e14db6596..993c7126fbded104b6ed3a16cac2e5f07f757474 100644
--- a/drivers/gpu/drm/nouveau/nv50_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nv50_fbcon.c
@@ -109,7 +109,7 @@ nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 		return;
 	}
 
-	width = (image->width + 31) & ~31;
+	width = ALIGN(image->width, 32);
 	dwords = (width * image->height) >> 5;
 
 	BEGIN_RING(chan, NvSub2D, 0x0814, 2);
diff --git a/drivers/gpu/drm/nouveau/nv50_fifo.c b/drivers/gpu/drm/nouveau/nv50_fifo.c
index 204a79ff10f4e529479bc0136f4209282e085d8c..e20c0e2474f3b56035b3c6391b86a17e4c693f0f 100644
--- a/drivers/gpu/drm/nouveau/nv50_fifo.c
+++ b/drivers/gpu/drm/nouveau/nv50_fifo.c
@@ -243,6 +243,7 @@ nv50_fifo_create_context(struct nouveau_channel *chan)
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_gpuobj *ramfc = NULL;
+	unsigned long flags;
 	int ret;
 
 	NV_DEBUG(dev, "ch%d\n", chan->id);
@@ -278,19 +279,21 @@ nv50_fifo_create_context(struct nouveau_channel *chan)
 			return ret;
 	}
 
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+
 	dev_priv->engine.instmem.prepare_access(dev, true);
 
-	nv_wo32(dev, ramfc, 0x08/4, chan->pushbuf_base);
-	nv_wo32(dev, ramfc, 0x10/4, chan->pushbuf_base);
 	nv_wo32(dev, ramfc, 0x48/4, chan->pushbuf->instance >> 4);
 	nv_wo32(dev, ramfc, 0x80/4, (0xc << 24) | (chan->ramht->instance >> 4));
-	nv_wo32(dev, ramfc, 0x3c/4, 0x00086078);
 	nv_wo32(dev, ramfc, 0x44/4, 0x2101ffff);
 	nv_wo32(dev, ramfc, 0x60/4, 0x7fffffff);
 	nv_wo32(dev, ramfc, 0x40/4, 0x00000000);
 	nv_wo32(dev, ramfc, 0x7c/4, 0x30000001);
 	nv_wo32(dev, ramfc, 0x78/4, 0x00000000);
-	nv_wo32(dev, ramfc, 0x4c/4, 0xffffffff);
+	nv_wo32(dev, ramfc, 0x3c/4, 0x403f6078);
+	nv_wo32(dev, ramfc, 0x50/4, chan->pushbuf_base +
+				    chan->dma.ib_base * 4);
+	nv_wo32(dev, ramfc, 0x54/4, drm_order(chan->dma.ib_max + 1) << 16);
 
 	if (!IS_G80) {
 		nv_wo32(dev, chan->ramin->gpuobj, 0, chan->id);
@@ -306,10 +309,12 @@ nv50_fifo_create_context(struct nouveau_channel *chan)
 	ret = nv50_fifo_channel_enable(dev, chan->id, false);
 	if (ret) {
 		NV_ERROR(dev, "error enabling ch%d: %d\n", chan->id, ret);
+		spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
 		nouveau_gpuobj_ref_del(dev, &chan->ramfc);
 		return ret;
 	}
 
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nv50_graph.c b/drivers/gpu/drm/nouveau/nv50_graph.c
index 6d504801b514823d09575b51e644395933d53122..857a09671a394c80a534d350b659261d8f56c223 100644
--- a/drivers/gpu/drm/nouveau/nv50_graph.c
+++ b/drivers/gpu/drm/nouveau/nv50_graph.c
@@ -28,30 +28,7 @@
 #include "drm.h"
 #include "nouveau_drv.h"
 
-MODULE_FIRMWARE("nouveau/nv50.ctxprog");
-MODULE_FIRMWARE("nouveau/nv50.ctxvals");
-MODULE_FIRMWARE("nouveau/nv84.ctxprog");
-MODULE_FIRMWARE("nouveau/nv84.ctxvals");
-MODULE_FIRMWARE("nouveau/nv86.ctxprog");
-MODULE_FIRMWARE("nouveau/nv86.ctxvals");
-MODULE_FIRMWARE("nouveau/nv92.ctxprog");
-MODULE_FIRMWARE("nouveau/nv92.ctxvals");
-MODULE_FIRMWARE("nouveau/nv94.ctxprog");
-MODULE_FIRMWARE("nouveau/nv94.ctxvals");
-MODULE_FIRMWARE("nouveau/nv96.ctxprog");
-MODULE_FIRMWARE("nouveau/nv96.ctxvals");
-MODULE_FIRMWARE("nouveau/nv98.ctxprog");
-MODULE_FIRMWARE("nouveau/nv98.ctxvals");
-MODULE_FIRMWARE("nouveau/nva0.ctxprog");
-MODULE_FIRMWARE("nouveau/nva0.ctxvals");
-MODULE_FIRMWARE("nouveau/nva5.ctxprog");
-MODULE_FIRMWARE("nouveau/nva5.ctxvals");
-MODULE_FIRMWARE("nouveau/nva8.ctxprog");
-MODULE_FIRMWARE("nouveau/nva8.ctxvals");
-MODULE_FIRMWARE("nouveau/nvaa.ctxprog");
-MODULE_FIRMWARE("nouveau/nvaa.ctxvals");
-MODULE_FIRMWARE("nouveau/nvac.ctxprog");
-MODULE_FIRMWARE("nouveau/nvac.ctxvals");
+#include "nouveau_grctx.h"
 
 #define IS_G80 ((dev_priv->chipset & 0xf0) == 0x50)
 
@@ -111,9 +88,34 @@ nv50_graph_init_ctxctl(struct drm_device *dev)
 
 	NV_DEBUG(dev, "\n");
 
-	nouveau_grctx_prog_load(dev);
-	if (!dev_priv->engine.graph.ctxprog)
-		dev_priv->engine.graph.accel_blocked = true;
+	if (nouveau_ctxfw) {
+		nouveau_grctx_prog_load(dev);
+		dev_priv->engine.graph.grctx_size = 0x70000;
+	}
+	if (!dev_priv->engine.graph.ctxprog) {
+		struct nouveau_grctx ctx = {};
+		uint32_t *cp = kmalloc(512 * 4, GFP_KERNEL);
+		int i;
+		if (!cp) {
+			NV_ERROR(dev, "Couldn't alloc ctxprog! Disabling acceleration.\n");
+			dev_priv->engine.graph.accel_blocked = true;
+			return 0;
+		}
+		ctx.dev = dev;
+		ctx.mode = NOUVEAU_GRCTX_PROG;
+		ctx.data = cp;
+		ctx.ctxprog_max = 512;
+		if (!nv50_grctx_init(&ctx)) {
+			dev_priv->engine.graph.grctx_size = ctx.ctxvals_pos * 4;
+
+			nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_INDEX, 0);
+			for (i = 0; i < ctx.ctxprog_len; i++)
+				nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_DATA, cp[i]);
+		} else {
+			dev_priv->engine.graph.accel_blocked = true;
+		}
+		kfree(cp);
+	}
 
 	nv_wr32(dev, 0x400320, 4);
 	nv_wr32(dev, NV40_PGRAPH_CTXCTL_CUR, 0);
@@ -193,13 +195,13 @@ nv50_graph_create_context(struct nouveau_channel *chan)
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_gpuobj *ramin = chan->ramin->gpuobj;
 	struct nouveau_gpuobj *ctx;
-	uint32_t grctx_size = 0x70000;
+	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
 	int hdr, ret;
 
 	NV_DEBUG(dev, "ch%d\n", chan->id);
 
-	ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, grctx_size, 0x1000,
-				     NVOBJ_FLAG_ZERO_ALLOC |
+	ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, pgraph->grctx_size,
+				     0x1000, NVOBJ_FLAG_ZERO_ALLOC |
 				     NVOBJ_FLAG_ZERO_FREE, &chan->ramin_grctx);
 	if (ret)
 		return ret;
@@ -209,7 +211,7 @@ nv50_graph_create_context(struct nouveau_channel *chan)
 	dev_priv->engine.instmem.prepare_access(dev, true);
 	nv_wo32(dev, ramin, (hdr + 0x00)/4, 0x00190002);
 	nv_wo32(dev, ramin, (hdr + 0x04)/4, chan->ramin_grctx->instance +
-					   grctx_size - 1);
+					   pgraph->grctx_size - 1);
 	nv_wo32(dev, ramin, (hdr + 0x08)/4, chan->ramin_grctx->instance);
 	nv_wo32(dev, ramin, (hdr + 0x0c)/4, 0);
 	nv_wo32(dev, ramin, (hdr + 0x10)/4, 0);
@@ -217,7 +219,15 @@ nv50_graph_create_context(struct nouveau_channel *chan)
 	dev_priv->engine.instmem.finish_access(dev);
 
 	dev_priv->engine.instmem.prepare_access(dev, true);
-	nouveau_grctx_vals_load(dev, ctx);
+	if (!pgraph->ctxprog) {
+		struct nouveau_grctx ctx = {};
+		ctx.dev = chan->dev;
+		ctx.mode = NOUVEAU_GRCTX_VALS;
+		ctx.data = chan->ramin_grctx->gpuobj;
+		nv50_grctx_init(&ctx);
+	} else {
+		nouveau_grctx_vals_load(dev, ctx);
+	}
 	nv_wo32(dev, ctx, 0x00000/4, chan->ramin->instance >> 12);
 	if ((dev_priv->chipset & 0xf0) == 0xa0)
 		nv_wo32(dev, ctx, 0x00004/4, 0x00000000);
diff --git a/drivers/gpu/drm/nouveau/nv50_grctx.c b/drivers/gpu/drm/nouveau/nv50_grctx.c
new file mode 100644
index 0000000000000000000000000000000000000000..d105fcd42ca0d06febbed6db006b254928f8521e
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nv50_grctx.c
@@ -0,0 +1,2367 @@
+/*
+ * Copyright 2009 Marcin Kościelnicki
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#define CP_FLAG_CLEAR                 0
+#define CP_FLAG_SET                   1
+#define CP_FLAG_SWAP_DIRECTION        ((0 * 32) + 0)
+#define CP_FLAG_SWAP_DIRECTION_LOAD   0
+#define CP_FLAG_SWAP_DIRECTION_SAVE   1
+#define CP_FLAG_UNK01                 ((0 * 32) + 1)
+#define CP_FLAG_UNK01_CLEAR           0
+#define CP_FLAG_UNK01_SET             1
+#define CP_FLAG_UNK03                 ((0 * 32) + 3)
+#define CP_FLAG_UNK03_CLEAR           0
+#define CP_FLAG_UNK03_SET             1
+#define CP_FLAG_USER_SAVE             ((0 * 32) + 5)
+#define CP_FLAG_USER_SAVE_NOT_PENDING 0
+#define CP_FLAG_USER_SAVE_PENDING     1
+#define CP_FLAG_USER_LOAD             ((0 * 32) + 6)
+#define CP_FLAG_USER_LOAD_NOT_PENDING 0
+#define CP_FLAG_USER_LOAD_PENDING     1
+#define CP_FLAG_UNK0B                 ((0 * 32) + 0xb)
+#define CP_FLAG_UNK0B_CLEAR           0
+#define CP_FLAG_UNK0B_SET             1
+#define CP_FLAG_UNK1D                 ((0 * 32) + 0x1d)
+#define CP_FLAG_UNK1D_CLEAR           0
+#define CP_FLAG_UNK1D_SET             1
+#define CP_FLAG_UNK20                 ((1 * 32) + 0)
+#define CP_FLAG_UNK20_CLEAR           0
+#define CP_FLAG_UNK20_SET             1
+#define CP_FLAG_STATUS                ((2 * 32) + 0)
+#define CP_FLAG_STATUS_BUSY           0
+#define CP_FLAG_STATUS_IDLE           1
+#define CP_FLAG_AUTO_SAVE             ((2 * 32) + 4)
+#define CP_FLAG_AUTO_SAVE_NOT_PENDING 0
+#define CP_FLAG_AUTO_SAVE_PENDING     1
+#define CP_FLAG_AUTO_LOAD             ((2 * 32) + 5)
+#define CP_FLAG_AUTO_LOAD_NOT_PENDING 0
+#define CP_FLAG_AUTO_LOAD_PENDING     1
+#define CP_FLAG_XFER                  ((2 * 32) + 11)
+#define CP_FLAG_XFER_IDLE             0
+#define CP_FLAG_XFER_BUSY             1
+#define CP_FLAG_NEWCTX                ((2 * 32) + 12)
+#define CP_FLAG_NEWCTX_BUSY           0
+#define CP_FLAG_NEWCTX_DONE           1
+#define CP_FLAG_ALWAYS                ((2 * 32) + 13)
+#define CP_FLAG_ALWAYS_FALSE          0
+#define CP_FLAG_ALWAYS_TRUE           1
+
+#define CP_CTX                   0x00100000
+#define CP_CTX_COUNT             0x000f0000
+#define CP_CTX_COUNT_SHIFT               16
+#define CP_CTX_REG               0x00003fff
+#define CP_LOAD_SR               0x00200000
+#define CP_LOAD_SR_VALUE         0x000fffff
+#define CP_BRA                   0x00400000
+#define CP_BRA_IP                0x0001ff00
+#define CP_BRA_IP_SHIFT                   8
+#define CP_BRA_IF_CLEAR          0x00000080
+#define CP_BRA_FLAG              0x0000007f
+#define CP_WAIT                  0x00500000
+#define CP_WAIT_SET              0x00000080
+#define CP_WAIT_FLAG             0x0000007f
+#define CP_SET                   0x00700000
+#define CP_SET_1                 0x00000080
+#define CP_SET_FLAG              0x0000007f
+#define CP_NEWCTX                0x00600004
+#define CP_NEXT_TO_SWAP          0x00600005
+#define CP_SET_CONTEXT_POINTER   0x00600006
+#define CP_SET_XFER_POINTER      0x00600007
+#define CP_ENABLE                0x00600009
+#define CP_END                   0x0060000c
+#define CP_NEXT_TO_CURRENT       0x0060000d
+#define CP_DISABLE1              0x0090ffff
+#define CP_DISABLE2              0x0091ffff
+#define CP_XFER_1      0x008000ff
+#define CP_XFER_2      0x008800ff
+#define CP_SEEK_1      0x00c000ff
+#define CP_SEEK_2      0x00c800ff
+
+#include "drmP.h"
+#include "nouveau_drv.h"
+#include "nouveau_grctx.h"
+
+/*
+ * This code deals with PGRAPH contexts on NV50 family cards. Like NV40, it's
+ * the GPU itself that does context-switching, but it needs a special
+ * microcode to do it. And it's the driver's task to supply this microcode,
+ * further known as ctxprog, as well as the initial context values, known
+ * as ctxvals.
+ *
+ * Without ctxprog, you cannot switch contexts. Not even in software, since
+ * the majority of context [xfer strands] isn't accessible directly. You're
+ * stuck with a single channel, and you also suffer all the problems resulting
+ * from missing ctxvals, since you cannot load them.
+ *
+ * Without ctxvals, you're stuck with PGRAPH's default context. It's enough to
+ * run 2d operations, but trying to utilise 3d or CUDA will just lock you up,
+ * since you don't have... some sort of needed setup.
+ *
+ * Nouveau will just disable acceleration if not given ctxprog + ctxvals, since
+ * it's too much hassle to handle no-ctxprog as a special case.
+ */
+
+/*
+ * How ctxprogs work.
+ *
+ * The ctxprog is written in its own kind of microcode, with very small and
+ * crappy set of available commands. You upload it to a small [512 insns]
+ * area of memory on PGRAPH, and it'll be run when PFIFO wants PGRAPH to
+ * switch channel. or when the driver explicitely requests it. Stuff visible
+ * to ctxprog consists of: PGRAPH MMIO registers, PGRAPH context strands,
+ * the per-channel context save area in VRAM [known as ctxvals or grctx],
+ * 4 flags registers, a scratch register, two grctx pointers, plus many
+ * random poorly-understood details.
+ *
+ * When ctxprog runs, it's supposed to check what operations are asked of it,
+ * save old context if requested, optionally reset PGRAPH and switch to the
+ * new channel, and load the new context. Context consists of three major
+ * parts: subset of MMIO registers and two "xfer areas".
+ */
+
+/* TODO:
+ *  - document unimplemented bits compared to nvidia
+ *  - NVAx: make a TP subroutine, use it.
+ *  - use 0x4008fc instead of 0x1540?
+ */
+
+enum cp_label {
+	cp_check_load = 1,
+	cp_setup_auto_load,
+	cp_setup_load,
+	cp_setup_save,
+	cp_swap_state,
+	cp_prepare_exit,
+	cp_exit,
+};
+
+static void nv50_graph_construct_mmio(struct nouveau_grctx *ctx);
+static void nv50_graph_construct_xfer1(struct nouveau_grctx *ctx);
+static void nv50_graph_construct_xfer2(struct nouveau_grctx *ctx);
+
+/* Main function: construct the ctxprog skeleton, call the other functions. */
+
+int
+nv50_grctx_init(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+
+	switch (dev_priv->chipset) {
+	case 0x50:
+	case 0x84:
+	case 0x86:
+	case 0x92:
+	case 0x94:
+	case 0x96:
+	case 0x98:
+	case 0xa0:
+	case 0xa5:
+	case 0xa8:
+	case 0xaa:
+	case 0xac:
+		break;
+	default:
+		NV_ERROR(ctx->dev, "I don't know how to make a ctxprog for "
+				   "your NV%x card.\n", dev_priv->chipset);
+		NV_ERROR(ctx->dev, "Disabling acceleration. Please contact "
+				   "the devs.\n");
+		return -ENOSYS;
+	}
+	/* decide whether we're loading/unloading the context */
+	cp_bra (ctx, AUTO_SAVE, PENDING, cp_setup_save);
+	cp_bra (ctx, USER_SAVE, PENDING, cp_setup_save);
+
+	cp_name(ctx, cp_check_load);
+	cp_bra (ctx, AUTO_LOAD, PENDING, cp_setup_auto_load);
+	cp_bra (ctx, USER_LOAD, PENDING, cp_setup_load);
+	cp_bra (ctx, ALWAYS, TRUE, cp_exit);
+
+	/* setup for context load */
+	cp_name(ctx, cp_setup_auto_load);
+	cp_out (ctx, CP_DISABLE1);
+	cp_out (ctx, CP_DISABLE2);
+	cp_out (ctx, CP_ENABLE);
+	cp_out (ctx, CP_NEXT_TO_SWAP);
+	cp_set (ctx, UNK01, SET);
+	cp_name(ctx, cp_setup_load);
+	cp_out (ctx, CP_NEWCTX);
+	cp_wait(ctx, NEWCTX, BUSY);
+	cp_set (ctx, UNK1D, CLEAR);
+	cp_set (ctx, SWAP_DIRECTION, LOAD);
+	cp_bra (ctx, UNK0B, SET, cp_prepare_exit);
+	cp_bra (ctx, ALWAYS, TRUE, cp_swap_state);
+
+	/* setup for context save */
+	cp_name(ctx, cp_setup_save);
+	cp_set (ctx, UNK1D, SET);
+	cp_wait(ctx, STATUS, BUSY);
+	cp_set (ctx, UNK01, SET);
+	cp_set (ctx, SWAP_DIRECTION, SAVE);
+
+	/* general PGRAPH state */
+	cp_name(ctx, cp_swap_state);
+	cp_set (ctx, UNK03, SET);
+	cp_pos (ctx, 0x00004/4);
+	cp_ctx (ctx, 0x400828, 1); /* needed. otherwise, flickering happens. */
+	cp_pos (ctx, 0x00100/4);
+	nv50_graph_construct_mmio(ctx);
+	nv50_graph_construct_xfer1(ctx);
+	nv50_graph_construct_xfer2(ctx);
+
+	cp_bra (ctx, SWAP_DIRECTION, SAVE, cp_check_load);
+
+	cp_set (ctx, UNK20, SET);
+	cp_set (ctx, SWAP_DIRECTION, SAVE); /* no idea why this is needed, but fixes at least one lockup. */
+	cp_lsr (ctx, ctx->ctxvals_base);
+	cp_out (ctx, CP_SET_XFER_POINTER);
+	cp_lsr (ctx, 4);
+	cp_out (ctx, CP_SEEK_1);
+	cp_out (ctx, CP_XFER_1);
+	cp_wait(ctx, XFER, BUSY);
+
+	/* pre-exit state updates */
+	cp_name(ctx, cp_prepare_exit);
+	cp_set (ctx, UNK01, CLEAR);
+	cp_set (ctx, UNK03, CLEAR);
+	cp_set (ctx, UNK1D, CLEAR);
+
+	cp_bra (ctx, USER_SAVE, PENDING, cp_exit);
+	cp_out (ctx, CP_NEXT_TO_CURRENT);
+
+	cp_name(ctx, cp_exit);
+	cp_set (ctx, USER_SAVE, NOT_PENDING);
+	cp_set (ctx, USER_LOAD, NOT_PENDING);
+	cp_out (ctx, CP_END);
+	ctx->ctxvals_pos += 0x400; /* padding... no idea why you need it */
+
+	return 0;
+}
+
+/*
+ * Constructs MMIO part of ctxprog and ctxvals. Just a matter of knowing which
+ * registers to save/restore and the default values for them.
+ */
+
+static void
+nv50_graph_construct_mmio(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	int i, j;
+	int offset, base;
+	uint32_t units = nv_rd32 (ctx->dev, 0x1540);
+
+	/* 0800 */
+	cp_ctx(ctx, 0x400808, 7);
+	gr_def(ctx, 0x400814, 0x00000030);
+	cp_ctx(ctx, 0x400834, 0x32);
+	if (dev_priv->chipset == 0x50) {
+		gr_def(ctx, 0x400834, 0xff400040);
+		gr_def(ctx, 0x400838, 0xfff00080);
+		gr_def(ctx, 0x40083c, 0xfff70090);
+		gr_def(ctx, 0x400840, 0xffe806a8);
+	}
+	gr_def(ctx, 0x400844, 0x00000002);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		gr_def(ctx, 0x400894, 0x00001000);
+	gr_def(ctx, 0x4008e8, 0x00000003);
+	gr_def(ctx, 0x4008ec, 0x00001000);
+	if (dev_priv->chipset == 0x50)
+		cp_ctx(ctx, 0x400908, 0xb);
+	else if (dev_priv->chipset < 0xa0)
+		cp_ctx(ctx, 0x400908, 0xc);
+	else
+		cp_ctx(ctx, 0x400908, 0xe);
+
+	if (dev_priv->chipset >= 0xa0)
+		cp_ctx(ctx, 0x400b00, 0x1);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+		cp_ctx(ctx, 0x400b10, 0x1);
+		gr_def(ctx, 0x400b10, 0x0001629d);
+		cp_ctx(ctx, 0x400b20, 0x1);
+		gr_def(ctx, 0x400b20, 0x0001629d);
+	}
+
+	/* 0C00 */
+	cp_ctx(ctx, 0x400c08, 0x2);
+	gr_def(ctx, 0x400c08, 0x0000fe0c);
+
+	/* 1000 */
+	if (dev_priv->chipset < 0xa0) {
+		cp_ctx(ctx, 0x401008, 0x4);
+		gr_def(ctx, 0x401014, 0x00001000);
+	} else if (dev_priv->chipset == 0xa0 || dev_priv->chipset >= 0xaa) {
+		cp_ctx(ctx, 0x401008, 0x5);
+		gr_def(ctx, 0x401018, 0x00001000);
+	} else {
+		cp_ctx(ctx, 0x401008, 0x5);
+		gr_def(ctx, 0x401018, 0x00004000);
+	}
+
+	/* 1400 */
+	cp_ctx(ctx, 0x401400, 0x8);
+	cp_ctx(ctx, 0x401424, 0x3);
+	if (dev_priv->chipset == 0x50)
+		gr_def(ctx, 0x40142c, 0x0001fd87);
+	else
+		gr_def(ctx, 0x40142c, 0x00000187);
+	cp_ctx(ctx, 0x401540, 0x5);
+	gr_def(ctx, 0x401550, 0x00001018);
+
+	/* 1800 */
+	cp_ctx(ctx, 0x401814, 0x1);
+	gr_def(ctx, 0x401814, 0x000000ff);
+	if (dev_priv->chipset == 0x50) {
+		cp_ctx(ctx, 0x40181c, 0xe);
+		gr_def(ctx, 0x401850, 0x00000004);
+	} else if (dev_priv->chipset < 0xa0) {
+		cp_ctx(ctx, 0x40181c, 0xf);
+		gr_def(ctx, 0x401854, 0x00000004);
+	} else {
+		cp_ctx(ctx, 0x40181c, 0x13);
+		gr_def(ctx, 0x401864, 0x00000004);
+	}
+
+	/* 1C00 */
+	cp_ctx(ctx, 0x401c00, 0x1);
+	switch (dev_priv->chipset) {
+	case 0x50:
+		gr_def(ctx, 0x401c00, 0x0001005f);
+		break;
+	case 0x84:
+	case 0x86:
+	case 0x94:
+		gr_def(ctx, 0x401c00, 0x044d00df);
+		break;
+	case 0x92:
+	case 0x96:
+	case 0x98:
+	case 0xa0:
+	case 0xaa:
+	case 0xac:
+		gr_def(ctx, 0x401c00, 0x042500df);
+		break;
+	case 0xa5:
+	case 0xa8:
+		gr_def(ctx, 0x401c00, 0x142500df);
+		break;
+	}
+
+	/* 2400 */
+	cp_ctx(ctx, 0x402400, 0x1);
+	if (dev_priv->chipset == 0x50)
+		cp_ctx(ctx, 0x402408, 0x1);
+	else
+		cp_ctx(ctx, 0x402408, 0x2);
+	gr_def(ctx, 0x402408, 0x00000600);
+
+	/* 2800 */
+	cp_ctx(ctx, 0x402800, 0x1);
+	if (dev_priv->chipset == 0x50)
+		gr_def(ctx, 0x402800, 0x00000006);
+
+	/* 2C00 */
+	cp_ctx(ctx, 0x402c08, 0x6);
+	if (dev_priv->chipset != 0x50)
+		gr_def(ctx, 0x402c14, 0x01000000);
+	gr_def(ctx, 0x402c18, 0x000000ff);
+	if (dev_priv->chipset == 0x50)
+		cp_ctx(ctx, 0x402ca0, 0x1);
+	else
+		cp_ctx(ctx, 0x402ca0, 0x2);
+	if (dev_priv->chipset < 0xa0)
+		gr_def(ctx, 0x402ca0, 0x00000400);
+	else if (dev_priv->chipset == 0xa0 || dev_priv->chipset >= 0xaa)
+		gr_def(ctx, 0x402ca0, 0x00000800);
+	else
+		gr_def(ctx, 0x402ca0, 0x00000400);
+	cp_ctx(ctx, 0x402cac, 0x4);
+
+	/* 3000 */
+	cp_ctx(ctx, 0x403004, 0x1);
+	gr_def(ctx, 0x403004, 0x00000001);
+
+	/* 3404 */
+	if (dev_priv->chipset >= 0xa0) {
+		cp_ctx(ctx, 0x403404, 0x1);
+		gr_def(ctx, 0x403404, 0x00000001);
+	}
+
+	/* 5000 */
+	cp_ctx(ctx, 0x405000, 0x1);
+	switch (dev_priv->chipset) {
+	case 0x50:
+		gr_def(ctx, 0x405000, 0x00300080);
+		break;
+	case 0x84:
+	case 0xa0:
+	case 0xa5:
+	case 0xa8:
+	case 0xaa:
+	case 0xac:
+		gr_def(ctx, 0x405000, 0x000e0080);
+		break;
+	case 0x86:
+	case 0x92:
+	case 0x94:
+	case 0x96:
+	case 0x98:
+		gr_def(ctx, 0x405000, 0x00000080);
+		break;
+	}
+	cp_ctx(ctx, 0x405014, 0x1);
+	gr_def(ctx, 0x405014, 0x00000004);
+	cp_ctx(ctx, 0x40501c, 0x1);
+	cp_ctx(ctx, 0x405024, 0x1);
+	cp_ctx(ctx, 0x40502c, 0x1);
+
+	/* 5400 or maybe 4800 */
+	if (dev_priv->chipset == 0x50) {
+		offset = 0x405400;
+		cp_ctx(ctx, 0x405400, 0xea);
+	} else if (dev_priv->chipset < 0x94) {
+		offset = 0x405400;
+		cp_ctx(ctx, 0x405400, 0xcb);
+	} else if (dev_priv->chipset < 0xa0) {
+		offset = 0x405400;
+		cp_ctx(ctx, 0x405400, 0xcc);
+	} else if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+		offset = 0x404800;
+		cp_ctx(ctx, 0x404800, 0xda);
+	} else {
+		offset = 0x405400;
+		cp_ctx(ctx, 0x405400, 0xd4);
+	}
+	gr_def(ctx, offset + 0x0c, 0x00000002);
+	gr_def(ctx, offset + 0x10, 0x00000001);
+	if (dev_priv->chipset >= 0x94)
+		offset += 4;
+	gr_def(ctx, offset + 0x1c, 0x00000001);
+	gr_def(ctx, offset + 0x20, 0x00000100);
+	gr_def(ctx, offset + 0x38, 0x00000002);
+	gr_def(ctx, offset + 0x3c, 0x00000001);
+	gr_def(ctx, offset + 0x40, 0x00000001);
+	gr_def(ctx, offset + 0x50, 0x00000001);
+	gr_def(ctx, offset + 0x54, 0x003fffff);
+	gr_def(ctx, offset + 0x58, 0x00001fff);
+	gr_def(ctx, offset + 0x60, 0x00000001);
+	gr_def(ctx, offset + 0x64, 0x00000001);
+	gr_def(ctx, offset + 0x6c, 0x00000001);
+	gr_def(ctx, offset + 0x70, 0x00000001);
+	gr_def(ctx, offset + 0x74, 0x00000001);
+	gr_def(ctx, offset + 0x78, 0x00000004);
+	gr_def(ctx, offset + 0x7c, 0x00000001);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		offset += 4;
+	gr_def(ctx, offset + 0x80, 0x00000001);
+	gr_def(ctx, offset + 0x84, 0x00000001);
+	gr_def(ctx, offset + 0x88, 0x00000007);
+	gr_def(ctx, offset + 0x8c, 0x00000001);
+	gr_def(ctx, offset + 0x90, 0x00000007);
+	gr_def(ctx, offset + 0x94, 0x00000001);
+	gr_def(ctx, offset + 0x98, 0x00000001);
+	gr_def(ctx, offset + 0x9c, 0x00000001);
+	if (dev_priv->chipset == 0x50) {
+		 gr_def(ctx, offset + 0xb0, 0x00000001);
+		 gr_def(ctx, offset + 0xb4, 0x00000001);
+		 gr_def(ctx, offset + 0xbc, 0x00000001);
+		 gr_def(ctx, offset + 0xc0, 0x0000000a);
+		 gr_def(ctx, offset + 0xd0, 0x00000040);
+		 gr_def(ctx, offset + 0xd8, 0x00000002);
+		 gr_def(ctx, offset + 0xdc, 0x00000100);
+		 gr_def(ctx, offset + 0xe0, 0x00000001);
+		 gr_def(ctx, offset + 0xe4, 0x00000100);
+		 gr_def(ctx, offset + 0x100, 0x00000001);
+		 gr_def(ctx, offset + 0x124, 0x00000004);
+		 gr_def(ctx, offset + 0x13c, 0x00000001);
+		 gr_def(ctx, offset + 0x140, 0x00000100);
+		 gr_def(ctx, offset + 0x148, 0x00000001);
+		 gr_def(ctx, offset + 0x154, 0x00000100);
+		 gr_def(ctx, offset + 0x158, 0x00000001);
+		 gr_def(ctx, offset + 0x15c, 0x00000100);
+		 gr_def(ctx, offset + 0x164, 0x00000001);
+		 gr_def(ctx, offset + 0x170, 0x00000100);
+		 gr_def(ctx, offset + 0x174, 0x00000001);
+		 gr_def(ctx, offset + 0x17c, 0x00000001);
+		 gr_def(ctx, offset + 0x188, 0x00000002);
+		 gr_def(ctx, offset + 0x190, 0x00000001);
+		 gr_def(ctx, offset + 0x198, 0x00000001);
+		 gr_def(ctx, offset + 0x1ac, 0x00000003);
+		 offset += 0xd0;
+	} else {
+		gr_def(ctx, offset + 0xb0, 0x00000001);
+		gr_def(ctx, offset + 0xb4, 0x00000100);
+		gr_def(ctx, offset + 0xbc, 0x00000001);
+		gr_def(ctx, offset + 0xc8, 0x00000100);
+		gr_def(ctx, offset + 0xcc, 0x00000001);
+		gr_def(ctx, offset + 0xd0, 0x00000100);
+		gr_def(ctx, offset + 0xd8, 0x00000001);
+		gr_def(ctx, offset + 0xe4, 0x00000100);
+	}
+	gr_def(ctx, offset + 0xf8, 0x00000004);
+	gr_def(ctx, offset + 0xfc, 0x00000070);
+	gr_def(ctx, offset + 0x100, 0x00000080);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		offset += 4;
+	gr_def(ctx, offset + 0x114, 0x0000000c);
+	if (dev_priv->chipset == 0x50)
+		offset -= 4;
+	gr_def(ctx, offset + 0x11c, 0x00000008);
+	gr_def(ctx, offset + 0x120, 0x00000014);
+	if (dev_priv->chipset == 0x50) {
+		gr_def(ctx, offset + 0x124, 0x00000026);
+		offset -= 0x18;
+	} else {
+		gr_def(ctx, offset + 0x128, 0x00000029);
+		gr_def(ctx, offset + 0x12c, 0x00000027);
+		gr_def(ctx, offset + 0x130, 0x00000026);
+		gr_def(ctx, offset + 0x134, 0x00000008);
+		gr_def(ctx, offset + 0x138, 0x00000004);
+		gr_def(ctx, offset + 0x13c, 0x00000027);
+	}
+	gr_def(ctx, offset + 0x148, 0x00000001);
+	gr_def(ctx, offset + 0x14c, 0x00000002);
+	gr_def(ctx, offset + 0x150, 0x00000003);
+	gr_def(ctx, offset + 0x154, 0x00000004);
+	gr_def(ctx, offset + 0x158, 0x00000005);
+	gr_def(ctx, offset + 0x15c, 0x00000006);
+	gr_def(ctx, offset + 0x160, 0x00000007);
+	gr_def(ctx, offset + 0x164, 0x00000001);
+	gr_def(ctx, offset + 0x1a8, 0x000000cf);
+	if (dev_priv->chipset == 0x50)
+		offset -= 4;
+	gr_def(ctx, offset + 0x1d8, 0x00000080);
+	gr_def(ctx, offset + 0x1dc, 0x00000004);
+	gr_def(ctx, offset + 0x1e0, 0x00000004);
+	if (dev_priv->chipset == 0x50)
+		offset -= 4;
+	else
+		gr_def(ctx, offset + 0x1e4, 0x00000003);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+		gr_def(ctx, offset + 0x1ec, 0x00000003);
+		offset += 8;
+	}
+	gr_def(ctx, offset + 0x1e8, 0x00000001);
+	if (dev_priv->chipset == 0x50)
+		offset -= 4;
+	gr_def(ctx, offset + 0x1f4, 0x00000012);
+	gr_def(ctx, offset + 0x1f8, 0x00000010);
+	gr_def(ctx, offset + 0x1fc, 0x0000000c);
+	gr_def(ctx, offset + 0x200, 0x00000001);
+	gr_def(ctx, offset + 0x210, 0x00000004);
+	gr_def(ctx, offset + 0x214, 0x00000002);
+	gr_def(ctx, offset + 0x218, 0x00000004);
+	if (dev_priv->chipset >= 0xa0)
+		offset += 4;
+	gr_def(ctx, offset + 0x224, 0x003fffff);
+	gr_def(ctx, offset + 0x228, 0x00001fff);
+	if (dev_priv->chipset == 0x50)
+		offset -= 0x20;
+	else if (dev_priv->chipset >= 0xa0) {
+		gr_def(ctx, offset + 0x250, 0x00000001);
+		gr_def(ctx, offset + 0x254, 0x00000001);
+		gr_def(ctx, offset + 0x258, 0x00000002);
+		offset += 0x10;
+	}
+	gr_def(ctx, offset + 0x250, 0x00000004);
+	gr_def(ctx, offset + 0x254, 0x00000014);
+	gr_def(ctx, offset + 0x258, 0x00000001);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		offset += 4;
+	gr_def(ctx, offset + 0x264, 0x00000002);
+	if (dev_priv->chipset >= 0xa0)
+		offset += 8;
+	gr_def(ctx, offset + 0x270, 0x00000001);
+	gr_def(ctx, offset + 0x278, 0x00000002);
+	gr_def(ctx, offset + 0x27c, 0x00001000);
+	if (dev_priv->chipset == 0x50)
+		offset -= 0xc;
+	else {
+		gr_def(ctx, offset + 0x280, 0x00000e00);
+		gr_def(ctx, offset + 0x284, 0x00001000);
+		gr_def(ctx, offset + 0x288, 0x00001e00);
+	}
+	gr_def(ctx, offset + 0x290, 0x00000001);
+	gr_def(ctx, offset + 0x294, 0x00000001);
+	gr_def(ctx, offset + 0x298, 0x00000001);
+	gr_def(ctx, offset + 0x29c, 0x00000001);
+	gr_def(ctx, offset + 0x2a0, 0x00000001);
+	gr_def(ctx, offset + 0x2b0, 0x00000200);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+		gr_def(ctx, offset + 0x2b4, 0x00000200);
+		offset += 4;
+	}
+	if (dev_priv->chipset < 0xa0) {
+		gr_def(ctx, offset + 0x2b8, 0x00000001);
+		gr_def(ctx, offset + 0x2bc, 0x00000070);
+		gr_def(ctx, offset + 0x2c0, 0x00000080);
+		gr_def(ctx, offset + 0x2cc, 0x00000001);
+		gr_def(ctx, offset + 0x2d0, 0x00000070);
+		gr_def(ctx, offset + 0x2d4, 0x00000080);
+	} else {
+		gr_def(ctx, offset + 0x2b8, 0x00000001);
+		gr_def(ctx, offset + 0x2bc, 0x000000f0);
+		gr_def(ctx, offset + 0x2c0, 0x000000ff);
+		gr_def(ctx, offset + 0x2cc, 0x00000001);
+		gr_def(ctx, offset + 0x2d0, 0x000000f0);
+		gr_def(ctx, offset + 0x2d4, 0x000000ff);
+		gr_def(ctx, offset + 0x2dc, 0x00000009);
+		offset += 4;
+	}
+	gr_def(ctx, offset + 0x2e4, 0x00000001);
+	gr_def(ctx, offset + 0x2e8, 0x000000cf);
+	gr_def(ctx, offset + 0x2f0, 0x00000001);
+	gr_def(ctx, offset + 0x300, 0x000000cf);
+	gr_def(ctx, offset + 0x308, 0x00000002);
+	gr_def(ctx, offset + 0x310, 0x00000001);
+	gr_def(ctx, offset + 0x318, 0x00000001);
+	gr_def(ctx, offset + 0x320, 0x000000cf);
+	gr_def(ctx, offset + 0x324, 0x000000cf);
+	gr_def(ctx, offset + 0x328, 0x00000001);
+
+	/* 6000? */
+	if (dev_priv->chipset == 0x50)
+		cp_ctx(ctx, 0x4063e0, 0x1);
+
+	/* 6800 */
+	if (dev_priv->chipset < 0x90) {
+		cp_ctx(ctx, 0x406814, 0x2b);
+		gr_def(ctx, 0x406818, 0x00000f80);
+		gr_def(ctx, 0x406860, 0x007f0080);
+		gr_def(ctx, 0x40689c, 0x007f0080);
+	} else {
+		cp_ctx(ctx, 0x406814, 0x4);
+		if (dev_priv->chipset == 0x98)
+			gr_def(ctx, 0x406818, 0x00000f80);
+		else
+			gr_def(ctx, 0x406818, 0x00001f80);
+		if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+			gr_def(ctx, 0x40681c, 0x00000030);
+		cp_ctx(ctx, 0x406830, 0x3);
+	}
+
+	/* 7000: per-ROP group state */
+	for (i = 0; i < 8; i++) {
+		if (units & (1<<(i+16))) {
+			cp_ctx(ctx, 0x407000 + (i<<8), 3);
+			if (dev_priv->chipset == 0x50)
+				gr_def(ctx, 0x407000 + (i<<8), 0x1b74f820);
+			else if (dev_priv->chipset != 0xa5)
+				gr_def(ctx, 0x407000 + (i<<8), 0x3b74f821);
+			else
+				gr_def(ctx, 0x407000 + (i<<8), 0x7b74f821);
+			gr_def(ctx, 0x407004 + (i<<8), 0x89058001);
+
+			if (dev_priv->chipset == 0x50) {
+				cp_ctx(ctx, 0x407010 + (i<<8), 1);
+			} else if (dev_priv->chipset < 0xa0) {
+				cp_ctx(ctx, 0x407010 + (i<<8), 2);
+				gr_def(ctx, 0x407010 + (i<<8), 0x00001000);
+				gr_def(ctx, 0x407014 + (i<<8), 0x0000001f);
+			} else {
+				cp_ctx(ctx, 0x407010 + (i<<8), 3);
+				gr_def(ctx, 0x407010 + (i<<8), 0x00001000);
+				if (dev_priv->chipset != 0xa5)
+					gr_def(ctx, 0x407014 + (i<<8), 0x000000ff);
+				else
+					gr_def(ctx, 0x407014 + (i<<8), 0x000001ff);
+			}
+
+			cp_ctx(ctx, 0x407080 + (i<<8), 4);
+			if (dev_priv->chipset != 0xa5)
+				gr_def(ctx, 0x407080 + (i<<8), 0x027c10fa);
+			else
+				gr_def(ctx, 0x407080 + (i<<8), 0x827c10fa);
+			if (dev_priv->chipset == 0x50)
+				gr_def(ctx, 0x407084 + (i<<8), 0x000000c0);
+			else
+				gr_def(ctx, 0x407084 + (i<<8), 0x400000c0);
+			gr_def(ctx, 0x407088 + (i<<8), 0xb7892080);
+
+			if (dev_priv->chipset < 0xa0)
+				cp_ctx(ctx, 0x407094 + (i<<8), 1);
+			else if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa)
+				cp_ctx(ctx, 0x407094 + (i<<8), 3);
+			else {
+				cp_ctx(ctx, 0x407094 + (i<<8), 4);
+				gr_def(ctx, 0x4070a0 + (i<<8), 1);
+			}
+		}
+	}
+
+	cp_ctx(ctx, 0x407c00, 0x3);
+	if (dev_priv->chipset < 0x90)
+		gr_def(ctx, 0x407c00, 0x00010040);
+	else if (dev_priv->chipset < 0xa0)
+		gr_def(ctx, 0x407c00, 0x00390040);
+	else
+		gr_def(ctx, 0x407c00, 0x003d0040);
+	gr_def(ctx, 0x407c08, 0x00000022);
+	if (dev_priv->chipset >= 0xa0) {
+		cp_ctx(ctx, 0x407c10, 0x3);
+		cp_ctx(ctx, 0x407c20, 0x1);
+		cp_ctx(ctx, 0x407c2c, 0x1);
+	}
+
+	if (dev_priv->chipset < 0xa0) {
+		cp_ctx(ctx, 0x407d00, 0x9);
+	} else {
+		cp_ctx(ctx, 0x407d00, 0x15);
+	}
+	if (dev_priv->chipset == 0x98)
+		gr_def(ctx, 0x407d08, 0x00380040);
+	else {
+		if (dev_priv->chipset < 0x90)
+			gr_def(ctx, 0x407d08, 0x00010040);
+		else if (dev_priv->chipset < 0xa0)
+			gr_def(ctx, 0x407d08, 0x00390040);
+		else
+			gr_def(ctx, 0x407d08, 0x003d0040);
+		gr_def(ctx, 0x407d0c, 0x00000022);
+	}
+
+	/* 8000+: per-TP state */
+	for (i = 0; i < 10; i++) {
+		if (units & (1<<i)) {
+			if (dev_priv->chipset < 0xa0)
+				base = 0x408000 + (i<<12);
+			else
+				base = 0x408000 + (i<<11);
+			if (dev_priv->chipset < 0xa0)
+				offset = base + 0xc00;
+			else
+				offset = base + 0x80;
+			cp_ctx(ctx, offset + 0x00, 1);
+			gr_def(ctx, offset + 0x00, 0x0000ff0a);
+			cp_ctx(ctx, offset + 0x08, 1);
+
+			/* per-MP state */
+			for (j = 0; j < (dev_priv->chipset < 0xa0 ? 2 : 4); j++) {
+				if (!(units & (1 << (j+24)))) continue;
+				if (dev_priv->chipset < 0xa0)
+					offset = base + 0x200 + (j<<7);
+				else
+					offset = base + 0x100 + (j<<7);
+				cp_ctx(ctx, offset, 0x20);
+				gr_def(ctx, offset + 0x00, 0x01800000);
+				gr_def(ctx, offset + 0x04, 0x00160000);
+				gr_def(ctx, offset + 0x08, 0x01800000);
+				gr_def(ctx, offset + 0x18, 0x0003ffff);
+				switch (dev_priv->chipset) {
+				case 0x50:
+					gr_def(ctx, offset + 0x1c, 0x00080000);
+					break;
+				case 0x84:
+					gr_def(ctx, offset + 0x1c, 0x00880000);
+					break;
+				case 0x86:
+					gr_def(ctx, offset + 0x1c, 0x008c0000);
+					break;
+				case 0x92:
+				case 0x96:
+				case 0x98:
+					gr_def(ctx, offset + 0x1c, 0x118c0000);
+					break;
+				case 0x94:
+					gr_def(ctx, offset + 0x1c, 0x10880000);
+					break;
+				case 0xa0:
+				case 0xa5:
+					gr_def(ctx, offset + 0x1c, 0x310c0000);
+					break;
+				case 0xa8:
+				case 0xaa:
+				case 0xac:
+					gr_def(ctx, offset + 0x1c, 0x300c0000);
+					break;
+				}
+				gr_def(ctx, offset + 0x40, 0x00010401);
+				if (dev_priv->chipset == 0x50)
+					gr_def(ctx, offset + 0x48, 0x00000040);
+				else
+					gr_def(ctx, offset + 0x48, 0x00000078);
+				gr_def(ctx, offset + 0x50, 0x000000bf);
+				gr_def(ctx, offset + 0x58, 0x00001210);
+				if (dev_priv->chipset == 0x50)
+					gr_def(ctx, offset + 0x5c, 0x00000080);
+				else
+					gr_def(ctx, offset + 0x5c, 0x08000080);
+				if (dev_priv->chipset >= 0xa0)
+					gr_def(ctx, offset + 0x68, 0x0000003e);
+			}
+
+			if (dev_priv->chipset < 0xa0)
+				cp_ctx(ctx, base + 0x300, 0x4);
+			else
+				cp_ctx(ctx, base + 0x300, 0x5);
+			if (dev_priv->chipset == 0x50)
+				gr_def(ctx, base + 0x304, 0x00007070);
+			else if (dev_priv->chipset < 0xa0)
+				gr_def(ctx, base + 0x304, 0x00027070);
+			else if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa)
+				gr_def(ctx, base + 0x304, 0x01127070);
+			else
+				gr_def(ctx, base + 0x304, 0x05127070);
+
+			if (dev_priv->chipset < 0xa0)
+				cp_ctx(ctx, base + 0x318, 1);
+			else
+				cp_ctx(ctx, base + 0x320, 1);
+			if (dev_priv->chipset == 0x50)
+				gr_def(ctx, base + 0x318, 0x0003ffff);
+			else if (dev_priv->chipset < 0xa0)
+				gr_def(ctx, base + 0x318, 0x03ffffff);
+			else
+				gr_def(ctx, base + 0x320, 0x07ffffff);
+
+			if (dev_priv->chipset < 0xa0)
+				cp_ctx(ctx, base + 0x324, 5);
+			else
+				cp_ctx(ctx, base + 0x328, 4);
+
+			if (dev_priv->chipset < 0xa0) {
+				cp_ctx(ctx, base + 0x340, 9);
+				offset = base + 0x340;
+			} else if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa) {
+				cp_ctx(ctx, base + 0x33c, 0xb);
+				offset = base + 0x344;
+			} else {
+				cp_ctx(ctx, base + 0x33c, 0xd);
+				offset = base + 0x344;
+			}
+			gr_def(ctx, offset + 0x0, 0x00120407);
+			gr_def(ctx, offset + 0x4, 0x05091507);
+			if (dev_priv->chipset == 0x84)
+				gr_def(ctx, offset + 0x8, 0x05100202);
+			else
+				gr_def(ctx, offset + 0x8, 0x05010202);
+			gr_def(ctx, offset + 0xc, 0x00030201);
+
+			cp_ctx(ctx, base + 0x400, 2);
+			gr_def(ctx, base + 0x404, 0x00000040);
+			cp_ctx(ctx, base + 0x40c, 2);
+			gr_def(ctx, base + 0x40c, 0x0d0c0b0a);
+			gr_def(ctx, base + 0x410, 0x00141210);
+
+			if (dev_priv->chipset < 0xa0)
+				offset = base + 0x800;
+			else
+				offset = base + 0x500;
+			cp_ctx(ctx, offset, 6);
+			gr_def(ctx, offset + 0x0, 0x000001f0);
+			gr_def(ctx, offset + 0x4, 0x00000001);
+			gr_def(ctx, offset + 0x8, 0x00000003);
+			if (dev_priv->chipset == 0x50 || dev_priv->chipset >= 0xaa)
+				gr_def(ctx, offset + 0xc, 0x00008000);
+			gr_def(ctx, offset + 0x14, 0x00039e00);
+			cp_ctx(ctx, offset + 0x1c, 2);
+			if (dev_priv->chipset == 0x50)
+				gr_def(ctx, offset + 0x1c, 0x00000040);
+			else
+				gr_def(ctx, offset + 0x1c, 0x00000100);
+			gr_def(ctx, offset + 0x20, 0x00003800);
+
+			if (dev_priv->chipset >= 0xa0) {
+				cp_ctx(ctx, base + 0x54c, 2);
+				if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa)
+					gr_def(ctx, base + 0x54c, 0x003fe006);
+				else
+					gr_def(ctx, base + 0x54c, 0x003fe007);
+				gr_def(ctx, base + 0x550, 0x003fe000);
+			}
+
+			if (dev_priv->chipset < 0xa0)
+				offset = base + 0xa00;
+			else
+				offset = base + 0x680;
+			cp_ctx(ctx, offset, 1);
+			gr_def(ctx, offset, 0x00404040);
+
+			if (dev_priv->chipset < 0xa0)
+				offset = base + 0xe00;
+			else
+				offset = base + 0x700;
+			cp_ctx(ctx, offset, 2);
+			if (dev_priv->chipset < 0xa0)
+				gr_def(ctx, offset, 0x0077f005);
+			else if (dev_priv->chipset == 0xa5)
+				gr_def(ctx, offset, 0x6cf7f007);
+			else if (dev_priv->chipset == 0xa8)
+				gr_def(ctx, offset, 0x6cfff007);
+			else if (dev_priv->chipset == 0xac)
+				gr_def(ctx, offset, 0x0cfff007);
+			else
+				gr_def(ctx, offset, 0x0cf7f007);
+			if (dev_priv->chipset == 0x50)
+				gr_def(ctx, offset + 0x4, 0x00007fff);
+			else if (dev_priv->chipset < 0xa0)
+				gr_def(ctx, offset + 0x4, 0x003f7fff);
+			else
+				gr_def(ctx, offset + 0x4, 0x02bf7fff);
+			cp_ctx(ctx, offset + 0x2c, 1);
+			if (dev_priv->chipset == 0x50) {
+				cp_ctx(ctx, offset + 0x50, 9);
+				gr_def(ctx, offset + 0x54, 0x000003ff);
+				gr_def(ctx, offset + 0x58, 0x00000003);
+				gr_def(ctx, offset + 0x5c, 0x00000003);
+				gr_def(ctx, offset + 0x60, 0x000001ff);
+				gr_def(ctx, offset + 0x64, 0x0000001f);
+				gr_def(ctx, offset + 0x68, 0x0000000f);
+				gr_def(ctx, offset + 0x6c, 0x0000000f);
+			} else if(dev_priv->chipset < 0xa0) {
+				cp_ctx(ctx, offset + 0x50, 1);
+				cp_ctx(ctx, offset + 0x70, 1);
+			} else {
+				cp_ctx(ctx, offset + 0x50, 1);
+				cp_ctx(ctx, offset + 0x60, 5);
+			}
+		}
+	}
+}
+
+/*
+ * xfer areas. These are a pain.
+ *
+ * There are 2 xfer areas: the first one is big and contains all sorts of
+ * stuff, the second is small and contains some per-TP context.
+ *
+ * Each area is split into 8 "strands". The areas, when saved to grctx,
+ * are made of 8-word blocks. Each block contains a single word from
+ * each strand. The strands are independent of each other, their
+ * addresses are unrelated to each other, and data in them is closely
+ * packed together. The strand layout varies a bit between cards: here
+ * and there, a single word is thrown out in the middle and the whole
+ * strand is offset by a bit from corresponding one on another chipset.
+ * For this reason, addresses of stuff in strands are almost useless.
+ * Knowing sequence of stuff and size of gaps between them is much more
+ * useful, and that's how we build the strands in our generator.
+ *
+ * NVA0 takes this mess to a whole new level by cutting the old strands
+ * into a few dozen pieces [known as genes], rearranging them randomly,
+ * and putting them back together to make new strands. Hopefully these
+ * genes correspond more or less directly to the same PGRAPH subunits
+ * as in 400040 register.
+ *
+ * The most common value in default context is 0, and when the genes
+ * are separated by 0's, gene bounduaries are quite speculative...
+ * some of them can be clearly deduced, others can be guessed, and yet
+ * others won't be resolved without figuring out the real meaning of
+ * given ctxval. For the same reason, ending point of each strand
+ * is unknown. Except for strand 0, which is the longest strand and
+ * its end corresponds to end of the whole xfer.
+ *
+ * An unsolved mystery is the seek instruction: it takes an argument
+ * in bits 8-18, and that argument is clearly the place in strands to
+ * seek to... but the offsets don't seem to correspond to offsets as
+ * seen in grctx. Perhaps there's another, real, not randomly-changing
+ * addressing in strands, and the xfer insn just happens to skip over
+ * the unused bits? NV10-NV30 PIPE comes to mind...
+ *
+ * As far as I know, there's no way to access the xfer areas directly
+ * without the help of ctxprog.
+ */
+
+static inline void
+xf_emit(struct nouveau_grctx *ctx, int num, uint32_t val) {
+	int i;
+	if (val && ctx->mode == NOUVEAU_GRCTX_VALS)
+		for (i = 0; i < num; i++)
+			nv_wo32(ctx->dev, ctx->data, ctx->ctxvals_pos + (i << 3), val);
+	ctx->ctxvals_pos += num << 3;
+}
+
+/* Gene declarations... */
+
+static void nv50_graph_construct_gene_m2mf(struct nouveau_grctx *ctx);
+static void nv50_graph_construct_gene_unk1(struct nouveau_grctx *ctx);
+static void nv50_graph_construct_gene_unk2(struct nouveau_grctx *ctx);
+static void nv50_graph_construct_gene_unk3(struct nouveau_grctx *ctx);
+static void nv50_graph_construct_gene_unk4(struct nouveau_grctx *ctx);
+static void nv50_graph_construct_gene_unk5(struct nouveau_grctx *ctx);
+static void nv50_graph_construct_gene_unk6(struct nouveau_grctx *ctx);
+static void nv50_graph_construct_gene_unk7(struct nouveau_grctx *ctx);
+static void nv50_graph_construct_gene_unk8(struct nouveau_grctx *ctx);
+static void nv50_graph_construct_gene_unk9(struct nouveau_grctx *ctx);
+static void nv50_graph_construct_gene_unk10(struct nouveau_grctx *ctx);
+static void nv50_graph_construct_gene_ropc(struct nouveau_grctx *ctx);
+static void nv50_graph_construct_xfer_tp(struct nouveau_grctx *ctx);
+
+static void
+nv50_graph_construct_xfer1(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	int i;
+	int offset;
+	int size = 0;
+	uint32_t units = nv_rd32 (ctx->dev, 0x1540);
+
+	offset = (ctx->ctxvals_pos+0x3f)&~0x3f;
+	ctx->ctxvals_base = offset;
+
+	if (dev_priv->chipset < 0xa0) {
+		/* Strand 0 */
+		ctx->ctxvals_pos = offset;
+		switch (dev_priv->chipset) {
+		case 0x50:
+			xf_emit(ctx, 0x99, 0);
+			break;
+		case 0x84:
+		case 0x86:
+			xf_emit(ctx, 0x384, 0);
+			break;
+		case 0x92:
+		case 0x94:
+		case 0x96:
+		case 0x98:
+			xf_emit(ctx, 0x380, 0);
+			break;
+		}
+		nv50_graph_construct_gene_m2mf (ctx);
+		switch (dev_priv->chipset) {
+		case 0x50:
+		case 0x84:
+		case 0x86:
+		case 0x98:
+			xf_emit(ctx, 0x4c4, 0);
+			break;
+		case 0x92:
+		case 0x94:
+		case 0x96:
+			xf_emit(ctx, 0x984, 0);
+			break;
+		}
+		nv50_graph_construct_gene_unk5(ctx);
+		if (dev_priv->chipset == 0x50)
+			xf_emit(ctx, 0xa, 0);
+		else
+			xf_emit(ctx, 0xb, 0);
+		nv50_graph_construct_gene_unk4(ctx);
+		nv50_graph_construct_gene_unk3(ctx);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+
+		/* Strand 1 */
+		ctx->ctxvals_pos = offset + 0x1;
+		nv50_graph_construct_gene_unk6(ctx);
+		nv50_graph_construct_gene_unk7(ctx);
+		nv50_graph_construct_gene_unk8(ctx);
+		switch (dev_priv->chipset) {
+		case 0x50:
+		case 0x92:
+			xf_emit(ctx, 0xfb, 0);
+			break;
+		case 0x84:
+			xf_emit(ctx, 0xd3, 0);
+			break;
+		case 0x94:
+		case 0x96:
+			xf_emit(ctx, 0xab, 0);
+			break;
+		case 0x86:
+		case 0x98:
+			xf_emit(ctx, 0x6b, 0);
+			break;
+		}
+		xf_emit(ctx, 2, 0x4e3bfdf);
+		xf_emit(ctx, 4, 0);
+		xf_emit(ctx, 1, 0x0fac6881);
+		xf_emit(ctx, 0xb, 0);
+		xf_emit(ctx, 2, 0x4e3bfdf);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+
+		/* Strand 2 */
+		ctx->ctxvals_pos = offset + 0x2;
+		switch (dev_priv->chipset) {
+		case 0x50:
+		case 0x92:
+			xf_emit(ctx, 0xa80, 0);
+			break;
+		case 0x84:
+			xf_emit(ctx, 0xa7e, 0);
+			break;
+		case 0x94:
+		case 0x96:
+			xf_emit(ctx, 0xa7c, 0);
+			break;
+		case 0x86:
+		case 0x98:
+			xf_emit(ctx, 0xa7a, 0);
+			break;
+		}
+		xf_emit(ctx, 1, 0x3fffff);
+		xf_emit(ctx, 2, 0);
+		xf_emit(ctx, 1, 0x1fff);
+		xf_emit(ctx, 0xe, 0);
+		nv50_graph_construct_gene_unk9(ctx);
+		nv50_graph_construct_gene_unk2(ctx);
+		nv50_graph_construct_gene_unk1(ctx);
+		nv50_graph_construct_gene_unk10(ctx);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+
+		/* Strand 3: per-ROP group state */
+		ctx->ctxvals_pos = offset + 3;
+		for (i = 0; i < 6; i++)
+			if (units & (1 << (i + 16)))
+				nv50_graph_construct_gene_ropc(ctx);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+
+		/* Strands 4-7: per-TP state */
+		for (i = 0; i < 4; i++) {
+			ctx->ctxvals_pos = offset + 4 + i;
+			if (units & (1 << (2 * i)))
+				nv50_graph_construct_xfer_tp(ctx);
+			if (units & (1 << (2 * i + 1)))
+				nv50_graph_construct_xfer_tp(ctx);
+			if ((ctx->ctxvals_pos-offset)/8 > size)
+				size = (ctx->ctxvals_pos-offset)/8;
+		}
+	} else {
+		/* Strand 0 */
+		ctx->ctxvals_pos = offset;
+		if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+			xf_emit(ctx, 0x385, 0);
+		else
+			xf_emit(ctx, 0x384, 0);
+		nv50_graph_construct_gene_m2mf(ctx);
+		xf_emit(ctx, 0x950, 0);
+		nv50_graph_construct_gene_unk10(ctx);
+		xf_emit(ctx, 1, 0x0fac6881);
+		if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+			xf_emit(ctx, 1, 1);
+			xf_emit(ctx, 3, 0);
+		}
+		nv50_graph_construct_gene_unk8(ctx);
+		if (dev_priv->chipset == 0xa0)
+			xf_emit(ctx, 0x189, 0);
+		else if (dev_priv->chipset < 0xa8)
+			xf_emit(ctx, 0x99, 0);
+		else if (dev_priv->chipset == 0xaa)
+			xf_emit(ctx, 0x65, 0);
+		else
+			xf_emit(ctx, 0x6d, 0);
+		nv50_graph_construct_gene_unk9(ctx);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+
+		/* Strand 1 */
+		ctx->ctxvals_pos = offset + 1;
+		nv50_graph_construct_gene_unk1(ctx);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+
+		/* Strand 2 */
+		ctx->ctxvals_pos = offset + 2;
+		if (dev_priv->chipset == 0xa0) {
+			nv50_graph_construct_gene_unk2(ctx);
+		}
+		xf_emit(ctx, 0x36, 0);
+		nv50_graph_construct_gene_unk5(ctx);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+
+		/* Strand 3 */
+		ctx->ctxvals_pos = offset + 3;
+		xf_emit(ctx, 1, 0);
+		xf_emit(ctx, 1, 1);
+		nv50_graph_construct_gene_unk6(ctx);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+
+		/* Strand 4 */
+		ctx->ctxvals_pos = offset + 4;
+		if (dev_priv->chipset == 0xa0)
+			xf_emit(ctx, 0xa80, 0);
+		else
+			xf_emit(ctx, 0xa7a, 0);
+		xf_emit(ctx, 1, 0x3fffff);
+		xf_emit(ctx, 2, 0);
+		xf_emit(ctx, 1, 0x1fff);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+
+		/* Strand 5 */
+		ctx->ctxvals_pos = offset + 5;
+		xf_emit(ctx, 1, 0);
+		xf_emit(ctx, 1, 0x0fac6881);
+		xf_emit(ctx, 0xb, 0);
+		xf_emit(ctx, 2, 0x4e3bfdf);
+		xf_emit(ctx, 3, 0);
+		if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+			xf_emit(ctx, 1, 0x11);
+		xf_emit(ctx, 1, 0);
+		xf_emit(ctx, 2, 0x4e3bfdf);
+		xf_emit(ctx, 2, 0);
+		if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+			xf_emit(ctx, 1, 0x11);
+		xf_emit(ctx, 1, 0);
+		for (i = 0; i < 8; i++)
+			if (units & (1<<(i+16)))
+				nv50_graph_construct_gene_ropc(ctx);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+
+		/* Strand 6 */
+		ctx->ctxvals_pos = offset + 6;
+		nv50_graph_construct_gene_unk3(ctx);
+		xf_emit(ctx, 0xb, 0);
+		nv50_graph_construct_gene_unk4(ctx);
+		nv50_graph_construct_gene_unk7(ctx);
+		if (units & (1 << 0))
+			nv50_graph_construct_xfer_tp(ctx);
+		if (units & (1 << 1))
+			nv50_graph_construct_xfer_tp(ctx);
+		if (units & (1 << 2))
+			nv50_graph_construct_xfer_tp(ctx);
+		if (units & (1 << 3))
+			nv50_graph_construct_xfer_tp(ctx);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+
+		/* Strand 7 */
+		ctx->ctxvals_pos = offset + 7;
+		if (dev_priv->chipset == 0xa0) {
+			if (units & (1 << 4))
+				nv50_graph_construct_xfer_tp(ctx);
+			if (units & (1 << 5))
+				nv50_graph_construct_xfer_tp(ctx);
+			if (units & (1 << 6))
+				nv50_graph_construct_xfer_tp(ctx);
+			if (units & (1 << 7))
+				nv50_graph_construct_xfer_tp(ctx);
+			if (units & (1 << 8))
+				nv50_graph_construct_xfer_tp(ctx);
+			if (units & (1 << 9))
+				nv50_graph_construct_xfer_tp(ctx);
+		} else {
+			nv50_graph_construct_gene_unk2(ctx);
+		}
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+	}
+
+	ctx->ctxvals_pos = offset + size * 8;
+	ctx->ctxvals_pos = (ctx->ctxvals_pos+0x3f)&~0x3f;
+	cp_lsr (ctx, offset);
+	cp_out (ctx, CP_SET_XFER_POINTER);
+	cp_lsr (ctx, size);
+	cp_out (ctx, CP_SEEK_1);
+	cp_out (ctx, CP_XFER_1);
+	cp_wait(ctx, XFER, BUSY);
+}
+
+/*
+ * non-trivial demagiced parts of ctx init go here
+ */
+
+static void
+nv50_graph_construct_gene_m2mf(struct nouveau_grctx *ctx)
+{
+	/* m2mf state */
+	xf_emit (ctx, 1, 0);		/* DMA_NOTIFY instance >> 4 */
+	xf_emit (ctx, 1, 0);		/* DMA_BUFFER_IN instance >> 4 */
+	xf_emit (ctx, 1, 0);		/* DMA_BUFFER_OUT instance >> 4 */
+	xf_emit (ctx, 1, 0);		/* OFFSET_IN */
+	xf_emit (ctx, 1, 0);		/* OFFSET_OUT */
+	xf_emit (ctx, 1, 0);		/* PITCH_IN */
+	xf_emit (ctx, 1, 0);		/* PITCH_OUT */
+	xf_emit (ctx, 1, 0);		/* LINE_LENGTH */
+	xf_emit (ctx, 1, 0);		/* LINE_COUNT */
+	xf_emit (ctx, 1, 0x21);		/* FORMAT: bits 0-4 INPUT_INC, bits 5-9 OUTPUT_INC */
+	xf_emit (ctx, 1, 1);		/* LINEAR_IN */
+	xf_emit (ctx, 1, 0x2);		/* TILING_MODE_IN: bits 0-2 y tiling, bits 3-5 z tiling */
+	xf_emit (ctx, 1, 0x100);	/* TILING_PITCH_IN */
+	xf_emit (ctx, 1, 0x100);	/* TILING_HEIGHT_IN */
+	xf_emit (ctx, 1, 1);		/* TILING_DEPTH_IN */
+	xf_emit (ctx, 1, 0);		/* TILING_POSITION_IN_Z */
+	xf_emit (ctx, 1, 0);		/* TILING_POSITION_IN */
+	xf_emit (ctx, 1, 1);		/* LINEAR_OUT */
+	xf_emit (ctx, 1, 0x2);		/* TILING_MODE_OUT: bits 0-2 y tiling, bits 3-5 z tiling */
+	xf_emit (ctx, 1, 0x100);	/* TILING_PITCH_OUT */
+	xf_emit (ctx, 1, 0x100);	/* TILING_HEIGHT_OUT */
+	xf_emit (ctx, 1, 1);		/* TILING_DEPTH_OUT */
+	xf_emit (ctx, 1, 0);		/* TILING_POSITION_OUT_Z */
+	xf_emit (ctx, 1, 0);		/* TILING_POSITION_OUT */
+	xf_emit (ctx, 1, 0);		/* OFFSET_IN_HIGH */
+	xf_emit (ctx, 1, 0);		/* OFFSET_OUT_HIGH */
+}
+
+static void
+nv50_graph_construct_gene_unk1(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	/* end of area 2 on pre-NVA0, area 1 on NVAx */
+	xf_emit(ctx, 2, 4);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x80);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 1, 0x80c14);
+	xf_emit(ctx, 1, 0);
+	if (dev_priv->chipset == 0x50)
+		xf_emit(ctx, 1, 0x3ff);
+	else
+		xf_emit(ctx, 1, 0x7ff);
+	switch (dev_priv->chipset) {
+	case 0x50:
+	case 0x86:
+	case 0x98:
+	case 0xaa:
+	case 0xac:
+		xf_emit(ctx, 0x542, 0);
+		break;
+	case 0x84:
+	case 0x92:
+	case 0x94:
+	case 0x96:
+		xf_emit(ctx, 0x942, 0);
+		break;
+	case 0xa0:
+		xf_emit(ctx, 0x2042, 0);
+		break;
+	case 0xa5:
+	case 0xa8:
+		xf_emit(ctx, 0x842, 0);
+		break;
+	}
+	xf_emit(ctx, 2, 4);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x80);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x27);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x26);
+	xf_emit(ctx, 3, 0);
+}
+
+static void
+nv50_graph_construct_gene_unk10(struct nouveau_grctx *ctx)
+{
+	/* end of area 2 on pre-NVA0, area 1 on NVAx */
+	xf_emit(ctx, 0x10, 0x04000000);
+	xf_emit(ctx, 0x24, 0);
+	xf_emit(ctx, 2, 0x04e3bfdf);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 0x1fe21);
+}
+
+static void
+nv50_graph_construct_gene_unk2(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	/* middle of area 2 on pre-NVA0, beginning of area 2 on NVA0, area 7 on >NVA0 */
+	if (dev_priv->chipset != 0x50) {
+		xf_emit(ctx, 5, 0);
+		xf_emit(ctx, 1, 0x80c14);
+		xf_emit(ctx, 2, 0);
+		xf_emit(ctx, 1, 0x804);
+		xf_emit(ctx, 1, 0);
+		xf_emit(ctx, 2, 4);
+		xf_emit(ctx, 1, 0x8100c12);
+	}
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 2, 4);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x10);
+	if (dev_priv->chipset == 0x50)
+		xf_emit(ctx, 3, 0);
+	else
+		xf_emit(ctx, 4, 0);
+	xf_emit(ctx, 1, 0x804);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0x1a);
+	if (dev_priv->chipset != 0x50)
+		xf_emit(ctx, 1, 0x7f);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0x80c14);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x8100c12);
+	xf_emit(ctx, 2, 4);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x10);
+	xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0x8100c12);
+	xf_emit(ctx, 6, 0);
+	if (dev_priv->chipset == 0x50)
+		xf_emit(ctx, 1, 0x3ff);
+	else
+		xf_emit(ctx, 1, 0x7ff);
+	xf_emit(ctx, 1, 0x80c14);
+	xf_emit(ctx, 0x38, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 0x10);
+	xf_emit(ctx, 0x38, 0);
+	xf_emit(ctx, 2, 0x88);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 0x16, 0);
+	xf_emit(ctx, 1, 0x26);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 0x3f800000);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 4, 0);
+	else
+		xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 0x1a);
+	xf_emit(ctx, 1, 0x10);
+	if (dev_priv->chipset != 0x50)
+		xf_emit(ctx, 0x28, 0);
+	else
+		xf_emit(ctx, 0x25, 0);
+	xf_emit(ctx, 1, 0x52);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x26);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 2, 4);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x1a);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 0x00ffff00);
+	xf_emit(ctx, 1, 0);
+}
+
+static void
+nv50_graph_construct_gene_unk3(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	/* end of area 0 on pre-NVA0, beginning of area 6 on NVAx */
+	xf_emit(ctx, 1, 0x3f);
+	xf_emit(ctx, 0xa, 0);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 2, 0x04000000);
+	xf_emit(ctx, 8, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 4);
+	if (dev_priv->chipset == 0x50)
+		xf_emit(ctx, 0x10, 0);
+	else
+		xf_emit(ctx, 0x11, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0x1001);
+	xf_emit(ctx, 4, 0xffff);
+	xf_emit(ctx, 0x20, 0);
+	xf_emit(ctx, 0x10, 0x3f800000);
+	xf_emit(ctx, 1, 0x10);
+	if (dev_priv->chipset == 0x50)
+		xf_emit(ctx, 1, 0);
+	else
+		xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 3);
+	xf_emit(ctx, 2, 0);
+}
+
+static void
+nv50_graph_construct_gene_unk4(struct nouveau_grctx *ctx)
+{
+	/* middle of area 0 on pre-NVA0, middle of area 6 on NVAx */
+	xf_emit(ctx, 2, 0x04000000);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x80);
+	xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 0x80);
+	xf_emit(ctx, 1, 0);
+}
+
+static void
+nv50_graph_construct_gene_unk5(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	/* middle of area 0 on pre-NVA0 [after m2mf], end of area 2 on NVAx */
+	xf_emit(ctx, 2, 4);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 0x1c4d, 0);
+	else
+		xf_emit(ctx, 0x1c4b, 0);
+	xf_emit(ctx, 2, 4);
+	xf_emit(ctx, 1, 0x8100c12);
+	if (dev_priv->chipset != 0x50)
+		xf_emit(ctx, 1, 3);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x8100c12);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x80c14);
+	xf_emit(ctx, 1, 1);
+	if (dev_priv->chipset >= 0xa0)
+		xf_emit(ctx, 2, 4);
+	xf_emit(ctx, 1, 0x80c14);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 0x8100c12);
+	xf_emit(ctx, 1, 0x27);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 0x3c1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 0x16, 0);
+	xf_emit(ctx, 1, 0x8100c12);
+	xf_emit(ctx, 1, 0);
+}
+
+static void
+nv50_graph_construct_gene_unk6(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	/* beginning of area 1 on pre-NVA0 [after m2mf], area 3 on NVAx */
+	xf_emit(ctx, 4, 0);
+	xf_emit(ctx, 1, 0xf);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 8, 0);
+	else
+		xf_emit(ctx, 4, 0);
+	xf_emit(ctx, 1, 0x20);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 0x11, 0);
+	else if (dev_priv->chipset >= 0xa0)
+		xf_emit(ctx, 0xf, 0);
+	else
+		xf_emit(ctx, 0xe, 0);
+	xf_emit(ctx, 1, 0x1a);
+	xf_emit(ctx, 0xd, 0);
+	xf_emit(ctx, 2, 4);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 1, 8);
+	xf_emit(ctx, 1, 0);
+	if (dev_priv->chipset == 0x50)
+		xf_emit(ctx, 1, 0x3ff);
+	else
+		xf_emit(ctx, 1, 0x7ff);
+	if (dev_priv->chipset == 0xa8)
+		xf_emit(ctx, 1, 0x1e00);
+	xf_emit(ctx, 0xc, 0);
+	xf_emit(ctx, 1, 0xf);
+	if (dev_priv->chipset == 0x50)
+		xf_emit(ctx, 0x125, 0);
+	else if (dev_priv->chipset < 0xa0)
+		xf_emit(ctx, 0x126, 0);
+	else if (dev_priv->chipset == 0xa0 || dev_priv->chipset >= 0xaa)
+		xf_emit(ctx, 0x124, 0);
+	else
+		xf_emit(ctx, 0x1f7, 0);
+	xf_emit(ctx, 1, 0xf);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 3, 0);
+	else
+		xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 0xa1, 0);
+	else
+		xf_emit(ctx, 0x5a, 0);
+	xf_emit(ctx, 1, 0xf);
+	if (dev_priv->chipset < 0xa0)
+		xf_emit(ctx, 0x834, 0);
+	else if (dev_priv->chipset == 0xa0)
+		xf_emit(ctx, 0x1873, 0);
+	else if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 0x8ba, 0);
+	else
+		xf_emit(ctx, 0x833, 0);
+	xf_emit(ctx, 1, 0xf);
+	xf_emit(ctx, 0xf, 0);
+}
+
+static void
+nv50_graph_construct_gene_unk7(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	/* middle of area 1 on pre-NVA0 [after m2mf], middle of area 6 on NVAx */
+	xf_emit(ctx, 2, 0);
+	if (dev_priv->chipset == 0x50)
+		xf_emit(ctx, 2, 1);
+	else
+		xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 2, 0x100);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 8);
+	xf_emit(ctx, 5, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 3, 1);
+	xf_emit(ctx, 1, 0xcf);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 6, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 3, 1);
+	xf_emit(ctx, 4, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0x15);
+	xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 0x4444480);
+	xf_emit(ctx, 0x37, 0);
+}
+
+static void
+nv50_graph_construct_gene_unk8(struct nouveau_grctx *ctx)
+{
+	/* middle of area 1 on pre-NVA0 [after m2mf], middle of area 0 on NVAx */
+	xf_emit(ctx, 4, 0);
+	xf_emit(ctx, 1, 0x8100c12);
+	xf_emit(ctx, 4, 0);
+	xf_emit(ctx, 1, 0x100);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 0x10001);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x10001);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0x10001);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 1, 2);
+}
+
+static void
+nv50_graph_construct_gene_unk9(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	/* middle of area 2 on pre-NVA0 [after m2mf], end of area 0 on NVAx */
+	xf_emit(ctx, 1, 0x3f800000);
+	xf_emit(ctx, 6, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 1, 0x1a);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 0x12, 0);
+	xf_emit(ctx, 1, 0x00ffff00);
+	xf_emit(ctx, 6, 0);
+	xf_emit(ctx, 1, 0xf);
+	xf_emit(ctx, 7, 0);
+	xf_emit(ctx, 1, 0x0fac6881);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 0xf, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 2, 0);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 1, 3);
+	else if (dev_priv->chipset >= 0xa0)
+		xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 2, 0x04000000);
+	xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 5);
+	xf_emit(ctx, 1, 0x52);
+	if (dev_priv->chipset == 0x50) {
+		xf_emit(ctx, 0x13, 0);
+	} else {
+		xf_emit(ctx, 4, 0);
+		xf_emit(ctx, 1, 1);
+		if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+			xf_emit(ctx, 0x11, 0);
+		else
+			xf_emit(ctx, 0x10, 0);
+	}
+	xf_emit(ctx, 0x10, 0x3f800000);
+	xf_emit(ctx, 1, 0x10);
+	xf_emit(ctx, 0x26, 0);
+	xf_emit(ctx, 1, 0x8100c12);
+	xf_emit(ctx, 1, 5);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 4, 0xffff);
+	if (dev_priv->chipset != 0x50)
+		xf_emit(ctx, 1, 3);
+	if (dev_priv->chipset < 0xa0)
+		xf_emit(ctx, 0x1f, 0);
+	else if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 0xc, 0);
+	else
+		xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 0x00ffff00);
+	xf_emit(ctx, 1, 0x1a);
+	if (dev_priv->chipset != 0x50) {
+		xf_emit(ctx, 1, 0);
+		xf_emit(ctx, 1, 3);
+	}
+	if (dev_priv->chipset < 0xa0)
+		xf_emit(ctx, 0x26, 0);
+	else
+		xf_emit(ctx, 0x3c, 0);
+	xf_emit(ctx, 1, 0x102);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 4, 4);
+	if (dev_priv->chipset >= 0xa0)
+		xf_emit(ctx, 8, 0);
+	xf_emit(ctx, 2, 4);
+	xf_emit(ctx, 1, 0);
+	if (dev_priv->chipset == 0x50)
+		xf_emit(ctx, 1, 0x3ff);
+	else
+		xf_emit(ctx, 1, 0x7ff);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x102);
+	xf_emit(ctx, 9, 0);
+	xf_emit(ctx, 4, 4);
+	xf_emit(ctx, 0x2c, 0);
+}
+
+static void
+nv50_graph_construct_gene_ropc(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	int magic2;
+	if (dev_priv->chipset == 0x50) {
+		magic2 = 0x00003e60;
+	} else if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa) {
+		magic2 = 0x001ffe67;
+	} else {
+		magic2 = 0x00087e67;
+	}
+	xf_emit(ctx, 8, 0);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, magic2);
+	xf_emit(ctx, 4, 0);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 7, 0);
+	if (dev_priv->chipset >= 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 1, 0x15);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0x10);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 4, 0);
+	if (dev_priv->chipset == 0x86 || dev_priv->chipset == 0x92 || dev_priv->chipset == 0x98 || dev_priv->chipset >= 0xa0) {
+		xf_emit(ctx, 1, 4);
+		xf_emit(ctx, 1, 0x400);
+		xf_emit(ctx, 1, 0x300);
+		xf_emit(ctx, 1, 0x1001);
+		if (dev_priv->chipset != 0xa0) {
+			if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+				xf_emit(ctx, 1, 0);
+			else
+				xf_emit(ctx, 1, 0x15);
+		}
+		xf_emit(ctx, 3, 0);
+	}
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 8, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0x10);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 0x13, 0);
+	xf_emit(ctx, 1, 0x10);
+	xf_emit(ctx, 0x10, 0);
+	xf_emit(ctx, 0x10, 0x3f800000);
+	xf_emit(ctx, 0x19, 0);
+	xf_emit(ctx, 1, 0x10);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x3f);
+	xf_emit(ctx, 6, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	if (dev_priv->chipset >= 0xa0) {
+		xf_emit(ctx, 2, 0);
+		xf_emit(ctx, 1, 0x1001);
+		xf_emit(ctx, 0xb, 0);
+	} else {
+		xf_emit(ctx, 0xc, 0);
+	}
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 7, 0);
+	xf_emit(ctx, 1, 0xf);
+	xf_emit(ctx, 7, 0);
+	xf_emit(ctx, 1, 0x11);
+	if (dev_priv->chipset == 0x50)
+		xf_emit(ctx, 4, 0);
+	else
+		xf_emit(ctx, 6, 0);
+	xf_emit(ctx, 3, 1);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, magic2);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x0fac6881);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+		xf_emit(ctx, 1, 0);
+		xf_emit(ctx, 0x18, 1);
+		xf_emit(ctx, 8, 2);
+		xf_emit(ctx, 8, 1);
+		xf_emit(ctx, 8, 2);
+		xf_emit(ctx, 8, 1);
+		xf_emit(ctx, 3, 0);
+		xf_emit(ctx, 1, 1);
+		xf_emit(ctx, 5, 0);
+		xf_emit(ctx, 1, 1);
+		xf_emit(ctx, 0x16, 0);
+	} else {
+		if (dev_priv->chipset >= 0xa0)
+			xf_emit(ctx, 0x1b, 0);
+		else
+			xf_emit(ctx, 0x15, 0);
+	}
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 2, 1);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 2, 1);
+	if (dev_priv->chipset >= 0xa0)
+		xf_emit(ctx, 4, 0);
+	else
+		xf_emit(ctx, 3, 0);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+		xf_emit(ctx, 0x10, 1);
+		xf_emit(ctx, 8, 2);
+		xf_emit(ctx, 0x10, 1);
+		xf_emit(ctx, 8, 2);
+		xf_emit(ctx, 8, 1);
+		xf_emit(ctx, 3, 0);
+	}
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 0x5b, 0);
+}
+
+static void
+nv50_graph_construct_xfer_tp_x1(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	int magic3;
+	if (dev_priv->chipset == 0x50)
+		magic3 = 0x1000;
+	else if (dev_priv->chipset == 0x86 || dev_priv->chipset == 0x98 || dev_priv->chipset >= 0xa8)
+		magic3 = 0x1e00;
+	else
+		magic3 = 0;
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 4);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 0x24, 0);
+	else if (dev_priv->chipset >= 0xa0)
+		xf_emit(ctx, 0x14, 0);
+	else
+		xf_emit(ctx, 0x15, 0);
+	xf_emit(ctx, 2, 4);
+	if (dev_priv->chipset >= 0xa0)
+		xf_emit(ctx, 1, 0x03020100);
+	else
+		xf_emit(ctx, 1, 0x00608080);
+	xf_emit(ctx, 4, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 2, 4);
+	xf_emit(ctx, 1, 0x80);
+	if (magic3)
+		xf_emit(ctx, 1, magic3);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 0x24, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 1, 0x80);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 1, 0x03020100);
+	xf_emit(ctx, 1, 3);
+	if (magic3)
+		xf_emit(ctx, 1, magic3);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 4, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 1, 3);
+	xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 4);
+	if (dev_priv->chipset == 0x94 || dev_priv->chipset == 0x96)
+		xf_emit(ctx, 0x1024, 0);
+	else if (dev_priv->chipset < 0xa0)
+		xf_emit(ctx, 0xa24, 0);
+	else if (dev_priv->chipset == 0xa0 || dev_priv->chipset >= 0xaa)
+		xf_emit(ctx, 0x214, 0);
+	else
+		xf_emit(ctx, 0x414, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 1, 3);
+	xf_emit(ctx, 2, 0);
+}
+
+static void
+nv50_graph_construct_xfer_tp_x2(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	int magic1, magic2;
+	if (dev_priv->chipset == 0x50) {
+		magic1 = 0x3ff;
+		magic2 = 0x00003e60;
+	} else if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa) {
+		magic1 = 0x7ff;
+		magic2 = 0x001ffe67;
+	} else {
+		magic1 = 0x7ff;
+		magic2 = 0x00087e67;
+	}
+	xf_emit(ctx, 3, 0);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 0xc, 0);
+	xf_emit(ctx, 1, 0xf);
+	xf_emit(ctx, 0xb, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 4, 0xffff);
+	xf_emit(ctx, 8, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 5, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 2, 0);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+		xf_emit(ctx, 1, 3);
+		xf_emit(ctx, 1, 0);
+	} else if (dev_priv->chipset >= 0xa0)
+		xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 0xa, 0);
+	xf_emit(ctx, 2, 1);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 2, 1);
+	xf_emit(ctx, 1, 2);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+		xf_emit(ctx, 1, 0);
+		xf_emit(ctx, 0x18, 1);
+		xf_emit(ctx, 8, 2);
+		xf_emit(ctx, 8, 1);
+		xf_emit(ctx, 8, 2);
+		xf_emit(ctx, 8, 1);
+		xf_emit(ctx, 1, 0);
+	}
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 7, 0);
+	xf_emit(ctx, 1, 0x0fac6881);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 3, 0xcf);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 0xa, 0);
+	xf_emit(ctx, 2, 1);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 2, 1);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 8, 1);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 7, 0);
+	xf_emit(ctx, 1, 0x0fac6881);
+	xf_emit(ctx, 1, 0xf);
+	xf_emit(ctx, 7, 0);
+	xf_emit(ctx, 1, magic2);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 0x11);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 2, 1);
+	else
+		xf_emit(ctx, 1, 1);
+	if(dev_priv->chipset == 0x50)
+		xf_emit(ctx, 1, 0);
+	else
+		xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 5, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 4, 0);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 7, 0);
+	xf_emit(ctx, 1, 0x0fac6881);
+	xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, magic1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 2, 0);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 0x28, 0);
+	xf_emit(ctx, 8, 8);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 7, 0);
+	xf_emit(ctx, 1, 0x0fac6881);
+	xf_emit(ctx, 8, 0x400);
+	xf_emit(ctx, 8, 0x300);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0xf);
+	xf_emit(ctx, 7, 0);
+	xf_emit(ctx, 1, 0x20);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 1, 0x100);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 0x40);
+	xf_emit(ctx, 1, 0x100);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 3);
+	xf_emit(ctx, 4, 0);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, magic2);
+	xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 1, 0x0fac6881);
+	xf_emit(ctx, 9, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 4, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0x400);
+	xf_emit(ctx, 1, 0x300);
+	xf_emit(ctx, 1, 0x1001);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 4, 0);
+	else
+		xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 7, 0);
+	xf_emit(ctx, 1, 0x0fac6881);
+	xf_emit(ctx, 1, 0xf);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+		xf_emit(ctx, 0x15, 0);
+		xf_emit(ctx, 1, 1);
+		xf_emit(ctx, 3, 0);
+	} else
+		xf_emit(ctx, 0x17, 0);
+	if (dev_priv->chipset >= 0xa0)
+		xf_emit(ctx, 1, 0x0fac6881);
+	xf_emit(ctx, 1, magic2);
+	xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 2, 1);
+	xf_emit(ctx, 3, 0);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 2, 1);
+	else
+		xf_emit(ctx, 1, 1);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 2, 0);
+	else if (dev_priv->chipset != 0x50)
+		xf_emit(ctx, 1, 0);
+}
+
+static void
+nv50_graph_construct_xfer_tp_x3(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	if (dev_priv->chipset == 0x50)
+		xf_emit(ctx, 2, 0);
+	else
+		xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 0x2a712488);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x4085c000);
+	xf_emit(ctx, 1, 0x40);
+	xf_emit(ctx, 1, 0x100);
+	xf_emit(ctx, 1, 0x10100);
+	xf_emit(ctx, 1, 0x02800000);
+}
+
+static void
+nv50_graph_construct_xfer_tp_x4(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	xf_emit(ctx, 2, 0x04e3bfdf);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x00ffff00);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 2, 1);
+	else
+		xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 0x00ffff00);
+	xf_emit(ctx, 8, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0x30201000);
+	xf_emit(ctx, 1, 0x70605040);
+	xf_emit(ctx, 1, 0xb8a89888);
+	xf_emit(ctx, 1, 0xf8e8d8c8);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x1a);
+}
+
+static void
+nv50_graph_construct_xfer_tp_x5(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 0xfac6881);
+	xf_emit(ctx, 4, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 2, 1);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 1);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 0xb, 0);
+	else
+		xf_emit(ctx, 0xa, 0);
+	xf_emit(ctx, 8, 1);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 7, 0);
+	xf_emit(ctx, 1, 0xfac6881);
+	xf_emit(ctx, 1, 0xf);
+	xf_emit(ctx, 7, 0);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 1, 1);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+		xf_emit(ctx, 6, 0);
+		xf_emit(ctx, 1, 1);
+		xf_emit(ctx, 6, 0);
+	} else {
+		xf_emit(ctx, 0xb, 0);
+	}
+}
+
+static void
+nv50_graph_construct_xfer_tp(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	if (dev_priv->chipset < 0xa0) {
+		nv50_graph_construct_xfer_tp_x1(ctx);
+		nv50_graph_construct_xfer_tp_x2(ctx);
+		nv50_graph_construct_xfer_tp_x3(ctx);
+		if (dev_priv->chipset == 0x50)
+			xf_emit(ctx, 0xf, 0);
+		else
+			xf_emit(ctx, 0x12, 0);
+		nv50_graph_construct_xfer_tp_x4(ctx);
+	} else {
+		nv50_graph_construct_xfer_tp_x3(ctx);
+		if (dev_priv->chipset < 0xaa)
+			xf_emit(ctx, 0xc, 0);
+		else
+			xf_emit(ctx, 0xa, 0);
+		nv50_graph_construct_xfer_tp_x2(ctx);
+		nv50_graph_construct_xfer_tp_x5(ctx);
+		nv50_graph_construct_xfer_tp_x4(ctx);
+		nv50_graph_construct_xfer_tp_x1(ctx);
+	}
+}
+
+static void
+nv50_graph_construct_xfer_tp2(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	int i, mpcnt;
+	if (dev_priv->chipset == 0x98 || dev_priv->chipset == 0xaa)
+		mpcnt = 1;
+	else if (dev_priv->chipset < 0xa0 || dev_priv->chipset >= 0xa8)
+		mpcnt = 2;
+	else
+		mpcnt = 3;
+	for (i = 0; i < mpcnt; i++) {
+		xf_emit(ctx, 1, 0);
+		xf_emit(ctx, 1, 0x80);
+		xf_emit(ctx, 1, 0x80007004);
+		xf_emit(ctx, 1, 0x04000400);
+		if (dev_priv->chipset >= 0xa0)
+			xf_emit(ctx, 1, 0xc0);
+		xf_emit(ctx, 1, 0x1000);
+		xf_emit(ctx, 2, 0);
+		if (dev_priv->chipset == 0x86 || dev_priv->chipset == 0x98 || dev_priv->chipset >= 0xa8) {
+			xf_emit(ctx, 1, 0xe00);
+			xf_emit(ctx, 1, 0x1e00);
+		}
+		xf_emit(ctx, 1, 1);
+		xf_emit(ctx, 2, 0);
+		if (dev_priv->chipset == 0x50)
+			xf_emit(ctx, 2, 0x1000);
+		xf_emit(ctx, 1, 1);
+		xf_emit(ctx, 1, 0);
+		xf_emit(ctx, 1, 4);
+		xf_emit(ctx, 1, 2);
+		if (dev_priv->chipset >= 0xaa)
+			xf_emit(ctx, 0xb, 0);
+		else if (dev_priv->chipset >= 0xa0)
+			xf_emit(ctx, 0xc, 0);
+		else
+			xf_emit(ctx, 0xa, 0);
+	}
+	xf_emit(ctx, 1, 0x08100c12);
+	xf_emit(ctx, 1, 0);
+	if (dev_priv->chipset >= 0xa0) {
+		xf_emit(ctx, 1, 0x1fe21);
+	}
+	xf_emit(ctx, 5, 0);
+	xf_emit(ctx, 4, 0xffff);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 2, 0x10001);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x1fe21);
+	xf_emit(ctx, 1, 0);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 4, 0);
+	xf_emit(ctx, 1, 0x08100c12);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 8, 0);
+	xf_emit(ctx, 1, 0xfac6881);
+	xf_emit(ctx, 1, 0);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 1, 3);
+	xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 9, 0);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 2, 1);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 3, 1);
+	xf_emit(ctx, 1, 0);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+		xf_emit(ctx, 8, 2);
+		xf_emit(ctx, 0x10, 1);
+		xf_emit(ctx, 8, 2);
+		xf_emit(ctx, 0x18, 1);
+		xf_emit(ctx, 3, 0);
+	}
+	xf_emit(ctx, 1, 4);
+	if (dev_priv->chipset == 0x50)
+		xf_emit(ctx, 0x3a0, 0);
+	else if (dev_priv->chipset < 0x94)
+		xf_emit(ctx, 0x3a2, 0);
+	else if (dev_priv->chipset == 0x98 || dev_priv->chipset == 0xaa)
+		xf_emit(ctx, 0x39f, 0);
+	else
+		xf_emit(ctx, 0x3a3, 0);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 0x2d, 0);
+}
+
+static void
+nv50_graph_construct_xfer2(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	int i;
+	uint32_t offset;
+	uint32_t units = nv_rd32 (ctx->dev, 0x1540);
+	int size = 0;
+
+	offset = (ctx->ctxvals_pos+0x3f)&~0x3f;
+
+	if (dev_priv->chipset < 0xa0) {
+		for (i = 0; i < 8; i++) {
+			ctx->ctxvals_pos = offset + i;
+			if (i == 0)
+				xf_emit(ctx, 1, 0x08100c12);
+			if (units & (1 << i))
+				nv50_graph_construct_xfer_tp2(ctx);
+			if ((ctx->ctxvals_pos-offset)/8 > size)
+				size = (ctx->ctxvals_pos-offset)/8;
+		}
+	} else {
+		/* Strand 0: TPs 0, 1 */
+		ctx->ctxvals_pos = offset;
+		xf_emit(ctx, 1, 0x08100c12);
+		if (units & (1 << 0))
+			nv50_graph_construct_xfer_tp2(ctx);
+		if (units & (1 << 1))
+			nv50_graph_construct_xfer_tp2(ctx);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+
+		/* Strand 0: TPs 2, 3 */
+		ctx->ctxvals_pos = offset + 1;
+		if (units & (1 << 2))
+			nv50_graph_construct_xfer_tp2(ctx);
+		if (units & (1 << 3))
+			nv50_graph_construct_xfer_tp2(ctx);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+
+		/* Strand 0: TPs 4, 5, 6 */
+		ctx->ctxvals_pos = offset + 2;
+		if (units & (1 << 4))
+			nv50_graph_construct_xfer_tp2(ctx);
+		if (units & (1 << 5))
+			nv50_graph_construct_xfer_tp2(ctx);
+		if (units & (1 << 6))
+			nv50_graph_construct_xfer_tp2(ctx);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+
+		/* Strand 0: TPs 7, 8, 9 */
+		ctx->ctxvals_pos = offset + 3;
+		if (units & (1 << 7))
+			nv50_graph_construct_xfer_tp2(ctx);
+		if (units & (1 << 8))
+			nv50_graph_construct_xfer_tp2(ctx);
+		if (units & (1 << 9))
+			nv50_graph_construct_xfer_tp2(ctx);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+	}
+	ctx->ctxvals_pos = offset + size * 8;
+	ctx->ctxvals_pos = (ctx->ctxvals_pos+0x3f)&~0x3f;
+	cp_lsr (ctx, offset);
+	cp_out (ctx, CP_SET_XFER_POINTER);
+	cp_lsr (ctx, size);
+	cp_out (ctx, CP_SEEK_2);
+	cp_out (ctx, CP_XFER_2);
+	cp_wait(ctx, XFER, BUSY);
+}
diff --git a/drivers/gpu/drm/nouveau/nv50_instmem.c b/drivers/gpu/drm/nouveau/nv50_instmem.c
index f0dc4e36ef055c7d711eaf1a54a501d41bbce2d2..de1f5b0062c551c3f9480aa0efeb326634f3f635 100644
--- a/drivers/gpu/drm/nouveau/nv50_instmem.c
+++ b/drivers/gpu/drm/nouveau/nv50_instmem.c
@@ -390,7 +390,7 @@ nv50_instmem_populate(struct drm_device *dev, struct nouveau_gpuobj *gpuobj,
 	if (gpuobj->im_backing)
 		return -EINVAL;
 
-	*sz = (*sz + (NV50_INSTMEM_PAGE_SIZE-1)) & ~(NV50_INSTMEM_PAGE_SIZE-1);
+	*sz = ALIGN(*sz, NV50_INSTMEM_PAGE_SIZE);
 	if (*sz == 0)
 		return -EINVAL;
 
diff --git a/include/drm/nouveau_drm.h b/include/drm/nouveau_drm.h
index f745948b61e480e31bca363a033f7a622487dabe..a6a9f4af5ebd8257d92dcd592551b6fb05f91931 100644
--- a/include/drm/nouveau_drm.h
+++ b/include/drm/nouveau_drm.h
@@ -25,13 +25,14 @@
 #ifndef __NOUVEAU_DRM_H__
 #define __NOUVEAU_DRM_H__
 
-#define NOUVEAU_DRM_HEADER_PATCHLEVEL 15
+#define NOUVEAU_DRM_HEADER_PATCHLEVEL 16
 
 struct drm_nouveau_channel_alloc {
 	uint32_t     fb_ctxdma_handle;
 	uint32_t     tt_ctxdma_handle;
 
 	int          channel;
+	uint32_t     pushbuf_domains;
 
 	/* Notifier memory */
 	uint32_t     notifier_handle;
@@ -109,68 +110,58 @@ struct drm_nouveau_gem_new {
 	uint32_t align;
 };
 
+#define NOUVEAU_GEM_MAX_BUFFERS 1024
+struct drm_nouveau_gem_pushbuf_bo_presumed {
+	uint32_t valid;
+	uint32_t domain;
+	uint64_t offset;
+};
+
 struct drm_nouveau_gem_pushbuf_bo {
 	uint64_t user_priv;
 	uint32_t handle;
 	uint32_t read_domains;
 	uint32_t write_domains;
 	uint32_t valid_domains;
-	uint32_t presumed_ok;
-	uint32_t presumed_domain;
-	uint64_t presumed_offset;
+	struct drm_nouveau_gem_pushbuf_bo_presumed presumed;
 };
 
 #define NOUVEAU_GEM_RELOC_LOW  (1 << 0)
 #define NOUVEAU_GEM_RELOC_HIGH (1 << 1)
 #define NOUVEAU_GEM_RELOC_OR   (1 << 2)
+#define NOUVEAU_GEM_MAX_RELOCS 1024
 struct drm_nouveau_gem_pushbuf_reloc {
+	uint32_t reloc_bo_index;
+	uint32_t reloc_bo_offset;
 	uint32_t bo_index;
-	uint32_t reloc_index;
 	uint32_t flags;
 	uint32_t data;
 	uint32_t vor;
 	uint32_t tor;
 };
 
-#define NOUVEAU_GEM_MAX_BUFFERS 1024
-#define NOUVEAU_GEM_MAX_RELOCS 1024
+#define NOUVEAU_GEM_MAX_PUSH 512
+struct drm_nouveau_gem_pushbuf_push {
+	uint32_t bo_index;
+	uint32_t pad;
+	uint64_t offset;
+	uint64_t length;
+};
 
 struct drm_nouveau_gem_pushbuf {
 	uint32_t channel;
-	uint32_t nr_dwords;
 	uint32_t nr_buffers;
-	uint32_t nr_relocs;
-	uint64_t dwords;
 	uint64_t buffers;
-	uint64_t relocs;
-};
-
-struct drm_nouveau_gem_pushbuf_call {
-	uint32_t channel;
-	uint32_t handle;
-	uint32_t offset;
-	uint32_t nr_buffers;
 	uint32_t nr_relocs;
-	uint32_t nr_dwords;
-	uint64_t buffers;
+	uint32_t nr_push;
 	uint64_t relocs;
+	uint64_t push;
 	uint32_t suffix0;
 	uint32_t suffix1;
-	/* below only accessed for CALL2 */
 	uint64_t vram_available;
 	uint64_t gart_available;
 };
 
-struct drm_nouveau_gem_pin {
-	uint32_t handle;
-	uint32_t domain;
-	uint64_t offset;
-};
-
-struct drm_nouveau_gem_unpin {
-	uint32_t handle;
-};
-
 #define NOUVEAU_GEM_CPU_PREP_NOWAIT                                  0x00000001
 #define NOUVEAU_GEM_CPU_PREP_NOBLOCK                                 0x00000002
 #define NOUVEAU_GEM_CPU_PREP_WRITE                                   0x00000004
@@ -183,14 +174,6 @@ struct drm_nouveau_gem_cpu_fini {
 	uint32_t handle;
 };
 
-struct drm_nouveau_gem_tile {
-	uint32_t handle;
-	uint32_t offset;
-	uint32_t size;
-	uint32_t tile_mode;
-	uint32_t tile_flags;
-};
-
 enum nouveau_bus_type {
 	NV_AGP     = 0,
 	NV_PCI     = 1,
@@ -200,22 +183,17 @@ enum nouveau_bus_type {
 struct drm_nouveau_sarea {
 };
 
-#define DRM_NOUVEAU_CARD_INIT          0x00
-#define DRM_NOUVEAU_GETPARAM           0x01
-#define DRM_NOUVEAU_SETPARAM           0x02
-#define DRM_NOUVEAU_CHANNEL_ALLOC      0x03
-#define DRM_NOUVEAU_CHANNEL_FREE       0x04
-#define DRM_NOUVEAU_GROBJ_ALLOC        0x05
-#define DRM_NOUVEAU_NOTIFIEROBJ_ALLOC  0x06
-#define DRM_NOUVEAU_GPUOBJ_FREE        0x07
+#define DRM_NOUVEAU_GETPARAM           0x00
+#define DRM_NOUVEAU_SETPARAM           0x01
+#define DRM_NOUVEAU_CHANNEL_ALLOC      0x02
+#define DRM_NOUVEAU_CHANNEL_FREE       0x03
+#define DRM_NOUVEAU_GROBJ_ALLOC        0x04
+#define DRM_NOUVEAU_NOTIFIEROBJ_ALLOC  0x05
+#define DRM_NOUVEAU_GPUOBJ_FREE        0x06
 #define DRM_NOUVEAU_GEM_NEW            0x40
 #define DRM_NOUVEAU_GEM_PUSHBUF        0x41
-#define DRM_NOUVEAU_GEM_PUSHBUF_CALL   0x42
-#define DRM_NOUVEAU_GEM_PIN            0x43 /* !KMS only */
-#define DRM_NOUVEAU_GEM_UNPIN          0x44 /* !KMS only */
-#define DRM_NOUVEAU_GEM_CPU_PREP       0x45
-#define DRM_NOUVEAU_GEM_CPU_FINI       0x46
-#define DRM_NOUVEAU_GEM_INFO           0x47
-#define DRM_NOUVEAU_GEM_PUSHBUF_CALL2  0x48
+#define DRM_NOUVEAU_GEM_CPU_PREP       0x42
+#define DRM_NOUVEAU_GEM_CPU_FINI       0x43
+#define DRM_NOUVEAU_GEM_INFO           0x44
 
 #endif /* __NOUVEAU_DRM_H__ */