Commit 84210aeb authored by Linus Torvalds

Merge branch 'drm-radeon-kms' of git://git.kernel.org/pub/scm/linux/kernel/git/airlied/drm-2.6

* 'drm-radeon-kms' of git://git.kernel.org/pub/scm/linux/kernel/git/airlied/drm-2.6: (35 commits)
  drm/radeon: set fb aperture sizes for framebuffer handoff.
  drm/ttm: fix highuser vs dma32 confusion.
  drm/radeon: Fix size used for benchmarking BO copies.
  drm/radeon: Add radeon.test parameter for running BO GPU copy tests.
  drm/radeon/kms: allow interruptible waits for objects.
  drm/ttm: powerpc: Fix Highmem cache flushing.
  x86: Export kmap_atomic_prot() needed for TTM.
  drm/ttm: Fix ttm in-kernel copying of pages with non-standard caching attributes.
  drm/ttm: Fix an oops and sync object leak.
  drm/radeon/kms: vram sizing on certain r100 chips needs workaround.
  drm/radeon: Pay more attention to object placement requested by userspace.
  drm/radeon: Fall back to evicting BOs with memcpy if necessary.
  drm/radeon: Don't unreserve twice on failure to validate.
  drm/radeon/kms: fix bandwidth computation on avivo hardware
  drm/radeon/kms: add initial colortiling support.
  drm/radeon/kms: fix hotspot handling on pre-avivo chips
  drm/radeon/kms: enable frac fb divs on rs600/rs690/rs740
  drm/radeon/kms: add PLL flag to prefer frequencies <= the target freq
  drm/radeon/kms: block RN50 from using 3D engine.
  drm/radeon/kms: fix VRAM sizing like DDX does it.
  ...
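The new radeon.test parameter in the shortlog is plumbed through as an ordinary module parameter; a condensed sketch of the wiring, simplified from the radeon_drv.c and radeon_device.c hunks further down:

	int radeon_testing = 0;		/* off by default; load with radeon test=1 */

	MODULE_PARM_DESC(test, "Run tests");
	module_param_named(test, radeon_testing, int, 0444);

	/* ...and near the end of radeon_device_init(): */
	if (radeon_testing)
		radeon_test_moves(rdev);	/* BO GPU copy tests between GTT and VRAM */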
......@@ -103,6 +103,7 @@ EXPORT_SYMBOL(kmap);
EXPORT_SYMBOL(kunmap);
EXPORT_SYMBOL(kmap_atomic);
EXPORT_SYMBOL(kunmap_atomic);
EXPORT_SYMBOL(kmap_atomic_prot);
void __init set_highmem_pages_init(void)
{
......
......@@ -13,7 +13,8 @@ radeon-$(CONFIG_DRM_RADEON_KMS) += radeon_device.o radeon_kms.o \
radeon_encoders.o radeon_display.o radeon_cursor.o radeon_i2c.o \
radeon_clocks.o radeon_fb.o radeon_gem.o radeon_ring.o radeon_irq_kms.o \
radeon_cs.o radeon_bios.o radeon_benchmark.o r100.o r300.o r420.o \
rs400.o rs600.o rs690.o rv515.o r520.o r600.o rs780.o rv770.o
rs400.o rs600.o rs690.o rv515.o r520.o r600.o rs780.o rv770.o \
radeon_test.o
radeon-$(CONFIG_COMPAT) += radeon_ioc32.o
......
......@@ -31,6 +31,132 @@
#include "atom.h"
#include "atom-bits.h"
static void atombios_overscan_setup(struct drm_crtc *crtc,
struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode)
{
struct drm_device *dev = crtc->dev;
struct radeon_device *rdev = dev->dev_private;
struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
SET_CRTC_OVERSCAN_PS_ALLOCATION args;
int index = GetIndexIntoMasterTable(COMMAND, SetCRTC_OverScan);
int a1, a2;
memset(&args, 0, sizeof(args));
args.usOverscanRight = 0;
args.usOverscanLeft = 0;
args.usOverscanBottom = 0;
args.usOverscanTop = 0;
args.ucCRTC = radeon_crtc->crtc_id;
switch (radeon_crtc->rmx_type) {
case RMX_CENTER:
args.usOverscanTop = (adjusted_mode->crtc_vdisplay - mode->crtc_vdisplay) / 2;
args.usOverscanBottom = (adjusted_mode->crtc_vdisplay - mode->crtc_vdisplay) / 2;
args.usOverscanLeft = (adjusted_mode->crtc_hdisplay - mode->crtc_hdisplay) / 2;
args.usOverscanRight = (adjusted_mode->crtc_hdisplay - mode->crtc_hdisplay) / 2;
atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
break;
case RMX_ASPECT:
a1 = mode->crtc_vdisplay * adjusted_mode->crtc_hdisplay;
a2 = adjusted_mode->crtc_vdisplay * mode->crtc_hdisplay;
if (a1 > a2) {
args.usOverscanLeft = (adjusted_mode->crtc_hdisplay - (a2 / mode->crtc_vdisplay)) / 2;
args.usOverscanRight = (adjusted_mode->crtc_hdisplay - (a2 / mode->crtc_vdisplay)) / 2;
} else if (a2 > a1) {
args.usOverscanLeft = (adjusted_mode->crtc_vdisplay - (a1 / mode->crtc_hdisplay)) / 2;
args.usOverscanRight = (adjusted_mode->crtc_vdisplay - (a1 / mode->crtc_hdisplay)) / 2;
}
atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
break;
case RMX_FULL:
default:
args.usOverscanRight = 0;
args.usOverscanLeft = 0;
args.usOverscanBottom = 0;
args.usOverscanTop = 0;
atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
break;
}
}
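For illustration, the RMX_ASPECT branch above is a cross-multiplication test: a1 > a2 means the requested mode is taller (narrower) than the panel, so it gets pillarboxed; a2 > a1 means it is wider, so it gets letterboxed. A standalone worked example with made-up numbers (note that in the a2 > a1 branch the committed code stores the vertical result in usOverscanLeft/Right):

	#include <stdio.h>

	int main(void)
	{
		int mode_h = 1920, mode_v = 1080;	/* requested mode */
		int adj_h = 1600, adj_v = 1200;		/* native panel timing */
		long a1 = (long)mode_v * adj_h;		/* 1080 * 1600 = 1728000 */
		long a2 = (long)adj_v * mode_h;		/* 1200 * 1920 = 2304000 */

		if (a2 > a1)		/* content wider than panel: letterbox */
			printf("bar: %ld lines\n", (adj_v - a1 / mode_h) / 2);	/* 150 */
		else if (a1 > a2)	/* content taller than panel: pillarbox */
			printf("bar: %ld columns\n", (adj_h - a2 / mode_v) / 2);
		return 0;
	}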
static void atombios_scaler_setup(struct drm_crtc *crtc)
{
struct drm_device *dev = crtc->dev;
struct radeon_device *rdev = dev->dev_private;
struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
ENABLE_SCALER_PS_ALLOCATION args;
int index = GetIndexIntoMasterTable(COMMAND, EnableScaler);
/* fixme - fill in enc_priv for atom dac */
enum radeon_tv_std tv_std = TV_STD_NTSC;
if (!ASIC_IS_AVIVO(rdev) && radeon_crtc->crtc_id)
return;
memset(&args, 0, sizeof(args));
args.ucScaler = radeon_crtc->crtc_id;
if (radeon_crtc->devices & (ATOM_DEVICE_TV_SUPPORT)) {
switch (tv_std) {
case TV_STD_NTSC:
default:
args.ucTVStandard = ATOM_TV_NTSC;
break;
case TV_STD_PAL:
args.ucTVStandard = ATOM_TV_PAL;
break;
case TV_STD_PAL_M:
args.ucTVStandard = ATOM_TV_PALM;
break;
case TV_STD_PAL_60:
args.ucTVStandard = ATOM_TV_PAL60;
break;
case TV_STD_NTSC_J:
args.ucTVStandard = ATOM_TV_NTSCJ;
break;
case TV_STD_SCART_PAL:
args.ucTVStandard = ATOM_TV_PAL; /* ??? */
break;
case TV_STD_SECAM:
args.ucTVStandard = ATOM_TV_SECAM;
break;
case TV_STD_PAL_CN:
args.ucTVStandard = ATOM_TV_PALCN;
break;
}
args.ucEnable = SCALER_ENABLE_MULTITAP_MODE;
} else if (radeon_crtc->devices & (ATOM_DEVICE_CV_SUPPORT)) {
args.ucTVStandard = ATOM_TV_CV;
args.ucEnable = SCALER_ENABLE_MULTITAP_MODE;
} else {
switch (radeon_crtc->rmx_type) {
case RMX_FULL:
args.ucEnable = ATOM_SCALER_EXPANSION;
break;
case RMX_CENTER:
args.ucEnable = ATOM_SCALER_CENTER;
break;
case RMX_ASPECT:
args.ucEnable = ATOM_SCALER_EXPANSION;
break;
default:
if (ASIC_IS_AVIVO(rdev))
args.ucEnable = ATOM_SCALER_DISABLE;
else
args.ucEnable = ATOM_SCALER_CENTER;
break;
}
}
atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
if (radeon_crtc->devices & (ATOM_DEVICE_CV_SUPPORT | ATOM_DEVICE_TV_SUPPORT)
&& rdev->family >= CHIP_RV515 && rdev->family <= CHIP_RV570) {
atom_rv515_force_tv_scaler(rdev);
}
}
static void atombios_lock_crtc(struct drm_crtc *crtc, int lock)
{
struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
......@@ -203,6 +329,12 @@ void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode *mode)
if (ASIC_IS_AVIVO(rdev)) {
uint32_t ss_cntl;
if ((rdev->family == CHIP_RS600) ||
(rdev->family == CHIP_RS690) ||
(rdev->family == CHIP_RS740))
pll_flags |= (RADEON_PLL_USE_FRAC_FB_DIV |
RADEON_PLL_PREFER_CLOSEST_LOWER);
if (ASIC_IS_DCE32(rdev) && mode->clock > 200000) /* range limits??? */
pll_flags |= RADEON_PLL_PREFER_HIGH_FB_DIV;
else
......@@ -321,7 +453,7 @@ int atombios_crtc_set_base(struct drm_crtc *crtc, int x, int y,
struct drm_gem_object *obj;
struct drm_radeon_gem_object *obj_priv;
uint64_t fb_location;
uint32_t fb_format, fb_pitch_pixels;
uint32_t fb_format, fb_pitch_pixels, tiling_flags;
if (!crtc->fb)
return -EINVAL;
......@@ -358,7 +490,14 @@ int atombios_crtc_set_base(struct drm_crtc *crtc, int x, int y,
return -EINVAL;
}
/* TODO tiling */
radeon_object_get_tiling_flags(obj->driver_private,
&tiling_flags, NULL);
if (tiling_flags & RADEON_TILING_MACRO)
fb_format |= AVIVO_D1GRPH_MACRO_ADDRESS_MODE;
if (tiling_flags & RADEON_TILING_MICRO)
fb_format |= AVIVO_D1GRPH_TILED;
if (radeon_crtc->crtc_id == 0)
WREG32(AVIVO_D1VGA_CONTROL, 0);
else
......@@ -509,6 +648,9 @@ int atombios_crtc_mode_set(struct drm_crtc *crtc,
radeon_crtc_set_base(crtc, x, y, old_fb);
radeon_legacy_atom_set_surface(crtc);
}
atombios_overscan_setup(crtc, mode, adjusted_mode);
atombios_scaler_setup(crtc);
radeon_bandwidth_update(rdev);
return 0;
}
......@@ -516,6 +658,8 @@ static bool atombios_crtc_mode_fixup(struct drm_crtc *crtc,
struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode)
{
if (!radeon_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
return false;
return true;
}
......@@ -548,148 +692,3 @@ void radeon_atombios_init_crtc(struct drm_device *dev,
AVIVO_D2CRTC_H_TOTAL - AVIVO_D1CRTC_H_TOTAL;
drm_crtc_helper_add(&radeon_crtc->base, &atombios_helper_funcs);
}
void radeon_init_disp_bw_avivo(struct drm_device *dev,
struct drm_display_mode *mode1,
uint32_t pixel_bytes1,
struct drm_display_mode *mode2,
uint32_t pixel_bytes2)
{
struct radeon_device *rdev = dev->dev_private;
fixed20_12 min_mem_eff;
fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff;
fixed20_12 sclk_ff, mclk_ff;
uint32_t dc_lb_memory_split, temp;
min_mem_eff.full = rfixed_const_8(0);
if (rdev->disp_priority == 2) {
uint32_t mc_init_misc_lat_timer = 0;
if (rdev->family == CHIP_RV515)
mc_init_misc_lat_timer =
RREG32_MC(RV515_MC_INIT_MISC_LAT_TIMER);
else if (rdev->family == CHIP_RS690)
mc_init_misc_lat_timer =
RREG32_MC(RS690_MC_INIT_MISC_LAT_TIMER);
mc_init_misc_lat_timer &=
~(R300_MC_DISP1R_INIT_LAT_MASK <<
R300_MC_DISP1R_INIT_LAT_SHIFT);
mc_init_misc_lat_timer &=
~(R300_MC_DISP0R_INIT_LAT_MASK <<
R300_MC_DISP0R_INIT_LAT_SHIFT);
if (mode2)
mc_init_misc_lat_timer |=
(1 << R300_MC_DISP1R_INIT_LAT_SHIFT);
if (mode1)
mc_init_misc_lat_timer |=
(1 << R300_MC_DISP0R_INIT_LAT_SHIFT);
if (rdev->family == CHIP_RV515)
WREG32_MC(RV515_MC_INIT_MISC_LAT_TIMER,
mc_init_misc_lat_timer);
else if (rdev->family == CHIP_RS690)
WREG32_MC(RS690_MC_INIT_MISC_LAT_TIMER,
mc_init_misc_lat_timer);
}
/*
* determine if there is enough bw for current mode
*/
temp_ff.full = rfixed_const(100);
mclk_ff.full = rfixed_const(rdev->clock.default_mclk);
mclk_ff.full = rfixed_div(mclk_ff, temp_ff);
sclk_ff.full = rfixed_const(rdev->clock.default_sclk);
sclk_ff.full = rfixed_div(sclk_ff, temp_ff);
temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1);
temp_ff.full = rfixed_const(temp);
mem_bw.full = rfixed_mul(mclk_ff, temp_ff);
mem_bw.full = rfixed_mul(mem_bw, min_mem_eff);
pix_clk.full = 0;
pix_clk2.full = 0;
peak_disp_bw.full = 0;
if (mode1) {
temp_ff.full = rfixed_const(1000);
pix_clk.full = rfixed_const(mode1->clock); /* convert to fixed point */
pix_clk.full = rfixed_div(pix_clk, temp_ff);
temp_ff.full = rfixed_const(pixel_bytes1);
peak_disp_bw.full += rfixed_mul(pix_clk, temp_ff);
}
if (mode2) {
temp_ff.full = rfixed_const(1000);
pix_clk2.full = rfixed_const(mode2->clock); /* convert to fixed point */
pix_clk2.full = rfixed_div(pix_clk2, temp_ff);
temp_ff.full = rfixed_const(pixel_bytes2);
peak_disp_bw.full += rfixed_mul(pix_clk2, temp_ff);
}
if (peak_disp_bw.full >= mem_bw.full) {
DRM_ERROR
("You may not have enough display bandwidth for current mode\n"
"If you have flickering problems, try lowering the resolution, refresh rate, or color depth\n");
printk("peak disp bw %d, mem_bw %d\n",
rfixed_trunc(peak_disp_bw), rfixed_trunc(mem_bw));
}
/*
* Line Buffer Setup
* There is a single line buffer shared by both display controllers.
* DC_LB_MEMORY_SPLIT controls how that line buffer is shared between the display
* controllers. The partitioning can either be done manually or via one of four
* preset allocations specified in bits 1:0:
* 0 - line buffer is divided in half and shared between each display controller
* 1 - D1 gets 3/4 of the line buffer, D2 gets 1/4
* 2 - D1 gets the whole buffer
* 3 - D1 gets 1/4 of the line buffer, D2 gets 3/4
* Setting bit 2 of DC_LB_MEMORY_SPLIT switches to manual allocation mode.
* In manual allocation mode, D1 always starts at 0, D1 end/2 is specified in bits
* 14:4; D2 allocation follows D1.
*/
/* is auto or manual better ? */
dc_lb_memory_split =
RREG32(AVIVO_DC_LB_MEMORY_SPLIT) & ~AVIVO_DC_LB_MEMORY_SPLIT_MASK;
dc_lb_memory_split &= ~AVIVO_DC_LB_MEMORY_SPLIT_SHIFT_MODE;
#if 1
/* auto */
if (mode1 && mode2) {
if (mode1->hdisplay > mode2->hdisplay) {
if (mode1->hdisplay > 2560)
dc_lb_memory_split |=
AVIVO_DC_LB_MEMORY_SPLIT_D1_3Q_D2_1Q;
else
dc_lb_memory_split |=
AVIVO_DC_LB_MEMORY_SPLIT_D1HALF_D2HALF;
} else if (mode2->hdisplay > mode1->hdisplay) {
if (mode2->hdisplay > 2560)
dc_lb_memory_split |=
AVIVO_DC_LB_MEMORY_SPLIT_D1_1Q_D2_3Q;
else
dc_lb_memory_split |=
AVIVO_DC_LB_MEMORY_SPLIT_D1HALF_D2HALF;
} else
dc_lb_memory_split |=
AVIVO_DC_LB_MEMORY_SPLIT_D1HALF_D2HALF;
} else if (mode1) {
dc_lb_memory_split |= AVIVO_DC_LB_MEMORY_SPLIT_D1_ONLY;
} else if (mode2) {
dc_lb_memory_split |= AVIVO_DC_LB_MEMORY_SPLIT_D1_1Q_D2_3Q;
}
#else
/* manual */
dc_lb_memory_split |= AVIVO_DC_LB_MEMORY_SPLIT_SHIFT_MODE;
dc_lb_memory_split &=
~(AVIVO_DC_LB_DISP1_END_ADR_MASK <<
AVIVO_DC_LB_DISP1_END_ADR_SHIFT);
if (mode1) {
dc_lb_memory_split |=
((((mode1->hdisplay / 2) + 64) & AVIVO_DC_LB_DISP1_END_ADR_MASK)
<< AVIVO_DC_LB_DISP1_END_ADR_SHIFT);
} else if (mode2) {
dc_lb_memory_split |= (0 << AVIVO_DC_LB_DISP1_END_ADR_SHIFT);
}
#endif
WREG32(AVIVO_DC_LB_MEMORY_SPLIT, dc_lb_memory_split);
}
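The bandwidth math above runs in the driver's 20.12 fixed-point format (fixed20_12). A self-contained sketch of that arithmetic, with invented helper names and example numbers; the kernel's rfixed_* macros are assumed to behave like this, with rfixed_const_8(0) encoding roughly 0.8:

	#include <stdio.h>
	#include <stdint.h>

	typedef struct { uint32_t full; } fixed20_12;	/* value = full / 4096.0 */

	static fixed20_12 fx_const(uint32_t v) { fixed20_12 f = { v << 12 }; return f; }
	static fixed20_12 fx_mul(fixed20_12 a, fixed20_12 b)
	{
		fixed20_12 f = { (uint32_t)(((uint64_t)a.full * b.full) >> 12) };
		return f;
	}
	static fixed20_12 fx_div(fixed20_12 a, fixed20_12 b)
	{
		fixed20_12 f = { (uint32_t)(((uint64_t)a.full << 12) / b.full) };
		return f;
	}
	static uint32_t fx_trunc(fixed20_12 a) { return a.full >> 12; }

	int main(void)
	{
		/* default_mclk is in units of 10 kHz: 40000 -> 400 MHz */
		fixed20_12 mclk = fx_div(fx_const(40000), fx_const(100));
		/* 128-bit DDR bus: (128 / 8) * 2 = 32 bytes per memory clock */
		fixed20_12 bytes_per_clk = fx_const(32);
		fixed20_12 eff = { 3277 };	/* rfixed_const_8(0): ~0.8 efficiency */
		fixed20_12 mem_bw = fx_mul(fx_mul(mclk, bytes_per_clk), eff);

		printf("available: ~%u MB/s\n", fx_trunc(mem_bw));	/* ~10240 */
		return 0;
	}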
This diff is collapsed.
......@@ -30,6 +30,8 @@
#include "drm.h"
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_drm.h"
#include "radeon_share.h"
/* r300,r350,rv350,rv370,rv380 depends on : */
void r100_hdp_reset(struct radeon_device *rdev);
......@@ -44,6 +46,7 @@ int r100_gui_wait_for_idle(struct radeon_device *rdev);
int r100_cs_packet_parse(struct radeon_cs_parser *p,
struct radeon_cs_packet *pkt,
unsigned idx);
int r100_cs_packet_parse_vline(struct radeon_cs_parser *p);
int r100_cs_packet_next_reloc(struct radeon_cs_parser *p,
struct radeon_cs_reloc **cs_reloc);
int r100_cs_parse_packet0(struct radeon_cs_parser *p,
......@@ -150,8 +153,13 @@ int rv370_pcie_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
if (i < 0 || i > rdev->gart.num_gpu_pages) {
return -EINVAL;
}
- addr = (((u32)addr) >> 8) | ((upper_32_bits(addr) & 0xff) << 4) | 0xC;
- writel(cpu_to_le32(addr), ((void __iomem *)ptr) + (i * 4));
+ addr = (lower_32_bits(addr) >> 8) |
+ ((upper_32_bits(addr) & 0xff) << 24) |
+ 0xc;
+ /* on x86 we want this to be CPU endian; on powerpc without HW
+  * swappers it'll get swapped on the way into VRAM - so no need
+  * for cpu_to_le32 on VRAM tables */
+ writel(addr, ((void __iomem *)ptr) + (i * 4));
return 0;
}
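The new PTE layout above packs a 40-bit, 4 KB-aligned bus address into a 32-bit entry: address bits 39..32 land in PTE bits 31..24, bits 31..8 in PTE bits 23..0, and 0xc marks the page valid. A standalone round-trip sketch (helper names invented):

	#include <stdio.h>
	#include <stdint.h>

	static uint32_t pte_encode(uint64_t addr)	/* addr: 40-bit, 4 KB aligned */
	{
		return ((uint32_t)addr >> 8) |			/* address bits 31..8 */
		       (((uint32_t)(addr >> 32) & 0xff) << 24) |	/* bits 39..32 */
		       0xc;					/* valid/system flags */
	}

	static uint64_t pte_decode(uint32_t pte)
	{
		uint64_t lo = ((uint64_t)(pte & 0x00ffffff)) << 8;
		uint64_t hi = ((uint64_t)(pte >> 24)) << 32;
		return (hi | lo) & ~0xfffULL;		/* mask out the flag bits */
	}

	int main(void)
	{
		uint64_t addr = 0x1234567000ULL;	/* needs more than 32 bits */
		printf("pte=0x%08x back=0x%llx\n", pte_encode(addr),
		       (unsigned long long)pte_decode(pte_encode(addr)));
		return 0;	/* pte=0x1234567c back=0x1234567000 */
	}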
......@@ -579,10 +587,8 @@ void r300_vram_info(struct radeon_device *rdev)
} else {
rdev->mc.vram_width = 64;
}
- rdev->mc.vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+ r100_vram_init_sizes(rdev);
}
......@@ -970,7 +976,7 @@ static inline void r300_cs_track_clear(struct r300_cs_track *track)
static const unsigned r300_reg_safe_bm[159] = {
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFBF, 0xFFFFFFFF, 0xFFFFFFBF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
......@@ -1019,7 +1025,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
struct radeon_cs_reloc *reloc;
struct r300_cs_track *track;
volatile uint32_t *ib;
uint32_t tmp;
uint32_t tmp, tile_flags = 0;
unsigned i;
int r;
......@@ -1027,6 +1033,16 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
ib_chunk = &p->chunks[p->chunk_ib_idx];
track = (struct r300_cs_track*)p->track;
switch(reg) {
case AVIVO_D1MODE_VLINE_START_END:
case RADEON_CRTC_GUI_TRIG_VLINE:
r = r100_cs_packet_parse_vline(p);
if (r) {
DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
idx, reg);
r100_cs_dump_packet(p, pkt);
return r;
}
break;
case RADEON_DST_PITCH_OFFSET:
case RADEON_SRC_PITCH_OFFSET:
r = r100_cs_packet_next_reloc(p, &reloc);
......@@ -1038,7 +1054,19 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
}
tmp = ib_chunk->kdata[idx] & 0x003fffff;
tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
- ib[idx] = (ib_chunk->kdata[idx] & 0xffc00000) | tmp;
+ if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+ tile_flags |= RADEON_DST_TILE_MACRO;
+ if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
+ if (reg == RADEON_SRC_PITCH_OFFSET) {
+ DRM_ERROR("Cannot src blit from microtiled surface\n");
+ r100_cs_dump_packet(p, pkt);
+ return -EINVAL;
+ }
+ tile_flags |= RADEON_DST_TILE_MICRO;
+ }
+ tmp |= tile_flags;
+ ib[idx] = (ib_chunk->kdata[idx] & 0x3fc00000) | tmp;
break;
case R300_RB3D_COLOROFFSET0:
case R300_RB3D_COLOROFFSET1:
......@@ -1127,6 +1155,23 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
/* RB3D_COLORPITCH1 */
/* RB3D_COLORPITCH2 */
/* RB3D_COLORPITCH3 */
r = r100_cs_packet_next_reloc(p, &reloc);
if (r) {
DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
idx, reg);
r100_cs_dump_packet(p, pkt);
return r;
}
if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
tile_flags |= R300_COLOR_TILE_ENABLE;
if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
tile_flags |= R300_COLOR_MICROTILE_ENABLE;
tmp = ib_chunk->kdata[idx] & ~(0x7 << 16);
tmp |= tile_flags;
ib[idx] = tmp;
i = (reg - 0x4E38) >> 2;
track->cb[i].pitch = ib_chunk->kdata[idx] & 0x3FFE;
switch (((ib_chunk->kdata[idx] >> 21) & 0xF)) {
......@@ -1182,6 +1227,23 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
break;
case 0x4F24:
/* ZB_DEPTHPITCH */
r = r100_cs_packet_next_reloc(p, &reloc);
if (r) {
DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
idx, reg);
r100_cs_dump_packet(p, pkt);
return r;
}
if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
tile_flags |= R300_DEPTHMACROTILE_ENABLE;
if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
tile_flags |= R300_DEPTHMICROTILE_TILED;
tmp = ib_chunk->kdata[idx] & ~(0x7 << 16);
tmp |= tile_flags;
ib[idx] = tmp;
track->zb.pitch = ib_chunk->kdata[idx] & 0x3FFC;
break;
case 0x4104:
......
......@@ -27,7 +27,9 @@
#ifndef _R300_REG_H_
#define _R300_REG_H_
#define R300_SURF_TILE_MACRO (1<<16)
#define R300_SURF_TILE_MICRO (2<<16)
#define R300_SURF_TILE_BOTH (3<<16)
#define R300_MC_INIT_MISC_LAT_TIMER 0x180
......
......@@ -445,6 +445,7 @@
#define AVIVO_D1MODE_DATA_FORMAT 0x6528
# define AVIVO_D1MODE_INTERLEAVE_EN (1 << 0)
#define AVIVO_D1MODE_DESKTOP_HEIGHT 0x652C
#define AVIVO_D1MODE_VLINE_START_END 0x6538
#define AVIVO_D1MODE_VIEWPORT_START 0x6580
#define AVIVO_D1MODE_VIEWPORT_SIZE 0x6584
#define AVIVO_D1MODE_EXT_OVERSCAN_LEFT_RIGHT 0x6588
......@@ -496,6 +497,7 @@
#define AVIVO_D2CUR_SIZE 0x6c10
#define AVIVO_D2CUR_POSITION 0x6c14
#define AVIVO_D2MODE_VLINE_START_END 0x6d38
#define AVIVO_D2MODE_VIEWPORT_START 0x6d80
#define AVIVO_D2MODE_VIEWPORT_SIZE 0x6d84
#define AVIVO_D2MODE_EXT_OVERSCAN_LEFT_RIGHT 0x6d88
......
......@@ -28,6 +28,7 @@
#include "drmP.h"
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_share.h"
/* r520,rv530,rv560,rv570,r580 depends on : */
void r100_hdp_reset(struct radeon_device *rdev);
......@@ -94,8 +95,8 @@ int r520_mc_init(struct radeon_device *rdev)
"programming pipes. Bad things might happen.\n");
}
/* Write VRAM size in case we are limiting it */
- WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
- tmp = rdev->mc.vram_location + rdev->mc.vram_size - 1;
+ WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
+ tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1;
tmp = REG_SET(R520_MC_FB_TOP, tmp >> 16);
tmp |= REG_SET(R520_MC_FB_START, rdev->mc.vram_location >> 16);
WREG32_MC(R520_MC_FB_LOCATION, tmp);
......@@ -226,9 +227,20 @@ static void r520_vram_get_type(struct radeon_device *rdev)
void r520_vram_info(struct radeon_device *rdev)
{
+ fixed20_12 a;
r520_vram_get_type(rdev);
- rdev->mc.vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+ r100_vram_init_sizes(rdev);
+ /* FIXME: we should enforce default clock in case GPU is not in
+  * default setup
+  */
+ a.full = rfixed_const(100);
+ rdev->pm.sclk.full = rfixed_const(rdev->clock.default_sclk);
+ rdev->pm.sclk.full = rfixed_div(rdev->pm.sclk, a);
}
void r520_bandwidth_update(struct radeon_device *rdev)
{
rv515_bandwidth_avivo_update(rdev);
}
......@@ -67,7 +67,7 @@ int r600_mc_init(struct radeon_device *rdev)
"programming pipes. Bad things might happen.\n");
}
- tmp = rdev->mc.vram_location + rdev->mc.vram_size - 1;
+ tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1;
tmp = REG_SET(R600_MC_FB_TOP, tmp >> 24);
tmp |= REG_SET(R600_MC_FB_BASE, rdev->mc.vram_location >> 24);
WREG32(R600_MC_VM_FB_LOCATION, tmp);
......@@ -140,7 +140,8 @@ void r600_vram_get_type(struct radeon_device *rdev)
void r600_vram_info(struct radeon_device *rdev)
{
r600_vram_get_type(rdev);
- rdev->mc.vram_size = RREG32(R600_CONFIG_MEMSIZE);
+ rdev->mc.real_vram_size = RREG32(R600_CONFIG_MEMSIZE);
+ rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
/* Could aper size report 0 ? */
rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
......
......@@ -64,6 +64,7 @@ extern int radeon_agpmode;
extern int radeon_vram_limit;
extern int radeon_gart_size;
extern int radeon_benchmarking;
extern int radeon_testing;
extern int radeon_connector_table;
/*
......@@ -113,6 +114,7 @@ enum radeon_family {
CHIP_RV770,
CHIP_RV730,
CHIP_RV710,
CHIP_RS880,
CHIP_LAST,
};
......@@ -201,6 +203,14 @@ int radeon_fence_wait_last(struct radeon_device *rdev);
struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence);
void radeon_fence_unref(struct radeon_fence **fence);
/*
* Tiling registers
*/
struct radeon_surface_reg {
struct radeon_object *robj;
};
#define RADEON_GEM_MAX_SURFACES 8
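With only eight hardware surface registers, the driver has to treat them as a shared pool keyed by the owning object; a hypothetical allocator over the array above might look like this (not from the commit):

	/* hypothetical: scan surface_regs[] for a free hardware surface slot */
	static int surface_reg_alloc(struct radeon_surface_reg *regs,
				     struct radeon_object *robj)
	{
		int i;

		for (i = 0; i < RADEON_GEM_MAX_SURFACES; i++) {
			if (regs[i].robj == NULL) {
				regs[i].robj = robj;	/* claim slot i */
				return i;		/* -> SURFACE<i>_INFO register */
			}
		}
		return -1;				/* all 8 slots in use */
	}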
/*
* Radeon buffer.
......@@ -213,6 +223,7 @@ struct radeon_object_list {
uint64_t gpu_offset;
unsigned rdomain;
unsigned wdomain;
uint32_t tiling_flags;
};
int radeon_object_init(struct radeon_device *rdev);
......@@ -242,8 +253,15 @@ void radeon_object_list_clean(struct list_head *head);
int radeon_object_fbdev_mmap(struct radeon_object *robj,
struct vm_area_struct *vma);
unsigned long radeon_object_size(struct radeon_object *robj);
void radeon_object_clear_surface_reg(struct radeon_object *robj);
int radeon_object_check_tiling(struct radeon_object *robj, bool has_moved,
bool force_drop);
void radeon_object_set_tiling_flags(struct radeon_object *robj,
uint32_t tiling_flags, uint32_t pitch);
void radeon_object_get_tiling_flags(struct radeon_object *robj, uint32_t *tiling_flags, uint32_t *pitch);
void radeon_bo_move_notify(struct ttm_buffer_object *bo,
struct ttm_mem_reg *mem);
void radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
/*
* GEM objects.
*/
......@@ -315,8 +333,11 @@ struct radeon_mc {
unsigned gtt_location;
unsigned gtt_size;
unsigned vram_location;
unsigned vram_size;
/* for some chips with <= 32MB we need to lie
* about vram size near mc fb location */
unsigned mc_vram_size;
unsigned vram_width;
unsigned real_vram_size;
int vram_mtrr;
bool vram_is_ddr;
};
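The three sizes are related: real_vram_size is what the board actually has, mc_vram_size is what the memory controller gets programmed with (possibly larger, per the comment above), and the CPU aperture must never exceed real VRAM. A sketch of those invariants as a hypothetical helper:

	/* hypothetical sanity pass over the new fields */
	static void mc_fixup_sizes(struct radeon_mc *mc)
	{
		/* the MC window covers at least all real VRAM */
		if (mc->mc_vram_size < mc->real_vram_size)
			mc->mc_vram_size = mc->real_vram_size;
		/* CPU access is only safe within real VRAM */
		if (mc->aper_size > mc->real_vram_size)
			mc->aper_size = mc->real_vram_size;
	}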
......@@ -474,6 +495,39 @@ struct radeon_wb {
uint64_t gpu_addr;
};
/**
* struct radeon_pm - power management data
* @max_bandwidth: maximum bandwidth the gpu has (MByte/s)
* @igp_sideport_mclk: sideport memory clock MHz (rs690,rs740,rs780,rs880)
* @igp_system_mclk: system clock MHz (rs690,rs740,rs780,rs880)
* @igp_ht_link_clk: ht link clock MHz (rs690,rs740,rs780,rs880)
* @igp_ht_link_width: ht link width in bits (rs690,rs740,rs780,rs880)
* @k8_bandwidth: k8 bandwidth the gpu has (MByte/s) (IGP)
* @sideport_bandwidth: sideport bandwidth the gpu has (MByte/s) (IGP)
* @ht_bandwidth: ht bandwidth the gpu has (MByte/s) (IGP)
* @core_bandwidth: core GPU bandwidth the gpu has (MByte/s) (IGP)
* @sclk: GPU clock MHz (core bandwidth depends on this clock)
* @needed_bandwidth: current bandwidth needs
*
* It keeps track of various data needed to make power-management decisions.
* Bandwidth need is used to determine the minimum clock of the GPU and memory.
* The equation between gpu/memory clock and available bandwidth is hardware
* dependent (type of memory, bus size, efficiency, ...)
*/
struct radeon_pm {
fixed20_12 max_bandwidth;
fixed20_12 igp_sideport_mclk;
fixed20_12 igp_system_mclk;
fixed20_12 igp_ht_link_clk;
fixed20_12 igp_ht_link_width;
fixed20_12 k8_bandwidth;
fixed20_12 sideport_bandwidth;
fixed20_12 ht_bandwidth;
fixed20_12 core_bandwidth;
fixed20_12 sclk;
fixed20_12 needed_bandwidth;
};
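Since every field shares the same 20.12 scale, bandwidth comparisons can be done directly on .full; for example, a hypothetical reclocking predicate:

	/* hypothetical: fixed20_12 values share one scale, so .full compares work */
	static bool radeon_pm_bandwidth_ok(struct radeon_pm *pm)
	{
		return pm->needed_bandwidth.full <= pm->max_bandwidth.full;
	}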
/*
* Benchmarking
......@@ -481,6 +535,12 @@ struct radeon_wb {
void radeon_benchmark(struct radeon_device *rdev);
/*
* Testing
*/
void radeon_test_moves(struct radeon_device *rdev);
/*
* Debugfs
*/
......@@ -535,6 +595,11 @@ struct radeon_asic {
void (*set_memory_clock)(struct radeon_device *rdev, uint32_t mem_clock);
void (*set_pcie_lanes)(struct radeon_device *rdev, int lanes);
void (*set_clock_gating)(struct radeon_device *rdev, int enable);
int (*set_surface_reg)(struct radeon_device *rdev, int reg,
uint32_t tiling_flags, uint32_t pitch,
uint32_t offset, uint32_t obj_size);
int (*clear_surface_reg)(struct radeon_device *rdev, int reg);
void (*bandwidth_update)(struct radeon_device *rdev);
};
union radeon_asic_config {
......@@ -566,6 +631,10 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data,
int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int radeon_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
/*
......@@ -594,8 +663,8 @@ struct radeon_device {
struct radeon_object *fbdev_robj;
struct radeon_framebuffer *fbdev_rfb;
/* Register mmio */
unsigned long rmmio_base;
unsigned long rmmio_size;
resource_size_t rmmio_base;
resource_size_t rmmio_size;
void *rmmio;
radeon_rreg_t mm_rreg;
radeon_wreg_t mm_wreg;
......@@ -619,11 +688,14 @@ struct radeon_device {
struct radeon_irq irq;
struct radeon_asic *asic;
struct radeon_gem gem;
struct radeon_pm pm;
struct mutex cs_mutex;
struct radeon_wb wb;
bool gpu_lockup;
bool shutdown;
bool suspend;
bool need_dma32;
struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES];
};
int radeon_device_init(struct radeon_device *rdev,
......@@ -670,6 +742,8 @@ void r100_pll_errata_after_index(struct radeon_device *rdev);
/*
* ASICs helpers.
*/
#define ASIC_IS_RN50(rdev) ((rdev->pdev->device == 0x515e) || \
(rdev->pdev->device == 0x5969))
#define ASIC_IS_RV100(rdev) ((rdev->family == CHIP_RV100) || \
(rdev->family == CHIP_RV200) || \
(rdev->family == CHIP_RS100) || \
......@@ -796,5 +870,8 @@ static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
#define radeon_set_memory_clock(rdev, e) (rdev)->asic->set_memory_clock((rdev), (e))
#define radeon_set_pcie_lanes(rdev, l) (rdev)->asic->set_pcie_lanes((rdev), (l))
#define radeon_set_clock_gating(rdev, e) (rdev)->asic->set_clock_gating((rdev), (e))
#define radeon_set_surface_reg(rdev, r, f, p, o, s) ((rdev)->asic->set_surface_reg((rdev), (r), (f), (p), (o), (s)))
#define radeon_clear_surface_reg(rdev, r) ((rdev)->asic->clear_surface_reg((rdev), (r)))
#define radeon_bandwidth_update(rdev) (rdev)->asic->bandwidth_update((rdev))
#endif
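Note that not every ASIC table below fills in the new hooks (rs600, for instance, only gains bandwidth_update), so a defensive caller would check the pointer first; a sketch (the guard itself is hypothetical, not from the commit):

	static int try_set_surface_reg(struct radeon_device *rdev, int reg,
				       uint32_t flags, uint32_t pitch,
				       uint32_t offset, uint32_t obj_size)
	{
		if (rdev->asic->set_surface_reg == NULL)
			return -EINVAL;	/* ASIC provides no surface registers hook */
		return radeon_set_surface_reg(rdev, reg, flags, pitch,
					      offset, obj_size);
	}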
......@@ -71,6 +71,11 @@ int r100_copy_blit(struct radeon_device *rdev,
uint64_t dst_offset,
unsigned num_pages,
struct radeon_fence *fence);
int r100_set_surface_reg(struct radeon_device *rdev, int reg,
uint32_t tiling_flags, uint32_t pitch,
uint32_t offset, uint32_t obj_size);
int r100_clear_surface_reg(struct radeon_device *rdev, int reg);
void r100_bandwidth_update(struct radeon_device *rdev);
static struct radeon_asic r100_asic = {
.init = &r100_init,
......@@ -100,6 +105,9 @@ static struct radeon_asic r100_asic = {
.set_memory_clock = NULL,
.set_pcie_lanes = NULL,
.set_clock_gating = &radeon_legacy_set_clock_gating,
.set_surface_reg = r100_set_surface_reg,
.clear_surface_reg = r100_clear_surface_reg,
.bandwidth_update = &r100_bandwidth_update,
};
......@@ -128,6 +136,7 @@ int r300_copy_dma(struct radeon_device *rdev,
uint64_t dst_offset,
unsigned num_pages,
struct radeon_fence *fence);
static struct radeon_asic r300_asic = {
.init = &r300_init,
.errata = &r300_errata,
......@@ -156,6 +165,9 @@ static struct radeon_asic r300_asic = {
.set_memory_clock = NULL,
.set_pcie_lanes = &rv370_set_pcie_lanes,
.set_clock_gating = &radeon_legacy_set_clock_gating,
.set_surface_reg = r100_set_surface_reg,
.clear_surface_reg = r100_clear_surface_reg,
.bandwidth_update = &r100_bandwidth_update,
};
/*
......@@ -193,6 +205,9 @@ static struct radeon_asic r420_asic = {
.set_memory_clock = &radeon_atom_set_memory_clock,
.set_pcie_lanes = &rv370_set_pcie_lanes,
.set_clock_gating = &radeon_atom_set_clock_gating,
.set_surface_reg = r100_set_surface_reg,
.clear_surface_reg = r100_clear_surface_reg,
.bandwidth_update = &r100_bandwidth_update,
};
......@@ -237,6 +252,9 @@ static struct radeon_asic rs400_asic = {
.set_memory_clock = NULL,
.set_pcie_lanes = NULL,
.set_clock_gating = &radeon_legacy_set_clock_gating,
.set_surface_reg = r100_set_surface_reg,
.clear_surface_reg = r100_clear_surface_reg,
.bandwidth_update = &r100_bandwidth_update,
};
......@@ -254,6 +272,7 @@ void rs600_gart_tlb_flush(struct radeon_device *rdev);
int rs600_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr);
uint32_t rs600_mc_rreg(struct radeon_device *rdev, uint32_t reg);
void rs600_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
void rs600_bandwidth_update(struct radeon_device *rdev);
static struct radeon_asic rs600_asic = {
.init = &r300_init,
.errata = &rs600_errata,
......@@ -282,6 +301,7 @@ static struct radeon_asic rs600_asic = {
.set_memory_clock = &radeon_atom_set_memory_clock,
.set_pcie_lanes = NULL,
.set_clock_gating = &radeon_atom_set_clock_gating,
.bandwidth_update = &rs600_bandwidth_update,
};
......@@ -294,6 +314,7 @@ int rs690_mc_init(struct radeon_device *rdev);
void rs690_mc_fini(struct radeon_device *rdev);
uint32_t rs690_mc_rreg(struct radeon_device *rdev, uint32_t reg);
void rs690_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
void rs690_bandwidth_update(struct radeon_device *rdev);
static struct radeon_asic rs690_asic = {
.init = &r300_init,
.errata = &rs690_errata,
......@@ -322,6 +343,9 @@ static struct radeon_asic rs690_asic = {
.set_memory_clock = &radeon_atom_set_memory_clock,
.set_pcie_lanes = NULL,
.set_clock_gating = &radeon_atom_set_clock_gating,
.set_surface_reg = r100_set_surface_reg,
.clear_surface_reg = r100_clear_surface_reg,
.bandwidth_update = &rs690_bandwidth_update,
};
......@@ -339,6 +363,7 @@ void rv515_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
void rv515_ring_start(struct radeon_device *rdev);
uint32_t rv515_pcie_rreg(struct radeon_device *rdev, uint32_t reg);
void rv515_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
void rv515_bandwidth_update(struct radeon_device *rdev);
static struct radeon_asic rv515_asic = {
.init = &rv515_init,
.errata = &rv515_errata,
......@@ -367,6 +392,9 @@ static struct radeon_asic rv515_asic = {
.set_memory_clock = &radeon_atom_set_memory_clock,
.set_pcie_lanes = &rv370_set_pcie_lanes,
.set_clock_gating = &radeon_atom_set_clock_gating,
.set_surface_reg = r100_set_surface_reg,
.clear_surface_reg = r100_clear_surface_reg,
.bandwidth_update = &rv515_bandwidth_update,
};
......@@ -377,6 +405,7 @@ void r520_errata(struct radeon_device *rdev);
void r520_vram_info(struct radeon_device *rdev);
int r520_mc_init(struct radeon_device *rdev);
void r520_mc_fini(struct radeon_device *rdev);
void r520_bandwidth_update(struct radeon_device *rdev);
static struct radeon_asic r520_asic = {
.init = &rv515_init,
.errata = &r520_errata,
......@@ -405,6 +434,9 @@ static struct radeon_asic r520_asic = {
.set_memory_clock = &radeon_atom_set_memory_clock,
.set_pcie_lanes = &rv370_set_pcie_lanes,
.set_clock_gating = &radeon_atom_set_clock_gating,
.set_surface_reg = r100_set_surface_reg,
.clear_surface_reg = r100_clear_surface_reg,
.bandwidth_update = &r520_bandwidth_update,
};
/*
......
......@@ -103,7 +103,8 @@ static inline struct radeon_i2c_bus_rec radeon_lookup_gpio(struct drm_device
static bool radeon_atom_apply_quirks(struct drm_device *dev,
uint32_t supported_device,
int *connector_type,
struct radeon_i2c_bus_rec *i2c_bus)
struct radeon_i2c_bus_rec *i2c_bus,
uint8_t *line_mux)
{
/* Asus M2A-VM HDMI board lists the DVI port as HDMI */
......@@ -127,8 +128,10 @@ static bool radeon_atom_apply_quirks(struct drm_device *dev,
if ((dev->pdev->device == 0x5653) &&
(dev->pdev->subsystem_vendor == 0x1462) &&
(dev->pdev->subsystem_device == 0x0291)) {
- if (*connector_type == DRM_MODE_CONNECTOR_LVDS)
+ if (*connector_type == DRM_MODE_CONNECTOR_LVDS) {
i2c_bus->valid = false;
+ *line_mux = 53;
+ }
}
/* Funky macbooks */
......@@ -526,7 +529,7 @@ bool radeon_get_atom_connector_info_from_supported_devices_table(struct
if (!radeon_atom_apply_quirks
(dev, (1 << i), &bios_connectors[i].connector_type,
- &bios_connectors[i].ddc_bus))
+ &bios_connectors[i].ddc_bus, &bios_connectors[i].line_mux))
continue;
bios_connectors[i].valid = true;
......
......@@ -63,7 +63,7 @@ void radeon_benchmark_move(struct radeon_device *rdev, unsigned bsize,
if (r) {
goto out_cleanup;
}
- r = radeon_copy_dma(rdev, saddr, daddr, size >> 14, fence);
+ r = radeon_copy_dma(rdev, saddr, daddr, size / 4096, fence);
if (r) {
goto out_cleanup;
}
......@@ -88,7 +88,7 @@ void radeon_benchmark_move(struct radeon_device *rdev, unsigned bsize,
if (r) {
goto out_cleanup;
}
- r = radeon_copy_blit(rdev, saddr, daddr, size >> 14, fence);
+ r = radeon_copy_blit(rdev, saddr, daddr, size / 4096, fence);
if (r) {
goto out_cleanup;
}
......
......@@ -127,17 +127,23 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
sizeof(struct drm_radeon_cs_chunk))) {
return -EFAULT;
}
+ p->chunks[i].length_dw = user_chunk.length_dw;
+ p->chunks[i].kdata = NULL;
p->chunks[i].chunk_id = user_chunk.chunk_id;
if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS) {
p->chunk_relocs_idx = i;
}
if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
p->chunk_ib_idx = i;
+ /* zero length IB isn't useful */
+ if (p->chunks[i].length_dw == 0)
+ return -EINVAL;
}
- p->chunks[i].length_dw = user_chunk.length_dw;
cdata = (uint32_t *)(unsigned long)user_chunk.chunk_data;
- p->chunks[i].kdata = NULL;
size = p->chunks[i].length_dw * sizeof(uint32_t);
p->chunks[i].kdata = kzalloc(size, GFP_KERNEL);
if (p->chunks[i].kdata == NULL) {
......
......@@ -111,9 +111,11 @@ static void radeon_set_cursor(struct drm_crtc *crtc, struct drm_gem_object *obj,
if (ASIC_IS_AVIVO(rdev))
WREG32(AVIVO_D1CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, gpu_addr);
- else
+ else {
+ radeon_crtc->legacy_cursor_offset = gpu_addr - radeon_crtc->legacy_display_base_addr;
+ /* offset is from DISP(2)_BASE_ADDRESS */
- WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, gpu_addr);
+ WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, radeon_crtc->legacy_cursor_offset);
+ }
}
int radeon_crtc_cursor_set(struct drm_crtc *crtc,
......@@ -245,6 +247,9 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc,
(RADEON_CUR_LOCK
| ((xorigin ? 0 : x) << 16)
| (yorigin ? 0 : y)));
/* offset is from DISP(2)_BASE_ADDRESS */
WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, (radeon_crtc->legacy_cursor_offset +
(yorigin * 256)));
}
radeon_lock_cursor(crtc, false);
......
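On pre-AVIVO chips CUR_OFFSET is relative to the display base address and a cursor line is 256 bytes, which is what the two cursor hunks above encode; condensed into one helper for clarity:

	/* condensed from the hunks above: legacy cursor offset arithmetic */
	static uint32_t legacy_cur_offset(uint32_t cursor_gpu_addr,
					  uint32_t display_base_addr, int yorigin)
	{
		/* base-relative start of the cursor image, advanced past the
		 * clipped top rows (256 bytes per cursor line) */
		return (cursor_gpu_addr - display_base_addr) + yorigin * 256;
	}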
......@@ -48,6 +48,8 @@ static void radeon_surface_init(struct radeon_device *rdev)
i * (RADEON_SURFACE1_INFO - RADEON_SURFACE0_INFO),
0);
}
/* enable surfaces */
WREG32(RADEON_SURFACE_CNTL, 0);
}
}
......@@ -119,7 +121,7 @@ int radeon_mc_setup(struct radeon_device *rdev)
if (rdev->mc.vram_location != 0xFFFFFFFFUL) {
/* vram location was already set up, try to put gtt after
* if it fits */
tmp = rdev->mc.vram_location + rdev->mc.vram_size;
tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size;
tmp = (tmp + rdev->mc.gtt_size - 1) & ~(rdev->mc.gtt_size - 1);
if ((0xFFFFFFFFUL - tmp) >= rdev->mc.gtt_size) {
rdev->mc.gtt_location = tmp;
......@@ -134,13 +136,13 @@ int radeon_mc_setup(struct radeon_device *rdev)
} else if (rdev->mc.gtt_location != 0xFFFFFFFFUL) {
/* gtt location was already set up, try to put vram before
* if it fits */
- if (rdev->mc.vram_size < rdev->mc.gtt_location) {
+ if (rdev->mc.mc_vram_size < rdev->mc.gtt_location) {
rdev->mc.vram_location = 0;
} else {
tmp = rdev->mc.gtt_location + rdev->mc.gtt_size;
- tmp += (rdev->mc.vram_size - 1);
- tmp &= ~(rdev->mc.vram_size - 1);
- if ((0xFFFFFFFFUL - tmp) >= rdev->mc.vram_size) {
+ tmp += (rdev->mc.mc_vram_size - 1);
+ tmp &= ~(rdev->mc.mc_vram_size - 1);
+ if ((0xFFFFFFFFUL - tmp) >= rdev->mc.mc_vram_size) {
rdev->mc.vram_location = tmp;
} else {
printk(KERN_ERR "[drm] vram too big to fit "
......@@ -150,12 +152,14 @@ int radeon_mc_setup(struct radeon_device *rdev)
}
} else {
rdev->mc.vram_location = 0;
- rdev->mc.gtt_location = rdev->mc.vram_size;
+ rdev->mc.gtt_location = rdev->mc.mc_vram_size;
}
DRM_INFO("radeon: VRAM %uM\n", rdev->mc.vram_size >> 20);
DRM_INFO("radeon: VRAM %uM\n", rdev->mc.real_vram_size >> 20);
DRM_INFO("radeon: VRAM from 0x%08X to 0x%08X\n",
rdev->mc.vram_location,
- rdev->mc.vram_location + rdev->mc.vram_size - 1);
+ rdev->mc.vram_location + rdev->mc.mc_vram_size - 1);
+ if (rdev->mc.real_vram_size != rdev->mc.mc_vram_size)
+ DRM_INFO("radeon: VRAM less than aperture workaround enabled\n");
DRM_INFO("radeon: GTT %uM\n", rdev->mc.gtt_size >> 20);
DRM_INFO("radeon: GTT from 0x%08X to 0x%08X\n",
rdev->mc.gtt_location,
......@@ -450,6 +454,7 @@ int radeon_device_init(struct radeon_device *rdev,
uint32_t flags)
{
int r, ret;
int dma_bits;
DRM_INFO("radeon: Initializing kernel modesetting.\n");
rdev->shutdown = false;
......@@ -492,8 +497,20 @@ int radeon_device_init(struct radeon_device *rdev,
return r;
}
- /* Report DMA addressing limitation */
- r = pci_set_dma_mask(rdev->pdev, DMA_BIT_MASK(32));
+ /* set DMA mask + need_dma32 flags.
+  * PCIE - can handle 40-bits.
+  * IGP - can handle 40-bits (in theory)
+  * AGP - generally dma32 is safest
+  * PCI - only dma32
+  */
+ rdev->need_dma32 = false;
+ if (rdev->flags & RADEON_IS_AGP)
+ rdev->need_dma32 = true;
+ if (rdev->flags & RADEON_IS_PCI)
+ rdev->need_dma32 = true;
+ dma_bits = rdev->need_dma32 ? 32 : 40;
+ r = pci_set_dma_mask(rdev->pdev, DMA_BIT_MASK(dma_bits));
if (r) {
printk(KERN_WARNING "radeon: No suitable DMA available.\n");
}
......@@ -546,27 +563,22 @@ int radeon_device_init(struct radeon_device *rdev,
radeon_combios_asic_init(rdev->ddev);
}
}
- /* Initialize clocks */
- r = radeon_clocks_init(rdev);
- if (r) {
- return r;
- }
/* Get vram information */
radeon_vram_info(rdev);
- /* Device is severly broken if aper size > vram size.
-  * for RN50/M6/M7 - Novell bug 204882 ?
-  */
- if (rdev->mc.vram_size < rdev->mc.aper_size) {
- rdev->mc.aper_size = rdev->mc.vram_size;
- }
/* Add an MTRR for the VRAM */
rdev->mc.vram_mtrr = mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size,
MTRR_TYPE_WRCOMB, 1);
DRM_INFO("Detected VRAM RAM=%uM, BAR=%uM\n",
- rdev->mc.vram_size >> 20,
+ rdev->mc.real_vram_size >> 20,
(unsigned)rdev->mc.aper_size >> 20);
DRM_INFO("RAM width %dbits %cDR\n",
rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S');
+ /* Initialize clocks */
+ r = radeon_clocks_init(rdev);
+ if (r) {
+ return r;
+ }
/* Initialize memory controller (also test AGP) */
r = radeon_mc_init(rdev);
if (r) {
......@@ -626,6 +638,9 @@ int radeon_device_init(struct radeon_device *rdev,
if (!ret) {
DRM_INFO("radeon: kernel modesetting successfully initialized.\n");
}
if (radeon_testing) {
radeon_test_moves(rdev);
}
if (radeon_benchmarking) {
radeon_benchmark(rdev);
}
......
......@@ -187,6 +187,7 @@ static void radeon_crtc_init(struct drm_device *dev, int index)
drm_mode_crtc_set_gamma_size(&radeon_crtc->base, 256);
radeon_crtc->crtc_id = index;
rdev->mode_info.crtcs[index] = radeon_crtc;
radeon_crtc->mode_set.crtc = &radeon_crtc->base;
radeon_crtc->mode_set.connectors = (struct drm_connector **)(radeon_crtc + 1);
......@@ -491,7 +492,11 @@ void radeon_compute_pll(struct radeon_pll *pll,
tmp += (uint64_t)pll->reference_freq * 1000 * frac_feedback_div;
current_freq = radeon_div(tmp, ref_div * post_div);
- error = abs(current_freq - freq);
+ if (flags & RADEON_PLL_PREFER_CLOSEST_LOWER) {
+ error = freq - current_freq;
+ error = error < 0 ? 0xffffffff : error;
+ } else
+ error = abs(current_freq - freq);
vco_diff = abs(vco - best_vco);
if ((best_vco == 0 && error < best_error) ||
......@@ -657,36 +662,51 @@ void radeon_modeset_fini(struct radeon_device *rdev)
}
}
- void radeon_init_disp_bandwidth(struct drm_device *dev)
+ bool radeon_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
{
- struct radeon_device *rdev = dev->dev_private;
- struct drm_display_mode *modes[2];
- int pixel_bytes[2];
- struct drm_crtc *crtc;
- pixel_bytes[0] = pixel_bytes[1] = 0;
- modes[0] = modes[1] = NULL;
- list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
- struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct drm_encoder *encoder;
+ struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+ struct radeon_encoder *radeon_encoder;
+ bool first = true;
- if (crtc->enabled && crtc->fb) {
- modes[radeon_crtc->crtc_id] = &crtc->mode;
- pixel_bytes[radeon_crtc->crtc_id] = crtc->fb->bits_per_pixel / 8;
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+ radeon_encoder = to_radeon_encoder(encoder);
+ if (encoder->crtc != crtc)
+ continue;
+ if (first) {
+ radeon_crtc->rmx_type = radeon_encoder->rmx_type;
+ radeon_crtc->devices = radeon_encoder->devices;
+ memcpy(&radeon_crtc->native_mode,
+ &radeon_encoder->native_mode,
+ sizeof(struct radeon_native_mode));
+ first = false;
+ } else {
+ if (radeon_crtc->rmx_type != radeon_encoder->rmx_type) {
+ /* WARNING: Right now this can't happen but
+  * in the future we need to check that scaling
+  * is consistent across different encoders
+  * (i.e. all encoders can work with the same
+  * scaling).
+  */
+ DRM_ERROR("Scaling not consistent across encoders.\n");
+ return false;
+ }
+ }
+ }
}
- if (ASIC_IS_AVIVO(rdev)) {
- radeon_init_disp_bw_avivo(dev,
- modes[0],
- pixel_bytes[0],
- modes[1],
- pixel_bytes[1]);
+ if (radeon_crtc->rmx_type != RMX_OFF) {
+ fixed20_12 a, b;
+ a.full = rfixed_const(crtc->mode.vdisplay);
+ b.full = rfixed_const(radeon_crtc->native_mode.panel_xres);
+ radeon_crtc->vsc.full = rfixed_div(a, b);
+ a.full = rfixed_const(crtc->mode.hdisplay);
+ b.full = rfixed_const(radeon_crtc->native_mode.panel_yres);
+ radeon_crtc->hsc.full = rfixed_div(a, b);
} else {
- radeon_init_disp_bw_legacy(dev,
- modes[0],
- pixel_bytes[0],
- modes[1],
- pixel_bytes[1]);
+ radeon_crtc->vsc.full = rfixed_const(1);
+ radeon_crtc->hsc.full = rfixed_const(1);
}
+ return true;
}
......@@ -89,6 +89,7 @@ int radeon_agpmode = 0;
int radeon_vram_limit = 0;
int radeon_gart_size = 512; /* default gart size */
int radeon_benchmarking = 0;
int radeon_testing = 0;
int radeon_connector_table = 0;
#endif
......@@ -117,6 +118,9 @@ module_param_named(gartsize, radeon_gart_size, int, 0600);
MODULE_PARM_DESC(benchmark, "Run benchmark");
module_param_named(benchmark, radeon_benchmarking, int, 0444);
MODULE_PARM_DESC(test, "Run tests");
module_param_named(test, radeon_testing, int, 0444);
MODULE_PARM_DESC(connector_table, "Force connector table");
module_param_named(connector_table, radeon_connector_table, int, 0444);
#endif
......
......@@ -154,7 +154,6 @@ void radeon_rmx_mode_fixup(struct drm_encoder *encoder,
if (mode->hdisplay < native_mode->panel_xres ||
mode->vdisplay < native_mode->panel_yres) {
- radeon_encoder->flags |= RADEON_USE_RMX;
if (ASIC_IS_AVIVO(rdev)) {
adjusted_mode->hdisplay = native_mode->panel_xres;
adjusted_mode->vdisplay = native_mode->panel_yres;
......@@ -197,15 +196,13 @@ void radeon_rmx_mode_fixup(struct drm_encoder *encoder,
}
}
static bool radeon_atom_mode_fixup(struct drm_encoder *encoder,
struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode)
{
struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
- radeon_encoder->flags &= ~RADEON_USE_RMX;
drm_mode_set_crtcinfo(adjusted_mode, 0);
if (radeon_encoder->rmx_type != RMX_OFF)
......@@ -808,234 +805,6 @@ atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action)
}
static void atom_rv515_force_tv_scaler(struct radeon_device *rdev)
{
WREG32(0x659C, 0x0);
WREG32(0x6594, 0x705);
WREG32(0x65A4, 0x10001);
WREG32(0x65D8, 0x0);
WREG32(0x65B0, 0x0);
WREG32(0x65C0, 0x0);
WREG32(0x65D4, 0x0);
WREG32(0x6578, 0x0);
WREG32(0x657C, 0x841880A8);
WREG32(0x6578, 0x1);
WREG32(0x657C, 0x84208680);
WREG32(0x6578, 0x2);
WREG32(0x657C, 0xBFF880B0);
WREG32(0x6578, 0x100);
WREG32(0x657C, 0x83D88088);
WREG32(0x6578, 0x101);
WREG32(0x657C, 0x84608680);
WREG32(0x6578, 0x102);
WREG32(0x657C, 0xBFF080D0);
WREG32(0x6578, 0x200);
WREG32(0x657C, 0x83988068);
WREG32(0x6578, 0x201);
WREG32(0x657C, 0x84A08680);
WREG32(0x6578, 0x202);
WREG32(0x657C, 0xBFF080F8);
WREG32(0x6578, 0x300);
WREG32(0x657C, 0x83588058);
WREG32(0x6578, 0x301);
WREG32(0x657C, 0x84E08660);
WREG32(0x6578, 0x302);
WREG32(0x657C, 0xBFF88120);
WREG32(0x6578, 0x400);
WREG32(0x657C, 0x83188040);
WREG32(0x6578, 0x401);
WREG32(0x657C, 0x85008660);
WREG32(0x6578, 0x402);
WREG32(0x657C, 0xBFF88150);
WREG32(0x6578, 0x500);
WREG32(0x657C, 0x82D88030);
WREG32(0x6578, 0x501);
WREG32(0x657C, 0x85408640);
WREG32(0x6578, 0x502);
WREG32(0x657C, 0xBFF88180);
WREG32(0x6578, 0x600);
WREG32(0x657C, 0x82A08018);
WREG32(0x6578, 0x601);
WREG32(0x657C, 0x85808620);
WREG32(0x6578, 0x602);
WREG32(0x657C, 0xBFF081B8);
WREG32(0x6578, 0x700);
WREG32(0x657C, 0x82608010);
WREG32(0x6578, 0x701);
WREG32(0x657C, 0x85A08600);
WREG32(0x6578, 0x702);
WREG32(0x657C, 0x800081F0);
WREG32(0x6578, 0x800);
WREG32(0x657C, 0x8228BFF8);
WREG32(0x6578, 0x801);
WREG32(0x657C, 0x85E085E0);
WREG32(0x6578, 0x802);
WREG32(0x657C, 0xBFF88228);
WREG32(0x6578, 0x10000);
WREG32(0x657C, 0x82A8BF00);
WREG32(0x6578, 0x10001);
WREG32(0x657C, 0x82A08CC0);
WREG32(0x6578, 0x10002);
WREG32(0x657C, 0x8008BEF8);
WREG32(0x6578, 0x10100);
WREG32(0x657C, 0x81F0BF28);
WREG32(0x6578, 0x10101);
WREG32(0x657C, 0x83608CA0);
WREG32(0x6578, 0x10102);
WREG32(0x657C, 0x8018BED0);
WREG32(0x6578, 0x10200);
WREG32(0x657C, 0x8148BF38);
WREG32(0x6578, 0x10201);
WREG32(0x657C, 0x84408C80);
WREG32(0x6578, 0x10202);
WREG32(0x657C, 0x8008BEB8);
WREG32(0x6578, 0x10300);
WREG32(0x657C, 0x80B0BF78);
WREG32(0x6578, 0x10301);
WREG32(0x657C, 0x85008C20);
WREG32(0x6578, 0x10302);
WREG32(0x657C, 0x8020BEA0);
WREG32(0x6578, 0x10400);
WREG32(0x657C, 0x8028BF90);
WREG32(0x6578, 0x10401);
WREG32(0x657C, 0x85E08BC0);
WREG32(0x6578, 0x10402);
WREG32(0x657C, 0x8018BE90);
WREG32(0x6578, 0x10500);
WREG32(0x657C, 0xBFB8BFB0);
WREG32(0x6578, 0x10501);
WREG32(0x657C, 0x86C08B40);
WREG32(0x6578, 0x10502);
WREG32(0x657C, 0x8010BE90);
WREG32(0x6578, 0x10600);
WREG32(0x657C, 0xBF58BFC8);
WREG32(0x6578, 0x10601);
WREG32(0x657C, 0x87A08AA0);
WREG32(0x6578, 0x10602);
WREG32(0x657C, 0x8010BE98);
WREG32(0x6578, 0x10700);
WREG32(0x657C, 0xBF10BFF0);
WREG32(0x6578, 0x10701);
WREG32(0x657C, 0x886089E0);
WREG32(0x6578, 0x10702);
WREG32(0x657C, 0x8018BEB0);
WREG32(0x6578, 0x10800);
WREG32(0x657C, 0xBED8BFE8);
WREG32(0x6578, 0x10801);
WREG32(0x657C, 0x89408940);
WREG32(0x6578, 0x10802);
WREG32(0x657C, 0xBFE8BED8);
WREG32(0x6578, 0x20000);
WREG32(0x657C, 0x80008000);
WREG32(0x6578, 0x20001);
WREG32(0x657C, 0x90008000);
WREG32(0x6578, 0x20002);
WREG32(0x657C, 0x80008000);
WREG32(0x6578, 0x20003);
WREG32(0x657C, 0x80008000);
WREG32(0x6578, 0x20100);
WREG32(0x657C, 0x80108000);
WREG32(0x6578, 0x20101);
WREG32(0x657C, 0x8FE0BF70);
WREG32(0x6578, 0x20102);
WREG32(0x657C, 0xBFE880C0);
WREG32(0x6578, 0x20103);
WREG32(0x657C, 0x80008000);
WREG32(0x6578, 0x20200);
WREG32(0x657C, 0x8018BFF8);
WREG32(0x6578, 0x20201);
WREG32(0x657C, 0x8F80BF08);
WREG32(0x6578, 0x20202);
WREG32(0x657C, 0xBFD081A0);
WREG32(0x6578, 0x20203);
WREG32(0x657C, 0xBFF88000);
WREG32(0x6578, 0x20300);
WREG32(0x657C, 0x80188000);
WREG32(0x6578, 0x20301);
WREG32(0x657C, 0x8EE0BEC0);
WREG32(0x6578, 0x20302);
WREG32(0x657C, 0xBFB082A0);
WREG32(0x6578, 0x20303);
WREG32(0x657C, 0x80008000);
WREG32(0x6578, 0x20400);
WREG32(0x657C, 0x80188000);
WREG32(0x6578, 0x20401);
WREG32(0x657C, 0x8E00BEA0);
WREG32(0x6578, 0x20402);
WREG32(0x657C, 0xBF8883C0);
WREG32(0x6578, 0x20403);
WREG32(0x657C, 0x80008000);
WREG32(0x6578, 0x20500);
WREG32(0x657C, 0x80188000);
WREG32(0x6578, 0x20501);
WREG32(0x657C, 0x8D00BE90);
WREG32(0x6578, 0x20502);
WREG32(0x657C, 0xBF588500);
WREG32(0x6578, 0x20503);
WREG32(0x657C, 0x80008008);
WREG32(0x6578, 0x20600);
WREG32(0x657C, 0x80188000);
WREG32(0x6578, 0x20601);
WREG32(0x657C, 0x8BC0BE98);
WREG32(0x6578, 0x20602);
WREG32(0x657C, 0xBF308660);
WREG32(0x6578, 0x20603);
WREG32(0x657C, 0x80008008);
WREG32(0x6578, 0x20700);
WREG32(0x657C, 0x80108000);
WREG32(0x6578, 0x20701);
WREG32(0x657C, 0x8A80BEB0);
WREG32(0x6578, 0x20702);
WREG32(0x657C, 0xBF0087C0);
WREG32(0x6578, 0x20703);
WREG32(0x657C, 0x80008008);
WREG32(0x6578, 0x20800);
WREG32(0x657C, 0x80108000);
WREG32(0x6578, 0x20801);
WREG32(0x657C, 0x8920BED0);
WREG32(0x6578, 0x20802);
WREG32(0x657C, 0xBED08920);
WREG32(0x6578, 0x20803);
WREG32(0x657C, 0x80008010);
WREG32(0x6578, 0x30000);
WREG32(0x657C, 0x90008000);
WREG32(0x6578, 0x30001);
WREG32(0x657C, 0x80008000);
WREG32(0x6578, 0x30100);
WREG32(0x657C, 0x8FE0BF90);
WREG32(0x6578, 0x30101);
WREG32(0x657C, 0xBFF880A0);
WREG32(0x6578, 0x30200);
WREG32(0x657C, 0x8F60BF40);
WREG32(0x6578, 0x30201);
WREG32(0x657C, 0xBFE88180);
WREG32(0x6578, 0x30300);
WREG32(0x657C, 0x8EC0BF00);
WREG32(0x6578, 0x30301);
WREG32(0x657C, 0xBFC88280);
WREG32(0x6578, 0x30400);
WREG32(0x657C, 0x8DE0BEE0);
WREG32(0x6578, 0x30401);
WREG32(0x657C, 0xBFA083A0);
WREG32(0x6578, 0x30500);
WREG32(0x657C, 0x8CE0BED0);
WREG32(0x6578, 0x30501);
WREG32(0x657C, 0xBF7884E0);
WREG32(0x6578, 0x30600);
WREG32(0x657C, 0x8BA0BED8);
WREG32(0x6578, 0x30601);
WREG32(0x657C, 0xBF508640);
WREG32(0x6578, 0x30700);
WREG32(0x657C, 0x8A60BEE8);
WREG32(0x6578, 0x30701);
WREG32(0x657C, 0xBF2087A0);
WREG32(0x6578, 0x30800);
WREG32(0x657C, 0x8900BF00);
WREG32(0x6578, 0x30801);
WREG32(0x657C, 0xBF008900);
}
static void
atombios_yuv_setup(struct drm_encoder *encoder, bool enable)
{
......@@ -1073,129 +842,6 @@ atombios_yuv_setup(struct drm_encoder *encoder, bool enable)
WREG32(reg, temp);
}
static void
atombios_overscan_setup(struct drm_encoder *encoder,
struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode)
{
struct drm_device *dev = encoder->dev;
struct radeon_device *rdev = dev->dev_private;
struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
SET_CRTC_OVERSCAN_PS_ALLOCATION args;
int index = GetIndexIntoMasterTable(COMMAND, SetCRTC_OverScan);
memset(&args, 0, sizeof(args));
args.usOverscanRight = 0;
args.usOverscanLeft = 0;
args.usOverscanBottom = 0;
args.usOverscanTop = 0;
args.ucCRTC = radeon_crtc->crtc_id;
if (radeon_encoder->flags & RADEON_USE_RMX) {
if (radeon_encoder->rmx_type == RMX_FULL) {
args.usOverscanRight = 0;
args.usOverscanLeft = 0;
args.usOverscanBottom = 0;
args.usOverscanTop = 0;
} else if (radeon_encoder->rmx_type == RMX_CENTER) {
args.usOverscanTop = (adjusted_mode->crtc_vdisplay - mode->crtc_vdisplay) / 2;
args.usOverscanBottom = (adjusted_mode->crtc_vdisplay - mode->crtc_vdisplay) / 2;
args.usOverscanLeft = (adjusted_mode->crtc_hdisplay - mode->crtc_hdisplay) / 2;
args.usOverscanRight = (adjusted_mode->crtc_hdisplay - mode->crtc_hdisplay) / 2;
} else if (radeon_encoder->rmx_type == RMX_ASPECT) {
int a1 = mode->crtc_vdisplay * adjusted_mode->crtc_hdisplay;
int a2 = adjusted_mode->crtc_vdisplay * mode->crtc_hdisplay;
if (a1 > a2) {
args.usOverscanLeft = (adjusted_mode->crtc_hdisplay - (a2 / mode->crtc_vdisplay)) / 2;
args.usOverscanRight = (adjusted_mode->crtc_hdisplay - (a2 / mode->crtc_vdisplay)) / 2;
} else if (a2 > a1) {
args.usOverscanLeft = (adjusted_mode->crtc_vdisplay - (a1 / mode->crtc_hdisplay)) / 2;
args.usOverscanRight = (adjusted_mode->crtc_vdisplay - (a1 / mode->crtc_hdisplay)) / 2;
}
}
}
atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
}
static void
atombios_scaler_setup(struct drm_encoder *encoder)
{
struct drm_device *dev = encoder->dev;
struct radeon_device *rdev = dev->dev_private;
struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
ENABLE_SCALER_PS_ALLOCATION args;
int index = GetIndexIntoMasterTable(COMMAND, EnableScaler);
/* fixme - fill in enc_priv for atom dac */
enum radeon_tv_std tv_std = TV_STD_NTSC;
if (!ASIC_IS_AVIVO(rdev) && radeon_crtc->crtc_id)
return;
memset(&args, 0, sizeof(args));
args.ucScaler = radeon_crtc->crtc_id;
if (radeon_encoder->devices & (ATOM_DEVICE_TV_SUPPORT)) {
switch (tv_std) {
case TV_STD_NTSC:
default:
args.ucTVStandard = ATOM_TV_NTSC;
break;
case TV_STD_PAL:
args.ucTVStandard = ATOM_TV_PAL;
break;
case TV_STD_PAL_M:
args.ucTVStandard = ATOM_TV_PALM;
break;
case TV_STD_PAL_60:
args.ucTVStandard = ATOM_TV_PAL60;
break;
case TV_STD_NTSC_J:
args.ucTVStandard = ATOM_TV_NTSCJ;
break;
case TV_STD_SCART_PAL:
args.ucTVStandard = ATOM_TV_PAL; /* ??? */
break;
case TV_STD_SECAM:
args.ucTVStandard = ATOM_TV_SECAM;
break;
case TV_STD_PAL_CN:
args.ucTVStandard = ATOM_TV_PALCN;
break;
}
args.ucEnable = SCALER_ENABLE_MULTITAP_MODE;
} else if (radeon_encoder->devices & (ATOM_DEVICE_CV_SUPPORT)) {
args.ucTVStandard = ATOM_TV_CV;
args.ucEnable = SCALER_ENABLE_MULTITAP_MODE;
} else if (radeon_encoder->flags & RADEON_USE_RMX) {
if (radeon_encoder->rmx_type == RMX_FULL)
args.ucEnable = ATOM_SCALER_EXPANSION;
else if (radeon_encoder->rmx_type == RMX_CENTER)
args.ucEnable = ATOM_SCALER_CENTER;
else if (radeon_encoder->rmx_type == RMX_ASPECT)
args.ucEnable = ATOM_SCALER_EXPANSION;
} else {
if (ASIC_IS_AVIVO(rdev))
args.ucEnable = ATOM_SCALER_DISABLE;
else
args.ucEnable = ATOM_SCALER_CENTER;
}
atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
if (radeon_encoder->devices & (ATOM_DEVICE_CV_SUPPORT | ATOM_DEVICE_TV_SUPPORT)
&& rdev->family >= CHIP_RV515 && rdev->family <= CHIP_RV570) {
atom_rv515_force_tv_scaler(rdev);
}
}
static void
radeon_atom_encoder_dpms(struct drm_encoder *encoder, int mode)
{
......@@ -1448,8 +1094,6 @@ radeon_atom_encoder_mode_set(struct drm_encoder *encoder,
radeon_encoder->pixel_clock = adjusted_mode->clock;
radeon_atombios_encoder_crtc_scratch_regs(encoder, radeon_crtc->crtc_id);
atombios_overscan_setup(encoder, mode, adjusted_mode);
atombios_scaler_setup(encoder);
atombios_set_encoder_crtc_source(encoder);
if (ASIC_IS_AVIVO(rdev)) {
......@@ -1667,6 +1311,7 @@ radeon_add_atom_encoder(struct drm_device *dev, uint32_t encoder_id, uint32_t su
radeon_encoder->encoder_id = encoder_id;
radeon_encoder->devices = supported_device;
radeon_encoder->rmx_type = RMX_OFF;
switch (radeon_encoder->encoder_id) {
case ENCODER_OBJECT_ID_INTERNAL_LVDS:
......
......@@ -101,9 +101,10 @@ static int radeonfb_setcolreg(unsigned regno,
break;
case 24:
case 32:
- fb->pseudo_palette[regno] = ((red & 0xff00) << 8) |
- (green & 0xff00) |
- ((blue & 0xff00) >> 8);
+ fb->pseudo_palette[regno] =
+ (((red >> 8) & 0xff) << info->var.red.offset) |
+ (((green >> 8) & 0xff) << info->var.green.offset) |
+ (((blue >> 8) & 0xff) << info->var.blue.offset);
break;
}
}
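The fix above derives the channel positions from the fb_var_screeninfo offsets instead of hard-coding an ARGB layout; a standalone sketch:

	#include <stdio.h>
	#include <stdint.h>

	/* fold 16-bit-per-channel color down to 8 bits and place each channel
	 * at the offset the current pixel format advertises */
	static uint32_t pack_pixel(uint16_t r, uint16_t g, uint16_t b,
				   int roff, int goff, int boff)
	{
		return (uint32_t)((r >> 8) & 0xff) << roff |
		       (uint32_t)((g >> 8) & 0xff) << goff |
		       (uint32_t)((b >> 8) & 0xff) << boff;
	}

	int main(void)
	{
		/* little-endian ARGB8888: r at 16, g at 8, b at 0 */
		printf("0x%08x\n", pack_pixel(0xffff, 0x8080, 0, 16, 8, 0));
		return 0;	/* prints 0x00ff8000 */
	}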
......@@ -154,6 +155,7 @@ static int radeonfb_check_var(struct fb_var_screeninfo *var,
var->transp.length = 0;
var->transp.offset = 0;
break;
#ifdef __LITTLE_ENDIAN
case 15:
var->red.offset = 10;
var->green.offset = 5;
......@@ -194,6 +196,28 @@ static int radeonfb_check_var(struct fb_var_screeninfo *var,
var->transp.length = 8;
var->transp.offset = 24;
break;
#else
case 24:
var->red.offset = 8;
var->green.offset = 16;
var->blue.offset = 24;
var->red.length = 8;
var->green.length = 8;
var->blue.length = 8;
var->transp.length = 0;
var->transp.offset = 0;
break;
case 32:
var->red.offset = 8;
var->green.offset = 16;
var->blue.offset = 24;
var->red.length = 8;
var->green.length = 8;
var->blue.length = 8;
var->transp.length = 8;
var->transp.offset = 0;
break;
#endif
default:
return -EINVAL;
}
......@@ -447,10 +471,10 @@ static struct notifier_block paniced = {
.notifier_call = radeonfb_panic,
};
- static int radeon_align_pitch(struct radeon_device *rdev, int width, int bpp)
+ static int radeon_align_pitch(struct radeon_device *rdev, int width, int bpp, bool tiled)
{
int aligned = width;
- int align_large = (ASIC_IS_AVIVO(rdev));
+ int align_large = (ASIC_IS_AVIVO(rdev)) || tiled;
int pitch_mask = 0;
switch (bpp / 8) {
......@@ -488,12 +512,13 @@ int radeonfb_create(struct radeon_device *rdev,
u64 fb_gpuaddr;
void *fbptr = NULL;
unsigned long tmp;
bool fb_tiled = false; /* useful for testing */
mode_cmd.width = surface_width;
mode_cmd.height = surface_height;
mode_cmd.bpp = 32;
/* need to align pitch with crtc limits */
- mode_cmd.pitch = radeon_align_pitch(rdev, mode_cmd.width, mode_cmd.bpp) * ((mode_cmd.bpp + 1) / 8);
+ mode_cmd.pitch = radeon_align_pitch(rdev, mode_cmd.width, mode_cmd.bpp, fb_tiled) * ((mode_cmd.bpp + 1) / 8);
mode_cmd.depth = 24;
size = mode_cmd.pitch * mode_cmd.height;
......@@ -511,6 +536,8 @@ int radeonfb_create(struct radeon_device *rdev,
}
robj = gobj->driver_private;
if (fb_tiled)
radeon_object_set_tiling_flags(robj, RADEON_TILING_MACRO|RADEON_TILING_SURFACE, mode_cmd.pitch);
mutex_lock(&rdev->ddev->struct_mutex);
fb = radeon_framebuffer_create(rdev->ddev, &mode_cmd, gobj);
if (fb == NULL) {
......@@ -539,6 +566,9 @@ int radeonfb_create(struct radeon_device *rdev,
}
rfbdev = info->par;
if (fb_tiled)
radeon_object_check_tiling(robj, 0, 0);
ret = radeon_object_kmap(robj, &fbptr);
if (ret) {
goto out_unref;
......@@ -572,6 +602,11 @@ int radeonfb_create(struct radeon_device *rdev,
info->var.width = -1;
info->var.xres = fb_width;
info->var.yres = fb_height;
/* setup aperture base/size for vesafb takeover */
info->aperture_base = rdev->ddev->mode_config.fb_base;
info->aperture_size = rdev->mc.real_vram_size;
info->fix.mmio_start = 0;
info->fix.mmio_len = 0;
info->pixmap.size = 64*1024;
......@@ -600,6 +635,7 @@ int radeonfb_create(struct radeon_device *rdev,
info->var.transp.offset = 0;
info->var.transp.length = 0;
break;
#ifdef __LITTLE_ENDIAN
case 15:
info->var.red.offset = 10;
info->var.green.offset = 5;
......@@ -639,7 +675,29 @@ int radeonfb_create(struct radeon_device *rdev,
info->var.transp.offset = 24;
info->var.transp.length = 8;
break;
#else
case 24:
info->var.red.offset = 8;
info->var.green.offset = 16;
info->var.blue.offset = 24;
info->var.red.length = 8;
info->var.green.length = 8;
info->var.blue.length = 8;
info->var.transp.offset = 0;
info->var.transp.length = 0;
break;
case 32:
info->var.red.offset = 8;
info->var.green.offset = 16;
info->var.blue.offset = 24;
info->var.red.length = 8;
info->var.green.length = 8;
info->var.blue.length = 8;
info->var.transp.offset = 0;
info->var.transp.length = 8;
break;
default:
#endif
break;
}
......
......@@ -195,7 +195,7 @@ int radeon_fence_wait(struct radeon_fence *fence, bool interruptible)
r = wait_event_interruptible_timeout(rdev->fence_drv.queue,
radeon_fence_signaled(fence), timeout);
if (unlikely(r == -ERESTARTSYS)) {
return -ERESTART;
return -EBUSY;
}
} else {
r = wait_event_timeout(rdev->fence_drv.queue,
......
......@@ -177,7 +177,7 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
return -ENOMEM;
}
rdev->gart.pages[p] = pagelist[i];
page_base = (uint32_t)rdev->gart.pages_addr[p];
page_base = rdev->gart.pages_addr[p];
for (j = 0; j < (PAGE_SIZE / 4096); j++, t++) {
radeon_gart_set_page(rdev, t, page_base);
page_base += 4096;
......
......@@ -157,9 +157,9 @@ int radeon_gem_info_ioctl(struct drm_device *dev, void *data,
struct radeon_device *rdev = dev->dev_private;
struct drm_radeon_gem_info *args = data;
args->vram_size = rdev->mc.vram_size;
args->vram_size = rdev->mc.real_vram_size;
/* FIXME: report something that makes sense */
args->vram_visible = rdev->mc.vram_size - (4 * 1024 * 1024);
args->vram_visible = rdev->mc.real_vram_size - (4 * 1024 * 1024);
args->gart_size = rdev->mc.gtt_size;
return 0;
}
......@@ -285,3 +285,44 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
mutex_unlock(&dev->struct_mutex);
return r;
}
int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
struct drm_radeon_gem_set_tiling *args = data;
struct drm_gem_object *gobj;
struct radeon_object *robj;
int r = 0;
DRM_DEBUG("%d\n", args->handle);
gobj = drm_gem_object_lookup(dev, filp, args->handle);
if (gobj == NULL)
return -EINVAL;
robj = gobj->driver_private;
radeon_object_set_tiling_flags(robj, args->tiling_flags, args->pitch);
mutex_lock(&dev->struct_mutex);
drm_gem_object_unreference(gobj);
mutex_unlock(&dev->struct_mutex);
return r;
}
int radeon_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
struct drm_radeon_gem_get_tiling *args = data;
struct drm_gem_object *gobj;
struct radeon_object *robj;
int r = 0;
DRM_DEBUG("\n");
gobj = drm_gem_object_lookup(dev, filp, args->handle);
if (gobj == NULL)
return -EINVAL;
robj = gobj->driver_private;
radeon_object_get_tiling_flags(robj, &args->tiling_flags,
&args->pitch);
mutex_lock(&dev->struct_mutex);
drm_gem_object_unreference(gobj);
mutex_unlock(&dev->struct_mutex);
return r;
}
......@@ -291,5 +291,7 @@ struct drm_ioctl_desc radeon_ioctls_kms[] = {
DRM_IOCTL_DEF(DRM_RADEON_GEM_WAIT_IDLE, radeon_gem_wait_idle_ioctl, DRM_AUTH),
DRM_IOCTL_DEF(DRM_RADEON_CS, radeon_cs_ioctl, DRM_AUTH),
DRM_IOCTL_DEF(DRM_RADEON_INFO, radeon_info_ioctl, DRM_AUTH),
DRM_IOCTL_DEF(DRM_RADEON_GEM_SET_TILING, radeon_gem_set_tiling_ioctl, DRM_AUTH),
DRM_IOCTL_DEF(DRM_RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH),
};
int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms);
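For reference, a minimal userspace sketch of exercising the two new tiling ioctls through libdrm. This is an assumption-laden illustration, not part of the commit: it presumes libdrm's drmCommandWriteRead() and a struct drm_radeon_gem_set_tiling whose fields match the handler above (handle, tiling_flags, pitch).

/* Hypothetical userspace sketch; not from this commit. Assumes libdrm's
 * drmCommandWriteRead() and the field names used by the kernel handler. */
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <radeon_drm.h>

static int set_bo_tiling(int fd, uint32_t handle, uint32_t flags, uint32_t pitch)
{
	struct drm_radeon_gem_set_tiling args;

	memset(&args, 0, sizeof(args));
	args.handle = handle;
	args.tiling_flags = flags;	/* e.g. RADEON_TILING_MACRO */
	args.pitch = pitch;		/* pitch in bytes */

	/* DRM_RADEON_GEM_SET_TILING is the command index registered in
	 * radeon_ioctls_kms[] above. */
	return drmCommandWriteRead(fd, DRM_RADEON_GEM_SET_TILING,
				   &args, sizeof(args));
}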
......@@ -30,170 +30,6 @@
#include "atom.h"
static void radeon_legacy_rmx_mode_set(struct drm_encoder *encoder,
struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode)
{
struct drm_device *dev = encoder->dev;
struct radeon_device *rdev = dev->dev_private;
struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
int xres = mode->hdisplay;
int yres = mode->vdisplay;
bool hscale = true, vscale = true;
int hsync_wid;
int vsync_wid;
int hsync_start;
uint32_t scale, inc;
uint32_t fp_horz_stretch, fp_vert_stretch, crtc_more_cntl, fp_horz_vert_active;
uint32_t fp_h_sync_strt_wid, fp_v_sync_strt_wid, fp_crtc_h_total_disp, fp_crtc_v_total_disp;
struct radeon_native_mode *native_mode = &radeon_encoder->native_mode;
DRM_DEBUG("\n");
fp_vert_stretch = RREG32(RADEON_FP_VERT_STRETCH) &
(RADEON_VERT_STRETCH_RESERVED |
RADEON_VERT_AUTO_RATIO_INC);
fp_horz_stretch = RREG32(RADEON_FP_HORZ_STRETCH) &
(RADEON_HORZ_FP_LOOP_STRETCH |
RADEON_HORZ_AUTO_RATIO_INC);
crtc_more_cntl = 0;
if ((rdev->family == CHIP_RS100) ||
(rdev->family == CHIP_RS200)) {
/* This works around an ASIC bug for RMX: some versions
of the BIOS don't have this register initialized correctly. */
crtc_more_cntl |= RADEON_CRTC_H_CUTOFF_ACTIVE_EN;
}
fp_crtc_h_total_disp = ((((mode->crtc_htotal / 8) - 1) & 0x3ff)
| ((((mode->crtc_hdisplay / 8) - 1) & 0x1ff) << 16));
hsync_wid = (mode->crtc_hsync_end - mode->crtc_hsync_start) / 8;
if (!hsync_wid)
hsync_wid = 1;
hsync_start = mode->crtc_hsync_start - 8;
fp_h_sync_strt_wid = ((hsync_start & 0x1fff)
| ((hsync_wid & 0x3f) << 16)
| ((mode->flags & DRM_MODE_FLAG_NHSYNC)
? RADEON_CRTC_H_SYNC_POL
: 0));
fp_crtc_v_total_disp = (((mode->crtc_vtotal - 1) & 0xffff)
| ((mode->crtc_vdisplay - 1) << 16));
vsync_wid = mode->crtc_vsync_end - mode->crtc_vsync_start;
if (!vsync_wid)
vsync_wid = 1;
fp_v_sync_strt_wid = (((mode->crtc_vsync_start - 1) & 0xfff)
| ((vsync_wid & 0x1f) << 16)
| ((mode->flags & DRM_MODE_FLAG_NVSYNC)
? RADEON_CRTC_V_SYNC_POL
: 0));
fp_horz_vert_active = 0;
if (native_mode->panel_xres == 0 ||
native_mode->panel_yres == 0) {
hscale = false;
vscale = false;
} else {
if (xres > native_mode->panel_xres)
xres = native_mode->panel_xres;
if (yres > native_mode->panel_yres)
yres = native_mode->panel_yres;
if (xres == native_mode->panel_xres)
hscale = false;
if (yres == native_mode->panel_yres)
vscale = false;
}
if (radeon_encoder->flags & RADEON_USE_RMX) {
if (radeon_encoder->rmx_type != RMX_CENTER) {
if (!hscale)
fp_horz_stretch |= ((xres/8-1) << 16);
else {
inc = (fp_horz_stretch & RADEON_HORZ_AUTO_RATIO_INC) ? 1 : 0;
scale = ((xres + inc) * RADEON_HORZ_STRETCH_RATIO_MAX)
/ native_mode->panel_xres + 1;
fp_horz_stretch |= (((scale) & RADEON_HORZ_STRETCH_RATIO_MASK) |
RADEON_HORZ_STRETCH_BLEND |
RADEON_HORZ_STRETCH_ENABLE |
((native_mode->panel_xres/8-1) << 16));
}
if (!vscale)
fp_vert_stretch |= ((yres-1) << 12);
else {
inc = (fp_vert_stretch & RADEON_VERT_AUTO_RATIO_INC) ? 1 : 0;
scale = ((yres + inc) * RADEON_VERT_STRETCH_RATIO_MAX)
/ native_mode->panel_yres + 1;
fp_vert_stretch |= (((scale) & RADEON_VERT_STRETCH_RATIO_MASK) |
RADEON_VERT_STRETCH_ENABLE |
RADEON_VERT_STRETCH_BLEND |
((native_mode->panel_yres-1) << 12));
}
} else if (radeon_encoder->rmx_type == RMX_CENTER) {
int blank_width;
fp_horz_stretch |= ((xres/8-1) << 16);
fp_vert_stretch |= ((yres-1) << 12);
crtc_more_cntl |= (RADEON_CRTC_AUTO_HORZ_CENTER_EN |
RADEON_CRTC_AUTO_VERT_CENTER_EN);
blank_width = (mode->crtc_hblank_end - mode->crtc_hblank_start) / 8;
if (blank_width > 110)
blank_width = 110;
fp_crtc_h_total_disp = (((blank_width) & 0x3ff)
| ((((mode->crtc_hdisplay / 8) - 1) & 0x1ff) << 16));
hsync_wid = (mode->crtc_hsync_end - mode->crtc_hsync_start) / 8;
if (!hsync_wid)
hsync_wid = 1;
fp_h_sync_strt_wid = ((((mode->crtc_hsync_start - mode->crtc_hblank_start) / 8) & 0x1fff)
| ((hsync_wid & 0x3f) << 16)
| ((mode->flags & DRM_MODE_FLAG_NHSYNC)
? RADEON_CRTC_H_SYNC_POL
: 0));
fp_crtc_v_total_disp = (((mode->crtc_vblank_end - mode->crtc_vblank_start) & 0xffff)
| ((mode->crtc_vdisplay - 1) << 16));
vsync_wid = mode->crtc_vsync_end - mode->crtc_vsync_start;
if (!vsync_wid)
vsync_wid = 1;
fp_v_sync_strt_wid = ((((mode->crtc_vsync_start - mode->crtc_vblank_start) & 0xfff)
| ((vsync_wid & 0x1f) << 16)
| ((mode->flags & DRM_MODE_FLAG_NVSYNC)
? RADEON_CRTC_V_SYNC_POL
: 0)));
fp_horz_vert_active = (((native_mode->panel_yres) & 0xfff) |
(((native_mode->panel_xres / 8) & 0x1ff) << 16));
}
} else {
fp_horz_stretch |= ((xres/8-1) << 16);
fp_vert_stretch |= ((yres-1) << 12);
}
WREG32(RADEON_FP_HORZ_STRETCH, fp_horz_stretch);
WREG32(RADEON_FP_VERT_STRETCH, fp_vert_stretch);
WREG32(RADEON_CRTC_MORE_CNTL, crtc_more_cntl);
WREG32(RADEON_FP_HORZ_VERT_ACTIVE, fp_horz_vert_active);
WREG32(RADEON_FP_H_SYNC_STRT_WID, fp_h_sync_strt_wid);
WREG32(RADEON_FP_V_SYNC_STRT_WID, fp_v_sync_strt_wid);
WREG32(RADEON_FP_CRTC_H_TOTAL_DISP, fp_crtc_h_total_disp);
WREG32(RADEON_FP_CRTC_V_TOTAL_DISP, fp_crtc_v_total_disp);
}
static void radeon_legacy_lvds_dpms(struct drm_encoder *encoder, int mode)
{
struct drm_device *dev = encoder->dev;
......@@ -287,9 +123,6 @@ static void radeon_legacy_lvds_mode_set(struct drm_encoder *encoder,
DRM_DEBUG("\n");
if (radeon_crtc->crtc_id == 0)
radeon_legacy_rmx_mode_set(encoder, mode, adjusted_mode);
lvds_pll_cntl = RREG32(RADEON_LVDS_PLL_CNTL);
lvds_pll_cntl &= ~RADEON_LVDS_PLL_EN;
......@@ -318,7 +151,7 @@ static void radeon_legacy_lvds_mode_set(struct drm_encoder *encoder,
if (radeon_crtc->crtc_id == 0) {
if (ASIC_IS_R300(rdev)) {
if (radeon_encoder->flags & RADEON_USE_RMX)
if (radeon_encoder->rmx_type != RMX_OFF)
lvds_pll_cntl |= R300_LVDS_SRC_SEL_RMX;
} else
lvds_gen_cntl &= ~RADEON_LVDS_SEL_CRTC2;
......@@ -350,8 +183,6 @@ static bool radeon_legacy_lvds_mode_fixup(struct drm_encoder *encoder,
drm_mode_set_crtcinfo(adjusted_mode, 0);
radeon_encoder->flags &= ~RADEON_USE_RMX;
if (radeon_encoder->rmx_type != RMX_OFF)
radeon_rmx_mode_fixup(encoder, mode, adjusted_mode);
......@@ -455,9 +286,6 @@ static void radeon_legacy_primary_dac_mode_set(struct drm_encoder *encoder,
DRM_DEBUG("\n");
if (radeon_crtc->crtc_id == 0)
radeon_legacy_rmx_mode_set(encoder, mode, adjusted_mode);
if (radeon_crtc->crtc_id == 0) {
if (rdev->family == CHIP_R200 || ASIC_IS_R300(rdev)) {
disp_output_cntl = RREG32(RADEON_DISP_OUTPUT_CNTL) &
......@@ -653,9 +481,6 @@ static void radeon_legacy_tmds_int_mode_set(struct drm_encoder *encoder,
DRM_DEBUG("\n");
if (radeon_crtc->crtc_id == 0)
radeon_legacy_rmx_mode_set(encoder, mode, adjusted_mode);
tmp = tmds_pll_cntl = RREG32(RADEON_TMDS_PLL_CNTL);
tmp &= 0xfffff;
if (rdev->family == CHIP_RV280) {
......@@ -711,7 +536,7 @@ static void radeon_legacy_tmds_int_mode_set(struct drm_encoder *encoder,
if (radeon_crtc->crtc_id == 0) {
if (ASIC_IS_R300(rdev) || rdev->family == CHIP_R200) {
fp_gen_cntl &= ~R200_FP_SOURCE_SEL_MASK;
if (radeon_encoder->flags & RADEON_USE_RMX)
if (radeon_encoder->rmx_type != RMX_OFF)
fp_gen_cntl |= R200_FP_SOURCE_SEL_RMX;
else
fp_gen_cntl |= R200_FP_SOURCE_SEL_CRTC1;
......@@ -820,9 +645,6 @@ static void radeon_legacy_tmds_ext_mode_set(struct drm_encoder *encoder,
DRM_DEBUG("\n");
if (radeon_crtc->crtc_id == 0)
radeon_legacy_rmx_mode_set(encoder, mode, adjusted_mode);
if (rdev->is_atom_bios) {
radeon_encoder->pixel_clock = adjusted_mode->clock;
atombios_external_tmds_setup(encoder, ATOM_ENABLE);
......@@ -856,7 +678,7 @@ static void radeon_legacy_tmds_ext_mode_set(struct drm_encoder *encoder,
if (radeon_crtc->crtc_id == 0) {
if ((rdev->family == CHIP_R200) || ASIC_IS_R300(rdev)) {
fp2_gen_cntl &= ~R200_FP2_SOURCE_SEL_MASK;
if (radeon_encoder->flags & RADEON_USE_RMX)
if (radeon_encoder->rmx_type != RMX_OFF)
fp2_gen_cntl |= R200_FP2_SOURCE_SEL_RMX;
else
fp2_gen_cntl |= R200_FP2_SOURCE_SEL_CRTC1;
......@@ -1014,9 +836,6 @@ static void radeon_legacy_tv_dac_mode_set(struct drm_encoder *encoder,
DRM_DEBUG("\n");
if (radeon_crtc->crtc_id == 0)
radeon_legacy_rmx_mode_set(encoder, mode, adjusted_mode);
if (rdev->family != CHIP_R200) {
tv_dac_cntl = RREG32(RADEON_TV_DAC_CNTL);
if (rdev->family == CHIP_R420 ||
......@@ -1243,6 +1062,7 @@ radeon_add_legacy_encoder(struct drm_device *dev, uint32_t encoder_id, uint32_t
radeon_encoder->encoder_id = encoder_id;
radeon_encoder->devices = supported_device;
radeon_encoder->rmx_type = RMX_OFF;
switch (radeon_encoder->encoder_id) {
case ENCODER_OBJECT_ID_INTERNAL_LVDS:
......
......@@ -36,6 +36,9 @@
#include <linux/i2c.h>
#include <linux/i2c-id.h>
#include <linux/i2c-algo-bit.h>
#include "radeon_fixed.h"
struct radeon_device;
#define to_radeon_crtc(x) container_of(x, struct radeon_crtc, base)
#define to_radeon_connector(x) container_of(x, struct radeon_connector, base)
......@@ -124,6 +127,7 @@ struct radeon_tmds_pll {
#define RADEON_PLL_PREFER_LOW_POST_DIV (1 << 8)
#define RADEON_PLL_PREFER_HIGH_POST_DIV (1 << 9)
#define RADEON_PLL_USE_FRAC_FB_DIV (1 << 10)
#define RADEON_PLL_PREFER_CLOSEST_LOWER (1 << 11)
struct radeon_pll {
uint16_t reference_freq;
......@@ -170,6 +174,18 @@ struct radeon_mode_info {
struct atom_context *atom_context;
enum radeon_connector_table connector_table;
bool mode_config_initialized;
struct radeon_crtc *crtcs[2];
};
struct radeon_native_mode {
/* preferred mode */
uint32_t panel_xres, panel_yres;
uint32_t hoverplus, hsync_width;
uint32_t hblank;
uint32_t voverplus, vsync_width;
uint32_t vblank;
uint32_t dotclock;
uint32_t flags;
};
struct radeon_crtc {
......@@ -185,19 +201,13 @@ struct radeon_crtc {
uint64_t cursor_addr;
int cursor_width;
int cursor_height;
};
#define RADEON_USE_RMX 1
struct radeon_native_mode {
/* preferred mode */
uint32_t panel_xres, panel_yres;
uint32_t hoverplus, hsync_width;
uint32_t hblank;
uint32_t voverplus, vsync_width;
uint32_t vblank;
uint32_t dotclock;
uint32_t flags;
uint32_t legacy_display_base_addr;
uint32_t legacy_cursor_offset;
enum radeon_rmx_type rmx_type;
uint32_t devices;
fixed20_12 vsc;
fixed20_12 hsc;
struct radeon_native_mode native_mode;
};
struct radeon_encoder_primary_dac {
......@@ -383,16 +393,9 @@ void radeon_enc_destroy(struct drm_encoder *encoder);
void radeon_copy_fb(struct drm_device *dev, struct drm_gem_object *dst_obj);
void radeon_combios_asic_init(struct drm_device *dev);
extern int radeon_static_clocks_init(struct drm_device *dev);
void radeon_init_disp_bw_legacy(struct drm_device *dev,
struct drm_display_mode *mode1,
uint32_t pixel_bytes1,
struct drm_display_mode *mode2,
uint32_t pixel_bytes2);
void radeon_init_disp_bw_avivo(struct drm_device *dev,
struct drm_display_mode *mode1,
uint32_t pixel_bytes1,
struct drm_display_mode *mode2,
uint32_t pixel_bytes2);
void radeon_init_disp_bandwidth(struct drm_device *dev);
bool radeon_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode);
void atom_rv515_force_tv_scaler(struct radeon_device *rdev);
#endif
......@@ -44,6 +44,9 @@ struct radeon_object {
uint64_t gpu_addr;
void *kptr;
bool is_iomem;
uint32_t tiling_flags;
uint32_t pitch;
int surface_reg;
};
int radeon_ttm_init(struct radeon_device *rdev);
......@@ -70,6 +73,7 @@ static void radeon_ttm_object_object_destroy(struct ttm_buffer_object *tobj)
robj = container_of(tobj, struct radeon_object, tobj);
list_del_init(&robj->list);
radeon_object_clear_surface_reg(robj);
kfree(robj);
}
......@@ -99,16 +103,16 @@ static inline uint32_t radeon_object_flags_from_domain(uint32_t domain)
{
uint32_t flags = 0;
if (domain & RADEON_GEM_DOMAIN_VRAM) {
flags |= TTM_PL_FLAG_VRAM;
flags |= TTM_PL_FLAG_VRAM | TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED;
}
if (domain & RADEON_GEM_DOMAIN_GTT) {
flags |= TTM_PL_FLAG_TT;
flags |= TTM_PL_FLAG_TT | TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED;
}
if (domain & RADEON_GEM_DOMAIN_CPU) {
flags |= TTM_PL_FLAG_SYSTEM;
flags |= TTM_PL_FLAG_SYSTEM | TTM_PL_MASK_CACHING;
}
if (!flags) {
flags |= TTM_PL_FLAG_SYSTEM;
flags |= TTM_PL_FLAG_SYSTEM | TTM_PL_MASK_CACHING;
}
return flags;
}
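Spelled out, the helper above maps domains to placements as follows (a summary of the branches shown, not new behavior):

/*   RADEON_GEM_DOMAIN_VRAM        -> TTM_PL_FLAG_VRAM | TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED
 *   RADEON_GEM_DOMAIN_GTT         -> TTM_PL_FLAG_TT   | TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED
 *   RADEON_GEM_DOMAIN_VRAM | GTT  -> both placement flags, WC | UNCACHED caching
 *   RADEON_GEM_DOMAIN_CPU or 0    -> TTM_PL_FLAG_SYSTEM | TTM_PL_MASK_CACHING
 */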
......@@ -141,6 +145,7 @@ int radeon_object_create(struct radeon_device *rdev,
}
robj->rdev = rdev;
robj->gobj = gobj;
robj->surface_reg = -1;
INIT_LIST_HEAD(&robj->list);
flags = radeon_object_flags_from_domain(domain);
......@@ -304,7 +309,7 @@ int radeon_object_wait(struct radeon_object *robj)
}
spin_lock(&robj->tobj.lock);
if (robj->tobj.sync_obj) {
r = ttm_bo_wait(&robj->tobj, true, false, false);
r = ttm_bo_wait(&robj->tobj, true, true, false);
}
spin_unlock(&robj->tobj.lock);
radeon_object_unreserve(robj);
......@@ -403,7 +408,6 @@ int radeon_object_list_validate(struct list_head *head, void *fence)
struct radeon_object *robj;
struct radeon_fence *old_fence = NULL;
struct list_head *i;
uint32_t flags;
int r;
r = radeon_object_list_reserve(head);
......@@ -414,27 +418,25 @@ int radeon_object_list_validate(struct list_head *head, void *fence)
list_for_each(i, head) {
lobj = list_entry(i, struct radeon_object_list, list);
robj = lobj->robj;
if (lobj->wdomain) {
flags = radeon_object_flags_from_domain(lobj->wdomain);
flags |= TTM_PL_FLAG_TT;
} else {
flags = radeon_object_flags_from_domain(lobj->rdomain);
flags |= TTM_PL_FLAG_TT;
flags |= TTM_PL_FLAG_VRAM;
}
if (!robj->pin_count) {
robj->tobj.proposed_placement = flags | TTM_PL_MASK_CACHING;
if (lobj->wdomain) {
robj->tobj.proposed_placement =
radeon_object_flags_from_domain(lobj->wdomain);
} else {
robj->tobj.proposed_placement =
radeon_object_flags_from_domain(lobj->rdomain);
}
r = ttm_buffer_object_validate(&robj->tobj,
robj->tobj.proposed_placement,
true, false);
if (unlikely(r)) {
radeon_object_list_unreserve(head);
DRM_ERROR("radeon: failed to validate.\n");
return r;
}
radeon_object_gpu_addr(robj);
}
lobj->gpu_offset = robj->gpu_addr;
lobj->tiling_flags = robj->tiling_flags;
if (fence) {
old_fence = (struct radeon_fence *)robj->tobj.sync_obj;
robj->tobj.sync_obj = radeon_fence_ref(fence);
......@@ -479,3 +481,127 @@ unsigned long radeon_object_size(struct radeon_object *robj)
{
return robj->tobj.num_pages << PAGE_SHIFT;
}
int radeon_object_get_surface_reg(struct radeon_object *robj)
{
struct radeon_device *rdev = robj->rdev;
struct radeon_surface_reg *reg;
struct radeon_object *old_object;
int steal;
int i;
if (!robj->tiling_flags)
return 0;
if (robj->surface_reg >= 0) {
reg = &rdev->surface_regs[robj->surface_reg];
i = robj->surface_reg;
goto out;
}
steal = -1;
for (i = 0; i < RADEON_GEM_MAX_SURFACES; i++) {
reg = &rdev->surface_regs[i];
if (!reg->robj)
break;
old_object = reg->robj;
if (old_object->pin_count == 0)
steal = i;
}
/* if we are all out */
if (i == RADEON_GEM_MAX_SURFACES) {
if (steal == -1)
return -ENOMEM;
/* find someone with a surface reg and nuke their BO */
reg = &rdev->surface_regs[steal];
old_object = reg->robj;
/* blow away the mapping */
DRM_DEBUG("stealing surface reg %d from %p\n", steal, old_object);
ttm_bo_unmap_virtual(&old_object->tobj);
old_object->surface_reg = -1;
i = steal;
}
robj->surface_reg = i;
reg->robj = robj;
out:
radeon_set_surface_reg(rdev, i, robj->tiling_flags, robj->pitch,
robj->tobj.mem.mm_node->start << PAGE_SHIFT,
robj->tobj.num_pages << PAGE_SHIFT);
return 0;
}
void radeon_object_clear_surface_reg(struct radeon_object *robj)
{
struct radeon_device *rdev = robj->rdev;
struct radeon_surface_reg *reg;
if (robj->surface_reg == -1)
return;
reg = &rdev->surface_regs[robj->surface_reg];
radeon_clear_surface_reg(rdev, robj->surface_reg);
reg->robj = NULL;
robj->surface_reg = -1;
}
void radeon_object_set_tiling_flags(struct radeon_object *robj,
uint32_t tiling_flags, uint32_t pitch)
{
robj->tiling_flags = tiling_flags;
robj->pitch = pitch;
}
void radeon_object_get_tiling_flags(struct radeon_object *robj,
uint32_t *tiling_flags,
uint32_t *pitch)
{
if (tiling_flags)
*tiling_flags = robj->tiling_flags;
if (pitch)
*pitch = robj->pitch;
}
int radeon_object_check_tiling(struct radeon_object *robj, bool has_moved,
bool force_drop)
{
if (!(robj->tiling_flags & RADEON_TILING_SURFACE))
return 0;
if (force_drop) {
radeon_object_clear_surface_reg(robj);
return 0;
}
if (robj->tobj.mem.mem_type != TTM_PL_VRAM) {
if (!has_moved)
return 0;
if (robj->surface_reg >= 0)
radeon_object_clear_surface_reg(robj);
return 0;
}
if ((robj->surface_reg >= 0) && !has_moved)
return 0;
return radeon_object_get_surface_reg(robj);
}
void radeon_bo_move_notify(struct ttm_buffer_object *bo,
struct ttm_mem_reg *mem)
{
struct radeon_object *robj = container_of(bo, struct radeon_object, tobj);
radeon_object_check_tiling(robj, 0, 1);
}
void radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
{
struct radeon_object *robj = container_of(bo, struct radeon_object, tobj);
radeon_object_check_tiling(robj, 0, 0);
}
......@@ -126,32 +126,19 @@ static void radeon_ib_align(struct radeon_device *rdev, struct radeon_ib *ib)
}
}
static void radeon_ib_cpu_flush(struct radeon_device *rdev,
struct radeon_ib *ib)
{
unsigned long tmp;
unsigned i;
/* To force a CPU cache flush; ugly but seems reliable */
for (i = 0; i < ib->length_dw; i += (rdev->cp.align_mask + 1)) {
tmp = readl(&ib->ptr[i]);
}
}
int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
{
int r = 0;
mutex_lock(&rdev->ib_pool.mutex);
radeon_ib_align(rdev, ib);
radeon_ib_cpu_flush(rdev, ib);
if (!ib->length_dw || !rdev->cp.ready) {
/* TODO: Nothing in the ib we should report. */
mutex_unlock(&rdev->ib_pool.mutex);
DRM_ERROR("radeon: couldn't schedule IB(%lu).\n", ib->idx);
return -EINVAL;
}
/* 64 dwords should be enought for fence too */
/* 64 dwords should be enough for fence too */
r = radeon_ring_lock(rdev, 64);
if (r) {
DRM_ERROR("radeon: scheduling IB failed (%d).\n", r);
......
/*
* Copyright 2008 Advanced Micro Devices, Inc.
* Copyright 2008 Red Hat Inc.
* Copyright 2009 Jerome Glisse.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Dave Airlie
* Alex Deucher
* Jerome Glisse
*/
#ifndef __RADEON_SHARE_H__
#define __RADEON_SHARE_H__
void r100_vram_init_sizes(struct radeon_device *rdev);
void rs690_line_buffer_adjust(struct radeon_device *rdev,
struct drm_display_mode *mode1,
struct drm_display_mode *mode2);
void rv515_bandwidth_avivo_update(struct radeon_device *rdev);
#endif
/*
* Copyright 2009 VMware, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Michel Dänzer
*/
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_reg.h"
#include "radeon.h"
/* Test BO GTT->VRAM and VRAM->GTT GPU copies across the whole GTT aperture */
void radeon_test_moves(struct radeon_device *rdev)
{
struct radeon_object *vram_obj = NULL;
struct radeon_object **gtt_obj = NULL;
struct radeon_fence *fence = NULL;
uint64_t gtt_addr, vram_addr;
unsigned i, n, size;
int r;
size = 1024 * 1024;
/* Number of tests =
* (Total GTT - IB pool - writeback page - ring buffer) / test size
*/
n = (rdev->mc.gtt_size - RADEON_IB_POOL_SIZE*64*1024 - 4096 -
rdev->cp.ring_size) / size;
gtt_obj = kzalloc(n * sizeof(*gtt_obj), GFP_KERNEL);
if (!gtt_obj) {
DRM_ERROR("Failed to allocate %d pointers\n", n);
r = 1;
goto out_cleanup;
}
r = radeon_object_create(rdev, NULL, size, true, RADEON_GEM_DOMAIN_VRAM,
false, &vram_obj);
if (r) {
DRM_ERROR("Failed to create VRAM object\n");
goto out_cleanup;
}
r = radeon_object_pin(vram_obj, RADEON_GEM_DOMAIN_VRAM, &vram_addr);
if (r) {
DRM_ERROR("Failed to pin VRAM object\n");
goto out_cleanup;
}
for (i = 0; i < n; i++) {
void *gtt_map, *vram_map;
void **gtt_start, **gtt_end;
void **vram_start, **vram_end;
r = radeon_object_create(rdev, NULL, size, true,
RADEON_GEM_DOMAIN_GTT, false, gtt_obj + i);
if (r) {
DRM_ERROR("Failed to create GTT object %d\n", i);
goto out_cleanup;
}
r = radeon_object_pin(gtt_obj[i], RADEON_GEM_DOMAIN_GTT, &gtt_addr);
if (r) {
DRM_ERROR("Failed to pin GTT object %d\n", i);
goto out_cleanup;
}
r = radeon_object_kmap(gtt_obj[i], &gtt_map);
if (r) {
DRM_ERROR("Failed to map GTT object %d\n", i);
goto out_cleanup;
}
for (gtt_start = gtt_map, gtt_end = gtt_map + size;
gtt_start < gtt_end;
gtt_start++)
*gtt_start = gtt_start;
radeon_object_kunmap(gtt_obj[i]);
r = radeon_fence_create(rdev, &fence);
if (r) {
DRM_ERROR("Failed to create GTT->VRAM fence %d\n", i);
goto out_cleanup;
}
r = radeon_copy(rdev, gtt_addr, vram_addr, size / 4096, fence);
if (r) {
DRM_ERROR("Failed GTT->VRAM copy %d\n", i);
goto out_cleanup;
}
r = radeon_fence_wait(fence, false);
if (r) {
DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i);
goto out_cleanup;
}
radeon_fence_unref(&fence);
r = radeon_object_kmap(vram_obj, &vram_map);
if (r) {
DRM_ERROR("Failed to map VRAM object after copy %d\n", i);
goto out_cleanup;
}
for (gtt_start = gtt_map, gtt_end = gtt_map + size,
vram_start = vram_map, vram_end = vram_map + size;
vram_start < vram_end;
gtt_start++, vram_start++) {
if (*vram_start != gtt_start) {
DRM_ERROR("Incorrect GTT->VRAM copy %d: Got 0x%p, "
"expected 0x%p (GTT map 0x%p-0x%p)\n",
i, *vram_start, gtt_start, gtt_map,
gtt_end);
radeon_object_kunmap(vram_obj);
goto out_cleanup;
}
*vram_start = vram_start;
}
radeon_object_kunmap(vram_obj);
r = radeon_fence_create(rdev, &fence);
if (r) {
DRM_ERROR("Failed to create VRAM->GTT fence %d\n", i);
goto out_cleanup;
}
r = radeon_copy(rdev, vram_addr, gtt_addr, size / 4096, fence);
if (r) {
DRM_ERROR("Failed VRAM->GTT copy %d\n", i);
goto out_cleanup;
}
r = radeon_fence_wait(fence, false);
if (r) {
DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i);
goto out_cleanup;
}
radeon_fence_unref(&fence);
r = radeon_object_kmap(gtt_obj[i], &gtt_map);
if (r) {
DRM_ERROR("Failed to map GTT object after copy %d\n", i);
goto out_cleanup;
}
for (gtt_start = gtt_map, gtt_end = gtt_map + size,
vram_start = vram_map, vram_end = vram_map + size;
gtt_start < gtt_end;
gtt_start++, vram_start++) {
if (*gtt_start != vram_start) {
DRM_ERROR("Incorrect VRAM->GTT copy %d: Got 0x%p, "
"expected 0x%p (VRAM map 0x%p-0x%p)\n",
i, *gtt_start, vram_start, vram_map,
vram_end);
radeon_object_kunmap(gtt_obj[i]);
goto out_cleanup;
}
}
radeon_object_kunmap(gtt_obj[i]);
DRM_INFO("Tested GTT->VRAM and VRAM->GTT copy for GTT offset 0x%llx\n",
gtt_addr - rdev->mc.gtt_location);
}
out_cleanup:
if (vram_obj) {
radeon_object_unpin(vram_obj);
radeon_object_unref(&vram_obj);
}
if (gtt_obj) {
for (i = 0; i < n; i++) {
if (gtt_obj[i]) {
radeon_object_unpin(gtt_obj[i]);
radeon_object_unref(&gtt_obj[i]);
}
}
kfree(gtt_obj);
}
if (fence) {
radeon_fence_unref(&fence);
}
if (r) {
printk(KERN_WARNING "Error while testing BO move.\n");
}
}
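The write/verify pattern above is worth spelling out: each pointer-sized slot in the GTT buffer is seeded with its own map address, so a correct GPU copy must reproduce those addresses in the VRAM mapping (and the VRAM buffer is reseeded with its own addresses before the return trip). A sketch of the invariant being checked, as illustration only:

/* Before the GTT->VRAM copy:   gtt_map[k]  == &gtt_map[k]
 * After a correct copy:        vram_map[k] == &gtt_map[k]
 * After the VRAM->GTT copy:    gtt_map[k]  == &vram_map[k]
 */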
......@@ -355,23 +355,26 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,
if (!rdev->cp.ready) {
/* use memcpy */
DRM_ERROR("CP is not ready, use memcpy.\n");
return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
goto memcpy;
}
if (old_mem->mem_type == TTM_PL_VRAM &&
new_mem->mem_type == TTM_PL_SYSTEM) {
return radeon_move_vram_ram(bo, evict, interruptible,
r = radeon_move_vram_ram(bo, evict, interruptible,
no_wait, new_mem);
} else if (old_mem->mem_type == TTM_PL_SYSTEM &&
new_mem->mem_type == TTM_PL_VRAM) {
return radeon_move_ram_vram(bo, evict, interruptible,
r = radeon_move_ram_vram(bo, evict, interruptible,
no_wait, new_mem);
} else {
r = radeon_move_blit(bo, evict, no_wait, new_mem, old_mem);
if (unlikely(r)) {
return r;
}
}
if (r) {
memcpy:
r = ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
}
return r;
}
......@@ -429,6 +432,8 @@ static struct ttm_bo_driver radeon_bo_driver = {
.sync_obj_flush = &radeon_sync_obj_flush,
.sync_obj_unref = &radeon_sync_obj_unref,
.sync_obj_ref = &radeon_sync_obj_ref,
.move_notify = &radeon_bo_move_notify,
.fault_reserve_notify = &radeon_bo_fault_reserve_notify,
};
int radeon_ttm_init(struct radeon_device *rdev)
......@@ -442,13 +447,14 @@ int radeon_ttm_init(struct radeon_device *rdev)
/* No other users of the address space, so set it to 0 */
r = ttm_bo_device_init(&rdev->mman.bdev,
rdev->mman.mem_global_ref.object,
&radeon_bo_driver, DRM_FILE_PAGE_OFFSET);
&radeon_bo_driver, DRM_FILE_PAGE_OFFSET,
rdev->need_dma32);
if (r) {
DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
return r;
}
r = ttm_bo_init_mm(&rdev->mman.bdev, TTM_PL_VRAM, 0,
((rdev->mc.aper_size) >> PAGE_SHIFT));
((rdev->mc.real_vram_size) >> PAGE_SHIFT));
if (r) {
DRM_ERROR("Failed initializing VRAM heap.\n");
return r;
......@@ -465,7 +471,7 @@ int radeon_ttm_init(struct radeon_device *rdev)
return r;
}
DRM_INFO("radeon: %uM of VRAM memory ready\n",
rdev->mc.vram_size / (1024 * 1024));
rdev->mc.real_vram_size / (1024 * 1024));
r = ttm_bo_init_mm(&rdev->mman.bdev, TTM_PL_TT, 0,
((rdev->mc.gtt_size) >> PAGE_SHIFT));
if (r) {
......
......@@ -29,6 +29,7 @@
#include <drm/drmP.h>
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_share.h"
/* rs400,rs480 depends on : */
void r100_hdp_reset(struct radeon_device *rdev);
......@@ -164,7 +165,9 @@ int rs400_gart_enable(struct radeon_device *rdev)
WREG32(RADEON_BUS_CNTL, tmp);
}
/* Table should be in 32-bit address space, so ignore bits above. */
tmp = rdev->gart.table_addr & 0xfffff000;
tmp = (u32)rdev->gart.table_addr & 0xfffff000;
tmp |= (upper_32_bits(rdev->gart.table_addr) & 0xff) << 4;
WREG32_MC(RS480_GART_BASE, tmp);
/* TODO: more tweaking here */
WREG32_MC(RS480_GART_FEATURE_ID,
......@@ -201,10 +204,17 @@ void rs400_gart_disable(struct radeon_device *rdev)
int rs400_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
{
uint32_t entry;
if (i < 0 || i > rdev->gart.num_gpu_pages) {
return -EINVAL;
}
rdev->gart.table.ram.ptr[i] = cpu_to_le32(((uint32_t)addr) | 0xC);
entry = (lower_32_bits(addr) & PAGE_MASK) |
((upper_32_bits(addr) & 0xff) << 4) |
0xc;
entry = cpu_to_le32(entry);
rdev->gart.table.ram.ptr[i] = entry;
return 0;
}
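The new encoding packs an up-to-40-bit DMA address into the 32-bit entry: address bits 31:12 land in entry bits 31:12, address bits 39:32 go into entry bits 11:4, and the 0xc constant supplies what appear to be valid/enable bits. A worked example with a hypothetical address above 4GB:

/* addr = 0x1_2345_6000 (PAGE_MASK for 4K pages = 0xfffff000):
 *   lower_32_bits(addr) & PAGE_MASK    = 0x23456000
 *   (upper_32_bits(addr) & 0xff) << 4  = 0x00000010
 *   flag bits                          = 0x0000000c
 *   entry (before cpu_to_le32)         = 0x2345601c
 */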
......@@ -223,10 +233,9 @@ int rs400_mc_init(struct radeon_device *rdev)
rs400_gpu_init(rdev);
rs400_gart_disable(rdev);
rdev->mc.gtt_location = rdev->mc.vram_size;
rdev->mc.gtt_location = rdev->mc.mc_vram_size;
rdev->mc.gtt_location += (rdev->mc.gtt_size - 1);
rdev->mc.gtt_location &= ~(rdev->mc.gtt_size - 1);
rdev->mc.vram_location = 0xFFFFFFFFUL;
r = radeon_mc_setup(rdev);
if (r) {
return r;
......@@ -238,7 +247,7 @@ int rs400_mc_init(struct radeon_device *rdev)
"programming pipes. Bad things might happen.\n");
}
tmp = rdev->mc.vram_location + rdev->mc.vram_size - 1;
tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1;
tmp = REG_SET(RADEON_MC_FB_TOP, tmp >> 16);
tmp |= REG_SET(RADEON_MC_FB_START, rdev->mc.vram_location >> 16);
WREG32(RADEON_MC_FB_LOCATION, tmp);
......@@ -284,21 +293,12 @@ void rs400_gpu_init(struct radeon_device *rdev)
*/
void rs400_vram_info(struct radeon_device *rdev)
{
uint32_t tom;
rs400_gart_adjust_size(rdev);
/* DDR for all card after R300 & IGP */
rdev->mc.vram_is_ddr = true;
rdev->mc.vram_width = 128;
/* read NB_TOM to get the amount of ram stolen for the GPU */
tom = RREG32(RADEON_NB_TOM);
rdev->mc.vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
/* Could aper size report 0 ? */
rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
r100_vram_init_sizes(rdev);
}
......
......@@ -223,7 +223,7 @@ int rs600_mc_init(struct radeon_device *rdev)
printk(KERN_WARNING "Failed to wait MC idle while "
"programming pipes. Bad things might happen.\n");
}
tmp = rdev->mc.vram_location + rdev->mc.vram_size - 1;
tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1;
tmp = REG_SET(RS600_MC_FB_TOP, tmp >> 16);
tmp |= REG_SET(RS600_MC_FB_START, rdev->mc.vram_location >> 16);
WREG32_MC(RS600_MC_FB_LOCATION, tmp);
......@@ -301,6 +301,11 @@ void rs600_vram_info(struct radeon_device *rdev)
rdev->mc.vram_width = 128;
}
void rs600_bandwidth_update(struct radeon_device *rdev)
{
/* FIXME: implement, should this be like rs690 ? */
}
/*
* Indirect registers accessor
......
......@@ -28,6 +28,9 @@
#include "drmP.h"
#include "radeon_reg.h"
#include "radeon.h"
#include "rs690r.h"
#include "atom.h"
#include "atom-bits.h"
/* rs690,rs740 depends on : */
void r100_hdp_reset(struct radeon_device *rdev);
......@@ -64,7 +67,7 @@ int rs690_mc_init(struct radeon_device *rdev)
rs400_gart_disable(rdev);
/* Setup GPU memory space */
rdev->mc.gtt_location = rdev->mc.vram_size;
rdev->mc.gtt_location = rdev->mc.mc_vram_size;
rdev->mc.gtt_location += (rdev->mc.gtt_size - 1);
rdev->mc.gtt_location &= ~(rdev->mc.gtt_size - 1);
rdev->mc.vram_location = 0xFFFFFFFFUL;
......@@ -79,7 +82,7 @@ int rs690_mc_init(struct radeon_device *rdev)
printk(KERN_WARNING "Failed to wait MC idle while "
"programming pipes. Bad things might happen.\n");
}
tmp = rdev->mc.vram_location + rdev->mc.vram_size - 1;
tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1;
tmp = REG_SET(RS690_MC_FB_TOP, tmp >> 16);
tmp |= REG_SET(RS690_MC_FB_START, rdev->mc.vram_location >> 16);
WREG32_MC(RS690_MCCFG_FB_LOCATION, tmp);
......@@ -138,9 +141,82 @@ void rs690_gpu_init(struct radeon_device *rdev)
/*
* VRAM info.
*/
void rs690_pm_info(struct radeon_device *rdev)
{
int index = GetIndexIntoMasterTable(DATA, IntegratedSystemInfo);
struct _ATOM_INTEGRATED_SYSTEM_INFO *info;
struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 *info_v2;
void *ptr;
uint16_t data_offset;
uint8_t frev, crev;
fixed20_12 tmp;
atom_parse_data_header(rdev->mode_info.atom_context, index, NULL,
&frev, &crev, &data_offset);
ptr = rdev->mode_info.atom_context->bios + data_offset;
info = (struct _ATOM_INTEGRATED_SYSTEM_INFO *)ptr;
info_v2 = (struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 *)ptr;
/* Get various system information from the BIOS */
switch (crev) {
case 1:
tmp.full = rfixed_const(100);
rdev->pm.igp_sideport_mclk.full = rfixed_const(info->ulBootUpMemoryClock);
rdev->pm.igp_sideport_mclk.full = rfixed_div(rdev->pm.igp_sideport_mclk, tmp);
rdev->pm.igp_system_mclk.full = rfixed_const(le16_to_cpu(info->usK8MemoryClock));
rdev->pm.igp_ht_link_clk.full = rfixed_const(le16_to_cpu(info->usFSBClock));
rdev->pm.igp_ht_link_width.full = rfixed_const(info->ucHTLinkWidth);
break;
case 2:
tmp.full = rfixed_const(100);
rdev->pm.igp_sideport_mclk.full = rfixed_const(info_v2->ulBootUpSidePortClock);
rdev->pm.igp_sideport_mclk.full = rfixed_div(rdev->pm.igp_sideport_mclk, tmp);
rdev->pm.igp_system_mclk.full = rfixed_const(info_v2->ulBootUpUMAClock);
rdev->pm.igp_system_mclk.full = rfixed_div(rdev->pm.igp_system_mclk, tmp);
rdev->pm.igp_ht_link_clk.full = rfixed_const(info_v2->ulHTLinkFreq);
rdev->pm.igp_ht_link_clk.full = rfixed_div(rdev->pm.igp_ht_link_clk, tmp);
rdev->pm.igp_ht_link_width.full = rfixed_const(le16_to_cpu(info_v2->usMinHTLinkWidth));
break;
default:
tmp.full = rfixed_const(100);
/* We assume the slowest possible clock, i.e. worst case */
/* DDR 333MHz */
rdev->pm.igp_sideport_mclk.full = rfixed_const(333);
/* FIXME: system clock ? */
rdev->pm.igp_system_mclk.full = rfixed_const(100);
rdev->pm.igp_system_mclk.full = rfixed_div(rdev->pm.igp_system_mclk, tmp);
rdev->pm.igp_ht_link_clk.full = rfixed_const(200);
rdev->pm.igp_ht_link_width.full = rfixed_const(8);
DRM_ERROR("No integrated system info for your GPU, using safe default\n");
break;
}
/* Compute various bandwidth */
/* k8_bandwidth = (memory_clk / 2) * 2 * 8 * 0.5 = memory_clk * 4 */
tmp.full = rfixed_const(4);
rdev->pm.k8_bandwidth.full = rfixed_mul(rdev->pm.igp_system_mclk, tmp);
/* ht_bandwidth = ht_clk * 2 * ht_width / 8 * 0.8
* = ht_clk * ht_width / 5
*/
tmp.full = rfixed_const(5);
rdev->pm.ht_bandwidth.full = rfixed_mul(rdev->pm.igp_ht_link_clk,
rdev->pm.igp_ht_link_width);
rdev->pm.ht_bandwidth.full = rfixed_div(rdev->pm.ht_bandwidth, tmp);
if (tmp.full < rdev->pm.max_bandwidth.full) {
/* HT link is a limiting factor */
rdev->pm.max_bandwidth.full = tmp.full;
}
/* sideport_bandwidth = (sideport_clk / 2) * 2 * 2 * 0.7
* = (sideport_clk * 14) / 10
*/
tmp.full = rfixed_const(14);
rdev->pm.sideport_bandwidth.full = rfixed_mul(rdev->pm.igp_sideport_mclk, tmp);
tmp.full = rfixed_const(10);
rdev->pm.sideport_bandwidth.full = rfixed_div(rdev->pm.sideport_bandwidth, tmp);
}
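Every figure in this function is carried as a fixed20_12. A standalone sketch of that arithmetic, assuming the 20.12 fixed-point semantics the type name suggests (the kernel's real helpers live in radeon_fixed.h and may differ in rounding details):

#include <stdint.h>
#include <stdio.h>

typedef struct { uint32_t full; } fixed20_12;	/* assumed 20.12 fixed point */

static fixed20_12 fx_const(uint32_t a) { return (fixed20_12){ a << 12 }; }

static fixed20_12 fx_mul(fixed20_12 a, fixed20_12 b)
{
	return (fixed20_12){ (uint32_t)(((uint64_t)a.full * b.full) >> 12) };
}

static fixed20_12 fx_div(fixed20_12 a, fixed20_12 b)
{
	return (fixed20_12){ (uint32_t)(((uint64_t)a.full << 12) / b.full) };
}

int main(void)
{
	/* sideport_bandwidth = sideport_clk * 14 / 10, e.g. the 333MHz DDR
	 * fallback used above. */
	fixed20_12 mclk = fx_const(333);
	fixed20_12 bw = fx_div(fx_mul(mclk, fx_const(14)), fx_const(10));

	printf("sideport bandwidth ~ %u\n", (unsigned)(bw.full >> 12)); /* 466 */
	return 0;
}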
void rs690_vram_info(struct radeon_device *rdev)
{
uint32_t tmp;
fixed20_12 a;
rs400_gart_adjust_size(rdev);
/* DDR for all card after R300 & IGP */
......@@ -152,12 +228,409 @@ void rs690_vram_info(struct radeon_device *rdev)
} else {
rdev->mc.vram_width = 64;
}
rdev->mc.vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
rs690_pm_info(rdev);
/* FIXME: we should enforce default clock in case GPU is not in
* default setup
*/
a.full = rfixed_const(100);
rdev->pm.sclk.full = rfixed_const(rdev->clock.default_sclk);
rdev->pm.sclk.full = rfixed_div(rdev->pm.sclk, a);
a.full = rfixed_const(16);
/* core_bandwidth = sclk(Mhz) * 16 */
rdev->pm.core_bandwidth.full = rfixed_div(rdev->pm.sclk, a);
}
void rs690_line_buffer_adjust(struct radeon_device *rdev,
struct drm_display_mode *mode1,
struct drm_display_mode *mode2)
{
u32 tmp;
/*
* Line Buffer Setup
* There is a single line buffer shared by both display controllers.
* DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
* the display controllers. The partitioning can either be done
* manually or via one of four preset allocations specified in bits 1:0:
* 0 - line buffer is divided in half and shared between crtc
* 1 - D1 gets 3/4 of the line buffer, D2 gets 1/4
* 2 - D1 gets the whole buffer
* 3 - D1 gets 1/4 of the line buffer, D2 gets 3/4
* Setting bit 2 of DC_LB_MEMORY_SPLIT switches to manual
* allocation mode. In manual allocation mode, D1 always starts at 0,
* D1 end/2 is specified in bits 14:4; D2 allocation follows D1.
*/
tmp = RREG32(DC_LB_MEMORY_SPLIT) & ~DC_LB_MEMORY_SPLIT_MASK;
tmp &= ~DC_LB_MEMORY_SPLIT_SHIFT_MODE;
/* auto */
if (mode1 && mode2) {
if (mode1->hdisplay > mode2->hdisplay) {
if (mode1->hdisplay > 2560)
tmp |= DC_LB_MEMORY_SPLIT_D1_3Q_D2_1Q;
else
tmp |= DC_LB_MEMORY_SPLIT_D1HALF_D2HALF;
} else if (mode2->hdisplay > mode1->hdisplay) {
if (mode2->hdisplay > 2560)
tmp |= DC_LB_MEMORY_SPLIT_D1_1Q_D2_3Q;
else
tmp |= DC_LB_MEMORY_SPLIT_D1HALF_D2HALF;
} else
tmp |= AVIVO_DC_LB_MEMORY_SPLIT_D1HALF_D2HALF;
} else if (mode1) {
tmp |= DC_LB_MEMORY_SPLIT_D1_ONLY;
} else if (mode2) {
tmp |= DC_LB_MEMORY_SPLIT_D1_1Q_D2_3Q;
}
WREG32(DC_LB_MEMORY_SPLIT, tmp);
}
struct rs690_watermark {
u32 lb_request_fifo_depth;
fixed20_12 num_line_pair;
fixed20_12 estimated_width;
fixed20_12 worst_case_latency;
fixed20_12 consumption_rate;
fixed20_12 active_time;
fixed20_12 dbpp;
fixed20_12 priority_mark_max;
fixed20_12 priority_mark;
fixed20_12 sclk;
};
void rs690_crtc_bandwidth_compute(struct radeon_device *rdev,
struct radeon_crtc *crtc,
struct rs690_watermark *wm)
{
struct drm_display_mode *mode = &crtc->base.mode;
fixed20_12 a, b, c;
fixed20_12 pclk, request_fifo_depth, tolerable_latency, estimated_width;
fixed20_12 consumption_time, line_time, chunk_time, read_delay_latency;
/* FIXME: detect IGP with sideport memory; I don't think there is any
* such product available
*/
bool sideport = false;
if (!crtc->base.enabled) {
/* FIXME: wouldn't it be better to set the priority mark to maximum? */
wm->lb_request_fifo_depth = 4;
return;
}
if (crtc->vsc.full > rfixed_const(2))
wm->num_line_pair.full = rfixed_const(2);
else
wm->num_line_pair.full = rfixed_const(1);
b.full = rfixed_const(mode->crtc_hdisplay);
c.full = rfixed_const(256);
a.full = rfixed_mul(wm->num_line_pair, b);
request_fifo_depth.full = rfixed_div(a, c);
if (a.full < rfixed_const(4)) {
wm->lb_request_fifo_depth = 4;
} else {
wm->lb_request_fifo_depth = rfixed_trunc(request_fifo_depth);
}
/* Determine consumption rate
* pclk = pixel clock period(ns) = 1000 / (mode.clock / 1000)
* vtaps = number of vertical taps,
* vsc = vertical scaling ratio, defined as source/destination
* hsc = horizontal scaling ratio, defined as source/destination
*/
a.full = rfixed_const(mode->clock);
b.full = rfixed_const(1000);
a.full = rfixed_div(a, b);
pclk.full = rfixed_div(b, a);
if (crtc->rmx_type != RMX_OFF) {
b.full = rfixed_const(2);
if (crtc->vsc.full > b.full)
b.full = crtc->vsc.full;
b.full = rfixed_mul(b, crtc->hsc);
c.full = rfixed_const(2);
b.full = rfixed_div(b, c);
consumption_time.full = rfixed_div(pclk, b);
} else {
consumption_time.full = pclk.full;
}
a.full = rfixed_const(1);
wm->consumption_rate.full = rfixed_div(a, consumption_time);
/* Determine line time
* LineTime = total time for one line of display = htotal * pclk
* htotal = total number of horizontal pixels
* pclk = pixel clock period(ns)
*/
a.full = rfixed_const(crtc->base.mode.crtc_htotal);
line_time.full = rfixed_mul(a, pclk);
/* Determine active time
* ActiveTime = time of active region of display within one line,
* hactive = total number of horizontal active pixels
* htotal = total number of horizontal pixels
*/
a.full = rfixed_const(crtc->base.mode.crtc_htotal);
b.full = rfixed_const(crtc->base.mode.crtc_hdisplay);
wm->active_time.full = rfixed_mul(line_time, b);
wm->active_time.full = rfixed_div(wm->active_time, a);
/* Maximum bandwidth is the minimum bandwidth of all components */
rdev->pm.max_bandwidth = rdev->pm.core_bandwidth;
if (sideport) {
if (rdev->pm.max_bandwidth.full > rdev->pm.sideport_bandwidth.full &&
rdev->pm.sideport_bandwidth.full)
rdev->pm.max_bandwidth = rdev->pm.sideport_bandwidth;
read_delay_latency.full = rfixed_const(370 * 800 * 1000);
read_delay_latency.full = rfixed_div(read_delay_latency,
rdev->pm.igp_sideport_mclk);
} else {
if (rdev->pm.max_bandwidth.full > rdev->pm.k8_bandwidth.full &&
rdev->pm.k8_bandwidth.full)
rdev->pm.max_bandwidth = rdev->pm.k8_bandwidth;
if (rdev->pm.max_bandwidth.full > rdev->pm.ht_bandwidth.full &&
rdev->pm.ht_bandwidth.full)
rdev->pm.max_bandwidth = rdev->pm.ht_bandwidth;
read_delay_latency.full = rfixed_const(5000);
}
/* sclk = system clocks(ns) = 1000 / max_bandwidth / 16 */
a.full = rfixed_const(16);
rdev->pm.sclk.full = rfixed_mul(rdev->pm.max_bandwidth, a);
a.full = rfixed_const(1000);
rdev->pm.sclk.full = rfixed_div(a, rdev->pm.sclk);
/* Determine chunk time
* ChunkTime = the time it takes the DCP to send one chunk of data
* to the LB which consists of pipeline delay and inter chunk gap
* sclk = system clock(ns)
*/
a.full = rfixed_const(256 * 13);
chunk_time.full = rfixed_mul(rdev->pm.sclk, a);
a.full = rfixed_const(10);
chunk_time.full = rfixed_div(chunk_time, a);
/* Determine the worst case latency
* NumLinePair = Number of line pairs to request(1=2 lines, 2=4 lines)
* WorstCaseLatency = worst case time from urgent to when the MC starts
* to return data
* READ_DELAY_IDLE_MAX = constant of 1us
* ChunkTime = time it takes the DCP to send one chunk of data to the LB
* which consists of pipeline delay and inter chunk gap
*/
if (rfixed_trunc(wm->num_line_pair) > 1) {
a.full = rfixed_const(3);
wm->worst_case_latency.full = rfixed_mul(a, chunk_time);
wm->worst_case_latency.full += read_delay_latency.full;
} else {
a.full = rfixed_const(2);
wm->worst_case_latency.full = rfixed_mul(a, chunk_time);
wm->worst_case_latency.full += read_delay_latency.full;
}
/* Determine the tolerable latency
* TolerableLatency = Any given request has only 1 line time
* for the data to be returned
* LBRequestFifoDepth = Number of chunk requests the LB can
* put into the request FIFO for a display
* LineTime = total time for one line of display
* ChunkTime = the time it takes the DCP to send one chunk
* of data to the LB which consists of
* pipeline delay and inter chunk gap
*/
if ((2+wm->lb_request_fifo_depth) >= rfixed_trunc(request_fifo_depth)) {
tolerable_latency.full = line_time.full;
} else {
tolerable_latency.full = rfixed_const(wm->lb_request_fifo_depth - 2);
tolerable_latency.full = request_fifo_depth.full - tolerable_latency.full;
tolerable_latency.full = rfixed_mul(tolerable_latency, chunk_time);
tolerable_latency.full = line_time.full - tolerable_latency.full;
}
/* We assume worst case 32 bits (4 bytes) */
wm->dbpp.full = rfixed_const(4 * 8);
/* Determine the maximum priority mark
* width = viewport width in pixels
*/
a.full = rfixed_const(16);
wm->priority_mark_max.full = rfixed_const(crtc->base.mode.crtc_hdisplay);
wm->priority_mark_max.full = rfixed_div(wm->priority_mark_max, a);
/* Determine estimated width */
estimated_width.full = tolerable_latency.full - wm->worst_case_latency.full;
estimated_width.full = rfixed_div(estimated_width, consumption_time);
if (rfixed_trunc(estimated_width) > crtc->base.mode.crtc_hdisplay) {
wm->priority_mark.full = rfixed_const(10);
} else {
a.full = rfixed_const(16);
wm->priority_mark.full = rfixed_div(estimated_width, a);
wm->priority_mark.full = wm->priority_mark_max.full - wm->priority_mark.full;
}
}
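To attach units to the comments above, a worked example with a hypothetical mode (mode->clock = 154000 kHz, crtc_htotal = 2080, no scaling):

/*   pclk             = 1000 / (154000 / 1000)  ~  6.49 ns/pixel
 *   line_time        = 2080 * 6.49             ~  13500 ns
 *   consumption_rate = 1 / 6.49                ~  0.154 pixels/ns
 */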
void rs690_bandwidth_update(struct radeon_device *rdev)
{
struct drm_display_mode *mode0 = NULL;
struct drm_display_mode *mode1 = NULL;
struct rs690_watermark wm0;
struct rs690_watermark wm1;
u32 tmp;
fixed20_12 priority_mark02, priority_mark12, fill_rate;
fixed20_12 a, b;
if (rdev->mode_info.crtcs[0]->base.enabled)
mode0 = &rdev->mode_info.crtcs[0]->base.mode;
if (rdev->mode_info.crtcs[1]->base.enabled)
mode1 = &rdev->mode_info.crtcs[1]->base.mode;
/*
* Set display0/1 priority up in the memory controller for
* modes if the user specifies HIGH for displaypriority
* option.
*/
if (rdev->disp_priority == 2) {
tmp = RREG32_MC(MC_INIT_MISC_LAT_TIMER);
tmp &= ~MC_DISP1R_INIT_LAT_MASK;
tmp &= ~MC_DISP0R_INIT_LAT_MASK;
if (mode1)
tmp |= (1 << MC_DISP1R_INIT_LAT_SHIFT);
if (mode0)
tmp |= (1 << MC_DISP0R_INIT_LAT_SHIFT);
WREG32_MC(MC_INIT_MISC_LAT_TIMER, tmp);
}
rs690_line_buffer_adjust(rdev, mode0, mode1);
if ((rdev->family == CHIP_RS690) || (rdev->family == CHIP_RS740))
WREG32(DCP_CONTROL, 0);
if ((rdev->family == CHIP_RS780) || (rdev->family == CHIP_RS880))
WREG32(DCP_CONTROL, 2);
rs690_crtc_bandwidth_compute(rdev, rdev->mode_info.crtcs[0], &wm0);
rs690_crtc_bandwidth_compute(rdev, rdev->mode_info.crtcs[1], &wm1);
tmp = (wm0.lb_request_fifo_depth - 1);
tmp |= (wm1.lb_request_fifo_depth - 1) << 16;
WREG32(LB_MAX_REQ_OUTSTANDING, tmp);
if (mode0 && mode1) {
if (rfixed_trunc(wm0.dbpp) > 64)
a.full = rfixed_mul(wm0.dbpp, wm0.num_line_pair);
else
a.full = wm0.num_line_pair.full;
if (rfixed_trunc(wm1.dbpp) > 64)
b.full = rfixed_mul(wm1.dbpp, wm1.num_line_pair);
else
b.full = wm1.num_line_pair.full;
a.full += b.full;
fill_rate.full = rfixed_div(wm0.sclk, a);
if (wm0.consumption_rate.full > fill_rate.full) {
b.full = wm0.consumption_rate.full - fill_rate.full;
b.full = rfixed_mul(b, wm0.active_time);
a.full = rfixed_mul(wm0.worst_case_latency,
wm0.consumption_rate);
a.full = a.full + b.full;
b.full = rfixed_const(16 * 1000);
priority_mark02.full = rfixed_div(a, b);
} else {
a.full = rfixed_mul(wm0.worst_case_latency,
wm0.consumption_rate);
b.full = rfixed_const(16 * 1000);
priority_mark02.full = rfixed_div(a, b);
}
if (wm1.consumption_rate.full > fill_rate.full) {
b.full = wm1.consumption_rate.full - fill_rate.full;
b.full = rfixed_mul(b, wm1.active_time);
a.full = rfixed_mul(wm1.worst_case_latency,
wm1.consumption_rate);
a.full = a.full + b.full;
b.full = rfixed_const(16 * 1000);
priority_mark12.full = rfixed_div(a, b);
} else {
a.full = rfixed_mul(wm1.worst_case_latency,
wm1.consumption_rate);
b.full = rfixed_const(16 * 1000);
priority_mark12.full = rfixed_div(a, b);
}
if (wm0.priority_mark.full > priority_mark02.full)
priority_mark02.full = wm0.priority_mark.full;
if (rfixed_trunc(priority_mark02) < 0)
priority_mark02.full = 0;
if (wm0.priority_mark_max.full > priority_mark02.full)
priority_mark02.full = wm0.priority_mark_max.full;
if (wm1.priority_mark.full > priority_mark12.full)
priority_mark12.full = wm1.priority_mark.full;
if (rfixed_trunc(priority_mark12) < 0)
priority_mark12.full = 0;
if (wm1.priority_mark_max.full > priority_mark12.full)
priority_mark12.full = wm1.priority_mark_max.full;
WREG32(D1MODE_PRIORITY_A_CNT, rfixed_trunc(priority_mark02));
WREG32(D1MODE_PRIORITY_B_CNT, rfixed_trunc(priority_mark02));
WREG32(D2MODE_PRIORITY_A_CNT, rfixed_trunc(priority_mark12));
WREG32(D2MODE_PRIORITY_B_CNT, rfixed_trunc(priority_mark12));
} else if (mode0) {
if (rfixed_trunc(wm0.dbpp) > 64)
a.full = rfixed_mul(wm0.dbpp, wm0.num_line_pair);
else
a.full = wm0.num_line_pair.full;
fill_rate.full = rfixed_div(wm0.sclk, a);
if (wm0.consumption_rate.full > fill_rate.full) {
b.full = wm0.consumption_rate.full - fill_rate.full;
b.full = rfixed_mul(b, wm0.active_time);
a.full = rfixed_mul(wm0.worst_case_latency,
wm0.consumption_rate);
a.full = a.full + b.full;
b.full = rfixed_const(16 * 1000);
priority_mark02.full = rfixed_div(a, b);
} else {
a.full = rfixed_mul(wm0.worst_case_latency,
wm0.consumption_rate);
b.full = rfixed_const(16 * 1000);
priority_mark02.full = rfixed_div(a, b);
}
if (wm0.priority_mark.full > priority_mark02.full)
priority_mark02.full = wm0.priority_mark.full;
if (rfixed_trunc(priority_mark02) < 0)
priority_mark02.full = 0;
if (wm0.priority_mark_max.full > priority_mark02.full)
priority_mark02.full = wm0.priority_mark_max.full;
WREG32(D1MODE_PRIORITY_A_CNT, rfixed_trunc(priority_mark02));
WREG32(D1MODE_PRIORITY_B_CNT, rfixed_trunc(priority_mark02));
WREG32(D2MODE_PRIORITY_A_CNT, MODE_PRIORITY_OFF);
WREG32(D2MODE_PRIORITY_B_CNT, MODE_PRIORITY_OFF);
} else {
if (rfixed_trunc(wm1.dbpp) > 64)
a.full = rfixed_mul(wm1.dbpp, wm1.num_line_pair);
else
a.full = wm1.num_line_pair.full;
fill_rate.full = rfixed_div(wm1.sclk, a);
if (wm1.consumption_rate.full > fill_rate.full) {
b.full = wm1.consumption_rate.full - fill_rate.full;
b.full = rfixed_mul(b, wm1.active_time);
a.full = rfixed_mul(wm1.worst_case_latency,
wm1.consumption_rate);
a.full = a.full + b.full;
b.full = rfixed_const(16 * 1000);
priority_mark12.full = rfixed_div(a, b);
} else {
a.full = rfixed_mul(wm1.worst_case_latency,
wm1.consumption_rate);
b.full = rfixed_const(16 * 1000);
priority_mark12.full = rfixed_div(a, b);
}
if (wm1.priority_mark.full > priority_mark12.full)
priority_mark12.full = wm1.priority_mark.full;
if (rfixed_trunc(priority_mark12) < 0)
priority_mark12.full = 0;
if (wm1.priority_mark_max.full > priority_mark12.full)
priority_mark12.full = wm1.priority_mark_max.full;
WREG32(D1MODE_PRIORITY_A_CNT, MODE_PRIORITY_OFF);
WREG32(D1MODE_PRIORITY_B_CNT, MODE_PRIORITY_OFF);
WREG32(D2MODE_PRIORITY_A_CNT, rfixed_trunc(priority_mark12));
WREG32(D2MODE_PRIORITY_B_CNT, rfixed_trunc(priority_mark12));
}
}
/*
* Indirect registers accessor
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -67,7 +67,7 @@ int rv770_mc_init(struct radeon_device *rdev)
"programming pipes. Bad things might happen.\n");
}
tmp = rdev->mc.vram_location + rdev->mc.vram_size - 1;
tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1;
tmp = REG_SET(R700_MC_FB_TOP, tmp >> 24);
tmp |= REG_SET(R700_MC_FB_BASE, rdev->mc.vram_location >> 24);
WREG32(R700_MC_VM_FB_LOCATION, tmp);
......
This diff is collapsed.
This diff is collapsed.
......@@ -101,6 +101,9 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
return VM_FAULT_NOPAGE;
}
if (bdev->driver->fault_reserve_notify)
bdev->driver->fault_reserve_notify(bo);
/*
* Wait for buffer data in transit, due to a pipelined
* move.
......
......@@ -86,10 +86,16 @@ void ttm_tt_cache_flush(struct page *pages[], unsigned long num_pages)
unsigned long i;
for (i = 0; i < num_pages; ++i) {
if (pages[i]) {
unsigned long start = (unsigned long)page_address(pages[i]);
flush_dcache_range(start, start + PAGE_SIZE);
}
struct page *page = pages[i];
void *page_virtual;
if (unlikely(page == NULL))
continue;
page_virtual = kmap_atomic(page, KM_USER0);
flush_dcache_range((unsigned long) page_virtual,
(unsigned long) page_virtual + PAGE_SIZE);
kunmap_atomic(page_virtual, KM_USER0);
}
#else
if (on_each_cpu(ttm_tt_ipi_handler, NULL, 1) != 0)
......@@ -131,10 +137,17 @@ static void ttm_tt_free_page_directory(struct ttm_tt *ttm)
static struct page *ttm_tt_alloc_page(unsigned page_flags)
{
gfp_t gfp_flags = GFP_USER;
if (page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
return alloc_page(GFP_HIGHUSER | __GFP_ZERO);
gfp_flags |= __GFP_ZERO;
if (page_flags & TTM_PAGE_FLAG_DMA32)
gfp_flags |= __GFP_DMA32;
else
gfp_flags |= __GFP_HIGHMEM;
return alloc_page(GFP_HIGHUSER);
return alloc_page(gfp_flags);
}
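With the rewrite, __GFP_DMA32 and __GFP_HIGHMEM are mutually exclusive instead of every allocation unconditionally getting highmem pages. Enumerating the combinations the helper can now produce (a summary of the branches above):

/*   ZERO_ALLOC | DMA32  -> GFP_USER | __GFP_ZERO | __GFP_DMA32
 *   ZERO_ALLOC only     -> GFP_USER | __GFP_ZERO | __GFP_HIGHMEM
 *   DMA32 only          -> GFP_USER | __GFP_DMA32
 *   neither             -> GFP_USER | __GFP_HIGHMEM
 */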
static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
......
This diff is collapsed.
This diff is collapsed.
......@@ -33,7 +33,7 @@
#include <linux/kernel.h>
#define TTM_PFX "[TTM]"
#define TTM_PFX "[TTM] "
enum ttm_global_types {
TTM_GLOBAL_TTM_MEM = 0,
......