Merge branch 'next' of git://people.freedesktop.org/~deathsimple/linux into drm-next

This contains all the radeon documentation rebased on top of the ib fixes. * 'next' of git://people.freedesktop.org/~deathsimple/linux: drm/radeon: fix SS setup for DCPLL drm/radeon: fix up pll selection on DCE5/6 drm/radeon: start to document evergreen.c drm/radeon: start to document the functions r100.c drm/radeon: document VM functions in radeon_gart.c (v3) drm/radeon: document non-VM functions in radeon_gart.c (v2) drm/radeon: document radeon_ring.c (v4) drm/radeon: document radeon_fence.c (v2) drm/radeon: document radeon_asic.c drm/radeon: document radeon_irq_kms.c drm/radeon: document radeon_kms.c drm/radeon: document radeon_device.c (v2) drm/radeon: add rptr save support for r1xx-r5xx drm/radeon: update rptr saving logic for memory buffers drm/radeon: remove radeon_ring_index() drm/radeon: update ib_execute for SI (v2) drm/radeon: fix const IB handling v2 drm/radeon: let sa manager block for fences to wait for v2 drm/radeon: return an error if there is nothing to wait for

Merge branch 'next' of git://people.freedesktop.org/~deathsimple/linux into drm-next
This contains all the radeon documentation rebased on top of the ib fixes. * 'next' of git://people.freedesktop.org/~deathsimple/linux: drm/radeon: fix SS setup for DCPLL drm/radeon: fix up pll selection on DCE5/6 drm/radeon: start to document evergreen.c drm/radeon: start to document the functions r100.c drm/radeon: document VM functions in radeon_gart.c (v3) drm/radeon: document non-VM functions in radeon_gart.c (v2) drm/radeon: document radeon_ring.c (v4) drm/radeon: document radeon_fence.c (v2) drm/radeon: document radeon_asic.c drm/radeon: document radeon_irq_kms.c drm/radeon: document radeon_kms.c drm/radeon: document radeon_device.c (v2) drm/radeon: add rptr save support for r1xx-r5xx drm/radeon: update rptr saving logic for memory buffers drm/radeon: remove radeon_ring_index() drm/radeon: update ib_execute for SI (v2) drm/radeon: fix const IB handling v2 drm/radeon: let sa manager block for fences to wait for v2 drm/radeon: return an error if there is nothing to wait for
b68bdc1b · Dave Airlie · 29935554 · f312f093 · b68bdc1b · b68bdc1b
17 changed file
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -457,22 +457,18 @@ static void atombios_crtc_program_ss(struct radeon_device *rdev,
 		switch (pll_id) {
 		case ATOM_PPLL1:
 			args.v3.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V3_P1PLL;
-			args.v3.usSpreadSpectrumAmount = cpu_to_le16(ss->amount);
-			args.v3.usSpreadSpectrumStep = cpu_to_le16(ss->step);
 			break;
 		case ATOM_PPLL2:
 			args.v3.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V3_P2PLL;
-			args.v3.usSpreadSpectrumAmount = cpu_to_le16(ss->amount);
-			args.v3.usSpreadSpectrumStep = cpu_to_le16(ss->step);
 			break;
 		case ATOM_DCPLL:
 			args.v3.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V3_DCPLL;
-			args.v3.usSpreadSpectrumAmount = cpu_to_le16(0);
-			args.v3.usSpreadSpectrumStep = cpu_to_le16(0);
 			break;
 		case ATOM_PPLL_INVALID:
 			return;
 		}
+		args.v3.usSpreadSpectrumAmount = cpu_to_le16(ss->amount);
+		args.v3.usSpreadSpectrumStep = cpu_to_le16(ss->step);
 		args.v3.ucEnable = enable;
 		if ((ss->percentage == 0) || (ss->type & ATOM_EXTERNAL_SS_MASK) || ASIC_IS_DCE61(rdev))
 			args.v3.ucEnable = ATOM_DISABLE;
@@ -482,22 +478,18 @@ static void atombios_crtc_program_ss(struct radeon_device *rdev,
 		switch (pll_id) {
 		case ATOM_PPLL1:
 			args.v2.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V2_P1PLL;
-			args.v2.usSpreadSpectrumAmount = cpu_to_le16(ss->amount);
-			args.v2.usSpreadSpectrumStep = cpu_to_le16(ss->step);
 			break;
 		case ATOM_PPLL2:
 			args.v2.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V2_P2PLL;
-			args.v2.usSpreadSpectrumAmount = cpu_to_le16(ss->amount);
-			args.v2.usSpreadSpectrumStep = cpu_to_le16(ss->step);
 			break;
 		case ATOM_DCPLL:
 			args.v2.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V2_DCPLL;
-			args.v2.usSpreadSpectrumAmount = cpu_to_le16(0);
-			args.v2.usSpreadSpectrumStep = cpu_to_le16(0);
 			break;
 		case ATOM_PPLL_INVALID:
 			return;
 		}
+		args.v2.usSpreadSpectrumAmount = cpu_to_le16(ss->amount);
+		args.v2.usSpreadSpectrumStep = cpu_to_le16(ss->step);
 		args.v2.ucEnable = enable;
 		if ((ss->percentage == 0) || (ss->type & ATOM_EXTERNAL_SS_MASK) || ASIC_IS_DCE41(rdev))
 			args.v2.ucEnable = ATOM_DISABLE;
@@ -1539,7 +1531,11 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc)
 				 * crtc virtual pixel clock.
 				 */
 				if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(test_encoder))) {
-					if (ASIC_IS_DCE5(rdev) || rdev->clock.dp_extclk)
+					if (ASIC_IS_DCE5(rdev))
+						return ATOM_DCPLL;
+					else if (ASIC_IS_DCE6(rdev))
+						return ATOM_PPLL0;
+					else if (rdev->clock.dp_extclk)
 						return ATOM_PPLL_INVALID;
 				}
 			}

--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -99,6 +99,14 @@ void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev)
 	}
 }

+/**
+ * dce4_wait_for_vblank - vblank wait asic callback.
+ *
+ * @rdev: radeon_device pointer
+ * @crtc: crtc to wait for vblank on
+ *
+ * Wait for vblank on the requested crtc (evergreen+).
+ */
 void dce4_wait_for_vblank(struct radeon_device *rdev, int crtc)
 {
 	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc];
@@ -118,18 +126,49 @@ void dce4_wait_for_vblank(struct radeon_device *rdev, int crtc)
 	}
 }

+/**
+ * radeon_irq_kms_pflip_irq_get - pre-pageflip callback.
+ *
+ * @rdev: radeon_device pointer
+ * @crtc: crtc to prepare for pageflip on
+ *
+ * Pre-pageflip callback (evergreen+).
+ * Enables the pageflip irq (vblank irq).
+ */
 void evergreen_pre_page_flip(struct radeon_device *rdev, int crtc)
 {
 	/* enable the pflip int */
 	radeon_irq_kms_pflip_irq_get(rdev, crtc);
 }

+/**
+ * evergreen_post_page_flip - pos-pageflip callback.
+ *
+ * @rdev: radeon_device pointer
+ * @crtc: crtc to cleanup pageflip on
+ *
+ * Post-pageflip callback (evergreen+).
+ * Disables the pageflip irq (vblank irq).
+ */
 void evergreen_post_page_flip(struct radeon_device *rdev, int crtc)
 {
 	/* disable the pflip int */
 	radeon_irq_kms_pflip_irq_put(rdev, crtc);
 }

+/**
+ * evergreen_page_flip - pageflip callback.
+ *
+ * @rdev: radeon_device pointer
+ * @crtc_id: crtc to cleanup pageflip on
+ * @crtc_base: new address of the crtc (GPU MC address)
+ *
+ * Does the actual pageflip (evergreen+).
+ * During vblank we take the crtc lock and wait for the update_pending
+ * bit to go high, when it does, we release the lock, and allow the
+ * double buffered update to take place.
+ * Returns the current update pending status.
+ */
 u32 evergreen_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
 {
 	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
@@ -214,6 +253,15 @@ int sumo_get_temp(struct radeon_device *rdev)
 	return actual_temp * 1000;
 }

+/**
+ * sumo_pm_init_profile - Initialize power profiles callback.
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Initialize the power states used in profile mode
+ * (sumo, trinity, SI).
+ * Used for profile mode only.
+ */
 void sumo_pm_init_profile(struct radeon_device *rdev)
 {
 	int idx;
@@ -265,6 +313,14 @@ void sumo_pm_init_profile(struct radeon_device *rdev)
 		rdev->pm.power_state[idx].num_clock_modes - 1;
 }

+/**
+ * evergreen_pm_misc - set additional pm hw parameters callback.
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Set non-clock parameters associated with a power state
+ * (voltage, etc.) (evergreen+).
+ */
 void evergreen_pm_misc(struct radeon_device *rdev)
 {
 	int req_ps_idx = rdev->pm.requested_power_state_index;
@@ -292,6 +348,13 @@ void evergreen_pm_misc(struct radeon_device *rdev)
 	}
 }

+/**
+ * evergreen_pm_prepare - pre-power state change callback.
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Prepare for a power state change (evergreen+).
+ */
 void evergreen_pm_prepare(struct radeon_device *rdev)
 {
 	struct drm_device *ddev = rdev->ddev;
@@ -310,6 +373,13 @@ void evergreen_pm_prepare(struct radeon_device *rdev)
 	}
 }

+/**
+ * evergreen_pm_finish - post-power state change callback.
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Clean up after a power state change (evergreen+).
+ */
 void evergreen_pm_finish(struct radeon_device *rdev)
 {
 	struct drm_device *ddev = rdev->ddev;
@@ -328,6 +398,15 @@ void evergreen_pm_finish(struct radeon_device *rdev)
 	}
 }

+/**
+ * evergreen_hpd_sense - hpd sense callback.
+ *
+ * @rdev: radeon_device pointer
+ * @hpd: hpd (hotplug detect) pin
+ *
+ * Checks if a digital monitor is connected (evergreen+).
+ * Returns true if connected, false if not connected.
+ */
 bool evergreen_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd)
 {
 	bool connected = false;
@@ -364,6 +443,14 @@ bool evergreen_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd)
 	return connected;
 }

+/**
+ * evergreen_hpd_set_polarity - hpd set polarity callback.
+ *
+ * @rdev: radeon_device pointer
+ * @hpd: hpd (hotplug detect) pin
+ *
+ * Set the polarity of the hpd pin (evergreen+).
+ */
 void evergreen_hpd_set_polarity(struct radeon_device *rdev,
 				enum radeon_hpd_id hpd)
 {
@@ -424,6 +511,14 @@ void evergreen_hpd_set_polarity(struct radeon_device *rdev,
 	}
 }

+/**
+ * evergreen_hpd_init - hpd setup callback.
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Setup the hpd pins used by the card (evergreen+).
+ * Enable the pin, set the polarity, and enable the hpd interrupts.
+ */
 void evergreen_hpd_init(struct radeon_device *rdev)
 {
 	struct drm_device *dev = rdev->ddev;
@@ -462,6 +557,14 @@ void evergreen_hpd_init(struct radeon_device *rdev)
 	radeon_irq_kms_enable_hpd(rdev, enabled);
 }

+/**
+ * evergreen_hpd_fini - hpd tear down callback.
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Tear down the hpd pins used by the card (evergreen+).
+ * Disable the hpd interrupts.
+ */
 void evergreen_hpd_fini(struct radeon_device *rdev)
 {
 	struct drm_device *dev = rdev->ddev;
@@ -925,6 +1028,14 @@ static void evergreen_program_watermarks(struct radeon_device *rdev,

 }

+/**
+ * evergreen_bandwidth_update - update display watermarks callback.
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Update the display watermarks based on the requested mode(s)
+ * (evergreen+).
+ */
 void evergreen_bandwidth_update(struct radeon_device *rdev)
 {
 	struct drm_display_mode *mode0 = NULL;
@@ -948,6 +1059,15 @@ void evergreen_bandwidth_update(struct radeon_device *rdev)
 	}
 }

+/**
+ * evergreen_mc_wait_for_idle - wait for MC idle callback.
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Wait for the MC (memory controller) to be idle.
+ * (evergreen+).
+ * Returns 0 if the MC is idle, -1 if not.
+ */
 int evergreen_mc_wait_for_idle(struct radeon_device *rdev)
 {
 	unsigned i;
@@ -1364,17 +1484,25 @@ void evergreen_mc_program(struct radeon_device *rdev)
 void evergreen_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 {
 	struct radeon_ring *ring = &rdev->ring[ib->ring];
+	u32 next_rptr;

 	/* set to DX10/11 mode */
 	radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
 	radeon_ring_write(ring, 1);

 	if (ring->rptr_save_reg) {
-		uint32_t next_rptr = ring->wptr + 3 + 4;
+		next_rptr = ring->wptr + 3 + 4;
 		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
 		radeon_ring_write(ring, ((ring->rptr_save_reg - 
 					  PACKET3_SET_CONFIG_REG_START) >> 2));
 		radeon_ring_write(ring, next_rptr);
+	} else if (rdev->wb.enabled) {
+		next_rptr = ring->wptr + 5 + 4;
+		radeon_ring_write(ring, PACKET3(PACKET3_MEM_WRITE, 3));
+		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+		radeon_ring_write(ring, (upper_32_bits(ring->next_rptr_gpu_addr) & 0xff) | (1 << 18));
+		radeon_ring_write(ring, next_rptr);
+		radeon_ring_write(ring, 0);
 	}

 	radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));

--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -65,6 +65,19 @@ MODULE_FIRMWARE(FIRMWARE_R520);

 #include "r100_track.h"

+/* This files gather functions specifics to:
+ * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
+ * and others in some cases.
+ */
+
+/**
+ * r100_wait_for_vblank - vblank wait asic callback.
+ *
+ * @rdev: radeon_device pointer
+ * @crtc: crtc to wait for vblank on
+ *
+ * Wait for vblank on the requested crtc (r1xx-r4xx).
+ */
 void r100_wait_for_vblank(struct radeon_device *rdev, int crtc)
 {
 	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc];
@@ -99,22 +112,49 @@ void r100_wait_for_vblank(struct radeon_device *rdev, int crtc)
 	}
 }

-/* This files gather functions specifics to:
- * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
+/**
+ * r100_pre_page_flip - pre-pageflip callback.
+ *
+ * @rdev: radeon_device pointer
+ * @crtc: crtc to prepare for pageflip on
+ *
+ * Pre-pageflip callback (r1xx-r4xx).
+ * Enables the pageflip irq (vblank irq).
 */
-
 void r100_pre_page_flip(struct radeon_device *rdev, int crtc)
 {
 	/* enable the pflip int */
 	radeon_irq_kms_pflip_irq_get(rdev, crtc);
 }

+/**
+ * r100_post_page_flip - pos-pageflip callback.
+ *
+ * @rdev: radeon_device pointer
+ * @crtc: crtc to cleanup pageflip on
+ *
+ * Post-pageflip callback (r1xx-r4xx).
+ * Disables the pageflip irq (vblank irq).
+ */
 void r100_post_page_flip(struct radeon_device *rdev, int crtc)
 {
 	/* disable the pflip int */
 	radeon_irq_kms_pflip_irq_put(rdev, crtc);
 }

+/**
+ * r100_page_flip - pageflip callback.
+ *
+ * @rdev: radeon_device pointer
+ * @crtc_id: crtc to cleanup pageflip on
+ * @crtc_base: new address of the crtc (GPU MC address)
+ *
+ * Does the actual pageflip (r1xx-r4xx).
+ * During vblank we take the crtc lock and wait for the update_pending
+ * bit to go high, when it does, we release the lock, and allow the
+ * double buffered update to take place.
+ * Returns the current update pending status.
+ */
 u32 r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
 {
 	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
@@ -141,6 +181,15 @@ u32 r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
 	return RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET;
 }

+/**
+ * r100_pm_get_dynpm_state - look up dynpm power state callback.
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Look up the optimal power state based on the
+ * current state of the GPU (r1xx-r5xx).
+ * Used for dynpm only.
+ */
 void r100_pm_get_dynpm_state(struct radeon_device *rdev)
 {
 	int i;
@@ -223,6 +272,15 @@ void r100_pm_get_dynpm_state(struct radeon_device *rdev)
 		  pcie_lanes);
 }

+/**
+ * r100_pm_init_profile - Initialize power profiles callback.
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Initialize the power states used in profile mode
+ * (r1xx-r3xx).
+ * Used for profile mode only.
+ */
 void r100_pm_init_profile(struct radeon_device *rdev)
 {
 	/* default */
@@ -262,6 +320,14 @@ void r100_pm_init_profile(struct radeon_device *rdev)
 	rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_cm_idx = 0;
 }

+/**
+ * r100_pm_misc - set additional pm hw parameters callback.
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Set non-clock parameters associated with a power state
+ * (voltage, pcie lanes, etc.) (r1xx-r4xx).
+ */
 void r100_pm_misc(struct radeon_device *rdev)
 {
 	int requested_index = rdev->pm.requested_power_state_index;
@@ -353,6 +419,13 @@ void r100_pm_misc(struct radeon_device *rdev)
 	}
 }

+/**
+ * r100_pm_prepare - pre-power state change callback.
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Prepare for a power state change (r1xx-r4xx).
+ */
 void r100_pm_prepare(struct radeon_device *rdev)
 {
 	struct drm_device *ddev = rdev->ddev;
@@ -377,6 +450,13 @@ void r100_pm_prepare(struct radeon_device *rdev)
 	}
 }

+/**
+ * r100_pm_finish - post-power state change callback.
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Clean up after a power state change (r1xx-r4xx).
+ */
 void r100_pm_finish(struct radeon_device *rdev)
 {
 	struct drm_device *ddev = rdev->ddev;
@@ -401,6 +481,14 @@ void r100_pm_finish(struct radeon_device *rdev)
 	}
 }

+/**
+ * r100_gui_idle - gui idle callback.
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Check of the GUI (2D/3D engines) are idle (r1xx-r5xx).
+ * Returns true if idle, false if not.
+ */
 bool r100_gui_idle(struct radeon_device *rdev)
 {
 	if (RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE)
@@ -410,6 +498,15 @@ bool r100_gui_idle(struct radeon_device *rdev)
 }

 /* hpd for digital panel detect/disconnect */
+/**
+ * r100_hpd_sense - hpd sense callback.
+ *
+ * @rdev: radeon_device pointer
+ * @hpd: hpd (hotplug detect) pin
+ *
+ * Checks if a digital monitor is connected (r1xx-r4xx).
+ * Returns true if connected, false if not connected.
+ */
 bool r100_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd)
 {
 	bool connected = false;
@@ -429,6 +526,14 @@ bool r100_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd)
 	return connected;
 }

+/**
+ * r100_hpd_set_polarity - hpd set polarity callback.
+ *
+ * @rdev: radeon_device pointer
+ * @hpd: hpd (hotplug detect) pin
+ *
+ * Set the polarity of the hpd pin (r1xx-r4xx).
+ */
 void r100_hpd_set_polarity(struct radeon_device *rdev,
 			   enum radeon_hpd_id hpd)
 {
@@ -457,6 +562,14 @@ void r100_hpd_set_polarity(struct radeon_device *rdev,
 	}
 }

+/**
+ * r100_hpd_init - hpd setup callback.
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Setup the hpd pins used by the card (r1xx-r4xx).
+ * Set the polarity, and enable the hpd interrupts.
+ */
 void r100_hpd_init(struct radeon_device *rdev)
 {
 	struct drm_device *dev = rdev->ddev;
@@ -471,6 +584,14 @@ void r100_hpd_init(struct radeon_device *rdev)
 	radeon_irq_kms_enable_hpd(rdev, enable);
 }

+/**
+ * r100_hpd_fini - hpd tear down callback.
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Tear down the hpd pins used by the card (r1xx-r4xx).
+ * Disable the hpd interrupts.
+ */
 void r100_hpd_fini(struct radeon_device *rdev)
 {
 	struct drm_device *dev = rdev->ddev;
@@ -1060,6 +1181,14 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
 	}
 	ring->ready = true;
 	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
+
+	if (radeon_ring_supports_scratch_reg(rdev, ring)) {
+		r = radeon_scratch_get(rdev, &ring->rptr_save_reg);
+		if (r) {
+			DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r);
+			ring->rptr_save_reg = 0;
+		}
+	}
 	return 0;
 }

@@ -1070,6 +1199,7 @@ void r100_cp_fini(struct radeon_device *rdev)
 	}
 	/* Disable ring */
 	r100_cp_disable(rdev);
+	radeon_scratch_free(rdev, rdev->ring[RADEON_RING_TYPE_GFX_INDEX].rptr_save_reg);
 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
 	DRM_INFO("radeon: cp finalized\n");
 }
@@ -3661,6 +3791,12 @@ void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 {
 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];

+	if (ring->rptr_save_reg) {
+		u32 next_rptr = ring->wptr + 2 + 3;
+		radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
+		radeon_ring_write(ring, next_rptr);
+	}
+
 	radeon_ring_write(ring, PACKET0(RADEON_CP_IB_BASE, 1));
 	radeon_ring_write(ring, ib->gpu_addr);
 	radeon_ring_write(ring, ib->length_dw);
@@ -3693,7 +3829,7 @@ int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	ib.ptr[6] = PACKET2(0);
 	ib.ptr[7] = PACKET2(0);
 	ib.length_dw = 8;
-	r = radeon_ib_schedule(rdev, &ib);
+	r = radeon_ib_schedule(rdev, &ib, NULL);
 	if (r) {
 		radeon_scratch_free(rdev, scratch);
 		radeon_ib_free(rdev, &ib);

--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2163,10 +2163,12 @@ void r600_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsign
 	ring->ring_size = ring_size;
 	ring->align_mask = 16 - 1;

-	r = radeon_scratch_get(rdev, &ring->rptr_save_reg);
-	if (r) {
-		DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r);
-		ring->rptr_save_reg = 0;
+	if (radeon_ring_supports_scratch_reg(rdev, ring)) {
+		r = radeon_scratch_get(rdev, &ring->rptr_save_reg);
+		if (r) {
+			DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r);
+			ring->rptr_save_reg = 0;
+		}
 	}
 }

@@ -2198,7 +2200,7 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 {
 	uint32_t scratch;
 	uint32_t tmp = 0;
-	unsigned i, ridx = radeon_ring_index(rdev, ring);
+	unsigned i;
 	int r;

 	r = radeon_scratch_get(rdev, &scratch);
@@ -2209,7 +2211,7 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	WREG32(scratch, 0xCAFEDEAD);
 	r = radeon_ring_lock(rdev, ring, 3);
 	if (r) {
-		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ridx, r);
+		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
 		radeon_scratch_free(rdev, scratch);
 		return r;
 	}
@@ -2224,10 +2226,10 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 		DRM_UDELAY(1);
 	}
 	if (i < rdev->usec_timeout) {
-		DRM_INFO("ring test on %d succeeded in %d usecs\n", ridx, i);
+		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
 	} else {
 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
-			  ridx, scratch, tmp);
+			  ring->idx, scratch, tmp);
 		r = -EINVAL;
 	}
 	radeon_scratch_free(rdev, scratch);
@@ -2576,13 +2578,21 @@ void r600_fini(struct radeon_device *rdev)
 void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 {
 	struct radeon_ring *ring = &rdev->ring[ib->ring];
+	u32 next_rptr;

 	if (ring->rptr_save_reg) {
-		uint32_t next_rptr = ring->wptr + 3 + 4;
+		next_rptr = ring->wptr + 3 + 4;
 		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
 		radeon_ring_write(ring, ((ring->rptr_save_reg -
 					 PACKET3_SET_CONFIG_REG_OFFSET) >> 2));
 		radeon_ring_write(ring, next_rptr);
+	} else if (rdev->wb.enabled) {
+		next_rptr = ring->wptr + 5 + 4;
+		radeon_ring_write(ring, PACKET3(PACKET3_MEM_WRITE, 3));
+		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+		radeon_ring_write(ring, (upper_32_bits(ring->next_rptr_gpu_addr) & 0xff) | (1 << 18));
+		radeon_ring_write(ring, next_rptr);
+		radeon_ring_write(ring, 0);
 	}

 	radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
@@ -2602,7 +2612,6 @@ int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	uint32_t tmp = 0;
 	unsigned i;
 	int r;
-	int ring_index = radeon_ring_index(rdev, ring);

 	r = radeon_scratch_get(rdev, &scratch);
 	if (r) {
@@ -2610,7 +2619,7 @@ int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 		return r;
 	}
 	WREG32(scratch, 0xCAFEDEAD);
-	r = radeon_ib_get(rdev, ring_index, &ib, 256);
+	r = radeon_ib_get(rdev, ring->idx, &ib, 256);
 	if (r) {
 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
 		return r;
@@ -2619,7 +2628,7 @@ int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	ib.ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
 	ib.ptr[2] = 0xDEADBEEF;
 	ib.length_dw = 3;
-	r = radeon_ib_schedule(rdev, &ib);
+	r = radeon_ib_schedule(rdev, &ib, NULL);
 	if (r) {
 		radeon_scratch_free(rdev, scratch);
 		radeon_ib_free(rdev, &ib);

--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -362,7 +362,7 @@ struct radeon_bo_list {
 * alignment).
 */
 struct radeon_sa_manager {
-	spinlock_t		lock;
+	wait_queue_head_t	wq;
 	struct radeon_bo	*bo;
 	struct list_head	*hole;
 	struct list_head	flist[RADEON_NUM_RINGS];
@@ -623,6 +623,8 @@ struct radeon_ring {
 	unsigned		rptr_offs;
 	unsigned		rptr_reg;
 	unsigned		rptr_save_reg;
+	u64			next_rptr_gpu_addr;
+	volatile u32		*next_rptr_cpu_addr;
 	unsigned		wptr;
 	unsigned		wptr_old;
 	unsigned		wptr_reg;
@@ -638,6 +640,7 @@ struct radeon_ring {
 	u32			ptr_reg_shift;
 	u32			ptr_reg_mask;
 	u32			nop;
+	u32			idx;
 };

 /*
@@ -751,12 +754,14 @@ struct si_rlc {
 int radeon_ib_get(struct radeon_device *rdev, int ring,
 		  struct radeon_ib *ib, unsigned size);
 void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib);
-int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib);
+int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
+		       struct radeon_ib *const_ib);
 int radeon_ib_pool_init(struct radeon_device *rdev);
 void radeon_ib_pool_fini(struct radeon_device *rdev);
 int radeon_ib_ring_tests(struct radeon_device *rdev);
 /* Ring access between begin & end cannot sleep */
-int radeon_ring_index(struct radeon_device *rdev, struct radeon_ring *cp);
+bool radeon_ring_supports_scratch_reg(struct radeon_device *rdev,
+				      struct radeon_ring *ring);
 void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *cp);
 int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ndw);
 int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ndw);
@@ -870,6 +875,7 @@ struct radeon_wb {
 };

 #define RADEON_WB_SCRATCH_OFFSET 0
+#define RADEON_WB_RING0_NEXT_RPTR 256
 #define RADEON_WB_CP_RPTR_OFFSET 1024
 #define RADEON_WB_CP1_RPTR_OFFSET 1280
 #define RADEON_WB_CP2_RPTR_OFFSET 1536

--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -40,6 +40,16 @@
 /*
 * Registers accessors functions.
 */
+/**
+ * radeon_invalid_rreg - dummy reg read function
+ *
+ * @rdev: radeon device pointer
+ * @reg: offset of register
+ *
+ * Dummy register read function.  Used for register blocks
+ * that certain asics don't have (all asics).
+ * Returns the value in the register.
+ */
 static uint32_t radeon_invalid_rreg(struct radeon_device *rdev, uint32_t reg)
 {
 	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
@@ -47,6 +57,16 @@ static uint32_t radeon_invalid_rreg(struct radeon_device *rdev, uint32_t reg)
 	return 0;
 }

+/**
+ * radeon_invalid_wreg - dummy reg write function
+ *
+ * @rdev: radeon device pointer
+ * @reg: offset of register
+ * @v: value to write to the register
+ *
+ * Dummy register read function.  Used for register blocks
+ * that certain asics don't have (all asics).
+ */
 static void radeon_invalid_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
 {
 	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
@@ -54,6 +74,14 @@ static void radeon_invalid_wreg(struct radeon_device *rdev, uint32_t reg, uint32
 	BUG_ON(1);
 }

+/**
+ * radeon_register_accessor_init - sets up the register accessor callbacks
+ *
+ * @rdev: radeon device pointer
+ *
+ * Sets up the register accessor callbacks for various register
+ * apertures.  Not all asics have all apertures (all asics).
+ */
 static void radeon_register_accessor_init(struct radeon_device *rdev)
 {
 	rdev->mc_rreg = &radeon_invalid_rreg;
@@ -102,6 +130,14 @@ static void radeon_register_accessor_init(struct radeon_device *rdev)


 /* helper to disable agp */
+/**
+ * radeon_agp_disable - AGP disable helper function
+ *
+ * @rdev: radeon device pointer
+ *
+ * Removes AGP flags and changes the gart callbacks on AGP
+ * cards when using the internal gart rather than AGP (all asics).
+ */
 void radeon_agp_disable(struct radeon_device *rdev)
 {
 	rdev->flags &= ~RADEON_IS_AGP;
@@ -1608,6 +1644,16 @@ static struct radeon_asic si_asic = {
 	},
 };

+/**
+ * radeon_asic_init - register asic specific callbacks
+ *
+ * @rdev: radeon device pointer
+ *
+ * Registers the appropriate asic specific callbacks for each
+ * chip family.  Also sets other asics specific info like the number
+ * of crtcs and the register aperture accessors (all asics).
+ * Returns 0 for success.
+ */
 int radeon_asic_init(struct radeon_device *rdev)
 {
 	radeon_register_accessor_init(rdev);

--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -354,7 +354,7 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev,
 	}
 	radeon_cs_sync_rings(parser);
 	parser->ib.vm_id = 0;
-	r = radeon_ib_schedule(rdev, &parser->ib);
+	r = radeon_ib_schedule(rdev, &parser->ib, NULL);
 	if (r) {
 		DRM_ERROR("Failed to schedule IB !\n");
 	}
@@ -452,25 +452,24 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
 	}
 	radeon_cs_sync_rings(parser);

+	parser->ib.vm_id = vm->id;
+	/* ib pool is bind at 0 in virtual address space,
+	 * so gpu_addr is the offset inside the pool bo
+	 */
+	parser->ib.gpu_addr = parser->ib.sa_bo->soffset;
+
 	if ((rdev->family >= CHIP_TAHITI) &&
 	    (parser->chunk_const_ib_idx != -1)) {
 		parser->const_ib.vm_id = vm->id;
-		/* ib pool is bind at 0 in virtual address space to gpu_addr is the
-		 * offset inside the pool bo
+		/* ib pool is bind at 0 in virtual address space,
+		 * so gpu_addr is the offset inside the pool bo
 		 */
 		parser->const_ib.gpu_addr = parser->const_ib.sa_bo->soffset;
-		r = radeon_ib_schedule(rdev, &parser->const_ib);
-		if (r)
-			goto out;
+		r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib);
+	} else {
+		r = radeon_ib_schedule(rdev, &parser->ib, NULL);
 	}

-	parser->ib.vm_id = vm->id;
-	/* ib pool is bind at 0 in virtual address space to gpu_addr is the
-	 * offset inside the pool bo
-	 */
-	parser->ib.gpu_addr = parser->ib.sa_bo->soffset;
-	parser->ib.is_const_ib = false;
-	r = radeon_ib_schedule(rdev, &parser->ib);
 out:
 	if (!r) {
 		if (vm->fence) {

--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -96,8 +96,12 @@ static const char radeon_family_name[][16] = {
 	"LAST",
 };

-/*
- * Clear GPU surface registers.
+/**
+ * radeon_surface_init - Clear GPU surface registers.
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Clear GPU surface registers (r1xx-r5xx).
 */
 void radeon_surface_init(struct radeon_device *rdev)
 {
@@ -119,6 +123,13 @@ void radeon_surface_init(struct radeon_device *rdev)
 /*
 * GPU scratch registers helpers function.
 */
+/**
+ * radeon_scratch_init - Init scratch register driver information.
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Init CP scratch register driver information (r1xx-r5xx)
+ */
 void radeon_scratch_init(struct radeon_device *rdev)
 {
 	int i;
@@ -136,6 +147,15 @@ void radeon_scratch_init(struct radeon_device *rdev)
 	}
 }

+/**
+ * radeon_scratch_get - Allocate a scratch register
+ *
+ * @rdev: radeon_device pointer
+ * @reg: scratch register mmio offset
+ *
+ * Allocate a CP scratch register for use by the driver (all asics).
+ * Returns 0 on success or -EINVAL on failure.
+ */
 int radeon_scratch_get(struct radeon_device *rdev, uint32_t *reg)
 {
 	int i;
@@ -150,6 +170,14 @@ int radeon_scratch_get(struct radeon_device *rdev, uint32_t *reg)
 	return -EINVAL;
 }

+/**
+ * radeon_scratch_free - Free a scratch register
+ *
+ * @rdev: radeon_device pointer
+ * @reg: scratch register mmio offset
+ *
+ * Free a CP scratch register allocated for use by the driver (all asics)
+ */
 void radeon_scratch_free(struct radeon_device *rdev, uint32_t reg)
 {
 	int i;
@@ -162,6 +190,20 @@ void radeon_scratch_free(struct radeon_device *rdev, uint32_t reg)
 	}
 }

+/*
+ * radeon_wb_*()
+ * Writeback is the the method by which the the GPU updates special pages
+ * in memory with the status of certain GPU events (fences, ring pointers,
+ * etc.).
+ */
+
+/**
+ * radeon_wb_disable - Disable Writeback
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Disables Writeback (all asics).  Used for suspend.
+ */
 void radeon_wb_disable(struct radeon_device *rdev)
 {
 	int r;
@@ -177,6 +219,14 @@ void radeon_wb_disable(struct radeon_device *rdev)
 	rdev->wb.enabled = false;
 }

+/**
+ * radeon_wb_fini - Disable Writeback and free memory
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Disables Writeback and frees the Writeback memory (all asics).
+ * Used at driver shutdown.
+ */
 void radeon_wb_fini(struct radeon_device *rdev)
 {
 	radeon_wb_disable(rdev);
@@ -187,6 +237,15 @@ void radeon_wb_fini(struct radeon_device *rdev)
 	}
 }

+/**
+ * radeon_wb_init- Init Writeback driver info and allocate memory
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Disables Writeback and frees the Writeback memory (all asics).
+ * Used at driver startup.
+ * Returns 0 on success or an -error on failure.
+ */
 int radeon_wb_init(struct radeon_device *rdev)
 {
 	int r;
@@ -355,6 +414,15 @@ void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
 /*
 * GPU helpers function.
 */
+/**
+ * radeon_card_posted - check if the hw has already been initialized
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Check if the asic has been initialized (all asics).
+ * Used at driver startup.
+ * Returns true if initialized or false if not.
+ */
 bool radeon_card_posted(struct radeon_device *rdev)
 {
 	uint32_t reg;
@@ -404,6 +472,14 @@ bool radeon_card_posted(struct radeon_device *rdev)

 }

+/**
+ * radeon_update_bandwidth_info - update display bandwidth params
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Used when sclk/mclk are switched or display modes are set.
+ * params are used to calculate display watermarks (all asics)
+ */
 void radeon_update_bandwidth_info(struct radeon_device *rdev)
 {
 	fixed20_12 a;
@@ -424,6 +500,15 @@ void radeon_update_bandwidth_info(struct radeon_device *rdev)
 	}
 }

+/**
+ * radeon_boot_test_post_card - check and possibly initialize the hw
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Check if the asic is initialized and if not, attempt to initialize
+ * it (all asics).
+ * Returns true if initialized or false if not.
+ */
 bool radeon_boot_test_post_card(struct radeon_device *rdev)
 {
 	if (radeon_card_posted(rdev))
@@ -442,6 +527,16 @@ bool radeon_boot_test_post_card(struct radeon_device *rdev)
 	}
 }

+/**
+ * radeon_dummy_page_init - init dummy page used by the driver
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Allocate the dummy page used by the driver (all asics).
+ * This dummy page is used by the driver as a filler for gart entries
+ * when pages are taken out of the GART
+ * Returns 0 on sucess, -ENOMEM on failure.
+ */
 int radeon_dummy_page_init(struct radeon_device *rdev)
 {
 	if (rdev->dummy_page.page)
@@ -460,6 +555,13 @@ int radeon_dummy_page_init(struct radeon_device *rdev)
 	return 0;
 }

+/**
+ * radeon_dummy_page_fini - free dummy page used by the driver
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Frees the dummy page used by the driver (all asics).
+ */
 void radeon_dummy_page_fini(struct radeon_device *rdev)
 {
 	if (rdev->dummy_page.page == NULL)
@@ -472,6 +574,23 @@ void radeon_dummy_page_fini(struct radeon_device *rdev)


 /* ATOM accessor methods */
+/*
+ * ATOM is an interpreted byte code stored in tables in the vbios.  The
+ * driver registers callbacks to access registers and the interpreter
+ * in the driver parses the tables and executes then to program specific
+ * actions (set display modes, asic init, etc.).  See radeon_atombios.c,
+ * atombios.h, and atom.c
+ */
+
+/**
+ * cail_pll_read - read PLL register
+ *
+ * @info: atom card_info pointer
+ * @reg: PLL register offset
+ *
+ * Provides a PLL register accessor for the atom interpreter (r4xx+).
+ * Returns the value of the PLL register.
+ */
 static uint32_t cail_pll_read(struct card_info *info, uint32_t reg)
 {
 	struct radeon_device *rdev = info->dev->dev_private;
@@ -481,6 +600,15 @@ static uint32_t cail_pll_read(struct card_info *info, uint32_t reg)
 	return r;
 }

+/**
+ * cail_pll_write - write PLL register
+ *
+ * @info: atom card_info pointer
+ * @reg: PLL register offset
+ * @val: value to write to the pll register
+ *
+ * Provides a PLL register accessor for the atom interpreter (r4xx+).
+ */
 static void cail_pll_write(struct card_info *info, uint32_t reg, uint32_t val)
 {
 	struct radeon_device *rdev = info->dev->dev_private;
@@ -488,6 +616,15 @@ static void cail_pll_write(struct card_info *info, uint32_t reg, uint32_t val)
 	rdev->pll_wreg(rdev, reg, val);
 }

+/**
+ * cail_mc_read - read MC (Memory Controller) register
+ *
+ * @info: atom card_info pointer
+ * @reg: MC register offset
+ *
+ * Provides an MC register accessor for the atom interpreter (r4xx+).
+ * Returns the value of the MC register.
+ */
 static uint32_t cail_mc_read(struct card_info *info, uint32_t reg)
 {
 	struct radeon_device *rdev = info->dev->dev_private;
@@ -497,6 +634,15 @@ static uint32_t cail_mc_read(struct card_info *info, uint32_t reg)
 	return r;
 }

+/**
+ * cail_mc_write - write MC (Memory Controller) register
+ *
+ * @info: atom card_info pointer
+ * @reg: MC register offset
+ * @val: value to write to the pll register
+ *
+ * Provides a MC register accessor for the atom interpreter (r4xx+).
+ */
 static void cail_mc_write(struct card_info *info, uint32_t reg, uint32_t val)
 {
 	struct radeon_device *rdev = info->dev->dev_private;
@@ -504,6 +650,15 @@ static void cail_mc_write(struct card_info *info, uint32_t reg, uint32_t val)
 	rdev->mc_wreg(rdev, reg, val);
 }

+/**
+ * cail_reg_write - write MMIO register
+ *
+ * @info: atom card_info pointer
+ * @reg: MMIO register offset
+ * @val: value to write to the pll register
+ *
+ * Provides a MMIO register accessor for the atom interpreter (r4xx+).
+ */
 static void cail_reg_write(struct card_info *info, uint32_t reg, uint32_t val)
 {
 	struct radeon_device *rdev = info->dev->dev_private;
@@ -511,6 +666,15 @@ static void cail_reg_write(struct card_info *info, uint32_t reg, uint32_t val)
 	WREG32(reg*4, val);
 }

+/**
+ * cail_reg_read - read MMIO register
+ *
+ * @info: atom card_info pointer
+ * @reg: MMIO register offset
+ *
+ * Provides an MMIO register accessor for the atom interpreter (r4xx+).
+ * Returns the value of the MMIO register.
+ */
 static uint32_t cail_reg_read(struct card_info *info, uint32_t reg)
 {
 	struct radeon_device *rdev = info->dev->dev_private;
@@ -520,6 +684,15 @@ static uint32_t cail_reg_read(struct card_info *info, uint32_t reg)
 	return r;
 }

+/**
+ * cail_ioreg_write - write IO register
+ *
+ * @info: atom card_info pointer
+ * @reg: IO register offset
+ * @val: value to write to the pll register
+ *
+ * Provides a IO register accessor for the atom interpreter (r4xx+).
+ */
 static void cail_ioreg_write(struct card_info *info, uint32_t reg, uint32_t val)
 {
 	struct radeon_device *rdev = info->dev->dev_private;
@@ -527,6 +700,15 @@ static void cail_ioreg_write(struct card_info *info, uint32_t reg, uint32_t val)
 	WREG32_IO(reg*4, val);
 }

+/**
+ * cail_ioreg_read - read IO register
+ *
+ * @info: atom card_info pointer
+ * @reg: IO register offset
+ *
+ * Provides an IO register accessor for the atom interpreter (r4xx+).
+ * Returns the value of the IO register.
+ */
 static uint32_t cail_ioreg_read(struct card_info *info, uint32_t reg)
 {
 	struct radeon_device *rdev = info->dev->dev_private;
@@ -536,6 +718,16 @@ static uint32_t cail_ioreg_read(struct card_info *info, uint32_t reg)
 	return r;
 }

+/**
+ * radeon_atombios_init - init the driver info and callbacks for atombios
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Initializes the driver info and register access callbacks for the
+ * ATOM interpreter (r4xx+).
+ * Returns 0 on sucess, -ENOMEM on failure.
+ * Called at driver startup.
+ */
 int radeon_atombios_init(struct radeon_device *rdev)
 {
 	struct card_info *atom_card_info =
@@ -569,6 +761,15 @@ int radeon_atombios_init(struct radeon_device *rdev)
 	return 0;
 }

+/**
+ * radeon_atombios_fini - free the driver info and callbacks for atombios
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Frees the driver info and register access callbacks for the ATOM
+ * interpreter (r4xx+).
+ * Called at driver shutdown.
+ */
 void radeon_atombios_fini(struct radeon_device *rdev)
 {
 	if (rdev->mode_info.atom_context) {
@@ -578,17 +779,50 @@ void radeon_atombios_fini(struct radeon_device *rdev)
 	kfree(rdev->mode_info.atom_card_info);
 }

+/* COMBIOS */
+/*
+ * COMBIOS is the bios format prior to ATOM. It provides
+ * command tables similar to ATOM, but doesn't have a unified
+ * parser.  See radeon_combios.c
+ */
+
+/**
+ * radeon_combios_init - init the driver info for combios
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Initializes the driver info for combios (r1xx-r3xx).
+ * Returns 0 on sucess.
+ * Called at driver startup.
+ */
 int radeon_combios_init(struct radeon_device *rdev)
 {
 	radeon_combios_initialize_bios_scratch_regs(rdev->ddev);
 	return 0;
 }

+/**
+ * radeon_combios_fini - free the driver info for combios
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Frees the driver info for combios (r1xx-r3xx).
+ * Called at driver shutdown.
+ */
 void radeon_combios_fini(struct radeon_device *rdev)
 {
 }

-/* if we get transitioned to only one device, tak VGA back */
+/* if we get transitioned to only one device, take VGA back */
+/**
+ * radeon_vga_set_decode - enable/disable vga decode
+ *
+ * @cookie: radeon_device pointer
+ * @state: enable/disable vga decode
+ *
+ * Enable/disable vga decode (all asics).
+ * Returns VGA resource flags.
+ */
 static unsigned int radeon_vga_set_decode(void *cookie, bool state)
 {
 	struct radeon_device *rdev = cookie;
@@ -600,6 +834,14 @@ static unsigned int radeon_vga_set_decode(void *cookie, bool state)
 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
 }

+/**
+ * radeon_check_arguments - validate module params
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Validates certain module parameters and updates
+ * the associated values used by the driver (all asics).
+ */
 void radeon_check_arguments(struct radeon_device *rdev)
 {
 	/* vramlimit must be a power of two */
@@ -666,6 +908,15 @@ void radeon_check_arguments(struct radeon_device *rdev)
 	}
 }

+/**
+ * radeon_switcheroo_set_state - set switcheroo state
+ *
+ * @pdev: pci dev pointer
+ * @state: vga switcheroo state
+ *
+ * Callback for the switcheroo driver.  Suspends or resumes the
+ * the asics before or after it is powered up using ACPI methods.
+ */
 static void radeon_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
@@ -686,6 +937,15 @@ static void radeon_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero
 	}
 }

+/**
+ * radeon_switcheroo_can_switch - see if switcheroo state can change
+ *
+ * @pdev: pci dev pointer
+ *
+ * Callback for the switcheroo driver.  Check of the switcheroo
+ * state can be changed.
+ * Returns true if the state can be changed, false if not.
+ */
 static bool radeon_switcheroo_can_switch(struct pci_dev *pdev)
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
@@ -703,6 +963,18 @@ static const struct vga_switcheroo_client_ops radeon_switcheroo_ops = {
 	.can_switch = radeon_switcheroo_can_switch,
 };

+/**
+ * radeon_device_init - initialize the driver
+ *
+ * @rdev: radeon_device pointer
+ * @pdev: drm dev pointer
+ * @pdev: pci dev pointer
+ * @flags: driver flags
+ *
+ * Initializes the driver info and hw (all asics).
+ * Returns 0 for success or an error on failure.
+ * Called at driver startup.
+ */
 int radeon_device_init(struct radeon_device *rdev,
 		       struct drm_device *ddev,
 		       struct pci_dev *pdev,
@@ -721,6 +993,10 @@ int radeon_device_init(struct radeon_device *rdev,
 	rdev->usec_timeout = RADEON_MAX_USEC_TIMEOUT;
 	rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
 	rdev->accel_working = false;
+	/* set up ring ids */
+	for (i = 0; i < RADEON_NUM_RINGS; i++) {
+		rdev->ring[i].idx = i;
+	}

 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X).\n",
 		radeon_family_name[rdev->family], pdev->vendor, pdev->device,
@@ -851,6 +1127,14 @@ int radeon_device_init(struct radeon_device *rdev,

 static void radeon_debugfs_remove_files(struct radeon_device *rdev);

+/**
+ * radeon_device_fini - tear down the driver
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Tear down the driver info (all asics).
+ * Called at driver shutdown.
+ */
 void radeon_device_fini(struct radeon_device *rdev)
 {
 	DRM_INFO("radeon: finishing device.\n");
@@ -872,6 +1156,16 @@ void radeon_device_fini(struct radeon_device *rdev)
 /*
 * Suspend & resume.
 */
+/**
+ * radeon_suspend_kms - initiate device suspend
+ *
+ * @pdev: drm dev pointer
+ * @state: suspend state
+ *
+ * Puts the hw in the suspend state (all asics).
+ * Returns 0 for success or an error on failure.
+ * Called at driver suspend.
+ */
 int radeon_suspend_kms(struct drm_device *dev, pm_message_t state)
 {
 	struct radeon_device *rdev;
@@ -946,6 +1240,15 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state)
 	return 0;
 }

+/**
+ * radeon_resume_kms - initiate device resume
+ *
+ * @pdev: drm dev pointer
+ *
+ * Bring the hw back to operating state (all asics).
+ * Returns 0 for success or an error on failure.
+ * Called at driver resume.
+ */
 int radeon_resume_kms(struct drm_device *dev)
 {
 	struct drm_connector *connector;
@@ -994,6 +1297,14 @@ int radeon_resume_kms(struct drm_device *dev)
 	return 0;
 }

+/**
+ * radeon_gpu_reset - reset the asic
+ *
+ * @rdev: radeon device pointer
+ *
+ * Attempt the reset the GPU if it has hung (all asics).
+ * Returns 0 for success or an error on failure.
+ */
 int radeon_gpu_reset(struct radeon_device *rdev)
 {
 	unsigned ring_sizes[RADEON_NUM_RINGS];

--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -40,6 +40,26 @@
 #include "radeon.h"
 #include "radeon_trace.h"

+/*
+ * Fences
+ * Fences mark an event in the GPUs pipeline and are used
+ * for GPU/CPU synchronization.  When the fence is written,
+ * it is expected that all buffers associated with that fence
+ * are no longer in use by the associated ring on the GPU and
+ * that the the relevant GPU caches have been flushed.  Whether
+ * we use a scratch register or memory location depends on the asic
+ * and whether writeback is enabled.
+ */
+
+/**
+ * radeon_fence_write - write a fence value
+ *
+ * @rdev: radeon_device pointer
+ * @seq: sequence number to write
+ * @ring: ring index the fence is associated with
+ *
+ * Writes a fence value to memory or a scratch register (all asics).
+ */
 static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring)
 {
 	struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
@@ -50,6 +70,15 @@ static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring)
 	}
 }

+/**
+ * radeon_fence_read - read a fence value
+ *
+ * @rdev: radeon_device pointer
+ * @ring: ring index the fence is associated with
+ *
+ * Reads a fence value from memory or a scratch register (all asics).
+ * Returns the value of the fence read from memory or register.
+ */
 static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
 {
 	struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
@@ -63,6 +92,16 @@ static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
 	return seq;
 }

+/**
+ * radeon_fence_emit - emit a fence on the requested ring
+ *
+ * @rdev: radeon_device pointer
+ * @fence: radeon fence object
+ * @ring: ring index the fence is associated with
+ *
+ * Emits a fence command on the requested ring (all asics).
+ * Returns 0 on success, -ENOMEM on failure.
+ */
 int radeon_fence_emit(struct radeon_device *rdev,
 		      struct radeon_fence **fence,
 		      int ring)
@@ -81,6 +120,15 @@ int radeon_fence_emit(struct radeon_device *rdev,
 	return 0;
 }

+/**
+ * radeon_fence_process - process a fence
+ *
+ * @rdev: radeon_device pointer
+ * @ring: ring index the fence is associated with
+ *
+ * Checks the current fence value and wakes the fence queue
+ * if the sequence number has increased (all asics).
+ */
 void radeon_fence_process(struct radeon_device *rdev, int ring)
 {
 	uint64_t seq, last_seq;
@@ -141,6 +189,13 @@ void radeon_fence_process(struct radeon_device *rdev, int ring)
 	}
 }

+/**
+ * radeon_fence_destroy - destroy a fence
+ *
+ * @kref: fence kref
+ *
+ * Frees the fence object (all asics).
+ */
 static void radeon_fence_destroy(struct kref *kref)
 {
 	struct radeon_fence *fence;
@@ -149,6 +204,20 @@ static void radeon_fence_destroy(struct kref *kref)
 	kfree(fence);
 }

+/**
+ * radeon_fence_seq_signaled - check if a fence sequeuce number has signaled
+ *
+ * @rdev: radeon device pointer
+ * @seq: sequence number
+ * @ring: ring index the fence is associated with
+ *
+ * Check if the last singled fence sequnce number is >= the requested
+ * sequence number (all asics).
+ * Returns true if the fence has signaled (current fence value
+ * is >= requested value) or false if it has not (current fence
+ * value is < the requested value.  Helper function for
+ * radeon_fence_signaled().
+ */
 static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
 				      u64 seq, unsigned ring)
 {
@@ -163,6 +232,14 @@ static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
 	return false;
 }

+/**
+ * radeon_fence_signaled - check if a fence has signaled
+ *
+ * @fence: radeon fence object
+ *
+ * Check if the requested fence has signaled (all asics).
+ * Returns true if the fence has signaled or false if it has not.
+ */
 bool radeon_fence_signaled(struct radeon_fence *fence)
 {
 	if (!fence) {
@@ -178,6 +255,24 @@ bool radeon_fence_signaled(struct radeon_fence *fence)
 	return false;
 }

+/**
+ * radeon_fence_wait_seq - wait for a specific sequence number
+ *
+ * @rdev: radeon device pointer
+ * @target_seq: sequence number we want to wait for
+ * @ring: ring index the fence is associated with
+ * @intr: use interruptable sleep
+ * @lock_ring: whether the ring should be locked or not
+ *
+ * Wait for the requested sequence number to be written (all asics).
+ * @intr selects whether to use interruptable (true) or non-interruptable
+ * (false) sleep when waiting for the sequence number.  Helper function
+ * for radeon_fence_wait(), et al.
+ * Returns 0 if the sequence number has passed, error for all other cases.
+ * -EDEADLK is returned when a GPU lockup has been detected and the ring is
+ * marked as not ready so no further jobs get scheduled until a successful
+ * reset.
+ */
 static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 target_seq,
 				 unsigned ring, bool intr, bool lock_ring)
 {
@@ -273,6 +368,17 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 target_seq,
 	return 0;
 }

+/**
+ * radeon_fence_wait - wait for a fence to signal
+ *
+ * @fence: radeon fence object
+ * @intr: use interruptable sleep
+ *
+ * Wait for the requested fence to signal (all asics).
+ * @intr selects whether to use interruptable (true) or non-interruptable
+ * (false) sleep when waiting for the fence.
+ * Returns 0 if the fence has passed, error for all other cases.
+ */
 int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 {
 	int r;
@@ -303,6 +409,20 @@ bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
 	return false;
 }

+/**
+ * radeon_fence_wait_any_seq - wait for a sequence number on any ring
+ *
+ * @rdev: radeon device pointer
+ * @target_seq: sequence number(s) we want to wait for
+ * @intr: use interruptable sleep
+ *
+ * Wait for the requested sequence number(s) to be written by any ring
+ * (all asics).  Sequnce number array is indexed by ring id.
+ * @intr selects whether to use interruptable (true) or non-interruptable
+ * (false) sleep when waiting for the sequence number.  Helper function
+ * for radeon_fence_wait_any(), et al.
+ * Returns 0 if the sequence number has passed, error for all other cases.
+ */
 static int radeon_fence_wait_any_seq(struct radeon_device *rdev,
 				     u64 *target_seq, bool intr)
 {
@@ -331,7 +451,7 @@ static int radeon_fence_wait_any_seq(struct radeon_device *rdev,

 	/* nothing to wait for ? */
 	if (ring == RADEON_NUM_RINGS) {
-		return 0;
+		return -ENOENT;
 	}

 	while (!radeon_fence_any_seq_signaled(rdev, target_seq)) {
@@ -412,6 +532,19 @@ static int radeon_fence_wait_any_seq(struct radeon_device *rdev,
 	return 0;
 }

+/**
+ * radeon_fence_wait_any - wait for a fence to signal on any ring
+ *
+ * @rdev: radeon device pointer
+ * @fences: radeon fence object(s)
+ * @intr: use interruptable sleep
+ *
+ * Wait for any requested fence to signal (all asics).  Fence
+ * array is indexed by ring id.  @intr selects whether to use
+ * interruptable (true) or non-interruptable (false) sleep when
+ * waiting for the fences. Used by the suballocator.
+ * Returns 0 if any fence has passed, error for all other cases.
+ */
 int radeon_fence_wait_any(struct radeon_device *rdev,
 			  struct radeon_fence **fences,
 			  bool intr)
@@ -442,7 +575,16 @@ int radeon_fence_wait_any(struct radeon_device *rdev,
 	return 0;
 }

-/* caller must hold ring lock */
+/**
+ * radeon_fence_wait_next_locked - wait for the next fence to signal
+ *
+ * @rdev: radeon device pointer
+ * @ring: ring index the fence is associated with
+ *
+ * Wait for the next fence on the requested ring to signal (all asics).
+ * Returns 0 if the next fence has passed, error for all other cases.
+ * Caller must hold ring lock.
+ */
 int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring)
 {
 	uint64_t seq;
@@ -456,7 +598,16 @@ int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring)
 	return radeon_fence_wait_seq(rdev, seq, ring, false, false);
 }

-/* caller must hold ring lock */
+/**
+ * radeon_fence_wait_empty_locked - wait for all fences to signal
+ *
+ * @rdev: radeon device pointer
+ * @ring: ring index the fence is associated with
+ *
+ * Wait for all fences on the requested ring to signal (all asics).
+ * Returns 0 if the fences have passed, error for all other cases.
+ * Caller must hold ring lock.
+ */
 void radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
 {
 	uint64_t seq = rdev->fence_drv[ring].sync_seq[ring];
@@ -479,12 +630,27 @@ void radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
 	}
 }

+/**
+ * radeon_fence_ref - take a ref on a fence
+ *
+ * @fence: radeon fence object
+ *
+ * Take a reference on a fence (all asics).
+ * Returns the fence.
+ */
 struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
 {
 	kref_get(&fence->kref);
 	return fence;
 }

+/**
+ * radeon_fence_unref - remove a ref on a fence
+ *
+ * @fence: radeon fence object
+ *
+ * Remove a reference on a fence (all asics).
+ */
 void radeon_fence_unref(struct radeon_fence **fence)
 {
 	struct radeon_fence *tmp = *fence;
@@ -495,6 +661,16 @@ void radeon_fence_unref(struct radeon_fence **fence)
 	}
 }

+/**
+ * radeon_fence_count_emitted - get the count of emitted fences
+ *
+ * @rdev: radeon device pointer
+ * @ring: ring index the fence is associated with
+ *
+ * Get the number of fences emitted on the requested ring (all asics).
+ * Returns the number of emitted fences on the ring.  Used by the
+ * dynpm code to ring track activity.
+ */
 unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
 {
 	uint64_t emitted;
@@ -512,6 +688,17 @@ unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
 	return (unsigned)emitted;
 }

+/**
+ * radeon_fence_need_sync - do we need a semaphore
+ *
+ * @fence: radeon fence object
+ * @dst_ring: which ring to check against
+ *
+ * Check if the fence needs to be synced against another ring
+ * (all asics).  If so, we need to emit a semaphore.
+ * Returns true if we need to sync with another ring, false if
+ * not.
+ */
 bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring)
 {
 	struct radeon_fence_driver *fdrv;
@@ -533,6 +720,15 @@ bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring)
 	return true;
 }

+/**
+ * radeon_fence_note_sync - record the sync point
+ *
+ * @fence: radeon fence object
+ * @dst_ring: which ring to check against
+ *
+ * Note the sequence number at which point the fence will
+ * be synced with the requested ring (all asics).
+ */
 void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring)
 {
 	struct radeon_fence_driver *dst, *src;
@@ -557,6 +753,18 @@ void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring)
 	}
 }

+/**
+ * radeon_fence_driver_start_ring - make the fence driver
+ * ready for use on the requested ring.
+ *
+ * @rdev: radeon device pointer
+ * @ring: ring index to start the fence driver on
+ *
+ * Make the fence driver ready for processing (all asics).
+ * Not all asics have all rings, so each asic will only
+ * start the fence driver on the rings it has.
+ * Returns 0 for success, errors for failure.
+ */
 int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
 {
 	uint64_t index;
@@ -585,6 +793,16 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
 	return 0;
 }

+/**
+ * radeon_fence_driver_init_ring - init the fence driver
+ * for the requested ring.
+ *
+ * @rdev: radeon device pointer
+ * @ring: ring index to start the fence driver on
+ *
+ * Init the fence driver for the requested ring (all asics).
+ * Helper function for radeon_fence_driver_init().
+ */
 static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
 {
 	int i;
@@ -599,6 +817,18 @@ static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
 	rdev->fence_drv[ring].initialized = false;
 }

+/**
+ * radeon_fence_driver_init - init the fence driver
+ * for all possible rings.
+ *
+ * @rdev: radeon device pointer
+ *
+ * Init the fence driver for all possible rings (all asics).
+ * Not all asics have all rings, so each asic will only
+ * start the fence driver on the rings it has using
+ * radeon_fence_driver_start_ring().
+ * Returns 0 for success.
+ */
 int radeon_fence_driver_init(struct radeon_device *rdev)
 {
 	int ring;
@@ -613,6 +843,14 @@ int radeon_fence_driver_init(struct radeon_device *rdev)
 	return 0;
 }

+/**
+ * radeon_fence_driver_fini - tear down the fence driver
+ * for all possible rings.
+ *
+ * @rdev: radeon device pointer
+ *
+ * Tear down the fence driver for all possible rings (all asics).
+ */
 void radeon_fence_driver_fini(struct radeon_device *rdev)
 {
 	int ring;

--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -30,9 +30,39 @@
 #include "radeon.h"
 #include "radeon_reg.h"

+/*
+ * GART
+ * The GART (Graphics Aperture Remapping Table) is an aperture
+ * in the GPU's address space.  System pages can be mapped into
+ * the aperture and look like contiguous pages from the GPU's
+ * perspective.  A page table maps the pages in the aperture
+ * to the actual backing pages in system memory.
+ *
+ * Radeon GPUs support both an internal GART, as described above,
+ * and AGP.  AGP works similarly, but the GART table is configured
+ * and maintained by the northbridge rather than the driver.
+ * Radeon hw has a separate AGP aperture that is programmed to
+ * point to the AGP aperture provided by the northbridge and the
+ * requests are passed through to the northbridge aperture.
+ * Both AGP and internal GART can be used at the same time, however
+ * that is not currently supported by the driver.
+ *
+ * This file handles the common internal GART management.
+ */
+
 /*
 * Common GART table functions.
 */
+/**
+ * radeon_gart_table_ram_alloc - allocate system ram for gart page table
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Allocate system memory for GART page table
+ * (r1xx-r3xx, non-pcie r4xx, rs400).  These asics require the
+ * gart table to be in system memory.
+ * Returns 0 for success, -ENOMEM for failure.
+ */
 int radeon_gart_table_ram_alloc(struct radeon_device *rdev)
 {
 	void *ptr;
@@ -54,6 +84,15 @@ int radeon_gart_table_ram_alloc(struct radeon_device *rdev)
 	return 0;
 }

+/**
+ * radeon_gart_table_ram_free - free system ram for gart page table
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Free system memory for GART page table
+ * (r1xx-r3xx, non-pcie r4xx, rs400).  These asics require the
+ * gart table to be in system memory.
+ */
 void radeon_gart_table_ram_free(struct radeon_device *rdev)
 {
 	if (rdev->gart.ptr == NULL) {
@@ -73,6 +112,16 @@ void radeon_gart_table_ram_free(struct radeon_device *rdev)
 	rdev->gart.table_addr = 0;
 }

+/**
+ * radeon_gart_table_vram_alloc - allocate vram for gart page table
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Allocate video memory for GART page table
+ * (pcie r4xx, r5xx+).  These asics require the
+ * gart table to be in video memory.
+ * Returns 0 for success, error for failure.
+ */
 int radeon_gart_table_vram_alloc(struct radeon_device *rdev)
 {
 	int r;
@@ -88,6 +137,16 @@ int radeon_gart_table_vram_alloc(struct radeon_device *rdev)
 	return 0;
 }

+/**
+ * radeon_gart_table_vram_pin - pin gart page table in vram
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Pin the GART page table in vram so it will not be moved
+ * by the memory manager (pcie r4xx, r5xx+).  These asics require the
+ * gart table to be in video memory.
+ * Returns 0 for success, error for failure.
+ */
 int radeon_gart_table_vram_pin(struct radeon_device *rdev)
 {
 	uint64_t gpu_addr;
@@ -110,6 +169,14 @@ int radeon_gart_table_vram_pin(struct radeon_device *rdev)
 	return r;
 }

+/**
+ * radeon_gart_table_vram_unpin - unpin gart page table in vram
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Unpin the GART page table in vram (pcie r4xx, r5xx+).
+ * These asics require the gart table to be in video memory.
+ */
 void radeon_gart_table_vram_unpin(struct radeon_device *rdev)
 {
 	int r;
@@ -126,6 +193,15 @@ void radeon_gart_table_vram_unpin(struct radeon_device *rdev)
 	}
 }

+/**
+ * radeon_gart_table_vram_free - free gart page table vram
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Free the video memory used for the GART page table
+ * (pcie r4xx, r5xx+).  These asics require the gart table to
+ * be in video memory.
+ */
 void radeon_gart_table_vram_free(struct radeon_device *rdev)
 {
 	if (rdev->gart.robj == NULL) {
@@ -135,12 +211,19 @@ void radeon_gart_table_vram_free(struct radeon_device *rdev)
 	radeon_bo_unref(&rdev->gart.robj);
 }

-
-
-
 /*
 * Common gart functions.
 */
+/**
+ * radeon_gart_unbind - unbind pages from the gart page table
+ *
+ * @rdev: radeon_device pointer
+ * @offset: offset into the GPU's gart aperture
+ * @pages: number of pages to unbind
+ *
+ * Unbinds the requested pages from the gart page table and
+ * replaces them with the dummy page (all asics).
+ */
 void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
 			int pages)
 {
@@ -172,6 +255,19 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
 	radeon_gart_tlb_flush(rdev);
 }

+/**
+ * radeon_gart_bind - bind pages into the gart page table
+ *
+ * @rdev: radeon_device pointer
+ * @offset: offset into the GPU's gart aperture
+ * @pages: number of pages to bind
+ * @pagelist: pages to bind
+ * @dma_addr: DMA addresses of pages
+ *
+ * Binds the requested pages to the gart page table
+ * (all asics).
+ * Returns 0 for success, -EINVAL for failure.
+ */
 int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
 		     int pages, struct page **pagelist, dma_addr_t *dma_addr)
 {
@@ -203,6 +299,14 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
 	return 0;
 }

+/**
+ * radeon_gart_restore - bind all pages in the gart page table
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Binds all pages in the gart page table (all asics).
+ * Used to rebuild the gart table on device startup or resume.
+ */
 void radeon_gart_restore(struct radeon_device *rdev)
 {
 	int i, j, t;
@@ -222,6 +326,14 @@ void radeon_gart_restore(struct radeon_device *rdev)
 	radeon_gart_tlb_flush(rdev);
 }

+/**
+ * radeon_gart_init - init the driver info for managing the gart
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Allocate the dummy page and init the gart driver info (all asics).
+ * Returns 0 for success, error for failure.
+ */
 int radeon_gart_init(struct radeon_device *rdev)
 {
 	int r, i;
@@ -262,6 +374,13 @@ int radeon_gart_init(struct radeon_device *rdev)
 	return 0;
 }

+/**
+ * radeon_gart_fini - tear down the driver info for managing the gart
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Tear down the gart driver info and free the dummy page (all asics).
+ */
 void radeon_gart_fini(struct radeon_device *rdev)
 {
 	if (rdev->gart.pages && rdev->gart.pages_addr && rdev->gart.ready) {
@@ -277,12 +396,40 @@ void radeon_gart_fini(struct radeon_device *rdev)
 	radeon_dummy_page_fini(rdev);
 }

+/*
+ * GPUVM
+ * GPUVM is similar to the legacy gart on older asics, however
+ * rather than there being a single global gart table
+ * for the entire GPU, there are multiple VM page tables active
+ * at any given time.  The VM page tables can contain a mix
+ * vram pages and system memory pages and system memory pages
+ * can be mapped as snooped (cached system pages) or unsnooped
+ * (uncached system pages).
+ * Each VM has an ID associated with it and there is a page table
+ * associated with each VMID.  When execting a command buffer,
+ * the kernel tells the the ring what VMID to use for that command
+ * buffer.  VMIDs are allocated dynamically as commands are submitted.
+ * The userspace drivers maintain their own address space and the kernel
+ * sets up their pages tables accordingly when they submit their
+ * command buffers and a VMID is assigned.
+ * Cayman/Trinity support up to 8 active VMs at any given time;
+ * SI supports 16.
+ */
+
 /*
 * vm helpers
 *
 * TODO bind a default page at vm initialization for default address
 */

+/**
+ * radeon_vm_manager_init - init the vm manager
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Init the vm manager (cayman+).
+ * Returns 0 for success, error for failure.
+ */
 int radeon_vm_manager_init(struct radeon_device *rdev)
 {
 	struct radeon_vm *vm;
@@ -337,6 +484,16 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
 }

 /* global mutex must be lock */
+/**
+ * radeon_vm_unbind_locked - unbind a specific vm
+ *
+ * @rdev: radeon_device pointer
+ * @vm: vm to unbind
+ *
+ * Unbind the requested vm (cayman+).
+ * Wait for use of the VM to finish, then unbind the page table,
+ * and free the page table memory.
+ */
 static void radeon_vm_unbind_locked(struct radeon_device *rdev,
 				    struct radeon_vm *vm)
 {
@@ -376,6 +533,13 @@ static void radeon_vm_unbind_locked(struct radeon_device *rdev,
 	}
 }

+/**
+ * radeon_vm_manager_fini - tear down the vm manager
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Tear down the VM manager (cayman+).
+ */
 void radeon_vm_manager_fini(struct radeon_device *rdev)
 {
 	struct radeon_vm *vm, *tmp;
@@ -397,6 +561,14 @@ void radeon_vm_manager_fini(struct radeon_device *rdev)
 }

 /* global mutex must be locked */
+/**
+ * radeon_vm_unbind - locked version of unbind
+ *
+ * @rdev: radeon_device pointer
+ * @vm: vm to unbind
+ *
+ * Locked version that wraps radeon_vm_unbind_locked (cayman+).
+ */
 void radeon_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm)
 {
 	mutex_lock(&vm->mutex);
@@ -405,6 +577,18 @@ void radeon_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm)
 }

 /* global and local mutex must be locked */
+/**
+ * radeon_vm_bind - bind a page table to a VMID
+ *
+ * @rdev: radeon_device pointer
+ * @vm: vm to bind
+ *
+ * Bind the requested vm (cayman+).
+ * Suballocate memory for the page table, allocate a VMID
+ * and bind the page table to it, and finally start to populate
+ * the page table.
+ * Returns 0 for success, error for failure.
+ */
 int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm)
 {
 	struct radeon_vm *vm_evict;
@@ -467,6 +651,20 @@ int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm)
 }

 /* object have to be reserved */
+/**
+ * radeon_vm_bo_add - add a bo to a specific vm
+ *
+ * @rdev: radeon_device pointer
+ * @vm: requested vm
+ * @bo: radeon buffer object
+ * @offset: requested offset of the buffer in the VM address space
+ * @flags: attributes of pages (read/write/valid/etc.)
+ *
+ * Add @bo into the requested vm (cayman+).
+ * Add @bo to the list of bos associated with the vm and validate
+ * the offset requested within the vm address space.
+ * Returns 0 for success, error for failure.
+ */
 int radeon_vm_bo_add(struct radeon_device *rdev,
 		     struct radeon_vm *vm,
 		     struct radeon_bo *bo,
@@ -544,6 +742,17 @@ int radeon_vm_bo_add(struct radeon_device *rdev,
 	return 0;
 }

+/**
+ * radeon_vm_get_addr - get the physical address of the page
+ *
+ * @rdev: radeon_device pointer
+ * @mem: ttm mem
+ * @pfn: pfn
+ *
+ * Look up the physical address of the page that the pte resolves
+ * to (cayman+).
+ * Returns the physical address of the page.
+ */
 static u64 radeon_vm_get_addr(struct radeon_device *rdev,
 			      struct ttm_mem_reg *mem,
 			      unsigned pfn)
@@ -573,6 +782,17 @@ static u64 radeon_vm_get_addr(struct radeon_device *rdev,
 }

 /* object have to be reserved & global and local mutex must be locked */
+/**
+ * radeon_vm_bo_update_pte - map a bo into the vm page table
+ *
+ * @rdev: radeon_device pointer
+ * @vm: requested vm
+ * @bo: radeon buffer object
+ * @mem: ttm mem
+ *
+ * Fill in the page table entries for @bo (cayman+).
+ * Returns 0 for success, -EINVAL for failure.
+ */
 int radeon_vm_bo_update_pte(struct radeon_device *rdev,
 			    struct radeon_vm *vm,
 			    struct radeon_bo *bo,
@@ -621,6 +841,18 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
 }

 /* object have to be reserved */
+/**
+ * radeon_vm_bo_rmv - remove a bo to a specific vm
+ *
+ * @rdev: radeon_device pointer
+ * @vm: requested vm
+ * @bo: radeon buffer object
+ *
+ * Remove @bo from the requested vm (cayman+).
+ * Remove @bo from the list of bos associated with the vm and
+ * remove the ptes for @bo in the page table.
+ * Returns 0 for success.
+ */
 int radeon_vm_bo_rmv(struct radeon_device *rdev,
 		     struct radeon_vm *vm,
 		     struct radeon_bo *bo)
@@ -643,6 +875,15 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev,
 	return 0;
 }

+/**
+ * radeon_vm_bo_invalidate - mark the bo as invalid
+ *
+ * @rdev: radeon_device pointer
+ * @vm: requested vm
+ * @bo: radeon buffer object
+ *
+ * Mark @bo as invalid (cayman+).
+ */
 void radeon_vm_bo_invalidate(struct radeon_device *rdev,
 			     struct radeon_bo *bo)
 {
@@ -654,6 +895,17 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev,
 	}
 }

+/**
+ * radeon_vm_init - initialize a vm instance
+ *
+ * @rdev: radeon_device pointer
+ * @vm: requested vm
+ *
+ * Init @vm (cayman+).
+ * Map the IB pool and any other shared objects into the VM
+ * by default as it's used by all VMs.
+ * Returns 0 for success, error for failure.
+ */
 int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
 {
 	int r;
@@ -672,6 +924,15 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
 	return r;
 }

+/**
+ * radeon_vm_init - tear down a vm instance
+ *
+ * @rdev: radeon_device pointer
+ * @vm: requested vm
+ *
+ * Tear down @vm (cayman+).
+ * Unbind the VM and remove all bos from the vm bo list
+ */
 void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
 {
 	struct radeon_bo_va *bo_va, *tmp;

--- a/drivers/gpu/drm/radeon/radeon_irq_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c
@@ -34,6 +34,15 @@

 #define RADEON_WAIT_IDLE_TIMEOUT 200

+/**
+ * radeon_driver_irq_handler_kms - irq handler for KMS
+ *
+ * @DRM_IRQ_ARGS: args
+ *
+ * This is the irq handler for the radeon KMS driver (all asics).
+ * radeon_irq_process is a macro that points to the per-asic
+ * irq handler callback.
+ */
 irqreturn_t radeon_driver_irq_handler_kms(DRM_IRQ_ARGS)
 {
 	struct drm_device *dev = (struct drm_device *) arg;
@@ -45,6 +54,17 @@ irqreturn_t radeon_driver_irq_handler_kms(DRM_IRQ_ARGS)
 /*
 * Handle hotplug events outside the interrupt handler proper.
 */
+/**
+ * radeon_hotplug_work_func - display hotplug work handler
+ *
+ * @work: work struct
+ *
+ * This is the hot plug event work handler (all asics).
+ * The work gets scheduled from the irq handler if there
+ * was a hot plug interrupt.  It walks the connector table
+ * and calls the hotplug handler for each one, then sends
+ * a drm hotplug event to alert userspace.
+ */
 static void radeon_hotplug_work_func(struct work_struct *work)
 {
 	struct radeon_device *rdev = container_of(work, struct radeon_device,
@@ -61,6 +81,14 @@ static void radeon_hotplug_work_func(struct work_struct *work)
 	drm_helper_hpd_irq_event(dev);
 }

+/**
+ * radeon_driver_irq_preinstall_kms - drm irq preinstall callback
+ *
+ * @dev: drm dev pointer
+ *
+ * Gets the hw ready to enable irqs (all asics).
+ * This function disables all interrupt sources on the GPU.
+ */
 void radeon_driver_irq_preinstall_kms(struct drm_device *dev)
 {
 	struct radeon_device *rdev = dev->dev_private;
@@ -85,12 +113,27 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev)
 	radeon_irq_process(rdev);
 }

+/**
+ * radeon_driver_irq_postinstall_kms - drm irq preinstall callback
+ *
+ * @dev: drm dev pointer
+ *
+ * Handles stuff to be done after enabling irqs (all asics).
+ * Returns 0 on success.
+ */
 int radeon_driver_irq_postinstall_kms(struct drm_device *dev)
 {
 	dev->max_vblank_count = 0x001fffff;
 	return 0;
 }

+/**
+ * radeon_driver_irq_uninstall_kms - drm irq uninstall callback
+ *
+ * @dev: drm dev pointer
+ *
+ * This function disables all interrupt sources on the GPU (all asics).
+ */
 void radeon_driver_irq_uninstall_kms(struct drm_device *dev)
 {
 	struct radeon_device *rdev = dev->dev_private;
@@ -116,6 +159,16 @@ void radeon_driver_irq_uninstall_kms(struct drm_device *dev)
 	spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
 }

+/**
+ * radeon_msi_ok - asic specific msi checks
+ *
+ * @rdev: radeon device pointer
+ *
+ * Handles asic specific MSI checks to determine if
+ * MSIs should be enabled on a particular chip (all asics).
+ * Returns true if MSIs should be enabled, false if MSIs
+ * should not be enabled.
+ */
 static bool radeon_msi_ok(struct radeon_device *rdev)
 {
 	/* RV370/RV380 was first asic with MSI support */
@@ -168,6 +221,14 @@ static bool radeon_msi_ok(struct radeon_device *rdev)
 	return true;
 }

+/**
+ * radeon_irq_kms_init - init driver interrupt info
+ *
+ * @rdev: radeon device pointer
+ *
+ * Sets up the work irq handlers, vblank init, MSIs, etc. (all asics).
+ * Returns 0 for success, error for failure.
+ */
 int radeon_irq_kms_init(struct radeon_device *rdev)
 {
 	int r = 0;
@@ -200,6 +261,13 @@ int radeon_irq_kms_init(struct radeon_device *rdev)
 	return 0;
 }

+/**
+ * radeon_irq_kms_fini - tear down driver interrrupt info
+ *
+ * @rdev: radeon device pointer
+ *
+ * Tears down the work irq handlers, vblank handlers, MSIs, etc. (all asics).
+ */
 void radeon_irq_kms_fini(struct radeon_device *rdev)
 {
 	drm_vblank_cleanup(rdev->ddev);
@@ -212,6 +280,16 @@ void radeon_irq_kms_fini(struct radeon_device *rdev)
 	flush_work_sync(&rdev->hotplug_work);
 }

+/**
+ * radeon_irq_kms_sw_irq_get - enable software interrupt
+ *
+ * @rdev: radeon device pointer
+ * @ring: ring whose interrupt you want to enable
+ *
+ * Enables the software interrupt for a specific ring (all asics).
+ * The software interrupt is generally used to signal a fence on
+ * a particular ring.
+ */
 void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring)
 {
 	unsigned long irqflags;
@@ -226,6 +304,16 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring)
 	}
 }

+/**
+ * radeon_irq_kms_sw_irq_put - disable software interrupt
+ *
+ * @rdev: radeon device pointer
+ * @ring: ring whose interrupt you want to disable
+ *
+ * Disables the software interrupt for a specific ring (all asics).
+ * The software interrupt is generally used to signal a fence on
+ * a particular ring.
+ */
 void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring)
 {
 	unsigned long irqflags;
@@ -240,6 +328,15 @@ void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring)
 	}
 }

+/**
+ * radeon_irq_kms_pflip_irq_get - enable pageflip interrupt
+ *
+ * @rdev: radeon device pointer
+ * @crtc: crtc whose interrupt you want to enable
+ *
+ * Enables the pageflip interrupt for a specific crtc (all asics).
+ * For pageflips we use the vblank interrupt source.
+ */
 void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc)
 {
 	unsigned long irqflags;
@@ -257,6 +354,15 @@ void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc)
 	}
 }

+/**
+ * radeon_irq_kms_pflip_irq_put - disable pageflip interrupt
+ *
+ * @rdev: radeon device pointer
+ * @crtc: crtc whose interrupt you want to disable
+ *
+ * Disables the pageflip interrupt for a specific crtc (all asics).
+ * For pageflips we use the vblank interrupt source.
+ */
 void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc)
 {
 	unsigned long irqflags;
@@ -274,6 +380,14 @@ void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc)
 	}
 }

+/**
+ * radeon_irq_kms_enable_afmt - enable audio format change interrupt
+ *
+ * @rdev: radeon device pointer
+ * @block: afmt block whose interrupt you want to enable
+ *
+ * Enables the afmt change interrupt for a specific afmt block (all asics).
+ */
 void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block)
 {
 	unsigned long irqflags;
@@ -285,6 +399,14 @@ void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block)

 }

+/**
+ * radeon_irq_kms_disable_afmt - disable audio format change interrupt
+ *
+ * @rdev: radeon device pointer
+ * @block: afmt block whose interrupt you want to disable
+ *
+ * Disables the afmt change interrupt for a specific afmt block (all asics).
+ */
 void radeon_irq_kms_disable_afmt(struct radeon_device *rdev, int block)
 {
 	unsigned long irqflags;
@@ -295,6 +417,14 @@ void radeon_irq_kms_disable_afmt(struct radeon_device *rdev, int block)
 	spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
 }

+/**
+ * radeon_irq_kms_enable_hpd - enable hotplug detect interrupt
+ *
+ * @rdev: radeon device pointer
+ * @hpd_mask: mask of hpd pins you want to enable.
+ *
+ * Enables the hotplug detect interrupt for a specific hpd pin (all asics).
+ */
 void radeon_irq_kms_enable_hpd(struct radeon_device *rdev, unsigned hpd_mask)
 {
 	unsigned long irqflags;
@@ -307,6 +437,14 @@ void radeon_irq_kms_enable_hpd(struct radeon_device *rdev, unsigned hpd_mask)
 	spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
 }

+/**
+ * radeon_irq_kms_disable_hpd - disable hotplug detect interrupt
+ *
+ * @rdev: radeon device pointer
+ * @hpd_mask: mask of hpd pins you want to disable.
+ *
+ * Disables the hotplug detect interrupt for a specific hpd pin (all asics).
+ */
 void radeon_irq_kms_disable_hpd(struct radeon_device *rdev, unsigned hpd_mask)
 {
 	unsigned long irqflags;
@@ -319,6 +457,18 @@ void radeon_irq_kms_disable_hpd(struct radeon_device *rdev, unsigned hpd_mask)
 	spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
 }

+/**
+ * radeon_irq_kms_wait_gui_idle - waits for drawing engine to be idle
+ *
+ * @rdev: radeon device pointer
+ *
+ * Enabled the GUI idle interrupt and waits for it to fire (r6xx+).
+ * This is currently used to make sure the 3D engine is idle for power
+ * management, but should be replaces with proper fence waits.
+ * GUI idle interrupts don't work very well on pre-r6xx hw and it also
+ * does not take into account other aspects of the chip that may be busy.
+ * DO NOT USE GOING FORWARD.
+ */
 int radeon_irq_kms_wait_gui_idle(struct radeon_device *rdev)
 {
 	unsigned long irqflags;

--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -33,6 +33,17 @@
 #include <linux/vga_switcheroo.h>
 #include <linux/slab.h>

+/**
+ * radeon_driver_unload_kms - Main unload function for KMS.
+ *
+ * @dev: drm dev pointer
+ *
+ * This is the main unload function for KMS (all asics).
+ * It calls radeon_modeset_fini() to tear down the
+ * displays, and radeon_device_fini() to tear down
+ * the rest of the device (CP, writeback, etc.).
+ * Returns 0 on success.
+ */
 int radeon_driver_unload_kms(struct drm_device *dev)
 {
 	struct radeon_device *rdev = dev->dev_private;
@@ -46,6 +57,19 @@ int radeon_driver_unload_kms(struct drm_device *dev)
 	return 0;
 }

+/**
+ * radeon_driver_load_kms - Main load function for KMS.
+ *
+ * @dev: drm dev pointer
+ * @flags: device flags
+ *
+ * This is the main load function for KMS (all asics).
+ * It calls radeon_device_init() to set up the non-display
+ * parts of the chip (asic init, CP, writeback, etc.), and
+ * radeon_modeset_init() to set up the display parts
+ * (crtcs, encoders, hotplug detect, etc.).
+ * Returns 0 on success, error on failure.
+ */
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags)
 {
 	struct radeon_device *rdev;
@@ -96,6 +120,16 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags)
 	return r;
 }

+/**
+ * radeon_set_filp_rights - Set filp right.
+ *
+ * @dev: drm dev pointer
+ * @owner: drm file
+ * @applier: drm file
+ * @value: value
+ *
+ * Sets the filp rights for the device (all asics).
+ */
 static void radeon_set_filp_rights(struct drm_device *dev,
 				   struct drm_file **owner,
 				   struct drm_file *applier,
@@ -118,6 +152,18 @@ static void radeon_set_filp_rights(struct drm_device *dev,
 /*
 * Userspace get information ioctl
 */
+/**
+ * radeon_info_ioctl - answer a device specific request.
+ *
+ * @rdev: radeon device pointer
+ * @data: request object
+ * @filp: drm filp
+ *
+ * This function is used to pass device specific parameters to the userspace
+ * drivers.  Examples include: pci device id, pipeline parms, tiling params,
+ * etc. (all asics).
+ * Returns 0 on success, -EINVAL on failure.
+ */
 int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
 	struct radeon_device *rdev = dev->dev_private;
@@ -301,16 +347,40 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 /*
 * Outdated mess for old drm with Xorg being in charge (void function now).
 */
+/**
+ * radeon_driver_firstopen_kms - drm callback for first open
+ *
+ * @dev: drm dev pointer
+ *
+ * Nothing to be done for KMS (all asics).
+ * Returns 0 on success.
+ */
 int radeon_driver_firstopen_kms(struct drm_device *dev)
 {
 	return 0;
 }

+/**
+ * radeon_driver_firstopen_kms - drm callback for last close
+ *
+ * @dev: drm dev pointer
+ *
+ * Switch vga switcheroo state after last close (all asics).
+ */
 void radeon_driver_lastclose_kms(struct drm_device *dev)
 {
 	vga_switcheroo_process_delayed_switch();
 }

+/**
+ * radeon_driver_open_kms - drm callback for open
+ *
+ * @dev: drm dev pointer
+ * @file_priv: drm file
+ *
+ * On device open, init vm on cayman+ (all asics).
+ * Returns 0 on success, error on failure.
+ */
 int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 {
 	struct radeon_device *rdev = dev->dev_private;
@@ -339,6 +409,14 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 	return 0;
 }

+/**
+ * radeon_driver_postclose_kms - drm callback for post close
+ *
+ * @dev: drm dev pointer
+ * @file_priv: drm file
+ *
+ * On device post close, tear down vm on cayman+ (all asics).
+ */
 void radeon_driver_postclose_kms(struct drm_device *dev,
 				 struct drm_file *file_priv)
 {
@@ -354,6 +432,15 @@ void radeon_driver_postclose_kms(struct drm_device *dev,
 	}
 }

+/**
+ * radeon_driver_preclose_kms - drm callback for pre close
+ *
+ * @dev: drm dev pointer
+ * @file_priv: drm file
+ *
+ * On device pre close, tear down hyperz and cmask filps on r1xx-r5xx
+ * (all asics).
+ */
 void radeon_driver_preclose_kms(struct drm_device *dev,
 				struct drm_file *file_priv)
 {
@@ -367,6 +454,15 @@ void radeon_driver_preclose_kms(struct drm_device *dev,
 /*
 * VBlank related functions.
 */
+/**
+ * radeon_get_vblank_counter_kms - get frame count
+ *
+ * @dev: drm dev pointer
+ * @crtc: crtc to get the frame count from
+ *
+ * Gets the frame count on the requested crtc (all asics).
+ * Returns frame count on success, -EINVAL on failure.
+ */
 u32 radeon_get_vblank_counter_kms(struct drm_device *dev, int crtc)
 {
 	struct radeon_device *rdev = dev->dev_private;
@@ -379,6 +475,15 @@ u32 radeon_get_vblank_counter_kms(struct drm_device *dev, int crtc)
 	return radeon_get_vblank_counter(rdev, crtc);
 }

+/**
+ * radeon_enable_vblank_kms - enable vblank interrupt
+ *
+ * @dev: drm dev pointer
+ * @crtc: crtc to enable vblank interrupt for
+ *
+ * Enable the interrupt on the requested crtc (all asics).
+ * Returns 0 on success, -EINVAL on failure.
+ */
 int radeon_enable_vblank_kms(struct drm_device *dev, int crtc)
 {
 	struct radeon_device *rdev = dev->dev_private;
@@ -397,6 +502,14 @@ int radeon_enable_vblank_kms(struct drm_device *dev, int crtc)
 	return r;
 }

+/**
+ * radeon_disable_vblank_kms - disable vblank interrupt
+ *
+ * @dev: drm dev pointer
+ * @crtc: crtc to disable vblank interrupt for
+ *
+ * Disable the interrupt on the requested crtc (all asics).
+ */
 void radeon_disable_vblank_kms(struct drm_device *dev, int crtc)
 {
 	struct radeon_device *rdev = dev->dev_private;
@@ -413,6 +526,19 @@ void radeon_disable_vblank_kms(struct drm_device *dev, int crtc)
 	spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
 }

+/**
+ * radeon_get_vblank_timestamp_kms - get vblank timestamp
+ *
+ * @dev: drm dev pointer
+ * @crtc: crtc to get the timestamp for
+ * @max_error: max error
+ * @vblank_time: time value
+ * @flags: flags passed to the driver
+ *
+ * Gets the timestamp on the requested crtc based on the
+ * scanout position.  (all asics).
+ * Returns postive status flags on success, negative error on failure.
+ */
 int radeon_get_vblank_timestamp_kms(struct drm_device *dev, int crtc,
 				    int *max_error,
 				    struct timeval *vblank_time,

--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -35,10 +35,28 @@
 #include "atom.h"

 /*
- * IB.
+ * IB
+ * IBs (Indirect Buffers) and areas of GPU accessible memory where
+ * commands are stored.  You can put a pointer to the IB in the
+ * command ring and the hw will fetch the commands from the IB
+ * and execute them.  Generally userspace acceleration drivers
+ * produce command buffers which are send to the kernel and
+ * put in IBs for execution by the requested ring.
 */
 int radeon_debugfs_sa_init(struct radeon_device *rdev);

+/**
+ * radeon_ib_get - request an IB (Indirect Buffer)
+ *
+ * @rdev: radeon_device pointer
+ * @ring: ring index the IB is associated with
+ * @ib: IB object returned
+ * @size: requested IB size
+ *
+ * Request an IB (all asics).  IBs are allocated using the
+ * suballocator.
+ * Returns 0 on success, error on failure.
+ */
 int radeon_ib_get(struct radeon_device *rdev, int ring,
 		  struct radeon_ib *ib, unsigned size)
 {
@@ -67,6 +85,14 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
 	return 0;
 }

+/**
+ * radeon_ib_free - free an IB (Indirect Buffer)
+ *
+ * @rdev: radeon_device pointer
+ * @ib: IB object to free
+ *
+ * Free an IB (all asics).
+ */
 void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib)
 {
 	radeon_semaphore_free(rdev, &ib->semaphore, ib->fence);
@@ -74,7 +100,28 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib)
 	radeon_fence_unref(&ib->fence);
 }

-int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
+/**
+ * radeon_ib_schedule - schedule an IB (Indirect Buffer) on the ring
+ *
+ * @rdev: radeon_device pointer
+ * @ib: IB object to schedule
+ * @const_ib: Const IB to schedule (SI only)
+ *
+ * Schedule an IB on the associated ring (all asics).
+ * Returns 0 on success, error on failure.
+ *
+ * On SI, there are two parallel engines fed from the primary ring,
+ * the CE (Constant Engine) and the DE (Drawing Engine).  Since
+ * resource descriptors have moved to memory, the CE allows you to
+ * prime the caches while the DE is updating register state so that
+ * the resource descriptors will be already in cache when the draw is
+ * processed.  To accomplish this, the userspace driver submits two
+ * IBs, one for the CE and one for the DE.  If there is a CE IB (called
+ * a CONST_IB), it will be put on the ring prior to the DE IB.  Prior
+ * to SI there was just a DE IB.
+ */
+int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
+		       struct radeon_ib *const_ib)
 {
 	struct radeon_ring *ring = &rdev->ring[ib->ring];
 	bool need_sync = false;
@@ -105,6 +152,10 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
 	if (!need_sync) {
 		radeon_semaphore_free(rdev, &ib->semaphore, NULL);
 	}
+	if (const_ib) {
+		radeon_ring_ib_execute(rdev, const_ib->ring, const_ib);
+		radeon_semaphore_free(rdev, &const_ib->semaphore, NULL);
+	}
 	radeon_ring_ib_execute(rdev, ib->ring, ib);
 	r = radeon_fence_emit(rdev, &ib->fence, ib->ring);
 	if (r) {
@@ -112,10 +163,22 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
 		radeon_ring_unlock_undo(rdev, ring);
 		return r;
 	}
+	if (const_ib) {
+		const_ib->fence = radeon_fence_ref(ib->fence);
+	}
 	radeon_ring_unlock_commit(rdev, ring);
 	return 0;
 }

+/**
+ * radeon_ib_pool_init - Init the IB (Indirect Buffer) pool
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Initialize the suballocator to manage a pool of memory
+ * for use as IBs (all asics).
+ * Returns 0 on success, error on failure.
+ */
 int radeon_ib_pool_init(struct radeon_device *rdev)
 {
 	int r;
@@ -142,6 +205,14 @@ int radeon_ib_pool_init(struct radeon_device *rdev)
 	return 0;
 }

+/**
+ * radeon_ib_pool_fini - Free the IB (Indirect Buffer) pool
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Tear down the suballocator managing the pool of memory
+ * for use as IBs (all asics).
+ */
 void radeon_ib_pool_fini(struct radeon_device *rdev)
 {
 	if (rdev->ib_pool_ready) {
@@ -151,6 +222,16 @@ void radeon_ib_pool_fini(struct radeon_device *rdev)
 	}
 }

+/**
+ * radeon_ib_ring_tests - test IBs on the rings
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Test an IB (Indirect Buffer) on each ring.
+ * If the test fails, disable the ring.
+ * Returns 0 on success, error if the primary GFX ring
+ * IB test fails.
+ */
 int radeon_ib_ring_tests(struct radeon_device *rdev)
 {
 	unsigned i;
@@ -182,10 +263,28 @@ int radeon_ib_ring_tests(struct radeon_device *rdev)
 }

 /*
- * Ring.
+ * Rings
+ * Most engines on the GPU are fed via ring buffers.  Ring
+ * buffers are areas of GPU accessible memory that the host
+ * writes commands into and the GPU reads commands out of.
+ * There is a rptr (read pointer) that determines where the
+ * GPU is currently reading, and a wptr (write pointer)
+ * which determines where the host has written.  When the
+ * pointers are equal, the ring is idle.  When the host
+ * writes commands to the ring buffer, it increments the
+ * wptr.  The GPU then starts fetching commands and executes
+ * them until the pointers are equal again.
 */
 int radeon_debugfs_ring_init(struct radeon_device *rdev, struct radeon_ring *ring);

+/**
+ * radeon_ring_write - write a value to the ring
+ *
+ * @ring: radeon_ring structure holding ring information
+ * @v: dword (dw) value to write
+ *
+ * Write a value to the requested ring buffer (all asics).
+ */
 void radeon_ring_write(struct radeon_ring *ring, uint32_t v)
 {
 #if DRM_DEBUG_CODE
@@ -199,21 +298,37 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v)
 	ring->ring_free_dw--;
 }

-int radeon_ring_index(struct radeon_device *rdev, struct radeon_ring *ring)
+/**
+ * radeon_ring_supports_scratch_reg - check if the ring supports
+ * writing to scratch registers
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Check if a specific ring supports writing to scratch registers (all asics).
+ * Returns true if the ring supports writing to scratch regs, false if not.
+ */
+bool radeon_ring_supports_scratch_reg(struct radeon_device *rdev,
+				      struct radeon_ring *ring)
 {
-	/* r1xx-r5xx only has CP ring */
-	if (rdev->family < CHIP_R600)
-		return RADEON_RING_TYPE_GFX_INDEX;
-
-	if (rdev->family >= CHIP_CAYMAN) {
-		if (ring == &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX])
-			return CAYMAN_RING_TYPE_CP1_INDEX;
-		else if (ring == &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX])
-			return CAYMAN_RING_TYPE_CP2_INDEX;
+	switch (ring->idx) {
+	case RADEON_RING_TYPE_GFX_INDEX:
+	case CAYMAN_RING_TYPE_CP1_INDEX:
+	case CAYMAN_RING_TYPE_CP2_INDEX:
+		return true;
+	default:
+		return false;
 	}
-	return RADEON_RING_TYPE_GFX_INDEX;
 }

+/**
+ * radeon_ring_free_size - update the free size
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Update the free dw slots in the ring buffer (all asics).
+ */
 void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *ring)
 {
 	u32 rptr;
@@ -232,7 +347,16 @@ void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *ring)
 	}
 }

-
+/**
+ * radeon_ring_alloc - allocate space on the ring buffer
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ * @ndw: number of dwords to allocate in the ring buffer
+ *
+ * Allocate @ndw dwords in the ring buffer (all asics).
+ * Returns 0 on success, error on failure.
+ */
 int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ndw)
 {
 	int r;
@@ -245,7 +369,7 @@ int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *ring, unsi
 		if (ndw < ring->ring_free_dw) {
 			break;
 		}
-		r = radeon_fence_wait_next_locked(rdev, radeon_ring_index(rdev, ring));
+		r = radeon_fence_wait_next_locked(rdev, ring->idx);
 		if (r)
 			return r;
 	}
@@ -254,6 +378,17 @@ int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *ring, unsi
 	return 0;
 }

+/**
+ * radeon_ring_lock - lock the ring and allocate space on it
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ * @ndw: number of dwords to allocate in the ring buffer
+ *
+ * Lock the ring and allocate @ndw dwords in the ring buffer
+ * (all asics).
+ * Returns 0 on success, error on failure.
+ */
 int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ndw)
 {
 	int r;
@@ -267,6 +402,16 @@ int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *ring, unsig
 	return 0;
 }

+/**
+ * radeon_ring_commit - tell the GPU to execute the new
+ * commands on the ring buffer
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Update the wptr (write pointer) to tell the GPU to
+ * execute new commands on the ring buffer (all asics).
+ */
 void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring)
 {
 	/* We pad to match fetch size */
@@ -278,23 +423,55 @@ void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring)
 	(void)RREG32(ring->wptr_reg);
 }

+/**
+ * radeon_ring_unlock_commit - tell the GPU to execute the new
+ * commands on the ring buffer and unlock it
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Call radeon_ring_commit() then unlock the ring (all asics).
+ */
 void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *ring)
 {
 	radeon_ring_commit(rdev, ring);
 	mutex_unlock(&rdev->ring_lock);
 }

+/**
+ * radeon_ring_undo - reset the wptr
+ *
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Reset the driver's copy of the wtpr (all asics).
+ */
 void radeon_ring_undo(struct radeon_ring *ring)
 {
 	ring->wptr = ring->wptr_old;
 }

+/**
+ * radeon_ring_unlock_undo - reset the wptr and unlock the ring
+ *
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Call radeon_ring_undo() then unlock the ring (all asics).
+ */
 void radeon_ring_unlock_undo(struct radeon_device *rdev, struct radeon_ring *ring)
 {
 	radeon_ring_undo(ring);
 	mutex_unlock(&rdev->ring_lock);
 }

+/**
+ * radeon_ring_force_activity - add some nop packets to the ring
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Add some nop packets to the ring to force activity (all asics).
+ * Used for lockup detection to see if the rptr is advancing.
+ */
 void radeon_ring_force_activity(struct radeon_device *rdev, struct radeon_ring *ring)
 {
 	int r;
@@ -309,6 +486,13 @@ void radeon_ring_force_activity(struct radeon_device *rdev, struct radeon_ring *
 	}
 }

+/**
+ * radeon_ring_force_activity - update lockup variables
+ *
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Update the last rptr value and timestamp (all asics).
+ */
 void radeon_ring_lockup_update(struct radeon_ring *ring)
 {
 	ring->last_rptr = ring->rptr;
@@ -374,25 +558,33 @@ unsigned radeon_ring_backup(struct radeon_device *rdev, struct radeon_ring *ring
 			    uint32_t **data)
 {
 	unsigned size, ptr, i;
-	int ridx = radeon_ring_index(rdev, ring);

 	/* just in case lock the ring */
 	mutex_lock(&rdev->ring_lock);
 	*data = NULL;

-	if (ring->ring_obj == NULL || !ring->rptr_save_reg) {
+	if (ring->ring_obj == NULL) {
 		mutex_unlock(&rdev->ring_lock);
 		return 0;
 	}

 	/* it doesn't make sense to save anything if all fences are signaled */
-	if (!radeon_fence_count_emitted(rdev, ridx)) {
+	if (!radeon_fence_count_emitted(rdev, ring->idx)) {
 		mutex_unlock(&rdev->ring_lock);
 		return 0;
 	}

 	/* calculate the number of dw on the ring */
-	ptr = RREG32(ring->rptr_save_reg);
+	if (ring->rptr_save_reg)
+		ptr = RREG32(ring->rptr_save_reg);
+	else if (rdev->wb.enabled)
+		ptr = le32_to_cpu(*ring->next_rptr_cpu_addr);
+	else {
+		/* no way to read back the next rptr */
+		mutex_unlock(&rdev->ring_lock);
+		return 0;
+	}
+
 	size = ring->wptr + (ring->ring_size / 4);
 	size -= ptr;
 	size &= ring->ptr_mask;
@@ -444,6 +636,22 @@ int radeon_ring_restore(struct radeon_device *rdev, struct radeon_ring *ring,
 	return 0;
 }

+/**
+ * radeon_ring_init - init driver ring struct.
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ * @ring_size: size of the ring
+ * @rptr_offs: offset of the rptr writeback location in the WB buffer
+ * @rptr_reg: MMIO offset of the rptr register
+ * @wptr_reg: MMIO offset of the wptr register
+ * @ptr_reg_shift: bit offset of the rptr/wptr values
+ * @ptr_reg_mask: bit mask of the rptr/wptr values
+ * @nop: nop packet for this ring
+ *
+ * Initialize the driver information for the selected ring (all asics).
+ * Returns 0 on success, error on failure.
+ */
 int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ring_size,
 		     unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg,
 		     u32 ptr_reg_shift, u32 ptr_reg_mask, u32 nop)
@@ -486,12 +694,25 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig
 	}
 	ring->ptr_mask = (ring->ring_size / 4) - 1;
 	ring->ring_free_dw = ring->ring_size / 4;
+	if (rdev->wb.enabled) {
+		u32 index = RADEON_WB_RING0_NEXT_RPTR + (ring->idx * 4);
+		ring->next_rptr_gpu_addr = rdev->wb.gpu_addr + index;
+		ring->next_rptr_cpu_addr = &rdev->wb.wb[index/4];
+	}
 	if (radeon_debugfs_ring_init(rdev, ring)) {
 		DRM_ERROR("Failed to register debugfs file for rings !\n");
 	}
 	return 0;
 }

+/**
+ * radeon_ring_fini - tear down the driver ring struct.
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Tear down the driver information for the selected ring (all asics).
+ */
 void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *ring)
 {
 	int r;

--- a/drivers/gpu/drm/radeon/radeon_sa.c
+++ b/drivers/gpu/drm/radeon/radeon_sa.c
@@ -54,7 +54,7 @@ int radeon_sa_bo_manager_init(struct radeon_device *rdev,
 {
 	int i, r;

-	spin_lock_init(&sa_manager->lock);
+	init_waitqueue_head(&sa_manager->wq);
 	sa_manager->bo = NULL;
 	sa_manager->size = size;
 	sa_manager->domain = domain;
@@ -211,6 +211,39 @@ static bool radeon_sa_bo_try_alloc(struct radeon_sa_manager *sa_manager,
 	return false;
 }

+/**
+ * radeon_sa_event - Check if we can stop waiting
+ *
+ * @sa_manager: pointer to the sa_manager
+ * @size: number of bytes we want to allocate
+ * @align: alignment we need to match
+ *
+ * Check if either there is a fence we can wait for or
+ * enough free memory to satisfy the allocation directly
+ */
+static bool radeon_sa_event(struct radeon_sa_manager *sa_manager,
+			    unsigned size, unsigned align)
+{
+	unsigned soffset, eoffset, wasted;
+	int i;
+
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		if (!list_empty(&sa_manager->flist[i])) {
+			return true;
+		}
+	}
+
+	soffset = radeon_sa_bo_hole_soffset(sa_manager);
+	eoffset = radeon_sa_bo_hole_eoffset(sa_manager);
+	wasted = (align - (soffset % align)) % align;
+
+	if ((eoffset - soffset) >= (size + wasted)) {
+		return true;
+	}
+
+	return false;
+}
+
 static bool radeon_sa_bo_next_hole(struct radeon_sa_manager *sa_manager,
 				   struct radeon_fence **fences,
 				   unsigned *tries)
@@ -297,8 +330,8 @@ int radeon_sa_bo_new(struct radeon_device *rdev,
 	INIT_LIST_HEAD(&(*sa_bo)->olist);
 	INIT_LIST_HEAD(&(*sa_bo)->flist);

-	spin_lock(&sa_manager->lock);
-	do {
+	spin_lock(&sa_manager->wq.lock);
+	while(1) {
 		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 			fences[i] = NULL;
 			tries[i] = 0;
@@ -309,30 +342,34 @@ int radeon_sa_bo_new(struct radeon_device *rdev,

 			if (radeon_sa_bo_try_alloc(sa_manager, *sa_bo,
 						   size, align)) {
-				spin_unlock(&sa_manager->lock);
+				spin_unlock(&sa_manager->wq.lock);
 				return 0;
 			}

 			/* see if we can skip over some allocations */
 		} while (radeon_sa_bo_next_hole(sa_manager, fences, tries));

-		if (block) {
-			spin_unlock(&sa_manager->lock);
-			r = radeon_fence_wait_any(rdev, fences, false);
-			spin_lock(&sa_manager->lock);
-			if (r) {
-				/* if we have nothing to wait for we
-				   are practically out of memory */
-				if (r == -ENOENT) {
-					r = -ENOMEM;
-				}
-				goto out_err;
-			}
+		if (!block) {
+			break;
+		}
+
+		spin_unlock(&sa_manager->wq.lock);
+		r = radeon_fence_wait_any(rdev, fences, false);
+		spin_lock(&sa_manager->wq.lock);
+		/* if we have nothing to wait for block */
+		if (r == -ENOENT) {
+			r = wait_event_interruptible_locked(
+				sa_manager->wq, 
+				radeon_sa_event(sa_manager, size, align)
+			);
+		}
+		if (r) {
+			goto out_err;
 		}
-	} while (block);
+	};

 out_err:
-	spin_unlock(&sa_manager->lock);
+	spin_unlock(&sa_manager->wq.lock);
 	kfree(*sa_bo);
 	*sa_bo = NULL;
 	return r;
@@ -348,7 +385,7 @@ void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo,
 	}

 	sa_manager = (*sa_bo)->manager;
-	spin_lock(&sa_manager->lock);
+	spin_lock(&sa_manager->wq.lock);
 	if (fence && !radeon_fence_signaled(fence)) {
 		(*sa_bo)->fence = radeon_fence_ref(fence);
 		list_add_tail(&(*sa_bo)->flist,
@@ -356,7 +393,8 @@ void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo,
 	} else {
 		radeon_sa_bo_remove_locked(*sa_bo);
 	}
-	spin_unlock(&sa_manager->lock);
+	wake_up_all_locked(&sa_manager->wq);
+	spin_unlock(&sa_manager->wq.lock);
 	*sa_bo = NULL;
 }

@@ -366,7 +404,7 @@ void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager,
 {
 	struct radeon_sa_bo *i;

-	spin_lock(&sa_manager->lock);
+	spin_lock(&sa_manager->wq.lock);
 	list_for_each_entry(i, &sa_manager->olist, olist) {
 		if (&i->olist == sa_manager->hole) {
 			seq_printf(m, ">");
@@ -381,6 +419,6 @@ void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager,
 		}
 		seq_printf(m, "\n");
 	}
-	spin_unlock(&sa_manager->lock);
+	spin_unlock(&sa_manager->wq.lock);
 }
 #endif
--- a/drivers/gpu/drm/radeon/radeon_test.c
+++ b/drivers/gpu/drm/radeon/radeon_test.c
@@ -229,8 +229,6 @@ void radeon_test_ring_sync(struct radeon_device *rdev,
 {
 	struct radeon_fence *fence1 = NULL, *fence2 = NULL;
 	struct radeon_semaphore *semaphore = NULL;
-	int ridxA = radeon_ring_index(rdev, ringA);
-	int ridxB = radeon_ring_index(rdev, ringB);
 	int r;

 	r = radeon_semaphore_create(rdev, &semaphore);
@@ -241,18 +239,18 @@ void radeon_test_ring_sync(struct radeon_device *rdev,

 	r = radeon_ring_lock(rdev, ringA, 64);
 	if (r) {
-		DRM_ERROR("Failed to lock ring A %d\n", ridxA);
+		DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
 		goto out_cleanup;
 	}
-	radeon_semaphore_emit_wait(rdev, ridxA, semaphore);
-	r = radeon_fence_emit(rdev, &fence1, ridxA);
+	radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
+	r = radeon_fence_emit(rdev, &fence1, ringA->idx);
 	if (r) {
 		DRM_ERROR("Failed to emit fence 1\n");
 		radeon_ring_unlock_undo(rdev, ringA);
 		goto out_cleanup;
 	}
-	radeon_semaphore_emit_wait(rdev, ridxA, semaphore);
-	r = radeon_fence_emit(rdev, &fence2, ridxA);
+	radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
+	r = radeon_fence_emit(rdev, &fence2, ringA->idx);
 	if (r) {
 		DRM_ERROR("Failed to emit fence 2\n");
 		radeon_ring_unlock_undo(rdev, ringA);
@@ -272,7 +270,7 @@ void radeon_test_ring_sync(struct radeon_device *rdev,
 		DRM_ERROR("Failed to lock ring B %p\n", ringB);
 		goto out_cleanup;
 	}
-	radeon_semaphore_emit_signal(rdev, ridxB, semaphore);
+	radeon_semaphore_emit_signal(rdev, ringB->idx, semaphore);
 	radeon_ring_unlock_commit(rdev, ringB);

 	r = radeon_fence_wait(fence1, false);
@@ -293,7 +291,7 @@ void radeon_test_ring_sync(struct radeon_device *rdev,
 		DRM_ERROR("Failed to lock ring B %p\n", ringB);
 		goto out_cleanup;
 	}
-	radeon_semaphore_emit_signal(rdev, ridxB, semaphore);
+	radeon_semaphore_emit_signal(rdev, ringB->idx, semaphore);
 	radeon_ring_unlock_commit(rdev, ringB);

 	r = radeon_fence_wait(fence2, false);
@@ -322,9 +320,6 @@ void radeon_test_ring_sync2(struct radeon_device *rdev,
 {
 	struct radeon_fence *fenceA = NULL, *fenceB = NULL;
 	struct radeon_semaphore *semaphore = NULL;
-	int ridxA = radeon_ring_index(rdev, ringA);
-	int ridxB = radeon_ring_index(rdev, ringB);
-	int ridxC = radeon_ring_index(rdev, ringC);
 	bool sigA, sigB;
 	int i, r;

@@ -336,11 +331,11 @@ void radeon_test_ring_sync2(struct radeon_device *rdev,

 	r = radeon_ring_lock(rdev, ringA, 64);
 	if (r) {
-		DRM_ERROR("Failed to lock ring A %d\n", ridxA);
+		DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
 		goto out_cleanup;
 	}
-	radeon_semaphore_emit_wait(rdev, ridxA, semaphore);
-	r = radeon_fence_emit(rdev, &fenceA, ridxA);
+	radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
+	r = radeon_fence_emit(rdev, &fenceA, ringA->idx);
 	if (r) {
 		DRM_ERROR("Failed to emit sync fence 1\n");
 		radeon_ring_unlock_undo(rdev, ringA);
@@ -350,11 +345,11 @@ void radeon_test_ring_sync2(struct radeon_device *rdev,

 	r = radeon_ring_lock(rdev, ringB, 64);
 	if (r) {
-		DRM_ERROR("Failed to lock ring B %d\n", ridxB);
+		DRM_ERROR("Failed to lock ring B %d\n", ringB->idx);
 		goto out_cleanup;
 	}
-	radeon_semaphore_emit_wait(rdev, ridxB, semaphore);
-	r = radeon_fence_emit(rdev, &fenceB, ridxB);
+	radeon_semaphore_emit_wait(rdev, ringB->idx, semaphore);
+	r = radeon_fence_emit(rdev, &fenceB, ringB->idx);
 	if (r) {
 		DRM_ERROR("Failed to create sync fence 2\n");
 		radeon_ring_unlock_undo(rdev, ringB);
@@ -378,7 +373,7 @@ void radeon_test_ring_sync2(struct radeon_device *rdev,
 		DRM_ERROR("Failed to lock ring B %p\n", ringC);
 		goto out_cleanup;
 	}
-	radeon_semaphore_emit_signal(rdev, ridxC, semaphore);
+	radeon_semaphore_emit_signal(rdev, ringC->idx, semaphore);
 	radeon_ring_unlock_commit(rdev, ringC);

 	for (i = 0; i < 30; ++i) {
@@ -404,7 +399,7 @@ void radeon_test_ring_sync2(struct radeon_device *rdev,
 		DRM_ERROR("Failed to lock ring B %p\n", ringC);
 		goto out_cleanup;
 	}
-	radeon_semaphore_emit_signal(rdev, ridxC, semaphore);
+	radeon_semaphore_emit_signal(rdev, ringC->idx, semaphore);
 	radeon_ring_unlock_commit(rdev, ringC);

 	mdelay(1000);

--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -1765,18 +1765,31 @@ void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 	struct radeon_ring *ring = &rdev->ring[ib->ring];
 	u32 header;

-	if (ring->rptr_save_reg) {
-		uint32_t next_rptr = ring->wptr + 3 + 4 + 8;
-		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
-		radeon_ring_write(ring, ((ring->rptr_save_reg - 
-					  PACKET3_SET_CONFIG_REG_START) >> 2));
-		radeon_ring_write(ring, next_rptr);
-	}
+	if (ib->is_const_ib) {
+		/* set switch buffer packet before const IB */
+		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+		radeon_ring_write(ring, 0);

-	if (ib->is_const_ib)
 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
-	else
+	} else {
+		u32 next_rptr;
+		if (ring->rptr_save_reg) {
+			next_rptr = ring->wptr + 3 + 4 + 8;
+			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+			radeon_ring_write(ring, ((ring->rptr_save_reg -
+						  PACKET3_SET_CONFIG_REG_START) >> 2));
+			radeon_ring_write(ring, next_rptr);
+		} else if (rdev->wb.enabled) {
+			next_rptr = ring->wptr + 5 + 4 + 8;
+			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+			radeon_ring_write(ring, (1 << 8));
+			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
+			radeon_ring_write(ring, next_rptr);
+		}
+
 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
+	}

 	radeon_ring_write(ring, header);
 	radeon_ring_write(ring,
@@ -1787,18 +1800,20 @@ void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
 	radeon_ring_write(ring, ib->length_dw | (ib->vm_id << 24));

-	/* flush read cache over gart for this vmid */
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
-	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
-	radeon_ring_write(ring, ib->vm_id);
-	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
-	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
-			  PACKET3_TC_ACTION_ENA |
-			  PACKET3_SH_KCACHE_ACTION_ENA |
-			  PACKET3_SH_ICACHE_ACTION_ENA);
-	radeon_ring_write(ring, 0xFFFFFFFF);
-	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, 10); /* poll interval */
+	if (!ib->is_const_ib) {
+		/* flush read cache over gart for this vmid */
+		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
+		radeon_ring_write(ring, ib->vm_id);
+		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+				  PACKET3_TC_ACTION_ENA |
+				  PACKET3_SH_KCACHE_ACTION_ENA |
+				  PACKET3_SH_ICACHE_ACTION_ENA);
+		radeon_ring_write(ring, 0xFFFFFFFF);
+		radeon_ring_write(ring, 0);
+		radeon_ring_write(ring, 10); /* poll interval */
+	}
 }

 /*

--- a/drivers/gpu/drm/radeon/sid.h
+++ b/drivers/gpu/drm/radeon/sid.h
@@ -901,5 +901,6 @@
 #define	PACKET3_WAIT_ON_DE_COUNTER_DIFF			0x88
 #define	PACKET3_SET_CE_DE_COUNTERS			0x89
 #define	PACKET3_WAIT_ON_AVAIL_BUFFER			0x8A
+#define	PACKET3_SWITCH_BUFFER				0x8B

 #endif