diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 32807baf55e232ebb0bc1c35ee4775d49fb1af00..0040d28816f19a995238a0e6e2cf574da2a5f7a3 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1143,6 +1143,8 @@ int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp);
 int radeon_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp);
+int radeon_gem_wait_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *filp);
 
 /* VRAM scratch page for HDP bug */
 struct r700_vram_scratch {
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 14e8531511057e5ecca25da1f8fe0afd8fde45c1..f0b9066abc5c73cbed7314270522473d7c7f612e 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -80,7 +80,10 @@ int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 			p->relocs[i].lobj.wdomain = r->write_domain;
 			p->relocs[i].lobj.rdomain = r->read_domains;
 			p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
-			p->relocs[i].lobj.tv.usage = TTM_USAGE_READWRITE;
+			if (r->read_domains)
+				p->relocs[i].lobj.tv.usage |= TTM_USAGE_READ;
+			if (r->write_domain)
+				p->relocs[i].lobj.tv.usage |= TTM_USAGE_WRITE;
 			p->relocs[i].handle = r->handle;
 			p->relocs[i].flags = r->flags;
 			radeon_bo_list_add_object(&p->relocs[i].lobj,
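
With usage now derived from the requested domains, a relocation that only reads a buffer is no longer recorded as a writer. As a rough userspace-side sketch of what such a relocation looks like (bo_handle and the chosen domains are illustrative assumptions, not part of this patch):

#include <stdint.h>
#include "radeon_drm.h"

/* Hypothetical read-only relocation: no write domain is requested, so with
 * this patch the kernel records TTM_USAGE_READ for the BO rather than the
 * old blanket TTM_USAGE_READWRITE. */
static struct drm_radeon_cs_reloc make_readonly_reloc(uint32_t bo_handle)
{
	struct drm_radeon_cs_reloc reloc = {
		.handle       = bo_handle,
		.read_domains = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT,
		.write_domain = 0,	/* read-only use of the BO */
	};

	return reloc;
}
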
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index e71d2ed7fa113126e7cacb06b1553f8767b948e6..bd187e097e77571ea960e9d15f0c4fe97e55c4b3 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -52,9 +52,10 @@
  *   2.9.0 - r600 tiling (s3tc,rgtc) working, SET_PREDICATION packet 3 on r600 + eg, backend query
  *   2.10.0 - fusion 2D tiling
  *   2.11.0 - backend map, initial compute support for the CS checker
+ *   2.12.0 - DRM_RADEON_GEM_WAIT ioctl
  */
 #define KMS_DRIVER_MAJOR	2
-#define KMS_DRIVER_MINOR	11
+#define KMS_DRIVER_MINOR	12
 #define KMS_DRIVER_PATCHLEVEL	0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index aa1ca2dea42f1623f09c062e4c9171e220c75c0d..2edc2a40d4d7b5b0b220ff225bc6e1f9200ed79b 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -122,7 +122,7 @@ int radeon_gem_set_domain(struct drm_gem_object *gobj,
 	}
 	if (domain == RADEON_GEM_DOMAIN_CPU) {
 		/* Asking for cpu access wait for object idle */
-		r = radeon_bo_wait(robj, NULL, false);
+		r = radeon_bo_wait(robj, NULL, false, TTM_USAGE_READWRITE);
 		if (r) {
 			printk(KERN_ERR "Failed to wait for object !\n");
 			return r;
@@ -273,7 +273,7 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data,
 		return -ENOENT;
 	}
 	robj = gem_to_radeon_bo(gobj);
-	r = radeon_bo_wait(robj, &cur_placement, true);
+	r = radeon_bo_wait(robj, &cur_placement, true, TTM_USAGE_READWRITE);
 	switch (cur_placement) {
 	case TTM_PL_VRAM:
 		args->domain = RADEON_GEM_DOMAIN_VRAM;
@@ -303,7 +303,7 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
 		return -ENOENT;
 	}
 	robj = gem_to_radeon_bo(gobj);
-	r = radeon_bo_wait(robj, NULL, false);
+	r = radeon_bo_wait(robj, NULL, false, TTM_USAGE_READWRITE);
 	/* callback hw specific functions if any */
 	if (robj->rdev->asic->ioctl_wait_idle)
 		robj->rdev->asic->ioctl_wait_idle(robj->rdev, robj);
@@ -311,6 +311,36 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
 	return r;
 }
 
+int radeon_gem_wait_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *filp)
+{
+	struct drm_radeon_gem_wait *args = data;
+	struct drm_gem_object *gobj;
+	struct radeon_bo *robj;
+	bool no_wait = (args->flags & RADEON_GEM_NO_WAIT) != 0;
+	enum ttm_buffer_usage usage = 0;
+	int r;
+
+	if (args->flags & RADEON_GEM_USAGE_READ)
+		usage |= TTM_USAGE_READ;
+	if (args->flags & RADEON_GEM_USAGE_WRITE)
+		usage |= TTM_USAGE_WRITE;
+	if (!usage)
+		usage = TTM_USAGE_READWRITE;
+
+	gobj = drm_gem_object_lookup(dev, filp, args->handle);
+	if (gobj == NULL) {
+		return -ENOENT;
+	}
+	robj = gem_to_radeon_bo(gobj);
+	r = radeon_bo_wait(robj, NULL, no_wait, usage);
+	/* callback hw specific functions if any */
+	if (!no_wait && robj->rdev->asic->ioctl_wait_idle)
+		robj->rdev->asic->ioctl_wait_idle(robj->rdev, robj);
+	drm_gem_object_unreference_unlocked(gobj);
+	return r;
+}
+
 int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp)
 {
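
The new handler mirrors radeon_gem_wait_idle_ioctl, but lets userspace limit the wait to pending readers and/or writers and, with RADEON_GEM_NO_WAIT, poll instead of block. A hedged sketch of a non-blocking writer check built on libdrm (the helper name and fd are assumptions; only the ioctl and flags come from this patch):

#include <string.h>
#include <stdint.h>
#include <xf86drm.h>
#include "radeon_drm.h"

/* Hypothetical poll: returns 0 when no GPU write to the BO is outstanding,
 * or a negative errno (typically -EBUSY) while a writer is still pending. */
static int radeon_bo_busy_for_write(int fd, uint32_t handle)
{
	struct drm_radeon_gem_wait args;

	memset(&args, 0, sizeof(args));
	args.handle = handle;
	args.flags = RADEON_GEM_NO_WAIT | RADEON_GEM_USAGE_WRITE;

	return drmCommandWriteRead(fd, DRM_RADEON_GEM_WAIT,
				   &args, sizeof(args));
}
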
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index be2c1224e68ae073f36a971f531d052a8bf2a9e5..a749c262663fa875a6ba6e6357901f0e0f4ebe63 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -451,5 +451,6 @@ struct drm_ioctl_desc radeon_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_SET_TILING, radeon_gem_set_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_WAIT, radeon_gem_wait_ioctl, DRM_AUTH|DRM_UNLOCKED),
 };
 int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms);
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index e9dc8b249c5f8598747eb1306eb5f64bc9e7ccbc..a057a8e5a6e685d6666f9df6108832c2cf24e89b 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -119,7 +119,7 @@ static inline u64 radeon_bo_mmap_offset(struct radeon_bo *bo)
 }
 
 static inline int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type,
-					bool no_wait)
+				 bool no_wait, enum ttm_buffer_usage usage)
 {
 	int r;
 
@@ -130,7 +130,7 @@ static inline int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type,
 	if (mem_type)
 		*mem_type = bo->tbo.mem.mem_type;
 	if (bo->tbo.sync_obj)
-		r = ttm_bo_wait(&bo->tbo, true, true, no_wait, TTM_USAGE_READWRITE);
+		r = ttm_bo_wait(&bo->tbo, true, true, no_wait, usage);
 	spin_unlock(&bo->tbo.bdev->fence_lock);
 	ttm_bo_unreserve(&bo->tbo);
 	return r;
diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h
index b65be6054a183efe7b0ea5daa7c8afd7b1aa6a72..939b8547cc2653407e4fafae82faafcd25463012 100644
--- a/include/drm/radeon_drm.h
+++ b/include/drm/radeon_drm.h
@@ -509,6 +509,7 @@ typedef struct {
 #define DRM_RADEON_GEM_SET_TILING	0x28
 #define DRM_RADEON_GEM_GET_TILING	0x29
 #define DRM_RADEON_GEM_BUSY		0x2a
+#define DRM_RADEON_GEM_WAIT		0x2b
 
 #define DRM_IOCTL_RADEON_CP_INIT    DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t)
 #define DRM_IOCTL_RADEON_CP_START   DRM_IO(  DRM_COMMAND_BASE + DRM_RADEON_CP_START)
@@ -550,6 +551,7 @@ typedef struct {
 #define DRM_IOCTL_RADEON_GEM_SET_TILING	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_SET_TILING, struct drm_radeon_gem_set_tiling)
 #define DRM_IOCTL_RADEON_GEM_GET_TILING	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling)
 #define DRM_IOCTL_RADEON_GEM_BUSY	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy)
+#define DRM_IOCTL_RADEON_GEM_WAIT	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_WAIT, struct drm_radeon_gem_wait)
 
 typedef struct drm_radeon_init {
 	enum {
@@ -846,6 +848,15 @@ struct drm_radeon_gem_busy {
 	uint32_t        domain;
 };
 
+#define RADEON_GEM_NO_WAIT	0x1
+#define RADEON_GEM_USAGE_READ	0x2
+#define RADEON_GEM_USAGE_WRITE	0x4
+
+struct drm_radeon_gem_wait {
+	uint32_t	handle;
+	uint32_t	flags;		/* bit mask of RADEON_GEM_* flags */
+};
+
 struct drm_radeon_gem_pread {
 	/** Handle for the object being read. */
 	uint32_t handle;
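
To round out the uAPI additions, here is a hedged sketch of a blocking wait from userspace (fd, the helper name, and the include paths are assumptions for the example; the struct, ioctl number, and flags are the ones added above). It waits until all pending GPU writes to the BO have completed, which is enough before a CPU read-back, while concurrent GPU readers are ignored:

#include <string.h>
#include <stdint.h>
#include <xf86drm.h>
#include "radeon_drm.h"

/* Hypothetical helper: block until the GPU has finished writing to 'handle'.
 * Passing RADEON_GEM_USAGE_READ | RADEON_GEM_USAGE_WRITE (or neither flag,
 * which the kernel treats as both) would instead wait for full idleness. */
static int radeon_bo_wait_for_writes(int fd, uint32_t handle)
{
	struct drm_radeon_gem_wait args;

	memset(&args, 0, sizeof(args));
	args.handle = handle;
	args.flags = RADEON_GEM_USAGE_WRITE;	/* readers may still run */

	return drmCommandWriteRead(fd, DRM_RADEON_GEM_WAIT,
				   &args, sizeof(args));
}

Calling drmIoctl(fd, DRM_IOCTL_RADEON_GEM_WAIT, &args) directly would work just as well; drmCommandWriteRead is used here only because that is how radeon GEM ioctls are commonly wrapped in libdrm.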