未验证 提交 2660ea37 编写于 作者: W whs 提交者: GitHub

Fix cuda kernel of affine grid (#27003)

test=develop
上级 b6fb9798
@@ -62,11 +62,11 @@ __global__ void affine_grid_kernel(const int count, int n, int out_h, int out_w,
     int theta_offset = n * 6;  // 2 * 3;
     // affine from (h_coor, w_coor) to (x, y)
-    output[index * 2] = theta[theta_offset] * h_coor +
-                        theta[theta_offset + 1] * w_coor +
+    output[index * 2] = theta[theta_offset] * w_coor +
+                        theta[theta_offset + 1] * h_coor +
                         theta[theta_offset + 2];
-    output[index * 2 + 1] = theta[theta_offset + 3] * h_coor +
-                            theta[theta_offset + 4] * w_coor +
+    output[index * 2 + 1] = theta[theta_offset + 3] * w_coor +
+                            theta[theta_offset + 4] * h_coor +
                             theta[theta_offset + 5];
   }
 }
@@ -86,13 +86,13 @@ __global__ void affine_grid_grad_kernel(const int count, int n, int out_h,
     int theta_offset = n * 6;  // 2 * 3;
     T out_grad_x = out_grad[index * 2];
-    platform::CudaAtomicAdd(theta_grad + theta_offset, out_grad_x * h_coor);
-    platform::CudaAtomicAdd(theta_grad + theta_offset + 1, out_grad_x * w_coor);
+    platform::CudaAtomicAdd(theta_grad + theta_offset, out_grad_x * w_coor);
+    platform::CudaAtomicAdd(theta_grad + theta_offset + 1, out_grad_x * h_coor);
     platform::CudaAtomicAdd(theta_grad + theta_offset + 2, out_grad_x);
     T out_grad_y = out_grad[index * 2 + 1];
-    platform::CudaAtomicAdd(theta_grad + theta_offset + 3, out_grad_y * h_coor);
-    platform::CudaAtomicAdd(theta_grad + theta_offset + 4, out_grad_y * w_coor);
+    platform::CudaAtomicAdd(theta_grad + theta_offset + 3, out_grad_y * w_coor);
+    platform::CudaAtomicAdd(theta_grad + theta_offset + 4, out_grad_y * h_coor);
     platform::CudaAtomicAdd(theta_grad + theta_offset + 5, out_grad_y);
   }
 }
......
@@ -49,7 +49,6 @@ class TestAffineGridOp(OpTest):
         self.initTestCase()
         self.op_type = "affine_grid"
         theta = np.random.randint(1, 3, self.theta_shape).astype("float32")
-        theta = np.ones(self.theta_shape).astype("float32")
         self.inputs = {'Theta': theta}
         self.attrs = {
             "use_cudnn": self.use_cudnn,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册