Commit e2a5905e authored by sweetsky0901

gpu test ok unpool2dmax

Parent abb3357d
@@ -37,8 +37,6 @@ class Unpool2dMaxFunctor<platform::CPUPlace, T> {
     const T* input_data = input.data<T>();
     const T * indices_data = indices.data<T>();
     T* output_data = output->mutable_data<T>(context.GetPlace());
-    memset(output_data, 0, \
-           sizeof(T) * output_feasize * output_channels * batch_size);
     for (int b = 0; b < batch_size; ++b) {
       for (int c = 0; c < output_channels; ++c) {
         for (int i = 0; i < input_feasize; ++i) {
......
@@ -22,41 +22,56 @@ namespace math {
 template <typename T>
 __global__ void KernelUnpool2dMax(const int nthreads,
                                   const T* input_data,
-                                  const int* indices_data,
+                                  const T* indices_data,
                                   const int input_height,
                                   const int input_width,
+                                  const int channels,
                                   T* output_data,
                                   const int output_height,
                                   const int output_width) {
+  int bsize = input_height * input_width * channels;
+  int csize = input_height * input_width;
+  int out_bsize = output_height * output_width * channels;
+  int out_csize = output_height * output_width;
   int index = blockIdx.x * blockDim.x + threadIdx.x;
   int offset = blockDim.x * gridDim.x;
   for (int i = index; i < nthreads; i += offset) {
-    int out_offset = i / (input_height * input_width) \
-                     * output_height * output_width;
+    int bidx = i / bsize;
+    int boffset = i % bsize;
+    int cidx = boffset / csize;
+    int out_offset = bidx * out_bsize + cidx * out_csize;
     int out_index = indices_data[i];
     PADDLE_ASSERT(out_index < (output_height * output_width));
-    printf("-------%d------[%f]\n", out_offset + out_index, input_data[i]);
     output_data[out_offset + out_index] = input_data[i];
   }
 }
 template <typename T>
 __global__ void KernelUnpool2dMaxGrad(const int nthreads,
                                       const T* input_data,
-                                      const int* indices_data,
+                                      const T* indices_data,
                                       const int input_height,
                                       const int input_width,
+                                      const int channels,
                                       const T* output_data,
                                       const T* output_grad,
                                       const int output_height,
                                       const int output_width,
                                       T* input_grad) {
+  int bsize = input_height * input_width * channels;
+  int csize = input_height * input_width;
+  int out_bsize = output_height * output_width * channels;
+  int out_csize = output_height * output_width;
   int index = blockIdx.x * blockDim.x + threadIdx.x;
   int offset = blockDim.x * gridDim.x;
   for (int i = index; i < nthreads; i += offset) {
-    int out_offset = i / (input_height * input_width) \
-                     * output_height * output_width;
+    int bidx = i / bsize;
+    int boffset = i % bsize;
+    int cidx = boffset / csize;
+    int out_offset = bidx * out_bsize + cidx * out_csize;
     int out_index = indices_data[i];
     PADDLE_ASSERT(out_index < (output_height * output_width));
     input_grad[i] = output_grad[out_offset + out_index];
   }
 }
 /*
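Editor's note: the reworked offset arithmetic decomposes the flat NCHW input index into its batch and channel parts, so each max value is scattered into the matching (batch, channel) plane of the output; the saved pooling index out_index is relative to a single output feature map. A minimal host-side sketch of the same mapping, with illustrative sizes that are not taken from the commit:

#include <cassert>

// Mirrors the kernel's offset math for one flat NCHW input index i.
int UnpoolOutputOffset(int i, int channels, int in_h, int in_w,
                       int out_h, int out_w) {
  int bsize = in_h * in_w * channels;        // elements per input batch
  int csize = in_h * in_w;                   // elements per input channel
  int out_bsize = out_h * out_w * channels;  // elements per output batch
  int out_csize = out_h * out_w;             // elements per output channel
  int bidx = i / bsize;                      // batch of element i
  int cidx = (i % bsize) / csize;            // channel within that batch
  return bidx * out_bsize + cidx * out_csize;  // start of the target plane
}

int main() {
  // Element 31 of a [3, 3, 2, 2] input (12 elements per batch) lives in
  // batch 2, channel 1, so it scatters into the 4x4 plane starting at 112.
  assert(UnpoolOutputOffset(31, 3, 2, 2, 4, 4) == 2 * 48 + 1 * 16);
  return 0;
}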
@@ -78,8 +93,7 @@ class Unpool2dMaxFunctor<platform::GPUPlace, T> {
     const T* input_data = input.data<T>();
     const T* indices_data = indices.data<T>();
     T* output_data = output->mutable_data<T>(context.GetPlace());
-    int nthreads = output->numel();
+    int nthreads = batch_size * output_channels * input_height * input_width;
     int blocks = (nthreads + 1024 - 1) / 1024;
     dim3 threads(1024, 1);
     dim3 grid(blocks, 1);
@@ -88,7 +102,7 @@ class Unpool2dMaxFunctor<platform::GPUPlace, T> {
         T><<<grid, threads, 0,
              reinterpret_cast<const platform::CUDADeviceContext&>(context)
                  .stream()>>>(nthreads, input_data, indices_data,
-                              input_height, input_width,
+                              input_height, input_width, output_channels,
                               output_data, output_height, output_width);
   }
 };
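Editor's note: nthreads now counts input elements (batch_size * output_channels * input_height * input_width), one CUDA thread per pooled value, and the grid is rounded up to whole 1024-thread blocks. A small sketch of that launch-size arithmetic, using the updated test's shape as a hypothetical example:

#include <cstdio>

// Ceiling division, as in (nthreads + 1024 - 1) / 1024 above.
inline int CeilDiv(int a, int b) { return (a + b - 1) / b; }

int main() {
  // Hypothetical sizes matching the Python test below: [6, 4, 5, 5].
  int nthreads = 6 * 4 * 5 * 5;  // one thread per input element = 600
  printf("blocks = %d\n", CeilDiv(nthreads, 1024));  // -> 1
  return 0;
}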
@@ -115,7 +129,7 @@ class Unpool2dMaxGradFunctor<platform::GPUPlace, T> {
     const T* output_data = output.data<T>();
     const T* output_grad_data = output_grad.data<T>();
     T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
-    int nthreads = output.numel();
+    int nthreads = batch_size * output_channels * input_height * input_width;
     int blocks = (nthreads + 1024 - 1) / 1024;
     dim3 threads(1024, 1);
     dim3 grid(blocks, 1);
@@ -125,7 +139,7 @@ class Unpool2dMaxGradFunctor<platform::GPUPlace, T> {
              reinterpret_cast<const platform::CUDADeviceContext&>(context)
                  .stream()>>>(
                      nthreads, input_data, indices_data,
-                     input_height, input_width,
+                     input_height, input_width, output_channels,
                      output_data, output_grad_data,
                      output_height, output_width,
                      input_grad_data);
......
@@ -21,9 +21,6 @@ namespace paddle {
 namespace operators {
 namespace math {
-#define FLT_MAX \
-  __FLT_MAX__
 template <typename Place, typename T>
 class Unpool2dMaxFunctor {
......
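Editor's note: deleting the local FLT_MAX macro avoids redefining a name the C library already owns; if the header ever needs the constant, the portable spelling is the standard include (an aside, not part of this commit):

#include <cfloat>  // provides FLT_MAX without a per-file redefinition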
@@ -108,9 +108,6 @@ class UnpoolOpGrad : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
   void InferShape(framework::InferShapeContext* ctx) const override {
     PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null.");
-    // PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) must not be null.");
-    // PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
-    //                "Input(Out@GRAD) should not be null");
     PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
                    "Input(X@GRAD) should not be null.");
     ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
......
@@ -29,11 +29,16 @@ class UnpoolKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& context) const override {
     const Tensor* in_x = context.Input<Tensor>("X");
     const Tensor* in_y = context.Input<Tensor>("Y");
-    Tensor* out = context.Output<Tensor>("Out");
+    auto * out = context.Output<Tensor>("Out");
     std::string unpoolingtype = context.Attr<std::string>("unpoolingtype");
     std::vector<int> ksize = context.Attr<std::vector<int>>("ksize");
     std::vector<int> strides = context.Attr<std::vector<int>>("strides");
     std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
+    T* output_data = out->mutable_data<T>(context.GetPlace());
+    if (output_data) {
+      math::SetConstant<Place, T> set_zero;
+      set_zero(context.device_context(), out, static_cast<T>(0));
+    }
     switch (ksize.size()) {
       case 2: {
         if (unpoolingtype == "max") {
@@ -66,7 +71,7 @@ class UnpoolGradKernel : public framework::OpKernel<T> {
     if (in_x_grad) {
       in_x_grad->mutable_data<T>(context.GetPlace());
       zero(device_ctx, in_x_grad, static_cast<T>(0.0));
     }
     switch (ksize.size()) {
       case 2: {
         if (unpoolingtype == "max") {
......
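Editor's note: the memset dropped from the CPU functor in the first hunk is superseded by the math::SetConstant call added here, which zeroes Out through the device context and therefore works for both CPUPlace and GPUPlace; a host-side memset cannot touch GPU memory. A minimal model of the CPU path only, under that assumption:

#include <algorithm>
#include <vector>

// CPU-side stand-in for set_zero(context.device_context(), out, 0): the
// framework dispatches on Place, so the same call also covers GPU tensors.
template <typename T>
void SetConstantCPU(std::vector<T>* tensor, T value) {
  std::fill(tensor->begin(), tensor->end(), value);
}

int main() {
  std::vector<float> out(6 * 4 * 11 * 11);  // hypothetical unpool output
  SetConstantCPU(&out, 0.0f);               // zero before scattering maxima
  return 0;
}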
@@ -54,6 +54,8 @@ class TestUnpoolOp(OpTest):
         self.outputs = {'Out': output.astype('float32')}
     def test_check_output(self):
+        print self.inputs['X']
+        print self.inputs['Y']
         print self.outputs['Out']
         self.check_output()
@@ -63,7 +65,7 @@ class TestUnpoolOp(OpTest):
     def init_test_case(self):
         self.Unpool2d_forward_naive = unpool2dmax_forward_naive
         self.unpoolingtype = "max"
-        self.shape = [10, 2, 5, 5]
+        self.shape = [6, 4, 5, 5]
         self.ksize = [3, 3]
         self.strides = [2, 2]
         self.paddings = [0, 0]
......
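Editor's note: with the new test shape [6, 4, 5, 5], ksize 3, stride 2, and no padding, each unpooled plane is (5 - 1) * 2 + 3 = 11 x 11. A quick check of that arithmetic, using the standard unpool inverse of the pooling size formula (not quoted from this file):

#include <cassert>

// Inverse of the pooling shape rule: out = (in - 1) * stride - 2 * pad + k.
int UnpoolSize(int in, int ksize, int stride, int padding) {
  return (in - 1) * stride - 2 * padding + ksize;
}

int main() {
  assert(UnpoolSize(5, 3, 2, 0) == 11);  // [6, 4, 5, 5] -> [6, 4, 11, 11]
  return 0;
}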