Commit e2a5905e authored by sweetsky0901

gpu test ok unpool2dmax

Parent abb3357d
@@ -37,8 +37,6 @@ class Unpool2dMaxFunctor<platform::CPUPlace, T> {
const T* input_data = input.data<T>();
const T * indices_data = indices.data<T>();
T* output_data = output->mutable_data<T>(context.GetPlace());
- memset(output_data, 0, \
-     sizeof(T) * output_feasize * output_channels * batch_size);
for (int b = 0; b < batch_size; ++b) {
for (int c = 0; c < output_channels; ++c) {
for (int i = 0; i < input_feasize; ++i) {
......
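(Note: the memset removal above presumably pairs with the zero-initialization added to UnpoolKernel::Compute via math::SetConstant in the unpool_op.h hunk further down, which zeroes the output in one place for both the CPU and GPU paths.)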
@@ -22,41 +22,56 @@ namespace math {
template <typename T>
__global__ void KernelUnpool2dMax(const int nthreads,
const T* input_data,
- const int* indices_data,
+ const T* indices_data,
const int input_height,
const int input_width,
const int channels,
T* output_data,
const int output_height,
const int output_width) {
+ int bsize = input_height * input_width * channels;
+ int csize = input_height * input_width;
+ int out_bsize = output_height * output_width * channels;
+ int out_csize = output_height * output_width;
int index = blockIdx.x * blockDim.x + threadIdx.x;
int offset = blockDim.x * gridDim.x;
for (int i = index; i < nthreads; i += offset) {
- int out_offset = i / (input_height * input_width) \
-     * output_height * output_width;
+ int bidx = i / bsize;
+ int boffset = i % bsize;
+ int cidx = boffset / csize;
+ int out_offset = bidx * out_bsize + cidx * out_csize;
+ int out_index = indices_data[i];
+ PADDLE_ASSERT(out_index < (output_height * output_width));
+ printf("-------%d------[%f]\n", out_offset + out_index, input_data[i]);
output_data[out_offset + out_index] = input_data[i];
}
}
template <typename T>
__global__ void KernelUnpool2dMaxGrad(const int nthreads,
const T* input_data,
- const int* indices_data,
+ const T* indices_data,
const int input_height,
const int input_width,
const int channels,
const T* output_data,
const T* output_grad,
const int output_height,
const int output_width,
T* input_grad) {
+ int bsize = input_height * input_width * channels;
+ int csize = input_height * input_width;
+ int out_bsize = output_height * output_width * channels;
+ int out_csize = output_height * output_width;
int index = blockIdx.x * blockDim.x + threadIdx.x;
int offset = blockDim.x * gridDim.x;
for (int i = index; i < nthreads; i += offset) {
- int out_offset = i / (input_height * input_width) \
-     * output_height * output_width;
- int out_index = indices_data[i];
- PADDLE_ASSERT(out_index < (output_height * output_width));
- input_grad[i] = output_grad[out_offset + out_index];
+ int bidx = i / bsize;
+ int boffset = i % bsize;
+ int cidx = boffset / csize;
+ int out_offset = bidx * out_bsize + cidx * out_csize;
+ int out_index = indices_data[i];
+ PADDLE_ASSERT(out_index < (output_height * output_width));
+ input_grad[i] = output_grad[out_offset + out_index];
}
}
/*
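Both kernels above replace the single-division offset, i / (input_height * input_width) * output_height * output_width, with an explicit decomposition of the flat input index i into a batch index (bidx) and a channel index (cidx). The two forms are arithmetically identical because bsize = channels * csize; the rewrite just makes the batch/channel structure explicit. A minimal standalone C++ check, using made-up example sizes and assuming nothing from the Paddle codebase:

#include <cassert>
#include <cstdio>

int main() {
  // Example sizes only; any positive values behave the same.
  const int batch_size = 6, channels = 4;
  const int input_height = 5, input_width = 5;
  const int output_height = 11, output_width = 11;
  const int csize = input_height * input_width;        // per-channel input size
  const int bsize = csize * channels;                  // per-sample input size
  const int out_csize = output_height * output_width;  // per-channel output size
  const int out_bsize = out_csize * channels;          // per-sample output size
  for (int i = 0; i < batch_size * bsize; ++i) {
    int old_offset = i / csize * out_csize;                // old computation
    int bidx = i / bsize;                                  // which sample
    int cidx = (i % bsize) / csize;                        // which channel
    int new_offset = bidx * out_bsize + cidx * out_csize;  // new computation
    assert(old_offset == new_offset);
  }
  std::printf("old and new offset computations agree\n");
  return 0;
}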
@@ -78,8 +93,7 @@ class Unpool2dMaxFunctor<platform::GPUPlace, T> {
const T* input_data = input.data<T>();
const T* indices_data = indices.data<T>();
T* output_data = output->mutable_data<T>(context.GetPlace());
- int nthreads = output->numel();
+ int nthreads = batch_size * output_channels * input_height * input_width;
int blocks = (nthreads + 1024 - 1) / 1024;
dim3 threads(1024, 1);
dim3 grid(blocks, 1);
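The nthreads change above appears to be the substantive fix behind "gpu test ok": the kernel's grid-stride loop indexes input_data[i] for i < nthreads, so the thread count must equal the number of input elements. The old bound, output->numel(), is larger (the unpooled output has more elements than the pooled input), which would let trailing threads read past the end of input_data and indices_data. For the test case at the bottom of this commit (shape [6, 4, 5, 5]), nthreads = 6 * 4 * 5 * 5 = 600 and blocks = (600 + 1023) / 1024 = 1, so a single 1024-thread block covers the whole input in one grid-stride pass.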
@@ -88,7 +102,7 @@ class Unpool2dMaxFunctor<platform::GPUPlace, T> {
T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>(nthreads, input_data, indices_data,
- input_height, input_width,
+ input_height, input_width, output_channels,
output_data, output_height, output_width);
}
};
@@ -115,7 +129,7 @@ class Unpool2dMaxGradFunctor<platform::GPUPlace, T> {
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
- int nthreads = output.numel();
+ int nthreads = batch_size * output_channels * input_height * input_width;
int blocks = (nthreads + 1024 - 1) / 1024;
dim3 threads(1024, 1);
dim3 grid(blocks, 1);
@@ -125,7 +139,7 @@ class Unpool2dMaxGradFunctor<platform::GPUPlace, T> {
reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>(
nthreads, input_data, indices_data,
- input_height, input_width,
+ input_height, input_width, output_channels,
output_data, output_grad_data,
output_height, output_width,
input_grad_data);
......
@@ -21,9 +21,6 @@ namespace paddle {
namespace operators {
namespace math {
- #define FLT_MAX \
-     __FLT_MAX__
template <typename Place, typename T>
class Unpool2dMaxFunctor {
......
@@ -108,9 +108,6 @@ class UnpoolOpGrad : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null.");
- // PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) must not be null.");
- // PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
- //                "Input(Out@GRAD) should not be null");
PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
"Input(X@GRAD) should not be null.");
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
......
@@ -29,11 +29,16 @@ class UnpoolKernel : public framework::OpKernel<T> {
void Compute(const framework::ExecutionContext& context) const override {
const Tensor* in_x = context.Input<Tensor>("X");
const Tensor* in_y = context.Input<Tensor>("Y");
- Tensor* out = context.Output<Tensor>("Out");
+ auto * out = context.Output<Tensor>("Out");
std::string unpoolingtype = context.Attr<std::string>("unpoolingtype");
std::vector<int> ksize = context.Attr<std::vector<int>>("ksize");
std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
+ T* output_data = out->mutable_data<T>(context.GetPlace());
+ if (output_data) {
+   math::SetConstant<Place, T> set_zero;
+   set_zero(context.device_context(), out, static_cast<T>(0));
+ }
switch (ksize.size()) {
case 2: {
if (unpoolingtype == "max") {
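The SetConstant block added above zeroes the whole output before the scatter, which max-unpooling requires: only the positions recorded in the pooling indices receive values, and every other output element must stay 0. A minimal single-channel sketch of that semantics in plain C++ (illustrative only, not the Paddle functor interface):

#include <cstddef>
#include <vector>

// Scatter pooled maxima back into a zero-filled buffer; indices[i] is the
// flat position within the output that produced pooled[i] during max-pool.
std::vector<float> UnpoolMaxSketch(const std::vector<float>& pooled,
                                   const std::vector<int>& indices,
                                   std::size_t out_size) {
  std::vector<float> out(out_size, 0.0f);  // must start zeroed
  for (std::size_t i = 0; i < pooled.size(); ++i) {
    out[indices[i]] = pooled[i];           // write only at max positions
  }
  return out;
}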
@@ -66,7 +71,7 @@ class UnpoolGradKernel : public framework::OpKernel<T> {
if (in_x_grad) {
in_x_grad->mutable_data<T>(context.GetPlace());
zero(device_ctx, in_x_grad, static_cast<T>(0.0));
- }
+ }
switch (ksize.size()) {
case 2: {
if (unpoolingtype == "max") {
......
@@ -54,6 +54,8 @@ class TestUnpoolOp(OpTest):
self.outputs = {'Out': output.astype('float32')}
def test_check_output(self):
print self.inputs['X']
+ print self.inputs['Y']
+ print self.outputs['Out']
self.check_output()
@@ -63,7 +65,7 @@ class TestUnpoolOp(OpTest):
def init_test_case(self):
self.Unpool2d_forward_naive = unpool2dmax_forward_naive
self.unpoolingtype = "max"
- self.shape = [10, 2, 5, 5]
+ self.shape = [6, 4, 5, 5]
self.ksize = [3, 3]
self.strides = [2, 2]
self.paddings = [0, 0]
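Assuming the usual unpooling shape relation out = (in - 1) * stride - 2 * pad + ksize (not shown in this excerpt), the new test shape [6, 4, 5, 5] with ksize 3, stride 2, and no padding gives (5 - 1) * 2 - 0 + 3 = 11 per spatial dimension, i.e. Out has shape [6, 4, 11, 11]; only the batch and channel counts changed relative to the old [10, 2, 5, 5] case.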
......