diff --git a/AUTHORS.md b/AUTHORS.md index 54a1097b50f7a09062f8987e62db6b5f5e89e0b7..deafa641203ed9d9bd794fe92e4a91e3aaa03f63 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -25,6 +25,7 @@ | kexinzhao | Ke-Xin Zhao | | kuke | Yi-Bing Liu | | lcy-seso | Ying Cao | +| cjld | Dun Liang | | lipeng-unisound | Peng Li | | liuyuan | Yuan Liu | | livc | Zhao Li | diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index da8941c351571a8ff43974321490065079c2c0b4..541c4db1fa0914b657b3553ea20114f4bbe74464 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -103,6 +103,7 @@ paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 's paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None)) paddle.fluid.layers.multiplex ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.layer_norm ArgSpec(args=['input', 'scale', 'shift', 'begin_norm_axis', 'epsilon', 'param_attr', 'bias_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(True, True, 1, 1e-05, None, None, None, None)) +paddle.fluid.layers.group_norm ArgSpec(args=['input', 'groups', 'epsilon', 'param_attr', 'bias_attr', 'act', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(1e-05, None, None, None, 'NCHW', None)) paddle.fluid.layers.softmax_with_cross_entropy ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode', 'return_softmax'], varargs=None, keywords=None, defaults=(False, -100, False, False)) paddle.fluid.layers.smooth_l1 ArgSpec(args=['x', 'y', 'inside_weight', 'outside_weight', 'sigma'], varargs=None, keywords=None, defaults=(None, None, None)) paddle.fluid.layers.one_hot ArgSpec(args=['input', 'depth'], varargs=None, keywords=None, defaults=None) diff --git a/paddle/fluid/operators/group_norm_op.cc b/paddle/fluid/operators/group_norm_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..6322659b67f6aeaeae3e29135fd52e08bf21ead1 --- /dev/null +++ b/paddle/fluid/operators/group_norm_op.cc @@ -0,0 +1,162 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/
+
+#include "paddle/fluid/operators/group_norm_op.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+using LoDTensor = framework::LoDTensor;
+using DataLayout = framework::DataLayout;
+
+class GroupNormOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext *ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"),
+                   "Input(X) of GroupNormOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("Y"),
+                   "Output(Y) of GroupNormOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("Mean"),
+                   "Output(Mean) of GroupNormOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("Variance"),
+                   "Output(Variance) of GroupNormOp should not be null.");
+
+    auto x_dim = ctx->GetInputDim("X");
+    auto channel_num = x_dim[1];
+    auto batch_size = x_dim[0];
+    auto groups = ctx->Attrs().Get<int>("groups");
+    PADDLE_ENFORCE_LE(
+        groups, channel_num,
+        "'groups' must be less than or equal to the number of channels.");
+    PADDLE_ENFORCE_GE(groups, 1,
+                      "'groups' must be greater than or equal to 1.");
+
+    if (ctx->HasInput("Scale")) {
+      PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale").size(), 1UL);
+      PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale")[0], channel_num);
+    }
+    if (ctx->HasInput("Bias")) {
+      PADDLE_ENFORCE_EQ(ctx->GetInputDim("Bias").size(), 1UL);
+      PADDLE_ENFORCE_EQ(ctx->GetInputDim("Bias")[0], channel_num);
+    }
+
+    ctx->SetOutputDim("Y", ctx->GetInputDim("X"));
+    ctx->SetOutputDim("Mean", {batch_size, groups});
+    ctx->SetOutputDim("Variance", {batch_size, groups});
+    ctx->ShareLoD("X", "Y");
+  }
+};
+
+class GroupNormOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput("X", "The input tensor.");
+    AddInput("Scale",
+             "Scale is a 1-dimensional tensor of size C "
+             "that is applied to the output.")
+        .AsDispensable();
+    AddInput("Bias",
+             "Bias is a 1-dimensional tensor of size C "
+             "that is applied to the output.")
+        .AsDispensable();
+    AddOutput("Y", "Result after normalization.");
+    AddOutput("Mean", "Mean of each group.").AsIntermediate();
+    AddOutput("Variance", "Variance of each group.").AsIntermediate();
+
+    AddAttr<float>("epsilon",
+                   "Constant for numerical stability [default 1e-5].")
+        .SetDefault(1e-5)
+        .AddCustomChecker([](const float &epsilon) {
+          PADDLE_ENFORCE(epsilon >= 0.0f && epsilon <= 1.0f,
+                         "'epsilon' should be between 0.0 and 1.0.");
+        });
+    AddAttr<int>("groups",
+                 "The number of groups that the channels are divided into.")
+        .AddCustomChecker([](const int &groups) {
+          PADDLE_ENFORCE_GT(groups, 0, "'groups' should be greater than zero.");
+        });
+
+    AddComment(R"DOC(
+Group Normalization
+
+Refer to `Group Normalization <https://arxiv.org/abs/1803.08494>`_
+)DOC");
+  }
+};
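For reference, the forward computation this operator defines (and which the group_norm_naive helper in the new unit test checks) can be sketched in NumPy. This is an illustrative sketch only, not part of the patch; it assumes an NCHW input whose channel count is divisible by groups, which is the case the test exercises.

import numpy as np

def group_norm_ref(x, scale, bias, groups, epsilon=1e-5):
    # x: [N, C, H, W]; scale, bias: per-channel vectors of size C.
    N, C, H, W = x.shape
    g = x.reshape((N, groups, -1))      # gather each group's values into one row
    mean = g.mean(axis=2)               # "Mean" output, shape [N, groups]
    var = g.var(axis=2)                 # "Variance" output (biased), shape [N, groups]
    y = (g - mean[..., None]) / np.sqrt(var[..., None] + epsilon)
    y = y.reshape((N, C, H, W))
    # Scale and Bias are the optional per-channel affine parameters.
    return y * scale.reshape((1, C, 1, 1)) + bias.reshape((1, C, 1, 1)), mean, var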
+class GroupNormGradOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext *ctx) const override {
+    // check input
+    PADDLE_ENFORCE(ctx->HasInput("X"),
+                   "Input(X) of GroupNormGradOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Mean"),
+                   "Input(Mean) of GroupNormGradOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Variance"),
+                   "Input(Variance) of GroupNormGradOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")),
+                   "Input(Y@GRAD) of GroupNormGradOp should not be null.");
+
+    // check output
+    if (ctx->HasOutput(framework::GradVarName("X"))) {
+      ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
+    }
+    if (ctx->HasOutput(framework::GradVarName("Scale"))) {
+      ctx->SetOutputDim(framework::GradVarName("Scale"),
+                        ctx->GetInputDim("Scale"));
+    }
+    if (ctx->HasOutput(framework::GradVarName("Bias"))) {
+      ctx->SetOutputDim(framework::GradVarName("Bias"),
+                        ctx->GetInputDim("Bias"));
+    }
+  }
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext &ctx) const override {
+    const auto *var = ctx.InputVar(framework::GradVarName("Y"));
+    if (var == nullptr) {
+      PADDLE_THROW("can't find Y@GRAD");
+    }
+    const Tensor *t = nullptr;
+    if (var->IsType<Tensor>()) {
+      t = &var->Get<Tensor>();
+    } else if (var->IsType<LoDTensor>()) {
+      t = &var->Get<LoDTensor>();
+    }
+    if (t == nullptr) {
+      PADDLE_THROW("can't find Y@GRAD");
+    }
+    return framework::OpKernelType(framework::ToDataType(t->type()),
+                                   ctx.GetPlace());
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OPERATOR(group_norm, ops::GroupNormOp, ops::GroupNormOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(group_norm_grad, ops::GroupNormGradOp);
+REGISTER_OP_CPU_KERNEL(
+    group_norm,
+    ops::GroupNormKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::GroupNormKernel<paddle::platform::CPUDeviceContext, double>);
+REGISTER_OP_CPU_KERNEL(
+    group_norm_grad,
+    ops::GroupNormGradKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::GroupNormGradKernel<paddle::platform::CPUDeviceContext, double>);
diff --git a/paddle/fluid/operators/group_norm_op.cu b/paddle/fluid/operators/group_norm_op.cu
new file mode 100644
index 0000000000000000000000000000000000000000..27174630227c8123a31cb1c70d5eb5f5b3ee5107
--- /dev/null
+++ b/paddle/fluid/operators/group_norm_op.cu
@@ -0,0 +1,292 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
*/ + +#include +#include "paddle/fluid/operators/group_norm_op.h" + +namespace paddle { +namespace operators { + +template +__global__ void GroupNormForwardGetMeanAndVar(const T* x, int N, int C, + int imsize, int groups, + int group_size, T* mean, T* var) { + int gid = blockIdx.y; + int cid = blockIdx.x; + int bid = blockIdx.z; + int number = min(group_size, static_cast(C - gid * group_size)); + int ccid = gid * group_size + cid; + if (ccid >= C) return; + T x_mean = 0, x_var = 0; + for (int imid = threadIdx.x; imid < imsize; imid += blockDim.x) { + T val = x[(bid * C + ccid) * imsize + imid]; + x_mean += val; + x_var += val * val; + } + x_mean /= number * imsize; + x_var /= number * imsize; + __shared__ T s_mem[2]; + if (threadIdx.x == 0) { + s_mem[0] = s_mem[1] = 0; + } + __syncthreads(); + paddle::platform::CudaAtomicAdd(&s_mem[0], x_mean); + paddle::platform::CudaAtomicAdd(&s_mem[1], x_var); + __syncthreads(); + if (threadIdx.x == 0) { + paddle::platform::CudaAtomicAdd(&mean[bid * groups + gid], s_mem[0]); + paddle::platform::CudaAtomicAdd(&var[bid * groups + gid], s_mem[1]); + } +} + +template +__global__ void GroupNormForward(const T* x, const T* mean, const T* var, + const T* scale, const T* bias, int N, int C, + int imsize, int groups, int group_size, + T epsilon, T* y, T* real_var) { + int gid = blockIdx.y; + int cid = blockIdx.x; + int bid = blockIdx.z; + int ccid = gid * group_size + cid; + if (ccid >= C) return; + T x_mean = mean[bid * groups + gid]; + T x_var = var[bid * groups + gid]; + x_var = x_var - x_mean * x_mean; + T var_inv = 1.0 / sqrt(x_var + epsilon); + if (cid == 0 && threadIdx.x == 0) real_var[bid * groups + gid] = x_var; + for (int imid = threadIdx.x; imid < imsize; imid += blockDim.x) { + T val = x[(bid * C + ccid) * imsize + imid]; + val = (val - x_mean) * var_inv; + if (scale) val *= scale[gid * group_size + cid]; + if (bias) val += bias[gid * group_size + cid]; + y[(bid * C + ccid) * imsize + imid] = val; + } +} + +template +class GroupNormKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + const float epsilon = ctx.Attr("epsilon"); + auto* scale = ctx.Input("Scale"); + auto* bias = ctx.Input("Bias"); + auto* x = ctx.Input("X"); + + auto* y = ctx.Output("Y"); + auto* mean = ctx.Output("Mean"); + auto* var = ctx.Output("Variance"); + const auto groups = ctx.Attr("groups"); + + const auto x_dims = x->dims(); + const int group_size = (x_dims[1] - 1) / groups + 1; + + y->mutable_data(ctx.GetPlace()); + mean->mutable_data(ctx.GetPlace()); + var->mutable_data(ctx.GetPlace()); + math::SetConstant set_zero; + auto& dev_ctx = ctx.template device_context(); + Tensor temp_var; + temp_var.mutable_data(var->dims(), ctx.GetPlace()); + + set_zero(dev_ctx, mean, static_cast(0)); + set_zero(dev_ctx, &temp_var, static_cast(0)); + + auto* x_data = x->data(); + auto* y_data = y->data(); + auto* mean_data = mean->data(); + auto* var_data = var->data(); + auto* temp_var_data = temp_var.data(); + + const T* scale_data = nullptr; + if (scale) scale_data = scale->data(); + const T* bias_data = nullptr; + if (bias) bias_data = bias->data(); + + int imsize = x_dims[2] * x_dims[3]; + int block_size = std::min(512, imsize); + dim3 grid(group_size, groups, x_dims[0]); + dim3 threads(block_size, 1, 1); + GroupNormForwardGetMeanAndVar<<>>( + x_data, x_dims[0], x_dims[1], imsize, groups, group_size, mean_data, + temp_var_data); + GroupNormForward<<>>( + x_data, mean_data, temp_var_data, scale_data, bias_data, x_dims[0], 
+ x_dims[1], imsize, groups, group_size, epsilon, y_data, var_data); + } +}; + +template +__global__ void GroupNormBackwardGetMeanAndVar( + const T* x, const T* mean, const T* var, const T* scale, const T* d_y, + int N, int C, int imsize, int groups, int group_size, T epsilon, T* d_x, + T* d_mean, T* d_var, T* d_scale, T* d_bias) { + int gid = blockIdx.y; + int cid = blockIdx.x; + int bid = blockIdx.z; + int number = min(group_size, static_cast(C - gid * group_size)); + int ccid = gid * group_size + cid; + if (ccid >= C) return; + T x_mean = mean[bid * groups + gid]; + T x_var = var[bid * groups + gid]; + T var_inv = 1.0 / sqrt(x_var + epsilon); + T d_var_inv = 0, d_x_mean = 0; + T d_mean_data = 0, d_var_data = 0, d_scale_data = 0, d_bias_data = 0; + + for (int imid = threadIdx.x; imid < imsize; imid += blockDim.x) { + T tmp = x[(bid * C + ccid) * imsize + imid]; + T val = (tmp - x_mean) * var_inv; + T dval = d_y[(bid * C + ccid) * imsize + imid]; + if (d_bias) d_bias_data += dval; + if (d_scale) d_scale_data += val * dval; + if (scale) dval = dval * scale[ccid]; + d_var_data += (tmp - x_mean) * dval; + T d_tmp = dval * var_inv; + if (d_x) d_x[(bid * C + ccid) * imsize + imid] = d_tmp; + d_mean_data -= d_tmp; + } + + __shared__ T s_mem[4]; + if (threadIdx.x == 0) { + s_mem[0] = s_mem[1] = 0; + if (d_scale) s_mem[2] = 0; + if (d_bias) s_mem[3] = 0; + } + __syncthreads(); + paddle::platform::CudaAtomicAdd(&s_mem[0], d_mean_data); + paddle::platform::CudaAtomicAdd(&s_mem[1], d_var_data); + if (d_scale) paddle::platform::CudaAtomicAdd(&s_mem[2], d_scale_data); + if (d_bias) paddle::platform::CudaAtomicAdd(&s_mem[3], d_bias_data); + __syncthreads(); + if (threadIdx.x == 0) { + paddle::platform::CudaAtomicAdd(&d_mean[bid * groups + gid], s_mem[0]); + paddle::platform::CudaAtomicAdd(&d_var[bid * groups + gid], s_mem[1]); + if (d_scale) paddle::platform::CudaAtomicAdd(&d_scale[ccid], s_mem[2]); + if (d_bias) paddle::platform::CudaAtomicAdd(&d_bias[ccid], s_mem[3]); + } +} + +template +__global__ void GroupNormBackward(const T* x, const T* mean, const T* var, + const T* d_mean, const T* d_var, int N, int C, + int imsize, int groups, int group_size, + T epsilon, T* d_x) { + int gid = blockIdx.y; + int cid = blockIdx.x; + int bid = blockIdx.z; + int number = min(group_size, static_cast(C - gid * group_size)); + int ccid = gid * group_size + cid; + if (ccid >= C) return; + T x_mean = mean[bid * groups + gid]; + T x_var = var[bid * groups + gid]; + T d_x_mean = d_mean[bid * groups + gid]; + T d_var_inv = d_var[bid * groups + gid]; + + T d_x_var = + -1.0 / (2 * (x_var + epsilon) * sqrt(x_var + epsilon)) * d_var_inv; + d_x_mean -= 2 * d_x_var * x_mean; + d_x_var /= number * imsize; + d_x_mean /= number * imsize; + for (int imid = threadIdx.x; imid < imsize; imid += blockDim.x) { + T tmp = x[(bid * C + ccid) * imsize + imid]; + if (d_x) + d_x[(bid * C + ccid) * imsize + imid] += d_x_mean + tmp * 2 * d_x_var; + } +} + +template +class GroupNormGradKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + const float epsilon = ctx.Attr("epsilon"); + auto* x = ctx.Input("X"); + auto* mean = ctx.Input("Mean"); + auto* var = ctx.Input("Variance"); + auto* scale = ctx.Input("Scale"); + auto* d_y = ctx.Input(framework::GradVarName("Y")); + const auto groups = ctx.Attr("groups"); + + // init output + auto* d_x = ctx.Output(framework::GradVarName("X")); + auto* d_scale = ctx.Output(framework::GradVarName("Scale")); + auto* d_bias = 
ctx.Output(framework::GradVarName("Bias")); + + const auto& x_dims = x->dims(); + const int group_size = (x_dims[1] - 1) / groups + 1; + + T* d_x_data = nullptr; + if (d_x) { + d_x->mutable_data(ctx.GetPlace()); + d_x_data = d_x->data(); + } + math::SetConstant set_zero; + auto& dev_ctx = ctx.template device_context(); + + Tensor temp_var; + temp_var.mutable_data(var->dims(), ctx.GetPlace()); + set_zero(dev_ctx, &temp_var, static_cast(0)); + T* temp_var_data = temp_var.data(); + + Tensor temp_mean; + temp_mean.mutable_data(var->dims(), ctx.GetPlace()); + set_zero(dev_ctx, &temp_mean, static_cast(0)); + T* temp_mean_data = temp_mean.data(); + + auto* x_data = x->data(); + auto* y_data = d_y->data(); + auto* mean_data = mean->data(); + auto* var_data = var->data(); + T* d_scale_data = nullptr; + if (d_scale) { + d_scale->mutable_data(ctx.GetPlace()); + set_zero(dev_ctx, d_scale, static_cast(0)); + d_scale_data = d_scale->data(); + } + T* d_bias_data = nullptr; + if (d_bias) { + d_bias->mutable_data(ctx.GetPlace()); + set_zero(dev_ctx, d_bias, static_cast(0)); + d_bias_data = d_bias->data(); + } + + const T* scale_data = nullptr; + if (scale) scale_data = scale->data(); + + int imsize = x_dims[2] * x_dims[3]; + int block_size = std::min(512, imsize); + dim3 grid(group_size, groups, x_dims[0]); + dim3 threads(block_size, 1, 1); + GroupNormBackwardGetMeanAndVar<<>>( + x_data, mean_data, var_data, scale_data, y_data, x_dims[0], x_dims[1], + imsize, groups, group_size, epsilon, d_x_data, temp_mean_data, + temp_var_data, d_scale_data, d_bias_data); + GroupNormBackward<<>>( + x_data, mean_data, var_data, temp_mean_data, temp_var_data, x_dims[0], + x_dims[1], imsize, groups, group_size, epsilon, d_x_data); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_CUDA_KERNEL( + group_norm, + ops::GroupNormKernel, + ops::GroupNormKernel); +REGISTER_OP_CUDA_KERNEL( + group_norm_grad, + ops::GroupNormGradKernel, + ops::GroupNormGradKernel); diff --git a/paddle/fluid/operators/group_norm_op.h b/paddle/fluid/operators/group_norm_op.h new file mode 100644 index 0000000000000000000000000000000000000000..3d6c6a46a9662e3b99b4e080b424b4794db7fcc3 --- /dev/null +++ b/paddle/fluid/operators/group_norm_op.h @@ -0,0 +1,197 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/elementwise/elementwise_op_function.h" +#include "paddle/fluid/operators/math/blas.h" +#include "paddle/fluid/operators/math/math_function.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +using LoDTensor = framework::LoDTensor; +using DataLayout = framework::DataLayout; + +template +class GroupNormKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + const float epsilon = ctx.Attr("epsilon"); + auto* scale = ctx.Input("Scale"); + auto* bias = ctx.Input("Bias"); + auto* x = ctx.Input("X"); + + auto* y = ctx.Output("Y"); + auto* mean = ctx.Output("Mean"); + auto* var = ctx.Output("Variance"); + const auto groups = ctx.Attr("groups"); + + const auto x_dims = x->dims(); + const int group_size = (x_dims[1] - 1) / groups + 1; + + y->mutable_data(ctx.GetPlace()); + mean->mutable_data(ctx.GetPlace()); + var->mutable_data(ctx.GetPlace()); + + auto* x_data = x->data(); + auto* y_data = y->data(); + auto* mean_data = mean->data(); + auto* var_data = var->data(); + + const T* scale_data = nullptr; + if (scale) scale_data = scale->data(); + const T* bias_data = nullptr; + if (bias) bias_data = bias->data(); + + int imsize = x_dims[2] * x_dims[3]; + auto* iter_x_data = x_data; + auto* iter_y_data = y_data; + for (int bid = 0; bid < x_dims[0]; bid++) + for (int gid = 0; gid < groups; gid++) { + T x_mean = 0, x_var = 0; + int number = std::min(group_size, + static_cast(x_dims[1] - gid * group_size)); + auto* tmp = iter_x_data; + for (int cid = 0; cid < number; cid++) { + for (int imid = 0; imid < imsize; imid++, iter_x_data++) { + x_mean += iter_x_data[0]; + x_var += iter_x_data[0] * iter_x_data[0]; + } + } + x_mean /= number * imsize; + x_var /= number * imsize; + x_var = x_var - x_mean * x_mean; + T var_inv = 1.0 / sqrt(x_var + epsilon); + mean_data[bid * groups + gid] = x_mean; + var_data[bid * groups + gid] = x_var; + for (int cid = 0; cid < number; cid++) { + for (int imid = 0; imid < imsize; imid++, tmp++, iter_y_data++) { + T val = (tmp[0] - x_mean) * var_inv; + if (scale_data) val *= scale_data[gid * group_size + cid]; + if (bias_data) val += bias_data[gid * group_size + cid]; + iter_y_data[0] = val; + } + } + } + } +}; + +template +class GroupNormGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + const float epsilon = ctx.Attr("epsilon"); + auto* x = ctx.Input("X"); + auto* mean = ctx.Input("Mean"); + auto* var = ctx.Input("Variance"); + auto* scale = ctx.Input("Scale"); + auto* d_y = ctx.Input(framework::GradVarName("Y")); + const auto groups = ctx.Attr("groups"); + + // init output + auto* d_x = ctx.Output(framework::GradVarName("X")); + auto* d_scale = ctx.Output(framework::GradVarName("Scale")); + auto* d_bias = ctx.Output(framework::GradVarName("Bias")); + + const auto& x_dims = x->dims(); + const int group_size = (x_dims[1] - 1) / groups + 1; + + // TODO(liangdun): need to check d_x is null + math::SetConstant set_zero; + auto& dev_ctx = ctx.template device_context(); + T* d_x_data = nullptr; + if (d_x) { + d_x->mutable_data(ctx.GetPlace()); + set_zero(dev_ctx, d_x, static_cast(0)); + d_x_data = d_x->data(); + } + + auto* x_data = x->data(); + auto* y_data = d_y->data(); + auto* mean_data = mean->data(); + auto* var_data = var->data(); + T* d_scale_data = 
nullptr; + if (d_scale) { + d_scale->mutable_data(ctx.GetPlace()); + set_zero(dev_ctx, d_scale, static_cast(0)); + d_scale_data = d_scale->data(); + } + T* d_bias_data = nullptr; + if (d_bias) { + d_bias->mutable_data(ctx.GetPlace()); + set_zero(dev_ctx, d_bias, static_cast(0)); + d_bias_data = d_bias->data(); + } + + const T* scale_data = nullptr; + if (scale) scale_data = scale->data(); + + int imsize = x_dims[2] * x_dims[3]; + auto* iter_x_data = x_data; + auto* iter_d_x_data = d_x_data; + auto* iter_y_data = y_data; + for (int bid = 0; bid < x_dims[0]; bid++) + for (int gid = 0; gid < groups; gid++) { + T x_mean = mean_data[bid * groups + gid]; + T x_var = var_data[bid * groups + gid]; + T var_inv = 1.0 / sqrt(x_var + epsilon); + int number = std::min(group_size, + static_cast(x_dims[1] - gid * group_size)); + auto* tmp = iter_x_data; + auto* tmp2 = iter_d_x_data; + T d_var_inv = 0, d_x_mean = 0; + for (int cid = 0; cid < number; cid++) { + for (int imid = 0; imid < imsize; + imid++, tmp++, iter_y_data++, iter_d_x_data++) { + T val = (tmp[0] - x_mean) * var_inv; + T dval = iter_y_data[0]; + if (d_bias_data) d_bias_data[gid * group_size + cid] += dval; + if (d_scale_data) + d_scale_data[gid * group_size + cid] += val * dval; + if (scale_data) dval = scale_data[gid * group_size + cid] * dval; + + d_var_inv += (tmp[0] - x_mean) * dval; + T d_tmp = dval * var_inv; + if (d_x_data) iter_d_x_data[0] += d_tmp; + d_x_mean -= d_tmp; + } + } + + T d_x_var = + -1.0 / (2 * (x_var + epsilon) * sqrt(x_var + epsilon)) * d_var_inv; + d_x_mean -= 2 * d_x_var * x_mean; + d_x_var /= number * imsize; + d_x_mean /= number * imsize; + + iter_d_x_data = tmp2; + + if (d_x_data) { + for (int cid = 0; cid < number; cid++) { + for (int imid = 0; imid < imsize; + imid++, iter_x_data++, iter_d_x_data++) { + iter_d_x_data[0] += d_x_mean; + iter_d_x_data[0] += iter_x_data[0] * 2 * d_x_var; + } + } + } + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/math/detail/activation_functions.h b/paddle/fluid/operators/math/detail/activation_functions.h index b127fbe8c8515e7fe57b07ea1d4291675ec4efca..2b3d38d95a18fad9b76e616cdf2cb6c3eb07da3a 100644 --- a/paddle/fluid/operators/math/detail/activation_functions.h +++ b/paddle/fluid/operators/math/detail/activation_functions.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include #include + #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/hostdevice.h" diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 6cc3a1739a5cc4d4e87679ec4e345edefba47a82..795800fd51763759c0f660e3eb60625afe669881 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -37,6 +37,7 @@ limitations under the License. */ #include "paddle/fluid/memory/allocation/allocator_strategy.h" #include "paddle/fluid/operators/activation_op.h" #include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h" +#include "paddle/fluid/platform/cpu_info.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/init.h" #include "paddle/fluid/platform/place.h" @@ -86,6 +87,9 @@ bool IsCompiledWithDIST() { } PYBIND11_PLUGIN(core) { + // Not used, just make sure cpu_info.cc is linked. 
+ paddle::platform::CpuTotalPhysicalMemory(); + paddle::memory::allocation::UseAllocatorStrategyGFlag(); py::module m("core", "C++ core of PaddlePaddle"); diff --git a/python/paddle/fluid/contrib/utils/__init__.py b/python/paddle/fluid/contrib/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..df6d367782327f7b22e72ab88d6b6cc26c9d5bc9 --- /dev/null +++ b/python/paddle/fluid/contrib/utils/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +from . import hdfs_utils +from .hdfs_utils import * + +__all__ = hdfs_utils.__all__ diff --git a/python/paddle/fluid/contrib/utils/hdfs_utils.py b/python/paddle/fluid/contrib/utils/hdfs_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..251665d85e166f4ebf66eced7a5889ee9fc23e08 --- /dev/null +++ b/python/paddle/fluid/contrib/utils/hdfs_utils.py @@ -0,0 +1,505 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""HDFS Utils""" + +import os +import subprocess +import multiprocessing +from datetime import datetime + +import re +import copy +import errno + +import logging + +__all__ = ["HDFSClient", "multi_download"] + +logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s') +_logger = logging.getLogger("hdfs_utils") +_logger.setLevel(logging.INFO) + + +class HDFSClient(object): + def __init__(self, hadoop_home, configs): + self.pre_commands = [] + hadoop_bin = '%s/bin/hadoop' % hadoop_home + self.pre_commands.append(hadoop_bin) + dfs = 'fs' + self.pre_commands.append(dfs) + + for k, v in configs.iteritems(): + config_command = '-D%s=%s' % (k, v) + self.pre_commands.append(config_command) + + def __run_hdfs_cmd(self, commands, retry_times=5): + whole_commands = copy.deepcopy(self.pre_commands) + whole_commands.extend(commands) + + print('Running system command: {0}'.format(' '.join(whole_commands))) + + ret_code = 0 + ret_out = None + ret_err = None + for x in range(retry_times + 1): + proc = subprocess.Popen( + whole_commands, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + (output, errors) = proc.communicate() + ret_code, ret_out, ret_err = proc.returncode, output, errors + if ret_code: + _logger.warn( + 'Times: %d, Error running command: %s. 
Return code: %d, Error: %s' + % (x, ' '.join(whole_commands), proc.returncode, errors)) + else: + break + return ret_code, ret_out, ret_err + + def upload(self, hdfs_path, local_path, overwrite=False, retry_times=5): + """ + upload the local file to hdfs + args: + local_file_path: the local file path + remote_file_path: default value(${OUTPUT_PATH}/${SYS_USER_ID}/${SYS_JOB_ID}/tmp) + return: + True or False + """ + assert hdfs_path is not None + assert local_path is not None and os.path.exists(local_path) + + if os.path.isdir(local_path): + _logger.warn( + "The Local path: {} is dir and I will support it later, return". + format(local_path)) + return + + base = os.path.basename(local_path) + if not self.is_exist(hdfs_path): + self.makedirs(hdfs_path) + else: + if self.is_exist(os.path.join(hdfs_path, base)): + if overwrite: + _logger.error( + "The HDFS path: {} is exist and overwrite is True, delete it". + format(hdfs_path)) + self.delete(hdfs_path) + else: + _logger.error( + "The HDFS path: {} is exist and overwrite is False, return". + format(hdfs_path)) + return False + + put_commands = ["-put", local_path, hdfs_path] + returncode, output, errors = self.__run_hdfs_cmd(put_commands, + retry_times) + if returncode: + _logger.error("Put local path: {} to HDFS path: {} failed".format( + local_path, hdfs_path)) + return False + else: + _logger.info("Put local path: {} to HDFS path: {} successfully". + format(local_path, hdfs_path)) + return True + + def download(self, hdfs_path, local_path, overwrite=False, unzip=False): + """ + download from hdfs + args: + local_file_path: the local file path + remote_file_path: remote dir on hdfs + return: + True or False + """ + _logger.info('Downloading %r to %r.', hdfs_path, local_path) + _logger.info('Download of %s to %r complete.', hdfs_path, local_path) + + if not self.is_exist(hdfs_path): + print("HDFS path: {} do not exist".format(hdfs_path)) + return False + if self.is_dir(hdfs_path): + _logger.error( + "The HDFS path: {} is dir and I will support it later, return". + format(hdfs_path)) + + if os.path.exists(local_path): + base = os.path.basename(hdfs_path) + local_file = os.path.join(local_path, base) + if os.path.exists(local_file): + if overwrite: + os.remove(local_file) + else: + _logger.error( + "The Local path: {} is exist and overwrite is False, return". + format(local_file)) + return False + + self.make_local_dirs(local_path) + + download_commands = ["-get", hdfs_path, local_path] + returncode, output, errors = self.__run_hdfs_cmd(download_commands) + if returncode: + _logger.error("Get local path: {} from HDFS path: {} failed".format( + local_path, hdfs_path)) + return False + else: + _logger.info("Get local path: {} from HDFS path: {} successfully". + format(local_path, hdfs_path)) + return True + + def is_exist(self, hdfs_path=None): + """ + whether the remote hdfs path exists? 
+ args: + remote_file_path: default value(${OUTPUT_PATH}/${SYS_USER_ID}/${SYS_JOB_ID}/tmp) + fs_name: The default values are the same as in the job configuration + fs_ugi: The default values are the same as in the job configuration + return: + True or False + """ + exist_cmd = ['-test', '-e', hdfs_path] + returncode, output, errors = self.__run_hdfs_cmd( + exist_cmd, retry_times=1) + + if returncode: + _logger.error("HDFS is_exist HDFS path: {} failed".format( + hdfs_path)) + return False + else: + _logger.info("HDFS is_exist HDFS path: {} successfully".format( + hdfs_path)) + return True + + def is_dir(self, hdfs_path=None): + """ + whether the remote hdfs path exists? + args: + remote_file_path: default value(${OUTPUT_PATH}/${SYS_USER_ID}/${SYS_JOB_ID}/tmp) + fs_name: The default values are the same as in the job configuration + fs_ugi: The default values are the same as in the job configuration + return: + True or False + """ + + if not self.is_exist(hdfs_path): + return False + + dir_cmd = ['-test', '-d', hdfs_path] + returncode, output, errors = self.__run_hdfs_cmd(dir_cmd, retry_times=1) + + if returncode: + _logger.error("HDFS path: {} failed is not a directory".format( + hdfs_path)) + return False + else: + _logger.info("HDFS path: {} successfully is a directory".format( + hdfs_path)) + return True + + def delete(self, hdfs_path): + """Remove a file or directory from HDFS. + + :param hdfs_path: HDFS path. + :param recursive: Recursively delete files and directories. By default, + this method will raise an :class:`HdfsError` if trying to delete a + non-empty directory. + + This function returns `True` if the deletion was successful and `False` if + no file or directory previously existed at `hdfs_path`. + + """ + _logger.info('Deleting %r.', hdfs_path) + + if not self.is_exist(hdfs_path): + _logger.warn("HDFS path: {} do not exist".format(hdfs_path)) + return True + + if self.is_dir(hdfs_path): + del_cmd = ['-rmr', hdfs_path] + else: + del_cmd = ['-rm', hdfs_path] + + returncode, output, errors = self.__run_hdfs_cmd(del_cmd, retry_times=0) + + if returncode: + _logger.error("HDFS path: {} delete files failure".format( + hdfs_path)) + return False + else: + _logger.info("HDFS path: {} delete files successfully".format( + hdfs_path)) + return True + + def rename(self, hdfs_src_path, hdfs_dst_path, overwrite=False): + """Move a file or folder. + + :param hdfs_src_path: Source path. + :param hdfs_dst_path: Destination path. If the path already exists and is + a directory, the source will be moved into it. If the path exists and is + a file, or if a parent destination directory is missing, this method will + raise an :class:`HdfsError`. 
+ + """ + assert hdfs_src_path is not None + assert hdfs_dst_path is not None + + if not self.is_exist(hdfs_src_path): + _logger.info("HDFS path do not exist: {}".format(hdfs_src_path)) + if self.is_exist(hdfs_dst_path) and not overwrite: + _logger.error("HDFS path is exist: {} and overwrite=False".format( + hdfs_dst_path)) + + rename_command = ['-mv', hdfs_src_path, hdfs_dst_path] + returncode, output, errors = self.__run_hdfs_cmd( + rename_command, retry_times=1) + + if returncode: + _logger.error("HDFS rename path: {} to {} failed".format( + hdfs_src_path, hdfs_dst_path)) + return False + else: + _logger.info("HDFS rename path: {} to {} successfully".format( + hdfs_src_path, hdfs_dst_path)) + return True + + @staticmethod + def make_local_dirs(local_path): + try: + os.makedirs(local_path) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + def makedirs(self, hdfs_path): + """Create a remote directory, recursively if necessary. + + :param hdfs_path: Remote path. Intermediate directories will be created + appropriately. + """ + _logger.info('Creating directories to %r.', hdfs_path) + assert hdfs_path is not None + + if self.is_exist(hdfs_path): + return + + mkdirs_commands = ['-mkdir', hdfs_path] + returncode, output, errors = self.__run_hdfs_cmd( + mkdirs_commands, retry_times=1) + + if returncode: + _logger.error("HDFS mkdir path: {} failed".format(hdfs_path)) + return False + else: + _logger.error("HDFS mkdir path: {} successfully".format(hdfs_path)) + return True + + def ls(self, hdfs_path): + assert hdfs_path is not None + + if not self.is_exist(hdfs_path): + return [] + + ls_commands = ['-ls', hdfs_path] + returncode, output, errors = self.__run_hdfs_cmd( + ls_commands, retry_times=1) + + if returncode: + _logger.error("HDFS list path: {} failed".format(hdfs_path)) + return [] + else: + _logger.info("HDFS list path: {} successfully".format(hdfs_path)) + + ret_lines = [] + regex = re.compile('\s+') + out_lines = output.strip().split("\n") + for line in out_lines: + re_line = regex.split(line) + if len(re_line) == 8: + ret_lines.append(re_line[7]) + return ret_lines + + def lsr(self, hdfs_path, only_file=True, sort=True): + def sort_by_time(v1, v2): + v1_time = datetime.strptime(v1[1], '%Y-%m-%d %H:%M') + v2_time = datetime.strptime(v2[1], '%Y-%m-%d %H:%M') + return v1_time > v2_time + + assert hdfs_path is not None + + if not self.is_exist(hdfs_path): + return [] + + ls_commands = ['-lsr', hdfs_path] + returncode, output, errors = self.__run_hdfs_cmd( + ls_commands, retry_times=1) + + if returncode: + _logger.error("HDFS list all files: {} failed".format(hdfs_path)) + return [] + else: + _logger.info("HDFS list all files: {} successfully".format( + hdfs_path)) + lines = [] + regex = re.compile('\s+') + out_lines = output.strip().split("\n") + for line in out_lines: + re_line = regex.split(line) + if len(re_line) == 8: + if only_file and re_line[0][0] == "d": + continue + else: + lines.append( + (re_line[7], re_line[5] + " " + re_line[6])) + if sort: + sorted(lines, cmp=sort_by_time) + ret_lines = [ret[0] for ret in lines] + return ret_lines + + +def multi_upload(client, + hdfs_path, + local_path, + multi_processes=5, + overwrite=False): + """ + :param overwrite: will overwrite hdfs file or not + :param multi_processes: the upload data process at the same time, default=5 + :param client: instance of HDFSClient + :param hdfs_path: path on hdfs + :param local_path: path on local + :return: + """ + + def __subprocess_upload(datas): + for data in datas: + re_path = 
os.path.relpath(os.path.dirname(data), local_path) + hdfs_re_path = os.path.join(hdfs_path, re_path) + client.upload(hdfs_re_path, data, overwrite, retry_times=5) + + def get_local_files(path): + rlist = [] + + if not os.path.isdir(path): + return rlist + + for dirname, folder, files in os.walk(path): + for i in files: + t = os.path.join(dirname, i) + rlist.append(t) + return rlist + + assert isinstance(client, HDFSClient) + + all_files = get_local_files(local_path) + if not all_files: + _logger.info("there are nothing need to upload, exit") + return + _logger.info("Start {} multi process to upload datas".format( + multi_processes)) + procs = [] + for i in range(multi_processes): + process_datas = all_files[i::multi_processes] + p = multiprocessing.Process( + target=__subprocess_upload, args=(process_datas, )) + procs.append(p) + p.start() + + # complete the processes + for proc in procs: + proc.join() + + _logger.info("Finish {} multi process to upload datas".format( + multi_processes)) + + +def multi_download(client, + hdfs_path, + local_path, + trainer_id, + trainers, + multi_processes=5): + """ + multi_download + :param client: instance of HDFSClient + :param hdfs_path: path on hdfs + :param local_path: path on local + :param trainer_id: current trainer id + :param trainers: all trainers number + :param multi_processes: the download data process at the same time, default=5 + :return: None + """ + + def __subprocess_download(datas): + for data in datas: + re_path = os.path.relpath(os.path.dirname(data), hdfs_path) + local_re_path = os.path.join(local_path, re_path) + client.download(data, local_re_path) + + assert isinstance(client, HDFSClient) + + client.make_local_dirs(local_path) + _logger.info("Make local dir {} successfully".format(local_path)) + + all_need_download = client.lsr(hdfs_path, sort=True) + need_download = all_need_download[trainer_id::trainers] + _logger.info("Get {} files From all {} files need to be download from {}". 
+                 format(len(need_download), len(all_need_download), hdfs_path))
+
+    _logger.info("Start {} multi process to download datas".format(
+        multi_processes))
+    procs = []
+    for i in range(multi_processes):
+        process_datas = need_download[i::multi_processes]
+        p = multiprocessing.Process(
+            target=__subprocess_download, args=(process_datas, ))
+        procs.append(p)
+        p.start()
+
+    # complete the processes
+    for proc in procs:
+        proc.join()
+
+    _logger.info("Finish {} multi process to download datas".format(
+        multi_processes))
+
+    local_downloads = []
+    for data in need_download:
+        data_name = os.path.basename(data)
+        re_path = os.path.relpath(os.path.dirname(data), hdfs_path)
+        local_re_path = os.path.join(local_path, re_path, data_name)
+        local_downloads.append(local_re_path)
+
+    return local_downloads
+
+
+if __name__ == "__main__":
+    hadoop_home = "/home/client/hadoop-client/hadoop/"
+
+    configs = {
+        "fs.default.name": "hdfs://xxx.hadoop.com:54310",
+        "hadoop.job.ugi": "hello,hello123"
+    }
+
+    client = HDFSClient(hadoop_home, configs)
+
+    client.ls("/user/com/train-25")
+    files = client.lsr("/user/com/train-25/models")
+
+    downloads = multi_download(
+        client,
+        "/user/com/train-25/model",
+        "/home/xx/data1",
+        1,
+        5,
+        multi_processes=5)
+
+    multi_upload(client, "/user/com/train-25/model", "/home/xx/data1")
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index e0cc09a4c76caee7496f935682ea0d1788bc4bcb..ccd9175b64d46d745c8be5f64d7ddc21a117c181 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -85,6 +85,7 @@ __all__ = [
     'row_conv',
     'multiplex',
     'layer_norm',
+    'group_norm',
     'softmax_with_cross_entropy',
     'smooth_l1',
     'one_hot',
@@ -2547,6 +2548,84 @@ def layer_norm(input,
     return helper.append_activation(layer_norm_out)
 
 
+@templatedoc()
+def group_norm(input,
+               groups,
+               epsilon=1e-05,
+               param_attr=None,
+               bias_attr=None,
+               act=None,
+               data_layout='NCHW',
+               name=None):
+    """
+    **Group Normalization Layer**
+
+    Refer to `Group Normalization <https://arxiv.org/abs/1803.08494>`_
+
+    Args:
+        input(Variable): The input tensor variable.
+        groups(int): The number of groups that the channels are divided into.
+        epsilon(float): The small value added to the variance to prevent
+            division by zero.
+        param_attr(ParamAttr|None): The parameter attribute for the learnable
+            scale :math:`g`. If it is set to False, no scale will be added to the output units.
+            If it is set to None, the scale is initialized to one. Default: None.
+        bias_attr(ParamAttr|None): The parameter attribute for the learnable
+            bias :math:`b`. If it is set to False, no bias will be added to the output units.
+            If it is set to None, the bias is initialized to zero. Default: None.
+        act(str): Activation to be applied to the output of group normalization.
+        data_layout(string|NCHW): Only NCHW is supported.
+        name (str): The name of this layer. It is optional.
+
+    Returns:
+        Variable: A tensor variable which is the result after applying group normalization on the input.
+ + Examples: + + >>> data = fluid.layers.data(name='data', shape=[8, 32, 32], + >>> dtype='float32') + >>> x = fluid.layers.group_norm(input=data, groups=4) + """ + helper = LayerHelper('group_norm', **locals()) + dtype = helper.input_dtype() + + # create intput and parameters + inputs = {'X': input} + input_shape = input.shape + if data_layout != 'NCHW': + raise ValueError("unsupported data layout:" + data_layout) + param_shape = [input_shape[1]] + if param_attr: + scale = helper.create_parameter( + attr=helper.param_attr, + shape=param_shape, + dtype=dtype, + default_initializer=Constant(1.0)) + inputs['Scale'] = scale + if bias_attr: + bias = helper.create_parameter( + attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True) + inputs['Bias'] = bias + + # create output + mean_out = helper.create_tmp_variable(dtype=dtype, stop_gradient=True) + variance_out = helper.create_tmp_variable(dtype=dtype, stop_gradient=True) + group_norm_out = helper.create_tmp_variable(dtype) + + helper.append_op( + type="group_norm", + inputs=inputs, + outputs={ + "Y": group_norm_out, + "Mean": mean_out, + "Variance": variance_out, + }, + attrs={"epsilon": epsilon, + "groups": groups}) + + return helper.append_activation(group_norm_out) + + def conv2d_transpose(input, num_filters, output_size=None, diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 3fc12d584d2fc11b1c4f3cf88a39690e0b9e06b4..4fa69191ad50f3953de658d2aeb52668cfd1fb63 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -23,11 +23,11 @@ if(NOT WITH_DISTRIBUTE) LIST(REMOVE_ITEM TEST_OPS test_dist_text_classification) endif(NOT WITH_DISTRIBUTE) -if(WITH_GPU) - if (${CUDNN_MAJOR_VERSION} VERSION_LESS 7) - LIST(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op) - endif() -endif(WITH_GPU) +if (NOT ${WITH_GPU}) + LIST(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op) +elseif(${CUDNN_MAJOR_VERSION} VERSION_LESS 7) + LIST(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op) +endif() list(REMOVE_ITEM TEST_OPS test_seq_concat_op) # FIXME(helin): https://github.com/PaddlePaddle/Paddle/issues/8290 list(REMOVE_ITEM TEST_OPS test_modified_huber_loss_op) # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5184 @@ -81,10 +81,12 @@ list(REMOVE_ITEM TEST_OPS test_dist_se_resnext) list(REMOVE_ITEM TEST_OPS test_dist_transformer) list(REMOVE_ITEM TEST_OPS test_parallel_executor_transformer) list(REMOVE_ITEM TEST_OPS test_image_classification_resnet) +list(REMOVE_ITEM TEST_OPS test_interpolate_op) foreach(TEST_OP ${TEST_OPS}) py_test_modules(${TEST_OP} MODULES ${TEST_OP}) endforeach(TEST_OP) py_test_modules(test_warpctc_op MODULES test_warpctc_op ENVS FLAGS_warpctc_dir=${WARPCTC_LIB_DIR} SERIAL) +py_test_modules(test_interpolate_op MODULES test_interpolate_op SERIAL) if(WITH_DISTRIBUTE) py_test_modules(test_dist_train MODULES test_dist_train SERIAL) set_tests_properties(test_listen_and_serv_op PROPERTIES TIMEOUT 20) diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py index c195a28e452fbe073a9afb5d650f538176f688fd..271b9c740fd99554e9a7aa8d476a52cf6385b1d9 100644 --- a/python/paddle/fluid/tests/unittests/op_test.py +++ b/python/paddle/fluid/tests/unittests/op_test.py @@ -381,8 +381,8 @@ class OpTest(unittest.TestCase): outs.sort(key=len) checker(outs) - def __assert_is_close(self, numeric_grads, analytic_grads, names, - max_relative_error, msg_prefix): + def 
_assert_is_close(self, numeric_grads, analytic_grads, names, + max_relative_error, msg_prefix): for a, b, name in six.moves.zip(numeric_grads, analytic_grads, names): abs_a = np.abs(a) @@ -451,9 +451,9 @@ class OpTest(unittest.TestCase): analytic_grads = self._get_gradient(inputs_to_check, place, output_names, no_grad_set) - self.__assert_is_close(numeric_grads, analytic_grads, inputs_to_check, - max_relative_error, - "Gradient Check On %s" % str(place)) + self._assert_is_close(numeric_grads, analytic_grads, inputs_to_check, + max_relative_error, + "Gradient Check On %s" % str(place)) @staticmethod def _numpy_to_lod_tensor(np_value, lod, place): diff --git a/python/paddle/fluid/tests/unittests/test_group_norm_op.py b/python/paddle/fluid/tests/unittests/test_group_norm_op.py new file mode 100644 index 0000000000000000000000000000000000000000..0b6d039f050898793b69312f50f6709d66d080cd --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_group_norm_op.py @@ -0,0 +1,143 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import unittest +import numpy as np + +from operator import mul +import paddle.fluid.core as core +import paddle.fluid as fluid +from op_test import OpTest + +from testsuite import create_op + + +def group_norm_naive(x, scale, bias, epsilon, groups): + N, C, H, W = x.shape + G = groups + x = x.reshape((N * G, -1)) + mean = np.mean(x, axis=1, keepdims=True) + var = np.var(x, axis=1, keepdims=True) + output = (x - mean) / np.sqrt(var + epsilon) + output = output.reshape((N, C, H, W)) * scale.reshape( + (-1, 1, 1)) + bias.reshape((-1, 1, 1)) + return output, mean.reshape((N, G)), var.reshape((N, G)) + + +class TestGroupNormOp(OpTest): + def setUp(self): + self.op_type = "group_norm" + self.data_format = "NCHW" + self.dtype = np.float32 + self.shape = (2, 4, 3, 3) + self.attrs = {'epsilon': 1e-5, 'groups': 2} + self.compare_between_place = False + self.init_test_case() + + input = np.random.random(self.shape).astype(self.dtype) + scale = np.random.random([self.shape[1]]).astype(self.dtype) + bias = np.random.random([self.shape[1]]).astype(self.dtype) + output, mean, var = group_norm_naive( + input, scale, bias, self.attrs['epsilon'], self.attrs['groups']) + + self.inputs = { + 'X': OpTest.np_dtype_to_fluid_dtype(input), + 'Scale': OpTest.np_dtype_to_fluid_dtype(scale), + 'Bias': OpTest.np_dtype_to_fluid_dtype(bias) + } + self.outputs = {'Y': output, 'Mean': mean, 'Variance': var} + + def test_check_output(self): + atol = 1e-4 + place = core.CPUPlace() + self.check_output_with_place(place, atol=atol) + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + self.check_output_with_place(place, atol=atol) + + def do_compare_between_place(self): + if not core.is_compiled_with_cuda(): return + place = core.CPUPlace() + place2 = core.CUDAPlace(0) + self.scope = core.Scope() + op_inputs = self.inputs if hasattr(self, "inputs") else dict() + op_outputs = self.outputs if 
hasattr(self, "outputs") else dict() + op_attrs = self.attrs if hasattr(self, "attrs") else dict() + self.op = create_op(self.scope, self.op_type, op_inputs, op_outputs, + op_attrs) + inputs_to_check = set(['X', 'Scale', 'Bias']) + output_names = 'Y' + cpu_grads = self._get_gradient(inputs_to_check, place, output_names, + None) + gpu_grads = self._get_gradient(inputs_to_check, place2, output_names, + None) + self._assert_is_close(cpu_grads, gpu_grads, inputs_to_check, 0.005, + "Gradient Check On %s" % str(place)) + + def test_check_grad(self): + if self.compare_between_place: + self.do_compare_between_place() + return + place = core.CPUPlace() + self.check_grad_with_place( + place, set(['X', 'Scale', 'Bias']), 'Y', max_relative_error=0.01) + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + self.check_grad_with_place( + place, + set(['X', 'Scale', 'Bias']), + 'Y', + max_relative_error=0.01) + + def init_test_case(self): + pass + + +class TestGroupNormOp1(TestGroupNormOp): + def init_test_case(self): + self.attrs['groups'] = 1 + + +class TestGroupNormOp2(TestGroupNormOp): + def init_test_case(self): + self.attrs['groups'] = 4 + + +class TestGroupNormOpBigEps1(TestGroupNormOp): + def init_test_case(self): + self.attrs['groups'] = 1 + self.attrs['epsilon'] = 0.5 + + +class TestGroupNormOpBigEps2(TestGroupNormOp): + def init_test_case(self): + self.attrs['groups'] = 4 + self.attrs['epsilon'] = 0.5 + + +class TestGroupNormOpBigEps3(TestGroupNormOp): + def init_test_case(self): + self.attrs['epsilon'] = 0.5 + + +class TestGroupNormOpLargeData(TestGroupNormOp): + def init_test_case(self): + self.shape = (2, 32, 64, 64) + self.attrs['groups'] = 8 + self.compare_between_place = True + + +if __name__ == '__main__': + unittest.main() diff --git a/tools/manylinux1/Dockerfile.x64 b/tools/manylinux1/Dockerfile.x64 index 0d59e4c110ff8502acb4dbcda15f855f7652a946..e91216a5b89c585dd8ccade472e21e6524de9eb9 100644 --- a/tools/manylinux1/Dockerfile.x64 +++ b/tools/manylinux1/Dockerfile.x64 @@ -36,17 +36,21 @@ RUN cd /opt && wget -q --no-check-certificate https://github.com/google/protobuf tar xzf protobuf-cpp-3.1.0.tar.gz && \ cd protobuf-3.1.0 && ./configure && make -j4 && make install && cd .. 
&& rm -f protobuf-cpp-3.1.0.tar.gz -RUN wget -O /root/requirements.txt https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/python/requirements.txt +RUN wget https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/python/requirements.txt -O /root/requirements.txt RUN LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH} /opt/python/cp27-cp27mu/bin/pip install -r /root/requirements.txt && \ LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs2/lib:${LD_LIBRARY_PATH} /opt/python/cp27-cp27m/bin/pip install -r /root/requirements.txt && \ LD_LIBRARY_PATH=/opt/_internal/cpython-3.5.1/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.5.1/bin/pip3 install -r /root/requirements.txt && \ + LD_LIBRARY_PATH=/opt/_internal/cpython-3.6.0/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.6.0/bin/pip3 install -r /root/requirements.txt && \ + LD_LIBRARY_PATH=/opt/_internal/cpython-3.7.0/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.7.0/bin/pip3 install -r /root/requirements.txt && \ go get github.com/Masterminds/glide && \ rm -rf /root/requirements.txt RUN LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH} /opt/python/cp27-cp27mu/bin/pip install pre-commit 'ipython==5.3.0' opencv-python && \ LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs2/lib:${LD_LIBRARY_PATH} /opt/python/cp27-cp27m/bin/pip install pre-commit 'ipython==5.3.0' opencv-python && \ - LD_LIBRARY_PATH=/opt/_internal/cpython-3.5.1/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.5.1/bin/pip3 install pre-commit 'ipython==5.3.0' opencv-python + LD_LIBRARY_PATH=/opt/_internal/cpython-3.5.1/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.5.1/bin/pip3 install pre-commit 'ipython==5.3.0' opencv-python && \ + LD_LIBRARY_PATH=/opt/_internal/cpython-3.6.0/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.6.0/bin/pip3 install pre-commit 'ipython==5.3.0' opencv-python && \ + LD_LIBRARY_PATH=/opt/_internal/cpython-3.7.0/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.7.0/bin/pip3 install pre-commit 'ipython==5.3.0' opencv-python RUN wget -O /opt/swig-2.0.12.tar.gz https://cytranet.dl.sourceforge.net/project/swig/swig/swig-2.0.12/swig-2.0.12.tar.gz && \ cd /opt && tar xzf swig-2.0.12.tar.gz && cd /opt/swig-2.0.12 && ./configure && make && make install && cd /opt && rm swig-2.0.12.tar.gz diff --git a/tools/manylinux1/build_scripts/build.sh b/tools/manylinux1/build_scripts/build.sh index eb4b477dcb538f7ba17cfc54057a97c9669a6916..6c551eceb4543bf33229b9e5b5124522f3ee134c 100644 --- a/tools/manylinux1/build_scripts/build.sh +++ b/tools/manylinux1/build_scripts/build.sh @@ -9,12 +9,12 @@ set -ex # remove others to expedite build and reduce docker image size. The original # manylinux docker image project builds many python versions. 
# NOTE We added back 3.5.1, since auditwheel requires python 3.3+ -CPYTHON_VERSIONS="2.7.11 3.5.1" +CPYTHON_VERSIONS="3.7.0 3.6.0 3.5.1 2.7.11" # openssl version to build, with expected sha256 hash of .tar.gz # archive -OPENSSL_ROOT=openssl-1.0.2l -OPENSSL_HASH=ce07195b659e75f4e1db43552860070061f156a98bb37b672b101ba6e3ddf30c +OPENSSL_ROOT=openssl-1.1.0i +OPENSSL_HASH=ebbfc844a8c8cc0ea5dc10b86c9ce97f401837f3fa08c17b2cdadc118253cf99 EPEL_RPM_HASH=e5ed9ecf22d0c4279e92075a64c757ad2b38049bcf5c16c4f2b75d5f6860dc0d DEVTOOLS_HASH=a8ebeb4bed624700f727179e6ef771dafe47651131a00a78b342251415646acc PATCHELF_HASH=d9afdff4baeacfbc64861454f368b7f2c15c44d245293f7587bbf726bfe722fb @@ -25,7 +25,7 @@ AUTOCONF_HASH=954bd69b391edc12d6a4a51a2dd1476543da5c6bbf05a95b59dc0dd6fd4c2969 # Dependencies for compiling Python that we want to remove from # the final image after compiling Python -PYTHON_COMPILE_DEPS="zlib-devel bzip2-devel ncurses-devel sqlite-devel readline-devel tk-devel gdbm-devel db4-devel libpcap-devel xz-devel" +PYTHON_COMPILE_DEPS="zlib-devel bzip2-devel ncurses-devel sqlite-devel readline-devel tk-devel gdbm-devel db4-devel libpcap-devel xz-devel libffi-devel" # Libraries that are allowed as part of the manylinux1 profile MANYLINUX1_DEPS="glibc-devel libstdc++-devel glib2-devel libX11-devel libXext-devel libXrender-devel mesa-libGL-devel libICE-devel libSM-devel ncurses-devel freetype-devel libpng-devel" @@ -61,7 +61,7 @@ yum -y install bzip2 make git patch unzip bison yasm diffutils \ wget -q https://cmake.org/files/v3.5/cmake-3.5.2.tar.gz && tar xzf cmake-3.5.2.tar.gz && \ cd cmake-3.5.2 && ./bootstrap && \ -make -j4 && make install && cd .. && rm cmake-3.5.2.tar.gz +make -j8 && make install && cd .. && rm cmake-3.5.2.tar.gz # Install newest autoconf @@ -77,11 +77,13 @@ mkdir -p /opt/python build_cpythons $CPYTHON_VERSIONS PY35_BIN=/opt/python/cp35-cp35m/bin +PY36_BIN=/opt/python/cp36-cp36m/bin +PY37_BIN=/opt/python/cp37-cp37m/bin # NOTE Since our custom manylinux image builds pythons with shared # libpython, we need to add libpython's dir to LD_LIBRARY_PATH before running # python. ORIGINAL_LD_LIBRARY_PATH="${LD_LIBRARY_PATH}" -LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}:$(dirname ${PY35_BIN})/lib" +LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}:$(dirname ${PY35_BIN})/lib:$(dirname ${PY36_BIN})/lib:$(dirname ${PY37_BIN})/lib" # Our openssl doesn't know how to find the system CA trust store # (https://github.com/pypa/manylinux/issues/53) @@ -119,9 +121,8 @@ ln -s $PY35_BIN/auditwheel /usr/local/bin/auditwheel # final image yum -y erase wireless-tools gtk2 libX11 hicolor-icon-theme \ avahi freetype bitstream-vera-fonts \ - ${PYTHON_COMPILE_DEPS} > /dev/null 2>&1 -yum -y install ${MANYLINUX1_DEPS} -yum -y clean all > /dev/null 2>&1 + ${PYTHON_COMPILE_DEPS} > /dev/null 2>&1 || true +yum -y install ${MANYLINUX1_DEPS} && yum -y clean all > /dev/null 2>&1 || true yum list installed # we don't need libpython*.a, and they're many megabytes find /opt/_internal -name '*.a' -print0 | xargs -0 rm -f diff --git a/tools/manylinux1/build_scripts/build_utils.sh b/tools/manylinux1/build_scripts/build_utils.sh index 10422ae3bd00f4e0dd059af0384f8cc17e4b7855..d97745ad2dd80e9855f9d8b643cf1e9e836b467c 100755 --- a/tools/manylinux1/build_scripts/build_utils.sh +++ b/tools/manylinux1/build_scripts/build_utils.sh @@ -52,9 +52,17 @@ function do_cpython_build { # NOTE --enable-shared for generating libpython shared library needed for # linking of some of the nupic.core test executables. 
- CFLAGS="-Wformat" ./configure --prefix=${prefix} --enable-shared $unicode_flags > /dev/null - make -j2 > /dev/null - make install > /dev/null + if [ $(lex_pyver $py_ver) -ge $(lex_pyver 3.7) ]; then + # NOTE python 3.7 should be installed via make altinstall rather than + # make install, and we should specify the location of ssl + CFLAGS="-Wformat" ./configure --prefix=${prefix} --with-openssl=/usr/local/ssl --enable-shared $unicode_flags > /dev/null + make -j8 > /dev/null + make altinstall > /dev/null + else + CFLAGS="-Wformat" ./configure --prefix=${prefix} --enable-shared $unicode_flags > /dev/null + make -j8 > /dev/null + make install > /dev/null + fi popd echo "ZZZ looking for libpython" find / -name 'libpython*.so*' @@ -64,6 +72,9 @@ function do_cpython_build { if [ -e ${prefix}/bin/python3 ]; then ln -s python3 ${prefix}/bin/python fi + if [ -e ${prefix}/bin/python3.7 ]; then + ln -s python3.7 ${prefix}/bin/python + fi # NOTE Make libpython shared library visible to python calls below LD_LIBRARY_PATH="${prefix}/lib" ${prefix}/bin/python get-pip.py LD_LIBRARY_PATH="${prefix}/lib" ${prefix}/bin/pip install wheel
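As a closing reference, a minimal usage sketch of the fluid.layers.group_norm API added by this patch, based on its docstring and the ArgSpec entry above. The surrounding conv2d/executor calls are ordinary fluid 1.x usage and are illustrative only, not part of the patch.

import numpy as np
import paddle.fluid as fluid

# NCHW feature map; group_norm normalizes over groups of channels.
data = fluid.layers.data(name='data', shape=[8, 32, 32], dtype='float32')
conv = fluid.layers.conv2d(input=data, num_filters=16, filter_size=3, padding=1)
out = fluid.layers.group_norm(input=conv, groups=4, epsilon=1e-5, act='relu')

place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())

x = np.random.random((2, 8, 32, 32)).astype('float32')
y, = exe.run(fluid.default_main_program(), feed={'data': x}, fetch_list=[out])
print(y.shape)  # (2, 16, 32, 32)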