diff --git a/paddle/fluid/framework/tensor.cc b/paddle/fluid/framework/tensor.cc index 544c014eaf98a99b1737809f2cbad39b46fdb276..0b22bab26789a3e2ebd20428adc236faa8b38dee 100644 --- a/paddle/fluid/framework/tensor.cc +++ b/paddle/fluid/framework/tensor.cc @@ -19,13 +19,17 @@ namespace paddle { namespace framework { extern size_t SizeOfType(proto::VarType::Type type); void Tensor::check_memory_size() const { - PADDLE_ENFORCE_NOT_NULL( - holder_, "Tensor holds no memory. Call Tensor::mutable_data first."); + PADDLE_ENFORCE_NOT_NULL(holder_, platform::errors::PreconditionNotMet( + "Tensor holds no memory. " + "Call Tensor::mutable_data firstly.")); PADDLE_ENFORCE_LE( numel() * SizeOfType(type()), memory_size(), - "Tensor's dims_ is out of bound. Call Tensor::mutable_data " - "first to re-allocate memory.\n" - "or maybe the required data-type mismatches the data already stored."); + platform::errors::PreconditionNotMet( + "Tensor's dimension is out of bound." + "Tensor's dimension must be equal or less than the size of its " + "memory." + "But received Tensor's dimension is d%, memory's size is %d.", + numel() * SizeOfType(type()), memory_size())); } Tensor::Tensor(const proto::VarType::Type& dtype) : type_(dtype), offset_(0) {} @@ -37,15 +41,21 @@ size_t Tensor::memory_size() const { void* Tensor::mutable_data(const platform::Place& place, proto::VarType::Type type, size_t requested_size) { type_ = type; - PADDLE_ENFORCE_GE(numel(), 0, - "When calling this method, the Tensor's numel must be " - "equal or larger than zero. " - "Please check Tensor::dims, or Tensor::Resize has been " - "called first. The Tensor's shape is [", - dims(), "] now"); + PADDLE_ENFORCE_GE( + numel(), 0, + platform::errors::PreconditionNotMet( + "The Tensor's element number must be equal or greater than zero. " + "The Tensor's shape is [", + dims(), "] now")); size_t size = numel() * SizeOfType(type); if (requested_size) { - PADDLE_ENFORCE_GE(requested_size, size); + PADDLE_ENFORCE_GE( + requested_size, size, + platform::errors::InvalidArgument( + "The requested memory size is less than the memory size of Tensor. " + "But received requested memory size is d%, " + "memory size of Tensor is %d.", + requested_size, size)); size = requested_size; } /* some versions of boost::variant don't have operator!= */ @@ -62,8 +72,8 @@ void* Tensor::mutable_data(const platform::Place& place, void* Tensor::mutable_data(const platform::Place& place, size_t requested_size) { - PADDLE_ENFORCE_NOT_NULL( - this->holder_, "Cannot invoke mutable data if current hold nothing."); + PADDLE_ENFORCE_NOT_NULL(this->holder_, platform::errors::PreconditionNotMet( + "The tensor is not initialized.")); return mutable_data(place, type_, requested_size); } @@ -75,12 +85,20 @@ Tensor& Tensor::ShareDataWith(const Tensor& src) { Tensor Tensor::Slice(int64_t begin_idx, int64_t end_idx) const { check_memory_size(); - PADDLE_ENFORCE_GE(begin_idx, 0, - "The start row index must be greater than 0."); - PADDLE_ENFORCE_LE(end_idx, dims_[0], "The end row index is out of bound."); + PADDLE_ENFORCE_GE( + begin_idx, 0, + platform::errors::OutOfRange("The start row index must be greater than 0." + "But received the start index is d%.", + begin_idx)); + PADDLE_ENFORCE_LE( + end_idx, dims_[0], + platform::errors::OutOfRange("The end row index is out of bound.")); PADDLE_ENFORCE_LT( begin_idx, end_idx, - "The start row index must be lesser than the end row index."); + platform::errors::InvalidArgument( + "The start row index must be less than the end row index." + "But received the start index = %d, the end index = %d.", + begin_idx, end_idx)); if (dims_[0] == 1) { return *this; diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h index d9fddc4c77d99efcb119ae37591b26bff0e51c0a..f2ccff2c133a238d02e25c65faf41dd519fdb506 100644 --- a/paddle/fluid/framework/tensor.h +++ b/paddle/fluid/framework/tensor.h @@ -131,13 +131,17 @@ class Tensor { const platform::Place& place() const { PADDLE_ENFORCE_NOT_NULL( - holder_, "Tensor not initialized yet when Tensor::place() is called."); + holder_, + platform::errors::PreconditionNotMet( + "Tensor not initialized yet when Tensor::place() is called.")); return holder_->place(); } proto::VarType::Type type() const { PADDLE_ENFORCE_NOT_NULL( - holder_, "Tensor not initialized yet when Tensor::type() is called."); + holder_, + platform::errors::PreconditionNotMet( + "Tensor not initialized yet when Tensor::type() is called.")); return type_; } diff --git a/paddle/fluid/framework/tensor_impl.h b/paddle/fluid/framework/tensor_impl.h index f5171b0a8d1efc33521c2d731e88f0f96bdf41c7..986551b935e8811a6b257c2b4a613a493b3b644b 100644 --- a/paddle/fluid/framework/tensor_impl.h +++ b/paddle/fluid/framework/tensor_impl.h @@ -43,9 +43,13 @@ inline T* Tensor::data() { check_memory_size(); bool valid = std::is_same::value || type_ == DataTypeTrait::DataType(); - PADDLE_ENFORCE( - valid, "Tensor holds the wrong type, it holds %s, but desires to be %s", - DataTypeToString(type_), DataTypeToString(DataTypeTrait::DataType())); + PADDLE_ENFORCE_EQ( + valid, true, + platform::errors::InvalidArgument( + "Tensor holds the wrong type, it holds %s, but desires to be %s", + DataTypeToString(type_), + DataTypeToString(DataTypeTrait::DataType()))); + return reinterpret_cast(reinterpret_cast(holder_->ptr()) + offset_); } @@ -69,9 +73,12 @@ inline T* Tensor::mutable_data(const platform::Place& place, inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) { int rank = src.dims().size(); PADDLE_ENFORCE_GE( - rank, 2, - "'ReshapeToMatrix()' is only used for flatten high rank " - "tensors to matrixs. Can not be used in reshaping vectors."); + rank, 2, platform::errors::InvalidArgument( + "'ReshapeToMatrix()' is only used for flatten high rank " + "tensors to matrixs. The dimensions of Tensor must be " + "greater or equal than 2. " + "But received dimensions of Tensor is %d", + rank)); if (rank == 2) { return src; } diff --git a/paddle/fluid/framework/tensor_test.cc b/paddle/fluid/framework/tensor_test.cc index 84f98d339a29b7d23de6e8be4b069b216be31ab2..cc972dd93d032c19015c86debebc27f7c8c0d155 100644 --- a/paddle/fluid/framework/tensor_test.cc +++ b/paddle/fluid/framework/tensor_test.cc @@ -41,7 +41,7 @@ TEST(Tensor, DataAssert) { std::string ex_msg = err.what(); EXPECT_TRUE(ex_msg.find("holder_ should not be null") != std::string::npos); EXPECT_TRUE(ex_msg.find("Tensor holds no memory. Call " - "Tensor::mutable_data first.") != + "Tensor::mutable_data firstly.") != std::string::npos); } ASSERT_TRUE(caught); @@ -157,7 +157,7 @@ TEST(Tensor, ShareDataWith) { EXPECT_TRUE(ex_msg.find("holder_ should not be null") != std::string::npos); EXPECT_TRUE(ex_msg.find("Tensor holds no memory. Call " - "Tensor::mutable_data first.") != + "Tensor::mutable_data firstly.") != std::string::npos); } ASSERT_TRUE(caught); diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index 6dd13d32e6e25f1657f351ff3a54562435b098f3..fd4b1a54d2b44c5bb99d3aa2e0c776c806fb5c8d 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -363,9 +363,12 @@ if(WITH_MKLDNN) inference_analysis_api_test_build(${QUANT_IMG_CLASS_TEST_APP} ${QUANT_IMG_CLASS_TEST_APP_SRC}) # MobileNetV1 FP32 vs. Quant INT8 + # The FP32 model should already be downloaded for slim Quant unit tests on Linux set(QUANT2_MobileNetV1_MODEL_DIR "${QUANT_DATA_DIR}/MobileNetV1_quant2") set(QUANT2_INT8_MobileNetV1_MODEL_DIR "${QUANT_DATA_DIR}/MobileNetV1_quant2_int8") - download_quant_data(${QUANT2_MobileNetV1_MODEL_DIR} "MobileNet_qat_perf.tar.gz") + if(NOT LINUX) + download_quant_data(${QUANT2_MobileNetV1_MODEL_DIR} "MobileNet_qat_perf.tar.gz") + endif(NOT LINUX) download_quant_data(${QUANT2_INT8_MobileNetV1_MODEL_DIR} "MobileNet_qat_perf_int8.tar.gz") inference_analysis_api_quant_test_run(test_analyzer_quant_performance_benchmark ${QUANT_IMG_CLASS_TEST_APP} ${QUANT2_MobileNetV1_MODEL_DIR}/MobileNet_qat_perf/float ${QUANT2_INT8_MobileNetV1_MODEL_DIR}/MobileNet_qat_perf_int8 ${IMAGENET_DATA_PATH}) diff --git a/paddle/fluid/operators/correlation_op.cc b/paddle/fluid/operators/correlation_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..a2e6ff214bfa30af2e25b7feac41d22f02ab75a7 --- /dev/null +++ b/paddle/fluid/operators/correlation_op.cc @@ -0,0 +1,181 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include +#include +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +inline std::vector CorrelationOutputSize(int batch, int input_height, + int input_width, int stride1, + int stride2, int kernel_size, + int pad_size, + int max_displacement) { + std::vector output_shape({batch}); + int kernel_radius = (kernel_size - 1) / 2; + int border_radius = kernel_radius + max_displacement; + int padded_input_height = input_height + 2 * pad_size; + int padded_input_width = input_width + 2 * pad_size; + int output_channel = ((max_displacement / stride2) * 2 + 1) * + ((max_displacement / stride2) * 2 + 1); + output_shape.push_back(output_channel); + int output_height = + std::ceil(static_cast(padded_input_height - 2 * border_radius) / + static_cast(stride1)); + int output_width = + std::ceil(static_cast(padded_input_width - 2 * border_radius) / + static_cast(stride1)); + output_shape.push_back(output_height); + output_shape.push_back(output_width); + return output_shape; +} + +class CorrelationOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("Input1", "Input is a 4-D Tensor with shape [N, C, H, W]"); + AddInput("Input2", "Input is a 4-D Tensor with shape [N, C, H, W]"); + AddOutput("Output", + "(Tensor) The output tensor of correlation operator. " + "It has same data fromat and data type as the Input."); + AddAttr("pad_size", "pad size for input1 and input2"); + AddAttr("kernel_size", "kernel size of input1 and input2"); + AddAttr("max_displacement", "max displacement of input1 and input2"); + AddAttr("stride1", "Input1 stride"); + AddAttr("stride2", "Input2 stride"); + AddAttr("corr_type_multiply", "correlation coefficient").SetDefault(1); + AddComment( + R"DOC(Correlation of two feature map. Only support NCHW data format.)DOC"); + } +}; + +class CorrelationOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasInput("Input1"), "Input", "X", "CorrelationOp"); + OP_INOUT_CHECK(ctx->HasInput("Input2"), "Input", "Y", "CorrelationOp"); + int stride1 = ctx->Attrs().Get("stride1"); + int stride2 = ctx->Attrs().Get("stride2"); + int max_displacement = ctx->Attrs().Get("max_displacement"); + int pad_size = ctx->Attrs().Get("pad_size"); + int kernel_size = ctx->Attrs().Get("kernel_size"); + + auto in_dims = ctx->GetInputDim("Input1"); + auto in2_dims = ctx->GetInputDim("Input2"); + + PADDLE_ENFORCE_EQ(in_dims.size() == 4, true, + platform::errors::InvalidArgument( + "Input(X) of CorrelationOp must be 4 dims." + "But received dims is %d.", + in_dims.size())); + + PADDLE_ENFORCE_EQ(in2_dims.size() == 4, true, + platform::errors::InvalidArgument( + "Input(Y) of CorrelationOp must be 4 dims." + "But received dims is %d.", + in2_dims.size())); + std::vector output_shape = + CorrelationOutputSize(in_dims[0], in_dims[2], in_dims[3], stride1, + stride2, kernel_size, pad_size, max_displacement); + ctx->SetOutputDim("Output", framework::make_ddim(output_shape)); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + auto input_data_type = + OperatorWithKernel::IndicateVarDataType(ctx, "Input1"); + PADDLE_ENFORCE_EQ(input_data_type, ctx.Input("Input2")->type(), + platform::errors::InvalidArgument( + "X and Y shoule have the same datatype")); + return framework::OpKernelType(input_data_type, ctx.GetPlace()); + } + + framework::OpKernelType GetKernelTypeForVar( + const std::string& var_name, const Tensor& tensor, + const framework::OpKernelType& expected_kernel_type) const override { + return framework::OpKernelType(expected_kernel_type.data_type_, + tensor.place(), tensor.layout()); + } +}; + +template +class CorrelationOpGradMaker : public framework::SingleGradOpMaker { + public: + using framework::SingleGradOpMaker::SingleGradOpMaker; + + protected: + void Apply(GradOpPtr op) const override { + op->SetType("correlation_grad"); + op->SetInput("Input1", this->Input("Input1")); + op->SetInput("Input2", this->Input("Input2")); + op->SetInput(framework::GradVarName("Output"), this->OutputGrad("Output")); + op->SetOutput(framework::GradVarName("Input1"), this->InputGrad("Input1")); + op->SetOutput(framework::GradVarName("Input2"), this->InputGrad("Input2")); + op->SetAttrMap(this->Attrs()); + } +}; + +class CorrelationOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasInput("Input1"), "Input", "X", "CorrelationOp"); + OP_INOUT_CHECK(ctx->HasInput("Input2"), "Input", "Y", "CorrelationOp"); + OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Output")), "Input", + "Output@GRAD", "CorrelationGradOp"); + + auto in1_dims = ctx->GetInputDim("Input1"); + auto in2_dims = ctx->GetInputDim("Input2"); + ctx->SetOutputDim(framework::GradVarName("Input1"), in1_dims); + ctx->SetOutputDim(framework::GradVarName("Input2"), in2_dims); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + OperatorWithKernel::IndicateVarDataType(ctx, "Input1"), ctx.GetPlace()); + } +}; + +template +class CorrelationKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx.GetPlace()), true, + platform::errors::Unimplemented("Correlation only supports GPU now.")); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(correlation, ops::CorrelationOp, ops::CorrelationOpMaker, + ops::CorrelationOpGradMaker, + ops::CorrelationOpGradMaker); +REGISTER_OPERATOR(correlation_grad, ops::CorrelationOpGrad); +REGISTER_OP_CPU_KERNEL(correlation, ops::CorrelationKernel, + ops::CorrelationKernel); diff --git a/paddle/fluid/operators/correlation_op.cu b/paddle/fluid/operators/correlation_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..0d177f653ec3d5cacbd4d938e6e5da6689b1bc74 --- /dev/null +++ b/paddle/fluid/operators/correlation_op.cu @@ -0,0 +1,483 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { + +#define THREADS_PER_BLOCK 32 +#define FULL_MASK 0xffffffff + +using framework::Tensor; +using DataLayout = framework::DataLayout; + +template +__forceinline__ __device__ T warpReduceSum(T val) { + for (int offset = 16; offset > 0; offset /= 2) { + val += __shfl_down_sync(FULL_MASK, val, offset); + } + return val; +} + +template +__forceinline__ __device__ T blockReduceSum(T val) { + static __shared__ T shared[32]; + int lane = threadIdx.x % warpSize; + int wid = threadIdx.x / warpSize; + + val = warpReduceSum(val); + if (lane == 0) shared[wid] = val; + + __syncthreads(); + val = (threadIdx.x < blockDim.x / warpSize) ? shared[lane] : 0; + + if (wid == 0) val = warpReduceSum(val); + + return val; +} + +template +__global__ void set_zero(T *x, int num) { + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < num; + i += blockDim.x * gridDim.x) + x[i] = static_cast(0); +} + +template +__global__ void channel_first(const T *input, T *rinput, const int channel, + const int height, const int width, + const int pad_size) { + int n = blockIdx.x; + int h = blockIdx.y; + int w = blockIdx.z; + + int ch_off = threadIdx.x; + T value; + int dimchw = channel * height * width; + int dimhw = height * width; + + int p_dimw = (width + 2 * pad_size); + int p_dimh = (height + 2 * pad_size); + int p_dimchw = channel * p_dimw * p_dimh; + int p_dimcw = channel * p_dimw; + + for (int c = ch_off; c < channel; c += THREADS_PER_BLOCK) { + value = input[n * dimchw + c * dimhw + h * width + w]; + rinput[n * p_dimchw + (h + pad_size) * p_dimcw + (w + pad_size) * channel + + c] = value; + } +} + +template +__global__ void correlation_forward( + T *output, const int output_channel, const int output_height, + const int output_width, const T *rinput1, const int input_channel, + const int input_height, const int input_width, const T *rinput2, + const int pad_size, const int kernel_size, const int max_displacement, + const int stride1, const int stride2) { + int p_input_width = input_width + 2 * pad_size; + int p_input_height = input_height + 2 * pad_size; + + int kernel_rad = (kernel_size - 1) / 2; + int displacement_rad = max_displacement / stride2; + + int displacement_size = 2 * displacement_rad + 1; + + int n = blockIdx.x; + int h1 = blockIdx.y * stride1 + max_displacement; + int w1 = blockIdx.z * stride1 + max_displacement; + int c = threadIdx.x; + + int p_dimchw = p_input_height * p_input_width * input_channel; + int p_dimcw = p_input_width * input_channel; + int p_dimc = input_channel; + + int t_dimchw = output_channel * output_height * output_width; + int t_dimhw = output_height * output_width; + int t_dimw = output_width; + + int nelems = kernel_size * kernel_size * p_dimc; + + for (int tj = -displacement_rad; tj <= displacement_rad; ++tj) { + for (int ti = -displacement_rad; ti <= displacement_rad; ++ti) { + int w2 = w1 + ti * stride2; + int h2 = h1 + tj * stride2; + + T acc0 = 0; + for (int j = -kernel_rad; j <= kernel_rad; ++j) { + for (int i = -kernel_rad; i <= kernel_rad; ++i) { + for (int ch = c; ch < p_dimc; ch += blockDim.x) { + int index1 = + n * p_dimchw + (h1 + j) * p_dimcw + (w1 + i) * p_dimc + ch; + int index2 = + n * p_dimchw + (h2 + j) * p_dimcw + (w2 + i) * p_dimc + ch; + acc0 += static_cast(rinput1[index1] * rinput2[index2]); + } + } + } + if (blockDim.x == warpSize) { + __syncwarp(); + acc0 = warpReduceSum(acc0); + } else { + __syncthreads(); + acc0 = blockReduceSum(acc0); + } + + if (threadIdx.x == 0) { + int tc = (tj + displacement_rad) * displacement_size + + (ti + displacement_rad); + const int t_index = + n * t_dimchw + tc * t_dimhw + blockIdx.y * t_dimw + blockIdx.z; + output[t_index] = static_cast(acc0 / nelems); + } + } + } +} + +// class CorrelationKernel +template +class CorrelationCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, + platform::errors::InvalidArgument( + "Correlation only supports GPU now.")); + + auto *input1 = ctx.Input("Input1"); + auto *input2 = ctx.Input("Input2"); + int pad_size = ctx.Attr("pad_size"); + int kernel_size = ctx.Attr("kernel_size"); + int stride1 = ctx.Attr("stride1"); + int stride2 = ctx.Attr("stride2"); + int max_displacement = ctx.Attr("max_displacement"); + int corr_type_multiply = ctx.Attr("corr_type_multiply"); + + auto *output = ctx.Output("Output"); + output->mutable_data(ctx.GetPlace()); + auto &dev_ctx = ctx.template device_context(); + + // base on input1, NCHW + auto in_dims = input1->dims(); + int N = in_dims[0]; + int C = in_dims[1]; + int H = in_dims[2]; + int W = in_dims[3]; + + int padded_input_height = H + 2 * pad_size; + int padded_input_width = W + 2 * pad_size; + + Tensor rinput1 = ctx.AllocateTmpTensor( + {N, padded_input_height, padded_input_width, C}, dev_ctx); + rinput1.mutable_data(ctx.GetPlace()); + + Tensor rinput2 = ctx.AllocateTmpTensor( + {N, padded_input_height, padded_input_width, C}, dev_ctx); + rinput2.mutable_data(ctx.GetPlace()); + + set_zero<<<(rinput1.numel() + 512 - 1) / 512, 512, 0, dev_ctx.stream()>>>( + rinput1.data(), rinput1.numel()); + set_zero<<<(rinput2.numel() + 512 - 1) / 512, 512, 0, dev_ctx.stream()>>>( + rinput2.data(), rinput2.numel()); + set_zero<<<(output->numel() + 512 - 1) / 512, 512, 0, dev_ctx.stream()>>>( + output->data(), output->numel()); + + auto out_dims = output->dims(); + int OC = out_dims[1]; + int OH = out_dims[2]; + int OW = out_dims[3]; + + dim3 blocks_grid(N, H, W); + dim3 threads_block(THREADS_PER_BLOCK); + + channel_first<<>>( + input1->data(), rinput1.data(), C, H, W, pad_size); + channel_first<<>>( + input2->data(), rinput2.data(), C, H, W, pad_size); + + dim3 threadsPerBlock(THREADS_PER_BLOCK); + dim3 totalBlocksCorr(N, OH, OW); + + correlation_forward< + T><<>>( + output->data(), OC, OH, OW, rinput1.data(), C, H, W, + rinput2.data(), pad_size, kernel_size, max_displacement, stride1, + stride2); + } +}; + +template +__global__ void correlation_backward_input1( + int item, T *grad_input1, const int input_channel, const int input_height, + const int input_width, const T *grad_output, const int output_channel, + const int output_height, const int output_width, const T *rinput2, + const int pad_size, const int kernel_size, const int max_displacement, + const int stride1, const int stride2) { + int n = item; + int h = blockIdx.x * stride1 + pad_size; + int w = blockIdx.y * stride1 + pad_size; + int c = blockIdx.z; + int tch_off = threadIdx.x; + + int kernel_rad = (kernel_size - 1) / 2; + int displacement_rad = max_displacement / stride2; + int displacement_size = 2 * displacement_rad + 1; + + int xmin = (w - kernel_rad - max_displacement) / stride1; + int ymin = (h - kernel_rad - max_displacement) / stride1; + + int xmax = (w + kernel_rad - max_displacement) / stride1; + int ymax = (h + kernel_rad - max_displacement) / stride1; + + if (xmax < 0 || ymax < 0 || xmin >= output_width || ymin >= output_height) { + return; + } + + if (xmin > xmax || ymin > ymax) { + return; + } + + xmin = max(0, xmin); + xmax = min(output_width - 1, xmax); + + ymin = max(0, ymin); + ymax = min(output_height - 1, ymax); + + int p_input_width = input_width + 2 * pad_size; + int p_input_height = input_height + 2 * pad_size; + int p_dimchw = input_channel * p_input_height * p_input_width; + int p_dimcw = input_channel * p_input_width; + int p_dimc = input_channel; + + int t_dimchw = output_channel * output_height * output_width; + int t_dimhw = output_height * output_width; + int t_dimw = output_width; + + int o_dimchw = input_channel * input_height * input_width; + int o_dimhw = input_height * input_width; + int o_dimw = input_width; + + int nelems = kernel_size * kernel_size * input_channel; + + __shared__ T prod_sum[THREADS_PER_BLOCK]; + prod_sum[tch_off] = 0; + + for (int tc = tch_off; tc < output_channel; tc += THREADS_PER_BLOCK) { + int i2 = (tc % displacement_size - displacement_rad) * stride2; + int j2 = (tc / displacement_size - displacement_rad) * stride2; + + int index2 = n * p_dimchw + (h + j2) * p_dimcw + (w + i2) * p_dimc + c; + + T val2 = rinput2[index2]; + for (int j = ymin; j <= ymax; ++j) { + for (int i = xmin; i <= xmax; ++i) { + int t_index = n * t_dimchw + tc * t_dimhw + j * t_dimw + i; + prod_sum[tch_off] += grad_output[t_index] * val2; + } + } + } + + __syncthreads(); + + if (tch_off == 0) { + T reduce_sum = 0; + for (int index = 0; index < THREADS_PER_BLOCK; index++) { + reduce_sum += prod_sum[index]; + } + const int index1 = + n * o_dimchw + c * o_dimhw + (h - pad_size) * o_dimw + (w - pad_size); + grad_input1[index1] = static_cast(reduce_sum / nelems); + } +} + +template +__global__ void correlation_backward_input2( + int item, T *grad_input2, const int input_channel, const int input_height, + const int input_width, const T *grad_output, const int output_channel, + const int output_height, const int output_width, const T *rinput1, + const int pad_size, const int kernel_size, const int max_displacement, + const int stride1, const int stride2) { + int n = item; + int h = blockIdx.x * stride1 + pad_size; + int w = blockIdx.y * stride1 + pad_size; + int c = blockIdx.z; + + int tch_off = threadIdx.x; + + int kernel_rad = (kernel_size - 1) / 2; + int displacement_rad = max_displacement / stride2; + int displacement_size = 2 * displacement_rad + 1; + + int p_input_width = input_width + 2 * pad_size; + int p_input_height = input_height + 2 * pad_size; + int p_dimchw = input_channel * p_input_height * p_input_width; + int p_dimcw = input_channel * p_input_width; + int p_dimc = input_channel; + + int t_dimchw = output_channel * output_height * output_width; + int t_dimhw = output_height * output_width; + int t_dimw = output_width; + + int o_dimchw = input_channel * input_height * input_width; + int o_dimhw = input_height * input_width; + int o_dimw = input_width; + + int nelems = kernel_size * kernel_size * input_channel; + + __shared__ T prod_sum[THREADS_PER_BLOCK]; + prod_sum[tch_off] = 0; + + for (int tc = tch_off; tc < output_channel; tc += THREADS_PER_BLOCK) { + int i2 = (tc % displacement_size - displacement_rad) * stride2; + int j2 = (tc / displacement_size - displacement_rad) * stride2; + + int xmin = (w - kernel_rad - max_displacement - i2) / stride1; + int ymin = (h - kernel_rad - max_displacement - j2) / stride1; + + int xmax = (w + kernel_rad - max_displacement - i2) / stride1; + int ymax = (h + kernel_rad - max_displacement - j2) / stride1; + + if (xmax < 0 || ymax < 0 || xmin >= output_width || ymin >= output_height) { + continue; + } + + if (xmin > xmax || ymin > ymax) { + continue; + } + + xmin = max(0, xmin); + xmax = min(output_width - 1, xmax); + + ymin = max(0, ymin); + ymax = min(output_height - 1, ymax); + + int index1 = n * p_dimchw + (h - j2) * p_dimcw + (w - i2) * p_dimc + c; + T val1 = rinput1[index1]; + for (int j = ymin; j <= ymax; ++j) { + for (int i = xmin; i <= xmax; ++i) { + int t_index = n * t_dimchw + tc * t_dimhw + j * t_dimw + i; + prod_sum[tch_off] += grad_output[t_index] * val1; + } + } + } + + __syncthreads(); + + if (tch_off == 0) { + T reduce_sum = 0; + for (int index = 0; index < THREADS_PER_BLOCK; index++) { + reduce_sum += prod_sum[index]; + } + const int index2 = + n * o_dimchw + c * o_dimhw + (h - pad_size) * o_dimw + (w - pad_size); + grad_input2[index2] = static_cast(reduce_sum / nelems); + } +} + +template +class CorrelationCUDAGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, + platform::errors::InvalidArgument( + "Correlation only supports GPU now.")); + const auto *input1 = ctx.Input("Input1"); + const auto *input2 = ctx.Input("Input2"); + const auto *grad_output = + ctx.Input(framework::GradVarName("Output")); + const int pad_size = ctx.Attr("pad_size"); + const int kernel_size = ctx.Attr("kernel_size"); + const int stride1 = ctx.Attr("stride1"); + const int stride2 = ctx.Attr("stride2"); + const int max_displacement = ctx.Attr("max_displacement"); + const int corr_type_multiply = ctx.Attr("corr_type_multiply"); + + auto *grad_input1 = ctx.Output(framework::GradVarName("Input1")); + grad_input1->mutable_data(ctx.GetPlace()); + auto *grad_input2 = ctx.Output(framework::GradVarName("Input2")); + grad_input2->mutable_data(ctx.GetPlace()); + auto &dev_ctx = ctx.template device_context(); + + auto in_dims = input1->dims(); + int N = in_dims[0]; + int C = in_dims[1]; + int H = in_dims[2]; + int W = in_dims[3]; + + int padded_input_height = H + 2 * pad_size; + int padded_input_width = W + 2 * pad_size; + + Tensor rinput1 = ctx.AllocateTmpTensor( + {N, padded_input_height, padded_input_width, C}, dev_ctx); + rinput1.mutable_data(ctx.GetPlace()); + + Tensor rinput2 = ctx.AllocateTmpTensor( + {N, padded_input_height, padded_input_width, C}, dev_ctx); + rinput2.mutable_data(ctx.GetPlace()); + + set_zero<<<(rinput1.numel() + 512 - 1) / 512, 512, 0, dev_ctx.stream()>>>( + rinput1.data(), rinput1.numel()); + set_zero<<<(rinput2.numel() + 512 - 1) / 512, 512, 0, dev_ctx.stream()>>>( + rinput2.data(), rinput2.numel()); + set_zero<<<(grad_input1->numel() + 512 - 1) / 512, 512, 0, + dev_ctx.stream()>>>(grad_input1->data(), + grad_input1->numel()); + set_zero<<<(grad_input2->numel() + 512 - 1) / 512, 512, 0, + dev_ctx.stream()>>>(grad_input2->data(), + grad_input2->numel()); + + auto grad_out_dims = grad_output->dims(); + int GOC = grad_out_dims[1]; + int GOH = grad_out_dims[2]; + int GOW = grad_out_dims[3]; + + dim3 blocks_grid(N, H, W); + dim3 threads_block(THREADS_PER_BLOCK); + + channel_first<<>>( + input1->data(), rinput1.data(), C, H, W, pad_size); + channel_first<<>>( + input2->data(), rinput2.data(), C, H, W, pad_size); + + dim3 threadsPerBlock(THREADS_PER_BLOCK); + dim3 totalBlocksCorr(H, W, C); + + for (int n = 0; n < N; n++) { + correlation_backward_input1< + T><<>>( + n, grad_input1->data(), C, H, W, grad_output->data(), GOC, GOH, + GOW, rinput2.data(), pad_size, kernel_size, max_displacement, + stride1, stride2); + } + + for (int n = 0; n < N; n++) { + correlation_backward_input2< + T><<>>( + n, grad_input2->data(), C, H, W, grad_output->data(), GOC, GOH, + GOW, rinput1.data(), pad_size, kernel_size, max_displacement, + stride1, stride2); + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_CUDA_KERNEL(correlation, ops::CorrelationCUDAKernel, + ops::CorrelationCUDAKernel); +REGISTER_OP_CUDA_KERNEL(correlation_grad, ops::CorrelationCUDAGradKernel, + ops::CorrelationCUDAGradKernel); diff --git a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc index b3e3332fe3425301a3dede3a3d810697ad4debf3..44f602237da2e2c8fa26e39326f977d10235155d 100644 --- a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc +++ b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations under the License.*/ #include "paddle/fluid/operators/detection/collect_fpn_proposals_op.h" +#include "paddle/fluid/framework/op_version_registry.h" namespace paddle { namespace operators { @@ -54,11 +55,14 @@ class CollectFpnProposalsOp : public framework::OperatorWithKernel { score_dim[1])); } context->SetOutputDim("FpnRois", {post_nms_topN, 4}); + if (context->HasOutput("RoisNum")) { + context->SetOutputDim("RoisNum", {-1}); + } if (!context->IsRuntime()) { // Runtime LoD infershape will be computed // in Kernel. context->ShareLoD("MultiLevelRois", "FpnRois"); } - if (context->IsRuntime()) { + if (context->IsRuntime() && !context->HasInputs("MultiLevelRoIsNum")) { std::vector roi_inputs = context->GetInputVarPtrs("MultiLevelRois"); std::vector score_inputs = @@ -99,7 +103,16 @@ class CollectFpnProposalsOpMaker : public framework::OpProtoAndCheckerMaker { "(LoDTensor) Multiple score LoDTensors from each level in shape" " (N, 1), N is the number of RoIs.") .AsDuplicable(); + AddInput( + "MultiLevelRoIsNum", + "(List of Tensor) The RoIs' number of each image on multiple levels." + "The number on each level has the shape of (N), N is the number of " + "images.") + .AsDuplicable() + .AsDispensable(); AddOutput("FpnRois", "(LoDTensor) All selected RoIs with highest scores"); + AddOutput("RoisNum", "(Tensor), Number of RoIs in each images.") + .AsDispensable(); AddAttr("post_nms_topN", "Select post_nms_topN RoIs from" " all images and all fpn layers"); @@ -123,3 +136,14 @@ REGISTER_OPERATOR( REGISTER_OP_CPU_KERNEL(collect_fpn_proposals, ops::CollectFpnProposalsOpKernel, ops::CollectFpnProposalsOpKernel); +REGISTER_OP_VERSION(collect_fpn_proposals) + .AddCheckpoint( + R"ROC( + Upgrade collect_fpn_proposals add a new input + [MultiLevelRoIsNum] and add a new output [RoisNum].)ROC", + paddle::framework::compatible::OpVersionDesc() + .NewInput("MultiLevelRoIsNum", + "The RoIs' number of each image on multiple levels." + "The number on each level has the shape of (N), " + "N is the number of images.") + .NewOutput("RoisNum", "The number of RoIs in each image.")); diff --git a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu index 35222a85cd388f6fef3c61c440be7b36598d9e01..86207052bb2bef4f7bea34c2614fe7686f579de8 100644 --- a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu +++ b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu @@ -80,14 +80,27 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel { int lod_size; auto place = BOOST_GET_CONST(platform::CUDAPlace, dev_ctx.GetPlace()); + auto multi_rois_num = ctx.MultiInput("MultiLevelRoIsNum"); for (size_t i = 0; i < roi_ins.size(); ++i) { auto roi_in = roi_ins[i]; auto score_in = score_ins[i]; - auto roi_lod = roi_in->lod().back(); - lod_size = roi_lod.size() - 1; - for (size_t n = 0; n < lod_size; ++n) { - for (size_t j = roi_lod[n]; j < roi_lod[n + 1]; ++j) { - roi_batch_id_data[index++] = n; + if (multi_rois_num.size() > 0) { + framework::Tensor temp; + TensorCopySync(*multi_rois_num[i], platform::CPUPlace(), &temp); + const int* length_in = temp.data(); + lod_size = multi_rois_num[i]->numel(); + for (size_t n = 0; n < lod_size; ++n) { + for (size_t j = 0; j < length_in[n]; ++j) { + roi_batch_id_data[index++] = n; + } + } + } else { + auto length_in = roi_in->lod().back(); + lod_size = length_in.size() - 1; + for (size_t n = 0; n < lod_size; ++n) { + for (size_t j = length_in[n]; j < length_in[n + 1]; ++j) { + roi_batch_id_data[index++] = n; + } } } @@ -190,6 +203,13 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel { offset.emplace_back(offset.back() + length_lod_cpu[i]); } + if (ctx.HasOutput("RoisNum")) { + auto* rois_num = ctx.Output("RoisNum"); + int* rois_num_data = rois_num->mutable_data({lod_size}, place); + memory::Copy(place, rois_num_data, place, length_lod_data, + lod_size * sizeof(int), dev_ctx.stream()); + } + framework::LoD lod; lod.emplace_back(offset); fpn_rois->set_lod(lod); diff --git a/paddle/fluid/operators/detection/collect_fpn_proposals_op.h b/paddle/fluid/operators/detection/collect_fpn_proposals_op.h index badd88f0689ba9defcb3f26eb57fef89308aa877..950b8b78933bff6bf1692df61142258dfbc87a8c 100644 --- a/paddle/fluid/operators/detection/collect_fpn_proposals_op.h +++ b/paddle/fluid/operators/detection/collect_fpn_proposals_op.h @@ -17,6 +17,7 @@ limitations under the License.*/ #include #include #include +#include #include #include #include "paddle/fluid/framework/op_registry.h" @@ -65,6 +66,8 @@ class CollectFpnProposalsOpKernel : public framework::OpKernel { auto multi_layer_scores = context.MultiInput("MultiLevelScores"); + auto multi_rois_num = context.MultiInput("MultiLevelRoIsNum"); + int num_size = multi_rois_num.size(); auto* fpn_rois = context.Output("FpnRois"); @@ -88,11 +91,21 @@ class CollectFpnProposalsOpKernel : public framework::OpKernel { const int num_fpn_level = multi_layer_rois.size(); std::vector integral_of_all_rois(num_fpn_level + 1, 0); for (int i = 0; i < num_fpn_level; ++i) { - auto cur_rois_lod = multi_layer_rois[i]->lod().back(); - integral_of_all_rois[i + 1] = - integral_of_all_rois[i] + cur_rois_lod[cur_rois_lod.size() - 1]; + int all_rois = 0; + if (num_size == 0) { + auto cur_rois_lod = multi_layer_rois[i]->lod().back(); + all_rois = cur_rois_lod[cur_rois_lod.size() - 1]; + } else { + const int* cur_rois_num = multi_rois_num[i]->data(); + all_rois = std::accumulate( + cur_rois_num, cur_rois_num + multi_rois_num[i]->numel(), 0); + } + integral_of_all_rois[i + 1] = integral_of_all_rois[i] + all_rois; } + const int batch_size = (num_size == 0) + ? multi_layer_rois[0]->lod().back().size() - 1 + : multi_rois_num[0]->numel(); // concatenate all fpn rois scores into a list // create a vector to store all scores std::vector> scores_of_all_rois( @@ -100,11 +113,20 @@ class CollectFpnProposalsOpKernel : public framework::OpKernel { for (int i = 0; i < num_fpn_level; ++i) { const T* cur_level_scores = multi_layer_scores[i]->data(); int cur_level_num = integral_of_all_rois[i + 1] - integral_of_all_rois[i]; - auto cur_scores_lod = multi_layer_scores[i]->lod().back(); int cur_batch_id = 0; + int pre_num = 0; for (int j = 0; j < cur_level_num; ++j) { - if (static_cast(j) >= cur_scores_lod[cur_batch_id + 1]) { - cur_batch_id++; + if (num_size == 0) { + auto cur_scores_lod = multi_layer_scores[i]->lod().back(); + if (static_cast(j) >= cur_scores_lod[cur_batch_id + 1]) { + cur_batch_id++; + } + } else { + const int* rois_num_data = multi_rois_num[i]->data(); + if (j >= pre_num + rois_num_data[cur_batch_id]) { + pre_num += rois_num_data[cur_batch_id]; + cur_batch_id++; + } } int cur_index = j + integral_of_all_rois[i]; scores_of_all_rois[cur_index].score = cur_level_scores[j]; @@ -134,6 +156,9 @@ class CollectFpnProposalsOpKernel : public framework::OpKernel { T* fpn_rois_data = fpn_rois->data(); std::vector lod0(1, 0); int cur_batch_id = 0; + std::vector num_per_batch; + int pre_idx = 0; + int cur_num = 0; for (int i = 0; i < post_nms_topN; ++i) { int cur_fpn_level = scores_of_all_rois[i].level; int cur_level_index = scores_of_all_rois[i].index; @@ -144,6 +169,18 @@ class CollectFpnProposalsOpKernel : public framework::OpKernel { if (scores_of_all_rois[i].batch_id != cur_batch_id) { cur_batch_id = scores_of_all_rois[i].batch_id; lod0.emplace_back(i); + cur_num = i - pre_idx; + pre_idx = i; + num_per_batch.emplace_back(cur_num); + } + } + num_per_batch.emplace_back(post_nms_topN - pre_idx); + if (context.HasOutput("RoisNum")) { + auto* rois_num = context.Output("RoisNum"); + int* rois_num_data = + rois_num->mutable_data({batch_size}, context.GetPlace()); + for (int i = 0; i < batch_size; i++) { + rois_num_data[i] = num_per_batch[i]; } } lod0.emplace_back(post_nms_topN); diff --git a/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cc b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cc index 160d43a917b3c74ff905e070714415d35c5c877c..614b37e703e721337057e04c5611386ff87a1e9e 100644 --- a/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cc +++ b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/detection/distribute_fpn_proposals_op.h" +#include "paddle/fluid/framework/op_version_registry.h" namespace paddle { namespace operators { @@ -48,6 +49,14 @@ class DistributeFpnProposalsOp : public framework::OperatorWithKernel { } ctx->SetOutputsDim("MultiFpnRois", outs_dims); ctx->SetOutputDim("RestoreIndex", {-1, 1}); + + if (ctx->HasOutputs("MultiLevelRoIsNum")) { + std::vector outs_num_dims; + for (size_t i = 0; i < num_out_rois; ++i) { + outs_num_dims.push_back({-1}); + } + ctx->SetOutputsDim("MultiLevelRoIsNum", outs_num_dims); + } if (!ctx->IsRuntime()) { for (size_t i = 0; i < num_out_rois; ++i) { ctx->SetLoDLevel("MultiFpnRois", ctx->GetLoDLevel("FpnRois"), i); @@ -66,12 +75,22 @@ class DistributeFpnProposalsOp : public framework::OperatorWithKernel { class DistributeFpnProposalsOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("FpnRois", "(LoDTensor) The rois at all levels in shape (-1, 4)"); + AddInput("FpnRois", "(LoDTensor) The RoIs at all levels in shape (-1, 4)"); + AddInput("RoisNum", + "(Tensor) The number of RoIs in shape (B)," + "B is the number of images") + .AsDispensable(); AddOutput("MultiFpnRois", "(LoDTensor) Output with distribute operator") .AsDuplicable(); AddOutput("RestoreIndex", "(Tensor) An array of positive number which is " "used to restore the order of FpnRois"); + AddOutput("MultiLevelRoIsNum", + "(List of Tensor) The RoIs' number of each image on multiple " + "levels. The number on each level has the shape of (B)," + "B is the number of images.") + .AsDuplicable() + .AsDispensable(); AddAttr("min_level", "The lowest level of FPN layer where the" " proposals come from"); @@ -105,3 +124,14 @@ REGISTER_OPERATOR( REGISTER_OP_CPU_KERNEL(distribute_fpn_proposals, ops::DistributeFpnProposalsOpKernel, ops::DistributeFpnProposalsOpKernel); +REGISTER_OP_VERSION(distribute_fpn_proposals) + .AddCheckpoint( + R"ROC( + Upgrade distribute_fpn_proposals add a new input + [RoisNum] and add a new output [MultiLevelRoIsNum].)ROC", + paddle::framework::compatible::OpVersionDesc() + .NewInput("RoIsNum", "The number of RoIs in each image.") + .NewOutput("MultiLevelRoisNum", + "The RoIs' number of each image on multiple " + "levels. The number on each level has the shape of (B)," + "B is the number of images.")); diff --git a/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cu b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cu index 1e3cd9f36c595f978f5b5e5f5c5cf5cad6dc9059..27c06a0f8fb207b5dc85c7875ea91428b16e606c 100644 --- a/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cu +++ b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cu @@ -76,12 +76,20 @@ class GPUDistributeFpnProposalsOpKernel : public framework::OpKernel { int num_level = max_level - min_level + 1; // check that the fpn_rois is not empty - PADDLE_ENFORCE_EQ( - fpn_rois->lod().size(), 1UL, - platform::errors::InvalidArgument("DistributeFpnProposalsOp needs LoD" - "with one level")); + if (!ctx.HasInput("RoisNum")) { + PADDLE_ENFORCE_EQ( + fpn_rois->lod().size(), 1UL, + platform::errors::InvalidArgument("DistributeFpnProposalsOp needs LoD" + "with one level")); + } - auto fpn_rois_lod = fpn_rois->lod().back(); + std::vector fpn_rois_lod; + if (ctx.HasInput("RoisNum")) { + auto* rois_num = ctx.Input("RoisNum"); + fpn_rois_lod = GetLodFromRoisNum(rois_num); + } else { + fpn_rois_lod = fpn_rois->lod().back(); + } int lod_size = fpn_rois_lod.size() - 1; int roi_num = fpn_rois_lod[lod_size]; @@ -154,6 +162,8 @@ class GPUDistributeFpnProposalsOpKernel : public framework::OpKernel { restore_idx_data, roi_num); int start = 0; + auto multi_rois_num = ctx.MultiOutput("MultiLevelRoIsNum"); + for (int i = 0; i < num_level; ++i) { Tensor sub_lod = sub_lod_list.Slice(i, i + 1); int* sub_lod_data = sub_lod.data(); @@ -180,6 +190,11 @@ class GPUDistributeFpnProposalsOpKernel : public framework::OpKernel { multi_fpn_rois[i]->mutable_data({sub_rois_num, kBoxDim}, dev_ctx.GetPlace()); } + if (multi_rois_num.size() > 0) { + Tensor* rois_num_t = multi_rois_num[i]; + TensorCopySync(sub_lod, dev_ctx.GetPlace(), rois_num_t); + rois_num_t->Resize({lod_size}); + } framework::LoD lod; lod.emplace_back(offset); multi_fpn_rois[i]->set_lod(lod); diff --git a/paddle/fluid/operators/detection/distribute_fpn_proposals_op.h b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.h index 0c84b385ccbc1dd26453bd957661c0310b7137e3..79498f01536d2fb2616921a2ef1ffa04f13fae64 100644 --- a/paddle/fluid/operators/detection/distribute_fpn_proposals_op.h +++ b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.h @@ -28,6 +28,21 @@ namespace operators { const int kBoxDim = 4; +inline std::vector GetLodFromRoisNum(const Tensor* rois_num) { + std::vector rois_lod; + auto* rois_num_data = rois_num->data(); + Tensor cpu_tensor; + if (platform::is_gpu_place(rois_num->place())) { + TensorCopySync(*rois_num, platform::CPUPlace(), &cpu_tensor); + rois_num_data = cpu_tensor.data(); + } + rois_lod.push_back(static_cast(0)); + for (int i = 0; i < rois_num->numel(); ++i) { + rois_lod.push_back(rois_lod.back() + static_cast(rois_num_data[i])); + } + return rois_lod; +} + template static inline T BBoxArea(const T* box, bool normalized) { if (box[2] < box[0] || box[3] < box[1]) { @@ -65,13 +80,22 @@ class DistributeFpnProposalsOpKernel : public framework::OpKernel { const int num_level = max_level - min_level + 1; // check that the fpn_rois is not empty - PADDLE_ENFORCE_EQ( - fpn_rois->lod().size(), 1UL, - platform::errors::InvalidArgument("DistributeFpnProposalsOp needs LoD " - "with one level.")); + if (!context.HasInput("RoisNum")) { + PADDLE_ENFORCE_EQ(fpn_rois->lod().size(), 1UL, + platform::errors::InvalidArgument( + "DistributeFpnProposalsOp needs LoD " + "with one level.")); + } - auto fpn_rois_lod = fpn_rois->lod().back(); - int fpn_rois_num = fpn_rois_lod[fpn_rois_lod.size() - 1]; + std::vector fpn_rois_lod; + int fpn_rois_num; + if (context.HasInput("RoisNum")) { + auto* rois_num = context.Input("RoisNum"); + fpn_rois_lod = GetLodFromRoisNum(rois_num); + } else { + fpn_rois_lod = fpn_rois->lod().back(); + } + fpn_rois_num = fpn_rois_lod[fpn_rois_lod.size() - 1]; std::vector target_level; // std::vector target_level(fpn_rois_num, -1); // record the number of rois in each level @@ -136,6 +160,18 @@ class DistributeFpnProposalsOpKernel : public framework::OpKernel { for (int i = 0; i < fpn_rois_num; ++i) { restore_index_data[restore_index_inter[i]] = i; } + auto multi_rois_num = context.MultiOutput("MultiLevelRoIsNum"); + if (multi_rois_num.size() > 0) { + int batch_size = fpn_rois_lod.size() - 1; + for (int i = 0; i < num_level; ++i) { + int* rois_num_data = multi_rois_num[i]->mutable_data( + {batch_size}, context.GetPlace()); + for (int j = 0; j < batch_size; ++j) { + rois_num_data[j] = static_cast(multi_fpn_rois_lod0[i][j + 1] - + multi_fpn_rois_lod0[i][j]); + } + } + } // merge lod information into LoDTensor for (int i = 0; i < num_level; ++i) { framework::LoD lod; diff --git a/paddle/fluid/operators/detection/generate_proposals_op.cc b/paddle/fluid/operators/detection/generate_proposals_op.cc index 981a368e8564fbcd3d688bc67d2def8664bcfe8d..06e560f86d4e0a74f7ae04b155829618ce634697 100644 --- a/paddle/fluid/operators/detection/generate_proposals_op.cc +++ b/paddle/fluid/operators/detection/generate_proposals_op.cc @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/operators/gather.h" #include "paddle/fluid/operators/math/math_function.h" @@ -61,6 +62,10 @@ class GenerateProposalsOp : public framework::OperatorWithKernel { ctx->SetOutputDim("RpnRois", {-1, 4}); ctx->SetOutputDim("RpnRoiProbs", {-1, 1}); + if (!ctx->IsRuntime()) { + ctx->SetLoDLevel("RpnRois", std::max(ctx->GetLoDLevel("Scores"), 1)); + ctx->SetLoDLevel("RpnRoiProbs", std::max(ctx->GetLoDLevel("Scores"), 1)); + } } protected: @@ -347,7 +352,7 @@ class GenerateProposalsKernel : public framework::OpKernel { lod0.push_back(0); anchors.Resize({anchors.numel() / 4, 4}); variances.Resize({variances.numel() / 4, 4}); - std::vector tmp_lod; + std::vector tmp_num; int64_t num_proposals = 0; for (int64_t i = 0; i < num; ++i) { @@ -369,16 +374,16 @@ class GenerateProposalsKernel : public framework::OpKernel { AppendProposals(rpn_roi_probs, num_proposals, scores); num_proposals += proposals.dims()[0]; lod0.push_back(num_proposals); - tmp_lod.push_back(num_proposals); + tmp_num.push_back(proposals.dims()[0]); } - if (context.HasOutput("RpnRoisLod")) { - auto *rpn_rois_lod = context.Output("RpnRoisLod"); - rpn_rois_lod->mutable_data({num}, context.GetPlace()); - int64_t *lod_data = rpn_rois_lod->data(); + if (context.HasOutput("RpnRoisNum")) { + auto *rpn_rois_num = context.Output("RpnRoisNum"); + rpn_rois_num->mutable_data({num}, context.GetPlace()); + int *num_data = rpn_rois_num->data(); for (int i = 0; i < num; i++) { - lod_data[i] = tmp_lod[i]; + num_data[i] = tmp_num[i]; } - rpn_rois_lod->Resize({num}); + rpn_rois_num->Resize({num}); } rpn_rois->set_lod(lod); rpn_roi_probs->set_lod(lod); @@ -433,6 +438,16 @@ class GenerateProposalsKernel : public framework::OpKernel { Tensor keep; FilterBoxes(ctx, &proposals, min_size, im_info_slice, &keep); + // Handle the case when there is no keep index left + if (keep.numel() == 0) { + math::SetConstant set_zero; + bbox_sel.mutable_data({1, 4}, ctx.GetPlace()); + set_zero(ctx, &bbox_sel, static_cast(0)); + Tensor scores_filter; + scores_filter.mutable_data({1, 1}, ctx.GetPlace()); + set_zero(ctx, &scores_filter, static_cast(0)); + return std::make_pair(bbox_sel, scores_filter); + } Tensor scores_filter; bbox_sel.mutable_data({keep.numel(), 4}, ctx.GetPlace()); @@ -481,7 +496,8 @@ class GenerateProposalsOpMaker : public framework::OpProtoAndCheckerMaker { "(LoDTensor), Output proposals with shape (rois_num, 4)."); AddOutput("RpnRoiProbs", "(LoDTensor) Scores of proposals with shape (rois_num, 1)."); - AddOutput("RpnRoisLod", "(Tensor), rpn rois's lod info").AsDispensable(); + AddOutput("RpnRoisNum", "(Tensor), The number of Rpn RoIs in each image") + .AsDispensable(); AddAttr("pre_nms_topN", "Number of top scoring RPN proposals to keep before " "applying NMS."); @@ -515,3 +531,11 @@ REGISTER_OPERATOR( paddle::framework::EmptyGradOpMaker); REGISTER_OP_CPU_KERNEL(generate_proposals, ops::GenerateProposalsKernel, ops::GenerateProposalsKernel); +REGISTER_OP_VERSION(generate_proposals) + .AddCheckpoint( + R"ROC( + Upgrade generate_proposals add a new output [RpnRoisNum])ROC", + paddle::framework::compatible::OpVersionDesc().NewOutput( + "RpnRoisNum", + "The number of Rpn RoIs in each image. RpnRoisNum is " + "dispensable.")); diff --git a/paddle/fluid/operators/detection/generate_proposals_op.cu b/paddle/fluid/operators/detection/generate_proposals_op.cu index fa7670f6d680a95da1c1abd5befe1651ccb7265f..485136d8e2f7ab66f6b1c58deb09036ea5d4e1ec 100644 --- a/paddle/fluid/operators/detection/generate_proposals_op.cu +++ b/paddle/fluid/operators/detection/generate_proposals_op.cu @@ -330,6 +330,15 @@ static std::pair ProposalForOneImage( keep_index.Resize({keep_num}); Tensor scores_filter, proposals_filter; + // Handle the case when there is no keep index left + if (keep_num == 0) { + math::SetConstant set_zero; + proposals_filter.mutable_data({1, 4}, ctx.GetPlace()); + scores_filter.mutable_data({1, 1}, ctx.GetPlace()); + set_zero(ctx, &proposals_filter, static_cast(0)); + set_zero(ctx, &scores_filter, static_cast(0)); + return std::make_pair(proposals_filter, scores_filter); + } proposals_filter.mutable_data({keep_num, 4}, ctx.GetPlace()); scores_filter.mutable_data({keep_num, 1}, ctx.GetPlace()); GPUGather(ctx, proposals, keep_index, &proposals_filter); @@ -421,7 +430,7 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel { int64_t num_proposals = 0; std::vector offset(1, 0); - std::vector tmp_lod; + std::vector tmp_num; for (int64_t i = 0; i < num; ++i) { Tensor im_info_slice = im_info->Slice(i, i + 1); @@ -448,15 +457,15 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel { dev_ctx.Wait(); num_proposals += proposals.dims()[0]; offset.emplace_back(num_proposals); - tmp_lod.push_back(num_proposals); + tmp_num.push_back(proposals.dims()[0]); } - if (context.HasOutput("RpnRoisLod")) { - auto *rpn_rois_lod = context.Output("RpnRoisLod"); - rpn_rois_lod->mutable_data({num}, context.GetPlace()); - int64_t *lod_data = rpn_rois_lod->data(); - memory::Copy(place, lod_data, cpu_place, &tmp_lod[0], - sizeof(int64_t) * num, dev_ctx.stream()); - rpn_rois_lod->Resize({num}); + if (context.HasOutput("RpnRoisNum")) { + auto *rpn_rois_num = context.Output("RpnRoisNum"); + rpn_rois_num->mutable_data({num}, context.GetPlace()); + int *num_data = rpn_rois_num->data(); + memory::Copy(place, num_data, cpu_place, &tmp_num[0], sizeof(int) * num, + dev_ctx.stream()); + rpn_rois_num->Resize({num}); } framework::LoD lod; lod.emplace_back(offset); diff --git a/paddle/fluid/operators/grid_sampler_op.cc b/paddle/fluid/operators/grid_sampler_op.cc index deb71b807128e5c0b173b517e60832894ced41e5..f5224239eb2ded9a156aadc9185eca89f4e3396f 100644 --- a/paddle/fluid/operators/grid_sampler_op.cc +++ b/paddle/fluid/operators/grid_sampler_op.cc @@ -115,7 +115,7 @@ class GridSampleOpMaker : public framework::OpProtoAndCheckerMaker { AddAttr( "padding_mode", "(bool, default true) The padding method used when source" - "index is out of input images. It can be 'zeros', 'reflect' and " + "index is out of input images. It can be 'zeros', 'reflection' and " "'border'.") .SetDefault("zeros"); @@ -174,6 +174,10 @@ class GridSampleOpGrad : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("X")), "Output", + framework::GradVarName("X"), "grid_sampler"); + OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("Grid")), "Output", + framework::GradVarName("Grid"), "grid_sampler"); auto input_dims = ctx->GetInputDim("X"); auto grid_dims = ctx->GetInputDim("Grid"); if (ctx->HasOutput(framework::GradVarName("X"))) { diff --git a/paddle/fluid/operators/grid_sampler_op.cu b/paddle/fluid/operators/grid_sampler_op.cu index 999f990448ca6370dadacbdaee5bf3bcadcaca0e..4e61d0c2ea7f91e4199c3e9daa3e93ac45bc0eb8 100644 --- a/paddle/fluid/operators/grid_sampler_op.cu +++ b/paddle/fluid/operators/grid_sampler_op.cu @@ -268,7 +268,7 @@ class GridSampleOpCUDAKernel : public framework::OpKernel { Mode mode; if (padding_mode_s == "border") { padding_mode = PaddingMode::border; - } else if (padding_mode_s == "reflect") { + } else if (padding_mode_s == "reflection") { padding_mode = PaddingMode::reflect; } else { padding_mode = PaddingMode::zeros; @@ -432,7 +432,7 @@ class GridSampleGradOpCUDAKernel : public framework::OpKernel { Mode mode; if (padding_mode_s == "border") { padding_mode = PaddingMode::border; - } else if (padding_mode_s == "reflect") { + } else if (padding_mode_s == "reflection") { padding_mode = PaddingMode::reflect; } else { padding_mode = PaddingMode::zeros; diff --git a/paddle/fluid/operators/grid_sampler_op.h b/paddle/fluid/operators/grid_sampler_op.h index eda800e78faf5da2bb379b8101e4823c5bc2d2f8..b8faef759ae90e14d1e83b66130bfe957b51907b 100644 --- a/paddle/fluid/operators/grid_sampler_op.h +++ b/paddle/fluid/operators/grid_sampler_op.h @@ -76,7 +76,7 @@ static inline void clip(const platform::CPUDeviceContext& ctx, if (padding_mode == "border") { grid_slice_t.device(place) = grid_slice_t.cwiseMax(static_cast(0)) .cwiseMin(static_cast(max_val)); - } else if (padding_mode == "reflect") { + } else if (padding_mode == "reflection") { if (align_corners) { auto double_range = static_cast(max_val * 2); auto grid_abs = grid_slice_t.abs(); @@ -117,7 +117,7 @@ static inline void clipWithMask(const platform::CPUDeviceContext& ctx, auto in_bound = (res == grid_slice_t); grid_scale_t.device(place) = grid_scale_t * in_bound.template cast(); grid_slice_t.device(place) = res; - } else if (padding_mode == "reflect") { + } else if (padding_mode == "reflection") { if (align_corners) { auto double_range = static_cast(max_val * 2); auto is_neg = (grid_slice_t < static_cast(0)); diff --git a/paddle/fluid/operators/kldiv_loss_op.h b/paddle/fluid/operators/kldiv_loss_op.h index 369fdb4872b4184d706a5264b58f70f63051fca1..857ecda303c2607b1b6fb9a5d2ec132b335d6c29 100644 --- a/paddle/fluid/operators/kldiv_loss_op.h +++ b/paddle/fluid/operators/kldiv_loss_op.h @@ -72,7 +72,11 @@ class KLDivLossKernel : public framework::OpKernel { loss_t.device(place) = output; } else if ("batchmean" == reduction) { auto output_sum = output.sum(); - loss_t.device(place) = output_sum / output_sum.constant(n); + if (n > 0) { + loss_t.device(place) = output_sum / output_sum.constant(n); + } else { + loss_t.device(place) = output_sum; + } } else if ("mean" == reduction) { loss_t.device(place) = output.mean(); } else if ("sum" == reduction) { diff --git a/paddle/fluid/operators/math/sequence2batch.cc b/paddle/fluid/operators/math/sequence2batch.cc index e4ffeedb5a0061dd60ca3a30aa9928ef8b05887c..300a3692012ab9631d7049d2042e91fb99ad3c21 100644 --- a/paddle/fluid/operators/math/sequence2batch.cc +++ b/paddle/fluid/operators/math/sequence2batch.cc @@ -29,11 +29,24 @@ class CopyMatrixRowsFunctor { auto src_dims = src.dims(); auto dst_dims = dst->dims(); PADDLE_ENFORCE_EQ(src_dims.size(), 2UL, - "The src must be matrix with rank 2."); + platform::errors::InvalidArgument( + "The source tensor must be a matrix with rank 2, but " + "got the source tensor rank is %lu. " + "Please check the rank of the source tensor", + src_dims.size())); PADDLE_ENFORCE_EQ(dst_dims.size(), 2UL, - "The dst must be matrix with rank 2."); - PADDLE_ENFORCE_EQ(src_dims[1], dst_dims[1], - "The width of src and dst must be same."); + platform::errors::InvalidArgument( + "The destination tensor must be a matrix with rank, " + "but got the destination tensor rank is %lu. " + "Please check the rank of the destination tensor", + dst_dims.size())); + PADDLE_ENFORCE_EQ( + src_dims[1], dst_dims[1], + platform::errors::InvalidArgument( + "The width of the source tensor and the destination tensor must be " + "same. But got %lu != %lu.Please check the rank of the source " + "tensor", + src_dims.size(), dst_dims.size())); auto height = dst_dims[0]; auto width = dst_dims[1]; auto* src_data = src.data(); diff --git a/paddle/fluid/operators/math/sequence2batch.cu b/paddle/fluid/operators/math/sequence2batch.cu index 9ab13659c1cc5b59d28395bcebcfb43fac5b4544..cd1ca572689bc701da801384e5ed08fe6dc10749 100644 --- a/paddle/fluid/operators/math/sequence2batch.cu +++ b/paddle/fluid/operators/math/sequence2batch.cu @@ -46,11 +46,24 @@ class CopyMatrixRowsFunctor { auto src_dims = src.dims(); auto dst_dims = dst->dims(); PADDLE_ENFORCE_EQ(src_dims.size(), 2, - "The src must be matrix with rank 2."); + platform::errors::InvalidArgument( + "The source tensor must be a matrix with rank 2, but " + "got the source tensor rank is %lu. " + "Please check the rank of the source tensor", + src_dims.size())); PADDLE_ENFORCE_EQ(dst_dims.size(), 2, - "The dst must be matrix with rank 2."); - PADDLE_ENFORCE_EQ(src_dims[1], dst_dims[1], - "The width of src and dst must be same."); + platform::errors::InvalidArgument( + "The destination tensor must be a matrix with rank, " + "but got the destination tensor rank is %lu. " + "Please check the rank of the destination tensor", + dst_dims.size())); + PADDLE_ENFORCE_EQ( + src_dims[1], dst_dims[1], + platform::errors::InvalidArgument( + "The width of the source tensor and the destination tensor must be " + "same. But got %lu != %lu.Please check the rank of the source " + "tensor", + src_dims.size(), dst_dims.size())); auto height = dst_dims[0]; auto width = dst_dims[1]; auto* src_data = src.data(); diff --git a/paddle/fluid/operators/math/sequence2batch.h b/paddle/fluid/operators/math/sequence2batch.h index 9d9f7ef00b8a12088225fd3620cb30b43ef9dce9..6aa513e4d10eef49c02417e98b31cddd57088d7c 100644 --- a/paddle/fluid/operators/math/sequence2batch.h +++ b/paddle/fluid/operators/math/sequence2batch.h @@ -64,19 +64,30 @@ class LoDTensor2BatchFunctor { bool is_reverse = false) const { if (!is_cal_batch_lod) { auto lods = batch->lod(); - PADDLE_ENFORCE_GT(lods.size(), 2UL, - "The LoD of LoDTensor should inlcude at least 2-level " - "sequence information."); + PADDLE_ENFORCE_GT( + lods.size(), 2UL, + platform::errors::InvalidArgument( + "The LoD of LoDTensor should inlcude at least 2-level " + "sequence information, but got the LoD level is %lu. Please " + "check the input value.", + lods.size())); PADDLE_ENFORCE_EQ( lods[1].size(), static_cast(lod_tensor.dims()[0]), - "The LoD information should be consistent with the dims."); + platform::errors::InvalidArgument( + "The LoD information should be consistent with the dims, but got " + "%lu != %lu. Please check the input value.", + lods[1].size(), static_cast(lod_tensor.dims()[0]))); CopyMatrixRowsFunctor to_batch; to_batch(context, lod_tensor, lods[1], batch, true); return; } auto lods = lod_tensor.lod(); - PADDLE_ENFORCE_EQ(lods.size(), 1UL, "Only support one level sequence now."); + PADDLE_ENFORCE_EQ(lods.size(), 1UL, + platform::errors::InvalidArgument( + "Only support one level sequence now, but got the " + "LoD level is %lu. Please check the input value.", + lods.size())); const auto& lod = lods[0]; @@ -161,12 +172,19 @@ class Batch2LoDTensorFunctor { const framework::LoDTensor& batch, framework::LoDTensor* lod_tensor) const { auto in_lod = batch.lod(); - PADDLE_ENFORCE_GT(in_lod.size(), 2UL, - "The LoD of LoDTensor should inlcude at least 2-level " - "sequence information."); + PADDLE_ENFORCE_GT( + in_lod.size(), 2UL, + platform::errors::InvalidArgument( + "The LoD of LoDTensor should inlcude at least 2-level " + "sequence information, but got the LoD level is %lu. Please check " + "the input value.", + in_lod.size())); PADDLE_ENFORCE_EQ( in_lod[1].size(), static_cast(lod_tensor->dims()[0]), - "The LoD information should be consistent with the dims."); + platform::errors::InvalidArgument( + "The LoD information should be consistent with the dims, but got " + "%lu != %lu. Please check the input value.", + in_lod[1].size(), static_cast(lod_tensor->dims()[0]))); CopyMatrixRowsFunctor to_seq; to_seq(context, batch, in_lod[1], lod_tensor, false); } diff --git a/paddle/fluid/operators/math/sequence_padding.cc b/paddle/fluid/operators/math/sequence_padding.cc index 4630689dec160da145e607f662a802444ac98b55..076df0176429c7bbd350698af0137fbcca18f806 100644 --- a/paddle/fluid/operators/math/sequence_padding.cc +++ b/paddle/fluid/operators/math/sequence_padding.cc @@ -35,7 +35,11 @@ void CopyValidData(framework::Tensor* dst_tensor, int valid_seq_len = seq_offsets[seq_idx + 1] - seq_offsets[seq_idx]; PADDLE_ENFORCE_GE( pad_seq_len, valid_seq_len, - "The padded sequence length can not be less than its original length."); + platform::errors::InvalidArgument( + "The padded sequence length can not " + "be less than its original length. Expected %ld >= %ld, but got " + "%ld < %ld. Please check input value.", + pad_seq_len, valid_seq_len, pad_seq_len, valid_seq_len)); int seq_data_offset = seq_offsets[seq_idx] * step_width; int pad_data_offset = layout == kBatchLengthWidth ? seq_idx * pad_seq_len * step_width @@ -95,9 +99,14 @@ class PaddingLoDTensorFunctor { CheckDims(seq_tensor_dims, pad_tensor_dims, seq_offsets, pad_seq_len, step_width, layout); - PADDLE_ENFORCE(pad_value.numel() == 1 || pad_value.numel() == step_width, - "The numel of 'pad_value' can only be 1 or be equal to the " - "'step_width'."); + + PADDLE_ENFORCE_EQ( + pad_value.numel() == 1 || pad_value.numel() == step_width, true, + platform::errors::InvalidArgument( + "The numel of 'pad_value' can only be 1 or be equal to the " + "'step_width', but got %ld != 1 and %ld. Please check the input " + "value.", + pad_value.numel(), step_width)); // fill padding value T* pad_data = pad_tensor->data(); diff --git a/paddle/fluid/operators/math/sequence_padding.cu b/paddle/fluid/operators/math/sequence_padding.cu index 1b433067900af71bb8a6833cef019d41f9c76858..19c3af03411b8ce95d274532707c7ee3e93f1d55 100644 --- a/paddle/fluid/operators/math/sequence_padding.cu +++ b/paddle/fluid/operators/math/sequence_padding.cu @@ -66,17 +66,25 @@ class PaddingLoDTensorFunctor { if (pad_seq_len == -1) { pad_seq_len = max_seq_len; } - PADDLE_ENFORCE_GE(pad_seq_len, max_seq_len, - "The pad_seq_len must be equal to or greater than the " - "original max sequence length."); + PADDLE_ENFORCE_GE( + pad_seq_len, max_seq_len, + platform::errors::InvalidArgument( + "The pad_seq_len must be equal to or greater than the " + "original max sequence length. Expected %ld >= %ld, but got %ld < " + "%ld. Please check the input value.", + pad_seq_len, max_seq_len, pad_seq_len, max_seq_len)); int step_width = seq_tensor.numel() / seq_tensor_dims[0]; int seq_num = seq_offsets.size() - 1; CheckDims(seq_tensor_dims, pad_tensor_dims, seq_offsets, pad_seq_len, step_width, layout); - PADDLE_ENFORCE(pad_value.numel() == 1 || pad_value.numel() == step_width, - "The numel of 'pad_value' can only be 1 or be equal to the " - "'step_width'."); + PADDLE_ENFORCE_EQ( + pad_value.numel() == 1 || pad_value.numel() == step_width, true, + platform::errors::InvalidArgument( + "The numel of 'pad_value' can only be 1 or be equal to " + "the 'step_width', but got %ld != 1 and %ld. Please check the " + "input value.", + pad_value.numel(), step_width)); const int kBlockSize = 512; diff --git a/paddle/fluid/operators/math/sequence_padding.h b/paddle/fluid/operators/math/sequence_padding.h index 5580ee5374658c3b7b8e31962cd50f1d72113ba0..956a4ff6a2d45cb619183f9beba1b7e35b7f229c 100644 --- a/paddle/fluid/operators/math/sequence_padding.h +++ b/paddle/fluid/operators/math/sequence_padding.h @@ -52,14 +52,25 @@ inline static void CheckDims(const framework::DDim& seq_tensor_dims, const framework::Vector& seq_offset, int64_t padded_seq_len, int64_t step_width, const PadLayout& layout) { - PADDLE_ENFORCE_EQ(static_cast(seq_tensor_dims[0]), seq_offset.back(), - "Value of 1st dimension of the sequence tensor should be " - "equal to sum of lengths of all sequences."); + PADDLE_ENFORCE_EQ( + static_cast(seq_tensor_dims[0]), seq_offset.back(), + platform::errors::InvalidArgument( + "Value of 1st dimension of the sequence tensor should be " + "equal to sum of lengths of all sequences. Expected %ld == %ld, but " + "got %ld != %ld. Please check the input value.", + static_cast(seq_tensor_dims[0]), seq_offset.back(), + static_cast(seq_tensor_dims[0]), seq_offset.back())); - PADDLE_ENFORCE(seq_tensor_dims.size() + 1 == pad_tensor_dims.size() || - seq_tensor_dims.size() == pad_tensor_dims.size(), - "pad_tensor's rank should be 1 greater than seq_tensor's " - "rank, or be equal with it."); + PADDLE_ENFORCE_EQ( + seq_tensor_dims.size() + 1 == pad_tensor_dims.size() || + seq_tensor_dims.size() == pad_tensor_dims.size(), + true, platform::errors::InvalidArgument( + "pad_tensor's rank should be 1 greater than seq_tensor's " + "rank, or be equal with it. The pad_tensor's rank is %ld, " + "expected the seq_tensor's rank is %ld or %ld, but got %ld. " + "Please check the input value.", + pad_tensor_dims.size(), pad_tensor_dims.size(), + pad_tensor_dims.size() - 1, seq_tensor_dims.size())); } /* diff --git a/paddle/fluid/operators/math/sequence_pooling.cc b/paddle/fluid/operators/math/sequence_pooling.cc index cc3fbd587668b17b7edde50b157adca83e81eddc..2eee4d0a6c14e8b6134b71294745c71302450347 100644 --- a/paddle/fluid/operators/math/sequence_pooling.cc +++ b/paddle/fluid/operators/math/sequence_pooling.cc @@ -42,15 +42,29 @@ class MaxSeqPoolFunctor { auto out_dims = output->dims(); auto idx_dims = index->dims(); PADDLE_ENFORCE_GT(in_dims.size(), 1, - "The rank of input shall be greater than 1."); + platform::errors::InvalidArgument( + "The rank of input shall be greater than 1, but got " + "the rank is %ld. Please check the input value", + in_dims.size())); PADDLE_ENFORCE_GT(out_dims.size(), 1, - "The rank of output shall be greater than 1."); + platform::errors::InvalidArgument( + "The rank of output shall be greater than 1, but got " + "the rank is %ld. Please check the input value", + out_dims.size())); for (int64_t i = 1; i < in_dims.size(); ++i) { - PADDLE_ENFORCE_EQ(in_dims[i], out_dims[i], - "The dimension of input and output shall be same."); + PADDLE_ENFORCE_EQ( + in_dims[i], out_dims[i], + platform::errors::InvalidArgument( + "The dimension of input and output shall be same. Expected %ld " + "== %ld, but got %ld != %ld. Please check the input value.", + in_dims[i], out_dims[i], in_dims[i], out_dims[i])); } - PADDLE_ENFORCE_EQ(idx_dims, out_dims, - "The dimension of index and output shall be same."); + PADDLE_ENFORCE_EQ( + idx_dims, out_dims, + platform::errors::InvalidArgument( + "The dimension of index and output shall be same. Expected %ld == " + "%ld, but got %ld != %ld. Please check the input value.", + idx_dims, out_dims, idx_dims, out_dims)); auto lod_level = input.lod().size(); auto starts = input.lod()[lod_level - 1]; @@ -94,12 +108,22 @@ class MaxSeqPoolFunctor { auto in_dims = input.dims(); auto out_dims = output->dims(); PADDLE_ENFORCE_GT(in_dims.size(), 1, - "The rank of input shall be greater than 1."); + platform::errors::InvalidArgument( + "The rank of input shall be greater than 1, but got " + "%ld <= 1. Please check the input value.", + in_dims.size())); PADDLE_ENFORCE_GT(out_dims.size(), 1, - "The rank of output shall be greater than 1."); + platform::errors::InvalidArgument( + "The rank of output shall be greater than 1, but got " + "%ld <= 1. Please check the input value.", + out_dims.size())); for (int64_t i = 1; i < in_dims.size(); ++i) { - PADDLE_ENFORCE_EQ(in_dims[i], out_dims[i], - "The dimension of input and output shall be same."); + PADDLE_ENFORCE_EQ( + in_dims[i], out_dims[i], + platform::errors::InvalidArgument( + "The dimension of input and output shall be same. Expected %ld " + "== %ld, but got %ld != %ld. Please check the input value.", + in_dims[i], out_dims[i], in_dims[i], out_dims[i])); } auto lod_level = input.lod().size(); @@ -139,16 +163,29 @@ class MaxSeqPoolGradFunctor { auto ig_dims = in_grad->dims(); auto idx_dims = index.dims(); PADDLE_ENFORCE_GT(og_dims.size(), 1, - "The rank of output@Grad shall be greater than 1."); + platform::errors::InvalidArgument( + "The rank of output@Grad shall be greater than 1, " + "but got %ld <= 1. Please check the input value.", + og_dims.size())); PADDLE_ENFORCE_GT(ig_dims.size(), 1, - "The rank of input@Grad shall be greater than 1."); + platform::errors::InvalidArgument( + "The rank of input@Grad shall be greater than 1, but " + "got %ld <= 1. Please check the input value.", + ig_dims.size())); for (int64_t i = 1; i < og_dims.size(); ++i) { - PADDLE_ENFORCE_EQ( - og_dims[i], ig_dims[i], - "The dimension of input@Grad and output@Grad shall be same."); + PADDLE_ENFORCE_EQ(og_dims[i], ig_dims[i], + platform::errors::InvalidArgument( + "The dimension of input@Grad and output@Grad shall " + "be same. Expected %ld == %ld, but got %ld != %ld. " + "Please check the input value.", + og_dims[i], ig_dims[i], og_dims[i], ig_dims[i])); } - PADDLE_ENFORCE_EQ(idx_dims, og_dims, - "The dimension of index and output@Grad shall be same."); + PADDLE_ENFORCE_EQ( + idx_dims, og_dims, + platform::errors::InvalidArgument( + "The dimension of index and output@Grad shall be same. Expected " + "%ld == %ld, but got %ld != %ld. Please check the input value.", + idx_dims, og_dims, idx_dims, og_dims)); const T* og_data = out_grad.data(); const int* max_index = index.data(); @@ -244,9 +281,12 @@ class SumSeqPoolGradFunctor { auto lod = in_grad->lod()[lod_level - 1]; int64_t out_w = out_grad.numel() / out_grad.dims()[0]; int64_t in_w = in_grad->numel() / in_grad->dims()[0]; - PADDLE_ENFORCE_EQ( - in_w, out_w, - "The feature size of input@Grad and output@Grad shall be same."); + PADDLE_ENFORCE_EQ(in_w, out_w, + platform::errors::InvalidArgument( + "The feature size of input@Grad and output@Grad " + "shall be same. Expected %ld == %ld, but got %ld != " + "%ld. Please check the input value.", + in_w, out_w, in_w, out_w)); const T* out_g_data = out_grad.data(); T* in_g_data = in_grad->mutable_data(context.GetPlace()); auto blas = math::GetBlas(context); @@ -298,7 +338,8 @@ class SequencePoolFunctor { auto place = context.GetPlace(); PADDLE_ENFORCE_EQ( platform::is_cpu_place(place), true, - "Sequence_pool should run on CPU Device when pooltype is SUM"); + platform::errors::InvalidArgument( + "Sequence_pool should run on CPU Device when pooltype is SUM")); const T* src = input.data(); T* dst = output->mutable_data(place); jit::seq_pool_attr_t attr( @@ -342,7 +383,10 @@ class SequencePoolFunctor { out_e.device(place) = in_e.sum(Eigen::array({{0}})) / std::sqrt(static_cast(h)); } else { - PADDLE_THROW("unsupported pooling pooltype"); + PADDLE_THROW(platform::errors::InvalidArgument( + "unsupported pooling pooltype: %s. Only support \"AVERAGE\" and " + "\"SQRT\"", + pooltype)); } } } @@ -400,7 +444,10 @@ class SequencePoolGradFunctor { } else if (pooltype == "FIRST") { in_g_e.chip(0, 0).device(place) = out_g_e_v; } else { - PADDLE_THROW("unsupported pooling pooltype"); + PADDLE_THROW(platform::errors::InvalidArgument( + "unsupported pooling pooltype: %s. Only support \"AVERAGE\", " + "\"SQRT\", \"LAST\" and \"FIRST\"", + pooltype)); } } } diff --git a/paddle/fluid/operators/math/sequence_pooling.cu b/paddle/fluid/operators/math/sequence_pooling.cu index 422b06c70eb2107659666edf58223ae8e4666b1d..cba8dd935ef1b3625f5f68578e411aec81eaa4f4 100644 --- a/paddle/fluid/operators/math/sequence_pooling.cu +++ b/paddle/fluid/operators/math/sequence_pooling.cu @@ -205,7 +205,10 @@ class SequencePoolFunctor { lod.CUDAData(context.GetPlace()), lod.size(), item_dim, output->mutable_data(context.GetPlace()), nullptr); } else { - PADDLE_THROW("unsupported pooling pooltype"); + PADDLE_THROW(platform::errors::InvalidArgument( + "unsupported pooling pooltype: %s. Only support \"MAX\", " + "\"AVERAGE\", \"SUM\", \"SQRT\", \"LAST\" and \"FIRST\"", + pooltype)); } } }; @@ -370,7 +373,10 @@ class SequencePoolGradFunctor { in_grad->mutable_data(context.GetPlace()), nullptr); } else { - PADDLE_THROW("unsupported pooling pooltype"); + PADDLE_THROW(platform::errors::InvalidArgument( + "unsupported pooling pooltype: %s. Only support \"MAX\", " + "\"AVERAGE\", \"SUM\", \"SQRT\", \"LAST\" and \"FIRST\"", + pooltype)); } } }; diff --git a/paddle/fluid/operators/math/sequence_pooling_test.cc b/paddle/fluid/operators/math/sequence_pooling_test.cc index efab1a375b56bea3caec2c8169dc390298a37cbe..4b5f484e52c6acb2d7fb2cea6265c8dd7826571b 100644 --- a/paddle/fluid/operators/math/sequence_pooling_test.cc +++ b/paddle/fluid/operators/math/sequence_pooling_test.cc @@ -50,9 +50,21 @@ void TestSequencePoolingSum(const DeviceContext &context, in_grad.mutable_data(in_dims, place); // check tensor contruction result - PADDLE_ENFORCE_EQ(in_grad.dims().size(), out_grad.dims().size()); + PADDLE_ENFORCE_EQ( + in_grad.dims().size(), out_grad.dims().size(), + paddle::platform::errors::InvalidArgument( + "The dimension of input and output shall be same. Expected %ld == " + "%ld, but got %ld != %ld. Please check the input value.", + in_grad.dims().size(), out_grad.dims().size(), in_grad.dims().size(), + out_grad.dims().size())); for (int64_t i = 1; i < out_grad.dims().size(); ++i) { - PADDLE_ENFORCE_EQ(in_grad.dims()[i], out_grad.dims()[i]); + PADDLE_ENFORCE_EQ( + in_grad.dims()[i], out_grad.dims()[i], + paddle::platform::errors::InvalidArgument( + "The dimension of input and output shall be same. Expected %ld == " + "%ld, but got %ld != %ld. Please check the input value.", + in_grad.dims()[i], out_grad.dims()[i], in_grad.dims()[i], + out_grad.dims()[i])); } // call functor diff --git a/paddle/fluid/operators/math/tree2col.cc b/paddle/fluid/operators/math/tree2col.cc index cafcf6319326cc9a496de8ee6aa1033e1320f4b0..0344226ea66e2a64fce0574cb45a8c2ce918359c 100644 --- a/paddle/fluid/operators/math/tree2col.cc +++ b/paddle/fluid/operators/math/tree2col.cc @@ -55,7 +55,11 @@ void Tree2ColUtil::construct_tree(const paddle::Tensor &EdgeSet, std::vector> *tr, size_t *node_count) { auto edge_set_dims = EdgeSet.dims(); - PADDLE_ENFORCE_EQ(edge_set_dims[1], 2); + PADDLE_ENFORCE_EQ(edge_set_dims[1], 2, + platform::errors::InvalidArgument( + "The second dimension of the EdgeSet shall be 2, but " + "got %ld != 2. Please check the input value.", + edge_set_dims[1])); int64_t edge_count = EdgeSet.numel(); const int *edge_data = EdgeSet.data(); diff --git a/paddle/fluid/operators/math/unpooling.cc b/paddle/fluid/operators/math/unpooling.cc index 13f0845bb8579615381c06072eb1e32507ffd3cf..9ad2ec5005203d02032aaa6f7e48b27f52631059 100644 --- a/paddle/fluid/operators/math/unpooling.cc +++ b/paddle/fluid/operators/math/unpooling.cc @@ -37,7 +37,13 @@ class Unpool2dMaxFunctor { for (int c = 0; c < output_channels; ++c) { for (int i = 0; i < input_feasize; ++i) { int index = indices_data[i]; - PADDLE_ENFORCE(index < output_feasize, "err index in unpooling!"); + PADDLE_ENFORCE_LT( + index, output_feasize, + platform::errors::InvalidArgument( + "index should less than output tensor height * output tensor " + "width. Expected %ld < %ld, but got " + "%ld >= %ld. Please check input value.", + index, output_feasize, index, output_feasize)); output_data[index] = input_data[i]; } input_data += input_feasize; @@ -72,7 +78,13 @@ class Unpool2dMaxGradFunctor { for (int c = 0; c < output_channels; ++c) { for (int i = 0; i < input_feasize; ++i) { int index = indices_data[i]; - PADDLE_ENFORCE(index < output_feasize, "err index in unpooling!"); + PADDLE_ENFORCE_LT( + index, output_feasize, + platform::errors::InvalidArgument( + "index should less than output tensor height * output tensor " + "width. Expected %ld < %ld, but got " + "%ld >= %ld. Please check input value.", + index, output_feasize, index, output_feasize)); input_grad_data[i] = output_grad_data[index]; } input_grad_data += input_feasize; diff --git a/paddle/fluid/operators/p_norm_op.cc b/paddle/fluid/operators/p_norm_op.cc index 59035d5a8ca5d4214f1370e1b14b2be9b234fa6a..cd7a8c6d24eaaca096b630b74dd7cd1bb9d35d09 100644 --- a/paddle/fluid/operators/p_norm_op.cc +++ b/paddle/fluid/operators/p_norm_op.cc @@ -105,6 +105,12 @@ class PnormOp : public framework::OperatorWithKernel { bool asvector = ctx->Attrs().Get("asvector"); if (asvector) { reduce_dims.emplace_back(1); + if (keepdim) { + for (int i = 1; i < x_dim.size(); ++i) { + reduce_dims.emplace_back(1); + } + x_dim = framework::make_ddim(reduce_dims); + } } else { if (axis < 0) axis = x_dim.size() + axis; for (int i = 0; i < x_dim.size(); ++i) { diff --git a/paddle/fluid/operators/reduce_ops/reduce_sum_op.cc b/paddle/fluid/operators/reduce_ops/reduce_sum_op.cc index 6e470e3af4e5860796d898a5e3138c28958264cb..54818470b277443e411ea5f7d9c7561eddc7046a 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_sum_op.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_sum_op.cc @@ -51,6 +51,20 @@ class ReduceSumOpGradMaker : public framework::SingleGradOpMaker { } }; +template +class ReduceSumDoubleOpGradMaker : public framework::SingleGradOpMaker { + public: + using framework::SingleGradOpMaker::SingleGradOpMaker; + + protected: + void Apply(GradOpPtr op) const override { + op->SetInput("X", this->OutputGrad(framework::GradVarName("X"))); + op->SetOutput("Out", this->InputGrad(framework::GradVarName("Out"))); + op->SetAttrMap(this->Attrs()); + op->SetType("reduce_sum"); + } +}; + DECLARE_NO_NEED_BUFFER_VARS_INFERER(ReduceSumGradNoNeedBufferVarInferer, "X"); class ReduceSumVarTypeInference : public paddle::framework::VarTypeInference { public: @@ -77,6 +91,8 @@ REGISTER_OPERATOR(reduce_sum, ops::ReduceOp, ReduceSumOpMaker, ops::ReduceSumOpGradMaker, ops::ReduceSumOpGradMaker); REGISTER_OPERATOR(reduce_sum_grad, ops::ReduceGradOp, + ops::ReduceSumDoubleOpGradMaker, + ops::ReduceSumDoubleOpGradMaker, ops::ReduceSumGradNoNeedBufferVarInferer); REGISTER_OP_CPU_KERNEL( diff --git a/paddle/fluid/operators/roi_align_op.cc b/paddle/fluid/operators/roi_align_op.cc index 911dfea50e2e2cf8ec8f230bfc1e0bf4836463b6..0eeb7e0bb24f512aa6859e92de9f490e491543aa 100644 --- a/paddle/fluid/operators/roi_align_op.cc +++ b/paddle/fluid/operators/roi_align_op.cc @@ -11,6 +11,7 @@ limitations under the License. */ #include "paddle/fluid/operators/roi_align_op.h" #include +#include "paddle/fluid/framework/op_version_registry.h" namespace paddle { namespace operators { @@ -35,13 +36,13 @@ class ROIAlignOp : public framework::OperatorWithKernel { auto input_dims = ctx->GetInputDim("X"); auto rois_dims = ctx->GetInputDim("ROIs"); - if (ctx->HasInput("RoisLod")) { - auto rois_lod_dims = ctx->GetInputDim("RoisLod"); + if (ctx->HasInput("RoisNum")) { + auto rois_num_dims = ctx->GetInputDim("RoisNum"); PADDLE_ENFORCE_EQ( - rois_lod_dims.size(), 1, - platform::errors::InvalidArgument("The RoisLod dimension should be 1" - ", but got dimension = %d", - rois_lod_dims.size())); + rois_num_dims.size(), 1, + platform::errors::InvalidArgument("The size of RoisNum should be 1" + ", but received size = %d", + rois_num_dims.size())); } PADDLE_ENFORCE_EQ( input_dims.size(), 4, @@ -145,9 +146,9 @@ class ROIAlignOpMaker : public framework::OpProtoAndCheckerMaker { "given as [[x1, y1, x2, y2], ...]. " "(x1, y1) is the top left coordinates, and " "(x2, y2) is the bottom right coordinates."); - AddInput("RoisLod", + AddInput("RoisNum", "(Tensor), " - "The lod info of rois.") + "The number of RoIs in each image.") .AsDispensable(); AddOutput("Out", "(Tensor), " @@ -203,7 +204,7 @@ class ROIAlignGradMaker : public framework::SingleGradOpMaker { op->SetType("roi_align_grad"); op->SetInput("X", this->Input("X")); op->SetInput("ROIs", this->Input("ROIs")); - op->SetInput("RoisLod", this->Input("RoisLod")); + op->SetInput("RoisNum", this->Input("RoisNum")); op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); op->SetAttrMap(this->Attrs()); @@ -231,3 +232,10 @@ REGISTER_OP_CPU_KERNEL( ops::CPUROIAlignGradOpKernel, ops::CPUROIAlignGradOpKernel, ops::CPUROIAlignGradOpKernel); +REGISTER_OP_VERSION(roi_align) + .AddCheckpoint( + R"ROC( + Upgrade roi_align add a new input [RoisNum])ROC", + paddle::framework::compatible::OpVersionDesc().NewInput( + "RoisNum", + "The number of RoIs in each image. RoisNum is dispensable.")); diff --git a/paddle/fluid/operators/roi_align_op.cu b/paddle/fluid/operators/roi_align_op.cu index f7ec13e5bccd63d2f6552ed52f8d709a57320ddd..3a4ce55f4fb77160e7fc645539c1868fe2864b19 100644 --- a/paddle/fluid/operators/roi_align_op.cu +++ b/paddle/fluid/operators/roi_align_op.cu @@ -257,24 +257,26 @@ class GPUROIAlignOpKernel : public framework::OpKernel { int* roi_batch_id_data = roi_batch_id_list.mutable_data(cplace); auto& dev_ctx = ctx.cuda_device_context(); auto gplace = BOOST_GET_CONST(platform::CUDAPlace, ctx.GetPlace()); - if (ctx.HasInput("RoisLod")) { - auto* rois_lod = ctx.Input("RoisLod"); - int rois_batch_size = rois_lod->numel(); + if (ctx.HasInput("RoisNum")) { + auto* rois_num_t = ctx.Input("RoisNum"); + int rois_batch_size = rois_num_t->numel(); PADDLE_ENFORCE_EQ( - rois_batch_size - 1, batch_size, + rois_batch_size, batch_size, platform::errors::InvalidArgument( "The rois_batch_size and imgs " "batch_size must be the same. But received rois_batch_size = %d, " "batch_size = %d", rois_batch_size, batch_size)); - std::vector rois_lod_(rois_batch_size); - memory::Copy(cplace, rois_lod_.data(), gplace, rois_lod->data(), - sizeof(int64_t) * rois_batch_size, 0); - for (int n = 0; n < rois_batch_size - 1; ++n) { - for (size_t i = rois_lod_[n]; i < rois_lod_[n + 1]; ++i) { + std::vector rois_num_list(rois_batch_size); + memory::Copy(cplace, rois_num_list.data(), gplace, + rois_num_t->data(), sizeof(int) * rois_batch_size, 0); + int start = 0; + for (int n = 0; n < rois_batch_size; ++n) { + for (int i = start; i < start + rois_num_list[n]; ++i) { roi_batch_id_data[i] = n; } + start += rois_num_list[n]; } } else { auto lod = rois->lod(); @@ -348,16 +350,18 @@ class GPUROIAlignGradOpKernel : public framework::OpKernel { auto& dev_ctx = ctx.cuda_device_context(); auto gplace = BOOST_GET_CONST(platform::CUDAPlace, ctx.GetPlace()); - if (ctx.HasInput("RoisLod")) { - auto* rois_lod = ctx.Input("RoisLod"); - int rois_batch_size = rois_lod->numel(); - std::vector rois_lod_(rois_batch_size); - memory::Copy(cplace, rois_lod_.data(), gplace, rois_lod->data(), - sizeof(int64_t) * rois_batch_size, 0); - for (int n = 0; n < rois_batch_size - 1; ++n) { - for (size_t i = rois_lod_[n]; i < rois_lod_[n + 1]; ++i) { + if (ctx.HasInput("RoisNum")) { + auto* rois_num_t = ctx.Input("RoisNum"); + int rois_batch_size = rois_num_t->numel(); + std::vector rois_num_list(rois_batch_size); + memory::Copy(cplace, rois_num_list.data(), gplace, + rois_num_t->data(), sizeof(int) * rois_batch_size, 0); + int start = 0; + for (int n = 0; n < rois_batch_size; ++n) { + for (size_t i = start; i < start + rois_num_list[n]; ++i) { roi_batch_id_data[i] = n; } + start += rois_num_list[n]; } } else { auto rois_lod = rois->lod().back(); diff --git a/paddle/fluid/operators/roi_align_op.h b/paddle/fluid/operators/roi_align_op.h index 366f865411461c91b4ec88203390b15fdba4414c..066125a92fbd9d1d49f0ba023366865620674e1f 100644 --- a/paddle/fluid/operators/roi_align_op.h +++ b/paddle/fluid/operators/roi_align_op.h @@ -165,21 +165,23 @@ class CPUROIAlignOpKernel : public framework::OpKernel { int* roi_batch_id_data = roi_batch_id_list.mutable_data(ctx.GetPlace()); int rois_batch_size; - if (ctx.HasInput("RoisLod")) { - auto* rois_lod_t = ctx.Input("RoisLod"); - rois_batch_size = rois_lod_t->numel(); + if (ctx.HasInput("RoisNum")) { + auto* rois_num_t = ctx.Input("RoisNum"); + rois_batch_size = rois_num_t->numel(); PADDLE_ENFORCE_EQ( - rois_batch_size - 1, batch_size, + rois_batch_size, batch_size, platform::errors::InvalidArgument( "The batch size of rois and the batch size of images " " must be the same. But received the batch size of rois is %d, " "and the batch size of images is %d", rois_batch_size, batch_size)); - auto* rois_lod = rois_lod_t->data(); - for (int n = 0; n < rois_batch_size - 1; ++n) { - for (int i = rois_lod[n]; i < rois_lod[n + 1]; ++i) { + auto* rois_num_data = rois_num_t->data(); + int start = 0; + for (int n = 0; n < rois_batch_size; ++n) { + for (int i = start; i < start + rois_num_data[n]; ++i) { roi_batch_id_data[i] = n; } + start += rois_num_data[n]; } } else { auto lod = rois->lod(); @@ -303,14 +305,16 @@ class CPUROIAlignGradOpKernel : public framework::OpKernel { roi_batch_id_list.mutable_data(ctx.GetPlace()); int rois_batch_size; - if (ctx.HasInput("RoisLod")) { - auto* rois_lod_t = ctx.Input("RoisLod"); - rois_batch_size = rois_lod_t->numel(); - auto* rois_lod = rois_lod_t->data(); - for (int n = 0; n < rois_batch_size - 1; ++n) { - for (int i = rois_lod[n]; i < rois_lod[n + 1]; ++i) { + if (ctx.HasInput("RoisNum")) { + auto* rois_num_t = ctx.Input("RoisNum"); + rois_batch_size = rois_num_t->numel(); + auto* rois_num_data = rois_num_t->data(); + int start = 0; + for (int n = 0; n < rois_batch_size; ++n) { + for (int i = start; i < start + rois_num_data[n]; ++i) { roi_batch_id_data[i] = n; } + start += rois_num_data[n]; } } else { auto rois_lod = rois->lod().back(); diff --git a/paddle/fluid/operators/roi_pool_op.cc b/paddle/fluid/operators/roi_pool_op.cc index 8a34cb35f6bf8dde97d29c02749d8a52fdf5f090..be3187b7513144f583458f3d7902a102e531a981 100644 --- a/paddle/fluid/operators/roi_pool_op.cc +++ b/paddle/fluid/operators/roi_pool_op.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include "paddle/fluid/operators/roi_pool_op.h" #include +#include "paddle/fluid/framework/op_version_registry.h" namespace paddle { namespace operators { @@ -34,12 +35,13 @@ class ROIPoolOp : public framework::OperatorWithKernel { auto input_dims = ctx->GetInputDim("X"); auto rois_dims = ctx->GetInputDim("ROIs"); - if (ctx->HasInput("RoisLod")) { - auto rois_lod_dims = ctx->GetInputDim("RoisLod"); - PADDLE_ENFORCE_EQ(rois_lod_dims.size(), 1, + if (ctx->HasInput("RoisNum")) { + auto rois_num_dims = ctx->GetInputDim("RoisNum"); + PADDLE_ENFORCE_EQ(rois_num_dims.size(), 1, platform::errors::InvalidArgument( - "The lod information tensor of ROIs should " - "be one-dimensional")); + "The second dimension of RoisNum should " + "be 1, but received dimension is %d", + rois_num_dims.size())); } PADDLE_ENFORCE_EQ(input_dims.size(), 4, platform::errors::InvalidArgument( @@ -140,7 +142,8 @@ class ROIPoolOpMaker : public framework::OpProtoAndCheckerMaker { "Where batch_id is the id of the data, " "(x1, y1) is the top left coordinates, and " "(x2, y2) is the bottom right coordinates."); - AddInput("RoisLod", "(Tensor), The lod info of rois.").AsDispensable(); + AddInput("RoisNum", "(Tensor), The number of RoIs in each image.") + .AsDispensable(); AddOutput("Out", "(Tensor), " "The output of ROIPoolOp is a 4-D tensor with shape " @@ -197,7 +200,7 @@ class ROIPoolGradMaker : public framework::SingleGradOpMaker { op->SetType("roi_pool_grad"); op->SetInput("X", this->Input("X")); op->SetInput("ROIs", this->Input("ROIs")); - op->SetInput("RoisLod", this->Input("RoisLod")); + op->SetInput("RoisNum", this->Input("RoisNum")); op->SetInput("Argmax", this->Output("Argmax")); op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); @@ -223,3 +226,10 @@ REGISTER_OP_CPU_KERNEL( ops::CPUROIPoolGradOpKernel, ops::CPUROIPoolGradOpKernel, ops::CPUROIPoolGradOpKernel); +REGISTER_OP_VERSION(roi_pool) + .AddCheckpoint( + R"ROC( + Upgrade roi_pool add a new input [RoisNum])ROC", + paddle::framework::compatible::OpVersionDesc().NewInput( + "RoisNum", + "The number of RoIs in each image. RoisNum is dispensable.")); diff --git a/paddle/fluid/operators/roi_pool_op.cu b/paddle/fluid/operators/roi_pool_op.cu index 1e8a8e3037d84f980d963d359ce791b1ddba47d3..98d9ef6b6e11440d38abbedbfd93f6d3544d77bc 100644 --- a/paddle/fluid/operators/roi_pool_op.cu +++ b/paddle/fluid/operators/roi_pool_op.cu @@ -157,19 +157,21 @@ class GPUROIPoolOpKernel : public framework::OpKernel { int* roi_batch_id_data = roi_batch_id_list.mutable_data(cplace); auto& dev_ctx = ctx.cuda_device_context(); auto gplace = BOOST_GET_CONST(platform::CUDAPlace, ctx.GetPlace()); - if (ctx.HasInput("RoisLod")) { - auto* rois_lod = ctx.Input("RoisLod"); - int rois_batch_size = rois_lod->numel(); + if (ctx.HasInput("RoisNum")) { + auto* rois_num_t = ctx.Input("RoisNum"); + int rois_batch_size = rois_num_t->numel(); PADDLE_ENFORCE_EQ( - rois_batch_size - 1, batch_size, + rois_batch_size, batch_size, "The rois_batch_size and imgs batch_size must be the same."); - std::vector rois_lod_(rois_batch_size); - memory::Copy(cplace, rois_lod_.data(), gplace, rois_lod->data(), - sizeof(int64_t) * rois_batch_size, 0); - for (int n = 0; n < rois_batch_size - 1; ++n) { - for (size_t i = rois_lod_[n]; i < rois_lod_[n + 1]; ++i) { + std::vector rois_num_list(rois_batch_size); + memory::Copy(cplace, rois_num_list.data(), gplace, + rois_num_t->data(), sizeof(int) * rois_batch_size, 0); + int start = 0; + for (int n = 0; n < rois_batch_size; ++n) { + for (int i = start; i < start + rois_num_list[n]; ++i) { roi_batch_id_data[i] = n; } + start += rois_num_list[n]; } } else { auto rois_lod = rois->lod().back(); @@ -206,7 +208,7 @@ class GPUROIPoolGradOpKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto* in = ctx.Input("X"); auto* rois = ctx.Input("ROIs"); - auto* rois_lod = ctx.Input("RoisLod"); + auto* rois_lod = ctx.Input("RoisNum"); auto* argmax = ctx.Input("Argmax"); auto* out_grad = ctx.Input(framework::GradVarName("Out")); @@ -229,17 +231,18 @@ class GPUROIPoolGradOpKernel : public framework::OpKernel { auto& dev_ctx = ctx.cuda_device_context(); auto gplace = BOOST_GET_CONST(platform::CUDAPlace, ctx.GetPlace()); - if (ctx.HasInput("RoisLod")) { - auto* rois_lod = ctx.Input("RoisLod"); - int rois_batch_size = rois_lod->numel(); - std::vector rois_lod_(rois_batch_size); - memory::Copy(cplace, rois_lod_.data(), gplace, - rois_lod->data(), - sizeof(int64_t) * rois_batch_size, 0); - for (int n = 0; n < rois_batch_size - 1; ++n) { - for (size_t i = rois_lod_[n]; i < rois_lod_[n + 1]; ++i) { + if (ctx.HasInput("RoisNum")) { + auto* rois_num_t = ctx.Input("RoisNum"); + int rois_batch_size = rois_num_t->numel(); + std::vector rois_num_list(rois_batch_size); + memory::Copy(cplace, rois_num_list.data(), gplace, + rois_num_t->data(), sizeof(int) * rois_batch_size, 0); + int start = 0; + for (int n = 0; n < rois_batch_size; ++n) { + for (int i = start; i < start + rois_num_list[n]; ++i) { roi_batch_id_data[i] = n; } + start += rois_num_list[n]; } } else { auto rois_lod = rois->lod().back(); diff --git a/paddle/fluid/operators/roi_pool_op.h b/paddle/fluid/operators/roi_pool_op.h index 145b170dedf0613328223526b0a40a3c064f3028..40de6d0cf6abbcc4a1505cb6eb121ca70813c780 100644 --- a/paddle/fluid/operators/roi_pool_op.h +++ b/paddle/fluid/operators/roi_pool_op.h @@ -58,18 +58,20 @@ class CPUROIPoolOpKernel : public framework::OpKernel { roi_batch_id_list.mutable_data(ctx.GetPlace()); int rois_batch_size; - if (ctx.HasInput("RoisLod")) { - auto* rois_lod_t = ctx.Input("RoisLod"); - rois_batch_size = rois_lod_t->numel(); + if (ctx.HasInput("RoisNum")) { + auto* rois_num_t = ctx.Input("RoisNum"); + rois_batch_size = rois_num_t->numel(); PADDLE_ENFORCE_EQ( - rois_batch_size - 1, batch_size, + rois_batch_size, batch_size, platform::errors::InvalidArgument("The rois_batch_size and imgs " "batch_size must be the same.")); - auto* rois_lod = rois_lod_t->data(); - for (int n = 0; n < rois_batch_size - 1; ++n) { - for (int i = rois_lod[n]; i < rois_lod[n + 1]; ++i) { + auto* rois_num_data = rois_num_t->data(); + int start = 0; + for (int n = 0; n < rois_batch_size; ++n) { + for (int i = start; i < start + rois_num_data[n]; ++i) { roi_batch_id_data[i] = n; } + start += rois_num_data[n]; } } else { auto rois_lod = rois->lod().back(); @@ -185,14 +187,16 @@ class CPUROIPoolGradOpKernel : public framework::OpKernel { roi_batch_id_list.mutable_data(ctx.GetPlace()); int rois_batch_size; - if (ctx.HasInput("RoisLod")) { - auto* rois_lod_t = ctx.Input("RoisLod"); - rois_batch_size = rois_lod_t->numel(); - auto* rois_lod = rois_lod_t->data(); - for (int n = 0; n < rois_batch_size - 1; ++n) { - for (int i = rois_lod[n]; i < rois_lod[n + 1]; ++i) { + if (ctx.HasInput("RoisNum")) { + auto* rois_num_t = ctx.Input("RoisNum"); + rois_batch_size = rois_num_t->numel(); + auto* rois_num_data = rois_num_t->data(); + int start = 0; + for (int n = 0; n < rois_batch_size; ++n) { + for (int i = start; i < start + rois_num_data[n]; ++i) { roi_batch_id_data[i] = n; } + start += rois_num_data[n]; } } else { auto rois_lod = rois->lod().back(); diff --git a/paddle/fluid/pybind/op_function_generator.cc b/paddle/fluid/pybind/op_function_generator.cc index 256faf04ea6de5835f22113537caac49ca1dbab4..178ecaff7e8d2e575cd64927fe4e39c773b2cb99 100644 --- a/paddle/fluid/pybind/op_function_generator.cc +++ b/paddle/fluid/pybind/op_function_generator.cc @@ -43,6 +43,11 @@ std::map> op_ins_map = { {"nll_loss", {"X", "Label", "Weight"}}, {"bilinear_tensor_product", {"X", "Y", "Weight", "Bias"}}, {"gather", {"X", "Index", "Axis"}}, + {"roi_pool", {"X", "ROIs", "RoisNum"}}, + {"roi_align", {"X", "ROIs", "RoisNum"}}, + {"collect_fpn_proposals", + {"MultiLevelRois", "MultiLevelScores", "MultiLevelRoIsNum"}}, + {"distribute_fpn_proposals", {"FpnRois", "RoisNum"}}, }; // NOTE(zhiqiu): Like op_ins_map. @@ -63,6 +68,10 @@ std::map> op_outs_map = { {"Y", "MeanOut", "VarianceOut", "SavedMean", "SavedVariance", "ReserveSpace"}}, {"unique", {"Out", "Index", "Indices", "Counts"}}, + {"generate_proposals", {"RpnRois", "RpnRoiProbs", "RpnRoisNum"}}, + {"collect_fpn_proposals", {"FpnRois", "RoisNum"}}, + {"distribute_fpn_proposals", + {"MultiFpnRois", "RestoreIndex", "MultiLevelRoIsNum"}}, }; // NOTE(zhiqiu): Commonly, the outputs in auto-generated OP function are diff --git a/paddle/scripts/paddle_build.bat b/paddle/scripts/paddle_build.bat index 11932ce728889efde2c6cf5ecd8b8252e1192c19..1616e237092037303073236809a16a72651568cd 100644 --- a/paddle/scripts/paddle_build.bat +++ b/paddle/scripts/paddle_build.bat @@ -111,6 +111,8 @@ goto:success :CASE_wincheck_openblas set WITH_MKL=OFF set WITH_GPU=ON +rem Temporarily turn off WITH_INFERENCE_API_TEST on GPU due to compile hang +set WITH_INFERENCE_API_TEST=OFF call :cmake || goto cmake_error call :build || goto build_error call :test_whl_pacakage || goto test_whl_pacakage_error @@ -242,7 +244,7 @@ dir %THIRD_PARTY_PATH:/=\%\install\mkldnn\bin dir %THIRD_PARTY_PATH:/=\%\install\warpctc\bin set PATH=%THIRD_PARTY_PATH:/=\%\install\openblas\lib;%THIRD_PARTY_PATH:/=\%\install\openblas\bin;%THIRD_PARTY_PATH:/=\%\install\zlib\bin;%THIRD_PARTY_PATH:/=\%\install\mklml\lib;%THIRD_PARTY_PATH:/=\%\install\mkldnn\bin;%THIRD_PARTY_PATH:/=\%\install\warpctc\bin;%PATH% -ctest.exe --output-on-failure -C Release -j 8 --repeat until-pass:4 +ctest.exe --output-on-failure -C Release -j 8 --repeat until-pass:4 after-timeout:4 goto:eof :unit_test_error diff --git a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py index f394a792e3a5750b13da65f50d84e3a0f516f617..7dca7b9cb88a37fb7954c3e8059a7ae09e2ef3a6 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py @@ -13,6 +13,10 @@ from paddle import fluid from .meta_optimizer_base import MetaOptimizerBase +from paddle.fluid import core +import subprocess +import re +import platform class ParameterServerOptimizer(MetaOptimizerBase): @@ -28,6 +32,9 @@ class ParameterServerOptimizer(MetaOptimizerBase): def _can_apply(self): if self.role_maker._is_collective: return False + if self.user_defined_strategy.auto == True: + return True + k_steps = self.user_defined_strategy.a_sync_configs["k_steps"] return True if k_steps >= 0 else False @@ -127,6 +134,105 @@ class ParameterServerOptimizer(MetaOptimizerBase): return _main, _startup + def _try_auto_apply_geo(self, program, compiled_config): + def get_sys_free_mem(): + plat = platform.system() + if platform.system() == "Darwin": + vm = subprocess.Popen( + ['vm_stat'], stdout=subprocess.PIPE).communicate()[0] + # Process vm_stat + vmLines = vm.split('\n') + sep = re.compile(':[\s]+') + vmStats = {} + for row in range(1, len(vmLines) - 2): + rowText = vmLines[row].strip() + rowElements = sep.split(rowText) + vmStats[(rowElements[0] + )] = int(rowElements[1].strip('\.')) * 4096 + return vmStats["Pages free"] + elif platform.system() == "Linux": + mems = {} + with open('/proc/meminfo', 'rb') as f: + for line in f: + fields = line.split() + mems[fields[0]] = int(fields[1]) * 1024 + free = mems[b'MemFree:'] + return free + else: + raise ValueError( + "%s platform is unsupported is parameter server optimizer" % + (platform.system())) + + if self.user_defined_strategy.auto == False: + return + + a_sync_configs = self.user_defined_strategy.a_sync_configs + if a_sync_configs["k_steps"] >= 0: + return + + self.user_defined_strategy.a_sync = True + if not isinstance(self.inner_opt, fluid.optimizer.SGDOptimizer): + # auto async + a_sync_configs["k_steps"] = 0 + self.user_defined_strategy.a_sync_configs = a_sync_configs + return + + from paddle.fluid.incubate.fleet.parameter_server.ir.vars_metatools import dtype_to_size + free = get_sys_free_mem() + + param_grad_pairs = compiled_config.origin_sparse_pairs + compiled_config.origin_dense_pairs + processed_var_names = set(["@EMPTY@"]) + + param_memory_size = 0 + for param_grad_pair in param_grad_pairs: + param, grad = param_grad_pair + param_memory_size += param.m_size + processed_var_names.add(param.name) + + upper_mem_use = param_memory_size * 5.0 + + program_tmp_vars = dict() + batch_size = 1024 + for op in program.global_block().ops: + for var_name in op.output_arg_names: + if var_name in processed_var_names: + continue + processed_var_names.add(var_name) + var = program.global_block().vars[var_name] + + if var.desc.type() != core.VarDesc.VarType.LOD_TENSOR: + continue + + data_count = 1 + neg_dim_count = 0 + for x in var.shape: + if x < 0: + if neg_dim_count >= 1: + raise ValueError( + "Var %s has more than one negative dim." % + (var_name)) + neg_dim_count += 1 + data_count *= (-x) + else: + data_count *= x + program_tmp_vars[var_name] = (data_count, neg_dim_count, + dtype_to_size[var.dtype]) + + for varname in program_tmp_vars: + data_count, neg_dim_count, type_size = program_tmp_vars[varname] + if neg_dim_count == 1: + data_count *= batch_size + var_memory = data_count * type_size + upper_mem_use += var_memory + + if upper_mem_use < free: + # auto geo + a_sync_configs["k_steps"] = 800 + else: + # auto async + a_sync_configs["k_steps"] = 0 + self.user_defined_strategy.a_sync_configs = a_sync_configs + def minimize_impl(self, loss, startup_program=None, @@ -134,7 +240,6 @@ class ParameterServerOptimizer(MetaOptimizerBase): no_grad_set=None): self.inner_opt.minimize(loss, startup_program, parameter_list, no_grad_set) - strategy = self._get_distributed_strategy() _origin_main_program = loss.block.program _origin_startup_program = startup_program @@ -142,7 +247,12 @@ class ParameterServerOptimizer(MetaOptimizerBase): compiled_config = public.CompileTimeStrategy(_origin_main_program, _origin_startup_program, - strategy, self.role_maker) + None, self.role_maker) + + self._try_auto_apply_geo(_origin_main_program, compiled_config) + + strategy = self._get_distributed_strategy() + compiled_config.strategy = strategy if self.role_maker.is_worker() or self.role_maker._is_heter_worker(): main_program, startup_program = self._build_trainer_programs( diff --git a/python/paddle/fluid/contrib/layers/nn.py b/python/paddle/fluid/contrib/layers/nn.py index 0e187d4174cd5cca65f79e4ab84b4cc32ecefd21..7b564b3f837c001673bdd272ba60edf31cde21fb 100644 --- a/python/paddle/fluid/contrib/layers/nn.py +++ b/python/paddle/fluid/contrib/layers/nn.py @@ -37,7 +37,7 @@ import warnings import inspect import numpy as np - +import paddle from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.layers import utils from ... import unique_name @@ -56,7 +56,8 @@ __all__ = [ 'match_matrix_tensor', 'tree_conv', 'fused_embedding_seq_pool', 'multiclass_nms2', 'search_pyramid_hash', 'shuffle_batch', 'partial_concat', 'sparse_embedding', 'partial_sum', 'tdm_child', 'rank_attention', - 'tdm_sampler', 'batch_fc', '_pull_box_extended_sparse', 'bilateral_slice' + 'tdm_sampler', 'batch_fc', '_pull_box_extended_sparse', 'bilateral_slice', + 'correlation' ] @@ -1546,3 +1547,81 @@ def bilateral_slice(x, guide, grid, has_offset, name=None): attrs={'has_offset': has_offset}, outputs={'Out': out}) return out + + +def correlation(x, + y, + pad_size, + kernel_size, + max_displacement, + stride1, + stride2, + corr_type_multiply=1): + """ + + This operation compute correlation of two tensor. + For more information of correlation, please refer to PWC-Net: + CNNs for Optical Flow Using Pyramid, Warping, and Cost Volume + _ + + Args: + x(Tensor): The input x is 4-D Tensor with shape [N, C, H, W]. The data type is float32 and float64. + y(Tensor): The input y is 4-D Tensor with shape [N, C, H, W]. The data type is float32 and float64. + pad_size(int): Pad size. The data type is int. + max_displacement(int): Max displacement. The data type is int. + stride1(int): stride size of x. The data type is int. + stride2(int): stride size of y. The data type is int. + corr_type_multiply(int, optional): The type of multiply. The data type is int. Default: 1. + + Returns: + Tensor: The data type is same as input tensor. + + Examples: + + .. code-block:: python + + import paddle.fluid as fluid + + x1 = fluid.layers.data(name='x1', + shape=x_shape, + dtype=x_type, + append_batch_size=False) + x2 = fluid.layers.data(name='x2', + shape=x_shape, + dtype=x_type, + append_batch_size=False) + + + out = fluid.contrib.correlation( + x1, + x2, + pad_size=4, + kernel_size=1, + max_displacement=4, + stride1=1, + stride2=1) + + """ + + helper = LayerHelper("correlation", **locals()) + output = helper.create_variable_for_type_inference(dtype=x.dtype) + if paddle.fluid.in_dygraph_mode(): + attrs = ("pad_size", pad_size, "kernel_size", kernel_size, + "max_displacement", max_displacement, "stride1", stride1, + "stride2", stride2, "corr_type_multiply", corr_type_multiply) + output = getattr(core.ops, "correlation")(x, y, *attrs) + else: + helper.append_op( + type="correlation", + inputs={"Input1": x, + "Input2": y}, + attrs={ + "pad_size": pad_size, + "kernel_size": kernel_size, + "max_displacement": max_displacement, + "stride1": stride1, + "stride2": stride2, + "corr_type_multiply": corr_type_multiply + }, + outputs={"Output": output}) + return output diff --git a/python/paddle/fluid/contrib/tests/test_correlation.py b/python/paddle/fluid/contrib/tests/test_correlation.py new file mode 100644 index 0000000000000000000000000000000000000000..7fcef4dbcd1efd3655b6339ed5ec880d8cd33fc0 --- /dev/null +++ b/python/paddle/fluid/contrib/tests/test_correlation.py @@ -0,0 +1,163 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np +import paddle.fluid as fluid +from paddle.fluid.dygraph.base import to_variable + + +def corr(x_1, + x_2, + pad_size=4, + kernel_size=1, + max_displacement=4, + stride1=1, + stride2=1, + corr_multiply=1): + K = kernel_size + + rinput1 = np.pad(x_1, ((0, 0), (0, 0), (pad_size, pad_size), + (pad_size, pad_size)), + mode='constant') + rinput2 = np.pad(x_2, ((0, 0), (0, 0), (pad_size, pad_size), + (pad_size, pad_size)), + mode='constant') + rinput1 = np.transpose(rinput1, (0, 2, 3, 1)) + rinput2 = np.transpose(rinput2, (0, 2, 3, 1)) + B = int(rinput1.shape[0]) + H = int(x_1.shape[2]) + W = int(x_2.shape[3]) + d = max_displacement + D = 2 * d + 1 + output = np.zeros((B, D * D, H, W), dtype=np.float32) + + for b in range(B): + for i in range(H): + for j in range(W): + for k in range(-d, d + 1): + for l in range(-d, d + 1): + x1_index = i + pad_size + y1_index = j + pad_size + x2_index = x1_index + k + y2_index = y1_index + l + output[b, l + d + D * (k + d), i, j] = np.mean( + rinput1[b, x1_index:x1_index + K, y1_index:y1_index + + K] * rinput2[b, x2_index:x2_index + K, + y2_index:y2_index + K]) + + return output + + +class TestCorrelationOp(unittest.TestCase): + def test_check_output(self): + if not fluid.core.is_compiled_with_cuda(): + return + np.random.seed(13) + np.set_printoptions(threshold=np.inf) + x_shape = (2, 10, 3, 3) + x_type = 'float32' + x1 = fluid.layers.data( + name='x1', + shape=x_shape, + dtype=x_type, + append_batch_size=False, + stop_gradient=False) + x2 = fluid.layers.data( + name='x2', + shape=x_shape, + dtype=x_type, + append_batch_size=False, + stop_gradient=False) + + x1_np = np.random.randn(2, 3, 4, 5).astype(x_type) + x2_np = np.random.randn(2, 3, 4, 5).astype(x_type) + out_np = corr( + x1_np, + x2_np, + pad_size=4, + kernel_size=1, + max_displacement=4, + stride1=1, + stride2=1) + + out = fluid.contrib.correlation( + x1, + x2, + pad_size=4, + kernel_size=1, + max_displacement=4, + stride1=1, + stride2=1) + + loss = fluid.layers.reduce_mean(out) + optimizer = fluid.optimizer.Momentum(0.0001, 0.9) + optimizer.minimize(loss) + + place = fluid.CUDAPlace(0) + exe = fluid.Executor(place) + res = exe.run(feed={'x1': x1_np, + 'x2': x2_np}, + fetch_list=[out.name, loss.name]) + + self.assertTrue(np.allclose(res[0], out_np)) + + +class Net(fluid.dygraph.Layer): + def __init__(self, name_scope): + super(Net, self).__init__(name_scope) + + def forward(self, x1, x2): + y = fluid.contrib.correlation( + x1, + x2, + pad_size=4, + kernel_size=1, + max_displacement=4, + stride1=1, + stride2=1) + return y + + +class TestCorrelationOpDyGraph(unittest.TestCase): + def test_check_output(self): + if not fluid.core.is_compiled_with_cuda(): + return + np.random.seed(13) + np.set_printoptions(threshold=np.inf) + x_shape = (2, 10, 3, 3) + x_type = 'float32' + place = fluid.CUDAPlace(0) + with fluid.dygraph.guard(place): + x1_np = np.random.randn(2, 3, 4, 5).astype(x_type) + x2_np = np.random.randn(2, 3, 4, 5).astype(x_type) + out_np = corr( + x1_np, + x2_np, + pad_size=4, + kernel_size=1, + max_displacement=4, + stride1=1, + stride2=1) + + x1 = to_variable(x1_np) + x2 = to_variable(x2_np) + corr_pd = Net('corr_pd') + y = corr_pd(x1, x2) + out = y.numpy() + self.assertTrue(np.allclose(out, out_np)) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/function_spec.py b/python/paddle/fluid/dygraph/dygraph_to_static/function_spec.py index 90e38bd98863ff62174bd569a483b11984480b5a..37ce8b0a152ff8e258e8aee2a54ed7215f77c146 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/function_spec.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/function_spec.py @@ -135,6 +135,11 @@ class FunctionSpec(object): input_with_spec = pack_sequence_as(args, input_with_spec) + # If without specificing name in input_spec, add default name + # according to argument name from decorated function. + input_with_spec = replace_spec_empty_name(self._arg_names, + input_with_spec) + return input_with_spec @switch_to_static_graph @@ -309,3 +314,61 @@ def convert_to_input_spec(inputs, input_spec): raise TypeError( "The type(input_spec) should be a `InputSpec` or dict/list/tuple of it, but received {}.". type_name(input_spec)) + + +def replace_spec_empty_name(args_name, input_with_spec): + """ + Adds default name according to argument name from decorated function + if without specificing InputSpec.name + + The naming rule are as followed: + 1. If InputSpec.name is not None, do nothing. + 2. If each argument `x` corresponds to an InputSpec, using the argument name like `x` + 3. If the arguments `inputs` corresponds to a list(InputSpec), using name like `inputs_0`, `inputs_1` + 4. If the arguments `input_dic` corresponds to a dict(InputSpec), using key as name. + + For example: + + # case 1: foo(x, y) + foo = to_static(foo, input_spec=[InputSpec([None, 10]), InputSpec([None])]) + print([in_var.name for in_var in foo.inputs]) # [x, y] + + # case 2: foo(inputs) where inputs is a list + foo = to_static(foo, input_spec=[[InputSpec([None, 10]), InputSpec([None])]]) + print([in_var.name for in_var in foo.inputs]) # [inputs_0, inputs_1] + + # case 3: foo(inputs) where inputs is a dict + foo = to_static(foo, input_spec=[{'x': InputSpec([None, 10]), 'y': InputSpec([None])}]) + print([in_var.name for in_var in foo.inputs]) # [x, y] + """ + input_with_spec = list(input_with_spec) + candidate_arg_names = args_name[:len(input_with_spec)] + + for i, arg_name in enumerate(candidate_arg_names): + input_spec = input_with_spec[i] + input_with_spec[i] = _replace_spec_name(arg_name, input_spec) + + return input_with_spec + + +def _replace_spec_name(name, input_spec): + """ + Replaces InputSpec.name with given `name` while not specificing it. + """ + if isinstance(input_spec, paddle.static.InputSpec): + if input_spec.name is None: + input_spec.name = name + return input_spec + elif isinstance(input_spec, (list, tuple)): + processed_specs = [] + for i, spec in enumerate(input_spec): + new_name = "{}_{}".format(name, i) + processed_specs.append(_replace_spec_name(new_name, spec)) + return processed_specs + elif isinstance(input_spec, dict): + processed_specs = {} + for key, spec in six.iteritems(input_spec): + processed_specs[key] = _replace_spec_name(key, spec) + return processed_specs + else: + return input_spec diff --git a/python/paddle/fluid/dygraph/jit.py b/python/paddle/fluid/dygraph/jit.py index d520fe61888cf3b11efc61d67ce566a3407dc6ff..ad2088116243e3a0a75fb43b9bb34b19456c84de 100644 --- a/python/paddle/fluid/dygraph/jit.py +++ b/python/paddle/fluid/dygraph/jit.py @@ -37,7 +37,7 @@ from paddle.fluid.wrapped_decorator import wrap_decorator __all__ = [ 'TracedLayer', 'declarative', 'dygraph_to_static_func', 'set_code_level', - 'set_verbosity' + 'set_verbosity', 'save', 'load', 'SaveLoadConfig' ] diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 5281df9ead10acea5ae8656dcc4a0eed14fb3e83..797b32f5d4768af59fa4e6aceb75e4b6d9029d91 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -217,7 +217,7 @@ def _dygraph_not_support_(func): def _dygraph_only_(func): def __impl__(*args, **kwargs): assert in_dygraph_mode( - ), "We Only support %s in imperative mode, please use fluid.dygraph.guard() as context to run it in imperative Mode" % func.__name__ + ), "We Only support %s in dynamic mode, please call 'paddle.disable_static()' to enter dynamic mode." % func.__name__ return func(*args, **kwargs) return __impl__ diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/ir/vars_metatools.py b/python/paddle/fluid/incubate/fleet/parameter_server/ir/vars_metatools.py index c8f3643b25be0780bbdfd1668d849ab00ece355c..c80b4a800bd149cced13c0e25322cd03ef94e468 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/ir/vars_metatools.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/ir/vars_metatools.py @@ -12,9 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. from __future__ import print_function +from functools import reduce + from paddle.fluid.framework import Variable from paddle.fluid import core +dtype_to_size = { + core.VarDesc.VarType.FP16: 2, + core.VarDesc.VarType.FP32: 4, + core.VarDesc.VarType.FP64: 8, + core.VarDesc.VarType.INT16: 2, + core.VarDesc.VarType.INT32: 4, + core.VarDesc.VarType.INT64: 8, + core.VarDesc.VarType.BOOL: 1, + core.VarDesc.VarType.UINT8: 1, +} + class VarBlock: def __init__(self, varname, offset, size): @@ -51,11 +64,14 @@ class VarStruct(object): self.type = type self.lod_level = lod_level self.persistable = persistable + self.m_size = 1 + self.m_size = reduce(lambda x, y: x * y, shape) + self.m_size *= dtype_to_size[dtype] def __str__(self): - return "N: {}, S: {}, D: {}, T: {}, LL: {}, P: {}".format( + return "N: {}, S: {}, D: {}, T: {}, LL: {}, P: {}, M: {}".format( self.name, self.shape, self.dtype, self.type, self.lod_level, - self.persistable) + self.persistable, self.m_size) class VarDistributed(object): diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index ea6abe2d335e6669b27ba278c0faaca62ca0fdbb..bf87d1fc5a947e48845a3783fd71922641e28819 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -20,7 +20,8 @@ from __future__ import print_function from .layer_function_generator import generate_layer_fn from .layer_function_generator import autodoc, templatedoc from ..layer_helper import LayerHelper -from ..framework import Variable +from ..framework import Variable, in_dygraph_mode +from .. import core from .loss import softmax_with_cross_entropy from . import tensor from . import nn @@ -2893,8 +2894,8 @@ def generate_proposals(scores, nms_thresh=0.5, min_size=0.1, eta=1.0, - name=None, - return_rois_num=False): + return_rois_num=False, + name=None): """ :alias_main: paddle.nn.functional.generate_proposals :alias: paddle.nn.functional.generate_proposals,paddle.nn.functional.vision.generate_proposals @@ -2949,6 +2950,10 @@ def generate_proposals(scores, num of each image in one batch. The N is the image's num. For example, the tensor has values [4,5] that represents the first image has 4 Rois, the second image has 5 Rois. It only used in rcnn model. 'False' by default. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + Returns: tuple: A tuple with format ``(rpn_rois, rpn_roi_probs)``. @@ -2969,6 +2974,14 @@ def generate_proposals(scores, im_info, anchors, variances) """ + if in_dygraph_mode(): + assert return_rois_num, "return_rois_num should be True in dygraph mode." + attrs = ('pre_nms_topN', pre_nms_top_n, 'post_nms_topN', post_nms_top_n, + 'nms_thresh', nms_thresh, 'min_size', min_size, 'eta', eta) + rpn_rois, rpn_roi_probs, rpn_rois_num = core.ops.generate_proposals( + scores, bbox_deltas, im_info, anchors, variances, *attrs) + return rpn_rois, rpn_roi_probs, rpn_rois_num + helper = LayerHelper('generate_proposals', **locals()) check_variable_and_dtype(scores, 'scores', ['float32'], @@ -2986,7 +2999,14 @@ def generate_proposals(scores, dtype=bbox_deltas.dtype) rpn_roi_probs = helper.create_variable_for_type_inference( dtype=scores.dtype) - rpn_rois_lod = helper.create_variable_for_type_inference(dtype='int32') + outputs = { + 'RpnRois': rpn_rois, + 'RpnRoiProbs': rpn_roi_probs, + } + if return_rois_num: + rpn_rois_num = helper.create_variable_for_type_inference(dtype='int32') + rpn_rois_num.stop_gradient = True + outputs['RpnRoisNum'] = rpn_rois_num helper.append_op( type="generate_proposals", @@ -3004,17 +3024,12 @@ def generate_proposals(scores, 'min_size': min_size, 'eta': eta }, - outputs={ - 'RpnRois': rpn_rois, - 'RpnRoiProbs': rpn_roi_probs, - 'RpnRoisLod': rpn_rois_lod - }) + outputs=outputs) rpn_rois.stop_gradient = True rpn_roi_probs.stop_gradient = True - rpn_rois_lod.stop_gradient = True if return_rois_num: - return rpn_rois, rpn_roi_probs, rpn_rois_lod + return rpn_rois, rpn_roi_probs, rpn_rois_num else: return rpn_rois, rpn_roi_probs @@ -3656,6 +3671,7 @@ def distribute_fpn_proposals(fpn_rois, max_level, refer_level, refer_scale, + rois_num=None, name=None): """ :alias_main: paddle.nn.functional.distribute_fpn_proposals @@ -3687,6 +3703,11 @@ def distribute_fpn_proposals(fpn_rois, come from. refer_level(int32): The referring level of FPN layer with specified scale. refer_scale(int32): The referring scale of FPN layer with specified level. + rois_num(Tensor): 1-D Tensor contains the number of RoIs in each image. + The shape is [B] and data type is int32. B is the number of images. + If it is not None then return a list of 1-D Tensor. Each element + is the output RoIs' number of each image on the corresponding level + and the shape is [B]. None by default. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. @@ -3702,6 +3723,10 @@ def distribute_fpn_proposals(fpn_rois, the number of total rois. The data type is int32. It is used to restore the order of fpn_rois. + rois_num_per_level(List): A list of 1-D Tensor and each Tensor is + the RoIs' number in each image on the corresponding level. The shape + is [B] and data type of int32. B is the number of images + Examples: .. code-block:: python @@ -3716,26 +3741,52 @@ def distribute_fpn_proposals(fpn_rois, refer_level=4, refer_scale=224) """ + num_lvl = max_level - min_level + 1 + + if in_dygraph_mode(): + assert rois_num is not None, "rois_num should not be None in dygraph mode." + attrs = ('min_level', min_level, 'max_level', max_level, 'refer_level', + refer_level, 'refer_scale', refer_scale) + multi_rois, restore_ind, rois_num_per_level = core.ops.distribute_fpn_proposals( + fpn_rois, rois_num, num_lvl, num_lvl, *attrs) + return multi_rois, restore_ind, rois_num_per_level + check_variable_and_dtype(fpn_rois, 'fpn_rois', ['float32', 'float64'], 'distribute_fpn_proposals') helper = LayerHelper('distribute_fpn_proposals', **locals()) dtype = helper.input_dtype('fpn_rois') - num_lvl = max_level - min_level + 1 multi_rois = [ helper.create_variable_for_type_inference(dtype) for i in range(num_lvl) ] + restore_ind = helper.create_variable_for_type_inference(dtype='int32') + + inputs = {'FpnRois': fpn_rois} + outputs = { + 'MultiFpnRois': multi_rois, + 'RestoreIndex': restore_ind, + } + + if rois_num is not None: + inputs['RoisNum'] = rois_num + rois_num_per_level = [ + helper.create_variable_for_type_inference(dtype='int32') + for i in range(num_lvl) + ] + outputs['MultiLevelRoIsNum'] = rois_num_per_level + helper.append_op( type='distribute_fpn_proposals', - inputs={'FpnRois': fpn_rois}, - outputs={'MultiFpnRois': multi_rois, - 'RestoreIndex': restore_ind}, + inputs=inputs, + outputs=outputs, attrs={ 'min_level': min_level, 'max_level': max_level, 'refer_level': refer_level, 'refer_scale': refer_scale }) + if rois_num is not None: + return multi_rois, restore_ind, rois_num_per_level return multi_rois, restore_ind @@ -3820,6 +3871,7 @@ def collect_fpn_proposals(multi_rois, min_level, max_level, post_nms_top_n, + rois_num_per_level=None, name=None): """ :alias_main: paddle.nn.functional.collect_fpn_proposals @@ -3846,6 +3898,12 @@ def collect_fpn_proposals(multi_rois, min_level(int): The lowest level of FPN layer to collect max_level(int): The highest level of FPN layer to collect post_nms_top_n(int): The number of selected RoIs + rois_num_per_level(list, optional): The List of RoIs' numbers. + Each element is 1-D Tensor which contains the RoIs' number of each + image on each level and the shape is [B] and data type is + int32, B is the number of images. If it is not None then return + a 1-D Tensor contains the output RoIs' number of each image and + the shape is [B]. Default: None name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. @@ -3856,6 +3914,9 @@ def collect_fpn_proposals(multi_rois, fpn_rois(Variable): 2-D LoDTensor with shape [N, 4] and data type is float32 or float64. Selected RoIs. + rois_num(Tensor): 1-D Tensor contains the RoIs's number of each + image. The shape is [B] and data type is int32. B is the number of + images. Examples: .. code-block:: python @@ -3879,21 +3940,38 @@ def collect_fpn_proposals(multi_rois, """ check_type(multi_rois, 'multi_rois', list, 'collect_fpn_proposals') check_type(multi_scores, 'multi_scores', list, 'collect_fpn_proposals') + num_lvl = max_level - min_level + 1 + input_rois = multi_rois[:num_lvl] + input_scores = multi_scores[:num_lvl] + + if in_dygraph_mode(): + assert rois_num_per_level is not None, "rois_num_per_level should not be None in dygraph mode." + attrs = ('post_nms_topN', post_nms_top_n) + output_rois, rois_num = core.ops.collect_fpn_proposals( + input_rois, input_scores, rois_num_per_level, *attrs) + helper = LayerHelper('collect_fpn_proposals', **locals()) dtype = helper.input_dtype('multi_rois') check_dtype(dtype, 'multi_rois', ['float32', 'float64'], 'collect_fpn_proposals') - num_lvl = max_level - min_level + 1 - input_rois = multi_rois[:num_lvl] - input_scores = multi_scores[:num_lvl] output_rois = helper.create_variable_for_type_inference(dtype) output_rois.stop_gradient = True + + inputs = { + 'MultiLevelRois': input_rois, + 'MultiLevelScores': input_scores, + } + outputs = {'FpnRois': output_rois} + if rois_num_per_level is not None: + inputs['MultiLevelRoIsNum'] = rois_num_per_level + rois_num = helper.create_variable_for_type_inference(dtype='int32') + rois_num.stop_gradient = True + outputs['RoisNum'] = rois_num helper.append_op( type='collect_fpn_proposals', - inputs={ - 'MultiLevelRois': input_rois, - 'MultiLevelScores': input_scores - }, - outputs={'FpnRois': output_rois}, + inputs=inputs, + outputs=outputs, attrs={'post_nms_topN': post_nms_top_n}) + if rois_num_per_level is not None: + return output_rois, rois_num return output_rois diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 70f48e82fdf6564698098cbdf6e1290f7fba0e18..5a14b9fdc7b6d963f77eefc29476bc332f3938df 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -6862,7 +6862,8 @@ def roi_pool(input, pooled_height=1, pooled_width=1, spatial_scale=1.0, - rois_lod=None): + rois_num=None, + name=None): """ :alias_main: paddle.nn.functional.roi_pool :alias: paddle.nn.functional.roi_pool,paddle.nn.functional.vision.roi_pool @@ -6882,10 +6883,14 @@ def roi_pool(input, Args: input (Variable): Input feature, 4D-Tensor with the shape of [N,C,H,W], where N is the batch size, C is the input channel, H is Height, W is weight. The data type is float32 or float64. rois (Variable): ROIs (Regions of Interest) to pool over. 2D-LoDTensor with the shape of [num_rois,4], the lod level is 1. Given as [[x1, y1, x2, y2], ...], (x1, y1) is the top left coordinates, and (x2, y2) is the bottom right coordinates. - rois_lod (Variable): The lod info of rois. Default: None pooled_height (int, optional): The pooled output height, data type is int32. Default: 1 pooled_width (int, optional): The pooled output height, data type is int32. Default: 1 spatial_scale (float, optional): Multiplicative spatial scale factor to translate ROI coords from their input scale to the scale used when pooling. Default: 1.0 + rois_num (Tensor): The number of RoIs in each image. Default: None + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + Returns: Variable: The pooled feature, 4D-Tensor with the shape of [num_rois, C, pooled_height, pooled_width]. @@ -6905,11 +6910,11 @@ def roi_pool(input, input_data = np.array([i for i in range(1,17)]).reshape(1,1,4,4).astype(DATATYPE) roi_data =fluid.create_lod_tensor(np.array([[1., 1., 2., 2.], [1.5, 1.5, 3., 3.]]).astype(DATATYPE),[[2]], place) - rois_lod_data = np.array([0, 2]) + rois_num_data = np.array([2]).astype('int32') x = fluid.data(name='input', shape=[None,1,4,4], dtype=DATATYPE) rois = fluid.data(name='roi', shape=[None,4], dtype=DATATYPE) - rois_lod = fluid.data(name='rois_lod', shape=[None], dtype='int64') + rois_num = fluid.data(name='rois_num', shape=[None], dtype='int32') pool_out = fluid.layers.roi_pool( input=x, @@ -6917,24 +6922,36 @@ def roi_pool(input, pooled_height=1, pooled_width=1, spatial_scale=1.0, - rois_lod=rois_lod) + rois_num=rois_num) exe = fluid.Executor(place) - out, = exe.run(feed={'input':input_data ,'roi':roi_data, 'rois_lod': rois_lod_data}, fetch_list=[pool_out.name]) + out, = exe.run(feed={'input':input_data ,'roi':roi_data, 'rois_num': rois_num_data}, fetch_list=[pool_out.name]) print(out) #array([[[[11.]]], [[[16.]]]], dtype=float32) print(np.array(out).shape) # (2, 1, 1, 1) """ + if in_dygraph_mode(): + assert rois_num is not None, "rois_num should not be None in dygraph mode." + pool_out, argmaxes = core.ops.roi_pool( + input, rois, rois_num, "pooled_height", pooled_height, + "pooled_width", pooled_width, "spatial_scale", spatial_scale) + return pool_out, argmaxes + check_variable_and_dtype(input, 'input', ['float32'], 'roi_pool') check_variable_and_dtype(rois, 'rois', ['float32'], 'roi_pool') helper = LayerHelper('roi_pool', **locals()) dtype = helper.input_dtype() pool_out = helper.create_variable_for_type_inference(dtype) argmaxes = helper.create_variable_for_type_inference(dtype='int32') + + inputs = { + "X": input, + "ROIs": rois, + } + if rois_num is not None: + inputs['RoisNum'] = rois_num helper.append_op( type="roi_pool", - inputs={"X": input, - "ROIs": rois, - "RoisLod": rois_lod}, + inputs=inputs, outputs={"Out": pool_out, "Argmax": argmaxes}, attrs={ @@ -6952,8 +6969,8 @@ def roi_align(input, pooled_width=1, spatial_scale=1.0, sampling_ratio=-1, - name=None, - rois_lod=None): + rois_num=None, + name=None): """ :alias_main: paddle.nn.functional.roi_align :alias: paddle.nn.functional.roi_align,paddle.nn.functional.vision.roi_align @@ -6968,11 +6985,11 @@ def roi_align(input, data type is float32 or float64. Given as [[x1, y1, x2, y2], ...], (x1, y1) is the top left coordinates, and (x2, y2) is the bottom right coordinates. - rois_lod (Variable): The lod info of rois. Default: None pooled_height (int32, optional): ${pooled_height_comment} Default: 1 pooled_width (int32, optional): ${pooled_width_comment} Default: 1 spatial_scale (float32, optional): ${spatial_scale_comment} Default: 1.0 sampling_ratio(int32, optional): ${sampling_ratio_comment} Default: -1 + rois_num (Tensor): The number of RoIs in each image. Default: None name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. @@ -6991,26 +7008,38 @@ def roi_align(input, name='data', shape=[None, 256, 32, 32], dtype='float32') rois = fluid.data( name='rois', shape=[None, 4], dtype='float32') - rois_lod = fluid.data(name='rois_lod', shape=[None], dtype='int64') + rois_num = fluid.data(name='rois_num', shape=[None], dtype='int32') align_out = fluid.layers.roi_align(input=x, rois=rois, pooled_height=7, pooled_width=7, spatial_scale=0.5, sampling_ratio=-1, - rois_lod=rois_lod) + rois_num=rois_num) """ + if in_dygraph_mode(): + assert rois_num is not None, "rois_num should not be None in dygraph mode." + align_out = core.ops.roi_align( + input, rois, rois_num, "pooled_height", pooled_height, + "pooled_width", pooled_width, "spatial_scale", spatial_scale, + "sampling_ratio", sampling_ratio) + return align_out + check_variable_and_dtype(input, 'input', ['float32', 'float64'], 'roi_align') check_variable_and_dtype(rois, 'rois', ['float32', 'float64'], 'roi_align') helper = LayerHelper('roi_align', **locals()) dtype = helper.input_dtype() align_out = helper.create_variable_for_type_inference(dtype) + inputs = { + "X": input, + "ROIs": rois, + } + if rois_num is not None: + inputs['RoisNum'] = rois_num helper.append_op( type="roi_align", - inputs={"X": input, - "ROIs": rois, - "RoisLod": rois_lod}, + inputs=inputs, outputs={"Out": align_out}, attrs={ "pooled_height": pooled_height, @@ -10850,8 +10879,7 @@ def slice(input, axes, starts, ends): result = [ [2, 3, 4], ] # result = data[0:1, 1:4] Args: input (Variable): A ``Tensor`` or ``LoDTensor`` . The data type is ``float16``, ``float32``, ``float64``, ``int32`` or ``int64``. - axes (list|tuple): The data type is ``int32`` . Axes that `starts` and `ends` apply to. - It's optional. If it is not provides, it will be treated as :math:`[0,1,...,len(starts)-1]`. + axes (list|tuple): The data type is ``int32`` . Axes that `starts` and `ends` apply to . starts (list|tuple|Variable): The data type is ``int32`` . If ``starts`` is a list or tuple, the elements of it should be integers or Tensors with shape [1]. If ``starts`` is an Variable, it should be an 1-D Tensor. It represents starting indices of corresponding axis in ``axes``. diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index a90551c1b7b4fd45ae9a0e1cfa225a87db811295..89acfc6075be0b625da04d187cd46dd47ac699c9 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -36,7 +36,7 @@ __all__ = [ 'tensor_array_to_tensor', 'concat', 'sums', 'assign', 'fill_constant_batch_size_like', 'fill_constant', 'argmin', 'argmax', 'argsort', 'ones', 'zeros', 'reverse', 'has_inf', 'has_nan', 'isfinite', - 'range', 'linspace', 'zeros_like', 'ones_like', 'diag', 'eye' + 'range', 'linspace', 'zeros_like', 'ones_like', 'diag', 'eye', 'triu' ] @@ -1725,3 +1725,9 @@ def ones_like(x, out=None): attrs={'value': 1.0}, outputs={'Out': [out]}) return out + + +@deprecated(since="2.0.0", update_to="paddle.triu") +def triu(input, diagonal=0, name=None): + import paddle + return paddle.tensor.triu(x=input, diagonal=diagonal, name=name) diff --git a/python/paddle/fluid/tests/test_detection.py b/python/paddle/fluid/tests/test_detection.py index a1526934f4aa1415c529e79bfa8dea6c0754bea9..425c4e3c7e38cff2f892eff28428082b57b3727d 100644 --- a/python/paddle/fluid/tests/test_detection.py +++ b/python/paddle/fluid/tests/test_detection.py @@ -19,6 +19,57 @@ import paddle.fluid.layers as layers from paddle.fluid.layers import detection from paddle.fluid.framework import Program, program_guard import unittest +import contextlib +import numpy as np +from unittests.test_imperative_base import new_program_scope +from paddle.fluid.dygraph import base +from paddle.fluid import core + + +class LayerTest(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.seed = 111 + + @classmethod + def tearDownClass(cls): + pass + + def _get_place(self, force_to_use_cpu=False): + # this option for ops that only have cpu kernel + if force_to_use_cpu: + return core.CPUPlace() + else: + if core.is_compiled_with_cuda(): + return core.CUDAPlace(0) + return core.CPUPlace() + + @contextlib.contextmanager + def static_graph(self): + with new_program_scope(): + fluid.default_startup_program().random_seed = self.seed + fluid.default_main_program().random_seed = self.seed + yield + + def get_static_graph_result(self, + feed, + fetch_list, + with_lod=False, + force_to_use_cpu=False): + exe = fluid.Executor(self._get_place(force_to_use_cpu)) + exe.run(fluid.default_startup_program()) + return exe.run(fluid.default_main_program(), + feed=feed, + fetch_list=fetch_list, + return_numpy=(not with_lod)) + + @contextlib.contextmanager + def dynamic_graph(self, force_to_use_cpu=False): + with fluid.dygraph.guard( + self._get_place(force_to_use_cpu=force_to_use_cpu)): + fluid.default_startup_program().random_seed = self.seed + fluid.default_main_program().random_seed = self.seed + yield class TestDetection(unittest.TestCase): @@ -481,45 +532,67 @@ class TestRpnTargetAssign(unittest.TestCase): print(str(program)) -class TestGenerateProposals(unittest.TestCase): +class TestGenerateProposals(LayerTest): def test_generate_proposals(self): - program = Program() - with program_guard(program): - data_shape = [20, 64, 64] - images = fluid.layers.data( - name='images', shape=data_shape, dtype='float32') - im_info = fluid.layers.data( - name='im_info', shape=[3], dtype='float32') - anchors, variances = fluid.layers.anchor_generator( - name='anchor_generator', - input=images, - anchor_sizes=[32, 64], - aspect_ratios=[1.0], - variance=[0.1, 0.1, 0.2, 0.2], - stride=[16.0, 16.0], - offset=0.5) - num_anchors = anchors.shape[2] - scores = fluid.layers.data( - name='scores', shape=[num_anchors, 8, 8], dtype='float32') - bbox_deltas = fluid.layers.data( - name='bbox_deltas', - shape=[num_anchors * 4, 8, 8], - dtype='float32') - rpn_rois, rpn_roi_probs = fluid.layers.generate_proposals( - name='generate_proposals', - scores=scores, - bbox_deltas=bbox_deltas, - im_info=im_info, - anchors=anchors, - variances=variances, - pre_nms_top_n=6000, - post_nms_top_n=1000, - nms_thresh=0.5, - min_size=0.1, - eta=1.0) - self.assertIsNotNone(rpn_rois) - self.assertIsNotNone(rpn_roi_probs) - print(rpn_rois.shape) + scores_np = np.random.rand(2, 3, 4, 4).astype('float32') + bbox_deltas_np = np.random.rand(2, 12, 4, 4).astype('float32') + im_info_np = np.array([[8, 8, 0.5], [6, 6, 0.5]]).astype('float32') + anchors_np = np.reshape(np.arange(4 * 4 * 3 * 4), + [4, 4, 3, 4]).astype('float32') + variances_np = np.ones((4, 4, 3, 4)).astype('float32') + + with self.static_graph(): + scores = fluid.data( + name='scores', shape=[2, 3, 4, 4], dtype='float32') + bbox_deltas = fluid.data( + name='bbox_deltas', shape=[2, 12, 4, 4], dtype='float32') + im_info = fluid.data(name='im_info', shape=[2, 3], dtype='float32') + anchors = fluid.data( + name='anchors', shape=[4, 4, 3, 4], dtype='float32') + variances = fluid.data( + name='var', shape=[4, 4, 3, 4], dtype='float32') + rois, roi_probs, rois_num = fluid.layers.generate_proposals( + scores, + bbox_deltas, + im_info, + anchors, + variances, + pre_nms_top_n=10, + post_nms_top_n=5, + return_rois_num=True) + rois_stat, roi_probs_stat, rois_num_stat = self.get_static_graph_result( + feed={ + 'scores': scores_np, + 'bbox_deltas': bbox_deltas_np, + 'im_info': im_info_np, + 'anchors': anchors_np, + 'var': variances_np + }, + fetch_list=[rois, roi_probs, rois_num], + with_lod=True) + + with self.dynamic_graph(): + scores_dy = base.to_variable(scores_np) + bbox_deltas_dy = base.to_variable(bbox_deltas_np) + im_info_dy = base.to_variable(im_info_np) + anchors_dy = base.to_variable(anchors_np) + variances_dy = base.to_variable(variances_np) + rois, roi_probs, rois_num = fluid.layers.generate_proposals( + scores_dy, + bbox_deltas_dy, + im_info_dy, + anchors_dy, + variances_dy, + pre_nms_top_n=10, + post_nms_top_n=5, + return_rois_num=True) + rois_dy = rois.numpy() + roi_probs_dy = roi_probs.numpy() + rois_num_dy = rois_num.numpy() + + self.assertTrue(np.array_equal(np.array(rois_stat), rois_dy)) + self.assertTrue(np.array_equal(np.array(roi_probs_stat), roi_probs_dy)) + self.assertTrue(np.array_equal(np.array(rois_num_stat), rois_num_dy)) class TestYoloDetection(unittest.TestCase): @@ -648,30 +721,81 @@ class TestMulticlassNMS2(unittest.TestCase): self.assertIsNotNone(index) -class TestCollectFpnPropsals(unittest.TestCase): +class TestCollectFpnPropsals(LayerTest): def test_collect_fpn_proposals(self): - program = Program() - with program_guard(program): + multi_bboxes_np = [] + multi_scores_np = [] + rois_num_per_level_np = [] + for i in range(4): + bboxes_np = np.random.rand(5, 4).astype('float32') + scores_np = np.random.rand(5, 1).astype('float32') + rois_num = np.array([2, 3]).astype('int32') + multi_bboxes_np.append(bboxes_np) + multi_scores_np.append(scores_np) + rois_num_per_level_np.append(rois_num) + + with self.static_graph(): multi_bboxes = [] multi_scores = [] + rois_num_per_level = [] for i in range(4): - bboxes = layers.data( + bboxes = fluid.data( name='rois' + str(i), - shape=[10, 4], + shape=[5, 4], dtype='float32', - lod_level=1, - append_batch_size=False) - scores = layers.data( + lod_level=1) + scores = fluid.data( name='scores' + str(i), - shape=[10, 1], + shape=[5, 1], dtype='float32', - lod_level=1, - append_batch_size=False) + lod_level=1) + rois_num = fluid.data( + name='rois_num' + str(i), shape=[None], dtype='int32') + multi_bboxes.append(bboxes) multi_scores.append(scores) - fpn_rois = layers.collect_fpn_proposals(multi_bboxes, multi_scores, - 2, 5, 10) - self.assertIsNotNone(fpn_rois) + rois_num_per_level.append(rois_num) + + fpn_rois, rois_num = layers.collect_fpn_proposals( + multi_bboxes, + multi_scores, + 2, + 5, + 10, + rois_num_per_level=rois_num_per_level) + feed = {} + for i in range(4): + feed['rois' + str(i)] = multi_bboxes_np[i] + feed['scores' + str(i)] = multi_scores_np[i] + feed['rois_num' + str(i)] = rois_num_per_level_np[i] + fpn_rois_stat, rois_num_stat = self.get_static_graph_result( + feed=feed, fetch_list=[fpn_rois, rois_num], with_lod=True) + fpn_rois_stat = np.array(fpn_rois_stat) + rois_num_stat = np.array(rois_num_stat) + + with self.dynamic_graph(): + multi_bboxes_dy = [] + multi_scores_dy = [] + rois_num_per_level_dy = [] + for i in range(4): + bboxes_dy = base.to_variable(multi_bboxes_np[i]) + scores_dy = base.to_variable(multi_scores_np[i]) + rois_num_dy = base.to_variable(rois_num_per_level_np[i]) + multi_bboxes_dy.append(bboxes_dy) + multi_scores_dy.append(scores_dy) + rois_num_per_level_dy.append(rois_num_dy) + fpn_rois_dy, rois_num_dy = fluid.layers.collect_fpn_proposals( + multi_bboxes_dy, + multi_scores_dy, + 2, + 5, + 10, + rois_num_per_level=rois_num_per_level_dy) + fpn_rois_dy = fpn_rois_dy.numpy() + rois_num_dy = rois_num_dy.numpy() + + self.assertTrue(np.array_equal(fpn_rois_stat, fpn_rois_dy)) + self.assertTrue(np.array_equal(rois_num_stat, rois_num_dy)) def test_collect_fpn_proposals_error(self): def generate_input(bbox_type, score_type, name): @@ -717,20 +841,51 @@ class TestCollectFpnPropsals(unittest.TestCase): post_nms_top_n=2000) -class TestDistributeFpnProposals(unittest.TestCase): +class TestDistributeFpnProposals(LayerTest): def test_distribute_fpn_proposals(self): - program = Program() - with program_guard(program): - fpn_rois = fluid.layers.data( - name='data', shape=[4], dtype='float32', lod_level=1) - multi_rois, restore_ind = layers.distribute_fpn_proposals( - fpn_rois=fpn_rois, + rois_np = np.random.rand(10, 4).astype('float32') + rois_num_np = np.array([4, 6]).astype('int32') + with self.static_graph(): + rois = fluid.data(name='rois', shape=[10, 4], dtype='float32') + rois_num = fluid.data(name='rois_num', shape=[None], dtype='int32') + multi_rois, restore_ind, rois_num_per_level = layers.distribute_fpn_proposals( + fpn_rois=rois, min_level=2, max_level=5, refer_level=4, - refer_scale=224) - self.assertIsNotNone(multi_rois) - self.assertIsNotNone(restore_ind) + refer_scale=224, + rois_num=rois_num) + fetch_list = multi_rois + [restore_ind] + rois_num_per_level + output_stat = self.get_static_graph_result( + feed={'rois': rois_np, + 'rois_num': rois_num_np}, + fetch_list=fetch_list, + with_lod=True) + output_stat_np = [] + for output in output_stat: + output_np = np.array(output) + if len(output_np) > 0: + output_stat_np.append(output_np) + + with self.dynamic_graph(): + rois_dy = base.to_variable(rois_np) + rois_num_dy = base.to_variable(rois_num_np) + multi_rois_dy, restore_ind_dy, rois_num_per_level_dy = layers.distribute_fpn_proposals( + fpn_rois=rois_dy, + min_level=2, + max_level=5, + refer_level=4, + refer_scale=224, + rois_num=rois_num_dy) + output_dy = multi_rois_dy + [restore_ind_dy] + rois_num_per_level_dy + output_dy_np = [] + for output in output_dy: + output_np = output.numpy() + if len(output_np) > 0: + output_dy_np.append(output_np) + + for res_stat, res_dy in zip(output_stat_np, output_dy_np): + self.assertTrue(np.array_equal(res_stat, res_dy)) def test_distribute_fpn_proposals_error(self): program = Program() diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 935813251930b8093dd0c8a9f11ac30772133d20..db472ec01662c2a93e6d7f4b2c2a577544b6a90a 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -440,6 +440,8 @@ if(WITH_DISTRIBUTE) # FIXME(seiriosX) will fix this list(REMOVE_ITEM DIST_TEST_OPS "test_dist_fleet_sparse_embedding_ctr") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_fleet_gloo") + list(REMOVE_ITEM DIST_TEST_OPS "test_dist_fleet_a_sync_optimizer_auto") + list(REMOVE_ITEM DIST_TEST_OPS "test_dist_fleet_ctr") py_test_modules(test_recv_save_op MODULES test_recv_save_op ENVS ${dist_ENVS}) py_test_modules(test_transpiler_ops MODULES test_transpiler_ops ENVS ${dist_ENVS}) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py index 0b8df63d666b6547d5dccfc2ce0b420d653cc544..eed02ea655ed406fe59d8117e4fd836dfe1bfd4e 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py @@ -47,8 +47,8 @@ class SimpleNet(Layer): return z @declarative(input_spec=[[InputSpec([None, 10]), InputSpec([None, 10])]]) - def func_with_list(self, l): - x, y, int_val = l + def func_with_list(self, l, int_val=1): + x, y = l z = x + y z = z + int_val return z @@ -60,10 +60,7 @@ class SimpleNet(Layer): def func_with_dict(self, d): x = d['x'] y = d['y'] - int_val = d['int_val'] - z = x + y - z = z + int_val return z @@ -131,10 +128,10 @@ class TestInputSpec(unittest.TestCase): self.assertTrue(len(net.add_func.program_cache) == 1) # 5. test input with list - out = net.func_with_list([x, y, int_val]) + out = net.func_with_list([x, y], int_val) # 6. test input with dict - out = net.func_with_dict({'x': x, 'y': y, 'int_val': int_val}) + out = net.func_with_dict({'x': x, 'y': y}) # 7. test input with lits contains dict int_np = np.ones([1]).astype('float32') @@ -293,6 +290,30 @@ class TestDifferentInputSpecCacheProgram(unittest.TestCase): foo_3.concrete_program +class TestInputDefaultName(unittest.TestCase): + def setUp(self): + paddle.disable_static() + self.net = SimpleNet() + + def assert_default_name(self, func_name, input_names): + decorated_func = getattr(self.net, func_name) + + spec_names = [x.name for x in decorated_func.inputs] + self.assertListEqual(spec_names, input_names) + + def test_common_input(self): + self.assert_default_name('forward', ['x']) + + def test_list_input(self): + self.assert_default_name('func_with_list', ['l_0', 'l_1']) + + def test_dict_input(self): + self.assert_default_name('func_with_dict', ['x', 'y']) + + def test_nest_input(self): + self.assert_default_name('func_with_list_dict', ['dl_0', 'x', 'y']) + + class TestDeclarativeAPI(unittest.TestCase): def test_error(self): func = declarative(dyfunc_to_variable) diff --git a/python/paddle/fluid/tests/unittests/test_collect_fpn_proposals_op.py b/python/paddle/fluid/tests/unittests/test_collect_fpn_proposals_op.py index 034bb7f8dc7e00a321b6c6a5a4776fa4f7398ab5..a2f56c428012c615dcf55b6832a54ca473771d08 100644 --- a/python/paddle/fluid/tests/unittests/test_collect_fpn_proposals_op.py +++ b/python/paddle/fluid/tests/unittests/test_collect_fpn_proposals_op.py @@ -33,10 +33,14 @@ class TestCollectFPNProposalstOp(OpTest): for i in range(self.num_level)] self.inputs = { 'MultiLevelRois': inputs_x, - "MultiLevelScores": self.scores_input + "MultiLevelScores": self.scores_input, + 'MultiLevelRoIsNum': [] } self.attrs = {'post_nms_topN': self.post_nms_top_n, } - self.outputs = {'FpnRois': (self.rois, [self.lod])} + self.outputs = { + 'FpnRois': (self.rois, [self.lod]), + 'RoisNum': np.array(self.lod).astype('int32') + } def init_test_case(self): self.post_nms_top_n = 20 @@ -96,5 +100,32 @@ class TestCollectFPNProposalstOp(OpTest): self.check_output(check_dygraph=False) +class TestCollectFPNProposalstOpWithRoisNum(TestCollectFPNProposalstOp): + def set_data(self): + self.init_test_case() + self.make_rois() + self.scores_input = [('y%d' % i, + (self.scores[i].reshape(-1, 1), self.rois_lod[i])) + for i in range(self.num_level)] + self.rois, self.lod = self.calc_rois_collect() + inputs_x = [('x%d' % i, (self.roi_inputs[i][:, 1:], self.rois_lod[i])) + for i in range(self.num_level)] + rois_num_per_level = [ + ('rois%d' % i, np.array(self.rois_lod[i][0]).astype('int32')) + for i in range(self.num_level) + ] + + self.inputs = { + 'MultiLevelRois': inputs_x, + "MultiLevelScores": self.scores_input, + 'MultiLevelRoIsNum': rois_num_per_level + } + self.attrs = {'post_nms_topN': self.post_nms_top_n, } + self.outputs = { + 'FpnRois': (self.rois, [self.lod]), + 'RoisNum': np.array(self.lod).astype('int32') + } + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py new file mode 100644 index 0000000000000000000000000000000000000000..ab47659a88de44f60291ef056c0fd5ed2e01b5f2 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py @@ -0,0 +1,143 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import paddle +import os +import paddle.distributed.fleet.base.role_maker as role_maker +import time + + +class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): + def setUp(self): + os.environ["PADDLE_PSERVER_NUMS"] = "2" + os.environ["PADDLE_TRAINERS_NUM"] = "2" + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_PORT"] = "36001" + os.environ["PADDLE_TRAINER_ID"] = "0" + os.environ["PADDLE_TRAINERS_NUM"] = "2" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = \ + "127.0.0.1:36001,127.0.0.2:36001" + + def test_a_sync_optimizer1(self): + os.environ["TRAINING_ROLE"] = "TRAINER" + import paddle.distributed.fleet as fleet + + main_program = paddle.fluid.Program() + startup_program = paddle.fluid.Program() + + paddle.fluid.framework.switch_main_program(main_program) + paddle.fluid.framework.switch_startup_program(startup_program) + + fleet.init(role_maker.PaddleCloudRoleMaker()) + input_x = paddle.fluid.layers.data( + name="x", shape=[32], dtype='float32') + input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') + + fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') + fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') + prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') + cost = paddle.fluid.layers.cross_entropy( + input=prediction, label=input_y) + avg_cost = paddle.fluid.layers.mean(x=cost) + + strategy = paddle.distributed.fleet.DistributedStrategy() + strategy.auto = True + optimizer = paddle.fluid.optimizer.Adam(learning_rate=0.01) + optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) + optimizer.minimize(avg_cost) + + self.assertTrue(optimizer.user_defined_strategy.a_sync) + a_sync_configs = optimizer.user_defined_strategy.a_sync_configs + self.assertTrue(a_sync_configs['k_steps'] == 0) + + def test_a_sync_optimizer2(self): + os.environ["TRAINING_ROLE"] = "TRAINER" + import paddle.distributed.fleet as fleet + + main_program = paddle.fluid.Program() + startup_program = paddle.fluid.Program() + + paddle.fluid.framework.switch_main_program(main_program) + paddle.fluid.framework.switch_startup_program(startup_program) + + fleet.init(role_maker.PaddleCloudRoleMaker()) + input_x = paddle.fluid.layers.data( + name="x", shape=[32], dtype='float32') + input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') + + fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') + fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') + prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') + cost = paddle.fluid.layers.cross_entropy( + input=prediction, label=input_y) + avg_cost = paddle.fluid.layers.mean(x=cost) + + strategy = paddle.distributed.fleet.DistributedStrategy() + strategy.auto = True + optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) + optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) + optimizer.minimize(avg_cost) + + self.assertTrue(optimizer.user_defined_strategy.a_sync) + a_sync_configs = optimizer.user_defined_strategy.a_sync_configs + self.assertTrue(a_sync_configs['k_steps'] == 800) + + def test_a_sync_optimizer3(self): + os.environ["TRAINING_ROLE"] = "TRAINER" + import paddle.distributed.fleet as fleet + + main_program = paddle.fluid.Program() + startup_program = paddle.fluid.Program() + + paddle.fluid.framework.switch_main_program(main_program) + paddle.fluid.framework.switch_startup_program(startup_program) + + fleet.init(role_maker.PaddleCloudRoleMaker()) + input_x = paddle.fluid.layers.data( + name="x", + shape=[-1, 1], + dtype="int64", + lod_level=1, + append_batch_size=False) + x_embedding = paddle.fluid.layers.embedding( + is_distributed=False, + input=input_x, + size=[1000000000, 100000], + param_attr=paddle.fluid.ParamAttr( + name="embedding", + initializer=paddle.fluid.initializer.Constant(value=0.01)), + is_sparse=True) + input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') + + fc_1 = paddle.fluid.layers.fc(input=x_embedding, size=64, act='tanh') + fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') + prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') + cost = paddle.fluid.layers.cross_entropy( + input=prediction, label=input_y) + avg_cost = paddle.fluid.layers.mean(x=cost) + + strategy = paddle.distributed.fleet.DistributedStrategy() + strategy.auto = True + optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) + optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) + optimizer.minimize(avg_cost) + + self.assertTrue(optimizer.user_defined_strategy.a_sync) + a_sync_configs = optimizer.user_defined_strategy.a_sync_configs + self.assertTrue(a_sync_configs['k_steps'] == 0) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py index beb0069eb770f25d7834749ff9c188e5252e13c0..3a923dbf3f72e28c64c3f01d22d4d6f2d897f79b 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py @@ -76,9 +76,10 @@ class FleetDistRunnerBase(object): return role def build_strategy(self, args): - self.strategy = paddle.distributed.fleet.DistributedStrategy() - self.strategy.a_sync = False - if args.mode == "async": + if args.mode == "sync": + self.strategy = paddle.distributed.fleet.DistributedStrategy() + self.strategy.a_sync = False + elif args.mode == "async": self.strategy = paddle.distributed.fleet.DistributedStrategy() self.strategy.a_sync = True elif args.mode == "geo": @@ -87,6 +88,10 @@ class FleetDistRunnerBase(object): self.strategy.a_sync_configs = { "k_steps": args.geo_sgd_need_push_nums } + elif args.mode == "auto": + self.strategy = paddle.distributed.fleet.DistributedStrategy() + self.strategy.auto = True + self.dump_param = os.getenv("dump_param", "").split(",") self.dump_fields = os.getenv("dump_fields", "").split(",") self.dump_fields_path = os.getenv("dump_fields_path", "") @@ -232,14 +237,17 @@ class TestFleetBase(unittest.TestCase): tr0_pipe = open(tempfile.gettempdir() + "/tr0_err.log", "wb+") tr1_pipe = open(tempfile.gettempdir() + "/tr1_err.log", "wb+") + tr0_out = open(tempfile.gettempdir() + "/tr0_stdout.log", "wb+") + tr1_out = open(tempfile.gettempdir() + "/tr1_stdout.log", "wb+") + tr0_proc = subprocess.Popen( tr0_cmd.strip().split(" "), - stdout=subprocess.PIPE, + stdout=tr0_out, stderr=tr0_pipe, env=required_envs) tr1_proc = subprocess.Popen( tr1_cmd.strip().split(" "), - stdout=subprocess.PIPE, + stdout=tr1_out, stderr=tr1_pipe, env=required_envs) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py index e2336caac1c07f555280b82ba8fcfa7e5ec7f5b8..02ca0588e7452d44817f6c288ea9cf77b80dbfe8 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py @@ -52,6 +52,38 @@ class TestDistMnistSync2x2(TestFleetBase): "dist_fleet_ctr.py", delta=1e-5, check_error_log=True) +class TestDistMnistAuto2x2(TestFleetBase): + def _setup_config(self): + self._mode = "auto" + self._reader = "pyreader" + + def check_with_place(self, + model_file, + delta=1e-3, + check_error_log=False, + need_envs={}): + required_envs = { + "PATH": os.getenv("PATH", ""), + "PYTHONPATH": os.getenv("PYTHONPATH", ""), + "LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH", ""), + "FLAGS_rpc_deadline": "5000", # 5sec to fail fast + "http_proxy": "", + "CPU_NUM": "2" + } + + required_envs.update(need_envs) + + if check_error_log: + required_envs["GLOG_v"] = "3" + required_envs["GLOG_logtostderr"] = "1" + + tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) + + def test_dist_train(self): + self.check_with_place( + "dist_fleet_ctr.py", delta=1e-5, check_error_log=True) + + class TestDistMnistAsync2x2(TestFleetBase): def _setup_config(self): self._mode = "async" diff --git a/python/paddle/fluid/tests/unittests/test_distribute_fpn_proposals_op.py b/python/paddle/fluid/tests/unittests/test_distribute_fpn_proposals_op.py index 55b21f1a722f822f1bfcb7bbbda645109092b8a3..ec0125b28ed1b870025adbfd2bd4ba78244bcc11 100644 --- a/python/paddle/fluid/tests/unittests/test_distribute_fpn_proposals_op.py +++ b/python/paddle/fluid/tests/unittests/test_distribute_fpn_proposals_op.py @@ -35,9 +35,10 @@ class TestDistributeFPNProposalsOp(OpTest): } output = [('out%d' % i, self.rois_fpn[i]) for i in range(len(self.rois_fpn))] + self.outputs = { 'MultiFpnRois': output, - 'RestoreIndex': self.rois_idx_restore.reshape(-1, 1) + 'RestoreIndex': self.rois_idx_restore.reshape(-1, 1), } def init_test_case(self): @@ -117,5 +118,34 @@ class TestDistributeFPNProposalsOp(OpTest): self.check_output() +class TestDistributeFPNProposalsOpWithRoisNum(TestDistributeFPNProposalsOp): + def set_data(self): + self.init_test_case() + self.make_rois() + self.rois_fpn, self.rois_idx_restore = self.calc_rois_distribute() + self.inputs = { + 'FpnRois': (self.rois[:, 1:5], self.rois_lod), + 'RoisNum': np.array(self.rois_lod[0]).astype('int32') + } + self.attrs = { + 'max_level': self.roi_max_level, + 'min_level': self.roi_min_level, + 'refer_scale': self.canonical_scale, + 'refer_level': self.canonical_level + } + output = [('out%d' % i, self.rois_fpn[i]) + for i in range(len(self.rois_fpn))] + rois_num_per_level = [ + ('rois_num%d' % i, np.array(self.rois_fpn[i][1][0]).astype('int32')) + for i in range(len(self.rois_fpn)) + ] + + self.outputs = { + 'MultiFpnRois': output, + 'RestoreIndex': self.rois_idx_restore.reshape(-1, 1), + 'MultiLevelRoIsNum': rois_num_per_level + } + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_generate_proposals_op.py b/python/paddle/fluid/tests/unittests/test_generate_proposals_op.py index ce561cd317c48228c4877a2b65b67fe049a0d84a..26fc01ca04506758599ac5d6fe6842984a8d7a9c 100644 --- a/python/paddle/fluid/tests/unittests/test_generate_proposals_op.py +++ b/python/paddle/fluid/tests/unittests/test_generate_proposals_op.py @@ -34,18 +34,18 @@ def generate_proposals_in_python(scores, bbox_deltas, im_info, anchors, rpn_rois = [] rpn_roi_probs = [] - lod = [] + rois_num = [] num_images = scores.shape[0] for img_idx in range(num_images): img_i_boxes, img_i_probs = proposal_for_one_image( im_info[img_idx, :], all_anchors, variances, bbox_deltas[img_idx, :, :, :], scores[img_idx, :, :, :], pre_nms_topN, post_nms_topN, nms_thresh, min_size, eta) - lod.append(img_i_probs.shape[0]) + rois_num.append(img_i_probs.shape[0]) rpn_rois.append(img_i_boxes) rpn_roi_probs.append(img_i_probs) - return rpn_rois, rpn_roi_probs, lod + return rpn_rois, rpn_roi_probs, rois_num def proposal_for_one_image(im_info, all_anchors, variances, bbox_deltas, scores, @@ -87,6 +87,10 @@ def proposal_for_one_image(im_info, all_anchors, variances, bbox_deltas, scores, proposals = clip_tiled_boxes(proposals, im_info[:2]) # remove predicted boxes with height or width < min_size keep = filter_boxes(proposals, min_size, im_info) + if len(keep) == 0: + proposals = np.zeros((1, 4)).astype('float32') + scores = np.zeros((1, 1)).astype('float32') + return proposals, scores proposals = proposals[keep, :] scores = scores[keep, :] @@ -280,8 +284,8 @@ class TestGenerateProposalsOp(OpTest): } self.outputs = { - 'RpnRois': (self.rpn_rois[0], [self.lod]), - 'RpnRoiProbs': (self.rpn_roi_probs[0], [self.lod]), + 'RpnRois': (self.rpn_rois[0], [self.rois_num]), + 'RpnRoiProbs': (self.rpn_roi_probs[0], [self.rois_num]), } def test_check_output(self): @@ -320,7 +324,7 @@ class TestGenerateProposalsOp(OpTest): (batch_size, num_anchors * 4, layer_h, layer_w)).astype('float32') def init_test_output(self): - self.rpn_rois, self.rpn_roi_probs, self.lod = generate_proposals_in_python( + self.rpn_rois, self.rpn_roi_probs, self.rois_num = generate_proposals_in_python( self.scores, self.bbox_deltas, self.im_info, self.anchors, self.variances, self.pre_nms_topN, self.post_nms_topN, self.nms_thresh, self.min_size, self.eta) @@ -349,12 +353,21 @@ class TestGenerateProposalsOutLodOp(TestGenerateProposalsOp): } self.outputs = { - 'RpnRois': (self.rpn_rois[0], [self.lod]), - 'RpnRoiProbs': (self.rpn_roi_probs[0], [self.lod]), - 'RpnRoisLod': (np.asarray( - self.lod, dtype=np.int32)) + 'RpnRois': (self.rpn_rois[0], [self.rois_num]), + 'RpnRoiProbs': (self.rpn_roi_probs[0], [self.rois_num]), + 'RpnRoisNum': (np.asarray( + self.rois_num, dtype=np.int32)) } +class TestGenerateProposalsOpNoBoxLeft(TestGenerateProposalsOp): + def init_test_params(self): + self.pre_nms_topN = 12000 # train 12000, test 2000 + self.post_nms_topN = 5000 # train 6000, test 1000 + self.nms_thresh = 0.7 + self.min_size = 1000.0 + self.eta = 1. + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_grid_sample_function.py b/python/paddle/fluid/tests/unittests/test_grid_sample_function.py index 4a33f32a0b6977716d8065419f8e0f88d6c4f44a..ea94a8ba69a784efb1a2a12f6f251316553cab50 100644 --- a/python/paddle/fluid/tests/unittests/test_grid_sample_function.py +++ b/python/paddle/fluid/tests/unittests/test_grid_sample_function.py @@ -100,7 +100,7 @@ def add_cases(suite): GridSampleTestCase( methodName='runTest', mode='bilinear', - padding_mode='reflect', + padding_mode='reflection', align_corners=True)) suite.addTest( GridSampleTestCase( diff --git a/python/paddle/fluid/tests/unittests/test_grid_sampler_op.py b/python/paddle/fluid/tests/unittests/test_grid_sampler_op.py index 4d1ed5aeb96ebbe064e35c1bee9d5775812440f7..bf2f9518fb0c720556b7eecdf5b286dea0fff96c 100644 --- a/python/paddle/fluid/tests/unittests/test_grid_sampler_op.py +++ b/python/paddle/fluid/tests/unittests/test_grid_sampler_op.py @@ -73,7 +73,7 @@ def unnormalizeAndClip(grid_slice, max_val, align_corners, padding_mode): if padding_mode == "border": grid_slice = clip(grid_slice, 0, max_val) - elif padding_mode == "reflect": + elif padding_mode == "reflection": double_range = 2 * max_val if align_corners else (max_val + 1) * 2 grid_abs = np.abs(grid_slice) if align_corners else np.abs(grid_slice + 0.5) @@ -211,7 +211,7 @@ class Case2(TestGridSamplerOp): self.grid_shape = (2, 8, 9, 2) self.theta_shape = (2, 2, 3) self.align_corners = False - self.padding_mode = "reflect" + self.padding_mode = "reflection" self.mode = "bilinear" @@ -221,7 +221,7 @@ class Case3(TestGridSamplerOp): self.grid_shape = (2, 8, 9, 2) self.theta_shape = (2, 2, 3) self.align_corners = True - self.padding_mode = "reflect" + self.padding_mode = "reflection" self.mode = "bilinear" @@ -231,7 +231,7 @@ class Case4(TestGridSamplerOp): self.grid_shape = (2, 8, 9, 2) self.theta_shape = (2, 2, 3) self.align_corners = False - self.padding_mode = "reflect" + self.padding_mode = "reflection" self.mode = "nearest" self.numeric_grad_delta = 0.0001 diff --git a/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py b/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py index 8780727e4cb276a989a8d04d05c6419a4874e7f5..041fe4e9043d60852fcaab42bc233b63b39609ce 100644 --- a/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py @@ -24,7 +24,10 @@ def kldiv_loss(x, target, reduction): loss = np.where(target >= 0, output, np.zeros_like(x)) if reduction == "batchmean": - return loss.sum() / x.shape[0] + if len(x.shape) > 0: + return loss.sum() / x.shape[0] + else: + return loss.sum() if reduction == "mean": return loss.mean() if reduction == "sum": @@ -93,6 +96,9 @@ class TestKLDivLossDygraph(unittest.TestCase): def test_kl_loss_batchmean(self): self.run_kl_loss('batchmean') + def test_kl_loss_batchmean_shape(self): + self.run_kl_loss('batchmean', ()) + def test_kl_loss_mean(self): self.run_kl_loss('mean') diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index b76887f0965ca64b2b40bf9c0ce6e82b44fdad2f..89e9f7aad8581228411b1983580ced5566e65765 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -3318,15 +3318,29 @@ class TestBook(LayerTest): return (out) def test_roi_pool(self): - # TODO(minqiyang): dygraph do not support lod now + x_np = np.random.rand(2, 3, 8, 8).astype('float32') + rois_np = np.random.rand(3, 4).astype('float32') + rois_num_np = np.array([1, 2]).astype('int32') + with self.static_graph(): - x = layers.data(name="x", shape=[256, 30, 30], dtype="float32") - rois = layers.data( - name="rois", shape=[4], dtype="float32", lod_level=1) - rois_lod = layers.data( - name="rois_lod", shape=[None, ], dtype="int", lod_level=1) - output = layers.roi_pool(x, rois, 7, 7, 0.6, rois_lod) - return (output) + x = layers.data(name="x", shape=[3, 8, 8], dtype="float32") + rois = layers.data(name="rois", shape=[4], dtype="float32") + rois_num = fluid.data(name="rois_num", shape=[None], dtype="int32") + output = layers.roi_pool(x, rois, 4, 4, 0.5, rois_num=rois_num) + static_res = self.get_static_graph_result( + feed={'x': x_np, + 'rois': rois_np, + 'rois_num': rois_num_np}, + fetch_list=[output])[0] + + with self.dynamic_graph(): + x_dy = base.to_variable(x_np) + rois_dy = base.to_variable(rois_np) + rois_num_dy = base.to_variable(rois_num_np) + dy_res = layers.roi_pool( + x_dy, rois_dy, 4, 4, 0.5, rois_num=rois_num_dy) + dy_res_value = dy_res[0].numpy() + self.assertTrue(np.array_equal(static_res, dy_res_value)) def test_sequence_enumerate(self): # TODO(minqiyang): dygraph do not support lod now @@ -3335,16 +3349,29 @@ class TestBook(LayerTest): out = layers.sequence_enumerate(input=x, win_size=2, pad_value=0) def test_roi_align(self): - # TODO(minqiyang): dygraph do not support lod now + x_np = np.random.rand(2, 3, 8, 8).astype('float32') + rois_np = np.random.rand(3, 4).astype('float32') + rois_num_np = np.array([1, 2]).astype('int32') + with self.static_graph(): - x = layers.data(name="x", shape=[256, 30, 30], dtype="float32") - rois = layers.data( - name="rois", shape=[4], dtype="float32", lod_level=1) - rois_lod = layers.data( - name="rois_lod", shape=[None, ], dtype="int", lod_level=1) - output = layers.roi_align(x, rois, 14, 14, 0.5, 2, 'roi_align', - rois_lod) - return (output) + x = layers.data(name="x", shape=[3, 8, 8], dtype="float32") + rois = layers.data(name="rois", shape=[4], dtype="float32") + rois_num = fluid.data(name="rois_num", shape=[None], dtype="int32") + output = layers.roi_align(x, rois, 4, 4, 0.5, 2, rois_num=rois_num) + static_res = self.get_static_graph_result( + feed={'x': x_np, + 'rois': rois_np, + 'rois_num': rois_num_np}, + fetch_list=[output])[0] + + with self.dynamic_graph(): + x_dy = base.to_variable(x_np) + rois_dy = base.to_variable(rois_np) + rois_num_dy = base.to_variable(rois_num_np) + dy_res = layers.roi_align( + x_dy, rois_dy, 4, 4, 0.5, 2, rois_num=rois_num_dy) + dy_res_value = dy_res.numpy() + self.assertTrue(np.array_equal(static_res, dy_res_value)) def test_roi_perspective_transform(self): # TODO(minqiyang): dygraph do not support lod now diff --git a/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_dygraph.py b/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_dygraph.py index e0edf9019356f38eb3c74b9cadfa6ae575e9b823..43a0d481b28fdc47dec52fe9763dd920fd5a76a2 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_dygraph.py +++ b/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_dygraph.py @@ -16,20 +16,49 @@ from __future__ import print_function import unittest +import paddle +import paddle.nn as nn +import numpy as np + +paddle.disable_static() + class EmbeddingDygraph(unittest.TestCase): def test_1(self): - import paddle - import paddle.nn as nn - import numpy as np - paddle.disable_static() + x_data = np.arange(3, 6).reshape((3, 1)).astype(np.int64) + y_data = np.arange(6, 12).reshape((3, 2)).astype(np.float32) + paddle.disable_static(paddle.CPUPlace()) + x = paddle.to_tensor(x_data, stop_gradient=False) + y = paddle.to_tensor(y_data, stop_gradient=False) + + embedding = paddle.nn.Embedding(10, 3, sparse=True) + + w0 = np.full(shape=(10, 3), fill_value=2).astype(np.float32) + embedding.weight.set_value(w0) + + adam = paddle.optimizer.Adam( + parameters=[embedding.weight], learning_rate=0.01) + adam.clear_grad() + + out = embedding(x) + out.backward() + adam.step() + + def test_2(self): + x_data = np.arange(3, 6).reshape((3, 1)).astype(np.int64) + y_data = np.arange(6, 12).reshape((3, 2)).astype(np.float32) + paddle.disable_static(paddle.CPUPlace()) + x = paddle.to_tensor(x_data, stop_gradient=False) + y = paddle.to_tensor(y_data, stop_gradient=False) + + with self.assertRaises(ValueError): + embedding = paddle.nn.Embedding(10, 3, padding_idx=11, sparse=True) - # example 1 - inp_word = np.array([[2, 3, 5], [4, 2, 1]]).astype('int64') - inp_word.shape # [2, 3] - dict_size = 20 + with self.assertRaises(ValueError): + embedding = paddle.nn.Embedding(-1, 3, sparse=True) - emb = nn.Embedding(dict_size, 32, weight_attr='emb.w', sparse=False) + with self.assertRaises(ValueError): + embedding = paddle.nn.Embedding(10, -3, sparse=True) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py b/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py index c9c91ceb39de42c44f9ce81658aa79b896999552..4af0cce12b7334857c54ae9e8e9418848275ff32 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py +++ b/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py @@ -73,8 +73,13 @@ class EmbeddingStatic(unittest.TestCase): dtype="int32") emb = functional.embedding( - x=label, weight=weight, sparse=True, name="embedding") + x=label, + weight=weight, + padding_idx=129, + sparse=True, + name="embedding") + with self.assertRaises(ValueError): test_bad_x() diff --git a/python/paddle/fluid/tests/unittests/test_nn_grad.py b/python/paddle/fluid/tests/unittests/test_nn_grad.py index c6cfe01dce40458684c7464ca5ebddd389c62cbe..0c39dc5e731d25720149af4480020a7ab3ac5bb9 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_nn_grad.py @@ -101,6 +101,29 @@ class TestReduceMeanWithDimDoubleGradCheck(unittest.TestCase): self.func(p) +class TestReduceSumWithDimDoubleGradCheck(unittest.TestCase): + @prog_scope() + def func(self, place): + shape = [7, 11] + eps = 0.05 + dtype = np.float64 + + x = layers.data('x', shape, False, dtype) + x.persistable = True + y = layers.reduce_sum(x, dim=0) + x_arr = np.random.uniform(-1, 1, shape).astype(dtype) + + gradient_checker.double_grad_check( + [x], y, x_init=x_arr, place=place, eps=eps) + + def test_grad(self): + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + class TestMulDoubleGradCheck(unittest.TestCase): @prog_scope() def func(self, place): diff --git a/python/paddle/fluid/tests/unittests/test_norm_all.py b/python/paddle/fluid/tests/unittests/test_norm_all.py index c047cf6ddff78641b918de75a284574175bb3bca..352089e1fb75fa4c3423d29012fd85c3d611c81b 100644 --- a/python/paddle/fluid/tests/unittests/test_norm_all.py +++ b/python/paddle/fluid/tests/unittests/test_norm_all.py @@ -26,11 +26,11 @@ def p_norm(x, axis, porder, keepdims=False): if axis is None: x = x.flatten() if porder == np.inf: - r = np.amax(np.abs(x)) + r = np.amax(np.abs(x), keepdims=keepdims) elif porder == -np.inf: - r = np.amin(np.abs(x)) + r = np.amin(np.abs(x), keepdims=keepdims) else: - r = np.linalg.norm(x, ord=porder) + r = np.linalg.norm(x, ord=porder, keepdims=keepdims) elif isinstance(axis, list or tuple) and len(axis) == 2: if porder == np.inf: axis = tuple(axis) @@ -41,10 +41,10 @@ def p_norm(x, axis, porder, keepdims=False): elif porder == 0: axis = tuple(axis) r = x.astype(bool) - r = np.sum(r, axis) + r = np.sum(r, axis, keepdims=keepdims) elif porder == 1: axis = tuple(axis) - r = np.sum(np.abs(x), axis) + r = np.sum(np.abs(x), axis, keepdims=keepdims) else: axis = tuple(axis) xp = np.power(np.abs(x), porder) @@ -61,7 +61,7 @@ def p_norm(x, axis, porder, keepdims=False): def frobenius_norm(x, axis=None, keepdims=False): if isinstance(axis, list): axis = tuple(axis) - if axis is None: axis = (-2, -1) + if axis is None: x = x.reshape(1, x.size) r = np.linalg.norm( x, ord='fro', axis=axis, keepdims=keepdims).astype(x.dtype) return r @@ -217,28 +217,37 @@ class TestPnormOp5(TestPnormOp): self.check_grad(['X'], 'Out', user_defined_grads=self.gradient) -def run_fro(self, p, axis, shape_x, dtype): +def run_fro(self, p, axis, shape_x, dtype, keep_dim, check_dim=False): with fluid.program_guard(fluid.Program()): data = fluid.data(name="X", shape=shape_x, dtype=dtype) - out = paddle.norm(x=data, p=p, axis=axis) + out = paddle.norm(x=data, p=p, axis=axis, keepdim=keep_dim) place = fluid.CPUPlace() exe = fluid.Executor(place) np_input = (np.random.rand(*shape_x) + 1.0).astype(dtype) - expected_result = frobenius_norm(np_input, axis=axis) + expected_result = frobenius_norm(np_input, axis=axis, keepdims=keep_dim) result, = exe.run(feed={"X": np_input}, fetch_list=[out]) self.assertEqual((np.abs(result - expected_result) < 1e-6).all(), True) + if keep_dim and check_dim: + self.assertEqual( + (np.abs(np.array(result.shape) - np.array(expected_result.shape)) < + 1e-6).all(), True) -def run_pnorm(self, p, axis, shape_x, dtype): +def run_pnorm(self, p, axis, shape_x, dtype, keep_dim, check_dim=False): with fluid.program_guard(fluid.Program()): data = fluid.data(name="X", shape=shape_x, dtype=dtype) - out = paddle.norm(x=data, p=p, axis=axis) + out = paddle.norm(x=data, p=p, axis=axis, keepdim=keep_dim) place = fluid.CPUPlace() exe = fluid.Executor(place) np_input = (np.random.rand(*shape_x) + 1.0).astype(dtype) - expected_result = p_norm(np_input, porder=p, axis=axis).astype(dtype) + expected_result = p_norm( + np_input, porder=p, axis=axis, keepdims=keep_dim).astype(dtype) result, = exe.run(feed={"X": np_input}, fetch_list=[out]) - self.assertEqual((np.abs(result - expected_result) < 1e-6).all(), True) + self.assertEqual((np.abs(result - expected_result) < 1e-6).all(), True) + if keep_dim and check_dim: + self.assertEqual( + (np.abs(np.array(result.shape) - np.array(expected_result.shape)) < + 1e-6).all(), True) def run_graph(self, p, axis, shape_x, dtype): @@ -253,6 +262,7 @@ def run_graph(self, p, axis, shape_x, dtype): # compute frobenius norm along last two dimensions. out_fro = paddle.norm(x, p='fro') + out_fro = paddle.norm(x, p='fro', axis=0) out_fro = paddle.norm(x, p='fro', axis=[0, 1]) # compute 2-order norm along [0,1] dimension. out_pnorm = paddle.norm(x, p=2, axis=[0, 1]) @@ -274,27 +284,133 @@ def run_graph(self, p, axis, shape_x, dtype): class API_NormTest(unittest.TestCase): def test_basic(self): - run_fro(self, p='fro', axis=None, shape_x=[2, 3, 4], dtype="float32") - run_fro(self, p='fro', axis=[0, 1], shape_x=[2, 3, 4], dtype="float64") - run_pnorm(self, p=2, axis=None, shape_x=[3, 4], dtype="float32") - run_pnorm(self, p=2, axis=1, shape_x=[3, 4], dtype="float64") - run_pnorm(self, p=np.inf, axis=0, shape_x=[2, 3, 4], dtype="float32") - run_pnorm(self, p=np.inf, axis=None, shape_x=[2, 3, 4], dtype="float32") - run_pnorm(self, p=-np.inf, axis=0, shape_x=[2, 3, 4], dtype="float64") - run_pnorm( - self, p=-np.inf, axis=None, shape_x=[2, 3, 4], dtype="float64") - run_pnorm(self, p=0, axis=1, shape_x=[3, 4], dtype="float64") - - run_pnorm(self, p=1, axis=1, shape_x=[3, 4], dtype="float64") - run_pnorm(self, p=0, axis=None, shape_x=[3, 4], dtype="float64") - run_pnorm(self, p=2, axis=[0, 1], shape_x=[2, 3, 4], dtype="float64") - run_pnorm(self, p=2, axis=-1, shape_x=[2, 3, 4], dtype="float64") - run_pnorm(self, p=1, axis=[0, 1], shape_x=[2, 3, 4], dtype="float64") - run_pnorm(self, p=0, axis=[0, 1], shape_x=[2, 3, 4], dtype="float64") - run_pnorm( - self, p=np.inf, axis=[0, 1], shape_x=[2, 3, 4], dtype="float64") - run_pnorm( - self, p=-np.inf, axis=[0, 1], shape_x=[2, 3, 4], dtype="float64") + keep_dims = {False, True} + for keep in keep_dims: + run_fro( + self, + p='fro', + axis=None, + shape_x=[2, 3, 4], + dtype="float32", + keep_dim=keep) + run_fro( + self, + p='fro', + axis=[0, 1], + shape_x=[2, 3, 4], + dtype="float64", + keep_dim=keep, + check_dim=True) + run_pnorm( + self, + p=2, + axis=None, + shape_x=[3, 4], + dtype="float32", + keep_dim=keep) + run_pnorm( + self, + p=2, + axis=1, + shape_x=[3, 4], + dtype="float64", + keep_dim=keep, + check_dim=True) + run_pnorm( + self, + p=np.inf, + axis=0, + shape_x=[2, 3, 4], + dtype="float32", + keep_dim=keep, + check_dim=True) + run_pnorm( + self, + p=np.inf, + axis=None, + shape_x=[2, 3, 4], + dtype="float32", + keep_dim=keep) + run_pnorm( + self, + p=-np.inf, + axis=0, + shape_x=[2, 3, 4], + dtype="float64", + keep_dim=keep, + check_dim=True) + run_pnorm( + self, + p=-np.inf, + axis=None, + shape_x=[2, 3, 4], + dtype="float64", + keep_dim=keep) + run_pnorm( + self, + p=0, + axis=1, + shape_x=[3, 4], + dtype="float64", + keep_dim=keep, + check_dim=True) + + run_pnorm( + self, + p=1, + axis=1, + shape_x=[3, 4], + dtype="float64", + keep_dim=keep, + check_dim=True) + run_pnorm( + self, + p=0, + axis=None, + shape_x=[3, 4], + dtype="float64", + keep_dim=keep, + check_dim=True) + run_pnorm( + self, + p=2, + axis=[0, 1], + shape_x=[2, 3, 4], + dtype="float64", + keep_dim=keep, + check_dim=True) + run_pnorm( + self, + p=2, + axis=-1, + shape_x=[2, 3, 4], + dtype="float64", + keep_dim=keep, + check_dim=True) + run_pnorm( + self, + p=1, + axis=[0, 1], + shape_x=[2, 3, 4], + dtype="float64", + keep_dim=keep, + check_dim=True) + run_pnorm( + self, + p=np.inf, + axis=[0, 1], + shape_x=[2, 3, 4], + dtype="float64", + keep_dim=keep, + check_dim=True) + run_pnorm( + self, + p=-np.inf, + axis=[0, 1], + shape_x=[2, 3, 4], + dtype="float64", + keep_dim=keep, + check_dim=True) def test_dygraph(self): run_graph(self, p='fro', axis=None, shape_x=[2, 3, 4], dtype="float32") @@ -315,6 +431,7 @@ class API_NormTest(unittest.TestCase): paddle.norm(data, p=p, out=out) self.assertRaises(TypeError, err_dtype, "fro", [2, 2], "int64") + self.assertRaises(ValueError, paddle.norm, "inf", [2], "int64") out = fluid.data(name="out", shape=[1], dtype="int64") self.assertRaises(TypeError, err_dtype, "fro", [2, 2], "float64", out) @@ -325,6 +442,7 @@ class API_NormTest(unittest.TestCase): self.assertRaises(ValueError, paddle.norm, data, p="unsupport norm") self.assertRaises(ValueError, paddle.norm, data, p=[1]) self.assertRaises(ValueError, paddle.norm, data, p=[1], axis=-1) + self.assertRaises(ValueError, paddle.norm, 0, [1, 0], "float64") data = fluid.data(name="data_3d", shape=[2, 2, 2], dtype="float64") self.assertRaises( ValueError, paddle.norm, data, p='unspport', axis=[-3, -2, -1]) diff --git a/python/paddle/fluid/tests/unittests/test_pool3d_api.py b/python/paddle/fluid/tests/unittests/test_pool3d_api.py index a77f1cdd57d7bade92e2a4f914dc3d91624d4845..505a1c738384194032329f66c33fa27e3ed3045c 100644 --- a/python/paddle/fluid/tests/unittests/test_pool3d_api.py +++ b/python/paddle/fluid/tests/unittests/test_pool3d_api.py @@ -165,7 +165,6 @@ class TestPool3d_API(unittest.TestCase): self.assertTrue(np.allclose(result.numpy(), result_np)) def check_max_dygraph_ndhwc_results(self, place): - print("run ndchw max pool3d") with fluid.dygraph.guard(place): input_np = np.random.random([2, 3, 32, 32, 32]).astype("float32") input = fluid.dygraph.to_variable( @@ -190,7 +189,6 @@ class TestPool3d_API(unittest.TestCase): np.transpose(result.numpy(), [0, 4, 1, 2, 3]), result_np)) def check_max_dygraph_ceilmode_results(self, place): - print("run ceil mode max pool3d") with fluid.dygraph.guard(place): input_np = np.random.random([2, 3, 32, 32, 32]).astype("float32") input = fluid.dygraph.to_variable(input_np) diff --git a/python/paddle/fluid/tests/unittests/test_roi_align_op.py b/python/paddle/fluid/tests/unittests/test_roi_align_op.py index b01863880866e247f2aee4b94ae3121c9d891f92..fb8a090b80700d9b884a72f7f430723754523a13 100644 --- a/python/paddle/fluid/tests/unittests/test_roi_align_op.py +++ b/python/paddle/fluid/tests/unittests/test_roi_align_op.py @@ -181,16 +181,11 @@ class TestROIAlignInLodOp(TestROIAlignOp): self.calc_roi_align() seq_len = self.rois_lod[0] - cur_len = 0 - lod = [cur_len] - for l in seq_len: - cur_len += l - lod.append(cur_len) self.inputs = { 'X': self.x, 'ROIs': (self.rois[:, 1:5], self.rois_lod), - 'RoisLod': np.asarray(lod).astype('int64') + 'RoisNum': np.asarray(seq_len).astype('int32') } self.attrs = { diff --git a/python/paddle/fluid/tests/unittests/test_roi_pool_op.py b/python/paddle/fluid/tests/unittests/test_roi_pool_op.py index 1200b0e3470f650dce4365ee46458c8184281292..c6622cf8d9ce8ae655a6b2e5c130ed9990fd2a5b 100644 --- a/python/paddle/fluid/tests/unittests/test_roi_pool_op.py +++ b/python/paddle/fluid/tests/unittests/test_roi_pool_op.py @@ -174,16 +174,11 @@ class TestROIPoolInLodOp(TestROIPoolOp): self.calc_roi_pool() seq_len = self.rois_lod[0] - cur_len = 0 - lod = [cur_len] - for l in seq_len: - cur_len += l - lod.append(cur_len) self.inputs = { 'X': self.x, 'ROIs': (self.rois[:, 1:5], self.rois_lod), - 'RoisLod': np.asarray(lod).astype('int64') + 'RoisNum': np.asarray(seq_len).astype('int32') } self.attrs = { diff --git a/python/paddle/fluid/tests/unittests/test_tril_triu_op.py b/python/paddle/fluid/tests/unittests/test_tril_triu_op.py index aed265b21b5781d88da0380b04872061e893d736..2cd2599f2ea2f4fb26b2d2730ca45384a3b664a7 100644 --- a/python/paddle/fluid/tests/unittests/test_tril_triu_op.py +++ b/python/paddle/fluid/tests/unittests/test_tril_triu_op.py @@ -142,6 +142,18 @@ class TestTrilTriuOpAPI(unittest.TestCase): self.assertTrue(np.allclose(tril_out, np.tril(data))) self.assertTrue(np.allclose(triu_out, np.triu(data))) + def test_fluid_api(self): + data = np.random.random([1, 9, 9, 4]).astype('float32') + x = fluid.data(shape=[1, 9, -1, 4], dtype='float32', name='x') + triu_out = fluid.layers.triu(x) + + place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + exe = fluid.Executor(place) + triu_out = exe.run(fluid.default_main_program(), + feed={"x": data}, + fetch_list=[triu_out]) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/hapi/model_summary.py b/python/paddle/hapi/model_summary.py index 716be1b539809ea3f90885b512f51ac45d85cd37..d388ba62f2a244f84497810739e5fd6b50f669d2 100644 --- a/python/paddle/hapi/model_summary.py +++ b/python/paddle/hapi/model_summary.py @@ -12,7 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +import warnings import numpy as np +import numbers import paddle import paddle.nn as nn @@ -107,6 +109,11 @@ def summary(net, input_size, batch_size=None, dtypes=None): if batch_size is None: batch_size = -1 + if not paddle.in_dynamic_mode(): + warnings.warn( + "Your model was created in static mode, this may not get correct summary information!" + ) + result, params_info = summary_string(net, _input_size, batch_size, dtypes) print(result) @@ -121,16 +128,16 @@ def summary_string(model, input_size, batch_size=-1, dtypes=None): depth = len(list(model.sublayers())) - def register_hook(module): - def hook(module, input, output): - class_name = str(module.__class__).split(".")[-1].split("'")[0] + def register_hook(layer): + def hook(layer, input, output): + class_name = str(layer.__class__).split(".")[-1].split("'")[0] try: - module_idx = int(module._full_name.split('_')[-1]) + layer_idx = int(layer._full_name.split('_')[-1]) except: - module_idx = len(summary) + layer_idx = len(summary) - m_key = "%s-%i" % (class_name, module_idx + 1) + m_key = "%s-%i" % (class_name, layer_idx + 1) summary[m_key] = OrderedDict() summary[m_key]["input_shape"] = list(input[0].shape) summary[m_key]["input_shape"][0] = batch_size @@ -142,23 +149,50 @@ def summary_string(model, input_size, batch_size=-1, dtypes=None): summary[m_key]["output_shape"][0] = batch_size params = 0 - if hasattr(module, "weight") and hasattr(module.weight, "shape"): - params += np.prod(module.weight.shape) - summary[m_key]["trainable"] = module.weight.trainable or ( - not module.weight.stop_gradient) - if hasattr(module, "bias") and hasattr(module.bias, "shape"): - params += np.prod(module.bias.shape) + + if paddle.in_dynamic_mode(): + layer_state_dict = layer._parameters + else: + layer_state_dict = layer.state_dict() + + for k, v in layer_state_dict.items(): + params += np.prod(v.shape) + + try: + if (getattr(getattr(layer, k), 'trainable')) and ( + not getattr(getattr(layer, k), 'stop_gradient')): + summary[m_key]["trainable"] = True + else: + summary[m_key]["trainable"] = False + except: + summary[m_key]["trainable"] = True + summary[m_key]["nb_params"] = params - if (not isinstance(module, nn.Sequential) and - not isinstance(module, nn.LayerList) and - (not (module == model) or depth < 1)): + if (not isinstance(layer, nn.Sequential) and + not isinstance(layer, nn.LayerList) and + (not (layer == model) or depth < 1)): + + hooks.append(layer.register_forward_post_hook(hook)) + + def _check_input_size(input_sizes): + for input_size in input_sizes: + for item in input_size: + if not isinstance(item, numbers.Number): + raise TypeError( + "Expected item in input size be a number, but got {}". + format(type(item))) - hooks.append(module.register_forward_post_hook(hook)) + if item <= 0: + raise ValueError( + "Expected item in input size greater than zero, but got {}". + format(item)) if isinstance(input_size, tuple): input_size = [input_size] + _check_input_size(input_size) + x = [ paddle.rand( [2] + list(in_size), dtype=dtype) @@ -197,7 +231,12 @@ def summary_string(model, input_size, batch_size=-1, dtypes=None): "{0:,}".format(summary[layer]["nb_params"]), ) total_params += summary[layer]["nb_params"] - total_output += np.prod(summary[layer]["output_shape"]) + try: + total_output += np.prod(summary[layer]["output_shape"]) + except: + for output_shape in summary[layer]["output_shape"]: + total_output += np.prod(output_shape) + if "trainable" in summary[layer]: if summary[layer]["trainable"] == True: trainable_params += summary[layer]["nb_params"] diff --git a/python/paddle/nn/functional/input.py b/python/paddle/nn/functional/input.py index bc48cc21c29e6683602f37fb3eab6c9485fe4977..0794b95c801011da6845eaf82c32a5428e0d5f41 100644 --- a/python/paddle/nn/functional/input.py +++ b/python/paddle/nn/functional/input.py @@ -113,17 +113,18 @@ def one_hot(x, num_classes, name=None): def embedding(x, weight, padding_idx=None, sparse=False, name=None): """ - The operator is used to lookup embeddings vector of ids provided by :attr:`input` . + The operator is used to lookup embeddings vector of ids provided by :attr:`x` . The shape of output Tensor is generated by appending the last dimension of the input Tensor shape with embedding size. - **Note:** The id in :attr:`input` must satisfy :math:`0 =< id < weight.shape[0]` , + + **Note:** The id in :attr:`x` must satisfy :math:`0 =< id < weight.shape[0]` , otherwise the program will throw an exception and exit. .. code-block:: text Case 1: - input is a Tensor. + x is a Tensor. padding_idx = -1 x.data = [[1, 3], [2, 4], [4, 127]] x.shape = [3, 2] @@ -138,7 +139,7 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None): [0.0, 0.0, ..., 0.0 ]]] # padding data The input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127 - It will pad all-zero data when ids is 127. + It will pad all-zero data when id is 127. Args: x(Tensor): A Tensor with type int32/int64, which contains the id information. The value of the input id should @@ -151,10 +152,10 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None): such as :ref:`api_optimizer_AdadeltaOptimizer` , :ref:`api_optimizer_AdamaxOptimizer` , :ref:`api_optimizer_DecayedAdagradOptimizer` , :ref:`api_optimizer_FtrlOptimizer` , :ref:`api_optimizer_LambOptimizer` and :ref:`api_optimizer_LarsMomentumOptimizer` . - In these cases, is_sparse must be False. Default: False. - padding_idx(int|long|None): padding_idx needs to be in the interval [-vocab_size, vocab_size). + In these cases, sparse must be False. Default: False. + padding_idx(int|long|None): padding_idx needs to be in the interval [-weight.shape[0], weight.shape[0]). If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted - to :math:`vocab\_size + padding\_idx` . It will output all-zero padding data whenever lookup + to :math:`weight.shape[0] + padding\_idx` . It will output all-zero padding data whenever lookup encounters :math:`padding\_idx` in id. And the padding data will not be updated while training. If set None, it makes no effect to output. Default: None. name(str|None): For detailed information, please refer @@ -162,7 +163,7 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None): None by default. Returns: - Tensor: Embedding Tensor mapped by input. The data type is the same as :attr:`weight`. + Tensor: Embedding Tensor mapped by x. The data type is the same as :attr:`weight`. Examples: @@ -209,6 +210,10 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None): padding_idx = -1 if padding_idx is None else padding_idx if padding_idx >= 0 else ( weight.shape[0] + padding_idx) + if padding_idx >= weight.shape[0] or padding_idx < -weight.shape[0]: + raise ValueError("padding_idx must be within [-{}, {})".format( + weight.shape[0], weight.shape[0])) + helper.append_op( type='lookup_table_v2', inputs={'Ids': x, diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py index 3d5894064c44cb72259472fc638d46b67c5703fc..6c139b0ddbbb996145e3a611839bf5e2e113f3cd 100644 --- a/python/paddle/nn/functional/loss.py +++ b/python/paddle/nn/functional/loss.py @@ -780,10 +780,10 @@ def kl_div(input, label, reduction='mean', name=None): input = np.random.uniform(-10, 10, shape).astype('float32') target = np.random.uniform(-10, 10, shape).astype('float32') - # 'batchmean' reduction, loss shape will be [N] + # 'batchmean' reduction, loss shape will be [1] pred_loss = F.kl_div(paddle.to_tensor(input), paddle.to_tensor(target), reduction='batchmean') - # shape=[5] + # shape=[1] # 'mean' reduction, loss shape will be [1] pred_loss = F.kl_div(paddle.to_tensor(input), diff --git a/python/paddle/nn/functional/pooling.py b/python/paddle/nn/functional/pooling.py index 662205ab69550255406ff5edfda4556b73b98843..042625a3dbd6b07487d6f77442621959f7492af6 100755 --- a/python/paddle/nn/functional/pooling.py +++ b/python/paddle/nn/functional/pooling.py @@ -389,7 +389,7 @@ def avg_pool3d(x, stride=None, padding=0, ceil_mode=False, - count_include_pad=False, + count_include_pad=True, divisor_override=None, data_format="NCDHW", name=None): diff --git a/python/paddle/nn/functional/vision.py b/python/paddle/nn/functional/vision.py index 1dfdac26e990851ac5f192742acd47fb92633d0d..a74a98d5ed45b9f613b0f2f6d5f04544ffae3d2a 100644 --- a/python/paddle/nn/functional/vision.py +++ b/python/paddle/nn/functional/vision.py @@ -249,7 +249,7 @@ def grid_sample(x, mode(str, optional): The interpolation method which can be 'bilinear' or 'nearest'. Default: 'bilinear'. padding_mode(str, optional) The padding method used when source index - is out of input images. It can be 'zeros', 'reflect' and 'border'. + is out of input images. It can be 'zeros', 'reflection' and 'border'. Default: zeros. align_corners(bool, optional): If `align_corners` is true, it will projects -1 and 1 to the centers of the corner pixels. Otherwise, it will @@ -312,7 +312,7 @@ def grid_sample(x, if not isinstance(grid, Variable): raise ValueError("The grid should be a Variable") _modes = ['bilinear', 'nearest'] - _padding_modes = ['zeros', 'reflect', 'border'] + _padding_modes = ['zeros', 'reflection', 'border'] if mode not in _modes: raise ValueError( "The mode of grid sample function should be in {}, but got: {}". diff --git a/python/paddle/nn/layer/common.py b/python/paddle/nn/layer/common.py index a1923542c40828b0886e9f5ab1f97e04e94fec92..433443fee1765a3ecd4cf0bbe53a960bbeaefc71 100644 --- a/python/paddle/nn/layer/common.py +++ b/python/paddle/nn/layer/common.py @@ -1564,22 +1564,18 @@ class CosineSimilarity(layers.Layer): class Embedding(layers.Layer): """ - :alias_main: paddle.nn.Embedding - :alias: paddle.nn.Embedding,paddle.nn.layer.Embedding,paddle.nn.layer.common.Embedding - :old_api: paddle.fluid.dygraph.Embedding - **Embedding Layer** This interface is used to construct a callable object of the ``Embedding`` class. For specific usage, refer to code examples. It implements the function of the Embedding Layer. - This layer is used to lookup embeddings vector of ids provided by :attr:`input` . + This layer is used to lookup embeddings vector of ids provided by :attr:`x` . It automatically constructs a 2D embedding matrix based on the - input :attr:`size` (vocab_size, emb_size) and :attr:`dtype` . + input :attr:`num_embeddings` and attr:`embedding_dim`. The shape of output Tensor is generated by appending an emb_size dimension to the last dimension of the input Tensor shape. - **Note:** The id in :attr:`input` must satisfy :math:`0 =< id < size[0]` , + **Note:** The id in :attr:`x` must satisfy :math:`0 =< id < num_embeddings` , otherwise the program will throw an exception and exit. .. code-block:: text @@ -1607,7 +1603,7 @@ class Embedding(layers.Layer): num_embeddings (int): Just one element which indicate the size of the dictionary of embeddings. embedding_dim: Just one element which indicate the size of each embedding vector respectively. - padding_idx(int|long|None): padding_idx needs to be in the interval [-vocab_size, vocab_size). + padding_idx(int|long|None): padding_idx needs to be in the interval [-num_embeddings, num_embeddings). If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted to :math:`vocab\_size + padding\_idx` . It will output all-zero padding data whenever lookup encounters :math:`padding\_idx` in id. And the padding data will not be updated while training. @@ -1618,13 +1614,13 @@ class Embedding(layers.Layer): such as :ref:`api_optimizer_AdadeltaOptimizer` , :ref:`api_optimizer_AdamaxOptimizer` , :ref:`api_optimizer_DecayedAdagradOptimizer` , :ref:`api_optimizer_FtrlOptimizer` , :ref:`api_optimizer_LambOptimizer` and :ref:`api_optimizer_LarsMomentumOptimizer` . - In these case, is_sparse must be False. Default: False. + In these case, sparse must be False. Default: False. weight_attr(ParamAttr): To specify the weight parameter property. Default: None, which means the - default weight parameter property is used. See usage for details in :ref:`api_fluid_ParamAttr` . In addition, + default weight parameter property is used. See usage for details in :ref:`api_ParamAttr` . In addition, user-defined or pre-trained word vectors can be loaded with the :attr:`param_attr` parameter. The local word vector needs to be transformed into numpy format, and the shape of local word - vector should be consistent with :attr:`size` . Then :ref:`api_fluid_initializer_NumpyArrayInitializer` - is used to load custom or pre-trained word vectors. See code example 2 for details. + vector should be consistent with :attr:`num_embeddings` . Then :ref:`api_initializer_NumpyArrayInitializer` + is used to load custom or pre-trained word vectors. See code example for details. name(str|None): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. @@ -1639,20 +1635,34 @@ class Embedding(layers.Layer): .. code-block:: python - import paddle - import paddle.nn as nn - import numpy as np - paddle.disable_static() + import paddle + import numpy as np + + x_data = np.arange(3, 6).reshape((3, 1)).astype(np.int64) + y_data = np.arange(6, 12).reshape((3, 2)).astype(np.float32) + paddle.disable_static(paddle.CPUPlace()) + x = paddle.to_tensor(x_data, stop_gradient=False) + y = paddle.to_tensor(y_data, stop_gradient=False) + + embedding = paddle.nn.Embedding(10, 3, sparse=True) + + w0=np.full(shape=(10, 3), fill_value=2).astype(np.float32) + embedding.weight.set_value(w0) - # example 1 - inp_word = np.array([[2, 3, 5], [4, 2, 1]]).astype('int64') - inp_word.shape # [2, 3] - dict_size = 20 + adam = paddle.optimizer.Adam(parameters=[embedding.weight], learning_rate=0.01) + adam.clear_grad() + + # weight.shape = [10, 3] + + # x.data = [[3],[4],[5]] + # x.shape = [3, 1] + + # out.data = [[2,2,2], [2,2,2], [2,2,2]] + # out.shape = [3, 1, 3] + out=embedding(x) + out.backward() + adam.step() - emb = nn.Embedding( - dict_size, - 32, - sparse=False) """ def __init__(self, @@ -1669,13 +1679,24 @@ class Embedding(layers.Layer): self._is_distributed = False self._padding_idx = -1 if padding_idx is None else padding_idx if padding_idx >= 0 else ( num_embeddings + padding_idx) + + if self._num_embeddings <= 0: + raise ValueError("num_embeddings must be gather than 0") + + if self._embedding_dim <= 0: + raise ValueError("embedding_dim must be gather than 0") + + if self._padding_idx >= num_embeddings or self._padding_idx < -num_embeddings: + raise ValueError("padding_idx must be within [-{}, {})".format( + num_embeddings, num_embeddings)) + self._dtype = self._helper.get_default_dtype() self._size = [self._num_embeddings, self._embedding_dim] self._weight_attr = weight_attr self._remote_prefetch = False self._name = name - self._weight = self.create_parameter( + self.weight = self.create_parameter( attr=self._weight_attr, shape=self._size, dtype=self._dtype, @@ -1684,7 +1705,7 @@ class Embedding(layers.Layer): def forward(self, x): return F.embedding( x, - weight=self._weight, + weight=self.weight, padding_idx=self._padding_idx, sparse=self._sparse, name=self._name) diff --git a/python/paddle/nn/layer/loss.py b/python/paddle/nn/layer/loss.py index a60e615d5064bf4ef2229dd67193774030383888..271dc9b4e685ce06cdb12ccdcb6bb0704a5ef2a1 100644 --- a/python/paddle/nn/layer/loss.py +++ b/python/paddle/nn/layer/loss.py @@ -627,10 +627,13 @@ class KLDivLoss(fluid.dygraph.Layer): $$l(x, y) = y * (\log(y) - x)$$ Parameters: - reduction (str, optional): Indicate how to average the loss, - the candicates are ``'none'`` | ``'mean'`` | ``'sum'``. - If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned; - Default is ``'mean'``. + reduction (Tensor): Indicate how to average the loss, + the candicates are ``'none'`` | ``'batchmean'`` | ``'mean'`` | ``'sum'``. + If `reduction` is ``'mean'``, the reduced mean loss is returned; + If `reduction` is ``'batchmean'``, the sum loss divided by batch size is returned; + if `reduction` is ``'sum'``, the reduced sum loss is returned; + if `reduction` is ``'none'``, no reduction will be apllied. + Default is ``'mean'``. Shape: @@ -654,11 +657,11 @@ class KLDivLoss(fluid.dygraph.Layer): x = np.random.uniform(-10, 10, shape).astype('float32') target = np.random.uniform(-10, 10, shape).astype('float32') - # 'batchmean' reduction, loss shape will be [N] + # 'batchmean' reduction, loss shape will be [1] kldiv_criterion = nn.KLDivLoss(reduction='batchmean') pred_loss = kldiv_criterion(paddle.to_tensor(x), paddle.to_tensor(target)) - # shape=[5] + # shape=[1] # 'mean' reduction, loss shape will be [1] kldiv_criterion = nn.KLDivLoss(reduction='mean') @@ -684,7 +687,7 @@ class KLDivLoss(fluid.dygraph.Layer): self.reduction = reduction def forward(self, input, label): - out = paddle.nn.functional.kl_div(input, label, self.reduction) + out = F.kl_div(input, label, self.reduction) return out diff --git a/python/paddle/tensor/linalg.py b/python/paddle/tensor/linalg.py index 7ddda5091a0a260f56b29bcedfdcb0786e82ddd6..67e3ce21ffba0c312eb01163cdf32f87c6433ee1 100644 --- a/python/paddle/tensor/linalg.py +++ b/python/paddle/tensor/linalg.py @@ -183,12 +183,13 @@ def norm(x, p='fro', axis=None, keepdim=False, name=None): x (Tensor): The input tensor could be N-D tensor, and the input data type could be float32 or float64. p (float|string, optional): Order of the norm. Supported values are `fro`, `0`, `1`, `2`, - `inf`,`-inf` and any positive real number yielding the corresponding p-norm. - Not supported: ord < 0, nuclear norm. + `inf`, `-inf` and any positive real number yielding the corresponding p-norm. Not supported: ord < 0 and nuclear norm. + Default value is `fro`. axis (int|list|tuple, optional): The axis on which to apply norm operation. If axis is int or list(int)/tuple(int) with only one element, the vector norm is computed over the axis. If `axis < 0`, the dimension to norm operation is rank(input) + axis. If axis is a list(int)/tuple(int) with two elements, the matrix norm is computed over the axis. + Defalut value is `None`. keepdim (bool, optional): Whether to reserve the reduced dimension in the output Tensor. The result tensor will have fewer dimension than the :attr:`input` unless :attr:`keepdim` is true, default @@ -197,13 +198,9 @@ def norm(x, p='fro', axis=None, keepdim=False, name=None): user to set this property. For more information, please refer to :ref:`api_guide_Name`. Returns: - Variable: Tensor, results of norm operation on the specified axis of input tensor, + Tensor: results of norm operation on the specified axis of input tensor, it's data type is the same as input's Tensor. - Raises: - TypeError, if out data type is different with the input data type. - ValueError, If `p` or `axis` is invalid. - Examples: .. code-block:: python @@ -256,15 +253,13 @@ def norm(x, p='fro', axis=None, keepdim=False, name=None): "The dim of frobenius norm op should be None or two elements list!" ) if in_dygraph_mode(): - if dim is None: dim = [-1] - return core.ops.frobenius_norm(input, 'dim', dim, 'keepdim', - keepdim) - attrs = { - 'dim': dim if dim != None else [-2, -1], - 'keep_dim': keepdim, - 'reduce_all': False - } - if len(attrs['dim']) == len(input.shape): + if dim is None: + return core.ops.frobenius_norm(input, 'keep_dim', keepdim, + 'reduce_all', True) + return core.ops.frobenius_norm(input, 'dim', dim, 'keep_dim', + keepdim, 'reduce_all', False) + attrs = {'dim': dim, 'keep_dim': keepdim, 'reduce_all': False} + if dim is None: attrs['reduce_all'] = True check_variable_and_dtype(input, 'input', ['float32', 'float64'], 'frobenius_norm') @@ -351,42 +346,6 @@ def norm(x, p='fro', axis=None, keepdim=False, name=None): return reduce_out - def p0_matrix_norm(input, porder=0., axis=axis, keepdim=False, name=None): - block = LayerHelper('norm', **locals()) - out = block.create_variable_for_type_inference( - dtype=block.input_dtype()) - - cast_out = block.create_variable_for_type_inference(dtype=bool) - block.append_op( - type='cast', - inputs={'X': input}, - outputs={'Out': cast_out}, - attrs={ - 'in_dtype': input.dtype, - 'out_dtype': int(core.VarDesc.VarType.BOOL) - }) - cast_out2 = block.create_variable_for_type_inference(dtype=bool) - block.append_op( - type='cast', - inputs={'X': cast_out}, - outputs={'Out': cast_out2}, - attrs={ - 'in_dtype': cast_out.dtype, - 'out_dtype': int(core.VarDesc.VarType.FP32) - }) - sum_out = block.create_variable_for_type_inference( - dtype=block.input_dtype()) - block.append_op( - type='reduce_sum', - inputs={'X': cast_out2}, - outputs={'Out': sum_out}, - attrs={ - 'dim': axis, - 'keep_dim': keepdim, - 'reduce_all': True if axis is None else False - }) - return sum_out - def p_matrix_norm(input, porder=1., axis=axis, keepdim=False, name=None): block = LayerHelper('norm', **locals()) out = block.create_variable_for_type_inference( @@ -448,7 +407,20 @@ def norm(x, p='fro', axis=None, keepdim=False, name=None): #calculate vector norm, where axis is int or list with only one integer if isinstance(axis, int): - if isinstance(p, (int, float)): + if isinstance(p, str): + if p == "fro": + return vector_norm( + x, + porder=2, + axis=axis, + keepdim=keepdim, + asvector=False, + name=name) + + else: + raise ValueError( + "only valid string values are 'fro', found {}".format(p)) + elif isinstance(p, (int, float)): return vector_norm( x, axis=axis, @@ -464,10 +436,12 @@ def norm(x, p='fro', axis=None, keepdim=False, name=None): elif isinstance(axis, list) and len(axis) == 2: if p == "fro": return frobenius_norm(x, dim=axis, keepdim=keepdim, name=name) - elif p == 0: - return p0_matrix_norm(x, axis=axis, keepdim=keepdim, name=name) elif p == np.inf or p == -np.inf: return inf_norm(x, porder=p, axis=axis, keepdim=keepdim, name=name) + elif p == 0: + raise ValueError( + "just suport axis type int or list (length of list <=1) if p = 0, found {}". + format(axis)) else: return p_matrix_norm( x, porder=p, axis=axis, keepdim=keepdim, name=name) diff --git a/python/paddle/tests/test_model.py b/python/paddle/tests/test_model.py index b7b5d44650f8d62926241a57feedfd5b932a37f5..5c4e98feaa686217bc78ad3915423593ad4fcdce 100644 --- a/python/paddle/tests/test_model.py +++ b/python/paddle/tests/test_model.py @@ -523,6 +523,24 @@ class TestModelFunction(unittest.TestCase): model.summary(input_size=[(20)]) model.summary(input_size=(20), batch_size=2) + def test_summary_nlp(self): + paddle.enable_static() + nlp_net = paddle.nn.GRU(input_size=2, hidden_size=3, num_layers=3) + paddle.summary(nlp_net, (1, 2)) + + def test_summary_error(self): + with self.assertRaises(TypeError): + nlp_net = paddle.nn.GRU(input_size=2, hidden_size=3, num_layers=3) + paddle.summary(nlp_net, (1, '2')) + + with self.assertRaises(ValueError): + nlp_net = paddle.nn.GRU(input_size=2, hidden_size=3, num_layers=3) + paddle.summary(nlp_net, (-1, -1)) + + paddle.disable_static() + nlp_net = paddle.nn.GRU(input_size=2, hidden_size=3, num_layers=3) + paddle.summary(nlp_net, (1, 2)) + def test_export_deploy_model(self): for dynamic in [True, False]: fluid.enable_dygraph() if dynamic else None diff --git a/python/unittest_py/requirements.txt b/python/unittest_py/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..3b6a7546616678c21d4241da42f0965d4b85f2e6 --- /dev/null +++ b/python/unittest_py/requirements.txt @@ -0,0 +1,4 @@ +PyGithub +coverage +pycrypto +mock diff --git a/tools/coverage/coverage_diff.py b/tools/coverage/coverage_diff.py index 051348d358f459a67d39dd9ca798721a82aa2233..38f671fe4089d1f94caafaf26640e4df75870f55 100644 --- a/tools/coverage/coverage_diff.py +++ b/tools/coverage/coverage_diff.py @@ -90,12 +90,12 @@ def get_info_file_lines(info_file, diff_file): continue elif line.startswith('LF:'): - print 'LF:{}'.format(current_lf) + print('LF:{}'.format(current_lf)) continue elif line.startswith('LH:'): - print 'LH:{}'.format(current_lh) + print('LH:{}'.format(current_lh)) continue diff --git a/tools/coverage/coverage_diff_list.py b/tools/coverage/coverage_diff_list.py index 57222da4d9818ebbcb0b9aeea6937494038eecdf..8975185edadfbd567a428bbd90523923f5ab675d 100644 --- a/tools/coverage/coverage_diff_list.py +++ b/tools/coverage/coverage_diff_list.py @@ -40,7 +40,7 @@ def filter_by(list_file, max_rate): except: pass - print name, rate + print(name, rate) if __name__ == '__main__': diff --git a/tools/coverage/coverage_lines.py b/tools/coverage/coverage_lines.py index eb846cc9f249a3ff177fde437362a1122f4409a5..cdec5b8b1bb1873f8b9ef761e9d8575c89fee234 100644 --- a/tools/coverage/coverage_lines.py +++ b/tools/coverage/coverage_lines.py @@ -33,7 +33,7 @@ def get_lines(info_file): hits += 1 if total == 0: - print 'no data found' + print('no data found') exit() return hits / total @@ -47,17 +47,17 @@ if __name__ == '__main__': expected = float(sys.argv[2]) if not os.path.isfile(info_file): - print 'info file {} is not exists, ignored'.format(info_file) + print('info file {} is not exists, ignored'.format(info_file)) exit() actual = get_lines(info_file) actual = round(actual, 3) if actual < expected: - print 'expected >= {} %, actual {} %, failed'.format( - round(expected * 100, 1), round(actual * 100, 1)) + print('expected >= {} %, actual {} %, failed'.format( + round(expected * 100, 1), round(actual * 100, 1))) exit(1) - print 'expected >= {} %, actual {} %, passed'.format( - round(expected * 100, 1), round(actual * 100, 1)) + print('expected >= {} %, actual {} %, passed'.format( + round(expected * 100, 1), round(actual * 100, 1))) diff --git a/tools/coverage/paddle_coverage.sh b/tools/coverage/paddle_coverage.sh index d54434b738db5b5e6192f7d3bf9e48edbebc5b7c..008b35d01ca565a6d32265f595dd2d6aa55be707 100644 --- a/tools/coverage/paddle_coverage.sh +++ b/tools/coverage/paddle_coverage.sh @@ -5,7 +5,7 @@ set -xe PADDLE_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}")/../../" && pwd )" # install lcov -curl -o /lcov-1.14.tar.gz -x "" -s https://paddle-ci.gz.bcebos.com/coverage/lcov-1.14.tar.gz +curl -o /lcov-1.14.tar.gz -x "" -s https://paddle-ci.gz.bcebos.com/coverage/lcov-1.14.tar.gz || exit 101 tar -xf /lcov-1.14.tar.gz -C / cd /lcov-1.14 make install @@ -14,7 +14,7 @@ make install cd /paddle/build -python ${PADDLE_ROOT}/tools/coverage/gcda_clean.py ${GIT_PR_ID} +python3 ${PADDLE_ROOT}/tools/coverage/gcda_clean.py ${GIT_PR_ID} lcov --capture -d ./ -o coverage.info --rc lcov_branch_coverage=0 @@ -53,9 +53,9 @@ gen_full_html_report || true function gen_diff_html_report() { if [ "${GIT_PR_ID}" != "" ]; then - COVERAGE_DIFF_PATTERN="`python ${PADDLE_ROOT}/tools/coverage/pull_request.py files ${GIT_PR_ID}`" + COVERAGE_DIFF_PATTERN="`python3 ${PADDLE_ROOT}/tools/coverage/pull_request.py files ${GIT_PR_ID}`" - python ${PADDLE_ROOT}/tools/coverage/pull_request.py diff ${GIT_PR_ID} > git-diff.out + python3 ${PADDLE_ROOT}/tools/coverage/pull_request.py diff ${GIT_PR_ID} > git-diff.out fi lcov --extract coverage-full.info \ @@ -63,7 +63,7 @@ function gen_diff_html_report() { -o coverage-diff.info \ --rc lcov_branch_coverage=0 - python ${PADDLE_ROOT}/tools/coverage/coverage_diff.py coverage-diff.info git-diff.out > coverage-diff.tmp + python3 ${PADDLE_ROOT}/tools/coverage/coverage_diff.py coverage-diff.info git-diff.out > coverage-diff.tmp mv -f coverage-diff.tmp coverage-diff.info @@ -82,7 +82,7 @@ set -x coverage xml -i -o python-coverage.xml -python ${PADDLE_ROOT}/tools/coverage/python_coverage.py > python-coverage.info +python3 ${PADDLE_ROOT}/tools/coverage/python_coverage.py > python-coverage.info # python full html report # @@ -143,5 +143,6 @@ echo "Assert Python Diff Coverage" python ${PADDLE_ROOT}/tools/coverage/coverage_lines.py python-coverage-diff.info 0.9 || PYTHON_COVERAGE_LINES_ASSERT=1 if [ "$COVERAGE_LINES_ASSERT" = "1" ] || [ "$PYTHON_COVERAGE_LINES_ASSERT" = "1" ]; then + echo "exit 9" > /tmp/paddle_coverage.result exit 9 fi diff --git a/tools/coverage/pull_request.py b/tools/coverage/pull_request.py index 979f476d2a1f3fbf28d43e2b717deb7b2d1b0fff..105460032f7db538eaf7a193776bf8085e2837a1 100644 --- a/tools/coverage/pull_request.py +++ b/tools/coverage/pull_request.py @@ -40,7 +40,7 @@ def get_files(args): pull = get_pull(args.pull_id) for file in pull.get_files(): - print '/paddle/{}'.format(file.filename) + print('/paddle/{}'.format(file.filename)) def diff(args): @@ -55,8 +55,8 @@ def diff(args): pull = get_pull(args.pull_id) for file in pull.get_files(): - print '+++ {}'.format(file.filename) - print file.patch + print('+++ {}'.format(file.filename)) + print(file.patch) if __name__ == '__main__': diff --git a/tools/coverage/python_coverage.py b/tools/coverage/python_coverage.py index ba67e12249bb0ccee608ea120321eaa3a2ccbc91..8ad9d85c1bf6b5ed542fb8469173c4f1815050a4 100644 --- a/tools/coverage/python_coverage.py +++ b/tools/coverage/python_coverage.py @@ -12,10 +12,7 @@ root = tree.getroot() sources = root.findall('sources/source') -if len(sources) > 1: - exit(1) - -source = sources[0].text +source = sources[-1].text for clazz in root.findall('packages/package/classes/class'): clazz_filename = clazz.attrib.get('filename') @@ -28,8 +25,8 @@ for clazz in root.findall('packages/package/classes/class'): if not path.exists(clazz_filename): continue - print 'TN:' - print 'SF:{}'.format(clazz_filename) + print('TN:') + print('SF:{}'.format(clazz_filename)) branch_index = 0 @@ -50,16 +47,16 @@ for clazz in root.findall('packages/package/classes/class'): taken = int(taken) for _ in range(taken): - print 'BRDA:{},{},{},{}'.format(line_number, 0, branch_index, - line_hits) + print('BRDA:{},{},{},{}'.format(line_number, 0, branch_index, + line_hits)) branch_index += 1 if line_missing_branches: for missing_branch in line_missing_branches.split(','): - print 'BRDA:{},{},{},{}'.format(line_number, 0, - branch_index, 0) + print('BRDA:{},{},{},{}'.format(line_number, 0, + branch_index, 0)) branch_index += 1 - print 'DA:{},{}'.format(line_number, line_hits) + print('DA:{},{}'.format(line_number, line_hits)) - print 'end_of_record' + print('end_of_record')