未验证 提交 e0c9d856 编写于 作者: L Leo Chen 提交者: GitHub

add unused input vars check for OpWithKernel, test=develop (#21169)

* add unused input vars check for OpWithKernel, test=develop

* remove unused vars in some ops, test=develop

* fix batch_norm, test=develop

* add white list, test=develop

* add CI check for white list, test=develop

* :ove white list to c++, test=develop

* solve failure of CI, test=develop

* add unittest for unused_var_check, test=develop

* refine code, enable check in operator_test, test=develop

* skip mkldnn, test=develop

* extend white list, test=develop

* refine condition of mkldnn, test=develop

* fix paddle_build, test=develop

* follow comments, test=develop

* fix GetExpectedKernelType

* add wiki ref to err_msg, test=develop

* follow comment, test=develop
上级 664f958a
......@@ -85,6 +85,7 @@ endif()
cc_test(var_type_traits_test SRCS var_type_traits_test.cc DEPS var_type_traits)
cc_library(scope SRCS scope.cc DEPS glog threadpool xxhash var_type_traits)
cc_library(scope_pool SRCS scope_pool.cc DEPS scope)
cc_test(scope_test SRCS scope_test.cc DEPS scope)
cc_test(variable_test SRCS variable_test.cc DEPS tensor var_type_traits)
......@@ -128,8 +129,11 @@ cc_test(no_need_buffer_vars_inference_test SRCS no_need_buffer_vars_inference_te
cc_library(transfer_scope_cache SRCS transfer_scope_cache.cc DEPS scope framework_proto device_context)
cc_library(op_kernel_type SRCS op_kernel_type.cc DEPS device_context place)
cc_library(unused_var_check SRCS unused_var_check.cc DEPS glog no_need_buffer_vars_inference)
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog trainer_desc_proto data_feed_proto
shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack)
shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check)
cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry device_context)
cc_test(operator_exception_test SRCS operator_exception_test.cc DEPS operator op_registry device_context)
......
......@@ -28,11 +28,13 @@ limitations under the License. */
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/shape_inference.h"
#include "paddle/fluid/framework/transfer_scope_cache.h"
#include "paddle/fluid/framework/unused_var_check.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/platform/profiler.h"
DECLARE_bool(benchmark);
DECLARE_bool(check_nan_inf);
DECLARE_bool(enable_unused_var_check);
DEFINE_int32(inner_op_parallelism, 0, "number of threads for inner op");
DEFINE_bool(fast_check_nan_inf, false,
"Fast checking NAN/INF after each operation. It will be a little"
......@@ -434,6 +436,8 @@ bool ExecutionContext::HasOutput(const std::string& name) const {
}
const Variable* ExecutionContext::InputVar(const std::string& name) const {
LogVarUsageIfUnusedVarCheckEnabled(name);
auto it = ctx_.inputs.find(name);
if (it == ctx_.inputs.end()) return nullptr;
......@@ -463,6 +467,8 @@ const Tensor* ExecutionContext::Input<Tensor>(const std::string& name) const {
template <>
const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
const std::string& name) const {
LogVarUsageIfUnusedVarCheckEnabled(name);
auto vars = MultiInputVar(name);
if (vars.size() == 0) {
return {};
......@@ -958,6 +964,11 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
RuntimeInferShapeContext infer_shape_ctx(*this, exec_scope, *runtime_ctx);
this->InferShape(&infer_shape_ctx);
}
if (FLAGS_enable_unused_var_check) {
GetThreadLocalUsedVarNameSet()->clear();
}
// TODO(panyx0718): ExecutionContext should only depend on RuntimeContext
// not Scope. Imperative mode only pass inputs and get outputs.
(*kernel_func_)(ExecutionContext(*this, exec_scope, *dev_ctx, *runtime_ctx,
......@@ -967,6 +978,14 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
// there is inplace variable has been transfered.
TransferInplaceVarsBack(scope, transfered_inplace_vars, *transfer_scope);
}
if (FLAGS_enable_unused_var_check) {
// skip op that uses mkldnn because it has different memory reuse strategy.
// use attr here because some GradMakers (like ActivationGradOpMaker) add
// input when use_mkldnn=true;
if (!(HasAttr("use_mkldnn") && Attr<bool>("use_mkldnn"))) {
CheckUnusedVar(*this, scope);
}
}
/*For profiling/benchmark only*/
if (FLAGS_benchmark) {
......
......@@ -35,6 +35,7 @@ limitations under the License. */
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/unused_var_check.h"
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/variant.h"
......@@ -290,6 +291,8 @@ class ExecutionContext {
virtual const std::vector<Variable*> MultiInputVar(
const std::string& name) const {
LogVarUsageIfUnusedVarCheckEnabled(name);
auto it = ctx_.inputs.find(name);
if (it == ctx_.inputs.end()) {
return {};
......@@ -330,6 +333,8 @@ class ExecutionContext {
template <typename T>
const std::vector<const T*> MultiInput(const std::string& name) const {
LogVarUsageIfUnusedVarCheckEnabled(name);
auto vars = MultiInputVar(name);
if (vars.size() == 0) {
return {};
......
......@@ -18,6 +18,8 @@ limitations under the License. */
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/init.h"
DECLARE_bool(enable_unused_var_check);
namespace paddle {
namespace framework {
......@@ -139,6 +141,8 @@ class CPUKernelTest : public OpKernel<float> {
cpu_kernel_run_num++;
ASSERT_EQ(ctx.InputName("x"), "IN1");
ASSERT_EQ(ctx.OutputName("y"), "OUT1");
auto* x = ctx.Input<Tensor>("X");
ASSERT_EQ(x, nullptr);
}
};
......@@ -626,3 +630,117 @@ void SetGetLoDLevelTestMain(std::string op_type) {
TEST(GetLoDLevelTest, base) { SetGetLoDLevelTestMain("get_lod_level_test"); }
TEST(SetLoDLevelTest, base) { SetGetLoDLevelTestMain("set_lod_level_test"); }
namespace paddle {
namespace framework {
class OpUnusedVarTest : public OperatorWithKernel {
public:
using OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext* ctx) const override {}
OpKernelType GetExpectedKernelType(
const ExecutionContext& ctx) const override {
return OpKernelType(proto::VarType::FP32, ctx.GetPlace(),
framework::DataLayout::kAnyLayout);
}
};
class OpUnusedVarTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
public:
void Make() {
AddInput("X", "input of test op");
AddOutput("Y", "output of test op");
AddComment("This is test op for unused var check.");
}
};
template <typename T>
class OpWithUnusedVarKernelTest : public OpKernel<T> {
public:
void Compute(const ExecutionContext& ctx) const {
ASSERT_EQ(ctx.InputName("X"), "X");
ASSERT_EQ(ctx.OutputName("Y"), "Y");
}
};
template <typename T>
class OpWithoutUnusedVarKernelTest : public OpKernel<T> {
public:
void Compute(const ExecutionContext& ctx) const {
ASSERT_EQ(ctx.InputName("X"), "X");
ASSERT_EQ(ctx.OutputName("Y"), "Y");
auto* x = ctx.Input<Tensor>("X");
auto* y = ctx.Output<Tensor>("Y");
ASSERT_NE(x, y);
ASSERT_NE(y, nullptr);
}
};
} // namespace framework
} // namespace paddle
REGISTER_OP_WITHOUT_GRADIENT(
op_with_unused_var, paddle::framework::OpUnusedVarTest,
paddle::framework::OpUnusedVarTestProtoAndCheckerMaker);
REGISTER_OP_CPU_KERNEL(op_with_unused_var,
paddle::framework::OpWithUnusedVarKernelTest<float>);
REGISTER_OP_WITHOUT_GRADIENT(
op_without_unused_var, paddle::framework::OpUnusedVarTest,
paddle::framework::OpUnusedVarTestProtoAndCheckerMaker);
REGISTER_OP_CPU_KERNEL(op_without_unused_var,
paddle::framework::OpWithoutUnusedVarKernelTest<float>);
// test with single input
TEST(OpWithUnusedVar, all) {
// enable the unused_var_check
FLAGS_enable_unused_var_check = true;
paddle::framework::InitDevices(true);
paddle::framework::proto::OpDesc op_desc;
op_desc.set_type("op_with_unused_var");
BuildVar("X", {"X"}, op_desc.add_inputs());
BuildVar("Y", {"Y"}, op_desc.add_outputs());
paddle::platform::CPUPlace cpu_place;
paddle::framework::Scope scope;
auto* x = scope.Var("X")->GetMutable<paddle::framework::LoDTensor>();
auto* y = scope.Var("Y")->GetMutable<paddle::framework::LoDTensor>();
x->Resize({32, 64});
y->Resize({32, 64});
x->mutable_data<float>(cpu_place);
y->mutable_data<float>(cpu_place);
auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
// should throw exception
ASSERT_THROW(op->Run(scope, cpu_place), paddle::platform::EnforceNotMet);
FLAGS_enable_unused_var_check = false;
}
TEST(OpWithoutUnusedVar, all) {
// enable the unused_var_check
FLAGS_enable_unused_var_check = true;
paddle::framework::InitDevices(true);
paddle::framework::proto::OpDesc op_desc;
op_desc.set_type("op_without_unused_var");
BuildVar("X", {"X"}, op_desc.add_inputs());
BuildVar("Y", {"Y"}, op_desc.add_outputs());
paddle::platform::CPUPlace cpu_place;
paddle::framework::Scope scope;
auto* x = scope.Var("X")->GetMutable<paddle::framework::LoDTensor>();
auto* y = scope.Var("Y")->GetMutable<paddle::framework::LoDTensor>();
x->Resize({32, 64});
y->Resize({32, 64});
x->mutable_data<float>(cpu_place);
y->mutable_data<float>(cpu_place);
auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
// should not throw exception
ASSERT_NO_THROW(op->Run(scope, cpu_place));
FLAGS_enable_unused_var_check = false;
}
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <string>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/unused_var_check.h"
#include "paddle/fluid/platform/enforce.h"
DEFINE_bool(enable_unused_var_check, false,
"Checking whether operator contains unused inputs, "
"especially for grad operator. It should be in unittest.");
const std::unordered_set<std::string> op_has_unsed_vars_white_list = {
"auc",
"batch_norm",
"batch_norm_grad",
"sync_batch_norm_grad",
"center_loss_grad",
"crop",
"cvm",
"cos_sim_grad",
"dgc_momentum",
"fake_quantize_range_abs_max",
"fill_zeros_like",
"fusion_seqpool_cvm_concat",
"reshape2_grad_grad",
"reshape2_grad",
"gru_grad",
"hierarchical_sigmoid_grad",
"nce_grad",
"roi_perspective_transform_grad",
"sequence_conv_grad",
"gru_unit_grad",
"affine_grid_grad",
"fill_any_like",
"precision_recall",
"unsqueeze_grad",
"kldiv_loss_grad",
"cvm_grad",
"stack_grad",
"warpctc_grad",
"sync_batch_norm",
"match_matrix_tensor_grad",
"ngraph_engine"};
namespace paddle {
namespace framework {
std::unordered_set<std::string> *GetThreadLocalUsedVarNameSet() {
thread_local std::unordered_set<std::string> used_var_name_set;
return &used_var_name_set;
}
void LogVarUsageIfUnusedVarCheckEnabled(const std::string &name) {
if (FLAGS_enable_unused_var_check) {
VLOG(6) << "Variable used:" << name;
GetThreadLocalUsedVarNameSet()->insert(name);
}
}
void CheckUnusedVar(const OperatorBase &op, const Scope &scope) {
// skip op in white list and it should be fixed in the future.
if (op_has_unsed_vars_white_list.count(op.Type()) != 0) {
return;
}
auto *used_set = GetThreadLocalUsedVarNameSet();
std::vector<std::string> unsed_input_var_names;
auto &inferer = op.Info().NoNeedBufferVarsInferer();
std::unordered_set<std::string> no_need_buffer_ins = {};
if (inferer) {
no_need_buffer_ins = inferer(op.Inputs(), op.Outputs(), op.Attrs());
}
for (auto &pair : op.Inputs()) {
// skip no need buffer vars declared
if (no_need_buffer_ins.count(pair.first) != 0) {
VLOG(6) << op.Type() << " " << pair.first;
continue;
}
if (used_set->count(pair.first) == 0) {
for (auto &in_var_name : pair.second) {
auto *in_var = scope.FindVar(in_var_name);
if (in_var != nullptr && in_var->IsInitialized()) {
auto *tensor = &in_var->Get<LoDTensor>();
if (tensor != nullptr && tensor->IsInitialized()) {
unsed_input_var_names.emplace_back(pair.first);
break;
}
}
}
}
}
if (!unsed_input_var_names.empty()) {
std::string err_msg = "Operator " + op.Type() + " has input(s) not uesed: ";
for (auto &in_var_name : unsed_input_var_names) {
err_msg += in_var_name;
err_msg += ", ";
}
err_msg +=
"please make sure it(they) is(are) needed. If not, remove it(them) "
"from inputs of the operator; if yes, register "
"NoNeedBufferVarsInference or add "
"the operator to "
"white list in unused_var_check.cc. See more details at "
"[https://github.com/PaddlePaddle/Paddle/wiki/"
"OP-Should-Not-Have-Unused-Input]";
PADDLE_ENFORCE_EQ(unsed_input_var_names.size(), 0,
platform::errors::PermissionDenied(
"Unused input variables check failed: %s", err_msg));
}
}
} // namespace framework
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <string>
#include <unordered_set>
#include "paddle/fluid/framework/operator.h"
namespace paddle {
namespace framework {
std::unordered_set<std::string>* GetThreadLocalUsedVarNameSet();
void LogVarUsageIfUnusedVarCheckEnabled(const std::string& name);
void CheckUnusedVar(const OperatorBase& op, const Scope& scope);
} // namespace framework
} // namespace paddle
......@@ -633,12 +633,7 @@ class BatchNormGradKernel<platform::CPUDeviceContext, T>
};
template <typename T>
class BatchNormGradMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
std::unique_ptr<T> Apply() const override {
std::unique_ptr<T> BatchNormGradMaker<T>::Apply() const {
auto *op = new T();
op->SetType(this->ForwardOpType() + "_grad");
op->SetInput("X", this->Input("X"));
......@@ -662,8 +657,7 @@ class BatchNormGradMaker : public framework::SingleGradOpMaker<T> {
op->SetOutput(framework::GradVarName("Bias"), this->InputGrad("Bias"));
return std::unique_ptr<T>(op);
}
};
}
} // namespace operators
} // namespace paddle
......
......@@ -64,6 +64,15 @@ class BatchNormOpMaker : public framework::OpProtoAndCheckerMaker {
void Make() override;
};
template <typename T>
class BatchNormGradMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
std::unique_ptr<T> Apply() const override;
};
class BatchNormOpInferVarType
: public framework::PassInDtypeAndVarTypeToOutput {
protected:
......
......@@ -131,12 +131,28 @@ class FlattenGradOp : public framework::OperatorWithKernel {
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext &ctx) const override {
return framework::OpKernelType(
OperatorWithKernel::IndicateVarDataType(ctx, "X"),
return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType(
ctx, framework::GradVarName("Out")),
ctx.device_context());
}
};
template <typename T>
class FlattenGradOpMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
std::unique_ptr<T> Apply() const override {
auto *grad_op = new T();
grad_op->SetType("flatten_grad");
grad_op->SetInput("X", this->Input("X"));
grad_op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
grad_op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
grad_op->SetAttrMap(this->Attrs());
return std::unique_ptr<T>(grad_op);
}
};
// FIXME(zcd): flatten2 adds an intermediate output(XShape) based on flatten,
// the XShape is used to carry the shape and lod of X which will be used in
// flatten_grad, in this way, the framework can reuse the memory of X
......@@ -234,18 +250,20 @@ DECLARE_INPLACE_OP_INFERER(FlattenOpInplaceInToOut, {"X", "Out"});
DECLARE_INPLACE_OP_INFERER(FlattenGradInplaceinToOut,
{framework::GradVarName("Out"),
framework::GradVarName("X")});
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(FlattenGradNoNeedBufferVarsInference,
"X");
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(
flatten, ops::FlattenOp, ops::FlattenOpMaker,
paddle::framework::DefaultGradOpMaker<paddle::framework::OpDesc, true>,
paddle::framework::DefaultGradOpMaker<paddle::imperative::OpBase, true>,
REGISTER_OPERATOR(flatten, ops::FlattenOp, ops::FlattenOpMaker,
ops::FlattenGradOpMaker<paddle::framework::OpDesc>,
ops::FlattenGradOpMaker<paddle::imperative::OpBase>,
ops::FlattenOpInplaceInToOut);
REGISTER_OPERATOR(flatten_grad, ops::FlattenGradOp,
ops::FlattenGradInplaceinToOut);
ops::FlattenGradInplaceinToOut,
ops::FlattenGradNoNeedBufferVarsInference);
REGISTER_OPERATOR(flatten2, ops::Flatten2Op, ops::Flatten2OpMaker,
ops::Flatten2GradOpMaker<paddle::framework::OpDesc>,
......
......@@ -87,7 +87,6 @@ class InstanceNormGradMaker : public framework::SingleGradOpMaker<T> {
op->SetInput(framework::GradVarName("Y"), this->OutputGrad("Y"));
op->SetInput("Scale", this->Input("Scale"));
op->SetInput("Bias", this->Input("Bias"));
op->SetInput("SavedMean", this->Output("SavedMean"));
op->SetInput("SavedVariance", this->Output("SavedVariance"));
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/lrn_op.h"
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/operators/math/blas.h"
......@@ -348,14 +349,31 @@ class LRNOpGrad : public framework::OperatorWithKernel {
layout_, library_);
}
};
template <typename T>
class LRNGradOpMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
std::unique_ptr<T> Apply() const override {
auto* op = new T();
op->SetType(this->ForwardOpType() + "_grad");
op->SetInput("X", this->Input("X"));
op->SetInput("Out", this->Output("Out"));
op->SetInput("MidOut", this->Output("MidOut"));
op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
op->SetAttrMap(this->Attrs());
return std::unique_ptr<T>(op);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(
lrn, ops::LRNOp, ops::LRNOpMaker<float>,
paddle::framework::DefaultGradOpMaker<paddle::framework::OpDesc, true>,
paddle::framework::DefaultGradOpMaker<paddle::imperative::OpBase, true>);
REGISTER_OPERATOR(lrn, ops::LRNOp, ops::LRNOpMaker<float>,
ops::LRNGradOpMaker<paddle::framework::OpDesc>,
ops::LRNGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(lrn_grad, ops::LRNOpGrad);
REGISTER_OP_CPU_KERNEL(
......
......@@ -10,6 +10,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/prelu_op.h"
#include <memory>
#include <string>
namespace paddle {
......@@ -130,15 +131,34 @@ class PReluGradOp : public framework::OperatorWithKernel {
}
};
template <typename T>
class PReluGradOpMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
std::unique_ptr<T> Apply() const override {
std::unique_ptr<T> op(new T());
op->SetType("prelu_grad");
op->SetInput("X", this->Input("X"));
op->SetInput("Alpha", this->Input("Alpha"));
op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
op->SetOutput(framework::GradVarName("Alpha"), this->InputGrad("Alpha"));
op->SetAttrMap(this->Attrs());
return op;
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(
prelu, ops::PReluOp, ops::PReluOpMaker,
paddle::framework::DefaultGradOpMaker<paddle::framework::OpDesc, true>,
paddle::framework::DefaultGradOpMaker<paddle::imperative::OpBase, true>);
REGISTER_OPERATOR(prelu, ops::PReluOp, ops::PReluOpMaker,
ops::PReluGradOpMaker<paddle::framework::OpDesc>,
ops::PReluGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(prelu_grad, ops::PReluGradOp);
REGISTER_OP_CPU_KERNEL(
prelu, ops::PReluKernel<paddle::platform::CPUDeviceContext, float>);
......
......@@ -70,11 +70,11 @@ class CUDAPReluKernel : public framework::OpKernel<T> {
enum PRELU_MODE { Element, Channel, Scalar };
template <typename T>
__global__ void PReluOpGradKernel(const T* x_ptr, const T* y_ptr,
const T* alpha_ptr, const T* dy_ptr,
T* dx_ptr, T* dalpha_ptr, size_t channel_num,
size_t plane_size, size_t spatial_size,
size_t numel, PRELU_MODE mode) {
__global__ void PReluOpGradKernel(const T* x_ptr, const T* alpha_ptr,
const T* dy_ptr, T* dx_ptr, T* dalpha_ptr,
size_t channel_num, size_t plane_size,
size_t spatial_size, size_t numel,
PRELU_MODE mode) {
size_t index;
CUDA_KERNEL_LOOP(index, numel) {
T scale;
......@@ -98,15 +98,15 @@ __global__ void PReluOpGradKernel(const T* x_ptr, const T* y_ptr,
template <typename T>
class PreluOpGradFunctor {
public:
void operator()(cudaStream_t stream, const T* x, const T* y, const T* alpha,
const T* dy, T* dx, T* dalpha, std::vector<int> input_shape,
void operator()(cudaStream_t stream, const T* x, const T* alpha, const T* dy,
T* dx, T* dalpha, std::vector<int> input_shape,
PRELU_MODE mode) {
size_t plane_size = input_shape[2] * input_shape[3];
size_t spatial_size = plane_size * input_shape[1];
size_t numel = spatial_size * input_shape[0];
PReluOpGradKernel<
T><<<PADDLE_GET_BLOCKS(numel), CUDA_NUM_THREADS, 0, stream>>>(
x, y, alpha, dy, dx, dalpha, input_shape[1], plane_size, spatial_size,
x, alpha, dy, dx, dalpha, input_shape[1], plane_size, spatial_size,
numel, mode);
}
};
......@@ -121,14 +121,12 @@ class CUDAPReluGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* x = context.Input<Tensor>("X");
auto* y = context.Input<Tensor>("Out");
auto* alpha = context.Input<Tensor>("Alpha");
auto* dx = context.Output<Tensor>(framework::GradVarName("X"));
auto* dy = context.Input<Tensor>(framework::GradVarName("Out"));
auto* dalpha = context.Output<Tensor>(framework::GradVarName("Alpha"));
const T* x_ptr = x->data<T>();
const T* y_ptr = y->data<T>();
const T* alpha_ptr = alpha->data<T>();
const T* dy_ptr = dy->data<T>();
T* dx_ptr = dx ? dx->mutable_data<T>(context.GetPlace()) : nullptr;
......@@ -163,7 +161,7 @@ class CUDAPReluGradKernel : public framework::OpKernel<T> {
m = Scalar;
}
PreluOpGradFunctor<T> prelu_grad;
prelu_grad(stream, x_ptr, y_ptr, alpha_ptr, dy_ptr, dx_ptr, dalpha_tmp_ptr,
prelu_grad(stream, x_ptr, alpha_ptr, dy_ptr, dx_ptr, dalpha_tmp_ptr,
input_shape, m);
if (dalpha_tmp_ptr == nullptr) return;
......
......@@ -123,8 +123,8 @@ class SqueezeGradOp : public framework::OperatorWithKernel {
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext &ctx) const override {
return framework::OpKernelType(
OperatorWithKernel::IndicateVarDataType(ctx, "X"),
return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType(
ctx, framework::GradVarName("Out")),
ctx.device_context());
}
};
......@@ -214,6 +214,22 @@ class Squeeze2Op : public framework::OperatorWithKernel {
}
};
template <typename T>
class SqueezeGradOpMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
std::unique_ptr<T> Apply() const override {
auto *grad_op = new T();
grad_op->SetType("squeeze_grad");
grad_op->SetInput("X", this->Input("X"));
grad_op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
grad_op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
grad_op->SetAttrMap(this->Attrs());
return std::unique_ptr<T>(grad_op);
}
};
class Squeeze2GradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
......@@ -274,16 +290,17 @@ DECLARE_INPLACE_OP_INFERER(SequeezeInplaceInferer, {"X", "Out"});
DECLARE_INPLACE_OP_INFERER(SequeezeGradInplaceInferer,
{framework::GradVarName("Out"),
framework::GradVarName("X")});
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(SqueezeGradNoNeedBufferVarsInference,
"X");
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(
squeeze, ops::SqueezeOp, ops::SqueezeOpMaker,
paddle::framework::DefaultGradOpMaker<paddle::framework::OpDesc, true>,
paddle::framework::DefaultGradOpMaker<paddle::imperative::OpBase, true>);
REGISTER_OPERATOR(squeeze_grad, ops::SqueezeGradOp);
REGISTER_OPERATOR(squeeze, ops::SqueezeOp, ops::SqueezeOpMaker,
ops::SqueezeGradOpMaker<paddle::framework::OpDesc>,
ops::SqueezeGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(squeeze_grad, ops::SqueezeGradOp,
ops::SqueezeGradNoNeedBufferVarsInference);
REGISTER_OPERATOR(squeeze2, ops::Squeeze2Op, ops::Squeeze2OpMaker,
ops::Squeeze2GradOpMaker<paddle::framework::OpDesc>,
......
......@@ -439,9 +439,7 @@ EOF
pip3.7 install --user ${INSTALL_PREFIX:-/paddle/build}/opt/paddle/share/wheels/*.whl
fi
# TODO: jiabin need to refine this part when these tests fixed on mac
ctest --output-on-failure -j $2
paddle version
fi
}
......@@ -709,6 +707,13 @@ function parallel_test() {
parallel_test_base
}
function enable_unused_var_check() {
# NOTE(zhiqiu): Set FLAGS_enable_unused_var_check=1 here to enable unused_var_check,
# which checks if an operator has unused input variable(s).
# Currently, use it in coverage CI job.
export FLAGS_enable_unused_var_check=1
}
function gen_doc_lib() {
mkdir -p ${PADDLE_ROOT}/build
cd ${PADDLE_ROOT}/build
......@@ -1079,6 +1084,7 @@ function main() {
cicheck)
cmake_gen ${PYTHON_ABI:-""}
build ${parallel_number}
enable_unused_var_check
parallel_test
;;
cicheck_brpc)
......
......@@ -168,7 +168,8 @@ def __bootstrap__():
'print_sub_graph_dir', 'pe_profile_fname', 'inner_op_parallelism',
'enable_parallel_graph', 'fuse_parameter_groups_size',
'multiple_of_cupti_buffer_size', 'fuse_parameter_memory_size',
'tracer_profile_fname', 'dygraph_debug', 'use_system_allocator'
'tracer_profile_fname', 'dygraph_debug', 'use_system_allocator',
'enable_unused_var_check'
]
if 'Darwin' not in sysstr:
read_env_flags.append('use_pinned_memory')
......
......@@ -25,7 +25,8 @@ API_FILES=("CMakeLists.txt"
"python/paddle/fluid/parallel_executor.py"
"python/paddle/fluid/framework.py"
"python/paddle/fluid/backward.py"
"paddle/fluid/operators/distributed/send_recv.proto.in")
"paddle/fluid/operators/distributed/send_recv.proto.in"
"paddle/fluid/framework/unused_var_check.cc")
approval_line=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000`
git_files=`git diff --numstat upstream/$BRANCH| wc -l`
......@@ -73,7 +74,7 @@ for API_FILE in ${API_FILES[*]}; do
echo "checking ${API_FILE} change, PR: ${GIT_PR_ID}, changes: ${API_CHANGE}"
if [ "${API_CHANGE}" ] && [ "${GIT_PR_ID}" != "" ]; then
# NOTE: per_page=10000 should be ok for all cases, a PR review > 10000 is not human readable.
# approval_user_list: XiaoguangHu01 46782768,Xreki 12538138,luotao1 6836917,sneaxiy 32832641,qingqing01 7845005,guoshengCS 14105589,heavengate 12605721,kuke 3064195,Superjomn 328693,lanxianghit 47554610,cyj1986 39645414,hutuxian 11195205,frankwhzhang 20274488,nepeplwu 45024560,Dianhai 38231817,JiabinYang 22361972,chenwhql 22561442,seiriosPlus 5442383,gongweibao 10721757,saxon-zh 2870059,Boyan-Liu 2870059.
# approval_user_list: XiaoguangHu01 46782768,Xreki 12538138,luotao1 6836917,sneaxiy 32832641,qingqing01 7845005,guoshengCS 14105589,heavengate 12605721,kuke 3064195,Superjomn 328693,lanxianghit 47554610,cyj1986 39645414,hutuxian 11195205,frankwhzhang 20274488,nepeplwu 45024560,Dianhai 38231817,JiabinYang 22361972,chenwhql 22561442,zhiqiu 6888866,seiriosPlus 5442383,gongweibao 10721757,saxon-zh 2870059,Boyan-Liu 2870059.
if [ "${API_FILE}" == "paddle/fluid/op_use_default_grad_op_maker.spec" ];then
echo_line="You must have one RD (sneaxiy (Recommend) or luotao1) approval for op_use_default_grad_op_maker.spec, which manages the grad_op memory optimization.\n"
check_approval 1 32832641 6836917
......@@ -89,6 +90,9 @@ for API_FILE in ${API_FILES[*]}; do
elif [ "${API_FILE}" == "paddle/fluid/operators/distributed/send_recv.proto.in" ];then
echo_line="You must have one RD (gongweibao or seiriosPlus) approval for the paddle/fluid/operators/distributed/send_recv.proto.in, which manages the environment variables.\n"
check_approval 1 10721757 5442383
elif [ "${API_FILE}" == "paddle/fluid/framework/unused_var_check.cc" ];then
echo_line="You must have one RD (zhiqiu (Recommend) , sneaxiy or luotao1) approval for the paddle/fluid/framework/unused_var_check.cc, which manages the white list of operators that have unused input variables. Before change the white list, please read the spicification [https://github.com/PaddlePaddle/Paddle/wiki/OP-Should-Not-Have-Unused-Input] and try to refine code first. \n"
check_approval 1 6888866 32832641 6836917
else
echo_line="You must have one RD (XiaoguangHu01,Xreki,luotao1,sneaxiy) approval for ${API_FILE}, which manages the underlying code for fluid.\n"
check_approval 1 3048612 46782768 12538138 6836917 32832641
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册