Commit 7000ec85 authored by sneaxiy

fix some op grad makers

fix ctest eager deletion disable bug
test=develop
Parent f8ed2c22
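The recurring pattern in this commit: each touched operator gets a hand-written grad-op desc maker that wires only the variables its backward kernel actually reads, replacing `paddle::framework::DefaultGradOpDescMaker<true>`, which forwards every forward input and output and thus keeps all of their buffers alive until the backward op runs. A minimal sketch of the pattern for a hypothetical `my_op` (the names are illustrative, not part of this commit):

```cpp
// Sketch only: grad maker for a hypothetical "my_op" whose backward kernel
// needs nothing but Out@GRAD. DefaultGradOpDescMaker<true> would also wire
// X and Out into the grad op, pinning their buffers.
class MyOpGradDescMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  std::unique_ptr<framework::OpDesc> Apply() const override {
    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
    op->SetType("my_op_grad");
    op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
    op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
    op->SetAttrMap(Attrs());
    return op;
  }
};
```

Leaving unused variables out of the grad op is what allows the eager-deletion pass below to free their buffers as soon as the forward op finishes.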
......@@ -63,7 +63,7 @@ cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto
cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory)
nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor)
cc_library(garbage_collector SRCS garbage_collector.cc DEPS device_context memory)
cc_library(garbage_collector SRCS garbage_collector.cc DEPS device_context memory gflags)
cc_library(reader SRCS reader.cc DEPS lod_tensor ddim)
cc_test(reader_test SRCS reader_test.cc DEPS reader)
......
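For context on the new `gflags` dependency above: the garbage collector now reads runtime flags, so its target must link against gflags. A sketch of what such flag definitions look like, assuming the flag names from the `DECLARE_*` lines in the pybind hunk further down (the defaults and help strings here are illustrative, not taken from this commit):

```cpp
#include "gflags/gflags.h"

// Illustrative defaults; the real values live in the Paddle sources.
DEFINE_double(eager_delete_tensor_gb, -1.0,
              "Eagerly delete unused tensors when this threshold (in GB) is "
              "reached; a negative value disables eager deletion.");
DEFINE_bool(fast_eager_deletion_mode, false,
            "Whether to free memory without waiting on device events.");
DEFINE_double(memory_fraction_of_eager_deletion, 1.0,
              "Fraction of variables that are eligible for eager deletion.");
```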
......@@ -194,10 +194,17 @@ ExtractComputationOpFromLastLivedVar(VarHandle *var, size_t scope_idx,
}
/**
* Shrink op dependencies. If some ops do not need Tensor buffer of any input,
* Shrink op dependencies according to no-need-buffer vars.
*
* If some ops do not need Tensor buffer of any input,
* just remove the dependency of this op, i.e., decrease the reference count.
*
* Returns whether the dependency count decreases to 0.
* For example, input Y of elementwise_add_grad op is only used to infer shape
* and lod of Y@GRAD, we do not need the buffer of input Y. Data buffer of
* input Y can be collected before elementwise_add_grad op runs.
*
* This method returns whether the dependency count decreases to 0, and
* shrinks op dependency if possible.
*/
static bool ShrinkNoNeedBufferVarOpDependency(
const std::string &var_name,
......@@ -214,7 +221,7 @@ static bool ShrinkNoNeedBufferVarOpDependency(
inferer(op_base->Inputs(), op_base->Outputs(), op_base->Attrs());
// Check whether var_name occurs in other inputs or outputs of the op
// If it occurs, we cannot precede reference count to previous op
// If it occurs, we cannot decrease the dependency number.
bool occurred_in_other_vars = false;
for (auto &in_pair : op_base->Inputs()) {
if (no_need_buffer_vars.count(in_pair.first) > 0) {
......
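To make the `elementwise_add_grad` example in the comment above concrete, here is a sketch of how an operator declares a no-need-buffer input, using the same `DECLARE_NO_NEED_BUFFER_VARS_INFERENCE` macro this commit applies to `lod_reset_grad` below (`my_op_grad` and `MyOpGrad` are hypothetical names):

```cpp
// "Y" is read only to infer shape/LoD in this hypothetical grad op, so its
// data buffer may be collected before the op runs.
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(MyOpGradNoNeedBufferVarInference, "Y");

// Registering the inference alongside the op lets
// ShrinkNoNeedBufferVarOpDependency drop the op's dependency on Y.
REGISTER_OPERATOR(my_op_grad, ops::MyOpGrad,
                  ops::MyOpGradNoNeedBufferVarInference);
```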
......@@ -21,6 +21,7 @@
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/cuda_device_guard.h"
#endif
#include "gflags/gflags.h"
#include "paddle/fluid/framework/garbage_collector.h"
namespace paddle {
......
......@@ -1017,6 +1017,8 @@ Scope* OperatorWithKernel::PrepareData(
// of search key even though the set is empty.
if (!no_buffer_ins.empty() &&
no_buffer_ins.count(var_name_item.first) > 0) {
VLOG(1) << "Skip scanning input " << var_name_item.first
<< " in Operator " << type_;
continue;
}
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/add_position_encoding_op.h"
#include <memory>
namespace paddle {
namespace operators {
......@@ -39,13 +40,8 @@ class AddPositionEncodingOpGrad : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "X(Input) must not be null.");
PADDLE_ENFORCE(ctx->HasInput("Out"), "Out must not be null.");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
"Out@GRAD must not be null.");
auto out_dims = ctx->GetInputDim("Out");
if (ctx->HasOutput(framework::GradVarName("X"))) {
auto out_dims = ctx->GetInputDim(framework::GradVarName("Out"));
ctx->SetOutputDim(framework::GradVarName("X"), out_dims);
}
}
......@@ -75,6 +71,22 @@ class AddPositionEncodingOpMaker : public framework::OpProtoAndCheckerMaker {
}
};
class AddPositionEncodingGradOpDescMaker
: public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDesc> Apply() const override {
std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
op->SetType("add_position_encoding_grad");
op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
op->SetAttrMap(Attrs());
return op;
}
};
} // namespace operators
} // namespace paddle
......@@ -83,7 +95,7 @@ namespace plt = paddle::platform;
REGISTER_OPERATOR(add_position_encoding, ops::AddPositionEncodingOp,
ops::AddPositionEncodingOpMaker,
paddle::framework::DefaultGradOpDescMaker<true>);
ops::AddPositionEncodingGradOpDescMaker);
REGISTER_OPERATOR(add_position_encoding_grad, ops::AddPositionEncodingOpGrad);
REGISTER_OP_CPU_KERNEL(
......
......@@ -567,7 +567,6 @@ std::unique_ptr<framework::OpDesc> BatchNormGradMaker::Apply() const {
op->SetInput(framework::GradVarName("Y"), OutputGrad("Y"));
op->SetInput("Scale", Input("Scale"));
op->SetInput("Bias", Input("Bias"));
op->SetInput("SavedMean", Output("SavedMean"));
op->SetInput("SavedVariance", Output("SavedVariance"));
......
......@@ -455,13 +455,13 @@ framework::OpKernelType ConvOpGrad::GetExpectedKernelType(
return type;
}
class Conv2dGradMaker : public framework::SingleGradOpDescMaker {
class Conv2DGradMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
std::unique_ptr<framework::OpDesc> Apply() const override {
auto* op = new framework::OpDesc();
op->SetType(GradOpType());
op->SetType(this->ForwardOpType() + "_grad");
op->SetInput("Input", Input("Input"));
op->SetInput("Filter", Input("Filter"));
op->SetInput("Bias", Input("Bias"));
......@@ -470,14 +470,33 @@ class Conv2dGradMaker : public framework::SingleGradOpDescMaker {
op->SetOutput(framework::GradVarName("Input"), InputGrad("Input"));
op->SetOutput(framework::GradVarName("Filter"), InputGrad("Filter"));
op->SetOutput(framework::GradVarName("Bias"), InputGrad("Bias"));
op->SetAttrMap(Attrs());
return std::unique_ptr<framework::OpDesc>(op);
}
};
class Conv3DGradMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
virtual std::string GradOpType() const {
return this->ForwardOpType() + "_grad";
std::unique_ptr<framework::OpDesc> Apply() const override {
auto* op = new framework::OpDesc();
op->SetType(this->ForwardOpType() + "_grad");
op->SetInput("Input", Input("Input"));
op->SetInput("Filter", Input("Filter"));
op->SetInput(framework::GradVarName("Output"), OutputGrad("Output"));
op->SetOutput(framework::GradVarName("Input"), InputGrad("Input"));
op->SetOutput(framework::GradVarName("Filter"), InputGrad("Filter"));
if (ForwardOp().Inputs().count("ResidualData") != 0) {
op->SetInput("ResidualData", Input("ResidualData"));
}
op->SetAttrMap(Attrs());
return std::unique_ptr<framework::OpDesc>(op);
}
};
......@@ -486,17 +505,16 @@ class Conv2dGradMaker : public framework::SingleGradOpDescMaker {
namespace ops = paddle::operators;
REGISTER_OPERATOR(conv2d, ops::ConvOp, ops::Conv2DOpMaker,
ops::ConvOpInferVarType, ops::Conv2dGradMaker);
ops::ConvOpInferVarType, ops::Conv2DGradMaker);
REGISTER_OPERATOR(conv2d_grad, ops::ConvOpGrad);
// depthwise convolution op
REGISTER_OPERATOR(depthwise_conv2d, ops::ConvOp, ops::Conv2DOpMaker,
ops::ConvOpInferVarType, ops::Conv2dGradMaker);
ops::ConvOpInferVarType, ops::Conv2DGradMaker);
REGISTER_OPERATOR(depthwise_conv2d_grad, ops::ConvOpGrad);
REGISTER_OPERATOR(conv3d, ops::ConvOp, ops::Conv3DOpMaker,
ops::ConvOpInferVarType,
paddle::framework::DefaultGradOpDescMaker<true>);
ops::ConvOpInferVarType, ops::Conv3DGradMaker);
REGISTER_OPERATOR(conv3d_grad, ops::ConvOpGrad);
// depthwise conv kernel
......
......@@ -238,6 +238,23 @@ class CrossEntropyGradientOp : public CrossEntropyGradientOpBase {
}
};
class CrossEntropyGradOpDescMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDesc> Apply() const override {
std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
op->SetType("cross_entropy_grad");
op->SetInput("X", Input("X"));
op->SetInput("Label", Input("Label"));
op->SetInput(framework::GradVarName("Y"), OutputGrad("Y"));
op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
op->SetAttrMap(Attrs());
return op;
}
};
class CrossEntropyOp2 : public CrossEntropyOpBase {
public:
using CrossEntropyOpBase::CrossEntropyOpBase;
......@@ -354,7 +371,7 @@ using CPUCtx = paddle::platform::CPUDeviceContext;
REGISTER_OPERATOR(cross_entropy, ops::CrossEntropyOpBase,
ops::CrossEntropyOpMaker, ops::CrossEntropyOpInferVarType,
paddle::framework::DefaultGradOpDescMaker<true>);
ops::CrossEntropyGradOpDescMaker);
REGISTER_OPERATOR(cross_entropy_grad, ops::CrossEntropyGradientOp);
REGISTER_OP_CPU_KERNEL(cross_entropy, ops::CrossEntropyOpKernel<CPUCtx, float>,
ops::CrossEntropyOpKernel<CPUCtx, double>);
......
......@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <memory>
#include <string>
#include "paddle/fluid/framework/op_registry.h"
......@@ -170,11 +171,6 @@ class CudnnLSTMGradOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE(ctx->HasInput("Input"),
"Input(Input) of LSTM should not be null.");
PADDLE_ENFORCE(ctx->HasInput("W"), "Input(W) of LSTM should not be null.");
PADDLE_ENFORCE(ctx->HasInput("last_h"),
"Input(last_h) of LSTM should not be null.");
PADDLE_ENFORCE(ctx->HasInput("last_c"),
"Input(last_c) of LSTM should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Cache"),
"Input(last_c) of LSTM should not be null.");
PADDLE_ENFORCE(ctx->HasInput("InitH"),
......@@ -197,6 +193,35 @@ class CudnnLSTMGradOp : public framework::OperatorWithKernel {
}
};
class CudnnLSTMGradOpDescMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDesc> Apply() const override {
std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
op->SetType("cudnn_lstm_grad");
op->SetInput("Input", Input("Input"));
op->SetInput("InitH", Input("InitH"));
op->SetInput("InitC", Input("InitC"));
op->SetInput("W", Input("W"));
if (ForwardOp().Inputs().count("Cache") > 0) {
op->SetInput("Cache", Input("Cache"));
}
op->SetInput("Out", Output("Out"));
op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
op->SetInput(framework::GradVarName("last_c"), OutputGrad("last_c"));
op->SetInput(framework::GradVarName("last_h"), OutputGrad("last_h"));
op->SetOutput(framework::GradVarName("Input"), InputGrad("Input"));
op->SetOutput(framework::GradVarName("W"), InputGrad("W"));
op->SetOutput(framework::GradVarName("InitH"), InputGrad("InitH"));
op->SetOutput(framework::GradVarName("InitC"), InputGrad("InitC"));
op->SetAttrMap(Attrs());
return op;
}
};
template <typename T>
class NotImpleKernel : public framework::OpKernel<T> {
public:
......@@ -211,7 +236,7 @@ class NotImpleKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators;
REGISTER_OPERATOR(cudnn_lstm, ops::CudnnLSTMOp, ops::CudnnLSTMOpMaker,
paddle::framework::DefaultGradOpDescMaker<true>);
ops::CudnnLSTMGradOpDescMaker);
REGISTER_OPERATOR(cudnn_lstm_grad, ops::CudnnLSTMGradOp);
REGISTER_OP_CPU_KERNEL(cudnn_lstm, ops::NotImpleKernel<float>);
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/lod_reset_op.h"
#include <memory>
namespace paddle {
namespace operators {
......@@ -146,18 +147,39 @@ class LoDResetGradOp : public framework::OperatorWithKernel {
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext &ctx) const override {
return framework::OpKernelType(ctx.Input<framework::LoDTensor>("X")->type(),
ctx.device_context());
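// X is declared as a no-need-buffer var below, so its data buffer may
// already be freed here; infer the kernel type from Out@GRAD instead.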
return framework::OpKernelType(
ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"))->type(),
ctx.device_context());
}
};
class LoDResetGradDescMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDesc> Apply() const override {
std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
op->SetType("lod_reset_grad");
op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
op->SetInput("X", Input("X"));
op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
op->SetAttrMap(Attrs());
return op;
}
};
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(LoDResetGradNoNeedBufferVarInference,
"X");
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(lod_reset, ops::LoDResetOp, ops::LoDResetOpMaker,
paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(lod_reset_grad, ops::LoDResetGradOp);
ops::LoDResetGradDescMaker);
REGISTER_OPERATOR(lod_reset_grad, ops::LoDResetGradOp,
ops::LoDResetGradNoNeedBufferVarInference);
REGISTER_OP_CPU_KERNEL(
lod_reset, ops::LoDResetKernel<paddle::platform::CPUPlace, float>,
ops::LoDResetKernel<paddle::platform::CPUPlace, double>,
......
......@@ -75,6 +75,10 @@ DEFINE_bool(reader_queue_speed_test_mode, false,
"If set true, the queue.pop will only get data from queue but not "
"remove the data from queue for speed testing");
DECLARE_double(eager_delete_tensor_gb);
DECLARE_bool(fast_eager_deletion_mode);
DECLARE_double(memory_fraction_of_eager_deletion);
// disable auto conversion to list in Python
PYBIND11_MAKE_OPAQUE(paddle::framework::LoDTensorArray);
......@@ -150,6 +154,16 @@ PYBIND11_MODULE(core, m) {
return paddle::operators::AppendPythonCallableObjectAndReturnId(py_obj);
});
// NOTE(zjl): ctest loads environment variables at startup, even before we
// `import paddle.fluid as fluid`. So we add this API to enable eager
// deletion mode in unittests.
m.def("_set_eager_deletion_mode",
[](double threshold, double fraction, bool fast_mode) {
FLAGS_eager_delete_tensor_gb = threshold;
FLAGS_memory_fraction_of_eager_deletion = fraction;
FLAGS_fast_eager_deletion_mode = fast_mode;
});
m.add_object("_cleanup",
py::capsule([]() { ScopePool::Instance().Clear(); }));
......
......@@ -14,8 +14,6 @@
import os
import numpy as np
os.environ['FLAGS_eager_delete_tensor_gb'] = '0.0'
os.environ['FLAGS_fast_eager_deletion_mode'] = '1'
os.environ['FLAGS_use_ngraph'] = '0'
os.environ['FLAGS_use_mkldnn'] = '0'
os.environ['CPU_NUM'] = '4'
......@@ -25,6 +23,8 @@ import six
import unittest
import multiprocessing
fluid.core._set_eager_deletion_mode(0.0, 1.0, True)
def simple_fc_net():
image = fluid.layers.data(name='image', shape=[784], dtype='float32')
......
......@@ -13,7 +13,6 @@
# limitations under the License.
import os
os.environ['FLAGS_eager_delete_tensor_gb'] = '0.0'
os.environ['CPU_NUM'] = '2'
import six
......
......@@ -16,6 +16,8 @@ import unittest
from test_eager_deletion_dynamic_rnn_base import TestBase
import paddle.fluid as fluid
fluid.core._set_eager_deletion_mode(0.0, 1.0, True)
def gru_net(data,
label,
......
......@@ -16,6 +16,8 @@ from test_eager_deletion_dynamic_rnn_base import TestBase
import paddle.fluid as fluid
import unittest
fluid.core._set_eager_deletion_mode(0.0, 1.0, True)
def lstm_net(data,
label,
......
......@@ -14,7 +14,8 @@
import os
import unittest
os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0"
fluid.core._set_eager_deletion_mode(0.0, 1.0, True)
# FIXME(zjl): It seems that this unittest fails randomly
# when comparing the last loss of all_reduce mode with that of reduce mode
......
......@@ -14,7 +14,8 @@
import os
import unittest
os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0"
fluid.core._set_eager_deletion_mode(0.0, 1.0, True)
os.environ['RECORDIO_FILENAME'] = './eager_deletion_transformer.wmt16.recordio'
......
......@@ -16,8 +16,6 @@ from __future__ import print_function
import os
os.environ['CPU_NUM'] = '2'
os.environ['FLAGS_eager_delete_tensor_gb'] = '0.0'
os.environ['FLAGS_fast_eager_deletion_mode'] = '1'
import unittest
import paddle.fluid as fluid
......@@ -29,6 +27,8 @@ import paddle.fluid.compiler as compiler
import numpy
import multiprocessing
fluid.core._set_eager_deletion_mode(0.0, 1.0, True)
class TestEagerDeletionWhileOpBase(unittest.TestCase):
def test_main(self):
......
......@@ -14,11 +14,12 @@
import os
import unittest
os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0"
os.environ['FLAGS_memory_fraction_of_eager_deletion'] = "0.55"
import paddle.fluid as fluid
os.environ['RECORDIO_FILENAME'] = './p_gc_transformer.wmt16.recordio'
fluid.core._set_eager_deletion_mode(0.0, 0.55, True)
from test_parallel_executor_transformer import TestTransformer
if __name__ == '__main__':
......
......@@ -168,3 +168,7 @@ class TestROIAlignOp(OpTest):
def test_check_grad(self):
self.check_grad(['X'], 'Out')
if __name__ == '__main__':
unittest.main()