From 24e988a471529874a3138ede9373b16d2162bdfa Mon Sep 17 00:00:00 2001
From: chengduo
Date: Sat, 15 Jun 2019 09:03:05 +0800
Subject: [PATCH] Fix bug of scope_buffered_ssa_graph_executor (#18100)

* fix code bug
test=develop
---
 paddle/fluid/API.spec                         |  4 +-
 .../scope_buffered_ssa_graph_executor.cc      |  1 +
 paddle/fluid/operators/print_op.cc            | 73 ++++++++++++-------
 python/paddle/fluid/framework.py              |  8 +-
 python/paddle/fluid/layers/control_flow.py    | 14 +++-
 python/paddle/fluid/parallel_executor.py      |  1 +
 .../fluid/tests/unittests/test_print_op.py    | 32 ++++++++
 7 files changed, 95 insertions(+), 38 deletions(-)

diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index 9b9c6c66415..b7f2fa96a7c 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -31,7 +31,7 @@ paddle.fluid.memory_optimize (ArgSpec(args=['input_program', 'skip_opt_set', 'pr
 paddle.fluid.release_memory (ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd38c5b8b2b2e0bb19bcf1b581a80a7e4'))
 paddle.fluid.DistributeTranspilerConfig.__init__
 paddle.fluid.ParallelExecutor.__init__ (ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.ParallelExecutor.drop_local_exe_scopes (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '80d857dc626612e2b2460d0154551e95'))
+paddle.fluid.ParallelExecutor.drop_local_exe_scopes (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '77c739744ea5708b80fb1b37cc89db40'))
 paddle.fluid.ParallelExecutor.run (ArgSpec(args=['self', 'fetch_list', 'feed', 'feed_dict', 'return_numpy'], varargs=None, keywords=None, defaults=(None, None, True)), ('document', '33ce6ec50f8eeb05d340e6b114b026fd'))
 paddle.fluid.create_lod_tensor (ArgSpec(args=['data', 'recursive_seq_lens', 'place'], varargs=None, keywords=None, defaults=None), ('document', 'b82ea20e2dc5ff2372e0643169ca47ff'))
 paddle.fluid.create_random_int_lodtensor (ArgSpec(args=['recursive_seq_lens', 'base_shape', 'place', 'low', 'high'], varargs=None, keywords=None, defaults=None), ('document', '74dc6d23185d90a7a50fbac19f5b65fb'))
@@ -313,7 +313,7 @@ paddle.fluid.layers.StaticRNN.step_input (ArgSpec(args=['self', 'x'], varargs=No
 paddle.fluid.layers.StaticRNN.step_output (ArgSpec(args=['self', 'o'], varargs=None, keywords=None, defaults=None), ('document', '252890d4c3199a7623ab8667e13fd837'))
 paddle.fluid.layers.StaticRNN.update_memory (ArgSpec(args=['self', 'mem', 'var'], varargs=None, keywords=None, defaults=None), ('document', '7a0000520f179f35239956a5ba55119f'))
 paddle.fluid.layers.reorder_lod_tensor_by_rank (ArgSpec(args=['x', 'rank_table'], varargs=None, keywords=None, defaults=None), ('document', '5b552a1f0f7eb4dacb768a975ba15d08'))
-paddle.fluid.layers.Print (ArgSpec(args=['input', 'first_n', 'message', 'summarize', 'print_tensor_name', 'print_tensor_type', 'print_tensor_shape', 'print_tensor_lod', 'print_phase'], varargs=None, keywords=None, defaults=(-1, None, -1, True, True, True, True, 'both')), ('document', 'a222dbad457441941e50b812e5af9c7e'))
+paddle.fluid.layers.Print (ArgSpec(args=['input', 'first_n', 'message', 'summarize', 'print_tensor_name', 'print_tensor_type', 'print_tensor_shape', 'print_tensor_lod', 'print_phase'], varargs=None, keywords=None, defaults=(-1, None, -1, True, True, True, True, 'both')), ('document', 'ee6c70867d317b0a87094ed23546215f'))
 paddle.fluid.layers.is_empty (ArgSpec(args=['x', 'cond'], varargs=None, keywords=None, defaults=(None,)), ('document', '3011dc695f490afdf504dc24f628319a'))
 paddle.fluid.layers.sigmoid (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'a4e395ab004e7da34e94a0a1f9eee183'))
 paddle.fluid.layers.logsigmoid (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '5f2508c52e0a797bb9bd5e29d79ede78'))
diff --git a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc
index 06a454f4ada..5bbbf07e6d9 100644
--- a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc
+++ b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc
@@ -71,6 +71,7 @@ void ScopeBufferedSSAGraphExecutor::DropLocalExeScopes() {
     if (local_scope_var != nullptr) {
       auto &local_scope = *local_scope_var->GetMutable<Scope *>();
       scope->DeleteScope(local_scope);
+      scope->EraseVars({std::string(details::kLocalExecScopeName)});
       VLOG(3) << "Drop local execution scope: " << local_scope;
     }
   }
diff --git a/paddle/fluid/operators/print_op.cc b/paddle/fluid/operators/print_op.cc
index 200b01797e4..f686e5293b0 100644
--- a/paddle/fluid/operators/print_op.cc
+++ b/paddle/fluid/operators/print_op.cc
@@ -135,33 +135,34 @@ struct Formater {
 };
 
 // TODO(ChunweiYan) there should be some other printers for TensorArray
-class TensorPrintOp : public framework::OperatorBase {
+class PrintOp : public framework::OperatorBase {
  public:
-  TensorPrintOp(const std::string &type,
-                const framework::VariableNameMap &inputs,
-                const framework::VariableNameMap &outputs,
-                const framework::AttributeMap &attrs)
+  PrintOp(const std::string &type, const framework::VariableNameMap &inputs,
+          const framework::VariableNameMap &outputs,
+          const framework::AttributeMap &attrs)
       : OperatorBase(type, inputs, outputs, attrs) {}
 
-  TensorPrintOp(const TensorPrintOp &o)
-      : framework::OperatorBase(
-            static_cast<const framework::OperatorBase &>(o)) {
-    PADDLE_THROW("Not implemented.");
-  }
-
  private:
   void RunImpl(const framework::Scope &scope,
                const platform::Place &place) const override {
-    const framework::Variable *in_var_ptr = nullptr;
-    std::string printed_var_name = "";
-
-    in_var_ptr = scope.FindVar(Input("In"));
-    printed_var_name = Inputs("In").front();
-
-    PADDLE_ENFORCE_NOT_NULL(in_var_ptr);
-
-    auto &in_tensor = in_var_ptr->Get<framework::LoDTensor>();
+    const auto in_var = scope.FindVar(Input("In"));
+    auto out_var = scope.FindVar(Output("Out"));
+    PADDLE_ENFORCE_NOT_NULL(in_var, "The input should not be found in scope",
+                            Input("In"));
+    PADDLE_ENFORCE_NOT_NULL(out_var, "The output should not be found in scope",
+                            Output("Out"));
+    auto &in_tensor = in_var->Get<framework::LoDTensor>();
+    framework::LoDTensor *out_tensor =
+        out_var->GetMutable<framework::LoDTensor>();
+
+    PrintValue(place, Inputs("In").front(), in_tensor);
+    framework::TensorCopy(in_tensor, place, out_tensor);
+    out_tensor->set_lod(in_tensor.lod());
+  }
 
+  void PrintValue(const platform::Place &place,
+                  const std::string &printed_var_name,
+                  const framework::LoDTensor &in_tensor) const {
     std::string print_phase = Attr<std::string>("print_phase");
     bool is_forward = Attr<bool>("is_forward");
@@ -177,12 +178,12 @@ class TensorPrintOp : public framework::OperatorBase {
     printed_tensor.set_lod(in_tensor.lod());
     printed_tensor.Resize(in_tensor.dims());
 
-    if (platform::is_cpu_place(in_tensor.place())) {
+    if (is_cpu_place(in_tensor.place())) {
       printed_tensor.ShareDataWith(in_tensor);
     } else {
       // copy data to cpu to print
       platform::CPUPlace place;
-      framework::TensorCopy(in_tensor, place, &printed_tensor);
+      TensorCopy(in_tensor, place, &printed_tensor);
     }
 
     Formater formater;
@@ -215,6 +216,7 @@ class PrintOpProtoAndCheckMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("In", "Input tensor to be displayed.");
+    AddOutput("Out", "The output tensor.");
     AddAttr<int>("first_n", "Only log `first_n` number of times.");
     AddAttr<std::string>("message", "A string message to print as a prefix.");
     AddAttr<int>("summarize", "Number of elements printed.");
@@ -239,10 +241,23 @@ tensor `t`.)DOC");
   }
 };
 
-class InferShapeForward : public framework::InferShapeBase {
+class PrintOpInferShape : public framework::InferShapeBase {
+ public:
+  void operator()(framework::InferShapeContext *ctx) const override {
+    VLOG(10) << "PrintOpInferShape";
+    PADDLE_ENFORCE(ctx->HasInput("In"), "Input(In) should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should not be null.");
+    ctx->ShareDim("In", /*->*/ "Out");
+    ctx->ShareLoD("In", /*->*/ "Out");
+  }
+};
+
+class PrintOpVarTypeInference : public framework::VarTypeInference {
  public:
-  void operator()(framework::InferShapeContext *context) const override {
-    PADDLE_ENFORCE(context->HasInput("In"), "Input(In) should not be null.");
+  void operator()(framework::InferVarTypeContext *ctx) const override {
+    auto input_type = ctx->GetType(ctx->Input("In")[0]);
+    auto out_name = ctx->Output("Out").front();
+    ctx->SetType(out_name, input_type);
   }
 };
@@ -253,7 +268,8 @@ class PrintOpGradientMaker : public framework::SingleGradOpDescMaker {
   std::unique_ptr<framework::OpDesc> Apply() const override {
     auto *op_desc_ptr = new framework::OpDesc();
     op_desc_ptr->SetType("print");
-    op_desc_ptr->SetInput("In", InputGrad("In"));
+    op_desc_ptr->SetInput("In", OutputGrad("Out"));
+    op_desc_ptr->SetOutput("Out", InputGrad("In"));
     op_desc_ptr->SetAttrMap(Attrs());
     op_desc_ptr->SetAttr("is_forward", false);
     return std::unique_ptr<framework::OpDesc>(op_desc_ptr);
   }
 };
@@ -265,5 +281,6 @@ class PrintOpGradientMaker : public framework::SingleGradOpDescMaker {
 
 namespace ops = paddle::operators;
 
-REGISTER_OPERATOR(print, ops::TensorPrintOp, ops::PrintOpProtoAndCheckMaker,
-                  ops::PrintOpGradientMaker, ops::InferShapeForward);
+REGISTER_OPERATOR(print, ops::PrintOp, ops::PrintOpProtoAndCheckMaker,
+                  ops::PrintOpGradientMaker, ops::PrintOpInferShape,
+                  ops::PrintOpVarTypeInference);
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index 62d66296606..1407c93a240 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -27,7 +27,7 @@ import six
 import numpy as np
 import subprocess
 import multiprocessing
-
+import sys
 from .. import compat as cpt
 from .proto import framework_pb2
@@ -82,14 +82,14 @@ def _current_expected_place():
 
 
 def _cpu_num():
-    cpu_num = os.environ.get('CPU_NUM', None)
-    if cpu_num is None:
+    if "CPU_NUM" not in os.environ.keys():
         sys.stderr.write(
             'The CPU_NUM is not specified, you should set CPU_NUM in '
             'the environment variable list, i.e export CPU_NUM=1. CPU_NUM '
             'indicates that how many CPUPlace are used in the current task.\n'
-            '!!! The default number of CPUPlaces is 1.')
+            '!!! The default number of CPUPlaces is 1.\n\n')
         os.environ['CPU_NUM'] = str(1)
+    cpu_num = os.environ.get('CPU_NUM')
     return int(cpu_num)
diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py
index f8c84a70290..d073c15b023 100644
--- a/python/paddle/fluid/layers/control_flow.py
+++ b/python/paddle/fluid/layers/control_flow.py
@@ -165,8 +165,12 @@ def Print(input,
            print the gradients of input tensor.
 
     Returns:
-        Variable: Output tensor, same data with input tensor.
+        Variable: Output tensor.
 
+    NOTES:
+        The input and output are two different variables, and in the
+        following process, you should use the output variable but not the input,
+        otherwise, the print layer doesn't have backward.
 
     Examples:
         .. code-block:: python
@@ -174,16 +178,18 @@ def Print(input,
            import paddle.fluid as fluid
 
            input = fluid.layers.data(name="input", shape=[4, 32, 32], dtype="float32")
-           fluid.layers.Print(input, message = "The content of input layer:")
+           input = fluid.layers.Print(input, message = "The content of input layer:")
 
            # value = some_layer(...)
            # Print(value, summarize=10,
            #    message="The content of some_layer: ")
     '''
-    helper = LayerHelper('print', **locals())
+    helper = LayerHelper('print' + "_" + input.name, **locals())
+    output = helper.create_variable_for_type_inference(input.dtype)
     helper.append_op(
         type='print',
         inputs={'In': input},
+        outputs={'Out': output},
         attrs={
             'first_n': first_n,
             'summarize': summarize,
@@ -194,7 +200,7 @@ def Print(input,
             'print_tensor_lod': print_tensor_lod,
             'print_phase': print_phase.upper()
         })
-    return input
+    return output
 
 
 class BlockGuard(object):
diff --git a/python/paddle/fluid/parallel_executor.py b/python/paddle/fluid/parallel_executor.py
index f2cefeb3013..d4a1041a4bf 100644
--- a/python/paddle/fluid/parallel_executor.py
+++ b/python/paddle/fluid/parallel_executor.py
@@ -324,6 +324,7 @@ class ParallelExecutor(object):
             loss = fluid.layers.mean(hidden)
 
             place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+            exe = fluid.Executor(place)
             exe.run(startup_program)
 
             parallel_exe = fluid.ParallelExecutor(use_cuda=use_cuda,
diff --git a/python/paddle/fluid/tests/unittests/test_print_op.py b/python/paddle/fluid/tests/unittests/test_print_op.py
index 8e89aa6b749..0fc11ef8d92 100644
--- a/python/paddle/fluid/tests/unittests/test_print_op.py
+++ b/python/paddle/fluid/tests/unittests/test_print_op.py
@@ -17,11 +17,13 @@ from __future__ import print_function
 import unittest
 import paddle.fluid.core as core
 from paddle.fluid.executor import Executor
+import paddle.fluid as fluid
 import paddle.fluid.layers as layers
 from paddle.fluid.backward import append_backward
 from paddle.fluid.framework import switch_main_program
 from paddle.fluid.framework import Program
 import numpy as np
+from simple_nets import simple_fc_net, init_data
 
 
 class TestPrintOpCPU(unittest.TestCase):
@@ -89,5 +91,35 @@ class TestPrintOpGPU(TestPrintOpCPU):
         self.x_tensor.set_recursive_sequence_lengths([[1, 1]])
 
 
+class TestPrintOpBackward(unittest.TestCase):
+    def check_backward(self, use_cuda):
+        main = fluid.Program()
+        startup = fluid.Program()
+
+        with fluid.program_guard(main, startup):
+            loss = simple_fc_net()
+            loss = fluid.layers.Print(loss)
+            fluid.optimizer.Adam().minimize(loss)
+
+        print_ops = [op for op in main.blocks[0].ops if op.type == u'print']
+        assert len(print_ops) == 2, "The number of print op should be 2"
+
+        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+        exe = fluid.Executor(place)
+        exe.run(startup)
+
+        binary = fluid.compiler.CompiledProgram(main).with_data_parallel(
+            loss_name=loss.name)
+
+        img, label = init_data()
+        feed_dict = {"image": img, "label": label}
+        exe.run(binary, feed_dict)
+
+    def test_fw_bw(self):
+        if core.is_compiled_with_cuda():
+            self.check_backward(use_cuda=True)
+        self.check_backward(use_cuda=False)
+
+
 if __name__ == '__main__':
     unittest.main()
--
GitLab
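
Editor's note: for reference, a minimal usage sketch (not part of the patch) of the behavioral change this series makes to fluid.layers.Print. The op now writes through a separate output variable, so per the NOTES added to control_flow.py a caller must keep using the returned variable, otherwise the print op is left out of the backward pass. The toy network, variable names, and hyper-parameters below are illustrative assumptions for Paddle 1.x-era fluid:

    import numpy as np
    import paddle.fluid as fluid

    main = fluid.Program()
    startup = fluid.Program()
    with fluid.program_guard(main, startup):
        x = fluid.layers.data(name="x", shape=[2], dtype="float32")
        loss = fluid.layers.mean(fluid.layers.fc(input=x, size=1))
        # After this patch, Print returns a new variable rather than its
        # input; rebind it so the backward print op (is_forward=False,
        # emitted by PrintOpGradientMaker) is wired into the graph.
        loss = fluid.layers.Print(loss, message="loss: ")
        fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(startup)
    exe.run(main, feed={"x": np.random.random((4, 2)).astype("float32")})

Run this way, the program contains two print ops, one forward and one backward, which is exactly what TestPrintOpBackward asserts above.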