diff --git a/paddle/fluid/framework/details/broadcast_op_handle.cc b/paddle/fluid/framework/details/broadcast_op_handle.cc
index 4fdab5cd94358d08eac7f8b041bf16d09042f0bd..a8de23839b78b15d3f0874032e2aff89b8dbc10b 100644
--- a/paddle/fluid/framework/details/broadcast_op_handle.cc
+++ b/paddle/fluid/framework/details/broadcast_op_handle.cc
@@ -52,6 +52,10 @@ void BroadcastOpHandle::RunImpl() {
       var_scopes.at(in_var_handle->scope_idx_)->FindVar(in_var_handle->name_);
   PADDLE_ENFORCE_NOT_NULL(in_var);
   Tensor &in_tensor = VariableVisitor::GetMutableTensor(in_var);
+  if (!in_tensor.IsInitialized()) {
+    VLOG(3) << "in var " << in_var_handle->name_ << " not inited, return!";
+    return;
+  }
 
   InitOutputValue(*in_var_handle, out_var_handles);
 
diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc
index 3368ae2ee4cf65b85abf1fcd89dee14f43522e1f..59db33a2edb8222bea5163dc0dfd9b12477cd5ec 100644
--- a/paddle/fluid/framework/parallel_executor.cc
+++ b/paddle/fluid/framework/parallel_executor.cc
@@ -185,6 +185,10 @@ void ParallelExecutor::BCastParamsToDevices(
      }
 
      auto &main_tensor = main_var->Get<LoDTensor>();
+     if (!main_tensor.IsInitialized()) {
+       VLOG(3) << "one in var not inited, skip!";
+       continue;
+     }
      auto &dims = main_tensor.dims();
      if (paddle::platform::is_gpu_place(main_tensor.place())) {
 #ifdef PADDLE_WITH_CUDA
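Both guards above tolerate a variable that exists in a scope but whose
tensor owns no memory yet -- exactly the state the new fake_init op (next
file) leaves the distributed lookup table parameter in on the trainer.
A minimal sketch of that state, using the same scope bindings as the unit
test later in this patch (the variable name is made up):

    import paddle.fluid.core as core

    scope = core.Scope()
    # The variable can be found in the scope, but its tensor holds no
    # allocation until set() is called; broadcasting it would fail,
    # hence the IsInitialized() checks added above.
    tensor = scope.var("table_param").get_tensor()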
diff --git a/paddle/fluid/operators/fake_init_op.cc b/paddle/fluid/operators/fake_init_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..28ebdcb03ea83f3ec701106111a7cc5f0f7ed7dc
--- /dev/null
+++ b/paddle/fluid/operators/fake_init_op.cc
@@ -0,0 +1,86 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/framework/data_type.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/math/math_function.h"
+
+namespace paddle {
+namespace operators {
+
+class FakeInitInferShape : public framework::InferShapeBase {
+ public:
+  void operator()(framework::InferShapeContext *ctx) const override {
+    PADDLE_ENFORCE(ctx->HasOutput("Out"),
+                   "Output(Out) of FakeInitOp should not be null.");
+    auto &shape = ctx->Attrs().Get<std::vector<int64_t>>("shape");
+    ctx->SetOutputDim("Out", framework::make_ddim(shape));
+  }
+};
+
+class FakeInitOp : public framework::OperatorBase {
+ public:
+  using framework::OperatorBase::OperatorBase;
+
+ private:
+  void RunImpl(const framework::Scope &scope,
+               const platform::Place &dev_place) const override {
+    framework::Tensor *tensor = nullptr;
+
+    auto &out_var = *scope.FindVar(Output("Out"));
+
+    if (out_var.IsType<framework::LoDTensor>()) {
+      tensor = out_var.GetMutable<framework::LoDTensor>();
+      tensor->Resize(framework::make_ddim(Attr<std::vector<int64_t>>("shape")));
+    } else if (out_var.IsType<framework::SelectedRows>()) {
+      tensor = out_var.GetMutable<framework::SelectedRows>()->mutable_value();
+      tensor->Resize(framework::make_ddim(Attr<std::vector<int64_t>>("shape")));
+    } else {
+      PADDLE_THROW(
+          "fake init op's output only "
+          "supports SelectedRows and LoDTensor");
+    }
+  }
+};
+
+class FakeInitOpVarTypeInference : public framework::VarTypeInference {
+ public:
+  void operator()(const framework::OpDesc &op_desc,
+                  framework::BlockDesc *block) const override {}
+};
+
+class FakeInitOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddAttr<std::vector<int64_t>>("shape",
+                                  "(vector<int64_t>) The shape of the output");
+    AddOutput("Out",
+              "(Tensor) Tensor of the specified shape; its shape is set "
+              "but no memory is allocated for it");
+    AddComment(R"DOC(
+FakeInit Operator.
+
+Init a variable without allocating memory for it. It is used to init the
+table parameter at the trainer side in a distributed lookup table.
+
+)DOC");
+  }
+};
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OPERATOR(fake_init, ops::FakeInitOp, ops::FakeInitInferShape,
+                  ops::FakeInitOpMaker, paddle::framework::EmptyGradOpMaker,
+                  ops::FakeInitOpVarTypeInference);
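As a usage sketch (not part of the patch), this is roughly how a startup
block could declare the table variable and attach fake_init by hand; the
variable name and shape are hypothetical, and the transpiler change at the
end of this patch performs the same step programmatically:

    import paddle.fluid as fluid

    startup = fluid.Program()
    block = startup.global_block()
    # Persistable SelectedRows variable for the table; fake_init records
    # its shape but never allocates trainer-side memory for it.
    table = block.create_var(
        name="table",
        persistable=True,
        type=fluid.core.VarDesc.VarType.SELECTED_ROWS)
    block.append_op(
        type="fake_init",
        inputs={},
        outputs={"Out": table},
        attrs={"shape": [1000, 64]})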
diff --git a/paddle/fluid/operators/lookup_table_op.cc b/paddle/fluid/operators/lookup_table_op.cc
index b9ac54e446811889b647397ae1fbb11c28f46777..a4d1e812a54e8d92750c991d09860ab974e3e56d 100644
--- a/paddle/fluid/operators/lookup_table_op.cc
+++ b/paddle/fluid/operators/lookup_table_op.cc
@@ -115,7 +115,7 @@ class LookupTableOpGrad : public framework::OperatorWithKernel {
  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
-    auto data_type = framework::GetDataTypeOfVar(ctx.InputVar("W"));
+    auto data_type = framework::GetDataTypeOfVar(ctx.InputVar("Out"));
     return framework::OpKernelType(data_type, ctx.device_context());
   }
 };
diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py
index 54a1c68a37f6929890aab697b48d621e6effb7d8..2b7227a6460f7f9e1ada54c28eb8a88230cce6e1 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py
@@ -497,7 +497,7 @@ class TestDistLookupTable(TestDistLookupTableBase):
         # 5 save table
         self.assertEqual([op.type for op in pserver1.blocks[5].ops], ["save"])
 
-        trainer, _ = self.get_trainer()
+        trainer, trainer_startup = self.get_trainer()
         self.assertEqual(len(trainer.blocks), 1)
         ops = [
             'split_ids', 'prefetch', 'merge_ids', 'sequence_pool', 'split_ids',
@@ -511,6 +511,16 @@ class TestDistLookupTable(TestDistLookupTableBase):
         ]
         self.assertEqual([op.type for op in trainer.blocks[0].ops], ops)
 
+        startup_ops = [
+            'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant',
+            'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant',
+            'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant',
+            'fill_constant', 'fill_constant', 'uniform_random', 'recv', 'recv',
+            'fetch_barrier', 'fake_init'
+        ]
+        self.assertEqual([op.type for op in trainer_startup.blocks[0].ops],
+                         startup_ops)
+
 
 class TestAsyncLocalLookupTable(TestDistLookupTableBase):
     def net_conf(self):
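The op list asserted above can be reproduced by printing the trainer
startup program after transpilation; a quick inspection snippet, assuming
the transpiler was run against the default startup program:

    import paddle.fluid as fluid

    # After transpile(), the table's uniform_random initializer is gone
    # from the trainer startup block and fake_init appears at the end,
    # after the recv/fetch_barrier ops.
    startup = fluid.default_startup_program()
    print([op.type for op in startup.global_block().ops])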
diff --git a/python/paddle/fluid/tests/unittests/test_fake_init_op.py b/python/paddle/fluid/tests/unittests/test_fake_init_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..a62b7aed66b59940b4ba654d98479e3e35c7b78b
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_fake_init_op.py
@@ -0,0 +1,52 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+
+import paddle.fluid.core as core
+from paddle.fluid.op import Operator
+
+
+class TestFakeInitOpSelectedRows(unittest.TestCase):
+    def check_with_place(self, place, is_selected_rows):
+        scope = core.Scope()
+
+        out_var_name = 'Out'
+        if is_selected_rows:
+            out_tensor = scope.var(out_var_name).get_selected_rows(
+            ).get_tensor()
+        else:
+            out_tensor = scope.var(out_var_name).get_tensor()
+
+        var_shape = [4, 784]
+
+        # create and run fake_init_op
+        fake_init_op = Operator("fake_init", Out=out_var_name, shape=var_shape)
+        fake_init_op.run(scope, place)
+
+        self.assertEqual(var_shape, out_tensor._get_dims())
+
+    def test_fake_init_selected_rows(self):
+        places = [core.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(core.CUDAPlace(0))
+        for place in places:
+            for is_selected_rows in [True, False]:
+                self.check_with_place(place, is_selected_rows)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py
index 443f0a95f3e3c0ed535333aa26ecc99dc99d64c1..70a305219706af3c5759bca2d83820f0cbcb4ada 100644
--- a/python/paddle/fluid/transpiler/distribute_transpiler.py
+++ b/python/paddle/fluid/transpiler/distribute_transpiler.py
@@ -475,6 +475,26 @@ class DistributeTranspiler(object):
         delete_ops(self.origin_program.global_block(), self.optimize_ops)
         delete_ops(self.origin_program.global_block(), lr_ops)
 
+        # delete table init op
+        if self.has_distributed_lookup_table:
+            table_var = self.startup_program.global_block().vars[
+                self.table_name]
+            table_param_init_op = []
+            for op in self.startup_program.global_block().ops:
+                if self.table_name in op.output_arg_names:
+                    table_param_init_op.append(op)
+            init_op_num = len(table_param_init_op)
+            if init_op_num != 1:
+                raise ValueError("table init op num should be 1, now is " + str(
+                    init_op_num))
+            table_init_op = table_param_init_op[0]
+            self.startup_program.global_block().append_op(
+                type="fake_init",
+                inputs={},
+                outputs={"Out": table_var},
+                attrs={"shape": table_init_op.attr('shape')})
+            delete_ops(self.startup_program.global_block(),
+                       table_param_init_op)
+
         self.origin_program.__str__()
 
         if wait_port:
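The new branch swaps the table's real initializer for a fake_init op with
the same shape attribute. A standalone sketch of that transformation on a
toy startup block (the variable name is made up, and delete_ops is assumed
to be importable from the transpiler's details package):

    import paddle.fluid as fluid
    from paddle.fluid.transpiler.details import delete_ops

    prog = fluid.Program()
    block = prog.global_block()
    w = block.create_var(
        name="dist_table", shape=[1000, 64], dtype="float32",
        persistable=True)
    block.append_op(
        type="uniform_random", inputs={}, outputs={"Out": w},
        attrs={"shape": [1000, 64]})

    # Find the single op that initializes the table ...
    init_ops = [op for op in block.ops if "dist_table" in op.output_arg_names]
    assert len(init_ops) == 1
    # ... append a fake_init with the same shape, then drop the original,
    # so the trainer records the shape without allocating the parameter.
    block.append_op(
        type="fake_init", inputs={}, outputs={"Out": w},
        attrs={"shape": init_ops[0].attr("shape")})
    delete_ops(block, init_ops)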