Merge branch 'dist-table-do-not-init-on-trainer' of...

Merge branch 'dist-table-do-not-init-on-trainer' of ssh://github.com/jacquesqiao/Paddle into cpu-for-1.1-merge

Merge branch 'dist-table-do-not-init-on-trainer' of...
Merge branch 'dist-table-do-not-init-on-trainer' of ssh://github.com/jacquesqiao/Paddle into cpu-for-1.1-merge
641369f9 · Qiao Longfei · d69c8207 · 93f173db · 641369f9 · 641369f9
7 changed files
--- a/paddle/fluid/framework/details/broadcast_op_handle.cc
+++ b/paddle/fluid/framework/details/broadcast_op_handle.cc
@@ -52,6 +52,10 @@ void BroadcastOpHandle::RunImpl() {
      var_scopes.at(in_var_handle->scope_idx_)->FindVar(in_var_handle->name_);
  PADDLE_ENFORCE_NOT_NULL(in_var);
  Tensor &in_tensor = VariableVisitor::GetMutableTensor(in_var);
+  if (!in_tensor.IsInitialized()) {
+    VLOG(3) << "in var " << in_var_handle->name_ << "not inited, return!";
+    return;
+  }
  InitOutputValue(*in_var_handle, out_var_handles);

--- a/paddle/fluid/framework/parallel_executor.cc
+++ b/paddle/fluid/framework/parallel_executor.cc
@@ -185,6 +185,10 @@ void ParallelExecutor::BCastParamsToDevices(
    }
    auto &main_tensor = main_var->Get<LoDTensor>();
+    if (!main_tensor.IsInitialized()) {
+      VLOG(3) << "one in var not inited, return!";
+      continue;
+    }
    auto &dims = main_tensor.dims();
    if (paddle::platform::is_gpu_place(main_tensor.place())) {
 #ifdef PADDLE_WITH_CUDA

--- a/paddle/fluid/operators/fake_init_op.cc
+++ b/paddle/fluid/operators/fake_init_op.cc
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/framework/data_type.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/math/math_function.h"
+namespace paddle {
+namespace operators {
+// Shape-inference functor for the fake_init op: checks that the "Out" output
+// is present and sets its dims from the "shape" attribute.
+class FakeInitInferShape : public framework::InferShapeBase {
+ public:
+  void operator()(framework::InferShapeContext *ctx) const override {
+    const bool has_out = ctx->HasOutput("Out");
+    PADDLE_ENFORCE(has_out, "Output(Out) of FakeInitOp should not be null.");
+    // The output dims come straight from the user-supplied "shape" attribute.
+    const std::vector<int64_t> &out_shape =
+        ctx->Attrs().Get<std::vector<int64_t>>("shape");
+    const framework::DDim out_dims = framework::make_ddim(out_shape);
+    ctx->SetOutputDim("Out", out_dims);
+  }
+};
+// Operator that resizes its output variable to the "shape" attribute without
+// writing any values into it. The output may hold either a LoDTensor or a
+// SelectedRows (in which case the SelectedRows' value tensor is resized).
+class FakeInitOp : public framework::OperatorBase {
+ public:
+  using framework::OperatorBase::OperatorBase;
+ private:
+  // Looks up Output("Out") in `scope` and resizes the underlying tensor.
+  // Raises an enforce error if the variable is missing from the scope, and
+  // throws if the variable holds neither a LoDTensor nor a SelectedRows.
+  // `dev_place` is not used by this op.
+  void RunImpl(const framework::Scope &scope,
+               const platform::Place &dev_place) const override {
+    // FindVar returns a pointer; check it before use instead of
+    // dereferencing a possibly-null result.
+    auto *out_var = scope.FindVar(Output("Out"));
+    PADDLE_ENFORCE_NOT_NULL(out_var,
+                            "Output(Out) of FakeInitOp is not found in scope.");
+    framework::Tensor *tensor = nullptr;
+    if (out_var->IsType<framework::LoDTensor>()) {
+      tensor = out_var->GetMutable<framework::LoDTensor>();
+    } else if (out_var->IsType<framework::SelectedRows>()) {
+      tensor = out_var->GetMutable<framework::SelectedRows>()->mutable_value();
+    } else {
+      // Note the trailing space: adjacent literals are concatenated verbatim.
+      PADDLE_THROW(
+          "fake init op's output only "
+          "supports SelectedRows and LoDTensor");
+    }
+    // Both supported variable kinds are resized the same way.
+    tensor->Resize(framework::make_ddim(Attr<std::vector<int64_t>>("shape")));
+  }
+};
+// Var-type inference functor registered for fake_init. Its body is empty, so
+// it performs no changes on the op description or the block.
+class FakeInitOpVarTypeInference : public framework::VarTypeInference {
+ public:
+  void operator()(const framework::OpDesc &op_desc,
+                  framework::BlockDesc *block) const override {
+    // Nothing to do: neither `op_desc` nor `block` is touched.
+  }
+};
+// Declares the attribute ("shape"), the output ("Out") and the user-facing
+// documentation of the fake_init op.
+class FakeInitOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddAttr<std::vector<int64_t>>("shape",
+                                  "(vector<int64_t>) The shape of the output");
+    // fake_init only resizes the output; it does not fill it with a value.
+    AddOutput("Out",
+              "(LoDTensor or SelectedRows) Variable that is resized to "
+              "the specified shape; no value is written to it");
+    AddComment(R"DOC(
+FakeInit Operator.
+Init a variable but do not alloc memory for it. It is used to init the
+table parameter at trainer side in distributed lookup table.
+)DOC");
+  }
+};
+}  // namespace operators
+}  // namespace paddle
+// Register the fake_init operator together with its shape inference, proto
+// maker and var-type inference. EmptyGradOpMaker is passed as the grad op
+// maker (presumably so that no gradient op is generated -- confirm against
+// the framework's definition).
+namespace ops = paddle::operators;
+REGISTER_OPERATOR(fake_init, ops::FakeInitOp, ops::FakeInitInferShape,
+                  ops::FakeInitOpMaker, paddle::framework::EmptyGradOpMaker,
+                  ops::FakeInitOpVarTypeInference);
--- a/paddle/fluid/operators/lookup_table_op.cc
+++ b/paddle/fluid/operators/lookup_table_op.cc
@@ -115,7 +115,7 @@ class LookupTableOpGrad : public framework::OperatorWithKernel {
 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
-    auto data_type = framework::GetDataTypeOfVar(ctx.InputVar("W"));
+    auto data_type = framework::GetDataTypeOfVar(ctx.InputVar("Out"));
    return framework::OpKernelType(data_type, ctx.device_context());
  }
 };

--- a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py
@@ -497,7 +497,7 @@ class TestDistLookupTable(TestDistLookupTableBase):
        # 5 save table
        self.assertEqual([op.type for op in pserver1.blocks[5].ops], ["save"])
-        trainer, _ = self.get_trainer()
+        trainer, trainer_startup = self.get_trainer()
        self.assertEqual(len(trainer.blocks), 1)
        ops = [
            'split_ids', 'prefetch', 'merge_ids', 'sequence_pool', 'split_ids',
@@ -511,6 +511,16 @@ class TestDistLookupTable(TestDistLookupTableBase):
        ]
        self.assertEqual([op.type for op in trainer.blocks[0].ops], ops)
+        startup_ops = [
+            'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant',
+            'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant',
+            'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant',
+            'fill_constant', 'fill_constant', 'uniform_random', 'recv', 'recv',
+            'fetch_barrier', 'fake_init'
+        ]
+        self.assertEqual([op.type for op in trainer_startup.blocks[0].ops],
+                         startup_ops)
 class TestAsyncLocalLookupTable(TestDistLookupTableBase):
    def net_conf(self):

--- a/python/paddle/fluid/tests/unittests/test_fake_init_op.py
+++ b/python/paddle/fluid/tests/unittests/test_fake_init_op.py
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import print_function
+import unittest
+import paddle.fluid.core as core
+from paddle.fluid.op import Operator
+class TestFakeInitOpSelectedRows(unittest.TestCase):
+    def check_with_place(self, place, is_selected_rows):
+        scope = core.Scope()
+        out_var_name = 'Out'
+        if is_selected_rows:
+            out_tensor = scope.var(out_var_name).get_selected_rows().get_tensor(
+            )
+        else:
+            out_tensor = scope.var(out_var_name).get_tensor()
+        var_shape = [4, 784]
+        # create and run fake_init_op
+        fake_init_op = Operator("fake_init", Out=out_var_name, shape=var_shape)
+        fake_init_op.run(scope, place)
+        self.assertEqual(var_shape, out_tensor._get_dims())
+    def test_fake_init_selected_rows(self):
+        places = [core.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(core.CUDAPlace(0))
+        for place in places:
+            for is_selected_rows in [True, False]:
+                self.check_with_place(place, is_selected_rows)
+# Run the unit tests when this file is executed directly as a script.
+if __name__ == "__main__":
+    unittest.main()
--- a/python/paddle/fluid/transpiler/distribute_transpiler.py
+++ b/python/paddle/fluid/transpiler/distribute_transpiler.py
@@ -475,6 +475,26 @@ class DistributeTranspiler(object):
        delete_ops(self.origin_program.global_block(), self.optimize_ops)
        delete_ops(self.origin_program.global_block(), lr_ops)
+        # delete table init op
+        if self.has_distributed_lookup_table:
+            table_var = self.startup_program.global_block().vars[
+                self.table_name]
+            table_param_init_op = []
+            for op in self.startup_program.global_block().ops:
+                if self.table_name in op.output_arg_names:
+                    table_param_init_op.append(op)
+            init_op_num = len(table_param_init_op)
+            if init_op_num != 1:
+                raise ValueError("table init op num should be 1, now is " + str(
+                    init_op_num))
+            table_init_op = table_param_init_op[0]
+            self.startup_program.global_block().append_op(
+                type="fake_init",
+                inputs={},
+                outputs={"Out": table_var},
+                attrs={"shape": table_init_op.attr('shape')})
+            delete_ops(self.startup_program.global_block(), table_param_init_op)
        self.origin_program.__str__()
        if wait_port: