Commit 87189665 authored by Q qijun

merge baidu/develop

@@ -216,21 +216,32 @@ class OpRegistry {
   static OperatorPtr CreateOp(const OpDesc& op_desc) {
     std::string op_type = op_desc.type();
     OperatorPtr op(creators().at(op_type)());
+    const OpProto& op_proto = protos().at(op_type);
+    // set op's inputs_ from desc.
     op->type_ = op_desc.type();
     op->inputs_.reserve((size_t)op_desc.inputs_size());
     std::copy(op_desc.inputs().begin(), op_desc.inputs().end(),
               std::back_inserter(op->inputs_));
+    // set op's outputs_ from desc.
     op->outputs_.reserve((size_t)op_desc.outputs_size());
     std::copy(op_desc.outputs().begin(), op_desc.outputs().end(),
               std::back_inserter(op->outputs_));
+    // set op's attrs_ from desc.
     for (auto& attr : op_desc.attrs()) {
       op->attrs_[attr.name()] = AttrTypeHelper::GetAttrValue(attr);
     }
     op_checkers().at(op_type).Check(op->attrs_);
+    // set argument offsets stored in op.
+    CreateInOutOffsetMap(op, op_proto);
     op->Init();
     return op;
   }

+  // init op->in_out_idxs_ to accelerate argument offset lookup.
+  static void CreateInOutOffsetMap(OperatorPtr op, const OpProto& proto) {
+    op->CreateInOutOffsetMap(proto);
+  }
+
   static std::unordered_map<std::string, OpProto>& protos() {
     static std::unordered_map<std::string, OpProto> protos_;
     return protos_;
......
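A minimal standalone sketch (not part of this commit) of the name-to-offset table that CreateInOutOffsetMap precomputes from the op's proto; the argument names "xs", "k", and "ys" are made up for illustration:

// Standalone sketch of the in_out_idxs_ table built by CreateInOutOffsetMap.
// The OpProto is simulated here with two plain vectors of argument names.
#include <cassert>
#include <string>
#include <unordered_map>
#include <vector>

int main() {
  std::vector<std::string> proto_inputs = {"xs", "k"};  // hypothetical proto inputs
  std::vector<std::string> proto_outputs = {"ys"};      // hypothetical proto outputs

  std::unordered_map<std::string, int> in_out_idxs;
  for (int i = 0; i < static_cast<int>(proto_inputs.size()); ++i)
    in_out_idxs[proto_inputs[i]] = i;   // input argument -> position in proto
  for (int i = 0; i < static_cast<int>(proto_outputs.size()); ++i)
    in_out_idxs[proto_outputs[i]] = i;  // output argument -> position in proto

  // Later, Input("k") / Inputs("xs") / Outputs("ys") use these cached offsets
  // instead of scanning the proto on every call.
  assert(in_out_idxs.at("xs") == 0);
  assert(in_out_idxs.at("k") == 1);
  assert(in_out_idxs.at("ys") == 0);
  return 0;
}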
@@ -12,25 +12,83 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

+#include <algorithm>
 #include "paddle/framework/operator.h"

 namespace paddle {
 namespace framework {

 template <>
-Eigen::DefaultDevice* OpKernel::KernelContext::GetEigenDevice<
+Eigen::DefaultDevice* KernelContext::GetEigenDevice<
     platform::CPUPlace, Eigen::DefaultDevice>() const {
   return device_context_.get_eigen_device<Eigen::DefaultDevice>();
 }

 #ifndef PADDLE_ONLY_CPU
 template <>
-Eigen::GpuDevice* OpKernel::KernelContext::GetEigenDevice<
-    platform::GPUPlace, Eigen::GpuDevice>() const {
+Eigen::GpuDevice*
+KernelContext::GetEigenDevice<platform::GPUPlace, Eigen::GpuDevice>() const {
   return device_context_.get_eigen_device<Eigen::GpuDevice>();
 }
 #endif

+void OperatorBase::CreateInOutOffsetMap(const OpProto& proto) {
+  PADDLE_ENFORCE(in_out_idxs_.empty(), "duplicate call CreateInOutOffsetMap");
+  for (int i = 0; i < proto.inputs_size(); i++) {
+    const auto& name = proto.inputs()[i].name();
+    in_out_idxs_[name] = i;
+  }
+  for (int i = 0; i < proto.outputs_size(); i++) {
+    const auto& name = proto.outputs()[i].name();
+    in_out_idxs_[name] = i;
+  }
+}
+
+const std::string& OperatorBase::Input(const std::string& name) const {
+  auto it = in_out_idxs_.find(name);
+  PADDLE_ENFORCE(it != in_out_idxs_.end(), "no key [%s] in in_out_idxs_", name);
+
+  if (attrs_.count("input_format") == 0) {
+    return inputs_[it->second];
+  } else {
+    const auto& input_format = GetAttr<std::vector<int>>("input_format");
+    int idx = input_format[it->second];
+    return inputs_.at(idx);
+  }
+}
+
+std::vector<std::string> OperatorBase::Inputs(const std::string& name) const {
+  auto input_format = GetAttr<std::vector<int>>("input_format");
+  auto offset = in_out_idxs_.at(name);
+
+  return std::vector<std::string>{
+      inputs_.begin() + input_format.at(offset),
+      inputs_.begin() + input_format.at(offset + 1)};
+}
+
+const std::string& OperatorBase::Output(const std::string& name) const {
+  auto it = in_out_idxs_.find(name);
+  PADDLE_ENFORCE(it != in_out_idxs_.end(), "no key [%s] in in_out_idxs_", name);
+
+  if (attrs_.count("output_format") == 0) {
+    return outputs_[it->second];
+  } else {
+    const auto& output_format = GetAttr<std::vector<int>>("output_format");
+    int idx = output_format[it->second];
+    return outputs_.at(idx);
+  }
+}
+
+std::vector<std::string> OperatorBase::Outputs(const std::string& name) const {
+  auto output_format = GetAttr<std::vector<int>>("output_format");
+  auto offset = in_out_idxs_.at(name);
+
+  return std::vector<std::string>{
+      outputs_.begin() + output_format.at(offset),
+      outputs_.begin() + output_format.at(offset + 1)};
+}
+
 std::string OperatorBase::DebugString() const {
   std::stringstream ss;
   ss << "=================\n";
......
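To make the offset arithmetic in Input()/Inputs() concrete, here is a small standalone sketch (not from the commit); the variable names and the {0, 3, 4} input_format mirror the multi-input test added further down:

// Standalone sketch: how Input()/Inputs() resolve names once an
// "input_format" attribute is present.
#include <cassert>
#include <string>
#include <unordered_map>
#include <vector>

int main() {
  std::vector<std::string> inputs = {"x0", "x1", "x2", "k0"};
  std::unordered_map<std::string, int> in_out_idxs = {{"xs", 0}, {"k", 1}};
  std::vector<int> input_format = {0, 3, 4};  // xs -> [0, 3), k -> [3, 4)

  // Equivalent of OperatorBase::Inputs("xs"): the slice [format[0], format[1]).
  int off = in_out_idxs.at("xs");
  std::vector<std::string> xs(inputs.begin() + input_format.at(off),
                              inputs.begin() + input_format.at(off + 1));
  assert(xs == (std::vector<std::string>{"x0", "x1", "x2"}));

  // Equivalent of OperatorBase::Input("k"): the first variable of the "k" slot.
  assert(inputs.at(input_format.at(in_out_idxs.at("k"))) == "k0");
  return 0;
}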
@@ -18,8 +18,10 @@ limitations under the License. */
 #include <string>
 #include <unordered_map>
 #include <vector>
 #include "paddle/framework/attr_checker.h"
 #include "paddle/framework/op_desc.pb.h"
+#include "paddle/framework/op_proto.pb.h"
 #include "paddle/framework/scope.h"
 #include "paddle/framework/tensor.h"
 #include "paddle/platform/device_context.h"

@@ -77,24 +79,32 @@ class OperatorBase {
   virtual void Run(const ScopePtr& scope,
                    const platform::DeviceContext& dev_ctx) const = 0;

+  // Get an input with the argument name described in `op_proto`.
+  const std::string& Input(const std::string& name) const;
+  // Get an input which has multiple variables.
+  // TODO: add a vector_view to prevent memory copy.
+  std::vector<std::string> Inputs(const std::string& name) const;
+  // Get an output with the argument name described in `op_proto`.
+  const std::string& Output(const std::string& name) const;
+  // Get an output which has multiple variables.
+  // TODO: add a vector_view to prevent memory copy.
+  std::vector<std::string> Outputs(const std::string& name) const;
+  // init in_out_idxs_ to accelerate argument offset lookup.
+  void CreateInOutOffsetMap(const OpProto& proto);
+
  public:
   std::string type_;
   std::vector<std::string> inputs_;
   std::vector<std::string> outputs_;
   AttributeMap attrs_;
+  // store the arguments' offsets described in op_desc.
+  std::unordered_map<std::string, int> in_out_idxs_;
 };
-class OpKernel {
- public:
-  /**
-   * KernelContext is the only parameter of Kernel Run function.
-   * Run will get input/output variables, state such as momentum and
-   * device resource such as CUDA stream, cublas handle, etc. from
-   * KernelContext. User should construct it before run the Operator.
-   */
-  class KernelContext {
+class KernelContext {
  public:
-  KernelContext(const OperatorBase* op, const ScopePtr& scope,
+  KernelContext(const OperatorBase* op, const std::shared_ptr<Scope>& scope,
                 const platform::DeviceContext& device_context)
       : op_(*op), scope_(scope), device_context_(device_context) {}
@@ -106,6 +116,32 @@ class OpKernel {
     return scope_->GetVariable(op_.outputs_[index]);
   }

+  const Variable* Input(const std::string& name) const {
+    return scope_->GetVariable(op_.Input(name));
+  }
+
+  const Variable* Output(const std::string& name) const {
+    return scope_->GetVariable(op_.Output(name));
+  }
+
+  const std::vector<const Variable*> Inputs(const std::string& name) const {
+    auto names = op_.Inputs(name);
+    std::vector<const Variable*> res(names.size());
+    std::transform(
+        names.begin(), names.end(), res.begin(),
+        [this](const std::string& name) { return scope_->GetVariable(name); });
+    return res;
+  }
+
+  const std::vector<const Variable*> Outputs(const std::string& name) const {
+    auto names = op_.Outputs(name);
+    std::vector<const Variable*> res(names.size());
+    std::transform(
+        names.begin(), names.end(), res.begin(),
+        [this](const std::string& name) { return scope_->GetVariable(name); });
+    return res;
+  }
+
   template <typename PlaceType,
             typename DeviceType =
                 typename EigenDeviceConverter<PlaceType>::EigenDeviceType>

@@ -114,9 +150,18 @@ class OpKernel {
   platform::Place GetPlace() const { return device_context_.GetPlace(); }

   const OperatorBase& op_;
-  const ScopePtr& scope_;
+  const std::shared_ptr<Scope>& scope_;
   const platform::DeviceContext& device_context_;
 };

+class OpKernel {
+ public:
+  /**
+   * KernelContext is the only parameter of Kernel Run function.
+   * Run will get input/output variables, state such as momentum and
+   * device resource such as CUDA stream, cublas handle, etc. from
+   * KernelContext. User should construct it before run the Operator.
+   */
   virtual void Compute(const KernelContext& context) const = 0;
@@ -162,7 +207,7 @@ class OperatorWithKernel : public OperatorBase {
   void Run(const ScopePtr& scope,
            const platform::DeviceContext& dev_ctx) const final {
     auto& opKernel = AllOpKernels().at(type_).at(OpKernelKey(dev_ctx));
-    opKernel->Compute(OpKernel::KernelContext(this, scope, dev_ctx));
+    opKernel->Compute(KernelContext(this, scope, dev_ctx));
   }

   static std::unordered_map<std::string /* op_type */, OpKernelMap>&

@@ -170,6 +215,7 @@ class OperatorWithKernel : public OperatorBase {
     static std::unordered_map<std::string, OpKernelMap> g_all_op_kernels;
     return g_all_op_kernels;
   }

   void InferShape(const std::shared_ptr<Scope>& scope) const final {
     std::vector<const Tensor*> ins;
     VarNamesToTensors(scope, inputs_, &ins);
......
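The new KernelContext accessors compose two lookups: argument name to variable name via OperatorBase, then variable name to Variable via the Scope. A rough standalone sketch of that chain (not from the commit), with Scope and Variable faked as a string-to-int map:

// Standalone sketch of the two-step lookup behind KernelContext::Input(name):
// argument name -> variable name (OperatorBase) -> variable (Scope).
#include <cassert>
#include <string>
#include <unordered_map>
#include <vector>

int main() {
  // Operator side: argument "x" is the 0-th input, bound to variable "IN1".
  std::vector<std::string> op_inputs = {"IN1"};
  std::unordered_map<std::string, int> in_out_idxs = {{"x", 0}};

  // Scope side: variable "IN1" holds some value (faked as an int here).
  std::unordered_map<std::string, int> scope = {{"IN1", 42}};

  // Step 1: OperatorBase::Input("x") -> "IN1" (no input_format attribute set).
  const std::string& var_name = op_inputs[in_out_idxs.at("x")];
  // Step 2: scope->GetVariable("IN1") -> the variable itself.
  assert(scope.at(var_name) == 42);
  return 0;
}

Keeping the argument-to-name and name-to-variable maps separate is what lets one operator definition be bound to different variables in different scopes.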
@@ -30,7 +30,6 @@ class OpWithoutKernelTest : public OperatorBase {
     op_run_num++;
     ASSERT_EQ((int)inputs_.size(), 1);
     ASSERT_EQ((int)outputs_.size(), 1);
-    ASSERT_NEAR(GetAttr<float>("scale"), 3.14, 1e-5);
     ASSERT_EQ(scope->GetVariable(inputs_[0]), nullptr);
     ASSERT_EQ(x, 1);
     ASSERT_NE(scope->GetVariable(outputs_[0]), nullptr);

@@ -86,9 +85,11 @@ class OpKernelTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
  public:
   OpKernelTestProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("input", "input of test op");
-    AddOutput("output", "output of test op");
-    AddAttr<float>("scale", "scale of cosine op");
+    AddInput("x", "input of test op");
+    AddOutput("y", "output of test op");
+    AddAttr<float>("scale", "scale of cosine op")
+        .SetDefault(1.0)
+        .LargerThan(0.0);
     AddComment("This is test op");
   }
 };
@@ -103,11 +104,65 @@ class OpWithKernelTest : public OperatorWithKernel {
 class CPUKernelTest : public OpKernel {
  public:
-  void Compute(const KernelContext& context) const {
+  void Compute(const KernelContext& ctx) const {
+    std::cout << "this is cpu kernel" << std::endl;
+    std::cout << ctx.op_.DebugString() << std::endl;
     cpu_kernel_run_num++;
-    ASSERT_EQ((int)context.op_.inputs_.size(), 1);
-    ASSERT_EQ((int)context.op_.outputs_.size(), 1);
-    ASSERT_NEAR(context.op_.GetAttr<float>("scale"), 3.14, 1e-5);
+    ASSERT_EQ(ctx.op_.Input("x"), "IN1");
+    ASSERT_EQ(ctx.op_.Output("y"), "OUT1");
   }
 };

+// multiple inputs test
+class OperatorMultiInputsTest : public OperatorBase {
+ public:
+  void Init() override { x = 1; }
+  void InferShape(const std::shared_ptr<Scope>& scope) const override {}
+  void Run(const std::shared_ptr<Scope>& scope,
+           const platform::DeviceContext& dev_ctx) const override {
+    ASSERT_EQ(scope->GetVariable(inputs_[0]), nullptr);
+    ASSERT_EQ(x, 1);
+    ASSERT_NE(scope->GetVariable(outputs_[0]), nullptr);
+    ASSERT_EQ(Input("x"), "IN1");
+    ASSERT_EQ(Input("y"), "OUT1");
+  }
+
+ public:
+  float x = 0;
+};
+
+class OpKernelTestMultiInputsProtoAndCheckerMaker
+    : public OpProtoAndCheckerMaker {
+ public:
+  OpKernelTestMultiInputsProtoAndCheckerMaker(OpProto* proto,
+                                              OpAttrChecker* op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInputs("xs", "inputs of test op");
+    AddInput("k", "input of test op");
+    AddOutputs("ys", "outputs of test op");
+    AddAttr<float>("scale", "scale of cosine op")
+        .SetDefault(1.0)
+        .LargerThan(0.0);
+    AddComment("This is test op");
+  }
+};
+
+class CPUKernalMultiInputsTest : public OpKernel {
+ public:
+  void Compute(const KernelContext& ctx) const {
+    auto xs = ctx.op_.Inputs("xs");
+    ASSERT_EQ(xs.size(), 3UL);
+    ASSERT_EQ(xs[0], "x0");
+    ASSERT_EQ(xs[1], "x1");
+    ASSERT_EQ(xs[2], "x2");
+
+    auto k = ctx.op_.Input("k");
+    ASSERT_EQ(k, "k0");
+
+    auto ys = ctx.op_.Outputs("ys");
+    ASSERT_EQ(ys.size(), 2UL);
+    ASSERT_EQ(ys[0], "y0");
+    ASSERT_EQ(ys[1], "y1");
+  }
+};
@@ -118,6 +173,7 @@ REGISTER_OP(op_with_kernel, paddle::framework::OpWithKernelTest,
             paddle::framework::OpKernelTestProtoAndCheckerMaker);
 REGISTER_OP_CPU_KERNEL(op_with_kernel, paddle::framework::CPUKernelTest);

+// test with single input
 TEST(OpKernel, all) {
   paddle::framework::OpDesc op_desc;
   op_desc.set_type("op_with_kernel");

@@ -137,3 +193,47 @@ TEST(OpKernel, all) {
   op->Run(scope, cpu_device_context);
   ASSERT_EQ(paddle::framework::cpu_kernel_run_num, 1);
 }
+
+REGISTER_OP(op_multi_inputs_with_kernel, paddle::framework::OpWithKernelTest,
+            paddle::framework::OpKernelTestMultiInputsProtoAndCheckerMaker);
+REGISTER_OP_CPU_KERNEL(op_multi_inputs_with_kernel,
+                       paddle::framework::CPUKernalMultiInputsTest);
+
+// test with multi inputs
+TEST(OpKernel, multi_inputs) {
+  using namespace paddle::framework;
+
+  OpDesc op_desc;
+  op_desc.set_type("op_multi_inputs_with_kernel");
+  *op_desc.mutable_inputs()->Add() = "x0";
+  *op_desc.mutable_inputs()->Add() = "x1";
+  *op_desc.mutable_inputs()->Add() = "x2";
+  *op_desc.mutable_inputs()->Add() = "k0";
+  *op_desc.mutable_outputs()->Add() = "y0";
+  *op_desc.mutable_outputs()->Add() = "y1";
+
+  auto attr = op_desc.mutable_attrs()->Add();
+  attr->set_name("scale");
+  attr->set_type(paddle::framework::AttrType::FLOAT);
+  attr->set_f(3.14);
+
+  auto attr0 = op_desc.mutable_attrs()->Add();
+  attr0->set_name("input_format");
+  attr0->set_type(paddle::framework::AttrType::INTS);
+  auto input_format = attr0->mutable_ints();
+  input_format->Add(0);  // x0
+  input_format->Add(3);  // k
+  input_format->Add(4);  // end
+
+  auto attr1 = op_desc.mutable_attrs()->Add();
+  attr1->set_name("output_format");
+  attr1->set_type(paddle::framework::AttrType::INTS);
+  auto output_format = attr1->mutable_ints();
+  output_format->Add(0);  // y0
+  output_format->Add(2);  // y1
+
+  paddle::platform::CPUDeviceContext cpu_device_context;
+  auto scope = std::make_shared<Scope>();
+
+  OperatorPtr op(paddle::framework::OpRegistry::CreateOp(op_desc));
+  op->Run(scope, cpu_device_context);
+}
@@ -22,7 +22,7 @@ namespace operators {
 template <typename Place, typename T>
 class AddKernel : public framework::OpKernel {
  public:
-  void Compute(const KernelContext& context) const override {
+  void Compute(const framework::KernelContext& context) const override {
     auto input0 = context.Input(0)->Get<framework::Tensor>();
     auto input1 = context.Input(1)->Get<framework::Tensor>();
     auto* output = context.Output(0)->GetMutable<framework::Tensor>();
......