diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 2722ea078ebdf9a88fe2286fb4050fca652ffb7f..fd4cf92d85d5daa891d602d4365122c870920bba 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -66,6 +66,7 @@ paddle.fluid.layers.linear_chain_crf ArgSpec(args=['input', 'label', 'param_attr paddle.fluid.layers.crf_decoding ArgSpec(args=['input', 'param_attr', 'label'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.cos_sim ArgSpec(args=['X', 'Y'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.cross_entropy ArgSpec(args=['input', 'label', 'soft_label', 'ignore_index'], varargs=None, keywords=None, defaults=(False, -100)) +paddle.fluid.layers.bpr_loss ArgSpec(args=['input', 'label', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.square_error_cost ArgSpec(args=['input', 'label'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.chunk_eval ArgSpec(args=['input', 'label', 'chunk_scheme', 'num_chunk_types', 'excluded_chunk_types'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.sequence_conv ArgSpec(args=['input', 'num_filters', 'filter_size', 'filter_stride', 'padding', 'bias_attr', 'param_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(3, 1, None, None, None, None, None)) diff --git a/paddle/fluid/CMakeLists.txt b/paddle/fluid/CMakeLists.txt index 6b526f0103ad3c530c06a68757cf89293f4fb84b..595454e90b9cd713fd2baed24538cf5fbc93934a 100644 --- a/paddle/fluid/CMakeLists.txt +++ b/paddle/fluid/CMakeLists.txt @@ -1,6 +1,7 @@ add_subdirectory(memory) add_subdirectory(platform) add_subdirectory(framework) +add_subdirectory(imperative) add_subdirectory(operators) add_subdirectory(string) add_subdirectory(recordio) diff --git a/paddle/fluid/framework/feed_fetch_method.cc b/paddle/fluid/framework/feed_fetch_method.cc index 3e9353f5cf67d8de62c5551f12ea786e49190549..6338be75a4b1d3c4caf7a6f7add4d05fec690340 100644 --- 
a/paddle/fluid/framework/feed_fetch_method.cc +++ b/paddle/fluid/framework/feed_fetch_method.cc @@ -16,7 +16,9 @@ limitations under the License. */ #include #include #include "glog/logging.h" +#include "paddle/fluid/framework/var_type.h" #include "paddle/fluid/framework/variable.h" +#include "paddle/fluid/platform/place.h" namespace paddle { namespace framework { @@ -53,5 +55,12 @@ LoDTensor& GetFetchVariable(const Scope& scope, const std::string& var_name, return tensor; } +LoDTensor& GetVariableTensor(const Scope& scope, const std::string& var_name) { + Variable* var = scope.FindVar(var_name); + PADDLE_ENFORCE(var, "%s no in scope", var_name); + PADDLE_ENFORCE(var->IsType(), "Only support lod tensor now."); + return *var->GetMutable(); +} + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/feed_fetch_method.h b/paddle/fluid/framework/feed_fetch_method.h index 7f504bfd232862c014cb59b6e8301eec74e0351f..031f8e01aa6128b803dcbfb990778e87d4fafc13 100644 --- a/paddle/fluid/framework/feed_fetch_method.h +++ b/paddle/fluid/framework/feed_fetch_method.h @@ -27,5 +27,7 @@ void SetFeedVariable(Scope* scope, const LoDTensor& input, LoDTensor& GetFetchVariable(const Scope& scope, const std::string& var_name, size_t index); +LoDTensor& GetVariableTensor(const Scope& scope, const std::string& var_name); + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/ir/graph.cc b/paddle/fluid/framework/ir/graph.cc index fc91564bbaecf7b1725908fc1eb8b1e4d2e20d32..8679118fe28b1c68aea30caf711441823b5255c0 100644 --- a/paddle/fluid/framework/ir/graph.cc +++ b/paddle/fluid/framework/ir/graph.cc @@ -38,9 +38,8 @@ void CheckProgram(const ProgramDesc &program) { switch (role_id) { case _INT(OpRole::kForward): if (visit.find(_INT(OpRole::kBackward)) != visit.end()) { - LOG(ERROR) - << "Cannot add backward operator before forward operator %s." 
- << op->Type(); + LOG(ERROR) << "Cannot add backward operator before forward operator " + << op->Type(); } break; case _INT(OpRole::kBackward): diff --git a/paddle/fluid/framework/ir/is_test_pass.cc b/paddle/fluid/framework/ir/is_test_pass.cc index 6d8f020918d4e56fa7f125a659f7f8511ca067ca..57cc98e2ca0175848aa62c62c8ad3b20594b3bde 100644 --- a/paddle/fluid/framework/ir/is_test_pass.cc +++ b/paddle/fluid/framework/ir/is_test_pass.cc @@ -38,7 +38,7 @@ std::unique_ptr IsTestPass::ApplyImpl( for (const Node* n : graph->Nodes()) { if (n->IsOp()) { auto* op = n->Op(); - if (n->RuntimeHasAttr("is_test")) { + if (op->HasAttr("is_test") || op->HasProtoAttr("is_test")) { op->SetAttr("is_test", true); } else if (std::find(begin(op_list), end(op_list), op->Type()) != end(op_list)) { diff --git a/paddle/fluid/framework/ir/is_test_pass_tester.cc b/paddle/fluid/framework/ir/is_test_pass_tester.cc index d9a68c7f1dd2a0dca5204719c4ce6cd9d68292a2..9696441a21661db89146c448742a992d1f7df022 100644 --- a/paddle/fluid/framework/ir/is_test_pass_tester.cc +++ b/paddle/fluid/framework/ir/is_test_pass_tester.cc @@ -104,9 +104,9 @@ TEST(IsTestPass, basic) { auto* op = node->Op(); auto op_name = boost::get(op->GetAttr("name")); if (op_name == "conv3") { - ASSERT_FALSE(node->RuntimeHasAttr("is_test")); + ASSERT_FALSE(op->HasAttr("is_test")); } else { - ASSERT_TRUE(node->RuntimeHasAttr("is_test")); + ASSERT_TRUE(op->HasAttr("is_test")); EXPECT_TRUE(boost::get(op->GetAttr("is_test"))); } } diff --git a/paddle/fluid/framework/ir/mkldnn_placement_pass.cc b/paddle/fluid/framework/ir/mkldnn_placement_pass.cc index 9a9314161b0e8d14a525d253572915ed597c716e..951fcb066ce759ebfec0182e1e9dca887e343170 100644 --- a/paddle/fluid/framework/ir/mkldnn_placement_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn_placement_pass.cc @@ -25,12 +25,15 @@ std::unique_ptr MKLDNNPlacementPass::ApplyImpl( const auto& op_types_list = Get>("mkldnn_enabled_op_types"); for (const Node* n : graph->Nodes()) { - if (n->IsOp() && 
n->RuntimeHasAttr("use_mkldnn")) { - if (op_types_list.empty()) { - n->Op()->SetAttr("use_mkldnn", true); - } else if (std::find(op_types_list.begin(), op_types_list.end(), - n->Name()) != op_types_list.end()) { - n->Op()->SetAttr("use_mkldnn", true); + if (n->IsOp()) { + auto* op = n->Op(); + if (op->HasAttr("use_mkldnn") || op->HasProtoAttr("use_mkldnn")) { + if (op_types_list.empty()) { + op->SetAttr("use_mkldnn", true); + } else if (std::find(op_types_list.begin(), op_types_list.end(), + n->Name()) != op_types_list.end()) { + op->SetAttr("use_mkldnn", true); + } } } } diff --git a/paddle/fluid/framework/ir/node.cc b/paddle/fluid/framework/ir/node.cc index 7a88cb2b681c1aa5e1b75481b1aba66a125a1d9c..eac67108e2106e986cbe1255a64c956153bc5560 100644 --- a/paddle/fluid/framework/ir/node.cc +++ b/paddle/fluid/framework/ir/node.cc @@ -30,28 +30,6 @@ std::unique_ptr CreateNodeForTest(const std::string &name, return std::unique_ptr(new Node(name, type)); } -bool Node::RuntimeHasAttr(const std::string &name) const { - if (Op()->HasAttr(name)) { - return true; - } else { - auto &op_info = OpInfoMap::Instance(); - auto op_type = Op()->Type(); - if (op_info.Has(op_type)) { - auto op_info_ptr = op_info.Get(op_type); - if (op_info_ptr.HasOpProtoAndChecker()) { - const proto::OpProto &proto = op_info_ptr.Proto(); - for (int i = 0; i != proto.attrs_size(); ++i) { - const proto::OpProto::Attr &attr = proto.attrs(i); - if (attr.name() == name) { - return true; - } - } - } - } - } - return false; -} - } // namespace ir } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/ir/node.h b/paddle/fluid/framework/ir/node.h index 1044a96430f060b750580ea0b225787ba6ebd2a0..d2a393b3f19e9aab79098757dae663d030b0fa2b 100644 --- a/paddle/fluid/framework/ir/node.h +++ b/paddle/fluid/framework/ir/node.h @@ -108,18 +108,6 @@ class Node { Name().find(ir::Node::kControlDepVarName) != std::string::npos; } - // RuntimeHasAttr is different with HasAttr now. - // 1. 
For Op()->HasAttr(), it judges whether a stored program_desc_ has attr, - // thus, if stored program_desc_ are old which don't have an attr, a new - // library which adds the attr already will fail on this function. - // Details: - // https://github.com/PaddlePaddle/Paddle/pull/14608#issuecomment-442309087 - // 2. For Op()->RuntimeHasAttr, it judges the attr in runtime to avoid above - // problem. - // TODO(luotao): Maybe we should enhance HasAttr later, instead of adding - // RuntimeHasAttr. - bool RuntimeHasAttr(const std::string& name) const; - std::vector inputs; std::vector outputs; diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc index e8ecd90502933a049cc8f886212579fc061d44ff..dde642764fa5dfce11edcef51ad1be11be331fbc 100644 --- a/paddle/fluid/framework/op_desc.cc +++ b/paddle/fluid/framework/op_desc.cc @@ -237,6 +237,23 @@ void OpDesc::SetOutput(const std::string ¶m_name, this->outputs_[param_name] = args; } +bool OpDesc::HasProtoAttr(const std::string &name) const { + auto &op_info = OpInfoMap::Instance(); + if (op_info.Has(desc_.type())) { + auto op_info_ptr = op_info.Get(desc_.type()); + if (op_info_ptr.HasOpProtoAndChecker()) { + const proto::OpProto &proto = op_info_ptr.Proto(); + for (int i = 0; i != proto.attrs_size(); ++i) { + const proto::OpProto::Attr &attr = proto.attrs(i); + if (attr.name() == name) { + return true; + } + } + } + } + return false; +} + proto::AttrType OpDesc::GetAttrType(const std::string &name) const { auto it = attrs_.find(name); PADDLE_ENFORCE(it != attrs_.end(), "Attribute %s is not found", name); diff --git a/paddle/fluid/framework/op_desc.h b/paddle/fluid/framework/op_desc.h index 30c8a26c3d2f0068674aa70b4ff875a2f73c1dca..e8debec7f13706b7fc5a4882d237ee2257e53b7e 100644 --- a/paddle/fluid/framework/op_desc.h +++ b/paddle/fluid/framework/op_desc.h @@ -65,6 +65,8 @@ class OpDesc { return attrs_.find(name) != attrs_.end(); } + bool HasProtoAttr(const std::string &name) const; + proto::AttrType 
GetAttrType(const std::string &name) const; std::vector AttrNames() const; diff --git a/paddle/fluid/imperative/CMakeLists.txt b/paddle/fluid/imperative/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..373d292b443b7651b785a52a6986b0a0be58ad61 --- /dev/null +++ b/paddle/fluid/imperative/CMakeLists.txt @@ -0,0 +1,3 @@ +cc_library(layer SRCS layer.cc DEPS proto_desc operator) +cc_library(tracer SRCS tracer.cc DEPS proto_desc) +cc_library(engine SRCS engine.cc) diff --git a/paddle/fluid/imperative/engine.cc b/paddle/fluid/imperative/engine.cc new file mode 100644 index 0000000000000000000000000000000000000000..de7ab0e5918281579728ef48d1517be2cd530af7 --- /dev/null +++ b/paddle/fluid/imperative/engine.cc @@ -0,0 +1,53 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "paddle/fluid/imperative/engine.h"
+
+#include // NOLINT
+#include
+
+#include "glog/logging.h"
+
+namespace paddle {
+namespace imperative {
+
+// Guards the one-time construction performed in GetEngine().
+static std::once_flag init_engine;
+// Process-wide engine instance; assigned once inside GetEngine().
+static Engine* engine;
+
+// Placeholder Engine implementation. Enqueue() only records the pointer and
+// Sync() logs every recorded pointer and then empties the queue; nothing is
+// executed on the runnables.
+class DummyEngine : public Engine {
+ public:
+  // Appends `runnable` to the pending list (the pointer is stored as given).
+  void Enqueue(Runnable* runnable) override {
+    queued_runnables_.push_back(runnable);
+  }
+
+  // Number of runnables recorded since the last Sync().
+  size_t Size() const override { return queued_runnables_.size(); }
+
+  // Logs each pending runnable's address at INFO level, then clears the list.
+  void Sync() override {
+    for (Runnable* l : queued_runnables_) {
+      LOG(INFO) << "running " << reinterpret_cast(l);
+    }
+    queued_runnables_.clear();
+  }
+
+ private:
+  std::vector queued_runnables_;
+};
+
+// Returns the shared engine, creating a DummyEngine on the first call via
+// std::call_once. The instance is allocated with `new` and never deleted here.
+Engine* GetEngine() {
+  std::call_once(init_engine, []() { engine = new DummyEngine(); });
+  return engine;
+}
+
+} // namespace imperative
+} // namespace paddle
diff --git a/paddle/fluid/imperative/engine.h b/paddle/fluid/imperative/engine.h
new file mode 100644
index 0000000000000000000000000000000000000000..a1dfa5bda38d0c419aa4ccbea77b32eb7e0d5b23
--- /dev/null
+++ b/paddle/fluid/imperative/engine.h
@@ -0,0 +1,39 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include
+#include
+
+namespace paddle {
+namespace imperative {
+
+// Empty type used as the unit of work handed to Engine::Enqueue().
+struct Runnable {};
+
+// Abstract engine interface. All operations are pure virtual; their exact
+// semantics are defined by the concrete implementation.
+class Engine {
+ public:
+  virtual ~Engine() {}
+
+  // Hands `runnable` to the engine.
+  virtual void Enqueue(Runnable* runnable) = 0;
+
+  // Reports a size for the engine (presumably the number of queued
+  // runnables -- confirm against the implementation).
+  virtual size_t Size() const = 0;
+
+  // Synchronisation point; behaviour is implementation-defined.
+  virtual void Sync() = 0;
+};
+
+// Accessor for the engine instance; defined in engine.cc.
+Engine* GetEngine();
+
+} // namespace imperative
+} // namespace paddle
diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc
new file mode 100644
index 0000000000000000000000000000000000000000..612503768079472ba233ee3fcd43a47fdba9a0cc
--- /dev/null
+++ b/paddle/fluid/imperative/layer.cc
@@ -0,0 +1,221 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "paddle/fluid/imperative/layer.h" +#include +#include +#include +#include +#include + +#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/string/printf.h" + +namespace paddle { +namespace imperative { + +using framework::Variable; + +void AddTo(Variable* src, Variable* dst) { + framework::LoDTensor* dst_tensor = dst->GetMutable(); + framework::LoDTensor* src_tensor = src->GetMutable(); + PADDLE_ENFORCE(dst_tensor->numel() == src_tensor->numel(), "%lld vs %lld", + dst_tensor->numel(), src_tensor->numel()); + float* dst_data = dst_tensor->mutable_data(platform::CPUPlace()); + const float* src_data = src_tensor->data(); + for (size_t i = 0; i < src_tensor->numel(); ++i) { + dst_data[i] += src_data[i]; + } +} + +class Autograd { + public: + explicit Autograd(framework::Scope* scope) : scope_(scope) {} + + void RunBackward(VarBase* var) { + PADDLE_ENFORCE(var->pre_op_->op_desc_); + // TODO(panyx0718): Only create for vars that "require_grad" + (*var->pre_op_->output_vars_)[var->pre_op_out_idx_]->grads_ = var->grads_; + + std::deque ready; + ready.push_back(var->pre_op_); + + std::map dep_counts = ComputeDepCounts(var->pre_op_); + + while (!ready.empty()) { + OpBase* ready_op = ready.front(); + ready.pop_front(); + std::vector input_grads = ready_op->ApplyGrad(scope_); + + for (size_t i = 0; i < input_grads.size(); ++i) { + if (!input_grads[i]) continue; + OpBase* pre_op = ready_op->pre_ops_->at(i); + if (!pre_op) continue; + + dep_counts[pre_op] -= 1; + PADDLE_ENFORCE(dep_counts[pre_op] >= 0); + bool pre_op_ready = dep_counts[pre_op] == 0; + if (pre_op_ready) { + ready.push_back(pre_op); + } + } + } + } + + private: + std::map ComputeDepCounts(OpBase* op) { + std::map ret; + + std::deque queue; + queue.push_back(op); + std::unordered_set visited; + visited.insert(op); + while (!queue.empty()) { + OpBase* candidate = queue.front(); + queue.pop_front(); + for (OpBase* pre_op : 
*(candidate->pre_ops_)) { + if (!pre_op) continue; + if (visited.find(pre_op) == visited.end()) { + visited.insert(pre_op); + queue.push_back(pre_op); + } + ret[pre_op] += 1; + } + } + + return ret; + } + + framework::Scope* scope_; +}; + +framework::Variable* CreateVariable(const std::string& name, + const framework::DDim& dim, float val, + framework::Scope* scope, + bool random_name = true) { + std::string varname = name; + if (random_name) { + std::mt19937 rng; + rng.seed(std::random_device()()); + std::uniform_int_distribution dist6( + 1, std::numeric_limits::max()); + int id = dist6(rng); + varname = string::Sprintf("%s@%d", varname, id); + } + + VLOG(3) << "creating var " << varname; + framework::Variable* var = scope->Var(varname); + framework::LoDTensor* tensor = var->GetMutable(); + + float* data = tensor->mutable_data(dim, platform::CPUPlace()); + std::fill(data, data + tensor->numel(), val); + return var; +} + +framework::LoDTensor& VarBase::Grad() { + VLOG(3) << "get var grad " << var_desc_->Name(); + return *grads_->GetMutable(); +} + +void VarBase::ApplyGrad(framework::Scope* scope, Variable* grad) { + VLOG(3) << "apply var grad " << var_desc_->Name() << " " + << grad->Get().data()[0]; + if (!grads_) { + grads_ = + CreateVariable(string::Sprintf("%s@IGrad", var_desc_->Name()), + var_->Get().dims(), 0.0, scope); + } + AddTo(grad, grads_); + VLOG(3) << "grad_ after apply var grad " << var_desc_->Name() << " " + << grads_->Get().data()[0]; +} + +std::vector OpBase::ApplyGrad(framework::Scope* scope) { + VLOG(3) << "op grad " << grad_op_desc_->Type(); + + for (const std::string& grad_invar : grad_op_desc_->InputArgumentNames()) { + if (grad_to_var_->find(grad_invar) == grad_to_var_->end()) { + // grad op inputs can be forward inputs, so not in grad_to_var. 
+ continue; + } + VLOG(3) << "op grad in var " << grad_invar; + block_->FindRecursiveOrCreateVar(grad_invar); + framework::Variable* var = scope->Var(grad_invar); + const std::string& invar = grad_to_var_->at(grad_invar); + for (VarBase* varbase : *output_vars_) { + // Use the accumulated grads_ by sharing the input with grads_. + if (varbase->var_desc_->Name() == invar) { + var->GetMutable()->ShareDataWith( + varbase->grads_->Get()); + break; + } + } + } + + for (const std::string& outvar : grad_op_desc_->OutputArgumentNames()) { + VLOG(3) << "grad outvar " << outvar; + block_->FindRecursiveOrCreateVar(outvar); + framework::Variable* var = scope->Var(outvar); + if (!var->IsInitialized()) { + framework::VarDesc* var_desc = block_->FindVar(outvar); + if (var_desc->GetType() == framework::proto::VarType::LOD_TENSOR) { + var->GetMutable(); + } else { + LOG(ERROR) << "tracer doesn't support yet"; + } + } + } + grad_op_desc_->InferShape(*block_); + grad_op_desc_->InferVarType(block_); + std::unique_ptr opbase = + framework::OpRegistry::CreateOp(*grad_op_desc_); + + opbase->Run(*scope, platform::CPUPlace()); + + // `ret` matches exactly with `input_vars_` of forward op. + std::vector ret; + for (size_t i = 0; i < input_vars_->size(); ++i) { + bool found = false; + for (const std::string& outvar : grad_op_desc_->OutputArgumentNames()) { + Variable* var = scope->FindVar(outvar); + VarBase* origin_var = (*input_vars_)[i]; + std::string orig_var = grad_to_var_->at(outvar); + PADDLE_ENFORCE(origin_var->var_desc_->Name() == orig_var); + VLOG(3) << "apply grad " << outvar << " with origin " << orig_var; + origin_var->ApplyGrad(scope, var); + found = true; + ret.push_back(var); + // TODO(panyx0718): There might be another outvar with the same name. + // In that case, it doesn't matter the first one or the second one is + // used. 
+ break; + } + if (!found) { + ret.push_back(nullptr); + } + } + return ret; +} + +void VarBase::RunBackward(framework::Scope* scope) { + grads_ = CreateVariable(framework::GradVarName(var_desc_->Name()), + var_->Get().dims(), 1.0, scope, + false); + if (!pre_op_) return; + Autograd(scope).RunBackward(this); +} + +} // namespace imperative +} // namespace paddle diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h new file mode 100644 index 0000000000000000000000000000000000000000..85a71ca83d21ed2595ddbe684300a46c05fed3af --- /dev/null +++ b/paddle/fluid/imperative/layer.h @@ -0,0 +1,102 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include +#include "paddle/fluid/framework/op_desc.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/framework/var_desc.h" +#include "paddle/fluid/platform/enforce.h" + +namespace paddle { +namespace imperative { + +class OpBase; + +class VarBase { + public: + VarBase() + : pre_op_(nullptr), + pre_op_out_idx_(-1), + var_desc_(nullptr), + var_(nullptr), + grads_(nullptr) {} + + virtual ~VarBase() {} + + void ApplyGrad(framework::Scope* scope, framework::Variable* grad); + + void RunBackward(framework::Scope* scope); + + framework::LoDTensor& Grad(); + + OpBase* pre_op_; + int pre_op_out_idx_; + + framework::VarDesc* var_desc_; + framework::Variable* var_; + framework::Variable* grads_; +}; + +class OpBase { + public: + OpBase() + : input_vars_(new std::vector()), + output_vars_(new std::vector()), + pre_ops_(new std::vector()), + pre_ops_out_idx_(new std::vector()), + op_desc_(nullptr), + grad_op_desc_(nullptr) {} + + virtual ~OpBase() { + delete input_vars_; + delete output_vars_; + + delete pre_ops_; + delete pre_ops_out_idx_; + + if (grad_op_desc_) delete grad_op_desc_; + if (grad_to_var_) delete grad_to_var_; + } + + std::vector ApplyGrad(framework::Scope* scope); + + std::vector* input_vars_; + std::vector* output_vars_; + std::vector* pre_ops_; + std::vector* pre_ops_out_idx_; + framework::OpDesc* op_desc_; + + framework::OpDesc* grad_op_desc_; + std::unordered_map* grad_to_var_; + framework::BlockDesc* block_; +}; + +class Layer { + public: + virtual ~Layer() {} + + virtual std::vector Forward(const std::vector& inputs) { + std::vector vars; + return vars; + } + + virtual void Backward() { LOG(ERROR) << "To support customize"; } +}; + +} // namespace imperative +} // namespace paddle diff --git a/paddle/fluid/imperative/tracer.cc b/paddle/fluid/imperative/tracer.cc new file mode 100644 index 
0000000000000000000000000000000000000000..f64f9e72c4a23528948183b909d65e90783a4463 --- /dev/null +++ b/paddle/fluid/imperative/tracer.cc @@ -0,0 +1,19 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/imperative/tracer.h" + +namespace paddle { +namespace imperative {} // namespace imperative +} // namespace paddle diff --git a/paddle/fluid/imperative/tracer.h b/paddle/fluid/imperative/tracer.h new file mode 100644 index 0000000000000000000000000000000000000000..433d07c0e5aa0986ab1e9fe349ef865d2851c0c0 --- /dev/null +++ b/paddle/fluid/imperative/tracer.h @@ -0,0 +1,128 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include +#include + +#include "paddle/fluid/framework/op_desc.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/imperative/engine.h" +#include "paddle/fluid/imperative/layer.h" + +namespace paddle { +namespace imperative { + +void CreateGradOp(const framework::OpDesc& op_desc, + const std::unordered_set& no_grad_set, + const std::vector& grad_sub_block, + framework::OpDesc** grad_op_desc, + std::unordered_map* grad_to_var) { + std::vector> grad_op_descs = + framework::OpInfoMap::Instance() + .Get(op_desc.Type()) + .GradOpMaker()(op_desc, no_grad_set, grad_to_var, grad_sub_block); + PADDLE_ENFORCE(grad_op_descs.size() == 1, "Only support 1 grad op now."); + // TODO(panyx0718): Leak? + *grad_op_desc = grad_op_descs[0].release(); +} + +class Tracer { + public: + explicit Tracer(framework::BlockDesc* root_block) : root_block_(root_block) { + root_scope_ = new framework::Scope(); + scopes_[root_block_] = root_scope_; + } + + virtual ~Tracer() { delete root_scope_; } + + void Trace(OpBase* op, const std::vector& inputs, + const std::vector& outputs, + framework::BlockDesc* block) { + framework::Scope* scope = GetScope(block); + framework::OpDesc* op_desc = op->op_desc_; + VLOG(3) << "tracer tracing " << op_desc->Type(); + op_desc->InferShape(*block); + op_desc->InferVarType(block); + std::unique_ptr op_base = + framework::OpRegistry::CreateOp(*op_desc); + + *op->input_vars_ = inputs; + for (VarBase* input : inputs) { + const std::string vname = input->var_desc_->Name(); + framework::Variable* var = scope->Var(vname); + input->var_ = var; + if (!var->IsInitialized()) { + framework::VarDesc* var_desc = block->FindVar(vname); + if (var_desc->GetType() == framework::proto::VarType::LOD_TENSOR) { + var->GetMutable(); + } else { + LOG(ERROR) << "tracer doesn't support yet"; + } + } + if (input->pre_op_) { + op->pre_ops_->push_back(input->pre_op_); + 
op->pre_ops_out_idx_->push_back(input->pre_op_out_idx_); + } else { + op->pre_ops_->push_back(nullptr); + } + } + + *op->output_vars_ = outputs; + for (size_t i = 0; i < outputs.size(); ++i) { + const std::string vname = outputs[i]->var_desc_->Name(); + framework::Variable* var = scope->Var(vname); + if (!var->IsInitialized()) { + framework::VarDesc* var_desc = block->FindVar(vname); + if (var_desc->GetType() == framework::proto::VarType::LOD_TENSOR) { + var->GetMutable(); + } else { + LOG(ERROR) << "tracer doesn't support yet"; + } + } + outputs[i]->var_ = var; + outputs[i]->pre_op_ = op; + outputs[i]->pre_op_out_idx_ = i; + } + op_base->Run(*scope, platform::CPUPlace()); + framework::OpDesc* grad_op_desc; + auto grad_to_var = new std::unordered_map(); + CreateGradOp(*op_desc, {}, {block}, &grad_op_desc, grad_to_var); + op->grad_op_desc_ = grad_op_desc; + op->grad_to_var_ = grad_to_var; + op->block_ = block; + } + + framework::Scope* GetScope(framework::BlockDesc* block) { + if (scopes_.find(block) != scopes_.end()) { + return scopes_.at(block); + } + framework::BlockDesc* parent_block = block->ParentBlock(); + PADDLE_ENFORCE(scopes_.find(parent_block) != scopes_.end()); + framework::Scope* scope = &scopes_[parent_block]->NewScope(); + scopes_[block] = scope; + return scope; + } + + private: + std::map scopes_; + framework::BlockDesc* root_block_; + framework::Scope* root_scope_; +}; + +} // namespace imperative +} // namespace paddle diff --git a/paddle/fluid/operators/bpr_loss_op.cc b/paddle/fluid/operators/bpr_loss_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..9258d7c7e83122149c7cbc42e4a4bdd84903ce67 --- /dev/null +++ b/paddle/fluid/operators/bpr_loss_op.cc @@ -0,0 +1,145 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/bpr_loss_op.h" + +namespace paddle { +namespace operators { + +class BprLossOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null."); + PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null."); + PADDLE_ENFORCE(ctx->HasOutput("Y"), "Output(Y) should be not null."); + + auto x_dims = ctx->GetInputDim("X"); + auto label_dims = ctx->GetInputDim("Label"); + int rank = x_dims.size(); + PADDLE_ENFORCE_EQ(rank, label_dims.size(), + "Input(X) and Input(Label) shall have the same rank."); + PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), + framework::slice_ddim(label_dims, 0, rank - 1), + "Input(X) and Input(Label) shall have the same shape " + "except the last dimension."); + + auto y_dims = x_dims; + y_dims[rank - 1] = 1; + ctx->SetOutputDim("Y", y_dims); + ctx->ShareLoD("X", /*->*/ "Y"); + } + + protected: + // Explicitly set that the data type of computation kernel of Seq-bpr + // is determined by its input "X". 
+ framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + platform::CPUPlace()); + } +}; + +class BprLossGradientOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null."); + PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null."); + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")), + "Input(Y@GRAD) shoudl be not null."); + PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")), + "Output(X@GRAD) should be not null."); + + auto x_dims = ctx->GetInputDim("X"); + auto label_dims = ctx->GetInputDim("Label"); + auto dy_dims = ctx->GetInputDim(framework::GradVarName("Y")); + int rank = x_dims.size(); + PADDLE_ENFORCE_EQ(dy_dims.size(), rank, + "Input(Y@Grad) and Input(X) should have the same rank."); + PADDLE_ENFORCE_EQ(label_dims.size(), rank, + "Input(Label) and Input(X) should have the same rank."); + PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), + framework::slice_ddim(label_dims, 0, rank - 1), + "The Input(X) and Input(Label) should have the same " + "shape except the last dimension."); + PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), + framework::slice_ddim(dy_dims, 0, rank - 1), + "The Input(X) and Input(Y@Grad) should have the same " + "shape except the last dimension."); + PADDLE_ENFORCE_EQ(dy_dims[rank - 1], 1, + "The last dimension of Input(Y@Grad) should be 1."); + PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1, + " the last dimension of Input(Label) should be 1."); + ctx->SetOutputDim(framework::GradVarName("X"), x_dims); + ctx->ShareLoD("X", framework::GradVarName("X")); + } + + protected: + // Explicitly set that the data type of computation kernel of cross_entropy + // is 
determined by its input "X". + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + platform::CPUPlace()); + } +}; + +class BprLossOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", + "(Tensor, default Tensor), a tensor whose last dimension " + "size is equal to the number of classes. This input is a " + "real number."); + AddInput( + "Label", + "(Tensor), the tensor which represents the ground truth. It has the " + "same shape with 'X' except the last dimension. the last dimension " + "size is 1."); + AddOutput("Y", + "(Tensor, default Tensor), a tensor whose shape is same " + "with 'X' except that the last dimension size is 1. It " + "represents the sequence bpr loss."); + AddComment(R"DOC( +Bayesian Personalized Ranking Loss Operator. + +This operator belongs to pairwise ranking loss. Label is the desired item. 
+The loss at a given point in one session is defined as: +$Y[i] = -\frac{1}{N_{i}-1} * \sum_{j=0,~ j\neq Label[i]}^{N_{i}-1}\log(\sigma(X[i, Label[i]]-X[i, j]))$ + +Learn more details by reading paper "Session-based Recommendations with Recurrent Neural Networks" (https://arxiv.org/abs/1511.06939) + +)DOC"); + } +}; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +using CPUCtx = paddle::platform::CPUDeviceContext; + +REGISTER_OPERATOR(bpr_loss, ops::BprLossOp, ops::BprLossOpMaker, + paddle::framework::DefaultGradOpDescMaker); +REGISTER_OPERATOR(bpr_loss_grad, ops::BprLossGradientOp); +REGISTER_OP_CPU_KERNEL(bpr_loss, ops::BprLossOpKernel, + ops::BprLossOpKernel); +REGISTER_OP_CPU_KERNEL(bpr_loss_grad, + ops::BprLossGradientOpKernel, + ops::BprLossGradientOpKernel); diff --git a/paddle/fluid/operators/bpr_loss_op.h b/paddle/fluid/operators/bpr_loss_op.h new file mode 100644 index 0000000000000000000000000000000000000000..e223be7af82146e7c69c7c5aab8f08d0fe0d1710 --- /dev/null +++ b/paddle/fluid/operators/bpr_loss_op.h @@ -0,0 +1,118 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/platform/for_range.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +/*Todo: + *Find a way to adapt TolerableValue, using blas or eigen.
+ */ +template +struct TolerableValue { + HOSTDEVICE T operator()(const T& x) const { + PADDLE_ASSERT(std::is_floating_point::value); + const T kApproInf = 1e20; + if (x == INFINITY) return kApproInf; + if (x == -INFINITY) return -kApproInf; + return x; + } +}; + +template +class BprLossOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* x = ctx.Input("X"); + auto* label = ctx.Input("Label"); + auto* y = ctx.Output("Y"); + y->mutable_data(ctx.GetPlace()); + int rank = x->dims().size(); + + Tensor x_2d = framework::ReshapeToMatrix(*x, rank - 1); + Tensor labels_2d = framework::ReshapeToMatrix(*label, rank - 1); + Tensor y_2d = framework::ReshapeToMatrix(*y, rank - 1); + + const framework::Tensor* logits = &x_2d; + const framework::Tensor* labels = &labels_2d; + framework::Tensor* out = &y_2d; + + const int step_size = logits->dims()[0]; + const int class_num = logits->dims()[1]; + const T* logits_data = logits->data(); + T* loss_data = out->data(); + + const int64_t* label_data = labels->data(); + for (int i = 0; i < step_size; ++i) { + int lbl_pos = label_data[i]; + PADDLE_ENFORCE_GE(lbl_pos, 0); + PADDLE_ENFORCE_LT(lbl_pos, class_num); + int index_pos = i * class_num + lbl_pos; + T sum = static_cast(0); + for (int j = 0; j < class_num; j++) { + if (j == lbl_pos) continue; + int index_neg = i * class_num + j; + sum += TolerableValue()(-std::log( + 1.0f + TolerableValue()(std::exp(logits_data[index_neg] - + logits_data[index_pos])))); + } + loss_data[i] = -sum / (class_num - 1); + } + } +}; + +template +class BprLossGradientOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* x = ctx.Input("X"); + auto* dy = ctx.Input(framework::GradVarName("Y")); + auto* label = ctx.Input("Label"); + auto* dx = ctx.Output(framework::GradVarName("X")); + + const int step_size = x->dims()[0]; + const int num_classes = x->dims()[1]; 
+ T* dx_data = dx->mutable_data(ctx.GetPlace()); + const T* dy_data = dy->data(); + const T* x_data = x->data(); + const int64_t* label_data = label->data(); + + for (size_t sample_id = 0; sample_id < step_size; sample_id++) { + for (size_t x_offset = sample_id * num_classes; + x_offset < (sample_id + 1) * num_classes; x_offset++) { + dx_data[x_offset] = static_cast(0); + } + auto p_index = sample_id * num_classes + label_data[sample_id]; + for (size_t ni = 0; ni < num_classes; ni++) { + if (label_data[sample_id] == ni) continue; + auto n_index = sample_id * num_classes + ni; + auto grad_ = -dy_data[sample_id] / + ((num_classes - 1) * + (1.0f + TolerableValue()(std::exp(x_data[p_index] - + x_data[n_index])))); + dx_data[p_index] += grad_; + dx_data[n_index] -= grad_; + } + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/split_selected_rows_op.h b/paddle/fluid/operators/split_selected_rows_op.h index af64607fafc6544047714e731846a2440be219b8..1fef2b3d378c96d087118d0136885e7e29aa237c 100644 --- a/paddle/fluid/operators/split_selected_rows_op.h +++ b/paddle/fluid/operators/split_selected_rows_op.h @@ -72,10 +72,11 @@ class SplitSelectedRowsOpKernel : public framework::OpKernel { for (size_t i = 0; i < outs_rows_idx.size(); ++i) { auto rows_idx = outs_rows_idx[i]; outs[i]->set_height(height_sections[i]); + auto dims = x->GetCompleteDims(); + dims[0] = rows_idx.size(); + outs[i]->mutable_value()->mutable_data(dims, x->place()); + outs[i]->mutable_rows()->clear(); if (rows_idx.size() > 0) { - auto dims = x->GetCompleteDims(); - dims[0] = rows_idx.size(); - outs[i]->mutable_value()->mutable_data(dims, x->place()); for (auto idx : rows_idx) { outs[i]->mutable_rows()->push_back(idx - abs_sections[i]); } @@ -98,6 +99,8 @@ class SplitSelectedRowsOpKernel : public framework::OpKernel { } } } + PADDLE_ENFORCE_EQ(rows_idx.size(), outs[i]->rows().size(), + "rows should has the same size with tensor dim 0"); } } }; diff --git 
a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index d602613fc82223e14f48830a87533880696eb550..b8954cb12628d1f4f333956e0213ddf9c01e592c 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -1,6 +1,7 @@ -set(PYBIND_DEPS pybind python proto_desc memory executor async_executor prune feed_fetch_method pass_builder parallel_executor profiler) -set(PYBIND_SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc async_executor_py.cc) +set(PYBIND_DEPS pybind python proto_desc memory executor async_executor prune feed_fetch_method pass_builder parallel_executor profiler layer) +set(PYBIND_SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc async_executor_py.cc imperative.cc) + if(WITH_PYTHON) if(WITH_AMD_GPU) hip_library(paddle_pybind SHARED diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc new file mode 100644 index 0000000000000000000000000000000000000000..34e9c897d9e95feb185083b7c0a6a824d8dc809c --- /dev/null +++ b/paddle/fluid/pybind/imperative.cc @@ -0,0 +1,36 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/pybind/imperative.h" +#include "paddle/fluid/framework/block_desc.h" +#include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/imperative/tracer.h" + +namespace paddle { +namespace pybind { + +// Bind Methods +void BindTracer(pybind11::module *m) { + pybind11::class_(*m, "Tracer", "") + .def("__init__", + [](imperative::Tracer &self, framework::BlockDesc *root_block) { + new (&self) imperative::Tracer(root_block); + }) + .def("trace", &imperative::Tracer::Trace) + .def("get_scope", &imperative::Tracer::GetScope, + pybind11::return_value_policy::reference); +} + +} // namespace pybind +} // namespace paddle diff --git a/paddle/fluid/pybind/imperative.h b/paddle/fluid/pybind/imperative.h new file mode 100644 index 0000000000000000000000000000000000000000..7a9d3a01ea81f11ac85000c3d0153f20e108789a --- /dev/null +++ b/paddle/fluid/pybind/imperative.h @@ -0,0 +1,53 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ +#pragma once + +#include +#include +#include "paddle/fluid/imperative/layer.h" +#include "pybind11/pybind11.h" +#include "pybind11/stl.h" + +namespace paddle { +namespace pybind { + +class PyLayer : public imperative::Layer { + public: + using imperative::Layer::Layer; // Inherit constructors + + std::vector Forward( + const std::vector& inputs) override { + PYBIND11_OVERLOAD(std::vector, Layer, Forward, + inputs); // NOLINT + } + + void Backward() override { + PYBIND11_OVERLOAD(void, Layer, Backward, ); // NOLINT + } +}; + +class PyOpBase : public imperative::OpBase { + public: + using imperative::OpBase::OpBase; // Inherit constructors +}; + +class PyVarBase : public imperative::VarBase { + public: + using imperative::VarBase::VarBase; // Inherit constructors +}; + +void BindTracer(pybind11::module* m); + +} // namespace pybind +} // namespace paddle diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 58ef3da0b2312d2286ec2f92fdd7fa1f82ca4027..74b4f2e937b3d3715b13b03e8d3618c0afafb69c 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -34,6 +34,7 @@ limitations under the License. */ #include "paddle/fluid/framework/reader.h" #include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/framework/version.h" +#include "paddle/fluid/imperative/layer.h" #include "paddle/fluid/memory/allocation/allocator_strategy.h" #include "paddle/fluid/operators/activation_op.h" #include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h" @@ -45,6 +46,7 @@ limitations under the License. 
*/ #include "paddle/fluid/pybind/async_executor_py.h" #include "paddle/fluid/pybind/const_value.h" #include "paddle/fluid/pybind/exception.h" +#include "paddle/fluid/pybind/imperative.h" #include "paddle/fluid/pybind/protobuf.h" #include "paddle/fluid/pybind/pybind.h" // NOLINT #include "paddle/fluid/pybind/recordio.h" @@ -100,6 +102,42 @@ PYBIND11_MODULE(core, m) { BindException(&m); + py::class_(m, "VarBase", R"DOC()DOC") + .def(py::init<>()) + .def("_run_backward", + [](imperative::VarBase &self, framework::Scope *scope) { + self.RunBackward(scope); + }) + .def("_grad", &imperative::VarBase::Grad) + .def_property( + "desc", + [](const imperative::VarBase &self) { return self.var_desc_; }, + [](imperative::VarBase &self, framework::VarDesc *var_desc) { + self.var_desc_ = var_desc; + }, + py::return_value_policy::reference); + + py::class_(m, "OpBase", R"DOC()DOC") + .def(py::init<>()) + .def_property( + "desc", [](const imperative::OpBase &self) { return self.op_desc_; }, + [](imperative::OpBase &self, framework::OpDesc *op_desc) { + if (op_desc) { + self.op_desc_ = op_desc; + } + }, + py::return_value_policy::reference); + + py::class_ layer(m, "Layer"); + layer.def(py::init<>()) + .def("forward", + [](imperative::Layer &self, + const std::vector &inputs) { + return self.Forward(inputs); + }) + .def("backward", &imperative::Layer::Backward); + BindTracer(&m); + py::class_(m, "Tensor", py::buffer_protocol()) .def_buffer( [](Tensor &self) -> py::buffer_info { return CastToPyBuffer(self); }) @@ -298,6 +336,8 @@ PYBIND11_MODULE(core, m) { .def("get_tensor", [](SelectedRows &self) { return self.mutable_value(); }, py::return_value_policy::reference) + .def("numel", + [](SelectedRows &self) -> int64_t { return self.value().numel(); }) .def("set_height", &SelectedRows::set_height) .def("height", &SelectedRows::height) .def("set_rows", @@ -601,6 +641,7 @@ All parameter, weight, gradient are variables in Paddle. 
m.def("set_feed_variable", framework::SetFeedVariable); m.def("get_fetch_variable", framework::GetFetchVariable); + m.def("get_variable_tensor", framework::GetVariableTensor); m.def("_is_program_version_supported", IsProgramVersionSupported); diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h index 02a75236f6c7c7a64f2aa110ca7a7e3d92832fe9..f67f40f19f676054e0ab140f12770f01624fac02 100644 --- a/paddle/fluid/pybind/tensor_py.h +++ b/paddle/fluid/pybind/tensor_py.h @@ -182,7 +182,7 @@ inline void PyCPUTensorSetFromArray( paddle::platform::CPUPlace place) { std::vector dims; dims.reserve(array.ndim()); - for (size_t i = 0; i < array.ndim(); ++i) { + for (int i = 0; i < array.ndim(); ++i) { dims.push_back(static_cast(array.shape()[i])); } diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index 2a53519188e7454b54424cfdd4a713ae37a2326b..52417a1eaf74658b4d87394404eafd5343a3c5fe 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -34,6 +34,7 @@ from . import io from . import evaluator from . import initializer from . import layers +from . import imperative from . import contrib from . import nets from . import optimizer @@ -67,6 +68,7 @@ __all__ = framework.__all__ + executor.__all__ + \ 'initializer', 'layers', 'contrib', + 'imperative', 'transpiler', 'nets', 'optimizer', diff --git a/python/paddle/fluid/average.py b/python/paddle/fluid/average.py index 42cd3b36420ef5a17a9a7d981978ba8869809936..40a734af311e2037c1816dce97db123ebedd2f4f 100644 --- a/python/paddle/fluid/average.py +++ b/python/paddle/fluid/average.py @@ -48,6 +48,7 @@ class WeightedAverage(object): Examples: .. 
code-block:: python + avg = fluid.average.WeightedAverage() avg.add(value=2.0, weight=1) avg.add(value=4.0, weight=2) diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index a40826168dc21e3eec0050c2be7afc1dc74e8e5b..089792059465c60da43d02e8389f4e36900c2292 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -18,6 +18,7 @@ import collections import contextlib import re import six +import sys import numpy as np @@ -49,6 +50,16 @@ GRAD_VAR_SUFFIX = core.kGradVarSuffix() ZERO_VAR_SUFFIX = core.kZeroVarSuffix() CONTROL_DEP_VAR_PREFIX = core.kControlDepVarName() +_imperative_tracer_ = None + + +def _in_imperative_mode(): + return _imperative_tracer_ is not None + + +def _imperative_tracer(): + return _imperative_tracer_ + class NameScope(object): def __init__(self, name="", parent=None): @@ -345,6 +356,21 @@ class Variable(object): self.op = None self.stop_gradient = stop_gradient self.is_data = is_data + if _in_imperative_mode(): + self._ivar = core.VarBase() + self._ivar.desc = self.desc + + def _numpy(self): + scope = _imperative_tracer().get_scope(self.block.desc) + tensor = core.get_variable_tensor(scope, self.desc.name()) + return np.array(tensor) + + def _backward(self): + scope = _imperative_tracer().get_scope(self.block.desc) + self._ivar._run_backward(scope) + + def _gradient(self): + return np.array(self._ivar._grad()) def __str__(self): return self.to_string(True) @@ -655,6 +681,23 @@ class Operator(object): if self._has_kernel(type): self.desc.infer_var_type(self.block.desc) self.desc.infer_shape(self.block.desc) + if _in_imperative_mode(): + self.iop = core.OpBase() + self.iop.desc = self.desc + self.inputs = [] + if inputs is not None: + for inp in inputs.values(): + if isinstance(inp, Variable): + self.inputs.append(inp) + elif isinstance(inp, list) or isinstance(inp, tuple): + self.inputs.extend(inp[:]) + self.outputs = [] + if outputs is not None: + for out in outputs.values(): + if 
isinstance(out, Variable): + self.outputs.append(out) + elif isinstance(out, list) or isinstance(out, tuple): + self.outputs.extend(out[:]) def _has_kernel(self, op_type): return op_type not in self.OP_WITHOUT_KERNEL_SET @@ -1041,19 +1084,15 @@ class Block(object): raise ValueError("var %s not in this block" % name) return v - def _var_recursive(self, name): + def _find_var_recursive(self, name): """ Get a Variable by name from this block recursively. Args: name(str): the Variable's name. - Raises: - ValueError: this block and this parent block doesn't - have a Variable with the giving name. - Returns: - Variable: the Variable with the giving name. + Variable: the Variable with the giving name. Or None if not found. """ frontier = list() visited = set() @@ -1079,8 +1118,27 @@ class Block(object): frontier.append(prog.block(cur.forward_block_idx)) visited.add(id(cur)) + return None - raise ValueError("Var {0} is not found recursively".format(name)) + def _var_recursive(self, name): + """ + Get a Variable by name from this block recursively. + + Args: + name(str): the Variable's name. + + Raises: + ValueError: this block and this parent block doesn't + have a Variable with the giving name. + + Returns: + Variable: the Variable with the giving name. 
+ """ + var = self._find_var_recursive(name) + if var: + return var + else: + raise ValueError("Var {0} is not found recursively".format(name)) def all_parameters(self): return list(self.iter_parameters()) @@ -1206,6 +1264,9 @@ class Block(object): """ op_desc = self.desc.append_op() op = Operator(block=self, desc=op_desc, *args, **kwargs) + if _in_imperative_mode(): + _imperative_tracer().trace(op.iop, [v._ivar for v in op.inputs], + [v._ivar for v in op.outputs], self.desc) self.ops.append(op) return op @@ -2210,3 +2271,15 @@ def _get_var(name, program=None): assert isinstance(program, Program) return program.global_block().var(name) + + +@contextlib.contextmanager +def _imperative_guard(tracer): + global _imperative_tracer_ + tmp_trace = _imperative_tracer_ + _imperative_tracer_ = tracer + try: + yield + finally: + # Restore the previous tracer even if the guarded body raises. + _imperative_tracer_ = tmp_trace diff --git a/python/paddle/fluid/imperative/__init__.py b/python/paddle/fluid/imperative/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..922308b6b18b335535d41f24d544cde04991b794 --- /dev/null +++ b/python/paddle/fluid/imperative/__init__.py @@ -0,0 +1,25 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +from . import base +from .base import * + +from .
import layers +from .layers import * + +__all__ = [] +__all__ += layers.__all__ +__all__ += base.__all__ diff --git a/python/paddle/fluid/imperative/base.py b/python/paddle/fluid/imperative/base.py new file mode 100644 index 0000000000000000000000000000000000000000..15d38ddb56c71ef7de67f79cf52cd26070f470cb --- /dev/null +++ b/python/paddle/fluid/imperative/base.py @@ -0,0 +1,56 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import contextlib +import numpy as np + +from paddle.fluid import core +from paddle.fluid import framework + +__all__ = ['enabled', 'guard', 'to_variable'] + + +def enabled(): + return framework._in_imperative_mode() + + +@contextlib.contextmanager +def guard(): + train = framework.Program() + startup = framework.Program() + tracer = core.Tracer(train.current_block().desc) + with framework.program_guard(train, startup): + with framework.unique_name.guard(): + with framework._imperative_guard(tracer): + yield + + +def to_variable(value, block=None): + if isinstance(value, np.ndarray): + if not block: + block = framework.default_main_program().current_block() + py_var = framework.Variable( + block, + type=core.VarDesc.VarType.LOD_TENSOR, + name=None, + shape=value.shape, + dtype=value.dtype) + scope = framework._imperative_tracer().get_scope(block.desc) + var = scope.var(py_var.name) + tensor = var.get_tensor() + tensor.set(value, core.CPUPlace()) + return py_var + elif isinstance(value, framework.Variable): + return value + else: + raise ValueError("Unsupported type %s" % type(value)) diff --git a/python/paddle/fluid/imperative/layers.py b/python/paddle/fluid/imperative/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..1a28f7f4ae35295394b560d79e3dc0cdd5f2beab --- /dev/null +++ b/python/paddle/fluid/imperative/layers.py @@ -0,0 +1,44 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import contextlib +import sys +import numpy as np + +from paddle.fluid import core +from paddle.fluid import framework +from paddle.fluid.imperative import base + +__all__ = ['PyLayer'] + + +class PyLayer(core.Layer): + def __init__(self): + pass + + def __call__(self, inputs): + # TODO(panyx0718): Support declarative mode as well. + assert base.enabled() + if not isinstance(inputs, list) and not isinstance(inputs, tuple): + inputs = [inputs] + + var_inputs = [] + for x in inputs: + py_var = base.to_variable(x) + var_inputs.append(py_var) + outputs = self.forward(var_inputs) + return outputs + + def forward(self, inputs): + return [] diff --git a/python/paddle/fluid/layer_helper.py b/python/paddle/fluid/layer_helper.py index dc317de9abbd06f4021e64b87ea88ba6af8809c9..74b4a977db6b69d4d256e1f7b36eb53524269bb1 100644 --- a/python/paddle/fluid/layer_helper.py +++ b/python/paddle/fluid/layer_helper.py @@ -17,10 +17,13 @@ from __future__ import print_function import copy import itertools import six +import sys +import numpy as np from .framework import Variable, Parameter, default_main_program, default_startup_program, dtype_is_floating from . import unique_name from paddle.fluid.initializer import Constant, Xavier +from paddle.fluid.imperative import base from .param_attr import ParamAttr, WeightNormParamAttr from . import core from six.moves import zip @@ -46,23 +49,21 @@ class LayerHelper(object): def startup_program(self): return default_startup_program() + def to_variable(self, x): + return base.to_variable(x, self.main_program.current_block()) + def append_op(self, *args, **kwargs): return self.main_program.current_block().append_op(*args, **kwargs) def multiple_input(self, input_param_name='input'): inputs = self.kwargs.get(input_param_name, []) - type_error = TypeError( - "Input of {0} layer should be Variable or sequence of Variable". 
- format(self.layer_type)) - if isinstance(inputs, Variable): - inputs = [inputs] - elif not isinstance(inputs, list) and not isinstance(inputs, tuple): - raise type_error + ret = [] + if isinstance(inputs, list) or isinstance(inputs, tuple): + for inp in inputs: + ret.append(self.to_variable(inp)) else: - for each in inputs: - if not isinstance(each, Variable): - raise type_error - return inputs + ret.append(self.to_variable(inputs)) + return ret def input(self, input_param_name='input'): inputs = self.multiple_input(input_param_name) diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index 05138bf94598f649ef7fdbaa94896b6ba0884416..b7e39685691809d04ecddc21d2d04a7a85e478d5 100644 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -717,8 +717,9 @@ class While(object): out_vars = [] for inner_out_name in inner_outputs: - if inner_out_name in parent_block.vars: - out_vars.append(parent_block.var(inner_out_name)) + inner_var = parent_block._find_var_recursive(inner_out_name) + if inner_var: + out_vars.append(inner_var) step_scope = parent_block.create_var( type=core.VarDesc.VarType.STEP_SCOPES) @@ -1264,10 +1265,11 @@ class ConditionalBlock(object): if each_name not in input_set ] - out_list = [ - parent_block.var(var_name) for var_name in parent_block.vars - if var_name in intermediate - ] + out_list = [] + for inner_out_name in intermediate: + inner_var = parent_block._find_var_recursive(inner_out_name) + if inner_var: + out_list.append(inner_var) step_scope = parent_block.create_var( type=core.VarDesc.VarType.STEP_SCOPES) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 4833212d311e4792dd14709cf3e5843e297e810d..e25eaaa9fda6add9d8e81d9e6bdfb711cee3648e 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -41,6 +41,7 @@ __all__ = [ 'crf_decoding', 'cos_sim', 'cross_entropy', + 'bpr_loss', 
'square_error_cost', 'chunk_eval', 'sequence_conv', @@ -1348,6 +1349,44 @@ def cross_entropy(input, label, soft_label=False, ignore_index=kIgnoreIndex): return out +def bpr_loss(input, label, name=None): + """ + Bayesian Personalized Ranking Loss Operator. + + This operator belongs to pairwise ranking loss. Label is the desired item. + The loss at a given point in one session is defined as: + $Y[i] = -\frac{1}{N_{i}-1} * \sum_{j=0,~ j\neq Label[i]}^{N_{i}-1}\log(\sigma(X[i, Label[i]]-X[i, j]))$ + + Learn more details by reading paper "Session-based Recommendations with + Recurrent Neural Networks" (https://arxiv.org/abs/1511.06939) + + Args: + input (Variable|list): a 2-D tensor with shape [N x D], where N is the + batch size and D is the number of classes. + This input is not probability but logits. + label (Variable|list): the ground truth which is a 2-D tensor. `label` + is a tensor with shape [N x 1]. + name (str|None): A name for this layer(optional). If set None, the + layer will be named automatically. Default: None. + Returns: + A 2-D tensor with shape [N x 1], the bpr loss. + + Examples: + .. code-block:: python + + cost = fluid.layers.bpr_loss(input=predict, label=label) + """ + + helper = LayerHelper('bpr_loss', **locals()) + out = helper.create_variable_for_type_inference(dtype=input.dtype) + helper.append_op( + type='bpr_loss', + inputs={'X': [input], + 'Label': [label]}, + outputs={'Y': [out]}) + return out + + def square_error_cost(input, label): """ **Square error cost layer** @@ -6623,7 +6662,8 @@ def relu(x, name=None): helper = LayerHelper('relu', **locals()) dtype = helper.input_dtype(input_param_name='x') out = helper.create_variable_for_type_inference(dtype) - helper.append_op(type="relu", inputs={"X": x}, outputs={"Out": out}) + helper.append_op( + type="relu", inputs={"X": helper.input('x')}, outputs={"Out": out}) return out diff --git a/python/paddle/fluid/tests/unittests/test_bpr_loss_op.py b/python/paddle/fluid/tests/unittests/test_bpr_loss_op.py new file mode 100644 index 0000000000000000000000000000000000000000..c8dc5fbd237d17f2d4e45b06e5806fff5cbf58fe --- /dev/null +++
b/python/paddle/fluid/tests/unittests/test_bpr_loss_op.py @@ -0,0 +1,52 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from op_test import OpTest, randomize_probability + + +class TestBprLossOp1(OpTest): + """Test BprLoss with discrete one-hot labels. + """ + + def setUp(self): + self.op_type = "bpr_loss" + batch_size = 40 + class_num = 5 + X = randomize_probability(batch_size, class_num, dtype='float64') + label = np.random.randint(0, class_num, (batch_size, 1), dtype="int64") + bpr_loss_result = [] + for i in range(batch_size): + sum = 0.0 + for j in range(class_num): + if j == label[i][0]: + continue + sum += (-np.log(1.0 + np.exp(X[i][j] - X[i][label[i][0]]))) + bpr_loss_result.append(-sum / (class_num - 1)) + bpr_loss = np.asmatrix([[x] for x in bpr_loss_result], dtype="float64") + self.inputs = {"X": X, "Label": label} + self.outputs = {"Y": bpr_loss} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(["X"], "Y", numeric_grad_delta=0.001) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_imperative.py b/python/paddle/fluid/tests/unittests/test_imperative.py new file mode 100644 index 0000000000000000000000000000000000000000..b5b6305155d1ef3dcf6ce590c221664754c5bdc8 --- /dev/null +++ 
b/python/paddle/fluid/tests/unittests/test_imperative.py @@ -0,0 +1,52 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import sys +import numpy as np + +import paddle.fluid as fluid +from paddle.fluid import core + + +class MyLayer(fluid.imperative.PyLayer): + def __init__(self): + super(MyLayer, self).__init__() + + def forward(self, inputs): + x = fluid.layers.relu(inputs[0]) + self._x_for_debug = x + return [fluid.layers.elementwise_mul(x, x)] + + +class TestImperative(unittest.TestCase): + def test_layer(self): + with fluid.imperative.guard(): + cl = core.Layer() + cl.forward([]) + l = fluid.imperative.PyLayer() + l.forward([]) + + def test_layer_in_out(self): + with fluid.imperative.guard(): + l = MyLayer() + x = l(np.array([1.0, 2.0, -1.0], dtype=np.float32))[0] + self.assertIsNotNone(x) + sys.stderr.write("%s output: %s\n" % (x, x._numpy())) + x._backward() + sys.stderr.write("grad %s\n" % l._x_for_debug._gradient()) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index be51fb06a37a376f6f410336184c95981ded35dc..10e8bb5a86691d8654c5ae48794e49f30f47500d 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -846,6 +846,15 @@ class TestBook(unittest.TestCase): out = layers.cross_entropy(x, 
label, False, 4) self.assertIsNotNone(out) + def test_bpr_loss(self): + program = Program() + with program_guard(program): + x = layers.data(name="x", shape=[30, 10], dtype="float32") + label = layers.data(name="label", shape=[30, 1], dtype="int32") + out = layers.bpr_loss(x, label) + self.assertIsNotNone(out) + print(str(program)) + def test_expand(self): program = Program() with program_guard(program): diff --git a/python/paddle/fluid/tests/unittests/test_memory_optimization_transpiler.py b/python/paddle/fluid/tests/unittests/test_memory_optimization_transpiler.py index 275e5c49d5c298a95b012582a74f8073b800991e..fa16f082880eb97f54abe8bf75e26321f72b3bd3 100644 --- a/python/paddle/fluid/tests/unittests/test_memory_optimization_transpiler.py +++ b/python/paddle/fluid/tests/unittests/test_memory_optimization_transpiler.py @@ -22,6 +22,15 @@ from paddle.fluid.framework import Program, program_guard from paddle.fluid.transpiler import memory_optimize +def _get_vars(prog): + assert (isinstance(prog, Program)) + all_vars = set() + for op in prog.global_block().ops: + all_vars.update(op.input_arg_names) + all_vars.update(op.output_arg_names) + return all_vars + + class TestControlFlowGraph(unittest.TestCase): def setUp(self): program = Program() @@ -37,11 +46,11 @@ class TestControlFlowGraph(unittest.TestCase): self.program = program def test_control_flow_graph(self): - print("before optimization") - print(str(self.program)) - result_program = memory_optimize(self.program) - print("after optimization") - print(str(result_program)) + result_program = self.program.clone() + memory_optimize(self.program) + old_vars = _get_vars(self.program) + new_vars = _get_vars(result_program) + self.assertTrue(old_vars != new_vars) class TestMemoryTranspiler2(unittest.TestCase): @@ -58,14 +67,22 @@ class TestMemoryTranspiler2(unittest.TestCase): avg_cost = layers.mean(cost) opt = optimizer.SGD(learning_rate=0.001) opt.minimize(avg_cost) + self.skip_set = set([cost.name, fc.name]) 
self.program = program def test_inplace_ops(self): - print("before optimization") - print(str(self.program)) - result_program = memory_optimize(self.program) - print("after optimization") - print(str(result_program)) + result_program = self.program.clone() + memory_optimize(self.program) + old_vars = _get_vars(self.program) + new_vars = _get_vars(result_program) + self.assertTrue(old_vars != new_vars) + + def test_skip_opt(self): + result_program = self.program.clone() + memory_optimize(self.program, skip_opt_set=self.skip_set) + old_vars = _get_vars(self.program) + new_vars = _get_vars(result_program) + self.assertTrue(old_vars != new_vars) class TestMemoryTranspiler3(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_split_selected_rows_op.py b/python/paddle/fluid/tests/unittests/test_split_selected_rows_op.py index 50204b8a77c187aa695da83860960566448d290f..f8847e1570dc47d432777faa15f4004f1a7111a6 100644 --- a/python/paddle/fluid/tests/unittests/test_split_selected_rows_op.py +++ b/python/paddle/fluid/tests/unittests/test_split_selected_rows_op.py @@ -63,6 +63,7 @@ class TestSpliteSelectedRows(unittest.TestCase): # expected output selected rows expected_out0_rows = [0, 4] expected_out1_rows = [0, 2] + expected_out2_rows = [] expected_out4_rows = [0] op = Operator( @@ -75,6 +76,7 @@ class TestSpliteSelectedRows(unittest.TestCase): self.assertEqual(outs[0].rows(), expected_out0_rows) self.assertEqual(outs[1].rows(), expected_out1_rows) + self.assertEqual(outs[2].rows(), expected_out2_rows) self.assertEqual(outs[4].rows(), expected_out4_rows) self.assertEqual(outs[0].height(), height_sections[0]) @@ -84,6 +86,9 @@ class TestSpliteSelectedRows(unittest.TestCase): self.assertAlmostEqual(4.0, np.array(outs[1].get_tensor())[1, 1]) self.assertAlmostEqual(8.0, np.array(outs[4].get_tensor())[0, 1]) + self.assertEqual(outs[2].numel(), 0) + self.assertEqual(outs[3].numel(), 0) + def check_grad_with_place(self, place): scope = core.Scope() height = 10 
diff --git a/python/paddle/fluid/transpiler/memory_optimization_transpiler.py b/python/paddle/fluid/transpiler/memory_optimization_transpiler.py index c9f1be934773cc28f026f2b867b9e3a4f7aa8472..95aafec05361a8b66b849268c7a738bb2ee5da86 100755 --- a/python/paddle/fluid/transpiler/memory_optimization_transpiler.py +++ b/python/paddle/fluid/transpiler/memory_optimization_transpiler.py @@ -14,6 +14,7 @@ from __future__ import print_function +import six from collections import defaultdict, MutableSet from .. import core from ... import compat as cpt @@ -470,8 +471,21 @@ def memory_optimize(input_program, Returns: None """ + + def to_name_str(var): + if isinstance(var, Variable): + return var.desc.name() + elif isinstance(var, str): + return var + elif isinstance(var, six.string_types): + return str(var) + else: + raise TypeError(str(var) + " should be Variable or str") + if level != 0 and level != 1: raise ValueError("only support opt_level 0 or 1.") + if skip_opt_set is not None and not isinstance(skip_opt_set, set): + raise ValueError("only support skip_opt_set as set.") global PRINT_LOG PRINT_LOG = print_log if skip_grads: @@ -486,6 +500,8 @@ def memory_optimize(input_program, skip_opt_set = grad_set else: skip_opt_set.update(grad_set) + if skip_opt_set is not None: + skip_opt_set = set(map(to_name_str, skip_opt_set)) cfgs = _get_cfgs(input_program) for cfg in cfgs: cfg.memory_optimize(skip_opt_set=skip_opt_set, level=level) diff --git a/python/setup.py.in b/python/setup.py.in index 5aee26b63832889272cde09c553b4615efb8872a..0eb69cdb5c7d140527dba7a648728750bfb404f7 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -101,6 +101,7 @@ packages=['paddle', 'paddle.dataset', 'paddle.reader', 'paddle.fluid', + 'paddle.fluid.imperative', 'paddle.fluid.proto', 'paddle.fluid.proto.profiler', 'paddle.fluid.layers', diff --git a/tools/print_signatures.py b/tools/print_signatures.py index 5c5266f904f5dcf74dd1d4ee7e98081f74a79907..7e61dde0a446cf5bfe656105ffd2472f03576f05 
100644 --- a/tools/print_signatures.py +++ b/tools/print_signatures.py @@ -27,6 +27,8 @@ import pydoc member_dict = collections.OrderedDict() +experimental_namespace = {"paddle.fluid.imperative"} + def visit_member(parent_name, member): cur_name = ".".join([parent_name, member.__name__]) @@ -51,6 +53,8 @@ def visit_member(parent_name, member): def visit_all_module(mod): + if (mod.__name__ in experimental_namespace): + return for member_name in ( name for name in (mod.__all__ if hasattr(mod, "__all__") else dir(mod))