diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 52946c7f11f90490b1af1347f20db236a8fe24af..94b3af9159fe661ed298def9cf36e47328a2bdba 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -126,8 +126,9 @@ cc_library(version SRCS version.cc) cc_test(version_test SRCS version_test.cc DEPS version) cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog version) -cc_library(ngraph_bridge SRCS ngraph_bridge.cc DEPS operator framework_proto) + if(NOT WIN32) +cc_library(ngraph_bridge SRCS ngraph_bridge.cc DEPS operator framework_proto ngraph) cc_library(ngraph_operator SRCS ngraph_operator.cc DEPS ngraph_bridge operator op_info device_context tensor scope glog shape_inference data_transform lod_tensor profiler) endif(NOT WIN32) diff --git a/paddle/fluid/framework/ngraph_bridge.cc b/paddle/fluid/framework/ngraph_bridge.cc index 8177436d0bd90c3bcf8f91d5c55b66be188b19f9..45ef0211ad2e84709884973be223703d8e929d41 100644 --- a/paddle/fluid/framework/ngraph_bridge.cc +++ b/paddle/fluid/framework/ngraph_bridge.cc @@ -15,23 +15,105 @@ limitations under the License. */ #ifdef PADDLE_WITH_NGRAPH #include #include +#include #include "paddle/fluid/framework/ngraph_bridge.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/platform/enforce.h" #include "ngraph/ngraph.hpp" namespace paddle { namespace framework { +static std::shared_ptr GetNode( + const std::shared_ptr& op, const std::string prm, + const VariableNameMap& var_map, + std::shared_ptr< + std::unordered_map>> + ngb_node_map) { + auto& var_names = var_map.at(prm); + PADDLE_ENFORCE_EQ(var_names.size(), 1, + "op %s prm %s expects one associated var", op->Type(), prm); + if (ngb_node_map->find(var_names[0]) != ngb_node_map->end()) { + return (*ngb_node_map)[var_names[0]]; + } else { + return nullptr; + } +} + +static std::shared_ptr GetInputNode( + const std::shared_ptr& op, const std::string prm, + std::shared_ptr< + std::unordered_map>> + ngb_node_map) { + return GetNode(op, prm, op->Inputs(), ngb_node_map); +} + +static std::shared_ptr GetOutputNode( + const std::shared_ptr& op, const std::string prm, + std::shared_ptr< + std::unordered_map>> + ngb_node_map) { + return GetNode(op, prm, op->Outputs(), ngb_node_map); +} + +static void SetOutputNode( + const std::shared_ptr& op, const std::string prm, + std::shared_ptr node, + std::shared_ptr< + std::unordered_map>> + ngb_node_map) { + auto& var_names = op->Outputs().at(prm); + if (var_names.size() == 1) { + (*ngb_node_map)[var_names[0]] = node; + } else if (var_names.size() == 0) { + (*ngb_node_map)[""] = node; + } else { + PADDLE_THROW("prm %s has more than 1 var_names.", prm); + } +} + +static bool HasOutput(const std::shared_ptr& op, + const std::string prm) { + auto& outputs = op->Outputs(); + if (outputs.find(prm) == outputs.end()) return false; + return outputs.at(prm).size() > 0; +} + +template +static void BuildBinaryNode( + const std::shared_ptr& op, + std::shared_ptr< + std::unordered_map>> + ngb_node_map) { + auto x = GetInputNode(op, "X", ngb_node_map); + auto y = GetInputNode(op, "Y", ngb_node_map); + auto out = std::make_shared(x, y); + SetOutputNode(op, "Out", out, ngb_node_map); +} + +template +static void BuildUnaryNode( + const std::shared_ptr& op, + std::shared_ptr< + std::unordered_map>> + ngb_node_map) { + auto input = GetInputNode(op, "X", ngb_node_map); + auto out = std::make_shared(input); + SetOutputNode(op, "Out", out, ngb_node_map); +} + std::map&, std::shared_ptr>>)>> - NgraphBridge::NG_NODE_MAP = {}; + NgraphBridge::NG_NODE_MAP = {{"relu", BuildUnaryNode}, + {"tanh", BuildUnaryNode}}; -void NgraphBridge::build_graph(const std::shared_ptr& op) { +void NgraphBridge::BuildNgGraph(const std::shared_ptr& op) { auto& op_type = op->Type(); - NG_NODE_MAP[op_type](op, ngb_node_map); + NG_NODE_MAP[op_type](op, ngb_node_map_); } } // namespace framework diff --git a/paddle/fluid/framework/ngraph_bridge.h b/paddle/fluid/framework/ngraph_bridge.h index 55bf0d21f3471013b1fb780e852d813313345f03..3cf62b6daab96537435bdb61f8dd3b9c8fc80222 100644 --- a/paddle/fluid/framework/ngraph_bridge.h +++ b/paddle/fluid/framework/ngraph_bridge.h @@ -20,16 +20,14 @@ limitations under the License. */ #include #include #include -#include -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/platform/enforce.h" - -#include "ngraph/ngraph.hpp" +#include "ngraph/node.hpp" namespace paddle { namespace framework { +class OperatorBase; + class NgraphBridge { public: static std::map< @@ -43,14 +41,14 @@ class NgraphBridge { std::shared_ptr< std::unordered_map>> var_node_map) - : ngb_node_map(var_node_map) {} + : ngb_node_map_(var_node_map) {} - void build_graph(const std::shared_ptr& op); + void BuildNgGraph(const std::shared_ptr& op); private: std::shared_ptr< std::unordered_map>> - ngb_node_map; + ngb_node_map_; }; } // namespace framework diff --git a/paddle/fluid/framework/ngraph_operator.cc b/paddle/fluid/framework/ngraph_operator.cc index d967b2780c21713a2f9a73a3402964103f44269e..e9ff0513557725d068c3d3082f15c1f773e6624a 100644 --- a/paddle/fluid/framework/ngraph_operator.cc +++ b/paddle/fluid/framework/ngraph_operator.cc @@ -19,14 +19,29 @@ limitations under the License. */ #include #include "paddle/fluid/framework/feed_fetch_type.h" +#include "paddle/fluid/framework/framework.pb.h" +#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/ngraph_bridge.h" #include "paddle/fluid/framework/ngraph_operator.h" -#include "paddle/fluid/framework/shape_inference.h" +#include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/var_desc.h" #include "paddle/fluid/framework/var_type.h" +#include "ngraph/ngraph.hpp" + namespace paddle { namespace framework { +static ngraph::Shape Ddim2Shape(const DDim& dims) { + ngraph::Shape sp; + for (int i = 0; i < dims.size(); ++i) { + int k = dims[i]; + k = k == 0 ? 1 : k; + sp.push_back(k); + } + return sp; +} + static std::map pd2ng_type_map = { {proto::VarType::FP32, ngraph::element::f32}, {proto::VarType::FP64, ngraph::element::f64}, @@ -59,13 +74,23 @@ class NgraphOperator { persistables_(persist), fetches_(fetches), post_op_inputs_(post_op_inputs), - ng_op_state_(ng_op_state) {} + ng_op_state_(ng_op_state) { + var_in_node_map_ = std::make_shared< + std::unordered_map>>(); + + var_node_map_ = std::make_shared< + std::unordered_map>>(); + + BuildNgIO(); + + GetNgFunction(); + } void Run(const Scope& scope, const platform::Place& place) const; private: static std::unordered_map> - func_cache; + func_cache_; const Scope& scope_; const platform::Place& place_; std::vector> fused_ops_; @@ -74,6 +99,35 @@ class NgraphOperator { std::unordered_set fetches_; std::unordered_set post_op_inputs_; op_state ng_op_state_; + + static std::shared_ptr backend_; + + std::shared_ptr ngraph_function_; + // var_name of inputs + std::vector var_in_; + // var_name of outputs from fetch in order + std::vector var_out_; + + std::shared_ptr< + std::unordered_map>> + var_in_node_map_; + + // map each var name with a ngraph node + std::shared_ptr< + std::unordered_map>> + var_node_map_; + + std::shared_ptr GetCacheKey(); + + void GetNgInputShape(std::shared_ptr op); + + void BuildNgNode(); + + void BuildNgIO(); + + void BuildNgFunction(); + + void GetNgFunction(); }; std::vector>::iterator>> @@ -86,7 +140,7 @@ FusedOperator::FusedOpIntervals( } size_t size = ops->size(); size_t left = 0; - while (left < size && ops.at(left)->Type() != kFeedOpType) { + while (left < size && ops->at(left)->Type() != kFeedOpType) { ++left; } if (left == size) { @@ -116,7 +170,7 @@ FusedOperator::FusedOpIntervals( size_t start = pivot, end = start; while (pivot < right && (paddle::framework::NgraphBridge::NG_NODE_MAP.find( - ops.at(pivot)->Type()) != + ops->at(pivot)->Type()) != paddle::framework::NgraphBridge::NG_NODE_MAP.end())) { ++pivot; ++end; @@ -136,7 +190,9 @@ FusedOperator::FusedOperator( std::vector>::iterator end, const std::string& type, const VariableNameMap& inputs, const VariableNameMap& outputs, const AttributeMap& attrs) - : OperatorBase(type, inputs, outputs, attrs), pdesc(prog), block(block_id) { + : OperatorBase(type, inputs, outputs, attrs), + pdesc_(prog), + block_(block_id) { for (std::vector>::iterator it = start; it != end; ++it) { fused_ops_.push_back(std::move(*it)); @@ -152,7 +208,7 @@ FusedOperator::FusedOperator( } if ((*(start - 1))->Type() == kFeedOpType && (*end)->Type() == kFetchOpType) { - is_complete = true; + is_full_ = true; } Process(); @@ -205,7 +261,7 @@ void FusedOperator::RunImpl(const Scope& scope, } } - if (is_full) { + if (is_full_) { ng_op_state = ng_op_state == PARTIAL_TEST ? FULL_TEST : FULL_TRAIN; } @@ -215,6 +271,297 @@ void FusedOperator::RunImpl(const Scope& scope, ngraph_op.Run(scope, place); } +std::unordered_map> + NgraphOperator::func_cache_ = {}; + +std::shared_ptr NgraphOperator::backend_ = + ngraph::runtime::Backend::create("CPU"); + +void NgraphOperator::GetNgInputShape(std::shared_ptr op) { + RuntimeInferShapeContext infer_shape_ctx(*op, scope_); + std::shared_ptr op_k = + std::dynamic_pointer_cast(op); + op_k->InferShape(&infer_shape_ctx); + + for (auto& var_name_item : op->Inputs()) { + std::vector vshape; + auto& var_prm_name = var_name_item.first; + auto var_name_size = var_name_item.second.size(); + if (var_name_size == 1) { + auto dim = infer_shape_ctx.GetInputDim(var_prm_name); + vshape.push_back(Ddim2Shape(dim)); + } else if (var_name_item.second.size() > 1) { + auto vdim = infer_shape_ctx.GetInputsDim(var_prm_name); + PADDLE_ENFORCE_EQ(vdim.size(), var_name_item.second.size(), + "Need dim info for each var"); + for (auto& dim : vdim) { + vshape.push_back(Ddim2Shape(dim)); + } + } else { + // 0 size : conv2d Bias + } + + for (size_t i = 0; i < var_name_item.second.size(); ++i) { + auto var_name = var_name_item.second.at(i); + if (std::find(var_in_.begin(), var_in_.end(), var_name) != + var_in_.end()) { + if (var_node_map_->find(var_name) == var_node_map_->end()) { + auto ng_type = var_type_map_.at(var_name); + auto prm = std::make_shared( + ng_type, vshape.at(i), true); + (*var_node_map_)[var_name] = prm; + (*var_in_node_map_)[var_name] = prm; + } + } + } + } +} + +void NgraphOperator::BuildNgNode() { + for (auto& var_name : var_out_) { + if (var_node_map_->find(var_name) == var_node_map_->end()) { + auto* var = scope_.FindVar(var_name); + if (var && VarIsTensor(*var)) { + auto* tensor_pd = GetLoDTensorOrSelectedRowsValueFromVar(*var); + auto& ddim = tensor_pd->dims(); + auto ng_shape = Ddim2Shape(ddim); + auto ng_type = var_type_map_.at(var_name); + auto prm = + std::make_shared(ng_type, ng_shape, true); + (*var_node_map_)[var_name] = prm; + } + } + } + + paddle::framework::NgraphBridge ngb(var_node_map_); + for (auto& op : fused_ops_) { + ngb.BuildNgGraph(op); + } +} + +void NgraphOperator::BuildNgIO() { + std::unordered_set inputs; + std::unordered_set outputs; + + for (auto& op : fused_ops_) { + for (auto& var_name_item : op->Inputs()) { + for (auto& var_name : var_name_item.second) { + inputs.insert(var_name); + const bool is_output = outputs.find(var_name) != outputs.end(); + if (!is_output && + std::find(var_in_.begin(), var_in_.end(), var_name) == + var_in_.end()) { + // fill var_in here to keep lhs and rhs order + var_in_.push_back(var_name); + } + } + } + + if (op->Type() != "fill_constant") { + GetNgInputShape(op); + } + + for (auto& var_name_item : op->Outputs()) { + PADDLE_ENFORCE_LE(var_name_item.second.size(), 1, + "op %s has more than 1 output - Not handling yet", + op->Type()); + for (auto& var_name : var_name_item.second) { + outputs.insert(var_name); + } + } + } + + // var_out.clear(); + for (auto& op : fused_ops_) { + for (auto& var_name_item : op->Outputs()) { + PADDLE_ENFORCE_LE(var_name_item.second.size(), 1, + "op %s has more than 1 output - Not handling yet", + op->Type()); + for (auto& var_name : var_name_item.second) { + switch (ng_op_state_) { + case PARTIAL_TEST: + if (post_op_inputs_.find(var_name) != post_op_inputs_.end() || + fetches_.find(var_name) != fetches_.end()) { + var_out_.push_back(var_name); + } + break; + case FULL_TEST: + if (fetches_.find(var_name) != fetches_.end()) { + var_out_.push_back(var_name); + } + break; + case PARTIAL_TRAIN: + if (fetches_.find(var_name) != fetches_.end() || + post_op_inputs_.find(var_name) != post_op_inputs_.end() || + persistables_.find(var_name) != persistables_.end()) { + var_out_.push_back(var_name); + } + break; + case FULL_TRAIN: + if (fetches_.find(var_name) != fetches_.end() || + persistables_.find(var_name) != persistables_.end()) { + var_out_.push_back(var_name); + } + break; + default: + var_out_.push_back(var_name); + } + } + } + } +} + +void NgraphOperator::BuildNgFunction() { + BuildNgNode(); + ngraph_function_ = nullptr; + ngraph::NodeVector func_outputs; + ngraph::op::ParameterVector func_inputs; + + for (auto& vo : var_out_) { + func_outputs.push_back(var_node_map_->at(vo)); + } + + for (auto& vi : var_in_) { + std::shared_ptr prm = + std::dynamic_pointer_cast( + var_in_node_map_->at(vi)); + func_inputs.push_back(prm); + } + + ngraph_function_ = + std::make_shared(func_outputs, func_inputs); +} + +std::shared_ptr NgraphOperator::GetCacheKey() { + auto cache_key = std::make_shared(""); + *cache_key += std::to_string(fused_ops_.size()); + for (auto& op : fused_ops_) { + *cache_key += op->Type(); + } + for (auto& var_name : var_in_) { + auto shape = var_node_map_->at(var_name)->get_shape(); + *cache_key += var_name; + *cache_key += var_type_map_.at(var_name).c_type_string(); + for (size_t i = 0; i < shape.size(); ++i) { + *cache_key += std::to_string(shape.at(i)); + } + } + + for (auto& var_name : var_out_) { + auto* var = scope_.FindVar(var_name); + if (var && VarIsTensor(*var)) { + auto* tensor_pd = GetLoDTensorOrSelectedRowsValueFromVar(*var); + auto& ddim = tensor_pd->dims(); + for (int i = 0; i < ddim.size(); ++i) { + *cache_key += std::to_string(ddim[i]); + } + } + } + return cache_key; +} + +void NgraphOperator::GetNgFunction() { + bool cache_on = true; + if (cache_on) { + std::string cache_key_val = *GetCacheKey(); + if (func_cache_.find(cache_key_val) != func_cache_.end()) { + ngraph_function_ = func_cache_.at(cache_key_val); + } else { + BuildNgFunction(); + func_cache_[cache_key_val] = ngraph_function_; + } + } else { + BuildNgFunction(); + } +} + +void NgraphOperator::Run(const Scope& scope, + const platform::Place& place) const { + std::vector> t_in; + std::vector> t_out; + + for (size_t i = 0; i < var_in_.size(); ++i) { + auto vi = var_in_.at(i); + auto sp = var_node_map_->at(vi)->get_shape(); + std::shared_ptr ti; + auto* var = scope.FindVar(vi); + if (var && VarIsTensor(*var)) { + auto* tensor_pd = GetLoDTensorOrSelectedRowsValueFromVar(*var); + PADDLE_ENFORCE(sp == Ddim2Shape(tensor_pd->dims()), + "Ensure ngraph tensor layout align with paddle tensor"); + if (tensor_pd->type().hash_code() == + typeid(float).hash_code()) { // NOLINT + const float* arr = tensor_pd->data(); + ti = backend_->create_tensor(ngraph::element::f32, sp, + const_cast(arr)); + } else if (tensor_pd->type().hash_code() == + typeid(int).hash_code()) { // NOLINT + const int* arr = tensor_pd->data(); + ti = backend_->create_tensor(ngraph::element::i32, sp, + const_cast(arr)); + } else if (tensor_pd->type().hash_code() == typeid(int64_t).hash_code()) { + const int64_t* arr = tensor_pd->data(); + ti = backend_->create_tensor(ngraph::element::i64, sp, + const_cast(arr)); + } else if (tensor_pd->type().hash_code() == + typeid(double).hash_code()) { // NOLINT + const double* arr = tensor_pd->data(); + ti = backend_->create_tensor(ngraph::element::f64, sp, + const_cast(arr)); + } else if (tensor_pd->type().hash_code() == + typeid(bool).hash_code()) { // NOLINT + const bool* arr = tensor_pd->data(); + ti = backend_->create_tensor(ngraph::element::boolean, sp, + const_cast(arr)); + } else { + PADDLE_THROW("Data type not handling for var %s", vi); + } + } else { + PADDLE_THROW("Cannot find var or tensor with var name %s", vi); + } + bool is_test = (ng_op_state_ == PARTIAL_TEST || ng_op_state_ == FULL_TEST) + ? true + : false; + bool is_persistable = + (persistables_.find(vi) != persistables_.end()) ? true : false; + if (is_test && is_persistable) { + ti->set_stale(false); + } + t_in.push_back(ti); + } + + for (size_t i = 0; i < var_out_.size(); ++i) { + auto var_name = var_out_[i]; + auto* var = scope.FindVar(var_name); + std::shared_ptr to; + if (var && VarIsTensor(*var)) { + auto* tensor_pd = GetMutableLoDTensorOrSelectedRowsValueFromVar(var); + auto dd = tensor_pd->dims(); + ngraph::Shape sp = Ddim2Shape(dd); + auto ng_type = var_type_map_.at(var_name); + if (ng_type == ngraph::element::f32) { + auto pd_arr = tensor_pd->mutable_data(place); + to = backend_->create_tensor(ngraph::element::f32, sp, pd_arr); + } else if (ng_type == ngraph::element::i64) { + auto pd_arr = tensor_pd->mutable_data(place); + to = backend_->create_tensor(ngraph::element::i64, sp, pd_arr); + } else if (ng_type == ngraph::element::f64) { + auto pd_arr = tensor_pd->mutable_data(place); + to = backend_->create_tensor(ngraph::element::f64, sp, pd_arr); + } else if (ng_type == ngraph::element::boolean) { + auto pd_arr = tensor_pd->mutable_data(place); + to = backend_->create_tensor(ngraph::element::boolean, sp, pd_arr); + } else { + PADDLE_THROW("Data type not handled in for var %s", var_name); + } + t_out.push_back(to); + } else { + PADDLE_THROW("Cannot find var or tensor with var name %s", var_name); + } + } + + backend_->call(ngraph_function_, t_out, t_in); +} // NgraphOperator::RunImpl } // namespace framework } // namespace paddle #endif diff --git a/paddle/fluid/framework/ngraph_operator.h b/paddle/fluid/framework/ngraph_operator.h index 0f655cef1dde624bcf4944b5c096279097e1c8ae..3ca023e11111c5b447b2cabbfb8bb29877297f65 100644 --- a/paddle/fluid/framework/ngraph_operator.h +++ b/paddle/fluid/framework/ngraph_operator.h @@ -17,24 +17,19 @@ limitations under the License. */ #ifdef PADDLE_WITH_NGRAPH #include -#include #include #include #include #include "paddle/fluid/framework/attribute.h" -#include "paddle/fluid/framework/framework.pb.h" -#include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/ngraph_bridge.h" #include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/op_kernel_type.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/platform/variant.h" -#include "ngraph/ngraph.hpp" +#include "ngraph/type/element_type.hpp" namespace paddle { namespace framework {