Commit 0c554a59 authored by sneaxiy

merge develop

test=develop
@@ -131,11 +131,13 @@ cc_test(version_test SRCS version_test.cc DEPS version)
 cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog version)
-if(NOT WIN32)
-  cc_library(ngraph_bridge SRCS ngraph_bridge.cc DEPS operator framework_proto ngraph)
-  cc_library(ngraph_operator SRCS ngraph_operator.cc DEPS ngraph_bridge operator op_info device_context tensor scope glog
-    shape_inference data_transform lod_tensor profiler)
-endif(NOT WIN32)
+if(WITH_NGRAPH)
+  if(NOT WIN32)
+    cc_library(ngraph_bridge SRCS ngraph_bridge.cc DEPS operator framework_proto ngraph)
+    cc_library(ngraph_operator SRCS ngraph_operator.cc DEPS ngraph_bridge operator op_info device_context tensor scope glog
+      shape_inference data_transform lod_tensor profiler ngraph)
+  endif(NOT WIN32)
+endif(WITH_NGRAPH)
 cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc)
 nv_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry)
@@ -171,14 +173,20 @@ if(WITH_DISTRIBUTE)
   set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
   set_source_files_properties(executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
 else()
-  if(NOT WIN32)
-    cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass ngraph_operator variable_helper garbage_collector)
-  else(NOT WIN32)
-    cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass variable_helper garbage_collector)
-  endif(NOT WIN32)
+  if(WITH_NGRAPH)
+    if(NOT WIN32)
+      cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass ngraph ngraph_operator variable_helper)
+    else(NOT WIN32)
+      cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass variable_helper)
+    endif(NOT WIN32)
+  else(WITH_NGRAPH)
+    cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass variable_helper)
+  endif(WITH_NGRAPH)
   cc_test(test_naive_executor SRCS naive_executor_test.cc DEPS naive_executor elementwise_add_op)
 endif()
+target_link_libraries(executor garbage_collector)
 cc_library(parallel_executor SRCS parallel_executor.cc DEPS
         threaded_ssa_graph_executor scope_buffered_ssa_graph_executor
         graph build_strategy
...
@@ -18,7 +18,6 @@ limitations under the License. */
 #include "paddle/fluid/framework/feed_fetch_method.h"
 #include "paddle/fluid/framework/lod_rank_table.h"
 #include "paddle/fluid/framework/lod_tensor_array.h"
-#include "paddle/fluid/framework/ngraph_operator.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/reader.h"
 #include "paddle/fluid/framework/transfer_scope_cache.h"
@@ -27,6 +26,10 @@ limitations under the License. */
 #include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/platform/profiler.h"
+#ifdef PADDLE_WITH_NGRAPH
+#include "paddle/fluid/framework/ngraph_operator.h"
+#endif
 DECLARE_bool(benchmark);
 DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run");
 DEFINE_bool(use_ngraph, false, "Use NGRAPH to run");
@@ -131,11 +134,11 @@ static void DeleteUnusedTensors(
 static void EnableFusedOp(ExecutorPrepareContext* ctx) {
 #ifdef PADDLE_WITH_NGRAPH
   VLOG(3) << "use_ngraph=True";
-  auto intervals = FusedOperator::FusedOpIntervals(&ctx->ops_);
+  auto intervals = NgraphOperator::NgraphOpIntervals(&ctx->ops_);
   for (auto& interval : intervals) {
-    auto* fused_op = new FusedOperator(ctx->prog_, ctx->block_id_,
-                                       interval.at(0), interval.at(1));
-    *interval[0] = std::unique_ptr<OperatorBase>(fused_op);
+    auto* ng_op = new NgraphOperator(ctx->prog_, ctx->block_id_, interval.at(0),
+                                     interval.at(1));
+    *interval[0] = std::unique_ptr<OperatorBase>(ng_op);
   }
   for (auto it = intervals.rbegin(); it != intervals.rend(); ++it) {
     ctx->ops_.erase(it->at(0) + 1, it->at(1));
...
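Note on the interval-erase idiom used by EnableFusedOp above: the first slot of each interval is overwritten with the fused operator, and the remaining slots are erased back-to-front so that iterators of earlier intervals stay valid. A self-contained sketch with toy types (Op, the fused name, and the main() driver are illustrative, not Paddle's API):

#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>

struct Op {
  explicit Op(std::string n) : name(std::move(n)) {}
  std::string name;
};

using OpVec = std::vector<std::unique_ptr<Op>>;
using Interval = std::vector<OpVec::iterator>;  // {first, last}

int main() {
  OpVec ops;
  for (auto n : {"a", "b", "c", "d", "e"}) ops.emplace_back(new Op(n));

  // One interval covering ops[1..4), i.e. "b", "c", "d".
  std::vector<Interval> intervals{{ops.begin() + 1, ops.begin() + 4}};

  // Replace the first op of each interval with the fused op.
  for (auto& interval : intervals) {
    *interval[0] = std::unique_ptr<Op>(new Op("fused(b,c,d)"));
  }
  // Erase the now-dead slots back-to-front, as EnableFusedOp does.
  for (auto it = intervals.rbegin(); it != intervals.rend(); ++it) {
    ops.erase(it->at(0) + 1, it->at(1));
  }

  for (auto& op : ops) std::cout << op->name << "\n";  // prints: a fused(b,c,d) e
}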
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#ifdef PADDLE_WITH_NGRAPH
 #include <algorithm>
 #include <functional>
 #include <vector>
@@ -27,14 +26,15 @@ namespace paddle {
 namespace framework {
 static std::shared_ptr<ngraph::Node> GetNode(
-    const std::shared_ptr<OperatorBase>& op, const std::string prm,
+    const std::shared_ptr<OperatorBase>& op, const std::string name,
     const VariableNameMap& var_map,
     std::shared_ptr<
         std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
         ngb_node_map) {
-  auto& var_names = var_map.at(prm);
+  auto& var_names = var_map.at(name);
   PADDLE_ENFORCE_EQ(var_names.size(), 1,
-                    "op %s prm %s expects one associated var", op->Type(), prm);
+                    "op %s name %s expects one associated var", op->Type(),
+                    name);
   if (ngb_node_map->find(var_names[0]) != ngb_node_map->end()) {
     return (*ngb_node_map)[var_names[0]];
   } else {
@@ -43,42 +43,42 @@ static std::shared_ptr<ngraph::Node> GetNode(
 }
 static std::shared_ptr<ngraph::Node> GetInputNode(
-    const std::shared_ptr<OperatorBase>& op, const std::string prm,
+    const std::shared_ptr<OperatorBase>& op, const std::string name,
     std::shared_ptr<
         std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
         ngb_node_map) {
-  return GetNode(op, prm, op->Inputs(), ngb_node_map);
+  return GetNode(op, name, op->Inputs(), ngb_node_map);
 }
 static std::shared_ptr<ngraph::Node> GetOutputNode(
-    const std::shared_ptr<OperatorBase>& op, const std::string prm,
+    const std::shared_ptr<OperatorBase>& op, const std::string name,
     std::shared_ptr<
         std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
         ngb_node_map) {
-  return GetNode(op, prm, op->Outputs(), ngb_node_map);
+  return GetNode(op, name, op->Outputs(), ngb_node_map);
 }
 static void SetOutputNode(
-    const std::shared_ptr<OperatorBase>& op, const std::string prm,
+    const std::shared_ptr<OperatorBase>& op, const std::string name,
     std::shared_ptr<ngraph::Node> node,
     std::shared_ptr<
         std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
         ngb_node_map) {
-  auto& var_names = op->Outputs().at(prm);
+  auto& var_names = op->Outputs().at(name);
   if (var_names.size() == 1) {
     (*ngb_node_map)[var_names[0]] = node;
   } else if (var_names.size() == 0) {
     (*ngb_node_map)[""] = node;
   } else {
-    PADDLE_THROW("prm %s has more than 1 var_names.", prm);
+    PADDLE_THROW("name %s has more than 1 var_names.", name);
   }
 }
 static bool HasOutput(const std::shared_ptr<OperatorBase>& op,
-                      const std::string prm) {
+                      const std::string name) {
   auto& outputs = op->Outputs();
-  if (outputs.find(prm) == outputs.end()) return false;
-  return outputs.at(prm).size() > 0;
+  if (outputs.find(name) == outputs.end()) return false;
+  return outputs.at(name).size() > 0;
 }
 template <typename T>
@@ -118,4 +118,3 @@ void NgraphBridge::BuildNgNode(const std::shared_ptr<OperatorBase>& op) {
 } // namespace framework
 } // namespace paddle
-#endif
@@ -14,8 +14,6 @@ limitations under the License. */
 #pragma once
-#ifdef PADDLE_WITH_NGRAPH
 #include <algorithm>
 #include <map>
 #include <string>
@@ -53,4 +51,3 @@ class NgraphBridge {
 } // namespace framework
 } // namespace paddle
-#endif
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#ifdef PADDLE_WITH_NGRAPH
 #include <glog/logging.h>
 #include <algorithm>
@@ -58,16 +57,16 @@ typedef enum { /* nGraph support state on ops */
 } op_state;
 // perform graph build through bridge and execute computation
-class NgraphOperator {
+class NgraphEngine {
  public:
-  explicit NgraphOperator(const Scope& scope, const platform::Place& place,
+  explicit NgraphEngine(const Scope& scope, const platform::Place& place,
                         const std::vector<std::shared_ptr<OperatorBase>>& ops,
                         const std::unordered_map<
                             std::string, ngraph::element::Type>& var_type_map,
                         const std::unordered_set<std::string>& persist,
                         const std::unordered_set<std::string>& fetches,
                         const std::unordered_set<std::string>& post_op_inputs,
                         op_state ng_op_state)
       : scope_(scope),
         place_(place),
         fused_ops_(ops),
@@ -132,7 +131,7 @@ class NgraphOperator {
 };
 std::vector<std::vector<std::vector<std::unique_ptr<OperatorBase>>::iterator>>
-FusedOperator::FusedOpIntervals(
+NgraphOperator::NgraphOpIntervals(
     std::vector<std::unique_ptr<paddle::framework::OperatorBase>>* ops) {
   std::vector<std::vector<std::vector<std::unique_ptr<OperatorBase>>::iterator>>
       intervals;
@@ -185,7 +184,7 @@ FusedOperator::FusedOpIntervals(
   return intervals;
 }
-FusedOperator::FusedOperator(
+NgraphOperator::NgraphOperator(
     const ProgramDesc& prog, size_t block_id,
     std::vector<std::unique_ptr<OperatorBase>>::iterator start,
     std::vector<std::unique_ptr<OperatorBase>>::iterator end,
@@ -215,7 +214,7 @@ FusedOperator::FusedOperator(
   Process();
 }
-void FusedOperator::Process() {
+void NgraphOperator::Process() {
   auto& bdesc = pdesc_.Block(block_);
   for (auto& var : bdesc.AllVars()) {
     if (!(var->GetType() == proto::VarType::SELECTED_ROWS ||
@@ -251,8 +250,8 @@ void FusedOperator::Process() {
   }
 }
-void FusedOperator::RunImpl(const Scope& scope,
-                            const platform::Place& place) const {
+void NgraphOperator::RunImpl(const Scope& scope,
+                             const platform::Place& place) const {
   op_state ng_op_state = PARTIAL_TEST;
   auto& bdesc = pdesc_.Block(block_);
   for (auto* op : bdesc.AllOps()) {
@@ -266,19 +265,19 @@ void FusedOperator::RunImpl(const Scope& scope,
     ng_op_state = ng_op_state == PARTIAL_TEST ? FULL_TEST : FULL_TRAIN;
   }
-  NgraphOperator ngraph_op(scope, place, fused_ops_, var_type_map_,
-                           persistables_, fetches_, post_op_inputs_,
-                           ng_op_state);
-  ngraph_op.Run(scope, place);
+  NgraphEngine ngraph_engine(scope, place, fused_ops_, var_type_map_,
+                             persistables_, fetches_, post_op_inputs_,
+                             ng_op_state);
+  ngraph_engine.Run(scope, place);
 }
 std::unordered_map<std::string, std::shared_ptr<ngraph::Function>>
-    NgraphOperator::func_cache_ = {};
+    NgraphEngine::func_cache_ = {};
-std::shared_ptr<ngraph::runtime::Backend> NgraphOperator::backend_ =
+std::shared_ptr<ngraph::runtime::Backend> NgraphEngine::backend_ =
     ngraph::runtime::Backend::create("CPU");
-void NgraphOperator::GetNgInputShape(std::shared_ptr<OperatorBase> op) {
+void NgraphEngine::GetNgInputShape(std::shared_ptr<OperatorBase> op) {
   op->RuntimeInferShape(scope_, place_);
   for (auto& var_name_item : op->Inputs()) {
     for (auto& var_name : var_name_item.second) {
@@ -301,7 +300,7 @@ void NgraphOperator::GetNgInputShape(std::shared_ptr<OperatorBase> op) {
     }
   }
-void NgraphOperator::BuildNgNodes() {
+void NgraphEngine::BuildNgNodes() {
   for (auto& var_name : var_out_) {
     if (var_node_map_->find(var_name) == var_node_map_->end()) {
       auto* var = scope_.FindVar(var_name);
@@ -323,7 +322,7 @@ void NgraphOperator::BuildNgNodes() {
   }
 }
-void NgraphOperator::BuildNgIO() {
+void NgraphEngine::BuildNgIO() {
   std::unordered_set<std::string> inputs;
   std::unordered_set<std::string> outputs;
@@ -395,7 +394,7 @@ void NgraphOperator::BuildNgIO() {
   }
 }
-void NgraphOperator::BuildNgFunction() {
+void NgraphEngine::BuildNgFunction() {
   BuildNgNodes();
   ngraph_function_ = nullptr;
   ngraph::NodeVector func_outputs;
@@ -416,7 +415,7 @@ void NgraphOperator::BuildNgFunction() {
       std::make_shared<ngraph::Function>(func_outputs, func_inputs);
 }
-std::shared_ptr<std::string> NgraphOperator::GetCacheKey() {
+std::shared_ptr<std::string> NgraphEngine::GetCacheKey() {
   auto cache_key = std::make_shared<std::string>("");
   *cache_key += std::to_string(fused_ops_.size());
   for (auto& op : fused_ops_) {
@@ -444,7 +443,7 @@ std::shared_ptr<std::string> NgraphOperator::GetCacheKey() {
   return cache_key;
 }
-void NgraphOperator::GetNgFunction() {
+void NgraphEngine::GetNgFunction() {
   bool cache_on = true;
   if (cache_on) {
     std::string cache_key_val = *GetCacheKey();
@@ -459,8 +458,7 @@ void NgraphOperator::GetNgFunction() {
   }
 }
-void NgraphOperator::Run(const Scope& scope,
-                         const platform::Place& place) const {
+void NgraphEngine::Run(const Scope& scope, const platform::Place& place) const {
   std::vector<std::shared_ptr<ngraph::runtime::Tensor>> t_in;
   std::vector<std::shared_ptr<ngraph::runtime::Tensor>> t_out;
@@ -545,7 +543,6 @@ void NgraphOperator::Run(const Scope& scope,
   }
   backend_->call(ngraph_function_, t_out, t_in);
-}  // NgraphOperator::RunImpl
+}  // NgraphEngine::RunImpl
 } // namespace framework
 } // namespace paddle
-#endif
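Note on GetNgFunction/func_cache_ renamed above: the engine builds the nGraph function once per cache key (a string encoding the fused ops and their input shapes) and reuses it afterwards. A minimal sketch of that build-once caching idiom, with a stand-in Function type and an illustrative GetOrBuild helper (not Paddle's API):

#include <functional>
#include <memory>
#include <string>
#include <unordered_map>

struct Function {};  // stand-in for ngraph::Function

// Return the cached function for cache_key, building it on the first request.
std::shared_ptr<Function> GetOrBuild(
    const std::string& cache_key,
    const std::function<std::shared_ptr<Function>()>& build) {
  static std::unordered_map<std::string, std::shared_ptr<Function>> cache;
  auto it = cache.find(cache_key);
  if (it != cache.end()) return it->second;  // hit: reuse the compiled function
  auto fn = build();                         // miss: build and remember it
  cache[cache_key] = fn;
  return fn;
}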
@@ -14,8 +14,6 @@ limitations under the License. */
 #pragma once
-#ifdef PADDLE_WITH_NGRAPH
 #include <algorithm>
 #include <string>
 #include <unordered_map>
@@ -34,14 +32,14 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
-class FusedOperator : public OperatorBase {
+class NgraphOperator : public OperatorBase {
  public:
   static std::vector<
       std::vector<std::vector<std::unique_ptr<OperatorBase>>::iterator>>
-  FusedOpIntervals(
+  NgraphOpIntervals(
       std::vector<std::unique_ptr<paddle::framework::OperatorBase>>* ops);
-  explicit FusedOperator(
+  explicit NgraphOperator(
       const ProgramDesc& prog, size_t block_id,
       std::vector<std::unique_ptr<OperatorBase>>::iterator start,
       std::vector<std::unique_ptr<OperatorBase>>::iterator end,
@@ -64,4 +62,3 @@ class FusedOperator : public OperatorBase {
 };
 } // namespace framework
 } // namespace paddle
-#endif
@@ -44,9 +44,10 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
     argument->SetMainProgram(program.release());
   } else if (argument->model_program_path_valid() &&
              argument->model_params_path_valid()) {
-    auto program =
-        LoadModel(argument->model_program_path(), argument->model_params_path(),
-                  argument->scope_ptr(), place, argument->model_from_memory());
+    auto program = LoadModel(
+        argument->model_program_path(), argument->model_params_path(),
+        argument->scope_ptr(), place,
+        argument->model_from_memory_valid() && argument->model_from_memory());
     argument->SetMainProgram(program.release());
   } else {
     PADDLE_THROW(
...
-set(INFERENCE_EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis_predictor)
+set(INFERENCE_EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis_predictor benchmark)
 if(WITH_GPU AND TENSORRT_FOUND)
   set(INFERENCE_EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} analysis ${analysis_deps} ir_pass_manager analysis_predictor)
...
@@ -30,8 +30,10 @@
 #include "paddle/fluid/inference/api/helper.h"
 #include "paddle/fluid/inference/tests/api/config_printer.h"
 #include "paddle/fluid/inference/tests/test_helper.h"
+#include "paddle/fluid/inference/utils/benchmark.h"
 #include "paddle/fluid/platform/profiler.h"
+DEFINE_string(model_name, "", "model name");
 DEFINE_string(infer_model, "", "model path");
 DEFINE_string(infer_data, "", "data file");
 DEFINE_int32(batch_size, 1, "batch size.");
@@ -40,6 +42,8 @@ DEFINE_bool(test_all_data, false, "Test the all dataset in data file.");
 DEFINE_int32(num_threads, 1, "Running the inference program in multi-threads.");
 DEFINE_bool(use_analysis, true,
             "Running the inference program in analysis mode.");
+DEFINE_bool(record_benchmark, false,
+            "Record benchmark after profiling the model");
 DECLARE_bool(profile);
 DECLARE_int32(paddle_num_threads);
@@ -192,8 +196,16 @@ void TestOneThreadPrediction(
       predictor->Run(inputs[j], outputs, batch_size);
     }
   }
-  PrintTime(batch_size, num_times, 1, 0, run_timer.toc() / num_times,
-            inputs.size());
+
+  double latency = run_timer.toc() / num_times;
+  PrintTime(batch_size, num_times, 1, 0, latency, inputs.size());
+  if (FLAGS_record_benchmark) {
+    Benchmark benchmark;
+    benchmark.SetName(FLAGS_model_name);
+    benchmark.SetBatchSize(batch_size);
+    benchmark.SetLatency(latency);
+    benchmark.PersistToFile("benchmark_record.txt");
+  }
 }
 }
...
@@ -135,6 +135,9 @@ TEST(TensorRT_resnext50, compare) {
 TEST(TensorRT_resnext50, profile) {
   std::string model_dir = FLAGS_infer_model + "/resnext50";
+  // Set FLAGS_record_benchmark to true to record benchmark to file.
+  // FLAGS_record_benchmark=true;
+  FLAGS_model_name = "resnext50";
   profile(model_dir, /* use_analysis */ true, FLAGS_use_tensorrt);
 }
...
@@ -30,7 +30,7 @@ std::string Benchmark::SerializeToString() const {
   ss << '\n';
   ss << name_ << "\t";
-  ss << batch_size_ << "\t";
+  ss << batch_size_ << "\t\t";
   ss << num_threads_ << "\t";
   ss << latency_ << "\t";
   ss << 1000.0 / latency_;
...
@@ -26,9 +26,6 @@ DEFINE_string(model_dir, "", "model directory");
 DEFINE_string(model_program_path, "", "model program path");
 DEFINE_string(model_params_path, "", "model params path");
-USE_PASS(graph_viz_pass);
-USE_PASS(graph_to_program_pass);
 using paddle::inference::analysis::Argument;
 namespace paddle {
@@ -40,7 +37,6 @@ void Visualizer::SetArgument(Argument *argument) { argument_ = argument; }
 bool Visualizer::Run() {
   paddle::framework::InitDevices(false);
-  paddle::inference::analysis::Analyzer().Run(argument_);
   return true;
 }
@@ -77,7 +73,7 @@ int main(int argc, char *argv[]) {
   // Only 1 pass, default filename is 0_ir_origin.dot
   // For more details, looking for paddle::inference::analysis::IRPassManager
-  argument.SetIrAnalysisPasses({"graph_viz_pass"});
+  argument.SetIrAnalysisPasses({"infer_clean_graph_pass", "graph_viz_pass"});
   std::unique_ptr<paddle::framework::Scope> scope{
       new paddle::framework::Scope()};
@@ -90,3 +86,7 @@ int main(int argc, char *argv[]) {
   return 0;
 }
+USE_PASS(infer_clean_graph_pass);
+USE_PASS(graph_viz_pass);
+USE_PASS(graph_to_program_pass);
@@ -301,23 +301,22 @@ template <typename T>
 struct GeluFunctor : public BaseActivationFunctor<T> {
   template <typename Device, typename X, typename Out>
   void operator()(Device d, X x, Out out) const {
-    auto temp =
-        ((x * static_cast<T>(M_SQRT1_2)).erf()).template cast<T>().eval();
+    auto temp = (x * static_cast<T>(M_SQRT1_2)).erf();
     out.device(d) = x * static_cast<T>(0.5) * (static_cast<T>(1) + temp);
   }
 };
 template <typename T>
 struct GeluGradFunctor : BaseActivationFunctor<T> {
-  bool Inplace() const { return IsInplace("gelu"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
-    auto temp = (static_cast<T>(0.5 * M_2_SQRTPI * M_SQRT1_2) * x *
-                 ((-static_cast<T>(0.5) * x.square()).exp()))
-                    .template cast<T>()
-                    .eval();
-    dx.device(d) = dout * (out / x + temp);
+    auto first = static_cast<T>(0.5) *
+                 (static_cast<T>(1) + ((x * static_cast<T>(M_SQRT1_2)).erf()));
+
+    auto second = static_cast<T>(0.5 * M_2_SQRTPI * M_SQRT1_2) * x *
+                  (-static_cast<T>(0.5) * x.square()).exp();
+    dx.device(d) = dout * (first + second);
   }
 };
...
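Note on the GeluGradFunctor rewrite above: the old code reconstructed the gradient as dout * (out / x + temp), which is ill-defined at x = 0; the new code evaluates the analytic derivative directly. With

\mathrm{GELU}(x) = \tfrac{1}{2}\, x \left(1 + \operatorname{erf}\!\left(x/\sqrt{2}\right)\right),

the derivative is

\frac{d}{dx}\,\mathrm{GELU}(x) = \underbrace{\tfrac{1}{2}\left(1 + \operatorname{erf}\!\left(x/\sqrt{2}\right)\right)}_{\text{first}} + \underbrace{\tfrac{x}{\sqrt{2\pi}}\, e^{-x^{2}/2}}_{\text{second}},

and the constant in the second term matches the code because 0.5 \cdot \mathrm{M\_2\_SQRTPI} \cdot \mathrm{M\_SQRT1\_2} = \tfrac{1}{2} \cdot \tfrac{2}{\sqrt{\pi}} \cdot \tfrac{1}{\sqrt{2}} = \tfrac{1}{\sqrt{2\pi}}.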
@@ -158,7 +158,7 @@ ChannelQueuePtr BRPCClient::GetChannel(const std::string& ep) {
   for (int i = 0; i < FLAGS_brpc_channel_num; ++i) {
     std::shared_ptr<ChannelContext> c(new ChannelContext());
     if (c->channel.Init(ep.c_str(), &options) != 0) {
-      LOG(ERROR) << "Fail to initialize channel";
+      LOG(FATAL) << "Fail to initialize channel";
       return nullptr;
     }
...
@@ -390,8 +390,7 @@ void GRPCClient::Proceed() {
       VLOG(3) << c->GetVarHandlePtr()->String() << " process";
       c->Process();
     } else if (c->status_.error_code() == grpc::StatusCode::DEADLINE_EXCEEDED) {
-      // FIXME(gongwb): parse error_details?
-      LOG(ERROR) << c->GetVarHandlePtr()->String()
+      LOG(FATAL) << c->GetVarHandlePtr()->String()
                  << " meets grpc error, error_code:" << c->status_.error_code()
                  << " error_message:" << c->status_.error_message()
                  << " error_details:" << c->status_.error_details();
...
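One behavioral note on the LOG(ERROR) to LOG(FATAL) changes in both RPC clients above: with glog, FATAL logs the message and then aborts the process, so the "return nullptr;" kept after it in the brpc change is effectively unreachable. A minimal glog sketch of the difference (illustrative standalone program, not Paddle code):

#include <glog/logging.h>

int main(int argc, char* argv[]) {
  google::InitGoogleLogging(argv[0]);
  FLAGS_logtostderr = true;             // print to stderr for this demo
  LOG(ERROR) << "recoverable error";    // logged, execution continues
  LOG(FATAL) << "unrecoverable error";  // logged, then the process aborts
  return 0;                             // never reached
}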