提交 4b9fa423 编写于 作者: N nhzlx

Cherry-pick from 16813: change GraphGlobalMem singleton to graph RegistBlock

test=release/1.4
上级 e14ab180
...@@ -48,18 +48,37 @@ void FCFusePass::ApplyImpl(ir::Graph* graph) const { ...@@ -48,18 +48,37 @@ void FCFusePass::ApplyImpl(ir::Graph* graph) const {
GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern);
GET_IR_NODE_FROM_SUBGRAPH(mul_out, mul_out, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(mul_out, mul_out, fc_pattern);
auto base_op_desc = *mul->Op()->Proto(); auto base_op_desc = mul->Op();
// Create an FC Node. // Create an FC Node.
OpDesc desc(base_op_desc, nullptr); // OpDesc desc(base_op_desc, nullptr);
OpDesc desc;
std::string fc_x_in = subgraph.at(x)->Name(); std::string fc_x_in = subgraph.at(x)->Name();
std::string fc_Y_in = w->Name(); std::string fc_Y_in = w->Name();
std::string fc_bias_in = fc_bias->Name(); std::string fc_bias_in = fc_bias->Name();
std::string fc_out_out = fc_out->Name(); std::string fc_out_out = fc_out->Name();
desc.SetInput("Input", std::vector<std::string>({fc_x_in})); desc.SetInput("Input", std::vector<std::string>({fc_x_in}));
desc.SetInput("W", std::vector<std::string>({fc_Y_in})); desc.SetInput("W", std::vector<std::string>({fc_Y_in}));
desc.SetInput("Bias", std::vector<std::string>({fc_bias_in})); desc.SetInput("Bias", std::vector<std::string>({fc_bias_in}));
desc.SetOutput("Out", std::vector<std::string>({fc_out_out})); desc.SetOutput("Out", std::vector<std::string>({fc_out_out}));
desc.SetAttr("in_num_col_dims", mul->Op()->GetAttr("x_num_col_dims")); desc.SetAttr("in_num_col_dims", mul->Op()->GetAttr("x_num_col_dims"));
// For anakin subgraph int8:
// In anakin subgraph int8 mode, a pattern such as "fake_quant + mul +
// fake_dequant" can be detected by the quant_dequant_fuse_pass. That pass
// extracts "input_scale" and "weight_scale" from the fake_quant op and the
// fake_dequant op, adds them to the mul op, and then deletes the fake_quant
// op and the fake_dequant op from the graph. If the mul op has this scale
// info, we should add it to the fused fc op as well.
if (base_op_desc->HasAttr("enable_int8")) {
desc.SetAttr("enable_int8", base_op_desc->GetAttr("enable_int8"));
desc.SetAttr("input_scale", base_op_desc->GetAttr("input_scale"));
desc.SetAttr("weight_scale", base_op_desc->GetAttr("weight_scale"));
}
desc.SetType("fc"); desc.SetType("fc");
auto fc_node = g->CreateOpNode(&desc); // OpDesc will be copied. auto fc_node = g->CreateOpNode(&desc); // OpDesc will be copied.
GraphSafeRemoveNodes(graph, {mul, elementwise_add, mul_out}); GraphSafeRemoveNodes(graph, {mul, elementwise_add, mul_out});
......
...@@ -38,13 +38,13 @@ void AffineChannelOpConverter<TargetT, PrecisionT>::operator()( ...@@ -38,13 +38,13 @@ void AffineChannelOpConverter<TargetT, PrecisionT>::operator()(
// Copy the Scale to CPUPlace and get the pointer. // Copy the Scale to CPUPlace and get the pointer.
auto *scale_v = scope.FindVar(op_desc.Input("Scale").front()); auto *scale_v = scope.FindVar(op_desc.Input("Scale").front());
PADDLE_ENFORCE_NOT_NULL(scale_v); PADDLE_ENFORCE_NOT_NULL(scale_v);
auto weight1 = pblock_from_var<TargetT>(*scale_v); auto weight1 = pblock_from_var<TargetT, PrecisionT>(*scale_v, this->engine_);
this->engine_->AddOpAttr(op_name, "weight_1", *weight1); this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
// Copy the Bias to CPUPlace and get the pointer. // Copy the Bias to CPUPlace and get the pointer.
auto *bias_v = scope.FindVar(op_desc.Input("Bias").front()); auto *bias_v = scope.FindVar(op_desc.Input("Bias").front());
PADDLE_ENFORCE_NOT_NULL(bias_v); PADDLE_ENFORCE_NOT_NULL(bias_v);
auto weight2 = pblock_from_var<TargetT>(*bias_v); auto weight2 = pblock_from_var<TargetT, PrecisionT>(*bias_v, this->engine_);
this->engine_->AddOpAttr(op_name, "weight_2", *weight2); this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
} }
......
...@@ -54,25 +54,27 @@ void BatchNormOpConverter<TargetT, PrecisionT>::operator()( ...@@ -54,25 +54,27 @@ void BatchNormOpConverter<TargetT, PrecisionT>::operator()(
auto *mean_v = scope.FindVar(op_desc.Input("Mean").front()); auto *mean_v = scope.FindVar(op_desc.Input("Mean").front());
PADDLE_ENFORCE_NOT_NULL(mean_v); PADDLE_ENFORCE_NOT_NULL(mean_v);
auto weight1 = pblock_from_var<TargetT>(*mean_v); auto weight1 = pblock_from_var<TargetT, PrecisionT>(*mean_v, this->engine_);
this->engine_->AddOpAttr(bn_op_name, "weight_1", *weight1); this->engine_->AddOpAttr(bn_op_name, "weight_1", *weight1);
auto *variance_v = scope.FindVar(op_desc.Input("Variance").front()); auto *variance_v = scope.FindVar(op_desc.Input("Variance").front());
PADDLE_ENFORCE_NOT_NULL(variance_v); PADDLE_ENFORCE_NOT_NULL(variance_v);
auto weight2 = pblock_from_var<TargetT>(*variance_v); auto weight2 =
pblock_from_var<TargetT, PrecisionT>(*variance_v, this->engine_);
this->engine_->AddOpAttr(bn_op_name, "weight_2", *weight2); this->engine_->AddOpAttr(bn_op_name, "weight_2", *weight2);
auto *weight3 = pblock_from_vector<TargetT>(std::vector<float>({1})); auto *weight3 = pblock_from_vector<TargetT, PrecisionT>(
std::vector<float>({1}), this->engine_);
this->engine_->AddOpAttr(bn_op_name, "weight_3", *weight3); this->engine_->AddOpAttr(bn_op_name, "weight_3", *weight3);
auto *scale_v = scope.FindVar(op_desc.Input("Scale").front()); auto *scale_v = scope.FindVar(op_desc.Input("Scale").front());
PADDLE_ENFORCE_NOT_NULL(scale_v); PADDLE_ENFORCE_NOT_NULL(scale_v);
auto scale = pblock_from_var<TargetT>(*scale_v); auto scale = pblock_from_var<TargetT, PrecisionT>(*scale_v, this->engine_);
this->engine_->AddOpAttr(scale_op_name, "weight_1", *scale); this->engine_->AddOpAttr(scale_op_name, "weight_1", *scale);
auto *bias_v = scope.FindVar(op_desc.Input("Bias").front()); auto *bias_v = scope.FindVar(op_desc.Input("Bias").front());
PADDLE_ENFORCE_NOT_NULL(bias_v); PADDLE_ENFORCE_NOT_NULL(bias_v);
auto bias = pblock_from_var<TargetT>(*bias_v); auto bias = pblock_from_var<TargetT, PrecisionT>(*bias_v, this->engine_);
this->engine_->AddOpAttr(scale_op_name, "weight_2", *bias); this->engine_->AddOpAttr(scale_op_name, "weight_2", *bias);
} }
......
...@@ -71,8 +71,9 @@ void Conv2dOpConverter<TargetT, PrecisionT>::operator()( ...@@ -71,8 +71,9 @@ void Conv2dOpConverter<TargetT, PrecisionT>::operator()(
const float int8_range = 127.; const float int8_range = 127.;
float in_scale = boost::get<float>(op_desc.GetAttr("input_scale")); float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
float weight_scale = boost::get<float>(op_desc.GetAttr("weight_scale")); float weight_scale = boost::get<float>(op_desc.GetAttr("weight_scale"));
auto *weight1 = ::anakin::graph::GraphGlobalMem<TargetT>::Global() PBlock<TargetT> *weight1 =
.template new_block<::anakin::AK_INT8>(anakin_shape); new PBlock<TargetT>(anakin_shape, ::anakin::AK_INT8);
this->engine_->RegistBlock(weight1);
float *weight_data = weight_tensor->data<float>(); float *weight_data = weight_tensor->data<float>();
std::vector<char> weight_int8; std::vector<char> weight_int8;
int weight_num = weight_tensor->numel(); int weight_num = weight_tensor->numel();
...@@ -94,7 +95,8 @@ void Conv2dOpConverter<TargetT, PrecisionT>::operator()( ...@@ -94,7 +95,8 @@ void Conv2dOpConverter<TargetT, PrecisionT>::operator()(
{weight_scale / int8_range}, false); {weight_scale / int8_range}, false);
this->engine_->AddTensorScale(input_name, in_scale / int8_range); this->engine_->AddTensorScale(input_name, in_scale / int8_range);
} else { } else {
auto *weight1 = pblock_from_tensor<TargetT>(*weight_tensor, weight_shape); auto *weight1 = pblock_from_tensor<TargetT, PrecisionT>(
*weight_tensor, weight_shape, this->engine_);
this->engine_->AddOpAttr(op_name, "weight_1", *weight1); this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
} }
} }
......
...@@ -73,8 +73,9 @@ void Conv2dFusionOpConverter<TargetT, PrecisionT>::operator()( ...@@ -73,8 +73,9 @@ void Conv2dFusionOpConverter<TargetT, PrecisionT>::operator()(
const float int8_range = 127.; const float int8_range = 127.;
float in_scale = boost::get<float>(op_desc.GetAttr("input_scale")); float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
float weight_scale = boost::get<float>(op_desc.GetAttr("weight_scale")); float weight_scale = boost::get<float>(op_desc.GetAttr("weight_scale"));
auto *weight1 = ::anakin::graph::GraphGlobalMem<TargetT>::Global() PBlock<TargetT> *weight1 =
.template new_block<::anakin::AK_INT8>(anakin_shape); new PBlock<TargetT>(anakin_shape, ::anakin::AK_INT8);
this->engine_->RegistBlock(weight1);
float *weight_data = weight_tensor->data<float>(); float *weight_data = weight_tensor->data<float>();
std::vector<char> weight_int8; std::vector<char> weight_int8;
int weight_num = weight_tensor->numel(); int weight_num = weight_tensor->numel();
...@@ -98,9 +99,10 @@ void Conv2dFusionOpConverter<TargetT, PrecisionT>::operator()( ...@@ -98,9 +99,10 @@ void Conv2dFusionOpConverter<TargetT, PrecisionT>::operator()(
} else { } else {
auto weight_tensor = tensor_from_var(*filter_v, platform::CPUPlace()); auto weight_tensor = tensor_from_var(*filter_v, platform::CPUPlace());
auto weight_shape = framework::vectorize2int(weight_tensor->dims()); auto weight_shape = framework::vectorize2int(weight_tensor->dims());
auto *weight1 = pblock_from_tensor<TargetT>(*weight_tensor, weight_shape); auto *weight1 = pblock_from_tensor<TargetT, PrecisionT>(
*weight_tensor, weight_shape, this->engine_);
this->engine_->AddOpAttr(op_name, "weight_1", *weight1); this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
auto weight2 = pblock_from_var<TargetT>(*b_v); auto weight2 = pblock_from_var<TargetT, PrecisionT>(*b_v, this->engine_);
this->engine_->AddOpAttr(op_name, "weight_2", *weight2); this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
} }
} }
......
...@@ -39,7 +39,8 @@ void DropoutOpConverter<TargetT, PrecisionT>::operator()( ...@@ -39,7 +39,8 @@ void DropoutOpConverter<TargetT, PrecisionT>::operator()(
auto dropout_prob = boost::get<float>(op_desc.GetAttr("dropout_prob")); auto dropout_prob = boost::get<float>(op_desc.GetAttr("dropout_prob"));
auto factor = 1 - dropout_prob; auto factor = 1 - dropout_prob;
auto *weight1 = pblock_from_vector<TargetT>(std::vector<float>({factor})); auto *weight1 = pblock_from_vector<TargetT, PrecisionT>(
std::vector<float>({factor}), this->engine_);
this->engine_->AddOpAttr(op_name, "weight_1", *weight1); this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
this->engine_->AddOpAttr(op_name, "axis", 0); this->engine_->AddOpAttr(op_name, "axis", 0);
......
...@@ -77,8 +77,9 @@ void FcBaseOpConverter<TargetT, PrecisionT>::operator()( ...@@ -77,8 +77,9 @@ void FcBaseOpConverter<TargetT, PrecisionT>::operator()(
const float int8_range = 127.; const float int8_range = 127.;
float in_scale = boost::get<float>(op_desc.GetAttr("input_scale")); float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
float weight_scale = boost::get<float>(op_desc.GetAttr("weight_scale")); float weight_scale = boost::get<float>(op_desc.GetAttr("weight_scale"));
auto *weight1 = ::anakin::graph::GraphGlobalMem<TargetT>::Global() PBlock<TargetT> *weight1 =
.template new_block<::anakin::AK_INT8>(anakin_shape); new PBlock<TargetT>(anakin_shape, ::anakin::AK_INT8);
this->engine_->RegistBlock(weight1);
std::vector<char> weight_int8; std::vector<char> weight_int8;
for (int i = 0; i < weight_num; i++) { for (int i = 0; i < weight_num; i++) {
bool is_valid_int8 = bool is_valid_int8 =
...@@ -98,7 +99,8 @@ void FcBaseOpConverter<TargetT, PrecisionT>::operator()( ...@@ -98,7 +99,8 @@ void FcBaseOpConverter<TargetT, PrecisionT>::operator()(
{weight_scale / int8_range}, false); {weight_scale / int8_range}, false);
this->engine_->AddTensorScale(input_name, in_scale / int8_range); this->engine_->AddTensorScale(input_name, in_scale / int8_range);
} else { } else {
auto *weight1 = pblock_from_vector<TargetT>(trans_weight_data); auto *weight1 = pblock_from_vector<TargetT, PrecisionT>(trans_weight_data,
this->engine_);
this->engine_->AddOpAttr(op_name, "weight_1", *weight1); this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
} }
...@@ -106,7 +108,7 @@ void FcBaseOpConverter<TargetT, PrecisionT>::operator()( ...@@ -106,7 +108,7 @@ void FcBaseOpConverter<TargetT, PrecisionT>::operator()(
if (with_bias) { if (with_bias) {
auto *b_v = scope.FindVar(op_desc.Input("Bias").front()); auto *b_v = scope.FindVar(op_desc.Input("Bias").front());
PADDLE_ENFORCE_NOT_NULL(b_v); PADDLE_ENFORCE_NOT_NULL(b_v);
auto weight2 = pblock_from_var<TargetT>(*b_v); auto weight2 = pblock_from_var<TargetT, PrecisionT>(*b_v, this->engine_);
this->engine_->AddOpAttr(op_name, "weight_2", *weight2); this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
} }
} }
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/variable.h" #include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/inference/anakin/engine.h"
#include "framework/core/net/net.h" #include "framework/core/net/net.h"
#include "framework/core/types.h" #include "framework/core/types.h"
...@@ -29,8 +30,8 @@ ...@@ -29,8 +30,8 @@
using anakin::saber::Shape; using anakin::saber::Shape;
using anakin::AK_FLOAT; using anakin::AK_FLOAT;
using anakin::AK_INT8;
using anakin::PBlock; using anakin::PBlock;
using anakin::graph::GraphGlobalMem;
namespace paddle { namespace paddle {
namespace inference { namespace inference {
...@@ -38,31 +39,34 @@ namespace anakin { ...@@ -38,31 +39,34 @@ namespace anakin {
std::unique_ptr<framework::LoDTensor> tensor_from_var( std::unique_ptr<framework::LoDTensor> tensor_from_var(
const framework::Variable& var, const platform::Place& place); const framework::Variable& var, const platform::Place& place);
template <typename T>
PBlock<T>* pblock_from_tensor(const framework::LoDTensor& tensor, template <typename TargetT, ::anakin::Precision PrecisionT>
std::vector<int> shape) { PBlock<TargetT>* pblock_from_tensor(const framework::LoDTensor& tensor,
while (shape.size() < 4) { std::vector<int> shape_vec,
shape.insert(shape.begin(), 1); AnakinEngine<TargetT, PrecisionT>* engine) {
while (shape_vec.size() < 4) {
shape_vec.insert(shape_vec.begin(), 1);
} }
Shape anakin_shape(shape); Shape shape(shape_vec);
auto* weight = PBlock<TargetT>* weight = new PBlock<TargetT>(shape, AK_FLOAT);
GraphGlobalMem<T>::Global().template new_block<AK_FLOAT>(anakin_shape); engine->RegistBlock(weight);
float* cpu_data = static_cast<float*>(weight->h_tensor().mutable_data()); float* cpu_data = static_cast<float*>(weight->h_tensor().mutable_data());
std::copy_n(tensor.data<float>(), tensor.numel(), cpu_data); std::copy_n(tensor.data<float>(), tensor.numel(), cpu_data);
weight->d_tensor().set_shape(anakin_shape); weight->d_tensor().set_shape(shape);
weight->d_tensor().copy_from(weight->h_tensor()); weight->d_tensor().copy_from(weight->h_tensor());
return weight; return weight;
} }
template <typename T> template <typename TargetT, ::anakin::Precision PrecisionT>
PBlock<T>* pblock_from_vector(const std::vector<float>& vec, PBlock<TargetT>* pblock_from_vector(const std::vector<float>& vec,
std::vector<int> shape_vec) { std::vector<int> shape_vec,
AnakinEngine<TargetT, PrecisionT>* engine) {
while (shape_vec.size() < 4) { while (shape_vec.size() < 4) {
shape_vec.insert(shape_vec.begin(), 1); shape_vec.insert(shape_vec.begin(), 1);
} }
Shape shape(shape_vec); Shape shape(shape_vec);
auto* weight = PBlock<TargetT>* weight = new PBlock<TargetT>(shape, AK_FLOAT);
GraphGlobalMem<T>::Global().template new_block<AK_FLOAT>(shape); engine->RegistBlock(weight);
auto* weight_data = static_cast<float*>(weight->h_tensor().mutable_data()); auto* weight_data = static_cast<float*>(weight->h_tensor().mutable_data());
std::copy(std::begin(vec), std::end(vec), weight_data); std::copy(std::begin(vec), std::end(vec), weight_data);
weight->d_tensor().set_shape(shape); weight->d_tensor().set_shape(shape);
...@@ -70,17 +74,20 @@ PBlock<T>* pblock_from_vector(const std::vector<float>& vec, ...@@ -70,17 +74,20 @@ PBlock<T>* pblock_from_vector(const std::vector<float>& vec,
return weight; return weight;
} }
template <typename T> template <typename TargetT, ::anakin::Precision PrecisionT>
PBlock<T>* pblock_from_vector(const std::vector<float>& vec) { PBlock<TargetT>* pblock_from_vector(const std::vector<float>& vec,
AnakinEngine<TargetT, PrecisionT>* engine) {
int size = vec.size(); int size = vec.size();
return pblock_from_vector<T>(vec, std::vector<int>({1, 1, 1, size})); return pblock_from_vector<TargetT, PrecisionT>(
vec, std::vector<int>({1, 1, 1, size}), engine);
} }
template <typename T> template <typename TargetT, ::anakin::Precision PrecisionT>
PBlock<T>* pblock_from_var(const framework::Variable& var) { PBlock<TargetT>* pblock_from_var(const framework::Variable& var,
AnakinEngine<TargetT, PrecisionT>* engine) {
auto tensor = tensor_from_var(var, platform::CPUPlace()); auto tensor = tensor_from_var(var, platform::CPUPlace());
auto shape = framework::vectorize2int(tensor->dims()); auto shape = framework::vectorize2int(tensor->dims());
return pblock_from_tensor<T>(*tensor, shape); return pblock_from_tensor<TargetT, PrecisionT>(*tensor, shape, engine);
} }
} // namespace anakin } // namespace anakin
......
...@@ -162,6 +162,12 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Optimize() { ...@@ -162,6 +162,12 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Optimize() {
PADDLE_ENFORCE(graph_->Optimize(), "Graph optimization."); PADDLE_ENFORCE(graph_->Optimize(), "Graph optimization.");
} }
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::RegistBlock(
::anakin::PBlock<TargetT> *block_p) {
PADDLE_ENFORCE(graph_->RegistBlock(block_p), "Block register.");
}
template <typename TargetT, Precision PrecisionType, OpRunType RunType> template <typename TargetT, Precision PrecisionType, OpRunType RunType>
std::unique_ptr<AnakinEngine<TargetT, PrecisionType, RunType>> std::unique_ptr<AnakinEngine<TargetT, PrecisionType, RunType>>
AnakinEngine<TargetT, PrecisionType, RunType>::Clone() { AnakinEngine<TargetT, PrecisionType, RunType>::Clone() {
......
...@@ -90,6 +90,7 @@ class AnakinEngine { ...@@ -90,6 +90,7 @@ class AnakinEngine {
int GetMaxBatchSize() { return max_batch_size_; } int GetMaxBatchSize() { return max_batch_size_; }
void Freeze(); void Freeze();
void Optimize(); void Optimize();
void RegistBlock(::anakin::PBlock<TargetT> *block_p);
void Save(std::string path) { graph_->save(path); } void Save(std::string path) { graph_->save(path); }
bool IsInit() { return initialized_; } bool IsInit() { return initialized_; }
int GetDevice() { return device_; } int GetDevice() { return device_; }
......
...@@ -19,7 +19,6 @@ limitations under the License. */ ...@@ -19,7 +19,6 @@ limitations under the License. */
#include "paddle/fluid/inference/anakin/engine.h" #include "paddle/fluid/inference/anakin/engine.h"
using anakin::graph::GraphGlobalMem;
using anakin::AK_FLOAT; using anakin::AK_FLOAT;
using anakin::Precision; using anakin::Precision;
using anakin::saber::NV; using anakin::saber::NV;
...@@ -52,11 +51,9 @@ TEST_F(TestAnakinEngine, Execute) { ...@@ -52,11 +51,9 @@ TEST_F(TestAnakinEngine, Execute) {
engine_->AddOpAttr("op1", "axis", 1); engine_->AddOpAttr("op1", "axis", 1);
std::vector<int> shape = {1, 1, 1, 2}; std::vector<int> shape = {1, 1, 1, 2};
Shape tmp_shape(shape); Shape tmp_shape(shape);
// PBlock<NV> weight1(tmp_shape);
auto *weight1 =
GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(tmp_shape);
// auto *weight1 = new PBlock<NV>(tmp_shape, AK_FLOAT);
PBlock<NV> *weight1 = new PBlock<NV>(tmp_shape, AK_FLOAT);
engine_->RegistBlock(weight1);
float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data()); float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
cpu_data[0] = 2.; cpu_data[0] = 2.;
weight1->d_tensor().set_shape(tmp_shape); weight1->d_tensor().set_shape(tmp_shape);
......
...@@ -73,9 +73,7 @@ void PaddlePassBuilder::ClearPasses() { passes_.clear(); } ...@@ -73,9 +73,7 @@ void PaddlePassBuilder::ClearPasses() { passes_.clear(); }
// The following passes work for the Anakin sub-graph engine. // The following passes work for the Anakin sub-graph engine.
const std::vector<std::string> kAnakinSubgraphPasses({ const std::vector<std::string> kAnakinSubgraphPasses({
"infer_clean_graph_pass", // "infer_clean_graph_pass", //
"graph_viz_pass", //
"quant_conv2d_dequant_fuse_pass", // "quant_conv2d_dequant_fuse_pass", //
"graph_viz_pass", //
"simplify_anakin_priorbox_detection_out_pass", // "simplify_anakin_priorbox_detection_out_pass", //
"fillconstant_elementwisemul_fuse", // "fillconstant_elementwisemul_fuse", //
"fc_fuse_pass", // "fc_fuse_pass", //
...@@ -83,11 +81,8 @@ const std::vector<std::string> kAnakinSubgraphPasses({ ...@@ -83,11 +81,8 @@ const std::vector<std::string> kAnakinSubgraphPasses({
// "conv_bn_fuse_pass", // // "conv_bn_fuse_pass", //
// "conv_elementwise_add_fuse_pass", // // "conv_elementwise_add_fuse_pass", //
"fc_gru_fuse_pass", // "fc_gru_fuse_pass", //
"graph_viz_pass", //
"anakin_subgraph_pass", // "anakin_subgraph_pass", //
"graph_viz_pass", //
"fc_gru_fuse_pass", // "fc_gru_fuse_pass", //
"graph_viz_pass", //
}); });
GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) { GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册