diff --git a/paddle/fluid/framework/ir/fc_fuse_pass.cc b/paddle/fluid/framework/ir/fc_fuse_pass.cc
index a5488eaa1b62036ec00621fbfdaab732339a67ab..cd8030519ccfcfab3741424e8a60e9c29b698593 100644
--- a/paddle/fluid/framework/ir/fc_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/fc_fuse_pass.cc
@@ -48,18 +48,37 @@ void FCFusePass::ApplyImpl(ir::Graph* graph) const {
     GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(mul_out, mul_out, fc_pattern);
 
-    auto base_op_desc = *mul->Op()->Proto();
+    auto base_op_desc = mul->Op();
     // Create an FC Node.
-    OpDesc desc(base_op_desc, nullptr);
+    // OpDesc desc(base_op_desc, nullptr);
+    OpDesc desc;
     std::string fc_x_in = subgraph.at(x)->Name();
     std::string fc_Y_in = w->Name();
     std::string fc_bias_in = fc_bias->Name();
     std::string fc_out_out = fc_out->Name();
+
     desc.SetInput("Input", std::vector<std::string>({fc_x_in}));
     desc.SetInput("W", std::vector<std::string>({fc_Y_in}));
     desc.SetInput("Bias", std::vector<std::string>({fc_bias_in}));
     desc.SetOutput("Out", std::vector<std::string>({fc_out_out}));
     desc.SetAttr("in_num_col_dims", mul->Op()->GetAttr("x_num_col_dims"));
+
+    // For anakin subgraph int8.
+    // In anakin subgraph int8 mode, the pattern "fake_quant + mul +
+    // fake_dequant" can be detected by the quant_dequant_fuse_pass. That pass
+    // adds the "input_scale" and "weight_scale" attributes, extracted from
+    // the fake_quant and fake_dequant ops, to the mul op, and then deletes
+    // the fake_quant and fake_dequant ops from the graph. If the mul op
+    // carries this scale info, we should add it to the fused fc as well.
+    if (base_op_desc->HasAttr("enable_int8")) {
+      desc.SetAttr("enable_int8", base_op_desc->GetAttr("enable_int8"));
+      desc.SetAttr("input_scale", base_op_desc->GetAttr("input_scale"));
+      desc.SetAttr("weight_scale", base_op_desc->GetAttr("weight_scale"));
+    }
+
     desc.SetType("fc");
     auto fc_node = g->CreateOpNode(&desc);  // OpDesc will be copied.
     GraphSafeRemoveNodes(graph, {mul, elementwise_add, mul_out});
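A minimal, self-contained sketch of the attribute hand-off the new branch implements. A plain std::map stands in for Paddle's OpDesc, and the scale values are made up; only the copy-if-present logic mirrors the patch:

#include <iostream>
#include <map>
#include <string>

int main() {
  // Attributes left on the mul op by quant_dequant_fuse_pass (toy values).
  std::map<std::string, float> mul_attrs = {
      {"enable_int8", 1.0f}, {"input_scale", 0.017f}, {"weight_scale", 0.042f}};

  // fc_fuse_pass builds a fresh op desc, so the scales must be copied over
  // explicitly or they are lost together with the deleted mul op.
  std::map<std::string, float> fc_attrs;
  if (mul_attrs.count("enable_int8")) {
    for (const char* key : {"enable_int8", "input_scale", "weight_scale"}) {
      fc_attrs[key] = mul_attrs.at(key);
    }
  }

  for (const auto& kv : fc_attrs) {
    std::cout << kv.first << " = " << kv.second << "\n";
  }
  return 0;
}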
diff --git a/paddle/fluid/inference/anakin/convert/affine_channel.cc b/paddle/fluid/inference/anakin/convert/affine_channel.cc
index 074c1b26ba8913a8e6f1777993a5a0b97757df90..a3abca0a84f66ffade758008ed7add300226de75 100644
--- a/paddle/fluid/inference/anakin/convert/affine_channel.cc
+++ b/paddle/fluid/inference/anakin/convert/affine_channel.cc
@@ -38,13 +38,13 @@ void AffineChannelOpConverter<TargetT, PrecisionType>::operator()(
   // Copy the Scale to CPUPlace and get the pointer.
   auto *scale_v = scope.FindVar(op_desc.Input("Scale").front());
   PADDLE_ENFORCE_NOT_NULL(scale_v);
-  auto weight1 = pblock_from_var<TargetT>(*scale_v);
+  auto weight1 = pblock_from_var<TargetT, PrecisionType>(*scale_v, this->engine_);
   this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
 
   // Copy the Bias to CPUPlace and get the pointer.
   auto *bias_v = scope.FindVar(op_desc.Input("Bias").front());
   PADDLE_ENFORCE_NOT_NULL(bias_v);
-  auto weight2 = pblock_from_var<TargetT>(*bias_v);
+  auto weight2 = pblock_from_var<TargetT, PrecisionType>(*bias_v, this->engine_);
   this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
 }
diff --git a/paddle/fluid/inference/anakin/convert/batch_norm.cc b/paddle/fluid/inference/anakin/convert/batch_norm.cc
index 3e1e422aea19bcb44661ed9978aee0beacdc2cf9..fa7f3bd79f2807f31cd1a4f0e39770e0adc2bdf0 100644
--- a/paddle/fluid/inference/anakin/convert/batch_norm.cc
+++ b/paddle/fluid/inference/anakin/convert/batch_norm.cc
@@ -54,25 +54,27 @@ void BatchNormOpConverter<TargetT, PrecisionType>::operator()(
   auto *mean_v = scope.FindVar(op_desc.Input("Mean").front());
   PADDLE_ENFORCE_NOT_NULL(mean_v);
-  auto weight1 = pblock_from_var<TargetT>(*mean_v);
+  auto weight1 = pblock_from_var<TargetT, PrecisionType>(*mean_v, this->engine_);
   this->engine_->AddOpAttr(bn_op_name, "weight_1", *weight1);
 
   auto *variance_v = scope.FindVar(op_desc.Input("Variance").front());
   PADDLE_ENFORCE_NOT_NULL(variance_v);
-  auto weight2 = pblock_from_var<TargetT>(*variance_v);
+  auto weight2 =
+      pblock_from_var<TargetT, PrecisionType>(*variance_v, this->engine_);
   this->engine_->AddOpAttr(bn_op_name, "weight_2", *weight2);
 
-  auto *weight3 = pblock_from_vector<TargetT>(std::vector<float>({1}));
+  auto *weight3 = pblock_from_vector<TargetT, PrecisionType>(
+      std::vector<float>({1}), this->engine_);
   this->engine_->AddOpAttr(bn_op_name, "weight_3", *weight3);
 
   auto *scale_v = scope.FindVar(op_desc.Input("Scale").front());
   PADDLE_ENFORCE_NOT_NULL(scale_v);
-  auto scale = pblock_from_var<TargetT>(*scale_v);
+  auto scale = pblock_from_var<TargetT, PrecisionType>(*scale_v, this->engine_);
   this->engine_->AddOpAttr(scale_op_name, "weight_1", *scale);
 
   auto *bias_v = scope.FindVar(op_desc.Input("Bias").front());
   PADDLE_ENFORCE_NOT_NULL(bias_v);
-  auto bias = pblock_from_var<TargetT>(*bias_v);
+  auto bias = pblock_from_var<TargetT, PrecisionType>(*bias_v, this->engine_);
   this->engine_->AddOpAttr(scale_op_name, "weight_2", *bias);
 }
diff --git a/paddle/fluid/inference/anakin/convert/conv2d.cc b/paddle/fluid/inference/anakin/convert/conv2d.cc
index 4bd380e7bb23b30ab5f9a4b1a71d3ec29cb3310d..e2ea6290fab1c84978a1805c042d4aaac4278d85 100644
--- a/paddle/fluid/inference/anakin/convert/conv2d.cc
+++ b/paddle/fluid/inference/anakin/convert/conv2d.cc
@@ -71,8 +71,9 @@ void Conv2dOpConverter<TargetT, PrecisionType>::operator()(
     const float int8_range = 127.;
     float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
     float weight_scale = boost::get<float>(op_desc.GetAttr("weight_scale"));
-    auto *weight1 = ::anakin::graph::GraphGlobalMem<TargetT>::Global()
-                        .template new_block<::anakin::AK_INT8>(anakin_shape);
+    PBlock<TargetT> *weight1 =
+        new PBlock<TargetT>(anakin_shape, ::anakin::AK_INT8);
+    this->engine_->RegistBlock(weight1);
     float *weight_data = weight_tensor->data<float>();
     std::vector<char> weight_int8;
     int weight_num = weight_tensor->numel();
@@ -94,7 +95,8 @@ void Conv2dOpConverter<TargetT, PrecisionType>::operator()(
                                  {weight_scale / int8_range}, false);
     this->engine_->AddTensorScale(input_name, in_scale / int8_range);
   } else {
-    auto *weight1 = pblock_from_tensor<TargetT>(*weight_tensor, weight_shape);
+    auto *weight1 = pblock_from_tensor<TargetT, PrecisionType>(
+        *weight_tensor, weight_shape, this->engine_);
     this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
   }
 }
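The int8 branch above (and the analogous branches in conv2d_fusion.cc and fc.cc below) narrows weights that the quantization passes already placed into int8 range and records the dequantization scale; the loop body itself is elided in this excerpt. A standalone toy sketch of that narrowing, with hypothetical values, assuming the weights arrive as integer-valued floats:

#include <cassert>
#include <iostream>
#include <vector>

int main() {
  const float int8_range = 127.f;
  const float weight_scale = 0.5f;  // hypothetical scale from fake_quant

  // After the quant/dequant fusion the weights already hold integer values
  // in [-128, 127]; the converter only validates and narrows them.
  std::vector<float> weight_data = {127.f, -64.f, 3.f};
  std::vector<char> weight_int8;
  for (float w : weight_data) {
    bool is_valid_int8 = (w >= -128.f) && (w <= 127.f);
    assert(is_valid_int8 && "int8 weight out of range");
    weight_int8.push_back(static_cast<char>(w));
  }

  // The engine dequantizes with weight_scale / int8_range, so a stored value
  // of 127 maps back to the original float magnitude weight_scale.
  std::cout << "dequant scale = " << weight_scale / int8_range << "\n";
  return 0;
}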
diff --git a/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc b/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc
index a8ef73d50f2a42a8416363ba083bbb3dc547a1ed..a557c35475d374cc4c4adf795b7764bf0e8bd399 100644
--- a/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc
+++ b/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc
@@ -73,8 +73,9 @@ void Conv2dFusionOpConverter<TargetT, PrecisionType>::operator()(
     const float int8_range = 127.;
     float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
     float weight_scale = boost::get<float>(op_desc.GetAttr("weight_scale"));
-    auto *weight1 = ::anakin::graph::GraphGlobalMem<TargetT>::Global()
-                        .template new_block<::anakin::AK_INT8>(anakin_shape);
+    PBlock<TargetT> *weight1 =
+        new PBlock<TargetT>(anakin_shape, ::anakin::AK_INT8);
+    this->engine_->RegistBlock(weight1);
     float *weight_data = weight_tensor->data<float>();
     std::vector<char> weight_int8;
     int weight_num = weight_tensor->numel();
@@ -98,9 +99,10 @@ void Conv2dFusionOpConverter<TargetT, PrecisionType>::operator()(
   } else {
     auto weight_tensor = tensor_from_var(*filter_v, platform::CPUPlace());
     auto weight_shape = framework::vectorize2int(weight_tensor->dims());
-    auto *weight1 = pblock_from_tensor<TargetT>(*weight_tensor, weight_shape);
+    auto *weight1 = pblock_from_tensor<TargetT, PrecisionType>(
+        *weight_tensor, weight_shape, this->engine_);
     this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
-    auto weight2 = pblock_from_var<TargetT>(*b_v);
+    auto weight2 = pblock_from_var<TargetT, PrecisionType>(*b_v, this->engine_);
     this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
   }
 }
diff --git a/paddle/fluid/inference/anakin/convert/dropout.cc b/paddle/fluid/inference/anakin/convert/dropout.cc
index e779aca73083975dffbe39a1577ebb80690e5dd1..872ebaba3c0bcbcd467452d0da344da7dc3f6dde 100644
--- a/paddle/fluid/inference/anakin/convert/dropout.cc
+++ b/paddle/fluid/inference/anakin/convert/dropout.cc
@@ -39,7 +39,8 @@ void DropoutOpConverter<TargetT, PrecisionType>::operator()(
   auto dropout_prob = boost::get<float>(op_desc.GetAttr("dropout_prob"));
   auto factor = 1 - dropout_prob;
-  auto *weight1 = pblock_from_vector<TargetT>(std::vector<float>({factor}));
+  auto *weight1 = pblock_from_vector<TargetT, PrecisionType>(
+      std::vector<float>({factor}), this->engine_);
   this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
   this->engine_->AddOpAttr(op_name, "axis", 0);
diff --git a/paddle/fluid/inference/anakin/convert/fc.cc b/paddle/fluid/inference/anakin/convert/fc.cc
index 10ceb2154b1b7c7bb42afde95e2a1eeca475050c..04af311992210ed0977bb63f3a08c27a36cc6ca6 100644
--- a/paddle/fluid/inference/anakin/convert/fc.cc
+++ b/paddle/fluid/inference/anakin/convert/fc.cc
@@ -77,8 +77,9 @@ void FcBaseOpConverter<TargetT, PrecisionType>::operator()(
     const float int8_range = 127.;
     float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
     float weight_scale = boost::get<float>(op_desc.GetAttr("weight_scale"));
-    auto *weight1 = ::anakin::graph::GraphGlobalMem<TargetT>::Global()
-                        .template new_block<::anakin::AK_INT8>(anakin_shape);
+    PBlock<TargetT> *weight1 =
+        new PBlock<TargetT>(anakin_shape, ::anakin::AK_INT8);
+    this->engine_->RegistBlock(weight1);
     std::vector<char> weight_int8;
     for (int i = 0; i < weight_num; i++) {
       bool is_valid_int8 =
@@ -98,7 +99,8 @@ void FcBaseOpConverter<TargetT, PrecisionType>::operator()(
                                  {weight_scale / int8_range}, false);
     this->engine_->AddTensorScale(input_name, in_scale / int8_range);
   } else {
-    auto *weight1 = pblock_from_vector<TargetT>(trans_weight_data);
+    auto *weight1 = pblock_from_vector<TargetT, PrecisionType>(
+        trans_weight_data, this->engine_);
     this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
   }
@@ -106,7 +108,7 @@ void FcBaseOpConverter<TargetT, PrecisionType>::operator()(
   if (with_bias) {
     auto *b_v = scope.FindVar(op_desc.Input("Bias").front());
     PADDLE_ENFORCE_NOT_NULL(b_v);
-    auto weight2 = pblock_from_var<TargetT>(*b_v);
+    auto weight2 = pblock_from_var<TargetT, PrecisionType>(*b_v, this->engine_);
     this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
   }
 }
diff --git a/paddle/fluid/inference/anakin/convert/helper.h b/paddle/fluid/inference/anakin/convert/helper.h
index 5581f7dd641c57e3092bccb1508fc9264bfd02be..7b0fb211dcd8aa03fdad91d7cacfa11d2ceaae43 100644
--- a/paddle/fluid/inference/anakin/convert/helper.h
+++ b/paddle/fluid/inference/anakin/convert/helper.h
@@ -20,6 +20,7 @@
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/variable.h"
+#include "paddle/fluid/inference/anakin/engine.h"
 
 #include "framework/core/net/net.h"
 #include "framework/core/types.h"
@@ -29,8 +30,8 @@
 using anakin::saber::Shape;
 using anakin::AK_FLOAT;
+using anakin::AK_INT8;
 using anakin::PBlock;
-using anakin::graph::GraphGlobalMem;
 
 namespace paddle {
 namespace inference {
@@ -38,31 +39,34 @@ namespace anakin {
 std::unique_ptr<framework::LoDTensor> tensor_from_var(
     const framework::Variable& var, const platform::Place& place);
-template <typename T>
-PBlock<T>* pblock_from_tensor(const framework::LoDTensor& tensor,
-                              std::vector<int> shape) {
-  while (shape.size() < 4) {
-    shape.insert(shape.begin(), 1);
+
+template <typename T, ::anakin::Precision P>
+PBlock<T>* pblock_from_tensor(const framework::LoDTensor& tensor,
+                              std::vector<int> shape_vec,
+                              AnakinEngine<T, P>* engine) {
+  while (shape_vec.size() < 4) {
+    shape_vec.insert(shape_vec.begin(), 1);
   }
-  Shape anakin_shape(shape);
-  auto* weight =
-      GraphGlobalMem<T>::Global().template new_block<AK_FLOAT>(anakin_shape);
+  Shape shape(shape_vec);
+  PBlock<T>* weight = new PBlock<T>(shape, AK_FLOAT);
+  engine->RegistBlock(weight);
   float* cpu_data = static_cast<float*>(weight->h_tensor().mutable_data());
   std::copy_n(tensor.data<float>(), tensor.numel(), cpu_data);
-  weight->d_tensor().set_shape(anakin_shape);
+  weight->d_tensor().set_shape(shape);
   weight->d_tensor().copy_from(weight->h_tensor());
   return weight;
 }
 
-template <typename T>
-PBlock<T>* pblock_from_vector(const std::vector<float>& vec,
-                              std::vector<int> shape_vec) {
+template <typename T, ::anakin::Precision P>
+PBlock<T>* pblock_from_vector(const std::vector<float>& vec,
+                              std::vector<int> shape_vec,
+                              AnakinEngine<T, P>* engine) {
   while (shape_vec.size() < 4) {
     shape_vec.insert(shape_vec.begin(), 1);
   }
   Shape shape(shape_vec);
-  auto* weight =
-      GraphGlobalMem<T>::Global().template new_block<AK_FLOAT>(shape);
+  PBlock<T>* weight = new PBlock<T>(shape, AK_FLOAT);
+  engine->RegistBlock(weight);
   auto* weight_data = static_cast<float*>(weight->h_tensor().mutable_data());
   std::copy(std::begin(vec), std::end(vec), weight_data);
   weight->d_tensor().set_shape(shape);
@@ -70,17 +74,20 @@ PBlock<T>* pblock_from_vector(const std::vector<float>& vec,
   return weight;
 }
 
-template <typename T>
-PBlock<T>* pblock_from_vector(const std::vector<float>& vec) {
+template <typename T, ::anakin::Precision P>
+PBlock<T>* pblock_from_vector(const std::vector<float>& vec,
+                              AnakinEngine<T, P>* engine) {
   int size = vec.size();
-  return pblock_from_vector<T>(vec, std::vector<int>({1, 1, 1, size}));
+  return pblock_from_vector<T, P>(
+      vec, std::vector<int>({1, 1, 1, size}), engine);
 }
 
-template <typename T>
-PBlock<T>* pblock_from_var(const framework::Variable& var) {
+template <typename T, ::anakin::Precision P>
+PBlock<T>* pblock_from_var(const framework::Variable& var,
+                           AnakinEngine<T, P>* engine) {
   auto tensor = tensor_from_var(var, platform::CPUPlace());
   auto shape = framework::vectorize2int(tensor->dims());
-  return pblock_from_tensor<T>(*tensor, shape);
+  return pblock_from_tensor<T, P>(*tensor, shape, engine);
 }
 
 }  // namespace anakin
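One plausible reading of the helper.h rewrite above: replacing the process-wide GraphGlobalMem singleton with per-engine RegistBlock ties each weight block's lifetime to the engine (via its graph) that uses it. A toy sketch of that ownership model, with stand-in types rather than Anakin's real classes:

#include <memory>
#include <vector>

// Toy stand-ins for anakin::PBlock and AnakinEngine, only to illustrate the
// lifetime change described above.
struct Block {};

class Engine {
 public:
  // Takes ownership of the block, mirroring engine_->RegistBlock(weight).
  void RegistBlock(Block* block) { blocks_.emplace_back(block); }

 private:
  std::vector<std::unique_ptr<Block>> blocks_;  // freed with the engine
};

int main() {
  auto engine = std::make_unique<Engine>();
  engine->RegistBlock(new Block);  // what the pblock_from_* helpers do
  engine.reset();  // all registered blocks are released here, not at exit
  return 0;
}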
diff --git a/paddle/fluid/inference/anakin/engine.cc b/paddle/fluid/inference/anakin/engine.cc
index 90bc9c2514c3c72bad6d900c25bddc9f26cab6b9..fdf2f228f59fe8889f00dc6a1d3b11540f8e779c 100644
--- a/paddle/fluid/inference/anakin/engine.cc
+++ b/paddle/fluid/inference/anakin/engine.cc
@@ -162,6 +162,12 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Optimize() {
   PADDLE_ENFORCE(graph_->Optimize(), "Graph optimization.");
 }
 
+template <typename TargetT, Precision PrecisionType, OpRunType RunType>
+void AnakinEngine<TargetT, PrecisionType, RunType>::RegistBlock(
+    ::anakin::PBlock<TargetT> *block_p) {
+  PADDLE_ENFORCE(graph_->RegistBlock(block_p), "Block register.");
+}
+
 template <typename TargetT, Precision PrecisionType, OpRunType RunType>
 std::unique_ptr<AnakinEngine<TargetT, PrecisionType, RunType>>
 AnakinEngine<TargetT, PrecisionType, RunType>::Clone() {
diff --git a/paddle/fluid/inference/anakin/engine.h b/paddle/fluid/inference/anakin/engine.h
index ade15537db838fa6e2e38488c1743af9f92f83de..5e76331cc56b478827d734c1668b177f46185c55 100644
--- a/paddle/fluid/inference/anakin/engine.h
+++ b/paddle/fluid/inference/anakin/engine.h
@@ -90,6 +90,7 @@ class AnakinEngine {
   int GetMaxBatchSize() { return max_batch_size_; }
   void Freeze();
   void Optimize();
+  void RegistBlock(::anakin::PBlock<TargetT> *block_p);
   void Save(std::string path) { graph_->save(path); }
   bool IsInit() { return initialized_; }
   int GetDevice() { return device_; }
diff --git a/paddle/fluid/inference/anakin/test_anakin_engine.cc b/paddle/fluid/inference/anakin/test_anakin_engine.cc
index 613481a55514f8d92575e7e0d6a23fd68fbb1018..422f415a5db62d9408834f600f875d7825d44952 100644
--- a/paddle/fluid/inference/anakin/test_anakin_engine.cc
+++ b/paddle/fluid/inference/anakin/test_anakin_engine.cc
@@ -19,7 +19,6 @@ limitations under the License. */
 
 #include "paddle/fluid/inference/anakin/engine.h"
 
-using anakin::graph::GraphGlobalMem;
 using anakin::AK_FLOAT;
 using anakin::Precision;
 using anakin::saber::NV;
@@ -52,11 +51,9 @@ TEST_F(TestAnakinEngine, Execute) {
   engine_->AddOpAttr("op1", "axis", 1);
   std::vector<int> shape = {1, 1, 1, 2};
   Shape tmp_shape(shape);
-  // PBlock<NV> weight1(tmp_shape);
-  auto *weight1 =
-      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(tmp_shape);
-  // auto *weight1 = new PBlock<NV>(tmp_shape, AK_FLOAT);
+  PBlock<NV> *weight1 = new PBlock<NV>(tmp_shape, AK_FLOAT);
+  engine_->RegistBlock(weight1);
 
   float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
   cpu_data[0] = 2.;
   weight1->d_tensor().set_shape(tmp_shape);
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc
index a3259f5321f80ca91001fd1f602fc927fb7244dc..d531f754ccafc8aa5b47e24a9140b94bc0b5603d 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.cc
+++ b/paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -73,9 +73,7 @@ void PaddlePassBuilder::ClearPasses() { passes_.clear(); }
 // The following passes works for Anakin sub-graph engine.
 const std::vector<std::string> kAnakinSubgraphPasses({
     "infer_clean_graph_pass",                       //
-    "graph_viz_pass",                               //
     "quant_conv2d_dequant_fuse_pass",               //
-    "graph_viz_pass",                               //
     "simplify_anakin_priorbox_detection_out_pass",  //
     "fillconstant_elementwisemul_fuse",             //
     "fc_fuse_pass",                                 //
@@ -83,11 +81,8 @@ const std::vector<std::string> kAnakinSubgraphPasses({
     // "conv_bn_fuse_pass",                         //
     // "conv_elementwise_add_fuse_pass",            //
     "fc_gru_fuse_pass",                             //
-    "graph_viz_pass",                               //
     "anakin_subgraph_pass",                         //
-    "graph_viz_pass",                               //
     "fc_gru_fuse_pass",                             //
-    "graph_viz_pass",                               //
 });
 
 GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {