diff --git a/paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc b/paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc index 3b738aa159ebfd77f00c9e532fbd94542e2097db..5bdc0c5faed7131b873edf9b43c847c010b6e3f3 100644 --- a/paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc +++ b/paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc @@ -38,9 +38,13 @@ std::unique_ptr IdentityScaleOpCleanPass::ApplyImpl( ->assert_is_op("scale") ->assert_op_attr("scale", 1.) ->assert_op_attr("bias", 0.); - auto scale_out = detector.mutable_pattern() - ->NewNode("scale_out") - ->assert_is_op_output("scale"); + auto scale_out = + detector.mutable_pattern() + ->NewNode("scale_out") + ->assert_is_op_output("scale") + // scale's output var should has only one consumer, or it can't be + // removed. + ->assert_more([](Node* x) { return x->outputs.size() == 1UL; }); pre_op->LinksTo({scale_in}); scale_op->LinksFrom({scale_in}).LinksTo({scale_out}); diff --git a/paddle/fluid/inference/api/paddle_api.h b/paddle/fluid/inference/api/paddle_api.h index 8ac8bc529183edc2f8f888ca7ba14611acaadc10..f90a74b9102ee62d15c2d738b53971c9bde51439 100644 --- a/paddle/fluid/inference/api/paddle_api.h +++ b/paddle/fluid/inference/api/paddle_api.h @@ -16,6 +16,12 @@ /*! \file paddle_api.h */ +/*! \mainpage Paddle Inference APIs + * \section intro_sec Introduction + * The Paddle inference library aims to offer an high performance inference SDK + * for Paddle users. + */ + #include #include #include @@ -34,26 +40,49 @@ enum PaddleDType { }; /** - *\brief Memory menager for PaddleTensor. + * \brief Memory manager for `PaddleTensor`. * - *The PaddleBuf holds a buffer for data input or output. The memory can be - *allocated by user or by PaddleBuf itself, but in any case, the PaddleBuf - *should be reused for better performance. + * The PaddleBuf holds a buffer for data input or output. The memory can be + * allocated by user or by PaddleBuf itself, but in any case, the PaddleBuf + * should be reused for better performance. * - *For user allocated memory, the following API can be used: - *- PaddleBuf(void* data, size_t length) to set an external memory by - *specifying - * the memory address and length. - *- Reset(void* data, size_t length) to reset the PaddleBuf with an external + * For user allocated memory, the following API can be used: + * - PaddleBuf(void* data, size_t length) to set an external memory by + * specifying the memory address and length. + * - Reset(void* data, size_t length) to reset the PaddleBuf with an external *memory. - *ATTENTION, for user allocated memory, deallocation should be done by users + * ATTENTION, for user allocated memory, deallocation should be done by users *externally after the program finished. The PaddleBuf won't do any allocation *or deallocation. * - *To have the PaddleBuf allocate and manage the memory: - *- PaddleBuf(size_t length) will allocate a memory of size `length`. - *- Resize(size_t length) resize the memory to no less than `length`, ATTENTION + * To have the PaddleBuf allocate and manage the memory: + * - PaddleBuf(size_t length) will allocate a memory of size `length`. + * - Resize(size_t length) resize the memory to no less than `length`, ATTENTION * if the allocated memory is larger than `length`, nothing will done. + * + * Usage: + * + * Let PaddleBuf manage the memory internally. + * \code{cpp} + * const int num_elements = 128; + * PaddleBuf buf(num_elements * sizeof(float)); + * \endcode + * + * Or + * \code{cpp} + * PaddleBuf buf; + * buf.Resize(num_elements * sizeof(float)); + * \endcode + * Works the exactly the same. + * + * One can also make the `PaddleBuf` use the external memory. + * \code{cpp} + * PaddleBuf buf; + * void* external_memory = new float[num_elements]; + * buf.Reset(external_memory, num_elements*sizeof(float)); + * ... + * delete[] external_memory; // manage the memory lifetime outside. + * \endcode */ class PaddleBuf { public: @@ -78,7 +107,7 @@ class PaddleBuf { /** Tell whether the buffer is empty. */ bool empty() const { return length_ == 0; } - /** Get the memory address. + /** Get the data's memory address. */ void* data() const { return data_; } /** Get the memory length. @@ -110,7 +139,8 @@ struct PaddleTensor { }; enum class PaddlePlace { kUNK = -1, kCPU, kGPU }; -/** Tensor without copy, currently only supports AnalysisPredictor. + +/** Tensor without copy, currently only supports `AnalysisPredictor`. */ class ZeroCopyTensor { public: @@ -269,9 +299,11 @@ struct NativeConfig : public PaddlePredictor::Config { * * Usage: * + * \code{.cpp} * NativeConfig config; * ... // change the configs. * auto native_predictor = CreatePaddlePredictor(config); + * \endcode * * FOR EXTENSION DEVELOPER: * Different predictors are designated by config type. Similar configs can be diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index 039389a4cf99da6c2576c148d8c294e5d79aa7a8..f9c13c2fa84b3b5d629297d3f44a6f5889a734f4 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -66,8 +66,54 @@ void GpuPassStrategy::EnableMKLDNN() { LOG(ERROR) << "GPU not support MKLDNN yet"; } +GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) { + passes_.assign({ + "infer_clean_graph_pass", // + "identity_scale_op_clean_pass", // + "conv_affine_channel_fuse_pass", // + "conv_eltwiseadd_affine_channel_fuse_pass", // + "conv_bn_fuse_pass", // +#if CUDNN_VERSION >= 7100 // To run conv_fusion, the version of cudnn must be + // guaranteed at least v7 + "conv_elementwise_add_act_fuse_pass", // + "conv_elementwise_add2_act_fuse_pass", // + "conv_elementwise_add_fuse_pass", // +#endif + }); + + for (int i = 6; i >= 3; i--) { + passes_.push_back("transpose_flatten" + std::to_string(i) + + "_concat_fuse_pass"); + } + use_gpu_ = true; +} + void PaddlePassBuilder::AppendAnalysisPass(const std::string &pass) { analysis_passes_.push_back(pass); } +CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) { + // NOTE the large fusions should be located in the front, so that they will + // not be damaged by smaller ones. + passes_.assign({ + "infer_clean_graph_pass", // + "attention_lstm_fuse_pass", // + "seqpool_concat_fuse_pass", // + "seqconv_eltadd_relu_fuse_pass", // + // "embedding_fc_lstm_fuse_pass", // + "fc_lstm_fuse_pass", // + "mul_lstm_fuse_pass", // + "fc_gru_fuse_pass", // + "mul_gru_fuse_pass", // + "seq_concat_fc_fuse_pass", // + "fc_fuse_pass", // + "repeated_fc_relu_fuse_pass", // + "squared_mat_sub_fuse_pass", // + "conv_bn_fuse_pass", // + "conv_eltwiseadd_bn_fuse_pass", // + "is_test_pass", // + "identity_scale_op_clean_pass", // + }); + use_gpu_ = false; +} } // namespace paddle diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h index aa353f12ca7333713e2d640cce6b2dfbea3c4e26..2524d89fcd1322e105ad2217347aa2380448f2bc 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.h +++ b/paddle/fluid/inference/api/paddle_pass_builder.h @@ -97,30 +97,7 @@ class PassStrategy : public PaddlePassBuilder { */ class CpuPassStrategy : public PassStrategy { public: - CpuPassStrategy() : PassStrategy({}) { - // NOTE the large fusions should be located in the front, so that they will - // not be damaged by smaller ones. - passes_.assign({ - "infer_clean_graph_pass", // - "attention_lstm_fuse_pass", // - "seqpool_concat_fuse_pass", // - "seqconv_eltadd_relu_fuse_pass", // - // "embedding_fc_lstm_fuse_pass", // - "fc_lstm_fuse_pass", // - "mul_lstm_fuse_pass", // - "fc_gru_fuse_pass", // - "mul_gru_fuse_pass", // - "seq_concat_fc_fuse_pass", // - "fc_fuse_pass", // - "repeated_fc_relu_fuse_pass", // - "squared_mat_sub_fuse_pass", // - "conv_bn_fuse_pass", // - "conv_eltwiseadd_bn_fuse_pass", // - "is_test_pass", // - "identity_scale_op_clean_pass", // - }); - use_gpu_ = false; - } + CpuPassStrategy(); explicit CpuPassStrategy(const CpuPassStrategy &other) : PassStrategy(other.AllPasses()) {} @@ -153,27 +130,7 @@ class CpuPassStrategy : public PassStrategy { */ class GpuPassStrategy : public PassStrategy { public: - GpuPassStrategy() : PassStrategy({}) { - passes_.assign({ - "infer_clean_graph_pass", // - "identity_scale_op_clean_pass", // - "conv_affine_channel_fuse_pass", // - "conv_eltwiseadd_affine_channel_fuse_pass", // - "conv_bn_fuse_pass", // -#if CUDNN_VERSION >= 7100 // To run conv_fusion, the version of cudnn must be - // guaranteed at least v7 - "conv_elementwise_add_act_fuse_pass", // - "conv_elementwise_add2_act_fuse_pass", // - "conv_elementwise_add_fuse_pass", // -#endif - }); - - for (int i = 6; i >= 3; i--) { - passes_.push_back("transpose_flatten" + std::to_string(i) + - "_concat_fuse_pass"); - } - use_gpu_ = true; - } + GpuPassStrategy(); explicit GpuPassStrategy(const GpuPassStrategy &other) : PassStrategy(other.AllPasses()) {