未验证 提交 e5c8a199 编写于 作者: Y Yan Chunwei 提交者: GitHub

Fea/infer for1.3 (#15760)

上级 d956fcb9
...@@ -38,9 +38,13 @@ std::unique_ptr<ir::Graph> IdentityScaleOpCleanPass::ApplyImpl( ...@@ -38,9 +38,13 @@ std::unique_ptr<ir::Graph> IdentityScaleOpCleanPass::ApplyImpl(
->assert_is_op("scale") ->assert_is_op("scale")
->assert_op_attr<float>("scale", 1.) ->assert_op_attr<float>("scale", 1.)
->assert_op_attr<float>("bias", 0.); ->assert_op_attr<float>("bias", 0.);
auto scale_out = detector.mutable_pattern() auto scale_out =
detector.mutable_pattern()
->NewNode("scale_out") ->NewNode("scale_out")
->assert_is_op_output("scale"); ->assert_is_op_output("scale")
// scale's output var should have only one consumer, or it can't be
// removed.
->assert_more([](Node* x) { return x->outputs.size() == 1UL; });
pre_op->LinksTo({scale_in}); pre_op->LinksTo({scale_in});
scale_op->LinksFrom({scale_in}).LinksTo({scale_out}); scale_op->LinksFrom({scale_in}).LinksTo({scale_out});
......
...@@ -16,6 +16,12 @@ ...@@ -16,6 +16,12 @@
/*! \file paddle_api.h /*! \file paddle_api.h
*/ */
/*! \mainpage Paddle Inference APIs
* \section intro_sec Introduction
* The Paddle inference library aims to offer a high-performance inference SDK
* for Paddle users.
*/
#include <cassert> #include <cassert>
#include <memory> #include <memory>
#include <string> #include <string>
...@@ -34,26 +40,49 @@ enum PaddleDType { ...@@ -34,26 +40,49 @@ enum PaddleDType {
}; };
/** /**
*\brief Memory menager for PaddleTensor. * \brief Memory manager for `PaddleTensor`.
* *
*The PaddleBuf holds a buffer for data input or output. The memory can be * The PaddleBuf holds a buffer for data input or output. The memory can be
*allocated by user or by PaddleBuf itself, but in any case, the PaddleBuf * allocated by user or by PaddleBuf itself, but in any case, the PaddleBuf
*should be reused for better performance. * should be reused for better performance.
* *
*For user allocated memory, the following API can be used: * For user allocated memory, the following API can be used:
*- PaddleBuf(void* data, size_t length) to set an external memory by * - PaddleBuf(void* data, size_t length) to set an external memory by
*specifying * specifying the memory address and length.
* the memory address and length. * - Reset(void* data, size_t length) to reset the PaddleBuf with an external
*- Reset(void* data, size_t length) to reset the PaddleBuf with an external
*memory. *memory.
*ATTENTION, for user allocated memory, deallocation should be done by users * ATTENTION, for user allocated memory, deallocation should be done by users
*externally after the program finished. The PaddleBuf won't do any allocation *externally after the program finished. The PaddleBuf won't do any allocation
*or deallocation. *or deallocation.
* *
*To have the PaddleBuf allocate and manage the memory: * To have the PaddleBuf allocate and manage the memory:
*- PaddleBuf(size_t length) will allocate a memory of size `length`. * - PaddleBuf(size_t length) will allocate a memory of size `length`.
*- Resize(size_t length) resize the memory to no less than `length`, ATTENTION * - Resize(size_t length) resize the memory to no less than `length`, ATTENTION
* if the allocated memory is larger than `length`, nothing will be done.
*
* Usage:
*
* Let PaddleBuf manage the memory internally.
* \code{.cpp}
* const int num_elements = 128;
* PaddleBuf buf(num_elements * sizeof(float));
* \endcode
*
* Or
* \code{.cpp}
* PaddleBuf buf;
* buf.Resize(num_elements * sizeof(float));
* \endcode
* Works exactly the same.
*
* One can also make the `PaddleBuf` use the external memory.
* \code{.cpp}
* PaddleBuf buf;
* float* external_memory = new float[num_elements];
* buf.Reset(external_memory, num_elements*sizeof(float));
* ...
* delete[] external_memory; // manage the memory lifetime outside.
* \endcode
*/ */
class PaddleBuf { class PaddleBuf {
public: public:
...@@ -78,7 +107,7 @@ class PaddleBuf { ...@@ -78,7 +107,7 @@ class PaddleBuf {
/** Tell whether the buffer is empty. /** Tell whether the buffer is empty.
*/ */
bool empty() const { return length_ == 0; } bool empty() const { return length_ == 0; }
/** Get the memory address. /** Get the data's memory address.
*/ */
void* data() const { return data_; } void* data() const { return data_; }
/** Get the memory length. /** Get the memory length.
...@@ -110,7 +139,8 @@ struct PaddleTensor { ...@@ -110,7 +139,8 @@ struct PaddleTensor {
}; };
enum class PaddlePlace { kUNK = -1, kCPU, kGPU }; enum class PaddlePlace { kUNK = -1, kCPU, kGPU };
/** Tensor without copy, currently only supports AnalysisPredictor.
/** Tensor without copy, currently only supports `AnalysisPredictor`.
*/ */
class ZeroCopyTensor { class ZeroCopyTensor {
public: public:
...@@ -269,9 +299,11 @@ struct NativeConfig : public PaddlePredictor::Config { ...@@ -269,9 +299,11 @@ struct NativeConfig : public PaddlePredictor::Config {
* *
* Usage: * Usage:
* *
* \code{.cpp}
* NativeConfig config; * NativeConfig config;
* ... // change the configs. * ... // change the configs.
* auto native_predictor = CreatePaddlePredictor(config); * auto native_predictor = CreatePaddlePredictor(config);
* \endcode
* *
* FOR EXTENSION DEVELOPER: * FOR EXTENSION DEVELOPER:
* Different predictors are designated by config type. Similar configs can be * Different predictors are designated by config type. Similar configs can be
......
...@@ -66,8 +66,54 @@ void GpuPassStrategy::EnableMKLDNN() { ...@@ -66,8 +66,54 @@ void GpuPassStrategy::EnableMKLDNN() {
LOG(ERROR) << "GPU not support MKLDNN yet"; LOG(ERROR) << "GPU not support MKLDNN yet";
} }
GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
  // Pass names are stored in passes_ in exactly the order listed here.
  passes_.assign({
      "infer_clean_graph_pass",
      "identity_scale_op_clean_pass",
      "conv_affine_channel_fuse_pass",
      "conv_eltwiseadd_affine_channel_fuse_pass",
      "conv_bn_fuse_pass",
// The conv fusion passes are compiled in only when CUDNN_VERSION >= 7100
// (the original note: to run conv_fusion, cuDNN must be at least v7).
#if CUDNN_VERSION >= 7100
      "conv_elementwise_add_act_fuse_pass",
      "conv_elementwise_add2_act_fuse_pass",
      "conv_elementwise_add_fuse_pass",
#endif
  });

  // Append transpose_flatten<N>_concat_fuse_pass for N = 6, 5, 4, 3, in that
  // order.
  for (int idx = 6; idx >= 3; --idx) {
    const std::string suffix = std::to_string(idx);
    passes_.push_back("transpose_flatten" + suffix + "_concat_fuse_pass");
  }

  use_gpu_ = true;
}
void PaddlePassBuilder::AppendAnalysisPass(const std::string &pass) { void PaddlePassBuilder::AppendAnalysisPass(const std::string &pass) {
analysis_passes_.push_back(pass); analysis_passes_.push_back(pass);
} }
CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) {
  // NOTE: ordering matters. The larger fusions are placed at the front so
  // that the smaller ones do not damage them.
  passes_.assign({
      "infer_clean_graph_pass",
      "attention_lstm_fuse_pass",
      "seqpool_concat_fuse_pass",
      "seqconv_eltadd_relu_fuse_pass",
      // "embedding_fc_lstm_fuse_pass",  (commented out in the original list)
      "fc_lstm_fuse_pass",
      "mul_lstm_fuse_pass",
      "fc_gru_fuse_pass",
      "mul_gru_fuse_pass",
      "seq_concat_fc_fuse_pass",
      "fc_fuse_pass",
      "repeated_fc_relu_fuse_pass",
      "squared_mat_sub_fuse_pass",
      "conv_bn_fuse_pass",
      "conv_eltwiseadd_bn_fuse_pass",
      "is_test_pass",
      "identity_scale_op_clean_pass",
  });

  use_gpu_ = false;
}
} // namespace paddle } // namespace paddle
...@@ -97,30 +97,7 @@ class PassStrategy : public PaddlePassBuilder { ...@@ -97,30 +97,7 @@ class PassStrategy : public PaddlePassBuilder {
*/ */
class CpuPassStrategy : public PassStrategy { class CpuPassStrategy : public PassStrategy {
public: public:
CpuPassStrategy() : PassStrategy({}) { CpuPassStrategy();
// NOTE the large fusions should be located in the front, so that they will
// not be damaged by smaller ones.
passes_.assign({
"infer_clean_graph_pass", //
"attention_lstm_fuse_pass", //
"seqpool_concat_fuse_pass", //
"seqconv_eltadd_relu_fuse_pass", //
// "embedding_fc_lstm_fuse_pass", //
"fc_lstm_fuse_pass", //
"mul_lstm_fuse_pass", //
"fc_gru_fuse_pass", //
"mul_gru_fuse_pass", //
"seq_concat_fc_fuse_pass", //
"fc_fuse_pass", //
"repeated_fc_relu_fuse_pass", //
"squared_mat_sub_fuse_pass", //
"conv_bn_fuse_pass", //
"conv_eltwiseadd_bn_fuse_pass", //
"is_test_pass", //
"identity_scale_op_clean_pass", //
});
use_gpu_ = false;
}
explicit CpuPassStrategy(const CpuPassStrategy &other) explicit CpuPassStrategy(const CpuPassStrategy &other)
: PassStrategy(other.AllPasses()) {} : PassStrategy(other.AllPasses()) {}
...@@ -153,27 +130,7 @@ class CpuPassStrategy : public PassStrategy { ...@@ -153,27 +130,7 @@ class CpuPassStrategy : public PassStrategy {
*/ */
class GpuPassStrategy : public PassStrategy { class GpuPassStrategy : public PassStrategy {
public: public:
GpuPassStrategy() : PassStrategy({}) { GpuPassStrategy();
passes_.assign({
"infer_clean_graph_pass", //
"identity_scale_op_clean_pass", //
"conv_affine_channel_fuse_pass", //
"conv_eltwiseadd_affine_channel_fuse_pass", //
"conv_bn_fuse_pass", //
#if CUDNN_VERSION >= 7100 // To run conv_fusion, the version of cudnn must be
// guaranteed at least v7
"conv_elementwise_add_act_fuse_pass", //
"conv_elementwise_add2_act_fuse_pass", //
"conv_elementwise_add_fuse_pass", //
#endif
});
for (int i = 6; i >= 3; i--) {
passes_.push_back("transpose_flatten" + std::to_string(i) +
"_concat_fuse_pass");
}
use_gpu_ = true;
}
explicit GpuPassStrategy(const GpuPassStrategy &other) explicit GpuPassStrategy(const GpuPassStrategy &other)
: PassStrategy(other.AllPasses()) { : PassStrategy(other.AllPasses()) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册