未验证 提交 e5c8a199 编写于 作者: Y Yan Chunwei 提交者: GitHub

Fea/infer for1.3 (#15760)

上级 d956fcb9
......@@ -38,9 +38,13 @@ std::unique_ptr<ir::Graph> IdentityScaleOpCleanPass::ApplyImpl(
->assert_is_op("scale")
->assert_op_attr<float>("scale", 1.)
->assert_op_attr<float>("bias", 0.);
auto scale_out = detector.mutable_pattern()
->NewNode("scale_out")
->assert_is_op_output("scale");
auto scale_out =
detector.mutable_pattern()
->NewNode("scale_out")
->assert_is_op_output("scale")
// scale's output var should has only one consumer, or it can't be
// removed.
->assert_more([](Node* x) { return x->outputs.size() == 1UL; });
pre_op->LinksTo({scale_in});
scale_op->LinksFrom({scale_in}).LinksTo({scale_out});
......
......@@ -16,6 +16,12 @@
/*! \file paddle_api.h
*/
/*! \mainpage Paddle Inference APIs
* \section intro_sec Introduction
* The Paddle inference library aims to offer an high performance inference SDK
* for Paddle users.
*/
#include <cassert>
#include <memory>
#include <string>
......@@ -34,26 +40,49 @@ enum PaddleDType {
};
/**
*\brief Memory menager for PaddleTensor.
* \brief Memory manager for `PaddleTensor`.
*
*The PaddleBuf holds a buffer for data input or output. The memory can be
*allocated by user or by PaddleBuf itself, but in any case, the PaddleBuf
*should be reused for better performance.
* The PaddleBuf holds a buffer for data input or output. The memory can be
* allocated by user or by PaddleBuf itself, but in any case, the PaddleBuf
* should be reused for better performance.
*
*For user allocated memory, the following API can be used:
*- PaddleBuf(void* data, size_t length) to set an external memory by
*specifying
* the memory address and length.
*- Reset(void* data, size_t length) to reset the PaddleBuf with an external
* For user allocated memory, the following API can be used:
* - PaddleBuf(void* data, size_t length) to set an external memory by
* specifying the memory address and length.
* - Reset(void* data, size_t length) to reset the PaddleBuf with an external
*memory.
*ATTENTION, for user allocated memory, deallocation should be done by users
* ATTENTION, for user allocated memory, deallocation should be done by users
*externally after the program finished. The PaddleBuf won't do any allocation
*or deallocation.
*
*To have the PaddleBuf allocate and manage the memory:
*- PaddleBuf(size_t length) will allocate a memory of size `length`.
*- Resize(size_t length) resize the memory to no less than `length`, ATTENTION
* To have the PaddleBuf allocate and manage the memory:
* - PaddleBuf(size_t length) will allocate a memory of size `length`.
* - Resize(size_t length) resize the memory to no less than `length`, ATTENTION
* if the allocated memory is larger than `length`, nothing will done.
*
* Usage:
*
* Let PaddleBuf manage the memory internally.
* \code{cpp}
* const int num_elements = 128;
* PaddleBuf buf(num_elements * sizeof(float));
* \endcode
*
* Or
* \code{cpp}
* PaddleBuf buf;
* buf.Resize(num_elements * sizeof(float));
* \endcode
* Works the exactly the same.
*
* One can also make the `PaddleBuf` use the external memory.
* \code{cpp}
* PaddleBuf buf;
* void* external_memory = new float[num_elements];
* buf.Reset(external_memory, num_elements*sizeof(float));
* ...
* delete[] external_memory; // manage the memory lifetime outside.
* \endcode
*/
class PaddleBuf {
public:
......@@ -78,7 +107,7 @@ class PaddleBuf {
/** Tell whether the buffer is empty.
*/
bool empty() const { return length_ == 0; }
/** Get the memory address.
/** Get the data's memory address.
*/
void* data() const { return data_; }
/** Get the memory length.
......@@ -110,7 +139,8 @@ struct PaddleTensor {
};
enum class PaddlePlace { kUNK = -1, kCPU, kGPU };
/** Tensor without copy, currently only supports AnalysisPredictor.
/** Tensor without copy, currently only supports `AnalysisPredictor`.
*/
class ZeroCopyTensor {
public:
......@@ -269,9 +299,11 @@ struct NativeConfig : public PaddlePredictor::Config {
*
* Usage:
*
* \code{.cpp}
* NativeConfig config;
* ... // change the configs.
* auto native_predictor = CreatePaddlePredictor(config);
* \endcode
*
* FOR EXTENSION DEVELOPER:
* Different predictors are designated by config type. Similar configs can be
......
......@@ -66,8 +66,54 @@ void GpuPassStrategy::EnableMKLDNN() {
LOG(ERROR) << "GPU not support MKLDNN yet";
}
GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
passes_.assign({
"infer_clean_graph_pass", //
"identity_scale_op_clean_pass", //
"conv_affine_channel_fuse_pass", //
"conv_eltwiseadd_affine_channel_fuse_pass", //
"conv_bn_fuse_pass", //
#if CUDNN_VERSION >= 7100 // To run conv_fusion, the version of cudnn must be
// guaranteed at least v7
"conv_elementwise_add_act_fuse_pass", //
"conv_elementwise_add2_act_fuse_pass", //
"conv_elementwise_add_fuse_pass", //
#endif
});
for (int i = 6; i >= 3; i--) {
passes_.push_back("transpose_flatten" + std::to_string(i) +
"_concat_fuse_pass");
}
use_gpu_ = true;
}
void PaddlePassBuilder::AppendAnalysisPass(const std::string &pass) {
analysis_passes_.push_back(pass);
}
CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) {
// NOTE the large fusions should be located in the front, so that they will
// not be damaged by smaller ones.
passes_.assign({
"infer_clean_graph_pass", //
"attention_lstm_fuse_pass", //
"seqpool_concat_fuse_pass", //
"seqconv_eltadd_relu_fuse_pass", //
// "embedding_fc_lstm_fuse_pass", //
"fc_lstm_fuse_pass", //
"mul_lstm_fuse_pass", //
"fc_gru_fuse_pass", //
"mul_gru_fuse_pass", //
"seq_concat_fc_fuse_pass", //
"fc_fuse_pass", //
"repeated_fc_relu_fuse_pass", //
"squared_mat_sub_fuse_pass", //
"conv_bn_fuse_pass", //
"conv_eltwiseadd_bn_fuse_pass", //
"is_test_pass", //
"identity_scale_op_clean_pass", //
});
use_gpu_ = false;
}
} // namespace paddle
......@@ -97,30 +97,7 @@ class PassStrategy : public PaddlePassBuilder {
*/
class CpuPassStrategy : public PassStrategy {
public:
CpuPassStrategy() : PassStrategy({}) {
// NOTE the large fusions should be located in the front, so that they will
// not be damaged by smaller ones.
passes_.assign({
"infer_clean_graph_pass", //
"attention_lstm_fuse_pass", //
"seqpool_concat_fuse_pass", //
"seqconv_eltadd_relu_fuse_pass", //
// "embedding_fc_lstm_fuse_pass", //
"fc_lstm_fuse_pass", //
"mul_lstm_fuse_pass", //
"fc_gru_fuse_pass", //
"mul_gru_fuse_pass", //
"seq_concat_fc_fuse_pass", //
"fc_fuse_pass", //
"repeated_fc_relu_fuse_pass", //
"squared_mat_sub_fuse_pass", //
"conv_bn_fuse_pass", //
"conv_eltwiseadd_bn_fuse_pass", //
"is_test_pass", //
"identity_scale_op_clean_pass", //
});
use_gpu_ = false;
}
CpuPassStrategy();
explicit CpuPassStrategy(const CpuPassStrategy &other)
: PassStrategy(other.AllPasses()) {}
......@@ -153,27 +130,7 @@ class CpuPassStrategy : public PassStrategy {
*/
class GpuPassStrategy : public PassStrategy {
public:
GpuPassStrategy() : PassStrategy({}) {
passes_.assign({
"infer_clean_graph_pass", //
"identity_scale_op_clean_pass", //
"conv_affine_channel_fuse_pass", //
"conv_eltwiseadd_affine_channel_fuse_pass", //
"conv_bn_fuse_pass", //
#if CUDNN_VERSION >= 7100 // To run conv_fusion, the version of cudnn must be
// guaranteed at least v7
"conv_elementwise_add_act_fuse_pass", //
"conv_elementwise_add2_act_fuse_pass", //
"conv_elementwise_add_fuse_pass", //
#endif
});
for (int i = 6; i >= 3; i--) {
passes_.push_back("transpose_flatten" + std::to_string(i) +
"_concat_fuse_pass");
}
use_gpu_ = true;
}
GpuPassStrategy();
explicit GpuPassStrategy(const GpuPassStrategy &other)
: PassStrategy(other.AllPasses()) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册