提交 29c19e44 编写于 作者: L liuruilong

add split, format codes

上级 efb9f441
......@@ -14,6 +14,10 @@ limitations under the License. */
// Include guard. A preprocessor directive takes no trailing ';' — the extra
// token triggers a compiler warning.
#pragma once
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
namespace paddle_mobile {
enum class Precision : int { FP32 = 0 };
......@@ -67,4 +71,41 @@ enum PMStatus {
PMUnImplError = 0x07, /*!< Unimplement error. */
PMWrongDevice = 0x08 /*!< un-correct device. */
// Canonical op type names. Each constant holds the type string that
// identifies one operator kind; they are used, for example, as the keys of
// op_input_output_key and as the type_ of generated op descriptions.
static const std::string G_OP_TYPE_CONV = "conv2d";
static const std::string G_OP_TYPE_BATCHNORM = "batch_norm";
static const std::string G_OP_TYPE_BOX_CODER = "box_coder";
static const std::string G_OP_TYPE_CONCAT = "concat";
static const std::string G_OP_TYPE_ELEMENTWISE_ADD = "elementwise_add";
static const std::string G_OP_TYPE_FUSION_CONV_ADD_RELU = "FusionConvAddRelu";
static const std::string G_OP_TYPE_FC = "fc";
static const std::string G_OP_TYPE_LRN = "lrn";
static const std::string G_OP_TYPE_MUL = "mul";
static const std::string G_OP_TYPE_MULTICLASS_NMS = "multiclass_nms";
static const std::string G_OP_TYPE_POOL2D = "pool2d";
static const std::string G_OP_TYPE_PRIOR_BOX = "prior_box";
static const std::string G_OP_TYPE_RELU = "relu";
static const std::string G_OP_TYPE_RESHAPE = "reshape";
static const std::string G_OP_TYPE_SIGMOID = "sigmoid";
static const std::string G_OP_TYPE_SOFTMAX = "softmax";
static const std::string G_OP_TYPE_TRANSPOSE = "transpose";
static const std::string G_OP_TYPE_SPLIT = "split";
// Type names of the feed / fetch ops.
static const std::string G_OP_TYPE_FEED = "feed";
static const std::string G_OP_TYPE_FETCH = "fetch";
// Maps an op type name to a pair of key lists: `first` holds the op's input
// keys and `second` its output keys (e.g. conv2d reads "Input" and writes
// "Output").
// NOTE(review): the map is non-const and callers index it with operator[],
// which default-inserts an empty entry for an unknown op type — prefer find()
// when only looking up.
static std::unordered_map<
std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
op_input_output_key = {{G_OP_TYPE_CONV, {{"Input"}, {"Output"}}},
{G_OP_TYPE_RELU, {{"X"}, {"Out"}}},
{G_OP_TYPE_SOFTMAX, {{"X"}, {"Out"}}},
{G_OP_TYPE_MUL, {{"X"}, {"Out"}}},
{G_OP_TYPE_ELEMENTWISE_ADD, {{"X", "Y"}, {"Out"}}},
{G_OP_TYPE_POOL2D, {{"X"}, {"Out"}}},
{G_OP_TYPE_BATCHNORM, {{"X"}, {"Y"}}},
{G_OP_TYPE_LRN, {{"X"}, {"Out"}}},
{G_OP_TYPE_CONCAT, {{"X"}, {"Out"}}},
{G_OP_TYPE_SPLIT, {{"X"}, {"Out"}}},
{G_OP_TYPE_FEED, {{"X"}, {"Out"}}},
{G_OP_TYPE_FETCH, {{"X"}, {"Out"}}}};
} // namespace paddle_mobile
......@@ -19,61 +19,64 @@ limitations under the License. */
#include <utility>
#include <vector>
#include "common/enforce.h"
#include "common/type_define.h"
#include "common/types.h"
#include "common/enforce.h"
#include "common/variant.h"
#include "framework/attribute.h"
#include "framework/scope.h"
#include "framework/tensor.h"
#include "framework/op_info.h"
#include "framework/op_kernel_type.h"
#include "common/type_define.h"
#include "framework/variable.h"
#include "framework/attribute.h"
#include "framework/op_registry.h"
#include "framework/paddle_mobile_object.h"
#include "framework/op_kernel_type.h"
#include "framework/program/block_desc.h"
#include "framework/paddle_mobile_object.h"
#include "framework/program/program-optimize/node.h"
#include "framework/scope.h"
#include "framework/tensor.h"
#include "framework/variable.h"
namespace paddle_mobile {
namespace framework {
using std::string;
using std::vector;
// Maps an op type name to a pair of key lists: `first` holds the op's input
// keys and `second` its output keys.
// NOTE(review): a table with the same name, keyed by the G_OP_TYPE_* constants
// and additionally containing "split", is also defined in namespace
// paddle_mobile — confirm which of the two is meant to be kept.
static std::unordered_map<
std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
op_input_output_key = {{"conv2d", {{"Input"}, {"Output"}}},
{"relu", {{"X"}, {"Out"}}},
{"softmax", {{"X"}, {"Out"}}},
{"mul", {{"X"}, {"Out"}}},
{"elementwise_add", {{"X", "Y"}, {"Out"}}},
{"pool2d", {{"X"}, {"Out"}}},
{"batch_norm", {{"X"}, {"Y"}}},
{"lrn", {{"X"}, {"Out"}}},
{"concat", {{"X"}, {"Out"}}},
{"feed", {{"X"}, {"Out"}}},
{"fetch", {{"X"}, {"Out"}}}};
template <typename Dtype>
class OperatorBase : PaddleMobileObject {
* @b Constructor of the op base class; the op receives its inputs, its parameters, and the pre-allocated output tensors
* */
OperatorBase(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
std::shared_ptr<Scope> scope);
virtual ~OperatorBase() {}
void Run() const;
vector<string> GetOutKeys() const;
std::vector<string> GetOutKeys() const;
virtual void RunImpl() const = 0;
virtual void InferShape() const = 0;
* @b Inputs the op needs for its computation, e.g. the previous layer's output or the convolution kernels
* */
const VariableNameMap &Inputs() const { return inputs_; }
* @b Outputs of the op; their memory is allocated in advance and the computation results are stored into it
* */
const VariableNameMap &Outputs() const { return outputs_; }
* @b Type of the op
* */
const std::string &Type() const { return type_; }
* @b Attributes the op needs for its computation, e.g. the stride used by a conv op
* */
const AttributeMap &Attrs() const { return attrs_; }
void ClearVariables(const std::vector<std::string> &var_names) const {
if (this->scope_) {
* @b Computes the output shape from the input shapes and the attributes
* */
virtual void InferShape() const = 0;
std::shared_ptr<Scope> scope_;
std::string type_;
......@@ -85,6 +88,9 @@ class OperatorBase : PaddleMobileObject {
void CheckAllInputOutputSet() const;
* @b Base class of every op that performs a computation; it inherits from OperatorBase
* */
template <typename Dtype>
class OperatorWithKernel : public OperatorBase<Dtype> {
......@@ -97,11 +103,18 @@ class OperatorWithKernel : public OperatorBase<Dtype> {
virtual void InferShape() const = 0;
* @b Base class of all kernels
* */
template <typename Dtype, typename P>
class OpKernelBase : PaddleMobileObject {
* @b Every kernel must implement the Compute method
* @p para A struct bundling the parameters the kernel needs for its computation;
* all such structs are defined in: paddle-mobile/src/operators/op_param.h
* */
virtual void Compute(const P &para) const = 0;
virtual ~OpKernelBase() = default;
......@@ -118,8 +131,8 @@ class FusionOpMatcher : PaddleMobileObject {
virtual std::string Type() = 0;
virtual void FolderNodes(Node &node) {
node.Folder(node_.Depth(), Type(), {});
virtual void FolderNodes(Node *node) {
node->Folder(node_.Depth(), Type(), {});
virtual Node &BeginNode() { return node_; }
......@@ -14,6 +14,7 @@ limitations under the License. */
#include <sstream>
#include "framework/operator.h"
#include "framework/program/program-optimize/node.h"
namespace paddle_mobile {
......@@ -73,24 +74,79 @@ void Node::OpDescs(uint index,
void Node::OpDescs(std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
Node *node) {
auto iter = std::find(op_desc->begin(), op_desc->end(), this->op_desc_);
Node *node, bool adding_thread, int thread_num) {
bool can_add_split = false;
if (outputs_.size() > 1) {
can_add_split = true;
if (op_input_output_key[op_desc_->type_].second.size() != 1) {
DLOG << "当前 op desc 输出数不为 1 ";
can_add_split = false;
for (const auto& output : outputs_) {
if (op_input_output_key.find(output->op_desc_->type_) != op_input_output_key.end()) {
auto inputs_and_outputs = op_input_output_key[output->op_desc_->type_];
auto outputs_of_output = output->op_desc_->Output(inputs_and_outputs.second[0]);
auto inputs_of_output = output->op_desc_->Input(inputs_and_outputs.first[0]);
for (int i = 0; i < inputs_of_output.size(); ++i) {
std::string input_of_output = inputs_of_output[i];
for (int j = 0; j < outputs_of_output.size(); ++j) {
std::string output_of_output = outputs_of_output[j];
if (input_of_output == output_of_output) {
DLOG << "output的 output 包含 input" << input_of_output;
can_add_split = false;
} else {
DLOG << "找不到 这个 op 类型: " << output->op_desc_->type_;
can_add_split = false;
if (inputs_.size() > 1 && node != inputs_.back()) {
} else if (inputs_.size() > 1 && node == inputs_.back()) {
adding_thread = false;
} else {
if (adding_thread) {
Attribute attr;
this->op_desc_->attrs_["thread"] = attr;
for (auto &output : outputs_) {
output->OpDescs(op_desc, this);
if (can_add_split) {
adding_thread = true;
std::shared_ptr<class OpDesc> split_op_desc = std::make_shared<class OpDesc>();
split_op_desc->type_ = G_OP_TYPE_SPLIT;
auto outputs = this->op_desc_->Output(op_input_output_key[this->op_desc_->Type()].second[0]);
split_op_desc->inputs_ = {{op_input_output_key[G_OP_TYPE_SPLIT].first[0], outputs}};
auto &split_outputs = split_op_desc->outputs_[op_input_output_key[G_OP_TYPE_SPLIT].second[0]];
for (const auto& output : outputs_) {
DLOG << "add split";
for (int i = 0; i < outputs_.size(); ++i) {
auto &output = outputs_[i];
if (can_add_split) {
output->OpDescs(op_desc, this, adding_thread, i);
} else {
output->OpDescs(op_desc, this, adding_thread, thread_num);
std::vector<std::shared_ptr<framework::OpDesc>> Node::OpDescs() {
std::vector<std::shared_ptr<framework::OpDesc>> op_descs;
OpDescs(&op_descs, this);
OpDescs(&op_descs, this, false, 0);
return op_descs;
......@@ -42,13 +42,13 @@ class Node : PaddleMobileObject {
std::map<std::string, std::pair<std::string, std::string>> change_map);
std::vector<std::shared_ptr<framework::OpDesc>> OpDescs(uint size);
std::vector<std::shared_ptr<framework::OpDesc>> OpDescs();
void OpDescs(std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
Node *node);
std::shared_ptr<framework::OpDesc> OpDesc() { return op_desc_; }
std::string BeginType() { return type_; }
void Description();
void OpDescs(std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
Node *node, bool adding_thread, int thread_num);
void OpDescs(uint size,
std::vector<std::shared_ptr<framework::OpDesc>> *op_desc);
void To(int index, std::shared_ptr<Node>);
......@@ -19,7 +19,7 @@ namespace paddle_mobile {
namespace framework {
std::shared_ptr<ProgramDesc> ProgramOptimize::Optimize() {}
//std::shared_ptr<ProgramDesc> ProgramOptimize::Optimize() {}
std::shared_ptr<ProgramDesc> ProgramOptimize::FushionOptimize(
std::shared_ptr<ProgramDesc> ori_des) {
......@@ -86,7 +86,7 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FushionOptimize(
// DLOG << " match success " << " fusion node: \n" <<
// matcher->BeginNode() << "\nsub node: \n" << *sub_node;
// DLOG << "match node\n"<< *match_node;
// DLOG << " after match node\n"<< *match_node;
// match_node->Description();
......@@ -27,7 +27,6 @@ namespace framework {
class ProgramOptimize {
ProgramOptimize() {}
std::shared_ptr<ProgramDesc> Optimize();
std::shared_ptr<ProgramDesc> FushionOptimize(
std::shared_ptr<ProgramDesc> ori_des);
......@@ -15,15 +15,18 @@ limitations under the License. */
#include "io.h"
#include <fstream>
#include <vector>
#include "common/enforce.h"
#include "common/log.h"
#include "framework/framework.pb-c.h"
#include "framework/lod_tensor.h"
#include "framework/operator.h"
#include "framework/program/program_desc.h"
#include "framework/program/var_desc.h"
#include "common/enforce.h"
#include "common/enforce.h"
#include "framework/scope.h"
#include "framework/tensor.h"
#include "framework/operator.h"
#include "framework/lod_tensor.h"
#include "framework/framework.pb-c.h"
#include "framework/program/var_desc.h"
#include "framework/program/program_desc.h"
#include "framework/program/program-optimize/program_optimize.h"
namespace paddle_mobile {
using framework::Variable;
......@@ -166,7 +169,7 @@ void Loader<Dtype, P>::LoadVar(framework::Variable *variable,
template <typename Dtype, Precision P>
const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
const std::string &dirname) {
const std::string &dirname, bool optimize) {
std::string model_filename = dirname + "/__model__";
PaddleMobile__Framework__Proto__ProgramDesc *c_program;
uint8_t *buf = NULL;
......@@ -199,11 +202,11 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
// DLOG << "var name-- " << var_desc->Name();
auto var = scope->Var(var_desc->Name());
if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
if (var_desc->Persistable() &&
var_desc->Type() != framework::VARTYPE_TYPE_FEED_MINIBATCH &&
var_desc->Type() != framework::VARTYPE_TYPE_FETCH_LIST) {
// DLOG << "to load var ";
auto dim = var_desc->Tensor_desc().Dims();
auto tensor = var->GetMutable<framework::LoDTensor>();
......@@ -219,8 +222,12 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
// originProgramDesc->Description("program: ");
originProgramDesc->Description("program: ");
if (optimize) {
framework::ProgramOptimize program_optimize;
program.optimizeProgram = program_optimize.FushionOptimize(originProgramDesc);
paddle_mobile__framework__proto__program_desc__free_unpacked(c_program, NULL);
return program;
......@@ -231,33 +238,8 @@ template class Loader<CPU, Precision::FP32>;
#pragma mark - executor
template <typename Dtype, Precision P>
Executor<Dtype, P>::Executor(const framework::Program<Dtype> p) : program_(p) {
if (use_optimize_) {
to_predict_program_ = program_.optimizeProgram;
} else {
to_predict_program_ = program_.originProgram;
const std::vector<std::shared_ptr<framework::BlockDesc>> blocks =
for (int i = 0; i < blocks.size(); ++i) {
std::shared_ptr<framework::BlockDesc> block_desc = blocks[i];
std::vector<std::shared_ptr<framework::OpDesc>> ops = block_desc->Ops();
for (int j = 0; j < ops.size(); ++j) {
std::shared_ptr<framework::OpDesc> op = ops[j];
auto op_base = framework::OpRegistry<Dtype>::CreateOp(
op->Type(), op->GetInputs(), op->GetOutputs(), op->GetAttrMap(),
template <typename Dtype, Precision P>
Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size)
: program_(p), batch_size_(batch_size) {
Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size, bool use_optimize)
: program_(p), batch_size_(batch_size), use_optimize_(use_optimize) {
if (use_optimize_) {
to_predict_program_ = program_.optimizeProgram;
} else {
......@@ -389,7 +371,7 @@ void Executor<Dtype, P>::InitMemory() {
template <typename Dtype, Precision P>
void Executor<Dtype, P>::predict(const framework::Tensor &t, int block_id) {
void Executor<Dtype, P>::Predict(const framework::Tensor &t, int block_id) {
framework::Variable *g_feed_value = program_.scope->Var("feed");
framework::Tensor *feed_tensor =
......@@ -404,11 +386,11 @@ void Executor<Dtype, P>::predict(const framework::Tensor &t, int block_id) {
template <typename Dtype, Precision P>
std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::predict(
std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict(
const std::vector<Ptype> &input, const std::vector<int64_t> &dims) {
framework::Tensor tensor(input, framework::make_ddim(dims));
predict(tensor, 0);
Predict(tensor, 0);
framework::Variable *g_feed_value = program_.scope->Var("col");
auto feed_tensor = g_feed_value->GetMutable<framework::Tensor>();
......@@ -30,7 +30,7 @@ namespace paddle_mobile {
template <typename Dtype, Precision P = Precision::FP32>
class Loader : PaddleMobileObject {
const framework::Program<Dtype, P> Load(const std::string &dirname);
const framework::Program<Dtype, P> Load(const std::string &dirname, bool optimize = true);
void LoadVar(framework::Variable *variable,
......@@ -45,13 +45,11 @@ class Executor {
Executor() = default;
Executor(const framework::Program<Dtype> p);
Executor(const framework::Program<Dtype> p, int batch_size = 1, bool use_optimize = true);
Executor(const framework::Program<Dtype> p, int batch_size);
// std::shared_ptr<framework::Tensor> Predict(framework::Tensor &t);
std::shared_ptr<framework::Tensor> predict(framework::Tensor &t);
std::vector<Ptype> predict(const std::vector<Ptype> &input,
std::vector<Ptype> Predict(const std::vector<Ptype> &input,
const std::vector<int64_t> &dims);
......@@ -61,7 +59,7 @@ class Executor {
framework::Program<Dtype> program_;
int batch_size_ = 1;
std::shared_ptr<framework::ProgramDesc> to_predict_program_;
void predict(const framework::Tensor &t, int block_id);
void Predict(const framework::Tensor &t, int block_id);
......@@ -23,18 +23,17 @@ namespace operators {
class FushionConvAddReluOpMatcher : public framework::FusionOpMatcher {
FushionConvAddReluOpMatcher() {
node_ = framework::Node("conv2d");
node_ > std::make_shared<framework::Node>("elementwise_add") >
node_ = framework::Node(G_OP_TYPE_CONV);
node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
void FolderNodes(framework::Node &node) {
std::vector<std::shared_ptr<framework::OpDesc>> origin_descs =
node.Folder(node_.Depth(), Type(), {{"elementwise_add", {"Y", "Z"}}});
node.Folder(node_.Depth(), Type(), {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Z"}}});
std::string Type() { return "FusionConvAddRelu"; }
std::string Type() { return G_OP_TYPE_FUSION_CONV_ADD_RELU; }
class FusionFcOp {
......@@ -28,17 +28,17 @@ using std::vector;
class FusionFcMatcher : public framework::FusionOpMatcher {
FusionFcMatcher() {
node_ = framework::Node("mul");
node_ > std::make_shared<framework::Node>("elementwise_add");
node_ = framework::Node(G_OP_TYPE_MUL);
node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD);
void FolderNodes(framework::Node &node) {
vector<std::shared_ptr<framework::OpDesc>> origin_descs =
node.Folder(node_.Depth(), Type(), {{"elementwise_add", {"Y", "Z"}}});
node.Folder(node_.Depth(), Type(), {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Z"}}});
std::string Type() { return "fc"; }
std::string Type() { return G_OP_TYPE_FC; }
template <typename DeviceType, typename T>
......@@ -20,11 +20,15 @@ limitations under the License. */
namespace paddle_mobile {
namespace operators {
// ReLU functor: returns the input unchanged when it is greater than zero,
// otherwise returns 0.
template <typename T>
struct ReluFunctor {
inline T operator()(T in) const { return in > 0 ? in : 0; }
* @b Implementation specialized for a concrete platform; param is passed in from the op layer
* */
template <>
void ReluKernel<CPU, float>::Compute(const ReluParam &param) const {
const auto *input_x = param.InputX();
......@@ -696,6 +696,9 @@ class ReshapeParam : public OpParam {
bool inplace_;
* @b The op layer instantiates this param and passes it to the kernel layer for use
* */
class ReluParam : public OpParam {
ReluParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
......@@ -725,7 +728,6 @@ class FushionFcParam : public OpParam {
y_num_col_dims_ = GetAttr<int>("y_num_col_dims", attrs);
axis_ = GetAttr<int>("axis", attrs);
const Tensor *InputX() const { return input_x_; }
const Tensor *InputY() const { return input_y_; }
......@@ -25,6 +25,10 @@ template class ReluOp<CPU, float>;
} // namespace operators
} // namespace paddle_mobile
* @b Every op must be registered;
* the argument of USE_OP and the first argument of REGISTER_OPERATOR must both match the op type used in the model
* */
namespace ops = paddle_mobile::operators;
REGISTER_OPERATOR(relu, ops::ReluOp);
......@@ -28,6 +28,9 @@ using paddle_mobile::framework::Tensor;
template <typename DeviceType, typename T>
class ReluOp : public framework::OperatorWithKernel<DeviceType> {
* @b Constructor of the op; it must call the parent class constructor and instantiate its own param struct
* */
ReluOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const framework::AttributeMap attrs,
std::shared_ptr<framework::Scope> scope)
......@@ -35,6 +38,9 @@ class ReluOp : public framework::OperatorWithKernel<DeviceType> {
param_(inputs, outputs, attrs, *scope) {}
* @b Runs the op by invoking the corresponding kernel
* */
void RunImpl() const {
operators::ReluKernel<DeviceType, T> kernel;
......@@ -44,6 +50,10 @@ class ReluOp : public framework::OperatorWithKernel<DeviceType> {
void InferShape() const override;
* @b Struct holding the parameters the Relu kernel needs for its computation;
* the struct is defined in: paddle-mobile/src/operators/op_param.h
* */
ReluParam param_;
......@@ -17,7 +17,7 @@ limitations under the License. */
#include <string>
#include <vector>
#include "common/io.h"
#include "io.h"
#include "common/log.h"
#include "framework/op_registry.h"
#include "operators/conv_op.h"
......@@ -77,7 +77,7 @@ class Executor4Test : public Executor<DeviceType> {
template <typename T = LoDTensor>
vector<std::shared_ptr<Tensor>> predict(const vector<Tensor> &ts,
vector<std::shared_ptr<Tensor>> Predict(const vector<Tensor> &ts,
const vector<string> &input_names,
const vector<string> &output_names,
const vector<DDim> &ddims) {
......@@ -116,7 +116,7 @@ class Executor4Test : public Executor<DeviceType> {
return output_tensor_sptrs;
std::shared_ptr<Tensor> predict(const Tensor &t, string input, string output,
std::shared_ptr<Tensor> Predict(const Tensor &t, string input, string output,
const DDim &dDim) {
auto scope = this->program_.scope;
Variable *g_feed_value = scope->Var(input);
......@@ -12,13 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "common/io.h"
#include "io.h"
#include "../test_helper.h"
int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader;
// ../../../test/models/googlenet
// ../../../test/models/mobilenet
auto program = loader.Load(std::string("../models/googlenet"));
auto program = loader.Load(g_googlenet);
program.optimizeProgram->Description("program desc: ");
return 0;
......@@ -12,16 +12,17 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "common/io.h"
#include "io.h"
#include "../test_helper.h"
#include "framework/program/program-optimize/node.h"
#include "framework/program/program-optimize/program_optimize.h"
int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader;
// "../../../test/models/googlenet"
auto program = loader.Load("../models/googlenet");
auto program = loader.Load(g_googlenet);
paddle_mobile::framework::ProgramOptimize optimize;
// program.originProgram->Description("origin");
// program.originProgram->Description("origin");
auto optimize_program = optimize.FushionOptimize(program.originProgram);
if (optimize_program != nullptr) {
......@@ -21,16 +21,16 @@ int main() {
// ../../../test/models/googlenet
// ../../../test/models/mobilenet
auto time1 = time();
auto program = loader.Load(std::string("../models/googlenet"));
auto program = loader.Load(g_googlenet, false);
auto time2 = time();
// Load cost is the time elapsed between time1 (taken before Load) and time2
// (taken after Load); diffing time1 against itself always reports zero.
DLOG << "load cost :" << time_diff(time1, time2) << "ms";
paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1);
paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1, false);
std::vector<float> input;
std::vector<int64_t> dims{1, 3, 224, 224};
GetInput<float>(g_test_image_1x3x224x224, &input, dims);
auto time3 = time();
executor.predict(input, dims);
executor.Predict(input, dims);
auto time4 = time();
DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
return 0;
......@@ -129,7 +129,7 @@ int main() {
DLOG << "begin to run BatchNormOp Test";
paddle_mobile::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string(
/// input x (4,10,2,2)
paddle_mobile::framework::Tensor inputx1;
......@@ -116,7 +116,7 @@ int main() {
DLOG << "----------**********----------";
DLOG << "begin to run BoxCoderOp Test";
paddle_mobile::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string("../../test/models/mobilenet+ssd"));
auto program = loader.Load(std::string(g_mobilenet_ssd));
paddle_mobile::framework::Tensor priorbox;
SetupTensor<float>(&priorbox, {1917, 4}, static_cast<float>(0),
......@@ -57,7 +57,7 @@ int main() {
auto out_ddim = paddle_mobile::framework::make_ddim({3, 100, 2, 2});
auto output = executor.predict<LoDTensor>(input_tensors, input_names,
auto output = executor.Predict<LoDTensor>(input_tensors, input_names,
output_names, out_ddims);
auto output0_data = output[0]->data<float>();
......@@ -34,7 +34,7 @@ int main() {
// static_cast<float>(1));
auto out_ddim = paddle_mobile::framework::make_ddim({1, 64, 112, 112});
auto output = executor.predict(input, "data", "conv2d_0.tmp_0", out_ddim);
auto output = executor.Predict(input, "data", "conv2d_0.tmp_0", out_ddim);
auto output_ptr = output->data<float>();
for (int j = 0; j < output->numel(); ++j) {
......@@ -50,7 +50,7 @@ int main() {
auto out_ddim = paddle_mobile::framework::make_ddim({1, 3, 224, 224});
auto output = executor.predict<LoDTensor>(input_tensors, input_names,
auto output = executor.Predict<LoDTensor>(input_tensors, input_names,
output_names, out_ddims);
auto output0_data = output[0]->data<float>();
......@@ -116,7 +116,7 @@ int main() {
DLOG << "begin to run Fc Test";
paddle_mobile::Loader<paddle_mobile::CPU> loader;
// "../../../test/models/googlenet"
auto program = loader.Load("../models/googlenet");
auto program = loader.Load(g_googlenet);
paddle_mobile::framework::ProgramOptimize optimize;
// program.originProgram->Description("origin");
auto optimize_program = optimize.FushionOptimize(program.originProgram);
......@@ -46,7 +46,7 @@ int main() {
auto out_ddim = paddle_mobile::framework::make_ddim({3, 4, 2, 2});
auto output = executor.predict<LoDTensor>(input_tensors, input_names,
auto output = executor.Predict<LoDTensor>(input_tensors, input_names,
output_names, out_ddims);
auto output0_data = output[0]->data<float>();
......@@ -50,7 +50,7 @@ int main() {
auto out_ddim = paddle_mobile::framework::make_ddim({3, 3});
auto output = executor.predict<LoDTensor>(input_tensors, input_names,
auto output = executor.Predict<LoDTensor>(input_tensors, input_names,
output_names, out_ddims);
auto output0_data = output[0]->data<float>();
......@@ -14,11 +14,11 @@ limitations under the License. */
#include "../executor_for_test.h"
#include "../test_helper.h"
#include "common/io.h"
#include "io.h"
int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string("../models/googlenet"));
auto program = loader.Load(std::string(g_googlenet));
if (program.originProgram == nullptr) {
DLOG << "program read file";
......@@ -32,7 +32,7 @@ int main() {
auto out_ddim = paddle_mobile::framework::make_ddim({1, 64, 56, 56});
auto output =
executor.predict(input, "conv2d_0.tmp_1", "pool2d_0.tmp_0", out_ddim);
executor.Predict(input, "conv2d_0.tmp_1", "pool2d_0.tmp_0", out_ddim);
float *output_ptr = output->data<float>();
for (int j = 0; j < output->numel(); ++j) {
......@@ -127,7 +127,7 @@ int main() {
DLOG << "----------**********----------";
DLOG << "begin to run PriorBoxOp Test";
paddle_mobile::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string("../../test/models/mobilenet+ssd"));
auto program = loader.Load(std::string(g_mobilenet_ssd));
/// input x (1,3,300,300)
paddle_mobile::framework::Tensor input_image;
......@@ -46,7 +46,7 @@ int main() {
auto out_ddim = paddle_mobile::framework::make_ddim({1, 2, 3, 4});
auto output = executor.predict<LoDTensor>(input_tensors, input_names,
auto output = executor.Predict<LoDTensor>(input_tensors, input_names,
output_names, out_ddims);
auto output0_data = output[0]->data<float>();
......@@ -14,11 +14,11 @@ limitations under the License. */
#include "../executor_for_test.h"
#include "../test_helper.h"
#include "common/io.h"
#include "io.h"
int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string("../../test/models/mobilenet+ssd"));
auto program = loader.Load(std::string(g_mobilenet_ssd));
if (program.originProgram == nullptr) {
DLOG << "program read file";
......@@ -31,7 +31,7 @@ int main() {
auto input_ptr = input.data<float>();
auto out_ddim = paddle_mobile::framework::make_ddim({2, 9, 2});
auto output =
executor.predict(input, "transpose_0.tmp_0", "reshape_0.tmp_0", out_ddim);
executor.Predict(input, "transpose_0.tmp_0", "reshape_0.tmp_0", out_ddim);
auto *output_ptr = output->data<float>();
DLOG << "input : ";
......@@ -14,7 +14,7 @@ limitations under the License. */
#include "../../src/operators/kernel/sigmoid_kernel.h"
#include "../test_helper.h"
#include "common/io.h"
#include "io.h"
int main() {
paddle_mobile::framework::Tensor input;
......@@ -14,11 +14,11 @@ limitations under the License. */
#include "../executor_for_test.h"
#include "../test_helper.h"
#include "common/io.h"
#include "io.h"
int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string("../models/mobilenet"));
auto program = loader.Load(std::string(g_mobilenet));
if (program.originProgram == nullptr) {
DLOG << "program read file";
......@@ -30,7 +30,7 @@ int main() {
auto out_ddim = paddle_mobile::framework::make_ddim({1, 1000});
auto output =
executor.predict(input, "reshape_0.tmp_0", "softmax_0.tmp_0", out_ddim);
executor.Predict(input, "reshape_0.tmp_0", "softmax_0.tmp_0", out_ddim);
auto *output_ptr = output->data<float>();
for (int j = 0; j < output->numel(); ++j) {
DLOG << " value of output: " << output_ptr[j];
......@@ -14,11 +14,11 @@ limitations under the License. */
#include "../executor_for_test.h"
#include "../test_helper.h"
#include "common/io.h"
#include "io.h"
int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string("../../test/models/mobilenet+ssd"));
auto program = loader.Load(std::string(g_mobilenet_ssd));
if (program.originProgram == nullptr) {
DLOG << "program read file";
......@@ -31,7 +31,7 @@ int main() {
auto input_ptr = input.data<float>();
auto out_ddim = paddle_mobile::framework::make_ddim({1, 3, 4, 2});
auto output =
executor.predict(input, "conv2d_22.tmp_1", "transpose_0.tmp_0", out_ddim);
executor.Predict(input, "conv2d_22.tmp_1", "transpose_0.tmp_0", out_ddim);
auto *output_ptr = output->data<float>();
DLOG << "input : ";
......@@ -15,8 +15,8 @@ limitations under the License. */
#pragma once
#include <chrono>
#include <fstream>
#include <random>
#include <fstream>
#include "common/log.h"
#include "framework/ddim.h"
......@@ -20,7 +20,7 @@ limitations under the License. */
#include "./test_helper.h"
#include "common/enforce.h"
#include "common/io.h"
#include "io.h"
#include "common/log.h"
#include "framework/lod_tensor.h"
#include "framework/operator.h"
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
想要评论请 注册