Commit 410e5bdf authored by: E eclipsess

conflict

@@ -14,6 +14,10 @@ limitations under the License. */
 #pragma once
+#include <string>
+#include <unordered_map>
+#include <utility>
 namespace paddle_mobile {
 enum class Precision : int { FP32 = 0 };
@@ -67,4 +71,41 @@ enum PMStatus {
   PMUnImplError = 0x07, /*!< Unimplemented error. */
   PMWrongDevice = 0x08  /*!< Incorrect device. */
 };
+static const std::string G_OP_TYPE_CONV = "conv2d";
+static const std::string G_OP_TYPE_BATCHNORM = "batch_norm";
+static const std::string G_OP_TYPE_BOX_CODER = "box_coder";
+static const std::string G_OP_TYPE_CONCAT = "concat";
+static const std::string G_OP_TYPE_ELEMENTWISE_ADD = "elementwise_add";
+static const std::string G_OP_TYPE_FUSION_CONV_ADD_RELU = "FusionConvAddRelu";
+static const std::string G_OP_TYPE_FC = "fc";
+static const std::string G_OP_TYPE_LRN = "lrn";
+static const std::string G_OP_TYPE_MUL = "mul";
+static const std::string G_OP_TYPE_MULTICLASS_NMS = "multiclass_nms";
+static const std::string G_OP_TYPE_POOL2D = "pool2d";
+static const std::string G_OP_TYPE_PRIOR_BOX = "prior_box";
+static const std::string G_OP_TYPE_RELU = "relu";
+static const std::string G_OP_TYPE_RESHAPE = "reshape";
+static const std::string G_OP_TYPE_SIGMOID = "sigmoid";
+static const std::string G_OP_TYPE_SOFTMAX = "softmax";
+static const std::string G_OP_TYPE_TRANSPOSE = "transpose";
+static const std::string G_OP_TYPE_SPLIT = "split";
+static const std::string G_OP_TYPE_FEED = "feed";
+static const std::string G_OP_TYPE_FETCH = "fetch";
+static std::unordered_map<
+    std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
+    op_input_output_key = {{G_OP_TYPE_CONV, {{"Input"}, {"Output"}}},
+                           {G_OP_TYPE_RELU, {{"X"}, {"Out"}}},
+                           {G_OP_TYPE_SOFTMAX, {{"X"}, {"Out"}}},
+                           {G_OP_TYPE_MUL, {{"X"}, {"Out"}}},
+                           {G_OP_TYPE_ELEMENTWISE_ADD, {{"X", "Y"}, {"Out"}}},
+                           {G_OP_TYPE_POOL2D, {{"X"}, {"Out"}}},
+                           {G_OP_TYPE_BATCHNORM, {{"X"}, {"Y"}}},
+                           {G_OP_TYPE_LRN, {{"X"}, {"Out"}}},
+                           {G_OP_TYPE_CONCAT, {{"X"}, {"Out"}}},
+                           {G_OP_TYPE_SPLIT, {{"X"}, {"Out"}}},
+                           {G_OP_TYPE_FEED, {{"X"}, {"Out"}}},
+                           {G_OP_TYPE_FETCH, {{"X"}, {"Out"}}},
+                           {G_OP_TYPE_RESHAPE, {{"X"}, {"Out"}}}};
 }  // namespace paddle_mobile
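For reference, a minimal sketch of how this registry is meant to be consumed (hypothetical usage; it assumes the `G_OP_TYPE_*` constants and `op_input_output_key` defined above are in scope):

#include <iostream>
int main() {
  using namespace paddle_mobile;
  // Look up the canonical input/output keys of elementwise_add.
  auto it = op_input_output_key.find(G_OP_TYPE_ELEMENTWISE_ADD);
  if (it != op_input_output_key.end()) {
    for (const auto &in : it->second.first) {    // "X", "Y"
      std::cout << "input key: " << in << std::endl;
    }
    for (const auto &out : it->second.second) {  // "Out"
      std::cout << "output key: " << out << std::endl;
    }
  }
  return 0;
}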
@@ -23,6 +23,7 @@ vector<string> OperatorBase<Dtype>::GetOutKeys() const {
   auto it = op_input_output_key.find(type_);
   if (it == op_input_output_key.end()) {
     DLOG << type_ << " has no outputs";
+    return {};
   }
   return it->second.second;
 }
......
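The added `return {};` is the actual fix here: previously an unregistered op type fell through and dereferenced the `end()` iterator, which is undefined behavior. A caller-side sketch (the `op` pointer is hypothetical):

// GetOutKeys() now returns an empty vector for unknown op types.
std::vector<std::string> keys = op->GetOutKeys();
if (keys.empty()) {
  // nothing to fetch for this op type
}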
@@ -38,42 +38,46 @@ namespace paddle_mobile {
 namespace framework {
 using std::string;
 using std::vector;
-static std::unordered_map<
-    std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
-    op_input_output_key = {{"conv2d", {{"Input"}, {"Output"}}},
-                           {"relu", {{"X"}, {"Out"}}},
-                           {"softmax", {{"X"}, {"Out"}}},
-                           {"mul", {{"X"}, {"Out"}}},
-                           {"elementwise_add", {{"X", "Y"}, {"Out"}}},
-                           {"pool2d", {{"X"}, {"Out"}}},
-                           {"batch_norm", {{"X"}, {"Y"}}},
-                           {"lrn", {{"X"}, {"Out"}}},
-                           {"concat", {{"X"}, {"Out"}}},
-                           {"feed", {{"X"}, {"Out"}}},
-                           {"fetch", {{"X"}, {"Out"}}},
-                           {"reshape", {{"X"}, {"Out"}}}};
 template <typename Dtype>
 class OperatorBase : PaddleMobileObject {
  public:
+  /*
+   * @b Constructor for the op base class; the op receives its inputs,
+   *    attributes, and the pre-allocated output tensors.
+   * */
   OperatorBase(const std::string &type, const VariableNameMap &inputs,
                const VariableNameMap &outputs, const AttributeMap &attrs,
                std::shared_ptr<Scope> scope);
   virtual ~OperatorBase() {}
   void Run() const;
-  vector<string> GetOutKeys() const;
+  std::vector<string> GetOutKeys() const;
   virtual void RunImpl() const = 0;
-  virtual void InferShape() const = 0;
+  /*
+   * @b Inputs the op's computation needs, e.g. the previous layer's output
+   *    or the convolution filters.
+   * */
   const VariableNameMap &Inputs() const { return inputs_; }
+  /*
+   * @b Outputs of the op; their memory is allocated in advance and the
+   *    results are written into it.
+   * */
   const VariableNameMap &Outputs() const { return outputs_; }
+  /*
+   * @b The op type.
+   * */
   const std::string &Type() const { return type_; }
+  /*
+   * @b Attributes the op's computation needs, e.g. the stride of a conv.
+   * */
   const AttributeMap &Attrs() const { return attrs_; }
   void ClearVariables(const std::vector<std::string> &var_names) const {
     if (this->scope_) {
       this->scope_->EraseVars(var_names);
     }
   }
+  /*
+   * @b Computes the output shape from the input shapes and attributes.
+   * */
+  virtual void InferShape() const = 0;
  protected:
   std::shared_ptr<Scope> scope_;
@@ -86,6 +90,9 @@ class OperatorBase : PaddleMobileObject {
   void CheckAllInputOutputSet() const;
 };
+/*
+ * @b Parent class for all ops that perform computation; it inherits from
+ *    OperatorBase.
+ * */
 template <typename Dtype>
 class OperatorWithKernel : public OperatorBase<Dtype> {
  public:
@@ -98,11 +105,18 @@ class OperatorWithKernel : public OperatorBase<Dtype> {
   virtual void InferShape() const = 0;
 };
+/*
+ * @b Parent class for all kernels.
+ * */
 template <typename Dtype, typename P>
 class OpKernelBase : PaddleMobileObject {
  public:
+  /*
+   * @b Every kernel must implement the Compute method.
+   * @p para a struct holding the parameters the kernel needs for its
+   *    computation; all of these structs live in
+   *    paddle-mobile/src/operators/op_param.h
+   * */
   virtual void Compute(const P &para) const = 0;
   virtual ~OpKernelBase() = default;
 };
@@ -119,8 +133,8 @@ class FusionOpMatcher : PaddleMobileObject {
   virtual std::string Type() = 0;
-  virtual void FolderNodes(Node &node) {
-    node.Folder(node_.Depth(), Type(), {});
+  virtual void FolderNodes(Node *node) {
+    node->Folder(node_.Depth(), Type(), {});
   }
   virtual Node &BeginNode() { return node_; }
......
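A minimal sketch of how a `FusionOpMatcher` subclass is written against this interface (hypothetical fused type name; it mirrors the conv+add+relu and fc matchers further down in this commit):

class DemoAddReluMatcher : public framework::FusionOpMatcher {
 public:
  DemoAddReluMatcher() {
    // Describe the pattern: elementwise_add followed by relu.
    node_ = framework::Node(G_OP_TYPE_ELEMENTWISE_ADD);
    node_ > std::make_shared<framework::Node>(G_OP_TYPE_RELU);
  }
  std::string Type() { return "DemoAddRelu"; }  // hypothetical fused op type
};

// The optimizer then collapses a matched subgraph in place, using the new
// pointer-based signature:
//   matcher->FolderNodes(match_node.get());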
@@ -14,6 +14,7 @@ limitations under the License. */
 #include <sstream>
+#include "framework/operator.h"
 #include "framework/program/program-optimize/node.h"
 namespace paddle_mobile {
@@ -73,24 +74,86 @@ void Node::OpDescs(uint index,
 }
 void Node::OpDescs(std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
-                   Node *node) {
-  auto iter = std::find(op_desc->begin(), op_desc->end(), this->op_desc_);
+                   Node *node, bool adding_thread, int thread_num) {
+  bool can_add_split = false;
+  if (outputs_.size() > 1) {
+    can_add_split = true;
+    if (op_input_output_key[op_desc_->type_].second.size() != 1) {
+      DLOG << "current op desc does not have exactly 1 output";
+      can_add_split = false;
+    }
+    for (const auto &output : outputs_) {
+      if (op_input_output_key.find(output->op_desc_->type_) !=
+          op_input_output_key.end()) {
+        auto inputs_and_outputs = op_input_output_key[output->op_desc_->type_];
+        auto outputs_of_output =
+            output->op_desc_->Output(inputs_and_outputs.second[0]);
+        auto inputs_of_output =
+            output->op_desc_->Input(inputs_and_outputs.first[0]);
+        for (int i = 0; i < inputs_of_output.size(); ++i) {
+          std::string input_of_output = inputs_of_output[i];
+          for (int j = 0; j < outputs_of_output.size(); ++j) {
+            std::string output_of_output = outputs_of_output[j];
+            if (input_of_output == output_of_output) {
+              DLOG << "the output's outputs contain input " << input_of_output;
+              can_add_split = false;
+              break;
+            }
+          }
+        }
+      } else {
+        DLOG << "cannot find this op type: " << output->op_desc_->type_;
+        can_add_split = false;
+      }
+    }
+  }
   if (inputs_.size() > 1 && node != inputs_.back()) {
     return;
   } else if (inputs_.size() > 1 && node == inputs_.back()) {
+    adding_thread = false;
     op_desc->push_back(this->op_desc_);
   } else {
     op_desc->push_back(this->op_desc_);
   }
+  if (adding_thread) {
+    Attribute attr;
+    attr.Set<int>(thread_num);
+    this->op_desc_->attrs_["thread"] = attr;
+  }
-  for (auto &output : outputs_) {
-    output->OpDescs(op_desc, this);
+  if (can_add_split) {
+    adding_thread = true;
+    std::shared_ptr<class OpDesc> split_op_desc =
+        std::make_shared<class OpDesc>();
+    split_op_desc->type_ = G_OP_TYPE_SPLIT;
+    auto outputs = this->op_desc_->Output(
+        op_input_output_key[this->op_desc_->Type()].second[0]);
+    split_op_desc->inputs_ = {
+        {op_input_output_key[G_OP_TYPE_SPLIT].first[0], outputs}};
+    auto &split_outputs =
+        split_op_desc->outputs_[op_input_output_key[G_OP_TYPE_SPLIT].second[0]];
+    for (const auto &output : outputs_) {
+      split_outputs.push_back(outputs[0]);
+    }
+    DLOG << "add split";
+    op_desc->push_back(split_op_desc);
+  }
+  for (int i = 0; i < outputs_.size(); ++i) {
+    auto &output = outputs_[i];
+    if (can_add_split) {
+      output->OpDescs(op_desc, this, adding_thread, i);
+    } else {
+      output->OpDescs(op_desc, this, adding_thread, thread_num);
+    }
   }
 }
 std::vector<std::shared_ptr<framework::OpDesc>> Node::OpDescs() {
   std::vector<std::shared_ptr<framework::OpDesc>> op_descs;
-  OpDescs(&op_descs, this);
+  OpDescs(&op_descs, this, false, 0);
   return op_descs;
 }
......
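In effect, when a node's single output tensor feeds several consumers, this pass splices in a `split` op and then tags every op on branch `i` with an int attribute `"thread" = i`. A sketch of the rewrite it performs (the tensor name is illustrative):

//  Before:  conv2d ──> {branch_a, branch_b}      (one tensor, two readers)
//  After:   conv2d ──> split ──> {branch_a, branch_b}
auto split_op_desc = std::make_shared<framework::OpDesc>();
split_op_desc->type_ = G_OP_TYPE_SPLIT;
split_op_desc->inputs_ = {
    {op_input_output_key[G_OP_TYPE_SPLIT].first[0],  // "X"
     {"conv2d_0.tmp_0"}}};                           // illustrative tensor name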
@@ -42,13 +42,13 @@ class Node : PaddleMobileObject {
       std::map<std::string, std::pair<std::string, std::string>> change_map);
   std::vector<std::shared_ptr<framework::OpDesc>> OpDescs(uint size);
   std::vector<std::shared_ptr<framework::OpDesc>> OpDescs();
-  void OpDescs(std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
-               Node *node);
   std::shared_ptr<framework::OpDesc> OpDesc() { return op_desc_; }
   std::string BeginType() { return type_; }
   void Description();
  private:
+  void OpDescs(std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
+               Node *node, bool adding_thread, int thread_num);
   void OpDescs(uint size,
                std::vector<std::shared_ptr<framework::OpDesc>> *op_desc);
   void To(int index, std::shared_ptr<Node>);
......
@@ -19,7 +19,7 @@ namespace paddle_mobile {
 namespace framework {
-std::shared_ptr<ProgramDesc> ProgramOptimize::Optimize() {}
+// std::shared_ptr<ProgramDesc> ProgramOptimize::Optimize() {}
 std::shared_ptr<ProgramDesc> ProgramOptimize::FushionOptimize(
     std::shared_ptr<ProgramDesc> ori_des) {
@@ -86,7 +86,7 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FushionOptimize(
   //          DLOG << " match success " << " fusion node: \n" <<
   //          matcher->BeginNode() << "\nsub node: \n" << *sub_node;
   //          DLOG << "match node\n"<< *match_node;
-          matcher->FolderNodes(*match_node);
+          matcher->FolderNodes(match_node.get());
   //          DLOG << " after match node\n"<< *match_node;
   //          match_node->Description();
......
@@ -27,7 +27,6 @@ namespace framework {
 class ProgramOptimize {
  public:
   ProgramOptimize() {}
-  std::shared_ptr<ProgramDesc> Optimize();
   std::shared_ptr<ProgramDesc> FushionOptimize(
       std::shared_ptr<ProgramDesc> ori_des);
......
@@ -15,11 +15,13 @@ limitations under the License. */
 #include "io.h"
 #include <fstream>
 #include <vector>
+#include "common/enforce.h"
 #include "common/log.h"
-#include "common/enforce.h"
 #include "framework/framework.pb-c.h"
 #include "framework/lod_tensor.h"
 #include "framework/operator.h"
+#include "framework/program/program-optimize/program_optimize.h"
 #include "framework/program/program_desc.h"
 #include "framework/program/var_desc.h"
 #include "framework/scope.h"
@@ -166,7 +168,7 @@ void Loader<Dtype, P>::LoadVar(framework::Variable *variable,
 template <typename Dtype, Precision P>
 const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
-    const std::string &dirname) {
+    const std::string &dirname, bool optimize) {
   std::string model_filename = dirname + "/__model__";
   PaddleMobile__Framework__Proto__ProgramDesc *c_program;
   uint8_t *buf = NULL;
@@ -203,7 +205,6 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
       if (var_desc->Persistable() &&
           var_desc->Type() != framework::VARTYPE_TYPE_FEED_MINIBATCH &&
           var_desc->Type() != framework::VARTYPE_TYPE_FETCH_LIST) {
-        // DLOG << "to load var ";
         auto dim = var_desc->Tensor_desc().Dims();
         auto tensor = var->GetMutable<framework::LoDTensor>();
         tensor->Resize(framework::make_ddim(dim));
@@ -219,8 +220,13 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
       }
     }
   }
-  originProgramDesc->Description("program: ");
+  //  originProgramDesc->Description("program: ");
+  if (optimize) {
+    framework::ProgramOptimize program_optimize;
+    program.optimizeProgram =
+        program_optimize.FushionOptimize(originProgramDesc);
+  }
   paddle_mobile__framework__proto__program_desc__free_unpacked(c_program, NULL);
   return program;
@@ -231,33 +237,9 @@ template class Loader<CPU, Precision::FP32>;
 #pragma mark - executor
 template <typename Dtype, Precision P>
-Executor<Dtype, P>::Executor(const framework::Program<Dtype> p) : program_(p) {
-  if (use_optimize_) {
-    to_predict_program_ = program_.optimizeProgram;
-  } else {
-    to_predict_program_ = program_.originProgram;
-  }
-  const std::vector<std::shared_ptr<framework::BlockDesc>> blocks =
-      to_predict_program_->Blocks();
-  for (int i = 0; i < blocks.size(); ++i) {
-    std::shared_ptr<framework::BlockDesc> block_desc = blocks[i];
-    std::vector<std::shared_ptr<framework::OpDesc>> ops = block_desc->Ops();
-    for (int j = 0; j < ops.size(); ++j) {
-      std::shared_ptr<framework::OpDesc> op = ops[j];
-      auto op_base = framework::OpRegistry<Dtype>::CreateOp(
-          op->Type(), op->GetInputs(), op->GetOutputs(), op->GetAttrMap(),
-          program_.scope);
-      op_base->InferShape();
-      ops_of_block_[*block_desc.get()].push_back(op_base);
-    }
-  }
-  InitMemory();
-}
-template <typename Dtype, Precision P>
-Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size)
-    : program_(p), batch_size_(batch_size) {
+Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
+                             bool use_optimize)
+    : program_(p), batch_size_(batch_size), use_optimize_(use_optimize) {
   if (use_optimize_) {
     to_predict_program_ = program_.optimizeProgram;
   } else {
@@ -389,7 +371,7 @@ void Executor<Dtype, P>::InitMemory() {
 }
 template <typename Dtype, Precision P>
-void Executor<Dtype, P>::predict(const framework::Tensor &t, int block_id) {
+void Executor<Dtype, P>::Predict(const framework::Tensor &t, int block_id) {
   framework::Variable *g_feed_value = program_.scope->Var("feed");
   framework::Tensor *feed_tensor =
       g_feed_value->GetMutable<framework::LoDTensor>();
@@ -404,11 +386,11 @@ void Executor<Dtype, P>::predict(const framework::Tensor &t, int block_id) {
 }
 template <typename Dtype, Precision P>
-std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::predict(
+std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict(
     const std::vector<Ptype> &input, const std::vector<int64_t> &dims) {
   framework::Tensor tensor(input, framework::make_ddim(dims));
-  predict(tensor, 0);
+  Predict(tensor, 0);
   framework::Variable *g_feed_value = program_.scope->Var("col");
   auto feed_tensor = g_feed_value->GetMutable<framework::Tensor>();
......
@@ -30,7 +30,8 @@ namespace paddle_mobile {
 template <typename Dtype, Precision P = Precision::FP32>
 class Loader : PaddleMobileObject {
  public:
-  const framework::Program<Dtype, P> Load(const std::string &dirname);
+  const framework::Program<Dtype, P> Load(const std::string &dirname,
+                                          bool optimize = true);
  private:
   void LoadVar(framework::Variable *variable,
@@ -45,13 +46,12 @@ class Executor {
   Executor() = default;
-  Executor(const framework::Program<Dtype> p);
-  Executor(const framework::Program<Dtype> p, int batch_size);
-  std::shared_ptr<framework::Tensor> predict(framework::Tensor &t);
-  std::vector<Ptype> predict(const std::vector<Ptype> &input,
+  Executor(const framework::Program<Dtype> p, int batch_size = 1,
+           bool use_optimize = true);
+  //  std::shared_ptr<framework::Tensor> Predict(framework::Tensor &t);
+  std::vector<Ptype> Predict(const std::vector<Ptype> &input,
                              const std::vector<int64_t> &dims);
  protected:
@@ -61,7 +61,7 @@ class Executor {
   framework::Program<Dtype> program_;
   int batch_size_ = 1;
   std::shared_ptr<framework::ProgramDesc> to_predict_program_;
-  void predict(const framework::Tensor &t, int block_id);
+  void Predict(const framework::Tensor &t, int block_id);
   std::map<framework::BlockDesc,
            std::vector<std::shared_ptr<framework::OperatorBase<Dtype>>>>
       ops_of_block_;
......
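Putting the new signatures together, a load-and-run sketch (the model path and input shape are illustrative; this mirrors the updated tests below):

paddle_mobile::Loader<paddle_mobile::CPU> loader;
// optimize = false skips the fusion pass and keeps the original program.
auto program = loader.Load("../models/googlenet", false);
paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1, false);
std::vector<int64_t> dims{1, 3, 224, 224};
std::vector<float> input(1 * 3 * 224 * 224, 1.f);
auto result = executor.Predict(input, dims);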
@@ -21,13 +21,6 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {
-int ConvOutputSize(int input_size, int filter_size, int dilation, int padding,
-                   int stride) {
-  const int dkernel = dilation * (filter_size - 1) + 1;
-  int output_size = (input_size + 2 * padding - dkernel) / stride + 1;
-  return output_size;
-}
 template <typename Dtype, typename T>
 void ConvOp<Dtype, T>::InferShape() const {
   //  std::cout << " begin get dims: " << std::endl;
......
@@ -44,5 +44,12 @@ class ConvOp : public framework::OperatorWithKernel<DeviceType> {
   ConvParam param_;
 };
+inline int ConvOutputSize(int input_size, int filter_size, int dilation,
+                          int padding, int stride) {
+  const int dkernel = dilation * (filter_size - 1) + 1;
+  int output_size = (input_size + 2 * padding - dkernel) / stride + 1;
+  return output_size;
+}
 }  // namespace operators
 }  // namespace paddle_mobile
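A worked example of the formula, using GoogLeNet's first 7x7 stride-2 convolution on a 224 input (values plugged into the function above):

int dkernel = 1 * (7 - 1) + 1;              // dilation 1 -> dkernel = 7
int out = (224 + 2 * 3 - dkernel) / 2 + 1;  // (224 + 6 - 7) / 2 + 1 = 112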
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/depthwise_conv_op.h"
#include <vector>
#include "framework/data_type.h"
#include "framework/op_proto_maker.h"
#include "framework/op_registry.h"
#include "operators/conv_op.h"
namespace paddle_mobile {
namespace operators {
template <typename Dtype, typename T>
void DepthwiseConvOp<Dtype, T>::InferShape() const {
auto in_dims = param_.Input()->dims();
auto filter_dims = param_.Filter()->dims();
const std::vector<int> &strides = param_.Strides();
std::vector<int> paddings = param_.Paddings();
int groups = param_.Groups();
std::vector<int> dilations = param_.Dilations();
PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
dilations.size() == paddings.size() &&
paddings.size() == strides.size()),
"ConvParam is not suitable");
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
dilations[i], paddings[i],
strides[i]));
}
framework::DDim ddim = framework::make_ddim(output_shape);
param_.Output()->Resize(ddim);
}
template class DepthwiseConvOp<CPU, float>;
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
USE_OP(depthwise_conv2d);
REGISTER_OPERATOR(depthwise_conv2d, ops::DepthwiseConvOp);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "framework/operator.h"
#include "operators/kernel/depthwise_conv_kernel.h"
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
class DepthwiseConvOp : public framework::OperatorWithKernel<DeviceType> {
public:
DepthwiseConvOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
scope),
param_(inputs, outputs, attrs, *scope) {}
using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
void InferShape() const override;
void RunImpl() const {
operators::DepthwiseConvKernel<DeviceType, T> kernel;
kernel.Compute(param_);
this->ClearVariables({"Filter", "Input"});
}
private:
ConvParam param_;
};
} // namespace operators
} // namespace paddle_mobile
@@ -23,18 +23,18 @@ namespace operators {
 class FushionConvAddReluOpMatcher : public framework::FusionOpMatcher {
  public:
   FushionConvAddReluOpMatcher() {
-    node_ = framework::Node("conv2d");
-    node_ > std::make_shared<framework::Node>("elementwise_add") >
-        std::make_shared<framework::Node>("relu");
+    node_ = framework::Node(G_OP_TYPE_CONV);
+    node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
+        std::make_shared<framework::Node>(G_OP_TYPE_RELU);
   }
   void FolderNodes(framework::Node &node) {
     std::vector<std::shared_ptr<framework::OpDesc>> origin_descs =
         node.OpDescs(node_.Depth());
-    node.Folder(node_.Depth(), Type(), {{"elementwise_add", {"Y", "Z"}}});
+    node.Folder(node_.Depth(), Type(),
+                {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Z"}}});
   }
-  std::string Type() { return "FusionConvAddRelu"; }
+  std::string Type() { return G_OP_TYPE_FUSION_CONV_ADD_RELU; }
 };
 class FusionFcOp {
......
@@ -28,17 +28,18 @@ using std::vector;
 class FusionFcMatcher : public framework::FusionOpMatcher {
  public:
   FusionFcMatcher() {
-    node_ = framework::Node("mul");
-    node_ > std::make_shared<framework::Node>("elementwise_add");
+    node_ = framework::Node(G_OP_TYPE_MUL);
+    node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD);
   }
   void FolderNodes(framework::Node &node) {
     vector<std::shared_ptr<framework::OpDesc>> origin_descs =
         node.OpDescs(node_.Depth());
-    node.Folder(node_.Depth(), Type(), {{"elementwise_add", {"Y", "Z"}}});
+    node.Folder(node_.Depth(), Type(),
+                {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Z"}}});
   }
-  std::string Type() { return "fc"; }
+  std::string Type() { return G_OP_TYPE_FC; }
 };
 template <typename DeviceType, typename T>
......
@@ -17,19 +17,6 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {
-bool IsExpand(const std::vector<int64_t> &filter_dim,
-              const std::vector<int> &strides, const std::vector<int> &paddings,
-              const std::vector<int> &dilations) {
-  bool filter_1 = true, strides_1 = true, padding_0 = true, dilation_1 = true;
-  for (size_t j = 0; j < strides.size(); ++j) {
-    filter_1 = filter_1 && (static_cast<int>(filter_dim[j + 2]) == 1);
-    strides_1 = strides_1 && (strides[j] == 1);
-    padding_0 = padding_0 && (paddings[j] == 0);
-    dilation_1 = dilation_1 && (dilations[j] == 1);
-  }
-  return !(filter_1 && strides_1 && padding_0 && dilation_1);
-}
 template <>
 void ConvKernel<CPU, float>::Compute(const ConvParam &param) const {
   LOG(kLOG_DEBUG) << param;
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/kernel/depthwise_conv_kernel.h"
#include "operators/kernel/conv_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
void DepthwiseConvKernel<CPU, float>::Compute(const ConvParam &param) const {
LOG(kLOG_DEBUG) << param;
const Tensor *input = param.Input();
Tensor filter = *param.Filter();
Tensor *output = param.Output();
output->mutable_data<float>();
int groups = param.Groups();
std::vector<int> strides = param.Strides();
std::vector<int> paddings = param.Paddings();
std::vector<int> dilations = param.Dilations();
DLOG << " compute end get Attrs " << strides[0];
const int batch_size = static_cast<int>(input->dims()[0]);
std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
std::vector<int64_t> output_shape_vec(framework::vectorize(output->dims()));
size_t data_dim = filter_shape_vec.size() - 2;
std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
col_shape_vec[0] = input->dims()[1] / groups;
for (size_t j = 0; j < data_dim; ++j) {
col_shape_vec[j + 1] = filter_shape_vec[j + 2];
col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
}
framework::DDim col_shape(framework::make_ddim(col_shape_vec));
framework::DDim col_matrix_shape =
framework::flatten_to_2d(col_shape, data_dim + 1);
bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations);
Tensor col;
Tensor col_matrix;
if (is_expand) {
col.mutable_data<float>(col_shape);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
}
DLOG << " col_shape = " << col_shape;
DLOG << " col_matrix_shape = " << col_matrix_shape;
framework::DDim input_shape = framework::slice_ddim(
input->dims(), 1, static_cast<int>(input->dims().size()));
DLOG << " input_shape = " << input_shape;
framework::DDim filter_matrix_shape = {filter.dims()[0],
filter.numel() / filter.dims()[0]};
filter.Resize(filter_matrix_shape);
DLOG << " filter.dims() = " << filter.dims();
framework::DDim output_matrix_shape = {
output->dims()[1],
output->numel() / (output->dims()[0] * output->dims()[1])};
// convolution operator: im2col(or vol2col) + gemm
int in_step = static_cast<int>(input->dims()[1]) / groups;
int out_step = static_cast<int>(output->dims()[1]) / groups;
math::Vol2ColFunctor<CPU, float> vol2col;
math::Im2ColFunctor<math::ColFormat::kCFO, CPU, float> im2col;
for (int i = 0; i < batch_size; i++) {
Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
DLOG << " in_batch.dims() = " << in_batch.dims();
DLOG << " out_batch.dims() = " << out_batch.dims();
for (int g = 0; g < groups; g++) {
Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
if (!is_expand) {
col.ShareDataWith(in_slice);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
} else if (data_dim == 2U) {
// im2col
im2col(in_slice, dilations, strides,
std::vector<int>{paddings[0], paddings[1], paddings[0],
paddings[1]},
&col);
} else if (data_dim == 3U) {
// vol2col
vol2col(in_slice, dilations, strides, paddings, &col);
}
// gemm
Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
DLOG << " out_slice " << out_slice.dims();
DLOG << " filter_slice " << filter_slice.dims();
DLOG << " col_matrix " << col_matrix.dims();
math::matmul<float>(filter_slice, false, col_matrix, false,
static_cast<float>(1), &out_slice,
static_cast<float>(0));
auto filter_ptr = filter_slice.data<float>();
}
}
}
template class DepthwiseConvKernel<CPU, float>;
} // namespace operators
} // namespace paddle_mobile
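A shape walkthrough of the loop above, assuming the configuration used by the new test further below (input 1x32x150x150, 3x3 depthwise filters, stride 1, padding 1, groups = 32):

//  filter dims       = {32, 1, 3, 3}; in_step = out_step = 32 / 32 = 1
//  col_shape         = {1, 3, 3, 150, 150}
//  col_matrix_shape  = {1 * 3 * 3, 150 * 150} = {9, 22500}
//  filter (resized)  = {32, 9}; per-group slice = {1, 9}
//  gemm per group:     {1, 9} x {9, 22500} -> one {1, 22500} row of the output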
@@ -25,6 +25,9 @@ struct ReluFunctor {
   inline T operator()(T in) const { return in > 0 ? in : 0; }
 };
+/*
+ * @b Platform-specific implementation; param is passed in from the op layer.
+ * */
 template <>
 void ReluKernel<CPU, float>::Compute(const ReluParam &param) const {
   const auto *input_x = param.InputX();
......
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#include <vector>
 #include "framework/operator.h"
 #include "operators/math/im2col.h"
 #include "operators/math/math_function.h"
@@ -23,12 +24,28 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {
-using namespace framework;
+using framework::OpKernelBase;
 template <typename DeviceType, typename T>
-class ConvKernel : public framework::OpKernelBase<DeviceType, ConvParam> {
+class ConvKernel : public OpKernelBase<DeviceType, ConvParam> {
  public:
   void Compute(const ConvParam &param) const;
 };
+inline bool IsExpand(const std::vector<int64_t> &filter_dim,
+                     const std::vector<int> &strides,
+                     const std::vector<int> &paddings,
+                     const std::vector<int> &dilations) {
+  bool filter_1 = true, strides_1 = true, padding_0 = true, dilation_1 = true;
+  for (size_t j = 0; j < strides.size(); ++j) {
+    filter_1 = filter_1 && (static_cast<int>(filter_dim[j + 2]) == 1);
+    strides_1 = strides_1 && (strides[j] == 1);
+    padding_0 = padding_0 && (paddings[j] == 0);
+    dilation_1 = dilation_1 && (dilations[j] == 1);
+  }
+  return !(filter_1 && strides_1 && padding_0 && dilation_1);
+}
 }  // namespace operators
 }  // namespace paddle_mobile
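IsExpand in action: a 1x1 filter with stride 1, no padding, and dilation 1 needs no im2col expansion (the input already is the column matrix), while a 3x3 filter does. A small sketch with made-up shapes:

bool skip = IsExpand({64, 3, 1, 1}, {1, 1}, {0, 0}, {1, 1});  // false
bool need = IsExpand({64, 3, 3, 3}, {1, 1}, {1, 1}, {1, 1});  // true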
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "framework/operator.h"
#include "operators/math/im2col.h"
#include "operators/math/math_function.h"
#include "operators/math/vol2col.h"
#include "operators/op_param.h"
#pragma once
namespace paddle_mobile {
namespace operators {
using framework::OpKernelBase;
template <typename DeviceType, typename T>
class DepthwiseConvKernel : public OpKernelBase<DeviceType, ConvParam> {
public:
void Compute(const ConvParam &param) const;
};
} // namespace operators
} // namespace paddle_mobile
@@ -696,6 +696,9 @@ class ReshapeParam : public OpParam {
   bool inplace_;
 };
+/*
+ * @b The op layer instantiates this param and passes it to the kernel layer.
+ * */
 class ReluParam : public OpParam {
  public:
   ReluParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
@@ -725,7 +728,6 @@ class FushionFcParam : public OpParam {
     y_num_col_dims_ = GetAttr<int>("y_num_col_dims", attrs);
     axis_ = GetAttr<int>("axis", attrs);
   }
-
   const Tensor *InputX() const { return input_x_; }
   const Tensor *InputY() const { return input_y_; }
......
@@ -25,6 +25,11 @@ template class ReluOp<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
+/*
+ * @b Every op must be registered; the argument of USE_OP and the first
+ *    argument of REGISTER_OPERATOR must both match the op type used in the
+ *    model.
+ * */
 namespace ops = paddle_mobile::operators;
 USE_OP(relu);
 REGISTER_OPERATOR(relu, ops::ReluOp);
@@ -28,6 +28,9 @@ using paddle_mobile::framework::Tensor;
 template <typename DeviceType, typename T>
 class ReluOp : public framework::OperatorWithKernel<DeviceType> {
  public:
+  /*
+   * @b Constructor of the op; it must call the parent constructor and
+   *    instantiate its own parameter struct.
+   * */
   ReluOp(const std::string &type, const VariableNameMap &inputs,
          const VariableNameMap &outputs, const framework::AttributeMap attrs,
          std::shared_ptr<framework::Scope> scope)
@@ -35,6 +38,9 @@ class ReluOp : public framework::OperatorWithKernel<DeviceType> {
             scope),
         param_(inputs, outputs, attrs, *scope) {}
+  /*
+   * @b Runs the op's computation by invoking the corresponding kernel.
+   * */
   void RunImpl() const {
     operators::ReluKernel<DeviceType, T> kernel;
     kernel.Compute(param_);
@@ -44,6 +50,10 @@ class ReluOp : public framework::OperatorWithKernel<DeviceType> {
   void InferShape() const override;
  protected:
+  /*
+   * @b Struct holding the parameters the Relu kernel needs for its
+   *    computation; defined in paddle-mobile/src/operators/op_param.h
+   * */
   ReluParam param_;
 };
......
@@ -99,3 +99,7 @@ target_link_libraries(test-mobilenet paddle-mobile)
 # gen test
 ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp test_include.h)
 target_link_libraries(test-sigmoid paddle-mobile)
+# gen test
+ADD_EXECUTABLE(test-depthwise-conv-op operators/test_depthwise_conv_op.cpp test_helper.h test_include.h executor_for_test.h)
+target_link_libraries(test-depthwise-conv-op paddle-mobile)
@@ -17,9 +17,9 @@ limitations under the License. */
 #include <string>
 #include <vector>
-#include "common/io.h"
 #include "common/log.h"
 #include "framework/op_registry.h"
+#include "io.h"
 #include "operators/conv_op.h"
 #include "operators/elementwise_add_op.h"
 #include "operators/pool_op.h"
@@ -73,10 +73,11 @@ class Executor4Test : public Executor<DeviceType> {
         }
       }
     }
+    this->InitMemory();
   }
   template <typename T = LoDTensor>
-  vector<std::shared_ptr<Tensor>> predict(const vector<Tensor> &ts,
+  vector<std::shared_ptr<Tensor>> Predict(const vector<Tensor> &ts,
                                           const vector<string> &input_names,
                                           const vector<string> &output_names,
                                           const vector<DDim> &ddims) {
@@ -115,7 +116,7 @@ class Executor4Test : public Executor<DeviceType> {
     return output_tensor_sptrs;
   }
-  std::shared_ptr<Tensor> predict(const Tensor &t, string input, string output,
+  std::shared_ptr<Tensor> Predict(const Tensor &t, string input, string output,
                                   const DDim &dDim) {
     auto scope = this->program_.scope;
     Variable *g_feed_value = scope->Var(input);
......
@@ -12,13 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "common/io.h"
+#include "../test_helper.h"
+#include "io.h"
 int main() {
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
   //  ../../../test/models/googlenet
   //  ../../../test/models/mobilenet
-  auto program = loader.Load(std::string("../models/googlenet"));
+  auto program = loader.Load(g_googlenet);
+  program.optimizeProgram->Description("program desc: ");
   return 0;
 }
@@ -12,14 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "common/io.h"
+#include "../test_helper.h"
 #include "framework/program/program-optimize/node.h"
 #include "framework/program/program-optimize/program_optimize.h"
+#include "io.h"
 int main() {
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
   //  "../../../test/models/googlenet"
-  auto program = loader.Load("../models/googlenet");
+  auto program = loader.Load(g_googlenet);
   paddle_mobile::framework::ProgramOptimize optimize;
   //  program.originProgram->Description("origin");
   auto optimize_program = optimize.FushionOptimize(program.originProgram);
......
@@ -21,16 +21,16 @@ int main() {
   //  ../../../test/models/googlenet
   //  ../../../test/models/mobilenet
   auto time1 = time();
-  auto program = loader.Load(std::string("../models/googlenet"));
+  auto program = loader.Load(g_googlenet, false);
   auto time2 = time();
   DLOG << "load cost :" << time_diff(time1, time2) << "ms";
-  paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1);
+  paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1, false);
   std::vector<float> input;
   std::vector<int64_t> dims{1, 3, 224, 224};
   GetInput<float>(g_test_image_1x3x224x224, &input, dims);
   auto time3 = time();
-  executor.predict(input, dims);
+  executor.Predict(input, dims);
   auto time4 = time();
   DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
   return 0;
......
@@ -19,10 +19,10 @@ limitations under the License. */
 int main() {
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
   auto time1 = time();
-  auto program = loader.Load(g_mobilenet);
+  auto program = loader.Load(g_mobilenet, false);
   auto time2 = time();
   DLOG << "load cost :" << time_diff(time1, time2) << "ms";
-  paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1);
+  paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1, false);
   std::vector<int64_t> dims{1, 3, 224, 224};
   Tensor input_tensor;
@@ -32,7 +32,7 @@ int main() {
   std::vector<float> input(input_tensor.data<float>(),
                            input_tensor.data<float>() + input_tensor.numel());
   auto time3 = time();
-  executor.predict(input, dims);
+  executor.Predict(input, dims);
   auto time4 = time();
   DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
   return 0;
......
@@ -21,10 +21,10 @@ int main() {
   //  ../../../test/models/googlenet
   //  ../../../test/models/mobilenet
   auto time1 = time();
-  auto program = loader.Load(g_yolo);
+  auto program = loader.Load(g_yolo, false);
   auto time2 = time();
   DLOG << "load cost :" << time_diff(time1, time2) << "ms";
-  paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1);
+  paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1, false);
   std::vector<int64_t> dims{1, 3, 227, 227};
   Tensor input_tensor;
@@ -34,7 +34,7 @@ int main() {
   std::vector<float> input(input_tensor.data<float>(),
                            input_tensor.data<float>() + input_tensor.numel());
   auto time3 = time();
-  executor.predict(input, dims);
+  executor.Predict(input, dims);
   auto time4 = time();
   DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
   return 0;
......
@@ -128,8 +128,7 @@ int main() {
   DLOG << "----------**********----------";
   DLOG << "begin to run BatchNormOp Test";
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
-  auto program = loader.Load(std::string(
-      "../../test/models/image_classification_resnet.inference.model"));
+  auto program = loader.Load(std::string(g_resnet));
   ///  input x (4,10,2,2)
   paddle_mobile::framework::Tensor inputx1;
......
@@ -116,7 +116,7 @@ int main() {
   DLOG << "----------**********----------";
   DLOG << "begin to run BoxCoderOp Test";
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
-  auto program = loader.Load(std::string("../../test/models/mobilenet+ssd"));
+  auto program = loader.Load(std::string(g_mobilenet_ssd));
   paddle_mobile::framework::Tensor priorbox;
   SetupTensor<float>(&priorbox, {1917, 4}, static_cast<float>(0),
......
@@ -57,7 +57,7 @@ int main() {
   auto out_ddim = paddle_mobile::framework::make_ddim({3, 100, 2, 2});
   out_ddims.push_back(out_ddim);
-  auto output = executor.predict<LoDTensor>(input_tensors, input_names,
+  auto output = executor.Predict<LoDTensor>(input_tensors, input_names,
                                             output_names, out_ddims);
   auto output0_data = output[0]->data<float>();
......
@@ -34,7 +34,7 @@ int main() {
   //                   static_cast<float>(1));
   auto out_ddim = paddle_mobile::framework::make_ddim({1, 64, 112, 112});
-  auto output = executor.predict(input, "data", "conv2d_0.tmp_0", out_ddim);
+  auto output = executor.Predict(input, "data", "conv2d_0.tmp_0", out_ddim);
   auto output_ptr = output->data<float>();
   for (int j = 0; j < output->numel(); ++j) {
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "../executor_for_test.h"
#include "../test_include.h"
#include "operators/depthwise_conv_op.h"
int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader;
// ../models/image_classification_resnet.inference.model
auto program = loader.Load(g_mobilenet_ssd);
PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
"program file read fail");
Executor4Test<paddle_mobile::CPU, paddle_mobile::operators::DepthwiseConvOp<
paddle_mobile::CPU, float>>
executor(program, "depthwise_conv2d");
paddle_mobile::framework::LoDTensor input;
// GetInput<float>(g_test_image_1x3x224x224, &input, {1, 3, 224, 224});
// use SetupTensor if not has local input image .
SetupTensor<float>(&input, {1, 32, 150, 150}, static_cast<float>(0),
static_cast<float>(1));
auto input_ptr = input.data<float>();
auto out_ddim = paddle_mobile::framework::make_ddim({1, 32, 150, 150});
auto output = executor.Predict(input, "batch_norm_0.tmp_3",
"depthwise_conv2d_0.tmp_0", out_ddim);
auto output_ptr = output->data<float>();
for (int j = 0; j < output->numel(); ++j) {
DLOG << " value of output: " << output_ptr[j];
}
return 0;
}
@@ -50,7 +50,7 @@ int main() {
   auto out_ddim = paddle_mobile::framework::make_ddim({1, 3, 224, 224});
   out_ddims.push_back(out_ddim);
-  auto output = executor.predict<LoDTensor>(input_tensors, input_names,
+  auto output = executor.Predict<LoDTensor>(input_tensors, input_names,
                                             output_names, out_ddims);
   auto output0_data = output[0]->data<float>();
......
@@ -116,7 +116,7 @@ int main() {
   DLOG << "begin to run Fc Test";
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
   //  "../../../test/models/googlenet"
-  auto program = loader.Load("../models/googlenet");
+  auto program = loader.Load(g_googlenet);
   paddle_mobile::framework::ProgramOptimize optimize;
   //  program.originProgram->Description("origin");
   auto optimize_program = optimize.FushionOptimize(program.originProgram);
......
@@ -46,7 +46,7 @@ int main() {
   auto out_ddim = paddle_mobile::framework::make_ddim({3, 4, 2, 2});
   out_ddims.push_back(out_ddim);
-  auto output = executor.predict<LoDTensor>(input_tensors, input_names,
+  auto output = executor.Predict<LoDTensor>(input_tensors, input_names,
                                             output_names, out_ddims);
   auto output0_data = output[0]->data<float>();
......
@@ -50,7 +50,7 @@ int main() {
   auto out_ddim = paddle_mobile::framework::make_ddim({3, 3});
   out_ddims.push_back(out_ddim);
-  auto output = executor.predict<LoDTensor>(input_tensors, input_names,
+  auto output = executor.Predict<LoDTensor>(input_tensors, input_names,
                                             output_names, out_ddims);
   auto output0_data = output[0]->data<float>();
......
@@ -14,11 +14,11 @@ limitations under the License. */
 #include "../executor_for_test.h"
 #include "../test_helper.h"
-#include "common/io.h"
+#include "io.h"
 int main() {
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
-  auto program = loader.Load(std::string("../models/googlenet"));
+  auto program = loader.Load(std::string(g_googlenet));
   if (program.originProgram == nullptr) {
     DLOG << "program read file";
   }
@@ -32,7 +32,7 @@ int main() {
                      static_cast<float>(1));
   auto out_ddim = paddle_mobile::framework::make_ddim({1, 64, 56, 56});
   auto output =
-      executor.predict(input, "conv2d_0.tmp_1", "pool2d_0.tmp_0", out_ddim);
+      executor.Predict(input, "conv2d_0.tmp_1", "pool2d_0.tmp_0", out_ddim);
   float *output_ptr = output->data<float>();
   for (int j = 0; j < output->numel(); ++j) {
......
@@ -127,7 +127,7 @@ int main() {
   DLOG << "----------**********----------";
   DLOG << "begin to run PriorBoxOp Test";
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
-  auto program = loader.Load(std::string("../../test/models/mobilenet+ssd"));
+  auto program = loader.Load(std::string(g_mobilenet_ssd));
   ///  input x (1,3,300,300)
   paddle_mobile::framework::Tensor input_image;
......
@@ -46,7 +46,7 @@ int main() {
   auto out_ddim = paddle_mobile::framework::make_ddim({1, 2, 3, 4});
   out_ddims.push_back(out_ddim);
-  auto output = executor.predict<LoDTensor>(input_tensors, input_names,
+  auto output = executor.Predict<LoDTensor>(input_tensors, input_names,
                                             output_names, out_ddims);
   auto output0_data = output[0]->data<float>();
......
@@ -14,11 +14,11 @@ limitations under the License. */
 #include "../executor_for_test.h"
 #include "../test_helper.h"
-#include "common/io.h"
+#include "io.h"
 int main() {
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
-  auto program = loader.Load(std::string("../../test/models/mobilenet+ssd"));
+  auto program = loader.Load(std::string(g_mobilenet_ssd));
   if (program.originProgram == nullptr) {
     DLOG << "program read file";
   }
@@ -31,7 +31,7 @@ int main() {
   auto input_ptr = input.data<float>();
   auto out_ddim = paddle_mobile::framework::make_ddim({2, 9, 2});
   auto output =
-      executor.predict(input, "transpose_0.tmp_0", "reshape_0.tmp_0", out_ddim);
+      executor.Predict(input, "transpose_0.tmp_0", "reshape_0.tmp_0", out_ddim);
   auto *output_ptr = output->data<float>();
   DLOG << "input : ";
......
@@ -14,7 +14,7 @@ limitations under the License. */
 #include "../../src/operators/kernel/sigmoid_kernel.h"
 #include "../test_helper.h"
-#include "common/io.h"
+#include "io.h"
 int main() {
   paddle_mobile::framework::Tensor input;
......
@@ -14,11 +14,11 @@ limitations under the License. */
 #include "../executor_for_test.h"
 #include "../test_helper.h"
-#include "common/io.h"
+#include "io.h"
 int main() {
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
-  auto program = loader.Load(std::string("../models/mobilenet"));
+  auto program = loader.Load(std::string(g_mobilenet));
   if (program.originProgram == nullptr) {
     DLOG << "program read file";
   }
@@ -30,7 +30,7 @@ int main() {
                      static_cast<float>(1));
   auto out_ddim = paddle_mobile::framework::make_ddim({1, 1000});
   auto output =
-      executor.predict(input, "reshape_0.tmp_0", "softmax_0.tmp_0", out_ddim);
+      executor.Predict(input, "reshape_0.tmp_0", "softmax_0.tmp_0", out_ddim);
   auto *output_ptr = output->data<float>();
   for (int j = 0; j < output->numel(); ++j) {
     DLOG << " value of output: " << output_ptr[j];
......
@@ -14,11 +14,11 @@ limitations under the License. */
 #include "../executor_for_test.h"
 #include "../test_helper.h"
-#include "common/io.h"
+#include "io.h"
 int main() {
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
-  auto program = loader.Load(std::string("../../test/models/mobilenet+ssd"));
+  auto program = loader.Load(std::string(g_mobilenet_ssd));
   if (program.originProgram == nullptr) {
     DLOG << "program read file";
   }
@@ -31,7 +31,7 @@ int main() {
   auto input_ptr = input.data<float>();
   auto out_ddim = paddle_mobile::framework::make_ddim({1, 3, 4, 2});
   auto output =
-      executor.predict(input, "conv2d_22.tmp_1", "transpose_0.tmp_0", out_ddim);
+      executor.Predict(input, "conv2d_22.tmp_1", "transpose_0.tmp_0", out_ddim);
   auto *output_ptr = output->data<float>();
   DLOG << "input : ";
......
@@ -20,7 +20,6 @@ limitations under the License. */
 #include "./test_helper.h"
 #include "common/enforce.h"
-#include "common/io.h"
 #include "common/log.h"
 #include "framework/lod_tensor.h"
 #include "framework/operator.h"
@@ -30,3 +29,4 @@ limitations under the License. */
 #include "framework/scope.h"
 #include "framework/tensor.h"
 #include "framework/variable.h"
+#include "io.h"