Unverified · Commit ad3844d6 · Authored by: H Houjiang Chen · Committed by: GitHub

Merge pull request #1495 from hjchen2/backup

Support attention models; refactor sgemm and depthwise conv3x3; implement ARMv8 versions of winograd and depthwise conv5x5
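All of the kernels named above compute standard dense primitives; the winograd and NEON paths added here are faster schedules of the same arithmetic. As a reference for what a depthwise conv3x3 must produce, here is a minimal scalar sketch (stride 1, no padding; the layout and function name are illustrative assumptions, not code from this PR):

// Illustrative scalar reference for depthwise conv3x3, stride 1, no padding.
// Each channel is filtered independently; there is no cross-channel sum.
// Assumed layout: input [C, H, W], filter [C, 3, 3], output [C, H-2, W-2].
void DepthwiseConv3x3Ref(const float *input, const float *filter,
                         float *output, int channels, int height, int width) {
  const int out_h = height - 2;
  const int out_w = width - 2;
  for (int c = 0; c < channels; ++c) {
    const float *in_c = input + c * height * width;
    const float *f = filter + c * 9;
    float *out_c = output + c * out_h * out_w;
    for (int oh = 0; oh < out_h; ++oh) {
      for (int ow = 0; ow < out_w; ++ow) {
        float acc = 0.f;
        for (int kh = 0; kh < 3; ++kh) {
          for (int kw = 0; kw < 3; ++kw) {
            acc += in_c[(oh + kh) * width + (ow + kw)] * f[kh * 3 + kw];
          }
        }
        out_c[oh * out_w + ow] = acc;
      }
    }
  }
}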
@@ -23,7 +23,7 @@ file(GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c src/*.mm)
 file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h)
 include_directories(src/)
-set(CMAKE_CXX_FLAGS "-O3 -s -DNDEBUG ${CMAKE_CXX_FLAGS}")
+set(CMAKE_CXX_FLAGS "-O3 -s -DNDEBUG ${CMAKE_CXX_FLAGS} -Wno-attributes")
 if(IS_IOS)
   set(CMAKE_CXX_FLAGS "-mfpu=neon -marm -fobjc-abi-version=2 -fobjc-arc \
   -std=gnu++11 -stdlib=libc++ -isysroot ${CMAKE_OSX_SYSROOT} ${CMAKE_CXX_FLAGS}")
......
@@ -31,7 +31,8 @@ namespace paddle_mobile {

 #ifdef ANDROID
-extern const char *ANDROID_LOG_TAG;
+static const char *ANDROID_LOG_TAG =
+    "paddle_mobile LOG built on " __DATE__ " " __TIME__;

 #define ANDROIDLOGI(...)                                               \
   __android_log_print(ANDROID_LOG_INFO, ANDROID_LOG_TAG, __VA_ARGS__); \
......
@@ -37,8 +37,7 @@ template <typename Dtype>
 using OpCreator = std::function<framework::OperatorBase<Dtype> *(
     const std::string & /*type*/, const VariableNameMap & /*inputs*/,
     const VariableNameMap & /*outputs*/,
-    const framework::AttributeMap & /*attrs*/,
-    std::shared_ptr<framework::Scope> /*scope*/)>;
+    const framework::AttributeMap & /*attrs*/, framework::Scope * /*scope*/)>;

 using InferVarTypeFN = std::function<void(const framework::OpDesc & /*op_desc*/,
                                           framework::BlockDesc * /*block*/)>;
......
File mode changed from 100755 to 100644
@@ -205,6 +205,8 @@ extern const char *G_OP_TYPE_FUSION_DECONV_ADD_BN_RELU;
 extern const char *G_OP_TYPE_FUSION_DECONV_ADD_BN;
 extern const char *G_OP_TYPE_FUSION_DECONV_BN_RELU;
+extern const char *G_OP_TYPE_PAD2D;
+
 extern std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
     op_input_output_key;
......
@@ -91,7 +91,14 @@ class Attribute {
         break;
       }
       case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCK: {
-        attr.Set<int>(attr_desc->block_idx);
+        break;
+      }
+      case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__LONGS: {
+        vector<int> val(attr_desc->n_longs);
+        for (int i = 0; i < attr_desc->n_longs; ++i) {
+          val[i] = attr_desc->longs[i];
+        }
+        attr.Set<vector<int>>(val);
         break;
       }
       default:
@@ -139,6 +146,14 @@ class Attribute {
       return vistor(attr.variant_.Get<vector<bool>>());
     } else if (attr.variant_.TypeId() == typeid(int64_t).hash_code()) {
       return vistor(attr.variant_.Get<int64_t>());
+    } else if (attr.variant_.TypeId() ==
+               typeid(framework::BlockDesc *).hash_code()) {
+      return vistor(attr.variant_.Get<framework::BlockDesc *>());
+    } else if (attr.variant_.TypeId() ==
+               typeid(vector<framework::BlockDesc *>).hash_code()) {
+      return vistor(attr.variant_.Get<vector<framework::BlockDesc *>>());
+    } else if (attr.variant_.TypeId() == typeid(vector<int64_t>).hash_code()) {
+      return vistor(attr.variant_.Get<vector<int64_t>>());
     } else {
       PADDLE_MOBILE_THROW_EXCEPTION("type not support");
     }
@@ -146,7 +161,8 @@ class Attribute {
  private:
   Variant<int, float, string, vector<int>, vector<float>, vector<string>, bool,
-          vector<bool>, BlockDesc *, int64_t>
+          vector<bool>, BlockDesc *, vector<BlockDesc *>, int64_t,
+          vector<int64_t>>
       variant_;
 };
......
@@ -42,6 +42,7 @@ inline DataLayout StringToDataLayout(const std::string &str) {
   } else {
     PADDLE_MOBILE_THROW_EXCEPTION("Unknown storage order string: %s", s.c_str())
   }
+  return DataLayout::kNCHW;
 }

 inline std::string DataLayoutToString(const DataLayout &data_layout) {
......
@@ -82,6 +82,8 @@ struct Dim<0> {
   int64_t &operator[](int idx);
   int64_t operator[](int idx) const;
+
+  int64_t head;
 };

 namespace {
@@ -131,6 +133,7 @@ int64_t &indexer(Dim<D> &dim, int idx) {
 template <>
 int64_t &indexer<0>(Dim<0> &dim, int idx) {
   PADDLE_MOBILE_THROW_EXCEPTION("Invalid index")
+  return dim.head;
 }

 template <int D>
@@ -147,6 +150,7 @@ int64_t indexer(const Dim<D> &dim, int idx) {
 template <>
 int64_t indexer<0>(const Dim<0> &dim, int idx) {
   PADDLE_MOBILE_THROW_EXCEPTION("Invalid index")
+  return dim.head;
 }

 }  // namespace
......
@@ -57,32 +57,30 @@ Executor<Device, T>::Executor(const Program<Device> &program,
   PADDLE_MOBILE_ENFORCE(program_desc_ != nullptr,
                         "program_desc_ should not be nullptr");
   const auto &blocks = program_desc_->Blocks();
-  ops_of_block_.resize(blocks.size());
-
-  for (int i = 0; i < blocks.size(); ++i) {
-    std::shared_ptr<BlockDesc> block_desc = blocks[i];
-    std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
-    for (int j = 0; j < ops.size(); ++j) {
-      std::shared_ptr<OpDesc> op_desc = ops[j];
-      DLOG << "create op: " << op_desc->Type();
-
-      auto op_handler = OpRegistry<Device>::CreateOp(
-          op_desc->Type(), op_desc->GetInputs(), op_desc->GetOutputs(),
-          op_desc->GetAttrMap(), program_.scope);
-      // infer shape to reshape inputs and outputs before predict,
-      // but for lod mode, it still need to infer shape in runtime
-      if (!lod_mode) {
-        op_handler->InferShape();
-      }
-      ops_of_block_[i].push_back(op_handler);
-    }
+  std::shared_ptr<BlockDesc> block_desc = blocks[0];
+  std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
+  for (int j = 0; j < ops.size(); ++j) {
+    std::shared_ptr<OpDesc> op_desc = ops[j];
+    DLOG << "create op: " << op_desc->Type();
+
+    auto op_handler = OpRegistry<Device>::CreateOp(
+        op_desc->Type(), op_desc->GetInputs(), op_desc->GetOutputs(),
+        op_desc->GetAttrMap(), program_.scope.get());
+    // infer shape to reshape inputs and outputs before predict,
+    // but for lod mode, it still need to infer shape in runtime
+    if (!lod_mode) {
+      op_handler->InferShape();
+    }
+    ops_of_block0_.push_back(op_handler);
   }

   if (program_.combined) {
     InitCombineMemory();
   } else {
     InitMemory();
   }
+  // resize feed and fetch list
+  InitFeedFetchList();

 #ifdef PADDLE_MOBILE_FPGA
   program_.scope->EraseVars({"feed", "fetch"});
@@ -90,13 +88,37 @@ Executor<Device, T>::Executor(const Program<Device> &program,
 #endif

   int count = 0;
-  for (int block_id = 0; block_id < ops_of_block_.size(); ++block_id) {
-    for (auto &op_handler : ops_of_block_[block_id]) {
-      DLOG << "Initialize op[" << count++ << "]: " << op_handler->Type();
-      op_handler->Init();
-      ops_list_.push_back(op_handler);
+  for (auto &op_handler : ops_of_block0_) {
+    DLOG << "Initialize op[" << count++ << "]: " << op_handler->Type();
+    op_handler->Init();
+  }
+}
+
+template <typename Device, typename T>
+void Executor<Device, T>::InitFeedFetchList() {
+  std::unordered_map<std::string, int> feed_indices, fetch_indices;
+  for (const auto &block : program_desc_->Blocks()) {
+    for (const auto &op_desc : block->Ops()) {
+      if (op_desc->Type() == "feed") {
+        std::string name = op_desc->Output("Out")[0];
+        feed_indices[name] = op_desc->GetAttr("col").Get<int>();
+      } else if (op_desc->Type() == "fetch") {
+        std::string name = op_desc->Input("X")[0];
+        fetch_indices[name] = op_desc->GetAttr("col").Get<int>();
+      }
     }
   }
+  feed_indices_.swap(feed_indices);
+  fetch_indices_.swap(fetch_indices);
+
+  auto *feed_var = program_.scope->Var("feed");
+  auto *feed_list = feed_var->template GetMutable<framework::LoDTensorArray>();
+  feed_list->resize(feed_indices_.size());
+
+  auto *fetch_var = program_.scope->Var("fetch");
+  auto *fetch_list =
+      fetch_var->template GetMutable<framework::LoDTensorArray>();
+  fetch_list->resize(fetch_indices_.size());
 }

 template <typename T>
@@ -181,20 +203,20 @@ void Executor<Device, T>::InitMemory() {
   for (const auto &block : program_desc_->Blocks()) {
     for (const auto &var_desc : block->Vars()) {
       auto var = program_.scope->Var(var_desc->Name());
-      auto tensor = var->template GetMutable<LoDTensor>();
       if (var_desc->Persistable()) {
         if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
+          var->template GetMutable<framework::LoDTensorArray>();
           continue;
         }
         char *origin_data =
             ReadFileToBuff(program_.model_path + "/" + var_desc->Name());
         char *data = origin_data;
+        auto tensor = var->template GetMutable<LoDTensor>();
         LoadMemory(reinterpret_cast<void **>(&data), var_desc, tensor);
         delete[] origin_data;
       } else {
-        if (var_desc->Type() == VARTYPE_TYPE_LOD_TENSOR) {
-          varInputMemory(var_desc, var, tensor);
-        }
+        DLOG << "init no persistable var: " << var_desc->Name();
+        varInputMemory(var_desc, var);
       }
     }
   }
@@ -216,23 +238,18 @@ void Executor<Device, T>::InitCombineMemory() {
   for (const auto &block : program_desc_->Blocks()) {
     for (const auto &var_desc : block->Vars()) {
       auto var = program_.scope->Var(var_desc->Name());
-      auto tensor = var->template GetMutable<LoDTensor>();
       if (var_desc->Persistable()) {
         if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
+          var->template GetMutable<framework::LoDTensorArray>();
           continue;
         }
         DLOG << " init combine memory persistable: " << var_desc->Name();
+        auto tensor = var->template GetMutable<LoDTensor>();
         LoadMemory(reinterpret_cast<void **>(&data), var_desc, tensor);
       } else {
-        if (var_desc->Type() == VARTYPE_TYPE_LOD_TENSOR) {
-          DLOG << " init combine memory no persistable in lod: "
-               << var_desc->Name();
-          varInputMemory(var_desc, var, tensor);
-        } else {
-          DLOG << " init combine memory no persistable: " << var_desc->Name();
-        }
+        DLOG << " init combine memory no persistable: " << var_desc->Name();
+        varInputMemory(var_desc, var);
       }
     }
   }
@@ -250,6 +267,7 @@ void Executor<Device, T>::InitNoPersistableMemory(const Tensor &input_tensor) {
       auto tensor = var->template GetMutable<LoDTensor>();
       if (var_desc->Persistable()) {
         if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
+          var->template GetMutable<framework::LoDTensorArray>();
           continue;
         }
       } else {
@@ -260,6 +278,9 @@ void Executor<Device, T>::InitNoPersistableMemory(const Tensor &input_tensor) {
                          input_tensor.dims()[3]});
         tensor->Resize(new_dim);
         tensor->template mutable_data<T>();
+      } else {
+        PADDLE_MOBILE_THROW_EXCEPTION("Unsupported var type `%d`",
+                                      var_desc->Type());
       }
     }
   }
@@ -272,34 +293,44 @@ void Executor<Device, T>::InitNoPersistableMemory(const Tensor &input_tensor) {
 template <typename Device, typename T>
 bool Executor<Device, T>::varInputMemory(
-    const std::shared_ptr<VarDesc> &var_desc, Variable *var,
-    LoDTensor *tensor) const {
+    const std::shared_ptr<VarDesc> &var_desc, Variable *var) const {
 #ifdef PADDLE_MOBILE_FPGA
+  framework::LoDTensor *tensor = var->template GetMutable<LoDTensor>();
   tensor->init(typeid(float));
   return true;
 #endif
-  auto type = var_desc->Tensor_desc().DataType();
-  switch (type) {
-    case VARTYPE_TYPE_FP32:
-      tensor->mutable_data<float>();
-      break;
-    case VARTYPE_TYPE_INT8:
-      tensor->mutable_data<int8_t>();
-      break;
-    case VARTYPE_TYPE_INT32:
-      tensor->mutable_data<int32_t>();
-      break;
-    case VARTYPE_TYPE_INT64:
-      tensor->mutable_data<int64_t>();
-      break;
-    default:
-      break;
+  auto TypeId = [](const VarType_Type &type) -> std::type_index {
+    switch (type) {
+      case VARTYPE_TYPE_BOOL:
+        return typeid(bool);
+      case VARTYPE_TYPE_FP32:
+        return typeid(float);
+      case VARTYPE_TYPE_INT8:
+        return typeid(int8_t);
+      case VARTYPE_TYPE_INT32:
+        return typeid(int);
+      case VARTYPE_TYPE_INT64:
+        return typeid(int64_t);
+      default:
+        PADDLE_MOBILE_THROW_EXCEPTION("got unhandled var type `%d`", type);
+    }
+  };
+
+  auto type = var_desc->Type();
+  if (type == VARTYPE_TYPE_LOD_TENSOR) {
+    auto data_type = var_desc->Tensor_desc().DataType();
+    framework::LoDTensor *tensor = var->template GetMutable<LoDTensor>();
+    tensor->mutable_data(TypeId(data_type));
+  } else if (type == VARTYPE_TYPE_STEP_SCOPES) {
+    std::vector<framework::Scope *> *step_scopes =
+        var->template GetMutable<std::vector<framework::Scope *>>();
+  } else if (type == VARTYPE_TYPE_STEP_LOD_TENSOR_ARRAY) {
+    framework::LoDTensorArray *tensor_array =
+        var->template GetMutable<framework::LoDTensorArray>();
+  } else {
+    PADDLE_MOBILE_THROW_EXCEPTION("got unhandled var type `%d`", type);
   }
-  bool is_mute_match =
-      (type == VARTYPE_TYPE_FP32) || (type == VARTYPE_TYPE_INT8) ||
-      (type == VARTYPE_TYPE_INT32) || (type == VARTYPE_TYPE_INT64);
-  PADDLE_MOBILE_ENFORCE(is_mute_match, "got unhandled data type : %d", type);
-  return is_mute_match;
+  return true;
 }
@@ -323,11 +354,19 @@ PMStatus Executor<Device, T>::Predict(
 template <typename Device, typename T>
 std::vector<T> Executor<Device, T>::Predict(const std::vector<T> &input,
                                             const std::vector<int64_t> &dims) {
+  PADDLE_MOBILE_ENFORCE(feed_indices_.size() != 0,
+                        "We don't know which tensor should be assign, since no "
+                        "feed op found in this model");
+  PADDLE_MOBILE_ENFORCE(fetch_indices_.size() != 0,
+                        "We don't know which tensor should be fetch out, since "
+                        "no fetch op found in this model");
+  std::string input_name = feed_indices_.begin()->first;
   Tensor feed_tensor(input, make_ddim(dims));
-  SetInput(feed_tensor, "feed");
+  SetInput(feed_tensor, input_name);
   std::vector<T> output;
   if (this->Predict() == PMSuccess) {
-    const auto output_tensor = GetOutput("fetch");
+    std::string output_name = fetch_indices_.begin()->first;
+    const auto output_tensor = GetOutput(output_name);
     output.resize(output_tensor->numel());
     memcpy(output.data(), output_tensor->template data<T>(),
            output.size() * sizeof(T));
@@ -338,11 +377,13 @@ std::vector<T> Executor<Device, T>::Predict(const std::vector<T> &input,
 template <typename Device, typename T>
 void Executor<Device, T>::SetInput(const Tensor &input,
                                    const std::string &var_name) {
-  auto *target_var = program_.scope->FindVar(var_name);
-  PADDLE_MOBILE_ENFORCE(target_var != nullptr, "Variable %s is not exist",
-                        var_name.c_str());
-
-  auto *target_tensor = target_var->template GetMutable<LoDTensor>();
+  int index = 0;
+  if (feed_indices_.find(var_name) != feed_indices_.end()) {
+    index = feed_indices_.find(var_name)->second;
+  }
+  auto *feed_var = program_.scope->Var("feed");
+  framework::LoDTensor &target =
+      feed_var->template GetMutable<framework::LoDTensorArray>()->at(index);

   if (config_.load_when_predict) {
     if (input_dim_last_ != input.dims()) {
@@ -351,68 +392,92 @@ void Executor<Device, T>::SetInput(const Tensor &input,
     }
   }

-  target_tensor->Resize(input.dims());
-  target_tensor->ShareDataWith(input);
+  target.Resize(input.dims());
+  target.ShareDataWith(input);
 }

 template <typename Device, typename T>
 void Executor<Device, T>::SetInput(const LoDTensor &input,
                                    const std::string &var_name) {
-  auto *target_var = program_.scope->FindVar(var_name);
-  PADDLE_MOBILE_ENFORCE(target_var != nullptr, "Variable %s is not exist",
-                        var_name.c_str());
-  auto *target_tensor = target_var->template GetMutable<LoDTensor>();
+  int index = 0;
+  if (feed_indices_.find(var_name) != feed_indices_.end()) {
+    index = feed_indices_.find(var_name)->second;
+  }
+  auto *feed_var = program_.scope->Var("feed");
+  framework::LoDTensor &target =
+      feed_var->template GetMutable<framework::LoDTensorArray>()->at(index);

   if (config_.load_when_predict) {
     if (input_dim_last_ != input.dims()) {
-      InitNoPersistableMemory(*target_tensor);
+      InitNoPersistableMemory(input);
       input_dim_last_ = input.dims();
     }
   }

-  target_tensor->Resize(input.dims());
-  target_tensor->ShareDataWith(input);
-  target_tensor->set_lod(input.lod());
+  target.Resize(input.dims());
+  target.ShareDataWith(input);
+  target.set_lod(input.lod());
+}
+
+template <typename Device, typename T>
+std::shared_ptr<LoDTensor> Executor<Device, T>::GetOutput(
+    const std::string &var_name) {
+  const auto &iter = fetch_indices_.find(var_name);
+  if (var_name == "fetch" || iter != fetch_indices_.end()) {
+    int index = 0;
+    if (iter != fetch_indices_.end()) {
+      index = iter->second;
+    }
+    auto *fetch_var = program_.scope->Var("fetch");
+    framework::LoDTensor &target =
+        fetch_var->template GetMutable<framework::LoDTensorArray>()->at(index);
+    return std::make_shared<LoDTensor>(target);
+  } else {
+    auto *fetch_var = program_.scope->Var(var_name);
+    framework::LoDTensor *target =
+        fetch_var->template GetMutable<framework::LoDTensor>();
+    return std::make_shared<LoDTensor>(*target);
+  }
 }

 template <typename Device, typename T>
 PMStatus Executor<Device, T>::Predict() {
 #ifdef PADDLE_MOBILE_PROFILE
-  std::vector<ProfInfo> profile(ops_list_.size());
+  std::vector<ProfInfo> profile(ops_of_block0_.size());
   struct timespec ts;
   int op_index = 0;
 #endif
-  for (auto &block : ops_of_block_) {
-    for (auto &op_handler : block) {
+  for (auto &op_handler : ops_of_block0_) {
 #ifdef PADDLE_MOBILE_PROFILE
-      clock_gettime(CLOCK_MONOTONIC, &ts);
-      profile[op_index].runBegin = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    profile[op_index].runBegin = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
 #endif
-      if (lod_mode_) {
-        op_handler->InferShape();
-      }
-      op_handler->Run();
+    if (lod_mode_) {
+      op_handler->InferShape();
+    }
+    op_handler->Run();
 #ifdef PADDLE_MOBILE_PROFILE
-      clock_gettime(CLOCK_MONOTONIC, &ts);
-      profile[op_index].runEnd = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
-      ++op_index;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    profile[op_index].runEnd = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
+    ++op_index;
 #endif
-    }
   }
 #ifdef PADDLE_MOBILE_PROFILE
   std::unordered_map<std::string, uint64_t> _tp;
   for (int i = 0; i < profile.size(); i++) {
     const auto &pInfo = profile[i];
     uint64_t timeCost = pInfo.runEnd - pInfo.runBegin;
-    if (ops_list_[i]->Type() == "conv2d" ||
-        ops_list_[i]->Type() == "depthwise_conv2d") {
-      auto inputs = ops_list_[i]->Inputs();
+    if (ops_of_block0_[i]->Type() == "conv2d" ||
+        ops_of_block0_[i]->Type() == "depthwise_conv2d") {
+      auto inputs = ops_of_block0_[i]->Inputs();
       auto *filter =
           GetVarValue<LoDTensor>("Filter", inputs, *(program_.scope));
       int kernel_size = filter->dims()[2];
-      _tp[ops_list_[i]->Type() + "_" + std::to_string(kernel_size)] += timeCost;
+      _tp[ops_of_block0_[i]->Type() + "_" + std::to_string(kernel_size)] +=
+          timeCost;
     } else {
-      _tp[ops_list_[i]->Type()] += timeCost;
+      _tp[ops_of_block0_[i]->Type()] += timeCost;
     }
   }
   printf("====================[ profile ]======================\n");
@@ -437,16 +502,6 @@ PMStatus Executor<Device, T>::Predict() {
   return PMSuccess;
 }

-template <typename Device, typename T>
-std::shared_ptr<LoDTensor> Executor<Device, T>::GetOutput(
-    const std::string &var_name) {
-  auto *target_var = program_.scope->FindVar(var_name);
-  PADDLE_MOBILE_ENFORCE(target_var != nullptr, "Variable %s is not exist",
-                        var_name.c_str());
-  auto *output_tensor = target_var->template GetMutable<LoDTensor>();
-  return std::make_shared<LoDTensor>(*output_tensor);
-}
-
 #ifdef PADDLE_MOBILE_FPGA
 template <typename Device, typename T>
 void Executor<Device, T>::InjectVariable(const Tensor &t,
@@ -476,20 +531,6 @@ void Executor<Device, T>::FeedData(const std::vector<void *> &v) {
   }
 }

-template <typename Device, typename T>
-void Executor<Device, T>::FeedTensorData(const vector<framework::Tensor> &v) {
-  auto input_size = v.size();
-  int index = 0;
-  auto vars = program_.scope->VarContain("feed", &index);
-  PADDLE_MOBILE_ENFORCE(input_size == vars.size(),
-                        "input data number not correct");
-  for (int i = 0; i < input_size; i++) {
-    auto var = program_.scope->Var("feed", i + index);
-    auto feed_tensor = var->template GetMutable<LoDTensor>();
-    feed_tensor->ShareDataWith(v[i]);
-  }
-}
-
 template <typename Device, typename T>
 void Executor<Device, T>::GetResults(std::vector<void *> *v) {
   auto output_size = v->size();
@@ -524,11 +565,11 @@ framework::Tensor *Executor<Device, T>::GetTensorByName(
     const std::string &name) {
   auto var = program_.scope->Var(name);
   return var->template GetMutable<LoDTensor>();
-};
+}

 template <typename Device, typename T>
 std::shared_ptr<Tensor> Executor<Device, T>::FetchResult(int id) {
-  auto &ops = ops_of_block_[0];
+  auto &ops = ops_of_block0_;
   PADDLE_MOBILE_ENFORCE(id < (int)ops.size(), "Index out of range");

   auto op = id < 0 ? ops[ops.size() - 1] : ops[id];
@@ -542,7 +583,7 @@ std::shared_ptr<Tensor> Executor<Device, T>::FetchResult(int id) {
 template <typename Device, typename T>
 void Executor<Device, T>::Predict_From_To(int start, int end) {
-  auto &ops = ops_of_block_[0];
+  auto &ops = ops_of_block0_;
   end = end < 0 ? static_cast<int>(ops.size()) : end;
   PADDLE_MOBILE_ENFORCE(start >= 0 && start < end && end <= ops.size(),
                         "start or end parameter is wrong");
......
@@ -53,7 +53,6 @@ class Executor {
   void InjectVariable(const Tensor &t, std::string var_name);
   void FeedData(const Tensor &t);
   void FeedData(const std::vector<void *> &v);
-  void FeedTensorData(const std::vector<framework::Tensor> &v);
   void GetResults(std::vector<void *> *v);
   void GetTensorResults(std::vector<framework::Tensor *> *v);
@@ -68,8 +67,9 @@ class Executor {
  protected:
   Executor() = default;
-  bool varInputMemory(const std::shared_ptr<VarDesc> &var_desc, Variable *var,
-                      LoDTensor *tensor) const;
+  bool varInputMemory(const std::shared_ptr<VarDesc> &var_desc,
+                      Variable *var) const;
+  void InitFeedFetchList();
   void InitMemory();
   void InitCombineMemory();
   void InitNoPersistableMemory(const Tensor &input_tensor);
@@ -85,10 +85,9 @@ class Executor {
   PaddleMobileConfigInternal config_;
   Program<Device> program_;
   std::shared_ptr<ProgramDesc> program_desc_;
-  typedef std::shared_ptr<OperatorBase<Device>> OperatorBasePtr;
-  std::vector<std::vector<OperatorBasePtr>> ops_of_block_;
-  // operators list
-  std::vector<OperatorBasePtr> ops_list_;
+  std::vector<std::shared_ptr<OperatorBase<Device>>> ops_of_block0_;
+  std::unordered_map<std::string, int> feed_indices_;
+  std::unordered_map<std::string, int> fetch_indices_;
   // for super resoltion
   DDim input_dim_last_;
......
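The executor and header changes above replace per-name scope lookups with indexed slots: each feed/fetch op's "col" attribute selects a position in the "feed"/"fetch" LoDTensorArray, and SetInput/GetOutput take the feed op's output name (or the fetch op's input name). A hypothetical caller, assuming an Executor<CPU, float> named exe built from a loaded model whose feed output is named "image" (both names are assumptions for illustration):

// Usage sketch only; includes and model loading are omitted, and "exe"
// and the tensor name "image" are hypothetical.
framework::LoDTensor input;
input.Resize(framework::make_ddim({1, 3, 224, 224}));
input.mutable_data<float>();
// Resolved through feed_indices_ to a column of the "feed" LoDTensorArray;
// names without a matching feed op fall back to column 0.
exe.SetInput(input, "image");
if (exe.Predict() == PMSuccess) {
  // Accepts a fetch op's input name, or the literal "fetch" for column 0.
  std::shared_ptr<framework::LoDTensor> out = exe.GetOutput("fetch");
}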
@@ -13,13 +13,6 @@ void paddle_mobile__framework__proto__version__init(
       PADDLE_MOBILE__FRAMEWORK__PROTO__VERSION__INIT;
   *message = init_value;
 }
-size_t paddle_mobile__framework__proto__version__get_packed_size(
-    const PaddleMobile__Framework__Proto__Version *message) {
-  assert(message->base.descriptor ==
-         &paddle_mobile__framework__proto__version__descriptor);
-  return protobuf_c_message_get_packed_size(
-      (const ProtobufCMessage *)(message));
-}
 PaddleMobile__Framework__Proto__Version *
 paddle_mobile__framework__proto__version__unpack(ProtobufCAllocator *allocator,
                                                  size_t len,
@@ -54,13 +47,6 @@ void paddle_mobile__framework__proto__op_desc__init(
       PADDLE_MOBILE__FRAMEWORK__PROTO__OP_DESC__INIT;
   *message = init_value;
 }
-size_t paddle_mobile__framework__proto__op_desc__get_packed_size(
-    const PaddleMobile__Framework__Proto__OpDesc *message) {
-  assert(message->base.descriptor ==
-         &paddle_mobile__framework__proto__op_desc__descriptor);
-  return protobuf_c_message_get_packed_size(
-      (const ProtobufCMessage *)(message));
-}
 PaddleMobile__Framework__Proto__OpDesc *
 paddle_mobile__framework__proto__op_desc__unpack(ProtobufCAllocator *allocator,
                                                  size_t len,
@@ -95,13 +81,6 @@ void paddle_mobile__framework__proto__op_proto__init(
       PADDLE_MOBILE__FRAMEWORK__PROTO__OP_PROTO__INIT;
   *message = init_value;
 }
-size_t paddle_mobile__framework__proto__op_proto__get_packed_size(
-    const PaddleMobile__Framework__Proto__OpProto *message) {
-  assert(message->base.descriptor ==
-         &paddle_mobile__framework__proto__op_proto__descriptor);
-  return protobuf_c_message_get_packed_size(
-      (const ProtobufCMessage *)(message));
-}
 PaddleMobile__Framework__Proto__OpProto *
 paddle_mobile__framework__proto__op_proto__unpack(ProtobufCAllocator *allocator,
                                                   size_t len,
@@ -162,13 +141,6 @@ void paddle_mobile__framework__proto__var_type__init(
       PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__INIT;
   *message = init_value;
 }
-size_t paddle_mobile__framework__proto__var_type__get_packed_size(
-    const PaddleMobile__Framework__Proto__VarType *message) {
-  assert(message->base.descriptor ==
-         &paddle_mobile__framework__proto__var_type__descriptor);
-  return protobuf_c_message_get_packed_size(
-      (const ProtobufCMessage *)(message));
-}
 PaddleMobile__Framework__Proto__VarType *
 paddle_mobile__framework__proto__var_type__unpack(ProtobufCAllocator *allocator,
                                                   size_t len,
@@ -191,13 +163,6 @@ void paddle_mobile__framework__proto__var_desc__init(
       PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_DESC__INIT;
   *message = init_value;
 }
-size_t paddle_mobile__framework__proto__var_desc__get_packed_size(
-    const PaddleMobile__Framework__Proto__VarDesc *message) {
-  assert(message->base.descriptor ==
-         &paddle_mobile__framework__proto__var_desc__descriptor);
-  return protobuf_c_message_get_packed_size(
-      (const ProtobufCMessage *)(message));
-}
 PaddleMobile__Framework__Proto__VarDesc *
 paddle_mobile__framework__proto__var_desc__unpack(ProtobufCAllocator *allocator,
                                                   size_t len,
@@ -220,13 +185,6 @@ void paddle_mobile__framework__proto__block_desc__init(
       PADDLE_MOBILE__FRAMEWORK__PROTO__BLOCK_DESC__INIT;
   *message = init_value;
 }
-size_t paddle_mobile__framework__proto__block_desc__get_packed_size(
-    const PaddleMobile__Framework__Proto__BlockDesc *message) {
-  assert(message->base.descriptor ==
-         &paddle_mobile__framework__proto__block_desc__descriptor);
-  return protobuf_c_message_get_packed_size(
-      (const ProtobufCMessage *)(message));
-}
 PaddleMobile__Framework__Proto__BlockDesc *
 paddle_mobile__framework__proto__block_desc__unpack(
     ProtobufCAllocator *allocator, size_t len, const uint8_t *data) {
@@ -248,13 +206,6 @@ void paddle_mobile__framework__proto__program_desc__init(
       PADDLE_MOBILE__FRAMEWORK__PROTO__PROGRAM_DESC__INIT;
   *message = init_value;
 }
-size_t paddle_mobile__framework__proto__program_desc__get_packed_size(
-    const PaddleMobile__Framework__Proto__ProgramDesc *message) {
-  assert(message->base.descriptor ==
-         &paddle_mobile__framework__proto__program_desc__descriptor);
-  return protobuf_c_message_get_packed_size(
-      (const ProtobufCMessage *)(message));
-}
 PaddleMobile__Framework__Proto__ProgramDesc *
 paddle_mobile__framework__proto__program_desc__unpack(
     ProtobufCAllocator *allocator, size_t len, const uint8_t *data) {
@@ -310,7 +261,7 @@ const ProtobufCMessageDescriptor
         NULL /* reserved[123] */
 };
 static const ProtobufCFieldDescriptor
-    paddle_mobile__framework__proto__op_desc__attr__field_descriptors[13] = {
+    paddle_mobile__framework__proto__op_desc__attr__field_descriptors[14] = {
         {
             "name", 1, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_STRING,
             0, /* quantifier_offset */
@@ -405,6 +356,13 @@ static const ProtobufCFieldDescriptor
             NULL, NULL, 0, /* flags */
             0, NULL, NULL /* reserved1,reserved2, etc */
         },
+        {
+            "longs", 15, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_INT64,
+            offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, n_longs),
+            offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, longs), NULL,
+            NULL, 0, /* flags */
+            0, NULL, NULL /* reserved1,reserved2, etc */
+        },
 };
 static const unsigned
     paddle_mobile__framework__proto__op_desc__attr__field_indices_by_name[] = {
@@ -417,6 +375,7 @@ static const unsigned
         2,  /* field[2] = i */
         5,  /* field[5] = ints */
         11, /* field[11] = l */
+        13, /* field[13] = longs */
         0,  /* field[0] = name */
         4,  /* field[4] = s */
         7,  /* field[7] = strings */
@@ -424,7 +383,7 @@ static const unsigned
 };
 static const ProtobufCIntRange
     paddle_mobile__framework__proto__op_desc__attr__number_ranges[2 + 1] = {
-        {1, 0}, {10, 8}, {0, 13}};
+        {1, 0}, {10, 8}, {0, 14}};
 const ProtobufCMessageDescriptor
     paddle_mobile__framework__proto__op_desc__attr__descriptor = {
         PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC,
@@ -433,7 +392,7 @@ const ProtobufCMessageDescriptor
         "PaddleMobile__Framework__Proto__OpDesc__Attr",
         "paddle_mobile.framework.proto",
         sizeof(PaddleMobile__Framework__Proto__OpDesc__Attr),
-        13,
+        14,
         paddle_mobile__framework__proto__op_desc__attr__field_descriptors,
         paddle_mobile__framework__proto__op_desc__attr__field_indices_by_name,
         2,
@@ -1448,7 +1407,7 @@ const ProtobufCMessageDescriptor
         NULL /* reserved[123] */
 };
 static const ProtobufCEnumValue
-    paddle_mobile__framework__proto__attr_type__enum_values_by_number[11] = {
+    paddle_mobile__framework__proto__attr_type__enum_values_by_number[12] = {
        {"INT", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INT", 0},
        {"FLOAT", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__FLOAT", 1},
        {"STRING", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__STRING", 2},
@@ -1460,15 +1419,16 @@ static const ProtobufCEnumValue
        {"BLOCK", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCK", 8},
        {"LONG", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__LONG", 9},
        {"BLOCKS", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCKS", 10},
+       {"LONGS", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__LONGS", 11},
 };
 static const ProtobufCIntRange
     paddle_mobile__framework__proto__attr_type__value_ranges[] = {{0, 0},
-                                                                  {0, 11}};
+                                                                  {0, 12}};
 static const ProtobufCEnumValueIndex
-    paddle_mobile__framework__proto__attr_type__enum_values_by_name[11] = {
+    paddle_mobile__framework__proto__attr_type__enum_values_by_name[12] = {
        {"BLOCK", 8},   {"BLOCKS", 10}, {"BOOLEAN", 6}, {"BOOLEANS", 7},
        {"FLOAT", 1},   {"FLOATS", 4},  {"INT", 0},     {"INTS", 3},
-       {"LONG", 9},    {"STRING", 2},  {"STRINGS", 5},
+       {"LONG", 9},    {"LONGS", 11},  {"STRING", 2},  {"STRINGS", 5},
 };
 const ProtobufCEnumDescriptor
     paddle_mobile__framework__proto__attr_type__descriptor = {
@@ -1477,9 +1437,9 @@ const ProtobufCEnumDescriptor
         "AttrType",
         "PaddleMobile__Framework__Proto__AttrType",
         "paddle_mobile.framework.proto",
-        11,
+        12,
         paddle_mobile__framework__proto__attr_type__enum_values_by_number,
-        11,
+        12,
         paddle_mobile__framework__proto__attr_type__enum_values_by_name,
         1,
         paddle_mobile__framework__proto__attr_type__value_ranges,
......
@@ -102,8 +102,9 @@ typedef enum _PaddleMobile__Framework__Proto__AttrType {
   PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BOOLEANS = 7,
   PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCK = 8,
   PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__LONG = 9,
-  PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCKS =
-      10 PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(
+  PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCKS = 10,
+  PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__LONGS =
+      11 PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(
           PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE)
 } PaddleMobile__Framework__Proto__AttrType;
@@ -152,13 +153,15 @@ struct _PaddleMobile__Framework__Proto__OpDesc__Attr {
   int64_t l;
   size_t n_blocks_idx;
   int32_t *blocks_idx;
+  size_t n_longs;
+  int64_t *longs;
 };
 #define PADDLE_MOBILE__FRAMEWORK__PROTO__OP_DESC__ATTR__INIT                   \
   {                                                                            \
     PROTOBUF_C_MESSAGE_INIT(                                                   \
         &paddle_mobile__framework__proto__op_desc__attr__descriptor)           \
     , NULL, PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INT, 0, 0, 0, 0, NULL, \
-        0, NULL, 0, NULL, 0, NULL, 0, 0, 0, NULL, 0, 0, 0, 0, 0, NULL          \
+        0, NULL, 0, NULL, 0, NULL, 0, 0, 0, NULL, 0, 0, 0, 0, 0, NULL, 0, NULL \
   }

 struct _PaddleMobile__Framework__Proto__OpDesc__Var {
@@ -417,8 +420,6 @@ struct _PaddleMobile__Framework__Proto__ProgramDesc {
 /* PaddleMobile__Framework__Proto__Version methods */
 void paddle_mobile__framework__proto__version__init(
     PaddleMobile__Framework__Proto__Version *message);
-size_t paddle_mobile__framework__proto__version__get_packed_size(
-    const PaddleMobile__Framework__Proto__Version *message);
 PaddleMobile__Framework__Proto__Version *
 paddle_mobile__framework__proto__version__unpack(ProtobufCAllocator *allocator,
                                                  size_t len,
@@ -435,8 +436,6 @@ void paddle_mobile__framework__proto__op_desc__var__init(
 /* PaddleMobile__Framework__Proto__OpDesc methods */
 void paddle_mobile__framework__proto__op_desc__init(
     PaddleMobile__Framework__Proto__OpDesc *message);
-size_t paddle_mobile__framework__proto__op_desc__get_packed_size(
-    const PaddleMobile__Framework__Proto__OpDesc *message);
 PaddleMobile__Framework__Proto__OpDesc *
 paddle_mobile__framework__proto__op_desc__unpack(ProtobufCAllocator *allocator,
                                                  size_t len,
@@ -453,8 +452,6 @@ void paddle_mobile__framework__proto__op_proto__attr__init(
 /* PaddleMobile__Framework__Proto__OpProto methods */
 void paddle_mobile__framework__proto__op_proto__init(
     PaddleMobile__Framework__Proto__OpProto *message);
-size_t paddle_mobile__framework__proto__op_proto__get_packed_size(
-    const PaddleMobile__Framework__Proto__OpProto *message);
 PaddleMobile__Framework__Proto__OpProto *
 paddle_mobile__framework__proto__op_proto__unpack(ProtobufCAllocator *allocator,
                                                   size_t len,
@@ -483,8 +480,6 @@ void paddle_mobile__framework__proto__var_type__tuple__init(
 /* PaddleMobile__Framework__Proto__VarType methods */
 void paddle_mobile__framework__proto__var_type__init(
     PaddleMobile__Framework__Proto__VarType *message);
-size_t paddle_mobile__framework__proto__var_type__get_packed_size(
-    const PaddleMobile__Framework__Proto__VarType *message);
 PaddleMobile__Framework__Proto__VarType *
 paddle_mobile__framework__proto__var_type__unpack(ProtobufCAllocator *allocator,
                                                   size_t len,
@@ -495,8 +490,6 @@ void paddle_mobile__framework__proto__var_type__free_unpacked(
 /* PaddleMobile__Framework__Proto__VarDesc methods */
 void paddle_mobile__framework__proto__var_desc__init(
     PaddleMobile__Framework__Proto__VarDesc *message);
-size_t paddle_mobile__framework__proto__var_desc__get_packed_size(
-    const PaddleMobile__Framework__Proto__VarDesc *message);
 PaddleMobile__Framework__Proto__VarDesc *
 paddle_mobile__framework__proto__var_desc__unpack(ProtobufCAllocator *allocator,
                                                   size_t len,
@@ -507,8 +500,6 @@ void paddle_mobile__framework__proto__var_desc__free_unpacked(
 /* PaddleMobile__Framework__Proto__BlockDesc methods */
 void paddle_mobile__framework__proto__block_desc__init(
     PaddleMobile__Framework__Proto__BlockDesc *message);
-size_t paddle_mobile__framework__proto__block_desc__get_packed_size(
-    const PaddleMobile__Framework__Proto__BlockDesc *message);
 PaddleMobile__Framework__Proto__BlockDesc *
 paddle_mobile__framework__proto__block_desc__unpack(
     ProtobufCAllocator *allocator, size_t len, const uint8_t *data);
@@ -518,8 +509,6 @@ void paddle_mobile__framework__proto__block_desc__free_unpacked(
 /* PaddleMobile__Framework__Proto__ProgramDesc methods */
 void paddle_mobile__framework__proto__program_desc__init(
     PaddleMobile__Framework__Proto__ProgramDesc *message);
-size_t paddle_mobile__framework__proto__program_desc__get_packed_size(
-    const PaddleMobile__Framework__Proto__ProgramDesc *message);
 PaddleMobile__Framework__Proto__ProgramDesc *
 paddle_mobile__framework__proto__program_desc__unpack(
     ProtobufCAllocator *allocator, size_t len, const uint8_t *data);
......
@@ -35,6 +35,7 @@ enum AttrType {
   BLOCK = 8;
   LONG = 9;
   BLOCKS = 10;
+  LONGS = 11;
 }

 // OpDesc describes an instance of a C++ framework::OperatorBase
@@ -55,6 +56,7 @@ message OpDesc {
   optional int32 block_idx = 12;
   optional int64 l = 13;
   repeated int32 blocks_idx = 14;
+  repeated int64 longs = 15;
 };

 message Var {
......
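On the wire the new attribute is an ordinary repeated int64 field. After unpacking an OpDesc with the protobuf-c functions above, a LONGS attribute is exposed as the (n_longs, longs) pair declared in framework.pb-c.h, and reading it back is the same element-by-element copy added to class Attribute earlier in this PR. A sketch (the helper name ReadLongs is an assumption):

#include <cstdint>
#include <vector>

// Sketch: copy a repeated `longs` attribute out of an unpacked OpDesc attr.
// The n_longs/longs fields are the ones declared in framework.pb-c.h above.
std::vector<int64_t> ReadLongs(
    const PaddleMobile__Framework__Proto__OpDesc__Attr *attr) {
  std::vector<int64_t> val(attr->n_longs);
  for (size_t i = 0; i < attr->n_longs; ++i) {
    val[i] = attr->longs[i];
  }
  return val;
}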
@@ -125,10 +125,6 @@ LOAD_OP1(prior_box, CPU);
 LOAD_OP2(fusion_conv_add_relu, CPU, FPGA);
 LOAD_FUSION_MATCHER(fusion_conv_add_relu);
 #endif
-#ifdef FUSION_CONVADDADDPRELU_OP
-LOAD_OP2(fusion_conv_add_add_prelu, CPU, FPGA);
-LOAD_FUSION_MATCHER(fusion_conv_add_add_prelu);
-#endif
 #ifdef FUSION_CONVADD_OP
 LOAD_OP2(fusion_conv_add, CPU, MALI_GPU);
 LOAD_FUSION_MATCHER(fusion_conv_add);
@@ -178,10 +174,6 @@ LOAD_FUSION_MATCHER(fusion_conv_add_bn);
 #ifdef DROPOUT_OP
 LOAD_OP2(dropout, CPU, FPGA);
 #endif
-#ifdef FUSION_CONVADDPRELU_OP
-LOAD_OP2(fusion_conv_add_prelu, CPU, FPGA);
-LOAD_FUSION_MATCHER(fusion_conv_add_prelu);
-#endif
 #ifdef FUSION_DWCONVBNRELU_OP
 LOAD_OP1(fusion_dwconv_bn_relu, CPU);
 LOAD_FUSION_MATCHER(fusion_dwconv_bn_relu);
@@ -324,3 +316,15 @@ LOAD_OP1(psroi_pool, CPU);
 #ifdef ROI_PERSPECTIVE_OP
 LOAD_OP1(roi_perspective_transform, CPU);
 #endif
+#ifdef BEAM_SEARCH_OP
+LOAD_OP1(beam_search, CPU);
+#endif
+#ifdef BEAM_SEARCH_DECODE_OP
+LOAD_OP1(beam_search_decode, CPU);
+#endif
+#ifdef PAD2D_OP
+LOAD_OP1(pad2d, CPU);
+#endif
+#ifdef ONE_HOT_OP
+LOAD_OP1(one_hot, CPU);
+#endif
@@ -221,6 +221,8 @@ inline Print &operator<<(Print &printer, const LoDTensor &tensor) {
       printer << static_cast<int>(tensor.data<int8_t>()[i]) << " ";
     } else if (tensor.type() == typeid(int32_t)) {
       printer << tensor.data<int32_t>()[i] << " ";
+    } else if (tensor.type() == typeid(bool)) {
+      printer << tensor.data<bool>()[i] << " ";
     }
   }
 #endif  // PADDLE_MOBILE_FPGA
......
@@ -58,8 +58,7 @@ struct OpInfoFiller {
   void operator()(const std::string& op_type, OpInfo<Dtype>* info) const {
     info->creator_ = [](const std::string& type, const VariableNameMap& inputs,
                         const VariableNameMap& outputs,
-                        const AttributeMap& attrs,
-                        std::shared_ptr<Scope> scope) {
+                        const AttributeMap& attrs, framework::Scope* scope) {
       return new T(type, inputs, outputs, attrs, scope);
     };
   }
@@ -91,7 +90,7 @@ class OpRegistry {
   static std::shared_ptr<OperatorBase<Dtype>> CreateOp(
       const std::string& type, const VariableNameMap& inputs,
       const VariableNameMap& outputs, const AttributeMap attrs,
-      std::shared_ptr<paddle_mobile::framework::Scope> scope) {
+      paddle_mobile::framework::Scope* scope) {
     auto& info = OpInfoMap<Dtype>::Instance()->Get(type);
     auto op = info.Creator()(type, inputs, outputs, attrs, scope);
     return std::shared_ptr<OperatorBase<Dtype>>(op);
......
@@ -43,7 +43,7 @@ OperatorBase<Dtype>::OperatorBase(const std::string &type,
                                   const VariableNameMap &inputs,
                                   const VariableNameMap &outputs,
                                   const AttributeMap &attrs,
-                                  std::shared_ptr<Scope> scope)
+                                  framework::Scope *scope)
     : type_(type),
       inputs_(inputs),
       outputs_(outputs),
@@ -67,30 +67,22 @@ void OperatorBase<Dtype>::Run() {
   for (const auto key : input_keys) {
     auto var_vec_in = inputs_.at(key);
     for (int i = 0; i < var_vec_in.size(); ++i) {
-      auto vari = this->scope_->FindVar(var_vec_in[i]);
-      if (vari->IsInitialized()) {
-        const Tensor *tensor = vari->template Get<framework::LoDTensor>();
-        if (tensor) {
-          DLOG << type_ << " input- " << key << "=" << *tensor;
-#ifdef PADDLE_MOBILE_FPGA
-          DLOG << var_vec_in[i];
-#endif
-        }
+      auto var = this->scope_->FindVar(var_vec_in[i]);
+      if (var->IsInitialized() &&
+          var->template IsType<framework::LoDTensor>()) {
+        const Tensor *tensor = var->template Get<framework::LoDTensor>();
+        if (tensor) DLOG << type_ << " input- " << key << "=" << *tensor;
       }
     }
   }
   for (const auto key : GetOutKeys()) {
     auto var_vec_out = outputs_.at(key);
     for (int i = 0; i < var_vec_out.size(); ++i) {
-      auto vari = scope_->FindVar(var_vec_out[i]);
-      if (vari->IsInitialized()) {
-        const Tensor *tensor = vari->template Get<framework::LoDTensor>();
-        if (tensor) {
-          DLOG << type_ << " output- " << key << "=" << *tensor;
-#ifdef PADDLE_MOBILE_FPGA
-          DLOG << var_vec_out[i];
-#endif
-        }
+      auto var = scope_->FindVar(var_vec_out[i]);
+      if (var->IsInitialized() &&
+          var->template IsType<framework::LoDTensor>()) {
+        const Tensor *tensor = var->template Get<framework::LoDTensor>();
+        if (tensor) DLOG << type_ << " output- " << key << "=" << *tensor;
       }
     }
   }
......
...@@ -15,7 +15,6 @@ limitations under the License. */ ...@@ -15,7 +15,6 @@ limitations under the License. */
#pragma once #pragma once
#include <map> #include <map>
#include <memory>
#include <string> #include <string>
#include <utility> #include <utility>
#include <vector> #include <vector>
...@@ -58,7 +57,7 @@ class OperatorBase { ...@@ -58,7 +57,7 @@ class OperatorBase {
public: public:
OperatorBase(const std::string &type, const VariableNameMap &inputs, OperatorBase(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs, const VariableNameMap &outputs, const AttributeMap &attrs,
std::shared_ptr<Scope> scope); framework::Scope *scope);
virtual ~OperatorBase() {} virtual ~OperatorBase() {}
virtual void Init() = 0; virtual void Init() = 0;
...@@ -81,11 +80,10 @@ class OperatorBase { ...@@ -81,11 +80,10 @@ class OperatorBase {
} }
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
void InsertTensors(); void InsertTensors();
void ChangeNameMap(string key, std::vector<string> value);
#endif #endif
protected: protected:
std::shared_ptr<Scope> scope_; framework::Scope *scope_;
std::string type_; std::string type_;
VariableNameMap inputs_; VariableNameMap inputs_;
VariableNameMap outputs_; VariableNameMap outputs_;
...@@ -98,35 +96,15 @@ class OperatorBase { ...@@ -98,35 +96,15 @@ class OperatorBase {
template <typename Dtype, typename ParamType, typename KernelType> template <typename Dtype, typename ParamType, typename KernelType>
class OperatorWithKernel : public OperatorBase<Dtype> { class OperatorWithKernel : public OperatorBase<Dtype> {
public: public:
#ifndef PADDLE_MOBILE_FPGA1
OperatorWithKernel(const std::string &type, const VariableNameMap &inputs, OperatorWithKernel(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs, const VariableNameMap &outputs, const AttributeMap &attrs,
std::shared_ptr<Scope> scope) framework::Scope *scope)
: OperatorBase<Dtype>(type, inputs, outputs, attrs, scope), : OperatorBase<Dtype>(type, inputs, outputs, attrs, scope),
param_(inputs, outputs, attrs, scope.get()) { param_(inputs, outputs, attrs, scope) {
#ifdef PADDLE_MOBILE_CL #ifdef PADDLE_MOBILE_CL
kernel_.InitCLHelper(scope->GetCLScpoe()); kernel_.InitCLHelper(scope->GetCLScpoe());
#endif #endif
} }
#else
OperatorWithKernel(const std::string &type, const VariableNameMap inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
std::shared_ptr<Scope> scope)
: OperatorBase<Dtype>(type, inputs, outputs, attrs, scope) {
static int feed_num = 0;
static int fetch_num = 0;
if (type == "feed") {
auto new_name = string("feed") + std::to_string(feed_num++);
auto var = scope->Var(new_name);
(const_cast<VariableNameMap &>(inputs)).at("X") = {string(new_name)};
} else if (type == "fetch") {
auto new_name = string("fetch") + std::to_string(fetch_num++);
auto var = scope->Var(new_name);
(const_cast<VariableNameMap &>(outputs)).at("Out") = {string(new_name)};
}
param_ = ParamType(inputs, outputs, attrs, *scope);
}
#endif
virtual void RunImpl() { this->kernel_.Compute(this->param_); } virtual void RunImpl() { this->kernel_.Compute(this->param_); }
virtual void InferShape() const = 0; virtual void InferShape() const = 0;
...@@ -198,21 +176,20 @@ class FusionOpMatcher { ...@@ -198,21 +176,20 @@ class FusionOpMatcher {
std::shared_ptr<OpDesc> new_opdesc_; std::shared_ptr<OpDesc> new_opdesc_;
}; };
 #define DECLARE_OPERATOR(OpName, OpParam, OpKernel)                          \
   template <typename DeviceType, typename T>                                 \
   class OpName##Op : public framework::OperatorWithKernel<                   \
                          DeviceType, OpParam<DeviceType>,                    \
                          operators::OpKernel<DeviceType, T>> {               \
    public:                                                                   \
     OpName##Op(const std::string &type, const VariableNameMap &inputs,       \
                const VariableNameMap &outputs,                               \
-               const framework::AttributeMap &attrs,                         \
-               std::shared_ptr<framework::Scope> scope)                      \
+               const framework::AttributeMap &attrs, framework::Scope *scope) \
         : framework::OperatorWithKernel<DeviceType, OpParam<DeviceType>,     \
                                         operators::OpKernel<DeviceType, T>>( \
               type, inputs, outputs, attrs, scope) {}                        \
                                                                              \
     void InferShape() const override;                                        \
   };
#define DECLARE_KERNEL(OpName, OpParam) \ #define DECLARE_KERNEL(OpName, OpParam) \
...@@ -228,7 +205,7 @@ class FusionOpMatcher { ...@@ -228,7 +205,7 @@ class FusionOpMatcher {
cls(const std::string &type, const ::paddle_mobile::VariableNameMap &inputs, \ cls(const std::string &type, const ::paddle_mobile::VariableNameMap &inputs, \
const ::paddle_mobile::VariableNameMap &outputs, \ const ::paddle_mobile::VariableNameMap &outputs, \
const ::paddle_mobile::framework::AttributeMap &attrs, \ const ::paddle_mobile::framework::AttributeMap &attrs, \
std::shared_ptr<::paddle_mobile::framework::Scope> scope) \ ::paddle_mobile::framework::Scope *scope) \
: parent_cls<Dtype, T>(type, inputs, outputs, attrs, scope) {} : parent_cls<Dtype, T>(type, inputs, outputs, attrs, scope) {}
} // namespace framework } // namespace framework
......
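For reference, the reflowed DECLARE_OPERATOR still expands to the same thin wrapper class; only the scope parameter changed type. Roughly what DECLARE_OPERATOR(BeamSearch, BeamSearchParam, BeamSearchKernel) now expands to (a sketch, modulo line breaks):

template <typename DeviceType, typename T>
class BeamSearchOp : public framework::OperatorWithKernel<
                         DeviceType, BeamSearchParam<DeviceType>,
                         operators::BeamSearchKernel<DeviceType, T>> {
 public:
  BeamSearchOp(const std::string &type, const VariableNameMap &inputs,
               const VariableNameMap &outputs,
               const framework::AttributeMap &attrs, framework::Scope *scope)
      : framework::OperatorWithKernel<
            DeviceType, BeamSearchParam<DeviceType>,
            operators::BeamSearchKernel<DeviceType, T>>(type, inputs, outputs,
                                                        attrs, scope) {}

  void InferShape() const override;  // defined per op, e.g. in beam_search_op.cpp
};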
...@@ -42,9 +42,15 @@ OpDesc::OpDesc(PaddleMobile__Framework__Proto__OpDesc *desc) { ...@@ -42,9 +42,15 @@ OpDesc::OpDesc(PaddleMobile__Framework__Proto__OpDesc *desc) {
PaddleMobile__Framework__Proto__OpDesc__Attr *attr = desc->attrs[k]; PaddleMobile__Framework__Proto__OpDesc__Attr *attr = desc->attrs[k];
std::string attr_name(attr->name); std::string attr_name(attr->name);
attrs_[attr_name] = Attribute::GetAttrValue(attr); attrs_[attr_name] = Attribute::GetAttrValue(attr);
proto_attrs_.push_back(*attr);
} }
} }
const std::vector<PaddleMobile__Framework__Proto__OpDesc__Attr>
&OpDesc::GetProtoAttr() const {
return proto_attrs_;
}
const std::vector<std::string> &OpDesc::Input(const std::string &name) const { const std::vector<std::string> &OpDesc::Input(const std::string &name) const {
return inputs_.find(name)->second; return inputs_.find(name)->second;
} }
...@@ -58,6 +64,15 @@ Attribute OpDesc::GetAttr(const std::string &name) const { ...@@ -58,6 +64,15 @@ Attribute OpDesc::GetAttr(const std::string &name) const {
return it->second; return it->second;
} }
void OpDesc::SetBlockAttr(const std::string &name, BlockDesc *block) {
this->attrs_[name].Set<BlockDesc *>(block);
}
void OpDesc::SetBlocksAttr(const std::string &name,
std::vector<BlockDesc *> blocks) {
this->attrs_[name].Set<std::vector<BlockDesc *>>(blocks);
}
std::unordered_map<std::string, Attribute> &OpDesc::GetAttrMap() { std::unordered_map<std::string, Attribute> &OpDesc::GetAttrMap() {
return attrs_; return attrs_;
} }
......
...@@ -29,11 +29,13 @@ class OpDesc { ...@@ -29,11 +29,13 @@ class OpDesc {
friend class ProgramOptimize; friend class ProgramOptimize;
friend class FusionOpMatcher; friend class FusionOpMatcher;
friend class Node; friend class Node;
explicit OpDesc(PaddleMobile__Framework__Proto__OpDesc *op_desc); explicit OpDesc(PaddleMobile__Framework__Proto__OpDesc *op_desc);
OpDesc(const OpDesc &op_desc) : type_(op_desc.type_) { OpDesc(const OpDesc &op_desc) : type_(op_desc.type_) {
this->inputs_ = op_desc.inputs_; this->inputs_ = op_desc.inputs_;
this->outputs_ = op_desc.outputs_; this->outputs_ = op_desc.outputs_;
this->attrs_ = op_desc.attrs_; this->attrs_ = op_desc.attrs_;
this->proto_attrs_ = op_desc.proto_attrs_;
} }
OpDesc() {} OpDesc() {}
...@@ -41,6 +43,12 @@ class OpDesc { ...@@ -41,6 +43,12 @@ class OpDesc {
const std::vector<std::string> &Output(const std::string &name) const; const std::vector<std::string> &Output(const std::string &name) const;
Attribute GetAttr(const std::string &name) const; Attribute GetAttr(const std::string &name) const;
const std::vector<PaddleMobile__Framework__Proto__OpDesc__Attr>
&GetProtoAttr() const;
void SetBlockAttr(const std::string &name, BlockDesc *block);
void SetBlocksAttr(const std::string &name, std::vector<BlockDesc *> block);
VariableNameMap &GetInputs() { return inputs_; } VariableNameMap &GetInputs() { return inputs_; }
VariableNameMap &GetOutputs() { return outputs_; } VariableNameMap &GetOutputs() { return outputs_; }
...@@ -60,6 +68,7 @@ class OpDesc { ...@@ -60,6 +68,7 @@ class OpDesc {
VariableNameMap inputs_; VariableNameMap inputs_;
VariableNameMap outputs_; VariableNameMap outputs_;
AttributeMap attrs_; AttributeMap attrs_;
std::vector<PaddleMobile__Framework__Proto__OpDesc__Attr> proto_attrs_;
}; };
Print &operator<<(Print &printer, const OpDesc &op_desc); Print &operator<<(Print &printer, const OpDesc &op_desc);
......
...@@ -15,8 +15,8 @@ limitations under the License. */ ...@@ -15,8 +15,8 @@ limitations under the License. */
#include <string> #include <string>
#include <vector> #include <vector>
#include "framework/program/program_desc.h"
#include "framework/program/tensor_desc.h" #include "framework/program/tensor_desc.h"
#include "program_desc.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
...@@ -25,6 +25,25 @@ ProgramDesc::ProgramDesc(PaddleMobile__Framework__Proto__ProgramDesc *desc) { ...@@ -25,6 +25,25 @@ ProgramDesc::ProgramDesc(PaddleMobile__Framework__Proto__ProgramDesc *desc) {
for (int i = 0; i < desc->n_blocks; ++i) { for (int i = 0; i < desc->n_blocks; ++i) {
blocks_.emplace_back(std::make_shared<BlockDesc>(desc->blocks[i])); blocks_.emplace_back(std::make_shared<BlockDesc>(desc->blocks[i]));
} }
for (auto &block : blocks_) {
for (auto op : block->Ops()) {
for (const auto &attr : op->GetProtoAttr()) {
if (attr.type == PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCK) {
size_t blk_idx = attr.block_idx;
op->SetBlockAttr(attr.name, this->MutableBlock(blk_idx));
} else if (attr.type ==
PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCKS) {
size_t n_blocks_idx = attr.n_blocks_idx;
int32_t *blks_idx = attr.blocks_idx;
std::vector<BlockDesc *> block_descs;
for (size_t i = 0; i < n_blocks_idx; ++i) {
block_descs.push_back(this->MutableBlock(blks_idx[i]));
}
op->SetBlocksAttr(attr.name, block_descs);
}
}
}
}
} }
void ProgramDesc::Description(std::string header) { void ProgramDesc::Description(std::string header) {
...@@ -60,9 +79,8 @@ void ProgramDesc::Description(std::string header) { ...@@ -60,9 +79,8 @@ void ProgramDesc::Description(std::string header) {
} }
for (const auto &var_desc : block->Vars()) { for (const auto &var_desc : block->Vars()) {
LOG(kLOG_DEBUG1) << "var name: " << var_desc->Name();
if (var_desc->Type() == VARTYPE_TYPE_LOD_TENSOR) { if (var_desc->Type() == VARTYPE_TYPE_LOD_TENSOR) {
LOG(kLOG_DEBUG1) << "var name: " << var_desc->Name();
const TensorDesc &tensor_desc = var_desc->Tensor_desc(); const TensorDesc &tensor_desc = var_desc->Tensor_desc();
LOG(kLOG_DEBUG2) << "in var tensor desc dims size: " LOG(kLOG_DEBUG2) << "in var tensor desc dims size: "
......
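The new loop in the ProgramDesc constructor resolves BLOCK and BLOCKS attributes from the integer indices stored in the proto into live BlockDesc pointers. Control-flow operators (while, conditional_block, the beam-search decode path) store their sub-blocks this way, and after this pass a consumer can reach the sub-block directly. A sketch of the consuming side, assuming an op carrying a "sub_block" attribute (the attribute name is illustrative) and the Attribute::Get<T> accessor used elsewhere in the tree:

// Hypothetical consumer of a resolved block attribute.
framework::BlockDesc *sub_block =
    op_desc.GetAttr("sub_block").Get<framework::BlockDesc *>();
if (sub_block != nullptr) {
  for (const auto &sub_op : sub_block->Ops()) {
    DLOG << "sub-block op: " << sub_op->Type();  // e.g. one decode step
  }
}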
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once #pragma once
#include <string>
#include <vector> #include <vector>
#include "common/types.h" #include "common/types.h"
...@@ -31,6 +32,14 @@ class ProgramDesc { ...@@ -31,6 +32,14 @@ class ProgramDesc {
std::shared_ptr<BlockDesc> Block(size_t idx); std::shared_ptr<BlockDesc> Block(size_t idx);
BlockDesc *MutableBlock(size_t idx) {
if (idx == -1) {
return nullptr;
} else {
return blocks_[idx].get();
}
}
const std::vector<std::shared_ptr<BlockDesc>> &Blocks() { return blocks_; } const std::vector<std::shared_ptr<BlockDesc>> &Blocks() { return blocks_; }
ProgramDesc(const ProgramDesc &program_desc) { ProgramDesc(const ProgramDesc &program_desc) {
for (auto &block : program_desc.blocks_) { for (auto &block : program_desc.blocks_) {
......
...@@ -32,15 +32,7 @@ class Scope { ...@@ -32,15 +32,7 @@ class Scope {
Scope() = default; Scope() = default;
~Scope() { ~Scope() {
for (auto &var : vars_) { DropKids();
delete var.second;
}
vars_.clear();
for (auto kid : kids_) {
delete kid;
}
kids_.clear();
#ifdef PADDLE_MOBILE_CL #ifdef PADDLE_MOBILE_CL
delete cl_scope_; delete cl_scope_;
#endif #endif
......
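The destructor now funnels child-scope teardown through DropKids(); its body is not part of this diff. A plausible sketch, assuming it mirrors the inline loop it replaces (an assumption, not the committed code):

// Assumed shape of Scope::DropKids(): delete child scopes; each child's
// destructor drops its own kids in turn.
void Scope::DropKids() {
  for (Scope *kid : kids_) {
    delete kid;
  }
  kids_.clear();
}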
...@@ -209,8 +209,9 @@ class Tensor : public TensorBase { ...@@ -209,8 +209,9 @@ class Tensor : public TensorBase {
} }
inline void set_type(std::type_index type) { holder_->set_type(type); } inline void set_type(std::type_index type) { holder_->set_type(type); }
   inline void *get_data() {
-    return (void *)(((PlaceholderImpl *)(holder_.get()))->ptr_.get());
-  }  // NOLINT
+    return (
+        void *)(((PlaceholderImpl *)(holder_.get()))->ptr_.get());  // NOLINT
+  }
inline void *init(std::type_index type) { inline void *init(std::type_index type) {
if (holder_ != nullptr) { if (holder_ != nullptr) {
......
...@@ -14,13 +14,26 @@ limitations under the License. */ ...@@ -14,13 +14,26 @@ limitations under the License. */
#pragma once #pragma once
#include <vector> #include <vector>
#include "framework/tensor.h"
#include "memory/t_malloc.h" #include "memory/t_malloc.h"
#include "tensor.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
void TensorCopy(const Tensor &src, Tensor *dst); void TensorCopy(const Tensor& src, Tensor* dst);
template <typename T>
void TensorFromVector(const std::vector<T>& src, Tensor* dst);
template <typename T>
void TensorFromVector(const std::vector<T>& src, Tensor* dst) {
auto src_ptr = static_cast<const void*>(src.data());
dst->Resize({static_cast<int64_t>(src.size())});
auto dst_ptr = static_cast<void*>(dst->mutable_data<T>());
auto size = src.size() * sizeof(T);
memory::Copy(dst_ptr, src_ptr, size);
}
} // namespace framework } // namespace framework
} // namespace paddle_mobile } // namespace paddle_mobile
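The new TensorFromVector helper gives feed paths and tests a one-liner for building a 1-D tensor from host data. Usage sketch:

#include <vector>
#include "framework/tensor_util.h"

void example() {
  std::vector<float> src = {1.f, 2.f, 3.f, 4.f};
  paddle_mobile::framework::Tensor dst;
  paddle_mobile::framework::TensorFromVector(src, &dst);
  // dst now has dims {4} and owns a copy of the four floats
}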
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "io/api_paddle_mobile.h" #include "io/api_paddle_mobile.h"
#include <string>
#include <vector> #include <vector>
#include "common/enforce.h" #include "common/enforce.h"
#include "framework/tensor.h" #include "framework/tensor.h"
...@@ -145,7 +146,7 @@ void PaddleMobilePredictor<Device, T>::FeedPaddleTensors( ...@@ -145,7 +146,7 @@ void PaddleMobilePredictor<Device, T>::FeedPaddleTensors(
tensors[i].init(typeid(float)); tensors[i].init(typeid(float));
ConvertPaddleTensors(inputs[i], &tensors[i]); ConvertPaddleTensors(inputs[i], &tensors[i]);
} }
paddle_mobile_->FeedTensorData(tensors); // paddle_mobile_->FeedTensorData(tensors);
} }
template <typename Device, typename T> template <typename Device, typename T>
...@@ -169,7 +170,7 @@ void PaddleMobilePredictor<Device, T>::GetPaddleTensor(const std::string &name, ...@@ -169,7 +170,7 @@ void PaddleMobilePredictor<Device, T>::GetPaddleTensor(const std::string &name,
PaddleTensor *output) { PaddleTensor *output) {
framework::Tensor *t = paddle_mobile_->GetTensorByName(name); framework::Tensor *t = paddle_mobile_->GetTensorByName(name);
ConvertTensors(*t, output); ConvertTensors(*t, output);
}; }
template <typename Device, typename T> template <typename Device, typename T>
void PaddleMobilePredictor<Device, T>::Predict_From_To(int start, int end) { void PaddleMobilePredictor<Device, T>::Predict_From_To(int start, int end) {
......
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once #pragma once
#include <string>
#include <vector> #include <vector>
#include "common/types.h" #include "common/types.h"
#include "io/paddle_inference_api.h" #include "io/paddle_inference_api.h"
......
...@@ -39,8 +39,6 @@ using framework::Tensor; ...@@ -39,8 +39,6 @@ using framework::Tensor;
using paddle_mobile::CPU; using paddle_mobile::CPU;
using std::string; using std::string;
const char *ANDROID_LOG_TAG =
"paddle_mobile LOG built on " __DATE__ " " __TIME__;
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile; paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
static std::mutex shared_mutex; static std::mutex shared_mutex;
......
...@@ -152,14 +152,14 @@ PMStatus PaddleMobile<Device, T>::Predict() { ...@@ -152,14 +152,14 @@ PMStatus PaddleMobile<Device, T>::Predict() {
} }
template <typename Device, typename T> template <typename Device, typename T>
-void PaddleMobile<Device, T>::Feed(const framework::Tensor &input,
-                                   const std::string &var_name) {
+void PaddleMobile<Device, T>::Feed(const std::string &var_name,
+                                   const framework::Tensor &input) {
   executor_->SetInput(input, var_name);
 }

 template <typename Device, typename T>
-void PaddleMobile<Device, T>::Feed(const framework::LoDTensor &input,
-                                   const std::string &var_name) {
+void PaddleMobile<Device, T>::Feed(const std::string &var_name,
+                                   const framework::LoDTensor &input) {
   executor_->SetInput(input, var_name);
 }
...@@ -227,16 +227,11 @@ template <typename Device, typename T> ...@@ -227,16 +227,11 @@ template <typename Device, typename T>
void PaddleMobile<Device, T>::FeedData(const framework::Tensor &t) { void PaddleMobile<Device, T>::FeedData(const framework::Tensor &t) {
executor_->FeedData(t); executor_->FeedData(t);
} }
template <typename Device, typename T> template <typename Device, typename T>
void PaddleMobile<Device, T>::FeedData(const std::vector<void *> &v) { void PaddleMobile<Device, T>::FeedData(const std::vector<void *> &v) {
executor_->FeedData(v); executor_->FeedData(v);
}; }
template <typename Device, typename T>
void PaddleMobile<Device, T>::FeedTensorData(
const std::vector<framework::Tensor> &v) {
executor_->FeedTensorData(v);
};
template <typename Device, typename T> template <typename Device, typename T>
void PaddleMobile<Device, T>::GetResults(std::vector<void *> *v) { void PaddleMobile<Device, T>::GetResults(std::vector<void *> *v) {
...@@ -253,7 +248,7 @@ template <typename Device, typename T> ...@@ -253,7 +248,7 @@ template <typename Device, typename T>
framework::Tensor *PaddleMobile<Device, T>::GetTensorByName( framework::Tensor *PaddleMobile<Device, T>::GetTensorByName(
const std::string &name) { const std::string &name) {
return executor_->GetTensorByName(name); return executor_->GetTensorByName(name);
}; }
template <typename Device, typename T> template <typename Device, typename T>
std::shared_ptr<framework::Tensor> PaddleMobile<Device, T>::FetchResult( std::shared_ptr<framework::Tensor> PaddleMobile<Device, T>::FetchResult(
......
...@@ -33,7 +33,7 @@ namespace paddle_mobile { ...@@ -33,7 +33,7 @@ namespace paddle_mobile {
template <typename Device, typename T = float> template <typename Device, typename T = float>
class PaddleMobile { class PaddleMobile {
public: public:
PaddleMobile(PaddleMobileConfigInternal config) : config_(config) { explicit PaddleMobile(PaddleMobileConfigInternal config) : config_(config) {
#ifndef PADDLE_MOBILE_CL #ifndef PADDLE_MOBILE_CL
bool is_gpu = std::is_same<DeviceType<kGPU_CL>, Device>::value; bool is_gpu = std::is_same<DeviceType<kGPU_CL>, Device>::value;
PADDLE_MOBILE_ENFORCE(!is_gpu, "Please recompile with GPU_CL is on"); PADDLE_MOBILE_ENFORCE(!is_gpu, "Please recompile with GPU_CL is on");
...@@ -69,8 +69,8 @@ class PaddleMobile { ...@@ -69,8 +69,8 @@ class PaddleMobile {
const std::vector<int64_t> &dims); const std::vector<int64_t> &dims);
PMStatus Predict(); PMStatus Predict();
void Feed(const framework::LoDTensor &input, const std::string &var_name); void Feed(const std::string &var_name, const framework::LoDTensor &input);
void Feed(const framework::Tensor &input, const std::string &var_name); void Feed(const std::string &var_name, const framework::Tensor &input);
typedef std::shared_ptr<framework::LoDTensor> LoDTensorPtr; typedef std::shared_ptr<framework::LoDTensor> LoDTensorPtr;
LoDTensorPtr Fetch(const std::string &var_name); LoDTensorPtr Fetch(const std::string &var_name);
...@@ -91,7 +91,6 @@ class PaddleMobile { ...@@ -91,7 +91,6 @@ class PaddleMobile {
void InjectVariable(const framework::Tensor &t, std::string var_name); void InjectVariable(const framework::Tensor &t, std::string var_name);
void FeedData(const framework::Tensor &t); void FeedData(const framework::Tensor &t);
void FeedData(const std::vector<void *> &v); void FeedData(const std::vector<void *> &v);
void FeedTensorData(const std::vector<framework::Tensor> &v);
void GetResults(std::vector<void *> *v); void GetResults(std::vector<void *> *v);
void GetTensorResults(std::vector<framework::Tensor *> *v); void GetTensorResults(std::vector<framework::Tensor *> *v);
......
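Every Feed call site must be updated for the swapped parameter order above: the variable name now comes first, mirroring Fetch(var_name). Before/after sketch (the variable names are illustrative):

paddle_mobile::PaddleMobile<paddle_mobile::CPU> engine;
paddle_mobile::framework::Tensor input;  // filled elsewhere

// old (no longer compiles): engine.Feed(input, "image");
engine.Feed("image", input);   // new argument order
auto output = engine.Fetch("prediction");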
...@@ -17,11 +17,12 @@ limitations under the License. */ ...@@ -17,11 +17,12 @@ limitations under the License. */
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
#define DEFINE_ACTIVATION_INFERSHAPE(OpName) \ #define DEFINE_ACTIVATION_INFERSHAPE(OpName) \
template <typename Dtype, typename T> \ template <typename Dtype, typename T> \
void OpName##Op<Dtype, T>::InferShape() const { \ void OpName##Op<Dtype, T>::InferShape() const { \
const auto &input_dims = this->param_.InputX()->dims(); \ const auto &input_dims = this->param_.InputX()->dims(); \
this->param_.Out()->Resize(input_dims); \ this->param_.Out()->Resize(input_dims); \
this->param_.Out()->set_lod(this->param_.InputX()->lod()); \
} }
#ifdef RELU_OP #ifdef RELU_OP
......
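The single added line in DEFINE_ACTIVATION_INFERSHAPE propagates the input's LoD to the output, so activations applied to sequence data (as in the attention model this PR targets) keep their sequence boundaries. What the macro now expands to for, say, Relu (sketch):

template <typename Dtype, typename T>
void ReluOp<Dtype, T>::InferShape() const {
  const auto &input_dims = this->param_.InputX()->dims();
  this->param_.Out()->Resize(input_dims);
  // new: output inherits the input's level-of-detail (sequence) info
  this->param_.Out()->set_lod(this->param_.InputX()->lod());
}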
...@@ -32,8 +32,7 @@ class BatchNormOp ...@@ -32,8 +32,7 @@ class BatchNormOp
public: public:
BatchNormOp(const string &type, const VariableNameMap &inputs, BatchNormOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs, framework::Scope *scope)
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType, BatchNormParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, BatchNormParam<DeviceType>,
BatchNormKernel<DeviceType, T>>( BatchNormKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
...@@ -11,27 +11,26 @@ distributed under the License is distributed on an "AS IS" BASIS, ...@@ -11,27 +11,26 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
-#ifdef FUSION_CONVADD_OP
-#include "operators/kernel/conv_add_kernel.h"
-#include "../central-arm-func/conv_add_arm_func.h"
-namespace paddle_mobile {
-namespace operators {
-template <>
-bool ConvAddKernel<CPU, float>::Init(FusionConvAddParam<CPU> *param) {
-  return true;
-}
-template <>
-void ConvAddKernel<CPU, float>::Compute(const FusionConvAddParam<CPU> &param) {
-  ConvAddCompute<float>(param);
-}
-template class ConvAddKernel<CPU, float>;
-}  // namespace operators
-}  // namespace paddle_mobile
-#endif
+#ifdef BEAM_SEARCH_DECODE_OP
+#pragma once
+#include "operators/beam_search_decode_op.h"
+namespace paddle_mobile {
+namespace operators {
+template <typename Dtype, typename T>
+void BeamSearchDecodeOp<Dtype, T>::InferShape() const {}
+}  // namespace operators
+}  // namespace paddle_mobile
+namespace ops = paddle_mobile::operators;
+#ifdef PADDLE_MOBILE_CPU
+REGISTER_OPERATOR_CPU(beam_search_decode, ops::BeamSearchDecodeOp);
+#endif
+#endif  // BEAM_SEARCH_DECODE_OP
...@@ -12,27 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,27 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
-#ifdef FUSION_CONVADDPRELU_OP
-#include "operators/kernel/conv_add_prelu_kernel.h"
-#include "operators/kernel/central-arm-func/conv_add_prelu_arm_func.h"
-namespace paddle_mobile {
-namespace operators {
-template <>
-bool ConvAddPReluKernel<CPU, float>::Init(FusionConvAddPReluParam<CPU> *param) {
-  return true;
-}
-template <>
-void ConvAddPReluKernel<CPU, float>::Compute(
-    const FusionConvAddPReluParam<CPU> &param) {
-  ConvAddPReluCompute<float>(param);
-}
-template class ConvAddPReluKernel<CPU, float>;
-}  // namespace operators
-}  // namespace paddle_mobile
-#endif
+#ifdef BEAM_SEARCH_DECODE_OP
+#pragma once
+#include <string>
+#include "framework/operator.h"
+#include "operators/kernel/beam_search_decode_kernel.h"
+namespace paddle_mobile {
+namespace operators {
+DECLARE_OPERATOR(BeamSearchDecode, BeamSearchDecodeParam,
+                 BeamSearchDecodeKernel);
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif  // BEAM_SEARCH_DECODE_OP
...@@ -12,27 +12,25 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,27 +12,25 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
-#ifdef FUSION_CONVADDRELU_OP
-#include "operators/kernel/conv_add_relu_kernel.h"
-#include "operators/kernel/central-arm-func/conv_add_relu_arm_func.h"
-namespace paddle_mobile {
-namespace operators {
-template <>
-bool ConvAddReluKernel<CPU, float>::Init(FusionConvAddReluParam<CPU> *param) {
-  return true;
-}
-template <>
-void ConvAddReluKernel<CPU, float>::Compute(
-    const FusionConvAddReluParam<CPU> &param) {
-  ConvAddReluCompute<float, float>(param);
-}
-template class ConvAddReluKernel<CPU, float>;
-}  // namespace operators
-}  // namespace paddle_mobile
-#endif
+#ifdef BEAM_SEARCH_OP
+#pragma once
+#include "operators/beam_search_op.h"
+namespace paddle_mobile {
+namespace operators {
+template <typename Dtype, typename T>
+void BeamSearchOp<Dtype, T>::InferShape() const {}
+}  // namespace operators
+}  // namespace paddle_mobile
+namespace ops = paddle_mobile::operators;
+#ifdef PADDLE_MOBILE_CPU
+REGISTER_OPERATOR_CPU(beam_search, ops::BeamSearchOp);
+#endif
+#endif  // BEAM_SEARCH_OP
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef BEAM_SEARCH_OP
#pragma once
#include <string>
#include "framework/operator.h"
#include "operators/kernel/beam_search_kernel.h"
namespace paddle_mobile {
namespace operators {
DECLARE_OPERATOR(BeamSearch, BeamSearchParam, BeamSearchKernel);
} // namespace operators
} // namespace paddle_mobile
#endif // BEAM_SEARCH_OP
...@@ -34,8 +34,7 @@ class BilinearOp : public framework::OperatorWithKernel< ...@@ -34,8 +34,7 @@ class BilinearOp : public framework::OperatorWithKernel<
public: public:
BilinearOp(const std::string &type, const VariableNameMap &inputs, BilinearOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs, framework::Scope *scope)
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, BilinearInterpParam<DeviceType>, DeviceType, BilinearInterpParam<DeviceType>,
operators::BilinearInterpKernel<DeviceType, T>>( operators::BilinearInterpKernel<DeviceType, T>>(
......
...@@ -34,8 +34,7 @@ class BoxCoderOp : public framework::OperatorWithKernel< ...@@ -34,8 +34,7 @@ class BoxCoderOp : public framework::OperatorWithKernel<
public: public:
BoxCoderOp(const std::string &type, const VariableNameMap &inputs, BoxCoderOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs, framework::Scope *scope)
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType, BoxCoderParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, BoxCoderParam<DeviceType>,
operators::BoxCoderKernel<DeviceType, T>>( operators::BoxCoderKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
...@@ -31,7 +31,7 @@ class CastOp : public framework::OperatorWithKernel< ...@@ -31,7 +31,7 @@ class CastOp : public framework::OperatorWithKernel<
public: public:
CastOp(const std::string &type, const VariableNameMap &inputs, CastOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const framework::AttributeMap &attrs, const VariableNameMap &outputs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel<DeviceType, CastParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, CastParam<DeviceType>,
operators::CastKernel<DeviceType, T>>( operators::CastKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
...@@ -30,7 +30,7 @@ class ConcatOp : public framework::OperatorWithKernel< ...@@ -30,7 +30,7 @@ class ConcatOp : public framework::OperatorWithKernel<
public: public:
ConcatOp(const string &type, const VariableNameMap &inputs, ConcatOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const framework::AttributeMap &attrs, const VariableNameMap &outputs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel<DeviceType, ConcatParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, ConcatParam<DeviceType>,
operators::ConcatKernel<DeviceType, T>>( operators::ConcatKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
...@@ -18,7 +18,7 @@ limitations under the License. */ ...@@ -18,7 +18,7 @@ limitations under the License. */
#include <vector> #include <vector>
#include "framework/op_proto_maker.h" #include "framework/op_proto_maker.h"
#include "framework/op_registry.h" #include "framework/op_registry.h"
#include "operators/math/conv_func.h" #include "operators/kernel/central-arm-func/conv_arm_func.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -39,9 +39,9 @@ void ConvOp<Dtype, T>::InferShape() const { ...@@ -39,9 +39,9 @@ void ConvOp<Dtype, T>::InferShape() const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]}); std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) { for (size_t i = 0; i < strides.size(); ++i) {
-    output_shape.push_back(
-        math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
-                             paddings[i], strides[i]));
+    output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
+                                          dilations[i], paddings[i],
+                                          strides[i]));
} }
framework::DDim ddim = framework::make_ddim(output_shape); framework::DDim ddim = framework::make_ddim(output_shape);
......
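InferShape now calls the ConvOutputSize helper that moved into operators/kernel/central-arm-func/conv_arm_func.h (previously math::ConvOutputSize in operators/math/conv_func.h). The arithmetic is the standard convolution output-size formula; a sketch of what the helper computes:

// Standard conv output size: the effective kernel extent is
// dilation * (kernel - 1) + 1, and the division floors.
inline int ConvOutputSize(int input_size, int filter_size, int dilation,
                          int padding, int stride) {
  const int dkernel = dilation * (filter_size - 1) + 1;
  return (input_size + 2 * padding - dkernel) / stride + 1;
}
// e.g. input 224, filter 3, dilation 1, padding 1, stride 2 -> 112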
...@@ -30,7 +30,7 @@ class ConvOp : public framework::OperatorWithKernel< ...@@ -30,7 +30,7 @@ class ConvOp : public framework::OperatorWithKernel<
public: public:
ConvOp(const std::string &type, const VariableNameMap &inputs, ConvOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const framework::AttributeMap &attrs, const VariableNameMap &outputs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel<DeviceType, ConvParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, ConvParam<DeviceType>,
operators::ConvKernel<DeviceType, T>>( operators::ConvKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
...@@ -31,8 +31,7 @@ class ConvOpTranspose : public framework::OperatorWithKernel< ...@@ -31,8 +31,7 @@ class ConvOpTranspose : public framework::OperatorWithKernel<
public: public:
ConvOpTranspose(const std::string &type, const VariableNameMap &inputs, ConvOpTranspose(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs, framework::Scope *scope)
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, ConvTransposeParam<DeviceType>, DeviceType, ConvTransposeParam<DeviceType>,
operators::ConvTransposeKernel<DeviceType, T>>( operators::ConvTransposeKernel<DeviceType, T>>(
......
...@@ -33,7 +33,7 @@ class CrfOp : public framework::OperatorWithKernel< ...@@ -33,7 +33,7 @@ class CrfOp : public framework::OperatorWithKernel<
public: public:
CrfOp(const std::string &type, const VariableNameMap &inputs, CrfOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const framework::AttributeMap &attrs, const VariableNameMap &outputs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel<DeviceType, CrfParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, CrfParam<DeviceType>,
operators::CrfKernel<DeviceType, T>>( operators::CrfKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
...@@ -19,7 +19,7 @@ limitations under the License. */ ...@@ -19,7 +19,7 @@ limitations under the License. */
#include "framework/op_proto_maker.h" #include "framework/op_proto_maker.h"
#include "framework/op_registry.h" #include "framework/op_registry.h"
#include "operators/conv_op.h" #include "operators/conv_op.h"
#include "operators/math/conv_func.h" #include "operators/kernel/central-arm-func/conv_arm_func.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -40,9 +40,9 @@ void DepthwiseConvOp<Dtype, T>::InferShape() const { ...@@ -40,9 +40,9 @@ void DepthwiseConvOp<Dtype, T>::InferShape() const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]}); std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) { for (size_t i = 0; i < strides.size(); ++i) {
-    output_shape.push_back(
-        math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
-                             paddings[i], strides[i]));
+    output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
+                                          dilations[i], paddings[i],
+                                          strides[i]));
} }
framework::DDim ddim = framework::make_ddim(output_shape); framework::DDim ddim = framework::make_ddim(output_shape);
......
...@@ -30,8 +30,7 @@ class DepthwiseConvOp : public framework::OperatorWithKernel< ...@@ -30,8 +30,7 @@ class DepthwiseConvOp : public framework::OperatorWithKernel<
public: public:
DepthwiseConvOp(const std::string &type, const VariableNameMap &inputs, DepthwiseConvOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs, framework::Scope *scope)
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType, ConvParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, ConvParam<DeviceType>,
operators::ConvKernel<DeviceType, T>>( operators::ConvKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
...@@ -32,8 +32,7 @@ class DequantizeOp ...@@ -32,8 +32,7 @@ class DequantizeOp
public: public:
DequantizeOp(const std::string &type, const VariableNameMap &inputs, DequantizeOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs, framework::Scope *scope)
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType, DequantizeParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, DequantizeParam<DeviceType>,
DequantizeKernel<DeviceType, T>>( DequantizeKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
...@@ -34,7 +34,7 @@ class DropoutOp : public framework::OperatorWithKernel< ...@@ -34,7 +34,7 @@ class DropoutOp : public framework::OperatorWithKernel<
public: public:
DropoutOp(const std::string &type, const VariableNameMap &inputs, DropoutOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const framework::AttributeMap attrs, const VariableNameMap &outputs, const framework::AttributeMap attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel<DeviceType, DropoutParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, DropoutParam<DeviceType>,
operators::DropoutKernel<DeviceType, T>>( operators::DropoutKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
...@@ -23,6 +23,7 @@ template <typename Dtype, typename T> ...@@ -23,6 +23,7 @@ template <typename Dtype, typename T>
void ElementwiseAddOp<Dtype, T>::InferShape() const { void ElementwiseAddOp<Dtype, T>::InferShape() const {
auto x_dim = this->param_.InputX()->dims(); auto x_dim = this->param_.InputX()->dims();
this->param_.Out()->Resize(x_dim); this->param_.Out()->Resize(x_dim);
this->param_.Out()->set_lod(this->param_.InputX()->lod());
} }
} // namespace operators } // namespace operators
......
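Same LoD propagation as the activation ops: elementwise_add preserves shape, so the output groups rows into sequences exactly as the input does. For concreteness, assuming the usual LoD = std::vector<std::vector<size_t>> alias:

void propagate_lod(framework::LoDTensor &x, framework::LoDTensor &out) {
  // A LoD of {{0, 2, 5}} marks rows [0, 2) and [2, 5) as two sequences.
  x.set_lod({{0, 2, 5}});
  out.Resize(x.dims());
  out.set_lod(x.lod());  // out keeps the same sequence split as x
}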
...@@ -32,7 +32,7 @@ class ElementwiseAddOp : public framework::OperatorWithKernel< ...@@ -32,7 +32,7 @@ class ElementwiseAddOp : public framework::OperatorWithKernel<
ElementwiseAddOp(const string &type, const VariableNameMap &inputs, ElementwiseAddOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, ElementwiseAddParam<DeviceType>, DeviceType, ElementwiseAddParam<DeviceType>,
operators::ElementwiseAddKernel<DeviceType, T>>( operators::ElementwiseAddKernel<DeviceType, T>>(
......
...@@ -32,7 +32,7 @@ class ElementwiseMulOp : public framework::OperatorWithKernel< ...@@ -32,7 +32,7 @@ class ElementwiseMulOp : public framework::OperatorWithKernel<
ElementwiseMulOp(const string &type, const VariableNameMap &inputs, ElementwiseMulOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, ElementwiseMulParam<DeviceType>, DeviceType, ElementwiseMulParam<DeviceType>,
operators::ElementwiseMulKernel<DeviceType, T>>( operators::ElementwiseMulKernel<DeviceType, T>>(
......
...@@ -32,7 +32,7 @@ class ElementwiseSubOp : public framework::OperatorWithKernel< ...@@ -32,7 +32,7 @@ class ElementwiseSubOp : public framework::OperatorWithKernel<
ElementwiseSubOp(const string &type, const VariableNameMap &inputs, ElementwiseSubOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, ElementwiseSubParam<DeviceType>, DeviceType, ElementwiseSubParam<DeviceType>,
operators::ElementwiseSubKernel<DeviceType, T>>( operators::ElementwiseSubKernel<DeviceType, T>>(
......
...@@ -21,7 +21,8 @@ template <typename DeviceType, typename T> ...@@ -21,7 +21,8 @@ template <typename DeviceType, typename T>
void FeedOp<DeviceType, T>::InferShape() const { void FeedOp<DeviceType, T>::InferShape() const {
auto out_dims = this->param_.Out()->dims(); auto out_dims = this->param_.Out()->dims();
out_dims[0] = this->param_.BatchSize(); out_dims[0] = this->param_.BatchSize();
-  auto input_dims = this->param_.InputX()->dims();
+  int col = this->param_.Col();
+  auto input_dims = this->param_.InputX()->at(col).dims();
if (input_dims.size() == 4) { if (input_dims.size() == 4) {
this->param_.Out()->Resize(input_dims); this->param_.Out()->Resize(input_dims);
} else { } else {
......
...@@ -31,7 +31,7 @@ class FeedOp ...@@ -31,7 +31,7 @@ class FeedOp
public: public:
FeedOp(const std::string &type, const VariableNameMap &inputs, FeedOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const framework::AttributeMap attrs, const VariableNameMap &outputs, const framework::AttributeMap attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel<DeviceType, FeedParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, FeedParam<DeviceType>,
FeedKernel<DeviceType, T>>( FeedKernel<DeviceType, T>>(
......
...@@ -18,8 +18,9 @@ namespace operators { ...@@ -18,8 +18,9 @@ namespace operators {
template <typename DeviceType, typename T> template <typename DeviceType, typename T>
void FetchOp<DeviceType, T>::InferShape() const { void FetchOp<DeviceType, T>::InferShape() const {
+  int col = this->param_.Col();
   auto x_dims = this->param_.InputX()->dims();
-  this->param_.Out()->Resize(x_dims);
+  this->param_.Out()->at(col).Resize(x_dims);
} }
} // namespace operators } // namespace operators
......
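The feed and fetch hunks above switch both ops to column addressing: the shared "feed" and "fetch" variables now hold a list of LoDTensors, and each op instance reads or writes its own slot through the new col attribute, which is what lets a model take several inputs and produce several outputs. A sketch of the addressing, with param standing in for this->param_:

// Feed side: pick this op's input out of the shared feed list.
int col = param.Col();                                    // "col" attribute
const framework::LoDTensor &in = param.InputX()->at(col);
param.Out()->Resize(in.dims());

// Fetch side: size this op's slot in the shared fetch list.
param.Out()->at(col).Resize(param.InputX()->dims());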
...@@ -30,7 +30,7 @@ class FetchOp ...@@ -30,7 +30,7 @@ class FetchOp
public: public:
FetchOp(const string &type, const VariableNameMap &inputs, FetchOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const framework::AttributeMap attrs, const VariableNameMap &outputs, const framework::AttributeMap attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel<DeviceType, FetchParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, FetchParam<DeviceType>,
FetchKernel<DeviceType, T>>( FetchKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
...@@ -31,11 +31,10 @@ class FillConstantOp : public framework::OperatorBase<DeviceType> { ...@@ -31,11 +31,10 @@ class FillConstantOp : public framework::OperatorBase<DeviceType> {
public: public:
FillConstantOp(const std::string &type, const VariableNameMap &inputs, FillConstantOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap attrs, const framework::AttributeMap attrs, framework::Scope *scope)
std::shared_ptr<framework::Scope> scope)
: framework::OperatorBase<DeviceType>(type, inputs, outputs, attrs, : framework::OperatorBase<DeviceType>(type, inputs, outputs, attrs,
scope), scope),
param_(inputs, outputs, attrs, scope.get()) {} param_(inputs, outputs, attrs, scope) {}
void RunImpl() { void RunImpl() {
auto data_type = auto data_type =
static_cast<_PaddleMobile__Framework__Proto__VarType__Type>( static_cast<_PaddleMobile__Framework__Proto__VarType__Type>(
......
...@@ -49,8 +49,7 @@ class FlattenOp : public framework::OperatorWithKernel< ...@@ -49,8 +49,7 @@ class FlattenOp : public framework::OperatorWithKernel<
public: public:
FlattenOp(const std::string &type, const VariableNameMap &inputs, FlattenOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs, framework::Scope *scope)
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType, FlattenParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, FlattenParam<DeviceType>,
operators::FlattenKernel<DeviceType, T>>( operators::FlattenKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADDADDPRELU_OP
#include "operators/fusion_conv_add_add_prelu_op.h"
#include "operators/math/conv_func.h"
namespace paddle_mobile {
namespace operators {
template <typename Dtype, typename T>
void FusionConvAddAddPReluOp<Dtype, T>::InferShape() const {
auto in_dims = this->param_.Input()->dims();
auto filter_dims = this->param_.Filter()->dims();
const std::vector<int> &strides = this->param_.Strides();
std::vector<int> paddings = this->param_.Paddings();
int groups = this->param_.Groups();
std::vector<int> dilations = this->param_.Dilations();
PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
dilations.size() == paddings.size() &&
paddings.size() == strides.size()),
"ConvParam is not suitable");
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back(
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
paddings[i], strides[i]));
}
framework::DDim ddim = framework::make_ddim(output_shape);
this->param_.Output()->Resize(ddim);
}
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
REGISTER_FUSION_MATCHER(fusion_conv_add_add_prelu,
ops::FusionConvAddAddPReluOpMatcher);
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(fusion_conv_add_add_prelu, ops::FusionConvAddAddPReluOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_conv_add_add_prelu, ops::FusionConvAddAddPReluOp);
#endif
#endif // FUSION_CONVADDADDPRELU_OP
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADDADDPRELU_OP
#pragma once
#include <string>
#include <utility>
#include <vector>
#include "framework/operator.h"
#include "framework/program/program-optimize/fusion_op_register.h"
#include "operators/kernel/conv_add_add_prelu_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
class FusionConvAddAddPReluOpMatcher : public framework::FusionOpMatcher {
public:
FusionConvAddAddPReluOpMatcher() {
node_ = framework::Node(G_OP_TYPE_CONV);
node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
std::make_shared<framework::Node>(G_OP_TYPE_PRELU);
}
void FolderNodes(
framework::Node *node,
std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
node->Folder(node_.Depth(), Type(),
{{G_OP_TYPE_ELEMENTWISE_ADD,
{{"Y", "Y"}, {"Out", "addOut"}, {"X", "addX"}}},
{G_OP_TYPE_PRELU, {{"Alpha", "Alpha"}}}},
removed_nodes);
}
std::string Type() { return G_OP_TYPE_FUSION_CONV_ADD_ADD_PRELU; }
std::vector<std::pair<int, std::string>> NeedCheck() {
DLOG << " conv add add prelu check add X ";
return {{2, "Y"}, {2, "X"}};
}
};
template <typename DeviceType, typename T>
class FusionConvAddAddPReluOp
: public framework::OperatorWithKernel<
DeviceType, FusionConvAddAddPReluParam<DeviceType>,
operators::ConvAddAddPReluKernel<DeviceType, T>> {
public:
FusionConvAddAddPReluOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<
DeviceType, FusionConvAddAddPReluParam<DeviceType>,
operators::ConvAddAddPReluKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
void InferShape() const override;
protected:
};
} // namespace operators
} // namespace paddle_mobile
#endif
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVADDBN_OP #ifdef FUSION_CONVADDBN_OP
#include "operators/fusion_conv_add_bn_op.h" #include "operators/fusion_conv_add_bn_op.h"
#include "operators/math/conv_func.h" #include "operators/kernel/central-arm-func/conv_arm_func.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -36,9 +36,9 @@ void FusionConvAddBNOp<Dtype, T>::InferShape() const { ...@@ -36,9 +36,9 @@ void FusionConvAddBNOp<Dtype, T>::InferShape() const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]}); std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) { for (size_t i = 0; i < strides.size(); ++i) {
-    output_shape.push_back(
-        math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
-                             paddings[i], strides[i]));
+    output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
+                                          dilations[i], paddings[i],
+                                          strides[i]));
} }
framework::DDim ddim = framework::make_ddim(output_shape); framework::DDim ddim = framework::make_ddim(output_shape);
......
...@@ -20,8 +20,8 @@ limitations under the License. */ ...@@ -20,8 +20,8 @@ limitations under the License. */
#include <vector> #include <vector>
#include "framework/operator.h" #include "framework/operator.h"
#include "framework/program/program-optimize/fusion_op_register.h" #include "framework/program/program-optimize/fusion_op_register.h"
#include "op_param.h"
#include "operators/kernel/conv_add_bn_kernel.h" #include "operators/kernel/conv_add_bn_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -59,7 +59,7 @@ class FusionConvAddBNOp : public framework::OperatorWithKernel< ...@@ -59,7 +59,7 @@ class FusionConvAddBNOp : public framework::OperatorWithKernel<
FusionConvAddBNOp(const string &type, const VariableNameMap &inputs, FusionConvAddBNOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionConvAddBNParam<DeviceType>, DeviceType, FusionConvAddBNParam<DeviceType>,
operators::ConvAddBNKernel<DeviceType, T>>(type, inputs, outputs, operators::ConvAddBNKernel<DeviceType, T>>(type, inputs, outputs,
......
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVADDBNRELU_OP #ifdef FUSION_CONVADDBNRELU_OP
#include "operators/fusion_conv_add_bn_relu_op.h" #include "operators/fusion_conv_add_bn_relu_op.h"
#include "operators/math/conv_func.h" #include "operators/kernel/central-arm-func/conv_arm_func.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -36,9 +36,9 @@ void FusionConvAddBNReluOp<Dtype, T>::InferShape() const { ...@@ -36,9 +36,9 @@ void FusionConvAddBNReluOp<Dtype, T>::InferShape() const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]}); std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) { for (size_t i = 0; i < strides.size(); ++i) {
-    output_shape.push_back(
-        math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
-                             paddings[i], strides[i]));
+    output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
+                                          dilations[i], paddings[i],
+                                          strides[i]));
} }
framework::DDim ddim = framework::make_ddim(output_shape); framework::DDim ddim = framework::make_ddim(output_shape);
......
...@@ -61,7 +61,7 @@ class FusionConvAddBNReluOp ...@@ -61,7 +61,7 @@ class FusionConvAddBNReluOp
FusionConvAddBNReluOp(const string &type, const VariableNameMap &inputs, FusionConvAddBNReluOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionConvAddBNReluParam<DeviceType>, DeviceType, FusionConvAddBNReluParam<DeviceType>,
operators::ConvAddBNReluKernel<DeviceType, T>>( operators::ConvAddBNReluKernel<DeviceType, T>>(
......
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVADD_OP #ifdef FUSION_CONVADD_OP
#include "operators/fusion_conv_add_op.h" #include "operators/fusion_conv_add_op.h"
#include "operators/math/conv_func.h" #include "operators/kernel/central-arm-func/conv_arm_func.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -36,9 +36,9 @@ void FusionConvAddOp<Dtype, T>::InferShape() const { ...@@ -36,9 +36,9 @@ void FusionConvAddOp<Dtype, T>::InferShape() const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]}); std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) { for (size_t i = 0; i < strides.size(); ++i) {
-    output_shape.push_back(
-        math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
-                             paddings[i], strides[i]));
+    output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
+                                          dilations[i], paddings[i],
+                                          strides[i]));
} }
framework::DDim ddim = framework::make_ddim(output_shape); framework::DDim ddim = framework::make_ddim(output_shape);
......
...@@ -50,8 +50,7 @@ class FusionConvAddOp : public framework::OperatorWithKernel< ...@@ -50,8 +50,7 @@ class FusionConvAddOp : public framework::OperatorWithKernel<
public: public:
FusionConvAddOp(const string &type, const VariableNameMap &inputs, FusionConvAddOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs, framework::Scope *scope)
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType, : framework::OperatorWithKernel<DeviceType,
FusionConvAddParam<DeviceType>, FusionConvAddParam<DeviceType>,
operators::ConvAddKernel<DeviceType, T>>( operators::ConvAddKernel<DeviceType, T>>(
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADDPRELU_OP
#include "operators/fusion_conv_add_prelu_op.h"
#include "operators/math/conv_func.h"
namespace paddle_mobile {
namespace operators {
template <typename Dtype, typename T>
void FusionConvAddPReluOp<Dtype, T>::InferShape() const {
auto in_dims = this->param_.Input()->dims();
auto filter_dims = this->param_.Filter()->dims();
const std::vector<int> &strides = this->param_.Strides();
std::vector<int> paddings = this->param_.Paddings();
int groups = this->param_.Groups();
std::vector<int> dilations = this->param_.Dilations();
PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
dilations.size() == paddings.size() &&
paddings.size() == strides.size()),
"ConvParam is not suitable");
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back(
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
paddings[i], strides[i]));
}
framework::DDim ddim = framework::make_ddim(output_shape);
this->param_.Output()->Resize(ddim);
}
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
REGISTER_FUSION_MATCHER(fusion_conv_add_prelu,
ops::FusionConvAddPReluOpMatcher);
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(fusion_conv_add_prelu, ops::FusionConvAddPReluOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_conv_add_prelu, ops::FusionConvAddPReluOp);
#endif
#endif
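For reference, every InferShape in these conv fusion operators delegates the spatial-size arithmetic to ConvOutputSize. A minimal sketch of that helper, assuming the usual dilated-convolution definition (the real implementation lives in the conv headers these files include):

// Sketch only: standard output-size arithmetic for a dilated convolution.
inline int ConvOutputSize(int input_size, int filter_size, int dilation,
                          int padding, int stride) {
  // Effective kernel extent once dilation is applied.
  const int dkernel = dilation * (filter_size - 1) + 1;
  // Floor division: (input + 2 * padding - dkernel) / stride + 1.
  return (input_size + 2 * padding - dkernel) / stride + 1;
}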
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVADDRELU_OP #ifdef FUSION_CONVADDRELU_OP
#include "operators/fusion_conv_add_relu_op.h" #include "operators/fusion_conv_add_relu_op.h"
#include "operators/math/conv_func.h" #include "operators/kernel/central-arm-func/conv_arm_func.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -36,9 +36,9 @@ void FusionConvAddReluOp<Dtype, T>::InferShape() const { ...@@ -36,9 +36,9 @@ void FusionConvAddReluOp<Dtype, T>::InferShape() const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]}); std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) { for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back( output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i], dilations[i], paddings[i],
paddings[i], strides[i])); strides[i]));
} }
framework::DDim ddim = framework::make_ddim(output_shape); framework::DDim ddim = framework::make_ddim(output_shape);
this->param_.Output()->Resize(ddim); this->param_.Output()->Resize(ddim);
......
...@@ -51,7 +51,7 @@ class FusionConvAddReluOp : public framework::OperatorWithKernel< ...@@ -51,7 +51,7 @@ class FusionConvAddReluOp : public framework::OperatorWithKernel<
FusionConvAddReluOp(const string &type, const VariableNameMap &inputs, FusionConvAddReluOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionConvAddReluParam<DeviceType>, DeviceType, FusionConvAddReluParam<DeviceType>,
operators::ConvAddReluKernel<DeviceType, T>>(type, inputs, outputs, operators::ConvAddReluKernel<DeviceType, T>>(type, inputs, outputs,
......
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVBNADDRELU_OP #ifdef FUSION_CONVBNADDRELU_OP
#include "operators/fusion_conv_bn_add_relu_op.h" #include "operators/fusion_conv_bn_add_relu_op.h"
#include "operators/math/conv_func.h" #include "operators/kernel/central-arm-func/conv_arm_func.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -36,9 +36,9 @@ void FusionConvBNAddReluOp<Dtype, T>::InferShape() const { ...@@ -36,9 +36,9 @@ void FusionConvBNAddReluOp<Dtype, T>::InferShape() const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]}); std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) { for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back( output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i], dilations[i], paddings[i],
paddings[i], strides[i])); strides[i]));
} }
framework::DDim ddim = framework::make_ddim(output_shape); framework::DDim ddim = framework::make_ddim(output_shape);
......
...@@ -67,7 +67,7 @@ class FusionConvBNAddReluOp ...@@ -67,7 +67,7 @@ class FusionConvBNAddReluOp
FusionConvBNAddReluOp(const string &type, const VariableNameMap &inputs, FusionConvBNAddReluOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionConvBNAddReluParam<DeviceType>, DeviceType, FusionConvBNAddReluParam<DeviceType>,
operators::ConvBNAddReluKernel<DeviceType, T>>( operators::ConvBNAddReluKernel<DeviceType, T>>(
......
...@@ -15,6 +15,7 @@ limitations under the License. */ ...@@ -15,6 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVBN_OP #ifdef FUSION_CONVBN_OP
#include "operators/fusion_conv_bn_op.h" #include "operators/fusion_conv_bn_op.h"
#include "operators/kernel/central-arm-func/conv_arm_func.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -35,9 +36,9 @@ void FusionConvBNOp<Dtype, T>::InferShape() const { ...@@ -35,9 +36,9 @@ void FusionConvBNOp<Dtype, T>::InferShape() const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]}); std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) { for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back( output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i], dilations[i], paddings[i],
paddings[i], strides[i])); strides[i]));
} }
framework::DDim ddim = framework::make_ddim(output_shape); framework::DDim ddim = framework::make_ddim(output_shape);
......
...@@ -56,8 +56,7 @@ class FusionConvBNOp : public framework::OperatorWithKernel< ...@@ -56,8 +56,7 @@ class FusionConvBNOp : public framework::OperatorWithKernel<
public: public:
FusionConvBNOp(const string &type, const VariableNameMap &inputs, FusionConvBNOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs, framework::Scope *scope)
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType, FusionConvBNParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, FusionConvBNParam<DeviceType>,
operators::ConvBNKernel<DeviceType, T>>( operators::ConvBNKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVBNRELU_OP #ifdef FUSION_CONVBNRELU_OP
#include "operators/fusion_conv_bn_relu_op.h" #include "operators/fusion_conv_bn_relu_op.h"
#include "operators/math/conv_func.h" #include "operators/kernel/central-arm-func/conv_arm_func.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -36,9 +36,9 @@ void FusionConvBNReluOp<Dtype, T>::InferShape() const { ...@@ -36,9 +36,9 @@ void FusionConvBNReluOp<Dtype, T>::InferShape() const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]}); std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) { for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back( output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i], dilations[i], paddings[i],
paddings[i], strides[i])); strides[i]));
} }
framework::DDim ddim = framework::make_ddim(output_shape); framework::DDim ddim = framework::make_ddim(output_shape);
......
...@@ -58,7 +58,7 @@ class FusionConvBNReluOp : public framework::OperatorWithKernel< ...@@ -58,7 +58,7 @@ class FusionConvBNReluOp : public framework::OperatorWithKernel<
FusionConvBNReluOp(const string &type, const VariableNameMap &inputs, FusionConvBNReluOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionConvBNReluParam<DeviceType>, DeviceType, FusionConvBNReluParam<DeviceType>,
operators::ConvBNReluKernel<DeviceType, T>>(type, inputs, outputs, operators::ConvBNReluKernel<DeviceType, T>>(type, inputs, outputs,
......
...@@ -57,7 +57,7 @@ class FusionDeconvAddBNOp : public framework::OperatorWithKernel< ...@@ -57,7 +57,7 @@ class FusionDeconvAddBNOp : public framework::OperatorWithKernel<
FusionDeconvAddBNOp(const string &type, const VariableNameMap &inputs, FusionDeconvAddBNOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionDeconvAddBNParam<DeviceType>, DeviceType, FusionDeconvAddBNParam<DeviceType>,
operators::DeconvAddBNKernel<DeviceType, T>>(type, inputs, outputs, operators::DeconvAddBNKernel<DeviceType, T>>(type, inputs, outputs,
......
...@@ -59,7 +59,7 @@ class FusionDeconvAddBNReluOp ...@@ -59,7 +59,7 @@ class FusionDeconvAddBNReluOp
FusionDeconvAddBNReluOp(const string &type, const VariableNameMap &inputs, FusionDeconvAddBNReluOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionDeconvAddBNReluParam<DeviceType>, DeviceType, FusionDeconvAddBNReluParam<DeviceType>,
operators::DeconvAddBNReluKernel<DeviceType, T>>( operators::DeconvAddBNReluKernel<DeviceType, T>>(
......
...@@ -49,7 +49,7 @@ class FusionDeconvAddOp : public framework::OperatorWithKernel< ...@@ -49,7 +49,7 @@ class FusionDeconvAddOp : public framework::OperatorWithKernel<
FusionDeconvAddOp(const string &type, const VariableNameMap &inputs, FusionDeconvAddOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionDeconvAddParam<DeviceType>, DeviceType, FusionDeconvAddParam<DeviceType>,
operators::DeconvAddKernel<DeviceType, T>>(type, inputs, outputs, operators::DeconvAddKernel<DeviceType, T>>(type, inputs, outputs,
......
...@@ -51,7 +51,7 @@ class FusionDeconvAddReluOp ...@@ -51,7 +51,7 @@ class FusionDeconvAddReluOp
FusionDeconvAddReluOp(const string &type, const VariableNameMap &inputs, FusionDeconvAddReluOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionDeconvAddReluParam<DeviceType>, DeviceType, FusionDeconvAddReluParam<DeviceType>,
operators::DeconvAddReluKernel<DeviceType, T>>( operators::DeconvAddReluKernel<DeviceType, T>>(
......
...@@ -56,7 +56,7 @@ class FusionDeconvBNReluOp ...@@ -56,7 +56,7 @@ class FusionDeconvBNReluOp
FusionDeconvBNReluOp(const string &type, const VariableNameMap &inputs, FusionDeconvBNReluOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionDeconvBNReluParam<DeviceType>, DeviceType, FusionDeconvBNReluParam<DeviceType>,
operators::DeconvBNReluKernel<DeviceType, T>>(type, inputs, outputs, operators::DeconvBNReluKernel<DeviceType, T>>(type, inputs, outputs,
......
...@@ -48,7 +48,7 @@ class FusionDeconvReluOp : public framework::OperatorWithKernel< ...@@ -48,7 +48,7 @@ class FusionDeconvReluOp : public framework::OperatorWithKernel<
FusionDeconvReluOp(const string &type, const VariableNameMap &inputs, FusionDeconvReluOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionDeconvReluParam<DeviceType>, DeviceType, FusionDeconvReluParam<DeviceType>,
operators::DeconvReluKernel<DeviceType, T>>(type, inputs, outputs, operators::DeconvReluKernel<DeviceType, T>>(type, inputs, outputs,
......
...@@ -60,7 +60,7 @@ class FusionDequantAddBNOp ...@@ -60,7 +60,7 @@ class FusionDequantAddBNOp
FusionDequantAddBNOp(const std::string &type, const VariableNameMap &inputs, FusionDequantAddBNOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionDequantAddBNParam<DeviceType>, DeviceType, FusionDequantAddBNParam<DeviceType>,
operators::FusionDequantAddBNKernel<DeviceType, T>>( operators::FusionDequantAddBNKernel<DeviceType, T>>(
......
...@@ -62,7 +62,7 @@ class FusionDequantAddBNReluOp ...@@ -62,7 +62,7 @@ class FusionDequantAddBNReluOp
const VariableNameMap &inputs, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionDequantAddBNParam<DeviceType>, DeviceType, FusionDequantAddBNParam<DeviceType>,
operators::FusionDequantAddBNReluKernel<DeviceType, T>>( operators::FusionDequantAddBNReluKernel<DeviceType, T>>(
......
...@@ -62,7 +62,7 @@ class FusionDequantAddBNReluQuantOp ...@@ -62,7 +62,7 @@ class FusionDequantAddBNReluQuantOp
const VariableNameMap &inputs, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionDequantAddBNReluQuantParam<DeviceType>, DeviceType, FusionDequantAddBNReluQuantParam<DeviceType>,
operators::FusionDequantAddBNReluQuantKernel<DeviceType, T>>( operators::FusionDequantAddBNReluQuantKernel<DeviceType, T>>(
...@@ -109,7 +109,7 @@ class FusionDequantAddBNQuantOp ...@@ -109,7 +109,7 @@ class FusionDequantAddBNQuantOp
const VariableNameMap &inputs, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionDequantAddBNQuantParam<DeviceType>, DeviceType, FusionDequantAddBNQuantParam<DeviceType>,
operators::FusionDequantAddBNQuantKernel<DeviceType, T>>( operators::FusionDequantAddBNQuantKernel<DeviceType, T>>(
......
...@@ -58,7 +58,7 @@ class FusionDequantBNOp : public framework::OperatorWithKernel< ...@@ -58,7 +58,7 @@ class FusionDequantBNOp : public framework::OperatorWithKernel<
FusionDequantBNOp(const std::string &type, const VariableNameMap &inputs, FusionDequantBNOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionDequantBNParam<DeviceType>, DeviceType, FusionDequantBNParam<DeviceType>,
operators::FusionDequantBNKernel<DeviceType, T>>( operators::FusionDequantBNKernel<DeviceType, T>>(
...@@ -87,7 +87,7 @@ class FusionDequantBNReluOp ...@@ -87,7 +87,7 @@ class FusionDequantBNReluOp
FusionDequantBNReluOp(const std::string &type, const VariableNameMap &inputs, FusionDequantBNReluOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionDequantBNParam<DeviceType>, DeviceType, FusionDequantBNParam<DeviceType>,
operators::FusionDequantBNReluKernel<DeviceType, T>>( operators::FusionDequantBNReluKernel<DeviceType, T>>(
......
...@@ -59,7 +59,7 @@ class FusionDequantBNReluOp ...@@ -59,7 +59,7 @@ class FusionDequantBNReluOp
FusionDequantBNReluOp(const std::string &type, const VariableNameMap &inputs, FusionDequantBNReluOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionDequantBNReluParam<DeviceType>, DeviceType, FusionDequantBNReluParam<DeviceType>,
operators::FusionDequantBNReluKernel<DeviceType, T>>( operators::FusionDequantBNReluKernel<DeviceType, T>>(
......
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
#ifdef FUSION_DWCONVBNRELU_OP #ifdef FUSION_DWCONVBNRELU_OP
#include "operators/fusion_dwconv_bn_relu_op.h" #include "operators/fusion_dwconv_bn_relu_op.h"
#include "operators/math/conv_func.h" #include "operators/kernel/central-arm-func/conv_arm_func.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -36,9 +36,9 @@ void FusionDWConvBNReluOp<Dtype, T>::InferShape() const { ...@@ -36,9 +36,9 @@ void FusionDWConvBNReluOp<Dtype, T>::InferShape() const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]}); std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) { for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back( output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i], dilations[i], paddings[i],
paddings[i], strides[i])); strides[i]));
} }
framework::DDim ddim = framework::make_ddim(output_shape); framework::DDim ddim = framework::make_ddim(output_shape);
......
...@@ -59,7 +59,7 @@ class FusionDWConvBNReluOp ...@@ -59,7 +59,7 @@ class FusionDWConvBNReluOp
FusionDWConvBNReluOp(const string &type, const VariableNameMap &inputs, FusionDWConvBNReluOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionDWConvBNReluParam<DeviceType>, DeviceType, FusionDWConvBNReluParam<DeviceType>,
operators::DWConvBNReluKernel<DeviceType, T>>(type, inputs, outputs, operators::DWConvBNReluKernel<DeviceType, T>>(type, inputs, outputs,
......
...@@ -17,6 +17,7 @@ limitations under the License. */ ...@@ -17,6 +17,7 @@ limitations under the License. */
#pragma once #pragma once
#include <string> #include <string>
#include <vector>
#include "framework/operator.h" #include "framework/operator.h"
#include "framework/program/program-optimize/fusion_op_register.h" #include "framework/program/program-optimize/fusion_op_register.h"
#include "operators/kernel/elementwise_add_relu_kernel.h" #include "operators/kernel/elementwise_add_relu_kernel.h"
...@@ -50,7 +51,7 @@ class FusionElementwiseAddReluOp ...@@ -50,7 +51,7 @@ class FusionElementwiseAddReluOp
FusionElementwiseAddReluOp(const string &type, const VariableNameMap &inputs, FusionElementwiseAddReluOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, ElementwiseAddReluParam<DeviceType>, DeviceType, ElementwiseAddReluParam<DeviceType>,
operators::ElementwiseAddReluKernel<DeviceType, T>>( operators::ElementwiseAddReluKernel<DeviceType, T>>(
......
...@@ -50,8 +50,7 @@ class FusionFcOp : public framework::OperatorWithKernel< ...@@ -50,8 +50,7 @@ class FusionFcOp : public framework::OperatorWithKernel<
public: public:
FusionFcOp(const std::string &type, const VariableNameMap &inputs, FusionFcOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs, framework::Scope *scope)
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType, FusionFcParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, FusionFcParam<DeviceType>,
operators::FusionFcKernel<DeviceType, T>>( operators::FusionFcKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
...@@ -49,8 +49,7 @@ class FusionFcReluOp : public framework::OperatorWithKernel< ...@@ -49,8 +49,7 @@ class FusionFcReluOp : public framework::OperatorWithKernel<
public: public:
FusionFcReluOp(const string &type, const VariableNameMap &inputs, FusionFcReluOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs, framework::Scope *scope)
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionFcReluParam<DeviceType>, DeviceType, FusionFcReluParam<DeviceType>,
operators::FusionFcReluKernel<DeviceType, T>>(type, inputs, outputs, operators::FusionFcReluKernel<DeviceType, T>>(type, inputs, outputs,
......
...@@ -33,7 +33,7 @@ class GruOp : public framework::OperatorWithKernel< ...@@ -33,7 +33,7 @@ class GruOp : public framework::OperatorWithKernel<
public: public:
GruOp(const std::string &type, const VariableNameMap &inputs, GruOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const framework::AttributeMap &attrs, const VariableNameMap &outputs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel<DeviceType, GruParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, GruParam<DeviceType>,
operators::GruKernel<DeviceType, T>>( operators::GruKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
...@@ -16,6 +16,7 @@ limitations under the License. */ ...@@ -16,6 +16,7 @@ limitations under the License. */
#pragma once #pragma once
#include <string>
#include "framework/operator.h" #include "framework/operator.h"
#include "operators/kernel/gru_unit_kernel.h" #include "operators/kernel/gru_unit_kernel.h"
#include "operators/op_param.h" #include "operators/op_param.h"
...@@ -30,10 +31,10 @@ class GruUnitOp : public framework::OperatorWithKernel< ...@@ -30,10 +31,10 @@ class GruUnitOp : public framework::OperatorWithKernel<
public: public:
GruUnitOp(const std::string &type, const VariableNameMap &inputs, GruUnitOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs, const VariableNameMap &outputs, const AttributeMap &attrs,
std::shared_ptr<Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel<DeviceType, GruUnitParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, GruUnitParam<DeviceType>,
operators::GruUnitKernel<DeviceType, T>>( operators::GruUnitKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope){}; type, inputs, outputs, attrs, scope) {}
void InferShape() const override; void InferShape() const override;
}; };
......
...@@ -31,8 +31,7 @@ class Im2SequenceOp : public framework::OperatorWithKernel< ...@@ -31,8 +31,7 @@ class Im2SequenceOp : public framework::OperatorWithKernel<
public: public:
Im2SequenceOp(const std::string &type, const VariableNameMap &inputs, Im2SequenceOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs, framework::Scope *scope)
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, Im2SequenceParam<DeviceType>, DeviceType, Im2SequenceParam<DeviceType>,
operators::Im2SequenceKernel<DeviceType, T>>(type, inputs, outputs, operators::Im2SequenceKernel<DeviceType, T>>(type, inputs, outputs,
......
...@@ -32,8 +32,7 @@ class IncrementOp ...@@ -32,8 +32,7 @@ class IncrementOp
public: public:
IncrementOp(const string &type, const VariableNameMap &inputs, IncrementOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs, framework::Scope *scope)
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType, IncrementParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, IncrementParam<DeviceType>,
IncrementKernel<DeviceType, T>>( IncrementKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
...@@ -31,8 +31,7 @@ class IsEmptyOp ...@@ -31,8 +31,7 @@ class IsEmptyOp
public: public:
IsEmptyOp(const string &type, const VariableNameMap &inputs, IsEmptyOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs, framework::Scope *scope)
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType, IsEmptyParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, IsEmptyParam<DeviceType>,
IsEmptyKernel<DeviceType, T>>( IsEmptyKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef BEAM_SEARCH_DECODE_OP
#include "operators/kernel/beam_search_decode_kernel.h"
#include "framework/data_type.h"
namespace paddle_mobile {
namespace operators {
using LoDTensor = framework::LoDTensor;
using LoDTensorArray = framework::LoDTensorArray;
// All the LoDs have 2 levels.
// The first is the source level, the second is the sentence level.
// The source level describes how many prefixes (branches) exist for each
// source sentence (beam); the sentence level describes how these candidates
// belong to the prefixes.
const size_t kSourceLevel = 0;
const size_t kSentenceLevel = 1;
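// Illustrative layout (an example added for exposition, not from the original
// source): with 2 source sentences, where source 0 keeps 2 prefixes and
// source 1 keeps 1 prefix, one step's LoD could be
//   lod[kSourceLevel]   = {0, 2, 3}     // prefixes [0, 2) belong to source 0
//   lod[kSentenceLevel] = {0, 3, 5, 9}  // prefix 0 owns candidates [0, 3), etc.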
template <typename T>
struct Sentence {
std::vector<int64_t> word_ids;
std::vector<T> scores;
};
template <typename T>
using SentenceVector = std::vector<Sentence<T>>;
template <typename T>
struct BeamSearchDecoder {
BeamSearchDecoder(size_t beam_size, int end_id)
: beam_size_(beam_size), end_id_(end_id) {}
/**
* Convert the result sentence_vector for each source sentence into two
* LoDTensors: one holds all candidate sentences as word ids, the other
* holds all candidate sentences as word scores.
* Param:
* sentence_vector_list: sentence_vector for each source sentence.
* id_tensor: result LoDTensor for the sentence word ids.
* score_tensor: result LoDTensor for the sentence word scores.
* reverse: whether the word ids of each sentence in sentence_vector_list
* are reversed.
* sort_by_score: whether to sort hypotheses of each sentence by scores.
*/
void ConvertSentenceVectorToLodTensor(
std::vector<SentenceVector<T>> sentence_vector_list, LoDTensor* id_tensor,
LoDTensor* score_tensor, bool reverse = true,
bool sort_by_score = true) const;
/**
* Gather the hypotheses for each source sentence by backtracing through the
* LoDTensorArray step_ids whose LoDs preserve the path in the tree.
*/
void Backtrace(const LoDTensorArray& step_ids,
const LoDTensorArray& step_scores, LoDTensor* id_tensor,
LoDTensor* score_tensor) const;
size_t beam_size_;
int end_id_;
};
template <typename T>
void BeamSearchDecoder<T>::ConvertSentenceVectorToLodTensor(
std::vector<SentenceVector<T>> sentence_vector_list, LoDTensor* id_tensor,
LoDTensor* score_tensor, bool reverse, bool sort_by_score) const {
size_t src_num = sentence_vector_list.size();
PADDLE_MOBILE_ENFORCE(src_num > 0, "src_num should be larger than 0");
std::vector<size_t> source_level_lod = {0};
std::vector<size_t> sentence_level_lod = {0};
std::vector<int64_t> id_data;
std::vector<T> score_data;
for (size_t src_idx = 0; src_idx < src_num; ++src_idx) {
if (sort_by_score) {
sort(sentence_vector_list[src_idx].begin(),
sentence_vector_list[src_idx].end(),
[reverse](const Sentence<T>& a, const Sentence<T>& b) {
if (reverse)
return a.scores.front() > b.scores.front();
else
return a.scores.back() > b.scores.back();
});
}
for (Sentence<T>& sentence : sentence_vector_list[src_idx]) {
if (reverse) {
id_data.insert(id_data.end(), sentence.word_ids.rbegin(),
sentence.word_ids.rend());
score_data.insert(score_data.end(), sentence.scores.rbegin(),
sentence.scores.rend());
} else {
id_data.insert(id_data.end(), sentence.word_ids.begin(),
sentence.word_ids.end());
score_data.insert(score_data.end(), sentence.scores.begin(),
sentence.scores.end());
}
sentence_level_lod.push_back(sentence_level_lod.back() +
sentence.word_ids.size());
}
source_level_lod.push_back(source_level_lod.back() +
sentence_vector_list[src_idx].size());
}
framework::LoD lod;
lod.push_back(source_level_lod);
lod.push_back(sentence_level_lod);
id_tensor->set_lod(lod);
id_tensor->Resize({static_cast<int64_t>(id_data.size())});
id_tensor->mutable_data<int64_t>();
framework::TensorFromVector<int64_t>(id_data, id_tensor);
score_tensor->set_lod(lod);
score_tensor->Resize({static_cast<int64_t>(score_data.size())});
score_tensor->mutable_data<T>();
framework::TensorFromVector<T>(score_data, score_tensor);
}
template <typename T>
void BeamSearchDecoder<T>::Backtrace(const LoDTensorArray& step_ids,
const LoDTensorArray& step_scores,
LoDTensor* id_tensor,
LoDTensor* score_tensor) const {
PADDLE_MOBILE_ENFORCE(!step_ids.empty(), "step num should be larger than 0");
PADDLE_MOBILE_ENFORCE(step_ids.size() == step_scores.size(),
"step_ids and step_scores should be the same");
const size_t step_num = step_ids.size();
const size_t src_num = step_ids.at(0).lod().at(kSourceLevel).size() - 1;
std::vector<SentenceVector<T>> sentence_vector_list(
src_num, SentenceVector<T>(beam_size_));
std::vector<std::vector<size_t>> prefix_idx_vector_list(src_num);
for (int step_id = step_num - 1; step_id >= 0; --step_id) {
auto& cur_ids = step_ids.at(step_id);
auto& cur_scores = step_scores.at(step_id);
for (size_t src_idx = 0; src_idx < src_num; ++src_idx) {
// for each source sentence
auto& sentence_vector = sentence_vector_list.at(src_idx);
auto& prefix_idx_vector = prefix_idx_vector_list.at(src_idx);
size_t src_prefix_start = cur_ids.lod().at(kSourceLevel)[src_idx];
size_t src_prefix_end = cur_ids.lod().at(kSourceLevel)[src_idx + 1];
if (prefix_idx_vector.empty()) {  // finished and pruned at this step,
// or this is the last time step
for (size_t prefix_idx = src_prefix_start; prefix_idx < src_prefix_end;
++prefix_idx) {
size_t candidate_start = cur_ids.lod().at(kSentenceLevel)[prefix_idx];
size_t candidate_end =
cur_ids.lod().at(kSentenceLevel)[prefix_idx + 1];
for (size_t candidate_idx = candidate_start;
candidate_idx < candidate_end; ++candidate_idx) {
prefix_idx_vector.push_back(prefix_idx);
size_t idx = prefix_idx_vector.size() - 1;
auto cur_id = cur_ids.data<int64_t>()[candidate_idx];
auto cur_score = cur_scores.data<T>()[candidate_idx];
sentence_vector.at(idx).word_ids.push_back(cur_id);
sentence_vector.at(idx).scores.push_back(cur_score);
}
}
} else { // use prefix_idx_vector to backtrace
size_t src_candidate_start =
cur_ids.lod().at(kSentenceLevel)[src_prefix_start];
size_t prefix_idx = src_prefix_start;
size_t candidate_num =
cur_ids.lod().at(kSentenceLevel)[prefix_idx + 1] -
cur_ids.lod().at(kSentenceLevel)[prefix_idx];
for (size_t idx = 0; idx < prefix_idx_vector.size(); ++idx) {
auto candidate_idx = prefix_idx_vector.at(idx);
auto cur_id = cur_ids.data<int64_t>()[candidate_idx];
auto cur_score = cur_scores.data<T>()[candidate_idx];
if (cur_id != end_id_ || sentence_vector.at(idx).word_ids.empty()) {
// to skip redundant end tokens
sentence_vector.at(idx).word_ids.push_back(cur_id);
sentence_vector.at(idx).scores.push_back(cur_score);
}
while (src_candidate_start + candidate_num <=
candidate_idx) { // search the corresponding prefix
prefix_idx++;
candidate_num += cur_ids.lod().at(kSentenceLevel)[prefix_idx + 1] -
cur_ids.lod().at(kSentenceLevel)[prefix_idx];
}
prefix_idx_vector.at(idx) = prefix_idx;
}
}
}
}
ConvertSentenceVectorToLodTensor(sentence_vector_list, id_tensor,
score_tensor, true, true);
}
struct BeamSearchDecodeFunctor {
BeamSearchDecodeFunctor(const LoDTensorArray& step_ids,
const LoDTensorArray& step_scores,
LoDTensor* id_tensor, LoDTensor* score_tensor,
size_t beam_size, int end_id)
: beam_size_(beam_size),
end_id_(end_id),
step_ids_(step_ids),
step_scores_(step_scores),
id_tensor_(id_tensor),
score_tensor_(score_tensor) {}
template <typename T>
void apply() const;
size_t beam_size_;
int end_id_;
const LoDTensorArray& step_ids_;
const LoDTensorArray& step_scores_;
LoDTensor* id_tensor_;
LoDTensor* score_tensor_;
};
template <typename T>
void BeamSearchDecodeFunctor::apply() const {
BeamSearchDecoder<T> beam_search_decoder(beam_size_, end_id_);
beam_search_decoder.Backtrace(step_ids_, step_scores_, id_tensor_,
score_tensor_);
}
template <>
void BeamSearchDecodeFunctor::apply<bool>() const {
PADDLE_MOBILE_THROW_EXCEPTION("beam search decode op does not support bool.");
}
template <>
bool BeamSearchDecodeKernel<CPU, float>::Init(
BeamSearchDecodeParam<CPU>* param) {
return true;
}
template <>
void BeamSearchDecodeKernel<CPU, float>::Compute(
const BeamSearchDecodeParam<CPU>& param) {
const LoDTensorArray* ids = param.ids_;
const LoDTensorArray* scores = param.scores_;
const size_t step_num = ids->size();
PADDLE_MOBILE_ENFORCE(step_num > 0,
"beam search steps should be larger than 0");
for (size_t i = 0; i < step_num; ++i) {
PADDLE_MOBILE_ENFORCE(ids->at(i).lod().size() == 2,
"Level of LodTensor should be 2");
}
const size_t source_num = ids->at(0).lod().at(0).size() - 1;
PADDLE_MOBILE_ENFORCE(source_num > 0, "source num should be larger than 0");
LoDTensor* sentence_ids = param.sentence_ids_;
LoDTensor* sentence_scores = param.sentence_scores_;
framework::VisitDataType(
framework::ToDataType(scores->at(0).type()),
BeamSearchDecodeFunctor(*ids, *scores, sentence_ids, sentence_scores,
param.beam_size_, param.end_id_));
}
} // namespace operators
} // namespace paddle_mobile
#endif
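Putting the decode kernel together: Backtrace walks the recorded steps from last to first, then ConvertSentenceVectorToLodTensor emits the two result tensors. A hedged usage sketch; the step arrays, their LoDs, and the names step_ids/step_scores are assumptions prepared by a surrounding search loop:

// Sketch: decode recorded beam-search steps with beam_size = 4, end_id = 0.
paddle_mobile::framework::LoDTensorArray step_ids, step_scores;  // assumed filled
paddle_mobile::framework::LoDTensor sentence_ids, sentence_scores;
paddle_mobile::operators::BeamSearchDecoder<float> decoder(/*beam_size=*/4,
                                                           /*end_id=*/0);
decoder.Backtrace(step_ids, step_scores, &sentence_ids, &sentence_scores);
// Both outputs now share one 2-level LoD: the source level over beams and the
// sentence level over hypotheses.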
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef BEAM_SEARCH_OP
#include "operators/kernel/beam_search_kernel.h"
#include <numeric>
namespace paddle_mobile {
namespace operators {
template <typename Device, typename T>
class BeamSearchFunctor {
public:
void operator()(const framework::LoDTensor *pre_ids,
const framework::LoDTensor *pre_scores,
const framework::LoDTensor *ids,
const framework::LoDTensor *scores,
framework::LoDTensor *selected_ids,
framework::LoDTensor *selected_scores,
framework::Tensor *parent_idx, size_t level, size_t beam_size,
int end_id, bool is_accumulated) {
auto abs_lod = framework::ToAbsOffset(scores->lod());
auto &high_level = abs_lod[level];
auto items = SelectTopBeamSizeItems(pre_ids, pre_scores, ids, scores, level,
beam_size, end_id, is_accumulated);
auto selected_items = ToMap(items, high_level.back());
PruneEndBeams(pre_ids, abs_lod, &selected_items, level, end_id);
// calculate the output tensor's height
size_t num_instances = std::accumulate(
std::begin(selected_items), std::end(selected_items), 0,
[](size_t a, std::vector<Item> &b) { return a + b.size(); });
// the output tensor shape should be [num_instances, 1]
auto dims = framework::make_ddim(
std::vector<int64_t>({static_cast<int>(num_instances), 1}));
selected_ids->Resize(dims);
selected_scores->Resize(dims);
parent_idx->Resize({static_cast<int64_t>(num_instances)});
auto *selected_ids_data = selected_ids->mutable_data<int64_t>();
auto *selected_scores_data = selected_scores->mutable_data<float>();
auto *parent_idx_data = parent_idx->mutable_data<int>();
// fill in data
std::vector<size_t> low_level;
size_t low_offset = 0;
for (auto &items : selected_items) {
low_level.push_back(low_offset);
for (auto &item : items) {
parent_idx_data[low_offset] = static_cast<int>(low_level.size() - 1);
selected_ids_data[low_offset] = item.id;
selected_scores_data[low_offset] = item.score;
low_offset++;
}
}
low_level.push_back(low_offset);
// fill lod
framework::LoD lod(2);
lod[0].assign(high_level.begin(), high_level.end());
lod[1].assign(low_level.begin(), low_level.end());
selected_ids->set_lod(lod);
selected_scores->set_lod(lod);
}
/*
* The basic item used for sorting.
*/
struct Item {
Item() {}
Item(size_t offset, size_t id, float score)
: offset(offset), id(id), score(score) {}
// offset in the higher lod level.
size_t offset;
// prefix id in the lower lod level.
// size_t prefix;
// the candidate id
size_t id;
// the corresponding score
float score;
inline bool operator<(const Item &in) const {
return (score < in.score) ||
((score == in.score) && (offset < in.offset));
}
inline void operator=(const Item &in) {
offset = in.offset;
id = in.id;
score = in.score;
}
};
protected:
/*
* Prune the source sentences whose branches are all finished; this is optional.
* Pruning must happen one step later than finishing (thus pre_ids is needed
* here), since the end tokens must be written out.
*/
void PruneEndBeams(const framework::LoDTensor *pre_ids,
const framework::LoD &abs_lod,
std::vector<std::vector<Item>> *items, size_t lod_level,
int end_id) {
auto *pre_ids_data = pre_ids->data<int64_t>();
auto &high_level = abs_lod[lod_level];
for (size_t src_idx = 0; src_idx < high_level.size() - 1; ++src_idx) {
size_t src_prefix_start = high_level[src_idx];
size_t src_prefix_end = high_level[src_idx + 1];
bool finish_flag = true;
for (size_t offset = src_prefix_start; offset < src_prefix_end;
offset++) {
for (auto &item : items->at(offset)) {
if (item.id != static_cast<size_t>(end_id) ||
pre_ids_data[offset] != end_id) {
finish_flag = false;
break;
}
}
if (!finish_flag) break;
}
if (finish_flag) {  // all branches of the beam (source sentence) ended,
// so prune this beam
for (size_t offset = src_prefix_start; offset < src_prefix_end;
offset++)
items->at(offset).clear();
}
}
}
/*
* Transform the items into a map whose key is the offset and whose value is
* the items at that offset.
* NOTE: low performance.
*/
std::vector<std::vector<Item>> ToMap(
const std::vector<std::vector<Item>> &items, size_t element_num) {
std::vector<std::vector<Item>> result;
result.resize(element_num);
for (auto &entries : items) {
for (const auto &item : entries) {
result[item.offset].push_back(item);
}
}
return result;
}
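// Illustrative (added example): items = {{Item(1, a, sa), Item(0, b, sb)},
// {Item(1, c, sc)}} with element_num = 2 yields result[0] = {Item(0, b, sb)}
// and result[1] = {Item(1, a, sa), Item(1, c, sc)}.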
void Insert(std::vector<Item> *top_beam_ptr, const Item &item,
size_t beam_size) {
std::vector<Item> &top_beam = *top_beam_ptr;
size_t num_beams = top_beam.size();
if (num_beams < beam_size) {
top_beam.resize(num_beams + 1);
num_beams++;
} else {
if (item < top_beam[beam_size - 1]) {
return;
}
}
for (int k = static_cast<int>(num_beams) - 2; k >= 0; --k) {
if (top_beam[k] < item) {
top_beam[k + 1] = top_beam[k];
} else {
top_beam[k + 1] = item;
return;
}
}
top_beam[0] = item;
}
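// Illustrative trace (added example): with beam_size = 2, inserting items
// scored 0.5, 0.9, 0.1 in that order leaves top_beam sorted descending:
//   {0.5} -> {0.9, 0.5} -> {0.9, 0.5}  (0.1 fails the size check and is dropped)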
/*
* For each source, select top beam_size records.
*/
std::vector<std::vector<Item>> SelectTopBeamSizeItems(
const framework::LoDTensor *pre_ids,
const framework::LoDTensor *pre_scores, const framework::LoDTensor *ids,
const framework::LoDTensor *scores, size_t lod_level, size_t beam_size,
int end_id, bool is_accumulated) {
std::vector<std::vector<Item>> result;
// find the current candidates
auto abs_lod = framework::ToAbsOffset(scores->lod());
auto *pre_ids_data = pre_ids->data<int64_t>();
auto *pre_scores_data = pre_scores->data<float>();
auto *ids_data = ids ? ids->data<int64_t>() : nullptr;
auto *scores_data = scores->data<float>();
size_t num_seqs = scores->NumElements(lod_level);
size_t seq_width = 1;
for (int i = 1; i < scores->dims().size(); i++) {
seq_width *= scores->dims()[i];
}
for (size_t seq_id = 0; seq_id < num_seqs; ++seq_id) {
size_t seq_offset_start = abs_lod[lod_level][seq_id];
size_t seq_offset_end = abs_lod[lod_level][seq_id + 1];
std::vector<Item> top_beam;
top_beam.reserve(beam_size);
for (size_t offset = seq_offset_start; offset < seq_offset_end;
++offset) {
auto pre_id = pre_ids_data[offset];
auto pre_score = pre_scores_data[offset];
if (pre_id == end_id) {
// Allocate all probability mass to end_id for finished branches;
// the other candidate ids can be ignored.
Item item(offset, end_id, pre_score);
Insert(&top_beam, item, beam_size);
} else {
size_t index = offset * seq_width;
for (size_t d = 0; d < seq_width; d++, index++) {
int64_t id = ids_data ? ids_data[index] : static_cast<int64_t>(d);
float score = is_accumulated
? scores_data[index]
: pre_score + std::log(scores_data[index]);
Item item(offset, id, score);
Insert(&top_beam, item, beam_size);
}
}
}
result.emplace_back(top_beam);
}
return result;
}
};
template <>
bool BeamSearchKernel<CPU, float>::Init(BeamSearchParam<CPU> *param) {
return true;
}
template <>
void BeamSearchKernel<CPU, float>::Compute(const BeamSearchParam<CPU> &param) {
BeamSearchFunctor<CPU, float> alg;
alg(param.pre_ids_, param.pre_scores_, param.ids_, param.scores_,
param.selected_ids_, param.selected_scores_, param.parent_idx_,
param.level_, param.beam_size_, param.end_id_, param.is_accumulated_);
}
} // namespace operators
} // namespace paddle_mobile
#endif
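A hedged sketch of driving one search step through the functor; the four input tensors (pre_ids, pre_scores, ids, scores) and their 2-level LoDs are assumed to be prepared by a surrounding decoder loop:

// Sketch: one beam-search step with beam_size = 2 on a single source sentence.
paddle_mobile::operators::BeamSearchFunctor<paddle_mobile::CPU, float> alg;
paddle_mobile::framework::LoDTensor selected_ids, selected_scores;
paddle_mobile::framework::Tensor parent_idx;
alg(&pre_ids, &pre_scores, &ids, &scores, &selected_ids, &selected_scores,
    &parent_idx, /*level=*/0, /*beam_size=*/2, /*end_id=*/0,
    /*is_accumulated=*/true);
// selected_ids/selected_scores come back as [num_selected, 1] with a fresh
// 2-level LoD; parent_idx records, per selection, the beam row it extends.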
...@@ -16,7 +16,9 @@ limitations under the License. */ ...@@ -16,7 +16,9 @@ limitations under the License. */
#include "operators/kernel/conv_add_bn_relu_kernel.h" #include "operators/kernel/conv_add_bn_relu_kernel.h"
#include <cmath> #include <cmath>
#include "operators/kernel/central-arm-func/conv_add_bn_relu_arm_func.h" #include "operators/kernel/arm/convolution/conv_common.h"
#include "operators/kernel/central-arm-func/conv_arm_func.h"
#include "operators/math/channel_wise.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -43,9 +45,9 @@ bool ConvAddBNReluKernel<CPU, float>::Init( ...@@ -43,9 +45,9 @@ bool ConvAddBNReluKernel<CPU, float>::Init(
} }
// Tensor *new_scale = new Tensor(); // Tensor *new_scale = new Tensor();
// Tensor *new_bias = new Tensor(); // Tensor *new_bias = new Tensor();
auto *new_scale = param->CreateNewScale<framework::LoDTensor>();
auto *new_bias = param->CreateNewBiase<framework::LoDTensor>();
Tensor *new_scale = param->CreateNewScale<Tensor>();
Tensor *new_bias = param->CreateNewBiase<Tensor>();
auto new_scale_ptr = new_scale->mutable_data<float>({C}); auto new_scale_ptr = new_scale->mutable_data<float>({C});
auto new_bias_ptr = new_bias->mutable_data<float>({C}); auto new_bias_ptr = new_bias->mutable_data<float>({C});
for (int i = 0; i < C; i++) { for (int i = 0; i < C; i++) {
...@@ -54,14 +56,36 @@ bool ConvAddBNReluKernel<CPU, float>::Init( ...@@ -54,14 +56,36 @@ bool ConvAddBNReluKernel<CPU, float>::Init(
} }
param->SetNewScale(new_scale); param->SetNewScale(new_scale);
param->SetNewBias(new_bias); param->SetNewBias(new_bias);
InitBaseConvKernel(param);
return true; return true;
} }
template <> template <>
void ConvAddBNReluKernel<CPU, float>::Compute( void ConvAddBNReluKernel<CPU, float>::Compute(
const FusionConvAddBNReluParam<CPU> &param) { const FusionConvAddBNReluParam<CPU> &param) {
ConvAddBNReluCompute<float>(param); switch (param.ExecMode()) {
case ConvParam<CPU>::EXEC_DEPTHWISE3x3S1_FLOAT:
case ConvParam<CPU>::EXEC_DEPTHWISE3x3S2_FLOAT:
DepthwiseConv3x3<float, float>(param);
break;
case ConvParam<CPU>::EXEC_DEPTHWISE5x5_FLOAT:
DepthwiseConv5x5<float, float>(param);
break;
case ConvParam<CPU>::EXEC_WINOGRAD3X3_FLOAT:
WinogradConv3x3<8, 3>(param);
break;
case ConvParam<CPU>::EXEC_GEMM_FLOAT:
GemmConv<float, float>(param);
break;
default:
PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
param.ExecMode());
}
math::ScaleAddChannelWise<RELU>(param.Output(), param.NewScale(),
param.NewBias(), param.Output());
} }
template class ConvAddBNReluKernel<CPU, float>; template class ConvAddBNReluKernel<CPU, float>;
} // namespace operators } // namespace operators
......
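The Init above folds the elementwise bias and the batch-norm statistics into one per-channel scale/bias pair, so Compute only needs the single fused ScaleAddChannelWise<RELU> pass after the convolution. A sketch of the usual folding, assuming the standard batch-norm formulation (the elided per-channel loop computes something equivalent):

// For each output channel c (sketch of standard batch-norm folding):
//   new_scale[c] = bn_scale[c] / sqrt(variance[c] + epsilon)
//   new_bias[c]  = bn_bias[c] + (bias[c] - mean[c]) * new_scale[c]
// so the final pass computes
//   output = ReLU(conv(input) * new_scale + new_bias)
// which equals ReLU(BN(conv(input) + bias)).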
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADD_OP
#include "operators/kernel/conv_add_kernel.h"
#include "operators/kernel/arm/convolution/conv_common.h"
#include "operators/kernel/central-arm-func/conv_arm_func.h"
#include "operators/math/channel_wise.h"
namespace paddle_mobile {
namespace operators {
template <>
bool ConvAddKernel<CPU, float>::Init(FusionConvAddParam<CPU> *param) {
InitBaseConvKernel(param);
return true;
}
template <>
void ConvAddKernel<CPU, float>::Compute(const FusionConvAddParam<CPU> &param) {
switch (param.ExecMode()) {
case ConvParam<CPU>::EXEC_DEPTHWISE3x3S1_FLOAT:
break;
case ConvParam<CPU>::EXEC_DEPTHWISE3x3S2_FLOAT:
math::DepthwiseConv3x3S2<float, float>(*param.Input(), *param.Filter(),
param.Paddings(), param.Output());
break;
case ConvParam<CPU>::EXEC_DEPTHWISE5x5_FLOAT:
DepthwiseConv5x5<float, float>(param);
break;
case ConvParam<CPU>::EXEC_WINOGRAD3X3_FLOAT:
WinogradConv3x3<8, 3>(param);
break;
case ConvParam<CPU>::EXEC_GEMM_FLOAT:
GemmConv<float, float>(param);
break;
default:
PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
param.ExecMode());
}
math::AddChannelWise<IDENTITY>(param.Output(), param.Bias(), param.Output());
}
template class ConvAddKernel<CPU, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
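math::AddChannelWise<IDENTITY> then broadcasts the fused bias over each output channel. A scalar reference of the intended semantics, assuming NCHW layout (illustrative, not the optimized NEON implementation):

// Reference semantics: out[n][c][h][w] = in[n][c][h][w] + bias[c], followed by
// the activation selected by the template argument (IDENTITY here, RELU in the
// conv_add_bn_relu kernel above).
for (int n = 0; n < N; ++n) {
  for (int c = 0; c < C; ++c) {
    const float b = bias[c];
    for (int i = 0; i < H * W; ++i) {
      out[(n * C + c) * H * W + i] = in[(n * C + c) * H * W + i] + b;
    }
  }
}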
(74 file diffs collapsed.)