Commit 397bb881 authored by qnqinan

Merge remote-tracking branch 'upstream/develop' into develop

-[submodule "src/operators/kernel/mali/ACL_Android"]
-	path = src/operators/kernel/mali/ACL_Android
-	url = https://github.com/halsay/ACL_Android.git
@@ -9,7 +9,6 @@ option(WITH_TEST "build with unit tests" ON)
 # select the platform to build
 option(CPU "build with arm CPU support" ON)
-option(GPU_MALI "build with arm mali GPU support" OFF)
 option(GPU_CL "build with OpenCL support" OFF)
 option(FPGA "build with FPGA support" OFF)
 if(FPGA)
@@ -23,7 +22,7 @@ file(GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c src/*.mm)
 file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h)
 include_directories(src/)
-set(CMAKE_CXX_FLAGS "-O3 -s -DNDEBUG ${CMAKE_CXX_FLAGS}")
+set(CMAKE_CXX_FLAGS "-O3 -s -DNDEBUG ${CMAKE_CXX_FLAGS} -Wno-attributes")
 if(IS_IOS)
     set(CMAKE_CXX_FLAGS "-mfpu=neon -marm -fobjc-abi-version=2 -fobjc-arc \
         -std=gnu++11 -stdlib=libc++ -isysroot ${CMAKE_OSX_SYSROOT} ${CMAKE_CXX_FLAGS}")
@@ -97,31 +96,6 @@ else()
     endforeach()
 endif()
-if (GPU_MALI)
-    add_definitions(-DPADDLE_MOBILE_MALI_GPU)
-    add_definitions(-DUSE_ACL=1)
-    add_definitions(-DUSE_OPENCL)
-    set(ACL_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/src/operators/kernel/mali/ACL_Android)
-    include_directories(${ACL_ROOT} ${ACL_ROOT}/include)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -L${ACL_ROOT}/build")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -larm_compute")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -larm_compute_core")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -larm_compute_graph")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -L${ACL_ROOT}/build/opencl-1.2-stubs")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lOpenCL")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_ACL=1")
-else()
-    file(GLOB_RECURSE _tmp_list src/operators/kernel/mali/*.cpp src/operators/kernel/mali/*.cc)
-    foreach(f ${_tmp_list})
-        list(REMOVE_ITEM PADDLE_MOBILE_CC ${f})
-    endforeach()
-    file(GLOB_RECURSE _tmp_list_h src/operators/kernel/mali/*.h)
-    foreach(f ${_tmp_list_h})
-        list(REMOVE_ITEM PADDLE_MOBILE_H ${f})
-    endforeach()
-endif()
 if(FPGA)
     add_definitions(-DPADDLE_MOBILE_FPGA)
     file(GLOB_RECURSE _tmp_list src/operators/math/*.cpp src/operators/kernel/fpga/*.cc)
@@ -213,7 +187,7 @@ else()
     set(NET "default" CACHE STRING "select net type")
 endif()
-set_property(CACHE NET PROPERTY STRINGS "default" "googlenet" "mobilenet" "yolo" "squeezenet" "FPGA_NET_V1" "FPGA_NET_V2" "NLP")
+set_property(CACHE NET PROPERTY STRINGS "default" "googlenet" "mobilenet" "yolo" "squeezenet" "FPGA_NET_V1" "FPGA_NET_V2" "NLP" "op")
 include("${CMAKE_CURRENT_LIST_DIR}/tools/op.cmake")
 # build library
......
@@ -46,7 +46,6 @@ root@5affd29d4fc5:/ # ccmake .
 DEBUGING                         ON
 FPGA                             OFF
 LOG_PROFILE                      ON
-MALI_GPU                         OFF
 NET                              googlenet
 USE_EXCEPTION                    ON
 USE_OPENMP                       OFF
......
@@ -109,11 +109,6 @@ USE_OP_CPU(conv2d);
 REGISTER_OPERATOR_CPU(conv2d, ops::ConvOp);
 #endif
-#ifdef PADDLE_MOBILE_MALI_GPU
-USE_OP_MALI_GPU(conv2d);
-REGISTER_OPERATOR_MALI_GPU(conv2d, ops::ConvOp);
-#endif
 #ifdef PADDLE_MOBILE_FPGA
 USE_OP_FPGA(conv2d);
 REGISTER_OPERATOR_FPGA(conv2d, ops::ConvOp);
......
@@ -31,7 +31,8 @@ namespace paddle_mobile {
 #ifdef ANDROID
-extern const char *ANDROID_LOG_TAG;
+static const char *ANDROID_LOG_TAG =
+    "paddle_mobile LOG built on " __DATE__ " " __TIME__;
 #define ANDROIDLOGI(...)                                                 \
   __android_log_print(ANDROID_LOG_INFO, ANDROID_LOG_TAG, __VA_ARGS__);   \
......
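Note on the log.h hunk: adjacent string literals are concatenated at compile time, so turning ANDROID_LOG_TAG from an extern into a per-translation-unit static bakes the build timestamp into the tag. A minimal self-contained illustration of the mechanism (kTag is a stand-in name, not from the diff):

    #include <cstdio>

    // Adjacent string literals paste together during compilation, so the
    // tag carries the __DATE__/__TIME__ of the build, as in the diff above.
    static const char *kTag = "paddle_mobile LOG built on " __DATE__ " " __TIME__;

    int main() {
      std::printf("%s\n", kTag);  // e.g. "paddle_mobile LOG built on Jan  1 2019 12:00:00"
      return 0;
    }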
@@ -37,8 +37,7 @@ template <typename Dtype>
 using OpCreator = std::function<framework::OperatorBase<Dtype> *(
     const std::string & /*type*/, const VariableNameMap & /*inputs*/,
     const VariableNameMap & /*outputs*/,
-    const framework::AttributeMap & /*attrs*/,
-    std::shared_ptr<framework::Scope> /*scope*/)>;
+    const framework::AttributeMap & /*attrs*/, framework::Scope * /*scope*/)>;
 using InferVarTypeFN = std::function<void(const framework::OpDesc & /*op_desc*/,
                                           framework::BlockDesc * /*block*/)>;
......
File mode changed from 100755 to 100644
@@ -205,6 +205,8 @@ extern const char *G_OP_TYPE_FUSION_DECONV_ADD_BN_RELU;
 extern const char *G_OP_TYPE_FUSION_DECONV_ADD_BN;
 extern const char *G_OP_TYPE_FUSION_DECONV_BN_RELU;
+extern const char *G_OP_TYPE_PAD2D;
 extern std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
     op_input_output_key;
......
@@ -70,10 +70,11 @@ void format_fp16_ofm(framework::Tensor *ofm_tensor) {
     DLOG << "Wrong ofm dimension";
   }
   auto p = fpga_malloc(memory_size);
-  memset(p, 0, memory_size);
+  // memset(p, 0, memory_size);
   ofm_tensor->reset_data_ptr(p);
   ofm_tensor->set_type(typeid(half));
   ofm_tensor->fpga_data_num = memory_size / sizeof(half);
+  fpga::fpga_flush(p, memory_size);
 }
 void format_fp16_ofm(framework::Tensor *ofm_tensor, framework::DDim dims) {
@@ -89,10 +90,11 @@ void format_fp16_ofm(framework::Tensor *ofm_tensor, framework::DDim dims) {
     DLOG << "Wrong ofm dimension";
   }
   auto p = fpga_malloc(memory_size);
-  memset(p, 0, memory_size);
+  // memset(p, 0, memory_size);
   ofm_tensor->reset_data_ptr(p);
   ofm_tensor->set_type(typeid(half));
   ofm_tensor->fpga_data_num = memory_size / sizeof(half);
+  fpga::fpga_flush(p, memory_size);
 }
 void format_fp32_ofm(framework::Tensor *ofm_tensor) {
@@ -108,10 +110,11 @@ void format_fp32_ofm(framework::Tensor *ofm_tensor) {
     DLOG << "Wrong ofm dimension";
   }
   auto p = fpga_malloc(memory_size);
-  memset(p, 0, memory_size);
+  // memset(p, 0, memory_size);
   ofm_tensor->reset_data_ptr(p);
   ofm_tensor->set_type(typeid(float));
   ofm_tensor->fpga_data_num = memory_size / sizeof(float);
+  fpga::fpga_flush(p, memory_size);
 }
 float filter_find_max(framework::Tensor *filter_tensor) {
@@ -463,9 +466,24 @@ void expand_EW_arg(EWAddArgs *arg) {
   uint64_t image_amount_per_row =
       align_to_x((uint64_t)args.image0.width * (uint64_t)args.image0.channels,
                  IMAGE_ALIGNMENT);
-  uint64_t image_image_pixel = ((uint64_t)args.image0.channels << 32) |
-                               ((uint64_t)args.image0.width << 16) |
-                               (uint64_t)args.image0.height;
+  //////////////////////////////////////////////////////////
+  // temporary modify for EW and DMA problem
+  uint64_t image_image_pixel = 0;
+  if ((args.image0.width * args.image0.channels) >= 24576) {
+    if ((args.image0.width * args.image0.channels) % 32 != 0) {
+      DLOG << "EW parameter can not be support";
+    } else {
+      image_amount_per_row = image_amount_per_row / 2;
+      image_image_pixel = ((uint64_t)args.image0.channels << 32) |
+                          ((uint64_t)(args.image0.width / 2) << 16) |
+                          (uint64_t)(args.image0.height * 2);
+    }
+  } else {
+    image_image_pixel = ((uint64_t)args.image0.channels << 32) |
+                        ((uint64_t)args.image0.width << 16) |
+                        (uint64_t)args.image0.height;
+  }
+  //////////////////////////////////////////////////////////
   (*arg).driver.image0_address_phy = image0_address_phy;
   (*arg).driver.image1_address_phy = image1_address_phy;
@@ -560,6 +578,18 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
         reinterpret_cast<char *>(arg->conv_arg[i].filter_address), deleter));
     memcpy(arg->conv_arg[i].filter_address, filter_head, filter_size);
     fpga_flush(arg->conv_arg[i].filter_address, filter_size);
+    // for test
+    // {
+    //   static int cnt = 0;
+    //   if(cnt == 4){
+    //     int8_t result = 0;
+    //     std::string str = "fc_filter";
+    //     fpga::savefile<int8_t>(str, arg->conv_arg[i].filter_address,
+    //                            filter_size, result);
+    //
+    //   }
+    //   cnt++;
+    // }
     size_t bs_size = 2 *
                      align_to_x(arg->conv_arg[i].filter_num, BS_NUM_ALIGNMENT) *
@@ -570,6 +600,18 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
         reinterpret_cast<char *>(arg->conv_arg[i].sb_address), deleter));
     memcpy(arg->conv_arg[i].sb_address, bs_head, bs_size);
     fpga_flush(arg->conv_arg[i].sb_address, bs_size);
+    // for test
+    /*{
+      static int cnt = 0;
+      if(cnt == 4){
+        float result = 0;
+        std::string str = "fc_bs";
+        fpga::savefile<float>(str, arg->conv_arg[i].sb_address, bs_size/4,
+                              result);
+      }
+      cnt++;
+    }*/
     if (n > 1) {
       arg->conv_arg[i].output.scale_address =
......
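Note on the expand_EW_arg hunk above: when one input row of the elementwise-add exceeds what the DMA path handles (width * channels >= 24576), the workaround halves image_amount_per_row and re-describes the image as half-width, double-height, keeping the total pixel count unchanged. A self-contained sketch of that repacking arithmetic (pack_pixel is a hypothetical helper; only the bit layout follows the diff):

    #include <cstdint>
    #include <cstdio>

    // Bit layout from the diff: channels in bits 63..32, width in bits
    // 31..16, height in bits 15..0.
    static uint64_t pack_pixel(uint64_t c, uint64_t w, uint64_t h) {
      return (c << 32) | (w << 16) | h;
    }

    int main() {
      uint64_t c = 64, w = 512, h = 16;  // width * channels = 32768 >= 24576
      if (w * c >= 24576 && (w * c) % 32 == 0) {
        // Same pixels, streamed as half-width rows, twice as many of them.
        std::printf("repacked: %#llx\n",
                    (unsigned long long)pack_pixel(c, w / 2, h * 2));
      } else {
        std::printf("normal: %#llx\n", (unsigned long long)pack_pixel(c, w, h));
      }
      return 0;
    }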
@@ -268,6 +268,7 @@ void format_fc_filter(float **data_in, int num, int channel, int height,
   quantize(data_in, data_size, max);
   char **quantize_data = (char **)data_in;  // NOLINT
   convert_fc_filter(quantize_data, num, chw);
+  convert_to_hwc(quantize_data, num, channel, height, width);
   align_element(quantize_data, num, chw);
   if (num_after_alignment != num) {
     align_num(quantize_data, num_per_div_before_alignment, num, chw);
......
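The single added line above inserts a layout pass before alignment: the quantized FC filter data is reordered from CHW to HWC element order. A hedged sketch of that index mapping (ToHwc is a hypothetical stand-in for the library's in-place convert_to_hwc):

    #include <vector>

    // For each filter n, element (c, h, w) moves from CHW offset
    // c*H*W + h*W + w to HWC offset h*W*C + w*C + c.
    std::vector<char> ToHwc(const std::vector<char> &chw, int N, int C, int H,
                            int W) {
      std::vector<char> hwc(chw.size());
      for (int n = 0; n < N; ++n) {
        const char *src = chw.data() + n * C * H * W;
        char *dst = hwc.data() + n * C * H * W;
        for (int c = 0; c < C; ++c)
          for (int h = 0; h < H; ++h)
            for (int w = 0; w < W; ++w)
              dst[h * W * C + w * C + c] = src[c * H * W + h * W + w];
      }
      return hwc;
    }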
@@ -91,7 +91,14 @@ class Attribute {
         break;
       }
       case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCK: {
-        attr.Set<int>(attr_desc->block_idx);
+        break;
+      }
+      case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__LONGS: {
+        vector<int> val(attr_desc->n_longs);
+        for (int i = 0; i < attr_desc->n_longs; ++i) {
+          val[i] = attr_desc->longs[i];
+        }
+        attr.Set<vector<int>>(val);
         break;
       }
       default:
@@ -139,6 +146,14 @@ class Attribute {
       return vistor(attr.variant_.Get<vector<bool>>());
     } else if (attr.variant_.TypeId() == typeid(int64_t).hash_code()) {
       return vistor(attr.variant_.Get<int64_t>());
+    } else if (attr.variant_.TypeId() ==
+               typeid(framework::BlockDesc *).hash_code()) {
+      return vistor(attr.variant_.Get<framework::BlockDesc *>());
+    } else if (attr.variant_.TypeId() ==
+               typeid(vector<framework::BlockDesc *>).hash_code()) {
+      return vistor(attr.variant_.Get<vector<framework::BlockDesc *>>());
+    } else if (attr.variant_.TypeId() == typeid(vector<int64_t>).hash_code()) {
+      return vistor(attr.variant_.Get<vector<int64_t>>());
     } else {
       PADDLE_MOBILE_THROW_EXCEPTION("type not support");
     }
@@ -146,7 +161,8 @@ class Attribute {
  private:
   Variant<int, float, string, vector<int>, vector<float>, vector<string>, bool,
-          vector<bool>, BlockDesc *, int64_t>
+          vector<bool>, BlockDesc *, vector<BlockDesc *>, int64_t,
+          vector<int64_t>>
       variant_;
 };
......
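Worth flagging in the attribute hunk: the new LONGS case copies the 64-bit proto values into a vector<int>, so values outside the 32-bit range are silently narrowed, even though the Variant now also admits vector<int64_t>. A tiny self-contained demonstration of that narrowing:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      // Mirrors the LONGS case above: int64 attribute values land in vector<int>.
      int64_t longs[] = {1, 2, 4294967296LL};  // last value needs 33 bits
      std::vector<int> val(3);
      for (int i = 0; i < 3; ++i) val[i] = longs[i];  // implicit narrowing
      std::printf("%d %d %d\n", val[0], val[1], val[2]);  // typically "1 2 0"
      return 0;
    }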
@@ -27,9 +27,9 @@ bool CLEngine::Init() {
     return true;
   }
   cl_int status;
-  SetPlatform();
-  SetClDeviceId();
+  bool is_setplatform_success = SetPlatform();
+  bool is_setcldeviceid_success = SetClDeviceId();
+  is_init_success_ = is_setplatform_success && is_setcldeviceid_success;
   initialized_ = true;
   return initialized_;
   // setClCommandQueue();
@@ -44,11 +44,14 @@ CLEngine *CLEngine::Instance() {
   return &cl_engine_;
 }
+bool CLEngine::isInitSuccess() { return is_init_success_; }
 bool CLEngine::SetPlatform() {
   platform_ = NULL;      // the chosen platform
   cl_uint numPlatforms;  // the NO. of platforms
   cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);
+  if (status != CL_SUCCESS) {
+    return false;
+  }
   /**For clarity, choose the first available platform. */
   if (numPlatforms > 0) {
     cl_platform_id *platforms = reinterpret_cast<cl_platform_id *>(
@@ -56,10 +59,10 @@ bool CLEngine::SetPlatform() {
     status = clGetPlatformIDs(numPlatforms, platforms, NULL);
     platform_ = platforms[0];
     free(platforms);
-    return true;
-  } else {
-    return false;
+    return status == CL_SUCCESS;
   }
+  return false;
 }
 bool CLEngine::SetClDeviceId() {
@@ -67,13 +70,15 @@ bool CLEngine::SetClDeviceId() {
   devices_ = NULL;
   cl_int status =
       clGetDeviceIDs(platform_, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
+  if (status != CL_SUCCESS) {
+    return false;
+  }
   if (numDevices > 0) {
     devices_ = reinterpret_cast<cl_device_id *>(
         malloc(numDevices * sizeof(cl_device_id)));
     status = clGetDeviceIDs(platform_, CL_DEVICE_TYPE_GPU, numDevices, devices_,
                             NULL);
-    return true;
+    return status == CL_SUCCESS;
   }
   return false;
 }
......
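The net effect of the CLEngine hunks: Init() still returns initialized_ unconditionally, but platform and device failures are now recorded in is_init_success_ and exposed through isInitSuccess(), so callers can detect a device without a usable OpenCL stack instead of crashing later. A hedged usage sketch (the header path and the fallback hook are assumptions, not from the diff):

    #include "framework/cl/cl_engine.h"  // path assumed

    // Hypothetical fallback hook, for illustration only.
    void UseCpuKernels();

    void InitDevice() {
      auto *engine = paddle_mobile::framework::CLEngine::Instance();
      engine->Init();
      if (!engine->isInitSuccess()) {
        UseCpuKernels();  // no usable OpenCL platform/device
      }
    }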
@@ -31,7 +31,7 @@ class CLEngine {
   static CLEngine *Instance();
   bool Init();
+  bool isInitSuccess();
   std::unique_ptr<_cl_context, CLContextDeleter> CreateContext() {
     cl_int status;
     cl_context c = clCreateContext(NULL, 1, devices_, NULL, NULL, &status);
@@ -51,6 +51,20 @@ class CLEngine {
     return std::move(command_queue_ptr);
   }
+  cl_context getContext() {
+    if (context_ == nullptr) {
+      context_ = CreateContext();
+    }
+    return context_.get();
+  }
+  cl_command_queue getClCommandQueue() {
+    if (command_queue_ == nullptr) {
+      command_queue_ = CreateClCommandQueue(getContext());
+    }
+    return command_queue_.get();
+  }
   std::unique_ptr<_cl_program, CLProgramDeleter> CreateProgramWith(
       cl_context context, std::string file_name) {
     FILE *file = fopen(file_name.c_str(), "rb");
@@ -137,6 +151,11 @@ class CLEngine {
   std::string cl_path_;
   std::unique_ptr<_cl_program, CLProgramDeleter> program_;
+  std::unique_ptr<_cl_context, CLContextDeleter> context_ = nullptr;
+  std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_ =
+      nullptr;
   // bool SetClContext();
   // bool SetClCommandQueue();
@@ -144,6 +163,7 @@ class CLEngine {
   // bool LoadKernelFromFile(const char *kernel_file);
   // bool BuildProgram();
+  bool is_init_success_ = false;
 };
 }  // namespace framework
......
@@ -29,12 +29,12 @@ namespace framework {
 class CLScope {
  public:
   CLScope() {
-    CLEngine *engin = CLEngine::Instance();
-    context_ = engin->CreateContext();
-    command_queue_ = engin->CreateClCommandQueue(context_.get());
+    CLEngine *engine = CLEngine::Instance();
+    context_ = engine->getContext();
+    command_queue_ = engine->getClCommandQueue();
   }
-  cl_command_queue CommandQueue() { return command_queue_.get(); }
+  cl_command_queue CommandQueue() { return command_queue_; }
   std::unique_ptr<_cl_kernel, CLKernelDeleter> GetKernel(
       const std::string &kernel_name, const std::string &file_name) {
@@ -49,7 +49,7 @@ class CLScope {
     return std::move(kernel);
   }
-  cl_context Context() { return context_.get(); }
+  cl_context Context() { return context_; }
   cl_program Program(const std::string &file_name) {
     auto it = programs_.find(file_name);
@@ -58,7 +58,7 @@ class CLScope {
     }
     auto program = CLEngine::Instance()->CreateProgramWith(
-        context_.get(),
+        context_,
         CLEngine::Instance()->GetCLPath() + "/cl_kernel/" + file_name);
     DLOG << " --- begin build program -> " << file_name << " --- ";
@@ -72,8 +72,8 @@ class CLScope {
  private:
   cl_int status_;
-  std::unique_ptr<_cl_context, CLContextDeleter> context_;
-  std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_;
+  cl_context context_;
+  cl_command_queue command_queue_;
   std::unordered_map<std::string,
                      std::unique_ptr<_cl_program, CLProgramDeleter>>
       programs_;
......
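Taken together, the CLScope hunks change ownership: each CLScope used to create and own (via unique_ptr) its own cl_context and command queue, while now every scope borrows a single pair that CLEngine creates lazily and keeps for the process lifetime (the hunks also fix the "engin" typo). A minimal sketch of that lazy shared-resource pattern, with stand-in types instead of OpenCL handles:

    #include <cstdio>
    #include <memory>

    struct Context {
      Context() { std::puts("context created once"); }
    };

    class Engine {
     public:
      static Engine *Instance() {
        static Engine e;
        return &e;
      }
      // Created on first use, then handed out as a borrowed pointer; the
      // engine keeps ownership, mirroring getContext() in the diff.
      Context *getContext() {
        if (!context_) context_.reset(new Context());
        return context_.get();
      }

     private:
      std::unique_ptr<Context> context_;
    };

    int main() {
      Context *a = Engine::Instance()->getContext();
      Context *b = Engine::Instance()->getContext();
      std::printf("same instance: %s\n", a == b ? "yes" : "no");
      return 0;
    }

As in the diff, the lazy initialization is unsynchronized; it assumes the first CLScope is constructed on a single thread.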
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#if _OPENMP
#include <omp.h>
#endif
#define MOBILE_MAX_CPU_NUM 8
namespace paddle_mobile {
namespace framework {
struct CPUContext {
private:
CPUContext() : num_cpus(4), num_threads(1) {
// TODO(hjchen2)
for (int i = 0; i < num_cpus; ++i) {
cpu_frequencies[i] = 2400; // 2400 MHz
max_cpu_frequencies[i] = 2400; // 2400 MHz
}
// L1_cache = 32000; // 32K
L1_cache = 32 * 1024;
L2_cache = 2000000; // 2M
// L2_cache = 512000;
}
public:
void set_num_threads(int threads) {
#if _OPENMP
omp_set_num_threads(threads);
if (threads <= omp_get_max_threads()) {
num_threads = threads;
} else {
num_threads = omp_get_max_threads();
}
#endif
num_threads = (num_threads > 1) ? num_threads : 1;
}
virtual ~CPUContext() {}
public:
static CPUContext* Context() {
static CPUContext* ctx = new CPUContext;
return ctx;
}
int num_cpus;
int num_threads;
int cpu_frequencies[MOBILE_MAX_CPU_NUM];
int max_cpu_frequencies[MOBILE_MAX_CPU_NUM];
int L1_cache;
int L2_cache;
};
inline void set_global_num_threads(int threads) {
CPUContext::Context()->set_num_threads(threads);
}
inline int get_global_num_threads() {
return CPUContext::Context()->num_threads;
}
} // namespace framework
} // namespace paddle_mobile
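The new CPUContext above hard-codes four cores at 2400 MHz and 32 KB / 2 MB caches as placeholders (the TODO suggests runtime detection was planned), and clamps the requested thread count to omp_get_max_threads() when OpenMP is enabled, falling back to one thread otherwise. A hedged usage sketch of the thread plumbing it exposes (the include path is assumed):

    #include <cstdio>
    #include "framework/context.h"  // path assumed

    void ConfigureThreads(int requested) {
      paddle_mobile::framework::set_global_num_threads(requested);
      std::printf("threads in use: %d\n",
                  paddle_mobile::framework::get_global_num_threads());
    }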
@@ -42,6 +42,7 @@ inline DataLayout StringToDataLayout(const std::string &str) {
   } else {
     PADDLE_MOBILE_THROW_EXCEPTION("Unknown storage order string: %s", s.c_str())
   }
+  return DataLayout::kNCHW;
 }
 inline std::string DataLayoutToString(const DataLayout &data_layout) {
......
@@ -82,6 +82,8 @@ struct Dim<0> {
   int64_t &operator[](int idx);
   int64_t operator[](int idx) const;
+
+  int64_t head;
 };
 namespace {
@@ -131,6 +133,7 @@ int64_t &indexer(Dim<D> &dim, int idx) {
 template <>
 int64_t &indexer<0>(Dim<0> &dim, int idx) {
   PADDLE_MOBILE_THROW_EXCEPTION("Invalid index")
+  return dim.head;
 }
 template <int D>
@@ -147,6 +150,7 @@ int64_t indexer(const Dim<D> &dim, int idx) {
 template <>
 int64_t indexer<0>(const Dim<0> &dim, int idx) {
   PADDLE_MOBILE_THROW_EXCEPTION("Invalid index")
+  return dim.head;
 }
 }  // namespace
......
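The nominal return values added in the last two hunks (return DataLayout::kNCHW; and return dim.head;) share one purpose: PADDLE_MOBILE_THROW_EXCEPTION is a macro the compiler cannot prove to be noreturn, so without a trailing return these non-void functions appear to fall off the end, which triggers -Wreturn-type and is undefined behavior if ever reached. A minimal illustration of the pattern (stand-in names, not the library's macro):

    #include <stdexcept>

    // Stand-in for PADDLE_MOBILE_THROW_EXCEPTION: the compiler cannot assume
    // this call never returns, so the caller still needs a return statement.
    void throw_invalid_index() { throw std::runtime_error("Invalid index"); }

    long long &indexer0(long long &head) {
      throw_invalid_index();
      return head;  // never reached; exists to satisfy -Wreturn-type
    }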
This diff is collapsed.
@@ -36,6 +36,8 @@ class Executor {
            paddle_mobile::PaddleMobileConfigInternal config, int batch_size = 1,
            const bool use_optimize = true, const bool lod_mode = false);
+  void SetThreadNum(int threads);
   PMStatus Predict(const std::vector<std::pair<std::string, Tensor>> &inputs);
   PMStatus Predict(
       const std::vector<std::pair<std::string, LoDTensor>> &inputs);
@@ -49,16 +51,15 @@ class Executor {
   std::shared_ptr<LoDTensor> GetOutput(const std::string &var_name);
+  void FeedTensorData(const std::vector<framework::Tensor> &v);
+  void GetTensorResults(std::vector<framework::Tensor *> *v);
 #ifdef PADDLE_MOBILE_FPGA
   void InjectVariable(const Tensor &t, std::string var_name);
   void FeedData(const Tensor &t);
   void FeedData(const std::vector<void *> &v);
-  void FeedTensorData(const std::vector<framework::Tensor> &v);
   void GetResults(std::vector<void *> *v);
-  void GetTensorResults(std::vector<framework::Tensor *> *v);
   framework::Tensor *GetTensorByName(const std::string &name);
   std::shared_ptr<Tensor> FetchResult(int id = -1);
   void Predict_From_To(int start = 0, int end = -1);
   void Predict_From(int start);
@@ -68,8 +69,9 @@ class Executor {
  protected:
   Executor() = default;
-  bool varInputMemory(const std::shared_ptr<VarDesc> &var_desc, Variable *var,
-                      LoDTensor *tensor) const;
+  bool varInputMemory(const std::shared_ptr<VarDesc> &var_desc,
+                      Variable *var) const;
+  void InitFeedFetchList();
   void InitMemory();
   void InitCombineMemory();
   void InitNoPersistableMemory(const Tensor &input_tensor);
@@ -85,10 +87,9 @@ class Executor {
   PaddleMobileConfigInternal config_;
   Program<Device> program_;
   std::shared_ptr<ProgramDesc> program_desc_;
-  typedef std::shared_ptr<OperatorBase<Device>> OperatorBasePtr;
-  std::vector<std::vector<OperatorBasePtr>> ops_of_block_;
-  // operators list
-  std::vector<OperatorBasePtr> ops_list_;
+  std::vector<std::shared_ptr<OperatorBase<Device>>> ops_of_block0_;
+  std::unordered_map<std::string, int> feed_indices_;
+  std::unordered_map<std::string, int> fetch_indices_;
   // for super resoltion
   DDim input_dim_last_;
......
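The executor hunks flatten operator storage to block 0 only (ops_of_block0_) and replace per-predict name lookups with feed/fetch index maps built once by InitFeedFetchList(); FeedTensorData and GetTensorResults also move out of the FPGA-only section so any backend can batch-feed tensors. A hedged sketch of the index-map idea (a simplification, not the actual InitFeedFetchList implementation):

    #include <string>
    #include <unordered_map>
    #include <vector>

    // Build name -> slot maps once at load time so Predict() can address
    // feed/fetch variables by integer index instead of string comparison.
    std::unordered_map<std::string, int> BuildIndices(
        const std::vector<std::string> &names) {
      std::unordered_map<std::string, int> indices;
      for (int i = 0; i < static_cast<int>(names.size()); ++i) {
        indices[names[i]] = i;
      }
      return indices;
    }

    int main() {
      auto feed_indices = BuildIndices({"image", "shape"});
      return feed_indices.at("image");  // O(1) lookup at predict time
    }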
...@@ -13,13 +13,6 @@ void paddle_mobile__framework__proto__version__init( ...@@ -13,13 +13,6 @@ void paddle_mobile__framework__proto__version__init(
PADDLE_MOBILE__FRAMEWORK__PROTO__VERSION__INIT; PADDLE_MOBILE__FRAMEWORK__PROTO__VERSION__INIT;
*message = init_value; *message = init_value;
} }
size_t paddle_mobile__framework__proto__version__get_packed_size(
const PaddleMobile__Framework__Proto__Version *message) {
assert(message->base.descriptor ==
&paddle_mobile__framework__proto__version__descriptor);
return protobuf_c_message_get_packed_size(
(const ProtobufCMessage *)(message));
}
PaddleMobile__Framework__Proto__Version * PaddleMobile__Framework__Proto__Version *
paddle_mobile__framework__proto__version__unpack(ProtobufCAllocator *allocator, paddle_mobile__framework__proto__version__unpack(ProtobufCAllocator *allocator,
size_t len, size_t len,
...@@ -54,13 +47,6 @@ void paddle_mobile__framework__proto__op_desc__init( ...@@ -54,13 +47,6 @@ void paddle_mobile__framework__proto__op_desc__init(
PADDLE_MOBILE__FRAMEWORK__PROTO__OP_DESC__INIT; PADDLE_MOBILE__FRAMEWORK__PROTO__OP_DESC__INIT;
*message = init_value; *message = init_value;
} }
size_t paddle_mobile__framework__proto__op_desc__get_packed_size(
const PaddleMobile__Framework__Proto__OpDesc *message) {
assert(message->base.descriptor ==
&paddle_mobile__framework__proto__op_desc__descriptor);
return protobuf_c_message_get_packed_size(
(const ProtobufCMessage *)(message));
}
PaddleMobile__Framework__Proto__OpDesc * PaddleMobile__Framework__Proto__OpDesc *
paddle_mobile__framework__proto__op_desc__unpack(ProtobufCAllocator *allocator, paddle_mobile__framework__proto__op_desc__unpack(ProtobufCAllocator *allocator,
size_t len, size_t len,
...@@ -95,13 +81,6 @@ void paddle_mobile__framework__proto__op_proto__init( ...@@ -95,13 +81,6 @@ void paddle_mobile__framework__proto__op_proto__init(
PADDLE_MOBILE__FRAMEWORK__PROTO__OP_PROTO__INIT; PADDLE_MOBILE__FRAMEWORK__PROTO__OP_PROTO__INIT;
*message = init_value; *message = init_value;
} }
size_t paddle_mobile__framework__proto__op_proto__get_packed_size(
const PaddleMobile__Framework__Proto__OpProto *message) {
assert(message->base.descriptor ==
&paddle_mobile__framework__proto__op_proto__descriptor);
return protobuf_c_message_get_packed_size(
(const ProtobufCMessage *)(message));
}
PaddleMobile__Framework__Proto__OpProto * PaddleMobile__Framework__Proto__OpProto *
paddle_mobile__framework__proto__op_proto__unpack(ProtobufCAllocator *allocator, paddle_mobile__framework__proto__op_proto__unpack(ProtobufCAllocator *allocator,
size_t len, size_t len,
...@@ -162,13 +141,6 @@ void paddle_mobile__framework__proto__var_type__init( ...@@ -162,13 +141,6 @@ void paddle_mobile__framework__proto__var_type__init(
PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__INIT; PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__INIT;
*message = init_value; *message = init_value;
} }
size_t paddle_mobile__framework__proto__var_type__get_packed_size(
const PaddleMobile__Framework__Proto__VarType *message) {
assert(message->base.descriptor ==
&paddle_mobile__framework__proto__var_type__descriptor);
return protobuf_c_message_get_packed_size(
(const ProtobufCMessage *)(message));
}
PaddleMobile__Framework__Proto__VarType * PaddleMobile__Framework__Proto__VarType *
paddle_mobile__framework__proto__var_type__unpack(ProtobufCAllocator *allocator, paddle_mobile__framework__proto__var_type__unpack(ProtobufCAllocator *allocator,
size_t len, size_t len,
...@@ -191,13 +163,6 @@ void paddle_mobile__framework__proto__var_desc__init( ...@@ -191,13 +163,6 @@ void paddle_mobile__framework__proto__var_desc__init(
PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_DESC__INIT; PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_DESC__INIT;
*message = init_value; *message = init_value;
} }
size_t paddle_mobile__framework__proto__var_desc__get_packed_size(
const PaddleMobile__Framework__Proto__VarDesc *message) {
assert(message->base.descriptor ==
&paddle_mobile__framework__proto__var_desc__descriptor);
return protobuf_c_message_get_packed_size(
(const ProtobufCMessage *)(message));
}
PaddleMobile__Framework__Proto__VarDesc * PaddleMobile__Framework__Proto__VarDesc *
paddle_mobile__framework__proto__var_desc__unpack(ProtobufCAllocator *allocator, paddle_mobile__framework__proto__var_desc__unpack(ProtobufCAllocator *allocator,
size_t len, size_t len,
...@@ -220,13 +185,6 @@ void paddle_mobile__framework__proto__block_desc__init( ...@@ -220,13 +185,6 @@ void paddle_mobile__framework__proto__block_desc__init(
PADDLE_MOBILE__FRAMEWORK__PROTO__BLOCK_DESC__INIT; PADDLE_MOBILE__FRAMEWORK__PROTO__BLOCK_DESC__INIT;
*message = init_value; *message = init_value;
} }
size_t paddle_mobile__framework__proto__block_desc__get_packed_size(
const PaddleMobile__Framework__Proto__BlockDesc *message) {
assert(message->base.descriptor ==
&paddle_mobile__framework__proto__block_desc__descriptor);
return protobuf_c_message_get_packed_size(
(const ProtobufCMessage *)(message));
}
PaddleMobile__Framework__Proto__BlockDesc * PaddleMobile__Framework__Proto__BlockDesc *
paddle_mobile__framework__proto__block_desc__unpack( paddle_mobile__framework__proto__block_desc__unpack(
ProtobufCAllocator *allocator, size_t len, const uint8_t *data) { ProtobufCAllocator *allocator, size_t len, const uint8_t *data) {
...@@ -248,13 +206,6 @@ void paddle_mobile__framework__proto__program_desc__init( ...@@ -248,13 +206,6 @@ void paddle_mobile__framework__proto__program_desc__init(
PADDLE_MOBILE__FRAMEWORK__PROTO__PROGRAM_DESC__INIT; PADDLE_MOBILE__FRAMEWORK__PROTO__PROGRAM_DESC__INIT;
*message = init_value; *message = init_value;
} }
size_t paddle_mobile__framework__proto__program_desc__get_packed_size(
const PaddleMobile__Framework__Proto__ProgramDesc *message) {
assert(message->base.descriptor ==
&paddle_mobile__framework__proto__program_desc__descriptor);
return protobuf_c_message_get_packed_size(
(const ProtobufCMessage *)(message));
}
PaddleMobile__Framework__Proto__ProgramDesc * PaddleMobile__Framework__Proto__ProgramDesc *
paddle_mobile__framework__proto__program_desc__unpack( paddle_mobile__framework__proto__program_desc__unpack(
ProtobufCAllocator *allocator, size_t len, const uint8_t *data) { ProtobufCAllocator *allocator, size_t len, const uint8_t *data) {
...@@ -310,7 +261,7 @@ const ProtobufCMessageDescriptor ...@@ -310,7 +261,7 @@ const ProtobufCMessageDescriptor
NULL /* reserved[123] */ NULL /* reserved[123] */
}; };
static const ProtobufCFieldDescriptor static const ProtobufCFieldDescriptor
paddle_mobile__framework__proto__op_desc__attr__field_descriptors[13] = { paddle_mobile__framework__proto__op_desc__attr__field_descriptors[14] = {
{ {
"name", 1, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_STRING, "name", 1, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_STRING,
0, /* quantifier_offset */ 0, /* quantifier_offset */
...@@ -405,6 +356,13 @@ static const ProtobufCFieldDescriptor ...@@ -405,6 +356,13 @@ static const ProtobufCFieldDescriptor
NULL, NULL, 0, /* flags */ NULL, NULL, 0, /* flags */
0, NULL, NULL /* reserved1,reserved2, etc */ 0, NULL, NULL /* reserved1,reserved2, etc */
}, },
{
"longs", 15, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_INT64,
offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, n_longs),
offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, longs), NULL,
NULL, 0, /* flags */
0, NULL, NULL /* reserved1,reserved2, etc */
},
}; };
static const unsigned static const unsigned
paddle_mobile__framework__proto__op_desc__attr__field_indices_by_name[] = { paddle_mobile__framework__proto__op_desc__attr__field_indices_by_name[] = {
...@@ -417,6 +375,7 @@ static const unsigned ...@@ -417,6 +375,7 @@ static const unsigned
2, /* field[2] = i */ 2, /* field[2] = i */
5, /* field[5] = ints */ 5, /* field[5] = ints */
11, /* field[11] = l */ 11, /* field[11] = l */
13, /* field[13] = longs */
0, /* field[0] = name */ 0, /* field[0] = name */
4, /* field[4] = s */ 4, /* field[4] = s */
7, /* field[7] = strings */ 7, /* field[7] = strings */
...@@ -424,7 +383,7 @@ static const unsigned ...@@ -424,7 +383,7 @@ static const unsigned
}; };
static const ProtobufCIntRange static const ProtobufCIntRange
paddle_mobile__framework__proto__op_desc__attr__number_ranges[2 + 1] = { paddle_mobile__framework__proto__op_desc__attr__number_ranges[2 + 1] = {
{1, 0}, {10, 8}, {0, 13}}; {1, 0}, {10, 8}, {0, 14}};
const ProtobufCMessageDescriptor const ProtobufCMessageDescriptor
paddle_mobile__framework__proto__op_desc__attr__descriptor = { paddle_mobile__framework__proto__op_desc__attr__descriptor = {
PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC,
...@@ -433,7 +392,7 @@ const ProtobufCMessageDescriptor ...@@ -433,7 +392,7 @@ const ProtobufCMessageDescriptor
"PaddleMobile__Framework__Proto__OpDesc__Attr", "PaddleMobile__Framework__Proto__OpDesc__Attr",
"paddle_mobile.framework.proto", "paddle_mobile.framework.proto",
sizeof(PaddleMobile__Framework__Proto__OpDesc__Attr), sizeof(PaddleMobile__Framework__Proto__OpDesc__Attr),
13, 14,
paddle_mobile__framework__proto__op_desc__attr__field_descriptors, paddle_mobile__framework__proto__op_desc__attr__field_descriptors,
paddle_mobile__framework__proto__op_desc__attr__field_indices_by_name, paddle_mobile__framework__proto__op_desc__attr__field_indices_by_name,
2, 2,
...@@ -1448,7 +1407,7 @@ const ProtobufCMessageDescriptor ...@@ -1448,7 +1407,7 @@ const ProtobufCMessageDescriptor
NULL /* reserved[123] */ NULL /* reserved[123] */
}; };
static const ProtobufCEnumValue static const ProtobufCEnumValue
paddle_mobile__framework__proto__attr_type__enum_values_by_number[11] = { paddle_mobile__framework__proto__attr_type__enum_values_by_number[12] = {
{"INT", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INT", 0}, {"INT", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INT", 0},
{"FLOAT", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__FLOAT", 1}, {"FLOAT", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__FLOAT", 1},
{"STRING", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__STRING", 2}, {"STRING", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__STRING", 2},
...@@ -1460,15 +1419,16 @@ static const ProtobufCEnumValue ...@@ -1460,15 +1419,16 @@ static const ProtobufCEnumValue
{"BLOCK", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCK", 8}, {"BLOCK", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCK", 8},
{"LONG", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__LONG", 9}, {"LONG", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__LONG", 9},
{"BLOCKS", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCKS", 10}, {"BLOCKS", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCKS", 10},
{"LONGS", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__LONGS", 11},
}; };
static const ProtobufCIntRange static const ProtobufCIntRange
paddle_mobile__framework__proto__attr_type__value_ranges[] = {{0, 0}, paddle_mobile__framework__proto__attr_type__value_ranges[] = {{0, 0},
{0, 11}}; {0, 12}};
static const ProtobufCEnumValueIndex static const ProtobufCEnumValueIndex
paddle_mobile__framework__proto__attr_type__enum_values_by_name[11] = { paddle_mobile__framework__proto__attr_type__enum_values_by_name[12] = {
{"BLOCK", 8}, {"BLOCKS", 10}, {"BOOLEAN", 6}, {"BOOLEANS", 7}, {"BLOCK", 8}, {"BLOCKS", 10}, {"BOOLEAN", 6}, {"BOOLEANS", 7},
{"FLOAT", 1}, {"FLOATS", 4}, {"INT", 0}, {"INTS", 3}, {"FLOAT", 1}, {"FLOATS", 4}, {"INT", 0}, {"INTS", 3},
{"LONG", 9}, {"STRING", 2}, {"STRINGS", 5}, {"LONG", 9}, {"LONGS", 11}, {"STRING", 2}, {"STRINGS", 5},
}; };
const ProtobufCEnumDescriptor const ProtobufCEnumDescriptor
paddle_mobile__framework__proto__attr_type__descriptor = { paddle_mobile__framework__proto__attr_type__descriptor = {
...@@ -1477,9 +1437,9 @@ const ProtobufCEnumDescriptor ...@@ -1477,9 +1437,9 @@ const ProtobufCEnumDescriptor
"AttrType", "AttrType",
"PaddleMobile__Framework__Proto__AttrType", "PaddleMobile__Framework__Proto__AttrType",
"paddle_mobile.framework.proto", "paddle_mobile.framework.proto",
11, 12,
paddle_mobile__framework__proto__attr_type__enum_values_by_number, paddle_mobile__framework__proto__attr_type__enum_values_by_number,
11, 12,
paddle_mobile__framework__proto__attr_type__enum_values_by_name, paddle_mobile__framework__proto__attr_type__enum_values_by_name,
1, 1,
paddle_mobile__framework__proto__attr_type__value_ranges, paddle_mobile__framework__proto__attr_type__value_ranges,
......
@@ -102,8 +102,9 @@ typedef enum _PaddleMobile__Framework__Proto__AttrType {
   PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BOOLEANS = 7,
   PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCK = 8,
   PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__LONG = 9,
-  PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCKS =
-      10 PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(
+  PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCKS = 10,
+  PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__LONGS =
+      11 PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(
          PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE)
 } PaddleMobile__Framework__Proto__AttrType;
@@ -152,13 +153,15 @@ struct _PaddleMobile__Framework__Proto__OpDesc__Attr {
   int64_t l;
   size_t n_blocks_idx;
   int32_t *blocks_idx;
+  size_t n_longs;
+  int64_t *longs;
 };
 #define PADDLE_MOBILE__FRAMEWORK__PROTO__OP_DESC__ATTR__INIT                   \
   {                                                                            \
     PROTOBUF_C_MESSAGE_INIT(                                                   \
        &paddle_mobile__framework__proto__op_desc__attr__descriptor)            \
     , NULL, PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INT, 0, 0, 0, 0, NULL, \
-        0, NULL, 0, NULL, 0, NULL, 0, 0, 0, NULL, 0, 0, 0, 0, 0, NULL          \
+        0, NULL, 0, NULL, 0, NULL, 0, 0, 0, NULL, 0, 0, 0, 0, 0, NULL, 0, NULL \
   }
 struct _PaddleMobile__Framework__Proto__OpDesc__Var {
@@ -417,8 +420,6 @@ struct _PaddleMobile__Framework__Proto__ProgramDesc {
 /* PaddleMobile__Framework__Proto__Version methods */
 void paddle_mobile__framework__proto__version__init(
     PaddleMobile__Framework__Proto__Version *message);
-size_t paddle_mobile__framework__proto__version__get_packed_size(
-    const PaddleMobile__Framework__Proto__Version *message);
 PaddleMobile__Framework__Proto__Version *
 paddle_mobile__framework__proto__version__unpack(ProtobufCAllocator *allocator,
                                                  size_t len,
@@ -435,8 +436,6 @@ void paddle_mobile__framework__proto__op_desc__var__init(
 /* PaddleMobile__Framework__Proto__OpDesc methods */
 void paddle_mobile__framework__proto__op_desc__init(
     PaddleMobile__Framework__Proto__OpDesc *message);
-size_t paddle_mobile__framework__proto__op_desc__get_packed_size(
-    const PaddleMobile__Framework__Proto__OpDesc *message);
 PaddleMobile__Framework__Proto__OpDesc *
 paddle_mobile__framework__proto__op_desc__unpack(ProtobufCAllocator *allocator,
                                                  size_t len,
@@ -453,8 +452,6 @@ void paddle_mobile__framework__proto__op_proto__attr__init(
 /* PaddleMobile__Framework__Proto__OpProto methods */
 void paddle_mobile__framework__proto__op_proto__init(
     PaddleMobile__Framework__Proto__OpProto *message);
-size_t paddle_mobile__framework__proto__op_proto__get_packed_size(
-    const PaddleMobile__Framework__Proto__OpProto *message);
 PaddleMobile__Framework__Proto__OpProto *
 paddle_mobile__framework__proto__op_proto__unpack(ProtobufCAllocator *allocator,
                                                   size_t len,
@@ -483,8 +480,6 @@ void paddle_mobile__framework__proto__var_type__tuple__init(
 /* PaddleMobile__Framework__Proto__VarType methods */
 void paddle_mobile__framework__proto__var_type__init(
     PaddleMobile__Framework__Proto__VarType *message);
-size_t paddle_mobile__framework__proto__var_type__get_packed_size(
-    const PaddleMobile__Framework__Proto__VarType *message);
 PaddleMobile__Framework__Proto__VarType *
 paddle_mobile__framework__proto__var_type__unpack(ProtobufCAllocator *allocator,
                                                   size_t len,
@@ -495,8 +490,6 @@ void paddle_mobile__framework__proto__var_type__free_unpacked(
 /* PaddleMobile__Framework__Proto__VarDesc methods */
 void paddle_mobile__framework__proto__var_desc__init(
     PaddleMobile__Framework__Proto__VarDesc *message);
-size_t paddle_mobile__framework__proto__var_desc__get_packed_size(
-    const PaddleMobile__Framework__Proto__VarDesc *message);
 PaddleMobile__Framework__Proto__VarDesc *
 paddle_mobile__framework__proto__var_desc__unpack(ProtobufCAllocator *allocator,
                                                   size_t len,
@@ -507,8 +500,6 @@ void paddle_mobile__framework__proto__var_desc__free_unpacked(
 /* PaddleMobile__Framework__Proto__BlockDesc methods */
 void paddle_mobile__framework__proto__block_desc__init(
     PaddleMobile__Framework__Proto__BlockDesc *message);
-size_t paddle_mobile__framework__proto__block_desc__get_packed_size(
-    const PaddleMobile__Framework__Proto__BlockDesc *message);
 PaddleMobile__Framework__Proto__BlockDesc *
 paddle_mobile__framework__proto__block_desc__unpack(
     ProtobufCAllocator *allocator, size_t len, const uint8_t *data);
@@ -518,8 +509,6 @@ void paddle_mobile__framework__proto__block_desc__free_unpacked(
 /* PaddleMobile__Framework__Proto__ProgramDesc methods */
 void paddle_mobile__framework__proto__program_desc__init(
     PaddleMobile__Framework__Proto__ProgramDesc *message);
-size_t paddle_mobile__framework__proto__program_desc__get_packed_size(
-    const PaddleMobile__Framework__Proto__ProgramDesc *message);
 PaddleMobile__Framework__Proto__ProgramDesc *
 paddle_mobile__framework__proto__program_desc__unpack(
     ProtobufCAllocator *allocator, size_t len, const uint8_t *data);
......
@@ -35,6 +35,7 @@ enum AttrType {
   BLOCK = 8;
   LONG = 9;
   BLOCKS = 10;
+  LONGS = 11;
 }
 // OpDesc describes an instance of a C++ framework::OperatorBase
@@ -55,6 +56,7 @@ message OpDesc {
     optional int32 block_idx = 12;
     optional int64 l = 13;
     repeated int32 blocks_idx = 14;
+    repeated int64 longs = 15;
   };
 message Var {
......
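All of the generated-code churn in framework.pb-c.c/.h above falls out of this one proto addition: field 15 gives the Attr struct an n_longs/longs pair, one more field descriptor, and a twelfth AttrType entry. A hedged C++ sketch of consuming the new field through the protobuf-c structs (the include path and the n_attrs/attrs/type member names are assumed from standard protobuf-c codegen, not shown in the diff):

    #include <cstdio>
    #include "framework.pb-c.h"  // generated header from this diff; path assumed

    void PrintLongAttrs(const uint8_t *buf, size_t len) {
      PaddleMobile__Framework__Proto__OpDesc *op =
          paddle_mobile__framework__proto__op_desc__unpack(nullptr, len, buf);
      if (op == nullptr) return;
      for (size_t a = 0; a < op->n_attrs; ++a) {
        const PaddleMobile__Framework__Proto__OpDesc__Attr *attr = op->attrs[a];
        if (attr->type == PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__LONGS) {
          for (size_t i = 0; i < attr->n_longs; ++i) {
            std::printf("%s[%zu] = %lld\n", attr->name, i,
                        (long long)attr->longs[i]);
          }
        }
      }
      paddle_mobile__framework__proto__op_desc__free_unpacked(op, nullptr);
    }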
...@@ -23,13 +23,13 @@ limitations under the License. */ ...@@ -23,13 +23,13 @@ limitations under the License. */
#define LOAD_CPU_OP(op_type) #define LOAD_CPU_OP(op_type)
#endif #endif
#ifdef PADDLE_MOBILE_MALI_GPU #ifdef PADDLE_MOBILE_CL
#define LOAD_MALI_GPU_OP(op_type) \ #define LOAD_GPU_CL_OP(op_type) \
extern int TouchOpRegistrar_##op_type##_##mali_gpu(); \ extern int TouchOpRegistrar_##op_type##_##cl(); \
static int use_op_itself_##op_type##_##mali_gpu __attribute__((unused)) = \ static int use_op_itself_##op_type##_##cl __attribute__((unused)) = \
TouchOpRegistrar_##op_type##_##mali_gpu() TouchOpRegistrar_##op_type##_##cl()
#else #else
#define LOAD_MALI_GPU_OP(op_type) #define LOAD_GPU_CL_OP(op_type)
#endif #endif
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
...@@ -48,7 +48,7 @@ limitations under the License. */ ...@@ -48,7 +48,7 @@ limitations under the License. */
#define LOAD_OP(op_type) \ #define LOAD_OP(op_type) \
LOAD_CPU_OP(op_type); \ LOAD_CPU_OP(op_type); \
LOAD_MALI_GPU_OP(op_type); \ LOAD_GPU_CL_OP(op_type); \
LOAD_FPGA_OP(op_type); LOAD_FPGA_OP(op_type);
#define LOAD_OP1(op_type, device_type) LOAD_##device_type##_OP(op_type); #define LOAD_OP1(op_type, device_type) LOAD_##device_type##_OP(op_type);
...@@ -65,76 +65,72 @@ limitations under the License. */ ...@@ -65,76 +65,72 @@ limitations under the License. */
LOAD_OP(feed) LOAD_OP(feed)
LOAD_OP(fetch) LOAD_OP(fetch)
#ifdef FILL_CONSTANT_OP #ifdef FILL_CONSTANT_OP
LOAD_OP(fill_constant) LOAD_OP2(fill_constant, CPU, FPGA)
#endif #endif
#ifdef BATCHNORM_OP #ifdef BATCHNORM_OP
LOAD_OP2(batch_norm, CPU, MALI_GPU); LOAD_OP2(batch_norm, CPU, GPU_CL);
#endif #endif
#ifdef BILINEAR_INTERP_OP #ifdef BILINEAR_INTERP_OP
LOAD_OP1(bilinear_interp, CPU); LOAD_OP1(bilinear_interp, CPU);
#endif #endif
#ifdef BOXCODER_OP #ifdef BOXCODER_OP
LOAD_OP1(box_coder, CPU); LOAD_OP2(box_coder, CPU, GPU_CL);
#endif #endif
#ifdef CONCAT_OP #ifdef CONCAT_OP
LOAD_OP3(concat, CPU, MALI_GPU, FPGA); LOAD_OP3(concat, CPU, GPU_CL, FPGA);
#endif #endif
#ifdef CONV_OP #ifdef CONV_OP
LOAD_OP3(conv2d, CPU, MALI_GPU, FPGA); LOAD_OP3(conv2d, CPU, GPU_CL, FPGA);
#endif #endif
#ifdef LRN_OP #ifdef LRN_OP
LOAD_OP2(lrn, CPU, MALI_GPU); LOAD_OP2(lrn, CPU, GPU_CL);
#endif #endif
#ifdef SIGMOID_OP #ifdef SIGMOID_OP
LOAD_OP1(sigmoid, CPU); LOAD_OP1(sigmoid, CPU);
#endif #endif
#ifdef FUSION_FC_RELU_OP #ifdef FUSION_FC_RELU_OP
LOAD_OP3(fusion_fc_relu, CPU, MALI_GPU, FPGA); LOAD_OP2(fusion_fc_relu, CPU, FPGA);
LOAD_FUSION_MATCHER(fusion_fc_relu); LOAD_FUSION_MATCHER(fusion_fc_relu);
#endif #endif
#ifdef FUSION_ELEMENTWISEADDRELU_OP #ifdef FUSION_ELEMENTWISEADDRELU_OP
LOAD_OP3(fusion_elementwise_add_relu, CPU, MALI_GPU, FPGA); LOAD_OP2(fusion_elementwise_add_relu, CPU, FPGA);
LOAD_FUSION_MATCHER(fusion_elementwise_add_relu); LOAD_FUSION_MATCHER(fusion_elementwise_add_relu);
#endif #endif
#ifdef SPLIT_OP #ifdef SPLIT_OP
LOAD_OP1(split, CPU); LOAD_OP1(split, CPU);
#endif #endif
#ifdef RESIZE_OP #ifdef RESIZE_OP
LOAD_OP2(resize, CPU, MALI_GPU); LOAD_OP1(resize, CPU);
#endif #endif
#ifdef FUSION_CONVADDBNRELU_OP #ifdef FUSION_CONVADDBNRELU_OP
LOAD_OP2(fusion_conv_add_bn_relu, CPU, FPGA); LOAD_OP3(fusion_conv_add_bn_relu, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_add_bn_relu); LOAD_FUSION_MATCHER(fusion_conv_add_bn_relu);
#endif #endif
#ifdef RESHAPE_OP #ifdef RESHAPE_OP
LOAD_OP2(reshape, CPU, MALI_GPU); LOAD_OP2(reshape, CPU, GPU_CL);
#endif #endif
#ifdef RESHAPE2_OP #ifdef RESHAPE2_OP
LOAD_OP2(reshape2, CPU, MALI_GPU); LOAD_OP1(reshape2, CPU);
#endif #endif
#ifdef TRANSPOSE_OP #ifdef TRANSPOSE_OP
LOAD_OP1(transpose, CPU); LOAD_OP2(transpose, CPU, GPU_CL);
#endif #endif
#ifdef TRANSPOSE2_OP #ifdef TRANSPOSE2_OP
LOAD_OP1(transpose2, CPU); LOAD_OP1(transpose2, CPU);
#endif #endif
#ifdef PRIORBOX_OP #ifdef PRIORBOX_OP
LOAD_OP1(prior_box, CPU); LOAD_OP2(prior_box, CPU, GPU_CL);
#endif #endif
#ifdef FUSION_CONVADDRELU_OP #ifdef FUSION_CONVADDRELU_OP
LOAD_OP2(fusion_conv_add_relu, CPU, FPGA); LOAD_OP3(fusion_conv_add_relu, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_add_relu); LOAD_FUSION_MATCHER(fusion_conv_add_relu);
#endif #endif
#ifdef FUSION_CONVADDADDPRELU_OP
LOAD_OP2(fusion_conv_add_add_prelu, CPU, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_add_add_prelu);
#endif
#ifdef FUSION_CONVADD_OP #ifdef FUSION_CONVADD_OP
LOAD_OP2(fusion_conv_add, CPU, MALI_GPU); LOAD_OP2(fusion_conv_add, CPU, GPU_CL);
LOAD_FUSION_MATCHER(fusion_conv_add); LOAD_FUSION_MATCHER(fusion_conv_add);
#endif #endif
#ifdef SOFTMAX_OP #ifdef SOFTMAX_OP
LOAD_OP2(softmax, CPU, MALI_GPU); LOAD_OP2(softmax, CPU, GPU_CL);
#endif #endif
#ifdef SHAPE_OP #ifdef SHAPE_OP
LOAD_OP1(shape, CPU); LOAD_OP1(shape, CPU);
...@@ -146,23 +142,23 @@ LOAD_OP1(depthwise_conv2d, CPU); ...@@ -146,23 +142,23 @@ LOAD_OP1(depthwise_conv2d, CPU);
LOAD_OP1(conv2d_transpose, CPU); LOAD_OP1(conv2d_transpose, CPU);
#endif #endif
#ifdef SCALE_OP #ifdef SCALE_OP
LOAD_OP2(scale, CPU, MALI_GPU); LOAD_OP1(scale, CPU);
#endif #endif
#ifdef ELEMENTWISEADD_OP #ifdef ELEMENTWISEADD_OP
LOAD_OP2(elementwise_add, CPU, MALI_GPU); LOAD_OP2(elementwise_add, CPU, GPU_CL);
#endif #endif
#ifdef PRELU_OP #ifdef PRELU_OP
LOAD_OP2(prelu, CPU, MALI_GPU); LOAD_OP1(prelu, CPU);
#endif #endif
#ifdef FLATTEN_OP #ifdef FLATTEN_OP
LOAD_OP1(flatten, CPU); LOAD_OP1(flatten, CPU);
#endif #endif
#ifdef FUSION_CONVBNADDRELU_OP #ifdef FUSION_CONVBNADDRELU_OP
LOAD_OP2(fusion_conv_bn_add_relu, CPU, FPGA); LOAD_OP3(fusion_conv_bn_add_relu, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_bn_add_relu); LOAD_FUSION_MATCHER(fusion_conv_bn_add_relu);
#endif #endif
#ifdef FUSION_CONVBNRELU_OP #ifdef FUSION_CONVBNRELU_OP
LOAD_OP2(fusion_conv_bn_relu, CPU, FPGA); LOAD_OP3(fusion_conv_bn_relu, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_bn_relu); LOAD_FUSION_MATCHER(fusion_conv_bn_relu);
#endif #endif
#ifdef GRU_OP #ifdef GRU_OP
...@@ -176,27 +172,23 @@ LOAD_OP2(fusion_conv_add_bn, CPU, FPGA); ...@@ -176,27 +172,23 @@ LOAD_OP2(fusion_conv_add_bn, CPU, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_add_bn); LOAD_FUSION_MATCHER(fusion_conv_add_bn);
#endif #endif
#ifdef DROPOUT_OP #ifdef DROPOUT_OP
LOAD_OP2(dropout, CPU, FPGA); LOAD_OP3(dropout, CPU, GPU_CL, FPGA);
#endif
#ifdef FUSION_CONVADDPRELU_OP
LOAD_OP2(fusion_conv_add_prelu, CPU, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_add_prelu);
#endif #endif
#ifdef FUSION_DWCONVBNRELU_OP #ifdef FUSION_DWCONVBNRELU_OP
LOAD_OP1(fusion_dwconv_bn_relu, CPU); LOAD_OP2(fusion_dwconv_bn_relu, CPU, GPU_CL);
LOAD_FUSION_MATCHER(fusion_dwconv_bn_relu); LOAD_FUSION_MATCHER(fusion_dwconv_bn_relu);
#endif #endif
#ifdef CRF_OP #ifdef CRF_OP
LOAD_OP1(crf_decoding, CPU); LOAD_OP1(crf_decoding, CPU);
#endif #endif
#ifdef MUL_OP #ifdef MUL_OP
LOAD_OP2(mul, CPU, MALI_GPU); LOAD_OP2(mul, CPU, GPU_CL);
#endif #endif
#ifdef NORM_OP #ifdef NORM_OP
LOAD_OP1(norm, CPU); LOAD_OP1(norm, CPU);
#endif #endif
#ifdef RELU_OP #ifdef RELU_OP
LOAD_OP2(relu, CPU, MALI_GPU); LOAD_OP2(relu, CPU, GPU_CL);
LOAD_OP1(relu6, CPU); LOAD_OP1(relu6, CPU);
#endif #endif
#ifdef IM2SEQUENCE_OP #ifdef IM2SEQUENCE_OP
...@@ -206,14 +198,14 @@ LOAD_OP1(im2sequence, CPU); ...@@ -206,14 +198,14 @@ LOAD_OP1(im2sequence, CPU);
LOAD_OP1(lookup_table, CPU); LOAD_OP1(lookup_table, CPU);
#endif #endif
#ifdef FUSION_FC_OP #ifdef FUSION_FC_OP
LOAD_OP3(fusion_fc, CPU, MALI_GPU, FPGA); LOAD_OP3(fusion_fc, CPU, GPU_CL, FPGA);
LOAD_FUSION_MATCHER(fusion_fc); LOAD_FUSION_MATCHER(fusion_fc);
#endif #endif
#ifdef POOL_OP #ifdef POOL_OP
LOAD_OP3(pool2d, CPU, MALI_GPU, FPGA); LOAD_OP3(pool2d, CPU, GPU_CL, FPGA);
#endif #endif
#ifdef MULTICLASSNMS_OP #ifdef MULTICLASSNMS_OP
LOAD_OP1(multiclass_nms, CPU); LOAD_OP2(multiclass_nms, CPU, GPU_CL);
#endif #endif
#ifdef POLYGONBOXTRANSFORM_OP #ifdef POLYGONBOXTRANSFORM_OP
LOAD_OP1(polygon_box_transform, CPU); LOAD_OP1(polygon_box_transform, CPU);
...@@ -225,7 +217,7 @@ LOAD_OP1(sum, CPU); ...@@ -225,7 +217,7 @@ LOAD_OP1(sum, CPU);
LOAD_OP1(elementwise_mul, CPU); LOAD_OP1(elementwise_mul, CPU);
#endif #endif
#ifdef SLICE_OP #ifdef SLICE_OP
LOAD_OP2(slice, CPU, MALI_GPU); LOAD_OP1(slice, CPU);
#endif #endif
#ifdef FUSION_CONVBN_OP #ifdef FUSION_CONVBN_OP
LOAD_OP2(fusion_conv_bn, CPU, FPGA); LOAD_OP2(fusion_conv_bn, CPU, FPGA);
...@@ -324,3 +316,15 @@ LOAD_OP1(psroi_pool, CPU); ...@@ -324,3 +316,15 @@ LOAD_OP1(psroi_pool, CPU);
#ifdef ROI_PERSPECTIVE_OP #ifdef ROI_PERSPECTIVE_OP
LOAD_OP1(roi_perspective_transform, CPU); LOAD_OP1(roi_perspective_transform, CPU);
#endif #endif
#ifdef BEAM_SEARCH_OP
LOAD_OP1(beam_search, CPU);
#endif
#ifdef BEAM_SEARCH_DECODE_OP
LOAD_OP1(beam_search_decode, CPU);
#endif
#ifdef PAD2D_OP
LOAD_OP1(pad2d, CPU);
#endif
#ifdef ONE_HOT_OP
LOAD_OP1(one_hot, CPU);
#endif
...@@ -221,6 +221,8 @@ inline Print &operator<<(Print &printer, const LoDTensor &tensor) { ...@@ -221,6 +221,8 @@ inline Print &operator<<(Print &printer, const LoDTensor &tensor) {
printer << static_cast<int>(tensor.data<int8_t>()[i]) << " "; printer << static_cast<int>(tensor.data<int8_t>()[i]) << " ";
} else if (tensor.type() == typeid(int32_t)) { } else if (tensor.type() == typeid(int32_t)) {
printer << tensor.data<int32_t>()[i] << " "; printer << tensor.data<int32_t>()[i] << " ";
} else if (tensor.type() == typeid(bool)) {
printer << tensor.data<bool>()[i] << " ";
} }
} }
#endif // PADDLE_MOBILE_FPGA #endif // PADDLE_MOBILE_FPGA
......
...@@ -58,8 +58,7 @@ struct OpInfoFiller { ...@@ -58,8 +58,7 @@ struct OpInfoFiller {
void operator()(const std::string& op_type, OpInfo<Dtype>* info) const { void operator()(const std::string& op_type, OpInfo<Dtype>* info) const {
info->creator_ = [](const std::string& type, const VariableNameMap& inputs, info->creator_ = [](const std::string& type, const VariableNameMap& inputs,
const VariableNameMap& outputs, const VariableNameMap& outputs,
const AttributeMap& attrs, const AttributeMap& attrs, framework::Scope* scope) {
std::shared_ptr<Scope> scope) {
return new T(type, inputs, outputs, attrs, scope); return new T(type, inputs, outputs, attrs, scope);
}; };
} }
...@@ -91,7 +90,7 @@ class OpRegistry { ...@@ -91,7 +90,7 @@ class OpRegistry {
static std::shared_ptr<OperatorBase<Dtype>> CreateOp( static std::shared_ptr<OperatorBase<Dtype>> CreateOp(
const std::string& type, const VariableNameMap& inputs, const std::string& type, const VariableNameMap& inputs,
const VariableNameMap& outputs, const AttributeMap attrs, const VariableNameMap& outputs, const AttributeMap attrs,
std::shared_ptr<paddle_mobile::framework::Scope> scope) { paddle_mobile::framework::Scope* scope) {
auto& info = OpInfoMap<Dtype>::Instance()->Get(type); auto& info = OpInfoMap<Dtype>::Instance()->Get(type);
auto op = info.Creator()(type, inputs, outputs, attrs, scope); auto op = info.Creator()(type, inputs, outputs, attrs, scope);
return std::shared_ptr<OperatorBase<Dtype>>(op); return std::shared_ptr<OperatorBase<Dtype>>(op);
...@@ -116,9 +115,6 @@ class OpRegistry { ...@@ -116,9 +115,6 @@ class OpRegistry {
#define REGISTER_OPERATOR_CPU(op_type, op_class) \ #define REGISTER_OPERATOR_CPU(op_type, op_class) \
REGISTER_OPERATOR(op_type, op_class, cpu, paddle_mobile::CPU); REGISTER_OPERATOR(op_type, op_class, cpu, paddle_mobile::CPU);
#define REGISTER_OPERATOR_MALI_GPU(op_type, op_class) \
REGISTER_OPERATOR(op_type, op_class, mali_gpu, paddle_mobile::GPU_MALI);
#define REGISTER_OPERATOR_FPGA(op_type, op_class) \ #define REGISTER_OPERATOR_FPGA(op_type, op_class) \
REGISTER_OPERATOR(op_type, op_class, fpga, paddle_mobile::FPGA); REGISTER_OPERATOR(op_type, op_class, fpga, paddle_mobile::FPGA);
......
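A hedged usage sketch of the registry change above: the Scope is now handed over as a raw pointer owned by the caller rather than a std::shared_ptr (the inputs/outputs/attrs maps are assumed to be pre-built):

paddle_mobile::framework::Scope scope;  // caller retains ownership now
auto op = paddle_mobile::framework::OpRegistry<paddle_mobile::CPU>::CreateOp(
    "relu", inputs, outputs, attrs, &scope);  // previously took shared_ptr<Scope>
op->Init();  // per OperatorBase's interface shown below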
...@@ -43,16 +43,13 @@ OperatorBase<Dtype>::OperatorBase(const std::string &type, ...@@ -43,16 +43,13 @@ OperatorBase<Dtype>::OperatorBase(const std::string &type,
const VariableNameMap &inputs, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const AttributeMap &attrs, const AttributeMap &attrs,
std::shared_ptr<Scope> scope) framework::Scope *scope)
: type_(type), : type_(type),
inputs_(inputs), inputs_(inputs),
outputs_(outputs), outputs_(outputs),
attrs_(attrs), attrs_(attrs),
scope_(scope) { scope_(scope) {
CheckAllInputOutputSet(); CheckAllInputOutputSet();
#ifdef PADDLE_MOBILE_FPGA
InsertTensors();
#endif
} }
template <typename Dtype> template <typename Dtype>
...@@ -67,33 +64,31 @@ void OperatorBase<Dtype>::Run() { ...@@ -67,33 +64,31 @@ void OperatorBase<Dtype>::Run() {
for (const auto key : input_keys) { for (const auto key : input_keys) {
auto var_vec_in = inputs_.at(key); auto var_vec_in = inputs_.at(key);
for (int i = 0; i < var_vec_in.size(); ++i) { for (int i = 0; i < var_vec_in.size(); ++i) {
auto vari = this->scope_->FindVar(var_vec_in[i]); auto var = this->scope_->FindVar(var_vec_in[i]);
if (vari->IsInitialized()) { if (var->IsInitialized() &&
const Tensor *tensor = vari->template Get<framework::LoDTensor>(); var->template IsType<framework::LoDTensor>()) {
if (tensor) { const Tensor *tensor = var->template Get<framework::LoDTensor>();
DLOG << type_ << " input- " << key << "=" << *tensor; if (tensor) DLOG << type_ << " input- " << key << "=" << *tensor;
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
DLOG << var_vec_in[i]; DLOG << var_vec_in[i];
#endif #endif
} }
} }
} }
}
for (const auto key : GetOutKeys()) { for (const auto key : GetOutKeys()) {
auto var_vec_out = outputs_.at(key); auto var_vec_out = outputs_.at(key);
for (int i = 0; i < var_vec_out.size(); ++i) { for (int i = 0; i < var_vec_out.size(); ++i) {
auto vari = scope_->FindVar(var_vec_out[i]); auto var = scope_->FindVar(var_vec_out[i]);
if (vari->IsInitialized()) { if (var->IsInitialized() &&
const Tensor *tensor = vari->template Get<framework::LoDTensor>(); var->template IsType<framework::LoDTensor>()) {
if (tensor) { const Tensor *tensor = var->template Get<framework::LoDTensor>();
DLOG << type_ << " output- " << key << "=" << *tensor; if (tensor) DLOG << type_ << " output- " << key << "=" << *tensor;
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
DLOG << var_vec_out[i]; DLOG << var_vec_out[i];
#endif #endif
} }
} }
} }
}
#endif #endif
} }
......
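Why the new IsType guard matters (a sketch, assuming Variable::Get misbehaves on a type mismatch): ops such as beam_search introduce variables that do not hold LoDTensors, so the debug printer must now check the payload type before dereferencing:

auto *var = scope->FindVar(name);
if (var->IsInitialized() && var->IsType<framework::LoDTensor>()) {
  const framework::Tensor *tensor = var->Get<framework::LoDTensor>();
  if (tensor) DLOG << "value: " << *tensor;  // safe: type was checked first
}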
...@@ -15,7 +15,6 @@ limitations under the License. */ ...@@ -15,7 +15,6 @@ limitations under the License. */
#pragma once #pragma once
#include <map> #include <map>
#include <memory>
#include <string> #include <string>
#include <utility> #include <utility>
#include <vector> #include <vector>
...@@ -58,7 +57,7 @@ class OperatorBase { ...@@ -58,7 +57,7 @@ class OperatorBase {
public: public:
OperatorBase(const std::string &type, const VariableNameMap &inputs, OperatorBase(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs, const VariableNameMap &outputs, const AttributeMap &attrs,
std::shared_ptr<Scope> scope); framework::Scope *scope);
virtual ~OperatorBase() {} virtual ~OperatorBase() {}
virtual void Init() = 0; virtual void Init() = 0;
...@@ -81,11 +80,10 @@ class OperatorBase { ...@@ -81,11 +80,10 @@ class OperatorBase {
} }
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
void InsertTensors(); void InsertTensors();
void ChangeNameMap(string key, std::vector<string> value);
#endif #endif
protected: protected:
std::shared_ptr<Scope> scope_; framework::Scope *scope_;
std::string type_; std::string type_;
VariableNameMap inputs_; VariableNameMap inputs_;
VariableNameMap outputs_; VariableNameMap outputs_;
...@@ -98,35 +96,15 @@ class OperatorBase { ...@@ -98,35 +96,15 @@ class OperatorBase {
template <typename Dtype, typename ParamType, typename KernelType> template <typename Dtype, typename ParamType, typename KernelType>
class OperatorWithKernel : public OperatorBase<Dtype> { class OperatorWithKernel : public OperatorBase<Dtype> {
public: public:
#ifndef PADDLE_MOBILE_FPGA1
OperatorWithKernel(const std::string &type, const VariableNameMap &inputs, OperatorWithKernel(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs, const VariableNameMap &outputs, const AttributeMap &attrs,
std::shared_ptr<Scope> scope) framework::Scope *scope)
: OperatorBase<Dtype>(type, inputs, outputs, attrs, scope), : OperatorBase<Dtype>(type, inputs, outputs, attrs, scope),
param_(inputs, outputs, attrs, scope.get()) { param_(inputs, outputs, attrs, scope) {
#ifdef PADDLE_MOBILE_CL #ifdef PADDLE_MOBILE_CL
kernel_.InitCLHelper(scope->GetCLScpoe()); kernel_.InitCLHelper(scope->GetCLScpoe());
#endif #endif
} }
#else
OperatorWithKernel(const std::string &type, const VariableNameMap inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
std::shared_ptr<Scope> scope)
: OperatorBase<Dtype>(type, inputs, outputs, attrs, scope) {
static int feed_num = 0;
static int fetch_num = 0;
if (type == "feed") {
auto new_name = string("feed") + std::to_string(feed_num++);
auto var = scope->Var(new_name);
(const_cast<VariableNameMap &>(inputs)).at("X") = {string(new_name)};
} else if (type == "fetch") {
auto new_name = string("fetch") + std::to_string(fetch_num++);
auto var = scope->Var(new_name);
(const_cast<VariableNameMap &>(outputs)).at("Out") = {string(new_name)};
}
param_ = ParamType(inputs, outputs, attrs, *scope);
}
#endif
virtual void RunImpl() { this->kernel_.Compute(this->param_); } virtual void RunImpl() { this->kernel_.Compute(this->param_); }
virtual void InferShape() const = 0; virtual void InferShape() const = 0;
...@@ -152,13 +130,6 @@ class OpKernelBase { ...@@ -152,13 +130,6 @@ class OpKernelBase {
} }
#endif #endif
#ifdef PADDLE_MOBILE_MALI_GPU
OpKernelBase() { acl_op_ = nullptr; }
void *GetAclOp() const { return acl_op_; }
void SetAclOp(void *op, void *ob) const {
reinterpret_cast<OpKernelBase<Dtype, P> *>(ob)->acl_op_ = op;
}
#endif
virtual void Compute(const P &para) = 0; virtual void Compute(const P &para) = 0;
virtual bool Init(P *para) { return true; } virtual bool Init(P *para) { return true; }
virtual ~OpKernelBase() = default; virtual ~OpKernelBase() = default;
...@@ -169,9 +140,6 @@ class OpKernelBase { ...@@ -169,9 +140,6 @@ class OpKernelBase {
#endif #endif
private: private:
#ifdef PADDLE_MOBILE_MALI_GPU
void *acl_op_;
#endif
}; };
class FusionOpMatcher { class FusionOpMatcher {
...@@ -206,8 +174,7 @@ class FusionOpMatcher { ...@@ -206,8 +174,7 @@ class FusionOpMatcher {
public: \ public: \
OpName##Op(const std::string &type, const VariableNameMap &inputs, \ OpName##Op(const std::string &type, const VariableNameMap &inputs, \
const VariableNameMap &outputs, \ const VariableNameMap &outputs, \
const framework::AttributeMap &attrs, \ const framework::AttributeMap &attrs, framework::Scope *scope) \
std::shared_ptr<framework::Scope> scope) \
: framework::OperatorWithKernel<DeviceType, OpParam<DeviceType>, \ : framework::OperatorWithKernel<DeviceType, OpParam<DeviceType>, \
operators::OpKernel<DeviceType, T>>( \ operators::OpKernel<DeviceType, T>>( \
type, inputs, outputs, attrs, scope) {} \ type, inputs, outputs, attrs, scope) {} \
...@@ -228,7 +195,7 @@ class FusionOpMatcher { ...@@ -228,7 +195,7 @@ class FusionOpMatcher {
cls(const std::string &type, const ::paddle_mobile::VariableNameMap &inputs, \ cls(const std::string &type, const ::paddle_mobile::VariableNameMap &inputs, \
const ::paddle_mobile::VariableNameMap &outputs, \ const ::paddle_mobile::VariableNameMap &outputs, \
const ::paddle_mobile::framework::AttributeMap &attrs, \ const ::paddle_mobile::framework::AttributeMap &attrs, \
std::shared_ptr<::paddle_mobile::framework::Scope> scope) \ ::paddle_mobile::framework::Scope *scope) \
: parent_cls<Dtype, T>(type, inputs, outputs, attrs, scope) {} : parent_cls<Dtype, T>(type, inputs, outputs, attrs, scope) {}
} // namespace framework } // namespace framework
......
...@@ -42,9 +42,15 @@ OpDesc::OpDesc(PaddleMobile__Framework__Proto__OpDesc *desc) { ...@@ -42,9 +42,15 @@ OpDesc::OpDesc(PaddleMobile__Framework__Proto__OpDesc *desc) {
PaddleMobile__Framework__Proto__OpDesc__Attr *attr = desc->attrs[k]; PaddleMobile__Framework__Proto__OpDesc__Attr *attr = desc->attrs[k];
std::string attr_name(attr->name); std::string attr_name(attr->name);
attrs_[attr_name] = Attribute::GetAttrValue(attr); attrs_[attr_name] = Attribute::GetAttrValue(attr);
proto_attrs_.push_back(*attr);
} }
} }
const std::vector<PaddleMobile__Framework__Proto__OpDesc__Attr>
&OpDesc::GetProtoAttr() const {
return proto_attrs_;
}
const std::vector<std::string> &OpDesc::Input(const std::string &name) const { const std::vector<std::string> &OpDesc::Input(const std::string &name) const {
return inputs_.find(name)->second; return inputs_.find(name)->second;
} }
...@@ -58,6 +64,15 @@ Attribute OpDesc::GetAttr(const std::string &name) const { ...@@ -58,6 +64,15 @@ Attribute OpDesc::GetAttr(const std::string &name) const {
return it->second; return it->second;
} }
void OpDesc::SetBlockAttr(const std::string &name, BlockDesc *block) {
this->attrs_[name].Set<BlockDesc *>(block);
}
void OpDesc::SetBlocksAttr(const std::string &name,
std::vector<BlockDesc *> blocks) {
this->attrs_[name].Set<std::vector<BlockDesc *>>(blocks);
}
std::unordered_map<std::string, Attribute> &OpDesc::GetAttrMap() { std::unordered_map<std::string, Attribute> &OpDesc::GetAttrMap() {
return attrs_; return attrs_;
} }
......
...@@ -29,11 +29,13 @@ class OpDesc { ...@@ -29,11 +29,13 @@ class OpDesc {
friend class ProgramOptimize; friend class ProgramOptimize;
friend class FusionOpMatcher; friend class FusionOpMatcher;
friend class Node; friend class Node;
explicit OpDesc(PaddleMobile__Framework__Proto__OpDesc *op_desc); explicit OpDesc(PaddleMobile__Framework__Proto__OpDesc *op_desc);
OpDesc(const OpDesc &op_desc) : type_(op_desc.type_) { OpDesc(const OpDesc &op_desc) : type_(op_desc.type_) {
this->inputs_ = op_desc.inputs_; this->inputs_ = op_desc.inputs_;
this->outputs_ = op_desc.outputs_; this->outputs_ = op_desc.outputs_;
this->attrs_ = op_desc.attrs_; this->attrs_ = op_desc.attrs_;
this->proto_attrs_ = op_desc.proto_attrs_;
} }
OpDesc() {} OpDesc() {}
...@@ -41,6 +43,12 @@ class OpDesc { ...@@ -41,6 +43,12 @@ class OpDesc {
const std::vector<std::string> &Output(const std::string &name) const; const std::vector<std::string> &Output(const std::string &name) const;
Attribute GetAttr(const std::string &name) const; Attribute GetAttr(const std::string &name) const;
const std::vector<PaddleMobile__Framework__Proto__OpDesc__Attr>
&GetProtoAttr() const;
void SetBlockAttr(const std::string &name, BlockDesc *block);
void SetBlocksAttr(const std::string &name, std::vector<BlockDesc *> block);
VariableNameMap &GetInputs() { return inputs_; } VariableNameMap &GetInputs() { return inputs_; }
VariableNameMap &GetOutputs() { return outputs_; } VariableNameMap &GetOutputs() { return outputs_; }
...@@ -60,6 +68,7 @@ class OpDesc { ...@@ -60,6 +68,7 @@ class OpDesc {
VariableNameMap inputs_; VariableNameMap inputs_;
VariableNameMap outputs_; VariableNameMap outputs_;
AttributeMap attrs_; AttributeMap attrs_;
std::vector<PaddleMobile__Framework__Proto__OpDesc__Attr> proto_attrs_;
}; };
Print &operator<<(Print &printer, const OpDesc &op_desc); Print &operator<<(Print &printer, const OpDesc &op_desc);
......
...@@ -15,8 +15,8 @@ limitations under the License. */ ...@@ -15,8 +15,8 @@ limitations under the License. */
#include <string> #include <string>
#include <vector> #include <vector>
#include "framework/program/program_desc.h"
#include "framework/program/tensor_desc.h" #include "framework/program/tensor_desc.h"
#include "program_desc.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
...@@ -25,6 +25,25 @@ ProgramDesc::ProgramDesc(PaddleMobile__Framework__Proto__ProgramDesc *desc) { ...@@ -25,6 +25,25 @@ ProgramDesc::ProgramDesc(PaddleMobile__Framework__Proto__ProgramDesc *desc) {
for (int i = 0; i < desc->n_blocks; ++i) { for (int i = 0; i < desc->n_blocks; ++i) {
blocks_.emplace_back(std::make_shared<BlockDesc>(desc->blocks[i])); blocks_.emplace_back(std::make_shared<BlockDesc>(desc->blocks[i]));
} }
for (auto &block : blocks_) {
for (auto op : block->Ops()) {
for (const auto &attr : op->GetProtoAttr()) {
if (attr.type == PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCK) {
size_t blk_idx = attr.block_idx;
op->SetBlockAttr(attr.name, this->MutableBlock(blk_idx));
} else if (attr.type ==
PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCKS) {
size_t n_blocks_idx = attr.n_blocks_idx;
int32_t *blks_idx = attr.blocks_idx;
std::vector<BlockDesc *> block_descs;
for (size_t i = 0; i < n_blocks_idx; ++i) {
block_descs.push_back(this->MutableBlock(blks_idx[i]));
}
op->SetBlocksAttr(attr.name, block_descs);
}
}
}
}
} }
void ProgramDesc::Description(std::string header) { void ProgramDesc::Description(std::string header) {
...@@ -60,9 +79,8 @@ void ProgramDesc::Description(std::string header) { ...@@ -60,9 +79,8 @@ void ProgramDesc::Description(std::string header) {
} }
for (const auto &var_desc : block->Vars()) { for (const auto &var_desc : block->Vars()) {
if (var_desc->Type() == VARTYPE_TYPE_LOD_TENSOR) {
LOG(kLOG_DEBUG1) << "var name: " << var_desc->Name(); LOG(kLOG_DEBUG1) << "var name: " << var_desc->Name();
if (var_desc->Type() == VARTYPE_TYPE_LOD_TENSOR) {
const TensorDesc &tensor_desc = var_desc->Tensor_desc(); const TensorDesc &tensor_desc = var_desc->Tensor_desc();
LOG(kLOG_DEBUG2) << "in var tensor desc dims size: " LOG(kLOG_DEBUG2) << "in var tensor desc dims size: "
......
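A hedged sketch of consuming the wiring above from a control-flow op (the attribute key "sub_block" is a hypothetical name borrowed from Paddle's while op, and Attribute is assumed to expose a Get<T> symmetric to the Set<T> used in SetBlockAttr):

auto &attrs = op_desc->GetAttrMap();
if (attrs.count("sub_block")) {  // hypothetical attribute key
  auto *blk = attrs["sub_block"].Get<framework::BlockDesc *>();
  // blk points at the BlockDesc resolved by the constructor loop above
}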
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once #pragma once
#include <string>
#include <vector> #include <vector>
#include "common/types.h" #include "common/types.h"
...@@ -31,6 +32,14 @@ class ProgramDesc { ...@@ -31,6 +32,14 @@ class ProgramDesc {
std::shared_ptr<BlockDesc> Block(size_t idx); std::shared_ptr<BlockDesc> Block(size_t idx);
BlockDesc *MutableBlock(size_t idx) {
if (idx == -1) {
return nullptr;
} else {
return blocks_[idx].get();
}
}
const std::vector<std::shared_ptr<BlockDesc>> &Blocks() { return blocks_; } const std::vector<std::shared_ptr<BlockDesc>> &Blocks() { return blocks_; }
ProgramDesc(const ProgramDesc &program_desc) { ProgramDesc(const ProgramDesc &program_desc) {
for (auto &block : program_desc.blocks_) { for (auto &block : program_desc.blocks_) {
......
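A note on the sentinel above: idx is a size_t, so idx == -1 compares against SIZE_MAX after the usual arithmetic conversion; callers pass -1 to mean "no block". A minimal sketch, given some ProgramDesc desc:

framework::BlockDesc *none = desc.MutableBlock(-1);  // -1 wraps to SIZE_MAX, returns nullptr
framework::BlockDesc *first = desc.MutableBlock(0);  // first block of the program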
...@@ -32,15 +32,7 @@ class Scope { ...@@ -32,15 +32,7 @@ class Scope {
Scope() = default; Scope() = default;
~Scope() { ~Scope() {
for (auto &var : vars_) { DropKids();
delete var.second;
}
vars_.clear();
for (auto kid : kids_) {
delete kid;
}
kids_.clear();
#ifdef PADDLE_MOBILE_CL #ifdef PADDLE_MOBILE_CL
delete cl_scope_; delete cl_scope_;
#endif #endif
......
...@@ -209,8 +209,9 @@ class Tensor : public TensorBase { ...@@ -209,8 +209,9 @@ class Tensor : public TensorBase {
} }
inline void set_type(std::type_index type) { holder_->set_type(type); } inline void set_type(std::type_index type) { holder_->set_type(type); }
inline void *get_data() { inline void *get_data() {
return (void *)(((PlaceholderImpl *)(holder_.get()))->ptr_.get()); return (
} // NOLINT void *)(((PlaceholderImpl *)(holder_.get()))->ptr_.get()); // NOLINT
}
inline void *init(std::type_index type) { inline void *init(std::type_index type) {
if (holder_ != nullptr) { if (holder_ != nullptr) {
......
...@@ -14,13 +14,26 @@ limitations under the License. */ ...@@ -14,13 +14,26 @@ limitations under the License. */
#pragma once #pragma once
#include <vector> #include <vector>
#include "framework/tensor.h"
#include "memory/t_malloc.h" #include "memory/t_malloc.h"
#include "tensor.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
void TensorCopy(const Tensor &src, Tensor *dst); void TensorCopy(const Tensor& src, Tensor* dst);
template <typename T>
void TensorFromVector(const std::vector<T>& src, Tensor* dst);
template <typename T>
void TensorFromVector(const std::vector<T>& src, Tensor* dst) {
auto src_ptr = static_cast<const void*>(src.data());
dst->Resize({static_cast<int64_t>(src.size())});
auto dst_ptr = static_cast<void*>(dst->mutable_data<T>());
auto size = src.size() * sizeof(T);
memory::Copy(dst_ptr, src_ptr, size);
}
} // namespace framework } // namespace framework
} // namespace paddle_mobile } // namespace paddle_mobile
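A hedged usage sketch of the new TensorFromVector helper (variable names are illustrative):

std::vector<float> host = {1.0f, 2.0f, 3.0f};
paddle_mobile::framework::Tensor t;
paddle_mobile::framework::TensorFromVector(host, &t);
// t now owns a copy of `host` with dims {3}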
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "io/api_paddle_mobile.h" #include "io/api_paddle_mobile.h"
#include <string>
#include <vector> #include <vector>
#include "common/enforce.h" #include "common/enforce.h"
#include "framework/tensor.h" #include "framework/tensor.h"
...@@ -169,7 +170,7 @@ void PaddleMobilePredictor<Device, T>::GetPaddleTensor(const std::string &name, ...@@ -169,7 +170,7 @@ void PaddleMobilePredictor<Device, T>::GetPaddleTensor(const std::string &name,
PaddleTensor *output) { PaddleTensor *output) {
framework::Tensor *t = paddle_mobile_->GetTensorByName(name); framework::Tensor *t = paddle_mobile_->GetTensorByName(name);
ConvertTensors(*t, output); ConvertTensors(*t, output);
}; }
template <typename Device, typename T> template <typename Device, typename T>
void PaddleMobilePredictor<Device, T>::Predict_From_To(int start, int end) { void PaddleMobilePredictor<Device, T>::Predict_From_To(int start, int end) {
......
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once #pragma once
#include <string>
#include <vector> #include <vector>
#include "common/types.h" #include "common/types.h"
#include "io/paddle_inference_api.h" #include "io/paddle_inference_api.h"
......
...@@ -39,8 +39,6 @@ using framework::Tensor; ...@@ -39,8 +39,6 @@ using framework::Tensor;
using paddle_mobile::CPU; using paddle_mobile::CPU;
using std::string; using std::string;
const char *ANDROID_LOG_TAG =
"paddle_mobile LOG built on " __DATE__ " " __TIME__;
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile; paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
static std::mutex shared_mutex; static std::mutex shared_mutex;
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_MOBILE_CL
#include "io/opencl_interface.h"
#include "framework/cl/cl_engine.h"
#include "framework/cl/cl_scope.h"
namespace paddle_mobile {
cl_context getContext() {
return framework::CLEngine::Instance()->getContext();
}
cl_command_queue getClCommandQueue() {
return framework::CLEngine::Instance()->getClCommandQueue();
}
bool isInitSuccess() {
return framework::CLEngine::Instance()->isInitSuccess();
}
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_MOBILE_CL
#include "CL/cl.h"
namespace paddle_mobile {
cl_context getContext();
cl_command_queue getClCommandQueue();
bool isInitSuccess();
}  // namespace paddle_mobile
#endif
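A hedged usage sketch of the interface above, for host code that wants to share paddle-mobile's OpenCL runtime (error handling elided):

#ifdef PADDLE_MOBILE_CL
if (paddle_mobile::isInitSuccess()) {
  cl_context ctx = paddle_mobile::getContext();
  cl_command_queue queue = paddle_mobile::getClCommandQueue();
  // enqueue user kernels on `queue`; buffers created on `ctx` are shared
}
#endif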
...@@ -20,6 +20,8 @@ limitations under the License. */ ...@@ -20,6 +20,8 @@ limitations under the License. */
#endif // _OPENMP #endif // _OPENMP
#ifdef PADDLE_MOBILE_CL #ifdef PADDLE_MOBILE_CL
#include <CL/cl.h> #include <CL/cl.h>
#include <mutex>
#include "framework/cl/cl_engine.h"
#include "framework/cl/cl_tensor.h" #include "framework/cl/cl_tensor.h"
#endif #endif
#include "operators/math/gemm.h" #include "operators/math/gemm.h"
...@@ -28,9 +30,7 @@ namespace paddle_mobile { ...@@ -28,9 +30,7 @@ namespace paddle_mobile {
template <typename Device, typename T> template <typename Device, typename T>
void PaddleMobile<Device, T>::SetThreadNum(int num) { void PaddleMobile<Device, T>::SetThreadNum(int num) {
#ifdef _OPENMP executor_->SetThreadNum(num);
omp_set_num_threads(num);
#endif
} }
template <typename Device, typename T> template <typename Device, typename T>
...@@ -152,14 +152,14 @@ PMStatus PaddleMobile<Device, T>::Predict() { ...@@ -152,14 +152,14 @@ PMStatus PaddleMobile<Device, T>::Predict() {
} }
template <typename Device, typename T> template <typename Device, typename T>
void PaddleMobile<Device, T>::Feed(const framework::Tensor &input, void PaddleMobile<Device, T>::Feed(const std::string &var_name,
const std::string &var_name) { const framework::Tensor &input) {
executor_->SetInput(input, var_name); executor_->SetInput(input, var_name);
} }
template <typename Device, typename T> template <typename Device, typename T>
void PaddleMobile<Device, T>::Feed(const framework::LoDTensor &input, void PaddleMobile<Device, T>::Feed(const std::string &var_name,
const std::string &var_name) { const framework::LoDTensor &input) {
executor_->SetInput(input, var_name); executor_->SetInput(input, var_name);
} }
...@@ -204,11 +204,15 @@ double PaddleMobile<CPU, float>::GetPredictTime() { ...@@ -204,11 +204,15 @@ double PaddleMobile<CPU, float>::GetPredictTime() {
operators::math::Gemm gemm; operators::math::Gemm gemm;
auto time1 = paddle_mobile::time(); auto time1 = paddle_mobile::time();
int times = 4;
for (int j = 0; j < times; ++j) {
gemm.Sgemm(m, n, k, static_cast<float>(1), a, lda, b, ldb, gemm.Sgemm(m, n, k, static_cast<float>(1), a, lda, b, ldb,
static_cast<float>(0), c, ldc, false, static_cast<float>(0), c, ldc, false,
static_cast<float *>(nullptr)); static_cast<float *>(nullptr));
}
auto time2 = paddle_mobile::time(); auto time2 = paddle_mobile::time();
double cost = paddle_mobile::time_diff(time1, time2); double cost = paddle_mobile::time_diff(time1, time2) / times;
paddle_mobile::memory::Free(a); paddle_mobile::memory::Free(a);
paddle_mobile::memory::Free(b); paddle_mobile::memory::Free(b);
paddle_mobile::memory::Free(c); paddle_mobile::memory::Free(c);
...@@ -227,16 +231,16 @@ template <typename Device, typename T> ...@@ -227,16 +231,16 @@ template <typename Device, typename T>
void PaddleMobile<Device, T>::FeedData(const framework::Tensor &t) { void PaddleMobile<Device, T>::FeedData(const framework::Tensor &t) {
executor_->FeedData(t); executor_->FeedData(t);
} }
template <typename Device, typename T> template <typename Device, typename T>
void PaddleMobile<Device, T>::FeedData(const std::vector<void *> &v) { void PaddleMobile<Device, T>::FeedData(const std::vector<void *> &v) {
executor_->FeedData(v); executor_->FeedData(v);
}; }
template <typename Device, typename T> template <typename Device, typename T>
void PaddleMobile<Device, T>::FeedTensorData( void PaddleMobile<Device, T>::FeedTensorData(
const std::vector<framework::Tensor> &v) { const std::vector<framework::Tensor> &v) {
executor_->FeedTensorData(v); executor_->FeedTensorData(v);
}; }
template <typename Device, typename T> template <typename Device, typename T>
void PaddleMobile<Device, T>::GetResults(std::vector<void *> *v) { void PaddleMobile<Device, T>::GetResults(std::vector<void *> *v) {
...@@ -253,7 +257,7 @@ template <typename Device, typename T> ...@@ -253,7 +257,7 @@ template <typename Device, typename T>
framework::Tensor *PaddleMobile<Device, T>::GetTensorByName( framework::Tensor *PaddleMobile<Device, T>::GetTensorByName(
const std::string &name) { const std::string &name) {
return executor_->GetTensorByName(name); return executor_->GetTensorByName(name);
}; }
template <typename Device, typename T> template <typename Device, typename T>
std::shared_ptr<framework::Tensor> PaddleMobile<Device, T>::FetchResult( std::shared_ptr<framework::Tensor> PaddleMobile<Device, T>::FetchResult(
...@@ -289,21 +293,11 @@ void PaddleMobile<Device, T>::SetCLPath(std::string path) { ...@@ -289,21 +293,11 @@ void PaddleMobile<Device, T>::SetCLPath(std::string path) {
template <> template <>
double PaddleMobile<GPU_CL, float>::GetPredictTime() { double PaddleMobile<GPU_CL, float>::GetPredictTime() {
cl_int status; cl_int status;
cl_uint nPlatform; if (!framework::CLEngine::Instance()->isInitSuccess()) {
clGetPlatformIDs(0, NULL, &nPlatform); return -1;
cl_platform_id *listPlatform = reinterpret_cast<cl_platform_id *>( }
malloc(nPlatform * sizeof(cl_platform_id))); cl_context context = framework::CLEngine::Instance()->getContext();
clGetPlatformIDs(nPlatform, listPlatform, NULL); cl_command_queue queue = framework::CLEngine::Instance()->getClCommandQueue();
cl_uint nDevice = 0;
clGetDeviceIDs(listPlatform[0], CL_DEVICE_TYPE_GPU, 0, NULL, &nDevice);
cl_device_id *listDevice =
reinterpret_cast<cl_device_id *>(malloc(nDevice * sizeof(cl_device_id)));
clGetDeviceIDs(listPlatform[0], CL_DEVICE_TYPE_GPU, nDevice, listDevice,
NULL);
cl_context context =
clCreateContext(NULL, nDevice, listDevice, NULL, NULL, &status);
cl_command_queue queue =
clCreateCommandQueue(context, listDevice[0], 0, &status);
int n = 1; int n = 1;
int c = 3; int c = 3;
...@@ -417,7 +411,7 @@ double PaddleMobile<GPU_CL, float>::GetPredictTime() { ...@@ -417,7 +411,7 @@ double PaddleMobile<GPU_CL, float>::GetPredictTime() {
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
clFinish(queue); clFinish(queue);
queue = clCreateCommandQueue(context, listDevice[0], 0, &status); // queue = clCreateCommandQueue(context, listDevice[0], 0, &status);
path = framework::CLEngine::Instance()->GetCLPath() + path = framework::CLEngine::Instance()->GetCLPath() +
"/cl_kernel/conv_kernel.cl"; "/cl_kernel/conv_kernel.cl";
...@@ -472,15 +466,18 @@ double PaddleMobile<GPU_CL, float>::GetPredictTime() { ...@@ -472,15 +466,18 @@ double PaddleMobile<GPU_CL, float>::GetPredictTime() {
// cl_event wait_event = param.Input()->GetClEvent(); // cl_event wait_event = param.Input()->GetClEvent();
size_t global_work_size2[3] = {8, 224, 224}; size_t global_work_size2[3] = {8, 224, 224};
auto time1 = paddle_mobile::time(); auto time1 = paddle_mobile::time();
int times = 10;
for (int i = 0; i < times; ++i) {
status = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size2, status = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size2,
NULL, 0, NULL, NULL); NULL, 0, NULL, NULL);
}
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
clFinish(queue); clFinish(queue);
auto time2 = paddle_mobile::time(); auto time2 = paddle_mobile::time();
paddle_mobile::memory::Free(input); paddle_mobile::memory::Free(input);
paddle_mobile::memory::Free(filter); paddle_mobile::memory::Free(filter);
if (status == CL_SUCCESS) { if (status == CL_SUCCESS) {
return paddle_mobile::time_diff(time1, time2); return paddle_mobile::time_diff(time1, time2) / times;
} else { } else {
return -1; return -1;
} }
......
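The timing pattern introduced in both GetPredictTime variants above, in isolation (a sketch; run_kernel_once is a hypothetical stand-in for the Sgemm call or the CL enqueue):

auto t1 = paddle_mobile::time();
const int times = 10;  // the CPU path in the diff uses 4
for (int i = 0; i < times; ++i) {
  run_kernel_once();  // hypothetical workload
}
auto t2 = paddle_mobile::time();
double avg = paddle_mobile::time_diff(t1, t2) / times;  // amortizes launch jitter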
...@@ -33,7 +33,7 @@ namespace paddle_mobile { ...@@ -33,7 +33,7 @@ namespace paddle_mobile {
template <typename Device, typename T = float> template <typename Device, typename T = float>
class PaddleMobile { class PaddleMobile {
public: public:
PaddleMobile(PaddleMobileConfigInternal config) : config_(config) { explicit PaddleMobile(PaddleMobileConfigInternal config) : config_(config) {
#ifndef PADDLE_MOBILE_CL #ifndef PADDLE_MOBILE_CL
bool is_gpu = std::is_same<DeviceType<kGPU_CL>, Device>::value; bool is_gpu = std::is_same<DeviceType<kGPU_CL>, Device>::value;
PADDLE_MOBILE_ENFORCE(!is_gpu, "Please recompile with GPU_CL is on"); PADDLE_MOBILE_ENFORCE(!is_gpu, "Please recompile with GPU_CL is on");
...@@ -46,7 +46,7 @@ class PaddleMobile { ...@@ -46,7 +46,7 @@ class PaddleMobile {
PADDLE_MOBILE_ENFORCE(!is_gpu, "Please recompile with GPU_CL is on"); PADDLE_MOBILE_ENFORCE(!is_gpu, "Please recompile with GPU_CL is on");
#endif #endif
} }
~PaddleMobile() {} virtual ~PaddleMobile() { Clear(); }
PMStatus Load(const std::string &dirname, const bool optimize = false, PMStatus Load(const std::string &dirname, const bool optimize = false,
const bool quantification = false, const int batch_size = 1, const bool quantification = false, const int batch_size = 1,
...@@ -69,8 +69,8 @@ class PaddleMobile { ...@@ -69,8 +69,8 @@ class PaddleMobile {
const std::vector<int64_t> &dims); const std::vector<int64_t> &dims);
PMStatus Predict(); PMStatus Predict();
void Feed(const framework::LoDTensor &input, const std::string &var_name); void Feed(const std::string &var_name, const framework::LoDTensor &input);
void Feed(const framework::Tensor &input, const std::string &var_name); void Feed(const std::string &var_name, const framework::Tensor &input);
typedef std::shared_ptr<framework::LoDTensor> LoDTensorPtr; typedef std::shared_ptr<framework::LoDTensor> LoDTensorPtr;
LoDTensorPtr Fetch(const std::string &var_name); LoDTensorPtr Fetch(const std::string &var_name);
......
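A hedged call-site sketch for the reordered Feed overloads (the variable name "image" and the input tensor are illustrative): existing callers must swap their arguments.

// old: paddle_mobile.Feed(input_tensor, "image");
paddle_mobile.Feed("image", input_tensor);  // new: name first, tensor second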
...@@ -54,9 +54,6 @@ namespace ops = paddle_mobile::operators; ...@@ -54,9 +54,6 @@ namespace ops = paddle_mobile::operators;
REGISTER_OPERATOR_CPU(relu, ops::ReluOp); REGISTER_OPERATOR_CPU(relu, ops::ReluOp);
REGISTER_OPERATOR_CPU(relu6, ops::Relu6Op); REGISTER_OPERATOR_CPU(relu6, ops::Relu6Op);
#endif #endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(relu, ops::ReluOp);
#endif
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(relu, ops::ReluOp); REGISTER_OPERATOR_FPGA(relu, ops::ReluOp);
#endif #endif
...@@ -69,6 +66,9 @@ REGISTER_OPERATOR_CL(relu, ops::ReluOp); ...@@ -69,6 +66,9 @@ REGISTER_OPERATOR_CL(relu, ops::ReluOp);
#ifdef PADDLE_MOBILE_CPU #ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(sigmoid, ops::SigmoidOp); REGISTER_OPERATOR_CPU(sigmoid, ops::SigmoidOp);
#endif #endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(sigmoid, ops::SigmoidOp);
#endif
#endif // SIGMOID_OP #endif // SIGMOID_OP
#ifdef TANH_OP #ifdef TANH_OP
......
...@@ -34,9 +34,6 @@ namespace ops = paddle_mobile::operators; ...@@ -34,9 +34,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU #ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(batch_norm, ops::BatchNormOp); REGISTER_OPERATOR_CPU(batch_norm, ops::BatchNormOp);
#endif #endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(batch_norm, ops::BatchNormOp);
#endif
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
#endif #endif
......
...@@ -32,8 +32,7 @@ class BatchNormOp ...@@ -32,8 +32,7 @@ class BatchNormOp
public: public:
BatchNormOp(const string &type, const VariableNameMap &inputs, BatchNormOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs, framework::Scope *scope)
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType, BatchNormParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, BatchNormParam<DeviceType>,
BatchNormKernel<DeviceType, T>>( BatchNormKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
...@@ -12,25 +12,25 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,25 +12,25 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "operators/kernel/feed_kernel.h" #ifdef BEAM_SEARCH_DECODE_OP
namespace paddle_mobile { #pragma once
namespace operators {
template <> #include "operators/beam_search_decode_op.h"
bool FeedKernel<GPU_MALI, float>::Init(FeedParam<GPU_MALI> *param) {
return true;
}
template <> namespace paddle_mobile {
void FeedKernel<GPU_MALI, float>::Compute(const FeedParam<GPU_MALI> &param) { namespace operators {
param.Out()->ShareDataWith(*(param.InputX()));
param.Out()->set_lod(param.InputX()->lod());
}
template class FeedKernel<GPU_MALI, float>; template <typename Dtype, typename T>
void BeamSearchDecodeOp<Dtype, T>::InferShape() const {}
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(beam_search_decode, ops::BeamSearchDecodeOp);
#endif #endif
#endif // BEAM_SEARCH_DECODE_OP
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef BEAM_SEARCH_DECODE_OP
#pragma once
#include <string>
#include "framework/operator.h"
#include "operators/kernel/beam_search_decode_kernel.h"
namespace paddle_mobile {
namespace operators {
DECLARE_OPERATOR(BeamSearchDecode, BeamSearchDecodeParam,
BeamSearchDecodeKernel);
} // namespace operators
} // namespace paddle_mobile
#endif // BEAM_SEARCH_DECODE_OP
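For orientation, a simplified sketch of what the DECLARE_OPERATOR line above expands to, following the macro shown earlier in this diff (the exact expansion may differ):

template <typename DeviceType, typename T>
class BeamSearchDecodeOp
    : public framework::OperatorWithKernel<
          DeviceType, BeamSearchDecodeParam<DeviceType>,
          operators::BeamSearchDecodeKernel<DeviceType, T>> {
 public:
  BeamSearchDecodeOp(const std::string &type, const VariableNameMap &inputs,
                     const VariableNameMap &outputs,
                     const framework::AttributeMap &attrs,
                     framework::Scope *scope)
      : framework::OperatorWithKernel<
            DeviceType, BeamSearchDecodeParam<DeviceType>,
            operators::BeamSearchDecodeKernel<DeviceType, T>>(
            type, inputs, outputs, attrs, scope) {}
  void InferShape() const;
};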
...@@ -11,27 +11,26 @@ distributed under the License is distributed on an "AS IS" BASIS, ...@@ -11,27 +11,26 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifdef FUSION_CONVADD_OP
#include "operators/kernel/conv_add_kernel.h" #ifdef BEAM_SEARCH_OP
#include "../central-arm-func/conv_add_arm_func.h"
namespace paddle_mobile { #pragma once
namespace operators {
template <> #include "operators/beam_search_op.h"
bool ConvAddKernel<CPU, float>::Init(FusionConvAddParam<CPU> *param) {
return true;
}
template <> namespace paddle_mobile {
void ConvAddKernel<CPU, float>::Compute(const FusionConvAddParam<CPU> &param) { namespace operators {
ConvAddCompute<float>(param);
}
template class ConvAddKernel<CPU, float>; template <typename Dtype, typename T>
void BeamSearchOp<Dtype, T>::InferShape() const {}
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(beam_search, ops::BeamSearchOp);
#endif #endif
#endif // BEAM_SEARCH_OP
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef BEAM_SEARCH_OP
#pragma once
#include <string>
#include "framework/operator.h"
#include "operators/kernel/beam_search_kernel.h"
namespace paddle_mobile {
namespace operators {
DECLARE_OPERATOR(BeamSearch, BeamSearchParam, BeamSearchKernel);
} // namespace operators
} // namespace paddle_mobile
#endif // BEAM_SEARCH_OP
...@@ -48,8 +48,7 @@ namespace ops = paddle_mobile::operators; ...@@ -48,8 +48,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU #ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(bilinear_interp, ops::BilinearOp); REGISTER_OPERATOR_CPU(bilinear_interp, ops::BilinearOp);
#endif #endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
#endif #endif
......
...@@ -34,8 +34,7 @@ class BilinearOp : public framework::OperatorWithKernel< ...@@ -34,8 +34,7 @@ class BilinearOp : public framework::OperatorWithKernel<
public: public:
BilinearOp(const std::string &type, const VariableNameMap &inputs, BilinearOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs, framework::Scope *scope)
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, BilinearInterpParam<DeviceType>, DeviceType, BilinearInterpParam<DeviceType>,
operators::BilinearInterpKernel<DeviceType, T>>( operators::BilinearInterpKernel<DeviceType, T>>(
......
...@@ -58,8 +58,6 @@ REGISTER_OPERATOR_CPU(box_coder, ops::BoxCoderOp); ...@@ -58,8 +58,6 @@ REGISTER_OPERATOR_CPU(box_coder, ops::BoxCoderOp);
#ifdef PADDLE_MOBILE_CL #ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(box_coder, ops::BoxCoderOp); REGISTER_OPERATOR_CL(box_coder, ops::BoxCoderOp);
#endif #endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
#endif #endif
......
...@@ -34,8 +34,7 @@ class BoxCoderOp : public framework::OperatorWithKernel< ...@@ -34,8 +34,7 @@ class BoxCoderOp : public framework::OperatorWithKernel<
public: public:
BoxCoderOp(const std::string &type, const VariableNameMap &inputs, BoxCoderOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs, framework::Scope *scope)
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType, BoxCoderParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, BoxCoderParam<DeviceType>,
operators::BoxCoderKernel<DeviceType, T>>( operators::BoxCoderKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
...@@ -31,7 +31,7 @@ class CastOp : public framework::OperatorWithKernel< ...@@ -31,7 +31,7 @@ class CastOp : public framework::OperatorWithKernel<
public: public:
CastOp(const std::string &type, const VariableNameMap &inputs, CastOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const framework::AttributeMap &attrs, const VariableNameMap &outputs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel<DeviceType, CastParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, CastParam<DeviceType>,
operators::CastKernel<DeviceType, T>>( operators::CastKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
...@@ -69,9 +69,7 @@ REGISTER_OPERATOR_CPU(concat, ops::ConcatOp); ...@@ -69,9 +69,7 @@ REGISTER_OPERATOR_CPU(concat, ops::ConcatOp);
#ifdef PADDLE_MOBILE_CL #ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(concat, ops::ConcatOp); REGISTER_OPERATOR_CL(concat, ops::ConcatOp);
#endif #endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(concat, ops::ConcatOp);
#endif
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(concat, ops::ConcatOp); REGISTER_OPERATOR_FPGA(concat, ops::ConcatOp);
#endif #endif
......
...@@ -30,7 +30,7 @@ class ConcatOp : public framework::OperatorWithKernel< ...@@ -30,7 +30,7 @@ class ConcatOp : public framework::OperatorWithKernel<
public: public:
ConcatOp(const string &type, const VariableNameMap &inputs, ConcatOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const framework::AttributeMap &attrs, const VariableNameMap &outputs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel<DeviceType, ConcatParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, ConcatParam<DeviceType>,
operators::ConcatKernel<DeviceType, T>>( operators::ConcatKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
...@@ -18,7 +18,7 @@ limitations under the License. */ ...@@ -18,7 +18,7 @@ limitations under the License. */
#include <vector> #include <vector>
#include "framework/op_proto_maker.h" #include "framework/op_proto_maker.h"
#include "framework/op_registry.h" #include "framework/op_registry.h"
#include "operators/math/conv_func.h" #include "operators/kernel/central-arm-func/conv_arm_func.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -39,9 +39,9 @@ void ConvOp<Dtype, T>::InferShape() const { ...@@ -39,9 +39,9 @@ void ConvOp<Dtype, T>::InferShape() const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]}); std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) { for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back( output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i], dilations[i], paddings[i],
paddings[i], strides[i])); strides[i]));
} }
framework::DDim ddim = framework::make_ddim(output_shape); framework::DDim ddim = framework::make_ddim(output_shape);
...@@ -55,9 +55,7 @@ namespace ops = paddle_mobile::operators; ...@@ -55,9 +55,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU #ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(conv2d, ops::ConvOp); REGISTER_OPERATOR_CPU(conv2d, ops::ConvOp);
#endif #endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(conv2d, ops::ConvOp);
#endif
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(conv2d, ops::ConvOp); REGISTER_OPERATOR_FPGA(conv2d, ops::ConvOp);
#endif #endif
......
...@@ -30,7 +30,7 @@ class ConvOp : public framework::OperatorWithKernel< ...@@ -30,7 +30,7 @@ class ConvOp : public framework::OperatorWithKernel<
public: public:
ConvOp(const std::string &type, const VariableNameMap &inputs, ConvOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const framework::AttributeMap &attrs, const VariableNameMap &outputs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel<DeviceType, ConvParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, ConvParam<DeviceType>,
operators::ConvKernel<DeviceType, T>>( operators::ConvKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
...@@ -24,8 +24,7 @@ namespace ops = paddle_mobile::operators; ...@@ -24,8 +24,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU #ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(conv2d_transpose, ops::ConvOpTranspose); REGISTER_OPERATOR_CPU(conv2d_transpose, ops::ConvOpTranspose);
#endif #endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(conv2d_transpose, ops::ConvOpTranspose); REGISTER_OPERATOR_FPGA(conv2d_transpose, ops::ConvOpTranspose);
#endif #endif
......
...@@ -31,8 +31,7 @@ class ConvOpTranspose : public framework::OperatorWithKernel< ...@@ -31,8 +31,7 @@ class ConvOpTranspose : public framework::OperatorWithKernel<
public: public:
ConvOpTranspose(const std::string &type, const VariableNameMap &inputs, ConvOpTranspose(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs, framework::Scope *scope)
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, ConvTransposeParam<DeviceType>, DeviceType, ConvTransposeParam<DeviceType>,
operators::ConvTransposeKernel<DeviceType, T>>( operators::ConvTransposeKernel<DeviceType, T>>(
......
...@@ -48,8 +48,7 @@ namespace ops = paddle_mobile::operators; ...@@ -48,8 +48,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU #ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(crf_decoding, ops::CrfOp); REGISTER_OPERATOR_CPU(crf_decoding, ops::CrfOp);
#endif #endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
#endif #endif
......
...@@ -33,7 +33,7 @@ class CrfOp : public framework::OperatorWithKernel< ...@@ -33,7 +33,7 @@ class CrfOp : public framework::OperatorWithKernel<
public: public:
CrfOp(const std::string &type, const VariableNameMap &inputs, CrfOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const framework::AttributeMap &attrs, const VariableNameMap &outputs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel<DeviceType, CrfParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, CrfParam<DeviceType>,
operators::CrfKernel<DeviceType, T>>( operators::CrfKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
...@@ -19,7 +19,7 @@ limitations under the License. */ ...@@ -19,7 +19,7 @@ limitations under the License. */
#include "framework/op_proto_maker.h" #include "framework/op_proto_maker.h"
#include "framework/op_registry.h" #include "framework/op_registry.h"
#include "operators/conv_op.h" #include "operators/conv_op.h"
#include "operators/math/conv_func.h" #include "operators/kernel/central-arm-func/conv_arm_func.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -40,9 +40,9 @@ void DepthwiseConvOp<Dtype, T>::InferShape() const { ...@@ -40,9 +40,9 @@ void DepthwiseConvOp<Dtype, T>::InferShape() const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]}); std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) { for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back( output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i], dilations[i], paddings[i],
paddings[i], strides[i])); strides[i]));
} }
framework::DDim ddim = framework::make_ddim(output_shape); framework::DDim ddim = framework::make_ddim(output_shape);
......
...@@ -30,8 +30,7 @@ class DepthwiseConvOp : public framework::OperatorWithKernel< ...@@ -30,8 +30,7 @@ class DepthwiseConvOp : public framework::OperatorWithKernel<
public: public:
DepthwiseConvOp(const std::string &type, const VariableNameMap &inputs, DepthwiseConvOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs, framework::Scope *scope)
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType, ConvParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, ConvParam<DeviceType>,
operators::ConvKernel<DeviceType, T>>( operators::ConvKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
...@@ -32,8 +32,7 @@ class DequantizeOp ...@@ -32,8 +32,7 @@ class DequantizeOp
public: public:
DequantizeOp(const std::string &type, const VariableNameMap &inputs, DequantizeOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs, framework::Scope *scope)
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType, DequantizeParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, DequantizeParam<DeviceType>,
DequantizeKernel<DeviceType, T>>( DequantizeKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
...@@ -34,7 +34,7 @@ class DropoutOp : public framework::OperatorWithKernel< ...@@ -34,7 +34,7 @@ class DropoutOp : public framework::OperatorWithKernel<
public: public:
DropoutOp(const std::string &type, const VariableNameMap &inputs, DropoutOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const framework::AttributeMap attrs, const VariableNameMap &outputs, const framework::AttributeMap attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel<DeviceType, DropoutParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, DropoutParam<DeviceType>,
operators::DropoutKernel<DeviceType, T>>( operators::DropoutKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
...@@ -32,9 +32,6 @@ namespace ops = paddle_mobile::operators; ...@@ -32,9 +32,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU #ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(elementwise_add, ops::ElementwiseAddOp); REGISTER_OPERATOR_CPU(elementwise_add, ops::ElementwiseAddOp);
#endif #endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(elementwise_add, ops::ElementwiseAddOp);
#endif
#ifdef PADDLE_MOBILE_CL #ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(elementwise_add, ops::ElementwiseAddOp); REGISTER_OPERATOR_CL(elementwise_add, ops::ElementwiseAddOp);
......
...@@ -32,7 +32,7 @@ class ElementwiseAddOp : public framework::OperatorWithKernel< ...@@ -32,7 +32,7 @@ class ElementwiseAddOp : public framework::OperatorWithKernel<
ElementwiseAddOp(const string &type, const VariableNameMap &inputs, ElementwiseAddOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, ElementwiseAddParam<DeviceType>, DeviceType, ElementwiseAddParam<DeviceType>,
operators::ElementwiseAddKernel<DeviceType, T>>( operators::ElementwiseAddKernel<DeviceType, T>>(
......
...@@ -32,9 +32,6 @@ namespace ops = paddle_mobile::operators; ...@@ -32,9 +32,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU #ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(elementwise_mul, ops::ElementwiseMulOp); REGISTER_OPERATOR_CPU(elementwise_mul, ops::ElementwiseMulOp);
#endif #endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(elementwise_mul, ops::ElementwiseMulOp);
#endif
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(elementwise_mul, ops::ElementwiseMulOp); REGISTER_OPERATOR_FPGA(elementwise_mul, ops::ElementwiseMulOp);
#endif #endif
......
...@@ -32,7 +32,7 @@ class ElementwiseMulOp : public framework::OperatorWithKernel< ...@@ -32,7 +32,7 @@ class ElementwiseMulOp : public framework::OperatorWithKernel<
ElementwiseMulOp(const string &type, const VariableNameMap &inputs, ElementwiseMulOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, ElementwiseMulParam<DeviceType>, DeviceType, ElementwiseMulParam<DeviceType>,
operators::ElementwiseMulKernel<DeviceType, T>>( operators::ElementwiseMulKernel<DeviceType, T>>(
......
...@@ -32,9 +32,6 @@ namespace ops = paddle_mobile::operators; ...@@ -32,9 +32,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU #ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(elementwise_sub, ops::ElementwiseSubOp); REGISTER_OPERATOR_CPU(elementwise_sub, ops::ElementwiseSubOp);
#endif #endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(elementwise_sub, ops::ElementwiseSubOp);
#endif
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
#endif #endif
......
...@@ -32,7 +32,7 @@ class ElementwiseSubOp : public framework::OperatorWithKernel< ...@@ -32,7 +32,7 @@ class ElementwiseSubOp : public framework::OperatorWithKernel<
ElementwiseSubOp(const string &type, const VariableNameMap &inputs, ElementwiseSubOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, ElementwiseSubParam<DeviceType>, DeviceType, ElementwiseSubParam<DeviceType>,
operators::ElementwiseSubKernel<DeviceType, T>>( operators::ElementwiseSubKernel<DeviceType, T>>(
......
...@@ -21,7 +21,8 @@ template <typename DeviceType, typename T> ...@@ -21,7 +21,8 @@ template <typename DeviceType, typename T>
void FeedOp<DeviceType, T>::InferShape() const { void FeedOp<DeviceType, T>::InferShape() const {
auto out_dims = this->param_.Out()->dims(); auto out_dims = this->param_.Out()->dims();
out_dims[0] = this->param_.BatchSize(); out_dims[0] = this->param_.BatchSize();
auto input_dims = this->param_.InputX()->dims(); int col = this->param_.Col();
auto input_dims = this->param_.InputX()->at(col).dims();
if (input_dims.size() == 4) { if (input_dims.size() == 4) {
this->param_.Out()->Resize(input_dims); this->param_.Out()->Resize(input_dims);
} else { } else {
...@@ -37,9 +38,6 @@ namespace ops = paddle_mobile::operators; ...@@ -37,9 +38,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU #ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(feed, ops::FeedOp); REGISTER_OPERATOR_CPU(feed, ops::FeedOp);
#endif #endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(feed, ops::FeedOp);
#endif
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(feed, ops::FeedOp); REGISTER_OPERATOR_FPGA(feed, ops::FeedOp);
#endif #endif
......
...@@ -31,7 +31,7 @@ class FeedOp ...@@ -31,7 +31,7 @@ class FeedOp
public: public:
FeedOp(const std::string &type, const VariableNameMap &inputs, FeedOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const framework::AttributeMap attrs, const VariableNameMap &outputs, const framework::AttributeMap attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel<DeviceType, FeedParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, FeedParam<DeviceType>,
FeedKernel<DeviceType, T>>( FeedKernel<DeviceType, T>>(
......
...@@ -18,8 +18,9 @@ namespace operators { ...@@ -18,8 +18,9 @@ namespace operators {
template <typename DeviceType, typename T> template <typename DeviceType, typename T>
void FetchOp<DeviceType, T>::InferShape() const { void FetchOp<DeviceType, T>::InferShape() const {
int col = this->param_.Col();
auto x_dims = this->param_.InputX()->dims(); auto x_dims = this->param_.InputX()->dims();
this->param_.Out()->Resize(x_dims); this->param_.Out()->at(col).Resize(x_dims);
} }
} // namespace operators } // namespace operators
...@@ -29,9 +30,7 @@ namespace ops = paddle_mobile::operators; ...@@ -29,9 +30,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU #ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(fetch, ops::FetchOp); REGISTER_OPERATOR_CPU(fetch, ops::FetchOp);
#endif #endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(fetch, ops::FetchOp);
#endif
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fetch, ops::FetchOp); REGISTER_OPERATOR_FPGA(fetch, ops::FetchOp);
#endif #endif
......
...@@ -30,7 +30,7 @@ class FetchOp ...@@ -30,7 +30,7 @@ class FetchOp
public: public:
FetchOp(const string &type, const VariableNameMap &inputs, FetchOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const framework::AttributeMap attrs, const VariableNameMap &outputs, const framework::AttributeMap attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel<DeviceType, FetchParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, FetchParam<DeviceType>,
FetchKernel<DeviceType, T>>( FetchKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
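The feed/fetch hunks above add a Col() attribute so that several feed and fetch targets can share one list variable: FeedOp reads the col-th tensor of its input list, and FetchOp resizes the col-th tensor of its output list. A sketch of the updated FetchOp::InferShape, assuming FetchParam::Out() now returns a pointer to a std::vector of LoDTensors (the diff only shows the ->at(col) call sites):

template <typename DeviceType, typename T>
void FetchOp<DeviceType, T>::InferShape() const {
  int col = this->param_.Col();                 // slot in the fetch list
  auto x_dims = this->param_.InputX()->dims();  // single input tensor
  // Only the col-th output slot is resized; other fetch ops own other slots.
  this->param_.Out()->at(col).Resize(x_dims);
}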
...@@ -31,11 +31,10 @@ class FillConstantOp : public framework::OperatorBase<DeviceType> { ...@@ -31,11 +31,10 @@ class FillConstantOp : public framework::OperatorBase<DeviceType> {
public: public:
FillConstantOp(const std::string &type, const VariableNameMap &inputs, FillConstantOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap attrs, const framework::AttributeMap attrs, framework::Scope *scope)
std::shared_ptr<framework::Scope> scope)
: framework::OperatorBase<DeviceType>(type, inputs, outputs, attrs, : framework::OperatorBase<DeviceType>(type, inputs, outputs, attrs,
scope), scope),
param_(inputs, outputs, attrs, scope.get()) {} param_(inputs, outputs, attrs, scope) {}
void RunImpl() { void RunImpl() {
auto data_type = auto data_type =
static_cast<_PaddleMobile__Framework__Proto__VarType__Type>( static_cast<_PaddleMobile__Framework__Proto__VarType__Type>(
......
...@@ -49,8 +49,7 @@ class FlattenOp : public framework::OperatorWithKernel< ...@@ -49,8 +49,7 @@ class FlattenOp : public framework::OperatorWithKernel<
public: public:
FlattenOp(const std::string &type, const VariableNameMap &inputs, FlattenOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs, framework::Scope *scope)
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType, FlattenParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, FlattenParam<DeviceType>,
operators::FlattenKernel<DeviceType, T>>( operators::FlattenKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADDADDPRELU_OP
#include "operators/fusion_conv_add_add_prelu_op.h"
#include "operators/math/conv_func.h"
namespace paddle_mobile {
namespace operators {
template <typename Dtype, typename T>
void FusionConvAddAddPReluOp<Dtype, T>::InferShape() const {
auto in_dims = this->param_.Input()->dims();
auto filter_dims = this->param_.Filter()->dims();
const std::vector<int> &strides = this->param_.Strides();
std::vector<int> paddings = this->param_.Paddings();
int groups = this->param_.Groups();
std::vector<int> dilations = this->param_.Dilations();
PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
dilations.size() == paddings.size() &&
paddings.size() == strides.size()),
"ConvParam is not suitable");
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back(
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
paddings[i], strides[i]));
}
framework::DDim ddim = framework::make_ddim(output_shape);
this->param_.Output()->Resize(ddim);
}
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
REGISTER_FUSION_MATCHER(fusion_conv_add_add_prelu,
ops::FusionConvAddAddPReluOpMatcher);
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(fusion_conv_add_add_prelu, ops::FusionConvAddAddPReluOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_conv_add_add_prelu, ops::FusionConvAddAddPReluOp);
#endif
#endif // FUSION_CONVADDADDPRELU_OP
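InferShape above derives each spatial output dimension with math::ConvOutputSize (later hunks in this commit move the helper to operators/kernel/central-arm-func/conv_arm_func.h and drop the math:: qualifier). For reference, a sketch of the helper's usual definition in paddle-mobile, assuming symmetric padding and floor division:

inline int ConvOutputSize(int input_size, int filter_size, int dilation,
                          int padding, int stride) {
  // Effective kernel extent once dilation is applied.
  const int dkernel = dilation * (filter_size - 1) + 1;
  return (input_size + 2 * padding - dkernel) / stride + 1;
}

For example, a 224x224 input with a 3x3 filter, padding 1, stride 2, and dilation 1 yields (224 + 2 - 3) / 2 + 1 = 112 per spatial dimension.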
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADDADDPRELU_OP
#pragma once
#include <string>
#include <utility>
#include <vector>
#include "framework/operator.h"
#include "framework/program/program-optimize/fusion_op_register.h"
#include "operators/kernel/conv_add_add_prelu_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
class FusionConvAddAddPReluOpMatcher : public framework::FusionOpMatcher {
public:
FusionConvAddAddPReluOpMatcher() {
node_ = framework::Node(G_OP_TYPE_CONV);
node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
std::make_shared<framework::Node>(G_OP_TYPE_PRELU);
}
void FolderNodes(
framework::Node *node,
std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
node->Folder(node_.Depth(), Type(),
{{G_OP_TYPE_ELEMENTWISE_ADD,
{{"Y", "Y"}, {"Out", "addOut"}, {"X", "addX"}}},
{G_OP_TYPE_PRELU, {{"Alpha", "Alpha"}}}},
removed_nodes);
}
std::string Type() { return G_OP_TYPE_FUSION_CONV_ADD_ADD_PRELU; }
std::vector<std::pair<int, std::string>> NeedCheck() {
DLOG << " conv add add prelu check add X ";
return {{2, "Y"}, {2, "X"}};
}
};
template <typename DeviceType, typename T>
class FusionConvAddAddPReluOp
: public framework::OperatorWithKernel<
DeviceType, FusionConvAddAddPReluParam<DeviceType>,
operators::ConvAddAddPReluKernel<DeviceType, T>> {
public:
FusionConvAddAddPReluOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<
DeviceType, FusionConvAddAddPReluParam<DeviceType>,
operators::ConvAddAddPReluKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
void InferShape() const override;
protected:
};
} // namespace operators
} // namespace paddle_mobile
#endif
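For readers unfamiliar with the matcher DSL above: framework::Node overloads operator> to append a successor, so the constructor describes the linear pattern conv2d > elementwise_add > elementwise_add > prelu. A comment-only sketch of what FolderNodes does with a match (an interpretation of the code above, not a verbatim quote from the framework):

// Pattern built in the matcher constructor:
//   conv2d --> elementwise_add --> elementwise_add --> prelu
// On a match, node->Folder(...) collapses the chain (to node_.Depth() levels)
// into a single fusion_conv_add_add_prelu node and remaps the inner ops'
// slots so they stay addressable on the fused op:
//   elementwise_add: Y -> Y, Out -> addOut, X -> addX
//   prelu:           Alpha -> Alpha
// NeedCheck() flags the depth-2 add's X and Y inputs for a runtime
// compatibility check before the fusion is applied.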
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVADDBN_OP #ifdef FUSION_CONVADDBN_OP
#include "operators/fusion_conv_add_bn_op.h" #include "operators/fusion_conv_add_bn_op.h"
#include "operators/math/conv_func.h" #include "operators/kernel/central-arm-func/conv_arm_func.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -36,9 +36,9 @@ void FusionConvAddBNOp<Dtype, T>::InferShape() const { ...@@ -36,9 +36,9 @@ void FusionConvAddBNOp<Dtype, T>::InferShape() const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]}); std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) { for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back( output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i], dilations[i], paddings[i],
paddings[i], strides[i])); strides[i]));
} }
framework::DDim ddim = framework::make_ddim(output_shape); framework::DDim ddim = framework::make_ddim(output_shape);
......
...@@ -20,8 +20,8 @@ limitations under the License. */ ...@@ -20,8 +20,8 @@ limitations under the License. */
#include <vector> #include <vector>
#include "framework/operator.h" #include "framework/operator.h"
#include "framework/program/program-optimize/fusion_op_register.h" #include "framework/program/program-optimize/fusion_op_register.h"
#include "op_param.h"
#include "operators/kernel/conv_add_bn_kernel.h" #include "operators/kernel/conv_add_bn_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -59,7 +59,7 @@ class FusionConvAddBNOp : public framework::OperatorWithKernel< ...@@ -59,7 +59,7 @@ class FusionConvAddBNOp : public framework::OperatorWithKernel<
FusionConvAddBNOp(const string &type, const VariableNameMap &inputs, FusionConvAddBNOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionConvAddBNParam<DeviceType>, DeviceType, FusionConvAddBNParam<DeviceType>,
operators::ConvAddBNKernel<DeviceType, T>>(type, inputs, outputs, operators::ConvAddBNKernel<DeviceType, T>>(type, inputs, outputs,
......
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVADDBNRELU_OP #ifdef FUSION_CONVADDBNRELU_OP
#include "operators/fusion_conv_add_bn_relu_op.h" #include "operators/fusion_conv_add_bn_relu_op.h"
#include "operators/math/conv_func.h" #include "operators/kernel/central-arm-func/conv_arm_func.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -36,9 +36,9 @@ void FusionConvAddBNReluOp<Dtype, T>::InferShape() const { ...@@ -36,9 +36,9 @@ void FusionConvAddBNReluOp<Dtype, T>::InferShape() const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]}); std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) { for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back( output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i], dilations[i], paddings[i],
paddings[i], strides[i])); strides[i]));
} }
framework::DDim ddim = framework::make_ddim(output_shape); framework::DDim ddim = framework::make_ddim(output_shape);
......
...@@ -61,7 +61,7 @@ class FusionConvAddBNReluOp ...@@ -61,7 +61,7 @@ class FusionConvAddBNReluOp
FusionConvAddBNReluOp(const string &type, const VariableNameMap &inputs, FusionConvAddBNReluOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionConvAddBNReluParam<DeviceType>, DeviceType, FusionConvAddBNReluParam<DeviceType>,
operators::ConvAddBNReluKernel<DeviceType, T>>( operators::ConvAddBNReluKernel<DeviceType, T>>(
......
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVADD_OP #ifdef FUSION_CONVADD_OP
#include "operators/fusion_conv_add_op.h" #include "operators/fusion_conv_add_op.h"
#include "operators/math/conv_func.h" #include "operators/kernel/central-arm-func/conv_arm_func.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -36,9 +36,9 @@ void FusionConvAddOp<Dtype, T>::InferShape() const { ...@@ -36,9 +36,9 @@ void FusionConvAddOp<Dtype, T>::InferShape() const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]}); std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) { for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back( output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i], dilations[i], paddings[i],
paddings[i], strides[i])); strides[i]));
} }
framework::DDim ddim = framework::make_ddim(output_shape); framework::DDim ddim = framework::make_ddim(output_shape);
...@@ -54,9 +54,6 @@ REGISTER_FUSION_MATCHER(fusion_conv_add, ops::FusionConvAddMatcher); ...@@ -54,9 +54,6 @@ REGISTER_FUSION_MATCHER(fusion_conv_add, ops::FusionConvAddMatcher);
#ifdef PADDLE_MOBILE_CPU #ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(fusion_conv_add, ops::FusionConvAddOp); REGISTER_OPERATOR_CPU(fusion_conv_add, ops::FusionConvAddOp);
#endif #endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(fusion_conv_add, ops::FusionConvAddOp);
#endif
#ifdef PADDLE_MOBILE_CL #ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(fusion_conv_add, ops::FusionConvAddOp); REGISTER_OPERATOR_CL(fusion_conv_add, ops::FusionConvAddOp);
......
...@@ -50,8 +50,7 @@ class FusionConvAddOp : public framework::OperatorWithKernel< ...@@ -50,8 +50,7 @@ class FusionConvAddOp : public framework::OperatorWithKernel<
public: public:
FusionConvAddOp(const string &type, const VariableNameMap &inputs, FusionConvAddOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs, framework::Scope *scope)
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType, : framework::OperatorWithKernel<DeviceType,
FusionConvAddParam<DeviceType>, FusionConvAddParam<DeviceType>,
operators::ConvAddKernel<DeviceType, T>>( operators::ConvAddKernel<DeviceType, T>>(
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADDPRELU_OP
#include "operators/fusion_conv_add_prelu_op.h"
#include "operators/math/conv_func.h"
namespace paddle_mobile {
namespace operators {
template <typename Dtype, typename T>
void FusionConvAddPReluOp<Dtype, T>::InferShape() const {
auto in_dims = this->param_.Input()->dims();
auto filter_dims = this->param_.Filter()->dims();
const std::vector<int> &strides = this->param_.Strides();
std::vector<int> paddings = this->param_.Paddings();
int groups = this->param_.Groups();
std::vector<int> dilations = this->param_.Dilations();
PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
dilations.size() == paddings.size() &&
paddings.size() == strides.size()),
"ConvParam is not suitable");
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back(
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
paddings[i], strides[i]));
}
framework::DDim ddim = framework::make_ddim(output_shape);
this->param_.Output()->Resize(ddim);
}
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
REGISTER_FUSION_MATCHER(fusion_conv_add_prelu,
ops::FusionConvAddPReluOpMatcher);
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(fusion_conv_add_prelu, ops::FusionConvAddPReluOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_conv_add_prelu, ops::FusionConvAddPReluOp);
#endif
#endif
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVADDRELU_OP #ifdef FUSION_CONVADDRELU_OP
#include "operators/fusion_conv_add_relu_op.h" #include "operators/fusion_conv_add_relu_op.h"
#include "operators/math/conv_func.h" #include "operators/kernel/central-arm-func/conv_arm_func.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -36,9 +36,9 @@ void FusionConvAddReluOp<Dtype, T>::InferShape() const { ...@@ -36,9 +36,9 @@ void FusionConvAddReluOp<Dtype, T>::InferShape() const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]}); std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) { for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back( output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i], dilations[i], paddings[i],
paddings[i], strides[i])); strides[i]));
} }
framework::DDim ddim = framework::make_ddim(output_shape); framework::DDim ddim = framework::make_ddim(output_shape);
this->param_.Output()->Resize(ddim); this->param_.Output()->Resize(ddim);
......
...@@ -51,7 +51,7 @@ class FusionConvAddReluOp : public framework::OperatorWithKernel< ...@@ -51,7 +51,7 @@ class FusionConvAddReluOp : public framework::OperatorWithKernel<
FusionConvAddReluOp(const string &type, const VariableNameMap &inputs, FusionConvAddReluOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionConvAddReluParam<DeviceType>, DeviceType, FusionConvAddReluParam<DeviceType>,
operators::ConvAddReluKernel<DeviceType, T>>(type, inputs, outputs, operators::ConvAddReluKernel<DeviceType, T>>(type, inputs, outputs,
......
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVBNADDRELU_OP #ifdef FUSION_CONVBNADDRELU_OP
#include "operators/fusion_conv_bn_add_relu_op.h" #include "operators/fusion_conv_bn_add_relu_op.h"
#include "operators/math/conv_func.h" #include "operators/kernel/central-arm-func/conv_arm_func.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -36,9 +36,9 @@ void FusionConvBNAddReluOp<Dtype, T>::InferShape() const { ...@@ -36,9 +36,9 @@ void FusionConvBNAddReluOp<Dtype, T>::InferShape() const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]}); std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) { for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back( output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i], dilations[i], paddings[i],
paddings[i], strides[i])); strides[i]));
} }
framework::DDim ddim = framework::make_ddim(output_shape); framework::DDim ddim = framework::make_ddim(output_shape);
......
...@@ -67,7 +67,7 @@ class FusionConvBNAddReluOp ...@@ -67,7 +67,7 @@ class FusionConvBNAddReluOp
FusionConvBNAddReluOp(const string &type, const VariableNameMap &inputs, FusionConvBNAddReluOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionConvBNAddReluParam<DeviceType>, DeviceType, FusionConvBNAddReluParam<DeviceType>,
operators::ConvBNAddReluKernel<DeviceType, T>>( operators::ConvBNAddReluKernel<DeviceType, T>>(
......
...@@ -15,6 +15,7 @@ limitations under the License. */ ...@@ -15,6 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVBN_OP #ifdef FUSION_CONVBN_OP
#include "operators/fusion_conv_bn_op.h" #include "operators/fusion_conv_bn_op.h"
#include "operators/kernel/central-arm-func/conv_arm_func.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -35,9 +36,9 @@ void FusionConvBNOp<Dtype, T>::InferShape() const { ...@@ -35,9 +36,9 @@ void FusionConvBNOp<Dtype, T>::InferShape() const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]}); std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) { for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back( output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i], dilations[i], paddings[i],
paddings[i], strides[i])); strides[i]));
} }
framework::DDim ddim = framework::make_ddim(output_shape); framework::DDim ddim = framework::make_ddim(output_shape);
......
...@@ -56,8 +56,7 @@ class FusionConvBNOp : public framework::OperatorWithKernel< ...@@ -56,8 +56,7 @@ class FusionConvBNOp : public framework::OperatorWithKernel<
public: public:
FusionConvBNOp(const string &type, const VariableNameMap &inputs, FusionConvBNOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs, framework::Scope *scope)
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType, FusionConvBNParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, FusionConvBNParam<DeviceType>,
operators::ConvBNKernel<DeviceType, T>>( operators::ConvBNKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVBNRELU_OP #ifdef FUSION_CONVBNRELU_OP
#include "operators/fusion_conv_bn_relu_op.h" #include "operators/fusion_conv_bn_relu_op.h"
#include "operators/math/conv_func.h" #include "operators/kernel/central-arm-func/conv_arm_func.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -36,9 +36,9 @@ void FusionConvBNReluOp<Dtype, T>::InferShape() const { ...@@ -36,9 +36,9 @@ void FusionConvBNReluOp<Dtype, T>::InferShape() const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]}); std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) { for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back( output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i], dilations[i], paddings[i],
paddings[i], strides[i])); strides[i]));
} }
framework::DDim ddim = framework::make_ddim(output_shape); framework::DDim ddim = framework::make_ddim(output_shape);
......
...@@ -58,7 +58,7 @@ class FusionConvBNReluOp : public framework::OperatorWithKernel< ...@@ -58,7 +58,7 @@ class FusionConvBNReluOp : public framework::OperatorWithKernel<
FusionConvBNReluOp(const string &type, const VariableNameMap &inputs, FusionConvBNReluOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionConvBNReluParam<DeviceType>, DeviceType, FusionConvBNReluParam<DeviceType>,
operators::ConvBNReluKernel<DeviceType, T>>(type, inputs, outputs, operators::ConvBNReluKernel<DeviceType, T>>(type, inputs, outputs,
......
...@@ -24,8 +24,7 @@ namespace ops = paddle_mobile::operators; ...@@ -24,8 +24,7 @@ namespace ops = paddle_mobile::operators;
REGISTER_FUSION_MATCHER(fusion_deconv_add_bn, ops::FusionDeconvAddBNMatcher); REGISTER_FUSION_MATCHER(fusion_deconv_add_bn, ops::FusionDeconvAddBNMatcher);
#ifdef PADDLE_MOBILE_CPU #ifdef PADDLE_MOBILE_CPU
#endif #endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_deconv_add_bn, ops::FusionDeconvAddBNOp); REGISTER_OPERATOR_FPGA(fusion_deconv_add_bn, ops::FusionDeconvAddBNOp);
#endif #endif
......
...@@ -57,7 +57,7 @@ class FusionDeconvAddBNOp : public framework::OperatorWithKernel< ...@@ -57,7 +57,7 @@ class FusionDeconvAddBNOp : public framework::OperatorWithKernel<
FusionDeconvAddBNOp(const string &type, const VariableNameMap &inputs, FusionDeconvAddBNOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionDeconvAddBNParam<DeviceType>, DeviceType, FusionDeconvAddBNParam<DeviceType>,
operators::DeconvAddBNKernel<DeviceType, T>>(type, inputs, outputs, operators::DeconvAddBNKernel<DeviceType, T>>(type, inputs, outputs,
......
...@@ -25,8 +25,7 @@ REGISTER_FUSION_MATCHER(fusion_deconv_add_bn_relu, ...@@ -25,8 +25,7 @@ REGISTER_FUSION_MATCHER(fusion_deconv_add_bn_relu,
ops::FusionDeconvAddBNReluMatcher); ops::FusionDeconvAddBNReluMatcher);
#ifdef PADDLE_MOBILE_CPU #ifdef PADDLE_MOBILE_CPU
#endif #endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_deconv_add_bn_relu, ops::FusionDeconvAddBNReluOp); REGISTER_OPERATOR_FPGA(fusion_deconv_add_bn_relu, ops::FusionDeconvAddBNReluOp);
#endif #endif
......
...@@ -59,7 +59,7 @@ class FusionDeconvAddBNReluOp ...@@ -59,7 +59,7 @@ class FusionDeconvAddBNReluOp
FusionDeconvAddBNReluOp(const string &type, const VariableNameMap &inputs, FusionDeconvAddBNReluOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionDeconvAddBNReluParam<DeviceType>, DeviceType, FusionDeconvAddBNReluParam<DeviceType>,
operators::DeconvAddBNReluKernel<DeviceType, T>>( operators::DeconvAddBNReluKernel<DeviceType, T>>(
......
...@@ -24,8 +24,7 @@ namespace ops = paddle_mobile::operators; ...@@ -24,8 +24,7 @@ namespace ops = paddle_mobile::operators;
REGISTER_FUSION_MATCHER(fusion_deconv_add, ops::FusionDeconvAddMatcher); REGISTER_FUSION_MATCHER(fusion_deconv_add, ops::FusionDeconvAddMatcher);
#ifdef PADDLE_MOBILE_CPU #ifdef PADDLE_MOBILE_CPU
#endif #endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_deconv_add, ops::FusionDeconvAddOp); REGISTER_OPERATOR_FPGA(fusion_deconv_add, ops::FusionDeconvAddOp);
#endif #endif
......
...@@ -49,7 +49,7 @@ class FusionDeconvAddOp : public framework::OperatorWithKernel< ...@@ -49,7 +49,7 @@ class FusionDeconvAddOp : public framework::OperatorWithKernel<
FusionDeconvAddOp(const string &type, const VariableNameMap &inputs, FusionDeconvAddOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionDeconvAddParam<DeviceType>, DeviceType, FusionDeconvAddParam<DeviceType>,
operators::DeconvAddKernel<DeviceType, T>>(type, inputs, outputs, operators::DeconvAddKernel<DeviceType, T>>(type, inputs, outputs,
......
(The remaining file diffs in this commit are collapsed.)