提交 b2e33f33 编写于 作者: Y yangfei

add some function

上级 de4b0ffb
...@@ -7,7 +7,7 @@ option(DEBUGING "enable debug mode" ON) ...@@ -7,7 +7,7 @@ option(DEBUGING "enable debug mode" ON)
option(USE_EXCEPTION "use std exception" OFF) option(USE_EXCEPTION "use std exception" OFF)
option(LOG_PROFILE "log profile" OFF) option(LOG_PROFILE "log profile" OFF)
# select the platform to build # select the platform to build
option(CPU "armv7 with neon" ON) option(CPU "armv7 with neon" OFF)
option(GPU_MALI "mali gpu" OFF) option(GPU_MALI "mali gpu" OFF)
option(GPU_CL "opencl gpu" ON) option(GPU_CL "opencl gpu" ON)
option(FPGA "fpga" OFF) option(FPGA "fpga" OFF)
......
...@@ -27,38 +27,62 @@ class CLImage { ...@@ -27,38 +27,62 @@ class CLImage {
CLImage() = default; CLImage() = default;
void Init(cl_context context, float *tensorInput, DDim ddim) { void Init(cl_context context, float *tensorInput, DDim ddim) {
tensorDims_ = ddim;
cl_image_format cf = { cl_image_format cf = {
.image_channel_order = CL_RGBA, .image_channel_order = CL_RGBA,
.image_channel_data_type = CL_HALF_FLOAT .image_channel_data_type = CL_HALF_FLOAT
}; };
// NCHW -> [W * (C+3)/4, H * N] // NCHW -> [W * (C+3)/4, H * N]
size_t N = tensorDims_[0]; DLOG<<tensorDims_;
size_t C = tensorDims_[1]; size_t N,C,H,W;
size_t H = tensorDims_[2]; if(tensorDims_.size()==4){
size_t W = tensorDims_[3]; N = tensorDims_[0];
if(N<0){
N = 1;
}
C = tensorDims_[1];
H = tensorDims_[2];
W = tensorDims_[3];
}else if(tensorDims_.size()==1){
N = 1;
C = tensorDims_[0];
H = 1;
W = 1;
}
DLOG<<"-------InitMemory-------";
size_t width = W * ((C + 3) / 4); size_t width = W * ((C + 3) / 4);
size_t height = H * N; size_t height = H * N;
std::unique_ptr<half_t[]> imageData{}; std::unique_ptr<half_t[]> imageData{};
int count = 0;
if (tensorInput != nullptr) { if (tensorInput != nullptr) {
imageData.reset(new half_t[width * height * 4]); imageData.reset(new half_t[width * height * 4]);
float *p = tensorInput; float *p = tensorInput;
size_t i0 = 0; size_t i0 = 0;
for (int n = 0; n < N; n++) { for (int n = 0; n < N; n++) {
for (int c = 0; c < C; c++) { for (int c = 0; c < C; c++) {
size_t i1 = i0; size_t i1 = i0;
for (int h = 0; h < H; h++) { for (int h = 0; h < H; h++) {
size_t i2 = i1 << 2 + c % 4; size_t i2 = (i1<<2) + c % 4;
for (int w = 0; w < W; w++) { for (int w = 0; w < W; w++) {
imageData[i2] = float2half(*p); if (i2 >= width * height * 4) {
i2 += 4; printf("%d > %d ----> %d, %d, %d, %d --- %d, %d, %d\n", i2, width*height*4, n, c, h, w, i0, i1, i2);
p++; }
} assert(i2 < width * height * 4);
i1 += width;
} imageData[i2] = float2half(*p);
} i2 += 4;
i0 += width * H; p++;
} // count++;
// DLOG<<count;
}
i1 += width;
}
}
i0 += width * H;
}
} }
DLOG<<"-------InitMemory-------";
cl_int err; cl_int err;
cl_image_ = clCreateImage2D( cl_image_ = clCreateImage2D(
context, // cl_context context context, // cl_context context
......
...@@ -801,6 +801,106 @@ void Executor<Dtype, P>::Predict_To(int end) { ...@@ -801,6 +801,106 @@ void Executor<Dtype, P>::Predict_To(int end) {
#endif #endif
#ifdef PADDLE_MOBILE_CL #ifdef PADDLE_MOBILE_CL
/**
 * Decode one serialized tensor record from a raw model byte stream into a
 * caller-provided float buffer (GPU_CL / FP32 specialization).
 *
 * Fields consumed from the stream, in order:
 *   1. uint32  version                      (skipped, unused)
 *   2. uint64  LoD level count, then for each level a uint64 byte size
 *              followed by that level's entries (skipped, unused)
 *   3. uint32  tensor version               (skipped, unused)
 *   4. int32   tensor-descriptor byte size, then the descriptor bytes
 *              (skipped; the shape is taken from var_desc instead)
 *   5. payload: when program_.quantification is set, two floats (min, max)
 *              followed by one uint8 per element; otherwise one 4-byte
 *              float per element.
 *
 * @param var_desc     variable descriptor; the product of its tensor dims is
 *                     the number of elements decoded.
 * @param tensorInput  destination buffer; the caller must provide room for at
 *                     least that many floats.
 * @param data         in/out cursor into the byte stream; on return it points
 *                     just past the consumed record.
 */
template <>
void Executor<GPU_CL, Precision::FP32>::LoadMemory(const framework::VarDesc var_desc,
                                                   float *tensorInput, char **data) {
  // Multi-byte header fields are fetched with memcpy rather than by
  // dereferencing a reinterpret_cast pointer: the cursor has no alignment
  // guarantee, and a misaligned typed load is undefined behaviour.

  // 1. version -- never used, just step over it.
  (*data) += sizeof(uint32_t);

  // 2. LoD information -- only needed to find where the tensor starts.
  uint64_t lod_level = 0;
  memcpy(&lod_level, *data, sizeof(lod_level));
  (*data) += sizeof(uint64_t);
  for (uint64_t i = 0; i < lod_level; ++i) {
    uint64_t level_bytes = 0;
    memcpy(&level_bytes, *data, sizeof(level_bytes));
    (*data) += sizeof(uint64_t);
    // The entries are not used; advance by whole size_t-sized entries, the
    // same distance as reading them one by one would cover.
    (*data) += (level_bytes / sizeof(size_t)) * sizeof(size_t);
  }

  // 3. tensor version -- never used.
  (*data) += sizeof(uint32_t);

  // 4. tensor desc -- skipped; dimensions come from var_desc below.
  int32_t desc_bytes = 0;
  memcpy(&desc_bytes, *data, sizeof(desc_bytes));
  (*data) += sizeof(int32_t);
  (*data) += desc_bytes;

  // Element count = product of all dimensions of the variable.
  const framework::TensorDesc &desc = var_desc.Tensor_desc();
  int memory_size = 1;
  for (auto l : desc.Dims()) {
    memory_size *= l;
  }

  // 5. payload. The destination is always float, whatever data type the
  // descriptor declares, so every stored element is treated as 4 bytes wide.
  if (program_.quantification) {
    // Quantized weights: uint8 codes mapped linearly onto [min, max].
    float min_value;
    float max_value;
    memcpy(&min_value, *data, sizeof(float));
    memcpy(&max_value, *data + sizeof(float), sizeof(float));
    *data += 2 * sizeof(float);
    const float factor = (max_value - min_value) / 255.0;
    const uint8_t *quantized = reinterpret_cast<const uint8_t *>(*data);
    for (int k = 0; k < memory_size; ++k) {
      tensorInput[k] = quantized[k] * factor + min_value;
    }
    *data += (memory_size * sizeof(uint8_t));
  } else {
    for (int n = 0; n < memory_size; n++) {
      float value;
      memcpy(&value, *data + n * sizeof(float), sizeof(float));
      // Flush magnitudes below 1e-30 to exactly zero.
      if (value < 1e-30 && value > -1e-30) {
        tensorInput[n] = 0.0;
      } else {
        tensorInput[n] = value;
      }
    }
    (*data) += (sizeof(char) * memory_size * sizeof(float));
  }
}
template <> template <>
void Executor<GPU_CL, Precision::FP32>::InitMemory() { void Executor<GPU_CL, Precision::FP32>::InitMemory() {
...@@ -812,26 +912,38 @@ void Executor<GPU_CL, Precision::FP32>::InitMemory() { ...@@ -812,26 +912,38 @@ void Executor<GPU_CL, Precision::FP32>::InitMemory() {
if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") { if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
continue; continue;
} }
char *origin_data = char *origin_data =
Get_binary_data(program_.model_path + "/" + var_desc->Name()); Get_binary_data(program_.model_path + "/" + var_desc->Name());
char *data = origin_data;
cl_context context = program_.scope->GetCLScpoe()->Context(); cl_context context = program_.scope->GetCLScpoe()->Context();
const framework::TensorDesc &desc = var_desc->Tensor_desc();
float *tensorInput = (float *)origin_data; int numel = 1;
for (auto l : desc.Dims()) {
const framework::TensorDesc &desc = var_desc->Tensor_desc(); numel *= l;
framework::DDim ddim = cl_image->dims(); }
DLOG<<var_desc->Name();
float *tensorInput = static_cast<float *>(
paddle_mobile::memory::Alloc(sizeof(float) * numel));
LoadMemory(*var_desc,tensorInput,&data);
framework::DDim ddim = framework::make_ddim(desc.Dims());
cl_image->Init(context, tensorInput, ddim); cl_image->Init(context, tensorInput, ddim);
delete origin_data;
delete origin_data;
paddle_mobile::memory::Free(tensorInput);
}else{ }else{
auto cl_image = var->template GetMutable<framework::CLImage>(); if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
cl_context context = program_.scope->GetCLScpoe()->Context(); auto cl_image = var->template GetMutable<framework::CLImage>();
cl_context context = program_.scope->GetCLScpoe()->Context();
const framework::TensorDesc &desc = var_desc->Tensor_desc(); const framework::TensorDesc &desc = var_desc->Tensor_desc();
framework::DDim ddim = cl_image->dims(); framework::DDim ddim = framework::make_ddim(desc.Dims());
DLOG<<var_desc->Name();
cl_image->Init(context, ddim); cl_image->Init(context, ddim);
}
} }
} }
...@@ -863,21 +975,23 @@ void Executor<GPU_CL, Precision::FP32>::InitCombineMemory() { ...@@ -863,21 +975,23 @@ void Executor<GPU_CL, Precision::FP32>::InitCombineMemory() {
cl_context context = program_.scope->GetCLScpoe()->Context(); cl_context context = program_.scope->GetCLScpoe()->Context();
const framework::TensorDesc &desc = var_desc->Tensor_desc(); const framework::TensorDesc &desc = var_desc->Tensor_desc();
framework::DDim ddim = cl_image->dims(); framework::DDim ddim = framework::make_ddim(desc.Dims());
int numel = 1; int numel = 1;
for (int i = 0; i < ddim.size(); i++) { for (int i = 0; i < ddim.size(); i++) {
numel = numel * ddim[i]; numel = numel * ddim[i];
} }
float *tensorInput = data; float *tensorInput = static_cast<float *>(
paddle_mobile::memory::Alloc(sizeof(float) * numel));
LoadMemory(*var_desc,tensorInput,&origin_data);
cl_image->Init(context, tensorInput, ddim); cl_image->Init(context, tensorInput, ddim);
data += numel; paddle_mobile::memory::Free(tensorInput);
}else{ }else{
auto cl_image = var->template GetMutable<framework::CLImage>(); auto cl_image = var->template GetMutable<framework::CLImage>();
cl_context context = program_.scope->GetCLScpoe()->Context(); cl_context context = program_.scope->GetCLScpoe()->Context();
const framework::TensorDesc &desc = var_desc->Tensor_desc(); const framework::TensorDesc &desc = var_desc->Tensor_desc();
framework::DDim ddim = cl_image->dims(); framework::DDim ddim = framework::make_ddim(desc.Dims());
cl_image->Init(context, ddim); cl_image->Init(context, ddim);
} }
......
...@@ -73,6 +73,8 @@ class Executor { ...@@ -73,6 +73,8 @@ class Executor {
void LoadMemory(const framework::VarDesc var_desc, void LoadMemory(const framework::VarDesc var_desc,
framework::LoDTensor *tensor, char **data); framework::LoDTensor *tensor, char **data);
void LoadMemory(const framework::VarDesc var_desc,
float * tensorInput, char **data);
void InitCombineMemory(); void InitCombineMemory();
......
...@@ -16,6 +16,9 @@ limitations under the License. */ ...@@ -16,6 +16,9 @@ limitations under the License. */
#include "framework/lod_tensor.h" #include "framework/lod_tensor.h"
#include "framework/program/program-optimize/program_optimize.h" #include "framework/program/program-optimize/program_optimize.h"
#ifdef PADDLE_MOBILE_CL
#include "framework/cl/cl_image.h"
#endif
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
...@@ -26,7 +29,8 @@ namespace framework { ...@@ -26,7 +29,8 @@ namespace framework {
* @param originProgramDesc * @param originProgramDesc
* @param scope * @param scope
*/ */
void InitMemoryFromProgram( template<typename Dtype, Precision P>
void Loader<Dtype, P>::InitMemoryFromProgram(
std::shared_ptr<ProgramDesc> &originProgramDesc, std::shared_ptr<ProgramDesc> &originProgramDesc,
std::shared_ptr<Scope> &scope) { std::shared_ptr<Scope> &scope) {
for (const auto &block : originProgramDesc.get()->Blocks()) { for (const auto &block : originProgramDesc.get()->Blocks()) {
...@@ -51,6 +55,35 @@ void InitMemoryFromProgram( ...@@ -51,6 +55,35 @@ void InitMemoryFromProgram(
} }
} }
#ifdef PADDLE_MOBILE_CL
/**
 * OpenCL specialization: registers every variable of every block in the
 * scope and, for LoD-tensor variables, sizes the backing CLImage from the
 * variable's tensor descriptor.
 *
 * Persistable variables keep their declared dims. For non-persistable
 * variables the leading dimension is overwritten with 1 before resizing.
 *
 * @param originProgramDesc program description whose blocks/vars are walked
 * @param scope             scope in which the variables are created
 */
template<>
void Loader<GPU_CL, Precision::FP32>::InitMemoryFromProgram(
    std::shared_ptr<ProgramDesc> &originProgramDesc,
    std::shared_ptr<Scope> &scope) {
  for (const auto &block : originProgramDesc->Blocks()) {
    for (const auto &var_desc : block->Vars()) {
      // The variable is registered in the scope regardless of its type.
      auto var = scope->Var(var_desc->Name());
      if (var_desc->Type() != VARTYPE_TYPE_LOD_TENSOR) {
        // TODO(codeWorm): other variable types are not handled yet.
        continue;
      }

      auto dim = var_desc->Tensor_desc().Dims();
      if (!var_desc->Persistable()) {
        // Non-persistable tensors get their first dimension forced to 1.
        PADDLE_MOBILE_ENFORCE(dim.size() > 0, "dim size is 0");
        dim[0] = 1;
      }

      auto cl_image = var->GetMutable<framework::CLImage>();
      cl_image->Resize(make_ddim(dim));
    }
  }
}
#endif
/** /**
* fusion and print someinfos * fusion and print someinfos
* @tparam Dtype * @tparam Dtype
......
...@@ -53,6 +53,9 @@ class Loader { ...@@ -53,6 +53,9 @@ class Loader {
bool optimize = false, bool optimize = false,
bool quantification = false, bool quantification = false,
bool can_add_split = false); bool can_add_split = false);
void InitMemoryFromProgram(std::shared_ptr<ProgramDesc> &originProgramDesc,
std::shared_ptr<Scope> &scope);
}; };
} }
......
...@@ -40,4 +40,8 @@ REGISTER_OPERATOR_MALI_GPU(batch_norm, ops::BatchNormOp); ...@@ -40,4 +40,8 @@ REGISTER_OPERATOR_MALI_GPU(batch_norm, ops::BatchNormOp);
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
#endif #endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(batch_norm, ops::BatchNormOp);
#endif
#endif #endif
...@@ -54,5 +54,8 @@ USE_OP_MALI_GPU(batch_norm); ...@@ -54,5 +54,8 @@ USE_OP_MALI_GPU(batch_norm);
#endif #endif
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
#endif #endif
#ifdef PADDLE_MOBILE_CL
USE_OP_CL(batch_norm);
#endif
#endif #endif
...@@ -77,7 +77,6 @@ class FeedOp : public framework::OperatorBase<DeviceType> { ...@@ -77,7 +77,6 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
void Init() {} void Init() {}
void RunImpl() { void RunImpl() {
} }
#else #else
void Init() {} void Init() {}
......
...@@ -43,3 +43,6 @@ REGISTER_OPERATOR_MALI_GPU(fetch, ops::FetchOp); ...@@ -43,3 +43,6 @@ REGISTER_OPERATOR_MALI_GPU(fetch, ops::FetchOp);
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fetch, ops::FetchOp); REGISTER_OPERATOR_FPGA(fetch, ops::FetchOp);
#endif #endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(fetch, ops::FetchOp);
#endif
...@@ -54,3 +54,6 @@ USE_OP_MALI_GPU(fetch); ...@@ -54,3 +54,6 @@ USE_OP_MALI_GPU(fetch);
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
USE_OP_FPGA(fetch); USE_OP_FPGA(fetch);
#endif #endif
#ifdef PADDLE_MOBILE_CL
USE_OP_CL(fetch);
#endif
...@@ -15,22 +15,21 @@ limitations under the License. */ ...@@ -15,22 +15,21 @@ limitations under the License. */
#ifdef CONV_OP #ifdef CONV_OP
#include "operators/kernel/conv_kernel.h" #include "operators/kernel/conv_kernel.h"
#include "operators/kernel/central-arm-func/conv_arm_func.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
template <> template <>
bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) { bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
this->cl_helper_.AddKernel("conv_3x3", "conv_kernel.cl"); // this->cl_helper_.AddKernel("conv_3x3", "conv_kernel.cl");
return true; return true;
} }
template <> template <>
void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) { void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
auto kernel = this->cl_helper_.KernelAt(0); // auto kernel = this->cl_helper_.KernelAt(0);
size_t global_work_size[3] = {1, 2, 3}; // size_t global_work_size[3] = {1, 2, 3};
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, global_work_size, NULL, 0, NULL, NULL); // clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, global_work_size, NULL, 0, NULL, NULL);
} }
template class ConvKernel<GPU_CL, float>; template class ConvKernel<GPU_CL, float>;
......
...@@ -21,7 +21,7 @@ namespace paddle_mobile { ...@@ -21,7 +21,7 @@ namespace paddle_mobile {
template <> template <>
bool ElementwiseAddKernel<GPU_CL, float>::Init(ElementwiseAddParam<GPU_CL> *param) { bool ElementwiseAddKernel<GPU_CL, float>::Init(ElementwiseAddParam<GPU_CL> *param) {
this->cl_helper_.AddKernel("elementwise_add", "elementwise_add_kernel.cl"); // this->cl_helper_.AddKernel("elementwise_add", "elementwise_add_kernel.cl");
return true; return true;
} }
......
...@@ -12,22 +12,25 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,22 +12,25 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifdef SOFTMAX_OP
#include "operators/kernel/pool_kernel.h" #include "operators/kernel/softmax_kernel.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
template <> template <>
bool SoftmaxKernel<GPU_CL, float>::Init(SoftmaxParam<GPU_CL> *param) { bool SoftmaxKernel<GPU_CL, float>::Init(SoftmaxParam<GPU_CL> *param) {
return true; return true;
} }
template <> template <>
void SoftmaxKernel<GPU_CL, float>::Compute(const SoftmaxParam<GPU_CL> &param) {} void SoftmaxKernel<GPU_CL, float>::Compute(const SoftmaxParam<GPU_CL> &param) {}
template class SoftmaxKernel<GPU_CL, float>; template class SoftmaxKernel<GPU_CL, float>;
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
#endif
...@@ -68,5 +68,8 @@ REGISTER_OPERATOR_MALI_GPU(pool2d, ops::PoolOp); ...@@ -68,5 +68,8 @@ REGISTER_OPERATOR_MALI_GPU(pool2d, ops::PoolOp);
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(pool2d, ops::PoolOp); REGISTER_OPERATOR_FPGA(pool2d, ops::PoolOp);
#endif #endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(pool2d, ops::PoolOp);
#endif
#endif #endif
...@@ -54,5 +54,8 @@ USE_OP_MALI_GPU(pool2d); ...@@ -54,5 +54,8 @@ USE_OP_MALI_GPU(pool2d);
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
USE_OP_FPGA(pool2d); USE_OP_FPGA(pool2d);
#endif #endif
#ifdef PADDLE_MOBILE_CL
USE_OP_CL(pool2d);
#endif
#endif #endif
...@@ -41,5 +41,8 @@ REGISTER_OPERATOR_MALI_GPU(relu, ops::ReluOp); ...@@ -41,5 +41,8 @@ REGISTER_OPERATOR_MALI_GPU(relu, ops::ReluOp);
#endif #endif
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
#endif #endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(relu, ops::ReluOp);
#endif
#endif #endif
...@@ -57,5 +57,8 @@ USE_OP_MALI_GPU(relu); ...@@ -57,5 +57,8 @@ USE_OP_MALI_GPU(relu);
#endif #endif
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
#endif #endif
#ifdef PADDLE_MOBILE_CL
USE_OP_CL(relu);
#endif
#endif #endif
...@@ -40,5 +40,8 @@ REGISTER_OPERATOR_MALI_GPU(reshape, ops::ReshapeOp); ...@@ -40,5 +40,8 @@ REGISTER_OPERATOR_MALI_GPU(reshape, ops::ReshapeOp);
#endif #endif
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
#endif #endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(reshape, ops::ReshapeOp);
#endif
#endif #endif
...@@ -56,5 +56,8 @@ USE_OP_MALI_GPU(reshape); ...@@ -56,5 +56,8 @@ USE_OP_MALI_GPU(reshape);
#endif #endif
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
#endif #endif
#ifdef PADDLE_MOBILE_CL
USE_OP_CL(reshape);
#endif
#endif #endif
...@@ -36,5 +36,8 @@ REGISTER_OPERATOR_MALI_GPU(softmax, ops::SoftmaxOp); ...@@ -36,5 +36,8 @@ REGISTER_OPERATOR_MALI_GPU(softmax, ops::SoftmaxOp);
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(softmax, ops::SoftmaxOp); REGISTER_OPERATOR_FPGA(softmax, ops::SoftmaxOp);
#endif #endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(softmax, ops::SoftmaxOp);
#endif
#endif #endif
...@@ -52,5 +52,8 @@ USE_OP_MALI_GPU(softmax); ...@@ -52,5 +52,8 @@ USE_OP_MALI_GPU(softmax);
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
USE_OP_FPGA(softmax); USE_OP_FPGA(softmax);
#endif #endif
#ifdef PADDLE_MOBILE_CL
USE_OP_CL(softmax);
#endif
#endif #endif
...@@ -83,175 +83,175 @@ elseif("genet" IN_LIST NET) ...@@ -83,175 +83,175 @@ elseif("genet" IN_LIST NET)
target_link_libraries(test-genet paddle-mobile) target_link_libraries(test-genet paddle-mobile)
else () else ()
# gen test # # gen test
ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h) # ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-resnet paddle-mobile) # target_link_libraries(test-resnet paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-squeezenet net/test_squeezenet.cpp test_helper.h test_include.h executor_for_test.h) # ADD_EXECUTABLE(test-squeezenet net/test_squeezenet.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-squeezenet paddle-mobile) # target_link_libraries(test-squeezenet paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h test_include.h executor_for_test.h) # ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-yolo paddle-mobile) # target_link_libraries(test-yolo paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-googlenet net/test_googlenet.cpp test_helper.h test_include.h executor_for_test.h) # ADD_EXECUTABLE(test-googlenet net/test_googlenet.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-googlenet paddle-mobile) # target_link_libraries(test-googlenet paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-conv-op operators/test_cov_op.cpp test_helper.h test_include.h executor_for_test.h) # ADD_EXECUTABLE(test-conv-op operators/test_cov_op.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-conv-op paddle-mobile) # target_link_libraries(test-conv-op paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-mul-op operators/test_mul_op.cpp test_helper.h test_include.h) # ADD_EXECUTABLE(test-mul-op operators/test_mul_op.cpp test_helper.h test_include.h)
target_link_libraries(test-mul-op paddle-mobile) # target_link_libraries(test-mul-op paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-elementwiseadd-op operators/test_elementwise_add_op.cpp test_helper.h test_include.h) # ADD_EXECUTABLE(test-elementwiseadd-op operators/test_elementwise_add_op.cpp test_helper.h test_include.h)
target_link_libraries(test-elementwiseadd-op paddle-mobile) # target_link_libraries(test-elementwiseadd-op paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-concat-op operators/test_concat_op.cpp test_helper.h test_include.h) # ADD_EXECUTABLE(test-concat-op operators/test_concat_op.cpp test_helper.h test_include.h)
target_link_libraries(test-concat-op paddle-mobile) # target_link_libraries(test-concat-op paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-lrn-op operators/test_lrn_op.cpp test_helper.h test_include.h) # ADD_EXECUTABLE(test-lrn-op operators/test_lrn_op.cpp test_helper.h test_include.h)
target_link_libraries(test-lrn-op paddle-mobile) # target_link_libraries(test-lrn-op paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-batchnorm-op operators/test_batchnorm_op.cpp test_helper.h test_include.h) # ADD_EXECUTABLE(test-batchnorm-op operators/test_batchnorm_op.cpp test_helper.h test_include.h)
target_link_libraries(test-batchnorm-op paddle-mobile) # target_link_libraries(test-batchnorm-op paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-priorbox-op operators/test_prior_box_op.cpp test_helper.h test_include.h) # ADD_EXECUTABLE(test-priorbox-op operators/test_prior_box_op.cpp test_helper.h test_include.h)
target_link_libraries(test-priorbox-op paddle-mobile) # target_link_libraries(test-priorbox-op paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-boxcoder-op operators/test_box_coder_op.cpp test_helper.h test_include.h) # ADD_EXECUTABLE(test-boxcoder-op operators/test_box_coder_op.cpp test_helper.h test_include.h)
target_link_libraries(test-boxcoder-op paddle-mobile) # target_link_libraries(test-boxcoder-op paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-transpose-op operators/test_transpose_op.cpp test_helper.h test_include.h) # ADD_EXECUTABLE(test-transpose-op operators/test_transpose_op.cpp test_helper.h test_include.h)
target_link_libraries(test-transpose-op paddle-mobile) # target_link_libraries(test-transpose-op paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-multiclassnms-op operators/test_multiclass_nms_op.cpp test_helper.h test_include.h) # ADD_EXECUTABLE(test-multiclassnms-op operators/test_multiclass_nms_op.cpp test_helper.h test_include.h)
target_link_libraries(test-multiclassnms-op paddle-mobile) # target_link_libraries(test-multiclassnms-op paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-reshape-op operators/test_reshape_op.cpp test_helper.h test_include.h) # ADD_EXECUTABLE(test-reshape-op operators/test_reshape_op.cpp test_helper.h test_include.h)
target_link_libraries(test-reshape-op paddle-mobile) # target_link_libraries(test-reshape-op paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-relu-op operators/test_relu_op.cpp test_helper.h test_include.h) # ADD_EXECUTABLE(test-relu-op operators/test_relu_op.cpp test_helper.h test_include.h)
target_link_libraries(test-relu-op paddle-mobile) # target_link_libraries(test-relu-op paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-fc-op operators/test_fusion_fc_op.cpp test_helper.h test_include.h) # ADD_EXECUTABLE(test-fc-op operators/test_fusion_fc_op.cpp test_helper.h test_include.h)
target_link_libraries(test-fc-op paddle-mobile) # target_link_libraries(test-fc-op paddle-mobile)
#
# gen test log # # gen test log
ADD_EXECUTABLE(test-log common/test_log.cpp) # ADD_EXECUTABLE(test-log common/test_log.cpp)
target_link_libraries(test-log paddle-mobile) # target_link_libraries(test-log paddle-mobile)
#
# gen test log # # gen test log
ADD_EXECUTABLE(test-load framework/test_load.cpp) # ADD_EXECUTABLE(test-load framework/test_load.cpp)
target_link_libraries(test-load paddle-mobile) # target_link_libraries(test-load paddle-mobile)
#
# gen test log # # gen test log
ADD_EXECUTABLE(test-loadmemory framework/test_load_memory.cpp) # ADD_EXECUTABLE(test-loadmemory framework/test_load_memory.cpp)
target_link_libraries(test-loadmemory paddle-mobile) # target_link_libraries(test-loadmemory paddle-mobile)
#
ADD_EXECUTABLE(test-inference-api framework/test_inference_api.cpp) # ADD_EXECUTABLE(test-inference-api framework/test_inference_api.cpp)
target_link_libraries(test-inference-api paddle-mobile) # target_link_libraries(test-inference-api paddle-mobile)
#
#
# gen test log # # gen test log
# gen test # # gen test
ADD_EXECUTABLE(test-optimize framework/test_optimize.cpp) # ADD_EXECUTABLE(test-optimize framework/test_optimize.cpp)
target_link_libraries(test-optimize paddle-mobile) # target_link_libraries(test-optimize paddle-mobile)
#
#
#gen test # #gen test
ADD_EXECUTABLE(test-pool operators/test_pool_op.cpp test_helper.h test_include.h executor_for_test.h) # ADD_EXECUTABLE(test-pool operators/test_pool_op.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-pool paddle-mobile) # target_link_libraries(test-pool paddle-mobile)
#
#gen test # #gen test
ADD_EXECUTABLE(test-softmax operators/test_softmax_op.cpp test_helper.h test_include.h executor_for_test.h) # ADD_EXECUTABLE(test-softmax operators/test_softmax_op.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-softmax paddle-mobile) # target_link_libraries(test-softmax paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-gemm-accuracy common/test_gemm_accuracy.cpp) # ADD_EXECUTABLE(test-gemm-accuracy common/test_gemm_accuracy.cpp)
target_link_libraries(test-gemm-accuracy paddle-mobile) # target_link_libraries(test-gemm-accuracy paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-gemm-perf common/test_gemm_perf.cpp) # ADD_EXECUTABLE(test-gemm-perf common/test_gemm_perf.cpp)
target_link_libraries(test-gemm-perf paddle-mobile) # target_link_libraries(test-gemm-perf paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-enforce common/test_enforce.cpp) # ADD_EXECUTABLE(test-enforce common/test_enforce.cpp)
target_link_libraries(test-enforce paddle-mobile) # target_link_libraries(test-enforce paddle-mobile)
#
# gen test - test if openmp works # # gen test - test if openmp works
ADD_EXECUTABLE(test-openmp common/test_openmp.cpp test_helper.h test_include.h executor_for_test.h) # ADD_EXECUTABLE(test-openmp common/test_openmp.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-openmp paddle-mobile) # target_link_libraries(test-openmp paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-mobilenetssd net/test_mobilenet+ssd.cpp test_helper.h test_include.h executor_for_test.h) # ADD_EXECUTABLE(test-mobilenetssd net/test_mobilenet+ssd.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-mobilenetssd paddle-mobile) # target_link_libraries(test-mobilenetssd paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-mobilenet-combine net/test_mobilenet_combine.cpp test_helper.h test_include.h executor_for_test.h) # ADD_EXECUTABLE(test-mobilenet-combine net/test_mobilenet_combine.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-mobilenet-combine paddle-mobile) # target_link_libraries(test-mobilenet-combine paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-genet net/test_genet_combine.cpp test_helper.h test_include.h executor_for_test.h) # ADD_EXECUTABLE(test-genet net/test_genet_combine.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-genet paddle-mobile) # target_link_libraries(test-genet paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp test_include.h) # ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp test_include.h)
target_link_libraries(test-sigmoid paddle-mobile) # target_link_libraries(test-sigmoid paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-depthwise-conv-op operators/test_depthwise_conv_op.cpp test_helper.h test_include.h executor_for_test.h) # ADD_EXECUTABLE(test-depthwise-conv-op operators/test_depthwise_conv_op.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-depthwise-conv-op paddle-mobile) # target_link_libraries(test-depthwise-conv-op paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-mobilenet net/test_mobilenet.cpp test_helper.h test_include.h executor_for_test.h) # ADD_EXECUTABLE(test-mobilenet net/test_mobilenet.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-mobilenet paddle-mobile) # target_link_libraries(test-mobilenet paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-conv-add-relu-op operators/test_conv_add_relu_op.cpp test_helper.h test_include.h executor_for_test.h) # ADD_EXECUTABLE(test-conv-add-relu-op operators/test_conv_add_relu_op.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-conv-add-relu-op paddle-mobile) # target_link_libraries(test-conv-add-relu-op paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-conv-add-bn-relu-op operators/test_fusion_conv_add_bn_relu_op.cpp test_helper.h test_include.h executor_for_test.h) # ADD_EXECUTABLE(test-conv-add-bn-relu-op operators/test_fusion_conv_add_bn_relu_op.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-conv-add-bn-relu-op paddle-mobile) # target_link_libraries(test-conv-add-bn-relu-op paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-nlp net/test_nlp.cpp test_helper.h test_include.h executor_for_test.h) # ADD_EXECUTABLE(test-nlp net/test_nlp.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-nlp paddle-mobile) # target_link_libraries(test-nlp paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-gru-op operators/test_gru_op.cpp test_helper.h test_include.h) # ADD_EXECUTABLE(test-gru-op operators/test_gru_op.cpp test_helper.h test_include.h)
target_link_libraries(test-gru-op paddle-mobile) # target_link_libraries(test-gru-op paddle-mobile)
#
# gen test # # gen test
#
ADD_EXECUTABLE(test-inceptionv4 net/test_inceptionv4.cpp test_helper.h test_include.h executor_for_test.h) # ADD_EXECUTABLE(test-inceptionv4 net/test_inceptionv4.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-inceptionv4 paddle-mobile) # target_link_libraries(test-inceptionv4 paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-alexnet net/test_alexnet.cpp test_helper.h test_include.h executor_for_test.h) # ADD_EXECUTABLE(test-alexnet net/test_alexnet.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-alexnet paddle-mobile) # target_link_libraries(test-alexnet paddle-mobile)
#
ADD_EXECUTABLE(test-googlenetv1 net/test_googlenetv1_combine.cpp test_helper.h test_include.h) # ADD_EXECUTABLE(test-googlenetv1 net/test_googlenetv1_combine.cpp test_helper.h test_include.h)
target_link_libraries(test-googlenetv1 paddle-mobile) # target_link_libraries(test-googlenetv1 paddle-mobile)
#
# gen test # # gen test
ADD_EXECUTABLE(test-fssd net/test_mobilenet_025_fssd.cpp test_helper.h test_include.h) # ADD_EXECUTABLE(test-fssd net/test_mobilenet_025_fssd.cpp test_helper.h test_include.h)
target_link_libraries(test-fssd paddle-mobile) # target_link_libraries(test-fssd paddle-mobile)
# gen test # gen test
ADD_EXECUTABLE(test-mobilenetgpu net/test_mobilenet_GPU.cpp test_helper.h test_include.h) ADD_EXECUTABLE(test-mobilenetgpu net/test_mobilenet_GPU.cpp test_helper.h test_include.h)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册