From f2969df5a35d736b7aac085a65b5abfed150fb9e Mon Sep 17 00:00:00 2001
From: liuruilong
Date: Tue, 18 Sep 2018 21:47:53 +0800
Subject: [PATCH] update ios io

---
 .gitignore                 |   3 +
 CMakeLists.txt             |   2 +-
 src/ios_io/PaddleMobile.h  |  37 ++++++++-
 src/ios_io/PaddleMobile.mm | 165 +++++++++++++++++++++++++++++++------
 src/ios_io/op_symbols.h    |  19 +++++
 src/operators/prelu_op.cpp |   2 -
 src/operators/prelu_op.h   |  10 +++
 7 files changed, 206 insertions(+), 32 deletions(-)

diff --git a/.gitignore b/.gitignore
index 964bfa4e48..a7b444b588 100644
--- a/.gitignore
+++ b/.gitignore
@@ -84,3 +84,6 @@ SwiftProtobuf.framework
 paddle-mobile.xcworkspace
 metal/models/
 metal/images/
+
+
+tools/libomp.a
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8c388d8b2a..9e0fb4a867 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -44,7 +44,7 @@ if (LOG_PROFILE)
     add_definitions(-DPADDLE_MOBILE_PROFILE)
 endif()
 
-if(USE_OPENMP AND NOT IS_IOS)
+if(USE_OPENMP)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
     add_definitions(-DPADDLE_MOBILE_USE_OPENMP)
 endif()
diff --git a/src/ios_io/PaddleMobile.h b/src/ios_io/PaddleMobile.h
index 5854c5c3a4..c68d81f328 100644
--- a/src/ios_io/PaddleMobile.h
+++ b/src/ios_io/PaddleMobile.h
@@ -17,7 +17,17 @@
 #import <CoreImage/CoreImage.h>
 #import <Foundation/Foundation.h>
 
-@interface PaddleMobile : NSObject
+@interface PaddleMobileCPUResult: NSObject
+
+@property (assign, nonatomic, readonly) float *output;
+
+@property (assign, nonatomic, readonly) int outputSize;
+
+-(void)releaseOutput;
+
+@end
+
+@interface PaddleMobileCPU : NSObject
 
 /*
     Create the instance
@@ -34,13 +44,36 @@
 */
 - (BOOL)load:(NSString *)modelAndWeightPath;
 
+/*
+ * Load a combined model from memory buffers.
+ * */
+- (BOOL)LoadCombinedMemory:(size_t)modelLen
+               andModelBuf:(const uint8_t *)modelBuf
+         andModelParamsLen:(size_t)combinedParamsLen
+      andCombinedParamsBuf:(const uint8_t *)combinedParamsBuf;
+
+/*
+ * Preprocess an image. The caller must allocate the output buffer and is responsible for freeing it.
+ * */
+-(void)preprocess:(CGImageRef)image
+           output:(float *)output
+            means:(NSArray *)means
+            scale:(float)scale
+              dim:(NSArray *)dim;
+
+/*
+ * Run prediction on preprocessed data. When you are done with the returned result, call its releaseOutput method to free it.
+ * */
+- (PaddleMobileCPUResult *)predictInput:(float *)input
+                                    dim:(NSArray *)dim;
+
 /*
     Run prediction. means and scale are the preprocessing parameters used when the model was trained; if no such preprocessing was done at training time, just use predict directly.
 */
 - (NSArray *)predict:(CGImageRef)image dim:(NSArray *)dim means:(NSArray *)means scale:(float)scale;
 
 /*
-    Run prediction
+    Run prediction; means defaults to 0 and scale to 1.0
 */
 - (NSArray *)predict:(CGImageRef)image dim:(NSArray *)dim;
 
diff --git a/src/ios_io/PaddleMobile.mm b/src/ios_io/PaddleMobile.mm
index 5c7b801be0..650ae391dd 100644
--- a/src/ios_io/PaddleMobile.mm
+++ b/src/ios_io/PaddleMobile.mm
@@ -15,21 +15,48 @@
 
 #import "PaddleMobile.h"
 #import "op_symbols.h"
+#include "framework/tensor.h"
 #import "io/paddle_mobile.h"
 #import <memory>
 #import <vector>
 
-@interface PaddleMobile()
+
+@interface PaddleMobileCPUResult()
+
+-(void)toSetOutput:(float *)output;
+
+-(void)toSetOutputSize:(int)outputSize;
+
+@end
+
+@implementation PaddleMobileCPUResult
+
+-(void)releaseOutput {
+  delete [] _output;
+  _output = nil;
+  _outputSize = 0;
+}
+
+-(void)toSetOutput:(float *)output {
+  _output = output;
+}
+
+-(void)toSetOutputSize:(int)outputSize {
+  _outputSize = outputSize;
+}
+
+@end
+
+
+@interface PaddleMobileCPU()
 {
   paddle_mobile::PaddleMobile<paddle_mobile::CPU, paddle_mobile::Precision::FP32> *pam_;
   BOOL loaded_;
-  std::vector<float> *predict_input_;
-
 }
 
 @end
 
-@implementation PaddleMobile
+@implementation PaddleMobileCPU
 
 static std::mutex shared_mutex;
 
@@ -66,6 +93,14 @@ static std::mutex shared_mutex;
   }
 }
 
+- (BOOL)LoadCombinedMemory:(size_t)modelLen
+               andModelBuf:(const uint8_t *)modelBuf
+         andModelParamsLen:(size_t)combinedParamsLen
+      andCombinedParamsBuf:(const uint8_t *)combinedParamsBuf {
+  pam_->SetThreadNum(2);
+  return loaded_ = pam_->LoadCombinedMemory(modelLen, modelBuf, combinedParamsLen, combinedParamsBuf);
+}
+
 - (BOOL)load:(NSString *)modelAndWeightPath{
   std::string model_path_str = std::string([modelAndWeightPath UTF8String]);
   if (loaded_ = pam_->Load(model_path_str)) {
@@ -75,6 +110,57 @@ static std::mutex shared_mutex;
   }
 }
 
+
+-(void)preprocess:(CGImageRef)image
+           output:(float *)output
+            means:(NSArray *)means
+            scale:(float)scale
+              dim:(NSArray *)dim {
+  std::lock_guard<std::mutex> lock(shared_mutex);
+
+  // dim to c++ vector, get numel
+  std::vector<int64_t> dim_vec;
+  int numel = 1;
+  for (int k = 0; k < dim.count; ++k) {
+    int d = dim[k].intValue;
+    numel *= d;
+    dim_vec.push_back(d);
+  }
+
+  const int sourceRowBytes = CGImageGetBytesPerRow(image);
+  const int imageWidth = CGImageGetWidth(image);
+  const int imageHeight = CGImageGetHeight(image);
+  const int imageChannels = 4;
+  CGDataProviderRef provider = CGImageGetDataProvider(image);
+  CFDataRef cfData = CGDataProviderCopyData(provider);
+  const UInt8 *input = CFDataGetBytePtr(cfData);
+
+  int wanted_input_width = dim_vec[3];
+  int wanted_input_height = dim_vec[2];
+  int wanted_input_channels = dim_vec[1];
+
+  for (int c = 0; c < wanted_input_channels; ++c) {
+    float *out_channel = output + c * wanted_input_height * wanted_input_width;
+    for (int y = 0; y < wanted_input_height; ++y) {
+      float *out_row = out_channel + y * wanted_input_width;
+      for (int x = 0; x < wanted_input_width; ++x) {
+        int in_row = (y * imageHeight) / wanted_input_height;
+        int in_col = (x * imageWidth) / wanted_input_width;
+        const UInt8 *in_pixel = input + (in_row * imageWidth * imageChannels) + (in_col * imageChannels);
+        float *out_pos = out_row + x;
+        if (c == 0) {
+          *out_pos = (in_pixel[c] - means[c].floatValue) * scale;
+        }else if (c == 1){
+          *out_pos = (in_pixel[c] - means[c].floatValue) * scale;
+        }else if (c == 2){
+          *out_pos = (in_pixel[c] - means[c].floatValue) * scale;
+        }
+      }
+    }
+  }
+
+}
+
 -(void)preprocess:(const UInt8 *)input output:(float *)output imageWidth:(int)imageWidth imageHeight:(int)imageHeight imageChannels:(int)imageChannels means:(NSArray *)means scale:(float)scale dim:(std::vector<int64_t>)dim{
   if (means == nil) {
     means = @[@0, @0, @0];
@@ -105,27 +191,54 @@ static std::mutex shared_mutex;
   }
 }
 
-- (NSArray *)predict:(CGImageRef)image dim:(NSArray *)dim means:(NSArray *)means scale:(float)scale{
-//  printf(" hi i am here");
-  if (predict_input_) {
-//    printf(" fukc -- ");
-//    printf(" %d \n", predict_input_->size());
-    // dim to c++ vector, get numel
-    std::vector<int64_t> dim_vec = {1, 3, 300, 300};
-//    int numel = 1;
-//    for (int k = 0; k < dim.count; ++k) {
-//      int d = dim[k].intValue;
-//      numel *= d;
-//      dim_vec.push_back(d);
-//    }
-
-
-    std::vector<float> cpp_result = pam_->Predict(*predict_input_, dim_vec);
+- (PaddleMobileCPUResult *)predictInput:(float *)input
+                                    dim:(NSArray *)dim {
+  std::lock_guard<std::mutex> lock(shared_mutex);
+  if (!loaded_) {
+    printf("PaddleMobile doesn't be loaded yet");
+    return nil;
+  }
+
+  if (dim.count != 4) {
+    printf("dim must have 4 elements");
     return nil;
   }
 
-//  printf(" predict one ");
-//  std::lock_guard<std::mutex> lock(shared_mutex);
+  // dim to c++ vector, get numel
+  std::vector<int64_t> dim_vec;
+  int numel = 1;
+  for (int k = 0; k < dim.count; ++k) {
+    int d = dim[k].intValue;
+    numel *= d;
+    dim_vec.push_back(d);
+  }
+
+  paddle_mobile::framework::Tensor input_tensor;
+
+  paddle_mobile::framework::DDim dims =
+      paddle_mobile::framework::make_ddim(dim_vec);
+
+  float *input_ptr = input_tensor.mutable_data<float>(dims);
+
+  memcpy(input_ptr, input,
+         numel * sizeof(float));
+
+  std::shared_ptr<paddle_mobile::framework::Tensor> output = pam_->Predict(input_tensor);
+
+  float *output_pointer = new float[output->numel()];
+
+  memcpy(output_pointer, output->data<float>(),
+         output->numel() * sizeof(float));
+
+  PaddleMobileCPUResult *cpuResult = [[PaddleMobileCPUResult alloc] init];
+  [cpuResult toSetOutput: output_pointer];
+  [cpuResult toSetOutputSize: output->numel()];
+
+  return cpuResult;
+}
+
+- (NSArray *)predict:(CGImageRef)image dim:(NSArray *)dim means:(NSArray *)means scale:(float)scale{
+//  printf(" predict one ");
+  std::lock_guard<std::mutex> lock(shared_mutex);
   if (!loaded_) {
     printf("PaddleMobile doesn't be loaded yet");
     return nil;
   }
@@ -164,15 +277,13 @@ static std::mutex shared_mutex;
   }
 
   // input
-  std::vector<float> *predict_input = new std::vector<float>();
+  std::vector<float> predict_input;
   for (int j = 0; j < numel; ++j) {
-    predict_input->push_back(dataPointer[j]);
+    predict_input.push_back(dataPointer[j]);
   }
 
-  predict_input_ = predict_input;
-
   // predict
-  std::vector<float> cpp_result = pam_->Predict(*predict_input, dim_vec);
+  std::vector<float> cpp_result = pam_->Predict(predict_input, dim_vec);
 
   // result
   long count = 0;
diff --git a/src/ios_io/op_symbols.h b/src/ios_io/op_symbols.h
index 0fe1137278..2c4a3711bc 100644
--- a/src/ios_io/op_symbols.h
+++ b/src/ios_io/op_symbols.h
@@ -14,6 +14,13 @@
 
 #pragma once
 
+#include "operators/prelu_op.h"
+#include "operators/fusion_conv_add_prelu_op.h"
+#include "operators/fusion_conv_add_add_prelu_op.h"
+#include "operators/bilinear_interp_op.h"
+#include "operators/conv_transpose_op.h"
+#include "operators/crf_op.h"
+#include "operators/flatten_op.h"
 #include "operators/batchnorm_op.h"
 #include "operators/box_coder_op.h"
 #include "operators/concat_op.h"
@@ -24,11 +31,18 @@
 #include "operators/feed_op.h"
 #include "operators/fetch_op.h"
 #include "operators/fusion_conv_add.h"
+#include "operators/fusion_conv_add_bn_op.h"
+#include "operators/fusion_conv_add_relu_op.h"
+#include "operators/fusion_conv_bn_add_relu_op.h"
 #include "operators/fusion_conv_add_bn_relu_op.h"
 #include "operators/fusion_conv_bn_relu_op.h"
 #include "operators/fusion_dwconv_bn_relu_op.h"
+#include "operators/fusion_elementwise_add_relu_op.h"
 #include "operators/fusion_fc_op.h"
+#include "operators/fusion_fc_relu_op.h"
+#include "operators/gru_op.h"
 #include "operators/im2sequence_op.h"
+#include "operators/lookup_op.h"
 #include "operators/lrn_op.h"
 #include "operators/mul_op.h"
 #include "operators/multiclass_nms_op.h"
@@ -36,6 +50,11 @@
 #include "operators/prior_box_op.h"
 #include "operators/relu_op.h"
 #include "operators/reshape_op.h"
+#include "operators/resize_op.h"
+#include "operators/scale_op.h"
+#include "operators/shape_op.h"
 #include "operators/sigmoid_op.h"
+#include "operators/slice_op.h"
 #include "operators/softmax_op.h"
+#include "operators/split_op.h"
 #include "operators/transpose_op.h"
diff --git a/src/operators/prelu_op.cpp b/src/operators/prelu_op.cpp
index 245154ca5e..332b5cc9bb 100644
--- a/src/operators/prelu_op.cpp
+++ b/src/operators/prelu_op.cpp
@@ -34,11 +34,9 @@ void PReluOp::InferShape() const {
  * */
 namespace ops = paddle_mobile::operators;
 #ifdef PADDLE_MOBILE_CPU
-USE_OP_CPU(prelu);
 REGISTER_OPERATOR_CPU(prelu, ops::PReluOp);
 #endif
 #ifdef PADDLE_MOBILE_MALI_GPU
-USE_OP_MALI_GPU(prelu);
 REGISTER_OPERATOR_MALI_GPU(prelu, ops::PReluOp);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
diff --git a/src/operators/prelu_op.h b/src/operators/prelu_op.h
index af33476b72..7b6b778fa6 100644
--- a/src/operators/prelu_op.h
+++ b/src/operators/prelu_op.h
@@ -50,4 +50,14 @@ class PReluOp : public framework::OperatorWithKernel<
 }  // namespace operators
 }  // namespace paddle_mobile
 
+#ifdef PADDLE_MOBILE_CPU
+USE_OP_CPU(prelu);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+USE_OP_MALI_GPU(prelu);
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+USE_OP_FPGA(prelu);
+#endif
+
 #endif
-- 
GitLab
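
Usage note (editorial, not part of the patch): a minimal caller-side sketch of the PaddleMobileCPU API added above. It assumes the instance is created with plain alloc/init; the model path, input shape (1x3x224x224), channel means, and scale below are placeholders. Per the new header comments, the caller owns the buffer filled by preprocess and must call releaseOutput on the prediction result when finished.

#import <stdlib.h>
#import "PaddleMobile.h"

static void RunPaddleMobileExample(CGImageRef image) {
  PaddleMobileCPU *runner = [[PaddleMobileCPU alloc] init];
  if (![runner load:@"/path/to/combined-model-file"]) {   // placeholder model path
    return;
  }

  NSArray *dim = @[@1, @3, @224, @224];                   // NCHW shape, placeholder
  int numel = 1 * 3 * 224 * 224;

  // Caller-allocated buffer that preprocess fills; the caller frees it afterwards.
  float *input = (float *)malloc(numel * sizeof(float));
  [runner preprocess:image
              output:input
               means:@[@128, @128, @128]                  // placeholder channel means
               scale:0.017f                               // placeholder scale
                 dim:dim];

  // Run the forward pass on the preprocessed buffer.
  PaddleMobileCPUResult *result = [runner predictInput:input dim:dim];
  if (result) {
    for (int i = 0; i < result.outputSize; ++i) {
      printf("output[%d] = %f\n", i, result.output[i]);
    }
    [result releaseOutput];                               // free the result buffer
  }

  free(input);                                            // free the caller-owned input buffer
}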