diff --git a/.gitignore b/.gitignore index 964bfa4e48ee8e7c9387339d5775a3df90c63eb4..a7b444b5881c8b993c6edbb4a7ba555359dcab39 100644 --- a/.gitignore +++ b/.gitignore @@ -84,3 +84,6 @@ SwiftProtobuf.framework paddle-mobile.xcworkspace metal/models/ metal/images/ + + +tools/libomp.a \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 8c388d8b2a6374c68aecf86b215c8e8462b13c2b..2b3e6227acb58b3d8b3bf132fecf60c5298f172f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,7 +44,7 @@ if (LOG_PROFILE) add_definitions(-DPADDLE_MOBILE_PROFILE) endif() -if(USE_OPENMP AND NOT IS_IOS) +if(USE_OPENMP) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") add_definitions(-DPADDLE_MOBILE_USE_OPENMP) endif() @@ -130,8 +130,8 @@ endif () if (IS_IOS) else() - list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/PaddleMobile.h) - list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/PaddleMobile.mm) + list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/PaddleMobileCPU.h) + list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/PaddleMobileCPU.mm) list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/op_symbols.h) endif () diff --git a/src/ios_io/PaddleMobile.h b/src/ios_io/PaddleMobileCPU.h similarity index 55% rename from src/ios_io/PaddleMobile.h rename to src/ios_io/PaddleMobileCPU.h index 5854c5c3a4d4c899feb88822b2f7993860d1ed76..c68d81f328f4ce9a9bf16624f677b2996644c35c 100644 --- a/src/ios_io/PaddleMobile.h +++ b/src/ios_io/PaddleMobileCPU.h @@ -17,7 +17,17 @@ #import #import -@interface PaddleMobile : NSObject +@interface PaddleMobileCPUResult: NSObject + +@property (assign, nonatomic, readonly) float *output; + +@property (assign, nonatomic, readonly) int outputSize; + +-(void)releaseOutput; + +@end + +@interface PaddleMobileCPU : NSObject /* Create the object @@ -34,13 +44,36 @@ */ - (BOOL)load:(NSString *)modelAndWeightPath; +/* + * Load the model from memory + * */ +- 
(BOOL)LoadCombinedMemory:(size_t)modelLen + andModelBuf:(const uint8_t *)modelBuf + andModelParamsLen:(size_t)combinedParamsLen + andCombinedParamsBuf:(const uint8_t *)combinedParamsBuf; + +/* + * Preprocess the image; the caller must allocate the output buffer and is responsible for freeing it + * */ +-(void)preprocess:(CGImageRef)image + output:(float *)output + means:(NSArray *)means + scale:(float)scale + dim:(NSArray *)dim; + +/* + * Run prediction on already preprocessed data; once the returned result is no longer needed, call its releaseOutput method to free it + * */ +- (PaddleMobileCPUResult *)predictInput:(float *)input + dim:(NSArray *)dim; + /* Run prediction; means and scale are the preprocessing parameters used when the model was trained. If no such preprocessing was done during training, use predict directly */ - (NSArray *)predict:(CGImageRef)image dim:(NSArray *)dim means:(NSArray *)means scale:(float)scale; /* - Run prediction + Run prediction; by default means is 0 and scale is 1.0 */ - (NSArray *)predict:(CGImageRef)image dim:(NSArray *)dim; diff --git a/src/ios_io/PaddleMobile.mm b/src/ios_io/PaddleMobileCPU.mm similarity index 55% rename from src/ios_io/PaddleMobile.mm rename to src/ios_io/PaddleMobileCPU.mm index 5c7b801be0ea7967ea0c94813325d41071bb890b..5a21418ef5fa9cbf7b24436cb778fc8c6c164e16 100644 --- a/src/ios_io/PaddleMobile.mm +++ b/src/ios_io/PaddleMobileCPU.mm @@ -12,24 +12,51 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -#import "PaddleMobile.h" +#import "PaddleMobileCPU.h" #import "op_symbols.h" +#include "framework/tensor.h" #import "io/paddle_mobile.h" #import #import -@interface PaddleMobile() + +@interface PaddleMobileCPUResult() + +-(void)toSetOutput:(float *)output; + +-(void)toSetOutputSize:(int)outputSize; + +@end + +@implementation PaddleMobileCPUResult + +-(void)releaseOutput { + delete [] _output; + _output = nil; + _outputSize = 0; +} + +-(void)toSetOutput:(float *)output { + _output = output; +} + +-(void)toSetOutputSize:(int)outputSize { + _outputSize = outputSize; +} + +@end + + +@interface PaddleMobileCPU() { paddle_mobile::PaddleMobile *pam_; BOOL loaded_; - std::vector *predict_input_; - } @end -@implementation PaddleMobile +@implementation PaddleMobileCPU static std::mutex shared_mutex; @@ -66,6 +93,14 @@ static std::mutex shared_mutex; } } +- (BOOL)LoadCombinedMemory:(size_t)modelLen + andModelBuf:(const uint8_t *)modelBuf + andModelParamsLen:(size_t)combinedParamsLen + andCombinedParamsBuf:(const uint8_t *)combinedParamsBuf { + pam_->SetThreadNum(2); + return loaded_ = pam_->LoadCombinedMemory(modelLen, modelBuf, combinedParamsLen, combinedParamsBuf); +} + - (BOOL)load:(NSString *)modelAndWeightPath{ std::string model_path_str = std::string([modelAndWeightPath UTF8String]); if (loaded_ = pam_->Load(model_path_str)) { @@ -75,6 +110,57 @@ static std::mutex shared_mutex; } } + +-(void)preprocess:(CGImageRef)image + output:(float *)output + means:(NSArray *)means + scale:(float)scale + dim:(NSArray *)dim { + std::lock_guard lock(shared_mutex); + + // dim to c++ vector, get numel + std::vector dim_vec; + int numel = 1; + for (int k = 0; k < dim.count; ++k) { + int d = dim[k].intValue; + numel *= d; + dim_vec.push_back(d); + } + + const int sourceRowBytes = CGImageGetBytesPerRow(image); + const int imageWidth = CGImageGetWidth(image); + const int imageHeight = CGImageGetHeight(image); + const int imageChannels = 4; + CGDataProviderRef provider = 
CGImageGetDataProvider(image); + CFDataRef cfData = CGDataProviderCopyData(provider); + const UInt8 *input = CFDataGetBytePtr(cfData); + + int wanted_input_width = dim_vec[3]; + int wanted_input_height = dim_vec[2]; + int wanted_input_channels = dim_vec[1]; + + for (int c = 0; c < wanted_input_channels; ++c) { + float *out_channel = output + c * wanted_input_height * wanted_input_width; + for (int y = 0; y < wanted_input_height; ++y) { + float *out_row = out_channel + y * wanted_input_width; + for (int x = 0; x < wanted_input_width; ++x) { + int in_row = (y * imageHeight) / wanted_input_height; + int in_col = (x * imageWidth) / wanted_input_width; + const UInt8 *in_pixel = input + (in_row * imageWidth * imageChannels) + (in_col * imageChannels); + float *out_pos = out_row + x; + if (c == 0) { + *out_pos = (in_pixel[c] - means[c].floatValue) * scale; + }else if (c == 1){ + *out_pos = (in_pixel[c] - means[c].floatValue) * scale; + }else if (c == 2){ + *out_pos = (in_pixel[c] - means[c].floatValue) * scale; + } + } + } + } + +} + -(void)preprocess:(const UInt8 *)input output:(float *)output imageWidth:(int)imageWidth imageHeight:(int)imageHeight imageChannels:(int)imageChannels means:(NSArray *)means scale:(float)scale dim:(std::vector)dim{ if (means == nil) { means = @[@0, @0, @0]; @@ -105,27 +191,54 @@ static std::mutex shared_mutex; } } -- (NSArray *)predict:(CGImageRef)image dim:(NSArray *)dim means:(NSArray *)means scale:(float)scale{ -// printf(" hi i am here"); - if (predict_input_) { -// printf(" fukc -- "); -// printf(" %d \n", predict_input_->size()); - // dim to c++ vector, get numel - std::vector dim_vec = {1, 3, 300, 300}; -// int numel = 1; -// for (int k = 0; k < dim.count; ++k) { -// int d = dim[k].intValue; -// numel *= d; -// dim_vec.push_back(d); -// } - - - std::vector cpp_result = pam_->Predict(*predict_input_, dim_vec); +- (PaddleMobileCPUResult *)predictInput:(float *)input + dim:(NSArray *)dim { + std::lock_guard lock(shared_mutex); + if 
(!loaded_) { + printf("PaddleMobile doesn't be loaded yet"); + return nil; + } + + if (dim.count != 4) { + printf("dim must have 4 elements"); return nil; } -// printf(" predict one "); -// std::lock_guard lock(shared_mutex); + // dim to c++ vector, get numel + std::vector dim_vec; + int numel = 1; + for (int k = 0; k < dim.count; ++k) { + int d = dim[k].intValue; + numel *= d; + dim_vec.push_back(d); + } + + paddle_mobile::framework::Tensor input_tensor; + + paddle_mobile::framework::DDim dims = paddle_mobile::framework::make_ddim(dim_vec); + + float *input_ptr = input_tensor.mutable_data(dims); + + memcpy(input_ptr, input, + numel * sizeof(float)); + + std::shared_ptr output = pam_->Predict(input_tensor); + + float *output_pointer = new float[output->numel()]; + + memcpy(output_pointer, output->data(), + output->numel() * sizeof(float)); + + PaddleMobileCPUResult *cpuResult = [[PaddleMobileCPUResult alloc] init]; + [cpuResult toSetOutput: output_pointer]; + [cpuResult toSetOutputSize: output->numel()]; + + return cpuResult; +} + +- (NSArray *)predict:(CGImageRef)image dim:(NSArray *)dim means:(NSArray *)means scale:(float)scale{ +// printf(" predict one "); + std::lock_guard lock(shared_mutex); if (!loaded_) { printf("PaddleMobile doesn't be loaded yet"); return nil; @@ -164,15 +277,13 @@ static std::mutex shared_mutex; } // input - std::vector *predict_input = new std::vector(); + std::vector predict_input; for (int j = 0; j < numel; ++j) { - predict_input->push_back(dataPointer[j]); + predict_input.push_back(dataPointer[j]); } - predict_input_ = predict_input; - // predict - std::vector cpp_result = pam_->Predict(*predict_input, dim_vec); + std::vector cpp_result = pam_->Predict(predict_input, dim_vec); // result long count = 0; diff --git a/src/ios_io/op_symbols.h b/src/ios_io/op_symbols.h index 0fe1137278d19ab4c9c9aaecf2db108e4a184993..af0401c15ab28b0baa0cdbffb16a46215a26953e 100644 --- a/src/ios_io/op_symbols.h +++ b/src/ios_io/op_symbols.h @@ -15,27 +15,46 
@@ #pragma once #include "operators/batchnorm_op.h" +#include "operators/bilinear_interp_op.h" #include "operators/box_coder_op.h" #include "operators/concat_op.h" #include "operators/conv_op.h" +#include "operators/conv_transpose_op.h" +#include "operators/crf_op.h" #include "operators/depthwise_conv_op.h" #include "operators/dropout_op.h" #include "operators/elementwise_add_op.h" #include "operators/feed_op.h" #include "operators/fetch_op.h" +#include "operators/flatten_op.h" #include "operators/fusion_conv_add.h" +#include "operators/fusion_conv_add_add_prelu_op.h" +#include "operators/fusion_conv_add_bn_op.h" #include "operators/fusion_conv_add_bn_relu_op.h" +#include "operators/fusion_conv_add_prelu_op.h" +#include "operators/fusion_conv_add_relu_op.h" +#include "operators/fusion_conv_bn_add_relu_op.h" #include "operators/fusion_conv_bn_relu_op.h" #include "operators/fusion_dwconv_bn_relu_op.h" +#include "operators/fusion_elementwise_add_relu_op.h" #include "operators/fusion_fc_op.h" +#include "operators/fusion_fc_relu_op.h" +#include "operators/gru_op.h" #include "operators/im2sequence_op.h" +#include "operators/lookup_op.h" #include "operators/lrn_op.h" #include "operators/mul_op.h" #include "operators/multiclass_nms_op.h" #include "operators/pool_op.h" +#include "operators/prelu_op.h" #include "operators/prior_box_op.h" #include "operators/relu_op.h" #include "operators/reshape_op.h" +#include "operators/resize_op.h" +#include "operators/scale_op.h" +#include "operators/shape_op.h" #include "operators/sigmoid_op.h" +#include "operators/slice_op.h" #include "operators/softmax_op.h" +#include "operators/split_op.h" #include "operators/transpose_op.h" diff --git a/src/operators/prelu_op.cpp b/src/operators/prelu_op.cpp index 245154ca5ea6971dee33e14550bf1e090fa0ec71..332b5cc9bbbabf9498858b96e0028a9e3992f3ea 100644 --- a/src/operators/prelu_op.cpp +++ b/src/operators/prelu_op.cpp @@ -34,11 +34,9 @@ void PReluOp::InferShape() const { * */ namespace ops = 
paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU -USE_OP_CPU(prelu); REGISTER_OPERATOR_CPU(prelu, ops::PReluOp); #endif #ifdef PADDLE_MOBILE_MALI_GPU -USE_OP_MALI_GPU(prelu); REGISTER_OPERATOR_MALI_GPU(prelu, ops::PReluOp); #endif #ifdef PADDLE_MOBILE_FPGA diff --git a/src/operators/prelu_op.h b/src/operators/prelu_op.h index af33476b7298a5728a6ef944506d55f422a2fa8c..7b6b778fa6e8f0951faffda6803b25b6b23ea17c 100644 --- a/src/operators/prelu_op.h +++ b/src/operators/prelu_op.h @@ -50,4 +50,14 @@ class PReluOp : public framework::OperatorWithKernel< } // namespace operators } // namespace paddle_mobile +#ifdef PADDLE_MOBILE_CPU +USE_OP_CPU(prelu); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +USE_OP_MALI_GPU(prelu); +#endif +#ifdef PADDLE_MOBILE_FPGA +USE_OP_FPGA(prelu); +#endif + #endif