update ios io

f2969df5 · liuruilong · 17b66f79 · f2969df5 · f2969df5 · f2969df5
7 changed file
--- a/.gitignore
+++ b/.gitignore
@@ -84,3 +84,6 @@ SwiftProtobuf.framework
 paddle-mobile.xcworkspace
 metal/models/
 metal/images/
+tools/libomp.a
\ No newline at end of file
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -44,7 +44,7 @@ if (LOG_PROFILE)
    add_definitions(-DPADDLE_MOBILE_PROFILE)
 endif()
-if(USE_OPENMP AND NOT IS_IOS)
+if(USE_OPENMP)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
    add_definitions(-DPADDLE_MOBILE_USE_OPENMP)
 endif()

--- a/src/ios_io/PaddleMobile.h
+++ b/src/ios_io/PaddleMobile.h
@@ -17,7 +17,17 @@
 #import <CoreImage/CoreImage.h>
 #import <Foundation/Foundation.h>
-@interface PaddleMobile : NSObject
+@interface PaddleMobileCPUResult: NSObject
+@property (assign, nonatomic, readonly) float *output;
+@property (assign, nonatomic, readonly) int outputSize;
+-(void)releaseOutput;
+@end
+@interface PaddleMobileCPU : NSObject
 /*
    创建对象
@@ -34,13 +44,36 @@
 */
 - (BOOL)load:(NSString *)modelAndWeightPath;
+/*
+ * 从内存中加载模型
+ * */
+- (BOOL)LoadCombinedMemory:(size_t)modelLen
+               andModelBuf:(const uint8_t *)modelBuf
+         andModelParamsLen:(size_t)combinedParamsLen
+      andCombinedParamsBuf:(const uint8_t *)combinedParamsBuf;
+/*
+ *  对图像进行预处理, 需要外部开辟 output 内存, 外部释放 output 内存
+ * */
+-(void)preprocess:(CGImageRef)image
+           output:(float *)output
+            means:(NSArray<NSNumber *> *)means
+        scale:(float)scale
+        dim:(NSArray<NSNumber *> *)dim;
+/*
+ * 预测预处理后的数据, 返回结果使用结束需要调用其 releaseOutput 函数进行释放
+ * */
+- (PaddleMobileCPUResult *)predictInput:(float *)input
+                                    dim:(NSArray<NSNumber *> *)dim;
 /*
    进行预测, means 和 scale 为训练模型时的预处理参数, 如训练时没有做这些预处理则直接使用 predict
 */
 - (NSArray *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim means:(NSArray<NSNumber *> *)means scale:(float)scale;
 /*
-    进行预测
+    进行预测, 默认 means 为 0, scale 为 1.0
 */
 - (NSArray *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim;

--- a/src/ios_io/PaddleMobile.mm
+++ b/src/ios_io/PaddleMobile.mm
@@ -15,21 +15,48 @@
 #import "PaddleMobile.h"
 #import "op_symbols.h"
+#include "framework/tensor.h"
 #import "io/paddle_mobile.h"
 #import <memory>
 #import <vector>
-@interface  PaddleMobile()
+@interface PaddleMobileCPUResult()
+-(void)toSetOutput:(float *)output;
+-(void)toSetOutputSize:(int)outputSize;
+@end
+@implementation PaddleMobileCPUResult
+// Private setter: records the element count reported through `outputSize`.
+-(void)toSetOutputSize:(int)outputSize {
+  _outputSize = outputSize;
+}
+// Private setter: stores the raw buffer pointer exposed through `output`.
+-(void)toSetOutput:(float *)output {
+  _output = output;
+}
+// Frees the buffer with delete[] and resets the pointer and the size to zero.
+-(void)releaseOutput {
+  float *buffer = _output;
+  _output = nil;
+  _outputSize = 0;
+  delete [] buffer;
+}
+@end
+@interface  PaddleMobileCPU()
 {
  paddle_mobile::PaddleMobile<paddle_mobile::CPU, paddle_mobile::Precision::FP32> *pam_;
  BOOL loaded_;
-  std::vector<float> *predict_input_;
 }
 @end
-@implementation PaddleMobile
+@implementation PaddleMobileCPU
 static std::mutex shared_mutex;
@@ -66,6 +93,14 @@ static std::mutex shared_mutex;
  }
 }
+// Loads a combined model and its parameters from in-memory buffers.
+// Stores the loader's result in loaded_ and returns that same value.
+- (BOOL)LoadCombinedMemory:(size_t)modelLen
+               andModelBuf:(const uint8_t *)modelBuf
+         andModelParamsLen:(size_t)combinedParamsLen
+      andCombinedParamsBuf:(const uint8_t *)combinedParamsBuf {
+  // The thread count is set to two before the model is loaded.
+  pam_->SetThreadNum(2);
+  loaded_ = pam_->LoadCombinedMemory(modelLen, modelBuf, combinedParamsLen, combinedParamsBuf);
+  return loaded_;
+}
 - (BOOL)load:(NSString *)modelAndWeightPath{
  std::string model_path_str = std::string([modelAndWeightPath UTF8String]);
  if (loaded_ = pam_->Load(model_path_str)) {
@@ -75,6 +110,57 @@ static std::mutex shared_mutex;
  }
 }
+// Resamples `image` into `output` as planar (channel, row, column) floats,
+// writing (pixel - means[c]) * scale for each of the first three channels.
+// `dim` supplies channels, height and width at indices 1, 2 and 3.
+// `output` is allocated and freed by the caller and is only written here.
+// A nil `means` behaves as all zeros, because messaging nil yields 0.
+// NOTE(review): assumes 4 bytes per source pixel (imageChannels) - confirm with callers.
+-(void)preprocess:(CGImageRef)image
+           output:(float *)output
+            means:(NSArray<NSNumber *> *)means
+        scale:(float)scale
+        dim:(NSArray<NSNumber *> *)dim {
+  std::lock_guard<std::mutex> lock(shared_mutex);
+  // Indices 1..3 are read below; refuse shorter arrays instead of reading out of range.
+  if (dim.count < 4) {
+    printf("dim must have at least 4 elements");
+    return;
+  }
+  const int sourceRowBytes = CGImageGetBytesPerRow(image);
+  const int imageWidth = CGImageGetWidth(image);
+  const int imageHeight = CGImageGetHeight(image);
+  const int imageChannels = 4;
+  // CGDataProviderCopyData follows the Copy rule: this method owns cfData and must release it.
+  CFDataRef cfData = CGDataProviderCopyData(CGImageGetDataProvider(image));
+  if (cfData == NULL) {
+    return;
+  }
+  const UInt8 *input = CFDataGetBytePtr(cfData);
+  const int wanted_input_width = dim[3].intValue;
+  const int wanted_input_height = dim[2].intValue;
+  const int wanted_input_channels = dim[1].intValue;
+  for (int c = 0; c < wanted_input_channels; ++c) {
+    float *out_channel = output + c * wanted_input_height * wanted_input_width;
+    for (int y = 0; y < wanted_input_height; ++y) {
+      float *out_row = out_channel + y * wanted_input_width;
+      // Source rows are sourceRowBytes apart, which may exceed imageWidth * imageChannels.
+      const UInt8 *in_row = input + ((y * imageHeight) / wanted_input_height) * sourceRowBytes;
+      for (int x = 0; x < wanted_input_width; ++x) {
+        const UInt8 *in_pixel = in_row + ((x * imageWidth) / wanted_input_width) * imageChannels;
+        // Only the first three source channels are converted; further planes stay untouched.
+        if (c < 3) {
+          out_row[x] = (in_pixel[c] - means[c].floatValue) * scale;
+        }
+      }
+    }
+  }
+  CFRelease(cfData);
+}
 -(void)preprocess:(const UInt8 *)input output:(float *)output imageWidth:(int)imageWidth imageHeight:(int)imageHeight imageChannels:(int)imageChannels means:(NSArray<NSNumber *> *)means scale:(float)scale dim:(std::vector<int64_t>)dim{
  if (means == nil) {
    means = @[@0, @0, @0];
@@ -105,27 +191,54 @@ static std::mutex shared_mutex;
  }
 }
- (NSArray *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim means:(NSArray<NSNumber *> *)means scale:(float)scale{
+- (PaddleMobileCPUResult *)predictInput:(float *)input
-//  printf(" hi i am here");
+                      dim:(NSArray<NSNumber *> *)dim {
-  if (predict_input_) {
+  std::lock_guard<std::mutex> lock(shared_mutex);
-//    printf(" fukc -- ");
+  if (!loaded_) {
-//    printf(" %d \n", predict_input_->size());
+    printf("PaddleMobile doesn't be loaded yet");
-    // dim to c++ vector, get numel
+    return nil;
-    std::vector<int64_t > dim_vec = {1, 3, 300, 300};
+  }
-//    int numel = 1;
-//    for (int k = 0; k < dim.count; ++k) {
+  if (dim.count != 4) {
-//      int d = dim[k].intValue;
+    printf("dim must have 4 elements");
-//      numel *= d;
-//      dim_vec.push_back(d);
-//    }
-    std::vector<float> cpp_result = pam_->Predict(*predict_input_, dim_vec);
    return nil;
  }
-//  printf(" predict one ");
-//  std::lock_guard<std::mutex> lock(shared_mutex);
+  // dim to c++ vector, get numel
+  std::vector<int64_t > dim_vec;
+  int numel = 1;
+  for (int k = 0; k < dim.count; ++k) {
+    int d = dim[k].intValue;
+    numel *= d;
+    dim_vec.push_back(d);
+  }
+  paddle_mobile::framework::Tensor input_tensor;
+  paddle_mobile::framework::DDim dims = paddle_mobile::framework::make_ddim(dim_vec);
+  float *input_ptr = input_tensor.mutable_data<float>(dims);
+  memcpy(input_ptr, input,
+         numel * sizeof(float));
+  std::shared_ptr<paddle_mobile::framework::Tensor> output = pam_->Predict(input_tensor);
+  float *output_pointer = new float[output->numel()];
+  memcpy(output_pointer, output->data<float>(),
+         output->numel() * sizeof(float));
+  PaddleMobileCPUResult *cpuResult = [[PaddleMobileCPUResult alloc] init];
+  [cpuResult toSetOutput: output_pointer];
+  [cpuResult toSetOutputSize: output->numel()];
+  return cpuResult;
+}
+- (NSArray *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim means:(NSArray<NSNumber *> *)means scale:(float)scale{
+//  printf(" predict one ");
+  std::lock_guard<std::mutex> lock(shared_mutex);
  if (!loaded_) {
    printf("PaddleMobile doesn't be loaded yet");
    return nil;
@@ -164,15 +277,13 @@ static std::mutex shared_mutex;
  }
  // input
-  std::vector<float> *predict_input = new std::vector<float>();
+  std::vector<float> predict_input;
  for (int j = 0; j < numel; ++j) {
-    predict_input->push_back(dataPointer[j]);
+    predict_input.push_back(dataPointer[j]);
  }
-  predict_input_ = predict_input;
  // predict
-  std::vector<float> cpp_result = pam_->Predict(*predict_input, dim_vec);
+  std::vector<float> cpp_result = pam_->Predict(predict_input, dim_vec);
  // result
  long count = 0;

--- a/src/ios_io/op_symbols.h
+++ b/src/ios_io/op_symbols.h
@@ -14,6 +14,13 @@
 #pragma once
+#include "operators/prelu_op.h"
+#include "operators/fusion_conv_add_prelu_op.h"
+#include "operators/fusion_conv_add_add_prelu_op.h"
+#include "operators/bilinear_interp_op.h"
+#include "operators/conv_transpose_op.h"
+#include "operators/crf_op.h"
+#include "operators/flatten_op.h"
 #include "operators/batchnorm_op.h"
 #include "operators/box_coder_op.h"
 #include "operators/concat_op.h"
@@ -24,11 +31,18 @@
 #include "operators/feed_op.h"
 #include "operators/fetch_op.h"
 #include "operators/fusion_conv_add.h"
+#include "operators/fusion_conv_add_bn_op.h"
+#include "operators/fusion_conv_add_relu_op.h"
+#include "operators/fusion_conv_bn_add_relu_op.h"
 #include "operators/fusion_conv_add_bn_relu_op.h"
 #include "operators/fusion_conv_bn_relu_op.h"
 #include "operators/fusion_dwconv_bn_relu_op.h"
+#include "operators/fusion_elementwise_add_relu_op.h"
 #include "operators/fusion_fc_op.h"
+#include "operators/fusion_fc_relu_op.h"
+#include "operators/gru_op.h"
 #include "operators/im2sequence_op.h"
+#include "operators/lookup_op.h"
 #include "operators/lrn_op.h"
 #include "operators/mul_op.h"
 #include "operators/multiclass_nms_op.h"
@@ -36,6 +50,11 @@
 #include "operators/prior_box_op.h"
 #include "operators/relu_op.h"
 #include "operators/reshape_op.h"
+#include "operators/resize_op.h"
+#include "operators/scale_op.h"
+#include "operators/shape_op.h"
 #include "operators/sigmoid_op.h"
+#include "operators/slice_op.h"
 #include "operators/softmax_op.h"
+#include "operators/split_op.h"
 #include "operators/transpose_op.h"
--- a/src/operators/prelu_op.cpp
+++ b/src/operators/prelu_op.cpp
@@ -34,11 +34,9 @@ void PReluOp<Dtype, T>::InferShape() const {
 * */
 namespace ops = paddle_mobile::operators;
 #ifdef PADDLE_MOBILE_CPU
-USE_OP_CPU(prelu);
 REGISTER_OPERATOR_CPU(prelu, ops::PReluOp);
 #endif
 #ifdef PADDLE_MOBILE_MALI_GPU
-USE_OP_MALI_GPU(prelu);
 REGISTER_OPERATOR_MALI_GPU(prelu, ops::PReluOp);
 #endif
 #ifdef PADDLE_MOBILE_FPGA

--- a/src/operators/prelu_op.h
+++ b/src/operators/prelu_op.h
@@ -50,4 +50,14 @@ class PReluOp : public framework::OperatorWithKernel<
 }  // namespace operators
 }  // namespace paddle_mobile
+#ifdef PADDLE_MOBILE_CPU
+USE_OP_CPU(prelu);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+USE_OP_MALI_GPU(prelu);
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+USE_OP_FPGA(prelu);
+#endif
 #endif