Merge branch 'develop' into ocr_attention

932c29fe · Houjiang Chen · GitHub · fca53805 · 358f8f36 · 932c29fe
12 changed files
--- a/src/common/types.cpp
+++ b/src/common/types.cpp
@@ -37,6 +37,7 @@ const char *G_OP_TYPE_FUSION_CONV_ADD = "fusion_conv_add";
 const char *G_OP_TYPE_LRN = "lrn";
 const char *G_OP_TYPE_MUL = "mul";
 const char *G_OP_TYPE_MULTICLASS_NMS = "multiclass_nms";
+const char *G_OP_TYPE_NORM = "norm";
 const char *G_OP_TYPE_POLYGON_BOX_TRANSFORM = "polygon_box_transform";
 const char *G_OP_TYPE_POOL2D = "pool2d";
 const char *G_OP_TYPE_PRIOR_BOX = "prior_box";
@@ -169,5 +170,6 @@ std::unordered_map<
        {G_OP_TYPE_FUSION_DECONV_ADD_RELU, {{"Input"}, {"Out"}}},
        {G_OP_TYPE_SEQUENCE_EXPAND, {{"X", "Y"}, {"Out"}}},
        {G_OP_TYPE_SEQUENCE_POOL, {{"X"}, {"Out"}}},
-        {G_OP_TYPE_SEQUENCE_SOFTMAX, {{"X"}, {"Out"}}}};
+        {G_OP_TYPE_SEQUENCE_SOFTMAX, {{"X"}, {"Out"}}},
+        {G_OP_TYPE_NORM, {{"X"}, {"Out", "Norm"}}}};
 }  // namespace paddle_mobile
--- a/src/fpga/V1/api.cpp
+++ b/src/fpga/V1/api.cpp
@@ -332,8 +332,8 @@ void expand_conv_arg(ConvArgs *arg) {
  auto image_win_cnt = block_len;
  auto image_win_cnt_last = block_last;
  auto res_row_data_align4_pad = res_amount_per_row_pad / 8;
-  auto prog_full_cnt = 2048 / (filter_amount_all / 16 * 2) - 1;
+  auto prog_full_cnt = 1024 / (filter_amount_all / 16 * 2) - 1;
-  if (prog_full_cnt == 1023) {
+  if (prog_full_cnt == 511) {
    prog_full_cnt--;
  }
  auto post_prog_full_cnt =

--- a/src/fpga/common/fpga_common.cpp
+++ b/src/fpga/common/fpga_common.cpp
@@ -22,26 +22,97 @@ namespace paddle_mobile {
 namespace fpga {
 int16_t fp32_2_fp16(float fp32_num) {
+  // Converts an IEEE-754 binary32 value to a binary16 bit pattern by
+  // truncating the mantissa (no rounding). se_fp32 is the 9-bit sign+exponent
+  // field: 0..255 when the sign bit is clear, 256..511 when it is set, so the
+  // second half of the chain mirrors the first with the half sign bit
+  // (0x8000) added.
-  unsigned long tmp = *(unsigned long *)(&fp32_num);  // NOLINT
+  int32_t tmp = *(reinterpret_cast<int32_t *>(&fp32_num));
-  auto t = (int16_t)(((tmp & 0x007fffff) >> 13) | ((tmp & 0x80000000) >> 16) |
+  int16_t se_fp32 = (tmp >> 23) & 0x1ff;
-                     (((tmp & 0x7f800000) >> 13) - (112 << 10)));
+  int32_t m_fp32 = tmp & 0x007fffff;  // 23-bit mantissa
-  if (tmp & 0x1000) {
+  int16_t se_fp16 = 0;
-    t++;  // roundoff
+  int16_t m_fp16 = 0;
+  if (se_fp32 < 103) {  // below the smallest half subnormal: +0
+    se_fp16 = 0x0000;
+    m_fp16 = m_fp32 >> 24;  // always 0 for a 23-bit mantissa
+  } else if (se_fp32 < 113) {  // positive half subnormal
+    se_fp16 = (0x0400 >> (113 - se_fp32));
+    m_fp16 = m_fp32 >> (126 - se_fp32);
+  } else if (se_fp32 <= 142) {  // positive half normal: rebias by 112
+    se_fp16 = (se_fp32 - 112) << 10;
+    m_fp16 = m_fp32 >> 13;
+  } else if (se_fp32 < 255) {  // positive overflow: +inf
+    se_fp16 = 0x7C00;
+    m_fp16 = m_fp32 >> 24;
+  } else if (se_fp32 == 255) {  // +inf / NaN
+    se_fp16 = 0x7C00;
+    m_fp16 = m_fp32 >> 13;
+  } else if (se_fp32 < 359) {  // negative underflow: -0
+    se_fp16 = 0x8000;
+    m_fp16 = m_fp32 >> 24;
+  } else if (se_fp32 < 369) {  // negative half subnormal
+    se_fp16 = (0x0400 >> (369 - se_fp32)) | 0x8000;
+    m_fp16 = m_fp32 >> (382 - se_fp32);
+  } else if (se_fp32 <= 398) {  // negative half normal
+    se_fp16 = ((se_fp32 - 368) << 10) | 0x8000;
+    m_fp16 = m_fp32 >> 13;
+  } else if (se_fp32 < 511) {  // negative overflow: -inf (sign bit kept)
+    se_fp16 = 0xFC00;
+    m_fp16 = m_fp32 >> 24;
+  } else {  // -inf / NaN with the sign bit set
+    se_fp16 = 0xFC00;
+    m_fp16 = m_fp32 >> 13;
+  }
+  int16_t result = se_fp16 + m_fp16;
+  return result;
+}
+int32_t convertmantissa(int32_t i) {  // float bits for a half subnormal mantissa i
+  int32_t m = i << 13;  // move the half mantissa to the float mantissa position
+  int32_t e = 0;
+  while (!(m & 0x00800000)) {  // normalize until bit 23 is set; never ends if i == 0
+    e -= 0x00800000;  // one exponent step down per shift
+    m <<= 1;
   }
-  return t;
+  m &= ~0x00800000;  // clear bit 23 (the implicit leading one)
+  e += 0x38800000;  // add the exponent base, 113 << 23
+  return m | e;
 }
 float fp16_2_fp32(int16_t fp16_num) {
+  // Expands an IEEE-754 binary16 bit pattern into a binary32 value.
+  // se_fp16 is the 6-bit sign+exponent field: 0..31 when the sign bit is
+  // clear, 32..63 when it is set. The mask is required because fp16_num is
+  // signed: without it >> sign-extends and the 32..63 branches can never run.
-  if (0 == fp16_num) {
+  int16_t se_fp16 = (fp16_num >> 10) & 0x3f;
-    return 0;
+  int16_t m_fp16 = fp16_num & 0x3ff;  // 10-bit mantissa
+  int32_t e_fp32 = 0;
+  int16_t offset = 0;  // 1024 marks "has an implicit leading one"
+  int32_t m_fp32 = 0;
+  if (se_fp16 == 0) {  // +0 or positive subnormal
+    e_fp32 = 0;
+    offset = 0;
+  } else if (se_fp16 < 31) {  // positive normal
+    e_fp32 = se_fp16 << 23;
+    offset = 1024;
+  } else if (se_fp16 == 31) {  // +inf / NaN
+    e_fp32 = 0x47800000;
+    offset = 1024;
+  } else if (se_fp16 == 32) {  // -0 or negative subnormal
+    e_fp32 = 0x80000000;
+    offset = 0;
+  } else if (se_fp16 < 63) {  // negative normal
+    e_fp32 = 0x80000000 + ((se_fp16 - 32) << 23);  // parens: + binds tighter than <<
+    offset = 1024;
+  } else {  // se_fp16 == 63
+    e_fp32 = 0xC7800000;
+    offset = 1024;
+  }
+  int16_t a = offset + m_fp16;
+  if (a == 0) {  // signed zero: no mantissa bits
+    m_fp32 = 0;
+  } else if (a < 1024) {  // subnormal: renormalize the mantissa
+    int32_t tmp = a;
+    m_fp32 = convertmantissa(tmp);
+  } else {  // normal / inf / NaN: add the 112 exponent rebias
+    int32_t tmp = a - 1024;
+    m_fp32 = 0x38000000 + (tmp << 13);
   }
-  int frac = (fp16_num & 0x3ff);
-  int exp = ((fp16_num & 0x7c00) >> 10) + 112;
+  int32_t tmp = e_fp32 + m_fp32;
-  int s = fp16_num & 0x8000;
+  float fp32_num = *(reinterpret_cast<float *>(&tmp));
-  int tmp = 0;
-  float fp32_num;
-  tmp = s << 16 | exp << 23 | frac << 13;
-  fp32_num = *(float *)&tmp;  // NOLINT
   return fp32_num;
 }
@@ -126,6 +197,5 @@ uint64_t vaddr_to_paddr(void *address) {
  return 0;
 #endif
 }
 }  // namespace fpga
 }  // namespace paddle_mobile
--- a/src/fpga/common/fpga_common.h
+++ b/src/fpga/common/fpga_common.h
@@ -256,6 +256,6 @@ int fpga_invalidate(void* address, size_t size);
 uint64_t vaddr_to_paddr(void* address);
 void expand_conv_arg(ConvArgs* arg);
 void expand_EW_arg(EWAddArgs* arg);
+int32_t convertmantissa(int32_t i);  // defined out of line in fpga_common.cpp, so not inline
 }  // namespace fpga
 }  // namespace paddle_mobile
--- a/src/operators/kernel/arm/norm_kernel.cpp
+++ b/src/operators/kernel/arm/norm_kernel.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef NORM_OP
+#include "operators/kernel/norm_kernel.h"
+#include "operators/kernel/central-arm-func/norm_arm_func.h"
+namespace paddle_mobile {
+namespace operators {
+template <>
+bool NormKernel<CPU, float>::Init(NormParam<CPU> *param) {  // no setup; param is unused
+  return true;
+}
+template <>
+void NormKernel<CPU, float>::Compute(const NormParam<CPU> &param) {  // CPU float entry point
+  NormCompute<float>(param);  // implementation lives in norm_arm_func.h
+}
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/central-arm-func/norm_arm_func.h
+++ b/src/operators/kernel/central-arm-func/norm_arm_func.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef NORM_OP
+#pragma once
+#include <cmath>
+#include "operators/op_param.h"
+namespace paddle_mobile {
+namespace operators {
+// Factors the shape `dim` around `axis`: *pre receives the product of the
+// extents before `axis`, *n the extent at `axis`, and *post the product of
+// the extents after it.
+inline void GetDims(const framework::DDim &dim, int axis, int *pre, int *n,
+                    int *post) {
+  int &outer = *pre;
+  int &inner = *post;
+  outer = 1;
+  inner = 1;
+  *n = dim[axis];
+  int idx = 0;
+  while (idx < axis) {
+    outer *= dim[idx];
+    ++idx;
+  }
+  idx = axis + 1;
+  while (idx < dim.size()) {
+    inner *= dim[idx];
+    ++idx;
+  }
+}
+// Normalizes the input along `axis`. Viewing the input as [pre, n, post],
+// the Norm output holds sqrt(epsilon + sum over n of x * x) for every
+// (pre, post) position, and Out holds x divided by that value.
+template <typename P>
+void NormCompute(const NormParam<CPU> &param) {
+  const float eps = param.Epsilon();
+  int axis = param.Axis();
+  const framework::Tensor *src_tensor = param.InputX();
+  framework::Tensor *norm_tensor = param.OutputNorm();
+  framework::Tensor *dst_tensor = param.Out();
+  auto shape = src_tensor->dims();
+  if (axis < 0) {
+    // A negative axis counts back from the last dimension.
+    axis += shape.size();
+  }
+  int pre, n, post;
+  GetDims(shape, axis, &pre, &n, &post);
+  const float *src_base = src_tensor->data<float>();
+  float *norm_base = norm_tensor->mutable_data<float>();
+  float *dst_base = dst_tensor->mutable_data<float>();
+  for (int p = 0; p < pre; ++p) {
+    const float *src = src_base + p * n * post;
+    float *dst = dst_base + p * n * post;
+    float *len = norm_base + p * post;
+    // Channel 0 seeds the accumulator: len = epsilon + x * x.
+    for (int i = 0; i < post; ++i) {
+      len[i] = eps;
+      len[i] += src[i] * src[i];
+    }
+    // Remaining channels: len += x * x.
+    for (int c = 1; c < n; ++c) {
+      const float *row = src + c * post;
+      for (int i = 0; i < post; ++i) {
+        len[i] += row[i] * row[i];
+      }
+    }
+    // len = sqrt(len)
+    for (int i = 0; i < post; ++i) {
+      len[i] = sqrtf(len[i]);
+    }
+    // out = input / len, reusing the same len row for every channel.
+    for (int c = 0; c < n; ++c) {
+      const float *row = src + c * post;
+      float *out_row = dst + c * post;
+      for (int j = 0; j < post; ++j) {
+        out_row[j] = row[j] / len[j];
+      }
+    }
+  }
+}
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/norm_kernel.h
+++ b/src/operators/kernel/norm_kernel.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef NORM_OP
+#pragma once
+#include "framework/operator.h"
+#include "operators/op_param.h"
+namespace paddle_mobile {
+namespace operators {
+template <typename DeviceType, typename T>
+class NormKernel  // kernel interface for the norm operator
+    : public framework::OpKernelBase<DeviceType, NormParam<DeviceType>> {
+ public:
+  void Compute(const NormParam<DeviceType> &param);  // CPU/float body in kernel/arm/norm_kernel.cpp
+  bool Init(NormParam<DeviceType> *param);  // the CPU/float specialization always returns true
+};
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif
--- a/src/operators/norm_op.cpp
+++ b/src/operators/norm_op.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef NORM_OP
+#include "operators/norm_op.h"
+#include "framework/op_proto_maker.h"
+#include "framework/op_registry.h"
+namespace paddle_mobile {
+namespace operators {
+// Shape inference for norm: Out takes the shape of the input, and the Norm
+// output takes the same shape with the normalized axis collapsed to 1.
+template <typename Dtype, typename T>
+void NormOp<Dtype, T>::InferShape() const {
+  const auto in_dims = this->param_.InputX()->dims();
+  this->param_.Out()->Resize(in_dims);
+  int norm_axis = this->param_.Axis();
+  if (norm_axis < 0) {
+    // A negative axis counts back from the last dimension.
+    norm_axis += in_dims.size();
+  }
+  auto norm_dims = in_dims;
+  norm_dims[norm_axis] = 1;
+  this->param_.OutputNorm()->Resize(norm_dims);
+}
+}  // namespace operators
+}  // namespace paddle_mobile
+namespace ops = paddle_mobile::operators;
+#ifdef PADDLE_MOBILE_CPU
+REGISTER_OPERATOR_CPU(norm, ops::NormOp);  // only the CPU build registers norm
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU  // empty: nothing registered for Mali GPU
+#endif
+#ifdef PADDLE_MOBILE_FPGA  // empty: nothing registered for FPGA
+#endif
+#ifdef PADDLE_MOBILE_CL  // empty: nothing registered for CL
+#endif
+#endif
--- a/src/operators/norm_op.h
+++ b/src/operators/norm_op.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef NORM_OP
+#pragma once
+#include <string>
+#include "framework/operator.h"
+#include "operators/kernel/norm_kernel.h"
+#include "operators/op_param.h"
+namespace paddle_mobile {
+namespace operators {
+using std::string;
+template <typename DeviceType, typename T>
+class NormOp  // norm operator: binds NormParam and NormKernel for a device
+    : public framework::OperatorWithKernel<DeviceType, NormParam<DeviceType>,
+                                           NormKernel<DeviceType, T>> {
+ public:
+  NormOp(const string &type, const VariableNameMap &inputs,  // forwards every argument to the base
+         const VariableNameMap &outputs, const framework::AttributeMap &attrs,
+         std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<DeviceType, NormParam<DeviceType>,
+                                      NormKernel<DeviceType, T>>(
+            type, inputs, outputs, attrs, scope) {}
+  void InferShape() const override;  // defined in norm_op.cpp
+ protected:
+};
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -280,6 +280,11 @@ class OpParam {
    return GetVarValue<T>("OutputBox", outputs, scope);
  }
+  template <typename T>
+  static T *OutputNormFrom(const VariableNameMap &outputs, const Scope &scope) {  // looks up the "Norm" output
+    return GetVarValue<T>("Norm", outputs, scope);
+  }
  template <typename T>
  static T *OutputVariancesFrom(const VariableNameMap &outputs,
                                const Scope &scope) {
@@ -733,6 +738,41 @@ class LrnParam : public OpParam {
 };
 #endif
+#ifdef NORM_OP
+template <typename Dtype>
+class NormParam : OpParam {  // inputs, outputs and attributes of the norm operator
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
+ public:
+  NormParam(const VariableNameMap &inputs, const VariableNameMap &outputs,  // resolves everything once, up front
+            const AttributeMap &attrs, const Scope &scope) {
+    input_x_ = InputXFrom<GType>(inputs, scope);
+    out_ = OutFrom<GType>(outputs, scope);
+    output_norm_ = OutputNormFrom<GType>(outputs, scope);  // the "Norm" output variable
+    epsilon_ = GetAttr<float>("epsilon", attrs);
+    axis_ = GetAttr<int>("axis", attrs);
+  }
+  const RType *InputX() const { return input_x_; }
+  RType *Out() const { return out_; }
+  RType *OutputNorm() const { return output_norm_; }
+  const float &Epsilon() const { return epsilon_; }  // value of the "epsilon" attribute
+  const int &Axis() const { return axis_; }  // value of the "axis" attribute; callers handle negatives
+ private:
+  RType *input_x_;
+  RType *out_;
+  RType *output_norm_;
+  float epsilon_;
+  int axis_;
+};
+#endif
 #ifdef BATCHNORM_OP
 template <typename Dtype>
 class BatchNormParam : OpParam {

--- a/tools/ios-cmake/ios.toolchain.cmake
+++ b/tools/ios-cmake/ios.toolchain.cmake
@@ -146,6 +146,7 @@ if (NOT DEFINED CMAKE_IOS_DEVELOPER_ROOT)
 endif (NOT DEFINED CMAKE_IOS_DEVELOPER_ROOT)
 set (CMAKE_IOS_DEVELOPER_ROOT ${CMAKE_IOS_DEVELOPER_ROOT} CACHE PATH "Location of iOS Platform")
+set(CMAKE_IOS_SDK_ROOT "/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS.sdk")  # NOTE(review): unconditional, so the NOT DEFINED auto-detection below never runs; confirm this is intended
 # Find and use the most recent iOS sdk unless specified manually with CMAKE_IOS_SDK_ROOT
 if (NOT DEFINED CMAKE_IOS_SDK_ROOT)
  file (GLOB _CMAKE_IOS_SDKS "${CMAKE_IOS_DEVELOPER_ROOT}/SDKs/*")

--- a/tools/op.cmake
+++ b/tools/op.cmake
@@ -215,6 +215,7 @@ endif()
 if(NOT FOUND_MATCH)
  message("--default--")
+  set(NORM_OP ON)
  set(BATCHNORM_OP ON)
  set(CONV_TRANSPOSE_OP ON)
  set(BOXCODER_OP ON)
@@ -302,6 +303,9 @@ endif()
  # option(TRANSPOSE2_OP "" ON)
 # endif ()
+if (NORM_OP)
+  add_definitions(-DNORM_OP)
+endif()
 if (BATCHNORM_OP)
  add_definitions(-DBATCHNORM_OP)
 endif()