Commit 1fbe4e79 authored by hjchen2

Merge branch 'dev-latest' of https://github.com/hjchen2/paddle-mobile into dev-latest

......@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "io/executor.h"
#include <operators/math/gemm.h>
#include <algorithm>
#include <utility>
#include <vector>
#include "common/enforce.h"
#include "common/log.h"
......@@ -26,7 +26,7 @@ limitations under the License. */
#include "framework/program/var_desc.h"
#include "framework/scope.h"
#include "framework/tensor.h"
#include "operators/math/gemm.h"
namespace paddle_mobile {
......@@ -34,9 +34,8 @@ using framework::Variable;
template <typename Dtype, Precision P>
Executor<Dtype, P>::Executor(const framework::Program<Dtype> p,
const bool use_optimize,
const bool loddable)
: program_(p), use_optimize_(use_optimize), loddable_(loddable) {
const bool use_optimize, const bool loddable)
: program_(p), use_optimize_(use_optimize), loddable_(loddable) {
Variable *variable_ptr = program_.scope->Var("batch_size");
variable_ptr->SetValue<int>(1);
to_predict_program_ =
......@@ -77,20 +76,20 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p,
}
}
template<typename Dtype>
template <typename Dtype>
void LoadMemInternal(void **data, framework::LoDTensor *tensor) {
char **data_buf = reinterpret_cast<char **>(data);
int64_t size = tensor->numel();
Dtype* tensor_data = tensor->mutable_data<Dtype>();
Dtype *tensor_data = tensor->mutable_data<Dtype>();
if (0) {
// TODO should be moved into operator init function
// TODO(hjchen2) should be moved into operator init function
float min_value;
float max_value;
memcpy(&min_value, data_buf, sizeof(float));
memcpy(&max_value, data_buf + sizeof(float), sizeof(float));
data_buf += 2 * sizeof(float);
const float factor = (max_value - min_value) / 255.0;
const uint8_t *uint8_data = reinterpret_cast<uint8_t*>(data_buf);
const uint8_t *uint8_data = reinterpret_cast<uint8_t *>(data_buf);
for (int k = 0; k < size; ++k) {
tensor_data[k] = uint8_data[k] * factor + min_value;
}
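
For context, the hunk above maps stored uint8 weights back to float with a plain affine transform, factor = (max_value - min_value) / 255. A minimal standalone sketch of that mapping, assuming the raw bytes have already been read into a buffer (the function name and container here are illustrative, not part of this patch):

#include <cstdint>
#include <vector>

// Affine dequantization: value = q * (max - min) / 255 + min.
std::vector<float> DequantizeUint8(const std::vector<uint8_t> &q,
                                   float min_value, float max_value) {
  const float factor = (max_value - min_value) / 255.0f;
  std::vector<float> out(q.size());
  for (size_t i = 0; i < q.size(); ++i) {
    out[i] = q[i] * factor + min_value;
  }
  return out;
}
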
......@@ -103,21 +102,20 @@ void LoadMemInternal(void **data, framework::LoDTensor *tensor) {
template <typename Dtype, Precision P>
void Executor<Dtype, P>::LoadMemory(
void **data,
const std::shared_ptr<framework::VarDesc> var_desc,
framework::LoDTensor *tensor) {
char **data_buf = reinterpret_cast<char**>(data);
void **data, const std::shared_ptr<framework::VarDesc> var_desc,
framework::LoDTensor *tensor) {
char **data_buf = reinterpret_cast<char **>(data);
// version
uint32_t version = *(reinterpret_cast<uint32_t*>(*data_buf));
uint32_t version = *(reinterpret_cast<uint32_t *>(*data_buf));
*data_buf += sizeof(uint32_t);
// lod information
uint64_t lod_level = *(reinterpret_cast<uint64_t*>(*data_buf));
uint64_t lod_level = *(reinterpret_cast<uint64_t *>(*data_buf));
*data_buf += sizeof(uint64_t);
auto *lod = tensor->mutable_lod();
lod->resize(lod_level);
for (uint64_t i = 0; i < lod_level; ++i) {
uint64_t size = *(reinterpret_cast<uint64_t*>(*data_buf));
uint64_t size = *(reinterpret_cast<uint64_t *>(*data_buf));
*data_buf += sizeof(uint64_t);
std::vector<size_t> tmp_dim(size / sizeof(size_t));
memcpy(tmp_dim.data(), *data_buf, size);
......@@ -125,10 +123,10 @@ void Executor<Dtype, P>::LoadMemory(
*data_buf += size;
}
// tensor version
uint32_t tensor_version = *(reinterpret_cast<uint32_t*>(*data_buf));
uint32_t tensor_version = *(reinterpret_cast<uint32_t *>(*data_buf));
*data_buf += sizeof(uint32_t);
// tensor desc size
int32_t tensor_desc_size = *(reinterpret_cast<int32_t*>(*data_buf));
int32_t tensor_desc_size = *(reinterpret_cast<int32_t *>(*data_buf));
*data_buf += sizeof(int32_t);
// skip tensor desc
*data_buf += tensor_desc_size;
......@@ -138,13 +136,13 @@ void Executor<Dtype, P>::LoadMemory(
// parse tensor from stream
switch (tensor_desc.DataType()) {
case framework::VARTYPE_TYPE_FP32:
LoadMemInternal<float>((void**)data_buf, tensor);
LoadMemInternal<float>(reinterpret_cast<void **>(data_buf), tensor);
break;
case framework::VARTYPE_TYPE_INT8:
LoadMemInternal<int8_t>((void**)data_buf, tensor);
LoadMemInternal<int8_t>(reinterpret_cast<void **>(data_buf), tensor);
break;
case framework::VARTYPE_TYPE_INT32:
LoadMemInternal<int>((void**)data_buf, tensor);
LoadMemInternal<int>(reinterpret_cast<void **>(data_buf), tensor);
break;
default:
LOG(kLOG_ERROR) << "data type is not supported";
......@@ -164,8 +162,8 @@ void Executor<Dtype, P>::InitMemory() {
char *origin_data =
ReadFileToBuff(program_.model_path + "/" + var_desc->Name());
char *data = origin_data;
LoadMemory((void**)&data, var_desc, tensor);
delete [] origin_data;
LoadMemory(reinterpret_cast<void **>(&data), var_desc, tensor);
delete[] origin_data;
} else {
if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
varInputMemory(var_desc, var, tensor);
......@@ -180,7 +178,8 @@ void Executor<Dtype, P>::InitCombineMemory() {
char *origin_data = nullptr;
bool self_alloc = false;
if (program_.combined_params_buf && program_.combined_params_len) {
origin_data = (char *)program_.combined_params_buf;
origin_data = reinterpret_cast<char *>(
const_cast<uint8_t *>(program_.combined_params_buf));
} else {
self_alloc = true;
origin_data = ReadFileToBuff(program_.para_path);
......@@ -195,7 +194,7 @@ void Executor<Dtype, P>::InitCombineMemory() {
if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
continue;
}
LoadMemory((void**)&data, var_desc, tensor);
LoadMemory(reinterpret_cast<void **>(&data), var_desc, tensor);
} else {
if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
varInputMemory(var_desc, var, tensor);
......@@ -204,7 +203,7 @@ void Executor<Dtype, P>::InitCombineMemory() {
}
}
if (self_alloc) {
delete [] origin_data;
delete[] origin_data;
}
LOG(kLOG_INFO) << "init combine memory finish";
}
......@@ -231,9 +230,9 @@ bool Executor<Dtype, P>::varInputMemory(
break;
}
bool is_mute_match = (type == framework::VARTYPE_TYPE_FP32) ||
(type == framework::VARTYPE_TYPE_INT8) ||
(type == framework::VARTYPE_TYPE_INT32) ||
(type == framework::VARTYPE_TYPE_INT64);
(type == framework::VARTYPE_TYPE_INT8) ||
(type == framework::VARTYPE_TYPE_INT32) ||
(type == framework::VARTYPE_TYPE_INT64);
PADDLE_MOBILE_ENFORCE(is_mute_match, "got unhandled data type : %d", type);
return is_mute_match;
}
......@@ -402,12 +401,12 @@ void Executor<Dtype, P>::InjectVariable(const framework::Tensor &t,
g_feed_value->GetMutable<framework::LoDTensor>();
feed_tensor->Resize(t.dims());
feed_tensor->ShareDataWith(t);
};
}
template <typename Dtype, Precision P>
void Executor<Dtype, P>::FeedData(const framework::Tensor &t) {
InjectVariable(t, "feed");
};
}
template <typename Dtype, Precision P>
std::shared_ptr<framework::Tensor> Executor<Dtype, P>::FetchResult(int id) {
......@@ -423,14 +422,14 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::FetchResult(int id) {
auto *output_tensor = framework::GetVarValue<framework::LoDTensor>(
out_keys[0], output_map, *(program_.scope));
return std::make_shared<framework::Tensor>(framework::Tensor(*output_tensor));
};
}
template <typename Dtype, Precision P>
void Executor<Dtype, P>::Predict_From_To(int start, int end) {
std::shared_ptr<framework::BlockDesc> to_predict_block =
to_predict_program_->Block(0);
auto &ops = ops_of_block_[*to_predict_block.get()];
end = end < 0 ? (int)ops.size() : end;
end = end < 0 ? static_cast<int>(ops.size()) : end;
PADDLE_MOBILE_ENFORCE(start >= 0 && start < end && end <= ops.size(),
"start or end parameter is wrong");
......@@ -451,17 +450,17 @@ void Executor<Dtype, P>::Predict_From_To(int start, int end) {
profile[i].runEnd = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
#endif
}
};
}
template <typename Dtype, Precision P>
void Executor<Dtype, P>::Predict_From(int start) {
Predict_From_To(start);
};
}
template <typename Dtype, Precision P>
void Executor<Dtype, P>::Predict_To(int end) {
Predict_From_To(0, end);
};
}
#endif
template class Executor<CPU, Precision::FP32>;
......
......@@ -14,16 +14,16 @@ limitations under the License. */
#pragma once
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "common/types.h"
#include "common/util.h"
#include "framework/lod_tensor.h"
#include "framework/operator.h"
#include "framework/program/program.h"
#include "framework/tensor.h"
#include <memory>
#include <string>
#include <vector>
#include <map>
namespace paddle_mobile {
......@@ -36,8 +36,7 @@ class Executor {
// @param use_optimize bool whether use operator fusion to speed up or not
// @param loddable bool
Executor(const framework::Program<Dtype> program,
const bool use_optimize = true,
const bool loddable = false);
const bool use_optimize = true, const bool loddable = false);
// predict with tensor input
// @param t input tensor to do prediction
......@@ -68,8 +67,8 @@ class Executor {
framework::LoDTensor *tensor) const;
void InitMemory();
void InitCombineMemory();
void LoadMemory(void** data,
const std::shared_ptr<framework::VarDesc> var_desc,
void LoadMemory(void **data,
const std::shared_ptr<framework::VarDesc> var_desc,
framework::LoDTensor *tensor);
framework::Program<Dtype> program_;
......
......@@ -30,4 +30,3 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(dequantize, ops::DequantizeOp);
#endif
......@@ -12,8 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "feed_op.h"
#include "operators/feed_op.h"
namespace ops = paddle_mobile::operators;
......@@ -26,4 +25,3 @@ REGISTER_OPERATOR_MALI_GPU(feed, ops::FeedOp);
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(feed, ops::FeedOp);
#endif
......@@ -44,7 +44,7 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
}
void RunImpl() const {
auto input = (Tensor *)const_cast<LoDTensor *>(param_.InputX());
auto input = reinterpret_cast<Tensor *>(param_.InputX());
fpga::format_image(input);
auto input_ptr = input->data<float>();
Tensor *output = param_.Out();
......@@ -53,7 +53,7 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
fpga::BypassArgs args;
args.convert_type = fpga::DATA_FP32_TO_FP16;
args.layout_type = fpga::LAYOUT_NO_CONVERT;
args.image.address = (void *)input_ptr;
args.image.address = input_ptr;
args.image.channels = input->dims()[1];
args.image.height = input->dims()[2];
args.image.width = input->dims()[3];
......@@ -78,4 +78,3 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
} // namespace operators
} // namespace paddle_mobile
......@@ -12,10 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "fetch_op.h"
namespace paddle_mobile {
namespace operators {}
} // namespace paddle_mobile
#include "operators/fetch_op.h"
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
......@@ -27,4 +24,3 @@ REGISTER_OPERATOR_MALI_GPU(fetch, ops::FetchOp);
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fetch, ops::FetchOp);
#endif
......@@ -46,4 +46,3 @@ class FetchOp : public framework::OperatorBase<DeviceType> {
} // namespace operators
} // namespace paddle_mobile
......@@ -23,16 +23,16 @@ limitations under the License. */
namespace paddle_mobile {
namespace operators {
template<>
template <>
bool DequantizeKernel<CPU, float>::Init(DequantizeParam<CPU> *param) {
return true;
}
template<>
template <>
void DequantizeKernel<CPU, float>::Compute(
const DequantizeParam<CPU> &param) const {
const Tensor *input = param.input_;
Tensor *output = param.out_;
Tensor *output = param.out_;
float activation_scale = param.activation_scale_->data<float>()[0];
float weight_scale = param.weight_scale_;
const int32_t *x = input->data<const int32_t>();
......@@ -70,7 +70,7 @@ void DequantizeKernel<CPU, float>::Compute(
}
}
} // namespace paddle_mobile
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -28,14 +28,12 @@ float32_t vmaxvq_f32(float32x4_t r) {
}
#endif
int32x4_t vrnd_towards_zero(float32x4_t r) {
return vcvtq_s32_f32(r);
}
int32x4_t vrnd_towards_zero(float32x4_t r) { return vcvtq_s32_f32(r); }
int32x4_t vrnd_away_zero(float32x4_t r) {
float32x4_t plus = vdupq_n_f32(0.5);
float32x4_t plus = vdupq_n_f32(0.5);
float32x4_t minus = vdupq_n_f32(-0.5);
float32x4_t zero = vdupq_n_f32(0);
float32x4_t zero = vdupq_n_f32(0);
uint32x4_t more_than_zero = vcgtq_f32(r, zero);
float32x4_t temp = vbslq_f32(more_than_zero, plus, minus);
temp = vaddq_f32(r, temp);
......@@ -62,7 +60,7 @@ int32x4_t vrnd_to_even(float32x4_t r) {
}
}
return ret;
#else
#else
float32x4_t point5 = vdupq_n_f32(0.5);
int32x4_t one = vdupq_n_s32(1);
int32x4_t zero = vdupq_n_s32(0);
......@@ -83,9 +81,9 @@ int32x4_t vrnd_to_even(float32x4_t r) {
mask = vaddq_u32(more_than_zero, mask);
int32x4_t smask = vreinterpretq_s32_u32(mask);
smask = vsubq_s32(smask, one);
rnd = vaddq_s32(rnd, smask);
rnd = vaddq_s32(rnd, smask);
return rnd;
#endif
#endif
}
#endif
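
vrnd_to_even above is the NEON counterpart of round-half-to-even (banker's rounding): halfway cases go to the nearest even integer. A scalar sketch of the same rule, independent of NEON (RoundToEven is a hypothetical helper, not part of this patch):

#include <cmath>
#include <cstdint>

// Round half to even: 0.5 -> 0, 1.5 -> 2, -2.5 -> -2.
int32_t RoundToEven(float x) {
  const float floor_val = std::floor(x);
  const float diff = x - floor_val;
  if (diff > 0.5f) return static_cast<int32_t>(floor_val) + 1;
  if (diff < 0.5f) return static_cast<int32_t>(floor_val);
  // Exactly halfway: pick the even neighbor.
  const int32_t lower = static_cast<int32_t>(floor_val);
  return (lower % 2 == 0) ? lower : lower + 1;
}
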
......@@ -93,7 +91,7 @@ namespace paddle_mobile {
namespace operators {
static float find_abs_max(const Tensor *input) {
float max_abs = float(0);
float max_abs = 0.f;
const float *x = input->data<const float>();
size_t size = input->numel();
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
......@@ -130,8 +128,7 @@ static float find_abs_max(const Tensor *input) {
return max_abs;
}
static void quantize_round_to_even(const Tensor *input,
const float scale,
static void quantize_round_to_even(const Tensor *input, const float scale,
Tensor *output) {
const float *x = input->data<const float>();
int8_t *y = output->data<int8_t>();
......@@ -183,9 +180,8 @@ static void quantize_round_to_even(const Tensor *input,
}
}
static void quantize_round_to_zero(const Tensor *input,
const float scale,
Tensor *output) {
static void quantize_round_to_zero(const Tensor *input, const float scale,
Tensor *output) {
const float *x = input->data<const float>();
int8_t *y = output->data<int8_t>();
size_t size = input->numel();
......@@ -225,9 +221,8 @@ static void quantize_round_to_zero(const Tensor *input,
}
}
static void quantize_round_to_nearest(const Tensor *input,
const float scale,
Tensor *output) {
static void quantize_round_to_nearest(const Tensor *input, const float scale,
Tensor *output) {
const float *x = input->data<const float>();
int8_t *y = output->data<int8_t>();
size_t size = input->numel();
......@@ -267,15 +262,14 @@ static void quantize_round_to_nearest(const Tensor *input,
}
}
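
The quantize_round_to_* variants above differ only in how x * scale is rounded before being narrowed to int8. A minimal scalar sketch of the three rules; the clamp to [-127, 127] is an assumption for illustration and is not shown in this hunk:

#include <algorithm>
#include <cmath>
#include <cstdint>

// Quantize one float to int8 under a given scale and rounding rule:
// 0 = toward zero, 1 = half away from zero, 2 = half to even.
int8_t QuantizeOne(float x, float scale, int round_mode) {
  const float scaled = x * scale;
  float rounded;
  switch (round_mode) {
    case 0:
      rounded = std::trunc(scaled);
      break;
    case 1:
      rounded = std::round(scaled);
      break;
    default:
      // std::nearbyint rounds half to even in the default FE_TONEAREST mode.
      rounded = std::nearbyint(scaled);
      break;
  }
  const float clamped = std::max(-127.0f, std::min(127.0f, rounded));
  return static_cast<int8_t>(clamped);
}
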
template<>
template <>
bool QuantizeKernel<CPU, float>::Init(QuantizeParam<CPU> *param) {
return true;
}
template<>
template <>
void QuantizeKernel<CPU, float>::Compute(
const QuantizeParam<CPU> &param) const {
// TODO
float max_abs = 0.f;
const Tensor *input = param.input_;
Tensor *output = param.out_;
......@@ -306,7 +300,7 @@ void QuantizeKernel<CPU, float>::Compute(
}
}
} // namespace paddle_mobile
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/quantize_op.h"
#include <vector>
namespace paddle_mobile {
namespace operators {
......@@ -32,4 +33,3 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(quantize, ops::QuantizeOp);
#endif
This diff is collapsed.
......@@ -3,7 +3,7 @@
TOTAL_ERRORS=0
# The trick to remove deleted files: https://stackoverflow.com/a/2413151
for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}' | grep -v ".pb.cpp" | grep -v ".pb.h"); do
for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}' | grep -v ".pb.cpp" | grep -v ".pb.h" | grep -v "protobuf-c.*"); do
cpplint $file;
TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?);
done
......