Merge branch 'develop' into develop

ba376dec · zhangyang0701 · GitHub · bb870d00 · ff16962d · ba376dec
9 changed file
--- a/src/common/types.h
+++ b/src/common/types.h
@@ -20,7 +20,9 @@ limitations under the License. */
 #include <vector>

 namespace paddle_mobile {
-enum class Precision : int { FP32 = 0 };
+enum class Precision : int { FP32 = 0, FP16 = 1 };
+
+typedef int16_t half;

 template <Precision p>
 struct PrecisionTrait {
@@ -31,6 +33,10 @@ template <>
 struct PrecisionTrait<Precision::FP32> {
  typedef float ptype;
 };
+template <>
+struct PrecisionTrait<Precision::FP16> {  // trait specialization for FP16
+  typedef half ptype;  // element type for FP16; `half` is typedef'd above
+};

 //! device type
 enum DeviceTypeEnum { kINVALID = -1, kCPU = 0, kFPGA = 1, kGPU_MALI = 2 };

--- a/src/framework/program/program-optimize/fusion_op_register.h
+++ b/src/framework/program/program-optimize/fusion_op_register.h
@@ -14,11 +14,13 @@ limitations under the License. */

 #pragma once

+#include <algorithm>
 #include <map>
 #include <string>
+#include <vector>

 #include "framework/operator.h"
-#include "node.h"
+#include "framework/program/program-optimize/node.h"

 namespace paddle_mobile {
 namespace framework {

--- a/src/framework/tensor.h
+++ b/src/framework/tensor.h
@@ -16,14 +16,15 @@ limitations under the License. */

 #include <cstdint>
 #include <cstring>
+#include <fstream>
 #include <memory>
+#include <string>
 #include <type_traits>
 #include <typeindex>
 #include <vector>
-#include "common/enforce.h"

-#include <fstream>
 #include "common/enforce.h"
+#include "common/types.h"
 #include "framework/data_layout.h"
 #include "framework/ddim.h"
 #include "memory/t_malloc.h"
@@ -63,7 +64,8 @@ struct SizeOfTypeFunctor<HEAD, TAIL...> {
 };

 static inline size_t SizeOfType(std::type_index type) {
-  SizeOfTypeFunctor<int, float, double, int16_t, int64_t, bool, size_t> functor;
+  SizeOfTypeFunctor<int, half, float, double, int16_t, int64_t, bool, size_t>
+      functor;
  size_t size = functor(type);

  PADDLE_MOBILE_ENFORCE(size != 0UL, "Cannot get size of type %s", type.name());

--- a/src/io/executor.cpp
+++ b/src/io/executor.cpp
@@ -187,7 +187,7 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
    memcpy(&max_value, *data + sizeof(float), sizeof(float));
    *data += 2 * sizeof(float);
    const float factor = (max_value - min_value) / 255.0;
-    uint8_t *uint8_data = (uint8_t *)(*data);
+    uint8_t *uint8_data = reinterpret_cast<uint8_t *>(*data);
    for (int k = 0; k < memory_size; ++k) {
      static_cast<float *>(memory)[k] = uint8_data[k] * factor + min_value;
    }
@@ -420,6 +420,6 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict(

 template class Executor<CPU, Precision::FP32>;
 template class Executor<FPGA, Precision::FP32>;
-template class Executor<GPU_MALI, Precision::FP32>;
+template class Executor<GPU_MALI, Precision::FP16>;

 }  // namespace paddle_mobile
--- a/src/operators/concat_op.cpp
+++ b/src/operators/concat_op.cpp
@@ -14,7 +14,9 @@ limitations under the License. */

 #ifdef CONCAT_OP

-#include "concat_op.h"
+#include <vector>
+
+#include "operators/concat_op.h"

 namespace paddle_mobile {
 namespace operators {
@@ -68,6 +70,7 @@ REGISTER_OPERATOR_CPU(concat, ops::ConcatOp);
 REGISTER_OPERATOR_MALI_GPU(concat, ops::ConcatOp);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(concat, ops::ConcatOp);
 #endif

 #endif
--- a/src/operators/concat_op.h
+++ b/src/operators/concat_op.h
@@ -53,6 +53,7 @@ USE_OP_CPU(concat);
 USE_OP_MALI_GPU(concat);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
+USE_OP_FPGA(concat);
 #endif

 #endif
--- a/src/operators/kernel/fpga/concat_kernel.cpp
+++ b/src/operators/kernel/fpga/concat_kernel.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef CONCAT_OP
+
+#include "operators/kernel/concat_kernel.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+// FPGA/half specialization of ConcatKernel::Init.
+// The parameter is not inspected; the function performs no setup work and
+// unconditionally reports success.
+template <>
+bool ConcatKernel<FPGA, half>::Init(ConcatParam * /*param*/) {
+  const bool initialized = true;
+  return initialized;
+}
+
+// FPGA/half specialization of ConcatKernel::Compute.
+//
+// Concatenates every input tensor into the output along the last (index 3)
+// dimension. The output is viewed as `rows` consecutive rows of `out_channel`
+// elements, where rows = dims[0] * dims[1] * dims[2]; for each row, each
+// input contributes its own dims[3] elements, placed one after another in
+// input order.
+//
+// NOTE(review): this assumes all tensors are rank-4 with channels stored
+// last (dims[3]) and that every input shares the output's first three
+// dimensions -- confirm against the FPGA data layout. param.Axis() is
+// deliberately not consulted: the original code read it but never used it.
+//
+// @param param  Concat parameters providing the input tensors (Inputs())
+//               and the destination tensor (Out()).
+template <>
+void ConcatKernel<FPGA, half>::Compute(const ConcatParam &param) const {
+  const auto &inputs = param.Inputs();
+  auto *out = param.Out();
+  // Allocate (or fetch) the output buffer once and reuse the pointer.
+  half *out_data = out->mutable_data<half>();
+
+  DDim out_dim = out->dims();
+  const int64_t rows = out_dim[0] * out_dim[1] * out_dim[2];
+  const int64_t out_channel = out_dim[3];
+
+  // Channel offset inside an output row at which the current input starts.
+  int64_t out_offset = 0;
+
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    auto input = inputs[i];
+    // Channel count comes from the tensor's dims, not from indexing the
+    // tensor pointer itself.
+    const int64_t channels = input->dims()[3];
+    const half *src = input->data<half>();
+    for (int64_t j = 0; j < rows; ++j) {
+      // Row j of this input goes to row j of the output, shifted by the
+      // channels already written by the preceding inputs.
+      half *dst = out_data + j * out_channel + out_offset;
+      memory::Copy(dst, src + j * channels, channels * sizeof(half));
+    }
+    // Advance only after this input has been fully copied.
+    out_offset += channels;
+  }
+}
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
--- a/tools/android-cmake/android.toolchain.cmake
+++ b/tools/android-cmake/android.toolchain.cmake
@@ -65,6 +65,8 @@ endif()
 file(TO_CMAKE_PATH "${ANDROID_NDK}" ANDROID_NDK)

 # Android NDK revision
+message("${ANDROID_NDK}")
+
 file(READ "${ANDROID_NDK}/source.properties" ANDROID_NDK_SOURCE_PROPERTIES)
 set(ANDROID_NDK_SOURCE_PROPERTIES_REGEX
  "^Pkg\\.Desc = Android NDK\nPkg\\.Revision = ([0-9]+)\\.")

--- a/tools/build.sh
+++ b/tools/build.sh
@@ -40,8 +40,8 @@ build_for_android() {
    fi

    if [ -z "$PLATFORM" ]; then
-        PLATFORM="arm-v7a"  # Users could choose "arm-v8a" platform.
-#        PLATFORM="arm-v8a"
+#        PLATFORM="arm-v7a"  # Users could choose "arm-v8a" platform.
+        PLATFORM="arm-v8a"
    fi

    if [ "${PLATFORM}" = "arm-v7a" ]; then
@@ -63,7 +63,7 @@ build_for_android() {
    TOOLCHAIN_FILE="./tools/android-cmake/android.toolchain.cmake"
    ANDROID_ARM_MODE="arm"

-    if [ "${#NETS}" > 1 ]; then
+    if [ "${#NETS}" -gt 1 ]; then
    cmake .. \
        -B"../build/release/${PLATFORM}" \
        -DANDROID_ABI="${ABI}" \
@@ -99,7 +99,7 @@ build_for_ios() {
    BUILD_DIR=../build/release/"${PLATFORM}"/
    TOOLCHAIN_FILE="./tools/ios-cmake/ios.toolchain.cmake"
    mkdir -p "${BUILD_DIR}"
-    if [ "${#NETS}" > 1 ]; then
+    if [ "${#NETS}" -gt 1 ]; then
        cmake .. \
            -B"${BUILD_DIR}" \
            -DCMAKE_BUILD_TYPE="${MODE}" \