Unverified commit 523a949c, authored by eclipsycn, committed by GitHub

Merge branch 'develop' into develop

-cmake_minimum_required(VERSION 3.0)
+cmake_minimum_required(VERSION 3.6)
 project(paddle-mobile)
 option(DEBUGING "enable debug mode" ON)
-option(USE_OPENMP "openmp support" ON)
+option(USE_OPENMP "openmp support" OFF)
 option(USE_EXCEPTION "use std exception" ON)
 option(LOG_PROFILE "log profile" ON)
 # select the platform to build
...
@@ -15,7 +15,7 @@ file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h)
 include_directories(src/)
 if(IS_IOS)
-  set(CMAKE_CXX_FLAGS "-fobjc-abi-version=2 -fobjc-arc -std=gnu++11 -stdlib=libc++ -O3 -s -isysroot ${CMAKE_OSX_SYSROOT} ${CMAKE_CXX_FLAGS}")
+  set(CMAKE_CXX_FLAGS "-mfpu=neon -marm -fobjc-abi-version=2 -fobjc-arc -std=gnu++11 -stdlib=libc++ -O3 -s -isysroot ${CMAKE_OSX_SYSROOT} ${CMAKE_CXX_FLAGS}")
 else()
   set(CMAKE_CXX_FLAGS "-std=c++14 -O3 -s ${CMAKE_CXX_FLAGS}")
 endif()
...
@@ -43,7 +43,7 @@ if (LOG_PROFILE)
   add_definitions(-DPADDLE_MOBILE_PROFILE)
 endif()
-if(USE_OPENMP)
+if(USE_OPENMP AND NOT IS_IOS)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
   add_definitions(-DPADDLE_MOBILE_USE_OPENMP)
 endif()
...
@@ -104,12 +104,21 @@ else()
   foreach(f ${_tmp_list_h})
     list(REMOVE_ITEM PADDLE_MOBILE_H ${f})
   endforeach()
-endif()
+  file(GLOB_RECURSE _tmp_list src/fpga/*.cpp src/fpga/*.cc)
+  foreach(f ${_tmp_list})
+    list(REMOVE_ITEM PADDLE_MOBILE_CC ${f})
+  endforeach()
+  file(GLOB_RECURSE _tmp_list_h src/fpga/*.h)
+  foreach(f ${_tmp_list_h})
+    list(REMOVE_ITEM PADDLE_MOBILE_H ${f})
+  endforeach()
+endif()
 if (ANDROID_NDK_TOOLCHAIN_INCLUDED)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -llog")
+  add_definitions(-DARMV7)
 else()
   list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/jni/paddle_mobile_jni.h)
   list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/jni/paddle_mobile_jni.cpp)
...
@@ -130,8 +139,8 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY build)
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY build)
 # NET default
-set(NET "defult" CACHE STRING "select net type")
-set_property(CACHE NET PROPERTY STRINGS "defult" "googlenet" "mobilenet" "yolo" "squeezenet")
+set(NET "default" CACHE STRING "select net type")
+set_property(CACHE NET PROPERTY STRINGS "default" "googlenet" "mobilenet" "yolo" "squeezenet" "FPGAnets")
 include("${CMAKE_CURRENT_LIST_DIR}/tools/op.cmake")
...
@@ -153,3 +162,4 @@ if(DEBUGING)
   endif()
 endif()
...
@@ -27,10 +27,10 @@ Paddle-Mobile is a project under the PaddlePaddle organization: a deep learning framework dedicated to embedded platforms.
 - **ARM CPU**
-![](http://7xop3k.com1.z0.glb.clouddn.com/15312108766575.jpg)
+![](http://mms-graph.bj.bcebos.com/paddle-mobile%2F2018_07_29.png)
 ARM CPUs are paddle-mobile's primary target, and the CPU's generality has always been its strength. Embedded deep learning needs a great deal of hand-written CPU assembly; we are coding intensively to extract every bit of acceleration the hardware offers.
-CPU optimization is still in progress, and only conventional optimizations are applied so far. On an ARM A73, paddle-mobile (arm-v7) currently runs one pass of mobilenet 1.0 in 120+ ms on a single core. This is clearly not our final goal: we are rewriting hot paths in assembly, so there is still large headroom. Only armv7 is supported for now; armv8 support will follow.
+CPU optimization is still in progress, and only conventional optimizations are applied so far. On an ARM A73, paddle-mobile (arm-v7) currently runs one pass of mobilenet 1.0 in 110+ ms on a single core. This is clearly not our final goal: we are rewriting hot paths in assembly, so there is still large headroom. Only armv7 is supported for now; armv8 support will follow.
 - **Mali GPU**
...
# Quantification: model quantization and dequantization
## Background
Models trained from some networks, such as AlexNet, are too large to be practical on mobile devices.
## Ways to shrink an oversized model
1. Choose a model architecture suited to mobile, such as mobilenet, googlenet, yolo, or squeezenet;
2. Use the quantization tool we provide, which shrinks a float32 model to roughly 1/4 of its original size with almost no loss of accuracy; see the sketch below.
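The on-disk format the tool produces (as reflected by the `Executor::LoadMemory` changes later in this diff) stores each tensor's min/max as two leading floats followed by uint8 values, so that a value is recovered as `q * (max - min) / 255 + min`. Below is a minimal sketch of the forward mapping; the helper name `Quantize` is hypothetical, and a non-empty input with max > min is assumed:

```c++
#include <algorithm>
#include <cstdint>
#include <vector>

// Sketch of per-tensor linear quantization: float32 -> uint8 over [min, max].
// Dequantization (as in the executor below): value = q * (max - min) / 255 + min
std::vector<uint8_t> Quantize(const std::vector<float> &src, float *min_value,
                              float *max_value) {
  const auto mm = std::minmax_element(src.begin(), src.end());
  *min_value = *mm.first;
  *max_value = *mm.second;
  const float factor = (*max_value - *min_value) / 255.0f;  // bucket width
  std::vector<uint8_t> dst(src.size());
  for (size_t i = 0; i < src.size(); ++i) {
    // Map each value to the nearest of 256 evenly spaced levels.
    dst[i] = static_cast<uint8_t>((src[i] - *min_value) / factor + 0.5f);
  }
  return dst;
}
```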
- - - - -
## The quantization tool
### Model conversion tool locations:
- [Quantization tool directory](https://github.com/PaddlePaddle/paddle-mobile/tree/develop/tools/quantification)
- [Model conversion tool](https://github.com/PaddlePaddle/paddle-mobile/blob/develop/tools/quantification/convert.cpp)
#### Usage
- [Tool usage](https://github.com/PaddlePaddle/paddle-mobile/blob/develop/tools/quantification/README.md)
## How to load a quantized model
The Load method gained a quantification parameter, which defaults to false. To load a quantized model, pass true.
[Source code](https://github.com/PaddlePaddle/paddle-mobile/blob/55302b33ea3bd68c9797d8f65e527544792b8095/src/io/paddle_mobile.h)
```c++
bool Load(const std::string &dirname, bool optimize = false,
bool quantification = false, int batch_size = 1);
```
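For example, a sketch of a call site (the model directory path is hypothetical, and the CPU device type with the default precision is assumed):

```c++
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
// optimize = true, quantification = true
paddle_mobile.Load("./quantized_model_dir", true, true);
```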
- - - - -
...
@@ -17,38 +17,46 @@ limitations under the License. */
 namespace paddle_mobile {
-const std::string G_OP_TYPE_CONV = "conv2d";
-const std::string G_OP_TYPE_BATCHNORM = "batch_norm";
-const std::string G_OP_TYPE_BOX_CODER = "box_coder";
-const std::string G_OP_TYPE_CONCAT = "concat";
-const std::string G_OP_TYPE_ELEMENTWISE_ADD = "elementwise_add";
-const std::string G_OP_TYPE_FUSION_CONV_ADD_RELU = "fusion_conv_add_relu";
-const std::string G_OP_TYPE_FUSION_CONV_ADD_BN_RELU = "fusion_conv_add_bn_relu";
-const std::string G_OP_TYPE_FUSION_DWCONV_BN_RELU = "fusion_dwconv_bn_relu";
-const std::string G_OP_TYPE_FC = "fusion_fc";
-const std::string G_OP_TYPE_FUSION_CONV_ADD = "fusion_conv_add";
-const std::string G_OP_TYPE_LRN = "lrn";
-const std::string G_OP_TYPE_MUL = "mul";
-const std::string G_OP_TYPE_MULTICLASS_NMS = "multiclass_nms";
-const std::string G_OP_TYPE_POOL2D = "pool2d";
-const std::string G_OP_TYPE_PRIOR_BOX = "prior_box";
-const std::string G_OP_TYPE_RELU = "relu";
-const std::string G_OP_TYPE_RESHAPE = "reshape";
-const std::string G_OP_TYPE_SIGMOID = "sigmoid";
-const std::string G_OP_TYPE_SOFTMAX = "softmax";
-const std::string G_OP_TYPE_TRANSPOSE = "transpose";
-const std::string G_OP_TYPE_SPLIT = "split";
-const std::string G_OP_TYPE_FEED = "feed";
-const std::string G_OP_TYPE_FETCH = "fetch";
-const std::string G_OP_TYPE_DEPTHWISE_CONV = "depthwise_conv2d";
-const std::string G_OP_TYPE_IM2SEQUENCE = "im2sequence";
-const std::string G_OP_TYPE_DROPOUT = "dropout";
+const char *G_OP_TYPE_CONV = "conv2d";
+const char *G_OP_TYPE_BATCHNORM = "batch_norm";
+const char *G_OP_TYPE_BOX_CODER = "box_coder";
+const char *G_OP_TYPE_CONCAT = "concat";
+const char *G_OP_TYPE_ELEMENTWISE_ADD = "elementwise_add";
+const char *G_OP_TYPE_FUSION_CONV_ADD_RELU = "fusion_conv_add_relu";
+const char *G_OP_TYPE_FUSION_CONV_ADD_BN_RELU = "fusion_conv_add_bn_relu";
+const char *G_OP_TYPE_FUSION_DWCONV_BN_RELU = "fusion_dwconv_bn_relu";
+const char *G_OP_TYPE_FUSION_CONV_BN_RELU = "fusion_conv_bn_relu";
+const char *G_OP_TYPE_FC = "fusion_fc";
+const char *G_OP_TYPE_FUSION_CONV_ADD = "fusion_conv_add";
+const char *G_OP_TYPE_LRN = "lrn";
+const char *G_OP_TYPE_MUL = "mul";
+const char *G_OP_TYPE_MULTICLASS_NMS = "multiclass_nms";
+const char *G_OP_TYPE_POOL2D = "pool2d";
+const char *G_OP_TYPE_PRIOR_BOX = "prior_box";
+const char *G_OP_TYPE_RELU = "relu";
+const char *G_OP_TYPE_RESHAPE = "reshape";
+const char *G_OP_TYPE_SIGMOID = "sigmoid";
+const char *G_OP_TYPE_SOFTMAX = "softmax";
+const char *G_OP_TYPE_TRANSPOSE = "transpose";
+const char *G_OP_TYPE_SPLIT = "split";
+const char *G_OP_TYPE_FEED = "feed";
+const char *G_OP_TYPE_FETCH = "fetch";
+const char *G_OP_TYPE_DEPTHWISE_CONV = "depthwise_conv2d";
+const char *G_OP_TYPE_IM2SEQUENCE = "im2sequence";
+const char *G_OP_TYPE_DROPOUT = "dropout";
+const char *G_OP_TYPE_FUSION_CONV_ADD_BN = "fusion_conv_add_bn";
+const char *G_OP_TYPE_FUSION_POOL_BN = "fusion_pool_bn";
+const char *G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU =
+    "fusion_elementwise_add_relu";
+const char *G_OP_TYPE_FUSION_FC_RELU = "fusion_fc_relu";
+const char *G_OP_TYPE_REGION = "region";
 std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
     op_input_output_key = {
         {G_OP_TYPE_CONV, {{"Input"}, {"Output"}}},
+        {G_OP_TYPE_FUSION_DWCONV_BN_RELU, {{"Input"}, {"Out"}}},
+        {G_OP_TYPE_FUSION_CONV_BN_RELU, {{"Input"}, {"Out"}}},
         {G_OP_TYPE_FUSION_CONV_ADD, {{"Input"}, {"Out"}}},
         {G_OP_TYPE_RELU, {{"X"}, {"Out"}}},
         {G_OP_TYPE_SOFTMAX, {{"X"}, {"Out"}}},
...
@@ -72,6 +80,11 @@ std::unordered_map<
         {G_OP_TYPE_DEPTHWISE_CONV, {{"Input"}, {"Output"}}},
         {G_OP_TYPE_FUSION_CONV_ADD_RELU, {{"Input"}, {"Out"}}},
         {G_OP_TYPE_IM2SEQUENCE, {{"X"}, {"Out"}}},
-        {G_OP_TYPE_DROPOUT, {{"X"}, {"Out"}}}};
+        {G_OP_TYPE_DROPOUT, {{"X"}, {"Out"}}},
+        {G_OP_TYPE_FUSION_CONV_ADD_BN, {{"Input"}, {"Out"}}},
+        {G_OP_TYPE_FUSION_POOL_BN, {{"X"}, {"Out"}}},
+        {G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU, {{"X", "Y"}, {"Out"}}},
+        {G_OP_TYPE_FUSION_FC_RELU, {{"X", "Y", "Z"}, {"Out"}}},
+        {G_OP_TYPE_REGION, {{"X"}, {"Out"}}}};
 }  // namespace paddle_mobile
...
@@ -16,6 +16,7 @@ limitations under the License. */
 #include <string>
 #include <unordered_map>
+#include <utility>
 #include <vector>
 namespace paddle_mobile {
...
@@ -72,33 +73,40 @@ enum PMStatus {
   PMWrongDevice = 0x08 /*!< un-correct device. */
 };
-extern const std::string G_OP_TYPE_CONV;
-extern const std::string G_OP_TYPE_BATCHNORM;
-extern const std::string G_OP_TYPE_BOX_CODER;
-extern const std::string G_OP_TYPE_CONCAT;
-extern const std::string G_OP_TYPE_ELEMENTWISE_ADD;
-extern const std::string G_OP_TYPE_FUSION_CONV_ADD_RELU;
-extern const std::string G_OP_TYPE_FC;
-extern const std::string G_OP_TYPE_FUSION_CONV_ADD;
-extern const std::string G_OP_TYPE_FUSION_CONV_ADD_BN_RELU;
-extern const std::string G_OP_TYPE_FUSION_DWCONV_BN_RELU;
-extern const std::string G_OP_TYPE_LRN;
-extern const std::string G_OP_TYPE_MUL;
-extern const std::string G_OP_TYPE_MULTICLASS_NMS;
-extern const std::string G_OP_TYPE_POOL2D;
-extern const std::string G_OP_TYPE_PRIOR_BOX;
-extern const std::string G_OP_TYPE_RELU;
-extern const std::string G_OP_TYPE_RESHAPE;
-extern const std::string G_OP_TYPE_SIGMOID;
-extern const std::string G_OP_TYPE_SOFTMAX;
-extern const std::string G_OP_TYPE_TRANSPOSE;
-extern const std::string G_OP_TYPE_SPLIT;
-extern const std::string G_OP_TYPE_FEED;
-extern const std::string G_OP_TYPE_FETCH;
-extern const std::string G_OP_TYPE_DEPTHWISE_CONV;
-extern const std::string G_OP_TYPE_IM2SEQUENCE;
-extern const std::string G_OP_TYPE_DROPOUT;
+extern const char *G_OP_TYPE_CONV;
+extern const char *G_OP_TYPE_BATCHNORM;
+extern const char *G_OP_TYPE_BOX_CODER;
+extern const char *G_OP_TYPE_CONCAT;
+extern const char *G_OP_TYPE_ELEMENTWISE_ADD;
+extern const char *G_OP_TYPE_FUSION_CONV_ADD_RELU;
+extern const char *G_OP_TYPE_FC;
+extern const char *G_OP_TYPE_FUSION_CONV_ADD;
+extern const char *G_OP_TYPE_FUSION_CONV_ADD_BN_RELU;
+extern const char *G_OP_TYPE_FUSION_DWCONV_BN_RELU;
+extern const char *G_OP_TYPE_FUSION_CONV_BN_RELU;
+extern const char *G_OP_TYPE_LRN;
+extern const char *G_OP_TYPE_MUL;
+extern const char *G_OP_TYPE_MULTICLASS_NMS;
+extern const char *G_OP_TYPE_POOL2D;
+extern const char *G_OP_TYPE_PRIOR_BOX;
+extern const char *G_OP_TYPE_RELU;
+extern const char *G_OP_TYPE_RESHAPE;
+extern const char *G_OP_TYPE_SIGMOID;
+extern const char *G_OP_TYPE_SOFTMAX;
+extern const char *G_OP_TYPE_TRANSPOSE;
+extern const char *G_OP_TYPE_SPLIT;
+extern const char *G_OP_TYPE_FEED;
+extern const char *G_OP_TYPE_FETCH;
+extern const char *G_OP_TYPE_DEPTHWISE_CONV;
+extern const char *G_OP_TYPE_IM2SEQUENCE;
+extern const char *G_OP_TYPE_DROPOUT;
+extern const char *G_OP_TYPE_FUSION_CONV_ADD_BN;
+extern const char *G_OP_TYPE_FUSION_POOL_BN;
+extern const char *G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU;
+extern const char *G_OP_TYPE_FUSION_FC_RELU;
+extern const char *G_OP_TYPE_REGION;
 extern std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
...
...
@@ -84,7 +84,7 @@ struct Variant {
     if (type_id == typeid(T).hash_code()) {
       return *const_cast<T *>(reinterpret_cast<const T *>(&data));
     } else {
-      PADDLE_MOBILE_THROW_EXCEPTION(" bad cast in variant ");
+      PADDLE_MOBILE_THROW_EXCEPTION(" bad cast in variant");
       exit(0);
     }
   }
...
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstring>
#include "fpga/api/fpga_api.h"
namespace paddle {
namespace mobile {
namespace fpga {
namespace api {
static int fd = -1;
static const char *device_path = "/dev/fpgadrv0";
static inline int do_ioctl(int req, void *arg) { return ioctl(fd, req, arg); }
int open_device() {
if (fd == -1) {
fd = open(device_path, O_RDWR);
}
return fd;
}
// memory management;
void *fpga_malloc(size_t size) {
  return reinterpret_cast<void *>(mmap64(NULL, size, PROT_READ | PROT_WRITE,
                                         MAP_SHARED, fd, 0));
}
void fpga_free(void *ptr) { munmap(ptr, 0); }
void fpga_copy(void *dest, const void *src, size_t num) {
memcpy(dest, src, num);
}
int ComputeFpgaConv(struct FpgaConvArgs) { return 0; }
int ComputeFpgaPool(struct FpgaPoolArgs) { return 0; }
int ComputeFpgaEWAdd(struct FpgaEWAddArgs) { return 0; }
} // namespace api
} // namespace fpga
} // namespace mobile
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <limits>
// memory management;
namespace paddle {
namespace mobile {
namespace fpga {
namespace api {
int open_device();
int close_device();
void *fpga_malloc(size_t size);
void fpga_free(void *ptr);
void fpga_copy(void *dst, const void *src, size_t num);
struct FpgaVersionArgs {
void *buf;
};
struct MemoryToPhysicalArgs {
const void *src;
uint64_t physical;
};
struct MemoryCopyArgs {
void *src;
void *dst;
size_t size;
};
struct FpgaQuantArgs {
float scale;
};
struct FpgaBNArgs {};
struct FpgaConvArgs {
  bool enable_BN = false;
  bool enable_Relu = false;
  struct FpgaBNArgs bn_parm;
};
struct FpgaPoolArgs {
  bool enable_BN = false;
  struct FpgaBNArgs bn_parm;
};
struct FpgaEWAddArgs { // only support X + Y
bool enable_Relu = false;
};
int ComputeFpgaConv(struct FpgaConvArgs);
int ComputeFpgaPool(struct FpgaPoolArgs);
int ComputeFpgaEWAdd(struct FpgaEWAddArgs);
#define IOCTL_FPGA_MAGIC 'FPGA'
#define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 1, struct FpgaVersionArgs)
#define IOCTL_GET_QUANT _IOW(IOCTL_FPGA_MAGIC, 2, struct FpgaQuantArgs)
#define IOCTL_SET_QUANT _IOW(IOCTL_FPGA_MAGIC, 3, struct FpgaQuantArgs)
#define IOCTL_MEM_COPY _IOW(IOCTL_FPGA_MAGIC, 11, struct MemoryCopyArgs)
#define IOCTL_MEM_TOPHY _IOW(IOCTL_FPGA_MAGIC, 12, struct MemoryToPhysicalArgs)
#define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct FpgaConvArgs)
#define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct FpgaPoolArgs)
#define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct FpgaEWAddArgs)
} // namespace api
} // namespace fpga
} // namespace mobile
} // namespace paddle
...
@@ -28,6 +28,16 @@ vector<string> OperatorBase<Dtype>::GetOutKeys() const {
   return it->second.second;
 }
+template <typename Dtype>
+vector<string> OperatorBase<Dtype>::GetInputKeys() const {
+  auto it = op_input_output_key.find(type_);
+  if (it == op_input_output_key.end()) {
+    DLOG << type_ << " has no inputs";
+    return {};
+  }
+  return it->second.first;
+}
 template <typename Dtype>
 OperatorBase<Dtype>::OperatorBase(const std::string &type,
                                   const VariableNameMap &inputs,
...
@@ -49,6 +59,11 @@ template <typename Dtype>
 void OperatorBase<Dtype>::Run() const {
   RunImpl();
 #ifdef PADDLE_MOBILE_DEBUG
+  vector<string> input_keys = GetInputKeys();
+  for (const auto key : input_keys) {
+    Tensor *input = GetVarValue<framework::LoDTensor>(key, inputs_, *scope_);
+    DLOG << type_ << " input- " << key << "=" << *input;
+  }
   vector<string> output_keys = GetOutKeys();
   for (const auto key : output_keys) {
     Tensor *out_ = GetVarValue<framework::LoDTensor>(key, outputs_, *scope_);
...
...
@@ -61,6 +61,7 @@ class OperatorBase {
   virtual ~OperatorBase() {}
   void Run() const;
   std::vector<string> GetOutKeys() const;
+  std::vector<string> GetInputKeys() const;
   virtual void RunImpl() const = 0;
   virtual void Init() = 0;
...
@@ -118,6 +119,10 @@ class OperatorWithKernel : public OperatorBase<Dtype> {
   virtual void InferShape() const = 0;
   void Init() {
+    // for (auto i : this->inputs_) {
+    //   DLOG << i.first;
+    //   DLOG << i.second;
+    // }
     PADDLE_MOBILE_ENFORCE(kernel_.Init(&param_), " %s kernel init failed",
                           this->type_.c_str());
   }
...
@@ -146,7 +151,7 @@ class OpKernelBase {
   }
 #endif
   virtual void Compute(const P &para) const = 0;
-  virtual bool Init(P *para) { return true; };
+  virtual bool Init(P *para) { return true; }
   virtual ~OpKernelBase() = default;
  private:
...
...
@@ -42,8 +42,17 @@ class FusionOpRegister {
     matchers_[matcher->Type()] = shared_matcher;
   }
-  const std::map<std::string, std::shared_ptr<FusionOpMatcher>> Matchers() {
-    return matchers_;
+  const std::vector<std::shared_ptr<FusionOpMatcher>> Matchers() {
+    std::vector<std::shared_ptr<FusionOpMatcher>> matchers;
+    for (const auto& match : matchers_) {
+      matchers.push_back(match.second);
+    }
+    std::sort(matchers.begin(), matchers.end(),
+              [](std::shared_ptr<FusionOpMatcher> first,
+                 std::shared_ptr<FusionOpMatcher> second) {
+                return first->BeginNode().Depth() > second->BeginNode().Depth();
+              });
+    return matchers;
   }
  private:
...
...
@@ -44,23 +44,6 @@ bool Node::operator==(const Node &in) {
   return true;
 }
-std::vector<std::shared_ptr<framework::OpDesc>> Node::OpDescs(int size) {
-  std::vector<std::shared_ptr<framework::OpDesc>> op_descs;
-  OpDescs(size - 1, &op_descs);
-  return op_descs;
-}
-void Node::OpDescs(int index,
-                   std::vector<std::shared_ptr<framework::OpDesc>> *op_desc) {
-  if (index == 0) {
-    return;
-  }
-  op_desc->push_back(this->op_desc_);
-  for (auto &output : outputs_) {
-    output->OpDescs(index, op_desc);
-  }
-}
 std::shared_ptr<Node> Node::To(int size) {
   std::shared_ptr<Node> node = std::make_shared<Node>();
   this->To(size - 1, node);
...
...
@@ -47,13 +47,10 @@ class Node {
       std::map<std::string, std::vector<std::pair<std::string, std::string>>>
           change,
       std::vector<std::shared_ptr<Node>> *removed_nodes);
-  std::vector<std::shared_ptr<framework::OpDesc>> OpDescs(int size);
   std::shared_ptr<framework::OpDesc> OpDescOfNode() { return op_desc_; }
   std::string Type() { return type_; }
  private:
-  void OpDescs(int size,
-               std::vector<std::shared_ptr<framework::OpDesc>> *op_desc);
   void To(int index, std::shared_ptr<Node>);
   void Folder(
       std::shared_ptr<framework::OpDesc> op_desc,
...
...
@@ -78,9 +78,8 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FusionOptimize(
   }
   for (auto &registed : FusionOpRegister::Instance()->Matchers()) {
-    std::string fusion_type = registed.first;
-    std::shared_ptr<FusionOpMatcher> matcher = registed.second;
-    // DLOG << " registed node \n " << matcher->BeginNode();
+    std::string fusion_type = registed->Type();
+    std::shared_ptr<FusionOpMatcher> matcher = registed;
     auto match_vector = type_map[matcher->BeginType()];
...
...
@@ -30,6 +30,7 @@ class Program {
   std::string model_path;
   std::string para_path;
   bool combined = false;
+  bool quantification = false;
  private:
 };
...
...
@@ -154,7 +154,7 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
   tensor->Resize(framework::make_ddim(desc.Dims()));
-  void *memory = tensor;
+  void *memory = nullptr;
   int type_size = 0;
   switch (desc.DataType()) {
     case framework::VARTYPE_TYPE_FP16:
...
@@ -179,11 +179,25 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
     default:
       break;
   }
-  for (int n = 0; n < memory_size * type_size; ++n) {
-    static_cast<char *>(memory)[n] = (*data)[n];
-  }
-  (*data) += (sizeof(char) * memory_size * type_size);
+  if (program_.quantification) {
+    float min_value;
+    float max_value;
+    memcpy(&min_value, *data, sizeof(float));
+    memcpy(&max_value, *data + sizeof(float), sizeof(float));
+    *data += 2 * sizeof(float);
+    const float factor = (max_value - min_value) / 255.0;
+    uint8_t *uint8_data = (uint8_t *)(*data);
+    for (int k = 0; k < memory_size; ++k) {
+      static_cast<float *>(memory)[k] = uint8_data[k] * factor + min_value;
+    }
+    *data += (memory_size * sizeof(uint8_t));
+  } else {
+    for (int n = 0; n < memory_size * type_size; ++n) {
+      static_cast<char *>(memory)[n] = (*data)[n];
+    }
+    (*data) += (sizeof(char) * memory_size * type_size);
+  }
 }
 template <typename Dtype, Precision P>
...
...
@@ -44,26 +44,29 @@ static size_t ReadBuffer(const char *file_name, uint8_t **out) {
 template <typename Dtype, Precision P>
 const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
-    const std::string &dirname, bool optimize, bool can_add_split) {
-  auto program =
-      this->LoadProgram(dirname + "/__model__", optimize, can_add_split);
+    const std::string &dirname, bool optimize, bool quantification,
+    bool can_add_split) {
+  auto program = this->LoadProgram(dirname + "/__model__", optimize,
+                                   quantification, can_add_split);
   program.model_path = dirname;
   return program;
 }
 template <typename Dtype, Precision P>
 const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
-    const std::string &model_path, const std::string &para_path,
-    bool optimize) {
+    const std::string &model_path, const std::string &para_path, bool optimize,
+    bool quantification) {
   auto program = this->LoadProgram(model_path, optimize);
   program.para_path = para_path;
   program.combined = true;
+  program.quantification = quantification;
   return program;
 }
 template <typename Dtype, Precision P>
 const framework::Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
-    const std::string &model_path, bool optimize, bool can_add_split) {
+    const std::string &model_path, bool optimize, bool quantification,
+    bool can_add_split) {
   std::string model_filename = model_path;
   PaddleMobile__Framework__Proto__ProgramDesc *c_program;
   uint8_t *buf = NULL;
...
@@ -82,6 +85,7 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
   framework::Program<Dtype, P> program;
   program.originProgram = originProgramDesc;
+  program.quantification = quantification;
   auto scope = std::make_shared<framework::Scope>();
   program.scope = scope;
...
...
@@ -30,6 +30,7 @@ class Loader {
    * */
   const framework::Program<Dtype, P> Load(const std::string &dirname,
                                           bool optimize = false,
+                                          bool quantification = false,
                                           bool can_add_split = false);
   /*
...
@@ -38,11 +39,13 @@ class Loader {
    * */
   const framework::Program<Dtype, P> Load(const std::string &model_path,
                                           const std::string &para_path,
-                                          bool optimize = false);
+                                          bool optimize = false,
+                                          bool quantification = false);
  private:
   const framework::Program<Dtype, P> LoadProgram(const std::string &model_path,
                                                  bool optimize = false,
+                                                 bool quantification = false,
                                                  bool can_add_split = false);
 };
...
...
@@ -26,7 +26,7 @@ void PaddleMobile<Dtype, P>::SetThreadNum(int num) {
 template <typename Dtype, Precision P>
 bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize,
-                                  int batch_size) {
+                                  bool quantification, int batch_size) {
   if (loader_.get() == nullptr) {
     loader_ = std::make_shared<Loader<Dtype, P>>();
   } else {
...
@@ -35,7 +35,7 @@ bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize,
   if (executor_.get() == nullptr) {
     executor_ = std::make_shared<Executor<Dtype, P>>(
-        loader_->Load(dirname, optimize), batch_size, optimize);
+        loader_->Load(dirname, optimize, quantification), batch_size, optimize);
   } else {
     LOG(kLOG_INFO) << "executor inited";
   }
...
@@ -46,7 +46,7 @@ bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize,
 template <typename Dtype, Precision P>
 bool PaddleMobile<Dtype, P>::Load(const std::string &model_path,
                                   const std::string &para_path, bool optimize,
-                                  int batch_size) {
+                                  bool quantification, int batch_size) {
   if (loader_.get() == nullptr) {
     loader_ = std::make_shared<Loader<Dtype, P>>();
   } else {
...
@@ -55,7 +55,8 @@ bool PaddleMobile<Dtype, P>::Load(const std::string &model_path,
   if (executor_.get() == nullptr) {
     executor_ = std::make_shared<Executor<Dtype, P>>(
-        loader_->Load(model_path, para_path, optimize), batch_size, optimize);
+        loader_->Load(model_path, para_path, optimize, quantification),
+        batch_size, optimize);
   } else {
     LOG(kLOG_INFO) << "executor inited";
   }
...
...
@@ -39,14 +39,18 @@ class PaddleMobile {
    * @b load a fluid model stored as separate files
    * */
   bool Load(const std::string &dirname, bool optimize = false,
-            int batch_size = 1);
+            bool quantification = false, int batch_size = 1);
   /*
    * @b load combine format fluid mode
    * @b load the model stored in the combined format
    * */
   bool Load(const std::string &model_path, const std::string &para_path,
-            bool optimize = false, int batch_size = 1);
+            bool optimize = false, bool quantification = false,
+            int batch_size = 1);
+  /*
+   * @b set the number of threads; takes effect when openmp is enabled in cmake
+   * */
   void SetThreadNum(int num);
   /*
...
...
@@ -16,10 +16,32 @@ limitations under the License. */
 #include <cstdlib>
 #include <cstring>
+#ifdef PADDLE_MOBILE_FPGA
+#include "fpga/api/fpga_api.h"
+#endif
 namespace paddle_mobile {
 namespace memory {
 const int MALLOC_ALIGN = 64;
+#ifdef PADDLE_MOBILE_FPGA
+namespace api = paddle::mobile::fpga::api;
+void Copy(void *dst, const void *src, size_t num) {
+  std::memcpy(dst, src, num);
+}
+void *Alloc(size_t size) { return api::fpga_malloc(size); }
+void Free(void *ptr) {
+  if (ptr) {
+    api::fpga_free(ptr);
+  }
+}
+#else
 void Copy(void *dst, const void *src, size_t num) {
   std::memcpy(dst, src, num);
 }
...
@@ -42,5 +64,7 @@ void Free(void *ptr) {
   }
 }
+#endif
 }  // namespace memory
 }  // namespace paddle_mobile
...
@@ -26,7 +26,7 @@ void BatchNormOp<Dtype, T>::InferShape() const {
   auto x_dims = this->param_.InputX()->dims();
   this->param_.OutputY()->Resize(x_dims);
 }
-template class BatchNormOp<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
...
...
@@ -47,7 +47,7 @@ void BoxCoderOp<Dtype, T>::InferShape() const {
   this->param_.OutputBox()->Resize(framework::make_ddim(
       {input_targetbox_dims[0], input_priorbox_dims[0], 4}));
 }
-template class BoxCoderOp<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
...
...
@@ -56,7 +56,6 @@ void ConcatOp<Dtype, T>::InferShape() const {
   this->param_.Out()->Resize(out_dims);
 }
-template class ConcatOp<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
...
...
@@ -48,8 +48,6 @@ void ConvOp<Dtype, T>::InferShape() const {
   this->param_.Output()->Resize(ddim);
 }
-template class ConvOp<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
...
...
@@ -49,8 +49,6 @@ void DepthwiseConvOp<Dtype, T>::InferShape() const {
   this->param_.Output()->Resize(ddim);
 }
-template class DepthwiseConvOp<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
...
...
@@ -22,7 +22,7 @@ void DropoutOp<Dtype, T>::InferShape() const {
   auto input_dims = this->param_.InputX()->dims();
   this->param_.Out()->Resize(input_dims);
 }
-template class DropoutOp<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
...
...
@@ -24,7 +24,7 @@ void ElementwiseAddOp<Dtype, T>::InferShape() const {
   auto x_dim = this->param_.InputX()->dims();
   this->param_.Out()->Resize(x_dim);
 }
-template class ElementwiseAddOp<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
...
...
@@ -14,10 +14,7 @@ limitations under the License. */
 #include "feed_op.h"
 namespace paddle_mobile {
-namespace operators {
-template class FeedOp<CPU, float>;
-}
+namespace operators {}
 }  // namespace paddle_mobile
 namespace ops = paddle_mobile::operators;
...
...
@@ -14,10 +14,7 @@ limitations under the License. */
 #include "fetch_op.h"
 namespace paddle_mobile {
-namespace operators {
-template class FetchOp<CPU, float>;
-}
+namespace operators {}
 }  // namespace paddle_mobile
 namespace ops = paddle_mobile::operators;
...
...
@@ -45,7 +45,6 @@ void FusionConvAddOp<Dtype, T>::InferShape() const {
   this->param_.Output()->Resize(ddim);
 }
-template class FusionConvAddOp<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
...
...
@@ -36,8 +36,6 @@ class FusionConvAddMatcher : public framework::FusionOpMatcher {
   void FolderNodes(
       framework::Node *node,
       std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
-    vector<std::shared_ptr<framework::OpDesc>> origin_descs =
-        node->OpDescs(node_.Depth());
     node->Folder(node_.Depth(), Type(),
                  {{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}}}}, removed_nodes);
   }
...
@@ -68,11 +66,11 @@ class FusionConvAddOp : public framework::OperatorWithKernel<
 #ifdef PADDLE_MOBILE_CPU
-//#ifndef CONV_ADD_REGISTER
-// static framework::FusionOpRegistrar convadd_registrar(
-//     new FusionConvAddMatcher());
-//#define CONV_ADD_REGISTER
-//#endif
+#ifndef CONV_ADD_REGISTER
+static framework::FusionOpRegistrar convadd_registrar(
+    new FusionConvAddMatcher());
+#define CONV_ADD_REGISTER
+#endif
 #endif
...
...
@@ -44,7 +44,7 @@ void FusionConvAddBNReluOp<Dtype, T>::InferShape() const {
   framework::DDim ddim = framework::make_ddim(output_shape);
   this->param_.Output()->Resize(ddim);
 }
-template class FusionConvAddBNReluOp<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
...
...
@@ -39,8 +39,6 @@ class FusionConvAddBNReluMatcher : public framework::FusionOpMatcher {
   void FolderNodes(
       framework::Node *node,
       std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
-    vector<std::shared_ptr<framework::OpDesc>> origin_descs =
-        node->OpDescs(node_.Depth());
     node->Folder(node_.Depth(), Type(),
                  {{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}}},
                   {G_OP_TYPE_BATCHNORM,
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#ifdef CONVADDRELU_OP
+#ifdef FUSION_CONVADDRELU_OP
 #include "fusion_conv_add_relu_op.h"
 #include "operators/math/conv_func.h"
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#ifdef CONVADDRELU_OP
+#ifdef FUSION_CONVADDRELU_OP
 #pragma once
...
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVBNRELU_OP
#include "operators/fusion_conv_bn_relu_op.h"
#include "operators/math/conv_func.h"
namespace paddle_mobile {
namespace operators {
template <typename Dtype, typename T>
void FusionConvBNReluOp<Dtype, T>::InferShape() const {
auto in_dims = this->param_.Input()->dims();
auto filter_dims = this->param_.Filter()->dims();
const std::vector<int> &strides = this->param_.Strides();
std::vector<int> paddings = this->param_.Paddings();
int groups = this->param_.Groups();
std::vector<int> dilations = this->param_.Dilations();
PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
dilations.size() == paddings.size() &&
paddings.size() == strides.size()),
"ConvParam is not suitable");
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back(
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
paddings[i], strides[i]));
}
framework::DDim ddim = framework::make_ddim(output_shape);
this->param_.Output()->Resize(ddim);
}
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(fusion_conv_bn_relu, ops::FusionConvBNReluOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVBNRELU_OP
#pragma once
#include <string>
#include <vector>
#include "framework/operator.h"
#include "framework/program/program-optimize/fusion_op_register.h"
#include "operators/kernel/conv_bn_relu_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
using std::string;
using std::vector;
class FusionConvBNReluMatcher : public framework::FusionOpMatcher {
public:
FusionConvBNReluMatcher() {
node_ = framework::Node(G_OP_TYPE_CONV);
node_ > std::make_shared<framework::Node>(G_OP_TYPE_BATCHNORM) >
std::make_shared<framework::Node>(G_OP_TYPE_RELU);
}
void FolderNodes(
framework::Node *node,
std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
node->Folder(node_.Depth(), Type(),
{{G_OP_TYPE_BATCHNORM,
{{"Scale", "Scale"},
{"Mean", "Mean"},
{"Bias", "Bias"},
{"Variance", "Variance"}}}},
removed_nodes);
}
std::string Type() { return G_OP_TYPE_FUSION_CONV_BN_RELU; }
};
template <typename DeviceType, typename T>
class FusionConvBNReluOp : public framework::OperatorWithKernel<
DeviceType, FusionConvBNReluParam,
operators::ConvBNReluKernel<DeviceType, T>> {
public:
FusionConvBNReluOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<
DeviceType, FusionConvBNReluParam,
operators::ConvBNReluKernel<DeviceType, T>>(type, inputs, outputs,
attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, FusionConvBNReluParam,
operators::ConvBNReluKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
protected:
};
#ifdef PADDLE_MOBILE_CPU
#ifndef FUSION_CONV_BN_RELU_REGISTER
static framework::FusionOpRegistrar fusion_conv_bn_relu_registrar(
new FusionConvBNReluMatcher());
#define FUSION_CONV_BN_RELU_REGISTER
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
} // namespace operators
} // namespace paddle_mobile
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(fusion_conv_bn_relu);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
...
@@ -44,7 +44,7 @@ void FusionDWConvBNReluOp<Dtype, T>::InferShape() const {
   framework::DDim ddim = framework::make_ddim(output_shape);
   this->param_.Output()->Resize(ddim);
 }
-template class FusionDWConvBNReluOp<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
...
...
@@ -38,8 +38,6 @@ class FusionDWConvBNReluMatcher : public framework::FusionOpMatcher {
   void FolderNodes(
       framework::Node *node,
       std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
-    vector<std::shared_ptr<framework::OpDesc>> origin_descs =
-        node->OpDescs(node_.Depth());
     node->Folder(node_.Depth(), Type(),
                  {{G_OP_TYPE_BATCHNORM,
                    {{"Scale", "Scale"},
...
...
@@ -50,7 +50,6 @@ void FusionFcOp<Dtype, T>::InferShape() const {
   this->param_.Out()->Resize(ddim);
 }
-template class FusionFcOp<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
...
...
@@ -47,8 +47,6 @@ void Im2SequenceOp<Dtype, T>::InferShape() const {
   this->param_.Output()->Resize(ddim);
 }
-template class Im2SequenceOp<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#ifdef FUSION_CONVADD_RELU_OP
+#ifdef FUSION_CONVADDRELU_OP
 #include "operators/kernel/conv_add_relu_kernel.h"
 #include "operators/kernel/central-arm-func/conv_add_relu_arm_func.h"
...
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVBNRELU_OP
#include "operators/kernel/conv_bn_relu_kernel.h"
#include "operators/kernel/central-arm-func/conv_bn_relu_arm_func.h"
namespace paddle_mobile {
namespace operators {
template <>
bool ConvBNReluKernel<CPU, float>::Init(FusionConvBNReluParam *param) {
const Tensor *mean = param->InputMean();
const Tensor *variance = param->InputVariance();
const Tensor *scale = param->InputScale();
const Tensor *bias = param->InputBias();
const float epsilon = param->Epsilon();
// DLOG << "variance: " << *variance;
auto mean_ptr = mean->data<float>();
auto variance_ptr = variance->data<float>();
auto scale_ptr = scale->data<float>();
auto bias_ptr = bias->data<float>();
const int C = mean->numel();
float inv_std_ptr[C];
for (int i = 0; i < C; i++) {
inv_std_ptr[i] =
1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
}
Tensor *new_scale = new Tensor();
Tensor *new_bias = new Tensor();
auto new_scale_ptr = new_scale->mutable_data<float>({C});
auto new_bias_ptr = new_bias->mutable_data<float>({C});
for (int i = 0; i < C; i++) {
new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i];
new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
}
param->SetNewScale(new_scale);
param->SetNewBias(new_bias);
return true;
}
template <>
void ConvBNReluKernel<CPU, float>::Compute(
const FusionConvBNReluParam &param) const {
ConvBNReluCompute<float>(param);
}
template class ConvBNReluKernel<CPU, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
...
@@ -54,7 +54,40 @@ void BatchnormCompute(const BatchNormParam &param) {
   int HXW = H * W;
-#ifdef ARMV7
+#if __ARM_NEON
+#if __aarch64__
+  float *inv_std_ptr = new float[C];
+  for (int i = 0; i < C; i++) {
+    inv_std_ptr[i] =
+        1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
+  }
+  Tensor new_scale;
+  auto new_scale_ptr = new_scale.mutable_data<float>(framework::make_ddim({C}));
+  Tensor new_bias;
+  auto new_bias_ptr = new_bias.mutable_data<float>(framework::make_ddim({C}));
+  /// ((x - est_mean) * (inv_var) * scale + bias equal to
+  /// (x * inv_var * scale) + (bias - est_mean * inv_var * scale)
+  for (int i = 0; i < C; i++) {
+    new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i];
+    new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
+    {
+      for (int n = 0; n < N; n++) {
+        for (int h = 0; h < H; h++) {
+          int tmp_index = n * stride0 + i * stride1 + h * stride2;
+          for (int w = 0; w < W; w++) {
+            int index = tmp_index + w;
+            out_ptr[index] =
+                input_x_ptr[index] * new_scale_ptr[i] + new_bias_ptr[i];
+          }
+        }
+      }
+    }
+  }
+  delete[] inv_std_ptr;
+#else
   if (HXW > 32) {
     int NXC = N * C;
     float *inv_std_ptr = new float[NXC * 4];
...
@@ -229,6 +262,7 @@ void BatchnormCompute(const BatchNormParam &param) {
     delete[] inv_std_ptr;
   }
+#endif
 #else
   float *inv_std_ptr = new float[C];
   for (int i = 0; i < C; i++) {
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#ifdef FUSION_CONVADD_RELU_OP
+#ifdef FUSION_CONVADDRELU_OP
 #pragma once
 #include <vector>
...
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVBNRELU_OP
#pragma once
#include <vector>
#include "operators/math/depthwise_conv_3x3.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
void ConvBNReluBasic(const FusionConvBNReluParam &param) {
const Tensor *input = param.Input();
Tensor filter = *param.Filter();
Tensor new_bias = *param.NewBias();
Tensor new_scale = *param.NewScale();
Tensor *output = param.Output();
int groups = param.Groups();
std::vector<int> strides = param.Strides();
std::vector<int> paddings = param.Paddings();
std::vector<int> dilations = param.Dilations();
const int batch_size = static_cast<int>(input->dims()[0]);
std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
std::vector<int64_t> output_shape_vec(framework::vectorize(output->dims()));
size_t data_dim = filter_shape_vec.size() - 2;
std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
col_shape_vec[0] = input->dims()[1] / groups;
for (size_t j = 0; j < data_dim; ++j) {
col_shape_vec[j + 1] = filter_shape_vec[j + 2];
col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
}
framework::DDim col_shape(framework::make_ddim(col_shape_vec));
framework::DDim col_matrix_shape =
framework::flatten_to_2d(col_shape, data_dim + 1);
bool is_expand =
math::IsExpand(filter_shape_vec, strides, paddings, dilations);
Tensor col;
Tensor col_matrix;
if (is_expand) {
col.mutable_data<float>(col_shape);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
}
framework::DDim input_shape = framework::slice_ddim(
input->dims(), 1, static_cast<int>(input->dims().size()));
framework::DDim filter_matrix_shape = {filter.dims()[0],
filter.numel() / filter.dims()[0]};
filter.Resize(filter_matrix_shape);
framework::DDim output_matrix_shape = {
output->dims()[1],
output->numel() / (output->dims()[0] * output->dims()[1])};
// convolution operator: im2col(or vol2col) + gemm
int in_step = static_cast<int>(input->dims()[1]) / groups;
int out_step = static_cast<int>(output->dims()[1]) / groups;
math::Vol2ColFunctor<CPU, float> vol2col;
math::Im2ColFunctor<math::ColFormat::kCFO, CPU, float> im2col;
for (int i = 0; i < batch_size; i++) {
Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
for (int g = 0; g < groups; g++) {
Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
if (!is_expand) {
col.ShareDataWith(in_slice);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
} else if (data_dim == 2U) {
// im2col
im2col(in_slice, dilations, strides,
std::vector<int>{paddings[0], paddings[1], paddings[0],
paddings[1]},
&col);
} else if (data_dim == 3U) {
// vol2col
vol2col(in_slice, dilations, strides, paddings, &col);
}
// gemm
Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
math::matmulWithBn<float>(
filter_slice, false, col_matrix, false, static_cast<float>(1),
&out_slice, static_cast<float>(0), true, &new_scale, &new_bias, g);
}
}
}
template <typename P>
void ConvBNReluCompute(const FusionConvBNReluParam &param) {
if (param.Groups() == param.Input()->dims()[1] &&
param.Input()->dims()[1] == param.Output()->dims()[1] &&
param.Filter()->dims()[2] == param.Filter()->dims()[3] &&
param.Filter()->dims()[2] == 3 && param.Strides()[0] == 1) {
math::DepthwiseConvAddBNRelu3x3s1p1(param.Input(), param.Filter(),
param.Output(), param.NewScale(),
param.NewBias(), true);
} else if (param.Groups() == param.Input()->dims()[1] &&
param.Input()->dims()[1] == param.Output()->dims()[1] &&
param.Filter()->dims()[2] == param.Filter()->dims()[3] &&
param.Filter()->dims()[2] == 3 && param.Strides()[0] == 2) {
// math::DepthwiseConvAddBNRelu3x3s2p1(param.Input(), param.Filter(),
// param.Output(), param.NewScale(),
// param.NewBias(), 1);
math::DepthwiseConvAddBNRelu3x3s2p1v2(param.Input(), param.Filter(),
param.Output(), param.NewScale(),
param.NewBias(), true);
} else {
ConvBNReluBasic(param);
}
}
} // namespace operators
} // namespace paddle_mobile
#endif
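
For reference, the NewScale and NewBias tensors consumed above come from folding the batch-norm statistics into a per-channel affine transform, so that bn(conv(x)) == conv(x) * new_scale + new_bias. A minimal sketch of that folding, assuming plain float vectors rather than the framework's Tensor type:

#include <cmath>
#include <cstddef>
#include <vector>

// Fold batch-norm into a per-channel scale and bias so that
// bn(y) = gamma * (y - mean) / sqrt(var + eps) + beta
// becomes y * new_scale + new_bias.
void FoldBatchNorm(const std::vector<float> &gamma, const std::vector<float> &beta,
                   const std::vector<float> &mean, const std::vector<float> &variance,
                   float epsilon, std::vector<float> *new_scale,
                   std::vector<float> *new_bias) {
  const std::size_t c = gamma.size();
  new_scale->resize(c);
  new_bias->resize(c);
  for (std::size_t i = 0; i < c; ++i) {
    const float inv_std = 1.0f / std::sqrt(variance[i] + epsilon);
    (*new_scale)[i] = gamma[i] * inv_std;
    (*new_bias)[i] = beta[i] - mean[i] * gamma[i] * inv_std;
  }
}

Folding once up front is what lets the compute loop above stay a pure conv-plus-affine pass.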
...@@ -76,15 +76,20 @@ void PoolCompute(const PoolParam &param) { ...@@ -76,15 +76,20 @@ void PoolCompute(const PoolParam &param) {
} }
} else if (ksize[0] == 2 && ksize[0] == ksize[1]) { } else if (ksize[0] == 2 && ksize[0] == ksize[1]) {
#ifndef IOS #if __ARM_NEON
#if __aarch64__
PoolBasic(pooling_type, ksize, strides, paddings, in_x, out);
#else
if (pooling_type == "max") { if (pooling_type == "max") {
math::Pool2x2Max(strides, paddings, in_x, out); math::Pool2x2Max(strides, paddings, in_x, out);
} else if (pooling_type == "avg") { } else if (pooling_type == "avg") {
math::Pool2x2Avg(strides, paddings, in_x, out); math::Pool2x2Avg(strides, paddings, in_x, out);
} }
#endif
#else #else
PoolBasic(pooling_type, ksize, strides, paddings, in_x, out); PoolBasic(pooling_type, ksize, strides, paddings, in_x, out);
#endif #endif // __ARM_NEON
} else { } else {
PoolBasic(pooling_type, ksize, strides, paddings, in_x, out); PoolBasic(pooling_type, ksize, strides, paddings, in_x, out);
} }
......
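
The Pool2x2Max/Pool2x2Avg fast paths selected above reduce each 2x2 window with pairwise NEON operations. A rough sketch of one stride-2 max-pooling step, assuming a NEON target and pointers with at least 8 valid input floats and 4 valid output floats:

#include <arm_neon.h>

// One output-row step: reads 8 floats from two adjacent input rows and
// produces 4 pooled outputs (2x2 window, stride 2).
inline void MaxPool2x2Step(const float *row0, const float *row1, float *out) {
  float32x4x2_t r0 = vld2q_f32(row0);  // de-interleave even/odd columns
  float32x4x2_t r1 = vld2q_f32(row1);
  float32x4_t m0 = vmaxq_f32(r0.val[0], r0.val[1]);  // horizontal pair max, row 0
  float32x4_t m1 = vmaxq_f32(r1.val[0], r1.val[1]);  // horizontal pair max, row 1
  vst1q_f32(out, vmaxq_f32(m0, m1));                 // vertical max
}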
...@@ -68,6 +68,7 @@ void sigmoid(const Tensor *X, Tensor *Y) { ...@@ -68,6 +68,7 @@ void sigmoid(const Tensor *X, Tensor *Y) {
input_outer_ptr++; input_outer_ptr++;
} }
} }
#else
#endif #endif
} }
......
...@@ -14,7 +14,7 @@ limitations under the License. */ ...@@ -14,7 +14,7 @@ limitations under the License. */
#pragma once #pragma once
#ifdef FUSION_CONVADD_RELU_OP #ifdef FUSION_CONVADDRELU_OP
#include <vector> #include <vector>
#include "framework/ddim.h" #include "framework/ddim.h"
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef FUSION_CONVBNRELU_OP
#include <vector>
#include "framework/ddim.h"
#include "framework/operator.h"
#include "operators/math/conv_func.h"
#include "operators/math/im2col.h"
#include "operators/math/math_function.h"
#include "operators/math/vol2col.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
using framework::DDim;
using framework::OpKernelBase;
template <typename DeviceType, typename T>
class ConvBNReluKernel
: public OpKernelBase<DeviceType, FusionConvBNReluParam> {
public:
void Compute(const FusionConvBNReluParam &param) const;
bool Init(FusionConvBNReluParam *param);
};
} // namespace operators
} // namespace paddle_mobile
#endif
...@@ -24,7 +24,7 @@ void LrnOp<Dtype, T>::InferShape() const { ...@@ -24,7 +24,7 @@ void LrnOp<Dtype, T>::InferShape() const {
auto x_dims = this->param_.InputX()->dims(); auto x_dims = this->param_.InputX()->dims();
this->param_.Out()->Resize(x_dims); this->param_.Out()->Resize(x_dims);
} }
template class LrnOp<CPU, float>;
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
......
...@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "operators/math/depthwise_conv_3x3.h" #include "operators/math/depthwise_conv_3x3.h"
#ifdef __ARM_NEON #if __ARM_NEON
#include <arm_neon.h> #include <arm_neon.h>
#endif #endif
#include <vector> #include <vector>
...@@ -23,7 +23,6 @@ namespace math { ...@@ -23,7 +23,6 @@ namespace math {
void DepthwiseConv3x3(const Tensor *input, vector<int> strides, void DepthwiseConv3x3(const Tensor *input, vector<int> strides,
vector<int> paddings, const Tensor *filter, Tensor *bias, vector<int> paddings, const Tensor *filter, Tensor *bias,
Tensor *output, bool if_bias) { Tensor *output, bool if_bias) {
#ifdef __ARM_NEON
const int batch_size = input->dims()[0]; const int batch_size = input->dims()[0];
const int input_height = input->dims()[2]; const int input_height = input->dims()[2];
...@@ -181,7 +180,27 @@ void DepthwiseConv3x3(const Tensor *input, vector<int> strides, ...@@ -181,7 +180,27 @@ void DepthwiseConv3x3(const Tensor *input, vector<int> strides,
} }
} else { } else {
#if defined(ARMV17) #if __ARM_NEON
#if __aarch64__
const float32x4_t data1 = vld1q_f32(pos1);
const float32x4_t data2 = vld1q_f32(pos2);
const float32x4_t data3 = vld1q_f32(pos3);
const float32x4_t v_filter1 = vld1q_f32(filter1);
const float32x4_t v_filter2 = vld1q_f32(filter2);
const float32x4_t v_filter3 = vld1q_f32(filter3);
float32x4_t mula = vmulq_f32(data1, v_filter1);
mula = vmlaq_f32(mula, data2, v_filter2);
mula = vmlaq_f32(mula, data3, v_filter3);
float32x2_t res = vpadd_f32(
vget_high_f32(vsetq_lane_f32(0, mula, 3)), vget_low_f32(mula));
res = vpadd_f32(res, res);
if (if_bias) {
output_data[ph * output_width + pw] += vget_lane_f32(res, 0);
} else {
output_data[ph * output_width + pw] = vget_lane_f32(res, 0);
}
#else
asm volatile( asm volatile(
"vld1.32 {q1}, [%[pos1]] \n\t" "vld1.32 {q1}, [%[pos1]] \n\t"
...@@ -209,26 +228,10 @@ void DepthwiseConv3x3(const Tensor *input, vector<int> strides, ...@@ -209,26 +228,10 @@ void DepthwiseConv3x3(const Tensor *input, vector<int> strides,
[filter2] "r"(filter2), [filter3] "r"(filter3), [filter2] "r"(filter2), [filter3] "r"(filter3),
[output_ptr] "r"(output_ptr), [zero] "r"(zero) [output_ptr] "r"(output_ptr), [zero] "r"(zero)
: "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6"); : "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6");
#endif // __aarch64__
#else #else
const float32x4_t data1 = vld1q_f32(pos1);
const float32x4_t data2 = vld1q_f32(pos2);
const float32x4_t data3 = vld1q_f32(pos3);
const float32x4_t v_filter1 = vld1q_f32(filter1); #endif // __ARM_NEON
const float32x4_t v_filter2 = vld1q_f32(filter2);
const float32x4_t v_filter3 = vld1q_f32(filter3);
float32x4_t mula = vmulq_f32(data1, v_filter1);
mula = vmlaq_f32(mula, data2, v_filter2);
mula = vmlaq_f32(mula, data3, v_filter3);
float32x2_t res = vpadd_f32(
vget_high_f32(vsetq_lane_f32(0, mula, 3)), vget_low_f32(mula));
res = vpadd_f32(res, res);
if (if_bias) {
output_data[ph * output_width + pw] += vget_lane_f32(res, 0);
} else {
output_data[ph * output_width + pw] = vget_lane_f32(res, 0);
}
#endif
} }
} }
} }
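
The aarch64 intrinsics added above compute each 3-tap row product with a 4-wide load, then zero the junk fourth lane before the horizontal reduction. The same trick in isolation, assuming 4 floats are readable at both pointers (the loads intentionally over-read one element):

#include <arm_neon.h>

// Dot product of a 3-tap filter row with 3 input pixels using 4-wide loads:
// lane 3 is garbage, so it is zeroed before the pairwise adds.
inline float Dot3(const float *in, const float *filt) {
  float32x4_t v = vmulq_f32(vld1q_f32(in), vld1q_f32(filt));
  v = vsetq_lane_f32(0.f, v, 3);                              // discard lane 3
  float32x2_t s = vadd_f32(vget_low_f32(v), vget_high_f32(v));
  s = vpadd_f32(s, s);                                        // total in lane 0
  return vget_lane_f32(s, 0);
}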
...@@ -239,12 +242,11 @@ void DepthwiseConv3x3(const Tensor *input, vector<int> strides, ...@@ -239,12 +242,11 @@ void DepthwiseConv3x3(const Tensor *input, vector<int> strides,
input_data += input_batch_stride; input_data += input_batch_stride;
output_data += output_batch_stride; output_data += output_batch_stride;
} }
#endif
} }
void DepthwiseConv3x3s1p1(const Tensor *input, const Tensor *filter, void DepthwiseConv3x3s1p1(const Tensor *input, const Tensor *filter,
Tensor *output, Tensor *bias, bool if_bias) { Tensor *output, Tensor *bias, bool if_bias) {
#ifdef __ARM_NEON #if __ARM_NEON
const float *input_data = input->data<float>(); const float *input_data = input->data<float>();
const float *filter_data = filter->data<float>(); const float *filter_data = filter->data<float>();
float *output_data = output->data<float>(); float *output_data = output->data<float>();
...@@ -520,7 +522,7 @@ void DepthwiseConv3x3s1p1(const Tensor *input, const Tensor *filter, ...@@ -520,7 +522,7 @@ void DepthwiseConv3x3s1p1(const Tensor *input, const Tensor *filter,
void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, const Tensor *filter, void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, const Tensor *filter,
Tensor *output, const Tensor *new_scale, Tensor *output, const Tensor *new_scale,
const Tensor *new_bias, bool if_relu) { const Tensor *new_bias, bool if_relu) {
#ifdef __ARM_NEON #if __ARM_NEON
const float *input_data = input->data<float>(); const float *input_data = input->data<float>();
const float *filter_data = filter->data<float>(); const float *filter_data = filter->data<float>();
float *output_data = output->data<float>(); float *output_data = output->data<float>();
...@@ -824,7 +826,7 @@ void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, const Tensor *filter, ...@@ -824,7 +826,7 @@ void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, const Tensor *filter,
void DepthwiseConvAddBNRelu3x3s2p1(const Tensor *input, const Tensor *filter, void DepthwiseConvAddBNRelu3x3s2p1(const Tensor *input, const Tensor *filter,
Tensor *output, const Tensor *new_scale, Tensor *output, const Tensor *new_scale,
const Tensor *new_bias, bool if_relu) { const Tensor *new_bias, bool if_relu) {
#ifdef __ARM_NEON #if __ARM_NEON
const int batch_size = input->dims()[0]; const int batch_size = input->dims()[0];
...@@ -1022,7 +1024,7 @@ void DepthwiseConvAddBNRelu3x3s2p1(const Tensor *input, const Tensor *filter, ...@@ -1022,7 +1024,7 @@ void DepthwiseConvAddBNRelu3x3s2p1(const Tensor *input, const Tensor *filter,
void DepthwiseConv3x3s2p1v2(const Tensor *input, const Tensor *filter, void DepthwiseConv3x3s2p1v2(const Tensor *input, const Tensor *filter,
Tensor *output, Tensor bias, bool if_bias) { Tensor *output, Tensor bias, bool if_bias) {
#ifdef __ARM_NEON #if __ARM_NEON
const float *input_data = input->data<float>(); const float *input_data = input->data<float>();
const float *filter_data = filter->data<float>(); const float *filter_data = filter->data<float>();
float *output_data = output->data<float>(); float *output_data = output->data<float>();
...@@ -1225,7 +1227,7 @@ void DepthwiseConv3x3s2p1v2(const Tensor *input, const Tensor *filter, ...@@ -1225,7 +1227,7 @@ void DepthwiseConv3x3s2p1v2(const Tensor *input, const Tensor *filter,
void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter, void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
Tensor *output, const Tensor *new_scale, Tensor *output, const Tensor *new_scale,
const Tensor *new_bias, bool if_relu) { const Tensor *new_bias, bool if_relu) {
#ifdef __ARM_NEON #if __ARM_NEON
const float *input_data = input->data<float>(); const float *input_data = input->data<float>();
const float *filter_data = filter->data<float>(); const float *filter_data = filter->data<float>();
float *output_data = output->data<float>(); float *output_data = output->data<float>();
......
(This file's diff is collapsed.)
...@@ -19,7 +19,7 @@ limitations under the License. */ ...@@ -19,7 +19,7 @@ limitations under the License. */
#define B(i, j) B[(i)*ldb + (j)] #define B(i, j) B[(i)*ldb + (j)]
#define C(i, j) C[(i)*ldc + (j)] #define C(i, j) C[(i)*ldc + (j)]
#define MR 4 #define MR 6
#define NR 8 #define NR 8
#define s_min(i, j) ((i) < (j) ? (i) : (j)) #define s_min(i, j) ((i) < (j) ? (i) : (j))
...@@ -28,6 +28,7 @@ namespace paddle_mobile { ...@@ -28,6 +28,7 @@ namespace paddle_mobile {
namespace operators { namespace operators {
namespace math { namespace math {
/*
// Copy blocks of matrix A into contiguous memory (ColMajor) // Copy blocks of matrix A into contiguous memory (ColMajor)
void PackMatrixA(int m, int k, int m_tail, const float *A, int lda, void PackMatrixA(int m, int k, int m_tail, const float *A, int lda,
float *buffer); float *buffer);
...@@ -35,14 +36,17 @@ void PackMatrixA(int m, int k, int m_tail, const float *A, int lda, ...@@ -35,14 +36,17 @@ void PackMatrixA(int m, int k, int m_tail, const float *A, int lda,
// Copy blocks of matrix B into contiguous memory (ColMajor) // Copy blocks of matrix B into contiguous memory (ColMajor)
void PackMatrixB(int k, int n, int n_tail, const float *B, int ldb, void PackMatrixB(int k, int n, int n_tail, const float *B, int ldb,
float *buffer); float *buffer);
*/
// Copy blocks of matrix A into contiguous memory (RowMajor) // Copy blocks of matrix A into contiguous memory (RowMajor)
void PackMatrixA_(int m, int k, int m_tail, const float *A, int lda, void PackMatrixA_4r(int m, int k, int m_tail, const float *A, int lda,
float *buffer); float *buffer);
void PackMatrixA_6r(int m, int k, int m_tail, const float *A, int lda,
float *buffer);
// Copy blocks of matrix B into contiguous memory (RowMajor) // Copy blocks of matrix B into contiguous memory (RowMajor)
void PackMatrixB_(int k, int n, int n_tail, const float *B, int ldb, void PackMatrixB_8c(int k, int n, int n_tail, const float *B, int ldb,
float *buffer); float *buffer);
// Blocked matrix multiplication // Blocked matrix multiplication
void InnerKernel(int mc, int nc, float alpha, const float *a, const float *b, void InnerKernel(int mc, int nc, float alpha, const float *a, const float *b,
...@@ -51,7 +55,7 @@ void InnerKernel(int mc, int nc, float alpha, const float *a, const float *b, ...@@ -51,7 +55,7 @@ void InnerKernel(int mc, int nc, float alpha, const float *a, const float *b,
void InnerKernelWithBn(int mc, int nc, float alpha, const float *a, void InnerKernelWithBn(int mc, int nc, float alpha, const float *a,
const float *b, float beta, float *c, float *C, int ldc, const float *b, float beta, float *c, float *C, int ldc,
bool relu, float *new_scale, float *new_bias); bool relu, float *new_scale, float *new_bias);
/*
// Vector-matrix multiplication (M = 1) // Vector-matrix multiplication (M = 1)
void VectorKernel(int m, int n, int k, float alpha, const float *A, int lda, void VectorKernel(int m, int n, int k, float alpha, const float *A, int lda,
const float *B, int ldb, float beta, float *C, int ldc, const float *B, int ldb, float beta, float *C, int ldc,
...@@ -60,10 +64,12 @@ void VectorKernel(int m, int n, int k, float alpha, const float *A, int lda, ...@@ -60,10 +64,12 @@ void VectorKernel(int m, int n, int k, float alpha, const float *A, int lda,
void VectorKernelWithBn(int m, int n, int k, float alpha, const float *A, void VectorKernelWithBn(int m, int n, int k, float alpha, const float *A,
int lda, const float *B, int ldb, float beta, float *C, int lda, const float *B, int ldb, float beta, float *C,
int ldc, bool relu, float *new_scale, float *new_bias); int ldc, bool relu, float *new_scale, float *new_bias);
*/
// Compute a smaller block of the C matrix // Compute a smaller block of the C matrix
void AddDot4x4(int k, const float *a, const float *b, float *c, int ldc); void AddDot4x4(int k, const float *a, const float *b, float *c, int ldc);
void AddDot4x8(int k, const float *a, const float *b, float *c, int ldc); void AddDot4x8(int k, const float *a, const float *b, float *c, int ldc);
void AddDot6x8(int k, const float *a, const float *b, float *c, int ldc);
// Write back the results of the blocked matrix multiplication // Write back the results of the blocked matrix multiplication
// C = A * B // C = A * B
...@@ -81,6 +87,7 @@ void WriteWithBn(int mc, int nc, float *c, float *C, int ldc, float *new_scale, ...@@ -81,6 +87,7 @@ void WriteWithBn(int mc, int nc, float *c, float *C, int ldc, float *new_scale,
void WriteWithBnRelu(int mc, int nc, float *c, float *C, int ldc, void WriteWithBnRelu(int mc, int nc, float *c, float *C, int ldc,
float *new_scale, float *new_bias); float *new_scale, float *new_bias);
/*
// Write back the results of the vector-matrix multiplication // Write back the results of the vector-matrix multiplication
// C = A * B // C = A * B
void VecWriteBasic(int n, float *c, float *C, int ldc); void VecWriteBasic(int n, float *c, float *C, int ldc);
...@@ -96,6 +103,7 @@ void VecWriteWithBn(int n, float *c, float *C, int ldc, float *new_scale, ...@@ -96,6 +103,7 @@ void VecWriteWithBn(int n, float *c, float *C, int ldc, float *new_scale,
// C = A * B, batchnorm(C), relu(C) // C = A * B, batchnorm(C), relu(C)
void VecWriteWithBnRelu(int n, float *c, float *C, int ldc, float *new_scale, void VecWriteWithBnRelu(int n, float *c, float *C, int ldc, float *new_scale,
float *new_bias); float *new_bias);
*/
// 32-bit float matrix multiplication // 32-bit float matrix multiplication
void Sgemm(int m, int n, int k, float alpha, const float *A, int lda, void Sgemm(int m, int n, int k, float alpha, const float *A, int lda,
......
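
The MR bump from 4 to 6 pairs the new PackMatrixA_6r with AddDot6x8, i.e. a 6x8 micro-kernel over packed panels. A scalar reference of what that micro-kernel computes, useful for validating the NEON version, assuming the usual packed layout (6 values of A and 8 values of B per k step):

// Reference micro-kernel: c[6x8] += A panel (6 x k, 6 values per k step)
// times B panel (k x 8, 8 values per k step); c has row stride ldc.
void AddDot6x8Ref(int k, const float *a, const float *b, float *c, int ldc) {
  for (int p = 0; p < k; ++p) {
    for (int i = 0; i < 6; ++i) {
      for (int j = 0; j < 8; ++j) {
        c[i * ldc + j] += a[p * 6 + i] * b[p * 8 + j];
      }
    }
  }
}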
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
#include "operators/math/im2col.h" #include "operators/math/im2col.h"
#include <vector> #include <vector>
#ifdef __ARM_NEON #ifdef __ARM_NEON
#include "arm_neon.h" #include <arm_neon.h>
#endif #endif
#include "common/types.h" #include "common/types.h"
namespace paddle_mobile { namespace paddle_mobile {
...@@ -69,7 +69,7 @@ class Im2ColFunctor<ColFormat::kCFO, CPU, T> { ...@@ -69,7 +69,7 @@ class Im2ColFunctor<ColFormat::kCFO, CPU, T> {
int channels_col = im_channels * filter_height * filter_width; int channels_col = im_channels * filter_height * filter_width;
const T *im_data = im.data<T>(); const T *im_data = im.data<T>();
T *col_data = col->data<T>(); T *col_data = col->data<T>();
#ifdef __ARM_NEON #if __ARM_NEON
const int osize = col_height; const int osize = col_height;
const int isize = im_height; const int isize = im_height;
bool pad1 = padding[0] > 0; bool pad1 = padding[0] > 0;
......
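
Im2ColFunctor above is what turns convolution into the GEMM consumed by the kernels earlier in this diff. A minimal scalar reference for the strided, padded 2-D case without dilation (a sketch, not the project's implementation):

#include <cstddef>
#include <vector>

// im2col for a [C, H, W] image into [C*kh*kw, out_h*out_w] columns.
// stride/pad apply symmetrically; out-of-bounds pixels are zero.
void Im2ColRef(const float *im, int C, int H, int W, int kh, int kw,
               int stride, int pad, std::vector<float> *col) {
  const int out_h = (H + 2 * pad - kh) / stride + 1;
  const int out_w = (W + 2 * pad - kw) / stride + 1;
  col->assign(static_cast<std::size_t>(C) * kh * kw * out_h * out_w, 0.f);
  for (int c = 0; c < C * kh * kw; ++c) {
    const int w_off = c % kw, h_off = (c / kw) % kh, ch = c / (kh * kw);
    for (int oh = 0; oh < out_h; ++oh) {
      for (int ow = 0; ow < out_w; ++ow) {
        const int ih = oh * stride - pad + h_off;
        const int iw = ow * stride - pad + w_off;
        if (ih >= 0 && ih < H && iw >= 0 && iw < W) {
          (*col)[(c * out_h + oh) * out_w + ow] = im[(ch * H + ih) * W + iw];
        }
      }
    }
  }
}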
...@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and ...@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifdef POOL_OP #ifdef POOL_OP
#include "pool_2x2.h" #include "operators/math/pool_2x2.h"
#include <algorithm>
#include <vector>
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -21,10 +23,10 @@ namespace math { ...@@ -21,10 +23,10 @@ namespace math {
void Pool2x2Max(vector<int> strides, vector<int> paddings, const Tensor *input, void Pool2x2Max(vector<int> strides, vector<int> paddings, const Tensor *input,
Tensor *output) { Tensor *output) {
#ifdef __ARM_NEON #if __ARM_NEON
#ifdef ARMV7
#if __aarch64__
#else
const int batch_size = input->dims()[0]; const int batch_size = input->dims()[0];
const int input_height = input->dims()[2]; const int input_height = input->dims()[2];
...@@ -93,15 +95,16 @@ void Pool2x2Max(vector<int> strides, vector<int> paddings, const Tensor *input, ...@@ -93,15 +95,16 @@ void Pool2x2Max(vector<int> strides, vector<int> paddings, const Tensor *input,
output_data += output_batch_stride; output_data += output_batch_stride;
} }
#endif #endif
#else
#endif #endif
} }
void Pool2x2Avg(vector<int> strides, vector<int> paddings, const Tensor *input, void Pool2x2Avg(vector<int> strides, vector<int> paddings, const Tensor *input,
Tensor *output) { Tensor *output) {
#ifdef __ARM_NEON #if __ARM_NEON
#ifdef ARMV7 #if __aarch64__
#else
const int batch_size = input->dims()[0]; const int batch_size = input->dims()[0];
const int input_height = input->dims()[2]; const int input_height = input->dims()[2];
...@@ -171,12 +174,9 @@ void Pool2x2Avg(vector<int> strides, vector<int> paddings, const Tensor *input, ...@@ -171,12 +174,9 @@ void Pool2x2Avg(vector<int> strides, vector<int> paddings, const Tensor *input,
input_data += input_batch_stride; input_data += input_batch_stride;
output_data += output_batch_stride; output_data += output_batch_stride;
} }
#else
// TODO(): to imp other asm
#endif #endif
#else
#endif #endif
} }
......
...@@ -17,7 +17,7 @@ limitations under the License. */ ...@@ -17,7 +17,7 @@ limitations under the License. */
#include <omp.h> #include <omp.h>
#endif #endif
#include "framework/tensor.h" #include "framework/tensor.h"
#include "pool_3x3.h" #include "operators/math/pool_3x3.h"
#if __ARM_NEON #if __ARM_NEON
#include <arm_neon.h> #include <arm_neon.h>
#endif // __ARM_NEON #endif // __ARM_NEON
...@@ -518,6 +518,8 @@ void Pool3x3Maxs1p1(const Tensor *input, Tensor *output) { ...@@ -518,6 +518,8 @@ void Pool3x3Maxs1p1(const Tensor *input, Tensor *output) {
input_data += input_batch_stride; input_data += input_batch_stride;
out_data += output_batch_stride; out_data += output_batch_stride;
} }
#else
#endif #endif
} }
...@@ -582,7 +584,18 @@ void Pool3x3Max(vector<int> strides, vector<int> paddings, const Tensor *input, ...@@ -582,7 +584,18 @@ void Pool3x3Max(vector<int> strides, vector<int> paddings, const Tensor *input,
} }
output_seg[ph * output_width + pw] = max_value; output_seg[ph * output_width + pw] = max_value;
} else { } else {
#if defined(ARMV7) #if __aarch64__
const float32x4_t data1 = vld1q_f32(pos1);
const float32x4_t data2 = vld1q_f32(pos1 + input_width);
const float32x4_t data3 = vld1q_f32(pos1 + 2 * input_width);
const float32x4_t max_data =
vmaxq_f32(vmaxq_f32(data1, data2), data3);
float32x2_t res =
vpmax_f32(vget_high_f32(vsetq_lane_f32(-INT_MAX, max_data, 3)),
vget_low_f32(max_data));
res = vpmax_f32(res, res);
output_seg[ph * output_width + pw] = vget_lane_f32(res, 0);
#else
asm volatile( asm volatile(
"vld1.32 {q1}, [%[pos1]] \n\t" "vld1.32 {q1}, [%[pos1]] \n\t"
"vld1.32 {q2}, [%[pos2]] \n\t" "vld1.32 {q2}, [%[pos2]] \n\t"
...@@ -598,17 +611,6 @@ void Pool3x3Max(vector<int> strides, vector<int> paddings, const Tensor *input, ...@@ -598,17 +611,6 @@ void Pool3x3Max(vector<int> strides, vector<int> paddings, const Tensor *input,
[pos2] "r"(pos2), [pos3] "r"(pos3), [pos2] "r"(pos2), [pos3] "r"(pos3),
[output_ptr] "r"(output_ptr), [negative_max] "r"(negative_max) [output_ptr] "r"(output_ptr), [negative_max] "r"(negative_max)
: "memory", "q1", "q2", "q3", "q4"); : "memory", "q1", "q2", "q3", "q4");
#else
const float32x4_t data1 = vld1q_f32(pos1);
const float32x4_t data2 = vld1q_f32(pos1 + input_width);
const float32x4_t data3 = vld1q_f32(pos1 + 2 * input_width);
const float32x4_t max_data =
vmaxq_f32(vmaxq_f32(data1, data2), data3);
float32x2_t res =
vpmax_f32(vget_high_f32(vsetq_lane_f32(-INT_MAX, max_data, 3)),
vget_low_f32(max_data));
res = vpmax_f32(res, res);
output_seg[ph * output_width + pw] = vget_lane_f32(res, 0);
#endif #endif
} }
} }
...@@ -676,8 +678,8 @@ void Pool3x3Avg(vector<int> strides, vector<int> paddings, const Tensor *input, ...@@ -676,8 +678,8 @@ void Pool3x3Avg(vector<int> strides, vector<int> paddings, const Tensor *input,
} }
output_seg[ph * output_width + pw] = sum / 9.0; output_seg[ph * output_width + pw] = sum / 9.0;
} else { } else {
#if defined(ARMV7) #if __aarch64__
#else
asm volatile( asm volatile(
"vld1.32 {q1}, [%[pos1]] \n\t" "vld1.32 {q1}, [%[pos1]] \n\t"
"vld1.32 {q2}, [%[pos2]] \n\t" "vld1.32 {q2}, [%[pos2]] \n\t"
...@@ -696,7 +698,7 @@ void Pool3x3Avg(vector<int> strides, vector<int> paddings, const Tensor *input, ...@@ -696,7 +698,7 @@ void Pool3x3Avg(vector<int> strides, vector<int> paddings, const Tensor *input,
[output_ptr] "r"(output_ptr), [zero] "r"(zero), [output_ptr] "r"(output_ptr), [zero] "r"(zero),
[nine_ptr] "r"(nine_ptr) [nine_ptr] "r"(nine_ptr)
: "memory", "r6", "q1", "q2", "q3", "q4"); : "memory", "r6", "q1", "q2", "q3", "q4");
#else #endif
const float32x4_t data1 = vld1q_f32(pos1); const float32x4_t data1 = vld1q_f32(pos1);
const float32x4_t data2 = vld1q_f32(pos2); const float32x4_t data2 = vld1q_f32(pos2);
const float32x4_t data3 = vld1q_f32(pos3); const float32x4_t data3 = vld1q_f32(pos3);
...@@ -707,7 +709,6 @@ void Pool3x3Avg(vector<int> strides, vector<int> paddings, const Tensor *input, ...@@ -707,7 +709,6 @@ void Pool3x3Avg(vector<int> strides, vector<int> paddings, const Tensor *input,
vget_low_f32(sum_data)); vget_low_f32(sum_data));
res = vpadd_f32(res, res); res = vpadd_f32(res, res);
output_seg[ph * output_width + pw] = vget_lane_f32(res, 0) / 9.0; output_seg[ph * output_width + pw] = vget_lane_f32(res, 0) / 9.0;
#endif
} }
} }
} }
...@@ -715,6 +716,7 @@ void Pool3x3Avg(vector<int> strides, vector<int> paddings, const Tensor *input, ...@@ -715,6 +716,7 @@ void Pool3x3Avg(vector<int> strides, vector<int> paddings, const Tensor *input,
input_data += input_batch_stride; input_data += input_batch_stride;
output_data += output_batch_stride; output_data += output_batch_stride;
} }
#else
#endif #endif
} }
} // namespace math } // namespace math
......
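
The __aarch64__ branch added to Pool3x3Max reduces a 3-wide maximum in a 4-wide register by poisoning the unused lane with -INT_MAX before the pairwise max. The same reduction in isolation (a sketch):

#include <arm_neon.h>
#include <climits>

// Max of the first 3 lanes of a 4-wide vector: poison lane 3, then pairwise max.
inline float Max3(float32x4_t v) {
  v = vsetq_lane_f32(-static_cast<float>(INT_MAX), v, 3);
  float32x2_t m = vpmax_f32(vget_low_f32(v), vget_high_f32(v));
  m = vpmax_f32(m, m);  // lane 0 now holds max(v[0], v[1], v[2])
  return vget_lane_f32(m, 0);
}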
...@@ -135,6 +135,7 @@ class SoftmaxFuntor<CPU, T> { ...@@ -135,6 +135,7 @@ class SoftmaxFuntor<CPU, T> {
} }
} }
} }
#else
#endif // ARM_NEON #endif // ARM_NEON
public: public:
......
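
For reference, the NEON path guarded above accelerates the standard max-shifted softmax; the scalar computation it reduces to looks like this (a sketch):

#include <algorithm>
#include <cmath>

// Numerically stable softmax over n values, in place: subtract the max
// before exponentiating, then normalize by the sum.
void SoftmaxRef(float *x, int n) {
  const float m = *std::max_element(x, x + n);
  float sum = 0.f;
  for (int i = 0; i < n; ++i) sum += (x[i] = std::exp(x[i] - m));
  for (int i = 0; i < n; ++i) x[i] /= sum;
}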
...@@ -50,7 +50,7 @@ void MulOp<Dtype, T>::InferShape() const { ...@@ -50,7 +50,7 @@ void MulOp<Dtype, T>::InferShape() const {
framework::DDim ddim = framework::make_ddim(output_dims); framework::DDim ddim = framework::make_ddim(output_dims);
this->param_.Out()->Resize(ddim); this->param_.Out()->Resize(ddim);
} }
template class MulOp<CPU, float>;
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
......
...@@ -34,7 +34,7 @@ void MultiClassNMSOp<Dtype, T>::InferShape() const { ...@@ -34,7 +34,7 @@ void MultiClassNMSOp<Dtype, T>::InferShape() const {
// pre size, will change in Compute. // pre size, will change in Compute.
this->param_.Out()->Resize(framework::make_ddim({input_bboxes_dims[1], 6})); this->param_.Out()->Resize(framework::make_ddim({input_bboxes_dims[1], 6}));
} }
template class MultiClassNMSOp<CPU, float>;
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
......
...@@ -232,7 +232,6 @@ class ConvParam : OpParam { ...@@ -232,7 +232,6 @@ class ConvParam : OpParam {
Print &operator<<(Print &printer, const ConvParam &conv_param); Print &operator<<(Print &printer, const ConvParam &conv_param);
#endif #endif
#ifdef ELEMENTWISEADD_OP
class ElementwiseAddParam : OpParam { class ElementwiseAddParam : OpParam {
public: public:
ElementwiseAddParam(const VariableNameMap &inputs, ElementwiseAddParam(const VariableNameMap &inputs,
...@@ -259,6 +258,8 @@ class ElementwiseAddParam : OpParam { ...@@ -259,6 +258,8 @@ class ElementwiseAddParam : OpParam {
int axis_; int axis_;
}; };
#ifdef FUSION_ELEMENTWISEADDRELU_OP
using ElementwiseAddReluParam = ElementwiseAddParam;
#endif #endif
#ifdef MUL_OP #ifdef MUL_OP
...@@ -371,7 +372,7 @@ class BatchNormParam : OpParam { ...@@ -371,7 +372,7 @@ class BatchNormParam : OpParam {
input_variance_ = InputVarianceFrom<LoDTensor>(inputs, scope); input_variance_ = InputVarianceFrom<LoDTensor>(inputs, scope);
epsilon_ = GetAttr<float>("epsilon", attrs); epsilon_ = GetAttr<float>("epsilon", attrs);
momentum_ = GetAttr<float>("momentum", attrs); momentum_ = GetAttr<float>("momentum", attrs);
is_test_ = GetAttr<bool>("is_test", attrs); // is_test_ = GetAttr<bool>("is_test", attrs);
} }
const Tensor *InputX() const { return input_x_; } const Tensor *InputX() const { return input_x_; }
...@@ -421,7 +422,7 @@ class PoolParam : public OpParam { ...@@ -421,7 +422,7 @@ class PoolParam : public OpParam {
strides_ = GetAttr<vector<int>>("strides", attrs); strides_ = GetAttr<vector<int>>("strides", attrs);
paddings_ = GetAttr<vector<int>>("paddings", attrs); paddings_ = GetAttr<vector<int>>("paddings", attrs);
ceil_mode_ = GetAttr<bool>("ceil_mode", attrs); ceil_mode_ = GetAttr<bool>("ceil_mode", attrs);
gloabal_pooling_ = GetAttr<bool>("global_pooling", attrs); global_pooling_ = GetAttr<bool>("global_pooling", attrs);
} }
const Tensor *Input() const { return input_; } const Tensor *Input() const { return input_; }
...@@ -438,7 +439,7 @@ class PoolParam : public OpParam { ...@@ -438,7 +439,7 @@ class PoolParam : public OpParam {
bool isCeilMode() const { return ceil_mode_; } bool isCeilMode() const { return ceil_mode_; }
bool isGlobalPooling() const { return gloabal_pooling_; } bool isGlobalPooling() const { return global_pooling_; }
private: private:
Tensor *input_; Tensor *input_;
...@@ -448,9 +449,82 @@ class PoolParam : public OpParam { ...@@ -448,9 +449,82 @@ class PoolParam : public OpParam {
vector<int> strides_; vector<int> strides_;
vector<int> paddings_; vector<int> paddings_;
bool ceil_mode_; bool ceil_mode_;
bool gloabal_pooling_ = false; bool global_pooling_ = false;
}; };
#endif
#ifdef FUSION_POOLBN_OP
class FusionPoolBNParam : OpParam {
public:
FusionPoolBNParam(const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
const Scope &scope) {
input_ = InputXFrom<LoDTensor>(inputs, scope);
pooling_type_ = GetAttr<string>("pooling_type", attrs);
ksize_ = GetAttr<vector<int>>("ksize", attrs);
strides_ = GetAttr<vector<int>>("strides", attrs);
paddings_ = GetAttr<vector<int>>("paddings", attrs);
ceil_mode_ = GetAttr<bool>("ceil_mode", attrs);
global_pooling_ = GetAttr<bool>("global_pooling", attrs);
output_y_ = OutputYFrom<LoDTensor>(outputs, scope);
input_bias_ = InputBiasFrom<LoDTensor>(inputs, scope);
input_mean_ = InputMeanFrom<LoDTensor>(inputs, scope);
input_scale_ = InputScaleFrom<LoDTensor>(inputs, scope);
input_variance_ = InputVarianceFrom<LoDTensor>(inputs, scope);
epsilon_ = GetAttr<float>("epsilon", attrs);
momentum_ = GetAttr<float>("momentum", attrs);
// is_test_ = GetAttr<bool>("is_test", attrs);
}
const Tensor *Input() const { return input_; }
const string &PoolingType() const { return pooling_type_; }
const vector<int> &Ksize() const { return ksize_; }
const vector<int> &Strides() const { return strides_; }
const vector<int> &Paddings() const { return paddings_; }
bool isCeilMode() const { return ceil_mode_; }
bool isGlobalPooling() const { return global_pooling_; }
Tensor *OutputY() const { return output_y_; }
const Tensor *InputBias() const { return input_bias_; }
const Tensor *InputMean() const { return input_mean_; }
const Tensor *InputScale() const { return input_scale_; }
const Tensor *InputVariance() const { return input_variance_; }
const float &Epsilon() const { return epsilon_; }
const float &Momentum() const { return momentum_; }
const bool &IsTest() const { return is_test_; }
const string &DataFormat() const { return data_format_; }
private:
Tensor *input_;
string pooling_type_;
vector<int> ksize_;
vector<int> strides_;
vector<int> paddings_;
bool ceil_mode_;
bool global_pooling_ = false;
Tensor *output_y_;
Tensor *input_bias_;
Tensor *input_mean_;
Tensor *input_scale_;
Tensor *input_variance_;
float epsilon_;
float momentum_;
bool is_test_;
string data_format_;
};
#endif #endif
#ifdef PRIORBOX_OP #ifdef PRIORBOX_OP
...@@ -875,7 +949,6 @@ class PReluParam : public OpParam { ...@@ -875,7 +949,6 @@ class PReluParam : public OpParam {
}; };
#endif #endif
#ifdef FUSION_FC_OP
class FusionFcParam : public OpParam { class FusionFcParam : public OpParam {
public: public:
FusionFcParam(const VariableNameMap &inputs, const VariableNameMap &outputs, FusionFcParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
...@@ -911,9 +984,11 @@ class FusionFcParam : public OpParam { ...@@ -911,9 +984,11 @@ class FusionFcParam : public OpParam {
int y_num_col_dims_; int y_num_col_dims_;
int axis_; int axis_;
}; };
#ifdef FUSION_FCRELU_OP
using FusionFcReluParam = FusionFcParam;
#endif #endif
#ifdef FUSION_CONVADD_OP
class FusionConvAddParam : public OpParam { class FusionConvAddParam : public OpParam {
public: public:
FusionConvAddParam(const VariableNameMap &inputs, FusionConvAddParam(const VariableNameMap &inputs,
...@@ -960,9 +1035,8 @@ class FusionConvAddParam : public OpParam { ...@@ -960,9 +1035,8 @@ class FusionConvAddParam : public OpParam {
}; };
Print &operator<<(Print &printer, const FusionConvAddParam &conv_param); Print &operator<<(Print &printer, const FusionConvAddParam &conv_param);
#endif
#ifdef FUSION_CONVADD_RELU_OP #ifdef FUSION_CONVADDRELU_OP
class FusionConvAddReluParam : public FusionConvAddParam { class FusionConvAddReluParam : public FusionConvAddParam {
public: public:
FusionConvAddReluParam(const VariableNameMap &inputs, FusionConvAddReluParam(const VariableNameMap &inputs,
...@@ -993,7 +1067,7 @@ class FusionConvAddBNReluParam : public OpParam { ...@@ -993,7 +1067,7 @@ class FusionConvAddBNReluParam : public OpParam {
input_variance_ = InputVarianceFrom<LoDTensor>(inputs, scope); input_variance_ = InputVarianceFrom<LoDTensor>(inputs, scope);
epsilon_ = GetAttr<float>("epsilon", attrs); epsilon_ = GetAttr<float>("epsilon", attrs);
momentum_ = GetAttr<float>("momentum", attrs); momentum_ = GetAttr<float>("momentum", attrs);
is_test_ = GetAttr<bool>("is_test", attrs); // is_test_ = GetAttr<bool>("is_test", attrs);
} }
Tensor *Bias() const { return bias_; } Tensor *Bias() const { return bias_; }
...@@ -1055,8 +1129,91 @@ class FusionConvAddBNReluParam : public OpParam { ...@@ -1055,8 +1129,91 @@ class FusionConvAddBNReluParam : public OpParam {
Tensor *new_bias_; Tensor *new_bias_;
Tensor *new_scale_; Tensor *new_scale_;
}; };
#endif
Print &operator<<(Print &printer, const FusionConvAddParam &conv_param); #ifdef FUSION_CONVADDBN_OP
class FusionConvAddBNParam : public OpParam {
public:
FusionConvAddBNParam(const VariableNameMap &inputs,
const VariableNameMap &outputs,
const AttributeMap &attrs, const Scope &scope) {
bias_ = InputYFrom<LoDTensor>(inputs, scope);
axis_ = GetAttr<int>("axis", attrs);
filter_ = FilterFrom<LoDTensor>(inputs, scope);
input_ = InputFrom<LoDTensor>(inputs, scope);
output_y_ = OutputYFrom<LoDTensor>(outputs, scope);
strides_ = GetAttr<vector<int>>("strides", attrs);
paddings_ = GetAttr<vector<int>>("paddings", attrs);
dilations_ = GetAttr<vector<int>>("dilations", attrs);
groups = GetAttr<int>("groups", attrs);
input_bias_ = InputBiasFrom<LoDTensor>(inputs, scope);
input_mean_ = InputMeanFrom<LoDTensor>(inputs, scope);
input_scale_ = InputScaleFrom<LoDTensor>(inputs, scope);
input_variance_ = InputVarianceFrom<LoDTensor>(inputs, scope);
epsilon_ = GetAttr<float>("epsilon", attrs);
momentum_ = GetAttr<float>("momentum", attrs);
// is_test_ = GetAttr<bool>("is_test", attrs);
}
Tensor *Bias() const { return bias_; }
const int &Axis() const { return axis_; }
const Tensor *Input() const { return input_; }
const Tensor *Filter() const { return filter_; }
Tensor *OutputY() const { return output_y_; }
const vector<int> &Strides() const { return strides_; }
const vector<int> &Paddings() const { return paddings_; }
const vector<int> &Dilations() const { return dilations_; }
const int &Groups() const { return groups; }
const Tensor *InputBias() const { return input_bias_; }
const Tensor *InputMean() const { return input_mean_; }
const Tensor *InputScale() const { return input_scale_; }
const Tensor *InputVariance() const { return input_variance_; }
const float &Epsilon() const { return epsilon_; }
const float &Momentum() const { return momentum_; }
const bool &IsTest() const { return is_test_; }
void SetNewScale(Tensor *new_scale) { new_scale_ = new_scale; }
void SetNewBias(Tensor *new_bias) { new_bias_ = new_bias; }
const Tensor *NewScale() const { return new_scale_; }
const Tensor *NewBias() const { return new_bias_; }
protected:
Tensor *bias_;
int axis_;
Tensor *input_;
Tensor *output_y_;
Tensor *filter_;
vector<int> strides_;
vector<int> paddings_;
vector<int> dilations_;
int groups;
Tensor *input_bias_;
Tensor *input_mean_;
Tensor *input_scale_;
Tensor *input_variance_;
float epsilon_;
float momentum_;
bool is_test_;
Tensor *new_bias_;
Tensor *new_scale_;
};
#endif #endif
#ifdef FUSION_DWCONVBNRELU_OP #ifdef FUSION_DWCONVBNRELU_OP
...@@ -1078,7 +1235,7 @@ class FusionDWConvBNReluParam : public OpParam { ...@@ -1078,7 +1235,7 @@ class FusionDWConvBNReluParam : public OpParam {
input_variance_ = InputVarianceFrom<LoDTensor>(inputs, scope); input_variance_ = InputVarianceFrom<LoDTensor>(inputs, scope);
epsilon_ = GetAttr<float>("epsilon", attrs); epsilon_ = GetAttr<float>("epsilon", attrs);
momentum_ = GetAttr<float>("momentum", attrs); momentum_ = GetAttr<float>("momentum", attrs);
is_test_ = GetAttr<bool>("is_test", attrs); // is_test_ = GetAttr<bool>("is_test", attrs);
} }
const Tensor *Input() const { return input_; } const Tensor *Input() const { return input_; }
...@@ -1139,6 +1296,85 @@ class FusionDWConvBNReluParam : public OpParam { ...@@ -1139,6 +1296,85 @@ class FusionDWConvBNReluParam : public OpParam {
Print &operator<<(Print &printer, const FusionConvAddParam &conv_param); Print &operator<<(Print &printer, const FusionConvAddParam &conv_param);
#endif #endif
#ifdef FUSION_CONVBNRELU_OP
class FusionConvBNReluParam : public OpParam {
public:
FusionConvBNReluParam(const VariableNameMap &inputs,
const VariableNameMap &outputs,
const AttributeMap &attrs, const Scope &scope) {
filter_ = FilterFrom<LoDTensor>(inputs, scope);
input_ = InputFrom<LoDTensor>(inputs, scope);
output_ = OutFrom<LoDTensor>(outputs, scope);
strides_ = GetAttr<vector<int>>("strides", attrs);
paddings_ = GetAttr<vector<int>>("paddings", attrs);
dilations_ = GetAttr<vector<int>>("dilations", attrs);
groups = GetAttr<int>("groups", attrs);
input_bias_ = InputBiasFrom<LoDTensor>(inputs, scope);
input_mean_ = InputMeanFrom<LoDTensor>(inputs, scope);
input_scale_ = InputScaleFrom<LoDTensor>(inputs, scope);
input_variance_ = InputVarianceFrom<LoDTensor>(inputs, scope);
epsilon_ = GetAttr<float>("epsilon", attrs);
momentum_ = GetAttr<float>("momentum", attrs);
// is_test_ = GetAttr<bool>("is_test", attrs);
}
const Tensor *Input() const { return input_; }
const Tensor *Filter() const { return filter_; }
Tensor *Output() const { return output_; }
const vector<int> &Strides() const { return strides_; }
const vector<int> &Paddings() const { return paddings_; }
const vector<int> &Dilations() const { return dilations_; }
const int &Groups() const { return groups; }
const Tensor *InputBias() const { return input_bias_; }
const Tensor *InputMean() const { return input_mean_; }
const Tensor *InputScale() const { return input_scale_; }
const Tensor *InputVariance() const { return input_variance_; }
const float &Epsilon() const { return epsilon_; }
const float &Momentum() const { return momentum_; }
const bool &IsTest() const { return is_test_; }
void SetNewScale(Tensor *new_scale) { new_scale_ = new_scale; }
void SetNewBias(Tensor *new_bias) { new_bias_ = new_bias; }
const Tensor *NewScale() const { return new_scale_; }
const Tensor *NewBias() const { return new_bias_; }
protected:
Tensor *input_;
Tensor *output_;
Tensor *filter_;
vector<int> strides_;
vector<int> paddings_;
vector<int> dilations_;
int groups;
Tensor *input_bias_;
Tensor *input_mean_;
Tensor *input_scale_;
Tensor *input_variance_;
float epsilon_;
float momentum_;
bool is_test_;
Tensor *new_bias_;
Tensor *new_scale_;
};
#endif
#ifdef IM2SEQUENCE_OP #ifdef IM2SEQUENCE_OP
class Im2SequenceParam : public OpParam { class Im2SequenceParam : public OpParam {
public: public:
...@@ -1190,5 +1426,9 @@ class DropoutParam : public OpParam { ...@@ -1190,5 +1426,9 @@ class DropoutParam : public OpParam {
}; };
#endif #endif
#ifdef REGION_OP
class RegionParam : public OpParam {};
#endif
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -54,7 +54,7 @@ void PoolOp<DeviceType, T>::InferShape() const { ...@@ -54,7 +54,7 @@ void PoolOp<DeviceType, T>::InferShape() const {
} }
this->param_.Output()->Resize(framework::make_ddim(output_shape)); this->param_.Output()->Resize(framework::make_ddim(output_shape));
} }
template class PoolOp<CPU, float>;
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
......
...@@ -23,7 +23,7 @@ void PReluOp<Dtype, T>::InferShape() const { ...@@ -23,7 +23,7 @@ void PReluOp<Dtype, T>::InferShape() const {
auto input_dims = this->param_.InputX()->dims(); auto input_dims = this->param_.InputX()->dims();
this->param_.Out()->Resize(input_dims); this->param_.Out()->Resize(input_dims);
} }
template class PReluOp<CPU, float>;
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
......
...@@ -44,7 +44,7 @@ void PriorBoxOp<Dtype, T>::InferShape() const { ...@@ -44,7 +44,7 @@ void PriorBoxOp<Dtype, T>::InferShape() const {
this->param_.OutputBoxes()->Resize(framework::make_ddim(dim_vec)); this->param_.OutputBoxes()->Resize(framework::make_ddim(dim_vec));
this->param_.OutputVariances()->Resize(framework::make_ddim(dim_vec)); this->param_.OutputVariances()->Resize(framework::make_ddim(dim_vec));
} }
template class PriorBoxOp<CPU, float>;
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
......
...@@ -23,7 +23,7 @@ void ReluOp<Dtype, T>::InferShape() const { ...@@ -23,7 +23,7 @@ void ReluOp<Dtype, T>::InferShape() const {
auto input_dims = this->param_.InputX()->dims(); auto input_dims = this->param_.InputX()->dims();
this->param_.Out()->Resize(input_dims); this->param_.Out()->Resize(input_dims);
} }
template class ReluOp<CPU, float>;
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
......
...@@ -27,7 +27,7 @@ void ReshapeOp<Dtype, T>::InferShape() const { ...@@ -27,7 +27,7 @@ void ReshapeOp<Dtype, T>::InferShape() const {
auto out_dims = ValidateShape(shape, input_x_dims); auto out_dims = ValidateShape(shape, input_x_dims);
this->param_.Out()->Resize(out_dims); this->param_.Out()->Resize(out_dims);
} }
template class ReshapeOp<CPU, float>;
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
......
...@@ -24,7 +24,7 @@ void ResizeOp<Dtype, T>::InferShape() const { ...@@ -24,7 +24,7 @@ void ResizeOp<Dtype, T>::InferShape() const {
auto out_dims = CalOutputShape(this->param_); auto out_dims = CalOutputShape(this->param_);
this->param_.Out()->Resize(out_dims); this->param_.Out()->Resize(out_dims);
} }
template class ResizeOp<CPU, float>;
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
......
...@@ -24,7 +24,7 @@ void ScaleOp<Dtype, T>::InferShape() const { ...@@ -24,7 +24,7 @@ void ScaleOp<Dtype, T>::InferShape() const {
auto input_dims = this->param_.InputX()->dims(); auto input_dims = this->param_.InputX()->dims();
this->param_.Out()->Resize(input_dims); this->param_.Out()->Resize(input_dims);
} }
template class ScaleOp<CPU, float>;
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
......
...@@ -22,7 +22,7 @@ template <typename DeviceType, typename T> ...@@ -22,7 +22,7 @@ template <typename DeviceType, typename T>
void SigmoidOp<DeviceType, T>::InferShape() const { void SigmoidOp<DeviceType, T>::InferShape() const {
this->param_.Out()->Resize(this->param_.InputX()->dims()); this->param_.Out()->Resize(this->param_.InputX()->dims());
} }
template class SigmoidOp<CPU, float>;
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
......
...@@ -23,7 +23,7 @@ template <typename Dtype, typename T> ...@@ -23,7 +23,7 @@ template <typename Dtype, typename T>
void SliceOp<Dtype, T>::InferShape() const { void SliceOp<Dtype, T>::InferShape() const {
/// todo: add InputShape() detection. /// todo: add InputShape() detection.
} }
template class SliceOp<CPU, float>;
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
......
...@@ -22,7 +22,7 @@ template <typename DeviceType, typename T> ...@@ -22,7 +22,7 @@ template <typename DeviceType, typename T>
void SoftmaxOp<DeviceType, T>::InferShape() const { void SoftmaxOp<DeviceType, T>::InferShape() const {
this->param_.Out()->Resize(this->param_.InputX()->dims()); this->param_.Out()->Resize(this->param_.InputX()->dims());
} }
template class SoftmaxOp<CPU, float>;
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
......
...@@ -47,7 +47,7 @@ void TransposeOp<Dtype, T>::InferShape() const { ...@@ -47,7 +47,7 @@ void TransposeOp<Dtype, T>::InferShape() const {
} }
this->param_.Out()->Resize(out_dims); this->param_.Out()->Resize(out_dims);
} }
template class TransposeOp<CPU, float>;
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
......
set(dir ${CMAKE_CURRENT_SOURCE_DIR}) set(dir ${CMAKE_CURRENT_SOURCE_DIR})
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${dir}/build") set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${dir}/build")
if (NET STREQUAL "googlenet") if ("googlenet" IN_LIST NET)
# gen test # gen test
ADD_EXECUTABLE(test-googlenet net/test_googlenet.cpp test_helper.h test_include.h executor_for_test.h) ADD_EXECUTABLE(test-googlenet net/test_googlenet.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-googlenet paddle-mobile) target_link_libraries(test-googlenet paddle-mobile)
elseif (NET STREQUAL "mobilenet") elseif ("mobilenet" IN_LIST NET)
# gen test # gen test
ADD_EXECUTABLE(test-mobilenet net/test_mobilenet.cpp test_helper.h test_include.h executor_for_test.h) ADD_EXECUTABLE(test-mobilenet net/test_mobilenet.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-mobilenet paddle-mobile) target_link_libraries(test-mobilenet paddle-mobile)
elseif (NET STREQUAL "yolo") elseif ("yolo" IN_LIST NET)
# gen test # gen test
ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h test_include.h executor_for_test.h) ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-yolo paddle-mobile) target_link_libraries(test-yolo paddle-mobile)
elseif (NET STREQUAL "squeezenet") elseif ("squeezenet" IN_LIST NET)
# gen test # gen test
ADD_EXECUTABLE(test-squeezenet net/test_squeezenet.cpp test_helper.h test_include.h executor_for_test.h) ADD_EXECUTABLE(test-squeezenet net/test_squeezenet.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-squeezenet paddle-mobile) target_link_libraries(test-squeezenet paddle-mobile)
elseif(NET STREQUAL "resnet") elseif("resnet" IN_LIST NET)
# gen test # gen test
ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h) ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-resnet paddle-mobile) target_link_libraries(test-resnet paddle-mobile)
...@@ -145,6 +145,10 @@ else () ...@@ -145,6 +145,10 @@ else ()
ADD_EXECUTABLE(test-conv-add-relu-op operators/test_conv_add_relu_op.cpp test_helper.h test_include.h executor_for_test.h) ADD_EXECUTABLE(test-conv-add-relu-op operators/test_conv_add_relu_op.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-conv-add-relu-op paddle-mobile) target_link_libraries(test-conv-add-relu-op paddle-mobile)
# gen test
ADD_EXECUTABLE(test-conv-add-bn-relu-op operators/test_fusion_conv_add_bn_relu_op.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-conv-add-bn-relu-op paddle-mobile)
#add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp) #add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp)
endif() endif()
...@@ -43,7 +43,7 @@ template <typename DeviceType, typename OpType> ...@@ -43,7 +43,7 @@ template <typename DeviceType, typename OpType>
class Executor4Test : public Executor<DeviceType> { class Executor4Test : public Executor<DeviceType> {
public: public:
Executor4Test(Program<DeviceType> p, string op_type, Executor4Test(Program<DeviceType> p, string op_type,
bool use_optimize = false) bool use_optimize = false, int predict_op_count = 1)
: Executor<DeviceType>() { : Executor<DeviceType>() {
this->use_optimize_ = use_optimize; this->use_optimize_ = use_optimize;
this->program_ = p; this->program_ = p;
...@@ -57,12 +57,14 @@ class Executor4Test : public Executor<DeviceType> { ...@@ -57,12 +57,14 @@ class Executor4Test : public Executor<DeviceType> {
LOG(paddle_mobile::LogLevel::kLOG_ERROR) LOG(paddle_mobile::LogLevel::kLOG_ERROR)
<< "to_predict_program_ == nullptr"; << "to_predict_program_ == nullptr";
} }
const std::vector<std::shared_ptr<BlockDesc>> blocks = const std::vector<std::shared_ptr<BlockDesc>> blocks =
this->to_predict_program_->Blocks(); this->to_predict_program_->Blocks();
for (std::shared_ptr<BlockDesc> block_desc : blocks) { for (std::shared_ptr<BlockDesc> block_desc : blocks) {
std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops(); std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
for (std::shared_ptr<OpDesc> op : ops) { for (int i = 0; i < ops.size(); ++i) {
if (op->Type() == op_type) { auto op = ops[i];
if (op->Type() == op_type && i < predict_op_count) {
DLOG << "匹配到: " << op->Type(); DLOG << "匹配到: " << op->Type();
/// test first meeting op in program /// test first meeting op in program
...@@ -72,11 +74,17 @@ class Executor4Test : public Executor<DeviceType> { ...@@ -72,11 +74,17 @@ class Executor4Test : public Executor<DeviceType> {
op->Type(), op->GetInputs(), op->GetOutputs(), op->Type(), op->GetInputs(), op->GetOutputs(),
op->GetAttrMap(), this->program_.scope); op->GetAttrMap(), this->program_.scope);
this->ops_of_block_[*block_desc.get()].push_back(op_ptr); this->ops_of_block_[*block_desc.get()].push_back(op_ptr);
break;
} }
} }
} }
this->InitMemory(); this->InitMemory();
std::shared_ptr<paddle_mobile::framework::BlockDesc> to_predict_block =
this->to_predict_program_->Block(0);
auto &ops = this->ops_of_block_[*to_predict_block.get()];
for (const auto &op : ops) {
op->Init();
}
} }
template <typename T = LoDTensor> template <typename T = LoDTensor>
...@@ -130,9 +138,6 @@ class Executor4Test : public Executor<DeviceType> { ...@@ -130,9 +138,6 @@ class Executor4Test : public Executor<DeviceType> {
auto *output_tensor = con_output->GetMutable<LoDTensor>(); auto *output_tensor = con_output->GetMutable<LoDTensor>();
output_tensor->mutable_data<float>(dDim); output_tensor->mutable_data<float>(dDim);
std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
out_tensor.reset(output_tensor);
std::shared_ptr<paddle_mobile::framework::BlockDesc> to_predict_block = std::shared_ptr<paddle_mobile::framework::BlockDesc> to_predict_block =
this->to_predict_program_->Block(0); this->to_predict_program_->Block(0);
for (int j = 0; j < this->ops_of_block_[*to_predict_block.get()].size(); for (int j = 0; j < this->ops_of_block_[*to_predict_block.get()].size();
...@@ -141,6 +146,7 @@ class Executor4Test : public Executor<DeviceType> { ...@@ -141,6 +146,7 @@ class Executor4Test : public Executor<DeviceType> {
op->Run(); op->Run();
} }
return out_tensor; return std::make_shared<paddle_mobile::framework::Tensor>(
paddle_mobile::framework::Tensor(*output_tensor));
} }
}; };
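
With the new predict_op_count parameter the harness can register the first N matching ops rather than only the first, and every registered op now gets Init() before Run(). A hypothetical instantiation, modeled on the fusion test later in this diff (the count of 2 is an illustrative assumption):

// Hypothetical usage: register and run the first two matching fusion ops.
// Assumes the same setup as the fusion test (../test_include.h and a loaded
// `program`).
Executor4Test<paddle_mobile::CPU,
              paddle_mobile::operators::FusionConvAddBNReluOp<
                  paddle_mobile::CPU, float>>
    executor(program, "fusion_conv_add_bn_relu", true,
             /*predict_op_count=*/2);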
...@@ -19,7 +19,9 @@ int main() { ...@@ -19,7 +19,9 @@ int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::Loader<paddle_mobile::CPU> loader;
// ../../../test/models/googlenet // ../../../test/models/googlenet
// ../../../test/models/mobilenet // ../../../test/models/mobilenet
auto program = loader.Load(g_googlenet, true); // auto program = loader.Load(g_googlenet, true);
auto program = loader.Load(g_mobilenet_ssd, true);
// auto program = loader.Load(g_googlenet_combine + "/model", // auto program = loader.Load(g_googlenet_combine + "/model",
// g_googlenet_combine + // g_googlenet_combine +
// "/params", true); // "/params", true);
......
...@@ -23,7 +23,7 @@ int main() { ...@@ -23,7 +23,7 @@ int main() {
auto time1 = time(); auto time1 = time();
if (paddle_mobile.Load(g_googlenet, optimize)) { if (paddle_mobile.Load(g_googlenet, optimize)) {
auto time2 = time(); auto time2 = time();
DLOG << "load cost :" << time_diff(time1, time1) << "ms"; DLOG << "load cost: " << time_diff(time1, time1) << "ms";
std::vector<float> input; std::vector<float> input;
std::vector<int64_t> dims{1, 3, 224, 224}; std::vector<int64_t> dims{1, 3, 224, 224};
GetInput<float>(g_test_image_1x3x224x224, &input, dims); GetInput<float>(g_test_image_1x3x224x224, &input, dims);
......
...@@ -12,28 +12,31 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,28 +12,31 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <fstream> #include <iostream>
#include "../test_helper.h" #include "../test_helper.h"
#include "../test_include.h" #include "../test_include.h"
int main() { int main() {
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile; paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
paddle_mobile.SetThreadNum(4);
auto time1 = time(); auto time1 = time();
if (paddle_mobile.Load(g_mobilenet_ssd, true)) { auto isok = paddle_mobile.Load(
std::string(g_mobilenet_ssd_gesture) + "/model",
std::string(g_mobilenet_ssd_gesture) + "/params", true);
// auto isok = paddle_mobile.Load(g_mobilenet_ssd, false);
if (isok) {
auto time2 = time(); auto time2 = time();
DLOG << "load cost :" << time_diff(time1, time1) << "ms"; std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
std::vector<float> input;
std::vector<int64_t> dims{1, 3, 300, 300}; std::vector<int64_t> dims{1, 3, 300, 300};
Tensor input_tensor; GetInput<float>(g_hand, &input, dims);
SetupTensor<float>(&input_tensor, {1, 3, 300, 300}, static_cast<float>(0),
static_cast<float>(1));
std::vector<float> input(input_tensor.data<float>(),
input_tensor.data<float>() + input_tensor.numel());
auto time3 = time(); auto time3 = time();
paddle_mobile.Predict(input, dims); auto output = paddle_mobile.Predict(input, dims);
auto time4 = time(); auto time4 = time();
DLOG << "predict cost :" << time_diff(time3, time4) << "ms"; std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
<< std::endl;
} }
return 0; return 0;
} }
...@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <fstream> #include <iostream>
#include "../test_helper.h" #include "../test_helper.h"
#include "../test_include.h" #include "../test_include.h"
...@@ -22,20 +22,23 @@ int main() { ...@@ -22,20 +22,23 @@ int main() {
auto time1 = time(); auto time1 = time();
if (paddle_mobile.Load(g_mobilenet, true)) { if (paddle_mobile.Load(g_mobilenet, true)) {
auto time2 = time(); auto time2 = time();
DLOG << "load cost :" << time_diff(time1, time1) << "ms"; std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
std::vector<float> input;
std::vector<int64_t> dims{1, 3, 224, 224}; std::vector<int64_t> dims{1, 3, 224, 224};
Tensor input_tensor; GetInput<float>(g_test_image_1x3x224x224, &input, dims);
SetupTensor<float>(&input_tensor, {1, 3, 224, 224}, static_cast<float>(0),
static_cast<float>(1)); for (int i = 0; i < 10; ++i) {
auto time3 = time();
std::vector<float> input(input_tensor.data<float>(), auto vec_result = paddle_mobile.Predict(input, dims);
input_tensor.data<float>() + input_tensor.numel()); auto time4 = time();
auto time3 = time(); std::vector<float>::iterator biggest =
auto vec_result = paddle_mobile.Predict(input, dims); std::max_element(std::begin(vec_result), std::end(vec_result));
auto time4 = time(); std::cout << " Max element is " << *biggest << " at position "
<< std::distance(std::begin(vec_result), biggest) << std::endl;
DLOG << "predict cost :" << time_diff(time3, time4) << "ms"; std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
<< std::endl;
}
} }
return 0; return 0;
......
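
The argmax printed above is only a class index; mapping it to a readable name requires the label file distributed with the model. One hedged way to load such a file (the path and the one-label-per-line format are assumptions):

#include <fstream>
#include <string>
#include <vector>

// Load one label per line, e.g. from ImageNet's synset_words.txt.
std::vector<std::string> LoadLabels(const std::string &path) {
  std::vector<std::string> labels;
  std::ifstream in(path);
  for (std::string line; std::getline(in, line);) labels.push_back(line);
  return labels;
}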
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "../test_include.h"
#include "operators/fusion_conv_add_bn_relu_op.h"
int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader;
// ../models/image_classification_resnet.inference.model
auto program = loader.Load(g_mobilenet, true);
PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
"program file read fail");
Executor4Test<paddle_mobile::CPU,
paddle_mobile::operators::FusionConvAddBNReluOp<
paddle_mobile::CPU, float>>
executor(program, "fusion_conv_add_bn_relu", true);
std::cout << "executor 4 test: " << std::endl;
paddle_mobile::framework::Tensor input;
GetInput<float>(g_test_image_1x3x224x224_banana, &input, {1, 3, 224, 224});
// // Use SetupTensor if no local input image is available.
// SetupTensor<float>(&input, {1, 3, 224, 224}, static_cast<float>(0),
// static_cast<float>(1));
DLOG << " fuck: " << input;
auto out_ddim = paddle_mobile::framework::make_ddim({1, 32, 112, 112});
std::cout << "before predict: " << std::endl;
auto output =
executor.Predict(input, "data", "conv2_1_dw_bn.tmp_2", out_ddim);
std::cout << "after predict " << std::endl;
auto output_ptr = output->data<float>();
int stride = output->numel() / 100;
for (int i = 0; i < 100; i++) {
DLOG << " index:" << i * stride << " value: " << output_ptr[i * stride];
}
// for (int i = 0; i < 100; i++) {
// DLOG << " index:" << i << " value: "<< output_ptr[i];
// }
// for (int j = 0; j < output->numel(); ++j) {
// std::cout << " (index: " << j << " value: " << output_ptr[j] << ") ";
// }
std::cout << std::endl;
return 0;
}
@@ -16,22 +16,29 @@ limitations under the License. */
 
 #include <fstream>
 #include <random>
+#include <string>
+#include <vector>
 
 #include "common/common.h"
 #include "common/log.h"
 #include "framework/ddim.h"
 #include "framework/tensor.h"
 
-static const std::string g_mobilenet_ssd = "../models/mobilenet+ssd";
-static const std::string g_squeezenet = "../models/squeezenet";
-static const std::string g_googlenet = "../models/googlenet";
-static const std::string g_mobilenet = "../models/mobilenet";
-static const std::string g_resnet_50 = "../models/resnet_50";
-static const std::string g_resnet = "../models/resnet";
-static const std::string g_googlenet_combine = "../models/googlenet_combine";
-static const std::string g_yolo = "../models/yolo";
-static const std::string g_test_image_1x3x224x224 =
+static const char *g_mobilenet_ssd = "../models/mobilenet+ssd";
+static const char *g_mobilenet_ssd_gesture = "../models/mobilenet+ssd_gesture";
+static const char *g_squeezenet = "../models/squeezenet";
+static const char *g_googlenet = "../models/googlenet";
+static const char *g_mobilenet = "../models/mobilenet";
+static const char *g_resnet_50 = "../models/resnet_50";
+static const char *g_resnet = "../models/resnet";
+static const char *g_googlenet_combine = "../models/googlenet_combine";
+static const char *g_yolo = "../models/yolo";
+static const char *g_test_image_1x3x224x224 =
     "../images/test_image_1x3x224x224_float";
+static const char *g_test_image_1x3x224x224_banana =
+    "../images/input_3x224x224_banana";
+static const char *g_hand = "../images/hand_image";
 
 using paddle_mobile::framework::DDim;
 using paddle_mobile::framework::Tensor;
@@ -62,9 +69,9 @@ void GetInput(const std::string &input_name, std::vector<T> *input,
     size *= dim;
   }
 
-  T *input_ptr = (T *)malloc(sizeof(T) * size);
+  T *input_ptr = reinterpret_cast<T *>(malloc(sizeof(T) * size));
   std::ifstream in(input_name, std::ios::in | std::ios::binary);
-  in.read((char *)(input_ptr), size * sizeof(T));
+  in.read(reinterpret_cast<char *>(input_ptr), size * sizeof(T));
   in.close();
   for (int i = 0; i < size; ++i) {
     input->push_back(input_ptr[i]);
@@ -79,6 +86,6 @@ void GetInput(const std::string &input_name,
   T *input_ptr = input->mutable_data<T>(dims);
 
   std::ifstream in(input_name, std::ios::in | std::ios::binary);
-  in.read((char *)(input_ptr), input->numel() * sizeof(T));
+  in.read(reinterpret_cast<char *>(input_ptr), input->numel() * sizeof(T));
  in.close();
 }
 #!/usr/bin/env bash
+NETS=""
+declare -a supportedNets=("googlenet" "mobilenet" "yolo" "squeezenet" "resnet")
 
 build_for_mac() {
     if [ ! `which brew` ]; then
@@ -38,7 +40,8 @@ build_for_android() {
     fi
 
     if [ -z "$PLATFORM" ]; then
-        PLATFORM="arm-v7a" # Users could choose "arm-v8a" or other platforms from the command line.
+        PLATFORM="arm-v7a" # Users could choose "arm-v8a" platform.
+        # PLATFORM="arm-v8a"
     fi
 
     if [ "${PLATFORM}" = "arm-v7a" ]; then
@@ -59,7 +62,8 @@ build_for_android() {
     ANDROID_PLATFORM_VERSION="android-22"
     TOOLCHAIN_FILE="./tools/android-cmake/android.toolchain.cmake"
     ANDROID_ARM_MODE="arm"
-    if [ $# -eq 1 ]; then
+
+    if [ "${#NETS}" -gt 0 ]; then
         cmake .. \
             -B"../build/release/${PLATFORM}" \
             -DANDROID_ABI="${ABI}" \
@@ -69,7 +73,7 @@ build_for_android() {
             -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \
             -DANDROID_STL=c++_static \
             -DANDROID=true \
-            -DNET=$1 \
+            -DNET="${NETS}" \
             -D"${ARM_PLATFORM}"=true
     else
@@ -92,23 +96,25 @@ build_for_ios() {
 #    rm -rf "../build"
     PLATFORM="ios"
     MODE="Release"
-    BUILD_DIR=../build/release/"${PLATFORM}"
+    BUILD_DIR=../build/release/"${PLATFORM}"/
     TOOLCHAIN_FILE="./tools/ios-cmake/ios.toolchain.cmake"
     mkdir -p "${BUILD_DIR}"
-    if [ $# -eq 1 ]; then
+    if [ "${#NETS}" -gt 0 ]; then
         cmake .. \
             -B"${BUILD_DIR}" \
             -DCMAKE_BUILD_TYPE="${MODE}" \
-            -DCMAKE_TOOLCHAIN_FILE="${TOOLCHAIN_FILE}" \
             -DIOS_PLATFORM=OS \
-            -DNET=$1 \
+            -DIOS_ARCH="${IOS_ARCH}" \
+            -DCMAKE_TOOLCHAIN_FILE="${TOOLCHAIN_FILE}" \
+            -DNET="${NETS}" \
             -DIS_IOS="true"
     else
         cmake .. \
             -B"${BUILD_DIR}" \
             -DCMAKE_BUILD_TYPE="${MODE}" \
-            -DCMAKE_TOOLCHAIN_FILE="${TOOLCHAIN_FILE}" \
             -DIOS_PLATFORM=OS \
+            -DIOS_ARCH="${IOS_ARCH}" \
+            -DCMAKE_TOOLCHAIN_FILE="${TOOLCHAIN_FILE}" \
             -DIS_IOS="true"
     fi
     cd "${BUILD_DIR}"
@@ -120,7 +126,7 @@ build_for_ios() {
 }
 
 build_error() {
-    echo "unknown argument"
+    echo "unknown target : $1"
 }
 
 if [ $# -lt 1 ]; then
@@ -128,31 +134,37 @@ if [ $# -lt 1 ]; then
     echo "available targets: ios|android"
     echo "sample usage: ./build.sh android"
 else
-    if [ $# -eq 2 ]; then
-        if [ $2 != "googlenet" -a $2 != "mobilenet" -a $2 != "yolo" -a $2 != "squeezenet" -a $2 != "resnet" ]; then
-            if [ $1 = "android" ]; then
-                build_for_android
-            elif [ $1 = "ios" ]; then
-                build_for_ios
-            else
-                build_error
-            fi
-        else
-            if [ $1 = "android" ]; then
-                build_for_android $2
-            elif [ $1 = "ios" ]; then
-                build_for_ios $2
-            else
-                build_error
-            fi
-        fi
-    else
-        if [ $1 = "android" ]; then
-            build_for_android
-        elif [ $1 = "ios" ]; then
-            build_for_ios
-        else
-            build_error
-        fi
-    fi
+    params=($@)
+    for (( i=1; i<$#; i++ )); do
+        if [ ${i} != 1 ]; then
+            NETS=$NETS$";"
+        fi
+        NETS=$NETS$"${params[i]}"
+    done
+    params=${@:2}
+
+    supported=false
+    for name in ${params[@]}; do
+        match=false
+        for net in ${supportedNets[@]}; do
+            if [ "$name"x = "$net"x ]; then
+                supported=true
+                match=true
+                break 1
+            fi
+        done
+        if [ "$match" = false ]; then
+            echo "${name} not supported!"
+            echo "supported nets are: ${supportedNets[@]}"
+            exit -1
+        fi
+    done
+
+    if [ $1 = "android" ]; then
+        build_for_android
+    elif [ $1 = "ios" ]; then
+        build_for_ios
+    else
+        build_error "$1"
+    fi
 fi
\ No newline at end of file
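With this change, build.sh accepts any number of supported nets after the build target and joins them into the semicolon-separated NETS list that is passed as -DNET. A usage sketch (the net choices are illustrative):

```sh
# compile only the operators needed by googlenet and mobilenet
./build.sh android googlenet mobilenet

# no nets given: build with the full default operator set
./build.sh ios
```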
@@ -34,6 +34,7 @@ set (CMAKE_SYSTEM_VERSION 1)
 set (UNIX True)
 set (APPLE True)
 set (IOS True)
+set (IOS_ARCH armv7 armv7s arm64)
 
 # Required as of cmake 2.8.10
 set (CMAKE_OSX_DEPLOYMENT_TARGET "" CACHE STRING "Force unset of the deployment target for iOS" FORCE)
@@ -159,7 +160,6 @@ set (CMAKE_OSX_SYSROOT ${CMAKE_IOS_SDK_ROOT} CACHE PATH "Sysroot used for iOS su
 
 # set the architecture for iOS
 if (${IOS_PLATFORM} STREQUAL "OS")
-    set (IOS_ARCH armv7 armv7s arm64)
 elseif (${IOS_PLATFORM} STREQUAL "SIMULATOR")
     set (IOS_ARCH i386)
 elseif (${IOS_PLATFORM} STREQUAL "SIMULATOR64")
BEGIN {
print "digraph G {"
}
/op:/ {
id++
opname[id] = $NF
}
/input/ {
type = "input"
para = $NF
if (input[id]) {
input[id] = input[id] "|"
}
input[id] = input[id] "<" para ">" para
}
/output/ {
type = "output"
para = $NF
if (output[id]) {
output[id] = output[id] "|"
}
output[id] = output[id] "<" para ">" para
}
/attr/ {
type = "attr"
aname = $NF
if (attr_key[id]) {
attr_key[id] = attr_key[id] "|"
attr_value[id] = attr_value[id] "|"
}
attr_key[id] = attr_key[id] $NF
}
/argument/ {
if (type == "attr") {
split($0, arr, " - ")
attr_value[id] = attr_value[id] arr[2]
} else if ((type == "input") || (type == "output")) {
if (!var2id[$NF]) {
var_id++
var[var_id] = $NF
var2id[$NF] = var_id
}
varid = var2id[$NF]
lid++
if (type == "input") {
line[lid] = "var_" varid " -> " "op_" id ":<" para ">"
if (xout[$NF]) {
xi++
xline[xi] = "xop_" xout[$NF] " -> " "xop_" id
}
} else if (type == "output") {
line[lid] = "op_" id ":<" para ">" " -> " "var_" varid
xout[$NF] = id
}
}
}
/var name/ {
varname = $NF
vid = var2id[varname]
}
/var tensor desc dim / {
if (tensor[vid]) tensor[vid] = tensor[vid] " x "
tensor[vid] = tensor[vid] $NF
}
END {
print "subgraph cluster_G0 {"
for (i = 1; i <= id; i++) {
print "xop_" i "[label=\"" i ". " opname[i] "\"]"
}
for (i = 1; i <= xi; i++) {
print xline[i]
}
print "}"
for (i = 1; i <= id; i++) {
print "op_" i "[group=op;shape=record;label=\"{{" input[i] "}|<op>" i ". " opname[i] "|{" output[i] "}}\"]"
}
for (i = 1; i <= var_id; i++) {
print "var_" i "[label=\"" var[i] " [" tensor[i] "]\"]"
}
for (i = 1; i <= lid; i++) {
print line[i]
}
for (i = 1; i <= id; i++) {
print "attr_" i "[shape=record;label=\"{" attr_key[i] "}|{" attr_value[i] "}\"]"
print "attr_" i " -> " "op_" i ":<op>"
}
print "}"
}
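This awk program turns an operator/variable dump (lines containing "op:", "input", "output", "attr", "argument", "var name", and "var tensor desc dim") into a Graphviz graph of the network. A hypothetical invocation — the script name net2dot.awk and log file net.log are placeholders, and Graphviz must be installed:

```sh
# 'net2dot.awk' and 'net.log' are placeholder names
awk -f net2dot.awk net.log > net.dot
dot -Tpng net.dot -o net.png
```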
-if (NET STREQUAL "googlenet")
+set(FOUND_MATCH OFF)
+
+if ("googlenet" IN_LIST NET)
+    message("googlenet enabled")
     set(CONCAT_OP ON)
     set(CONV_OP ON)
     set(LRN_OP ON)
@@ -8,8 +10,13 @@ if (NET STREQUAL "googlenet")
     set(POOL_OP ON)
     set(RELU_OP ON)
     set(FUSION_CONVADD_OP ON)
-    set(FUSION_CONVADD_RELU_OP ON)
-elseif (NET STREQUAL "mobilenet")
+    set(FUSION_CONVADDRELU_OP ON)
+    set(FOUND_MATCH ON)
+endif()
+
+if ("mobilenet" IN_LIST NET)
+    message("mobilenet enabled")
     set(CONV_OP ON)
     set(ELEMENTWISEADD_OP ON)
     set(RELU_OP ON)
@@ -21,12 +28,23 @@ elseif (NET STREQUAL "mobilenet")
     set(RESHAPE_OP ON)
     set(FUSION_CONVADDBNRELU_OP ON)
     set(FUSION_CONVADD_OP ON)
-elseif (NET STREQUAL "yolo")
+    set(FOUND_MATCH ON)
+endif()
+
+if ("yolo" IN_LIST NET)
+    message("yolo enabled")
     set(BATCHNORM_OP ON)
     set(CONV_OP ON)
     set(RELU_OP ON)
     set(ELEMENTWISEADD_OP ON)
-elseif (NET STREQUAL "squeezenet")
+    set(FOUND_MATCH ON)
+endif()
+
+if ("squeezenet" IN_LIST NET)
+    message("squeezenet enabled")
     set(CONCAT_OP ON)
     set(CONV_OP ON)
     set(RELU_OP ON)
@@ -34,15 +52,45 @@ elseif (NET STREQUAL "squeezenet")
     set(POOL_OP ON)
     set(RESHAPE_OP ON)
     set(SOFTMAX_OP ON)
-elseif (NET STREQUAL "resnet")
+    set(FOUND_MATCH ON)
+endif()
+
+if ("resnet" IN_LIST NET)
+    message("resnet enabled")
+    set(CONCAT_OP ON)
     set(CONV_OP ON)
-    set(BATCHNORM_OP ON)
+    set(RELU_OP ON)
     set(ELEMENTWISEADD_OP ON)
+    set(POOL_OP ON)
+    set(RESHAPE_OP ON)
     set(SOFTMAX_OP ON)
+    set(MUL_OP ON)
+    set(FOUND_MATCH ON)
+endif()
+
+if ("FPGAnets" IN_LIST NET)
+    message("FPGAnets enabled")
+    set(FUSION_CONVADDRELU_OP ON)
+    set(FUSION_CONVADDBNRELU_OP ON)
+    set(FUSION_CONVADDBN_OP ON)
+    set(FUSION_POOLBN_OP ON)
+    set(FUSION_ELEMENTWISEADDRELU_OP ON)
+    set(FUSION_FC_OP ON)
+    set(FUSION_FCRELU_OP ON)
+    set(REGION_OP ON)
     set(POOL_OP ON)
-    set(RELU_OP ON)
-else ()
+    set(CONCAT_OP ON)
+    set(SOFTMAX_OP ON)
+    set(DROPOUT_OP ON)
+    set(FOUND_MATCH ON)
+endif()
+
+if(NOT FOUND_MATCH)
+    message("--default--")
     set(BATCHNORM_OP ON)
     set(BOXCODER_OP ON)
     set(CONCAT_OP ON)
@@ -50,7 +98,7 @@ else ()
     set(DEPTHWISECONV_OP ON)
     set(ELEMENTWISEADD_OP ON)
     set(FUSION_CONVADD_OP ON)
-    set(CONVADDRELU_OP ON)
+    set(FUSION_CONVADDRELU_OP ON)
     set(FUSION_FC_OP ON)
     set(LRN_OP ON)
     set(MUL_OP ON)
@@ -62,15 +110,17 @@ else ()
     set(SIGMOID_OP ON)
     set(SOFTMAX_OP ON)
     set(TRANSPOSE_OP ON)
-    set(FUSION_CONVADD_RELU_OP ON)
     set(FUSION_CONVADDBNRELU_OP ON)
     set(FUSION_DWCONVBNRELU_OP ON)
+    set(FUSION_CONVBNRELU_OP ON)
     set(PRELU_OP ON)
     set(RESIZE_OP ON)
     set(SCALE_OP ON)
     set(SLICE_OP ON)
     set(DROPOUT_OP ON)
     set(IM2SEQUENCE_OP ON)
+endif()
 
 # option(BATCHNORM_OP "" ON)
 # option(BOXCODER_OP "" ON)
 # option(CONCAT_OP "" ON)
@@ -78,7 +128,7 @@ else ()
 # option(DEPTHWISECONV_OP "" ON)
 # option(ELEMENTWISEADD_OP "" ON)
 # option(FUSION_CONVADD_OP "" ON)
-# option(CONVADDRELU_OP "" ON)
+# option(FUSION_CONVADDRELU_OP "" ON)
 # option(FUSION_FC_OP "" ON)
 # option(LRN_OP "" ON)
 # option(MUL_OP "" ON)
@@ -90,8 +140,7 @@ else ()
 # option(SIGMOID_OP "" ON)
 # option(SOFTMAX_OP "" ON)
 # option(TRANSPOSE_OP "" ON)
-# option(FUSION_CONVADD_RELU_OP "" ON)
-endif ()
+# endif ()
 
 if (BATCHNORM_OP)
     add_definitions(-DBATCHNORM_OP)
@@ -114,8 +163,8 @@ endif()
 if (FUSION_CONVADD_OP)
     add_definitions(-DFUSION_CONVADD_OP)
 endif()
-if (CONVADDRELU_OP)
-    add_definitions(-DCONVADDRELU_OP)
+if (FUSION_CONVADDRELU_OP)
+    add_definitions(-DFUSION_CONVADDRELU_OP)
 endif()
 if (FUSION_FC_OP)
     add_definitions(-DFUSION_FC_OP)
@@ -150,15 +199,17 @@ endif()
 if (TRANSPOSE_OP)
     add_definitions(-DTRANSPOSE_OP)
 endif()
-if (FUSION_CONVADD_RELU_OP)
-    add_definitions(-DFUSION_CONVADD_RELU_OP)
-endif()
 if (FUSION_CONVADDBNRELU_OP)
     add_definitions(-DFUSION_CONVADDBNRELU_OP)
 endif()
 if (FUSION_DWCONVBNRELU_OP)
     add_definitions(-DFUSION_DWCONVBNRELU_OP)
 endif()
+if (FUSION_CONVBNRELU_OP)
+    add_definitions(-DFUSION_CONVBNRELU_OP)
+endif()
 if (PRELU_OP)
     add_definitions(-DPRELU_OP)
 endif()
@@ -177,3 +228,20 @@ endif()
 if (IM2SEQUENCE_OP)
     add_definitions(-DIM2SEQUENCE_OP)
 endif()
+
+if (FUSION_CONVADDBN_OP)
+    add_definitions(-DFUSION_CONVADDBN_OP)
+endif()
+if (FUSION_FCRELU_OP)
+    add_definitions(-DFUSION_FCRELU_OP)
+endif()
+if (FUSION_POOLBN_OP)
+    add_definitions(-DFUSION_POOLBN_OP)
+endif()
+if (FUSION_ELEMENTWISEADDRELU_OP)
+    add_definitions(-DFUSION_ELEMENTWISEADDRELU_OP)
+endif()
+if (REGION_OP)
+    add_definitions(-DREGION_OP)
+endif()
cmake_minimum_required(VERSION 3.6)
project(quali)
add_definitions(-DENABLE_EXCEPTION)
set(CMAKE_CXX_STANDARD 11)
file(GLOB_RECURSE QULIFICATON_CC src/*.cc src/*.cpp src/*.c src/*.mm)
file(GLOB_RECURSE QULIFICATON_H src/*.h)
include_directories(. src/)
#add_library(paddle-mobile SHARED ${QULIFICATON_CC} ${QULIFICATON_H} convert.cpp)
add_executable(quantify convert.cpp ${QULIFICATON_CC} ${QULIFICATON_H})
\ No newline at end of file
# Model quantization script

#### Usage guide

1. Start from the paddle-mobile project directory (e.g. ~/PaddleProject/paddle-mobile).
2. cd into the tools/quantification/ directory.
3. Build with cmake:

```sh
cmake .
make
```

4. Run the quantization tool:

```sh
./quantify (0: separated, 1: combined) (input path) (output path)
# quantify the separated googlenet model from /Users/xiebaiyuan/PaddleProject/quali/models/googlenet to ./googlenet_min
./quantify 0 /Users/xiebaiyuan/PaddleProject/quali/models/googlenet ./googlenet_min
```

*Notes on the quantization tool:*
*1. For a separated model, the model file is expected to be named "__model__".*
*2. For a combined model, the model file is expected to be named "model" and the parameter file "params".*

##### Putting it all together

Taking the non-combined googlenet as an example:

```sh
cd tools/quantification/
cmake .
make

./quantify 0 /Users/xiebaiyuan/PaddleProject/quali/models/googlenet ./googlenet_min
```
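
For reference, the tool (see LoadWithDump in convert.cpp below) writes each tensor as its original header, then a float min/max pair, then one uint8 per element. A minimal dequantization sketch, assuming a buffer laid out that way:

```cpp
#include <cstdint>
#include <vector>

// Recover approximate float values from one quantized tensor body: the
// writer stores min, max, then round((v - min) / (max - min) * 255) per
// element, so reading side inverts that mapping.
std::vector<float> Dequantize(float min_value, float max_value,
                              const std::vector<uint8_t> &quantized) {
  std::vector<float> values;
  values.reserve(quantized.size());
  const float scale = (max_value - min_value) / 255.0f;
  for (uint8_t q : quantized) {
    values.push_back(min_value + q * scale);
  }
  return values;
}
```

The trade-off: float32 weights shrink to roughly a quarter of their size, at the cost of at most one quantization step of precision per weight.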
#include "src/enforce.h"
#include "src/var_desc.h"
#include "src/program_desc.h"
#include <cstdlib>
#include <string>
#include <cmath>
#include <iostream>
#include <utility>
#include <vector>
#include "src/framework.pb-c.h"
#include "src/protobuf-c.h"
#include <fstream>
#include <iostream>
const size_t kSize64 = sizeof(uint64_t);
const size_t kSize32 = sizeof(uint32_t);
char *Get_binary_data(const std::string &filename) {
FILE *file = fopen(filename.c_str(), "rb");
PADDLE_MOBILE_ENFORCE(file != nullptr, "can't open file: %s ",
filename.c_str());
fseek(file, 0, SEEK_END);
int64_t size = ftell(file);
PADDLE_MOBILE_ENFORCE(size > 0, "size is too small");
rewind(file);
auto *data = new char[size];
size_t bytes_read = fread(data, 1, static_cast<size_t>(size), file);
  PADDLE_MOBILE_ENFORCE(bytes_read == static_cast<size_t>(size),
                        "bytes read do not match the file size from fseek");
fclose(file);
return data;
}
static size_t ReadBuffer(const char *file_name, uint8_t **out) {
FILE *fp;
fp = fopen(file_name, "rb");
PADDLE_MOBILE_ENFORCE(fp != nullptr, " %s open failed !", file_name);
fseek(fp, 0, SEEK_END);
auto size = static_cast<size_t>(ftell(fp));
rewind(fp);
*out = reinterpret_cast<uint8_t *>(malloc(size));
size_t cur_len = 0;
size_t nread;
while ((nread = fread(*out + cur_len, 1, size - cur_len, fp)) != 0) {
cur_len += nread;
}
fclose(fp);
return cur_len;
}
std::shared_ptr<ProgramDesc> loadParams(const std::string &model_path) {
PaddleMobile__Framework__Proto__ProgramDesc *c_program;
uint8_t *buf = nullptr;
size_t read_size = ReadBuffer(model_path.c_str(), &buf);
PADDLE_MOBILE_ENFORCE(buf != nullptr, "read from __model__ is null");
c_program = paddle_mobile__framework__proto__program_desc__unpack(
nullptr, read_size, buf);
PADDLE_MOBILE_ENFORCE(c_program != nullptr, "program is null");
auto originProgramDesc = std::make_shared<ProgramDesc>(c_program);
return originProgramDesc;
}
void LoadWithDump(const paddle_mobile::framework::VarDesc &var_desc, char *dataP, FILE *out_file) {
// 1. version
uint32_t version = *reinterpret_cast<uint32_t *>(dataP);
// write version
fwrite(&version, kSize32, 1, out_file);
dataP += kSize32;
  // 2. LoD information: copy the level through so the read pointer stays
  // aligned with the per-level data consumed below
  uint64_t lod_level = *reinterpret_cast<uint64_t *>(dataP);
  // write LoD level
  fwrite(&lod_level, kSize64, 1, out_file);
  dataP += kSize64;
for (uint64_t i = 0; i < lod_level; ++i) {
uint64_t size = *reinterpret_cast<uint64_t *>(dataP);
// write lod size
fwrite(&size, kSize64, 1, out_file);
(dataP) += kSize64;
std::vector<size_t> tmp(size / sizeof(size_t));
for (unsigned long &k : tmp) {
k = *reinterpret_cast<size_t *>(dataP);
(dataP) += sizeof(size_t);
}
// write lod size vector
    fwrite(tmp.data(), sizeof(size_t), tmp.size(), out_file);
}
// 3. tensor version
uint32_t tensor_version = *reinterpret_cast<uint32_t *>(dataP);
// write tensor version
fwrite(&tensor_version, kSize32, 1, out_file);
(dataP) += kSize32;
// 4. tensor desc
int32_t size = *reinterpret_cast<int32_t *>(dataP);
// write tensor desc
fwrite(&size, sizeof(int32_t), 1, out_file);
(dataP) += sizeof(int32_t);
std::unique_ptr<char[]> buf(new char[size]);
for (int m = 0; m < size; ++m) {
buf.get()[m] = (dataP)[m];
}
fwrite(buf.get(), sizeof(char), static_cast<size_t>(size), out_file);
(dataP) += (sizeof(char) * size);
const paddle_mobile::framework::TensorDesc &desc = var_desc.Tensor_desc();
int memory_size = 1;
for (auto l : desc.Dims()) {
memory_size *= l;
}
void *memory = nullptr;
int type_size = 0;
switch (desc.DataType()) {
case paddle_mobile::framework::VARTYPE_TYPE_FP16:
type_size = 2;
break;
case paddle_mobile::framework::VARTYPE_TYPE_FP32:
type_size = 4;
break;
case paddle_mobile::framework::VARTYPE_TYPE_FP64:
type_size = 8;
break;
case paddle_mobile::framework::VARTYPE_TYPE_INT32:
type_size = 4;
break;
case paddle_mobile::framework::VARTYPE_TYPE_INT64:
type_size = 8;
break;
case paddle_mobile::framework::VARTYPE_TYPE_BOOL:
type_size = 1;
break;
default:
break;
}
size_t tensorSize = sizeof(char) * memory_size * type_size;
memory = new char[tensorSize];
for (int n = 0; n < tensorSize; ++n) {
static_cast<char *>(memory)[n] = (dataP)[n];
}
dataP += tensorSize;
// for float 32
float min_value = std::numeric_limits<float>::max();
  float max_value = std::numeric_limits<float>::lowest();
for (int k = 0; k < memory_size; ++k) {
min_value = std::min(min_value, static_cast<float *> (memory)[k]);
max_value = std::max(max_value, static_cast<float *> (memory)[k]);
}
fwrite(&min_value, sizeof(float), 1, out_file);
fwrite(&max_value, sizeof(float), 1, out_file);
for (int g = 0; g < memory_size; ++g) {
float value = static_cast<float *> (memory)[g];
auto factor = (uint8_t) round((value - min_value) / (max_value - min_value) * 255);
fwrite(&factor, sizeof(uint8_t), 1, out_file);
}
}
void
quantificate_combined(const std::string &model_path, const std::string &param_path, const std::string &param_min_path) {
auto program = loadParams(model_path);
char *origin_data = Get_binary_data(param_path);
char *data = origin_data;
FILE *out_file = fopen(param_min_path.c_str(), "wb");
for (const auto &block : program->Blocks()) {
for (const auto &var_desc : block->Vars()) {
if (var_desc->Persistable()) {
if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
continue;
}
LoadWithDump(*var_desc, data, out_file);
}
}
}
fclose(out_file);
  delete[] origin_data;
}
void quantificate_seperated(const std::string model_dir, const std::string param_min_path) {
auto program = loadParams(model_dir + "/__model__");
std::string shell_command = "mkdir " + param_min_path;
system(shell_command.c_str());
for (const auto &block : program->Blocks()) {
for (const auto &var_desc : block->Vars()) {
if (var_desc->Persistable()) {
if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
continue;
}
std::string file_name = param_min_path + "/" + var_desc->Name();
FILE *out_file = fopen(file_name.c_str(), "wb");
char *origin_data = Get_binary_data(model_dir + "/" + var_desc->Name());
char *data = origin_data;
LoadWithDump(*var_desc, data, out_file);
        delete[] origin_data;
fclose(out_file);
}
}
}
}
int main(int argc, char **argv) {
const std::string kNoteEg = "( eg: ./quantify 1 your_combined_model_path output_path or ./quantify 0 your_seperated_model_path output_path)";
PADDLE_MOBILE_ENFORCE(argc > 1, "wee need params.%s ", kNoteEg.c_str());
std::string action_type = argv[1];
  PADDLE_MOBILE_ENFORCE(argc > 1 && (action_type == "1" || action_type == "0"),
                        "only 0 or 1 supported, current is %s %s ",
                        action_type.c_str(), kNoteEg.c_str());
PADDLE_MOBILE_ENFORCE(argc > 2, "we need your model path. %s ", kNoteEg.c_str());
std::string base_path = argv[2];
PADDLE_MOBILE_ENFORCE(argc > 3, "we need your output path. %s ", kNoteEg.c_str());
std::string output_path = argv[3];
if (action_type == "0") {
// for seperated
const std::string &seperated_min_dir = output_path;
quantificate_seperated(base_path, seperated_min_dir);
return 0;
}
if (action_type == "1") {
// for combined
const std::string &combined_min_dir = output_path;
std::string model_path = base_path + "/model";
std::string param_path = base_path + "/params";
quantificate_combined(model_path, param_path, combined_min_dir);
return 0;
}
return -1;
}
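
As a sanity check on the scheme above: the encoder stores a per-tensor min/max pair and rounds each weight to the nearest of 256 evenly spaced levels, so the per-weight reconstruction error is bounded by half a quantization step (assuming max > min):

$$|v - \hat{v}| \le \frac{\max - \min}{2 \cdot 255}$$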
(11 additional file diffs in this commit are collapsed and not shown.)
 set(ANDROID_ARM_NEON ON)
-include("${CMAKE_CURRENT_LIST_DIR}/../android-cmake/android.toolchain.cmake")
\ No newline at end of file
+set(ANDROID_PIE TRUE)
+set(ANDROID_STL "c++_static")
+set(ANDROID_PLATFORM "android-22")
+include("${CMAKE_CURRENT_LIST_DIR}/../android-cmake/android.toolchain.cmake")