提交 3d475c9a 编写于 作者: H hjchen2

Merge branch 'develop' of https://github.com/PaddlePaddle/paddle-mobile into dev-latest

...@@ -92,3 +92,4 @@ metal/images/ ...@@ -92,3 +92,4 @@ metal/images/
metal/paddle-mobile/paddle-mobile/CPU/libpaddle-mobile.a metal/paddle-mobile/paddle-mobile/CPU/libpaddle-mobile.a
*.xcuserdatad/ *.xcuserdatad/
*/xcuserdata/ */xcuserdata/
/venv/
...@@ -2,9 +2,9 @@ cmake_minimum_required(VERSION 3.0) ...@@ -2,9 +2,9 @@ cmake_minimum_required(VERSION 3.0)
project(paddle-mobile) project(paddle-mobile)
# select the platform to build # select the platform to build
option(CPU "armv7 with neon support" ON) option(CPU "armv7 with neon support" OFF)
option(MALI_GPU "mali gpu support" OFF) option(MALI_GPU "mali gpu support" OFF)
option(FPGA "fpga support" OFF) option(FPGA "fpga support" ON)
option(USE_OPENMP "openmp support" OFF) option(USE_OPENMP "openmp support" OFF)
option(DEBUGING "enable debug mode" ON) option(DEBUGING "enable debug mode" ON)
...@@ -20,6 +20,7 @@ set(CMAKE_CXX_FLAGS "-O3 -s -DNDEBUG ${CMAKE_CXX_FLAGS}") ...@@ -20,6 +20,7 @@ set(CMAKE_CXX_FLAGS "-O3 -s -DNDEBUG ${CMAKE_CXX_FLAGS}")
if(IS_IOS) if(IS_IOS)
set(CMAKE_CXX_FLAGS "-mfpu=neon -marm -fobjc-abi-version=2 -fobjc-arc \ set(CMAKE_CXX_FLAGS "-mfpu=neon -marm -fobjc-abi-version=2 -fobjc-arc \
-std=gnu++11 -stdlib=libc++ -isysroot ${CMAKE_OSX_SYSROOT} ${CMAKE_CXX_FLAGS}") -std=gnu++11 -stdlib=libc++ -isysroot ${CMAKE_OSX_SYSROOT} ${CMAKE_CXX_FLAGS}")
add_compile_options(-fembed-bitcode)
else() else()
set(CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS}") set(CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS}")
endif() endif()
...@@ -28,7 +29,10 @@ if(DEBUGING) ...@@ -28,7 +29,10 @@ if(DEBUGING)
message(STATUS "debugging mode") message(STATUS "debugging mode")
add_definitions(-DPADDLE_MOBILE_DEBUG) add_definitions(-DPADDLE_MOBILE_DEBUG)
else() else()
if(FPGA)
else()
add_definitions(-fvisibility=hidden -fvisibility-inlines-hidden) add_definitions(-fvisibility=hidden -fvisibility-inlines-hidden)
endif()
endif() endif()
if(USE_EXCEPTION) if(USE_EXCEPTION)
...@@ -92,8 +96,7 @@ else() ...@@ -92,8 +96,7 @@ else()
endif() endif()
if(FPGA) if(FPGA)
set(DEBUGING ON) message("FPGA mode enabled")
add_definitions(-DPADDLE_MOBILE_DEBUG)
add_definitions(-DPADDLE_MOBILE_FPGA) add_definitions(-DPADDLE_MOBILE_FPGA)
else() else()
file(GLOB_RECURSE _tmp_list src/operators/kernel/fpga/*.cpp src/operators/kernel/fpga/*.cc) file(GLOB_RECURSE _tmp_list src/operators/kernel/fpga/*.cpp src/operators/kernel/fpga/*.cc)
...@@ -176,6 +179,10 @@ if(DEBUGING) ...@@ -176,6 +179,10 @@ if(DEBUGING)
else() else()
add_subdirectory(test) add_subdirectory(test)
endif() endif()
elseif(FPGA)
add_subdirectory(test)
endif() endif()
...@@ -110,7 +110,8 @@ Paddle-Mobile是PaddlePaddle组织下的项目,是一个致力于嵌入式平 ...@@ -110,7 +110,8 @@ Paddle-Mobile是PaddlePaddle组织下的项目,是一个致力于嵌入式平
### 开发文档 ### 开发文档
开发文档主要是关于编译、运行等问题。做为开发者,它可以和贡献文档共同结合使用。 开发文档主要是关于编译、运行等问题。做为开发者,它可以和贡献文档共同结合使用。
[开发文档链接](https://github.com/PaddlePaddle/paddle-mobile/blob/develop/doc/development_doc.md) * [iOS](https://github.com/PaddlePaddle/paddle-mobile/blob/develop/doc/development_ios.md)
* [Android](https://github.com/PaddlePaddle/paddle-mobile/blob/develop/doc/development_android.md)
### 贡献文档 ### 贡献文档
- [贡献文档链接](https://github.com/PaddlePaddle/paddle-mobile/blob/develop/CONTRIBUTING.md) - [贡献文档链接](https://github.com/PaddlePaddle/paddle-mobile/blob/develop/CONTRIBUTING.md)
......
|mobilenet arm v7|1线程|2线程|4线程|
|------------|----|-----|-----|
|麒麟970(ms)|108.180|63.935|37.545|
|麒麟960(ms)|108.588|63.073|36.822|
|高通845(ms)|85.952|48.890|28.641|
|高通835(ms)|105.434|62.752|37.131|
|||||
|mobilenetssd arm v7|1线程|2线程|4线程|
|麒麟970(ms)|212.686|127.205|77.485|
|麒麟960(ms)|212.641|125.338|75.250|
|高通845(ms)|182.863|95.671|56.857|
|高通835(ms)|213.849|127.717|77.006|
|||||
|googlenet(v1) arm v7|1线程|2线程|4线程|
|麒麟970(ms)|335.288|234.559|161.295|
|麒麟960(ms)|354.443|232.642|157.815|
|高通845(ms)|282.007|173.146|122.148|
|高通835(ms)|341.250|233.354|158.554|
|||||
|squeezenet arm v7|1线程|2线程|4线程|
|麒麟970(ms)|83.726|57.944|36.923|
|麒麟960(ms)|85.835|55.762|36.496|
|高通845(ms)|71.301|41.618|28.785|
|高通835(ms)|82.407|56.176|36.455|
|||||
|yolo arm v7|1线程|2线程|4线程|
|麒麟970(ms)|129.658|79.993|49.969|
|麒麟960(ms)|130.208|78.791|48.390|
|高通845(ms)|109.244|61.736|40.600|
|高通835(ms)|130.402|80.863|50.359|
测试机型信息:
麒麟970:荣耀v10 (2.36GHz * 4 + 1.8GHz * 4)
麒麟960:华为mate9 (2.36GHz * 4 + 1.8GHz * 4)
骁龙835:小米6 (2.45GHz * 4 + 1.9GHz * 4)
骁龙845:OPPO FindX (2.80GHz * 4 + 1.8GHz * 4)
\ No newline at end of file
|mobilenetfssd|速度|
|------------|-----|
|A9(ms)|33.78|
|A10(ms)|24.05|
|A11(ms)|17.15|
|||
|genet|速度|
|A9(ms) |3.49|
|A10(ms)|2.54|
|A11(ms)|1.43|
\ No newline at end of file
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#### 以下是 paddle-mobile 代码的执行流程图: #### 以下是 paddle-mobile 代码的执行流程图:
![执行流程图](http://otkwwi4x8.bkt.clouddn.com/2018-07-02-15305189473720.png) ![执行流程图](http://mms-graph.bj.bcebos.com/paddle-mobile/git_images/flow_chart.png)
#### 主要分为: Loader 模块、 Program 模块、 Executor 模块、 op 模块、 kernel 模块、scope variable Tensor 模块 #### 主要分为: Loader 模块、 Program 模块、 Executor 模块、 op 模块、 kernel 模块、scope variable Tensor 模块
...@@ -14,12 +14,12 @@ ...@@ -14,12 +14,12 @@
先来看一下模型, 模型分为两种结构: 先来看一下模型, 模型分为两种结构:
一种为参数文件是散开的, 如下图, 红框为模型结构的 protobuf 文件, 其余为参数文件 一种为参数文件是散开的, 如下图, 红框为模型结构的 protobuf 文件, 其余为参数文件
![模型描述](http://otkwwi4x8.bkt.clouddn.com/2018-07-02-15305190629577.png) ![模型描述](http://mms-graph.bj.bcebos.com/paddle-mobile/git_images/model_desc.png)
另一种为参数文件结合在一起的, 如下图, 红框内为模型结构描述的 protobuf 文件, 另一个文件为结合在一起的参数文件 另一种为参数文件结合在一起的, 如下图, 红框内为模型结构描述的 protobuf 文件, 另一个文件为结合在一起的参数文件
![模型描述combined](http://otkwwi4x8.bkt.clouddn.com/2018-07-02-15305191057130.png) ![模型描述combined](http://mms-graph.bj.bcebos.com/paddle-mobile/git_images/model_desc_combined.png)
loader 模块的作用是将模型结构信息 load 进内存, 将红框内的 protobuf 文件 load 进内存, 并对模型结构进行优化(如将几个细粒度的 op 融合成 粗粒度的 op, 如将 conv、 add、 batchnorm、 relu 融合为 conv\_add\_batchnorm\_relu). loader 模块的作用是将模型结构信息 load 进内存, 将红框内的 protobuf 文件 load 进内存, 并对模型结构进行优化(如将几个细粒度的 op 融合成 粗粒度的 op, 如将 conv、 add、 batchnorm、 relu 融合为 conv\_add\_batchnorm\_relu).
...@@ -161,7 +161,7 @@ sh build.sh android yolo ...@@ -161,7 +161,7 @@ sh build.sh android yolo
### 五. kernel ### 五. kernel
kernel 为 op 的底层运算实现, 主要有两个函数, Init 和 Compute, 分别用来初始化、预处理 和 运算操作, 值得提出的是, kernel 会根据泛型特化到不同的平台, 如图所示: kernel 为 op 的底层运算实现, 主要有两个函数, Init 和 Compute, 分别用来初始化、预处理 和 运算操作, 值得提出的是, kernel 会根据泛型特化到不同的平台, 如图所示:
![设备特化]![](http://otkwwi4x8.bkt.clouddn.com/2018-07-02-15305191401976.png) ![设备特化](http://mms-graph.bj.bcebos.com/paddle-mobile/git_images/devices.png)
不同平台的 kernel 实现, 为同一个 kernel 类不同泛型的特化实现, 目前有三个平台, arm、mali、fpga, 图中的 central-arm-func\ 目录为 op kernel 的 arm 实现, 它承担了 arm\ 目录下 kernel 的底层实现, 同时 arm 处理器作为中央处理器, central-arm-func\ 也可以作为其他协处理器的底层实现, 如: fpga 的某一个 op kernel 还没有 fpga 协处理器的实现, 就可以直接调用使用这里的 arm 实现. 不同平台的 kernel 实现, 为同一个 kernel 类不同泛型的特化实现, 目前有三个平台, arm、mali、fpga, 图中的 central-arm-func\ 目录为 op kernel 的 arm 实现, 它承担了 arm\ 目录下 kernel 的底层实现, 同时 arm 处理器作为中央处理器, central-arm-func\ 也可以作为其他协处理器的底层实现, 如: fpga 的某一个 op kernel 还没有 fpga 协处理器的实现, 就可以直接调用使用这里的 arm 实现.
......
### iOS&Android开发文档
# iOS开发文档
## 编译
```sh
# 在 paddle-mobile 目录下:
cd tools
sh build.sh ios
# 如果只想编译某个特定模型的 op, 则需执行以下命令
sh build.sh ios googlenet
# 在这个文件夹下, 你可以拿到生成的 .a 库
cd ../build/release/ios/build
```
#### 常见问题:
1. No iOS SDK's found in default search path ...
这个问题是因为 tools/ios-cmake/ios.toolchain.cmake 找不到你最近使用的 iOS SDK 路径, 所以需要自己进行指定,
以我当前的环境为例: 在 tools/ios-cmake/ios.toolchain.cmake 143行前添加我本地的 iOS SDK 路径: set(CMAKE_IOS_SDK_ROOT "/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS.sdk")
## 集成
```
将上一步生成的:
libpaddle-mobile.a
/src/ios_io/ 下的
PaddleMobile.h
```
拖入工程
#### oc 接口
接口如下:
```
/*
创建对象
*/
- (instancetype)init;
/*
load 模型, 开辟内存
*/
- (BOOL)load:(NSString *)modelPath andWeightsPath:(NSString *)weighsPath;
/*
进行预测, means 和 scale 为训练模型时的预处理参数, 如训练时没有做这些预处理则直接使用 predict
*/
- (NSArray *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim means:(NSArray<NSNumber *> *)means scale:(float)scale;
/*
进行预测
*/
- (NSArray *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim;
/*
清理内存
*/
- (void)clear;
```
# Android开发文档 # Android开发文档
用户可通过如下两种方式,交叉编译Android平台上适用的paddle-mobile库: 用户可通过如下两种方式,交叉编译Android平台上适用的paddle-mobile库:
......
# iOS开发文档
## CPU
需要: xcode
### 编译
```sh
# 在 paddle-mobile 目录下:
cd tools
sh build.sh ios
# 如果只想编译某个特定模型的 op, 则需执行以下命令
sh build.sh ios googlenet
# 在这个文件夹下, 你可以拿到生成的 .a 库
cd ../build/release/ios/build
```
#### 常见问题:
1. No iOS SDK's found in default search path ...
这个问题是因为 tools/ios-cmake/ios.toolchain.cmake 找不到你最近使用的 iOS SDK 路径, 所以需要自己进行指定,
以我当前的环境为例: 在 tools/ios-cmake/ios.toolchain.cmake 143行前添加我本地的 iOS SDK 路径: set(CMAKE_IOS_SDK_ROOT "/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS.sdk")
### 集成
```
将上一步生成的:
libpaddle-mobile.a
/src/ios_io/ 下的
PaddleMobile.h
```
拖入工程
#### oc 接口
接口如下:
```
/*
创建对象
*/
- (instancetype)init;
/*
load 模型, 开辟内存
*/
- (BOOL)load:(NSString *)modelPath andWeightsPath:(NSString *)weighsPath;
/*
进行预测, means 和 scale 为训练模型时的预处理参数, 如训练时没有做这些预处理则直接使用 predict
*/
- (NSArray *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim means:(NSArray<NSNumber *> *)means scale:(float)scale;
/*
进行预测
*/
- (NSArray *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim;
/*
清理内存
*/
- (void)clear;
```
## GPU
需要: xcode、cocoapods
```
# 在 paddle-mobile 目录下:
cd metal
pod install
open paddle-mobile.xcworkspace
```
## Paddle-Mobile ## Paddle-Mobile
This folder is used to develop metal version for ios gpu 需要: xcode、 cocoapods
```
pod install
open paddle-mobile.xcworkspace
```
Demo 所需依赖的模型可在[这里](http://mms-graph.bj.bcebos.com/paddle-mobile%2FmodelsAndImages.zip)下载
...@@ -62,6 +62,8 @@ const char *G_OP_TYPE_CRF = "crf_decoding"; ...@@ -62,6 +62,8 @@ const char *G_OP_TYPE_CRF = "crf_decoding";
const char *G_OP_TYPE_BILINEAR_INTERP = "bilinear_interp"; const char *G_OP_TYPE_BILINEAR_INTERP = "bilinear_interp";
const char *G_OP_TYPE_FLATTEN = "flatten"; const char *G_OP_TYPE_FLATTEN = "flatten";
const char *G_OP_TYPE_SHAPE = "shape"; const char *G_OP_TYPE_SHAPE = "shape";
const char *G_OP_TYPE_ELEMENTWISE_MUL = "elementwise_mul";
const char *G_OP_TYPE_SUM = "sum";
const char *G_OP_TYPE_QUANTIZE = "quantize"; const char *G_OP_TYPE_QUANTIZE = "quantize";
const char *G_OP_TYPE_DEQUANTIZE = "dequantize"; const char *G_OP_TYPE_DEQUANTIZE = "dequantize";
...@@ -115,7 +117,8 @@ std::unordered_map< ...@@ -115,7 +117,8 @@ std::unordered_map<
{G_OP_TYPE_FLATTEN, {{"X"}, {"Out"}}}, {G_OP_TYPE_FLATTEN, {{"X"}, {"Out"}}},
{G_OP_TYPE_SHAPE, {{"Input"}, {"Out"}}}, {G_OP_TYPE_SHAPE, {{"Input"}, {"Out"}}},
{G_OP_TYPE_CONV_TRANSPOSE, {{"Input"}, {"Output"}}}, {G_OP_TYPE_CONV_TRANSPOSE, {{"Input"}, {"Output"}}},
{G_OP_TYPE_SUM, {{"X"}, {"Out"}}},
{G_OP_TYPE_ELEMENTWISE_MUL, {{"X", "Y"}, {"Out"}}},
{G_OP_TYPE_QUANTIZE, {{"X"}, {"Out", "OutScale"}}}, {G_OP_TYPE_QUANTIZE, {{"X"}, {"Out", "OutScale"}}},
{G_OP_TYPE_DEQUANTIZE, {{"X", "Scale"}, {"Out"}}}}; {G_OP_TYPE_DEQUANTIZE, {{"X", "Scale"}, {"Out"}}}};
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -126,6 +126,8 @@ extern const char *G_OP_TYPE_REGION; ...@@ -126,6 +126,8 @@ extern const char *G_OP_TYPE_REGION;
extern const char *G_OP_TYPE_FUSION_CONV_BN; extern const char *G_OP_TYPE_FUSION_CONV_BN;
extern const char *G_OP_TYPE_CONV_TRANSPOSE; extern const char *G_OP_TYPE_CONV_TRANSPOSE;
extern const char *G_OP_TYPE_PRELU; extern const char *G_OP_TYPE_PRELU;
extern const char *G_OP_TYPE_SUM;
extern const char *G_OP_TYPE_ELEMENTWISE_MUL;
extern const char *G_OP_TYPE_QUANTIZE; extern const char *G_OP_TYPE_QUANTIZE;
extern const char *G_OP_TYPE_DEQUANTIZE; extern const char *G_OP_TYPE_DEQUANTIZE;
......
...@@ -12,14 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,14 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <cstdlib> #pragma once
#include <cstdlib>
#include <cstring>
#include <string>
#include "common/enforce.h" #include "common/enforce.h"
#include "common/log.h" #include "common/log.h"
#pragma once
namespace paddle_mobile { namespace paddle_mobile {
template <int ID, typename Type> template <int ID, typename Type>
struct IDToType { struct IDToType {
typedef Type type_t; typedef Type type_t;
...@@ -79,13 +81,13 @@ struct Variant { ...@@ -79,13 +81,13 @@ struct Variant {
template <typename T, typename... Args> template <typename T, typename... Args>
void Set(Args &&... args) { void Set(Args &&... args) {
helper::Destroy(type_id, &data.data); helper::Destroy(type_id, data.data);
new (&data.data) T(std::forward<Args>(args)...); new (data.data) T(std::forward<Args>(args)...);
type_id = typeid(T).hash_code(); type_id = typeid(T).hash_code();
} }
void SetString(std::string &string) { void SetString(std::string &string) {
// helper::Destroy(type_id, &data); helper::Destroy(type_id, data.data);
type_id = typeid(std::string).hash_code(); type_id = typeid(std::string).hash_code();
strcpy(data.data, string.c_str()); strcpy(data.data, string.c_str());
} }
...@@ -109,7 +111,7 @@ struct Variant { ...@@ -109,7 +111,7 @@ struct Variant {
"stl lib with string copy)"); "stl lib with string copy)");
exit(0); exit(0);
} else if (type_id == typeid(T).hash_code()) { } else if (type_id == typeid(T).hash_code()) {
return *const_cast<T *>(reinterpret_cast<const T *>(&data)); return *const_cast<T *>(reinterpret_cast<const T *>(data.data));
} else { } else {
PADDLE_MOBILE_THROW_EXCEPTION(" bad cast in variant"); PADDLE_MOBILE_THROW_EXCEPTION(" bad cast in variant");
exit(0); exit(0);
...@@ -122,7 +124,8 @@ struct Variant { ...@@ -122,7 +124,8 @@ struct Variant {
static inline size_t invalid_type() { return typeid(void).hash_code(); } static inline size_t invalid_type() { return typeid(void).hash_code(); }
typedef VariantHelper<Ts...> helper; typedef VariantHelper<Ts...> helper;
size_t type_id; size_t type_id;
RawData<helper::size> data; // todo use an anto size to suite this.
RawData<64> data;
}; };
template <typename T> template <typename T>
......
...@@ -22,7 +22,7 @@ limitations under the License. */ ...@@ -22,7 +22,7 @@ limitations under the License. */
#include "fpga/filter.h" #include "fpga/filter.h"
#include "fpga/image.h" #include "fpga/image.h"
#define FPGA_TEST_MODE #define FPGA_TEST_MODE
#define PADDLE_MOBILE_OS_LINUX // #define PADDLE_MOBILE_OS_LINUX
namespace paddle_mobile { namespace paddle_mobile {
namespace fpga { namespace fpga {
...@@ -125,6 +125,7 @@ float fp16_2_fp32(half fp16_num) { ...@@ -125,6 +125,7 @@ float fp16_2_fp32(half fp16_num) {
} }
int ComputeBasicConv(const struct ConvArgs &args) { int ComputeBasicConv(const struct ConvArgs &args) {
#ifdef FPGA_TEST_MODE
DLOG << "======Compute Basic Conv======"; DLOG << "======Compute Basic Conv======";
DLOG << " relu_enabled:" << args.relu_enabled DLOG << " relu_enabled:" << args.relu_enabled
<< " sb_address:" << args.sb_address << " sb_address:" << args.sb_address
...@@ -144,11 +145,11 @@ int ComputeBasicConv(const struct ConvArgs &args) { ...@@ -144,11 +145,11 @@ int ComputeBasicConv(const struct ConvArgs &args) {
<< " stride_w:" << args.kernel.stride_w; << " stride_w:" << args.kernel.stride_w;
DLOG << " out_address:" << args.output.address DLOG << " out_address:" << args.output.address
<< " out_scale_address:" << args.output.scale_address; << " out_scale_address:" << args.output.scale_address;
#endif
return do_ioctl(IOCTL_CONFIG_CONV, &args); return do_ioctl(IOCTL_CONFIG_CONV, &args);
} }
int ComputeFpgaConv(const struct WrapperConvArgs &args) { int ComputeFpgaConv(const struct SplitConvArgs &args) {
#ifdef FPGA_TEST_MODE #ifdef FPGA_TEST_MODE
DLOG << "=============ComputeFPGAConv==========="; DLOG << "=============ComputeFPGAConv===========";
DLOG << " filter_num:" << args.filter_num DLOG << " filter_num:" << args.filter_num
...@@ -192,8 +193,9 @@ int ComputeFpgaPool(const struct PoolingArgs &args) { ...@@ -192,8 +193,9 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
int ComputeFpgaEWAdd(const struct EWAddArgs &args) { int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
#ifdef FPGA_TEST_MODE #ifdef FPGA_TEST_MODE
DLOG << "=============ComputeFpgaEWAdd==========="; DLOG << "=============ComputeFpgaEWAdd===========";
DLOG << " relu_enabled:" << args.relu_enabled << " const0:" << args.const0 DLOG << " relu_enabled:" << args.relu_enabled
<< " const1:" << args.const1; << " const0:" << fp16_2_fp32(int16_t(args.const0))
<< " const1:" << fp16_2_fp32(int16_t(args.const1));
DLOG << " image0_address:" << args.image0.address DLOG << " image0_address:" << args.image0.address
<< " image0_scale_address:" << args.image0.scale_address << " image0_scale_address:" << args.image0.scale_address
<< " image0_channels:" << args.image0.channels << " image0_channels:" << args.image0.channels
...@@ -381,10 +383,10 @@ void format_concat_output(framework::Tensor *out, int height, int width, ...@@ -381,10 +383,10 @@ void format_concat_output(framework::Tensor *out, int height, int width,
out->reset_data_ptr(data_ptr); out->reset_data_ptr(data_ptr);
} }
void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input, void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
framework::Tensor *out, framework::Tensor *filter, framework::Tensor *out, framework::Tensor *filter,
bool relu_enabled, int group_num, int stride_h, int stride_w, bool relu_enabled, int group_num, int stride_h,
int padding_h, int padding_w, float *bs_ptr) { int stride_w, int padding_h, int padding_w, float *bs_ptr) {
auto input_ptr = input->data<float>(); auto input_ptr = input->data<float>();
auto filter_ptr = filter->data<float>(); auto filter_ptr = filter->data<float>();
auto out_ptr = out->data<float>(); auto out_ptr = out->data<float>();
...@@ -401,8 +403,8 @@ void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input, ...@@ -401,8 +403,8 @@ void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input,
arg->concat_arg.image_num = arg->split_num; arg->concat_arg.image_num = arg->split_num;
arg->concat_arg.image_out = out_ptr; arg->concat_arg.image_out = out_ptr;
arg->concat_arg.scale_out = out->scale; arg->concat_arg.scale_out = out->scale;
arg->concat_arg.height = (uint32_t)filter->dims()[2]; arg->concat_arg.height = (uint32_t)out->dims()[2];
arg->concat_arg.width = (uint32_t)filter->dims()[3]; arg->concat_arg.width = (uint32_t)out->dims()[3];
int n = arg->split_num; int n = arg->split_num;
arg->concat_arg.images_in = arg->concat_arg.images_in =
...@@ -411,7 +413,6 @@ void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input, ...@@ -411,7 +413,6 @@ void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input,
(float **)fpga_malloc(n * sizeof(float *)); // NOLINT (float **)fpga_malloc(n * sizeof(float *)); // NOLINT
arg->concat_arg.channel_num = arg->concat_arg.channel_num =
(uint32_t *)fpga_malloc(n * sizeof(uint32_t)); // NOLINT (uint32_t *)fpga_malloc(n * sizeof(uint32_t)); // NOLINT
arg->concat_arg.image_out = out_ptr;
auto channel = (int)out->dims()[1]; // NOLINT auto channel = (int)out->dims()[1]; // NOLINT
int filter_num_per_div = get_filter_num_per_div(filter, group_num); int filter_num_per_div = get_filter_num_per_div(filter, group_num);
......
...@@ -89,7 +89,7 @@ struct ConcatArgs { ...@@ -89,7 +89,7 @@ struct ConcatArgs {
uint32_t width; uint32_t width;
}; };
struct WrapperConvArgs { struct SplitConvArgs {
uint32_t split_num; uint32_t split_num;
uint32_t group_num; uint32_t group_num;
uint32_t filter_num; uint32_t filter_num;
...@@ -98,6 +98,14 @@ struct WrapperConvArgs { ...@@ -98,6 +98,14 @@ struct WrapperConvArgs {
struct ConcatArgs concat_arg; struct ConcatArgs concat_arg;
}; };
struct GroupConvArgs {
uint32_t group_num;
uint32_t filter_num;
struct ImageOutputArgs output;
struct SplitConvArgs* conv_args;
struct ConcatArgs concat_arg;
};
struct PoolingArgs { struct PoolingArgs {
int16_t mode; // mode: 0:max, 1:avg int16_t mode; // mode: 0:max, 1:avg
half kernel_reciprocal; half kernel_reciprocal;
...@@ -159,30 +167,6 @@ struct MemoryCacheArgs { ...@@ -159,30 +167,6 @@ struct MemoryCacheArgs {
#define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 28, struct FpgaRegReadArgs) #define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 28, struct FpgaRegReadArgs)
#define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 29, struct FpgaRegWriteArgs) #define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 29, struct FpgaRegWriteArgs)
enum FPGA_ERR_TYPE {
ERR_IOCTL_CMD = -1,
ERR_TIMEOUT = -2,
ERR_COMPLETION_TIMEOUT = -3,
ERR_INVALID_FPGA_ADDR = -4,
ERR_NOMEM = -5,
ERR_NO_RESERVE_MEM = -6,
ERR_COPY_FROM_USER = -7,
ERR_COPY_TO_USER = -8,
ERR_DEL_TIMER = -9,
ERR_ENABLE_MSI = -10,
ERR_REGISTER_IRQ = -11,
ERR_PCIE_REGISTER = -12,
ERR_PCIE_PROBE = -13,
ERR_REGISTER_BLOCK = -14,
ERR_ALLOC_GENDISK = -15,
ERR_INIT_QUEUE = -16,
ERR_WAIT = -17,
ERR_ECC_ERROR = -31,
ERR_FPGA_FAIL_STOP = -64,
ERR_FPGA_DEBUG_STOP = -113,
DEV_TMP_UNAVAILABLE = -128
};
//============================== API ============================= //============================== API =============================
int open_device(); int open_device();
...@@ -195,7 +179,7 @@ int fpga_flush(void* address, size_t size); ...@@ -195,7 +179,7 @@ int fpga_flush(void* address, size_t size);
int fpga_invalidate(void* address, size_t size); int fpga_invalidate(void* address, size_t size);
int PerformBypass(const struct BypassArgs& args); int PerformBypass(const struct BypassArgs& args);
int ComputeFpgaConv(const struct WrapperConvArgs& args); int ComputeFpgaConv(const struct SplitConvArgs& args);
int ComputeFpgaPool(const struct PoolingArgs& args); int ComputeFpgaPool(const struct PoolingArgs& args);
int ComputeFpgaEWAdd(const struct EWAddArgs& args); int ComputeFpgaEWAdd(const struct EWAddArgs& args);
int ComputeFPGAConcat(const struct ConcatArgs& args); int ComputeFPGAConcat(const struct ConcatArgs& args);
...@@ -220,10 +204,10 @@ void format_bias_scale_array(float** bias_scale_array, ...@@ -220,10 +204,10 @@ void format_bias_scale_array(float** bias_scale_array,
void format_concat_output(framework::Tensor* out, int height, int width, void format_concat_output(framework::Tensor* out, int height, int width,
int image_num, uint32_t* channel_num); int image_num, uint32_t* channel_num);
void fill_conv_arg(struct WrapperConvArgs* arg, framework::Tensor* input, void fill_split_arg(struct SplitConvArgs* arg, framework::Tensor* input,
framework::Tensor* out, framework::Tensor* filter, framework::Tensor* out, framework::Tensor* filter,
bool relu_enabled, int group_num, int stride_h, int stride_w, bool relu_enabled, int group_num, int stride_h,
int padding_h, int padding_w, float* bs_ptr); int stride_w, int padding_h, int padding_w, float* bs_ptr);
half fp32_2_fp16(float fp32_num); half fp32_2_fp16(float fp32_num);
float fp16_2_fp32(half fp16_num); float fp16_2_fp32(half fp16_num);
......
...@@ -27,6 +27,9 @@ void align_element(float **data_in, int num_per_div_before_alignment, int num) { ...@@ -27,6 +27,9 @@ void align_element(float **data_in, int num_per_div_before_alignment, int num) {
(num + num_per_div_before_alignment - 1) / num_per_div_before_alignment; (num + num_per_div_before_alignment - 1) / num_per_div_before_alignment;
int num_per_div_after_alignment = int num_per_div_after_alignment =
align_to_x(num_per_div_before_alignment, BS_NUM_ALIGNMENT); align_to_x(num_per_div_before_alignment, BS_NUM_ALIGNMENT);
if (num_per_div_before_alignment == num_per_div_after_alignment) {
return;
}
int num_element = int num_element =
2 * div_num * num_per_div_after_alignment; // including bias & scale 2 * div_num * num_per_div_after_alignment; // including bias & scale
float *ptr_aligned = float *ptr_aligned =
......
...@@ -21,7 +21,10 @@ namespace paddle_mobile { ...@@ -21,7 +21,10 @@ namespace paddle_mobile {
namespace fpga { namespace fpga {
namespace filter { namespace filter {
int calc_division_capacity(int chw) { return 2048 / ((chw + 15) / 16) * 32; } int calc_division_capacity(int chw) {
int n = 2048 / ((chw + 15) / 16) * 32;
return n < 2048 ? n : 2048;
}
int calc_split_num(int num, int division_capacity) { int calc_split_num(int num, int division_capacity) {
return (num + division_capacity - 1) / division_capacity; return (num + division_capacity - 1) / division_capacity;
...@@ -210,12 +213,12 @@ void format_filter(float **data_in, int num, int channel, int height, int width, ...@@ -210,12 +213,12 @@ void format_filter(float **data_in, int num, int channel, int height, int width,
align_to_x(num_per_div_before_alignment, FILTER_NUM_ALIGNMENT); align_to_x(num_per_div_before_alignment, FILTER_NUM_ALIGNMENT);
int div_num = int div_num =
(num + num_per_div_before_alignment - 1) / num_per_div_before_alignment; (num + num_per_div_before_alignment - 1) / num_per_div_before_alignment;
int num_after_alignment = num_per_div_after_alignment * div_num; int residual = num % num_per_div_before_alignment;
int num_after_alignment = num_per_div_after_alignment *
((residual == 0) ? div_num : (div_num - 1)) +
align_to_x(residual, FILTER_NUM_ALIGNMENT);
quantize(data_in, data_size, max); quantize(data_in, data_size, max);
char **quantize_data = (char **)data_in; // NOLINT char **quantize_data = (char **)data_in; // NOLINT
convert_to_hwc(quantize_data, num, channel, height, width); convert_to_hwc(quantize_data, num, channel, height, width);
align_element(quantize_data, num, chw); align_element(quantize_data, num, chw);
align_num(quantize_data, num_per_div_before_alignment, num, chw); align_num(quantize_data, num_per_div_before_alignment, num, chw);
......
...@@ -199,6 +199,12 @@ LOAD_OP3(pool2d, CPU, MALI_GPU, FPGA); ...@@ -199,6 +199,12 @@ LOAD_OP3(pool2d, CPU, MALI_GPU, FPGA);
#ifdef MULTICLASSNMS_OP #ifdef MULTICLASSNMS_OP
LOAD_OP1(multiclass_nms, CPU); LOAD_OP1(multiclass_nms, CPU);
#endif #endif
#ifdef SUM_OP
LOAD_OP1(sum, CPU);
#endif
#ifdef ELEMENTWISEMUL_OP
LOAD_OP1(elementwise_mul, CPU);
#endif
#ifdef SLICE_OP #ifdef SLICE_OP
LOAD_OP2(slice, CPU, MALI_GPU); LOAD_OP2(slice, CPU, MALI_GPU);
#endif #endif
...@@ -206,5 +212,8 @@ LOAD_OP2(slice, CPU, MALI_GPU); ...@@ -206,5 +212,8 @@ LOAD_OP2(slice, CPU, MALI_GPU);
LOAD_OP2(fusion_conv_bn, CPU, FPGA); LOAD_OP2(fusion_conv_bn, CPU, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_bn); LOAD_FUSION_MATCHER(fusion_conv_bn);
#endif #endif
#ifdef ELEMENTWISESUB_OP
LOAD_OP1(elementwise_sub, CPU)
#endif
LOAD_OP1(quantize, CPU); LOAD_OP1(quantize, CPU);
LOAD_OP1(dequantize, CPU); LOAD_OP1(dequantize, CPU);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include <initializer_list>
#include <vector>
#include "framework/tensor.h"
#include "framework/tensor_util.h"
namespace paddle_mobile {
namespace framework {
// Vector<T> implements the std::vector interface, and can get Data or
// MutableData from any place. The data will be synced implicitly inside.
template <typename T>
class Vector {
public:
using value_type = T;
// Default ctor. Create empty Vector
Vector() { InitEmpty(); }
// Fill vector with value. The vector size is `count`.
explicit Vector(size_t count, const T& value = T()) {
InitEmpty();
if (count != 0) {
resize(count);
T* ptr = begin();
for (size_t i = 0; i < count; ++i) {
ptr[i] = value;
}
}
}
// Ctor with init_list
Vector(std::initializer_list<T> init) {
if (init.size() == 0) {
InitEmpty();
} else {
InitByIter(init.size(), init.begin(), init.end());
}
}
// implicit cast from std::vector.
template <typename U>
Vector(const std::vector<U>& dat) { // NOLINT
if (dat.size() == 0) {
InitEmpty();
} else {
InitByIter(dat.size(), dat.begin(), dat.end());
}
}
// Copy ctor
Vector(const Vector<T>& other) { this->operator=(other); }
// Copy operator
Vector<T>& operator=(const Vector<T>& other) {
if (other.size() != 0) {
this->InitByIter(other.size(), other.begin(), other.end());
} else {
InitEmpty();
}
return *this;
}
// Move ctor
Vector(Vector<T>&& other) {
this->size_ = other.size_;
this->flag_ = other.flag_;
if (other.cuda_vec_.memory_size()) {
this->cuda_vec_.ShareDataWith(other.cuda_vec_);
}
if (other.cpu_vec_.memory_size()) {
this->cpu_vec_.ShareDataWith(other.cpu_vec_);
}
}
// CPU data access method. Mutable.
T& operator[](size_t i) {
MutableCPU();
return const_cast<T*>(cpu_vec_.data<T>())[i];
}
// CPU data access method. Immutable.
const T& operator[](size_t i) const {
// ImmutableCPU();
return cpu_vec_.data<T>()[i];
}
// std::vector iterator methods. Based on CPU data access method
size_t size() const { return size_; }
T* begin() { return capacity() == 0 ? &EmptyDummy() : &this->operator[](0); }
T* end() {
return capacity() == 0 ? &EmptyDummy() : &this->operator[](size());
}
T& front() { return *begin(); }
T& back() {
auto it = end();
--it;
return *it;
}
const T* begin() const {
return capacity() == 0 ? &EmptyDummy() : &this->operator[](0);
}
const T* end() const {
return capacity() == 0 ? &EmptyDummy() : &this->operator[](size());
}
const T* cbegin() const { return begin(); }
const T* cend() const { return end(); }
const T& back() const {
auto it = end();
--it;
return *it;
}
T* data() { return begin(); }
const T* data() const { return begin(); }
const T& front() const { return *begin(); }
// end of std::vector iterator methods
// assign this from iterator.
// NOTE: the iterator must support `end-begin`
template <typename Iter>
void assign(Iter begin, Iter end) {
InitByIter(end - begin, begin, end);
}
// push_back. If the previous capacity is not enough, the memory will
// double.
void push_back(T elem) {
if (size_ + 1 > capacity()) {
reserve((size_ + 1) << 1);
}
*end() = elem;
++size_;
}
// extend a vector by iterator.
// NOTE: the iterator must support end-begin
template <typename It>
void Extend(It begin, It end) {
size_t pre_size = size_;
resize(pre_size + (end - begin));
T* ptr = this->begin() + pre_size;
for (; begin < end; ++begin, ++ptr) {
*ptr = *begin;
}
}
// resize the vector
void resize(size_t size) {
if (size + 1 <= capacity()) {
size_ = size;
} else {
MutableCPU();
Tensor cpu_tensor;
T* ptr = cpu_tensor.mutable_data<T>(
framework::make_ddim({static_cast<int64_t>(size)}));
const T* old_ptr =
cpu_vec_.memory_size() == 0 ? nullptr : cpu_vec_.data<T>();
if (old_ptr != nullptr) {
std::copy(old_ptr, old_ptr + size_, ptr);
}
size_ = size;
cpu_vec_.ShareDataWith(cpu_tensor);
}
}
// clear
void clear() {
size_ = 0;
flag_ = kDirty | kDataInCPU;
}
size_t capacity() const {
return cpu_vec_.memory_size() / SizeOfType(typeid(T));
}
// reserve data
void reserve(size_t size) {
size_t pre_size = size_;
resize(size);
resize(pre_size);
}
// implicit cast operator. Vector can be cast to std::vector implicitly.
operator std::vector<T>() const {
std::vector<T> result;
result.resize(size());
std::copy(begin(), end(), result.begin());
return result;
}
bool operator==(const Vector<T>& other) const {
if (size() != other.size()) return false;
auto it1 = cbegin();
auto it2 = other.cbegin();
for (; it1 < cend(); ++it1, ++it2) {
if (*it1 != *it2) {
return false;
}
}
return true;
}
private:
void InitEmpty() {
size_ = 0;
flag_ = kDataInCPU;
}
template <typename Iter>
void InitByIter(size_t size, Iter begin, Iter end) {
T* ptr = this->cpu_vec_.template mutable_data<T>(
framework::make_ddim({static_cast<int64_t>(size)}));
for (size_t i = 0; i < size; ++i) {
*ptr++ = *begin++;
}
flag_ = kDataInCPU | kDirty;
size_ = size;
}
enum DataFlag {
kDataInCPU = 0x01,
kDataInCUDA = 0x02,
// kDirty means the data has been changed in one device.
kDirty = 0x10
};
void MutableCPU() { flag_ = kDirty | kDataInCPU; }
void UnsetFlag(int flag) const { flag_ &= ~flag; }
void SetFlag(int flag) const { flag_ |= flag; }
static T& EmptyDummy() {
static T dummy = T();
return dummy;
}
mutable int flag_;
mutable Tensor cpu_vec_;
mutable Tensor cuda_vec_;
size_t size_;
};
} // namespace framework
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "framework/selected_rows.h"
namespace paddle_mobile {
namespace framework {
struct ReAllocateVisitor {
ReAllocateVisitor(framework::Tensor* tensor, const framework::DDim& dims)
: tensor_(tensor), dims_(dims) {}
template <typename T>
void operator()() const {
framework::Tensor cpu_tensor;
T* ptr = cpu_tensor.mutable_data<T>(dims_);
const T* old_ptr =
tensor_->memory_size() == 0 ? nullptr : tensor_->data<T>();
if (old_ptr != nullptr) {
std::copy(old_ptr, old_ptr + tensor_->numel(), ptr);
}
tensor_->ShareDataWith(cpu_tensor);
}
framework::Tensor* tensor_;
framework::DDim dims_;
};
// TensorCopyVisitor(value, i * value_width, *value_.get(),
// index * value_width, value_width));
struct TensorCopyVisitor {
TensorCopyVisitor(framework::Tensor* dst, int64_t dst_offset,
const framework::Tensor src, int64_t src_offset,
int64_t size)
: dst_(dst),
dst_offset_(dst_offset),
src_(src),
src_offset_(src_offset),
size_(size) {}
template <typename T>
void operator()() const {
// TODO(Yancey1989): support other place
memory::Copy(dst_->mutable_data<T>() + dst_offset_,
src_.data<T>() + src_offset_, size_ * sizeof(T));
}
framework::Tensor* dst_;
int64_t dst_offset_;
framework::Tensor src_;
int64_t src_offset_;
int64_t size_;
};
bool SelectedRows::HasKey(int64_t key) const {
return std::find(rows_.begin(), rows_.end(), key) == rows_.end() ? false
: true;
}
// std::vector<int64_t> SelectedRows::Get(std::vector<int64_t> keys,
// framework::Tensor* value) const {
// PADDLE_MOBILE_ENFORCE(value->IsInitialized(),
// "The value tensor should be initialized.");
// std::vector<int64_t> non_keys;
// int64_t value_width = value_->numel() / value_->dims()[0];
// PADDLE_MOBILE_ENFORCE(value_width == value->numel() / value->dims()[0],
// "output tensor should have the same shape with table "
// "execpt the dims[0].");
//
// for (size_t i = 0; i < keys.size(); ++i) {
// int64_t index = Index(keys[i]);
// if (index == -1) {
// non_keys.push_back(keys[i]);
// } else {
// framework::VisitDataType(
// framework::ToDataType(value_->type()),
// TensorCopyVisitor(value, i * value_width, *value_.get(),
// index * value_width, value_width));
// }
// }
// return non_keys;
//}
// bool SelectedRows::Set(int64_t key, const framework::Tensor& value) {
// PADDLE_MOBILE_ENFORCE(value.IsInitialized(), "The value should be
// initialized."); if (value_->IsInitialized()) {
// PADDLE_MOBILE_ENFORCE(
// value.type() == value_->type(),
// "The type of the value should be same with the original value");
// }
// PADDLE_MOBILE_ENFORCE(value.dims()[0] == static_cast<size_t>(1),
// "The first dim of value should be 1.");
// auto index = Index(key);
// bool is_new_key = false;
// if (index == -1) {
// rows_.push_back(key);
// index = rows_.size() - 1;
// is_new_key = true;
// // whether need to resize the table
// if (static_cast<int64_t>(rows_.size()) > value_->dims()[0]) {
// auto dims = value_->dims();
// dims[0] = (dims[0] + 1) << 1;
// framework::VisitDataType(framework::ToDataType(value.type()),
// ReAllocateVisitor(value_.get(), dims));
// }
// }
//
// framework::VisitDataType(
// framework::ToDataType(value.type()),
// TensorCopyVisitor(value_.get(),
// index * value_->numel() / value_->dims()[0], value,
// static_cast<int64_t>(0), value.numel()));
// return is_new_key;
//}
} // namespace framework
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include <vector>
#include "framework/lod_tensor.h"
#include "framework/mixed_vector.h"
#include "framework/tensor.h"
#include "memory/t_malloc.h"
namespace paddle_mobile {
namespace framework {
class SelectedRows {
/*
* @brief We can use the SelectedRows structure to reproduce a sparse table.
* A sparse table is a key-value structure that the key is an `int64_t`
* number,
* and the value is a Tensor which the first dimension is 0.
* You can use the following interface to operate the sparse table, and you
* can find
* some detail information from the comments of each interface:
*
* HasKey(key), whether the sparse table has the specified key.
* Set(key, value), set a key-value pair into the sparse table.
* Get(keys, value*), get value by given key list and apply it to the given
* value pointer
* with the specified offset.
*
*/
public:
SelectedRows(const std::vector<int64_t>& rows, const int64_t& height)
: rows_(rows), height_(height) {
value_.reset(new Tensor());
}
SelectedRows() {
height_ = 0;
value_.reset(new Tensor());
}
// platform::Place place() const { return value_->place(); }
const Tensor& value() const { return *value_; }
Tensor* mutable_value() { return value_.get(); }
int64_t height() const { return height_; }
void set_height(int64_t height) { height_ = height; }
const Vector<int64_t>& rows() const { return rows_; }
Vector<int64_t>* mutable_rows() { return &rows_; }
void set_rows(const Vector<int64_t>& rows) { rows_ = rows; }
/*
* @brief wheter has the specified key in the table.
*
* @return true if the key is exists.
*/
bool HasKey(int64_t key) const;
/*
* @brief Get value by the key list, if the
*
* @return a list of keys which does not exists in table
*/
std::vector<int64_t> Get(std::vector<int64_t> keys,
framework::Tensor* tensor) const;
/*
* @brief Set a key-value pair into the table.
* This function will double the value memory if it's not engouth.
*
* @note:
* 1. The first dim of the value should be 1
* 2. The value should be initialized and the data type
* should be the same with the table.
*
* @return true if the key is a new one, otherwise false
*
*/
bool Set(int64_t key, const Tensor& value);
/*
* @brief Get the index of key in rows
*
* @return -1 if the key does not exists.
*/
int64_t Index(int64_t key) const {
auto it = std::find(rows_.begin(), rows_.end(), key);
if (it == rows_.end()) {
return static_cast<int64_t>(-1);
}
return static_cast<int64_t>(std::distance(rows_.begin(), it));
}
DDim GetCompleteDims() const {
std::vector<int64_t> dims = vectorize(value_->dims());
dims[0] = height_;
return make_ddim(dims);
}
private:
// Notice: rows can be duplicate. We can have {0, 4, 7, 0, 5, 7, 9} here.
// SelectedRows are simply concated when adding together. Until a
// SelectedRows add a Tensor, will the duplicate rows be handled.
Vector<int64_t> rows_;
std::unique_ptr<Tensor> value_{nullptr};
int64_t height_;
};
/*
* Serialize/Desiralize SelectedRows to std::ostream
* You can pass ofstream or ostringstream to serilize to file
* or to a in memory string. GPU tensor will be copied to CPU.
*/
void SerializeToStream(std::ostream& os, const SelectedRows& selected_rows);
void DeserializeFromStream(std::istream& is, SelectedRows* selected_rows);
} // namespace framework
} // namespace paddle_mobile
...@@ -338,6 +338,8 @@ inline Print &operator<<(Print &printer, const Tensor &tensor) { ...@@ -338,6 +338,8 @@ inline Print &operator<<(Print &printer, const Tensor &tensor) {
for (int i = 0; i < tensor.numel(); i += stride) { for (int i = 0; i < tensor.numel(); i += stride) {
if (tensor.type() == typeid(float)) { if (tensor.type() == typeid(float)) {
printer << tensor.data<float>()[i] << " "; printer << tensor.data<float>()[i] << " ";
} else if (tensor.type() == typeid(int32_t)) {
printer << tensor.data<int32_t>()[i] << " ";
} else if (tensor.type() == typeid(int64_t)) { } else if (tensor.type() == typeid(int64_t)) {
printer << tensor.data<int64_t>()[i] << " "; printer << tensor.data<int64_t>()[i] << " ";
} else if (tensor.type() == typeid(int8_t)) { } else if (tensor.type() == typeid(int8_t)) {
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISEMUL_OP
#include "operators/elementwise_mul_op.h"
namespace paddle_mobile {
namespace operators {
template <typename Dtype, typename T>
void ElementwiseMulOp<Dtype, T>::InferShape() const {
auto x_dim = this->param_.InputX()->dims();
this->param_.Out()->Resize(x_dim);
}
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(elementwise_mul, ops::ElementwiseMulOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(elementwise_mul, ops::ElementwiseMulOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISEMUL_OP
#pragma once
#include <string>
#include "framework/operator.h"
#include "kernel/elementwise_mul_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
using std::string;
template <typename DeviceType, typename T>
class ElementwiseMulOp : public framework::OperatorWithKernel<
DeviceType, ElementwiseMulParam<DeviceType>,
operators::ElementwiseMulKernel<DeviceType, T>> {
public:
ElementwiseMulOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<
DeviceType, ElementwiseMulParam<DeviceType>,
operators::ElementwiseMulKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, ElementwiseMulParam<DeviceType>,
operators::ElementwiseMulKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
protected:
};
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISESUB_OP
#include "operators/elementwise_sub_op.h"
namespace paddle_mobile {
namespace operators {
template <typename Dtype, typename T>
void ElementwiseSubOp<Dtype, T>::InferShape() const {
auto x_dim = this->param_.InputX()->dims();
this->param_.Out()->Resize(x_dim);
}
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(elementwise_sub, ops::ElementwiseSubOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(elementwise_sub, ops::ElementwiseSubOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISESUB_OP
#pragma once
#include <string>
#include "framework/operator.h"
#include "kernel/elementwise_sub_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
using std::string;
template <typename DeviceType, typename T>
class ElementwiseSubOp : public framework::OperatorWithKernel<
DeviceType, ElementwiseSubParam<DeviceType>,
operators::ElementwiseSubKernel<DeviceType, T>> {
public:
ElementwiseSubOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<
DeviceType, ElementwiseSubParam<DeviceType>,
operators::ElementwiseSubKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, ElementwiseSubParam<DeviceType>,
operators::ElementwiseSubKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
protected:
};
} // namespace operators
} // namespace paddle_mobile
#endif
...@@ -12,56 +12,24 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,56 +12,24 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifdef MUL_OP #ifdef ELEMENTWISEMUL_OP
#include "operators/kernel/mul_kernel.h" #include "operators/kernel/elementwise_mul_kernel.h"
#include "operators/kernel/central-arm-func/elementwise_mul_arm_func.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
template <> template <>
bool MulKernel<FPGA, float>::Init(MulParam<FPGA> *param) { bool ElementwiseMulKernel<CPU, float>::Init(ElementwiseMulParam<CPU> *param) {
bool relu_enabled = false;
auto input_x = const_cast<LoDTensor *>(param->InputX());
auto filter = const_cast<LoDTensor *>(param->InputY());
auto out = param->Out();
PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == filter->dims()[0],
"Image channel should be equal to weight number");
int channel = (uint32_t)out->dims()[1];
auto bs_ptr =
(float *)fpga::fpga_malloc(2 * channel * sizeof(float)); // NOLINT
for (int i = 0; i < channel; i++) {
bs_ptr[i + channel] = 1;
bs_ptr[i] = 0;
}
int num = (uint32_t)filter->dims()[1];
int chw = (uint32_t)filter->dims()[0];
PADDLE_MOBILE_ENFORCE(
chw == input_x->numel(),
"Filter element num should be equal to IFM element num");
int height = (uint32_t)input_x->dims()[2];
int width = (uint32_t)input_x->dims()[3];
int filter_channel = chw / height / width;
filter->Resize(framework::make_ddim({num, filter_channel, height, width}));
float max_value = fpga::filter_find_max(filter);
fpga::format_fc_filter(filter, max_value);
int element_num_per_div = fpga::get_filter_num_per_div(filter, 1);
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
fpga::format_fp16_ofm(out);
fpga::WrapperConvArgs conv_arg = {0};
fpga::fill_conv_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, 0,
0, bs_ptr);
param->SetFpgaArgs(conv_arg);
return true; return true;
} }
template <> template <>
void MulKernel<FPGA, float>::Compute(const MulParam<FPGA> &param) const { void ElementwiseMulKernel<CPU, float>::Compute(
fpga::ComputeFpgaConv(param.FpgaArgs()); const ElementwiseMulParam<CPU> &param) const {
ElementwiseMulCompute<float>(param);
param.Out()->set_lod(param.InputX()->lod());
} }
} // namespace operators } // namespace operators
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISESUB_OP
#include "operators/kernel/elementwise_sub_kernel.h"
#include "operators/kernel/central-arm-func/elementwise_sub_arm_func.h"
namespace paddle_mobile {
namespace operators {
template <>
bool ElementwiseSubKernel<CPU, float>::Init(ElementwiseSubParam<CPU> *param) {
return true;
}
template <>
void ElementwiseSubKernel<CPU, float>::Compute(
const ElementwiseSubParam<CPU> &param) const {
ElementwiseSubCompute<float>(param);
param.Out()->set_lod(param.InputX()->lod());
}
} // namespace operators
} // namespace paddle_mobile
#endif
...@@ -31,6 +31,8 @@ void MulKernel<CPU, float>::Compute(const MulParam<CPU> &param) const { ...@@ -31,6 +31,8 @@ void MulKernel<CPU, float>::Compute(const MulParam<CPU> &param) const {
param.Out()->set_lod(param.InputX()->lod()); param.Out()->set_lod(param.InputX()->lod());
} }
template class MulKernel<CPU, float>;
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SUM_OP
#include "operators/kernel/sum_kernel.h"
#include "operators/kernel/central-arm-func/sum_arm_func.h"
namespace paddle_mobile {
namespace operators {
template <>
bool SumKernel<CPU, float>::Init(SumParam<CPU> *param) {
return true;
}
template <>
void SumKernel<CPU, float>::Compute(const SumParam<CPU> &param) const {
SumCompute<float>(param);
param.Out()->set_lod(param.Inputs()[0]->lod());
}
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISEMUL_OP
#pragma once
#include "operators/math/elementwise_op_function.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename T>
struct MulFunctor {
inline T operator()(T a, T b) const { return a * b; }
};
template <typename P>
void ElementwiseMulCompute(const ElementwiseMulParam<CPU> &param) {
const Tensor *input_x = param.InputX();
const Tensor *input_y = param.InputY();
Tensor *Out = param.Out();
Out->mutable_data<float>();
int axis = param.Axis();
ElementwiseComputeEx<MulFunctor<float>, float>(input_x, input_y, axis,
MulFunctor<float>(), Out);
}
template class ElementwiseMulKernel<CPU, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISESUB_OP
#pragma once
#include "operators/math/elementwise_op_function.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename T>
struct SubFunctor {
inline T operator()(T a, T b) const { return a - b; }
};
template <typename P>
void ElementwiseSubCompute(const ElementwiseSubParam<CPU> &param) {
const Tensor *input_x = param.InputX();
const Tensor *input_y = param.InputY();
Tensor *Out = param.Out();
Out->mutable_data<float>();
int axis = param.Axis();
ElementwiseComputeEx<SubFunctor<float>, float>(input_x, input_y, axis,
SubFunctor<float>(), Out);
}
template class ElementwiseSubKernel<CPU, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
...@@ -20,14 +20,12 @@ limitations under the License. */ ...@@ -20,14 +20,12 @@ limitations under the License. */
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "framework/tensor.h" #include "framework/tensor.h"
#include "operators/math/poly_util.h"
#include "operators/op_param.h" #include "operators/op_param.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
constexpr int kOutputDim = 6;
constexpr int kBBoxSize = 4;
template <class T> template <class T>
bool SortScorePairDescend(const std::pair<float, T>& pair1, bool SortScorePairDescend(const std::pair<float, T>& pair1,
const std::pair<float, T>& pair2) { const std::pair<float, T>& pair2) {
...@@ -90,6 +88,21 @@ static inline T JaccardOverlap(const T* box1, const T* box2, ...@@ -90,6 +88,21 @@ static inline T JaccardOverlap(const T* box1, const T* box2,
} }
} }
template <class T>
static inline T PolyIoU(const T* box1, const T* box2, const size_t box_size,
const bool normalized) {
T bbox1_area = math::PolyArea<T>(box1, box_size, normalized);
T bbox2_area = math::PolyArea<T>(box2, box_size, normalized);
T inter_area = math::PolyOverlapArea<T>(box1, box2, box_size, normalized);
if (bbox1_area == 0 || bbox2_area == 0 || inter_area == 0) {
// If coordinate values are is invalid
// if area size <= 0, return 0.
return static_cast<T>(0.);
} else {
return inter_area / (bbox1_area + bbox2_area - inter_area);
}
}
template <typename T> template <typename T>
static inline void NMSFast(const framework::Tensor& bbox, static inline void NMSFast(const framework::Tensor& bbox,
const framework::Tensor& scores, const framework::Tensor& scores,
...@@ -116,8 +129,14 @@ static inline void NMSFast(const framework::Tensor& bbox, ...@@ -116,8 +129,14 @@ static inline void NMSFast(const framework::Tensor& bbox,
for (size_t k = 0; k < selected_indices->size(); ++k) { for (size_t k = 0; k < selected_indices->size(); ++k) {
if (keep) { if (keep) {
const int kept_idx = (*selected_indices)[k]; const int kept_idx = (*selected_indices)[k];
T overlap = JaccardOverlap<T>(bbox_data + idx * box_size, T overlap = T(0.);
if (box_size == 4) {
overlap = JaccardOverlap<T>(bbox_data + idx * box_size,
bbox_data + kept_idx * box_size, true); bbox_data + kept_idx * box_size, true);
} else {
overlap = PolyIoU<T>(bbox_data + idx * box_size,
bbox_data + kept_idx * box_size, box_size, true);
}
keep = overlap <= adaptive_threshold; keep = overlap <= adaptive_threshold;
} else { } else {
break; break;
...@@ -190,6 +209,8 @@ void MultiClassOutput(const framework::Tensor& scores, ...@@ -190,6 +209,8 @@ void MultiClassOutput(const framework::Tensor& scores,
const std::map<int, std::vector<int>>& selected_indices, const std::map<int, std::vector<int>>& selected_indices,
framework::Tensor* outs) { framework::Tensor* outs) {
int predict_dim = scores.dims()[1]; int predict_dim = scores.dims()[1];
int box_size = bboxes.dims()[1];
int out_dim = bboxes.dims()[1] + 2;
auto* scores_data = scores.data<T>(); auto* scores_data = scores.data<T>();
auto* bboxes_data = bboxes.data<T>(); auto* bboxes_data = bboxes.data<T>();
auto* odata = outs->data<T>(); auto* odata = outs->data<T>();
...@@ -202,11 +223,11 @@ void MultiClassOutput(const framework::Tensor& scores, ...@@ -202,11 +223,11 @@ void MultiClassOutput(const framework::Tensor& scores,
const std::vector<int>& indices = it.second; const std::vector<int>& indices = it.second;
for (size_t j = 0; j < indices.size(); ++j) { for (size_t j = 0; j < indices.size(); ++j) {
int idx = indices[j]; int idx = indices[j];
const T* bdata = bboxes_data + idx * kBBoxSize; const T* bdata = bboxes_data + idx * box_size;
odata[count * kOutputDim] = label; // label odata[count * out_dim] = label; // label
odata[count * kOutputDim + 1] = sdata[idx]; // score odata[count * out_dim + 1] = sdata[idx]; // score
// xmin, ymin, xmax, ymax // xmin, ymin, xmax, ymax
std::memcpy(odata + count * kOutputDim + 2, bdata, 4 * sizeof(T)); std::memcpy(odata + count * out_dim + 2, bdata, box_size * sizeof(T));
count++; count++;
} }
} }
...@@ -256,7 +277,8 @@ void MultiClassNMSCompute(const MultiClassNMSParam<CPU>& param) { ...@@ -256,7 +277,8 @@ void MultiClassNMSCompute(const MultiClassNMSParam<CPU>& param) {
float* od = outs->mutable_data<float>({1}); float* od = outs->mutable_data<float>({1});
od[0] = -1; od[0] = -1;
} else { } else {
outs->mutable_data<float>({num_kept, kOutputDim}); int64_t out_dim = box_dim + 2;
outs->mutable_data<float>({num_kept, out_dim});
for (int64_t i = 0; i < batch_size; ++i) { for (int64_t i = 0; i < batch_size; ++i) {
framework::Tensor ins_score = input_scores->Slice(i, i + 1); framework::Tensor ins_score = input_scores->Slice(i, i + 1);
ins_score.Resize({class_num, predict_dim}); ins_score.Resize({class_num, predict_dim});
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SUM_OP
#pragma once
#include <vector>
#include "operators/math/selected_rows_functor.h"
namespace paddle_mobile {
namespace operators {
using LoDTensorArray = std::vector<LoDTensor>;
template <typename P>
void SumCompute(const SumParam<CPU> &param) {
auto inputsvars = param.InputsVars();
int N = inputsvars.size();
auto *outvar = param.OutVar();
bool in_place = outvar == inputsvars[0];
if (outvar->IsType<framework::LoDTensor>()) {
auto *out = outvar->GetMutable<LoDTensor>();
if (!in_place) {
out->mutable_data<float>();
}
auto *outptr = out->data<float>();
// auto result = Flatten(*out);
if (!in_place) {
std::fill(out->data<float>(), out->data<float>() + out->numel(), 0);
}
math::SelectedRowsAddToTensor<float> functor;
for (int i = in_place ? 1 : 0; i < N; i++) {
if (inputsvars[i]->IsType<framework::LoDTensor>()) {
auto *in_t = inputsvars[i]->Get<framework::LoDTensor>();
auto *inptr = in_t->data<float>();
if (in_t->numel() == 0) {
continue;
}
for (int j = 0; j < out->numel(); ++j) {
outptr[j] = outptr[j] + inptr[j];
}
} else if (inputsvars[i]->IsType<framework::SelectedRows>()) {
auto *in_t = inputsvars[i]->Get<framework::SelectedRows>();
functor(*in_t, out);
} else {
PADDLE_MOBILE_THROW_EXCEPTION(
"Variable type must be LoDTensor/SelectedRows.");
}
}
} else if (outvar->IsType<framework::SelectedRows>()) {
std::unique_ptr<framework::SelectedRows> in0;
if (in_place) {
// If is in_place, we store the input[0] to in0
auto *in_sel0 = inputsvars[0]->Get<framework::SelectedRows>();
auto &rows = in_sel0->rows();
in0.reset(new framework::SelectedRows(rows, in_sel0->height()));
in0->mutable_value()->ShareDataWith(in_sel0->value());
}
auto get_selected_row = [&](size_t i) -> const framework::SelectedRows & {
if (i == 0 && in0) {
return *in0.get();
} else {
return *(inputsvars[i]->Get<framework::SelectedRows>());
}
};
auto *out = outvar->GetMutable<framework::SelectedRows>();
out->mutable_rows()->clear();
auto *out_value = out->mutable_value();
// Runtime InferShape
size_t first_dim = 0;
for (int i = 0; i < N; i++) {
auto &sel_row = get_selected_row(i);
first_dim += sel_row.rows().size();
}
auto in_dim = framework::vectorize(get_selected_row(N - 1).value().dims());
in_dim[0] = static_cast<int64_t>(first_dim);
out_value->Resize(framework::make_ddim(in_dim));
// if all the input sparse vars are empty, no need to
// merge these vars.
if (first_dim == 0UL) {
return;
}
out_value->mutable_data<float>();
math::SelectedRowsAddTo<float> functor;
int64_t offset = 0;
for (int i = 0; i < N; i++) {
auto &sel_row = get_selected_row(i);
if (sel_row.rows().size() == 0) {
continue;
}
PADDLE_MOBILE_ENFORCE(out->height() == sel_row.height(),
"seletrows height != outheight");
functor(sel_row, offset, out);
offset += sel_row.value().numel();
}
} else if (outvar->IsType<LoDTensorArray>()) {
auto &out_array = *outvar->GetMutable<LoDTensorArray>();
for (size_t i = in_place ? 1 : 0; i < inputsvars.size(); ++i) {
PADDLE_MOBILE_ENFORCE(inputsvars[i]->IsType<LoDTensorArray>(),
"Only support all inputs are TensorArray");
auto *in_array = inputsvars[i]->Get<LoDTensorArray>();
for (size_t i = 0; i < in_array->size(); ++i) {
if ((*in_array)[i].numel() != 0) {
if (i >= out_array.size()) {
out_array.resize(i + 1);
}
if (out_array[i].numel() == 0) {
framework::TensorCopy((*in_array)[i], &out_array[i]);
out_array[i].set_lod((*in_array)[i].lod());
} else {
PADDLE_MOBILE_ENFORCE(out_array[i].lod() == (*in_array)[i].lod(),
"outLod != inLod");
auto *inptr = (*in_array)[i].data<float>();
auto *outptr = out_array[i].data<float>();
for (int j = 0; j < (*in_array)[i].numel(); ++j) {
outptr[j] = inptr[j] + outptr[j];
}
}
}
}
}
} else {
PADDLE_MOBILE_THROW_EXCEPTION(
"Unexpected branch, output variable type is %s", outvar->Type().name());
}
}
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISEMUL_OP
#pragma once
#include "framework/operator.h"
#include "operators/math/elementwise_op_function.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
class ElementwiseMulKernel
: public framework::OpKernelBase<DeviceType,
ElementwiseMulParam<DeviceType>> {
public:
void Compute(const ElementwiseMulParam<DeviceType> &param) const;
bool Init(ElementwiseMulParam<DeviceType> *param);
};
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISEADD_OP
#pragma once
#include "framework/operator.h"
#include "operators/math/elementwise_op_function.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
class ElementwiseSubKernel
: public framework::OpKernelBase<DeviceType,
ElementwiseSubParam<DeviceType>> {
public:
void Compute(const ElementwiseSubParam<DeviceType> &param) const;
bool Init(ElementwiseSubParam<DeviceType> *param);
};
} // namespace operators
} // namespace paddle_mobile
#endif
...@@ -66,10 +66,11 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) { ...@@ -66,10 +66,11 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel); fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
fpga::format_fp16_ofm(out); fpga::format_fp16_ofm(out);
fpga::WrapperConvArgs conv_arg = {0}; fpga::SplitConvArgs conv_arg = {0};
fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled, fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0], param->Strides()[1], param->Groups(), param->Strides()[0],
param->Paddings()[0], param->Paddings()[1], bs_ptr); param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg); param->SetFpgaArgs(conv_arg);
return true; return true;
......
...@@ -65,10 +65,11 @@ bool ConvAddBNReluKernel<FPGA, float>::Init( ...@@ -65,10 +65,11 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
fpga::format_fp16_ofm(out); fpga::format_fp16_ofm(out);
fpga::WrapperConvArgs conv_arg = {0}; fpga::SplitConvArgs conv_arg = {0};
fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled, fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0], param->Strides()[1], param->Groups(), param->Strides()[0],
param->Paddings()[0], param->Paddings()[1], bs_ptr); param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg); param->SetFpgaArgs(conv_arg);
return true; return true;
} }
......
...@@ -47,10 +47,11 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) { ...@@ -47,10 +47,11 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
fpga::format_fp16_ofm(out); fpga::format_fp16_ofm(out);
fpga::WrapperConvArgs conv_arg = {0}; fpga::SplitConvArgs conv_arg = {0};
fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled, fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0], param->Strides()[1], param->Groups(), param->Strides()[0],
param->Paddings()[0], param->Paddings()[1], bs_ptr); param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg); param->SetFpgaArgs(conv_arg);
return true; return true;
} }
......
...@@ -59,10 +59,11 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) { ...@@ -59,10 +59,11 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
fpga::format_fp16_ofm(out); fpga::format_fp16_ofm(out);
fpga::WrapperConvArgs conv_arg = {0}; fpga::SplitConvArgs conv_arg = {0};
fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled, fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0], param->Strides()[1], param->Groups(), param->Strides()[0],
param->Paddings()[0], param->Paddings()[1], bs_ptr); param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg); param->SetFpgaArgs(conv_arg);
return true; return true;
} }
......
...@@ -59,10 +59,11 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) { ...@@ -59,10 +59,11 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
fpga::format_fp16_ofm(out); fpga::format_fp16_ofm(out);
fpga::WrapperConvArgs conv_arg = {0}; fpga::SplitConvArgs conv_arg = {0};
fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled, fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0], param->Strides()[1], param->Groups(), param->Strides()[0],
param->Paddings()[0], param->Paddings()[1], bs_ptr); param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg); param->SetFpgaArgs(conv_arg);
return true; return true;
} }
......
...@@ -44,6 +44,7 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) { ...@@ -44,6 +44,7 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
int width = (uint32_t)input_x->dims()[3]; int width = (uint32_t)input_x->dims()[3];
int filter_channel = chw / height / width; int filter_channel = chw / height / width;
out->Resize(framework::make_ddim({1, channel, 1, 1}));
filter->Resize(framework::make_ddim({num, filter_channel, height, width})); filter->Resize(framework::make_ddim({num, filter_channel, height, width}));
float max_value = fpga::filter_find_max(filter); float max_value = fpga::filter_find_max(filter);
fpga::format_fc_filter(filter, max_value); fpga::format_fc_filter(filter, max_value);
...@@ -52,9 +53,9 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) { ...@@ -52,9 +53,9 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel); fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
fpga::format_fp16_ofm(out); fpga::format_fp16_ofm(out);
fpga::WrapperConvArgs conv_arg = {0}; fpga::SplitConvArgs conv_arg = {0};
fpga::fill_conv_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, 0, fpga::fill_split_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1,
0, bs_ptr); 0, 0, bs_ptr);
param->SetFpgaArgs(conv_arg); param->SetFpgaArgs(conv_arg);
return true; return true;
} }
......
...@@ -45,6 +45,7 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) { ...@@ -45,6 +45,7 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
int width = (uint32_t)input_x->dims()[3]; int width = (uint32_t)input_x->dims()[3];
int filter_channel = chw / height / width; int filter_channel = chw / height / width;
out->Resize(framework::make_ddim({1, channel, 1, 1}));
filter->Resize(framework::make_ddim({num, filter_channel, height, width})); filter->Resize(framework::make_ddim({num, filter_channel, height, width}));
float max_value = fpga::filter_find_max(filter); float max_value = fpga::filter_find_max(filter);
fpga::format_fc_filter(filter, max_value); fpga::format_fc_filter(filter, max_value);
...@@ -53,9 +54,9 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) { ...@@ -53,9 +54,9 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel); fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
fpga::format_fp16_ofm(out); fpga::format_fp16_ofm(out);
fpga::WrapperConvArgs conv_arg = {0}; fpga::SplitConvArgs conv_arg = {0};
fpga::fill_conv_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, 0, fpga::fill_split_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1,
0, bs_ptr); 0, 0, bs_ptr);
param->SetFpgaArgs(conv_arg); param->SetFpgaArgs(conv_arg);
return true; return true;
} }
......
...@@ -27,7 +27,7 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) { ...@@ -27,7 +27,7 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
auto input = const_cast<Tensor *>(param->InputX()); auto input = const_cast<Tensor *>(param->InputX());
auto input_ptr = input->data<float>(); auto input_ptr = input->data<float>();
auto float_input = new Tensor; auto float_input = new Tensor;
float_input->mutable_data<float>(input->dims()); float_input->mutable_data<float>({1, input->dims()[1]});
fpga::format_fp32_ofm(float_input); fpga::format_fp32_ofm(float_input);
fpga::BypassArgs args = {fpga::DATA_TYPE_FP16}; fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
...@@ -56,7 +56,6 @@ void SoftmaxKernel<FPGA, float>::Compute( ...@@ -56,7 +56,6 @@ void SoftmaxKernel<FPGA, float>::Compute(
fpga::fpga_invalidate( fpga::fpga_invalidate(
(void *)in_x->data<float>(), // NOLINT (void *)in_x->data<float>(), // NOLINT
fpga::get_align_image_cw(in_x->dims()[1]) * sizeof(float)); fpga::get_align_image_cw(in_x->dims()[1]) * sizeof(float));
math::SoftmaxFuntor<CPU, float>()(in_x, out); math::SoftmaxFuntor<CPU, float>()(in_x, out);
fpga::fpga_flush(out->data<float>(), out->memory_size()); fpga::fpga_flush(out->data<float>(), out->memory_size());
} }
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SUM_OP
#pragma once
#include "framework/operator.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
class SumKernel
: public framework::OpKernelBase<DeviceType, SumParam<DeviceType>> {
public:
void Compute(const SumParam<DeviceType> &param) const;
bool Init(SumParam<DeviceType> *param);
};
} // namespace operators
} // namespace paddle_mobile
#endif
...@@ -1667,7 +1667,7 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter, ...@@ -1667,7 +1667,7 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
const int w_times = (out_w - 2) / 3; const int w_times = (out_w - 2) / 3;
float32x4_t zero = vdupq_n_f32(0.0); float32x4_t zero = vdupq_n_f32(0.0);
for (int b = batch_size; b > 0; --b) { for (int b = batch_size; b > 0; --b) {
#pragma omp parallel for #pragma omp parallel for
for (int j = 0; j < c; j++) { for (int j = 0; j < c; j++) {
const float *input_row_ptr; const float *input_row_ptr;
float *output_row_ptr; float *output_row_ptr;
...@@ -1912,9 +1912,7 @@ void DepthwiseConv3x3s2p0(const Tensor *input, const Tensor *filter, ...@@ -1912,9 +1912,7 @@ void DepthwiseConv3x3s2p0(const Tensor *input, const Tensor *filter,
float w20 = filter_data[6]; float w20 = filter_data[6];
float w21 = filter_data[7]; float w21 = filter_data[7];
float w22 = filter_data[8]; float w22 = filter_data[8];
float32x4_t biasv = vld1q_dup_f32(bias_data); float32x4_t biasv = vld1q_dup_f32(bias_data);
for (int i = 0; i < output_height; i += 1) { for (int i = 0; i < output_height; i += 1) {
for (int m = 0; m < output_width - 2; m += 3) { for (int m = 0; m < output_width - 2; m += 3) {
float *output_ptr = output_data + i * output_width + m; float *output_ptr = output_data + i * output_width + m;
...@@ -1949,8 +1947,9 @@ void DepthwiseConv3x3s2p0(const Tensor *input, const Tensor *filter, ...@@ -1949,8 +1947,9 @@ void DepthwiseConv3x3s2p0(const Tensor *input, const Tensor *filter,
out0 = vmlaq_n_f32(out0, in4, w20); out0 = vmlaq_n_f32(out0, in4, w20);
out0 = vmlaq_n_f32(out0, tmp4, w21); out0 = vmlaq_n_f32(out0, tmp4, w21);
out0 = vmlaq_n_f32(out0, tmp5, w22); out0 = vmlaq_n_f32(out0, tmp5, w22);
if (if_bias) {
out0 = vaddq_f32(out0, biasv); out0 = vaddq_f32(out0, biasv);
}
vst1q_lane_f32(output_ptr, out0, 0); vst1q_lane_f32(output_ptr, out0, 0);
vst1q_lane_f32(output_ptr + 1, out0, 1); vst1q_lane_f32(output_ptr + 1, out0, 1);
vst1q_lane_f32(output_ptr + 2, out0, 2); vst1q_lane_f32(output_ptr + 2, out0, 2);
...@@ -1960,20 +1959,22 @@ void DepthwiseConv3x3s2p0(const Tensor *input, const Tensor *filter, ...@@ -1960,20 +1959,22 @@ void DepthwiseConv3x3s2p0(const Tensor *input, const Tensor *filter,
} }
for (int j = m; j < output_width; j++) { for (int j = m; j < output_width; j++) {
output_data[i * output_width + j] = output_data[i * output_width + j] =
input_data[(2 * i - 1) * input_width + 2 * j - 1] * w00 + input_data[(2 * i) * input_width + 2 * j] * w00 +
input_data[(2 * i - 1) * input_width + 2 * j] * w01 + input_data[(2 * i) * input_width + 2 * j + 1] * w01 +
input_data[(2 * i - 1) * input_width + 2 * j + 1] * w02 + input_data[(2 * i) * input_width + 2 * j + 2] * w02 +
input_data[(2 * i) * input_width + 2 * j - 1] * w10 + input_data[(2 * i + 1) * input_width + 2 * j] * w10 +
input_data[(2 * i) * input_width + 2 * j] * w11 + input_data[(2 * i + 1) * input_width + 2 * j + 1] * w11 +
input_data[(2 * i) * input_width + 2 * j + 1] * w12 + input_data[(2 * i + 1) * input_width + 2 * j + 2] * w12 +
input_data[(2 * i + 1) * input_width + 2 * j - 1] * w20 + input_data[(2 * i + 2) * input_width + 2 * j] * w20 +
input_data[(2 * i + 1) * input_width + 2 * j] * w21 + input_data[(2 * i + 2) * input_width + 2 * j + 1] * w21 +
input_data[(2 * i + 1) * input_width + 2 * j + 1] * w22; input_data[(2 * i + 2) * input_width + 2 * j + 2] * w22;
if (if_bias) {
output_data[i * output_width + j] += *bias_data; output_data[i * output_width + j] += *bias_data;
} }
} }
} }
} }
}
#endif #endif
} }
......
...@@ -187,29 +187,29 @@ void PackMatrixB(int k, int n, int n_tail, const float *B, int ldb, ...@@ -187,29 +187,29 @@ void PackMatrixB(int k, int n, int n_tail, const float *B, int ldb,
const float *B, int ldb, float *C, int ldc, float *p, const float *B, int ldb, float *C, int ldc, float *p,
std::string mode, float *bias, float *bias1); std::string mode, float *bias, float *bias1);
/************************ 8 bit function cluster ************************/ // 8 bits function cluster begins
// 8 bit int small block inner product // 8 bits int small block inner product
void AddDot6x8(int32_t k, const int8_t *a, const int8_t *b, int32_t *c, void AddDot6x8(int32_t k, const int8_t *a, const int8_t *b, int32_t *c,
int32_t ldc); int32_t ldc);
// 8 bit int inner product // 8 bits int inner product
void InnerKernelWithBias(int32_t mc, int32_t nc, int8_t alpha, void InnerKernelWithBias(int32_t mc, int32_t nc, int8_t alpha,
const int8_t *a, const int8_t *b, int8_t beta, const int8_t *a, const int8_t *b, int8_t beta,
int32_t *c, int32_t *C, int32_t ldc, bool relu, int32_t *c, int32_t *C, int32_t ldc, bool relu,
int8_t *bias); int8_t *bias);
// 8 bit int pack function // 8 bits int pack function
void PackMatrixA_6r(int32_t m, int32_t k, int32_t m_tail, const int8_t *A, void PackMatrixA_6r(int32_t m, int32_t k, int32_t m_tail, const int8_t *A,
int32_t lda, int8_t *buffer); int32_t lda, int8_t *buffer);
void PackMatrixB_8c(int32_t k, int32_t n, int32_t n_tail, const int8_t *B, void PackMatrixB_8c(int32_t k, int32_t n, int32_t n_tail, const int8_t *B,
int32_t ldb, int8_t *buffer); int32_t ldb, int8_t *buffer);
// 8 bit int matrix product // 8 bits int matrix product
void Sgemm(int32_t m, int32_t n, int32_t k, int8_t alpha, const int8_t *A, void Sgemm(int32_t m, int32_t n, int32_t k, int8_t alpha, const int8_t *A,
int32_t lda, const int8_t *B, int32_t ldb, int8_t beta, int32_t *C, int32_t lda, const int8_t *B, int32_t ldb, int8_t beta, int32_t *C,
int32_t ldc, bool relu, int8_t *bias); int32_t ldc, bool relu, int8_t *bias);
// 8 bit int write back // 8 bits int write back
// C = alpha * A * B + beta * C // C = alpha * A * B + beta * C
void WriteWithAlphaBeta(int32_t mc, int32_t nc, int32_t *c, int32_t *C, void WriteWithAlphaBeta(int32_t mc, int32_t nc, int32_t *c, int32_t *C,
int32_t ldc); int32_t ldc);
...@@ -239,7 +239,7 @@ void PackMatrixB(int k, int n, int n_tail, const float *B, int ldb, ...@@ -239,7 +239,7 @@ void PackMatrixB(int k, int n, int n_tail, const float *B, int ldb,
float *packedC; float *packedC;
float *zero; float *zero;
// 8 bit int // 8 bits int
int8_t *packedA_int8; int8_t *packedA_int8;
int8_t *packedB_int8; int8_t *packedB_int8;
int32_t *packedC_int8; int32_t *packedC_int8;
......
...@@ -27,7 +27,7 @@ namespace paddle_mobile { ...@@ -27,7 +27,7 @@ namespace paddle_mobile {
namespace operators { namespace operators {
namespace math { namespace math {
// 8 bit int small block inner product // 8 bits int small block inner product
void Gemm::AddDot6x8(int32_t k, const int8_t *a, const int8_t *b, int32_t *c, void Gemm::AddDot6x8(int32_t k, const int8_t *a, const int8_t *b, int32_t *c,
int32_t ldc) { int32_t ldc) {
#if __ARM_NEON #if __ARM_NEON
...@@ -36,13 +36,16 @@ void Gemm::AddDot6x8(int32_t k, const int8_t *a, const int8_t *b, int32_t *c, ...@@ -36,13 +36,16 @@ void Gemm::AddDot6x8(int32_t k, const int8_t *a, const int8_t *b, int32_t *c,
b_ptr = b; b_ptr = b;
int32_t kc1 = k >> 3; int32_t kc1 = k >> 3;
int32_t kc2 = k & 7; int32_t kc2 = k & 7;
int32_t kc3 = kc2 >> 1; int32_t kc3 = kc2 >> 2;
int32_t kc4 = kc2 & 1; int32_t kc4 = kc2 & 3;
int32_t kc5 = kc4 >> 1;
int32_t kc6 = kc4 & 1;
int32_t step = sizeof(int32_t) * ldc; int32_t step = sizeof(int32_t) * ldc;
asm volatile( asm volatile(
// q4-q15: save 48 results // q4-q15: save 48 results
"pld [%[a_ptr]] \n\t" "pld [%[a_ptr]] \n\t"
"pld [%[b_ptr]] \n\t" "pld [%[b_ptr]] \n\t"
"pld [%[b_ptr], #64] \n\t"
"vmov.s8 q4, #0 \n\t" "vmov.s8 q4, #0 \n\t"
"vmov.s8 q5, #0 \n\t" "vmov.s8 q5, #0 \n\t"
"vmov.s8 q6, #0 \n\t" "vmov.s8 q6, #0 \n\t"
...@@ -55,282 +58,344 @@ void Gemm::AddDot6x8(int32_t k, const int8_t *a, const int8_t *b, int32_t *c, ...@@ -55,282 +58,344 @@ void Gemm::AddDot6x8(int32_t k, const int8_t *a, const int8_t *b, int32_t *c,
"vmov.s8 q13, #0 \n\t" "vmov.s8 q13, #0 \n\t"
"vmov.s8 q14, #0 \n\t" "vmov.s8 q14, #0 \n\t"
"vmov.s8 q15, #0 \n\t" "vmov.s8 q15, #0 \n\t"
"mov r0, #6 \n\t" "mov r0, #12 \n\t"
"subs %[kc1], %[kc1], #1 \n\t" "subs %[kc1], %[kc1], #1 \n\t"
"blt 1f \n\t" "blt 1f \n\t"
"0: \n\t" "0: \n\t"
"vld1.s8 {d0}, [%[a_ptr]], r0 \n\t" // A col0 "pld [%[a_ptr], #64] \n\t"
"vld1.s8 {d1}, [%[a_ptr]], r0 \n\t" // A col1, q0 "pld [%[b_ptr], #128] \n\t"
// used "vld1.s8 {d0-d2}, [%[a_ptr]]! \n\t" // A 4 cols, q0 used,
"vld1.s8 {d2-d3}, [%[b_ptr]]! \n\t" // B row0, B // 1/2 q3 used
// row1, q1
// used
"vmov.s8 q2, #0 \n\t" // q2 used "vmov.s8 q2, #0 \n\t" // q2 used
"vdup.s8 d6, d0[0] \n\t" "vld1.s8 {d6-d7}, [%[b_ptr]]! \n\t" // B 2 rows, B row1,
"vdup.s8 d7, d1[0] \n\t" // q3 used // q1
"vmlal.s8 q2, d2, d6 \n\t" // A col00 * B "vdup.s8 d3, d0[0] \n\t" // q3 used // used
// row0 "vmlal.s8 q2, d6, d3 \n\t" // A col00 * B row0
"vmlal.s8 q2, d3, d7 \n\t" // A col10 * B "vdup.s8 d3, d0[6] \n\t" // q3 used
// row1, q3 "vmlal.s8 q2, d7, d3 \n\t" // A col10 * B row1,
// free // q3 free
"vaddw.s16 q4, q4, d4 \n\t" "vaddw.s16 q4, q4, d4 \n\t"
"vaddw.s16 q5, q5, d5 \n\t" // res row 0 "vaddw.s16 q5, q5, d5 \n\t" // res row 0
"vmov.s8 q2, #0 \n\t" "vmov.s8 q2, #0 \n\t"
"vdup.s8 d6, d0[1] \n\t" "vdup.s8 d3, d0[1] \n\t"
"vdup.s8 d7, d1[1] \n\t" "vmlal.s8 q2, d6, d3 \n\t"
"vmlal.s8 q2, d2, d6 \n\t" "vdup.s8 d3, d0[7] \n\t"
"vmlal.s8 q2, d3, d7 \n\t" "vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q6, q6, d4 \n\t" "vaddw.s16 q6, q6, d4 \n\t"
"vaddw.s16 q7, q7, d5 \n\t" // res row 1 "vaddw.s16 q7, q7, d5 \n\t" // res row 1
"vmov.s8 q2, #0 \n\t" "vmov.s8 q2, #0 \n\t"
"vdup.s8 d6, d0[2] \n\t" "vdup.s8 d3, d0[2] \n\t"
"vdup.s8 d7, d1[2] \n\t" "vmlal.s8 q2, d6, d3 \n\t"
"vmlal.s8 q2, d2, d6 \n\t" "vdup.s8 d3, d1[0] \n\t"
"vmlal.s8 q2, d3, d7 \n\t" "vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q8, q8, d4 \n\t" "vaddw.s16 q8, q8, d4 \n\t"
"vaddw.s16 q9, q9, d5 \n\t" // res row 2 "vaddw.s16 q9, q9, d5 \n\t" // res row 2
"vmov.s8 q2, #0 \n\t" "vmov.s8 q2, #0 \n\t"
"vdup.s8 d6, d0[3] \n\t" "vdup.s8 d3, d0[3] \n\t"
"vdup.s8 d7, d1[3] \n\t" "vmlal.s8 q2, d6, d3 \n\t"
"vmlal.s8 q2, d2, d6 \n\t" "vdup.s8 d3, d1[1] \n\t"
"vmlal.s8 q2, d3, d7 \n\t" "vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q10, q10, d4 \n\t" "vaddw.s16 q10, q10, d4 \n\t"
"vaddw.s16 q11, q11, d5 \n\t" // res row 3 "vaddw.s16 q11, q11, d5 \n\t" // res row 3
"vmov.s8 q2, #0. \n\t" "vmov.s8 q2, #0 \n\t"
"vdup.s8 d6, d0[4] \n\t" "vdup.s8 d3, d0[4] \n\t"
"vdup.s8 d7, d1[4] \n\t" "vmlal.s8 q2, d6, d3 \n\t"
"vmlal.s8 q2, d2, d6 \n\t" "vdup.s8 d3, d1[2] \n\t"
"vmlal.s8 q2, d3, d7 \n\t" "vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q12, q12, d4 \n\t" "vaddw.s16 q12, q12, d4 \n\t"
"vaddw.s16 q13, q13, d5 \n\t" // res row 4 "vaddw.s16 q13, q13, d5 \n\t" // res row 4
"vmov.s8 q2, #0 \n\t" "vmov.s8 q2, #0 \n\t"
"vdup.s8 d6, d0[5] \n\t" "vdup.s8 d3, d0[5] \n\t"
"vdup.s8 d7, d1[5] \n\t" "vmlal.s8 q2, d6, d3 \n\t"
"vmlal.s8 q2, d2, d6 \n\t" "vdup.s8 d3, d1[3] \n\t"
"vmlal.s8 q2, d3, d7 \n\t" "vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q14, q14, d4 \n\t" "vaddw.s16 q14, q14, d4 \n\t"
"vaddw.s16 q15, q15, d5 \n\t" // res row 5 "vaddw.s16 q15, q15, d5 \n\t" // res row 5
"vld1.s8 {d0}, [%[a_ptr]], r0 \n\t" // A col0 "vld1.s8 {d6-d7}, [%[b_ptr]]! \n\t" // B 2 rows, B row1,
"vld1.s8 {d1}, [%[a_ptr]], r0 \n\t" // A col1, q0 // q1
// used
"vld1.s8 {d2-d3}, [%[b_ptr]]! \n\t" // B row0, B
// row1, q1
// used
"vmov.s8 q2, #0 \n\t" // q2 used "vmov.s8 q2, #0 \n\t" // q2 used
"vdup.s8 d6, d0[0] \n\t" "vdup.s8 d3, d1[4] \n\t" // q3 used // used
"vdup.s8 d7, d1[0] \n\t" // q3 used "vmlal.s8 q2, d6, d3 \n\t" // A col00 * B row0
"vmlal.s8 q2, d2, d6 \n\t" // A col00 * B "vdup.s8 d3, d2[2] \n\t" // q3 used
// row0 "vmlal.s8 q2, d7, d3 \n\t" // A col10 * B row1,
"vmlal.s8 q2, d3, d7 \n\t" // A col10 * B // q3 free
// row1, q3
// free
"vaddw.s16 q4, q4, d4 \n\t" "vaddw.s16 q4, q4, d4 \n\t"
"vaddw.s16 q5, q5, d5 \n\t" // res row 0 "vaddw.s16 q5, q5, d5 \n\t" // res row 0
"vmov.s8 q2, #0 \n\t" "vmov.s8 q2, #0 \n\t"
"vdup.s8 d6, d0[1] \n\t" "vdup.s8 d3, d1[5] \n\t"
"vdup.s8 d7, d1[1] \n\t" "vmlal.s8 q2, d6, d3 \n\t"
"vmlal.s8 q2, d2, d6 \n\t" "vdup.s8 d3, d2[3] \n\t"
"vmlal.s8 q2, d3, d7 \n\t" "vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q6, q6, d4 \n\t" "vaddw.s16 q6, q6, d4 \n\t"
"vaddw.s16 q7, q7, d5 \n\t" // res row 1 "vaddw.s16 q7, q7, d5 \n\t" // res row 1
"vmov.s8 q2, #0 \n\t" "vmov.s8 q2, #0 \n\t"
"vdup.s8 d6, d0[2] \n\t" "vdup.s8 d3, d1[6] \n\t"
"vdup.s8 d7, d1[2] \n\t" "vmlal.s8 q2, d6, d3 \n\t"
"vmlal.s8 q2, d2, d6 \n\t" "vdup.s8 d3, d2[4] \n\t"
"vmlal.s8 q2, d3, d7 \n\t" "vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q8, q8, d4 \n\t" "vaddw.s16 q8, q8, d4 \n\t"
"vaddw.s16 q9, q9, d5 \n\t" // res row 2 "vaddw.s16 q9, q9, d5 \n\t" // res row 2
"vmov.s8 q2, #0 \n\t" "vmov.s8 q2, #0 \n\t"
"vdup.s8 d6, d0[3] \n\t" "vdup.s8 d3, d1[7] \n\t"
"vdup.s8 d7, d1[3] \n\t" "vmlal.s8 q2, d6, d3 \n\t"
"vmlal.s8 q2, d2, d6 \n\t" "vdup.s8 d3, d2[5] \n\t"
"vmlal.s8 q2, d3, d7 \n\t" "vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q10, q10, d4 \n\t" "vaddw.s16 q10, q10, d4 \n\t"
"vaddw.s16 q11, q11, d5 \n\t" // res row 3 "vaddw.s16 q11, q11, d5 \n\t" // res row 3
"vmov.s8 q2, #0. \n\t" "vmov.s8 q2, #0 \n\t"
"vdup.s8 d6, d0[4] \n\t" "vdup.s8 d3, d2[0] \n\t"
"vdup.s8 d7, d1[4] \n\t" "vmlal.s8 q2, d6, d3 \n\t"
"vmlal.s8 q2, d2, d6 \n\t" "vdup.s8 d3, d2[6] \n\t"
"vmlal.s8 q2, d3, d7 \n\t" "vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q12, q12, d4 \n\t" "vaddw.s16 q12, q12, d4 \n\t"
"vaddw.s16 q13, q13, d5 \n\t" // res row 4 "vaddw.s16 q13, q13, d5 \n\t" // res row 4
"vmov.s8 q2, #0 \n\t" "vmov.s8 q2, #0 \n\t"
"vdup.s8 d6, d0[5] \n\t" "vdup.s8 d3, d2[1] \n\t"
"vdup.s8 d7, d1[5] \n\t" "vmlal.s8 q2, d6, d3 \n\t"
"vmlal.s8 q2, d2, d6 \n\t" "vdup.s8 d3, d2[7] \n\t"
"vmlal.s8 q2, d3, d7 \n\t" "vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q14, q14, d4 \n\t" "vaddw.s16 q14, q14, d4 \n\t"
"vaddw.s16 q15, q15, d5 \n\t" // res row 5 "vaddw.s16 q15, q15, d5 \n\t" // res row 5
"vld1.s8 {d0}, [%[a_ptr]], r0 \n\t" // A col0 "vld1.s8 {d0-d2}, [%[a_ptr]]! \n\t" // A 4 cols, q0 used,
"vld1.s8 {d1}, [%[a_ptr]], r0 \n\t" // A col1, q0 // 1/2 q3 used
// used
"vld1.s8 {d2-d3}, [%[b_ptr]]! \n\t" // B row0, B
// row1, q1
// used
"vmov.s8 q2, #0 \n\t" // q2 used "vmov.s8 q2, #0 \n\t" // q2 used
"vdup.s8 d6, d0[0] \n\t" "vld1.s8 {d6-d7}, [%[b_ptr]]! \n\t" // B 2 rows, B row1,
"vdup.s8 d7, d1[0] \n\t" // q3 used // q1
"vmlal.s8 q2, d2, d6 \n\t" // A col00 * B "vdup.s8 d3, d0[0] \n\t" // q3 used // used
// row0 "vmlal.s8 q2, d6, d3 \n\t" // A col00 * B row0
"vmlal.s8 q2, d3, d7 \n\t" // A col10 * B "vdup.s8 d3, d0[6] \n\t" // q3 used
// row1, q3 "vmlal.s8 q2, d7, d3 \n\t" // A col10 * B row1,
// free // q3 free
"vaddw.s16 q4, q4, d4 \n\t" "vaddw.s16 q4, q4, d4 \n\t"
"vaddw.s16 q5, q5, d5 \n\t" // res row 0 "vaddw.s16 q5, q5, d5 \n\t" // res row 0
"vmov.s8 q2, #0 \n\t" "vmov.s8 q2, #0 \n\t"
"vdup.s8 d6, d0[1] \n\t" "vdup.s8 d3, d0[1] \n\t"
"vdup.s8 d7, d1[1] \n\t" "vmlal.s8 q2, d6, d3 \n\t"
"vmlal.s8 q2, d2, d6 \n\t" "vdup.s8 d3, d0[7] \n\t"
"vmlal.s8 q2, d3, d7 \n\t" "vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q6, q6, d4 \n\t" "vaddw.s16 q6, q6, d4 \n\t"
"vaddw.s16 q7, q7, d5 \n\t" // res row 1 "vaddw.s16 q7, q7, d5 \n\t" // res row 1
"vmov.s8 q2, #0 \n\t" "vmov.s8 q2, #0 \n\t"
"vdup.s8 d6, d0[2] \n\t" "vdup.s8 d3, d0[2] \n\t"
"vdup.s8 d7, d1[2] \n\t" "vmlal.s8 q2, d6, d3 \n\t"
"vmlal.s8 q2, d2, d6 \n\t" "vdup.s8 d3, d1[0] \n\t"
"vmlal.s8 q2, d3, d7 \n\t" "vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q8, q8, d4 \n\t" "vaddw.s16 q8, q8, d4 \n\t"
"vaddw.s16 q9, q9, d5 \n\t" // res row 2 "vaddw.s16 q9, q9, d5 \n\t" // res row 2
"vmov.s8 q2, #0 \n\t" "vmov.s8 q2, #0 \n\t"
"vdup.s8 d6, d0[3] \n\t" "vdup.s8 d3, d0[3] \n\t"
"vdup.s8 d7, d1[3] \n\t" "vmlal.s8 q2, d6, d3 \n\t"
"vmlal.s8 q2, d2, d6 \n\t" "vdup.s8 d3, d1[1] \n\t"
"vmlal.s8 q2, d3, d7 \n\t" "vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q10, q10, d4 \n\t" "vaddw.s16 q10, q10, d4 \n\t"
"vaddw.s16 q11, q11, d5 \n\t" // res row 3 "vaddw.s16 q11, q11, d5 \n\t" // res row 3
"vmov.s8 q2, #0. \n\t" "vmov.s8 q2, #0 \n\t"
"vdup.s8 d6, d0[4] \n\t" "vdup.s8 d3, d0[4] \n\t"
"vdup.s8 d7, d1[4] \n\t" "vmlal.s8 q2, d6, d3 \n\t"
"vmlal.s8 q2, d2, d6 \n\t" "vdup.s8 d3, d1[2] \n\t"
"vmlal.s8 q2, d3, d7 \n\t" "vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q12, q12, d4 \n\t" "vaddw.s16 q12, q12, d4 \n\t"
"vaddw.s16 q13, q13, d5 \n\t" // res row 4 "vaddw.s16 q13, q13, d5 \n\t" // res row 4
"vmov.s8 q2, #0 \n\t" "vmov.s8 q2, #0 \n\t"
"vdup.s8 d6, d0[5] \n\t" "vdup.s8 d3, d0[5] \n\t"
"vdup.s8 d7, d1[5] \n\t" "vmlal.s8 q2, d6, d3 \n\t"
"vmlal.s8 q2, d2, d6 \n\t" "vdup.s8 d3, d1[3] \n\t"
"vmlal.s8 q2, d3, d7 \n\t" "vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q14, q14, d4 \n\t" "vaddw.s16 q14, q14, d4 \n\t"
"vaddw.s16 q15, q15, d5 \n\t" // res row 5 "vaddw.s16 q15, q15, d5 \n\t" // res row 5
"vld1.s8 {d0}, [%[a_ptr]], r0 \n\t" // A col0 "vld1.s8 {d6-d7}, [%[b_ptr]]! \n\t" // B 2 rows, B row1,
"vld1.s8 {d1}, [%[a_ptr]], r0 \n\t" // A col1, q0 // q1
// used
"vld1.s8 {d2-d3}, [%[b_ptr]]! \n\t" // B row0, B
// row1, q1
// used
"vmov.s8 q2, #0 \n\t" // q2 used "vmov.s8 q2, #0 \n\t" // q2 used
"vdup.s8 d6, d0[0] \n\t" "vdup.s8 d3, d1[4] \n\t" // q3 used // used
"vdup.s8 d7, d1[0] \n\t" // q3 used "vmlal.s8 q2, d6, d3 \n\t" // A col00 * B row0
"vmlal.s8 q2, d2, d6 \n\t" // A col00 * B "vdup.s8 d3, d2[2] \n\t" // q3 used
// row0 "vmlal.s8 q2, d7, d3 \n\t" // A col10 * B row1,
"vmlal.s8 q2, d3, d7 \n\t" // A col10 * B // q3 free
// row1, q3
// free
"vaddw.s16 q4, q4, d4 \n\t" "vaddw.s16 q4, q4, d4 \n\t"
"vaddw.s16 q5, q5, d5 \n\t" // res row 0 "vaddw.s16 q5, q5, d5 \n\t" // res row 0
"vmov.s8 q2, #0 \n\t" "vmov.s8 q2, #0 \n\t"
"vdup.s8 d6, d0[1] \n\t" "vdup.s8 d3, d1[5] \n\t"
"vdup.s8 d7, d1[1] \n\t" "vmlal.s8 q2, d6, d3 \n\t"
"vmlal.s8 q2, d2, d6 \n\t" "vdup.s8 d3, d2[3] \n\t"
"vmlal.s8 q2, d3, d7 \n\t" "vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q6, q6, d4 \n\t" "vaddw.s16 q6, q6, d4 \n\t"
"vaddw.s16 q7, q7, d5 \n\t" // res row 1 "vaddw.s16 q7, q7, d5 \n\t" // res row 1
"vmov.s8 q2, #0 \n\t" "vmov.s8 q2, #0 \n\t"
"vdup.s8 d6, d0[2] \n\t" "vdup.s8 d3, d1[6] \n\t"
"vdup.s8 d7, d1[2] \n\t" "vmlal.s8 q2, d6, d3 \n\t"
"vmlal.s8 q2, d2, d6 \n\t" "vdup.s8 d3, d2[4] \n\t"
"vmlal.s8 q2, d3, d7 \n\t" "vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q8, q8, d4 \n\t" "vaddw.s16 q8, q8, d4 \n\t"
"vaddw.s16 q9, q9, d5 \n\t" // res row 2 "vaddw.s16 q9, q9, d5 \n\t" // res row 2
"vmov.s8 q2, #0 \n\t" "vmov.s8 q2, #0 \n\t"
"vdup.s8 d6, d0[3] \n\t" "vdup.s8 d3, d1[7] \n\t"
"vdup.s8 d7, d1[3] \n\t" "vmlal.s8 q2, d6, d3 \n\t"
"vmlal.s8 q2, d2, d6 \n\t" "vdup.s8 d3, d2[5] \n\t"
"vmlal.s8 q2, d3, d7 \n\t" "vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q10, q10, d4 \n\t" "vaddw.s16 q10, q10, d4 \n\t"
"vaddw.s16 q11, q11, d5 \n\t" // res row 3 "vaddw.s16 q11, q11, d5 \n\t" // res row 3
"vmov.s8 q2, #0. \n\t" "vmov.s8 q2, #0 \n\t"
"vdup.s8 d6, d0[4] \n\t" "vdup.s8 d3, d2[0] \n\t"
"vdup.s8 d7, d1[4] \n\t" "vmlal.s8 q2, d6, d3 \n\t"
"vmlal.s8 q2, d2, d6 \n\t" "vdup.s8 d3, d2[6] \n\t"
"vmlal.s8 q2, d3, d7 \n\t" "vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q12, q12, d4 \n\t" "vaddw.s16 q12, q12, d4 \n\t"
"vaddw.s16 q13, q13, d5 \n\t" // res row 4 "vaddw.s16 q13, q13, d5 \n\t" // res row 4
"vmov.s8 q2, #0 \n\t" "vmov.s8 q2, #0 \n\t"
"vdup.s8 d6, d0[5] \n\t" "vdup.s8 d3, d2[1] \n\t"
"vdup.s8 d7, d1[5] \n\t" "vmlal.s8 q2, d6, d3 \n\t"
"vmlal.s8 q2, d2, d6 \n\t" "vdup.s8 d3, d2[7] \n\t"
"vmlal.s8 q2, d3, d7 \n\t" "vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q14, q14, d4 \n\t" "vaddw.s16 q14, q14, d4 \n\t"
"vaddw.s16 q15, q15, d5 \n\t" // res row 5 "vaddw.s16 q15, q15, d5 \n\t" // res row 5
"subs %[kc1], %[kc1], #1 \n\t" // last <8 rows "subs %[kc1], %[kc1], #1 \n\t"
"bge 0b \n\t" "bge 0b \n\t"
"1: \n\t" "1: \n\t" // last <8 rows
"subs %[kc3], %[kc3], #1 \n\t" "subs %[kc3], %[kc3], #1 \n\t"
"blt 2f \n\t"
"vld1.s8 {d0-d2}, [%[a_ptr]]! \n\t"
"vmov.s8 q2, #0 \n\t"
"vld1.s8 {d6-d7}, [%[b_ptr]]! \n\t"
"vdup.s8 d3, d0[0] \n\t"
"vmlal.s8 q2, d6, d3 \n\t"
"vdup.s8 d3, d0[6] \n\t"
"vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q4, q4, d4 \n\t"
"vaddw.s16 q5, q5, d5 \n\t" // res row 0
"vmov.s8 q2, #0 \n\t"
"vdup.s8 d3, d0[1] \n\t"
"vmlal.s8 q2, d6, d3 \n\t"
"vdup.s8 d3, d0[7] \n\t"
"vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q6, q6, d4 \n\t"
"vaddw.s16 q7, q7, d5 \n\t" // res row 1
"vmov.s8 q2, #0 \n\t"
"vdup.s8 d3, d0[2] \n\t"
"vmlal.s8 q2, d6, d3 \n\t"
"vdup.s8 d3, d1[0] \n\t"
"vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q8, q8, d4 \n\t"
"vaddw.s16 q9, q9, d5 \n\t" // res row 2
"vmov.s8 q2, #0 \n\t"
"vdup.s8 d3, d0[3] \n\t"
"vmlal.s8 q2, d6, d3 \n\t"
"vdup.s8 d3, d1[1] \n\t"
"vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q10, q10, d4 \n\t"
"vaddw.s16 q11, q11, d5 \n\t" // res row 3
"vmov.s8 q2, #0 \n\t"
"vdup.s8 d3, d0[4] \n\t"
"vmlal.s8 q2, d6, d3 \n\t"
"vdup.s8 d3, d1[2] \n\t"
"vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q12, q12, d4 \n\t"
"vaddw.s16 q13, q13, d5 \n\t" // res row 4
"vmov.s8 q2, #0 \n\t"
"vdup.s8 d3, d0[5] \n\t"
"vmlal.s8 q2, d6, d3 \n\t"
"vdup.s8 d3, d1[3] \n\t"
"vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q14, q14, d4 \n\t"
"vaddw.s16 q15, q15, d5 \n\t" // res row 5
"vld1.s8 {d6-d7}, [%[b_ptr]]! \n\t"
"vmov.s8 q2, #0 \n\t"
"vdup.s8 d3, d1[4] \n\t"
"vmlal.s8 q2, d6, d3 \n\t"
"vdup.s8 d3, d2[2] \n\t"
"vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q4, q4, d4 \n\t"
"vaddw.s16 q5, q5, d5 \n\t" // res row 0
"vmov.s8 q2, #0 \n\t"
"vdup.s8 d3, d1[5] \n\t"
"vmlal.s8 q2, d6, d3 \n\t"
"vdup.s8 d3, d2[3] \n\t"
"vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q6, q6, d4 \n\t"
"vaddw.s16 q7, q7, d5 \n\t" // res row 1
"vmov.s8 q2, #0 \n\t"
"vdup.s8 d3, d1[6] \n\t"
"vmlal.s8 q2, d6, d3 \n\t"
"vdup.s8 d3, d2[4] \n\t"
"vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q8, q8, d4 \n\t"
"vaddw.s16 q9, q9, d5 \n\t" // res row 2
"vmov.s8 q2, #0 \n\t"
"vdup.s8 d3, d1[7] \n\t"
"vmlal.s8 q2, d6, d3 \n\t"
"vdup.s8 d3, d2[5] \n\t"
"vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q10, q10, d4 \n\t"
"vaddw.s16 q11, q11, d5 \n\t" // res row 3
"vmov.s8 q2, #0 \n\t"
"vdup.s8 d3, d2[0] \n\t"
"vmlal.s8 q2, d6, d3 \n\t"
"vdup.s8 d3, d2[6] \n\t"
"vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q12, q12, d4 \n\t"
"vaddw.s16 q13, q13, d5 \n\t" // res row 4
"vmov.s8 q2, #0 \n\t"
"vdup.s8 d3, d2[1] \n\t"
"vmlal.s8 q2, d6, d3 \n\t"
"vdup.s8 d3, d2[7] \n\t"
"vmlal.s8 q2, d7, d3 \n\t"
"vaddw.s16 q14, q14, d4 \n\t"
"vaddw.s16 q15, q15, d5 \n\t" // res row 5
"2: \n\t" // last <4 rows
"subs %[kc5], %[kc5], #1 \n\t"
"blt 3f \n\t" "blt 3f \n\t"
"2: \n\t" "vld1.s8 {d0, d1}, [%[a_ptr]], r0 \n\t"
"vld1.s8 {d0}, [%[a_ptr]], r0 \n\t" // A col0 "vmov.s8 q2, #0 \n\t"
"vld1.s8 {d1}, [%[a_ptr]], r0 \n\t" // A col1, q0
// used
"vld1.s8 {d2-d3}, [%[b_ptr]]! \n\t" // B row0, B
// row1, q1
// used
"vmov.s8 q2, #0 \n\t" // q2 used
"vdup.s8 d6, d0[0] \n\t" "vdup.s8 d6, d0[0] \n\t"
"vdup.s8 d7, d1[0] \n\t" // q3 used "vld1.s8 {d2-d3}, [%[b_ptr]]! \n\t"
"vmlal.s8 q2, d2, d6 \n\t" // A col00 * B "vdup.s8 d7, d0[6] \n\t"
// row0 "vmlal.s8 q2, d2, d6 \n\t"
"vmlal.s8 q2, d3, d7 \n\t" // A col10 * B "vmlal.s8 q2, d3, d7 \n\t"
// row1, q3
// free
"vaddw.s16 q4, q4, d4 \n\t" "vaddw.s16 q4, q4, d4 \n\t"
"vaddw.s16 q5, q5, d5 \n\t" // res row 0 "vaddw.s16 q5, q5, d5 \n\t" // res row 0
"vmov.s8 q2, #0 \n\t" "vmov.s8 q2, #0 \n\t"
"vdup.s8 d6, d0[1] \n\t" "vdup.s8 d6, d0[1] \n\t"
"vdup.s8 d7, d1[1] \n\t" "vdup.s8 d7, d0[7] \n\t"
"vmlal.s8 q2, d2, d6 \n\t" "vmlal.s8 q2, d2, d6 \n\t"
"vmlal.s8 q2, d3, d7 \n\t" "vmlal.s8 q2, d3, d7 \n\t"
"vaddw.s16 q6, q6, d4 \n\t" "vaddw.s16 q6, q6, d4 \n\t"
"vaddw.s16 q7, q7, d5 \n\t" // res row 1 "vaddw.s16 q7, q7, d5 \n\t" // res row 1
"vmov.s8 q2, #0 \n\t" "vmov.s8 q2, #0 \n\t"
"vdup.s8 d6, d0[2] \n\t" "vdup.s8 d6, d0[2] \n\t"
"vdup.s8 d7, d1[2] \n\t" "vdup.s8 d7, d1[0] \n\t"
"vmlal.s8 q2, d2, d6 \n\t" "vmlal.s8 q2, d2, d6 \n\t"
"vmlal.s8 q2, d3, d7 \n\t" "vmlal.s8 q2, d3, d7 \n\t"
"vaddw.s16 q8, q8, d4 \n\t" "vaddw.s16 q8, q8, d4 \n\t"
"vaddw.s16 q9, q9, d5 \n\t" // res row 2 "vaddw.s16 q9, q9, d5 \n\t" // res row 2
"vmov.s8 q2, #0 \n\t" "vmov.s8 q2, #0 \n\t"
"vdup.s8 d6, d0[3] \n\t" "vdup.s8 d6, d0[3] \n\t"
"vdup.s8 d7, d1[3] \n\t" "vdup.s8 d7, d1[1] \n\t"
"vmlal.s8 q2, d2, d6 \n\t" "vmlal.s8 q2, d2, d6 \n\t"
"vmlal.s8 q2, d3, d7 \n\t" "vmlal.s8 q2, d3, d7 \n\t"
"vaddw.s16 q10, q10, d4 \n\t" "vaddw.s16 q10, q10, d4 \n\t"
"vaddw.s16 q11, q11, d5 \n\t" // res row 3 "vaddw.s16 q11, q11, d5 \n\t" // res row 3
"vmov.s8 q2, #0. \n\t" "vmov.s8 q2, #0. \n\t"
"vdup.s8 d6, d0[4] \n\t" "vdup.s8 d6, d0[4] \n\t"
"vdup.s8 d7, d1[4] \n\t" "vdup.s8 d7, d1[2] \n\t"
"vmlal.s8 q2, d2, d6 \n\t" "vmlal.s8 q2, d2, d6 \n\t"
"vmlal.s8 q2, d3, d7 \n\t" "vmlal.s8 q2, d3, d7 \n\t"
"vaddw.s16 q12, q12, d4 \n\t" "vaddw.s16 q12, q12, d4 \n\t"
"vaddw.s16 q13, q13, d5 \n\t" // res row 4 "vaddw.s16 q13, q13, d5 \n\t" // res row 4
"vmov.s8 q2, #0 \n\t" "vmov.s8 q2, #0 \n\t"
"vdup.s8 d6, d0[5] \n\t" "vdup.s8 d6, d0[5] \n\t"
"vdup.s8 d7, d1[5] \n\t" "vdup.s8 d7, d1[3] \n\t"
"vmlal.s8 q2, d2, d6 \n\t" "vmlal.s8 q2, d2, d6 \n\t"
"vmlal.s8 q2, d3, d7 \n\t" "vmlal.s8 q2, d3, d7 \n\t"
"vaddw.s16 q14, q14, d4 \n\t" "vaddw.s16 q14, q14, d4 \n\t"
"vaddw.s16 q15, q15, d5 \n\t" // res row 5 "vaddw.s16 q15, q15, d5 \n\t" // res row 5
"subs %[kc3], %[kc3], #1 \n\t" "3: \n\t" // last <2 rows
"bge 2b \n\t" "subs %[kc6], %[kc6], #1 \n\t"
"3: \n\t" // odd, last
// row
"subs %[kc4], %[kc4], #1 \n\t"
"blt 4f \n\t" "blt 4f \n\t"
"vld1.s8 {d0}, [%[a_ptr]] \n\t" "vld1.s8 {d0}, [%[a_ptr]] \n\t"
"vld1.s8 {d1}, [%[b_ptr]] \n\t" "vld1.s8 {d1}, [%[b_ptr]] \n\t"
...@@ -367,13 +432,13 @@ void Gemm::AddDot6x8(int32_t k, const int8_t *a, const int8_t *b, int32_t *c, ...@@ -367,13 +432,13 @@ void Gemm::AddDot6x8(int32_t k, const int8_t *a, const int8_t *b, int32_t *c,
"vst1.32 {q14, q15}, [%[c]] \n\t" "vst1.32 {q14, q15}, [%[c]] \n\t"
: :
: [a_ptr] "r"(a_ptr), [b_ptr] "r"(b_ptr), [c] "r"(c), [kc1] "r"(kc1), : [a_ptr] "r"(a_ptr), [b_ptr] "r"(b_ptr), [c] "r"(c), [kc1] "r"(kc1),
[kc3] "r"(kc3), [kc4] "r"(kc4), [step] "r"(step) [kc3] "r"(kc3), [kc5] "r"(kc5), [kc6] "r"(kc6), [step] "r"(step)
: "cc", "memory", "r0", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", : "cc", "memory", "r0", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"); "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15");
#endif #endif
} }
// 8 bit int inner product // 8 bits int inner product
void Gemm::InnerKernelWithBias(int32_t mc, int32_t nc, int8_t alpha, void Gemm::InnerKernelWithBias(int32_t mc, int32_t nc, int8_t alpha,
const int8_t *a, const int8_t *b, int8_t beta, const int8_t *a, const int8_t *b, int8_t beta,
int32_t *c, int32_t *C, int32_t ldc, bool relu, int32_t *c, int32_t *C, int32_t ldc, bool relu,
...@@ -410,7 +475,7 @@ void Gemm::InnerKernelWithBias(int32_t mc, int32_t nc, int8_t alpha, ...@@ -410,7 +475,7 @@ void Gemm::InnerKernelWithBias(int32_t mc, int32_t nc, int8_t alpha,
} }
} }
// 8 bit int PackMatrixA // 8 bits int PackMatrixA
void Gemm::PackMatrixA_6r(int32_t m, int32_t k, int32_t m_tail, const int8_t *A, void Gemm::PackMatrixA_6r(int32_t m, int32_t k, int32_t m_tail, const int8_t *A,
int32_t lda, int8_t *buffer) { int32_t lda, int8_t *buffer) {
const int32_t i_length = m - m_tail; const int32_t i_length = m - m_tail;
...@@ -465,7 +530,7 @@ void Gemm::PackMatrixA_6r(int32_t m, int32_t k, int32_t m_tail, const int8_t *A, ...@@ -465,7 +530,7 @@ void Gemm::PackMatrixA_6r(int32_t m, int32_t k, int32_t m_tail, const int8_t *A,
} }
} }
// 8 bit int PackMatrixB // 8 bits int PackMatrixB
void Gemm::PackMatrixB_8c(int32_t k, int32_t n, int32_t n_tail, const int8_t *B, void Gemm::PackMatrixB_8c(int32_t k, int32_t n, int32_t n_tail, const int8_t *B,
int32_t ldb, int8_t *buffer) { int32_t ldb, int8_t *buffer) {
const int32_t j_length = n - n_tail; const int32_t j_length = n - n_tail;
...@@ -507,7 +572,7 @@ void Gemm::PackMatrixB_8c(int32_t k, int32_t n, int32_t n_tail, const int8_t *B, ...@@ -507,7 +572,7 @@ void Gemm::PackMatrixB_8c(int32_t k, int32_t n, int32_t n_tail, const int8_t *B,
} }
} }
// 8 bit int matrix product (m*k x k*n) // 8 bits int matrix product (m*k x k*n)
void Gemm::Sgemm(int32_t m, int32_t n, int32_t k, int8_t alpha, const int8_t *A, void Gemm::Sgemm(int32_t m, int32_t n, int32_t k, int8_t alpha, const int8_t *A,
int32_t lda, const int8_t *B, int32_t ldb, int8_t beta, int32_t lda, const int8_t *B, int32_t ldb, int8_t beta,
int32_t *C, int32_t ldc, bool relu, int8_t *bias) { int32_t *C, int32_t ldc, bool relu, int8_t *bias) {
...@@ -570,7 +635,7 @@ void Gemm::Sgemm(int32_t m, int32_t n, int32_t k, int8_t alpha, const int8_t *A, ...@@ -570,7 +635,7 @@ void Gemm::Sgemm(int32_t m, int32_t n, int32_t k, int8_t alpha, const int8_t *A,
paddle_mobile::memory::Free(zero_int8); paddle_mobile::memory::Free(zero_int8);
} }
// 8 bit int write back // 8 bits int write back
// C = alpha * A * B + beta * C // C = alpha * A * B + beta * C
void Gemm::WriteWithAlphaBeta(int32_t mc, int32_t nc, int32_t *c, int32_t *C, void Gemm::WriteWithAlphaBeta(int32_t mc, int32_t nc, int32_t *c, int32_t *C,
int32_t ldc) {} int32_t ldc) {}
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef MULTICLASSNMS_OP
#include "operators/math/gpc.h"
namespace gpc {
typedef struct lmt_shape { /* Local minima table */
double y; /* Y coordinate at local minimum */
edge_node *first_bound; /* Pointer to bound list */
struct lmt_shape *next; /* Pointer to next local minimum */
} lmt_node;
typedef struct sbt_t_shape { /* Scanbeam tree */
double y; /* Scanbeam node y value */
struct sbt_t_shape *less; /* Pointer to nodes with lower y */
struct sbt_t_shape *more; /* Pointer to nodes with higher y */
} sb_tree;
typedef struct it_shape { /* Intersection table */
edge_node *ie[2]; /* Intersecting edge (bundle) pair */
gpc_vertex point; /* Point of intersection */
struct it_shape *next; /* The next intersection table node */
} it_node;
typedef struct st_shape { /* Sorted edge table */
edge_node *edge; /* Pointer to AET edge */
double xb; /* Scanbeam bottom x coordinate */
double xt; /* Scanbeam top x coordinate */
double dx; /* Change in x for a unit y increase */
struct st_shape *prev; /* Previous edge in sorted list */
} st_node;
typedef struct bbox_shape { /* Contour axis-aligned bounding box */
double xmin; /* Minimum x coordinate */
double ymin; /* Minimum y coordinate */
double xmax; /* Maximum x coordinate */
double ymax; /* Maximum y coordinate */
} bbox;
/*
===========================================================================
Global Data
===========================================================================
*/
/* Horizontal edge state transitions within scanbeam boundary */
const h_state next_h_state[3][6] = {
/* ABOVE BELOW CROSS */
/* L R L R L R */
/* NH */
{BH, TH, TH, BH, NH, NH},
/* BH */
{NH, NH, NH, NH, TH, TH},
/* TH */
{NH, NH, NH, NH, BH, BH}};
/*
===========================================================================
Private Functions
===========================================================================
*/
static void reset_it(it_node **it) {
it_node *itn;
while (*it) {
itn = (*it)->next;
gpc_free<it_node>(*it);
*it = itn;
}
}
static void reset_lmt(lmt_node **lmt) {
lmt_node *lmtn;
while (*lmt) {
lmtn = (*lmt)->next;
gpc_free<lmt_node>(*lmt);
*lmt = lmtn;
}
}
static void insert_bound(edge_node **b, edge_node *e) {
edge_node *existing_bound = NULL;
if (!*b) {
/* Link node e to the tail of the list */
*b = e;
} else {
/* Do primary sort on the x field */
if (e[0].bot.x < (*b)[0].bot.x) {
/* Insert a new node mid-list */
existing_bound = *b;
*b = e;
(*b)->next_bound = existing_bound;
} else {
if (e[0].bot.x == (*b)[0].bot.x) {
/* Do secondary sort on the dx field */
if (e[0].dx < (*b)[0].dx) {
/* Insert a new node mid-list */
existing_bound = *b;
*b = e;
(*b)->next_bound = existing_bound;
} else {
/* Head further down the list */
insert_bound(&((*b)->next_bound), e);
}
} else {
/* Head further down the list */
insert_bound(&((*b)->next_bound), e);
}
}
}
}
static edge_node **bound_list(lmt_node **lmt, double y) {
lmt_node *existing_node;
if (!*lmt) {
/* Add node onto the tail end of the LMT */
gpc_malloc<lmt_node>(*lmt, sizeof(lmt_node),
const_cast<char *>("LMT insertion"));
(*lmt)->y = y;
(*lmt)->first_bound = NULL;
(*lmt)->next = NULL;
return &((*lmt)->first_bound);
} else if (y < (*lmt)->y) {
/* Insert a new LMT node before the current node */
existing_node = *lmt;
gpc_malloc<lmt_node>(*lmt, sizeof(lmt_node),
const_cast<char *>("LMT insertion"));
(*lmt)->y = y;
(*lmt)->first_bound = NULL;
(*lmt)->next = existing_node;
return &((*lmt)->first_bound);
} else {
if (y > (*lmt)->y) {
/* Head further up the LMT */
return bound_list(&((*lmt)->next), y);
} else {
/* Use this existing LMT node */
return &((*lmt)->first_bound);
}
}
}
static void add_to_sbtree(int *entries, sb_tree **sbtree, double y) {
if (!*sbtree) {
/* Add a new tree node here */
gpc_malloc<sb_tree>(*sbtree, sizeof(sb_tree),
const_cast<char *>("scanbeam tree insertion"));
(*sbtree)->y = y;
(*sbtree)->less = NULL;
(*sbtree)->more = NULL;
(*entries)++;
} else {
if ((*sbtree)->y > y) {
/* Head into the 'less' sub-tree */
add_to_sbtree(entries, &((*sbtree)->less), y);
} else {
if ((*sbtree)->y < y) {
/* Head into the 'more' sub-tree */
add_to_sbtree(entries, &((*sbtree)->more), y);
}
}
}
}
static void build_sbt(int *entries, double *sbt, sb_tree *sbtree) {
if (sbtree->less) {
build_sbt(entries, sbt, sbtree->less);
}
sbt[*entries] = sbtree->y;
(*entries)++;
if (sbtree->more) {
build_sbt(entries, sbt, sbtree->more);
}
}
static void free_sbtree(sb_tree **sbtree) {
if (*sbtree) {
free_sbtree(&((*sbtree)->less));
free_sbtree(&((*sbtree)->more));
gpc_free<sb_tree>(*sbtree);
}
}
static int count_optimal_vertices(gpc_vertex_list c) {
int result = 0;
int i = 0;
/* Ignore non-contributing contours */
if (c.num_vertices > 0) {
for (i = 0; i < c.num_vertices; i++) {
/* Ignore superfluous vertices embedded in horizontal edges */
if (gpc_optimal(c.vertex, i, c.num_vertices)) {
result++;
}
}
}
return result;
}
static edge_node *build_lmt(lmt_node **lmt, sb_tree **sbtree, int *sbt_entries,
gpc_polygon *p, int type, gpc_op op) {
int c = 0;
int i = 0;
int min = 0;
int max = 0;
int num_edges = 0;
int v = 0;
int num_vertices = 0;
int total_vertices = 0;
int e_index = 0;
edge_node *e = NULL;
edge_node *edge_table = NULL;
for (c = 0; c < p->num_contours; c++) {
total_vertices += count_optimal_vertices(p->contour[c]);
}
/* Create the entire input polygon edge table in one go */
gpc_malloc<edge_node>(edge_table, total_vertices * sizeof(edge_node),
const_cast<char *>("edge table creation"));
for (c = 0; c < p->num_contours; c++) {
if (p->contour[c].num_vertices < 0) {
/* Ignore the non-contributing contour and repair the vertex count */
p->contour[c].num_vertices = -p->contour[c].num_vertices;
} else {
/* Perform contour optimisation */
num_vertices = 0;
for (i = 0; i < p->contour[c].num_vertices; i++) {
if (gpc_optimal(p->contour[c].vertex, i, p->contour[c].num_vertices)) {
edge_table[num_vertices].vertex.x = p->contour[c].vertex[i].x;
edge_table[num_vertices].vertex.y = p->contour[c].vertex[i].y;
/* Record vertex in the scanbeam table */
add_to_sbtree(sbt_entries, sbtree, edge_table[num_vertices].vertex.y);
num_vertices++;
}
}
/* Do the contour forward pass */
for (min = 0; min < num_vertices; min++) {
/* If a forward local minimum... */
if (gpc_fwd_min(edge_table, min, num_vertices)) {
/* Search for the next local maximum... */
num_edges = 1;
max = gpc_next_index(min, num_vertices);
while (gpc_not_fmax(edge_table, max, num_vertices)) {
num_edges++;
max = gpc_next_index(max, num_vertices);
}
/* Build the next edge list */
e = &edge_table[e_index];
e_index += num_edges;
v = min;
e[0].bstate[BELOW] = UNBUNDLED;
e[0].bundle[BELOW][CLIP] = 0;
e[0].bundle[BELOW][SUBJ] = 0;
for (i = 0; i < num_edges; i++) {
e[i].xb = edge_table[v].vertex.x;
e[i].bot.x = edge_table[v].vertex.x;
e[i].bot.y = edge_table[v].vertex.y;
v = gpc_next_index(v, num_vertices);
e[i].top.x = edge_table[v].vertex.x;
e[i].top.y = edge_table[v].vertex.y;
e[i].dx = (edge_table[v].vertex.x - e[i].bot.x) /
(e[i].top.y - e[i].bot.y);
e[i].type = type;
e[i].outp[ABOVE] = NULL;
e[i].outp[BELOW] = NULL;
e[i].next = NULL;
e[i].prev = NULL;
e[i].succ =
((num_edges > 1) && (i < (num_edges - 1))) ? &(e[i + 1]) : NULL;
e[i].pred = ((num_edges > 1) && (i > 0)) ? &(e[i - 1]) : NULL;
e[i].next_bound = NULL;
e[i].bside[CLIP] = (op == GPC_DIFF) ? RIGHT : LEFT;
e[i].bside[SUBJ] = LEFT;
}
insert_bound(bound_list(lmt, edge_table[min].vertex.y), e);
}
}
/* Do the contour reverse pass */
for (min = 0; min < num_vertices; min++) {
/* If a reverse local minimum... */
if (gpc_rev_min(edge_table, min, num_vertices)) {
/* Search for the previous local maximum... */
num_edges = 1;
max = gpc_prev_index(min, num_vertices);
while (gpc_not_rmax(edge_table, max, num_vertices)) {
num_edges++;
max = gpc_prev_index(max, num_vertices);
}
/* Build the previous edge list */
e = &edge_table[e_index];
e_index += num_edges;
v = min;
e[0].bstate[BELOW] = UNBUNDLED;
e[0].bundle[BELOW][CLIP] = 0;
e[0].bundle[BELOW][SUBJ] = 0;
for (i = 0; i < num_edges; i++) {
e[i].xb = edge_table[v].vertex.x;
e[i].bot.x = edge_table[v].vertex.x;
e[i].bot.y = edge_table[v].vertex.y;
v = gpc_prev_index(v, num_vertices);
e[i].top.x = edge_table[v].vertex.x;
e[i].top.y = edge_table[v].vertex.y;
e[i].dx = (edge_table[v].vertex.x - e[i].bot.x) /
(e[i].top.y - e[i].bot.y);
e[i].type = type;
e[i].outp[ABOVE] = NULL;
e[i].outp[BELOW] = NULL;
e[i].next = NULL;
e[i].prev = NULL;
e[i].succ =
((num_edges > 1) && (i < (num_edges - 1))) ? &(e[i + 1]) : NULL;
e[i].pred = ((num_edges > 1) && (i > 0)) ? &(e[i - 1]) : NULL;
e[i].next_bound = NULL;
e[i].bside[CLIP] = (op == GPC_DIFF) ? RIGHT : LEFT;
e[i].bside[SUBJ] = LEFT;
}
insert_bound(bound_list(lmt, edge_table[min].vertex.y), e);
}
}
}
}
return edge_table;
} // NOLINT
static void add_edge_to_aet(edge_node **aet, edge_node *edge, edge_node *prev) {
if (!*aet) {
/* Append edge onto the tail end of the AET */
*aet = edge;
edge->prev = prev;
edge->next = NULL;
} else {
/* Do primary sort on the xb field */
if (edge->xb < (*aet)->xb) {
/* Insert edge here (before the AET edge) */
edge->prev = prev;
edge->next = *aet;
(*aet)->prev = edge;
*aet = edge;
} else {
if (edge->xb == (*aet)->xb) {
/* Do secondary sort on the dx field */
if (edge->dx < (*aet)->dx) {
/* Insert edge here (before the AET edge) */
edge->prev = prev;
edge->next = *aet;
(*aet)->prev = edge;
*aet = edge;
} else {
/* Head further into the AET */
add_edge_to_aet(&((*aet)->next), edge, *aet);
}
} else {
/* Head further into the AET */
add_edge_to_aet(&((*aet)->next), edge, *aet);
}
}
}
}
static void add_intersection(it_node **it, edge_node *edge0, edge_node *edge1,
double x, double y) {
it_node *existing_node;
if (!*it) {
/* Append a new node to the tail of the list */
gpc_malloc<it_node>(*it, sizeof(it_node),
const_cast<char *>("IT insertion"));
(*it)->ie[0] = edge0;
(*it)->ie[1] = edge1;
(*it)->point.x = x;
(*it)->point.y = y;
(*it)->next = NULL;
} else {
if ((*it)->point.y > y) {
/* Insert a new node mid-list */
existing_node = *it;
gpc_malloc<it_node>(*it, sizeof(it_node),
const_cast<char *>("IT insertion"));
(*it)->ie[0] = edge0;
(*it)->ie[1] = edge1;
(*it)->point.x = x;
(*it)->point.y = y;
(*it)->next = existing_node;
} else {
/* Head further down the list */
add_intersection(&((*it)->next), edge0, edge1, x, y);
}
}
}
static void add_st_edge(st_node **st, it_node **it, edge_node *edge,
double dy) {
st_node *existing_node;
double den = 0.0;
double r = 0.0;
double x = 0.0;
double y = 0.0;
if (!*st) {
/* Append edge onto the tail end of the ST */
gpc_malloc<st_node>(*st, sizeof(st_node),
const_cast<char *>("ST insertion"));
(*st)->edge = edge;
(*st)->xb = edge->xb;
(*st)->xt = edge->xt;
(*st)->dx = edge->dx;
(*st)->prev = NULL;
} else {
den = ((*st)->xt - (*st)->xb) - (edge->xt - edge->xb);
/* If new edge and ST edge don't cross */
if ((edge->xt >= (*st)->xt) || (edge->dx == (*st)->dx) ||
(fabs(den) <= DBL_EPSILON)) {
/* No intersection - insert edge here (before the ST edge) */
existing_node = *st;
gpc_malloc<st_node>(*st, sizeof(st_node),
const_cast<char *>("ST insertion"));
(*st)->edge = edge;
(*st)->xb = edge->xb;
(*st)->xt = edge->xt;
(*st)->dx = edge->dx;
(*st)->prev = existing_node;
} else {
/* Compute intersection between new edge and ST edge */
r = (edge->xb - (*st)->xb) / den;
x = (*st)->xb + r * ((*st)->xt - (*st)->xb);
y = r * dy;
/* Insert the edge pointers and the intersection point in the IT */
add_intersection(it, (*st)->edge, edge, x, y);
/* Head further into the ST */
add_st_edge(&((*st)->prev), it, edge, dy);
}
}
}
static void build_intersection_table(it_node **it, edge_node *aet, double dy) {
st_node *st;
st_node *stp;
edge_node *edge = NULL;
/* Build intersection table for the current scanbeam */
reset_it(it);
st = NULL;
/* Process each AET edge */
for (edge = aet; edge; edge = edge->next) {
if ((edge->bstate[ABOVE] == BUNDLE_HEAD) || edge->bundle[ABOVE][CLIP] ||
edge->bundle[ABOVE][SUBJ]) {
add_st_edge(&st, it, edge, dy);
}
}
/* Free the sorted edge table */
while (st) {
stp = st->prev;
gpc_free<st_node>(st);
st = stp;
}
}
static int count_contours(polygon_node *polygon) {
int nc = 0;
int nv = 0;
vertex_node *v = NULL;
vertex_node *nextv = NULL;
for (nc = 0; polygon; polygon = polygon->next) {
if (polygon->active) {
/* Count the vertices in the current contour */
nv = 0;
for (v = polygon->proxy->v[LEFT]; v; v = v->next) {
nv++;
}
/* Record valid vertex counts in the active field */
if (nv > 2) {
polygon->active = nv;
nc++;
} else {
/* Invalid contour: just free the heap */
for (v = polygon->proxy->v[LEFT]; v; v = nextv) {
nextv = v->next;
gpc_free<vertex_node>(v);
}
polygon->active = 0;
}
}
}
return nc;
}
static void add_left(polygon_node *p, double x, double y) {
vertex_node *nv = NULL;
/* Create a new vertex node and set its fields */
gpc_malloc<vertex_node>(nv, sizeof(vertex_node),
const_cast<char *>("vertex node creation"));
nv->x = x;
nv->y = y;
/* Add vertex nv to the left end of the polygon's vertex list */
nv->next = p->proxy->v[LEFT];
/* Update proxy->[LEFT] to point to nv */
p->proxy->v[LEFT] = nv;
}
static void merge_left(polygon_node *p, polygon_node *q, polygon_node *list) {
polygon_node *target = NULL;
/* Label contour as a hole */
q->proxy->hole = 1;
if (p->proxy != q->proxy) {
/* Assign p's vertex list to the left end of q's list */
p->proxy->v[RIGHT]->next = q->proxy->v[LEFT];
q->proxy->v[LEFT] = p->proxy->v[LEFT];
/* Redirect any p->proxy references to q->proxy */
for (target = p->proxy; list; list = list->next) {
if (list->proxy == target) {
list->active = 0;
list->proxy = q->proxy;
}
}
}
}
static void add_right(polygon_node *p, double x, double y) {
vertex_node *nv = NULL;
/* Create a new vertex node and set its fields */
gpc_malloc<vertex_node>(nv, sizeof(vertex_node),
const_cast<char *>("vertex node creation"));
nv->x = x;
nv->y = y;
nv->next = NULL;
/* Add vertex nv to the right end of the polygon's vertex list */
p->proxy->v[RIGHT]->next = nv;
/* Update proxy->v[RIGHT] to point to nv */
p->proxy->v[RIGHT] = nv;
}
static void merge_right(polygon_node *p, polygon_node *q, polygon_node *list) {
polygon_node *target = NULL;
/* Label contour as external */
q->proxy->hole = 0;
if (p->proxy != q->proxy) {
/* Assign p's vertex list to the right end of q's list */
q->proxy->v[RIGHT]->next = p->proxy->v[LEFT];
q->proxy->v[RIGHT] = p->proxy->v[RIGHT];
/* Redirect any p->proxy references to q->proxy */
for (target = p->proxy; list; list = list->next) {
if (list->proxy == target) {
list->active = 0;
list->proxy = q->proxy;
}
}
}
}
static void add_local_min(polygon_node **p, edge_node *edge, double x,
double y) {
polygon_node *existing_min = NULL;
vertex_node *nv = NULL;
existing_min = *p;
gpc_malloc<polygon_node>(*p, sizeof(polygon_node),
const_cast<char *>("polygon node creation"));
/* Create a new vertex node and set its fields */
gpc_malloc<vertex_node>(nv, sizeof(vertex_node),
const_cast<char *>("vertex node creation"));
nv->x = x;
nv->y = y;
nv->next = NULL;
/* Initialise proxy to point to p itself */
(*p)->proxy = (*p);
(*p)->active = 1;
(*p)->next = existing_min;
/* Make v[LEFT] and v[RIGHT] point to new vertex nv */
(*p)->v[LEFT] = nv;
(*p)->v[RIGHT] = nv;
/* Assign polygon p to the edge */
edge->outp[ABOVE] = *p;
}
static int count_tristrips(polygon_node *tn) {
int total = 0;
for (total = 0; tn; tn = tn->next) {
if (tn->active > 2) {
total++;
}
}
return total;
}
void add_vertex(vertex_node **t, double x, double y) {
if (!(*t)) {
gpc_malloc<vertex_node>(*t, sizeof(vertex_node),
const_cast<char *>("tristrip vertex creation"));
(*t)->x = x;
(*t)->y = y;
(*t)->next = NULL;
} else {
/* Head further down the list */
add_vertex(&((*t)->next), x, y);
}
}
void gpc_vertex_create(edge_node *e, int p, int s, double x, double y) {
add_vertex(&(e->outp[p]->v[s]), x, y);
e->outp[p]->active++;
}
static void new_tristrip(polygon_node **tn, edge_node *edge, double x,
double y) {
if (!(*tn)) {
gpc_malloc<polygon_node>(*tn, sizeof(polygon_node),
const_cast<char *>("tristrip node creation"));
(*tn)->next = NULL;
(*tn)->v[LEFT] = NULL;
(*tn)->v[RIGHT] = NULL;
(*tn)->active = 1;
add_vertex(&((*tn)->v[LEFT]), x, y);
edge->outp[ABOVE] = *tn;
} else {
/* Head further down the list */
new_tristrip(&((*tn)->next), edge, x, y);
}
}
static bbox *create_contour_bboxes(gpc_polygon *p) {
bbox *box;
int c = 0;
int v = 0;
gpc_malloc<bbox>(box, p->num_contours * sizeof(bbox),
const_cast<char *>("Bounding box creation"));
/* Construct contour bounding boxes */
for (c = 0; c < p->num_contours; c++) {
/* Initialise bounding box extent */
box[c].xmin = DBL_MAX;
box[c].ymin = DBL_MAX;
box[c].xmax = -DBL_MAX;
box[c].ymax = -DBL_MAX;
for (v = 0; v < p->contour[c].num_vertices; v++) {
/* Adjust bounding box */
if (p->contour[c].vertex[v].x < box[c].xmin) {
box[c].xmin = p->contour[c].vertex[v].x;
}
if (p->contour[c].vertex[v].y < box[c].ymin) {
box[c].ymin = p->contour[c].vertex[v].y;
}
if (p->contour[c].vertex[v].x > box[c].xmax) {
box[c].xmax = p->contour[c].vertex[v].x;
}
if (p->contour[c].vertex[v].y > box[c].ymax) {
box[c].ymax = p->contour[c].vertex[v].y;
}
}
}
return box;
}
static void minimax_test(gpc_polygon *subj, gpc_polygon *clip, gpc_op op) {
bbox *s_bbox;
bbox *c_bbox;
int s = 0;
int c = 0;
int *o_table = NULL;
int overlap = 0;
s_bbox = create_contour_bboxes(subj);
c_bbox = create_contour_bboxes(clip);
gpc_malloc<int>(o_table,
subj->num_contours * clip->num_contours * sizeof(int),
const_cast<char *>("overlap table creation"));
/* Check all subject contour bounding boxes against clip boxes */
for (s = 0; s < subj->num_contours; s++) {
for (c = 0; c < clip->num_contours; c++) {
o_table[c * subj->num_contours + s] =
(!((s_bbox[s].xmax < c_bbox[c].xmin) ||
(s_bbox[s].xmin > c_bbox[c].xmax))) &&
(!((s_bbox[s].ymax < c_bbox[c].ymin) ||
(s_bbox[s].ymin > c_bbox[c].ymax)));
}
}
/* For each clip contour, search for any subject contour overlaps */
for (c = 0; c < clip->num_contours; c++) {
overlap = 0;
for (s = 0; (!overlap) && (s < subj->num_contours); s++) {
overlap = o_table[c * subj->num_contours + s];
}
if (!overlap) {
/* Flag non contributing status by negating vertex count */
clip->contour[c].num_vertices = -clip->contour[c].num_vertices;
}
}
if (op == GPC_INT) {
/* For each subject contour, search for any clip contour overlaps */
for (s = 0; s < subj->num_contours; s++) {
overlap = 0;
for (c = 0; (!overlap) && (c < clip->num_contours); c++) {
overlap = o_table[c * subj->num_contours + s];
}
if (!overlap) {
/* Flag non contributing status by negating vertex count */
subj->contour[s].num_vertices = -subj->contour[s].num_vertices;
}
}
}
gpc_free<bbox>(s_bbox);
gpc_free<bbox>(c_bbox);
gpc_free<int>(o_table);
}
/*
===========================================================================
Public Functions
===========================================================================
*/
void gpc_free_polygon(gpc_polygon *p) {
int c = 0;
for (c = 0; c < p->num_contours; c++) {
gpc_free<gpc_vertex>(p->contour[c].vertex);
}
gpc_free<int>(p->hole);
gpc_free<gpc_vertex_list>(p->contour);
p->num_contours = 0;
}
void gpc_add_contour(gpc_polygon *p, gpc_vertex_list *new_contour, int hole) {
int *extended_hole = NULL;
int c = 0;
int v = 0;
gpc_vertex_list *extended_contour = NULL;
/* Create an extended hole array */
gpc_malloc<int>(extended_hole, (p->num_contours + 1) * sizeof(int),
const_cast<char *>("contour hole addition"));
/* Create an extended contour array */
gpc_malloc<gpc_vertex_list>(extended_contour,
(p->num_contours + 1) * sizeof(gpc_vertex_list),
const_cast<char *>("contour addition"));
/* Copy the old contour and hole data into the extended arrays */
for (c = 0; c < p->num_contours; c++) {
extended_hole[c] = p->hole[c];
extended_contour[c] = p->contour[c];
}
/* Copy the new contour and hole onto the end of the extended arrays */
c = p->num_contours;
extended_hole[c] = hole;
extended_contour[c].num_vertices = new_contour->num_vertices;
gpc_malloc<gpc_vertex>(extended_contour[c].vertex,
new_contour->num_vertices * sizeof(gpc_vertex),
const_cast<char *>("contour addition"));
for (v = 0; v < new_contour->num_vertices; v++) {
extended_contour[c].vertex[v] = new_contour->vertex[v];
}
/* Dispose of the old contour */
gpc_free<gpc_vertex_list>(p->contour);
gpc_free<int>(p->hole);
/* Update the polygon information */
p->num_contours++;
p->hole = extended_hole;
p->contour = extended_contour;
}
// gpc_polygon_clip
void gpc_polygon_clip(gpc_op op, gpc_polygon *subj, gpc_polygon *clip,
gpc_polygon *result) {
sb_tree *sbtree = NULL;
it_node *it = NULL;
it_node *intersect = NULL;
edge_node *edge = NULL;
edge_node *prev_edge = NULL;
edge_node *next_edge = NULL;
edge_node *succ_edge = NULL;
edge_node *e0 = NULL;
edge_node *e1 = NULL;
edge_node *aet = NULL;
edge_node *c_heap = NULL;
edge_node *s_heap = NULL;
lmt_node *lmt = NULL;
lmt_node *local_min = NULL;
polygon_node *out_poly = NULL;
polygon_node *p = NULL;
polygon_node *q = NULL;
polygon_node *poly = NULL;
polygon_node *npoly = NULL;
polygon_node *cf = NULL;
vertex_node *vtx = NULL;
vertex_node *nv = NULL;
h_state horiz[2];
int in[2];
int exists[2];
int parity[2] = {LEFT, LEFT};
int c = 0;
int v = 0;
int contributing = 0;
int search = 0;
int scanbeam = 0;
int sbt_entries = 0;
int vclass = 0;
int bl = 0;
int br = 0;
int tl = 0;
int tr = 0;
double *sbt = NULL;
double xb = 0.0;
double px = 0.0;
double yb = 0.0;
double yt = 0.0;
double dy = 0.0;
double ix = 0.0;
double iy = 0.0;
/* Test for trivial NULL result cases */
if (((subj->num_contours == 0) && (clip->num_contours == 0)) ||
((subj->num_contours == 0) && ((op == GPC_INT) || (op == GPC_DIFF))) ||
((clip->num_contours == 0) && (op == GPC_INT))) {
result->num_contours = 0;
result->hole = NULL;
result->contour = NULL;
return;
}
/* Identify potentialy contributing contours */
if (((op == GPC_INT) || (op == GPC_DIFF)) && (subj->num_contours > 0) &&
(clip->num_contours > 0)) {
minimax_test(subj, clip, op);
}
/* Build LMT */
if (subj->num_contours > 0) {
s_heap = build_lmt(&lmt, &sbtree, &sbt_entries, subj, SUBJ, op);
}
if (clip->num_contours > 0) {
c_heap = build_lmt(&lmt, &sbtree, &sbt_entries, clip, CLIP, op);
}
/* Return a NULL result if no contours contribute */
if (lmt == NULL) {
result->num_contours = 0;
result->hole = NULL;
result->contour = NULL;
reset_lmt(&lmt);
gpc_free<edge_node>(s_heap);
gpc_free<edge_node>(c_heap);
return;
}
/* Build scanbeam table from scanbeam tree */
gpc_malloc<double>(sbt, sbt_entries * sizeof(double),
const_cast<char *>("sbt creation"));
build_sbt(&scanbeam, sbt, sbtree);
scanbeam = 0;
free_sbtree(&sbtree);
/* Allow pointer re-use without causing memory leak */
if (subj == result) {
gpc_free_polygon(subj);
}
if (clip == result) {
gpc_free_polygon(clip);
}
/* Invert clip polygon for difference operation */
if (op == GPC_DIFF) {
parity[CLIP] = RIGHT;
}
local_min = lmt;
// Process each scanbeam
while (scanbeam < sbt_entries) {
/* Set yb and yt to the bottom and top of the scanbeam */
yb = sbt[scanbeam++];
if (scanbeam < sbt_entries) {
yt = sbt[scanbeam];
dy = yt - yb;
}
/* === SCANBEAM BOUNDARY PROCESSING ================================ */
/* If LMT node corresponding to yb exists */
if (local_min) {
if (local_min->y == yb) {
/* Add edges starting at this local minimum to the AET */
for (edge = local_min->first_bound; edge; edge = edge->next_bound) {
add_edge_to_aet(&aet, edge, NULL);
}
local_min = local_min->next;
}
}
/* Set dummy previous x value */
px = -DBL_MAX;
/* Create bundles within AET */
e0 = aet;
e1 = aet;
/* Set up bundle fields of first edge */
aet->bundle[ABOVE][aet->type] = (aet->top.y != yb);
aet->bundle[ABOVE][!aet->type] = 0;
aet->bstate[ABOVE] = UNBUNDLED;
for (next_edge = aet->next; next_edge; next_edge = next_edge->next) {
/* Set up bundle fields of next edge */
next_edge->bundle[ABOVE][next_edge->type] = (next_edge->top.y != yb);
next_edge->bundle[ABOVE][!next_edge->type] = 0;
next_edge->bstate[ABOVE] = UNBUNDLED;
/* Bundle edges above the scanbeam boundary if they coincide */
if (next_edge->bundle[ABOVE][next_edge->type]) {
if (gpc_eq(e0->xb, next_edge->xb) && gpc_eq(e0->dx, next_edge->dx) &&
(e0->top.y != yb)) {
next_edge->bundle[ABOVE][next_edge->type] ^=
e0->bundle[ABOVE][next_edge->type];
next_edge->bundle[ABOVE][!next_edge->type] =
e0->bundle[ABOVE][!next_edge->type];
next_edge->bstate[ABOVE] = BUNDLE_HEAD;
e0->bundle[ABOVE][CLIP] = 0;
e0->bundle[ABOVE][SUBJ] = 0;
e0->bstate[ABOVE] = BUNDLE_TAIL;
}
e0 = next_edge;
}
}
horiz[CLIP] = NH;
horiz[SUBJ] = NH;
// Process each edge at this scanbeam boundary
for (edge = aet; edge; edge = edge->next) {
exists[CLIP] =
edge->bundle[ABOVE][CLIP] + (edge->bundle[BELOW][CLIP] << 1);
exists[SUBJ] =
edge->bundle[ABOVE][SUBJ] + (edge->bundle[BELOW][SUBJ] << 1);
if (exists[CLIP] || exists[SUBJ]) {
/* Set bundle side */
edge->bside[CLIP] = parity[CLIP];
edge->bside[SUBJ] = parity[SUBJ];
/* Determine contributing status and quadrant occupancies */
switch (op) {
case GPC_DIFF:
case GPC_INT:
contributing = (exists[CLIP] && (parity[SUBJ] || horiz[SUBJ])) ||
(exists[SUBJ] && (parity[CLIP] || horiz[CLIP])) ||
(exists[CLIP] && exists[SUBJ] &&
(parity[CLIP] == parity[SUBJ]));
br = (parity[CLIP]) && (parity[SUBJ]);
bl = (parity[CLIP] ^ edge->bundle[ABOVE][CLIP]) &&
(parity[SUBJ] ^ edge->bundle[ABOVE][SUBJ]);
tr = (parity[CLIP] ^ (horiz[CLIP] != NH)) &&
(parity[SUBJ] ^ (horiz[SUBJ] != NH));
tl = (parity[CLIP] ^ (horiz[CLIP] != NH) ^
edge->bundle[BELOW][CLIP]) &&
(parity[SUBJ] ^ (horiz[SUBJ] != NH) ^
edge->bundle[BELOW][SUBJ]);
break;
case GPC_XOR:
contributing = exists[CLIP] || exists[SUBJ];
br = (parity[CLIP]) ^ (parity[SUBJ]);
bl = (parity[CLIP] ^ edge->bundle[ABOVE][CLIP]) ^
(parity[SUBJ] ^ edge->bundle[ABOVE][SUBJ]);
tr = (parity[CLIP] ^ (horiz[CLIP] != NH)) ^
(parity[SUBJ] ^ (horiz[SUBJ] != NH));
tl = (parity[CLIP] ^ (horiz[CLIP] != NH) ^
edge->bundle[BELOW][CLIP]) ^
(parity[SUBJ] ^ (horiz[SUBJ] != NH) ^
edge->bundle[BELOW][SUBJ]);
break;
case GPC_UNION:
contributing = (exists[CLIP] && (!parity[SUBJ] || horiz[SUBJ])) ||
(exists[SUBJ] && (!parity[CLIP] || horiz[CLIP])) ||
(exists[CLIP] && exists[SUBJ] &&
(parity[CLIP] == parity[SUBJ]));
br = (parity[CLIP]) || (parity[SUBJ]);
bl = (parity[CLIP] ^ edge->bundle[ABOVE][CLIP]) ||
(parity[SUBJ] ^ edge->bundle[ABOVE][SUBJ]);
tr = (parity[CLIP] ^ (horiz[CLIP] != NH)) ||
(parity[SUBJ] ^ (horiz[SUBJ] != NH));
tl = (parity[CLIP] ^ (horiz[CLIP] != NH) ^
edge->bundle[BELOW][CLIP]) ||
(parity[SUBJ] ^ (horiz[SUBJ] != NH) ^
edge->bundle[BELOW][SUBJ]);
break;
}
// Update parity
parity[CLIP] ^= edge->bundle[ABOVE][CLIP];
parity[SUBJ] ^= edge->bundle[ABOVE][SUBJ];
/* Update horizontal state */
if (exists[CLIP]) {
horiz[CLIP] = next_h_state[horiz[CLIP]]
[((exists[CLIP] - 1) << 1) + parity[CLIP]];
}
if (exists[SUBJ]) {
horiz[SUBJ] = next_h_state[horiz[SUBJ]]
[((exists[SUBJ] - 1) << 1) + parity[SUBJ]];
}
vclass = tr + (tl << 1) + (br << 2) + (bl << 3);
if (contributing) {
xb = edge->xb;
switch (vclass) {
case EMN:
case IMN:
add_local_min(&out_poly, edge, xb, yb);
px = xb;
cf = edge->outp[ABOVE];
break;
case ERI:
if (xb != px) {
add_right(cf, xb, yb);
px = xb;
}
edge->outp[ABOVE] = cf;
cf = NULL;
break;
case ELI:
add_left(edge->outp[BELOW], xb, yb);
px = xb;
cf = edge->outp[BELOW];
break;
case EMX:
if (xb != px) {
add_left(cf, xb, yb);
px = xb;
}
merge_right(cf, edge->outp[BELOW], out_poly);
cf = NULL;
break;
case ILI:
if (xb != px) {
add_left(cf, xb, yb);
px = xb;
}
edge->outp[ABOVE] = cf;
cf = NULL;
break;
case IRI:
add_right(edge->outp[BELOW], xb, yb);
px = xb;
cf = edge->outp[BELOW];
edge->outp[BELOW] = NULL;
break;
case IMX:
if (xb != px) {
add_right(cf, xb, yb);
px = xb;
}
merge_left(cf, edge->outp[BELOW], out_poly);
cf = NULL;
edge->outp[BELOW] = NULL;
break;
case IMM:
if (xb != px) {
add_right(cf, xb, yb);
px = xb;
}
merge_left(cf, edge->outp[BELOW], out_poly);
edge->outp[BELOW] = NULL;
add_local_min(&out_poly, edge, xb, yb);
cf = edge->outp[ABOVE];
break;
case EMM:
if (xb != px) {
add_left(cf, xb, yb);
px = xb;
}
merge_right(cf, edge->outp[BELOW], out_poly);
edge->outp[BELOW] = NULL;
add_local_min(&out_poly, edge, xb, yb);
cf = edge->outp[ABOVE];
break;
case LED:
if (edge->bot.y == yb) {
add_left(edge->outp[BELOW], xb, yb);
}
edge->outp[ABOVE] = edge->outp[BELOW];
px = xb;
break;
case RED:
if (edge->bot.y == yb) {
add_right(edge->outp[BELOW], xb, yb);
}
edge->outp[ABOVE] = edge->outp[BELOW];
px = xb;
break;
default:
break;
} /* End of switch */
} /* End of contributing conditional */
} /* End of edge exists conditional */
} // End of AET loop
/* Delete terminating edges from the AET, otherwise compute xt */
for (edge = aet; edge; edge = edge->next) {
if (edge->top.y == yb) {
prev_edge = edge->prev;
next_edge = edge->next;
if (prev_edge) {
prev_edge->next = next_edge;
} else {
aet = next_edge;
}
if (next_edge) {
next_edge->prev = prev_edge;
}
/* Copy bundle head state to the adjacent tail edge if required */
if ((edge->bstate[BELOW] == BUNDLE_HEAD) && prev_edge) {
if (prev_edge->bstate[BELOW] == BUNDLE_TAIL) {
prev_edge->outp[BELOW] = edge->outp[BELOW];
prev_edge->bstate[BELOW] = UNBUNDLED;
if (prev_edge->prev) {
if (prev_edge->prev->bstate[BELOW] == BUNDLE_TAIL) {
prev_edge->bstate[BELOW] = BUNDLE_HEAD;
}
}
}
}
} else {
if (edge->top.y == yt) {
edge->xt = edge->top.x;
} else {
edge->xt = edge->bot.x + edge->dx * (yt - edge->bot.y);
}
}
}
if (scanbeam < sbt_entries) {
/* === SCANBEAM INTERIOR PROCESSING ============================== */
build_intersection_table(&it, aet, dy);
/* Process each node in the intersection table */
for (intersect = it; intersect; intersect = intersect->next) {
e0 = intersect->ie[0];
e1 = intersect->ie[1];
/* Only generate output for contributing intersections */
if ((e0->bundle[ABOVE][CLIP] || e0->bundle[ABOVE][SUBJ]) &&
(e1->bundle[ABOVE][CLIP] || e1->bundle[ABOVE][SUBJ])) {
p = e0->outp[ABOVE];
q = e1->outp[ABOVE];
ix = intersect->point.x;
iy = intersect->point.y + yb;
in[CLIP] = (e0->bundle[ABOVE][CLIP] && !e0->bside[CLIP]) ||
(e1->bundle[ABOVE][CLIP] && e1->bside[CLIP]) ||
(!e0->bundle[ABOVE][CLIP] && !e1->bundle[ABOVE][CLIP] &&
e0->bside[CLIP] && e1->bside[CLIP]);
in[SUBJ] = (e0->bundle[ABOVE][SUBJ] && !e0->bside[SUBJ]) ||
(e1->bundle[ABOVE][SUBJ] && e1->bside[SUBJ]) ||
(!e0->bundle[ABOVE][SUBJ] && !e1->bundle[ABOVE][SUBJ] &&
e0->bside[SUBJ] && e1->bside[SUBJ]);
// Determine quadrant occupancies
switch (op) {
case GPC_DIFF:
case GPC_INT:
tr = (in[CLIP]) && (in[SUBJ]);
tl = (in[CLIP] ^ e1->bundle[ABOVE][CLIP]) &&
(in[SUBJ] ^ e1->bundle[ABOVE][SUBJ]);
br = (in[CLIP] ^ e0->bundle[ABOVE][CLIP]) &&
(in[SUBJ] ^ e0->bundle[ABOVE][SUBJ]);
bl = (in[CLIP] ^ e1->bundle[ABOVE][CLIP] ^
e0->bundle[ABOVE][CLIP]) &&
(in[SUBJ] ^ e1->bundle[ABOVE][SUBJ] ^
e0->bundle[ABOVE][SUBJ]);
break;
case GPC_XOR:
tr = (in[CLIP]) ^ (in[SUBJ]);
tl = (in[CLIP] ^ e1->bundle[ABOVE][CLIP]) ^
(in[SUBJ] ^ e1->bundle[ABOVE][SUBJ]);
br = (in[CLIP] ^ e0->bundle[ABOVE][CLIP]) ^
(in[SUBJ] ^ e0->bundle[ABOVE][SUBJ]);
bl = (in[CLIP] ^ e1->bundle[ABOVE][CLIP] ^
e0->bundle[ABOVE][CLIP]) ^
(in[SUBJ] ^ e1->bundle[ABOVE][SUBJ] ^
e0->bundle[ABOVE][SUBJ]);
break;
case GPC_UNION:
tr = (in[CLIP]) || (in[SUBJ]);
tl = (in[CLIP] ^ e1->bundle[ABOVE][CLIP]) ||
(in[SUBJ] ^ e1->bundle[ABOVE][SUBJ]);
br = (in[CLIP] ^ e0->bundle[ABOVE][CLIP]) ||
(in[SUBJ] ^ e0->bundle[ABOVE][SUBJ]);
bl = (in[CLIP] ^ e1->bundle[ABOVE][CLIP] ^
e0->bundle[ABOVE][CLIP]) ||
(in[SUBJ] ^ e1->bundle[ABOVE][SUBJ] ^
e0->bundle[ABOVE][SUBJ]);
break;
}
vclass = tr + (tl << 1) + (br << 2) + (bl << 3);
switch (vclass) {
case EMN:
add_local_min(&out_poly, e0, ix, iy);
e1->outp[ABOVE] = e0->outp[ABOVE];
break;
case ERI:
if (p) {
add_right(p, ix, iy);
e1->outp[ABOVE] = p;
e0->outp[ABOVE] = NULL;
}
break;
case ELI:
if (q) {
add_left(q, ix, iy);
e0->outp[ABOVE] = q;
e1->outp[ABOVE] = NULL;
}
break;
case EMX:
if (p && q) {
add_left(p, ix, iy);
merge_right(p, q, out_poly);
e0->outp[ABOVE] = NULL;
e1->outp[ABOVE] = NULL;
}
break;
case IMN:
add_local_min(&out_poly, e0, ix, iy);
e1->outp[ABOVE] = e0->outp[ABOVE];
break;
case ILI:
if (p) {
add_left(p, ix, iy);
e1->outp[ABOVE] = p;
e0->outp[ABOVE] = NULL;
}
break;
case IRI:
if (q) {
add_right(q, ix, iy);
e0->outp[ABOVE] = q;
e1->outp[ABOVE] = NULL;
}
break;
case IMX:
if (p && q) {
add_right(p, ix, iy);
merge_left(p, q, out_poly);
e0->outp[ABOVE] = NULL;
e1->outp[ABOVE] = NULL;
}
break;
case IMM:
if (p && q) {
add_right(p, ix, iy);
merge_left(p, q, out_poly);
add_local_min(&out_poly, e0, ix, iy);
e1->outp[ABOVE] = e0->outp[ABOVE];
}
break;
case EMM:
if (p && q) {
add_left(p, ix, iy);
merge_right(p, q, out_poly);
add_local_min(&out_poly, e0, ix, iy);
e1->outp[ABOVE] = e0->outp[ABOVE];
}
break;
default:
break;
} // End of switch
} /* End of contributing intersection conditional */
/* Swap bundle sides in response to edge crossing */
if (e0->bundle[ABOVE][CLIP]) {
e1->bside[CLIP] = !e1->bside[CLIP];
}
if (e1->bundle[ABOVE][CLIP]) {
e0->bside[CLIP] = !e0->bside[CLIP];
}
if (e0->bundle[ABOVE][SUBJ]) {
e1->bside[SUBJ] = !e1->bside[SUBJ];
}
if (e1->bundle[ABOVE][SUBJ]) {
e0->bside[SUBJ] = !e0->bside[SUBJ];
}
/* Swap e0 and e1 bundles in the AET */
prev_edge = e0->prev;
next_edge = e1->next;
if (next_edge) {
next_edge->prev = e0;
}
if (e0->bstate[ABOVE] == BUNDLE_HEAD) {
search = 1;
while (search) {
prev_edge = prev_edge->prev;
if (prev_edge) {
if (prev_edge->bstate[ABOVE] != BUNDLE_TAIL) {
search = 0;
}
} else {
search = 0;
}
}
}
if (!prev_edge) {
aet->prev = e1;
e1->next = aet;
aet = e0->next;
} else {
prev_edge->next->prev = e1;
e1->next = prev_edge->next;
prev_edge->next = e0->next;
}
e0->next->prev = prev_edge;
e1->next->prev = e1;
e0->next = next_edge;
} /* End of IT loop*/
// Prepare for next scanbeam
for (edge = aet; edge; edge = next_edge) {
next_edge = edge->next;
succ_edge = edge->succ;
if ((edge->top.y == yt) && succ_edge) {
/* Replace AET edge by its successor */
succ_edge->outp[BELOW] = edge->outp[ABOVE];
succ_edge->bstate[BELOW] = edge->bstate[ABOVE];
succ_edge->bundle[BELOW][CLIP] = edge->bundle[ABOVE][CLIP];
succ_edge->bundle[BELOW][SUBJ] = edge->bundle[ABOVE][SUBJ];
prev_edge = edge->prev;
if (prev_edge) {
prev_edge->next = succ_edge;
} else {
aet = succ_edge;
}
if (next_edge) {
next_edge->prev = succ_edge;
}
succ_edge->prev = prev_edge;
succ_edge->next = next_edge;
} else {
/* Update this edge */
edge->outp[BELOW] = edge->outp[ABOVE];
edge->bstate[BELOW] = edge->bstate[ABOVE];
edge->bundle[BELOW][CLIP] = edge->bundle[ABOVE][CLIP];
edge->bundle[BELOW][SUBJ] = edge->bundle[ABOVE][SUBJ];
edge->xb = edge->xt;
}
edge->outp[ABOVE] = NULL;
}
}
} /* === END OF SCANBEAM PROCESSING ================================== */
// Generate result polygon from out_poly
result->contour = NULL;
result->hole = NULL;
result->num_contours = count_contours(out_poly);
if (result->num_contours > 0) {
gpc_malloc<int>(result->hole, result->num_contours * sizeof(int),
const_cast<char *>("hole flag table creation"));
gpc_malloc<gpc_vertex_list>(result->contour,
result->num_contours * sizeof(gpc_vertex_list),
const_cast<char *>("contour creation"));
c = 0;
for (poly = out_poly; poly; poly = npoly) {
npoly = poly->next;
if (poly->active) {
result->hole[c] = poly->proxy->hole;
result->contour[c].num_vertices = poly->active;
gpc_malloc<gpc_vertex>(
result->contour[c].vertex,
result->contour[c].num_vertices * sizeof(gpc_vertex),
const_cast<char *>("vertex creation"));
v = result->contour[c].num_vertices - 1;
for (vtx = poly->proxy->v[LEFT]; vtx; vtx = nv) {
nv = vtx->next;
result->contour[c].vertex[v].x = vtx->x;
result->contour[c].vertex[v].y = vtx->y;
gpc_free<vertex_node>(vtx);
v--;
}
c++;
}
gpc_free<polygon_node>(poly);
}
} else {
for (poly = out_poly; poly; poly = npoly) {
npoly = poly->next;
gpc_free<polygon_node>(poly);
}
}
// Tidy up
reset_it(&it);
reset_lmt(&lmt);
gpc_free<edge_node>(c_heap);
gpc_free<edge_node>(s_heap);
gpc_free<double>(sbt);
} // NOLINT
void gpc_free_tristrip(gpc_tristrip *t) {
int s = 0;
for (s = 0; s < t->num_strips; s++) {
gpc_free<gpc_vertex>(t->strip[s].vertex);
}
gpc_free<gpc_vertex_list>(t->strip);
t->num_strips = 0;
}
void gpc_polygon_to_tristrip(gpc_polygon *s, gpc_tristrip *t) {
gpc_polygon c;
c.num_contours = 0;
c.hole = NULL;
c.contour = NULL;
gpc_tristrip_clip(GPC_DIFF, s, &c, t);
}
// gpc_tristrip_clip
void gpc_tristrip_clip(gpc_op op, gpc_polygon *subj, gpc_polygon *clip,
gpc_tristrip *result) {
sb_tree *sbtree = NULL;
it_node *it = NULL;
it_node *intersect = NULL;
edge_node *edge = NULL;
edge_node *prev_edge = NULL;
edge_node *next_edge = NULL;
edge_node *succ_edge = NULL;
edge_node *e0 = NULL;
edge_node *e1 = NULL;
edge_node *aet = NULL;
edge_node *c_heap = NULL;
edge_node *s_heap = NULL;
edge_node *cf = NULL;
lmt_node *lmt = NULL;
lmt_node *local_min = NULL;
polygon_node *tlist = NULL;
polygon_node *tn = NULL;
polygon_node *tnn = NULL;
polygon_node *p = NULL;
polygon_node *q = NULL;
vertex_node *lt = NULL;
vertex_node *ltn = NULL;
vertex_node *rt = NULL;
vertex_node *rtn = NULL;
h_state horiz[2];
vertex_type cft = NUL;
int in[2];
int exists[2];
int parity[2] = {LEFT, LEFT};
int s = 0;
int v = 0;
int contributing = 0;
int search = 0;
int scanbeam = 0;
int sbt_entries = 0;
int vclass = 0;
int bl = 0;
int br = 0;
int tl = 0;
int tr = 0;
double *sbt = NULL;
double xb = 0.0;
double px = 0.0;
double nx = 0.0;
double yb = 0.0;
double yt = 0.0;
double dy = 0.0;
double ix = 0.0;
double iy = 0.0;
/* Test for trivial NULL result cases */
if (((subj->num_contours == 0) && (clip->num_contours == 0)) ||
((subj->num_contours == 0) && ((op == GPC_INT) || (op == GPC_DIFF))) ||
((clip->num_contours == 0) && (op == GPC_INT))) {
result->num_strips = 0;
result->strip = NULL;
return;
}
/* Identify potentialy contributing contours */
if (((op == GPC_INT) || (op == GPC_DIFF)) && (subj->num_contours > 0) &&
(clip->num_contours > 0)) {
minimax_test(subj, clip, op);
}
/* Build LMT */
if (subj->num_contours > 0) {
s_heap = build_lmt(&lmt, &sbtree, &sbt_entries, subj, SUBJ, op);
}
if (clip->num_contours > 0) {
c_heap = build_lmt(&lmt, &sbtree, &sbt_entries, clip, CLIP, op);
}
/* Return a NULL result if no contours contribute */
if (lmt == NULL) {
result->num_strips = 0;
result->strip = NULL;
reset_lmt(&lmt);
gpc_free<edge_node>(s_heap);
gpc_free<edge_node>(c_heap);
return;
}
/* Build scanbeam table from scanbeam tree */
gpc_malloc<double>(sbt, sbt_entries * sizeof(double),
const_cast<char *>("sbt creation"));
build_sbt(&scanbeam, sbt, sbtree);
scanbeam = 0;
free_sbtree(&sbtree);
/* Invert clip polygon for difference operation */
if (op == GPC_DIFF) {
parity[CLIP] = RIGHT;
}
local_min = lmt;
// Process each scanbeam
while (scanbeam < sbt_entries) {
/* Set yb and yt to the bottom and top of the scanbeam */
yb = sbt[scanbeam++];
if (scanbeam < sbt_entries) {
yt = sbt[scanbeam];
dy = yt - yb;
}
/* === SCANBEAM BOUNDARY PROCESSING ================================ */
/* If LMT node corresponding to yb exists */
if (local_min) {
if (local_min->y == yb) {
/* Add edges starting at this local minimum to the AET */
for (edge = local_min->first_bound; edge; edge = edge->next_bound) {
add_edge_to_aet(&aet, edge, NULL);
}
local_min = local_min->next;
}
}
/* Set dummy previous x value */
/* Create bundles within AET */
px = -DBL_MAX;
e0 = aet;
e1 = aet;
/* Set up bundle fields of first edge */
aet->bundle[ABOVE][aet->type] = (aet->top.y != yb);
aet->bundle[ABOVE][!aet->type] = 0;
aet->bstate[ABOVE] = UNBUNDLED;
for (next_edge = aet->next; next_edge; next_edge = next_edge->next) {
/* Set up bundle fields of next edge */
next_edge->bundle[ABOVE][next_edge->type] = (next_edge->top.y != yb);
next_edge->bundle[ABOVE][!next_edge->type] = 0;
next_edge->bstate[ABOVE] = UNBUNDLED;
/* Bundle edges above the scanbeam boundary if they coincide */
if (next_edge->bundle[ABOVE][next_edge->type]) {
if (gpc_eq(e0->xb, next_edge->xb) && gpc_eq(e0->dx, next_edge->dx) &&
(e0->top.y != yb)) {
next_edge->bundle[ABOVE][next_edge->type] ^=
e0->bundle[ABOVE][next_edge->type];
next_edge->bundle[ABOVE][!next_edge->type] =
e0->bundle[ABOVE][!next_edge->type];
next_edge->bstate[ABOVE] = BUNDLE_HEAD;
e0->bundle[ABOVE][CLIP] = 0;
e0->bundle[ABOVE][SUBJ] = 0;
e0->bstate[ABOVE] = BUNDLE_TAIL;
}
e0 = next_edge;
}
}
horiz[CLIP] = NH;
horiz[SUBJ] = NH;
/* Process each edge at this scanbeam boundary */
for (edge = aet; edge; edge = edge->next) {
exists[CLIP] =
edge->bundle[ABOVE][CLIP] + (edge->bundle[BELOW][CLIP] << 1);
exists[SUBJ] =
edge->bundle[ABOVE][SUBJ] + (edge->bundle[BELOW][SUBJ] << 1);
if (exists[CLIP] || exists[SUBJ]) {
/* Set bundle side */
edge->bside[CLIP] = parity[CLIP];
edge->bside[SUBJ] = parity[SUBJ];
/* Determine contributing status and quadrant occupancies */
switch (op) {
case GPC_DIFF:
case GPC_INT:
contributing = (exists[CLIP] && (parity[SUBJ] || horiz[SUBJ])) ||
(exists[SUBJ] && (parity[CLIP] || horiz[CLIP])) ||
(exists[CLIP] && exists[SUBJ] &&
(parity[CLIP] == parity[SUBJ]));
br = (parity[CLIP]) && (parity[SUBJ]);
bl = (parity[CLIP] ^ edge->bundle[ABOVE][CLIP]) &&
(parity[SUBJ] ^ edge->bundle[ABOVE][SUBJ]);
tr = (parity[CLIP] ^ (horiz[CLIP] != NH)) &&
(parity[SUBJ] ^ (horiz[SUBJ] != NH));
tl = (parity[CLIP] ^ (horiz[CLIP] != NH) ^
edge->bundle[BELOW][CLIP]) &&
(parity[SUBJ] ^ (horiz[SUBJ] != NH) ^
edge->bundle[BELOW][SUBJ]);
break;
case GPC_XOR:
contributing = exists[CLIP] || exists[SUBJ];
br = (parity[CLIP]) ^ (parity[SUBJ]);
bl = (parity[CLIP] ^ edge->bundle[ABOVE][CLIP]) ^
(parity[SUBJ] ^ edge->bundle[ABOVE][SUBJ]);
tr = (parity[CLIP] ^ (horiz[CLIP] != NH)) ^
(parity[SUBJ] ^ (horiz[SUBJ] != NH));
tl = (parity[CLIP] ^ (horiz[CLIP] != NH) ^
edge->bundle[BELOW][CLIP]) ^
(parity[SUBJ] ^ (horiz[SUBJ] != NH) ^
edge->bundle[BELOW][SUBJ]);
break;
case GPC_UNION:
contributing = (exists[CLIP] && (!parity[SUBJ] || horiz[SUBJ])) ||
(exists[SUBJ] && (!parity[CLIP] || horiz[CLIP])) ||
(exists[CLIP] && exists[SUBJ] &&
(parity[CLIP] == parity[SUBJ]));
br = (parity[CLIP]) || (parity[SUBJ]);
bl = (parity[CLIP] ^ edge->bundle[ABOVE][CLIP]) ||
(parity[SUBJ] ^ edge->bundle[ABOVE][SUBJ]);
tr = (parity[CLIP] ^ (horiz[CLIP] != NH)) ||
(parity[SUBJ] ^ (horiz[SUBJ] != NH));
tl = (parity[CLIP] ^ (horiz[CLIP] != NH) ^
edge->bundle[BELOW][CLIP]) ||
(parity[SUBJ] ^ (horiz[SUBJ] != NH) ^
edge->bundle[BELOW][SUBJ]);
break;
}
// Update parity
parity[CLIP] ^= edge->bundle[ABOVE][CLIP];
parity[SUBJ] ^= edge->bundle[ABOVE][SUBJ];
/* Update horizontal state */
if (exists[CLIP]) {
horiz[CLIP] = next_h_state[horiz[CLIP]]
[((exists[CLIP] - 1) << 1) + parity[CLIP]];
}
if (exists[SUBJ]) {
horiz[SUBJ] = next_h_state[horiz[SUBJ]]
[((exists[SUBJ] - 1) << 1) + parity[SUBJ]];
}
vclass = tr + (tl << 1) + (br << 2) + (bl << 3);
if (contributing) {
xb = edge->xb;
switch (vclass) {
case EMN:
new_tristrip(&tlist, edge, xb, yb);
cf = edge;
break;
case ERI:
edge->outp[ABOVE] = cf->outp[ABOVE];
if (xb != cf->xb) {
gpc_vertex_create(edge, ABOVE, RIGHT, xb, yb);
}
cf = NULL;
break;
case ELI:
gpc_vertex_create(edge, BELOW, LEFT, xb, yb);
edge->outp[ABOVE] = NULL;
cf = edge;
break;
case EMX:
if (xb != cf->xb) {
gpc_vertex_create(edge, BELOW, RIGHT, xb, yb);
}
edge->outp[ABOVE] = NULL;
cf = NULL;
break;
case IMN:
if (cft == LED) {
if (cf->bot.y != yb) {
gpc_vertex_create(cf, BELOW, LEFT, cf->xb, yb);
}
new_tristrip(&tlist, cf, cf->xb, yb);
}
edge->outp[ABOVE] = cf->outp[ABOVE];
gpc_vertex_create(edge, ABOVE, RIGHT, xb, yb);
break;
case ILI:
new_tristrip(&tlist, edge, xb, yb);
cf = edge;
cft = ILI;
break;
case IRI:
if (cft == LED) {
if (cf->bot.y != yb) {
gpc_vertex_create(cf, BELOW, LEFT, cf->xb, yb);
}
new_tristrip(&tlist, cf, cf->xb, yb);
}
gpc_vertex_create(edge, BELOW, RIGHT, xb, yb);
edge->outp[ABOVE] = NULL;
break;
case IMX:
gpc_vertex_create(edge, BELOW, LEFT, xb, yb);
edge->outp[ABOVE] = NULL;
cft = IMX;
break;
case IMM:
gpc_vertex_create(edge, BELOW, LEFT, xb, yb);
edge->outp[ABOVE] = cf->outp[ABOVE];
if (xb != cf->xb) {
gpc_vertex_create(cf, ABOVE, RIGHT, xb, yb);
}
cf = edge;
break;
case EMM:
gpc_vertex_create(edge, BELOW, RIGHT, xb, yb);
edge->outp[ABOVE] = NULL;
new_tristrip(&tlist, edge, xb, yb);
cf = edge;
break;
case LED:
if (edge->bot.y == yb) {
gpc_vertex_create(edge, BELOW, LEFT, xb, yb);
}
edge->outp[ABOVE] = edge->outp[BELOW];
cf = edge;
cft = LED;
break;
case RED:
edge->outp[ABOVE] = cf->outp[ABOVE];
if (cft == LED) {
if (cf->bot.y == yb) {
gpc_vertex_create(edge, BELOW, RIGHT, xb, yb);
} else {
if (edge->bot.y == yb) {
gpc_vertex_create(cf, BELOW, LEFT, cf->xb, yb);
gpc_vertex_create(edge, BELOW, RIGHT, xb, yb);
}
}
} else {
gpc_vertex_create(edge, BELOW, RIGHT, xb, yb);
gpc_vertex_create(edge, ABOVE, RIGHT, xb, yb);
}
cf = NULL;
break;
default:
break;
} /* End of switch */
} /* End of contributing conditional */
} /* End of edge exists conditional */
} // End of AET loop
/* Delete terminating edges from the AET, otherwise compute xt */
for (edge = aet; edge; edge = edge->next) {
if (edge->top.y == yb) {
prev_edge = edge->prev;
next_edge = edge->next;
if (prev_edge) {
prev_edge->next = next_edge;
} else {
aet = next_edge;
}
if (next_edge) {
next_edge->prev = prev_edge;
}
/* Copy bundle head state to the adjacent tail edge if required */
if ((edge->bstate[BELOW] == BUNDLE_HEAD) && prev_edge) {
if (prev_edge->bstate[BELOW] == BUNDLE_TAIL) {
prev_edge->outp[BELOW] = edge->outp[BELOW];
prev_edge->bstate[BELOW] = UNBUNDLED;
if (prev_edge->prev) {
if (prev_edge->prev->bstate[BELOW] == BUNDLE_TAIL) {
prev_edge->bstate[BELOW] = BUNDLE_HEAD;
}
}
}
}
} else {
if (edge->top.y == yt) {
edge->xt = edge->top.x;
} else {
edge->xt = edge->bot.x + edge->dx * (yt - edge->bot.y);
}
}
}
if (scanbeam < sbt_entries) {
/* === SCANBEAM INTERIOR PROCESSING ============================== */
build_intersection_table(&it, aet, dy);
/* Process each node in the intersection table */
for (intersect = it; intersect; intersect = intersect->next) {
e0 = intersect->ie[0];
e1 = intersect->ie[1];
/* Only generate output for contributing intersections */
if ((e0->bundle[ABOVE][CLIP] || e0->bundle[ABOVE][SUBJ]) &&
(e1->bundle[ABOVE][CLIP] || e1->bundle[ABOVE][SUBJ])) {
p = e0->outp[ABOVE];
q = e1->outp[ABOVE];
ix = intersect->point.x;
iy = intersect->point.y + yb;
in[CLIP] = (e0->bundle[ABOVE][CLIP] && !e0->bside[CLIP]) ||
(e1->bundle[ABOVE][CLIP] && e1->bside[CLIP]) ||
(!e0->bundle[ABOVE][CLIP] && !e1->bundle[ABOVE][CLIP] &&
e0->bside[CLIP] && e1->bside[CLIP]);
in[SUBJ] = (e0->bundle[ABOVE][SUBJ] && !e0->bside[SUBJ]) ||
(e1->bundle[ABOVE][SUBJ] && e1->bside[SUBJ]) ||
(!e0->bundle[ABOVE][SUBJ] && !e1->bundle[ABOVE][SUBJ] &&
e0->bside[SUBJ] && e1->bside[SUBJ]);
switch (op) { // Determine quadrant occupancies
case GPC_DIFF:
case GPC_INT:
tr = (in[CLIP]) && (in[SUBJ]);
tl = (in[CLIP] ^ e1->bundle[ABOVE][CLIP]) &&
(in[SUBJ] ^ e1->bundle[ABOVE][SUBJ]);
br = (in[CLIP] ^ e0->bundle[ABOVE][CLIP]) &&
(in[SUBJ] ^ e0->bundle[ABOVE][SUBJ]);
bl = (in[CLIP] ^ e1->bundle[ABOVE][CLIP] ^
e0->bundle[ABOVE][CLIP]) &&
(in[SUBJ] ^ e1->bundle[ABOVE][SUBJ] ^
e0->bundle[ABOVE][SUBJ]);
break;
case GPC_XOR:
tr = (in[CLIP]) ^ (in[SUBJ]);
tl = (in[CLIP] ^ e1->bundle[ABOVE][CLIP]) ^
(in[SUBJ] ^ e1->bundle[ABOVE][SUBJ]);
br = (in[CLIP] ^ e0->bundle[ABOVE][CLIP]) ^
(in[SUBJ] ^ e0->bundle[ABOVE][SUBJ]);
bl = (in[CLIP] ^ e1->bundle[ABOVE][CLIP] ^
e0->bundle[ABOVE][CLIP]) ^
(in[SUBJ] ^ e1->bundle[ABOVE][SUBJ] ^
e0->bundle[ABOVE][SUBJ]);
break;
case GPC_UNION:
tr = (in[CLIP]) || (in[SUBJ]);
tl = (in[CLIP] ^ e1->bundle[ABOVE][CLIP]) ||
(in[SUBJ] ^ e1->bundle[ABOVE][SUBJ]);
br = (in[CLIP] ^ e0->bundle[ABOVE][CLIP]) ||
(in[SUBJ] ^ e0->bundle[ABOVE][SUBJ]);
bl = (in[CLIP] ^ e1->bundle[ABOVE][CLIP] ^
e0->bundle[ABOVE][CLIP]) ||
(in[SUBJ] ^ e1->bundle[ABOVE][SUBJ] ^
e0->bundle[ABOVE][SUBJ]);
break;
}
vclass = tr + (tl << 1) + (br << 2) + (bl << 3);
switch (vclass) {
case EMN:
new_tristrip(&tlist, e1, ix, iy);
e0->outp[ABOVE] = e1->outp[ABOVE];
break;
case ERI:
if (p) {
gpc_p_edge(prev_edge, e0, ABOVE);
gpc_vertex_create(prev_edge, ABOVE, LEFT, px, iy);
gpc_vertex_create(e0, ABOVE, RIGHT, ix, iy);
e1->outp[ABOVE] = e0->outp[ABOVE];
e0->outp[ABOVE] = NULL;
}
break;
case ELI:
if (q) {
gpc_n_edge(next_edge, e1, ABOVE);
gpc_vertex_create(e1, ABOVE, LEFT, ix, iy);
gpc_vertex_create(next_edge, ABOVE, RIGHT, nx, iy);
e0->outp[ABOVE] = e1->outp[ABOVE];
e1->outp[ABOVE] = NULL;
}
break;
case EMX:
if (p && q) {
gpc_vertex_create(e0, ABOVE, LEFT, ix, iy);
e0->outp[ABOVE] = NULL;
e1->outp[ABOVE] = NULL;
}
break;
case IMN:
gpc_p_edge(prev_edge, e0, ABOVE);
gpc_vertex_create(prev_edge, ABOVE, LEFT, px, iy);
gpc_n_edge(next_edge, e1, ABOVE);
gpc_vertex_create(next_edge, ABOVE, RIGHT, nx, iy);
new_tristrip(&tlist, prev_edge, px, iy);
e1->outp[ABOVE] = prev_edge->outp[ABOVE];
gpc_vertex_create(e1, ABOVE, RIGHT, ix, iy);
new_tristrip(&tlist, e0, ix, iy);
next_edge->outp[ABOVE] = e0->outp[ABOVE];
gpc_vertex_create(next_edge, ABOVE, RIGHT, nx, iy);
break;
case ILI:
if (p) {
gpc_vertex_create(e0, ABOVE, LEFT, ix, iy);
gpc_n_edge(next_edge, e1, ABOVE);
gpc_vertex_create(next_edge, ABOVE, RIGHT, nx, iy);
e1->outp[ABOVE] = e0->outp[ABOVE];
e0->outp[ABOVE] = NULL;
}
break;
case IRI:
if (q) {
gpc_vertex_create(e1, ABOVE, RIGHT, ix, iy);
gpc_p_edge(prev_edge, e0, ABOVE);
gpc_vertex_create(prev_edge, ABOVE, LEFT, px, iy);
e0->outp[ABOVE] = e1->outp[ABOVE];
e1->outp[ABOVE] = NULL;
}
break;
case IMX:
if (p && q) {
gpc_vertex_create(e0, ABOVE, RIGHT, ix, iy);
gpc_vertex_create(e1, ABOVE, LEFT, ix, iy);
e0->outp[ABOVE] = NULL;
e1->outp[ABOVE] = NULL;
gpc_p_edge(prev_edge, e0, ABOVE);
gpc_vertex_create(prev_edge, ABOVE, LEFT, px, iy);
new_tristrip(&tlist, prev_edge, px, iy);
gpc_n_edge(next_edge, e1, ABOVE);
gpc_vertex_create(next_edge, ABOVE, RIGHT, nx, iy);
next_edge->outp[ABOVE] = prev_edge->outp[ABOVE];
gpc_vertex_create(next_edge, ABOVE, RIGHT, nx, iy);
}
break;
case IMM:
if (p && q) {
gpc_vertex_create(e0, ABOVE, RIGHT, ix, iy);
gpc_vertex_create(e1, ABOVE, LEFT, ix, iy);
gpc_p_edge(prev_edge, e0, ABOVE);
gpc_vertex_create(prev_edge, ABOVE, LEFT, px, iy);
new_tristrip(&tlist, prev_edge, px, iy);
gpc_n_edge(next_edge, e1, ABOVE);
gpc_vertex_create(next_edge, ABOVE, RIGHT, nx, iy);
e1->outp[ABOVE] = prev_edge->outp[ABOVE];
gpc_vertex_create(e1, ABOVE, RIGHT, ix, iy);
new_tristrip(&tlist, e0, ix, iy);
next_edge->outp[ABOVE] = e0->outp[ABOVE];
gpc_vertex_create(next_edge, ABOVE, RIGHT, nx, iy);
}
break;
case EMM:
if (p && q) {
gpc_vertex_create(e0, ABOVE, LEFT, ix, iy);
new_tristrip(&tlist, e1, ix, iy);
e0->outp[ABOVE] = e1->outp[ABOVE];
}
break;
default:
break;
} /* End of switch */
} /* End of contributing intersection conditional */
// Swap bundle sides in response to edge crossing
if (e0->bundle[ABOVE][CLIP]) {
e1->bside[CLIP] = !e1->bside[CLIP];
}
if (e1->bundle[ABOVE][CLIP]) {
e0->bside[CLIP] = !e0->bside[CLIP];
}
if (e0->bundle[ABOVE][SUBJ]) {
e1->bside[SUBJ] = !e1->bside[SUBJ];
}
if (e1->bundle[ABOVE][SUBJ]) {
e0->bside[SUBJ] = !e0->bside[SUBJ];
}
/* Swap e0 and e1 bundles in the AET */
prev_edge = e0->prev;
next_edge = e1->next;
if (e1->next) {
e1->next->prev = e0;
}
if (e0->bstate[ABOVE] == BUNDLE_HEAD) {
search = 1;
while (search) {
prev_edge = prev_edge->prev;
if (prev_edge) {
if (prev_edge->bundle[ABOVE][CLIP] ||
prev_edge->bundle[ABOVE][SUBJ] ||
(prev_edge->bstate[ABOVE] == BUNDLE_HEAD)) {
search = 0;
}
} else {
search = 0;
}
}
}
if (!prev_edge) {
e1->next = aet;
aet = e0->next;
} else {
e1->next = prev_edge->next;
prev_edge->next = e0->next;
}
e0->next->prev = prev_edge;
e1->next->prev = e1;
e0->next = next_edge;
} /* End of IT loop*/
/* Prepare for next scanbeam */
for (edge = aet; edge; edge = next_edge) {
next_edge = edge->next;
succ_edge = edge->succ;
if ((edge->top.y == yt) && succ_edge) {
/* Replace AET edge by its successor */
succ_edge->outp[BELOW] = edge->outp[ABOVE];
succ_edge->bstate[BELOW] = edge->bstate[ABOVE];
succ_edge->bundle[BELOW][CLIP] = edge->bundle[ABOVE][CLIP];
succ_edge->bundle[BELOW][SUBJ] = edge->bundle[ABOVE][SUBJ];
prev_edge = edge->prev;
if (prev_edge) {
prev_edge->next = succ_edge;
} else {
aet = succ_edge;
}
if (next_edge) {
next_edge->prev = succ_edge;
}
succ_edge->prev = prev_edge;
succ_edge->next = next_edge;
} else {
/* Update this edge */
edge->outp[BELOW] = edge->outp[ABOVE];
edge->bstate[BELOW] = edge->bstate[ABOVE];
edge->bundle[BELOW][CLIP] = edge->bundle[ABOVE][CLIP];
edge->bundle[BELOW][SUBJ] = edge->bundle[ABOVE][SUBJ];
edge->xb = edge->xt;
}
edge->outp[ABOVE] = NULL;
}
}
} /* === END OF SCANBEAM PROCESSING ================================== */
// Generate result tristrip from tlist
result->strip = NULL;
result->num_strips = count_tristrips(tlist);
if (result->num_strips > 0) {
gpc_malloc<gpc_vertex_list>(result->strip,
result->num_strips * sizeof(gpc_vertex_list),
const_cast<char *>("tristrip list creation"));
s = 0;
for (tn = tlist; tn; tn = tnn) {
tnn = tn->next;
if (tn->active > 2) {
/* Valid tristrip: copy the vertices and free the heap */
result->strip[s].num_vertices = tn->active;
gpc_malloc<gpc_vertex>(result->strip[s].vertex,
tn->active * sizeof(gpc_vertex),
const_cast<char *>("tristrip creation"));
v = 0;
if (0) {
lt = tn->v[RIGHT];
rt = tn->v[LEFT];
} else {
lt = tn->v[LEFT];
rt = tn->v[RIGHT];
}
while (lt || rt) {
if (lt) {
ltn = lt->next;
result->strip[s].vertex[v].x = lt->x;
result->strip[s].vertex[v].y = lt->y;
v++;
gpc_free<vertex_node>(lt);
lt = ltn;
}
if (rt) {
rtn = rt->next;
result->strip[s].vertex[v].x = rt->x;
result->strip[s].vertex[v].y = rt->y;
v++;
gpc_free<vertex_node>(rt);
rt = rtn;
}
}
s++;
} else {
/* Invalid tristrip: just free the heap */
for (lt = tn->v[LEFT]; lt; lt = ltn) {
ltn = lt->next;
gpc_free<vertex_node>(lt);
}
for (rt = tn->v[RIGHT]; rt; rt = rtn) {
rtn = rt->next;
gpc_free<vertex_node>(rt);
}
}
gpc_free<polygon_node>(tn);
}
}
// Tidy up
reset_it(&it);
reset_lmt(&lmt);
gpc_free<edge_node>(c_heap);
gpc_free<edge_node>(s_heap);
gpc_free<double>(sbt);
} // NOLINT
} // namespace gpc
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef MULTICLASSNMS_OP
#pragma once
#include <float.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
namespace gpc {
typedef enum { // Set operation type
GPC_DIFF, // Difference
GPC_INT, // Intersection
GPC_XOR, // Exclusive or
GPC_UNION // Union
} gpc_op;
typedef struct { // Polygon vertex structure
double x; // Vertex x component
double y; // vertex y component
} gpc_vertex;
typedef struct { // Vertex list structure
int num_vertices; // Number of vertices in list
gpc_vertex *vertex; // Vertex array pointer
} gpc_vertex_list;
typedef struct { // Polygon set structure
int num_contours; // Number of contours in polygon
int *hole; // Hole external contour flags
gpc_vertex_list *contour; // Contour array pointer
} gpc_polygon;
typedef struct { // Tristrip set structure
int num_strips; // Number of tristrips
gpc_vertex_list *strip; // Tristrip array pointer
} gpc_tristrip;
typedef enum { LEFT, RIGHT } gpc_left_right;
typedef enum { ABOVE, BELOW } gpc_above_below;
typedef enum { CLIP, SUBJ } gpc_clip_subj;
typedef enum { /* Edge intersection classes */
NUL, /* Empty non-intersection */
EMX, /* External maximum */
ELI, /* External left intermediate */
TED, /* Top edge */
ERI, /* External right intermediate */
RED, /* Right edge */
IMM, /* Internal maximum and minimum */
IMN, /* Internal minimum */
EMN, /* External minimum */
EMM, /* External maximum and minimum */
LED, /* Left edge */
ILI, /* Internal left intermediate */
BED, /* Bottom edge */
IRI, /* Internal right intermediate */
IMX, /* Internal maximum */
FUL /* Full non-intersection */
} vertex_type;
typedef enum { /* Horizontal edge states */
NH, /* No horizontal edge */
BH, /* Bottom horizontal edge */
TH /* Top horizontal edge */
} h_state;
typedef enum { /* Edge bundle state */
UNBUNDLED, /* Isolated edge not within a bundle */
BUNDLE_HEAD, /* Bundle head node */
BUNDLE_TAIL /* Passive bundle tail node */
} bundle_state;
typedef struct v_shape { /* Internal vertex list datatype */
double x; /* X coordinate component */
double y; /* Y coordinate component */
struct v_shape *next; /* Pointer to next vertex in list */
} vertex_node;
typedef struct p_shape { /* Internal contour / tristrip type */
int active; /* Active flag / vertex count */
int hole; /* Hole / external contour flag */
vertex_node *v[2]; /* Left and right vertex list ptrs */
struct p_shape *next; /* Pointer to next polygon contour */
struct p_shape *proxy; /* Pointer to actual structure used */
} polygon_node;
typedef struct edge_shape {
gpc_vertex vertex; /* Piggy-backed contour vertex data */
gpc_vertex bot; /* Edge lower (x, y) coordinate */
gpc_vertex top; /* Edge upper (x, y) coordinate */
double xb; /* Scanbeam bottom x coordinate */
double xt; /* Scanbeam top x coordinate */
double dx; /* Change in x for a unit y increase */
int type; /* Clip / subject edge flag */
int bundle[2][2]; /* Bundle edge flags */
int bside[2]; /* Bundle left / right indicators */
bundle_state bstate[2]; /* Edge bundle state */
polygon_node *outp[2]; /* Output polygon / tristrip pointer */
struct edge_shape *prev; /* Previous edge in the AET */
struct edge_shape *next; /* Next edge in the AET */
struct edge_shape *pred; /* Edge connected at the lower end */
struct edge_shape *succ; /* Edge connected at the upper end */
struct edge_shape *next_bound; /* Pointer to next bound in LMT */
} edge_node;
inline bool gpc_eq(float a, float b) { return (fabs(a - b) <= 1e-6); }
inline bool gpc_prev_index(float a, float b) { return (fabs(a - b) <= 1e-6); }
inline int gpc_prev_index(int i, int n) { return ((i - 1 + n) % n); }
inline int gpc_next_index(int i, int n) { return ((i + 1) % n); }
inline int gpc_optimal(gpc_vertex *v, int i, int n) {
return (v[(i + 1) % n].y != v[i].y || v[(i - 1 + n) % n].y != v[i].y);
}
inline int gpc_fwd_min(edge_node *v, int i, int n) {
return (v[(i + 1) % n].vertex.y > v[i].vertex.y &&
v[(i - 1 + n) % n].vertex.y >= v[i].vertex.y);
}
inline int gpc_not_fmax(edge_node *v, int i, int n) {
return (v[(i + 1) % n].vertex.y > v[i].vertex.y);
}
inline int gpc_rev_min(edge_node *v, int i, int n) {
return (v[(i + 1) % n].vertex.y >= v[i].vertex.y &&
v[(i - 1 + n) % n].vertex.y > v[i].vertex.y);
}
inline int gpc_not_rmax(edge_node *v, int i, int n) {
return (v[(i - 1 + n) % n].vertex.y > v[i].vertex.y);
}
// inline void gpc_p_edge(edge_node *d, edge_node *e, int p, double i, double j)
// {
inline void gpc_p_edge(edge_node *d, edge_node *e, int p) {
d = e;
do {
d = d->prev;
} while (!d->outp[p]);
// i = d->bot.x + d->dx * (j - d->bot.y);
}
// inline void gpc_n_edge(edge_node *d, edge_node *e, int p, double i, double j)
// {
inline void gpc_n_edge(edge_node *d, edge_node *e, int p) {
d = e;
do {
d = d->next;
} while (!d->outp[p]);
// i = d->bot.x + d->dx * (j - d->bot.y);
}
template <typename T>
void gpc_malloc(T *&p, int b, char *s) { // NOLINT
if (b > 0) {
p = reinterpret_cast<T *>(malloc(b));
if (!p) {
fprintf(stderr, "gpc malloc failure: %s\n", s);
exit(0);
}
} else {
p = NULL;
}
}
template <typename T>
void gpc_free(T *&p) { // NOLINT
if (p) {
free(p);
p = NULL;
}
}
/*
===========================================================================
Public Function Prototypes
===========================================================================
*/
void add_vertex(vertex_node **t, double x, double y);
void gpc_vertex_create(edge_node *e, int p, int s, double x, double y);
void gpc_add_contour(gpc_polygon *polygon, gpc_vertex_list *contour, int hole);
void gpc_polygon_clip(gpc_op set_operation, gpc_polygon *subject_polygon,
gpc_polygon *clip_polygon, gpc_polygon *result_polygon);
void gpc_tristrip_clip(gpc_op set_operation, gpc_polygon *subject_polygon,
gpc_polygon *clip_polygon,
gpc_tristrip *result_tristrip);
void gpc_polygon_to_tristrip(gpc_polygon *polygon, gpc_tristrip *tristrip);
void gpc_free_polygon(gpc_polygon *polygon);
void gpc_free_tristrip(gpc_tristrip *tristrip);
} // namespace gpc
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef MULTICLASSNMS_OP
#include "operators/math/poly_util.h"
namespace paddle_mobile {
namespace operators {
namespace math {
template <class T>
void Array2PointVec(const T* box, const size_t box_size,
std::vector<Point_<T>>* vec) {
size_t pts_num = box_size / 2;
vec->resize(pts_num);
for (size_t i = 0; i < pts_num; i++) {
vec->at(i).x = box[2 * i];
vec->at(i).y = box[2 * i + 1];
}
}
template <class T>
void Array2Poly(const T* box, const size_t box_size, gpc::gpc_polygon* poly) {
size_t pts_num = box_size / 2;
poly->num_contours = 1;
poly->hole = reinterpret_cast<int*>(malloc(sizeof(int)));
poly->hole[0] = 0;
poly->contour = (gpc::gpc_vertex_list*)malloc(sizeof(gpc::gpc_vertex_list));
poly->contour->num_vertices = pts_num;
poly->contour->vertex =
(gpc::gpc_vertex*)malloc(sizeof(gpc::gpc_vertex) * pts_num);
for (size_t i = 0; i < pts_num; ++i) {
poly->contour->vertex[i].x = box[2 * i];
poly->contour->vertex[i].y = box[2 * i + 1];
}
}
template void Array2Poly(const float* box, const size_t box_size,
gpc::gpc_polygon* poly);
template <class T>
void Poly2PointVec(const gpc::gpc_vertex_list& contour,
std::vector<Point_<T>>* vec) {
int pts_num = contour.num_vertices;
vec->resize(pts_num);
for (size_t i = 0; i < pts_num; i++) {
vec->at(i).x = contour.vertex[i].x;
vec->at(i).y = contour.vertex[i].y;
}
}
template <class T>
T GetContourArea(const std::vector<Point_<T>>& vec) {
int pts_num = vec.size();
if (pts_num < 3) return T(0.);
T area = T(0.);
for (size_t i = 0; i < pts_num; ++i) {
area += vec[i].x * vec[(i + 1) % pts_num].y -
vec[i].y * vec[(i + 1) % pts_num].x;
}
return fabs(area / 2.0);
}
template <class T>
T PolyArea(const T* box, const size_t box_size, const bool normalized) {
// If coordinate values are is invalid
// if area size <= 0, return 0.
std::vector<Point_<T>> vec;
Array2PointVec<T>(box, box_size, &vec);
return GetContourArea<T>(vec);
}
template float PolyArea(const float* box, const size_t box_size,
const bool normalized);
template <class T>
T PolyOverlapArea(const T* box1, const T* box2, const size_t box_size,
const bool normalized) {
gpc::gpc_polygon poly1;
gpc::gpc_polygon poly2;
Array2Poly<T>(box1, box_size, &poly1);
Array2Poly<T>(box2, box_size, &poly2);
gpc::gpc_polygon respoly;
gpc::gpc_op op = gpc::GPC_INT;
gpc::gpc_polygon_clip(op, &poly2, &poly1, &respoly);
T inter_area = T(0.);
int contour_num = respoly.num_contours;
for (int i = 0; i < contour_num; ++i) {
std::vector<Point_<T>> resvec;
Poly2PointVec<T>(respoly.contour[i], &resvec);
inter_area += GetContourArea<T>(resvec);
}
gpc::gpc_free_polygon(&poly1);
gpc::gpc_free_polygon(&poly2);
gpc::gpc_free_polygon(&respoly);
return inter_area;
}
template float PolyOverlapArea(const float* box1, const float* box2,
const size_t box_size, const bool normalized);
} // namespace math
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef MULTICLASSNMS_OP
#pragma once
#include <vector>
#include "operators/math/gpc.h"
namespace paddle_mobile {
namespace operators {
namespace math {
template <class T>
class Point_ {
public:
// default constructor
Point_() {}
Point_(T _x, T _y) {}
Point_(const Point_& pt) {}
Point_& operator=(const Point_& pt);
// conversion to another data type
// template<typename _T> operator Point_<_T>() const;
// conversion to the old-style C structures
// operator Vec<T, 2>() const;
// checks whether the point is inside the specified rectangle
// bool inside(const Rect_<T>& r) const;
T x; //!< x coordinate of the point
T y; //!< y coordinate of the point
};
template <class T>
void Array2PointVec(const T* box, const size_t box_size,
std::vector<Point_<T>>* vec);
template <class T>
void Array2Poly(const T* box, const size_t box_size, gpc::gpc_polygon* poly);
template <class T>
void Poly2PointVec(const gpc::gpc_vertex_list& contour,
std::vector<Point_<T>>* vec);
template <class T>
T GetContourArea(const std::vector<Point_<T>>& vec);
template <class T>
T PolyArea(const T* box, const size_t box_size, const bool normalized);
template <class T>
T PolyOverlapArea(const T* box1, const T* box2, const size_t box_size,
const bool normalized);
} // namespace math
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "framework/selected_rows.h"
#define INLINE_FOR2(sizei, sizej) \
for (int64_t i = 0; i < sizei; i++) \
for (int64_t j = 0; j < sizej; j++)
namespace paddle_mobile {
namespace operators {
namespace math {
// SelectedRows + SelectedRows will simplely concat value and rows.
// The real computation happens in dealing with LoDTensor.
// template <typename T>
// struct SelectedRowsAdd {
// void operator()(
// const framework::SelectedRows& input1,
// const framework::SelectedRows& input2,
// framework::SelectedRows* output);
//};
//
// template <typename T>
// struct SelectedRowsAddTensor {
// void operator()(
// const framework::SelectedRows& input1,
// const framework::Tensor& input2, framework::Tensor* output);
//};
// input2 = input1 + input2
template <typename T>
struct SelectedRowsAddTo {
void operator()(const framework::SelectedRows& input1,
const int64_t input2_offset,
framework::SelectedRows* input2) {
auto in1_height = input1.height();
PADDLE_MOBILE_ENFORCE(in1_height == input2->height(), "height error");
auto& in1_rows = input1.rows();
auto& in2_rows = *(input2->mutable_rows());
auto& in1_value = input1.value();
auto* in2_value = input2->mutable_value();
// concat rows
in2_rows.Extend(in1_rows.begin(), in1_rows.end());
// auto in1_place = input1.place();
// PADDLE_ENFORCE(platform::is_cpu_place(in1_place));
// auto in2_place = input2->place();
// PADDLE_ENFORCE(platform::is_cpu_place(in2_place));
auto* in1_data = in1_value.data<T>();
auto* in2_data = in2_value->data<T>();
memory::Copy(in2_data + input2_offset, in1_data,
in1_value.numel() * sizeof(T));
}
};
// input2 = input1 + input2
template <typename T>
struct SelectedRowsAddToTensor {
void operator()(const framework::SelectedRows& input1,
framework::Tensor* input2) {
auto in1_height = input1.height();
auto in2_dims = input2->dims();
PADDLE_MOBILE_ENFORCE(in1_height == in2_dims[0], "height != dims[0]");
auto& in1_value = input1.value();
auto& in1_rows = input1.rows();
int64_t in1_row_numel = in1_value.numel() / in1_rows.size();
PADDLE_MOBILE_ENFORCE(in1_row_numel == input2->numel() / in1_height,
"row_numel error");
auto* in1_data = in1_value.data<T>();
auto* input2_data = input2->data<T>();
for (size_t i = 0; i < in1_rows.size(); i++) {
for (int64_t j = 0; j < in1_row_numel; j++) {
input2_data[in1_rows[i] * in1_row_numel + j] +=
in1_data[i * in1_row_numel + j];
}
}
}
};
// namespace scatter {
//// functors for manuplating SelectedRows data
// template <typename T>
// struct MergeAdd {
// // unary functor, merge by adding duplicated rows in
// // the input SelectedRows object.
// framework::SelectedRows operator()(
// const framework::SelectedRows& input);
//};
// template <typename T>
// struct Add {
// framework::SelectedRows operator()(
// const framework::SelectedRows& input1,
// const framework::SelectedRows& input2) {
// framework::SelectedRows out;
// out.set_rows(input1.rows());
// out.set_height(input1.height());
// out.mutable_value()->mutable_data<T>(input1.value().dims(),
// );
// auto e_out = framework::EigenVector<T>::Flatten(*(out.mutable_value()));
// auto e_in1 = framework::EigenVector<T>::Flatten(input1.value());
// auto e_in2 = framework::EigenVector<T>::Flatten(input2.value());
// e_out.device(*context.eigen_device()) = e_in1 + e_in2;
// return out;
// }
//};
// template <typename T>
// struct Mul {
// // multiply two SelectedRows
// framework::SelectedRows operator()(
// const framework::SelectedRows& input1,
// const framework::SelectedRows& input2) {
// framework::SelectedRows out;
// out.set_rows(input1.rows());
// out.set_height(input1.height());
// out.mutable_value()->mutable_data<T>(input1.value().dims()
// );
// auto e_out = framework::EigenVector<T>::Flatten(*(out.mutable_value()));
// auto e_in1 = framework::EigenVector<T>::Flatten(input1.value());
// auto e_in2 = framework::EigenVector<T>::Flatten(input2.value());
// e_out.device(*context.eigen_device()) = e_in1 * e_in2;
// return out;
// }
// // multiply scalar to SelectedRows
// framework::SelectedRows operator()(
// const framework::SelectedRows& input1,
// const T input2) {
// framework::SelectedRows out;
// out.set_rows(input1.rows());
// out.set_height(input1.height());
// out.mutable_value()->mutable_data<T>(input1.value().dims(),
// );
// auto e_out = framework::EigenVector<T>::Flatten(*(out.mutable_value()));
// auto e_in1 = framework::EigenVector<T>::Flatten(input1.value());
// e_out.device(*context.eigen_device()) = input2 * e_in1;
// return out;
// }
//};
enum class ScatterOps { ASSIGN, ADD, SUB, SUBBY, MUL, DIV, DIVBY };
// out = seleted_rows_in / tensor
template <typename T>
struct UpdateToTensor {
void operator()(const ScatterOps& op, const framework::SelectedRows& input1,
framework::Tensor* input2);
};
// namespace scatter
} // namespace math
} // namespace operators
} // namespace paddle_mobile
...@@ -25,14 +25,15 @@ void MultiClassNMSOp<Dtype, T>::InferShape() const { ...@@ -25,14 +25,15 @@ void MultiClassNMSOp<Dtype, T>::InferShape() const {
if (input_scores_dims.size() != 3) { if (input_scores_dims.size() != 3) {
LOG(kLOG_ERROR) << "Input Scores size must be 3"; LOG(kLOG_ERROR) << "Input Scores size must be 3";
} }
if (input_bboxes_dims[2] != 4) { if (input_bboxes_dims[2] % 4 != 0 || input_bboxes_dims[2] < 4) {
LOG(kLOG_ERROR) << "Input BBoxes 2nd dimension must be 4"; LOG(kLOG_ERROR) << "Input BBoxes 2nd dimension must be multiples of 4";
} }
if (input_bboxes_dims[1] != input_scores_dims[2]) { if (input_bboxes_dims[1] != input_scores_dims[2]) {
LOG(kLOG_ERROR) << "Predict bboxes must be equal"; LOG(kLOG_ERROR) << "Predict bboxes must be equal";
} }
// pre size, will change in Compute. // pre size, will change in Compute.
this->param_.Out()->Resize(framework::make_ddim({input_bboxes_dims[1], 6})); this->param_.Out()->Resize(
framework::make_ddim({input_bboxes_dims[1], input_bboxes_dims[2] + 2}));
} }
} // namespace operators } // namespace operators
......
...@@ -35,6 +35,7 @@ using framework::AttributeMap; ...@@ -35,6 +35,7 @@ using framework::AttributeMap;
using framework::LoDTensor; using framework::LoDTensor;
using framework::Scope; using framework::Scope;
using framework::Tensor; using framework::Tensor;
using framework::Variable;
using std::string; using std::string;
using std::vector; using std::vector;
...@@ -182,6 +183,11 @@ class OpParam { ...@@ -182,6 +183,11 @@ class OpParam {
return GetMultiVarValue<T>("X", inputs, scope); return GetMultiVarValue<T>("X", inputs, scope);
} }
static vector<Variable *> InputMultiVarsFrom(const VariableNameMap &inputs,
const Scope &scope) {
return GetMultiVar("X", inputs, scope);
}
template <typename T> template <typename T>
static T *OutputBatchGateFrom(const VariableNameMap &outputs, static T *OutputBatchGateFrom(const VariableNameMap &outputs,
const Scope &scope) { const Scope &scope) {
...@@ -216,6 +222,11 @@ class OpParam { ...@@ -216,6 +222,11 @@ class OpParam {
return GetVarValue<T>("Output", outputs, scope); return GetVarValue<T>("Output", outputs, scope);
} }
static Variable *OutVarFrom(const VariableNameMap &outputs,
const Scope &scope) {
return GetVar("Out", outputs, scope);
}
template <typename T> template <typename T>
static T *OutFrom(const VariableNameMap &outputs, const Scope &scope) { static T *OutFrom(const VariableNameMap &outputs, const Scope &scope) {
return GetVarValue<T>("Out", outputs, scope); return GetVarValue<T>("Out", outputs, scope);
...@@ -286,6 +297,19 @@ class OpParam { ...@@ -286,6 +297,19 @@ class OpParam {
} }
} }
static Variable *GetVar(const string &key, const VariableNameMap &var_map,
const Scope &scope) {
PADDLE_MOBILE_ENFORCE(var_map.count(key) > 0,
"%s is not contained in var_map", key.c_str())
auto var_vec = var_map.at(key);
if (!var_vec.empty()) {
auto var = scope.FindVar(var_vec[0]);
return var;
} else {
return nullptr;
}
}
static std::string getkey(const string &key, const VariableNameMap &var_map, static std::string getkey(const string &key, const VariableNameMap &var_map,
int index) { int index) {
auto var_vec = var_map.at(key); auto var_vec = var_map.at(key);
...@@ -319,6 +343,19 @@ class OpParam { ...@@ -319,6 +343,19 @@ class OpParam {
} }
return var_res; return var_res;
} }
static vector<Variable *> GetMultiVar(const string &key,
const VariableNameMap &var_map,
const Scope &scope) {
auto var_vecs = var_map.at(key);
assert(var_vecs.size() > 1);
vector<Variable *> var_res;
for (auto &var_vec : var_vecs) {
auto var = scope.FindVar(var_vec);
var_res.push_back(var);
}
return var_res;
}
}; };
template <typename Dtype> template <typename Dtype>
...@@ -405,11 +442,75 @@ class ElementwiseAddParam : OpParam { ...@@ -405,11 +442,75 @@ class ElementwiseAddParam : OpParam {
#endif #endif
}; };
#ifdef ELEMENTWISEMUL_OP
template <typename Dtype>
class ElementwiseMulParam : OpParam {
typedef typename DtypeTensorTrait<Dtype>::gtype GType;
typedef typename DtypeTensorTrait<Dtype>::rtype RType;
public:
ElementwiseMulParam(const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
const Scope &scope) {
input_x_ = InputXFrom<GType>(inputs, scope);
input_y_ = InputYFrom<GType>(inputs, scope);
out_ = OutFrom<GType>(outputs, scope);
axis_ = GetAttr<int>("axis", attrs);
}
const GType *InputX() const { return input_x_; }
const GType *InputY() const { return input_y_; }
GType *Out() const { return out_; }
const int &Axis() const { return axis_; }
private:
GType *input_x_;
GType *input_y_;
GType *out_;
int axis_;
};
#endif
#ifdef FUSION_ELEMENTWISEADDRELU_OP #ifdef FUSION_ELEMENTWISEADDRELU_OP
template <typename Dtype> template <typename Dtype>
using ElementwiseAddReluParam = ElementwiseAddParam<Dtype>; using ElementwiseAddReluParam = ElementwiseAddParam<Dtype>;
#endif #endif
#ifdef ELEMENTWISESUB_OP
template <typename Dtype>
class ElementwiseSubParam : OpParam {
typedef typename DtypeTensorTrait<Dtype>::gtype GType;
typedef typename DtypeTensorTrait<Dtype>::rtype RType;
public:
ElementwiseSubParam(const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
const Scope &scope) {
input_x_ = InputXFrom<GType>(inputs, scope);
input_y_ = InputYFrom<GType>(inputs, scope);
out_ = OutFrom<GType>(outputs, scope);
axis_ = GetAttr<int>("axis", attrs);
}
const GType *InputX() const { return input_x_; }
const GType *InputY() const { return input_y_; }
GType *Out() const { return out_; }
const int &Axis() const { return axis_; }
private:
GType *input_x_;
GType *input_y_;
GType *out_;
int axis_;
};
#endif
#ifdef MUL_OP #ifdef MUL_OP
template <typename Dtype> template <typename Dtype>
class MulParam : OpParam { class MulParam : OpParam {
...@@ -445,11 +546,11 @@ class MulParam : OpParam { ...@@ -445,11 +546,11 @@ class MulParam : OpParam {
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
private: private:
fpga::WrapperConvArgs fpga_conv_args; fpga::SplitConvArgs fpga_conv_args;
public: public:
const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; }
#endif #endif
}; };
#endif #endif
...@@ -490,6 +591,37 @@ class ConcatParam : public OpParam { ...@@ -490,6 +591,37 @@ class ConcatParam : public OpParam {
}; };
#endif #endif
#ifdef SUM_OP
template <typename Dtype>
class SumParam : public OpParam {
typedef typename DtypeTensorTrait<Dtype>::gtype GType;
typedef typename DtypeTensorTrait<Dtype>::rtype RType;
public:
SumParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const AttributeMap &attrs, const Scope &scope) {
inputs_vars_ = InputMultiVarsFrom(inputs, scope);
out_var_ = OutVarFrom(outputs, scope);
inputs_ = InputMultiFrom<GType>(inputs, scope);
out_ = OutFrom<GType>(outputs, scope);
}
vector<Variable *> InputsVars() const { return inputs_vars_; }
Variable *OutVar() const { return out_var_; }
vector<GType *> Inputs() const { return inputs_; }
GType *Out() const { return out_; }
private:
vector<Variable *> inputs_vars_;
Variable *out_var_;
vector<GType *> inputs_;
GType *out_;
};
#endif
#ifdef LRN_OP #ifdef LRN_OP
template <typename Dtype> template <typename Dtype>
class LrnParam : public OpParam { class LrnParam : public OpParam {
...@@ -1269,11 +1401,11 @@ class FusionFcParam : public OpParam { ...@@ -1269,11 +1401,11 @@ class FusionFcParam : public OpParam {
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
private: private:
fpga::WrapperConvArgs fpga_conv_args; fpga::SplitConvArgs fpga_conv_args;
public: public:
const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; }
#endif #endif
}; };
...@@ -1309,11 +1441,11 @@ class FusionConvAddParam : public ConvParam<Dtype> { ...@@ -1309,11 +1441,11 @@ class FusionConvAddParam : public ConvParam<Dtype> {
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
private: private:
fpga::WrapperConvArgs fpga_conv_args; fpga::SplitConvArgs fpga_conv_args;
public: public:
const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; }
#endif #endif
}; };
...@@ -1364,11 +1496,11 @@ class FusionConvAddPReluParam : public ConvParam<Dtype> { ...@@ -1364,11 +1496,11 @@ class FusionConvAddPReluParam : public ConvParam<Dtype> {
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
private: private:
fpga::WrapperConvArgs fpga_conv_args; fpga::SplitConvArgs fpga_conv_args;
public: public:
const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; }
#endif #endif
}; };
#endif #endif
...@@ -1422,11 +1554,11 @@ class FusionConvAddAddPReluParam : public ConvParam<Dtype> { ...@@ -1422,11 +1554,11 @@ class FusionConvAddAddPReluParam : public ConvParam<Dtype> {
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
private: private:
fpga::WrapperConvArgs fpga_conv_args; fpga::SplitConvArgs fpga_conv_args;
public: public:
const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; }
#endif #endif
}; };
#endif #endif
...@@ -1497,11 +1629,11 @@ class FusionConvAddBNReluParam : public ConvParam<Dtype> { ...@@ -1497,11 +1629,11 @@ class FusionConvAddBNReluParam : public ConvParam<Dtype> {
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
private: private:
fpga::WrapperConvArgs fpga_conv_args; fpga::SplitConvArgs fpga_conv_args;
public: public:
const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; }
#endif #endif
}; };
#endif #endif
...@@ -1583,11 +1715,11 @@ class FusionConvBNAddReluParam : public ConvParam<Dtype> { ...@@ -1583,11 +1715,11 @@ class FusionConvBNAddReluParam : public ConvParam<Dtype> {
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
private: private:
fpga::WrapperConvArgs fpga_conv_args; fpga::SplitConvArgs fpga_conv_args;
public: public:
const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; }
#endif #endif
}; };
#endif #endif
...@@ -1650,11 +1782,11 @@ class FusionConvBNParam : public ConvParam<Dtype> { ...@@ -1650,11 +1782,11 @@ class FusionConvBNParam : public ConvParam<Dtype> {
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
private: private:
fpga::WrapperConvArgs fpga_conv_args; fpga::SplitConvArgs fpga_conv_args;
public: public:
const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; }
#endif #endif
}; };
#endif #endif
...@@ -1725,11 +1857,11 @@ class FusionConvAddBNParam : public ConvParam<Dtype> { ...@@ -1725,11 +1857,11 @@ class FusionConvAddBNParam : public ConvParam<Dtype> {
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
private: private:
fpga::WrapperConvArgs fpga_conv_args; fpga::SplitConvArgs fpga_conv_args;
public: public:
const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; }
#endif #endif
}; };
#endif #endif
...@@ -1851,11 +1983,11 @@ class FusionConvBNReluParam : public ConvParam<Dtype> { ...@@ -1851,11 +1983,11 @@ class FusionConvBNReluParam : public ConvParam<Dtype> {
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
private: private:
fpga::WrapperConvArgs fpga_conv_args; fpga::SplitConvArgs fpga_conv_args;
public: public:
const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; }
#endif #endif
}; };
#endif #endif
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SUM_OP
#include <vector>
#include "operators/sum_op.h"
namespace paddle_mobile {
namespace operators {
template <typename Dtype, typename T>
void SumOp<Dtype, T>::InferShape() const {
auto inputs = this->param_.Inputs();
const size_t n = inputs.size();
std::vector<framework::DDim> inputs_dims;
inputs_dims.reserve(n);
for (int i = 0; i < n; i++) {
inputs_dims.push_back(inputs[i]->dims());
}
if (n == 1) {
DLOG << "Warning: sum op have only one input, "
"may waste memory";
}
framework::DDim in_dim({0});
for (auto& x_dim : inputs_dims) {
if (framework::product(x_dim) == 0) {
continue;
}
if (framework::product(in_dim) == 0) {
in_dim = x_dim;
} else {
PADDLE_MOBILE_ENFORCE(in_dim == x_dim,
"input tensors must have same shape");
}
}
this->param_.Out()->Resize(in_dim);
}
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(sum, ops::SumOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(sum, ops::ConcatOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SUM_OP
#pragma once
#include <string>
#include "framework/operator.h"
#include "operators/kernel/sum_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
using std::string;
template <typename DeviceType, typename T>
class SumOp : public framework::OperatorWithKernel<
DeviceType, SumParam<DeviceType>,
operators::SumKernel<DeviceType, T>> {
public:
SumOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType, SumParam<DeviceType>,
operators::SumKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, SumParam<DeviceType>,
operators::SumKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
protected:
};
} // namespace operators
} // namespace paddle_mobile
#endif
...@@ -61,38 +61,11 @@ endif () ...@@ -61,38 +61,11 @@ endif ()
list(FIND NET "FPGAnets" CON) list(FIND NET "FPGAnets" CON)
if (CON GREATER -1) if (CON GREATER -1)
ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-resnet paddle-mobile)
ADD_EXECUTABLE(test-resnet50 fpga/test_resnet50.cpp test_helper.h test_include.h executor_for_test.h) ADD_EXECUTABLE(test-resnet50 fpga/test_resnet50.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-resnet50 paddle-mobile) target_link_libraries(test-resnet50 paddle-mobile)
ADD_EXECUTABLE(test-fpga-EW fpga/test_fpga_EW.cpp test_helper.h test_include.h executor_for_test.h) # ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-fpga-EW paddle-mobile) # target_link_libraries(test-resnet paddle-mobile)
ADD_EXECUTABLE(test-fpga-conv fpga/test_fpga_conv.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-fpga-conv paddle-mobile)
ADD_EXECUTABLE(test-fpga-pooling fpga/test_fpga_pooling.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-fpga-pooling paddle-mobile)
ADD_EXECUTABLE(test-fpga-bypass fpga/test_fpga_bypass.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-fpga-bypass paddle-mobile)
ADD_EXECUTABLE(test-fpga-softmax fpga/test_fpga_softmax.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-fpga-softmax paddle-mobile)
ADD_EXECUTABLE(test-fpga-concat fpga/test_fpga_concat.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-fpga-concat paddle-mobile)
ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-tensor-quant paddle-mobile)
ADD_EXECUTABLE(test-fpga-concat-op fpga/test_concat_op.cpp test_helper.h test_include.h)
target_link_libraries(test-fpga-concat-op paddle-mobile)
ADD_EXECUTABLE(test-format-data fpga/test_format_data.cpp test_helper.h test_include.h)
target_link_libraries(test-format-data paddle-mobile)
set(FOUND_MATCH ON) set(FOUND_MATCH ON)
endif () endif ()
...@@ -172,6 +145,14 @@ if (NOT FOUND_MATCH) ...@@ -172,6 +145,14 @@ if (NOT FOUND_MATCH)
ADD_EXECUTABLE(test-elementwiseadd-op operators/test_elementwise_add_op.cpp test_helper.h test_include.h) ADD_EXECUTABLE(test-elementwiseadd-op operators/test_elementwise_add_op.cpp test_helper.h test_include.h)
target_link_libraries(test-elementwiseadd-op paddle-mobile) target_link_libraries(test-elementwiseadd-op paddle-mobile)
# gen test
ADD_EXECUTABLE(test-elementwisesub-op operators/test_elementwise_sub_op.cpp test_helper.h test_include.h)
target_link_libraries(test-elementwisesub-op paddle-mobile)
# gen test
ADD_EXECUTABLE(test-im2sequence-op operators/test_im2sequence_op.cpp test_helper.h test_include.h)
target_link_libraries(test-im2sequence-op paddle-mobile)
# gen test # gen test
ADD_EXECUTABLE(test-concat-op operators/test_concat_op.cpp test_helper.h test_include.h) ADD_EXECUTABLE(test-concat-op operators/test_concat_op.cpp test_helper.h test_include.h)
target_link_libraries(test-concat-op paddle-mobile) target_link_libraries(test-concat-op paddle-mobile)
...@@ -212,6 +193,10 @@ if (NOT FOUND_MATCH) ...@@ -212,6 +193,10 @@ if (NOT FOUND_MATCH)
ADD_EXECUTABLE(test-fc-op operators/test_fusion_fc_op.cpp test_helper.h test_include.h) ADD_EXECUTABLE(test-fc-op operators/test_fusion_fc_op.cpp test_helper.h test_include.h)
target_link_libraries(test-fc-op paddle-mobile) target_link_libraries(test-fc-op paddle-mobile)
# gen test
ADD_EXECUTABLE(test-sum-op operators/test_sum_op.cpp test_helper.h test_include.h)
target_link_libraries(test-sum-op paddle-mobile)
# test quantize op # test quantize op
ADD_EXECUTABLE(test-quantize-op operators/test_quantize_op.cpp test_helper.h test_include.h) ADD_EXECUTABLE(test-quantize-op operators/test_quantize_op.cpp test_helper.h test_include.h)
target_link_libraries(test-quantize-op paddle-mobile) target_link_libraries(test-quantize-op paddle-mobile)
......
...@@ -43,7 +43,7 @@ template <typename DeviceType, typename OpType> ...@@ -43,7 +43,7 @@ template <typename DeviceType, typename OpType>
class Executor4Test : public Executor<DeviceType> { class Executor4Test : public Executor<DeviceType> {
public: public:
Executor4Test(Program<DeviceType> p, string op_type, Executor4Test(Program<DeviceType> p, string op_type,
bool use_optimize = false, int predict_op_count = 1) bool use_optimize = false)
: Executor<DeviceType>() { : Executor<DeviceType>() {
this->use_optimize_ = use_optimize; this->use_optimize_ = use_optimize;
this->program_ = p; this->program_ = p;
...@@ -64,7 +64,7 @@ class Executor4Test : public Executor<DeviceType> { ...@@ -64,7 +64,7 @@ class Executor4Test : public Executor<DeviceType> {
std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops(); std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
for (int i = 0; i < ops.size(); ++i) { for (int i = 0; i < ops.size(); ++i) {
auto op = ops[i]; auto op = ops[i];
if (op->Type() == op_type && i < predict_op_count) { if (op->Type() == op_type) {
DLOG << "匹配到: " << op->Type(); DLOG << "匹配到: " << op->Type();
/// test first meeting op in program /// test first meeting op in program
...@@ -74,6 +74,7 @@ class Executor4Test : public Executor<DeviceType> { ...@@ -74,6 +74,7 @@ class Executor4Test : public Executor<DeviceType> {
op->Type(), op->GetInputs(), op->GetOutputs(), op->Type(), op->GetInputs(), op->GetOutputs(),
op->GetAttrMap(), this->program_.scope); op->GetAttrMap(), this->program_.scope);
this->ops_of_block_[*block_desc.get()].push_back(op_ptr); this->ops_of_block_[*block_desc.get()].push_back(op_ptr);
break;
} }
} }
} }
......
...@@ -30,7 +30,11 @@ int main() { ...@@ -30,7 +30,11 @@ int main() {
input_tensor.data<float>() + input_tensor.numel()); input_tensor.data<float>() + input_tensor.numel());
paddle_mobile.FeedData(input_tensor); paddle_mobile.FeedData(input_tensor);
for (int i = 0; i < 1000; i++) {
paddle_mobile.Predict_To(-1); paddle_mobile.Predict_To(-1);
if (i % 100 == 0) std::cout << i << std::endl;
}
// paddle_mobile.Predict_From(73); // paddle_mobile.Predict_From(73);
// paddle_mobile.Predict_From_To(72, 73); // paddle_mobile.Predict_From_To(72, 73);
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "../test_helper.h"
#include "../test_include.h"
#include "operators/elementwise_sub_op.h"
namespace paddle_mobile {
namespace framework {
template <typename Dtype>
class TestElementwiseSubOp {
public:
explicit TestElementwiseSubOp(const Program<Dtype> p) : program_(p) {
if (use_optimize_) {
to_predict_program_ = program_.optimizeProgram;
} else {
to_predict_program_ = program_.originProgram;
}
const std::vector<std::shared_ptr<BlockDesc>> blocks =
to_predict_program_->Blocks();
// DLOG << " **block size " << blocks.size();
for (int i = 0; i < blocks.size(); ++i) {
std::shared_ptr<BlockDesc> block_desc = blocks[i];
std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
// DLOG << " ops " << ops.size();
for (int j = 0; j < ops.size(); ++j) {
std::shared_ptr<OpDesc> op = ops[j];
if (op->Type() == "elementwise_sub" &&
op->Input("X")[0] == "sigmoid_1.tmp_0") {
DLOG << " elementwise_sub attr size: " << op->GetAttrMap().size();
DLOG << " inputs size: " << op->GetInputs().size();
DLOG << " outputs size: " << op->GetOutputs().size();
std::shared_ptr<operators::ElementwiseSubOp<Dtype, float>> lrn =
std::make_shared<operators::ElementwiseSubOp<Dtype, float>>(
op->Type(), op->GetInputs(), op->GetOutputs(),
op->GetAttrMap(), program_.scope);
ops_of_block_[*block_desc.get()].push_back(lrn);
}
}
}
}
std::shared_ptr<Tensor> predict_bn(const Tensor &t1, const Tensor &t2) {
// feed
auto scope = program_.scope;
Variable *x1_feed_value = scope->Var("tmp_0");
auto tensor_x1 = x1_feed_value->GetMutable<LoDTensor>();
tensor_x1->ShareDataWith(t1);
Variable *x2_feed_value = scope->Var("sigmoid_1.tmp_0");
auto tensor_x2 = x2_feed_value->GetMutable<LoDTensor>();
tensor_x2->ShareDataWith(t2);
Variable *output = scope->Var("tmp_1");
auto *output_tensor = output->GetMutable<LoDTensor>();
output_tensor->mutable_data<float>({1, 1, 6, 6});
// DLOG << typeid(output_tensor).name();
// DLOG << "output_tensor dims: " << output_tensor->dims();
std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
out_tensor.reset(output_tensor);
predict_bn(t1, t2, 0);
return out_tensor;
}
private:
const framework::Program<Dtype> program_;
std::shared_ptr<ProgramDesc> to_predict_program_;
std::map<framework::BlockDesc,
std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
ops_of_block_;
bool use_optimize_ = false;
void predict_bn(const Tensor &t1, const Tensor &t2, int block_id) {
std::shared_ptr<BlockDesc> to_predict_block =
to_predict_program_->Block(block_id);
for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
auto op = ops_of_block_[*to_predict_block.get()][j];
DLOG << "op -> run()";
op->Run();
}
}
};
template class TestElementwiseSubOp<CPU>;
} // namespace framework
} // namespace paddle_mobile
int main() {
DLOG << "----------**********----------";
DLOG << "begin to run ElementwiseSub Test";
paddle_mobile::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string(g_ocr) + "/model",
std::string(g_ocr) + "/params");
/// input x1 (1,1,6,6)
paddle_mobile::framework::Tensor inputx1;
SetupTensor<float>(&inputx1, {1, 1, 6, 6}, static_cast<float>(0),
static_cast<float>(1));
auto *inputx1_ptr = inputx1.data<float>();
/// input x2 (1,1,6,6)
paddle_mobile::framework::Tensor inputx2;
SetupTensor<float>(&inputx2, {1, 1, 6, 6}, static_cast<float>(0),
static_cast<float>(1));
auto *inputx2_ptr = inputx2.data<float>();
paddle_mobile::framework::TestElementwiseSubOp<paddle_mobile::CPU>
testElementwiseSubOp(program);
auto output_op = testElementwiseSubOp.predict_bn(inputx1, inputx2);
auto *output_op_ptr = output_op->data<float>();
auto inputx1_dim = inputx1.numel() / inputx1.dims()[0];
DLOG << " input1 : ";
for (int i = 0; i < inputx1.dims()[0]; ++i) {
for (int j = 0; j < inputx1_dim; ++j) {
DLOGF("%f ", inputx1_ptr[i * inputx1_dim + j]);
}
DLOGF("\n");
}
auto inputx2_dim = inputx2.numel() / inputx2.dims()[0];
DLOG << " input2 : ";
for (int i = 0; i < inputx2.dims()[0]; ++i) {
for (int j = 0; j < inputx2_dim; ++j) {
DLOGF("%f ", inputx2_ptr[i * inputx2_dim + j]);
}
DLOGF("\n");
}
auto output_dim = output_op->numel() / output_op->dims()[0];
DLOG << " output : ";
for (int i = 0; i < output_op->dims()[0]; ++i) {
for (int j = 0; j < output_dim; ++j) {
DLOGF("%f ", output_op_ptr[i * output_dim + j]);
}
DLOGF("\n");
}
return 0;
}
...@@ -12,51 +12,129 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,51 +12,129 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "../executor_for_test.h" #pragma once
#include "../test_helper.h"
#include "../test_include.h" #include "../test_include.h"
#include "operators/im2sequence_op.h" #include "operators/im2sequence_op.h"
int main() { namespace paddle_mobile {
paddle_mobile::Loader<paddle_mobile::CPU> loader; namespace framework {
auto program = loader.Load(g_ocr_recg);
PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr, template <typename Dtype>
"program file read fail"); class TestIm2SequenceOp {
public:
explicit TestIm2SequenceOp(const Program<Dtype> p) : program_(p) {
if (use_optimize_) {
to_predict_program_ = program_.optimizeProgram;
} else {
to_predict_program_ = program_.originProgram;
}
const std::vector<std::shared_ptr<BlockDesc>> blocks =
to_predict_program_->Blocks();
// DLOG << " **block size " << blocks.size();
for (int i = 0; i < blocks.size(); ++i) {
std::shared_ptr<BlockDesc> block_desc = blocks[i];
std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
// DLOG << " ops " << ops.size();
for (int j = 0; j < ops.size(); ++j) {
std::shared_ptr<OpDesc> op = ops[j];
if (op->Type() == "im2sequence" &&
op->Input("X")[0] == "conv2d_19.tmp_1") {
DLOG << " im2squence attr size: " << op->GetAttrMap().size();
DLOG << " inputs size: " << op->GetInputs().size();
DLOG << " outputs size: " << op->GetOutputs().size();
std::shared_ptr<operators::Im2SequenceOp<Dtype, float>> lrn =
std::make_shared<operators::Im2SequenceOp<Dtype, float>>(
op->Type(), op->GetInputs(), op->GetOutputs(),
op->GetAttrMap(), program_.scope);
ops_of_block_[*block_desc.get()].push_back(lrn);
}
}
}
}
Executor4Test<paddle_mobile::CPU, std::shared_ptr<Tensor> predict_bn(const Tensor &t1) {
paddle_mobile::operators::ReluOp<paddle_mobile::CPU, float>> // feed
executor(program, "im2sequence"); auto scope = program_.scope;
Variable *x1_feed_value = scope->Var("conv2d_19.tmp_1");
auto tensor_x1 = x1_feed_value->GetMutable<LoDTensor>();
tensor_x1->ShareDataWith(t1);
// 1. input_tensors; Variable *output = scope->Var("im2sequence_0.tmp_0");
vector<Tensor> input_tensors; auto *output_tensor = output->GetMutable<LoDTensor>();
output_tensor->mutable_data<float>({2, 12});
// DLOG << typeid(output_tensor).name();
// DLOG << "output_tensor dims: " << output_tensor->dims();
Tensor input1; std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
auto input1_data = CreateInput<float>(&input1, {2, 2, 3, 3}, -1, 1); out_tensor.reset(output_tensor);
input_tensors.push_back(input1);
// 2. input_names predict_bn(t1, 0);
vector<string> input_names({ return out_tensor;
"conv2d_19.tmp_1", }
});
private:
const framework::Program<Dtype> program_;
std::shared_ptr<ProgramDesc> to_predict_program_;
std::map<framework::BlockDesc,
std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
ops_of_block_;
bool use_optimize_ = false;
// 3. output_names void predict_bn(const Tensor &t1, int block_id) {
vector<string> output_names({"im2sequence_0.tmp_0"}); std::shared_ptr<BlockDesc> to_predict_block =
to_predict_program_->Block(block_id);
for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
auto op = ops_of_block_[*to_predict_block.get()][j];
DLOG << "op -> run()";
op->Run();
}
}
};
// 4. out_dims; template class TestIm2SequenceOp<CPU>;
vector<DDim> out_ddims; } // namespace framework
auto out_ddim = paddle_mobile::framework::make_ddim({8, 9}); } // namespace paddle_mobile
out_ddims.push_back(out_ddim);
auto output = executor.Predict<LoDTensor>(input_tensors, input_names, int main() {
output_names, out_ddims); DLOG << "----------**********----------";
DLOG << "begin to run Im2Sequence Test";
paddle_mobile::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string(g_eng) + "/model",
std::string(g_eng) + "/params");
auto output0_data = output[0]->data<float>(); /// input x (4,10,2,2)
paddle_mobile::framework::Tensor inputx;
SetupTensor<float>(&inputx, {1, 2, 6, 2}, static_cast<float>(0),
static_cast<float>(1));
auto *inputx_ptr = inputx.data<float>();
for (int j = 0; j < input_tensors[0].numel(); ++j) { paddle_mobile::framework::TestIm2SequenceOp<paddle_mobile::CPU>
DLOG << " value of input: " << input1_data[j]; testIm2SequenceOp(program);
auto output_op = testIm2SequenceOp.predict_bn(inputx);
auto *output_op_ptr = output_op->data<float>();
auto input_dim = inputx.numel() / inputx.dims()[0];
DLOG << " input : ";
for (int i = 0; i < inputx.dims()[0]; ++i) {
for (int j = 0; j < input_dim; ++j) {
DLOGF("%f ", inputx_ptr[i * input_dim + j]);
}
DLOGF("\n");
} }
for (int j = 0; j < output[0]->numel(); ++j) { auto output_dim = output_op->numel() / output_op->dims()[0];
DLOG << " value of output: " << output0_data[j]; DLOG << " output : ";
for (int i = 0; i < output_op->dims()[0]; ++i) {
for (int j = 0; j < output_dim; ++j) {
DLOGF("%f ", output_op_ptr[i * output_dim + j]);
} }
DLOGF("\n");
}
return 0; return 0;
} }
...@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <stdint-gcc.h>
#include "../test_helper.h" #include "../test_helper.h"
#include "../test_include.h" #include "../test_include.h"
#include "operators/mul_op.h" #include "operators/mul_op.h"
...@@ -73,12 +74,20 @@ int TestMulOP() { ...@@ -73,12 +74,20 @@ int TestMulOP() {
} }
} }
int32_t eq = 0;
int32_t neq = 0;
for (int32_t i = 0; i < m * n; ++i) { for (int32_t i = 0; i < m * n; ++i) {
PADDLE_MOBILE_ENFORCE( PADDLE_MOBILE_ENFORCE(
output_data[i] == c[i], "output[%d] = %d, output_cmp[%d] = %d", i, output_data[i] == c[i], "output[%d] = %d, output_cmp[%d] = %d", i,
static_cast<int32_t>(output_data[i]), i, static_cast<int32_t>(c[i])); static_cast<int32_t>(output_data[i]), i, static_cast<int32_t>(c[i]));
if (static_cast<int>(output_data[i] == c[i])) {
++eq;
} else {
++neq;
} }
DLOG << "Run MulOp successfully!"; }
DLOG << "mnk=" << m << " " << n << " " << k << " eq=" << eq
<< " neq=" << neq;
delete op; delete op;
return 0; return 0;
} }
......
...@@ -127,18 +127,25 @@ int main() { ...@@ -127,18 +127,25 @@ int main() {
DLOG << "----------**********----------"; DLOG << "----------**********----------";
DLOG << "begin to run MulticlassNMS Test"; DLOG << "begin to run MulticlassNMS Test";
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string("../../test/models/mobilenet+ssd")); auto program = loader.Load(std::string(g_mobilenet_ssd));
/// input x (1,3,300,300)
paddle_mobile::framework::Tensor inputx1; paddle_mobile::framework::Tensor inputx1;
SetupTensor<float>(&inputx1, {10, 1917, 4}, static_cast<float>(0), SetupTensor<float>(&inputx1, {1, 2, 4}, static_cast<float>(0),
static_cast<float>(1)); static_cast<float>(1));
auto *inputx1_ptr = inputx1.data<float>(); auto *inputx1_ptr = inputx1.data<float>();
const float x1[] = {0, 0, 100, 100, 50, 50, 150, 150};
for (int i = 0; i < 8; ++i) {
*(inputx1_ptr + i) = x1[i];
}
paddle_mobile::framework::Tensor inputx2; paddle_mobile::framework::Tensor inputx2;
SetupTensor<float>(&inputx2, {10, 21, 1917}, static_cast<float>(0), SetupTensor<float>(&inputx2, {1, 2, 2}, static_cast<float>(0),
static_cast<float>(1)); static_cast<float>(1));
auto *inputx2_ptr = inputx2.data<float>(); auto *inputx2_ptr = inputx2.data<float>();
const float x2[] = {0.4, 0.3, 0.6, 0.7};
for (int i = 0; i < 4; ++i) {
*(inputx2_ptr + i) = x2[i];
}
paddle_mobile::framework::TestMultiClassNMSOp<paddle_mobile::CPU> paddle_mobile::framework::TestMultiClassNMSOp<paddle_mobile::CPU>
testMultiClassNMSOp(program); testMultiClassNMSOp(program);
...@@ -146,8 +153,26 @@ int main() { ...@@ -146,8 +153,26 @@ int main() {
auto output = testMultiClassNMSOp.predict(inputx1, inputx2); auto output = testMultiClassNMSOp.predict(inputx1, inputx2);
auto *output_ptr = output->data<float>(); auto *output_ptr = output->data<float>();
for (int i = 0; i < output->numel(); i++) { for (int i = 0; i < output->numel(); ++i) {
DLOG << output_ptr[i]; DLOG << output_ptr[i];
} }
// test multi point
paddle_mobile::framework::Tensor inputx3;
SetupTensor<float>(&inputx3, {1, 2, 8}, static_cast<float>(0),
static_cast<float>(1));
auto *inputx3_ptr = inputx3.data<float>();
const float x3[] = {0, 0, 100, 0, 100, 100, 0, 100,
50, 50, 150, 50, 150, 150, 50, 150};
for (int i = 0; i < 16; ++i) {
*(inputx3_ptr + i) = x3[i];
}
auto output2 = testMultiClassNMSOp.predict(inputx3, inputx2);
auto *output_ptr2 = output2->data<float>();
for (int i = 0; i < output2->numel(); ++i) {
DLOG << output_ptr2[i];
}
return 0; return 0;
} }
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "../test_helper.h"
#include "../test_include.h"
#include "operators/sum_op.h"
namespace paddle_mobile {
namespace framework {
template <typename Dtype>
class TestSumOp {
public:
explicit TestSumOp(const Program<Dtype> p) : program_(p) {
if (use_optimize_) {
to_predict_program_ = program_.optimizeProgram;
} else {
to_predict_program_ = program_.originProgram;
}
const std::vector<std::shared_ptr<BlockDesc>> blocks =
to_predict_program_->Blocks();
// DLOG << " **block size " << blocks.size();
for (int i = 0; i < blocks.size(); ++i) {
std::shared_ptr<BlockDesc> block_desc = blocks[i];
std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
// DLOG << " ops " << ops.size();
for (int j = 0; j < ops.size(); ++j) {
std::shared_ptr<OpDesc> op = ops[j];
if (op->Type() == "sum" && op->Input("X")[0] == "fc_2.tmp_0") {
DLOG << " sum attr size: " << op->GetAttrMap().size();
DLOG << " inputs size: " << op->GetInputs().size();
DLOG << " outputs size: " << op->GetOutputs().size();
std::shared_ptr<operators::SumOp<Dtype, float>> lrn =
std::make_shared<operators::SumOp<Dtype, float>>(
op->Type(), op->GetInputs(), op->GetOutputs(),
op->GetAttrMap(), program_.scope);
ops_of_block_[*block_desc.get()].push_back(lrn);
}
}
}
}
std::shared_ptr<Tensor> predict_bn(const Tensor &t1, const Tensor &t2) {
// feed
auto scope = program_.scope;
Variable *x1_feed_value = scope->Var("fc_2.tmp_0");
auto tensor_x1 = x1_feed_value->GetMutable<LoDTensor>();
tensor_x1->ShareDataWith(t1);
Variable *x2_feed_value = scope->Var("fc_2.tmp_1");
auto tensor_x2 = x2_feed_value->GetMutable<LoDTensor>();
tensor_x2->ShareDataWith(t2);
Variable *output = scope->Var("fc_2.tmp_2");
auto *output_tensor = output->GetMutable<LoDTensor>();
output_tensor->mutable_data<float>({2, 96});
// DLOG << typeid(output_tensor).name();
// DLOG << "output_tensor dims: " << output_tensor->dims();
std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
out_tensor.reset(output_tensor);
predict_bn(t1, t2, 0);
return out_tensor;
}
private:
const framework::Program<Dtype> program_;
std::shared_ptr<ProgramDesc> to_predict_program_;
std::map<framework::BlockDesc,
std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
ops_of_block_;
bool use_optimize_ = false;
void predict_bn(const Tensor &t1, const Tensor &t2, int block_id) {
std::shared_ptr<BlockDesc> to_predict_block =
to_predict_program_->Block(block_id);
for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
auto op = ops_of_block_[*to_predict_block.get()][j];
DLOG << "op -> run()";
op->Run();
}
}
};
template class TestSumOp<CPU>;
} // namespace framework
} // namespace paddle_mobile
int main() {
DLOG << "----------**********----------";
DLOG << "begin to run Sum Test";
paddle_mobile::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string(g_eng) + "/model",
std::string(g_eng) + "/params");
/// input x (4,10,2,2)
paddle_mobile::framework::Tensor inputx1;
SetupTensor<float>(&inputx1, {2, 96}, static_cast<float>(0),
static_cast<float>(1));
auto *inputx1_ptr = inputx1.data<float>();
paddle_mobile::framework::Tensor inputx2;
SetupTensor<float>(&inputx2, {2, 96}, static_cast<float>(0),
static_cast<float>(1));
auto *inputx2_ptr = inputx2.data<float>();
paddle_mobile::framework::TestSumOp<paddle_mobile::CPU> testSumOp(program);
auto output_sum = testSumOp.predict_bn(inputx1, inputx2);
auto *output_sum_ptr = output_sum->data<float>();
DLOG << "input1 44: " << inputx1_ptr[44];
DLOG << "input2 44: " << inputx2_ptr[44];
DLOG << "out 44 :" << output_sum_ptr[44];
return 0;
}
...@@ -27,6 +27,7 @@ limitations under the License. */ ...@@ -27,6 +27,7 @@ limitations under the License. */
static const char *g_ocr = "../models/ocr"; static const char *g_ocr = "../models/ocr";
static const char *g_mobilenet_ssd = "../models/mobilenet+ssd"; static const char *g_mobilenet_ssd = "../models/mobilenet+ssd";
static const char *g_genet_combine = "../models/enet"; static const char *g_genet_combine = "../models/enet";
static const char *g_eng = "../models/eng_20conv_1_9_fc";
static const char *g_mobilenet_ssd_gesture = "../models/mobilenet+ssd_gesture"; static const char *g_mobilenet_ssd_gesture = "../models/mobilenet+ssd_gesture";
static const char *g_mobilenet_combined = "../models/mobilenet_combine"; static const char *g_mobilenet_combined = "../models/mobilenet_combine";
static const char *g_googlenetv1_combined = "../models/googlenetv1_combine"; static const char *g_googlenetv1_combined = "../models/googlenetv1_combine";
...@@ -51,6 +52,7 @@ static const char *g_test_image_1x3x224x224_banana = ...@@ -51,6 +52,7 @@ static const char *g_test_image_1x3x224x224_banana =
static const char *g_test_image_desktop_1_3_416_416_nchw_float = static const char *g_test_image_desktop_1_3_416_416_nchw_float =
"../images/in_put_1_3_416_416_2"; "../images/in_put_1_3_416_416_2";
static const char *g_hand = "../images/hand_image"; static const char *g_hand = "../images/hand_image";
static const char *g_moto = "../images/moto_300x300_float";
static const char *g_imgfssd_ar = "../images/test_image_ssd_ar"; static const char *g_imgfssd_ar = "../images/test_image_ssd_ar";
static const char *g_imgfssd_ar1 = "../images/003_0001.txt"; static const char *g_imgfssd_ar1 = "../images/003_0001.txt";
static const char *g_img = "../images/img.bin"; static const char *g_img = "../images/img.bin";
......
...@@ -33,6 +33,7 @@ if (CON GREATER -1) ...@@ -33,6 +33,7 @@ if (CON GREATER -1)
set(POOL_OP ON) set(POOL_OP ON)
set(RESHAPE_OP ON) set(RESHAPE_OP ON)
set(FUSION_CONVADDBNRELU_OP ON) set(FUSION_CONVADDBNRELU_OP ON)
set(FUSION_CONVADDRELU_OP ON)
set(FUSION_CONVADD_OP ON) set(FUSION_CONVADD_OP ON)
set(FOUND_MATCH ON) set(FOUND_MATCH ON)
...@@ -117,12 +118,9 @@ if (CON GREATER -1) ...@@ -117,12 +118,9 @@ if (CON GREATER -1)
set(POOL_OP ON) set(POOL_OP ON)
set(CONCAT_OP ON) set(CONCAT_OP ON)
set(SOFTMAX_OP ON) set(SOFTMAX_OP ON)
set(DROPOUT_OP ON)
set(FUSION_CONVBNRELU_OP ON) set(FUSION_CONVBNRELU_OP ON)
set(FUSION_CONVBN_OP ON) set(FUSION_CONVBN_OP ON)
set(FUSION_CONVADD_OP ON) set(FUSION_CONVADD_OP ON)
set(MUL_OP ON)
set(FOUND_MATCH ON) set(FOUND_MATCH ON)
endif() endif()
...@@ -188,6 +186,8 @@ if(NOT FOUND_MATCH) ...@@ -188,6 +186,8 @@ if(NOT FOUND_MATCH)
set(CONV_OP ON) set(CONV_OP ON)
set(DEPTHWISECONV_OP ON) set(DEPTHWISECONV_OP ON)
set(ELEMENTWISEADD_OP ON) set(ELEMENTWISEADD_OP ON)
set(ELEMENTWISESUB_OP ON)
set(IM2SEQUENCE_OP ON)
set(FUSION_CONVADD_OP ON) set(FUSION_CONVADD_OP ON)
set(FUSION_CONVADDPRELU_OP ON) set(FUSION_CONVADDPRELU_OP ON)
set(FUSION_CONVADDRELU_OP ON) set(FUSION_CONVADDRELU_OP ON)
...@@ -220,6 +220,8 @@ if(NOT FOUND_MATCH) ...@@ -220,6 +220,8 @@ if(NOT FOUND_MATCH)
set(SPLIT_OP ON) set(SPLIT_OP ON)
set(FLATTEN_OP ON) set(FLATTEN_OP ON)
set(SHAPE_OP ON) set(SHAPE_OP ON)
set(ELEMENTWISEMUL_OP ON)
set(SUM_OP ON)
endif() endif()
# option(BATCHNORM_OP "" ON) # option(BATCHNORM_OP "" ON)
...@@ -261,6 +263,9 @@ endif() ...@@ -261,6 +263,9 @@ endif()
if (ELEMENTWISEADD_OP) if (ELEMENTWISEADD_OP)
add_definitions(-DELEMENTWISEADD_OP) add_definitions(-DELEMENTWISEADD_OP)
endif() endif()
if (ELEMENTWISESUB_OP)
add_definitions(-DELEMENTWISESUB_OP)
endif()
if (FUSION_CONVADD_OP) if (FUSION_CONVADD_OP)
add_definitions(-DFUSION_CONVADD_OP) add_definitions(-DFUSION_CONVADD_OP)
endif() endif()
...@@ -388,3 +393,11 @@ endif() ...@@ -388,3 +393,11 @@ endif()
if (SHAPE_OP) if (SHAPE_OP)
add_definitions(-DSHAPE_OP) add_definitions(-DSHAPE_OP)
endif() endif()
if (ELEMENTWISEMUL_OP)
add_definitions(-DELEMENTWISEMUL_OP)
endif()
if (SUM_OP)
add_definitions(-DSUM_OP)
endif()
...@@ -5,7 +5,7 @@ TOTAL_ERRORS=0 ...@@ -5,7 +5,7 @@ TOTAL_ERRORS=0
# The trick to remove deleted files: https://stackoverflow.com/a/2413151 # The trick to remove deleted files: https://stackoverflow.com/a/2413151
for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}' | \ for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}' | \
grep -v ".pb.cpp" | grep -v ".pb.h" | grep -v ".pb-c.h" | grep -v ".pb-c.c" | \ grep -v ".pb.cpp" | grep -v ".pb.h" | grep -v ".pb-c.h" | grep -v ".pb-c.c" | \
grep -v "protobuf-c.h" | grep -v "protobuf-c.c"); do grep -v "protobuf-c.h" | grep -v "protobuf-c.c" | grep -v "variant.h"); do
cpplint $file; cpplint $file;
TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?); TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?);
done done
......
...@@ -45,13 +45,13 @@ def combine_bgrs_nchw(bgrs, means_b_g_r, scale, channel_type=ChannelType.BGR): ...@@ -45,13 +45,13 @@ def combine_bgrs_nchw(bgrs, means_b_g_r, scale, channel_type=ChannelType.BGR):
print '------------------' print '------------------'
print bgrs_float_array[0] print bgrs_float_array[0]
print bgrs_float_array[416 * 416 * 2 + 416 * 2 + 2] print bgrs_float_array[224 * 224 * 2 + 224 * 2 + 2]
# for i in range(0, 9): # for i in range(0, 9):
# print'bs %d' % i # print'bs %d' % i
# print bs[i] / 255. # print bs[i] / 255.
print bs[416 * 2 + 2] / 255. print bs[224 * 2 + 2] / 255.
print '--------------combine_bgrs_nchw-----------------end' print '--------------combine_bgrs_nchw-----------------end'
return bgrs_float_array return bgrs_float_array
...@@ -64,6 +64,6 @@ def combine_bgrs_nchw(bgrs, means_b_g_r, scale, channel_type=ChannelType.BGR): ...@@ -64,6 +64,6 @@ def combine_bgrs_nchw(bgrs, means_b_g_r, scale, channel_type=ChannelType.BGR):
# cv2.waitKey(0) # cv2.waitKey(0)
bgrs = tools.resize_take_rgbs('datas/newyolo.jpg', (416, 416, 3)) bgrs = tools.resize_take_rgbs('datas/jpgs/0000_0.9834-148196_82452-0ad4b83ec6bc0f9c5f28101539267054.jpg_p0_0.126571263346.jpg', (224, 224, 3))
array = combine_bgrs_nchw(bgrs, (0, 0, 0), 1. / 255, ChannelType.RGB) array = combine_bgrs_nchw(bgrs, (0, 0, 0), 1. / 255, ChannelType.RGB)
tools.save_to_file('datas/desktop_1_3_416_416_nchw_float', array) tools.save_to_file('datas/desktop_1_3_224_224_nchw_float', array)
...@@ -15,11 +15,11 @@ from array import array ...@@ -15,11 +15,11 @@ from array import array
# image.resize(shape_h_w) # image.resize(shape_h_w)
data = np.fromfile('datas/img.res') data = np.fromfile('/Users/xiebaiyuan/PaddleProject/paddle-mobile/tools/python/imagetools/datas/jpgs2/0000_0.9834-148196_82452-0ad4b83ec6bc0f9c5f28101539267054.jpg_p0_0.126571263346.jpg.input.npfile','f')
print data.size print data.size
print data[0] print data
data.reshape(1, 3, 416, 416) data.reshape(1, 3, 224, 224)
out_array = array('f') out_array = array('f')
print'--------------------' print'--------------------'
print data.size print data.size
...@@ -27,12 +27,12 @@ print data[0] ...@@ -27,12 +27,12 @@ print data[0]
print '如果是nhwc --------' print '如果是nhwc --------'
# rgb rgb rgb rgb rgb # rgb rgb rgb rgb rgb
print data[416 * 3 * 2 + 3 * 2 + 2] print data[224 * 3 * 2 + 3 * 2 + 2]
# print data[2] # print data[2]
print '如果是nchw --------' print '如果是nchw --------'
# rgb rgb rgb rgb rgb # rgb rgb rgb rgb rgb
print data[416 * 416 * 2 + 416 * 2 + 2] print data[224 * 224 * 2 + 224 * 2 + 2]
# print data[2] # print data[2]
# 明明是nchw # 明明是nchw
...@@ -42,6 +42,8 @@ for i in range(0, data.size): ...@@ -42,6 +42,8 @@ for i in range(0, data.size):
print len(out_array) print len(out_array)
print out_array[416 * 416 * 2 + 416 * 2 + 2] print out_array[224 * 224 * 2 + 224 * 2 + 2]
# print out_array
tools.save_to_file('datas/in_put_1_3_416_416_2', out_array) tools.save_to_file('datas/in_put_1_3_224_224_nchw', out_array)
# Created by .ignore support plugin (hsz.mobi)
### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
/yolo/datas/
/mobilenet/datas/
...@@ -5,22 +5,28 @@ layer_mdl_conv = 'ConvolutionLayer' ...@@ -5,22 +5,28 @@ layer_mdl_conv = 'ConvolutionLayer'
layer_mdl_deepwise_conv = 'DepthwiseConvolutionLayer' layer_mdl_deepwise_conv = 'DepthwiseConvolutionLayer'
layer_mdl_relu = 'ReluLayer' layer_mdl_relu = 'ReluLayer'
layer_mdl_pointwise_add = 'PointwiseConvolutionLayer' layer_mdl_pointwise_add = 'PointwiseConvolutionLayer'
layer_mdl_pooling = 'PoolingLayer'
layer_mdl_softmax = 'SoftmaxLayer'
# fluid ops # fluid ops
op_fluid_fusion_conv_add = 'fusion_conv_add' op_fluid_fusion_conv_add = 'fusion_conv_add'
op_fluid_relu = 'relu' op_fluid_relu = 'relu'
op_fluid_pooling = 'pool2d'
op_fluid_softmax = 'softmax'
# dict mdk layer --- fluid op # dict mdk layer --- fluid op
mdl2fluid_op_layer_dict = { mdl2fluid_op_layer_dict = {
layer_mdl_conv: op_fluid_fusion_conv_add, layer_mdl_conv: op_fluid_fusion_conv_add,
layer_mdl_deepwise_conv: op_fluid_fusion_conv_add, layer_mdl_deepwise_conv: op_fluid_fusion_conv_add,
layer_mdl_relu: op_fluid_relu, layer_mdl_relu: op_fluid_relu,
layer_mdl_pointwise_add: op_fluid_fusion_conv_add layer_mdl_pointwise_add: op_fluid_fusion_conv_add,
layer_mdl_pooling: op_fluid_pooling,
layer_mdl_softmax: op_fluid_softmax
} }
mdl_outputs_key = "outputs" mdl_outputs_key = "outputs"
mdl_inputs_key = "inputs" mdl_inputs_key = "inputs"
mdl_weight_key = "weights" mdl_weight_key = "weight"
mdl_attrs_key = "params" mdl_attrs_key = "params"
# dict of mdl-input _out param to fluid input out attrs # dict of mdl-input _out param to fluid input out attrs
...@@ -39,13 +45,30 @@ fusion_conv_add_dict = { ...@@ -39,13 +45,30 @@ fusion_conv_add_dict = {
relu_dict = { relu_dict = {
mdl_inputs_key: 'X', mdl_inputs_key: 'X',
mdl_outputs_key: 'Out', mdl_outputs_key: 'Out',
mdl_weight_key: () # mdl_weight_key: ()
} }
pool2d_dict = {
mdl_inputs_key: 'X',
mdl_outputs_key: 'Out',
# mdl_weight_key: (),
mdl_attrs_key: ('pooling_type', 'global_pooling')
}
softmax_dict = {
mdl_inputs_key: 'X',
mdl_outputs_key: 'Out',
mdl_weight_key: (),
mdl_attrs_key: ()
}
# mdl layers --- fluid ops # mdl layers --- fluid ops
op_io_dict = { op_io_dict = {
'fusion_conv_add': fusion_conv_add_dict, 'fusion_conv_add': fusion_conv_add_dict,
'relu': relu_dict 'relu': relu_dict,
'pool2d': pool2d_dict,
'softmax': softmax_dict
} }
# fluid attr key --- mdl params key # fluid attr key --- mdl params key
...@@ -54,70 +77,17 @@ fusion_conv_add_attrs_dict = { ...@@ -54,70 +77,17 @@ fusion_conv_add_attrs_dict = {
'strides': 'stride', 'strides': 'stride',
'groups': 'group' 'groups': 'group'
} }
# fluid attr key --- mdl params key
pool2d_attrs_dict = {
'global_pooling': 'global_pooling',
'pooling_type': 'type'
}
# fluid attr key --- mdl params key # fluid attr key --- mdl params key
fluid_attrs_type_dict = { fluid_attrs_type_dict = {
'paddings': 0, 'paddings': 0,
'strides': 6, 'strides': 6,
'groups': 6 'groups': 6
} }
# '': "bias_term", 是不是要add 目前 yolo的模型都是 bias_term = 1
# attrs {
# name: "axis"
# type: INT
# i: 1
# }
# attrs_name = {
# 'name': "workspace_size_MB",
# 'type': 'INT',
# 'i': '4096'
# }
# attrs
# {
# name: "data_format"
# type: STRING
# s: "AnyLayout"
# }
# attrs
# {
# name: "use_mkldnn"
# type: BOOLEAN
# b: false
# }
# attrs
# {
# name: "use_cudnn"
# type: BOOLEAN
# b: true
# }
# attrs
# {
# name: "dilations"
# type: INTS
# ints: 1
# ints: 1
# }
# attrs
# {
# name: "groups"
# type: INT
# i: 1
# }
# attrs
# {
# name: "paddings"
# type: INTS
# ints: 0
# ints: 0
# }
# attrs
# {
# name: "strides"
# type: INTS
# ints: 1
# ints: 1
# }
# coding=utf-8
import json
import os
from core import framework_pb2 as framework_pb2, op_types as types
from mobilenet.swicher import Swichter
import shutil
def load_mdl(mdl_json_path):
# print('mdl json path : ' + mdl_json_path)
with open(mdl_json_path, 'r') as f:
return json.load(f)
def create_if_not_exit(target_dir):
if os.path.exists(target_dir):
shutil.rmtree(target_dir)
os.makedirs(target_dir, 0777)
class Converter:
'convert mdlmodel to fluidmodel'
def __init__(self, base_dir, mdl_json_path):
print 'base_dir: ' + base_dir
self.mdl_json_path = base_dir + mdl_json_path
self.base_dir = base_dir
print mdl_json_path
self.source_weights_dir = self.base_dir + 'datas/sourcemodels/source_weights/'
self.target_weight_dir = self.base_dir + 'datas/target/target_weights/'
create_if_not_exit(self.target_weight_dir)
self.mdl_json = load_mdl(self.mdl_json_path)
self.program_desc = framework_pb2.ProgramDesc()
self.weight_list_ = []
self.deepwise_weight_list_ = []
# print(json_dick)
# layers = (json_dick['layer'])
# for layer in layers:
# print(layer)
def convert(self):
print 'convert begin.....'
# add block_desc
block_desc = self.program_desc.blocks.add()
block_desc.idx = 0
block_desc.parent_idx = -1
self.package_ops(block_desc)
self.package_vars(block_desc)
print 'blocks: '
print self.program_desc.blocks
print 'convert end.....'
desc_serialize_to_string = self.program_desc.SerializeToString()
outputmodel_dir = self.base_dir + 'datas/target/mobilenet_classfication/'
if os.path.exists(outputmodel_dir):
shutil.rmtree(outputmodel_dir)
os.makedirs(outputmodel_dir, 0777)
if os.path.exists(outputmodel_dir):
shutil.rmtree(outputmodel_dir)
# create_if_not_exit(outputmodel_dir)
shutil.copytree(self.target_weight_dir, outputmodel_dir)
f = open(outputmodel_dir + "__model__", "wb")
f.write(desc_serialize_to_string)
f.close()
def package_ops(self, block_desc):
self.add_op_feed(block_desc)
# add ops with layer
if 'layer' in self.mdl_json:
layers_ = self.mdl_json['layer']
for layer in layers_:
if layer['type'] == 'SoftmaxLayer':
pass
else:
desc_ops_add = block_desc.ops.add()
# print layer
# for i in layer:
# print i
if 'name' in layer:
l_name = layer['name']
if 'type' in layer:
self.package_ops_type(desc_ops_add, layer)
if 'weight' in layer:
self.package_ops_weight2inputs(desc_ops_add, layer)
if 'output' in layer:
self.package_ops_outputs(desc_ops_add, layer)
if 'input' in layer:
self.package_ops_inputs(desc_ops_add, layer)
self.package_ops_attrs(desc_ops_add, layer)
self.add_op_fetch(block_desc)
def add_op_feed(self, block_desc):
desc_ops_add = block_desc.ops.add()
inputs_add = desc_ops_add.inputs.add()
inputs_add.parameter = 'X'
inputs_add.arguments.append('feed')
desc_ops_add.type = 'feed'
outputs_add = desc_ops_add.outputs.add()
outputs_add.parameter = 'Out'
outputs_add.arguments.append('data')
attrs_add = desc_ops_add.attrs.add()
attrs_add.name = 'col'
# boolean
attrs_add.type = 0
attrs_add.i = 0
def add_op_fetch(self, block_desc):
desc_ops_add = block_desc.ops.add()
inputs_add = desc_ops_add.inputs.add()
inputs_add.parameter = 'X'
# todo pick last layer --> op output
inputs_add.arguments.append('fc7')
desc_ops_add.type = 'fetch'
outputs_add = desc_ops_add.outputs.add()
outputs_add.parameter = 'Out'
outputs_add.arguments.append('fetch')
attrs_add = desc_ops_add.attrs.add()
attrs_add.name = 'col'
# boolean
attrs_add.type = 0
attrs_add.i = 0
@staticmethod
def package_ops_attrs(desc_ops_add, layer):
# print l_params
# print desc_ops_add.type
if desc_ops_add.type == types.op_fluid_fusion_conv_add:
Converter.pack_fusion_conv_add_attr(desc_ops_add, layer)
elif desc_ops_add.type == types.op_fluid_relu:
# fusion_conv_add : attrs
attrs_add = desc_ops_add.attrs.add()
attrs_add.name = 'use_mkldnn'
# boolean
attrs_add.type = 6
attrs_add.b = 0
elif desc_ops_add.type == types.op_fluid_pooling:
Converter.pack_pooling_attr(desc_ops_add, layer)
pass
elif desc_ops_add.type == types.op_fluid_softmax:
pass
@staticmethod
def pack_pooling_attr(desc_ops_add, layer):
print layer
l_params = layer['param']
attrs_add = desc_ops_add.attrs.add()
attrs_add.name = 'use_mkldnn'
# boolean
attrs_add.type = 6
attrs_add.b = 0
attrs_add = desc_ops_add.attrs.add()
attrs_add.name = 'use_cudnn'
# boolean
attrs_add.type = 6
attrs_add.b = 1
attrs_add = desc_ops_add.attrs.add()
attrs_add.name = 'paddings'
# ints
attrs_add.type = 3
attrs_add.ints.append(0)
attrs_add.ints.append(0)
attrs_add = desc_ops_add.attrs.add()
attrs_add.name = 'strides'
# ints
attrs_add.type = 3
attrs_add.ints.append(1)
attrs_add.ints.append(1)
attrs_add = desc_ops_add.attrs.add()
attrs_add.name = 'global_pooling'
# boolean
attrs_add.type = 6
attrs_add.b = (l_params[types.pool2d_attrs_dict.get('global_pooling')])
attrs_add = desc_ops_add.attrs.add()
attrs_add.name = 'pooling_type'
# 2-->STRING
attrs_add.type = 2
# 注意这里 avg but mdl is ave
attrs_add.s = l_params[types.pool2d_attrs_dict.get('pooling_type')]
attrs_add = desc_ops_add.attrs.add()
attrs_add.name = 'ceil_mode'
# boolean
attrs_add.type = 6
attrs_add.b = 1
attrs_add = desc_ops_add.attrs.add()
attrs_add.name = 'ksize'
# ints
attrs_add.type = 3
attrs_add.ints.append(7)
attrs_add.ints.append(7)
# type: "pool2d"
# attrs
# {
# name: "use_mkldnn"
# type: BOOLEAN
# b: false
# }
# attrs
# {
# name: "ceil_mode"
# type: BOOLEAN
# b: true
# }
# attrs
# {
# name: "use_cudnn"
# type: BOOLEAN
# b: true
# }
# attrs
# {
# name: "paddings"
# type: INTS
# ints: 0
# ints: 0
# }
# attrs
# {
# name: "strides"
# type: INTS
# ints: 1
# ints: 1
# }
# attrs
# {
# name: "global_pooling"
# type: BOOLEAN
# b: false
# }
# attrs
# {
# name: "data_format"
# type: STRING
# s: "AnyLayout"
# }
# attrs
# {
# name: "ksize"
# type: INTS
# ints: 7
# ints: 7
# }
# attrs
# {
# name: "pooling_type"
# type: STRING
# s: "avg"
# }
# is_target: false
@staticmethod
def pack_fusion_conv_add_attr(desc_ops_add, layer):
# fusion_conv_add : attrs
attrs_add = desc_ops_add.attrs.add()
attrs_add.name = 'workspace_size_MB'
# 0-->INT
attrs_add.type = 0
attrs_add.i = 4096
attrs_add = desc_ops_add.attrs.add()
attrs_add.name = 'data_format'
# 2-->STRING
attrs_add.type = 2
attrs_add.s = 'AnyLayout'
attrs_add = desc_ops_add.attrs.add()
attrs_add.name = 'use_mkldnn'
# boolean
attrs_add.type = 6
attrs_add.b = 0
attrs_add = desc_ops_add.attrs.add()
attrs_add.name = 'use_cudnn'
# boolean
attrs_add.type = 6
attrs_add.b = 1
attrs_add = desc_ops_add.attrs.add()
attrs_add.name = 'dilations'
# ints
attrs_add.type = 3
attrs_add.ints.append(1)
attrs_add.ints.append(1)
attrs_add = desc_ops_add.attrs.add()
attrs_add.name = 'axis'
# int
attrs_add.type = 0
attrs_add.i = 1
if 'param' in layer:
l_params = layer['param']
attrs_add = desc_ops_add.attrs.add()
attrs_add.name = 'paddings'
# ints
attrs_add.type = 3
attrs_add.ints.append(l_params[types.fusion_conv_add_attrs_dict.get('paddings')])
attrs_add.ints.append(l_params[types.fusion_conv_add_attrs_dict.get('paddings')])
# attrs_add = desc_ops_add.attrs.add()
# attrs_add.name = 'paddings'
# # ints
# attrs_add.type = 3
# attrs_add.ints.append(0)
# attrs_add.ints.append(0)
attrs_add = desc_ops_add.attrs.add()
attrs_add.name = 'strides'
# ints
attrs_add.type = 3
attrs_add.ints.append(l_params[types.fusion_conv_add_attrs_dict.get('strides')])
attrs_add.ints.append(l_params[types.fusion_conv_add_attrs_dict.get('strides')])
# attrs_add = desc_ops_add.attrs.add()
# attrs_add.name = 'strides'
# # ints
# attrs_add.type = 3
# attrs_add.ints.append(6)
# attrs_add.ints.append(6)
attrs_add = desc_ops_add.attrs.add()
attrs_add.name = 'groups'
# int
attrs_add.type = 0
attrs_add.i = l_params[types.fusion_conv_add_attrs_dict.get('groups')]
# attrs_add.i = 1
#
# op_attrs_tupl = types.op_io_dict.get(desc_ops_add.type) \
# .get(types.mdl_attrs_key)
#
#
#
#
# # group stride padding
# print '----------------------'
# for i, val in enumerate(op_attrs_tupl):
# attrs_add = desc_ops_add.attrs.add()
# attr_name = op_attrs_tupl[i]
# print attr_name
# attrs_add.name = attr_name
# attrs_add.type = types.fluid_attrs_type_dict.get(attr_name)
# attrs_add.
# print l_params[types.fusion_conv_add_attrs_dict.get(attr_name)]
# for p in l_params:
# attrs_add = desc_ops_add.attrs.add()
@staticmethod
def package_ops_inputs(desc_ops_add, layer):
l_inputs = layer['input']
for i in l_inputs:
inputs_add = desc_ops_add.inputs.add()
# print i
inputs_add.parameter = types.op_io_dict.get(desc_ops_add.type).get(types.mdl_inputs_key)
inputs_add.arguments.append(i)
@staticmethod
def package_ops_outputs(desc_ops_add, layer):
l_outputs = layer['output']
for o in l_outputs:
# print o
outputs_add = desc_ops_add.outputs.add()
dict = types.op_io_dict.get(desc_ops_add.type)
# print 'desc_ops_add.type: ' + desc_ops_add.type
# print dict
outputs_add.parameter = dict.get(types.mdl_outputs_key)
outputs_add.arguments.append(o)
def package_ops_weight2inputs(self, desc_ops_add, layer):
l_weights = layer['weight']
for w in l_weights:
self.weight_list_.append(w)
if layer['type'] == types.layer_mdl_deepwise_conv:
# print l_weights[0]
self.deepwise_weight_list_.append(l_weights[0])
op_weight_tup = types.op_io_dict.get(desc_ops_add.type).get(types.mdl_weight_key)
if op_weight_tup is not None:
# print len(op_weight_tup)
for i, val in enumerate(op_weight_tup):
# print i
# print val
inputs_add = desc_ops_add.inputs.add()
inputs_add.parameter = op_weight_tup[i]
inputs_add.arguments.append(l_weights[i])
# for w in l_weights:
# inputs_add = desc_ops_add.inputs.add()
# # print w
# inputs_add.parameter = op_weight_tup[0]
# inputs_add.arguments.append(w)
@staticmethod
def package_ops_type(desc_ops_add, layer):
l_type = layer['type']
# print l_type
# print mdl2fluid_op_layer_dict.get(l_type)
desc_ops_add.type = types.mdl2fluid_op_layer_dict.get(l_type)
def package_vars(self, block_desc):
vars_add = block_desc.vars.add()
vars_add.name = 'feed'
vars_add.type.type = 9 # 9 is FEED_MINIBATCH
vars_add.persistable = 1
# fetch
vars_add = block_desc.vars.add()
vars_add.name = 'fetch'
vars_add.type.type = 10 # 10 is fetch list
vars_add.persistable = 1
json_matrix_ = self.mdl_json['matrix']
# print json_matrix_
for j in json_matrix_:
vars_add = block_desc.vars.add()
vars_add.name = j
vars_add.type.type = 7 # 7 is lodtensor
# print j
tensor = vars_add.type.lod_tensor.tensor
tensor.data_type = 5 # 5 is FP32
# print json_matrix_
dims_of_matrix = json_matrix_.get(j)
# dims_size = len(dims_of_matrix)
# print dims_size
# if dims_size == 4:
# tensor.dims.append(dims_of_matrix[0]) # N
# tensor.dims.append(dims_of_matrix[3]) # C
# tensor.dims.append(dims_of_matrix[1]) # H
# tensor.dims.append(dims_of_matrix[2]) # W
# else:
# issues in mdl model filter swich n and c
if j in self.deepwise_weight_list_ and len(dims_of_matrix) == 4:
print "deep wise issue fit: " + j
tensor.dims.append(dims_of_matrix[1])
tensor.dims.append(dims_of_matrix[0])
tensor.dims.append(dims_of_matrix[2])
tensor.dims.append(dims_of_matrix[3])
print tensor.dims
else:
for dims in dims_of_matrix:
# print dims
tensor.dims.append(dims)
if j in self.weight_list_:
vars_add.persistable = 1
dims_size = len(dims_of_matrix)
# print dims_size
# print 'weight name : ' + j
Swichter().copy_add_head(
self.source_weights_dir + j + '.bin',
self.target_weight_dir + j
)
# if dims_size == 4:
# # convert weight from nhwc to nchw
# Swichter().nhwc2nchw_one_slice_add_head(
# 'yolo/datas/multiobjects/float32s_nhwc/' + j + '.bin',
# 'yolo/datas/multiobjects/float32s_nchw_with_head/' + j,
# 'yolo/datas/multiobjects/float32s_nchw/' + j + '.tmp',
# dims_of_matrix[0],
# dims_of_matrix[1],
# dims_of_matrix[2],
# dims_of_matrix[3]
# )
# else:
# Swichter().copy_add_head(
# 'yolo/datas/multiobjects/float32s_nhwc/' + j + '.bin',
# 'yolo/datas/multiobjects/float32s_nchw_with_head/' + j,
# 'yolo/datas/multiobjects/float32s_nchw/' + j + '.tmp'
# )
else:
vars_add.persistable = 0
mdl_path = "datas/sourcemodels/source_profile/mobileNetModel.json"
base_dir = "/Users/xiebaiyuan/PaddleProject/paddle-mobile/tools/python/modeltools/mobilenet/"
converter = Converter(base_dir, mdl_path)
converter.convert()
import os
import shutil
from array import array
class Swichter:
def __init__(self):
pass
def nhwc2nchw_one_slice(self, from_file_name, to_file_name, batch, channel, height, width):
from_file = open(from_file_name, "rb")
to_file = open(to_file_name, "wb")
float_array = array("f")
float_array.fromfile(from_file, width * height * batch * channel)
float_write_array = array("f")
for b in range(batch):
for c in range(channel):
for h in range(height):
for w in range(width):
float_value = float_array[b * channel * width * height
+ channel * (h * width + w) + c]
float_write_array.append(float_value)
float_write_array.tofile(to_file)
from_file.close()
to_file.close()
def copy(self, from_file_name, to_file_name):
from_file = open(from_file_name, "rb")
to_file = open(to_file_name, "wb")
to_file.write(from_file.read())
from_file.close()
to_file.close()
def nhwc2nchw_one_slice_add_head(self, from_file_name, to_file_name, tmp_file_name, batch, channel, height, width):
from_file = open(from_file_name, "rb")
tmp_file = open(tmp_file_name, "wb+")
float_array = array("f")
float_array.fromfile(from_file, width * height * batch * channel)
float_write_array = array("f")
for b in range(batch):
for c in range(channel):
for h in range(height):
for w in range(width):
float_value = float_array[b * channel * width * height
+ channel * (h * width + w) + c]
float_write_array.append(float_value)
float_write_array.tofile(tmp_file)
tmp_file.close()
from_file.close()
tmp_file = open(tmp_file_name, "rb")
to_file = open(to_file_name, "wb")
tmp = tmp_file.read()
head = self.read_head('yolo/datas/yolo/head')
to_file.write(head)
to_file.write(tmp)
tmp_file.close()
to_file.close()
def read_head(self, head_file):
from_file = open(head_file, "rb")
read = from_file.read(24)
# print read
from_file.close()
# print read
return read
def copy_add_head(self, from_file_name, to_file_name):
from_file = open(from_file_name, "rb")
to_file = open(to_file_name, "wb")
# tmp_file = open(tmp_file_name, "wb")
head = self.read_head(
'/Users/xiebaiyuan/PaddleProject/paddle-mobile/tools/python/modeltools/mobilenet/datas/sourcemodels/head/head')
to_file.write(head)
to_file.write(from_file.read())
from_file.close()
to_file.close()
pass
def copy_padding_add_head(self, from_file_name, to_file_name, tmp_file_name, padding):
print'padding = %d' % padding
from_file = open(from_file_name, "rb")
# print len(from_file.read())
from_file.seek(padding, 0)
read = from_file.read()
print len(read)
to_file = open(to_file_name, "wb")
# tmp_file = open(tmp_file_name, "wb")
head = self.read_head('yolo/datas/yolo/head')
to_file.write(head)
to_file.write(read)
from_file.close()
to_file.close()
pass
# Swichter().nhwc2nchw_one_slice_add_head(
# '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/modeltools/multiobjects/float32s_nhwc/conv1_0.bin',
# '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/modeltools/multiobjects/float32s_nchw_with_head/conv1_0',
# '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/modeltools/multiobjects/float32s_nchw/.tmp',
# 32,
# 3, 3, 3)
# Swichter().read_head('/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/modeltools/yolo/head')
# Swichter().copy_add_head('datas/model.0.0.weight', 'datas/conv1_0', '')
import datetime
import json import json
import os
import google.protobuf as pbg
import framework_pb2 as framework_pb2
def loadmdl(json_path): def loadmdl(json_path):
......
import os import os
import framework_pb2 as framework_pb2 from core import framework_pb2 as framework_pb2
def read_model(model_path): def read_model(model_path):
...@@ -16,7 +16,7 @@ def read_model(model_path): ...@@ -16,7 +16,7 @@ def read_model(model_path):
# print desc.blocks # print desc.blocks
except IOError: except IOError:
print ": File not found. Creating a new file." print ": File not found."
def get_file_size(file_path): def get_file_size(file_path):
...@@ -26,5 +26,5 @@ def get_file_size(file_path): ...@@ -26,5 +26,5 @@ def get_file_size(file_path):
return round(fsize, 2) return round(fsize, 2)
path = "newyolo/__model__" path = '/Users/xiebaiyuan/PaddleProject/paddle-mobile/tools/python/modeltools/mobilenet/datas/sourcemodels/mobilenet_example/mobilenet/__model__'
read_model(path) read_model(path)
import json import json
import os
import framework_pb2 as framework_pb2 from core import framework_pb2 as framework_pb2, op_types as types
import op_types as types from yolo.swicher import Swichter
from swicher import Swichter
import shutil import shutil
...@@ -40,10 +38,10 @@ class Converter: ...@@ -40,10 +38,10 @@ class Converter:
print self.program_desc.blocks print self.program_desc.blocks
print 'convert end.....' print 'convert end.....'
desc_serialize_to_string = self.program_desc.SerializeToString() desc_serialize_to_string = self.program_desc.SerializeToString()
shutil.rmtree('newyolo/') shutil.rmtree('yolo/datas/newyolo/')
shutil.copytree('multiobjects/float32s_nchw_with_head', 'newyolo/') shutil.copytree('yolo/datas/multiobjects/float32s_nchw_with_head/', 'yolo/datas/newyolo/')
f = open("newyolo/__model__", "wb") f = open("yolo/datas/newyolo/__model__", "wb")
f.write(desc_serialize_to_string) f.write(desc_serialize_to_string)
f.close() f.close()
...@@ -312,9 +310,9 @@ class Converter: ...@@ -312,9 +310,9 @@ class Converter:
if dims_size == 4: if dims_size == 4:
# convert weight from nhwc to nchw # convert weight from nhwc to nchw
Swichter().nhwc2nchw_one_slice_add_head( Swichter().nhwc2nchw_one_slice_add_head(
'/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nhwc/' + j + '.bin', 'yolo/datas/multiobjects/float32s_nhwc/' + j + '.bin',
'/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nchw_with_head/' + j, 'yolo/datas/multiobjects/float32s_nchw_with_head/' + j,
'/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nchw/' + j + '.tmp', 'yolo/datas/multiobjects/float32s_nchw/' + j + '.tmp',
dims_of_matrix[0], dims_of_matrix[0],
dims_of_matrix[1], dims_of_matrix[1],
dims_of_matrix[2], dims_of_matrix[2],
...@@ -322,14 +320,14 @@ class Converter: ...@@ -322,14 +320,14 @@ class Converter:
) )
else: else:
Swichter().copy_add_head( Swichter().copy_add_head(
'/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nhwc/' + j + '.bin', 'yolo/datas/multiobjects/float32s_nhwc/' + j + '.bin',
'/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nchw_with_head/' + j, 'yolo/datas/multiobjects/float32s_nchw_with_head/' + j,
'/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nchw/' + j + '.tmp' 'yolo/datas/multiobjects/float32s_nchw/' + j + '.tmp'
) )
else: else:
vars_add.persistable = 0 vars_add.persistable = 0
mdl_path = "/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/YOLO_Universal.json" mdl_path = "yolo/datas/multiobjects/YOLO_Universal.json"
converter = Converter(mdl_path) converter = Converter(mdl_path)
converter.convert() converter.convert()
...@@ -58,7 +58,7 @@ class Swichter: ...@@ -58,7 +58,7 @@ class Swichter:
to_file = open(to_file_name, "wb") to_file = open(to_file_name, "wb")
tmp = tmp_file.read() tmp = tmp_file.read()
head = self.read_head('/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/yolo/conv1_biases') head = self.read_head('yolo/datas/yolo/head')
to_file.write(head) to_file.write(head)
to_file.write(tmp) to_file.write(tmp)
tmp_file.close() tmp_file.close()
...@@ -77,7 +77,7 @@ class Swichter: ...@@ -77,7 +77,7 @@ class Swichter:
to_file = open(to_file_name, "wb") to_file = open(to_file_name, "wb")
# tmp_file = open(tmp_file_name, "wb") # tmp_file = open(tmp_file_name, "wb")
head = self.read_head('/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/yolo/conv1_biases') head = self.read_head('yolo/datas/yolo/head')
to_file.write(head) to_file.write(head)
to_file.write(from_file.read()) to_file.write(from_file.read())
from_file.close() from_file.close()
...@@ -96,7 +96,7 @@ class Swichter: ...@@ -96,7 +96,7 @@ class Swichter:
to_file = open(to_file_name, "wb") to_file = open(to_file_name, "wb")
# tmp_file = open(tmp_file_name, "wb") # tmp_file = open(tmp_file_name, "wb")
head = self.read_head('/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/yolo/conv1_biases') head = self.read_head('yolo/datas/yolo/head')
to_file.write(head) to_file.write(head)
to_file.write(read) to_file.write(read)
from_file.close() from_file.close()
...@@ -104,12 +104,12 @@ class Swichter: ...@@ -104,12 +104,12 @@ class Swichter:
pass pass
# Swichter().nhwc2nchw_one_slice_add_head( # Swichter().nhwc2nchw_one_slice_add_head(
# '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nhwc/conv1_0.bin', # '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/modeltools/multiobjects/float32s_nhwc/conv1_0.bin',
# '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nchw_with_head/conv1_0', # '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/modeltools/multiobjects/float32s_nchw_with_head/conv1_0',
# '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nchw/.tmp', # '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/modeltools/multiobjects/float32s_nchw/.tmp',
# 32, # 32,
# 3, 3, 3) # 3, 3, 3)
# Swichter().read_head('/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/yolo/conv1_biases') # Swichter().read_head('/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/modeltools/yolo/head')
# Swichter().copy_add_head('datas/model.0.0.weight', 'datas/conv1_0', '') # Swichter().copy_add_head('datas/model.0.0.weight', 'datas/conv1_0', '')
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册