提交 cb46b906 编写于 作者: H hjchen2

Merge branch 'develop' of https://github.com/PaddlePaddle/paddle-mobile into dev-latest

......@@ -92,3 +92,4 @@ metal/images/
metal/paddle-mobile/paddle-mobile/CPU/libpaddle-mobile.a
*.xcuserdatad/
*/xcuserdata/
/venv/
......@@ -2,9 +2,9 @@ cmake_minimum_required(VERSION 3.0)
project(paddle-mobile)
# select the platform to build
option(CPU "armv7 with neon support" ON)
option(CPU "armv7 with neon support" OFF)
option(MALI_GPU "mali gpu support" OFF)
option(FPGA "fpga support" OFF)
option(FPGA "fpga support" ON)
option(USE_OPENMP "openmp support" OFF)
option(DEBUGING "enable debug mode" ON)
......@@ -20,6 +20,7 @@ set(CMAKE_CXX_FLAGS "-O3 -s -DNDEBUG ${CMAKE_CXX_FLAGS}")
if(IS_IOS)
set(CMAKE_CXX_FLAGS "-mfpu=neon -marm -fobjc-abi-version=2 -fobjc-arc \
-std=gnu++11 -stdlib=libc++ -isysroot ${CMAKE_OSX_SYSROOT} ${CMAKE_CXX_FLAGS}")
add_compile_options(-fembed-bitcode)
else()
set(CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS}")
endif()
......@@ -28,7 +29,10 @@ if(DEBUGING)
message(STATUS "debugging mode")
add_definitions(-DPADDLE_MOBILE_DEBUG)
else()
add_definitions(-fvisibility=hidden -fvisibility-inlines-hidden)
if(FPGA)
else()
add_definitions(-fvisibility=hidden -fvisibility-inlines-hidden)
endif()
endif()
if(USE_EXCEPTION)
......@@ -92,8 +96,7 @@ else()
endif()
if(FPGA)
set(DEBUGING ON)
add_definitions(-DPADDLE_MOBILE_DEBUG)
message("FPGA mode enabled")
add_definitions(-DPADDLE_MOBILE_FPGA)
else()
file(GLOB_RECURSE _tmp_list src/operators/kernel/fpga/*.cpp src/operators/kernel/fpga/*.cc)
......@@ -176,6 +179,10 @@ if(DEBUGING)
else()
add_subdirectory(test)
endif()
elseif(FPGA)
add_subdirectory(test)
endif()
......@@ -110,7 +110,8 @@ Paddle-Mobile是PaddlePaddle组织下的项目,是一个致力于嵌入式平
### 开发文档
开发文档主要是关于编译、运行等问题。做为开发者,它可以和贡献文档共同结合使用。
[开发文档链接](https://github.com/PaddlePaddle/paddle-mobile/blob/develop/doc/development_doc.md)
* [iOS](https://github.com/PaddlePaddle/paddle-mobile/blob/develop/doc/development_ios.md)
* [Android](https://github.com/PaddlePaddle/paddle-mobile/blob/develop/doc/development_android.md)
### 贡献文档
- [贡献文档链接](https://github.com/PaddlePaddle/paddle-mobile/blob/develop/CONTRIBUTING.md)
......
|mobilenet arm v7|1线程|2线程|4线程|
|------------|----|-----|-----|
|麒麟970(ms)|108.180|63.935|37.545|
|麒麟960(ms)|108.588|63.073|36.822|
|高通845(ms)|85.952|48.890|28.641|
|高通835(ms)|105.434|62.752|37.131|
|||||
|mobilenetssd arm v7|1线程|2线程|4线程|
|麒麟970(ms)|212.686|127.205|77.485|
|麒麟960(ms)|212.641|125.338|75.250|
|高通845(ms)|182.863|95.671|56.857|
|高通835(ms)|213.849|127.717|77.006|
|||||
|googlenet(v1) arm v7|1线程|2线程|4线程|
|麒麟970(ms)|335.288|234.559|161.295|
|麒麟960(ms)|354.443|232.642|157.815|
|高通845(ms)|282.007|173.146|122.148|
|高通835(ms)|341.250|233.354|158.554|
|||||
|squeezenet arm v7|1线程|2线程|4线程|
|麒麟970(ms)|83.726|57.944|36.923|
|麒麟960(ms)|85.835|55.762|36.496|
|高通845(ms)|71.301|41.618|28.785|
|高通835(ms)|82.407|56.176|36.455|
|||||
|yolo arm v7|1线程|2线程|4线程|
|麒麟970(ms)|129.658|79.993|49.969|
|麒麟960(ms)|130.208|78.791|48.390|
|高通845(ms)|109.244|61.736|40.600|
|高通835(ms)|130.402|80.863|50.359|
测试机型信息:
麒麟970:荣耀v10 (2.36GHz * 4 + 1.8GHz * 4)
麒麟960:华为mate9 (2.36GHz * 4 + 1.8GHz * 4)
骁龙835:小米6 (2.45GHz * 4 + 1.9GHz * 4)
骁龙845:OPPO FindX (2.80GHz * 4 + 1.8GHz * 4)
\ No newline at end of file
|mobilenetfssd|速度|
|------------|-----|
|A9(ms)|33.78|
|A10(ms)|24.05|
|A11(ms)|17.15|
|||
|genet|速度|
|A9(ms) |3.49|
|A10(ms)|2.54|
|A11(ms)|1.43|
\ No newline at end of file
......@@ -3,7 +3,7 @@
#### 以下是 paddle-mobile 代码的执行流程图:
![执行流程图](http://otkwwi4x8.bkt.clouddn.com/2018-07-02-15305189473720.png)
![执行流程图](http://mms-graph.bj.bcebos.com/paddle-mobile/git_images/flow_chart.png)
#### 主要分为: Loader 模块、 Program 模块、 Executor 模块、 op 模块、 kernel 模块、scope variable Tensor 模块
......@@ -14,12 +14,12 @@
先来看一下模型, 模型分为两种结构:
一种为参数文件是散开的, 如下图, 红框为模型结构的 protobuf 文件, 其余为参数文件
![模型描述](http://otkwwi4x8.bkt.clouddn.com/2018-07-02-15305190629577.png)
![模型描述](http://mms-graph.bj.bcebos.com/paddle-mobile/git_images/model_desc.png)
另一种为参数文件结合在一起的, 如下图, 红框内为模型结构描述的 protobuf 文件, 另一个文件为结合在一起的参数文件
![模型描述combined](http://otkwwi4x8.bkt.clouddn.com/2018-07-02-15305191057130.png)
![模型描述combined](http://mms-graph.bj.bcebos.com/paddle-mobile/git_images/model_desc_combined.png)
loader 模块的作用是将模型结构信息 load 进内存, 将红框内的 protobuf 文件 load 进内存, 并对模型结构进行优化(如将几个细粒度的 op 融合成 粗粒度的 op, 如将 conv、 add、 batchnorm、 relu 融合为 conv\_add\_batchnorm\_relu).
......@@ -161,7 +161,7 @@ sh build.sh android yolo
### 五. kernel
kernel 为 op 的底层运算实现, 主要有两个函数, Init 和 Compute, 分别用来初始化、预处理 和 运算操作, 值得提出的是, kernel 会根据泛型特化到不同的平台, 如图所示:
![设备特化]![](http://otkwwi4x8.bkt.clouddn.com/2018-07-02-15305191401976.png)
![设备特化](http://mms-graph.bj.bcebos.com/paddle-mobile/git_images/devices.png)
不同平台的 kernel 实现, 为同一个 kernel 类不同泛型的特化实现, 目前有三个平台, arm、mali、fpga, 图中的 central-arm-func\ 目录为 op kernel 的 arm 实现, 它承担了 arm\ 目录下 kernel 的底层实现, 同时 arm 处理器作为中央处理器, central-arm-func\ 也可以作为其他协处理器的底层实现, 如: fpga 的某一个 op kernel 还没有 fpga 协处理器的实现, 就可以直接调用使用这里的 arm 实现.
......
### iOS&Android开发文档
# iOS开发文档
## 编译
```sh
# 在 paddle-mobile 目录下:
cd tools
sh build.sh ios
# 如果只想编译某个特定模型的 op, 则需执行以下命令
sh build.sh ios googlenet
# 在这个文件夹下, 你可以拿到生成的 .a 库
cd ../build/release/ios/build
```
#### 常见问题:
1. No iOS SDK's found in default search path ...
这个问题是因为 tools/ios-cmake/ios.toolchain.cmake 找不到你最近使用的 iOS SDK 路径, 所以需要自己进行指定,
以我当前的环境为例: 在 tools/ios-cmake/ios.toolchain.cmake 143行前添加我本地的 iOS SDK 路径: set(CMAKE_IOS_SDK_ROOT "/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS.sdk")
## 集成
```
将上一步生成的:
libpaddle-mobile.a
/src/ios_io/ 下的
PaddleMobile.h
```
拖入工程
#### oc 接口
接口如下:
```
/*
创建对象
*/
- (instancetype)init;
/*
load 模型, 开辟内存
*/
- (BOOL)load:(NSString *)modelPath andWeightsPath:(NSString *)weighsPath;
/*
进行预测, means 和 scale 为训练模型时的预处理参数, 如训练时没有做这些预处理则直接使用 predict
*/
- (NSArray *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim means:(NSArray<NSNumber *> *)means scale:(float)scale;
/*
进行预测
*/
- (NSArray *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim;
/*
清理内存
*/
- (void)clear;
```
# Android开发文档
用户可通过如下两种方式,交叉编译Android平台上适用的paddle-mobile库:
......
# iOS开发文档
## CPU
需要: xcode
### 编译
```sh
# 在 paddle-mobile 目录下:
cd tools
sh build.sh ios
# 如果只想编译某个特定模型的 op, 则需执行以下命令
sh build.sh ios googlenet
# 在这个文件夹下, 你可以拿到生成的 .a 库
cd ../build/release/ios/build
```
#### 常见问题:
1. No iOS SDK's found in default search path ...
这个问题是因为 tools/ios-cmake/ios.toolchain.cmake 找不到你最近使用的 iOS SDK 路径, 所以需要自己进行指定,
以我当前的环境为例: 在 tools/ios-cmake/ios.toolchain.cmake 143行前添加我本地的 iOS SDK 路径: set(CMAKE_IOS_SDK_ROOT "/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS.sdk")
### 集成
```
将上一步生成的:
libpaddle-mobile.a
/src/ios_io/ 下的
PaddleMobile.h
```
拖入工程
#### oc 接口
接口如下:
```
/*
创建对象
*/
- (instancetype)init;
/*
load 模型, 开辟内存
*/
- (BOOL)load:(NSString *)modelPath andWeightsPath:(NSString *)weighsPath;
/*
进行预测, means 和 scale 为训练模型时的预处理参数, 如训练时没有做这些预处理则直接使用 predict
*/
- (NSArray *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim means:(NSArray<NSNumber *> *)means scale:(float)scale;
/*
进行预测
*/
- (NSArray *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim;
/*
清理内存
*/
- (void)clear;
```
## GPU
需要: xcode、cocoapods
```
# 在 paddle-mobile 目录下:
cd metal
pod install
open paddle-mobile.xcworkspace
```
## Paddle-Mobile
This folder is used to develop metal version for ios gpu
需要: xcode、 cocoapods
```
pod install
open paddle-mobile.xcworkspace
```
Demo 所需依赖的模型可在[这里](http://mms-graph.bj.bcebos.com/paddle-mobile%2FmodelsAndImages.zip)下载
......@@ -62,6 +62,8 @@ const char *G_OP_TYPE_CRF = "crf_decoding";
const char *G_OP_TYPE_BILINEAR_INTERP = "bilinear_interp";
const char *G_OP_TYPE_FLATTEN = "flatten";
const char *G_OP_TYPE_SHAPE = "shape";
const char *G_OP_TYPE_ELEMENTWISE_MUL = "elementwise_mul";
const char *G_OP_TYPE_SUM = "sum";
const char *G_OP_TYPE_QUANTIZE = "quantize";
const char *G_OP_TYPE_DEQUANTIZE = "dequantize";
......@@ -115,7 +117,8 @@ std::unordered_map<
{G_OP_TYPE_FLATTEN, {{"X"}, {"Out"}}},
{G_OP_TYPE_SHAPE, {{"Input"}, {"Out"}}},
{G_OP_TYPE_CONV_TRANSPOSE, {{"Input"}, {"Output"}}},
{G_OP_TYPE_SUM, {{"X"}, {"Out"}}},
{G_OP_TYPE_ELEMENTWISE_MUL, {{"X", "Y"}, {"Out"}}},
{G_OP_TYPE_QUANTIZE, {{"X"}, {"Out", "OutScale"}}},
{G_OP_TYPE_DEQUANTIZE, {{"X", "Scale"}, {"Out"}}}};
} // namespace paddle_mobile
......@@ -126,6 +126,8 @@ extern const char *G_OP_TYPE_REGION;
extern const char *G_OP_TYPE_FUSION_CONV_BN;
extern const char *G_OP_TYPE_CONV_TRANSPOSE;
extern const char *G_OP_TYPE_PRELU;
extern const char *G_OP_TYPE_SUM;
extern const char *G_OP_TYPE_ELEMENTWISE_MUL;
extern const char *G_OP_TYPE_QUANTIZE;
extern const char *G_OP_TYPE_DEQUANTIZE;
......
......@@ -12,14 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <cstdlib>
#pragma once
#include <cstdlib>
#include <cstring>
#include <string>
#include "common/enforce.h"
#include "common/log.h"
#pragma once
namespace paddle_mobile {
template <int ID, typename Type>
struct IDToType {
typedef Type type_t;
......@@ -79,13 +81,13 @@ struct Variant {
template <typename T, typename... Args>
void Set(Args &&... args) {
helper::Destroy(type_id, &data.data);
new (&data.data) T(std::forward<Args>(args)...);
helper::Destroy(type_id, data.data);
new (data.data) T(std::forward<Args>(args)...);
type_id = typeid(T).hash_code();
}
void SetString(std::string &string) {
// helper::Destroy(type_id, &data);
helper::Destroy(type_id, data.data);
type_id = typeid(std::string).hash_code();
strcpy(data.data, string.c_str());
}
......@@ -109,7 +111,7 @@ struct Variant {
"stl lib with string copy)");
exit(0);
} else if (type_id == typeid(T).hash_code()) {
return *const_cast<T *>(reinterpret_cast<const T *>(&data));
return *const_cast<T *>(reinterpret_cast<const T *>(data.data));
} else {
PADDLE_MOBILE_THROW_EXCEPTION(" bad cast in variant");
exit(0);
......@@ -122,7 +124,8 @@ struct Variant {
static inline size_t invalid_type() { return typeid(void).hash_code(); }
typedef VariantHelper<Ts...> helper;
size_t type_id;
RawData<helper::size> data;
// todo use an anto size to suite this.
RawData<64> data;
};
template <typename T>
......
......@@ -22,7 +22,7 @@ limitations under the License. */
#include "fpga/filter.h"
#include "fpga/image.h"
#define FPGA_TEST_MODE
#define PADDLE_MOBILE_OS_LINUX
// #define PADDLE_MOBILE_OS_LINUX
namespace paddle_mobile {
namespace fpga {
......@@ -125,6 +125,7 @@ float fp16_2_fp32(half fp16_num) {
}
int ComputeBasicConv(const struct ConvArgs &args) {
#ifdef FPGA_TEST_MODE
DLOG << "======Compute Basic Conv======";
DLOG << " relu_enabled:" << args.relu_enabled
<< " sb_address:" << args.sb_address
......@@ -144,11 +145,11 @@ int ComputeBasicConv(const struct ConvArgs &args) {
<< " stride_w:" << args.kernel.stride_w;
DLOG << " out_address:" << args.output.address
<< " out_scale_address:" << args.output.scale_address;
#endif
return do_ioctl(IOCTL_CONFIG_CONV, &args);
}
int ComputeFpgaConv(const struct WrapperConvArgs &args) {
int ComputeFpgaConv(const struct SplitConvArgs &args) {
#ifdef FPGA_TEST_MODE
DLOG << "=============ComputeFPGAConv===========";
DLOG << " filter_num:" << args.filter_num
......@@ -192,8 +193,9 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
#ifdef FPGA_TEST_MODE
DLOG << "=============ComputeFpgaEWAdd===========";
DLOG << " relu_enabled:" << args.relu_enabled << " const0:" << args.const0
<< " const1:" << args.const1;
DLOG << " relu_enabled:" << args.relu_enabled
<< " const0:" << fp16_2_fp32(int16_t(args.const0))
<< " const1:" << fp16_2_fp32(int16_t(args.const1));
DLOG << " image0_address:" << args.image0.address
<< " image0_scale_address:" << args.image0.scale_address
<< " image0_channels:" << args.image0.channels
......@@ -381,10 +383,10 @@ void format_concat_output(framework::Tensor *out, int height, int width,
out->reset_data_ptr(data_ptr);
}
void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input,
framework::Tensor *out, framework::Tensor *filter,
bool relu_enabled, int group_num, int stride_h, int stride_w,
int padding_h, int padding_w, float *bs_ptr) {
void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
framework::Tensor *out, framework::Tensor *filter,
bool relu_enabled, int group_num, int stride_h,
int stride_w, int padding_h, int padding_w, float *bs_ptr) {
auto input_ptr = input->data<float>();
auto filter_ptr = filter->data<float>();
auto out_ptr = out->data<float>();
......@@ -401,8 +403,8 @@ void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input,
arg->concat_arg.image_num = arg->split_num;
arg->concat_arg.image_out = out_ptr;
arg->concat_arg.scale_out = out->scale;
arg->concat_arg.height = (uint32_t)filter->dims()[2];
arg->concat_arg.width = (uint32_t)filter->dims()[3];
arg->concat_arg.height = (uint32_t)out->dims()[2];
arg->concat_arg.width = (uint32_t)out->dims()[3];
int n = arg->split_num;
arg->concat_arg.images_in =
......@@ -411,7 +413,6 @@ void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input,
(float **)fpga_malloc(n * sizeof(float *)); // NOLINT
arg->concat_arg.channel_num =
(uint32_t *)fpga_malloc(n * sizeof(uint32_t)); // NOLINT
arg->concat_arg.image_out = out_ptr;
auto channel = (int)out->dims()[1]; // NOLINT
int filter_num_per_div = get_filter_num_per_div(filter, group_num);
......
......@@ -89,7 +89,7 @@ struct ConcatArgs {
uint32_t width;
};
struct WrapperConvArgs {
struct SplitConvArgs {
uint32_t split_num;
uint32_t group_num;
uint32_t filter_num;
......@@ -98,6 +98,14 @@ struct WrapperConvArgs {
struct ConcatArgs concat_arg;
};
struct GroupConvArgs {
uint32_t group_num;
uint32_t filter_num;
struct ImageOutputArgs output;
struct SplitConvArgs* conv_args;
struct ConcatArgs concat_arg;
};
struct PoolingArgs {
int16_t mode; // mode: 0:max, 1:avg
half kernel_reciprocal;
......@@ -159,30 +167,6 @@ struct MemoryCacheArgs {
#define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 28, struct FpgaRegReadArgs)
#define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 29, struct FpgaRegWriteArgs)
enum FPGA_ERR_TYPE {
ERR_IOCTL_CMD = -1,
ERR_TIMEOUT = -2,
ERR_COMPLETION_TIMEOUT = -3,
ERR_INVALID_FPGA_ADDR = -4,
ERR_NOMEM = -5,
ERR_NO_RESERVE_MEM = -6,
ERR_COPY_FROM_USER = -7,
ERR_COPY_TO_USER = -8,
ERR_DEL_TIMER = -9,
ERR_ENABLE_MSI = -10,
ERR_REGISTER_IRQ = -11,
ERR_PCIE_REGISTER = -12,
ERR_PCIE_PROBE = -13,
ERR_REGISTER_BLOCK = -14,
ERR_ALLOC_GENDISK = -15,
ERR_INIT_QUEUE = -16,
ERR_WAIT = -17,
ERR_ECC_ERROR = -31,
ERR_FPGA_FAIL_STOP = -64,
ERR_FPGA_DEBUG_STOP = -113,
DEV_TMP_UNAVAILABLE = -128
};
//============================== API =============================
int open_device();
......@@ -195,7 +179,7 @@ int fpga_flush(void* address, size_t size);
int fpga_invalidate(void* address, size_t size);
int PerformBypass(const struct BypassArgs& args);
int ComputeFpgaConv(const struct WrapperConvArgs& args);
int ComputeFpgaConv(const struct SplitConvArgs& args);
int ComputeFpgaPool(const struct PoolingArgs& args);
int ComputeFpgaEWAdd(const struct EWAddArgs& args);
int ComputeFPGAConcat(const struct ConcatArgs& args);
......@@ -220,10 +204,10 @@ void format_bias_scale_array(float** bias_scale_array,
void format_concat_output(framework::Tensor* out, int height, int width,
int image_num, uint32_t* channel_num);
void fill_conv_arg(struct WrapperConvArgs* arg, framework::Tensor* input,
framework::Tensor* out, framework::Tensor* filter,
bool relu_enabled, int group_num, int stride_h, int stride_w,
int padding_h, int padding_w, float* bs_ptr);
void fill_split_arg(struct SplitConvArgs* arg, framework::Tensor* input,
framework::Tensor* out, framework::Tensor* filter,
bool relu_enabled, int group_num, int stride_h,
int stride_w, int padding_h, int padding_w, float* bs_ptr);
half fp32_2_fp16(float fp32_num);
float fp16_2_fp32(half fp16_num);
......
......@@ -27,6 +27,9 @@ void align_element(float **data_in, int num_per_div_before_alignment, int num) {
(num + num_per_div_before_alignment - 1) / num_per_div_before_alignment;
int num_per_div_after_alignment =
align_to_x(num_per_div_before_alignment, BS_NUM_ALIGNMENT);
if (num_per_div_before_alignment == num_per_div_after_alignment) {
return;
}
int num_element =
2 * div_num * num_per_div_after_alignment; // including bias & scale
float *ptr_aligned =
......
......@@ -21,7 +21,10 @@ namespace paddle_mobile {
namespace fpga {
namespace filter {
int calc_division_capacity(int chw) { return 2048 / ((chw + 15) / 16) * 32; }
int calc_division_capacity(int chw) {
int n = 2048 / ((chw + 15) / 16) * 32;
return n < 2048 ? n : 2048;
}
int calc_split_num(int num, int division_capacity) {
return (num + division_capacity - 1) / division_capacity;
......@@ -210,12 +213,12 @@ void format_filter(float **data_in, int num, int channel, int height, int width,
align_to_x(num_per_div_before_alignment, FILTER_NUM_ALIGNMENT);
int div_num =
(num + num_per_div_before_alignment - 1) / num_per_div_before_alignment;
int num_after_alignment = num_per_div_after_alignment * div_num;
int residual = num % num_per_div_before_alignment;
int num_after_alignment = num_per_div_after_alignment *
((residual == 0) ? div_num : (div_num - 1)) +
align_to_x(residual, FILTER_NUM_ALIGNMENT);
quantize(data_in, data_size, max);
char **quantize_data = (char **)data_in; // NOLINT
convert_to_hwc(quantize_data, num, channel, height, width);
align_element(quantize_data, num, chw);
align_num(quantize_data, num_per_div_before_alignment, num, chw);
......
......@@ -199,6 +199,12 @@ LOAD_OP3(pool2d, CPU, MALI_GPU, FPGA);
#ifdef MULTICLASSNMS_OP
LOAD_OP1(multiclass_nms, CPU);
#endif
#ifdef SUM_OP
LOAD_OP1(sum, CPU);
#endif
#ifdef ELEMENTWISEMUL_OP
LOAD_OP1(elementwise_mul, CPU);
#endif
#ifdef SLICE_OP
LOAD_OP2(slice, CPU, MALI_GPU);
#endif
......@@ -206,5 +212,8 @@ LOAD_OP2(slice, CPU, MALI_GPU);
LOAD_OP2(fusion_conv_bn, CPU, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_bn);
#endif
#ifdef ELEMENTWISESUB_OP
LOAD_OP1(elementwise_sub, CPU)
#endif
LOAD_OP1(quantize, CPU);
LOAD_OP1(dequantize, CPU);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include <initializer_list>
#include <vector>
#include "framework/tensor.h"
#include "framework/tensor_util.h"
namespace paddle_mobile {
namespace framework {
// Vector<T> implements the std::vector interface, and can get Data or
// MutableData from any place. The data will be synced implicitly inside.
template <typename T>
class Vector {
public:
using value_type = T;
// Default ctor. Create empty Vector
Vector() { InitEmpty(); }
// Fill vector with value. The vector size is `count`.
explicit Vector(size_t count, const T& value = T()) {
InitEmpty();
if (count != 0) {
resize(count);
T* ptr = begin();
for (size_t i = 0; i < count; ++i) {
ptr[i] = value;
}
}
}
// Ctor with init_list
Vector(std::initializer_list<T> init) {
if (init.size() == 0) {
InitEmpty();
} else {
InitByIter(init.size(), init.begin(), init.end());
}
}
// implicit cast from std::vector.
template <typename U>
Vector(const std::vector<U>& dat) { // NOLINT
if (dat.size() == 0) {
InitEmpty();
} else {
InitByIter(dat.size(), dat.begin(), dat.end());
}
}
// Copy ctor
Vector(const Vector<T>& other) { this->operator=(other); }
// Copy operator
Vector<T>& operator=(const Vector<T>& other) {
if (other.size() != 0) {
this->InitByIter(other.size(), other.begin(), other.end());
} else {
InitEmpty();
}
return *this;
}
// Move ctor
Vector(Vector<T>&& other) {
this->size_ = other.size_;
this->flag_ = other.flag_;
if (other.cuda_vec_.memory_size()) {
this->cuda_vec_.ShareDataWith(other.cuda_vec_);
}
if (other.cpu_vec_.memory_size()) {
this->cpu_vec_.ShareDataWith(other.cpu_vec_);
}
}
// CPU data access method. Mutable.
T& operator[](size_t i) {
MutableCPU();
return const_cast<T*>(cpu_vec_.data<T>())[i];
}
// CPU data access method. Immutable.
const T& operator[](size_t i) const {
// ImmutableCPU();
return cpu_vec_.data<T>()[i];
}
// std::vector iterator methods. Based on CPU data access method
size_t size() const { return size_; }
T* begin() { return capacity() == 0 ? &EmptyDummy() : &this->operator[](0); }
T* end() {
return capacity() == 0 ? &EmptyDummy() : &this->operator[](size());
}
T& front() { return *begin(); }
T& back() {
auto it = end();
--it;
return *it;
}
const T* begin() const {
return capacity() == 0 ? &EmptyDummy() : &this->operator[](0);
}
const T* end() const {
return capacity() == 0 ? &EmptyDummy() : &this->operator[](size());
}
const T* cbegin() const { return begin(); }
const T* cend() const { return end(); }
const T& back() const {
auto it = end();
--it;
return *it;
}
T* data() { return begin(); }
const T* data() const { return begin(); }
const T& front() const { return *begin(); }
// end of std::vector iterator methods
// assign this from iterator.
// NOTE: the iterator must support `end-begin`
template <typename Iter>
void assign(Iter begin, Iter end) {
InitByIter(end - begin, begin, end);
}
// push_back. If the previous capacity is not enough, the memory will
// double.
void push_back(T elem) {
if (size_ + 1 > capacity()) {
reserve((size_ + 1) << 1);
}
*end() = elem;
++size_;
}
// extend a vector by iterator.
// NOTE: the iterator must support end-begin
template <typename It>
void Extend(It begin, It end) {
size_t pre_size = size_;
resize(pre_size + (end - begin));
T* ptr = this->begin() + pre_size;
for (; begin < end; ++begin, ++ptr) {
*ptr = *begin;
}
}
// resize the vector
void resize(size_t size) {
if (size + 1 <= capacity()) {
size_ = size;
} else {
MutableCPU();
Tensor cpu_tensor;
T* ptr = cpu_tensor.mutable_data<T>(
framework::make_ddim({static_cast<int64_t>(size)}));
const T* old_ptr =
cpu_vec_.memory_size() == 0 ? nullptr : cpu_vec_.data<T>();
if (old_ptr != nullptr) {
std::copy(old_ptr, old_ptr + size_, ptr);
}
size_ = size;
cpu_vec_.ShareDataWith(cpu_tensor);
}
}
// clear
void clear() {
size_ = 0;
flag_ = kDirty | kDataInCPU;
}
size_t capacity() const {
return cpu_vec_.memory_size() / SizeOfType(typeid(T));
}
// reserve data
void reserve(size_t size) {
size_t pre_size = size_;
resize(size);
resize(pre_size);
}
// implicit cast operator. Vector can be cast to std::vector implicitly.
operator std::vector<T>() const {
std::vector<T> result;
result.resize(size());
std::copy(begin(), end(), result.begin());
return result;
}
bool operator==(const Vector<T>& other) const {
if (size() != other.size()) return false;
auto it1 = cbegin();
auto it2 = other.cbegin();
for (; it1 < cend(); ++it1, ++it2) {
if (*it1 != *it2) {
return false;
}
}
return true;
}
private:
void InitEmpty() {
size_ = 0;
flag_ = kDataInCPU;
}
template <typename Iter>
void InitByIter(size_t size, Iter begin, Iter end) {
T* ptr = this->cpu_vec_.template mutable_data<T>(
framework::make_ddim({static_cast<int64_t>(size)}));
for (size_t i = 0; i < size; ++i) {
*ptr++ = *begin++;
}
flag_ = kDataInCPU | kDirty;
size_ = size;
}
enum DataFlag {
kDataInCPU = 0x01,
kDataInCUDA = 0x02,
// kDirty means the data has been changed in one device.
kDirty = 0x10
};
void MutableCPU() { flag_ = kDirty | kDataInCPU; }
void UnsetFlag(int flag) const { flag_ &= ~flag; }
void SetFlag(int flag) const { flag_ |= flag; }
static T& EmptyDummy() {
static T dummy = T();
return dummy;
}
mutable int flag_;
mutable Tensor cpu_vec_;
mutable Tensor cuda_vec_;
size_t size_;
};
} // namespace framework
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "framework/selected_rows.h"
namespace paddle_mobile {
namespace framework {
struct ReAllocateVisitor {
ReAllocateVisitor(framework::Tensor* tensor, const framework::DDim& dims)
: tensor_(tensor), dims_(dims) {}
template <typename T>
void operator()() const {
framework::Tensor cpu_tensor;
T* ptr = cpu_tensor.mutable_data<T>(dims_);
const T* old_ptr =
tensor_->memory_size() == 0 ? nullptr : tensor_->data<T>();
if (old_ptr != nullptr) {
std::copy(old_ptr, old_ptr + tensor_->numel(), ptr);
}
tensor_->ShareDataWith(cpu_tensor);
}
framework::Tensor* tensor_;
framework::DDim dims_;
};
// TensorCopyVisitor(value, i * value_width, *value_.get(),
// index * value_width, value_width));
struct TensorCopyVisitor {
TensorCopyVisitor(framework::Tensor* dst, int64_t dst_offset,
const framework::Tensor src, int64_t src_offset,
int64_t size)
: dst_(dst),
dst_offset_(dst_offset),
src_(src),
src_offset_(src_offset),
size_(size) {}
template <typename T>
void operator()() const {
// TODO(Yancey1989): support other place
memory::Copy(dst_->mutable_data<T>() + dst_offset_,
src_.data<T>() + src_offset_, size_ * sizeof(T));
}
framework::Tensor* dst_;
int64_t dst_offset_;
framework::Tensor src_;
int64_t src_offset_;
int64_t size_;
};
bool SelectedRows::HasKey(int64_t key) const {
return std::find(rows_.begin(), rows_.end(), key) == rows_.end() ? false
: true;
}
// std::vector<int64_t> SelectedRows::Get(std::vector<int64_t> keys,
// framework::Tensor* value) const {
// PADDLE_MOBILE_ENFORCE(value->IsInitialized(),
// "The value tensor should be initialized.");
// std::vector<int64_t> non_keys;
// int64_t value_width = value_->numel() / value_->dims()[0];
// PADDLE_MOBILE_ENFORCE(value_width == value->numel() / value->dims()[0],
// "output tensor should have the same shape with table "
// "execpt the dims[0].");
//
// for (size_t i = 0; i < keys.size(); ++i) {
// int64_t index = Index(keys[i]);
// if (index == -1) {
// non_keys.push_back(keys[i]);
// } else {
// framework::VisitDataType(
// framework::ToDataType(value_->type()),
// TensorCopyVisitor(value, i * value_width, *value_.get(),
// index * value_width, value_width));
// }
// }
// return non_keys;
//}
// bool SelectedRows::Set(int64_t key, const framework::Tensor& value) {
// PADDLE_MOBILE_ENFORCE(value.IsInitialized(), "The value should be
// initialized."); if (value_->IsInitialized()) {
// PADDLE_MOBILE_ENFORCE(
// value.type() == value_->type(),
// "The type of the value should be same with the original value");
// }
// PADDLE_MOBILE_ENFORCE(value.dims()[0] == static_cast<size_t>(1),
// "The first dim of value should be 1.");
// auto index = Index(key);
// bool is_new_key = false;
// if (index == -1) {
// rows_.push_back(key);
// index = rows_.size() - 1;
// is_new_key = true;
// // whether need to resize the table
// if (static_cast<int64_t>(rows_.size()) > value_->dims()[0]) {
// auto dims = value_->dims();
// dims[0] = (dims[0] + 1) << 1;
// framework::VisitDataType(framework::ToDataType(value.type()),
// ReAllocateVisitor(value_.get(), dims));
// }
// }
//
// framework::VisitDataType(
// framework::ToDataType(value.type()),
// TensorCopyVisitor(value_.get(),
// index * value_->numel() / value_->dims()[0], value,
// static_cast<int64_t>(0), value.numel()));
// return is_new_key;
//}
} // namespace framework
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include <vector>
#include "framework/lod_tensor.h"
#include "framework/mixed_vector.h"
#include "framework/tensor.h"
#include "memory/t_malloc.h"
namespace paddle_mobile {
namespace framework {
class SelectedRows {
/*
* @brief We can use the SelectedRows structure to reproduce a sparse table.
* A sparse table is a key-value structure that the key is an `int64_t`
* number,
* and the value is a Tensor which the first dimension is 0.
* You can use the following interface to operate the sparse table, and you
* can find
* some detail information from the comments of each interface:
*
* HasKey(key), whether the sparse table has the specified key.
* Set(key, value), set a key-value pair into the sparse table.
* Get(keys, value*), get value by given key list and apply it to the given
* value pointer
* with the specified offset.
*
*/
public:
SelectedRows(const std::vector<int64_t>& rows, const int64_t& height)
: rows_(rows), height_(height) {
value_.reset(new Tensor());
}
SelectedRows() {
height_ = 0;
value_.reset(new Tensor());
}
// platform::Place place() const { return value_->place(); }
const Tensor& value() const { return *value_; }
Tensor* mutable_value() { return value_.get(); }
int64_t height() const { return height_; }
void set_height(int64_t height) { height_ = height; }
const Vector<int64_t>& rows() const { return rows_; }
Vector<int64_t>* mutable_rows() { return &rows_; }
void set_rows(const Vector<int64_t>& rows) { rows_ = rows; }
/*
* @brief wheter has the specified key in the table.
*
* @return true if the key is exists.
*/
bool HasKey(int64_t key) const;
/*
* @brief Get value by the key list, if the
*
* @return a list of keys which does not exists in table
*/
std::vector<int64_t> Get(std::vector<int64_t> keys,
framework::Tensor* tensor) const;
/*
* @brief Set a key-value pair into the table.
* This function will double the value memory if it's not engouth.
*
* @note:
* 1. The first dim of the value should be 1
* 2. The value should be initialized and the data type
* should be the same with the table.
*
* @return true if the key is a new one, otherwise false
*
*/
bool Set(int64_t key, const Tensor& value);
/*
* @brief Get the index of key in rows
*
* @return -1 if the key does not exists.
*/
int64_t Index(int64_t key) const {
auto it = std::find(rows_.begin(), rows_.end(), key);
if (it == rows_.end()) {
return static_cast<int64_t>(-1);
}
return static_cast<int64_t>(std::distance(rows_.begin(), it));
}
DDim GetCompleteDims() const {
std::vector<int64_t> dims = vectorize(value_->dims());
dims[0] = height_;
return make_ddim(dims);
}
private:
// Notice: rows can be duplicate. We can have {0, 4, 7, 0, 5, 7, 9} here.
// SelectedRows are simply concated when adding together. Until a
// SelectedRows add a Tensor, will the duplicate rows be handled.
Vector<int64_t> rows_;
std::unique_ptr<Tensor> value_{nullptr};
int64_t height_;
};
/*
* Serialize/Desiralize SelectedRows to std::ostream
* You can pass ofstream or ostringstream to serilize to file
* or to a in memory string. GPU tensor will be copied to CPU.
*/
void SerializeToStream(std::ostream& os, const SelectedRows& selected_rows);
void DeserializeFromStream(std::istream& is, SelectedRows* selected_rows);
} // namespace framework
} // namespace paddle_mobile
......@@ -338,6 +338,8 @@ inline Print &operator<<(Print &printer, const Tensor &tensor) {
for (int i = 0; i < tensor.numel(); i += stride) {
if (tensor.type() == typeid(float)) {
printer << tensor.data<float>()[i] << " ";
} else if (tensor.type() == typeid(int32_t)) {
printer << tensor.data<int32_t>()[i] << " ";
} else if (tensor.type() == typeid(int64_t)) {
printer << tensor.data<int64_t>()[i] << " ";
} else if (tensor.type() == typeid(int8_t)) {
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISEMUL_OP
#include "operators/elementwise_mul_op.h"
namespace paddle_mobile {
namespace operators {
template <typename Dtype, typename T>
void ElementwiseMulOp<Dtype, T>::InferShape() const {
auto x_dim = this->param_.InputX()->dims();
this->param_.Out()->Resize(x_dim);
}
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(elementwise_mul, ops::ElementwiseMulOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(elementwise_mul, ops::ElementwiseMulOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISEMUL_OP
#pragma once
#include <string>
#include "framework/operator.h"
#include "kernel/elementwise_mul_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
using std::string;
template <typename DeviceType, typename T>
class ElementwiseMulOp : public framework::OperatorWithKernel<
DeviceType, ElementwiseMulParam<DeviceType>,
operators::ElementwiseMulKernel<DeviceType, T>> {
public:
ElementwiseMulOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<
DeviceType, ElementwiseMulParam<DeviceType>,
operators::ElementwiseMulKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, ElementwiseMulParam<DeviceType>,
operators::ElementwiseMulKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
protected:
};
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISESUB_OP
#include "operators/elementwise_sub_op.h"
namespace paddle_mobile {
namespace operators {
template <typename Dtype, typename T>
void ElementwiseSubOp<Dtype, T>::InferShape() const {
auto x_dim = this->param_.InputX()->dims();
this->param_.Out()->Resize(x_dim);
}
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(elementwise_sub, ops::ElementwiseSubOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(elementwise_sub, ops::ElementwiseSubOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISESUB_OP
#pragma once
#include <string>
#include "framework/operator.h"
#include "kernel/elementwise_sub_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
using std::string;
template <typename DeviceType, typename T>
class ElementwiseSubOp : public framework::OperatorWithKernel<
DeviceType, ElementwiseSubParam<DeviceType>,
operators::ElementwiseSubKernel<DeviceType, T>> {
public:
ElementwiseSubOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<
DeviceType, ElementwiseSubParam<DeviceType>,
operators::ElementwiseSubKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, ElementwiseSubParam<DeviceType>,
operators::ElementwiseSubKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
protected:
};
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,56 +12,24 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef MUL_OP
#ifdef ELEMENTWISEMUL_OP
#include "operators/kernel/mul_kernel.h"
#include "operators/kernel/elementwise_mul_kernel.h"
#include "operators/kernel/central-arm-func/elementwise_mul_arm_func.h"
namespace paddle_mobile {
namespace operators {
template <>
bool MulKernel<FPGA, float>::Init(MulParam<FPGA> *param) {
bool relu_enabled = false;
auto input_x = const_cast<LoDTensor *>(param->InputX());
auto filter = const_cast<LoDTensor *>(param->InputY());
auto out = param->Out();
PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == filter->dims()[0],
"Image channel should be equal to weight number");
int channel = (uint32_t)out->dims()[1];
auto bs_ptr =
(float *)fpga::fpga_malloc(2 * channel * sizeof(float)); // NOLINT
for (int i = 0; i < channel; i++) {
bs_ptr[i + channel] = 1;
bs_ptr[i] = 0;
}
int num = (uint32_t)filter->dims()[1];
int chw = (uint32_t)filter->dims()[0];
PADDLE_MOBILE_ENFORCE(
chw == input_x->numel(),
"Filter element num should be equal to IFM element num");
int height = (uint32_t)input_x->dims()[2];
int width = (uint32_t)input_x->dims()[3];
int filter_channel = chw / height / width;
filter->Resize(framework::make_ddim({num, filter_channel, height, width}));
float max_value = fpga::filter_find_max(filter);
fpga::format_fc_filter(filter, max_value);
int element_num_per_div = fpga::get_filter_num_per_div(filter, 1);
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
fpga::format_fp16_ofm(out);
fpga::WrapperConvArgs conv_arg = {0};
fpga::fill_conv_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, 0,
0, bs_ptr);
param->SetFpgaArgs(conv_arg);
bool ElementwiseMulKernel<CPU, float>::Init(ElementwiseMulParam<CPU> *param) {
return true;
}
template <>
void MulKernel<FPGA, float>::Compute(const MulParam<FPGA> &param) const {
fpga::ComputeFpgaConv(param.FpgaArgs());
void ElementwiseMulKernel<CPU, float>::Compute(
const ElementwiseMulParam<CPU> &param) const {
ElementwiseMulCompute<float>(param);
param.Out()->set_lod(param.InputX()->lod());
}
} // namespace operators
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISESUB_OP
#include "operators/kernel/elementwise_sub_kernel.h"
#include "operators/kernel/central-arm-func/elementwise_sub_arm_func.h"
namespace paddle_mobile {
namespace operators {
template <>
bool ElementwiseSubKernel<CPU, float>::Init(ElementwiseSubParam<CPU> *param) {
return true;
}
template <>
void ElementwiseSubKernel<CPU, float>::Compute(
const ElementwiseSubParam<CPU> &param) const {
ElementwiseSubCompute<float>(param);
param.Out()->set_lod(param.InputX()->lod());
}
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -31,6 +31,8 @@ void MulKernel<CPU, float>::Compute(const MulParam<CPU> &param) const {
param.Out()->set_lod(param.InputX()->lod());
}
template class MulKernel<CPU, float>;
} // namespace operators
} // namespace paddle_mobile
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SUM_OP
#include "operators/kernel/sum_kernel.h"
#include "operators/kernel/central-arm-func/sum_arm_func.h"
namespace paddle_mobile {
namespace operators {
template <>
bool SumKernel<CPU, float>::Init(SumParam<CPU> *param) {
return true;
}
template <>
void SumKernel<CPU, float>::Compute(const SumParam<CPU> &param) const {
SumCompute<float>(param);
param.Out()->set_lod(param.Inputs()[0]->lod());
}
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISEMUL_OP
#pragma once
#include "operators/math/elementwise_op_function.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename T>
struct MulFunctor {
inline T operator()(T a, T b) const { return a * b; }
};
template <typename P>
void ElementwiseMulCompute(const ElementwiseMulParam<CPU> &param) {
const Tensor *input_x = param.InputX();
const Tensor *input_y = param.InputY();
Tensor *Out = param.Out();
Out->mutable_data<float>();
int axis = param.Axis();
ElementwiseComputeEx<MulFunctor<float>, float>(input_x, input_y, axis,
MulFunctor<float>(), Out);
}
template class ElementwiseMulKernel<CPU, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISESUB_OP
#pragma once
#include "operators/math/elementwise_op_function.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename T>
struct SubFunctor {
inline T operator()(T a, T b) const { return a - b; }
};
template <typename P>
void ElementwiseSubCompute(const ElementwiseSubParam<CPU> &param) {
const Tensor *input_x = param.InputX();
const Tensor *input_y = param.InputY();
Tensor *Out = param.Out();
Out->mutable_data<float>();
int axis = param.Axis();
ElementwiseComputeEx<SubFunctor<float>, float>(input_x, input_y, axis,
SubFunctor<float>(), Out);
}
template class ElementwiseSubKernel<CPU, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -20,14 +20,12 @@ limitations under the License. */
#include <utility>
#include <vector>
#include "framework/tensor.h"
#include "operators/math/poly_util.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
constexpr int kOutputDim = 6;
constexpr int kBBoxSize = 4;
template <class T>
bool SortScorePairDescend(const std::pair<float, T>& pair1,
const std::pair<float, T>& pair2) {
......@@ -90,6 +88,21 @@ static inline T JaccardOverlap(const T* box1, const T* box2,
}
}
template <class T>
static inline T PolyIoU(const T* box1, const T* box2, const size_t box_size,
const bool normalized) {
T bbox1_area = math::PolyArea<T>(box1, box_size, normalized);
T bbox2_area = math::PolyArea<T>(box2, box_size, normalized);
T inter_area = math::PolyOverlapArea<T>(box1, box2, box_size, normalized);
if (bbox1_area == 0 || bbox2_area == 0 || inter_area == 0) {
// If coordinate values are is invalid
// if area size <= 0, return 0.
return static_cast<T>(0.);
} else {
return inter_area / (bbox1_area + bbox2_area - inter_area);
}
}
template <typename T>
static inline void NMSFast(const framework::Tensor& bbox,
const framework::Tensor& scores,
......@@ -116,8 +129,14 @@ static inline void NMSFast(const framework::Tensor& bbox,
for (size_t k = 0; k < selected_indices->size(); ++k) {
if (keep) {
const int kept_idx = (*selected_indices)[k];
T overlap = JaccardOverlap<T>(bbox_data + idx * box_size,
T overlap = T(0.);
if (box_size == 4) {
overlap = JaccardOverlap<T>(bbox_data + idx * box_size,
bbox_data + kept_idx * box_size, true);
} else {
overlap = PolyIoU<T>(bbox_data + idx * box_size,
bbox_data + kept_idx * box_size, box_size, true);
}
keep = overlap <= adaptive_threshold;
} else {
break;
......@@ -190,6 +209,8 @@ void MultiClassOutput(const framework::Tensor& scores,
const std::map<int, std::vector<int>>& selected_indices,
framework::Tensor* outs) {
int predict_dim = scores.dims()[1];
int box_size = bboxes.dims()[1];
int out_dim = bboxes.dims()[1] + 2;
auto* scores_data = scores.data<T>();
auto* bboxes_data = bboxes.data<T>();
auto* odata = outs->data<T>();
......@@ -202,11 +223,11 @@ void MultiClassOutput(const framework::Tensor& scores,
const std::vector<int>& indices = it.second;
for (size_t j = 0; j < indices.size(); ++j) {
int idx = indices[j];
const T* bdata = bboxes_data + idx * kBBoxSize;
odata[count * kOutputDim] = label; // label
odata[count * kOutputDim + 1] = sdata[idx]; // score
const T* bdata = bboxes_data + idx * box_size;
odata[count * out_dim] = label; // label
odata[count * out_dim + 1] = sdata[idx]; // score
// xmin, ymin, xmax, ymax
std::memcpy(odata + count * kOutputDim + 2, bdata, 4 * sizeof(T));
std::memcpy(odata + count * out_dim + 2, bdata, box_size * sizeof(T));
count++;
}
}
......@@ -256,7 +277,8 @@ void MultiClassNMSCompute(const MultiClassNMSParam<CPU>& param) {
float* od = outs->mutable_data<float>({1});
od[0] = -1;
} else {
outs->mutable_data<float>({num_kept, kOutputDim});
int64_t out_dim = box_dim + 2;
outs->mutable_data<float>({num_kept, out_dim});
for (int64_t i = 0; i < batch_size; ++i) {
framework::Tensor ins_score = input_scores->Slice(i, i + 1);
ins_score.Resize({class_num, predict_dim});
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SUM_OP
#pragma once
#include <vector>
#include "operators/math/selected_rows_functor.h"
namespace paddle_mobile {
namespace operators {
using LoDTensorArray = std::vector<LoDTensor>;
template <typename P>
void SumCompute(const SumParam<CPU> &param) {
auto inputsvars = param.InputsVars();
int N = inputsvars.size();
auto *outvar = param.OutVar();
bool in_place = outvar == inputsvars[0];
if (outvar->IsType<framework::LoDTensor>()) {
auto *out = outvar->GetMutable<LoDTensor>();
if (!in_place) {
out->mutable_data<float>();
}
auto *outptr = out->data<float>();
// auto result = Flatten(*out);
if (!in_place) {
std::fill(out->data<float>(), out->data<float>() + out->numel(), 0);
}
math::SelectedRowsAddToTensor<float> functor;
for (int i = in_place ? 1 : 0; i < N; i++) {
if (inputsvars[i]->IsType<framework::LoDTensor>()) {
auto *in_t = inputsvars[i]->Get<framework::LoDTensor>();
auto *inptr = in_t->data<float>();
if (in_t->numel() == 0) {
continue;
}
for (int j = 0; j < out->numel(); ++j) {
outptr[j] = outptr[j] + inptr[j];
}
} else if (inputsvars[i]->IsType<framework::SelectedRows>()) {
auto *in_t = inputsvars[i]->Get<framework::SelectedRows>();
functor(*in_t, out);
} else {
PADDLE_MOBILE_THROW_EXCEPTION(
"Variable type must be LoDTensor/SelectedRows.");
}
}
} else if (outvar->IsType<framework::SelectedRows>()) {
std::unique_ptr<framework::SelectedRows> in0;
if (in_place) {
// If is in_place, we store the input[0] to in0
auto *in_sel0 = inputsvars[0]->Get<framework::SelectedRows>();
auto &rows = in_sel0->rows();
in0.reset(new framework::SelectedRows(rows, in_sel0->height()));
in0->mutable_value()->ShareDataWith(in_sel0->value());
}
auto get_selected_row = [&](size_t i) -> const framework::SelectedRows & {
if (i == 0 && in0) {
return *in0.get();
} else {
return *(inputsvars[i]->Get<framework::SelectedRows>());
}
};
auto *out = outvar->GetMutable<framework::SelectedRows>();
out->mutable_rows()->clear();
auto *out_value = out->mutable_value();
// Runtime InferShape
size_t first_dim = 0;
for (int i = 0; i < N; i++) {
auto &sel_row = get_selected_row(i);
first_dim += sel_row.rows().size();
}
auto in_dim = framework::vectorize(get_selected_row(N - 1).value().dims());
in_dim[0] = static_cast<int64_t>(first_dim);
out_value->Resize(framework::make_ddim(in_dim));
// if all the input sparse vars are empty, no need to
// merge these vars.
if (first_dim == 0UL) {
return;
}
out_value->mutable_data<float>();
math::SelectedRowsAddTo<float> functor;
int64_t offset = 0;
for (int i = 0; i < N; i++) {
auto &sel_row = get_selected_row(i);
if (sel_row.rows().size() == 0) {
continue;
}
PADDLE_MOBILE_ENFORCE(out->height() == sel_row.height(),
"seletrows height != outheight");
functor(sel_row, offset, out);
offset += sel_row.value().numel();
}
} else if (outvar->IsType<LoDTensorArray>()) {
auto &out_array = *outvar->GetMutable<LoDTensorArray>();
for (size_t i = in_place ? 1 : 0; i < inputsvars.size(); ++i) {
PADDLE_MOBILE_ENFORCE(inputsvars[i]->IsType<LoDTensorArray>(),
"Only support all inputs are TensorArray");
auto *in_array = inputsvars[i]->Get<LoDTensorArray>();
for (size_t i = 0; i < in_array->size(); ++i) {
if ((*in_array)[i].numel() != 0) {
if (i >= out_array.size()) {
out_array.resize(i + 1);
}
if (out_array[i].numel() == 0) {
framework::TensorCopy((*in_array)[i], &out_array[i]);
out_array[i].set_lod((*in_array)[i].lod());
} else {
PADDLE_MOBILE_ENFORCE(out_array[i].lod() == (*in_array)[i].lod(),
"outLod != inLod");
auto *inptr = (*in_array)[i].data<float>();
auto *outptr = out_array[i].data<float>();
for (int j = 0; j < (*in_array)[i].numel(); ++j) {
outptr[j] = inptr[j] + outptr[j];
}
}
}
}
}
} else {
PADDLE_MOBILE_THROW_EXCEPTION(
"Unexpected branch, output variable type is %s", outvar->Type().name());
}
}
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISEMUL_OP
#pragma once
#include "framework/operator.h"
#include "operators/math/elementwise_op_function.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
class ElementwiseMulKernel
: public framework::OpKernelBase<DeviceType,
ElementwiseMulParam<DeviceType>> {
public:
void Compute(const ElementwiseMulParam<DeviceType> &param) const;
bool Init(ElementwiseMulParam<DeviceType> *param);
};
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISEADD_OP
#pragma once
#include "framework/operator.h"
#include "operators/math/elementwise_op_function.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
class ElementwiseSubKernel
: public framework::OpKernelBase<DeviceType,
ElementwiseSubParam<DeviceType>> {
public:
void Compute(const ElementwiseSubParam<DeviceType> &param) const;
bool Init(ElementwiseSubParam<DeviceType> *param);
};
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -66,10 +66,11 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
fpga::format_fp16_ofm(out);
fpga::WrapperConvArgs conv_arg = {0};
fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0],
param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg);
return true;
......
......@@ -65,10 +65,11 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
fpga::format_fp16_ofm(out);
fpga::WrapperConvArgs conv_arg = {0};
fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0],
param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg);
return true;
}
......
......@@ -47,10 +47,11 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
fpga::format_fp16_ofm(out);
fpga::WrapperConvArgs conv_arg = {0};
fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0],
param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg);
return true;
}
......
......@@ -59,10 +59,11 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
fpga::format_fp16_ofm(out);
fpga::WrapperConvArgs conv_arg = {0};
fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0],
param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg);
return true;
}
......
......@@ -59,10 +59,11 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
fpga::format_fp16_ofm(out);
fpga::WrapperConvArgs conv_arg = {0};
fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0],
param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg);
return true;
}
......
......@@ -44,6 +44,7 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
int width = (uint32_t)input_x->dims()[3];
int filter_channel = chw / height / width;
out->Resize(framework::make_ddim({1, channel, 1, 1}));
filter->Resize(framework::make_ddim({num, filter_channel, height, width}));
float max_value = fpga::filter_find_max(filter);
fpga::format_fc_filter(filter, max_value);
......@@ -52,9 +53,9 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
fpga::format_fp16_ofm(out);
fpga::WrapperConvArgs conv_arg = {0};
fpga::fill_conv_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, 0,
0, bs_ptr);
fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1,
0, 0, bs_ptr);
param->SetFpgaArgs(conv_arg);
return true;
}
......
......@@ -45,6 +45,7 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
int width = (uint32_t)input_x->dims()[3];
int filter_channel = chw / height / width;
out->Resize(framework::make_ddim({1, channel, 1, 1}));
filter->Resize(framework::make_ddim({num, filter_channel, height, width}));
float max_value = fpga::filter_find_max(filter);
fpga::format_fc_filter(filter, max_value);
......@@ -53,9 +54,9 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
fpga::format_fp16_ofm(out);
fpga::WrapperConvArgs conv_arg = {0};
fpga::fill_conv_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, 0,
0, bs_ptr);
fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1,
0, 0, bs_ptr);
param->SetFpgaArgs(conv_arg);
return true;
}
......
......@@ -27,7 +27,7 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
auto input = const_cast<Tensor *>(param->InputX());
auto input_ptr = input->data<float>();
auto float_input = new Tensor;
float_input->mutable_data<float>(input->dims());
float_input->mutable_data<float>({1, input->dims()[1]});
fpga::format_fp32_ofm(float_input);
fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
......@@ -56,7 +56,6 @@ void SoftmaxKernel<FPGA, float>::Compute(
fpga::fpga_invalidate(
(void *)in_x->data<float>(), // NOLINT
fpga::get_align_image_cw(in_x->dims()[1]) * sizeof(float));
math::SoftmaxFuntor<CPU, float>()(in_x, out);
fpga::fpga_flush(out->data<float>(), out->memory_size());
}
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SUM_OP
#pragma once
#include "framework/operator.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
class SumKernel
: public framework::OpKernelBase<DeviceType, SumParam<DeviceType>> {
public:
void Compute(const SumParam<DeviceType> &param) const;
bool Init(SumParam<DeviceType> *param);
};
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -1667,7 +1667,7 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
const int w_times = (out_w - 2) / 3;
float32x4_t zero = vdupq_n_f32(0.0);
for (int b = batch_size; b > 0; --b) {
#pragma omp parallel for
#pragma omp parallel for
for (int j = 0; j < c; j++) {
const float *input_row_ptr;
float *output_row_ptr;
......@@ -1912,9 +1912,7 @@ void DepthwiseConv3x3s2p0(const Tensor *input, const Tensor *filter,
float w20 = filter_data[6];
float w21 = filter_data[7];
float w22 = filter_data[8];
float32x4_t biasv = vld1q_dup_f32(bias_data);
for (int i = 0; i < output_height; i += 1) {
for (int m = 0; m < output_width - 2; m += 3) {
float *output_ptr = output_data + i * output_width + m;
......@@ -1949,8 +1947,9 @@ void DepthwiseConv3x3s2p0(const Tensor *input, const Tensor *filter,
out0 = vmlaq_n_f32(out0, in4, w20);
out0 = vmlaq_n_f32(out0, tmp4, w21);
out0 = vmlaq_n_f32(out0, tmp5, w22);
out0 = vaddq_f32(out0, biasv);
if (if_bias) {
out0 = vaddq_f32(out0, biasv);
}
vst1q_lane_f32(output_ptr, out0, 0);
vst1q_lane_f32(output_ptr + 1, out0, 1);
vst1q_lane_f32(output_ptr + 2, out0, 2);
......@@ -1960,16 +1959,18 @@ void DepthwiseConv3x3s2p0(const Tensor *input, const Tensor *filter,
}
for (int j = m; j < output_width; j++) {
output_data[i * output_width + j] =
input_data[(2 * i - 1) * input_width + 2 * j - 1] * w00 +
input_data[(2 * i - 1) * input_width + 2 * j] * w01 +
input_data[(2 * i - 1) * input_width + 2 * j + 1] * w02 +
input_data[(2 * i) * input_width + 2 * j - 1] * w10 +
input_data[(2 * i) * input_width + 2 * j] * w11 +
input_data[(2 * i) * input_width + 2 * j + 1] * w12 +
input_data[(2 * i + 1) * input_width + 2 * j - 1] * w20 +
input_data[(2 * i + 1) * input_width + 2 * j] * w21 +
input_data[(2 * i + 1) * input_width + 2 * j + 1] * w22;
output_data[i * output_width + j] += *bias_data;
input_data[(2 * i) * input_width + 2 * j] * w00 +
input_data[(2 * i) * input_width + 2 * j + 1] * w01 +
input_data[(2 * i) * input_width + 2 * j + 2] * w02 +
input_data[(2 * i + 1) * input_width + 2 * j] * w10 +
input_data[(2 * i + 1) * input_width + 2 * j + 1] * w11 +
input_data[(2 * i + 1) * input_width + 2 * j + 2] * w12 +
input_data[(2 * i + 2) * input_width + 2 * j] * w20 +
input_data[(2 * i + 2) * input_width + 2 * j + 1] * w21 +
input_data[(2 * i + 2) * input_width + 2 * j + 2] * w22;
if (if_bias) {
output_data[i * output_width + j] += *bias_data;
}
}
}
}
......
......@@ -187,29 +187,29 @@ void PackMatrixB(int k, int n, int n_tail, const float *B, int ldb,
const float *B, int ldb, float *C, int ldc, float *p,
std::string mode, float *bias, float *bias1);
/************************ 8 bit function cluster ************************/
// 8 bit int small block inner product
// 8 bits function cluster begins
// 8 bits int small block inner product
void AddDot6x8(int32_t k, const int8_t *a, const int8_t *b, int32_t *c,
int32_t ldc);
// 8 bit int inner product
// 8 bits int inner product
void InnerKernelWithBias(int32_t mc, int32_t nc, int8_t alpha,
const int8_t *a, const int8_t *b, int8_t beta,
int32_t *c, int32_t *C, int32_t ldc, bool relu,
int8_t *bias);
// 8 bit int pack function
// 8 bits int pack function
void PackMatrixA_6r(int32_t m, int32_t k, int32_t m_tail, const int8_t *A,
int32_t lda, int8_t *buffer);
void PackMatrixB_8c(int32_t k, int32_t n, int32_t n_tail, const int8_t *B,
int32_t ldb, int8_t *buffer);
// 8 bit int matrix product
// 8 bits int matrix product
void Sgemm(int32_t m, int32_t n, int32_t k, int8_t alpha, const int8_t *A,
int32_t lda, const int8_t *B, int32_t ldb, int8_t beta, int32_t *C,
int32_t ldc, bool relu, int8_t *bias);
// 8 bit int write back
// 8 bits int write back
// C = alpha * A * B + beta * C
void WriteWithAlphaBeta(int32_t mc, int32_t nc, int32_t *c, int32_t *C,
int32_t ldc);
......@@ -239,7 +239,7 @@ void PackMatrixB(int k, int n, int n_tail, const float *B, int ldb,
float *packedC;
float *zero;
// 8 bit int
// 8 bits int
int8_t *packedA_int8;
int8_t *packedB_int8;
int32_t *packedC_int8;
......
此差异已折叠。
此差异已折叠。
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef MULTICLASSNMS_OP
#pragma once
#include <float.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
namespace gpc {
typedef enum { // Set operation type
GPC_DIFF, // Difference
GPC_INT, // Intersection
GPC_XOR, // Exclusive or
GPC_UNION // Union
} gpc_op;
typedef struct { // Polygon vertex structure
double x; // Vertex x component
double y; // vertex y component
} gpc_vertex;
typedef struct { // Vertex list structure
int num_vertices; // Number of vertices in list
gpc_vertex *vertex; // Vertex array pointer
} gpc_vertex_list;
typedef struct { // Polygon set structure
int num_contours; // Number of contours in polygon
int *hole; // Hole external contour flags
gpc_vertex_list *contour; // Contour array pointer
} gpc_polygon;
typedef struct { // Tristrip set structure
int num_strips; // Number of tristrips
gpc_vertex_list *strip; // Tristrip array pointer
} gpc_tristrip;
typedef enum { LEFT, RIGHT } gpc_left_right;
typedef enum { ABOVE, BELOW } gpc_above_below;
typedef enum { CLIP, SUBJ } gpc_clip_subj;
typedef enum { /* Edge intersection classes */
NUL, /* Empty non-intersection */
EMX, /* External maximum */
ELI, /* External left intermediate */
TED, /* Top edge */
ERI, /* External right intermediate */
RED, /* Right edge */
IMM, /* Internal maximum and minimum */
IMN, /* Internal minimum */
EMN, /* External minimum */
EMM, /* External maximum and minimum */
LED, /* Left edge */
ILI, /* Internal left intermediate */
BED, /* Bottom edge */
IRI, /* Internal right intermediate */
IMX, /* Internal maximum */
FUL /* Full non-intersection */
} vertex_type;
typedef enum { /* Horizontal edge states */
NH, /* No horizontal edge */
BH, /* Bottom horizontal edge */
TH /* Top horizontal edge */
} h_state;
typedef enum { /* Edge bundle state */
UNBUNDLED, /* Isolated edge not within a bundle */
BUNDLE_HEAD, /* Bundle head node */
BUNDLE_TAIL /* Passive bundle tail node */
} bundle_state;
typedef struct v_shape { /* Internal vertex list datatype */
double x; /* X coordinate component */
double y; /* Y coordinate component */
struct v_shape *next; /* Pointer to next vertex in list */
} vertex_node;
typedef struct p_shape { /* Internal contour / tristrip type */
int active; /* Active flag / vertex count */
int hole; /* Hole / external contour flag */
vertex_node *v[2]; /* Left and right vertex list ptrs */
struct p_shape *next; /* Pointer to next polygon contour */
struct p_shape *proxy; /* Pointer to actual structure used */
} polygon_node;
typedef struct edge_shape {
gpc_vertex vertex; /* Piggy-backed contour vertex data */
gpc_vertex bot; /* Edge lower (x, y) coordinate */
gpc_vertex top; /* Edge upper (x, y) coordinate */
double xb; /* Scanbeam bottom x coordinate */
double xt; /* Scanbeam top x coordinate */
double dx; /* Change in x for a unit y increase */
int type; /* Clip / subject edge flag */
int bundle[2][2]; /* Bundle edge flags */
int bside[2]; /* Bundle left / right indicators */
bundle_state bstate[2]; /* Edge bundle state */
polygon_node *outp[2]; /* Output polygon / tristrip pointer */
struct edge_shape *prev; /* Previous edge in the AET */
struct edge_shape *next; /* Next edge in the AET */
struct edge_shape *pred; /* Edge connected at the lower end */
struct edge_shape *succ; /* Edge connected at the upper end */
struct edge_shape *next_bound; /* Pointer to next bound in LMT */
} edge_node;
inline bool gpc_eq(float a, float b) { return (fabs(a - b) <= 1e-6); }
inline bool gpc_prev_index(float a, float b) { return (fabs(a - b) <= 1e-6); }
inline int gpc_prev_index(int i, int n) { return ((i - 1 + n) % n); }
inline int gpc_next_index(int i, int n) { return ((i + 1) % n); }
inline int gpc_optimal(gpc_vertex *v, int i, int n) {
return (v[(i + 1) % n].y != v[i].y || v[(i - 1 + n) % n].y != v[i].y);
}
inline int gpc_fwd_min(edge_node *v, int i, int n) {
return (v[(i + 1) % n].vertex.y > v[i].vertex.y &&
v[(i - 1 + n) % n].vertex.y >= v[i].vertex.y);
}
inline int gpc_not_fmax(edge_node *v, int i, int n) {
return (v[(i + 1) % n].vertex.y > v[i].vertex.y);
}
inline int gpc_rev_min(edge_node *v, int i, int n) {
return (v[(i + 1) % n].vertex.y >= v[i].vertex.y &&
v[(i - 1 + n) % n].vertex.y > v[i].vertex.y);
}
inline int gpc_not_rmax(edge_node *v, int i, int n) {
return (v[(i - 1 + n) % n].vertex.y > v[i].vertex.y);
}
// inline void gpc_p_edge(edge_node *d, edge_node *e, int p, double i, double j)
// {
inline void gpc_p_edge(edge_node *d, edge_node *e, int p) {
d = e;
do {
d = d->prev;
} while (!d->outp[p]);
// i = d->bot.x + d->dx * (j - d->bot.y);
}
// inline void gpc_n_edge(edge_node *d, edge_node *e, int p, double i, double j)
// {
inline void gpc_n_edge(edge_node *d, edge_node *e, int p) {
d = e;
do {
d = d->next;
} while (!d->outp[p]);
// i = d->bot.x + d->dx * (j - d->bot.y);
}
template <typename T>
void gpc_malloc(T *&p, int b, char *s) { // NOLINT
if (b > 0) {
p = reinterpret_cast<T *>(malloc(b));
if (!p) {
fprintf(stderr, "gpc malloc failure: %s\n", s);
exit(0);
}
} else {
p = NULL;
}
}
template <typename T>
void gpc_free(T *&p) { // NOLINT
if (p) {
free(p);
p = NULL;
}
}
/*
===========================================================================
Public Function Prototypes
===========================================================================
*/
void add_vertex(vertex_node **t, double x, double y);
void gpc_vertex_create(edge_node *e, int p, int s, double x, double y);
void gpc_add_contour(gpc_polygon *polygon, gpc_vertex_list *contour, int hole);
void gpc_polygon_clip(gpc_op set_operation, gpc_polygon *subject_polygon,
gpc_polygon *clip_polygon, gpc_polygon *result_polygon);
void gpc_tristrip_clip(gpc_op set_operation, gpc_polygon *subject_polygon,
gpc_polygon *clip_polygon,
gpc_tristrip *result_tristrip);
void gpc_polygon_to_tristrip(gpc_polygon *polygon, gpc_tristrip *tristrip);
void gpc_free_polygon(gpc_polygon *polygon);
void gpc_free_tristrip(gpc_tristrip *tristrip);
} // namespace gpc
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef MULTICLASSNMS_OP
#include "operators/math/poly_util.h"
namespace paddle_mobile {
namespace operators {
namespace math {
template <class T>
void Array2PointVec(const T* box, const size_t box_size,
std::vector<Point_<T>>* vec) {
size_t pts_num = box_size / 2;
vec->resize(pts_num);
for (size_t i = 0; i < pts_num; i++) {
vec->at(i).x = box[2 * i];
vec->at(i).y = box[2 * i + 1];
}
}
template <class T>
void Array2Poly(const T* box, const size_t box_size, gpc::gpc_polygon* poly) {
size_t pts_num = box_size / 2;
poly->num_contours = 1;
poly->hole = reinterpret_cast<int*>(malloc(sizeof(int)));
poly->hole[0] = 0;
poly->contour = (gpc::gpc_vertex_list*)malloc(sizeof(gpc::gpc_vertex_list));
poly->contour->num_vertices = pts_num;
poly->contour->vertex =
(gpc::gpc_vertex*)malloc(sizeof(gpc::gpc_vertex) * pts_num);
for (size_t i = 0; i < pts_num; ++i) {
poly->contour->vertex[i].x = box[2 * i];
poly->contour->vertex[i].y = box[2 * i + 1];
}
}
template void Array2Poly(const float* box, const size_t box_size,
gpc::gpc_polygon* poly);
template <class T>
void Poly2PointVec(const gpc::gpc_vertex_list& contour,
std::vector<Point_<T>>* vec) {
int pts_num = contour.num_vertices;
vec->resize(pts_num);
for (size_t i = 0; i < pts_num; i++) {
vec->at(i).x = contour.vertex[i].x;
vec->at(i).y = contour.vertex[i].y;
}
}
template <class T>
T GetContourArea(const std::vector<Point_<T>>& vec) {
int pts_num = vec.size();
if (pts_num < 3) return T(0.);
T area = T(0.);
for (size_t i = 0; i < pts_num; ++i) {
area += vec[i].x * vec[(i + 1) % pts_num].y -
vec[i].y * vec[(i + 1) % pts_num].x;
}
return fabs(area / 2.0);
}
template <class T>
T PolyArea(const T* box, const size_t box_size, const bool normalized) {
// If coordinate values are is invalid
// if area size <= 0, return 0.
std::vector<Point_<T>> vec;
Array2PointVec<T>(box, box_size, &vec);
return GetContourArea<T>(vec);
}
template float PolyArea(const float* box, const size_t box_size,
const bool normalized);
template <class T>
T PolyOverlapArea(const T* box1, const T* box2, const size_t box_size,
const bool normalized) {
gpc::gpc_polygon poly1;
gpc::gpc_polygon poly2;
Array2Poly<T>(box1, box_size, &poly1);
Array2Poly<T>(box2, box_size, &poly2);
gpc::gpc_polygon respoly;
gpc::gpc_op op = gpc::GPC_INT;
gpc::gpc_polygon_clip(op, &poly2, &poly1, &respoly);
T inter_area = T(0.);
int contour_num = respoly.num_contours;
for (int i = 0; i < contour_num; ++i) {
std::vector<Point_<T>> resvec;
Poly2PointVec<T>(respoly.contour[i], &resvec);
inter_area += GetContourArea<T>(resvec);
}
gpc::gpc_free_polygon(&poly1);
gpc::gpc_free_polygon(&poly2);
gpc::gpc_free_polygon(&respoly);
return inter_area;
}
template float PolyOverlapArea(const float* box1, const float* box2,
const size_t box_size, const bool normalized);
} // namespace math
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef MULTICLASSNMS_OP
#pragma once
#include <vector>
#include "operators/math/gpc.h"
namespace paddle_mobile {
namespace operators {
namespace math {
template <class T>
class Point_ {
public:
// default constructor
Point_() {}
Point_(T _x, T _y) {}
Point_(const Point_& pt) {}
Point_& operator=(const Point_& pt);
// conversion to another data type
// template<typename _T> operator Point_<_T>() const;
// conversion to the old-style C structures
// operator Vec<T, 2>() const;
// checks whether the point is inside the specified rectangle
// bool inside(const Rect_<T>& r) const;
T x; //!< x coordinate of the point
T y; //!< y coordinate of the point
};
template <class T>
void Array2PointVec(const T* box, const size_t box_size,
std::vector<Point_<T>>* vec);
template <class T>
void Array2Poly(const T* box, const size_t box_size, gpc::gpc_polygon* poly);
template <class T>
void Poly2PointVec(const gpc::gpc_vertex_list& contour,
std::vector<Point_<T>>* vec);
template <class T>
T GetContourArea(const std::vector<Point_<T>>& vec);
template <class T>
T PolyArea(const T* box, const size_t box_size, const bool normalized);
template <class T>
T PolyOverlapArea(const T* box1, const T* box2, const size_t box_size,
const bool normalized);
} // namespace math
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "framework/selected_rows.h"
#define INLINE_FOR2(sizei, sizej) \
for (int64_t i = 0; i < sizei; i++) \
for (int64_t j = 0; j < sizej; j++)
namespace paddle_mobile {
namespace operators {
namespace math {
// SelectedRows + SelectedRows will simplely concat value and rows.
// The real computation happens in dealing with LoDTensor.
// template <typename T>
// struct SelectedRowsAdd {
// void operator()(
// const framework::SelectedRows& input1,
// const framework::SelectedRows& input2,
// framework::SelectedRows* output);
//};
//
// template <typename T>
// struct SelectedRowsAddTensor {
// void operator()(
// const framework::SelectedRows& input1,
// const framework::Tensor& input2, framework::Tensor* output);
//};
// input2 = input1 + input2
template <typename T>
struct SelectedRowsAddTo {
void operator()(const framework::SelectedRows& input1,
const int64_t input2_offset,
framework::SelectedRows* input2) {
auto in1_height = input1.height();
PADDLE_MOBILE_ENFORCE(in1_height == input2->height(), "height error");
auto& in1_rows = input1.rows();
auto& in2_rows = *(input2->mutable_rows());
auto& in1_value = input1.value();
auto* in2_value = input2->mutable_value();
// concat rows
in2_rows.Extend(in1_rows.begin(), in1_rows.end());
// auto in1_place = input1.place();
// PADDLE_ENFORCE(platform::is_cpu_place(in1_place));
// auto in2_place = input2->place();
// PADDLE_ENFORCE(platform::is_cpu_place(in2_place));
auto* in1_data = in1_value.data<T>();
auto* in2_data = in2_value->data<T>();
memory::Copy(in2_data + input2_offset, in1_data,
in1_value.numel() * sizeof(T));
}
};
// input2 = input1 + input2
template <typename T>
struct SelectedRowsAddToTensor {
void operator()(const framework::SelectedRows& input1,
framework::Tensor* input2) {
auto in1_height = input1.height();
auto in2_dims = input2->dims();
PADDLE_MOBILE_ENFORCE(in1_height == in2_dims[0], "height != dims[0]");
auto& in1_value = input1.value();
auto& in1_rows = input1.rows();
int64_t in1_row_numel = in1_value.numel() / in1_rows.size();
PADDLE_MOBILE_ENFORCE(in1_row_numel == input2->numel() / in1_height,
"row_numel error");
auto* in1_data = in1_value.data<T>();
auto* input2_data = input2->data<T>();
for (size_t i = 0; i < in1_rows.size(); i++) {
for (int64_t j = 0; j < in1_row_numel; j++) {
input2_data[in1_rows[i] * in1_row_numel + j] +=
in1_data[i * in1_row_numel + j];
}
}
}
};
// namespace scatter {
//// functors for manuplating SelectedRows data
// template <typename T>
// struct MergeAdd {
// // unary functor, merge by adding duplicated rows in
// // the input SelectedRows object.
// framework::SelectedRows operator()(
// const framework::SelectedRows& input);
//};
// template <typename T>
// struct Add {
// framework::SelectedRows operator()(
// const framework::SelectedRows& input1,
// const framework::SelectedRows& input2) {
// framework::SelectedRows out;
// out.set_rows(input1.rows());
// out.set_height(input1.height());
// out.mutable_value()->mutable_data<T>(input1.value().dims(),
// );
// auto e_out = framework::EigenVector<T>::Flatten(*(out.mutable_value()));
// auto e_in1 = framework::EigenVector<T>::Flatten(input1.value());
// auto e_in2 = framework::EigenVector<T>::Flatten(input2.value());
// e_out.device(*context.eigen_device()) = e_in1 + e_in2;
// return out;
// }
//};
// template <typename T>
// struct Mul {
// // multiply two SelectedRows
// framework::SelectedRows operator()(
// const framework::SelectedRows& input1,
// const framework::SelectedRows& input2) {
// framework::SelectedRows out;
// out.set_rows(input1.rows());
// out.set_height(input1.height());
// out.mutable_value()->mutable_data<T>(input1.value().dims()
// );
// auto e_out = framework::EigenVector<T>::Flatten(*(out.mutable_value()));
// auto e_in1 = framework::EigenVector<T>::Flatten(input1.value());
// auto e_in2 = framework::EigenVector<T>::Flatten(input2.value());
// e_out.device(*context.eigen_device()) = e_in1 * e_in2;
// return out;
// }
// // multiply scalar to SelectedRows
// framework::SelectedRows operator()(
// const framework::SelectedRows& input1,
// const T input2) {
// framework::SelectedRows out;
// out.set_rows(input1.rows());
// out.set_height(input1.height());
// out.mutable_value()->mutable_data<T>(input1.value().dims(),
// );
// auto e_out = framework::EigenVector<T>::Flatten(*(out.mutable_value()));
// auto e_in1 = framework::EigenVector<T>::Flatten(input1.value());
// e_out.device(*context.eigen_device()) = input2 * e_in1;
// return out;
// }
//};
enum class ScatterOps { ASSIGN, ADD, SUB, SUBBY, MUL, DIV, DIVBY };
// out = seleted_rows_in / tensor
template <typename T>
struct UpdateToTensor {
void operator()(const ScatterOps& op, const framework::SelectedRows& input1,
framework::Tensor* input2);
};
// namespace scatter
} // namespace math
} // namespace operators
} // namespace paddle_mobile
......@@ -25,14 +25,15 @@ void MultiClassNMSOp<Dtype, T>::InferShape() const {
if (input_scores_dims.size() != 3) {
LOG(kLOG_ERROR) << "Input Scores size must be 3";
}
if (input_bboxes_dims[2] != 4) {
LOG(kLOG_ERROR) << "Input BBoxes 2nd dimension must be 4";
if (input_bboxes_dims[2] % 4 != 0 || input_bboxes_dims[2] < 4) {
LOG(kLOG_ERROR) << "Input BBoxes 2nd dimension must be multiples of 4";
}
if (input_bboxes_dims[1] != input_scores_dims[2]) {
LOG(kLOG_ERROR) << "Predict bboxes must be equal";
}
// pre size, will change in Compute.
this->param_.Out()->Resize(framework::make_ddim({input_bboxes_dims[1], 6}));
this->param_.Out()->Resize(
framework::make_ddim({input_bboxes_dims[1], input_bboxes_dims[2] + 2}));
}
} // namespace operators
......
......@@ -35,6 +35,7 @@ using framework::AttributeMap;
using framework::LoDTensor;
using framework::Scope;
using framework::Tensor;
using framework::Variable;
using std::string;
using std::vector;
......@@ -182,6 +183,11 @@ class OpParam {
return GetMultiVarValue<T>("X", inputs, scope);
}
static vector<Variable *> InputMultiVarsFrom(const VariableNameMap &inputs,
const Scope &scope) {
return GetMultiVar("X", inputs, scope);
}
template <typename T>
static T *OutputBatchGateFrom(const VariableNameMap &outputs,
const Scope &scope) {
......@@ -216,6 +222,11 @@ class OpParam {
return GetVarValue<T>("Output", outputs, scope);
}
static Variable *OutVarFrom(const VariableNameMap &outputs,
const Scope &scope) {
return GetVar("Out", outputs, scope);
}
template <typename T>
static T *OutFrom(const VariableNameMap &outputs, const Scope &scope) {
return GetVarValue<T>("Out", outputs, scope);
......@@ -286,6 +297,19 @@ class OpParam {
}
}
static Variable *GetVar(const string &key, const VariableNameMap &var_map,
const Scope &scope) {
PADDLE_MOBILE_ENFORCE(var_map.count(key) > 0,
"%s is not contained in var_map", key.c_str())
auto var_vec = var_map.at(key);
if (!var_vec.empty()) {
auto var = scope.FindVar(var_vec[0]);
return var;
} else {
return nullptr;
}
}
static std::string getkey(const string &key, const VariableNameMap &var_map,
int index) {
auto var_vec = var_map.at(key);
......@@ -319,6 +343,19 @@ class OpParam {
}
return var_res;
}
static vector<Variable *> GetMultiVar(const string &key,
const VariableNameMap &var_map,
const Scope &scope) {
auto var_vecs = var_map.at(key);
assert(var_vecs.size() > 1);
vector<Variable *> var_res;
for (auto &var_vec : var_vecs) {
auto var = scope.FindVar(var_vec);
var_res.push_back(var);
}
return var_res;
}
};
template <typename Dtype>
......@@ -405,11 +442,75 @@ class ElementwiseAddParam : OpParam {
#endif
};
#ifdef ELEMENTWISEMUL_OP
template <typename Dtype>
class ElementwiseMulParam : OpParam {
typedef typename DtypeTensorTrait<Dtype>::gtype GType;
typedef typename DtypeTensorTrait<Dtype>::rtype RType;
public:
ElementwiseMulParam(const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
const Scope &scope) {
input_x_ = InputXFrom<GType>(inputs, scope);
input_y_ = InputYFrom<GType>(inputs, scope);
out_ = OutFrom<GType>(outputs, scope);
axis_ = GetAttr<int>("axis", attrs);
}
const GType *InputX() const { return input_x_; }
const GType *InputY() const { return input_y_; }
GType *Out() const { return out_; }
const int &Axis() const { return axis_; }
private:
GType *input_x_;
GType *input_y_;
GType *out_;
int axis_;
};
#endif
#ifdef FUSION_ELEMENTWISEADDRELU_OP
template <typename Dtype>
using ElementwiseAddReluParam = ElementwiseAddParam<Dtype>;
#endif
#ifdef ELEMENTWISESUB_OP
template <typename Dtype>
class ElementwiseSubParam : OpParam {
typedef typename DtypeTensorTrait<Dtype>::gtype GType;
typedef typename DtypeTensorTrait<Dtype>::rtype RType;
public:
ElementwiseSubParam(const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
const Scope &scope) {
input_x_ = InputXFrom<GType>(inputs, scope);
input_y_ = InputYFrom<GType>(inputs, scope);
out_ = OutFrom<GType>(outputs, scope);
axis_ = GetAttr<int>("axis", attrs);
}
const GType *InputX() const { return input_x_; }
const GType *InputY() const { return input_y_; }
GType *Out() const { return out_; }
const int &Axis() const { return axis_; }
private:
GType *input_x_;
GType *input_y_;
GType *out_;
int axis_;
};
#endif
#ifdef MUL_OP
template <typename Dtype>
class MulParam : OpParam {
......@@ -445,11 +546,11 @@ class MulParam : OpParam {
#ifdef PADDLE_MOBILE_FPGA
private:
fpga::WrapperConvArgs fpga_conv_args;
fpga::SplitConvArgs fpga_conv_args;
public:
const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; }
#endif
};
#endif
......@@ -490,6 +591,37 @@ class ConcatParam : public OpParam {
};
#endif
#ifdef SUM_OP
template <typename Dtype>
class SumParam : public OpParam {
typedef typename DtypeTensorTrait<Dtype>::gtype GType;
typedef typename DtypeTensorTrait<Dtype>::rtype RType;
public:
SumParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const AttributeMap &attrs, const Scope &scope) {
inputs_vars_ = InputMultiVarsFrom(inputs, scope);
out_var_ = OutVarFrom(outputs, scope);
inputs_ = InputMultiFrom<GType>(inputs, scope);
out_ = OutFrom<GType>(outputs, scope);
}
vector<Variable *> InputsVars() const { return inputs_vars_; }
Variable *OutVar() const { return out_var_; }
vector<GType *> Inputs() const { return inputs_; }
GType *Out() const { return out_; }
private:
vector<Variable *> inputs_vars_;
Variable *out_var_;
vector<GType *> inputs_;
GType *out_;
};
#endif
#ifdef LRN_OP
template <typename Dtype>
class LrnParam : public OpParam {
......@@ -1269,11 +1401,11 @@ class FusionFcParam : public OpParam {
#ifdef PADDLE_MOBILE_FPGA
private:
fpga::WrapperConvArgs fpga_conv_args;
fpga::SplitConvArgs fpga_conv_args;
public:
const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; }
#endif
};
......@@ -1309,11 +1441,11 @@ class FusionConvAddParam : public ConvParam<Dtype> {
#ifdef PADDLE_MOBILE_FPGA
private:
fpga::WrapperConvArgs fpga_conv_args;
fpga::SplitConvArgs fpga_conv_args;
public:
const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; }
#endif
};
......@@ -1364,11 +1496,11 @@ class FusionConvAddPReluParam : public ConvParam<Dtype> {
#ifdef PADDLE_MOBILE_FPGA
private:
fpga::WrapperConvArgs fpga_conv_args;
fpga::SplitConvArgs fpga_conv_args;
public:
const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; }
#endif
};
#endif
......@@ -1422,11 +1554,11 @@ class FusionConvAddAddPReluParam : public ConvParam<Dtype> {
#ifdef PADDLE_MOBILE_FPGA
private:
fpga::WrapperConvArgs fpga_conv_args;
fpga::SplitConvArgs fpga_conv_args;
public:
const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; }
#endif
};
#endif
......@@ -1497,11 +1629,11 @@ class FusionConvAddBNReluParam : public ConvParam<Dtype> {
#ifdef PADDLE_MOBILE_FPGA
private:
fpga::WrapperConvArgs fpga_conv_args;
fpga::SplitConvArgs fpga_conv_args;
public:
const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; }
#endif
};
#endif
......@@ -1583,11 +1715,11 @@ class FusionConvBNAddReluParam : public ConvParam<Dtype> {
#ifdef PADDLE_MOBILE_FPGA
private:
fpga::WrapperConvArgs fpga_conv_args;
fpga::SplitConvArgs fpga_conv_args;
public:
const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; }
#endif
};
#endif
......@@ -1650,11 +1782,11 @@ class FusionConvBNParam : public ConvParam<Dtype> {
#ifdef PADDLE_MOBILE_FPGA
private:
fpga::WrapperConvArgs fpga_conv_args;
fpga::SplitConvArgs fpga_conv_args;
public:
const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; }
#endif
};
#endif
......@@ -1725,11 +1857,11 @@ class FusionConvAddBNParam : public ConvParam<Dtype> {
#ifdef PADDLE_MOBILE_FPGA
private:
fpga::WrapperConvArgs fpga_conv_args;
fpga::SplitConvArgs fpga_conv_args;
public:
const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; }
#endif
};
#endif
......@@ -1851,11 +1983,11 @@ class FusionConvBNReluParam : public ConvParam<Dtype> {
#ifdef PADDLE_MOBILE_FPGA
private:
fpga::WrapperConvArgs fpga_conv_args;
fpga::SplitConvArgs fpga_conv_args;
public:
const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; }
#endif
};
#endif
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SUM_OP
#include <vector>
#include "operators/sum_op.h"
namespace paddle_mobile {
namespace operators {
template <typename Dtype, typename T>
void SumOp<Dtype, T>::InferShape() const {
auto inputs = this->param_.Inputs();
const size_t n = inputs.size();
std::vector<framework::DDim> inputs_dims;
inputs_dims.reserve(n);
for (int i = 0; i < n; i++) {
inputs_dims.push_back(inputs[i]->dims());
}
if (n == 1) {
DLOG << "Warning: sum op have only one input, "
"may waste memory";
}
framework::DDim in_dim({0});
for (auto& x_dim : inputs_dims) {
if (framework::product(x_dim) == 0) {
continue;
}
if (framework::product(in_dim) == 0) {
in_dim = x_dim;
} else {
PADDLE_MOBILE_ENFORCE(in_dim == x_dim,
"input tensors must have same shape");
}
}
this->param_.Out()->Resize(in_dim);
}
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(sum, ops::SumOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(sum, ops::ConcatOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SUM_OP
#pragma once
#include <string>
#include "framework/operator.h"
#include "operators/kernel/sum_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
using std::string;
template <typename DeviceType, typename T>
class SumOp : public framework::OperatorWithKernel<
DeviceType, SumParam<DeviceType>,
operators::SumKernel<DeviceType, T>> {
public:
SumOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType, SumParam<DeviceType>,
operators::SumKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, SumParam<DeviceType>,
operators::SumKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
protected:
};
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -61,38 +61,11 @@ endif ()
list(FIND NET "FPGAnets" CON)
if (CON GREATER -1)
ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-resnet paddle-mobile)
ADD_EXECUTABLE(test-resnet50 fpga/test_resnet50.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-resnet50 paddle-mobile)
ADD_EXECUTABLE(test-fpga-EW fpga/test_fpga_EW.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-fpga-EW paddle-mobile)
ADD_EXECUTABLE(test-fpga-conv fpga/test_fpga_conv.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-fpga-conv paddle-mobile)
ADD_EXECUTABLE(test-fpga-pooling fpga/test_fpga_pooling.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-fpga-pooling paddle-mobile)
ADD_EXECUTABLE(test-fpga-bypass fpga/test_fpga_bypass.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-fpga-bypass paddle-mobile)
ADD_EXECUTABLE(test-fpga-softmax fpga/test_fpga_softmax.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-fpga-softmax paddle-mobile)
ADD_EXECUTABLE(test-fpga-concat fpga/test_fpga_concat.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-fpga-concat paddle-mobile)
ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-tensor-quant paddle-mobile)
ADD_EXECUTABLE(test-fpga-concat-op fpga/test_concat_op.cpp test_helper.h test_include.h)
target_link_libraries(test-fpga-concat-op paddle-mobile)
ADD_EXECUTABLE(test-format-data fpga/test_format_data.cpp test_helper.h test_include.h)
target_link_libraries(test-format-data paddle-mobile)
# ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h)
# target_link_libraries(test-resnet paddle-mobile)
set(FOUND_MATCH ON)
endif ()
......@@ -173,6 +146,14 @@ if (NOT FOUND_MATCH)
target_link_libraries(test-elementwiseadd-op paddle-mobile)
# gen test
ADD_EXECUTABLE(test-elementwisesub-op operators/test_elementwise_sub_op.cpp test_helper.h test_include.h)
target_link_libraries(test-elementwisesub-op paddle-mobile)
# gen test
ADD_EXECUTABLE(test-im2sequence-op operators/test_im2sequence_op.cpp test_helper.h test_include.h)
target_link_libraries(test-im2sequence-op paddle-mobile)
# gen test
ADD_EXECUTABLE(test-concat-op operators/test_concat_op.cpp test_helper.h test_include.h)
target_link_libraries(test-concat-op paddle-mobile)
......@@ -212,6 +193,10 @@ if (NOT FOUND_MATCH)
ADD_EXECUTABLE(test-fc-op operators/test_fusion_fc_op.cpp test_helper.h test_include.h)
target_link_libraries(test-fc-op paddle-mobile)
# gen test
ADD_EXECUTABLE(test-sum-op operators/test_sum_op.cpp test_helper.h test_include.h)
target_link_libraries(test-sum-op paddle-mobile)
# test quantize op
ADD_EXECUTABLE(test-quantize-op operators/test_quantize_op.cpp test_helper.h test_include.h)
target_link_libraries(test-quantize-op paddle-mobile)
......
......@@ -43,7 +43,7 @@ template <typename DeviceType, typename OpType>
class Executor4Test : public Executor<DeviceType> {
public:
Executor4Test(Program<DeviceType> p, string op_type,
bool use_optimize = false, int predict_op_count = 1)
bool use_optimize = false)
: Executor<DeviceType>() {
this->use_optimize_ = use_optimize;
this->program_ = p;
......@@ -64,7 +64,7 @@ class Executor4Test : public Executor<DeviceType> {
std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
for (int i = 0; i < ops.size(); ++i) {
auto op = ops[i];
if (op->Type() == op_type && i < predict_op_count) {
if (op->Type() == op_type) {
DLOG << "匹配到: " << op->Type();
/// test first meeting op in program
......@@ -74,6 +74,7 @@ class Executor4Test : public Executor<DeviceType> {
op->Type(), op->GetInputs(), op->GetOutputs(),
op->GetAttrMap(), this->program_.scope);
this->ops_of_block_[*block_desc.get()].push_back(op_ptr);
break;
}
}
}
......
......@@ -30,7 +30,11 @@ int main() {
input_tensor.data<float>() + input_tensor.numel());
paddle_mobile.FeedData(input_tensor);
paddle_mobile.Predict_To(-1);
for (int i = 0; i < 1000; i++) {
paddle_mobile.Predict_To(-1);
if (i % 100 == 0) std::cout << i << std::endl;
}
// paddle_mobile.Predict_From(73);
// paddle_mobile.Predict_From_To(72, 73);
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "../test_helper.h"
#include "../test_include.h"
#include "operators/elementwise_sub_op.h"
namespace paddle_mobile {
namespace framework {
template <typename Dtype>
class TestElementwiseSubOp {
public:
explicit TestElementwiseSubOp(const Program<Dtype> p) : program_(p) {
if (use_optimize_) {
to_predict_program_ = program_.optimizeProgram;
} else {
to_predict_program_ = program_.originProgram;
}
const std::vector<std::shared_ptr<BlockDesc>> blocks =
to_predict_program_->Blocks();
// DLOG << " **block size " << blocks.size();
for (int i = 0; i < blocks.size(); ++i) {
std::shared_ptr<BlockDesc> block_desc = blocks[i];
std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
// DLOG << " ops " << ops.size();
for (int j = 0; j < ops.size(); ++j) {
std::shared_ptr<OpDesc> op = ops[j];
if (op->Type() == "elementwise_sub" &&
op->Input("X")[0] == "sigmoid_1.tmp_0") {
DLOG << " elementwise_sub attr size: " << op->GetAttrMap().size();
DLOG << " inputs size: " << op->GetInputs().size();
DLOG << " outputs size: " << op->GetOutputs().size();
std::shared_ptr<operators::ElementwiseSubOp<Dtype, float>> lrn =
std::make_shared<operators::ElementwiseSubOp<Dtype, float>>(
op->Type(), op->GetInputs(), op->GetOutputs(),
op->GetAttrMap(), program_.scope);
ops_of_block_[*block_desc.get()].push_back(lrn);
}
}
}
}
std::shared_ptr<Tensor> predict_bn(const Tensor &t1, const Tensor &t2) {
// feed
auto scope = program_.scope;
Variable *x1_feed_value = scope->Var("tmp_0");
auto tensor_x1 = x1_feed_value->GetMutable<LoDTensor>();
tensor_x1->ShareDataWith(t1);
Variable *x2_feed_value = scope->Var("sigmoid_1.tmp_0");
auto tensor_x2 = x2_feed_value->GetMutable<LoDTensor>();
tensor_x2->ShareDataWith(t2);
Variable *output = scope->Var("tmp_1");
auto *output_tensor = output->GetMutable<LoDTensor>();
output_tensor->mutable_data<float>({1, 1, 6, 6});
// DLOG << typeid(output_tensor).name();
// DLOG << "output_tensor dims: " << output_tensor->dims();
std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
out_tensor.reset(output_tensor);
predict_bn(t1, t2, 0);
return out_tensor;
}
private:
const framework::Program<Dtype> program_;
std::shared_ptr<ProgramDesc> to_predict_program_;
std::map<framework::BlockDesc,
std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
ops_of_block_;
bool use_optimize_ = false;
void predict_bn(const Tensor &t1, const Tensor &t2, int block_id) {
std::shared_ptr<BlockDesc> to_predict_block =
to_predict_program_->Block(block_id);
for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
auto op = ops_of_block_[*to_predict_block.get()][j];
DLOG << "op -> run()";
op->Run();
}
}
};
template class TestElementwiseSubOp<CPU>;
} // namespace framework
} // namespace paddle_mobile
int main() {
DLOG << "----------**********----------";
DLOG << "begin to run ElementwiseSub Test";
paddle_mobile::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string(g_ocr) + "/model",
std::string(g_ocr) + "/params");
/// input x1 (1,1,6,6)
paddle_mobile::framework::Tensor inputx1;
SetupTensor<float>(&inputx1, {1, 1, 6, 6}, static_cast<float>(0),
static_cast<float>(1));
auto *inputx1_ptr = inputx1.data<float>();
/// input x2 (1,1,6,6)
paddle_mobile::framework::Tensor inputx2;
SetupTensor<float>(&inputx2, {1, 1, 6, 6}, static_cast<float>(0),
static_cast<float>(1));
auto *inputx2_ptr = inputx2.data<float>();
paddle_mobile::framework::TestElementwiseSubOp<paddle_mobile::CPU>
testElementwiseSubOp(program);
auto output_op = testElementwiseSubOp.predict_bn(inputx1, inputx2);
auto *output_op_ptr = output_op->data<float>();
auto inputx1_dim = inputx1.numel() / inputx1.dims()[0];
DLOG << " input1 : ";
for (int i = 0; i < inputx1.dims()[0]; ++i) {
for (int j = 0; j < inputx1_dim; ++j) {
DLOGF("%f ", inputx1_ptr[i * inputx1_dim + j]);
}
DLOGF("\n");
}
auto inputx2_dim = inputx2.numel() / inputx2.dims()[0];
DLOG << " input2 : ";
for (int i = 0; i < inputx2.dims()[0]; ++i) {
for (int j = 0; j < inputx2_dim; ++j) {
DLOGF("%f ", inputx2_ptr[i * inputx2_dim + j]);
}
DLOGF("\n");
}
auto output_dim = output_op->numel() / output_op->dims()[0];
DLOG << " output : ";
for (int i = 0; i < output_op->dims()[0]; ++i) {
for (int j = 0; j < output_dim; ++j) {
DLOGF("%f ", output_op_ptr[i * output_dim + j]);
}
DLOGF("\n");
}
return 0;
}
......@@ -12,51 +12,129 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "../executor_for_test.h"
#pragma once
#include "../test_helper.h"
#include "../test_include.h"
#include "operators/im2sequence_op.h"
int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(g_ocr_recg);
PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
"program file read fail");
namespace paddle_mobile {
namespace framework {
Executor4Test<paddle_mobile::CPU,
paddle_mobile::operators::ReluOp<paddle_mobile::CPU, float>>
executor(program, "im2sequence");
template <typename Dtype>
class TestIm2SequenceOp {
public:
explicit TestIm2SequenceOp(const Program<Dtype> p) : program_(p) {
if (use_optimize_) {
to_predict_program_ = program_.optimizeProgram;
} else {
to_predict_program_ = program_.originProgram;
}
// 1. input_tensors;
vector<Tensor> input_tensors;
const std::vector<std::shared_ptr<BlockDesc>> blocks =
to_predict_program_->Blocks();
// DLOG << " **block size " << blocks.size();
for (int i = 0; i < blocks.size(); ++i) {
std::shared_ptr<BlockDesc> block_desc = blocks[i];
std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
// DLOG << " ops " << ops.size();
for (int j = 0; j < ops.size(); ++j) {
std::shared_ptr<OpDesc> op = ops[j];
if (op->Type() == "im2sequence" &&
op->Input("X")[0] == "conv2d_19.tmp_1") {
DLOG << " im2squence attr size: " << op->GetAttrMap().size();
DLOG << " inputs size: " << op->GetInputs().size();
DLOG << " outputs size: " << op->GetOutputs().size();
Tensor input1;
auto input1_data = CreateInput<float>(&input1, {2, 2, 3, 3}, -1, 1);
input_tensors.push_back(input1);
std::shared_ptr<operators::Im2SequenceOp<Dtype, float>> lrn =
std::make_shared<operators::Im2SequenceOp<Dtype, float>>(
op->Type(), op->GetInputs(), op->GetOutputs(),
op->GetAttrMap(), program_.scope);
ops_of_block_[*block_desc.get()].push_back(lrn);
}
}
}
}
// 2. input_names
vector<string> input_names({
"conv2d_19.tmp_1",
});
std::shared_ptr<Tensor> predict_bn(const Tensor &t1) {
// feed
auto scope = program_.scope;
Variable *x1_feed_value = scope->Var("conv2d_19.tmp_1");
auto tensor_x1 = x1_feed_value->GetMutable<LoDTensor>();
tensor_x1->ShareDataWith(t1);
// 3. output_names
vector<string> output_names({"im2sequence_0.tmp_0"});
Variable *output = scope->Var("im2sequence_0.tmp_0");
auto *output_tensor = output->GetMutable<LoDTensor>();
output_tensor->mutable_data<float>({2, 12});
// DLOG << typeid(output_tensor).name();
// DLOG << "output_tensor dims: " << output_tensor->dims();
// 4. out_dims;
vector<DDim> out_ddims;
auto out_ddim = paddle_mobile::framework::make_ddim({8, 9});
out_ddims.push_back(out_ddim);
std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
out_tensor.reset(output_tensor);
auto output = executor.Predict<LoDTensor>(input_tensors, input_names,
output_names, out_ddims);
predict_bn(t1, 0);
return out_tensor;
}
auto output0_data = output[0]->data<float>();
private:
const framework::Program<Dtype> program_;
std::shared_ptr<ProgramDesc> to_predict_program_;
std::map<framework::BlockDesc,
std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
ops_of_block_;
bool use_optimize_ = false;
for (int j = 0; j < input_tensors[0].numel(); ++j) {
DLOG << " value of input: " << input1_data[j];
void predict_bn(const Tensor &t1, int block_id) {
std::shared_ptr<BlockDesc> to_predict_block =
to_predict_program_->Block(block_id);
for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
auto op = ops_of_block_[*to_predict_block.get()][j];
DLOG << "op -> run()";
op->Run();
}
}
};
template class TestIm2SequenceOp<CPU>;
} // namespace framework
} // namespace paddle_mobile
for (int j = 0; j < output[0]->numel(); ++j) {
DLOG << " value of output: " << output0_data[j];
int main() {
DLOG << "----------**********----------";
DLOG << "begin to run Im2Sequence Test";
paddle_mobile::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string(g_eng) + "/model",
std::string(g_eng) + "/params");
/// input x (4,10,2,2)
paddle_mobile::framework::Tensor inputx;
SetupTensor<float>(&inputx, {1, 2, 6, 2}, static_cast<float>(0),
static_cast<float>(1));
auto *inputx_ptr = inputx.data<float>();
paddle_mobile::framework::TestIm2SequenceOp<paddle_mobile::CPU>
testIm2SequenceOp(program);
auto output_op = testIm2SequenceOp.predict_bn(inputx);
auto *output_op_ptr = output_op->data<float>();
auto input_dim = inputx.numel() / inputx.dims()[0];
DLOG << " input : ";
for (int i = 0; i < inputx.dims()[0]; ++i) {
for (int j = 0; j < input_dim; ++j) {
DLOGF("%f ", inputx_ptr[i * input_dim + j]);
}
DLOGF("\n");
}
auto output_dim = output_op->numel() / output_op->dims()[0];
DLOG << " output : ";
for (int i = 0; i < output_op->dims()[0]; ++i) {
for (int j = 0; j < output_dim; ++j) {
DLOGF("%f ", output_op_ptr[i * output_dim + j]);
}
DLOGF("\n");
}
return 0;
}
......@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <stdint-gcc.h>
#include "../test_helper.h"
#include "../test_include.h"
#include "operators/mul_op.h"
......@@ -73,12 +74,20 @@ int TestMulOP() {
}
}
int32_t eq = 0;
int32_t neq = 0;
for (int32_t i = 0; i < m * n; ++i) {
PADDLE_MOBILE_ENFORCE(
output_data[i] == c[i], "output[%d] = %d, output_cmp[%d] = %d", i,
static_cast<int32_t>(output_data[i]), i, static_cast<int32_t>(c[i]));
if (static_cast<int>(output_data[i] == c[i])) {
++eq;
} else {
++neq;
}
}
DLOG << "Run MulOp successfully!";
DLOG << "mnk=" << m << " " << n << " " << k << " eq=" << eq
<< " neq=" << neq;
delete op;
return 0;
}
......
......@@ -127,18 +127,25 @@ int main() {
DLOG << "----------**********----------";
DLOG << "begin to run MulticlassNMS Test";
paddle_mobile::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string("../../test/models/mobilenet+ssd"));
auto program = loader.Load(std::string(g_mobilenet_ssd));
/// input x (1,3,300,300)
paddle_mobile::framework::Tensor inputx1;
SetupTensor<float>(&inputx1, {10, 1917, 4}, static_cast<float>(0),
SetupTensor<float>(&inputx1, {1, 2, 4}, static_cast<float>(0),
static_cast<float>(1));
auto *inputx1_ptr = inputx1.data<float>();
const float x1[] = {0, 0, 100, 100, 50, 50, 150, 150};
for (int i = 0; i < 8; ++i) {
*(inputx1_ptr + i) = x1[i];
}
paddle_mobile::framework::Tensor inputx2;
SetupTensor<float>(&inputx2, {10, 21, 1917}, static_cast<float>(0),
SetupTensor<float>(&inputx2, {1, 2, 2}, static_cast<float>(0),
static_cast<float>(1));
auto *inputx2_ptr = inputx2.data<float>();
const float x2[] = {0.4, 0.3, 0.6, 0.7};
for (int i = 0; i < 4; ++i) {
*(inputx2_ptr + i) = x2[i];
}
paddle_mobile::framework::TestMultiClassNMSOp<paddle_mobile::CPU>
testMultiClassNMSOp(program);
......@@ -146,8 +153,26 @@ int main() {
auto output = testMultiClassNMSOp.predict(inputx1, inputx2);
auto *output_ptr = output->data<float>();
for (int i = 0; i < output->numel(); i++) {
for (int i = 0; i < output->numel(); ++i) {
DLOG << output_ptr[i];
}
// test multi point
paddle_mobile::framework::Tensor inputx3;
SetupTensor<float>(&inputx3, {1, 2, 8}, static_cast<float>(0),
static_cast<float>(1));
auto *inputx3_ptr = inputx3.data<float>();
const float x3[] = {0, 0, 100, 0, 100, 100, 0, 100,
50, 50, 150, 50, 150, 150, 50, 150};
for (int i = 0; i < 16; ++i) {
*(inputx3_ptr + i) = x3[i];
}
auto output2 = testMultiClassNMSOp.predict(inputx3, inputx2);
auto *output_ptr2 = output2->data<float>();
for (int i = 0; i < output2->numel(); ++i) {
DLOG << output_ptr2[i];
}
return 0;
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "../test_helper.h"
#include "../test_include.h"
#include "operators/sum_op.h"
namespace paddle_mobile {
namespace framework {
template <typename Dtype>
class TestSumOp {
public:
explicit TestSumOp(const Program<Dtype> p) : program_(p) {
if (use_optimize_) {
to_predict_program_ = program_.optimizeProgram;
} else {
to_predict_program_ = program_.originProgram;
}
const std::vector<std::shared_ptr<BlockDesc>> blocks =
to_predict_program_->Blocks();
// DLOG << " **block size " << blocks.size();
for (int i = 0; i < blocks.size(); ++i) {
std::shared_ptr<BlockDesc> block_desc = blocks[i];
std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
// DLOG << " ops " << ops.size();
for (int j = 0; j < ops.size(); ++j) {
std::shared_ptr<OpDesc> op = ops[j];
if (op->Type() == "sum" && op->Input("X")[0] == "fc_2.tmp_0") {
DLOG << " sum attr size: " << op->GetAttrMap().size();
DLOG << " inputs size: " << op->GetInputs().size();
DLOG << " outputs size: " << op->GetOutputs().size();
std::shared_ptr<operators::SumOp<Dtype, float>> lrn =
std::make_shared<operators::SumOp<Dtype, float>>(
op->Type(), op->GetInputs(), op->GetOutputs(),
op->GetAttrMap(), program_.scope);
ops_of_block_[*block_desc.get()].push_back(lrn);
}
}
}
}
std::shared_ptr<Tensor> predict_bn(const Tensor &t1, const Tensor &t2) {
// feed
auto scope = program_.scope;
Variable *x1_feed_value = scope->Var("fc_2.tmp_0");
auto tensor_x1 = x1_feed_value->GetMutable<LoDTensor>();
tensor_x1->ShareDataWith(t1);
Variable *x2_feed_value = scope->Var("fc_2.tmp_1");
auto tensor_x2 = x2_feed_value->GetMutable<LoDTensor>();
tensor_x2->ShareDataWith(t2);
Variable *output = scope->Var("fc_2.tmp_2");
auto *output_tensor = output->GetMutable<LoDTensor>();
output_tensor->mutable_data<float>({2, 96});
// DLOG << typeid(output_tensor).name();
// DLOG << "output_tensor dims: " << output_tensor->dims();
std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
out_tensor.reset(output_tensor);
predict_bn(t1, t2, 0);
return out_tensor;
}
private:
const framework::Program<Dtype> program_;
std::shared_ptr<ProgramDesc> to_predict_program_;
std::map<framework::BlockDesc,
std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
ops_of_block_;
bool use_optimize_ = false;
void predict_bn(const Tensor &t1, const Tensor &t2, int block_id) {
std::shared_ptr<BlockDesc> to_predict_block =
to_predict_program_->Block(block_id);
for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
auto op = ops_of_block_[*to_predict_block.get()][j];
DLOG << "op -> run()";
op->Run();
}
}
};
template class TestSumOp<CPU>;
} // namespace framework
} // namespace paddle_mobile
int main() {
DLOG << "----------**********----------";
DLOG << "begin to run Sum Test";
paddle_mobile::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string(g_eng) + "/model",
std::string(g_eng) + "/params");
/// input x (4,10,2,2)
paddle_mobile::framework::Tensor inputx1;
SetupTensor<float>(&inputx1, {2, 96}, static_cast<float>(0),
static_cast<float>(1));
auto *inputx1_ptr = inputx1.data<float>();
paddle_mobile::framework::Tensor inputx2;
SetupTensor<float>(&inputx2, {2, 96}, static_cast<float>(0),
static_cast<float>(1));
auto *inputx2_ptr = inputx2.data<float>();
paddle_mobile::framework::TestSumOp<paddle_mobile::CPU> testSumOp(program);
auto output_sum = testSumOp.predict_bn(inputx1, inputx2);
auto *output_sum_ptr = output_sum->data<float>();
DLOG << "input1 44: " << inputx1_ptr[44];
DLOG << "input2 44: " << inputx2_ptr[44];
DLOG << "out 44 :" << output_sum_ptr[44];
return 0;
}
......@@ -27,6 +27,7 @@ limitations under the License. */
static const char *g_ocr = "../models/ocr";
static const char *g_mobilenet_ssd = "../models/mobilenet+ssd";
static const char *g_genet_combine = "../models/enet";
static const char *g_eng = "../models/eng_20conv_1_9_fc";
static const char *g_mobilenet_ssd_gesture = "../models/mobilenet+ssd_gesture";
static const char *g_mobilenet_combined = "../models/mobilenet_combine";
static const char *g_googlenetv1_combined = "../models/googlenetv1_combine";
......@@ -51,6 +52,7 @@ static const char *g_test_image_1x3x224x224_banana =
static const char *g_test_image_desktop_1_3_416_416_nchw_float =
"../images/in_put_1_3_416_416_2";
static const char *g_hand = "../images/hand_image";
static const char *g_moto = "../images/moto_300x300_float";
static const char *g_imgfssd_ar = "../images/test_image_ssd_ar";
static const char *g_imgfssd_ar1 = "../images/003_0001.txt";
static const char *g_img = "../images/img.bin";
......
......@@ -33,6 +33,7 @@ if (CON GREATER -1)
set(POOL_OP ON)
set(RESHAPE_OP ON)
set(FUSION_CONVADDBNRELU_OP ON)
set(FUSION_CONVADDRELU_OP ON)
set(FUSION_CONVADD_OP ON)
set(FOUND_MATCH ON)
......@@ -117,12 +118,9 @@ if (CON GREATER -1)
set(POOL_OP ON)
set(CONCAT_OP ON)
set(SOFTMAX_OP ON)
set(DROPOUT_OP ON)
set(FUSION_CONVBNRELU_OP ON)
set(FUSION_CONVBN_OP ON)
set(FUSION_CONVADD_OP ON)
set(MUL_OP ON)
set(FOUND_MATCH ON)
endif()
......@@ -188,6 +186,8 @@ if(NOT FOUND_MATCH)
set(CONV_OP ON)
set(DEPTHWISECONV_OP ON)
set(ELEMENTWISEADD_OP ON)
set(ELEMENTWISESUB_OP ON)
set(IM2SEQUENCE_OP ON)
set(FUSION_CONVADD_OP ON)
set(FUSION_CONVADDPRELU_OP ON)
set(FUSION_CONVADDRELU_OP ON)
......@@ -220,6 +220,8 @@ if(NOT FOUND_MATCH)
set(SPLIT_OP ON)
set(FLATTEN_OP ON)
set(SHAPE_OP ON)
set(ELEMENTWISEMUL_OP ON)
set(SUM_OP ON)
endif()
# option(BATCHNORM_OP "" ON)
......@@ -261,6 +263,9 @@ endif()
if (ELEMENTWISEADD_OP)
add_definitions(-DELEMENTWISEADD_OP)
endif()
if (ELEMENTWISESUB_OP)
add_definitions(-DELEMENTWISESUB_OP)
endif()
if (FUSION_CONVADD_OP)
add_definitions(-DFUSION_CONVADD_OP)
endif()
......@@ -388,3 +393,11 @@ endif()
if (SHAPE_OP)
add_definitions(-DSHAPE_OP)
endif()
if (ELEMENTWISEMUL_OP)
add_definitions(-DELEMENTWISEMUL_OP)
endif()
if (SUM_OP)
add_definitions(-DSUM_OP)
endif()
......@@ -5,7 +5,7 @@ TOTAL_ERRORS=0
# The trick to remove deleted files: https://stackoverflow.com/a/2413151
for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}' | \
grep -v ".pb.cpp" | grep -v ".pb.h" | grep -v ".pb-c.h" | grep -v ".pb-c.c" | \
grep -v "protobuf-c.h" | grep -v "protobuf-c.c"); do
grep -v "protobuf-c.h" | grep -v "protobuf-c.c" | grep -v "variant.h"); do
cpplint $file;
TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?);
done
......
......@@ -45,13 +45,13 @@ def combine_bgrs_nchw(bgrs, means_b_g_r, scale, channel_type=ChannelType.BGR):
print '------------------'
print bgrs_float_array[0]
print bgrs_float_array[416 * 416 * 2 + 416 * 2 + 2]
print bgrs_float_array[224 * 224 * 2 + 224 * 2 + 2]
# for i in range(0, 9):
# print'bs %d' % i
# print bs[i] / 255.
print bs[416 * 2 + 2] / 255.
print bs[224 * 2 + 2] / 255.
print '--------------combine_bgrs_nchw-----------------end'
return bgrs_float_array
......@@ -64,6 +64,6 @@ def combine_bgrs_nchw(bgrs, means_b_g_r, scale, channel_type=ChannelType.BGR):
# cv2.waitKey(0)
bgrs = tools.resize_take_rgbs('datas/newyolo.jpg', (416, 416, 3))
bgrs = tools.resize_take_rgbs('datas/jpgs/0000_0.9834-148196_82452-0ad4b83ec6bc0f9c5f28101539267054.jpg_p0_0.126571263346.jpg', (224, 224, 3))
array = combine_bgrs_nchw(bgrs, (0, 0, 0), 1. / 255, ChannelType.RGB)
tools.save_to_file('datas/desktop_1_3_416_416_nchw_float', array)
tools.save_to_file('datas/desktop_1_3_224_224_nchw_float', array)
......@@ -15,11 +15,11 @@ from array import array
# image.resize(shape_h_w)
data = np.fromfile('datas/img.res')
data = np.fromfile('/Users/xiebaiyuan/PaddleProject/paddle-mobile/tools/python/imagetools/datas/jpgs2/0000_0.9834-148196_82452-0ad4b83ec6bc0f9c5f28101539267054.jpg_p0_0.126571263346.jpg.input.npfile','f')
print data.size
print data[0]
print data
data.reshape(1, 3, 416, 416)
data.reshape(1, 3, 224, 224)
out_array = array('f')
print'--------------------'
print data.size
......@@ -27,12 +27,12 @@ print data[0]
print '如果是nhwc --------'
# rgb rgb rgb rgb rgb
print data[416 * 3 * 2 + 3 * 2 + 2]
print data[224 * 3 * 2 + 3 * 2 + 2]
# print data[2]
print '如果是nchw --------'
# rgb rgb rgb rgb rgb
print data[416 * 416 * 2 + 416 * 2 + 2]
print data[224 * 224 * 2 + 224 * 2 + 2]
# print data[2]
# 明明是nchw
......@@ -42,6 +42,8 @@ for i in range(0, data.size):
print len(out_array)
print out_array[416 * 416 * 2 + 416 * 2 + 2]
print out_array[224 * 224 * 2 + 224 * 2 + 2]
# print out_array
tools.save_to_file('datas/in_put_1_3_416_416_2', out_array)
tools.save_to_file('datas/in_put_1_3_224_224_nchw', out_array)
# Created by .ignore support plugin (hsz.mobi)
### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
/yolo/datas/
/mobilenet/datas/
......@@ -5,22 +5,28 @@ layer_mdl_conv = 'ConvolutionLayer'
layer_mdl_deepwise_conv = 'DepthwiseConvolutionLayer'
layer_mdl_relu = 'ReluLayer'
layer_mdl_pointwise_add = 'PointwiseConvolutionLayer'
layer_mdl_pooling = 'PoolingLayer'
layer_mdl_softmax = 'SoftmaxLayer'
# fluid ops
op_fluid_fusion_conv_add = 'fusion_conv_add'
op_fluid_relu = 'relu'
op_fluid_pooling = 'pool2d'
op_fluid_softmax = 'softmax'
# dict mdk layer --- fluid op
mdl2fluid_op_layer_dict = {
layer_mdl_conv: op_fluid_fusion_conv_add,
layer_mdl_deepwise_conv: op_fluid_fusion_conv_add,
layer_mdl_relu: op_fluid_relu,
layer_mdl_pointwise_add: op_fluid_fusion_conv_add
layer_mdl_pointwise_add: op_fluid_fusion_conv_add,
layer_mdl_pooling: op_fluid_pooling,
layer_mdl_softmax: op_fluid_softmax
}
mdl_outputs_key = "outputs"
mdl_inputs_key = "inputs"
mdl_weight_key = "weights"
mdl_weight_key = "weight"
mdl_attrs_key = "params"
# dict of mdl-input _out param to fluid input out attrs
......@@ -39,13 +45,30 @@ fusion_conv_add_dict = {
relu_dict = {
mdl_inputs_key: 'X',
mdl_outputs_key: 'Out',
mdl_weight_key: ()
# mdl_weight_key: ()
}
pool2d_dict = {
mdl_inputs_key: 'X',
mdl_outputs_key: 'Out',
# mdl_weight_key: (),
mdl_attrs_key: ('pooling_type', 'global_pooling')
}
softmax_dict = {
mdl_inputs_key: 'X',
mdl_outputs_key: 'Out',
mdl_weight_key: (),
mdl_attrs_key: ()
}
# mdl layers --- fluid ops
op_io_dict = {
'fusion_conv_add': fusion_conv_add_dict,
'relu': relu_dict
'relu': relu_dict,
'pool2d': pool2d_dict,
'softmax': softmax_dict
}
# fluid attr key --- mdl params key
......@@ -54,70 +77,17 @@ fusion_conv_add_attrs_dict = {
'strides': 'stride',
'groups': 'group'
}
# fluid attr key --- mdl params key
pool2d_attrs_dict = {
'global_pooling': 'global_pooling',
'pooling_type': 'type'
}
# fluid attr key --- mdl params key
fluid_attrs_type_dict = {
'paddings': 0,
'strides': 6,
'groups': 6
}
# '': "bias_term", 是不是要add 目前 yolo的模型都是 bias_term = 1
# attrs {
# name: "axis"
# type: INT
# i: 1
# }
# attrs_name = {
# 'name': "workspace_size_MB",
# 'type': 'INT',
# 'i': '4096'
# }
# attrs
# {
# name: "data_format"
# type: STRING
# s: "AnyLayout"
# }
# attrs
# {
# name: "use_mkldnn"
# type: BOOLEAN
# b: false
# }
# attrs
# {
# name: "use_cudnn"
# type: BOOLEAN
# b: true
# }
# attrs
# {
# name: "dilations"
# type: INTS
# ints: 1
# ints: 1
# }
# attrs
# {
# name: "groups"
# type: INT
# i: 1
# }
# attrs
# {
# name: "paddings"
# type: INTS
# ints: 0
# ints: 0
# }
# attrs
# {
# name: "strides"
# type: INTS
# ints: 1
# ints: 1
# }
此差异已折叠。
import datetime
import json
import os
import google.protobuf as pbg
import framework_pb2 as framework_pb2
def loadmdl(json_path):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册