Commit 8d3d2e58 authored by: C chonwhite

fixed merge conflicts

......@@ -18,7 +18,7 @@
* Test device (Android NDK ndk-r17c)
* Snapdragon 855
* xiaomi mi9, snapdragon 855
* xiaomi mi9, snapdragon 855 (enable sdot instruction)
* 4xA76(1@2.84GHz + 3@2.4GHz) + 4xA55@1.78GHz
* Snapdragon 845
......@@ -33,7 +33,7 @@
* HUAWEI Mate10
* Test notes
* branch: release/v2.3.0
* branch: release/v2.6.0
* warmup=10, repeats=30; the average time is reported, in ms
* When the thread count is 1, ```DeviceInfo::Global().SetRunMode``` is set to LITE_POWER_HIGH; otherwise it is set to LITE_POWER_NO_BIND
* The model input shape is {1, 3, 224, 224}, and every element of the input image is 1
......@@ -48,75 +48,75 @@
Snapdragon 855|armv7 | armv7 | armv7 |armv8 | armv8 |armv8
----| ---- | ---- | ---- | ---- |---- |----
threads num|1 |2 |4 |1 |2 |4
mobilenet_v1 |33.27 |19.52 |11.14 |31.72 |18.76 |10.24 |
mobilenet_v2 |29.08 |15.79 |9.25 |25.89 |14.17 |8.38 |
shufflenet_v2 |4.40 |3.09 |2.30 |4.28 |3.02 |2.35 |
squeezenet_v1.1 |19.96 |12.61 |8.76 |18.25 |11.46 |7.97 |
mnasnet |21.00 |12.54 |7.28 |19.65 |11.65 |6.96 |
mobilenet_v1 |35.11 |20.67 |11.83 |30.56 |18.59 |10.44 |
mobilenet_v2 |26.36 |15.83 |9.29 |21.64 |13.25 |7.95 |
shufflenet_v2 |4.56 |3.14 |2.35 |4.07 |2.89 |2.28 |
squeezenet_v1.1 |21.27 |13.55 |8.49 |18.05 |11.51 |7.83 |
mnasnet |21.40 |13.18 |7.63 |18.84 |11.40 |6.80 |
Snapdragon 845|armv7 | armv7 | armv7 |armv8 | armv8 |armv8
----| ---- | ---- | ---- | ---- |---- |----
threads num|1 |2 |4 |1 |2 |4
mobilenet_v1 |66.36 |35.97 |19.45 |62.66 |33.87 |17.85 |
mobilenet_v2 |45.86 |25.53 |14.6 |41.58 |23.24 |13.39 |
shufflenet_v2 |7.58 |4.89 |3.41 |7.44 |4.91 |3.58 |
squeezenet_v1.1 |37.15 |22.74 |13.51 |34.69 |21.27 |12.74 |
mnasnet |40.09 |21.73 |11.91 |38.19 |21.02 |12.11 |
mobilenet_v1 |65.56 |37.17 |19.65 |63.23 |32.98 |17.68 |
mobilenet_v2 |45.89 |25.20 |14.39 |41.03 |22.94 |12.98 |
shufflenet_v2 |7.31 |4.66 |3.27 |7.08 |4.71 |3.41 |
squeezenet_v1.1 |36.98 |22.53 |13.45 |34.27 |20.96 |12.60 |
mnasnet |39.85 |23.64 |12.25 |37.81 |20.70 |11.81 |
Snapdragon 835|armv7 | armv7 | armv7 |armv8 | armv8 |armv8
----| ---- | ---- | ---- | ---- |---- |----
threads num|1 |2 |4 |1 |2 |4
mobilenet_v1 |96.98 |53.92 |32.24 |89.31 |48.02 |27.58 |
mobilenet_v2 |67.72 |37.66 |23.82 |60.10 |34.36 |21.05 |
shufflenet_v2 |10.72 |6.62 |4.63 |10.10 |6.44 |4.63 |
squeezenet_v1.1 |53.89 |33.28 |20.73 |50.83 |32.31 |19.51 |
mnasnet |59.55 |33.53 |20.32 |56.21 |31.58 |19.06 |
mobilenet_v1 |92.77 |51.56 |30.14 |87.46 |48.02 |26.42 |
mobilenet_v2 |65.78 |36.52 |22.34 |58.31 |33.04 |19.87 |
shufflenet_v2 |10.39 |6.26 |4.46 |9.72 |6.19 |4.41 |
squeezenet_v1.1 |53.59 |33.16 |20.13 |51.56 |31.81 |19.10 |
mnasnet |57.44 |32.62 |19.47 |54.99 |30.69 |17.98 |
#### Caffe model
Snapdragon 855|armv7 | armv7 | armv7 |armv8 | armv8 |armv8
----| ---- | ---- | ---- | ---- |---- |----
threads num|1 |2 |4 |1 |2 |4 |
mobilenet_v1 |33.36 |19.45 |11.26 |31.63 |18.74 |10.31 |
mobilenet_v2 |31.63 |19.21 |11.61 |28.34 |17.14 |10.16 |
shufflenet_v2 |4.46 |3.08 |2.32 |4.26 |2.98 |2.35 |
mobilenet_v1 |32.38 |18.65 |10.69 |30.75 |18.11 |9.88 |
mobilenet_v2 |29.45 |17.86 |10.81 |26.61 |16.26 |9.67 |
shufflenet_v2 |5.04 |3.14 |2.20 |4.09 |2.85 |2.25 |
Snapdragon 845|armv7 | armv7 | armv7 |armv8 | armv8 |armv8
----| ---- | ---- | ---- | ---- |---- |----
threads num|1 |2 |4 |1 |2 |4 |
mobilenet_v1 |66.32 |35.83 |19.56 |62.52 |33.79 |17.91 |
mobilenet_v2 |58.46 |32.69 |18.56 |53.72 |29.86 |16.80 |
shufflenet_v2 |7.65 |4.82 |3.46 |7.55 |4.97 |3.62 |
mobilenet_v1 |65.26 |35.19 |19.11 |61.42 |33.15 |17.48 |
mobilenet_v2 |55.59 |31.31 |17.68 |51.54 |29.69 |16.00 |
shufflenet_v2 |7.42 |4.73 |3.33 |7.18 |4.75 |3.39 |
Snapdragon 835|armv7 | armv7 | armv7 |armv8 | armv8 |armv8
----| ---- | ---- | ---- | ---- |---- |----
threads num|1 |2 |4 |1 |2 |4 |
mobilenet_v1 |95.38 |54.09 |32.03 |95.05 |48.33 |27.54 |
mobilenet_v2 |88.46 |48.98 |30.23 |79.28 |44.64 |27.10 |
shufflenet_v2 |10.07 |6.51 |4.61 |10.31 |6.50 |4.66 |
mobilenet_v1 |95.38 |52.16 |30.37 |92.10 |46.71 |26.31 |
mobilenet_v2 |82.89 |45.49 |28.14 |74.91 |41.88 |25.25 |
shufflenet_v2 |10.25 |6.36 |4.42 |9.68 |6.20 |4.42 |
#### int8 quantized model benchmark data
Snapdragon 855|armv7 | armv7 | armv7 |armv8 | armv8 |armv8
----| ---- | ---- | ---- | ---- |---- |----
threads num|1 |2 |4 |1 |2 |4 |
mobilenet_v1 |36.80 |21.58 |11.12 | 14.01 |8.13 |4.32 |
mobilenet_v2 |28.72 |19.08 |12.49 | 17.24 |11.55 |7.82 |
mobilenet_v1 |37.18 |21.71 |11.16 | 14.41 |8.34 |4.37 |
mobilenet_v2 |27.95 |16.57 |8.97 | 13.68 |8.16 |4.67 |
Snapdragon 835|armv7 | armv7 | armv7 |armv8 | armv8 |armv8
----| ---- | ---- | ---- | ---- |---- |----
threads num|1 |2 |4 |1 |2 |4 |
mobilenet_v1 |60.76 |32.25 |16.66 |56.57 |29.84 |15.24 |
mobilenet_v2 |49.38 |31.10 |22.07 |47.52 |28.18 |19.24 |
mobilenet_v1 |61.63 |32.60 |16.49 |57.36 |29.74 |15.50 |
mobilenet_v2 |47.13 |25.62 |13.56 |41.87 |22.42 |11.72 |
Kirin 970|armv7 | armv7 | armv7 |armv8 | armv8 |armv8
----| ---- | ---- | ---- | ---- |---- |----
threads num|1 |2 |4 |1 |2 |4 |
mobilenet_v1 |65.95 |34.39 |18.68 |60.86 |30.98 |16.31 |
mobilenet_v2 |68.87 |39.39 |24.43 |65.57 |37.31 |20.87 |
mobilenet_v1 |63.13 |32.63 |16.85 |58.92 |29.96 |15.42 |
mobilenet_v2 |48.60 |25.43 |13.76 |43.06 |22.10 |12.09 |
......@@ -32,14 +32,26 @@ tar zxf mobilenet_v1.tar.gz
![image](https://paddlelite-data.bj.bcebos.com/doc_images/cxx_demo/3inference_model.png)
(2) Download the [opt tool](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.3.0/opt), put it in the same folder, and run the following command in a terminal to convert the model:
(2) Model conversion
```shell
wget https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.3.0/opt
chmod +x opt
./opt --model_dir=./mobilenet_v1 --optimize_out_type=naive_buffer --optimize_out=./mobilenet_v1_opt
```
- Before v2.6.0
Download the [opt tool](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.3.0/opt), put it in the same folder, and run the following command in a terminal to convert the model
```shell
wget https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.3.0/opt
chmod +x opt
./opt --model_dir=./mobilenet_v1 --optimize_out_type=naive_buffer --optimize_out=./mobilenet_v1_opt
```
- v2.6.0 and later
Install paddlelite, then run the following command in a terminal to convert the model
```shell
python -m pip install paddlelite
paddle_lite_opt --model_dir=./mobilenet_v1 --optimize_out_type=naive_buffer --optimize_out=./mobilenet_v1_opt
```
**The result is shown in the figure below:**
![image](https://paddlelite-data.bj.bcebos.com/doc_images/cxx_demo/2opt_model.png)
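The conversion can also be driven from Python with the `Opt` class bundled in the wheel, mirroring the CLI flags above (a minimal sketch; the ARM target in `set_valid_places` is an assumption for this mobile demo):
```python
import paddlelite.lite as lite

opt = lite.Opt()
opt.set_model_dir("./mobilenet_v1")        # folder holding the downloaded model
opt.set_optimize_out("mobilenet_v1_opt")   # output prefix for the optimized model
opt.set_valid_places("arm")                # assumption: ARM target, matching the mobile demo
opt.run()
```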
......
# Python Demo
## 1. Download the latest Python inference library
```shell
python -m pip install paddlelite
```
## 2. Convert the model
Native PaddlePaddle models must be converted by the [opt]() tool into the naive_buffer format supported by Paddle-Lite.
Take the `mobilenet_v1` model as an example:
(1) Download the [mobilenet_v1 model](http://paddle-inference-dist.bj.bcebos.com/mobilenet_v1.tar.gz) and extract it:
```shell
wget http://paddle-inference-dist.bj.bcebos.com/mobilenet_v1.tar.gz
tar zxf mobilenet_v1.tar.gz
```
(2) Use the opt tool:
When loading a model from disk, the model and parameter paths take one of two forms, depending on how the model and parameter files are stored.
- Linux
- Non-combined form: when the model folder model_dir contains one model file and multiple parameter files, pass the model folder path; the model file name defaults to __model__.
```shell
paddle_lite_opt --model_dir=./mobilenet_v1 \
--optimize_out=mobilenet_v1_opt \
--optimize_out_type=naive_buffer \
--valid_targets=x86
```
- Combined form: when the model folder model_dir contains only one model file __model__ and one parameter file __params__, pass the model file path and the parameter file path
```shell
paddle_lite_opt --model_file=./mobilenet_v1/__model__ \
--param_file=./mobilenet_v1/__params__ \
--optimize_out=mobilenet_v1_opt \
--optimize_out_type=naive_buffer \
--valid_targets=x86
```
- Windows
Windows does not yet support running the model converter directly from the command line; a Python script is required
```python
import paddlelite.lite as lite
a=lite.Opt()
# non-combined form
a.set_model_dir("D:\\YOUR_MODEL_PATH\\mobilenet_v1")
# combined form
# a.set_model_file("D:\\YOUR_MODEL_PATH\\mobilenet_v1\\__model__")
# a.set_param_file("D:\\YOUR_MODEL_PATH\\mobilenet_v1\\__params__")
a.set_optimize_out("mobilenet_v1_opt")
a.set_valid_places("x86")
a.run()
```
- macOS
The opt tool is used the same way as on Linux (Python inference is not yet supported on macOS; this will be fixed in the next release)
## 3. Write the inference program
With the inference library and model ready, we can write a program to run inference. We provide example demos covering image classification, object detection, and other scenarios for reference. Create a file mobilenetV1_light_api.py;
the complete Python demo code is at [demo/python](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/lite/demo/python/mobilenetv1_light_api.py)
(1) Set up the config
```python
from paddlelite.lite import *
config = MobileConfig()
config.set_model_from_file("/YOUR_MODEL_PATH/mobilenet_v1_opt.nb")
```
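Optionally, thread count and power mode can be set on the same config (a sketch; it assumes the Python binding mirrors the C++ `MobileConfig` setters `set_threads` and `set_power_mode`, which may vary by release):
```python
# assumption: these setters exist in the Python binding as in the C++ API
config.set_threads(1)
config.set_power_mode(PowerMode.LITE_POWER_HIGH)
```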
(2) Create the predictor
```python
predictor = create_paddle_predictor(config)
```
(3) Set the input data
```python
input_tensor = predictor.get_input(0)
input_tensor.resize([1, 3, 224, 224])
input_tensor.set_float_data([1.] * 3 * 224 * 224)
```
(4) Run inference
```python
predictor.run()
```
(5) Get the output data
```python
output_tensor = predictor.get_output(0)
print(output_tensor.shape())
print(output_tensor.float_data()[:10])
```
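Putting the five steps together, `mobilenetV1_light_api.py` looks like this (a sketch assembled from the snippets above; the model path is a placeholder):
```python
from paddlelite.lite import *

# (1) set up the config
config = MobileConfig()
config.set_model_from_file("/YOUR_MODEL_PATH/mobilenet_v1_opt.nb")

# (2) create the predictor
predictor = create_paddle_predictor(config)

# (3) feed an all-ones input of shape {1, 3, 224, 224}
input_tensor = predictor.get_input(0)
input_tensor.resize([1, 3, 224, 224])
input_tensor.set_float_data([1.] * 3 * 224 * 224)

# (4) run inference
predictor.run()

# (5) fetch the output
output_tensor = predictor.get_output(0)
print(output_tensor.shape())
print(output_tensor.float_data()[:10])
```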
## 4. Run the script
```shell
python mobilenetV1_light_api.py
```
......@@ -4,8 +4,6 @@
Paddle-Lite supports building the x86 inference library in a Docker or Linux environment. See [Environment Preparation](../user_guides/source_compile) for setup.
(Note: a non-Docker Linux environment must be Ubuntu 16.04)
### Build
1、 Download the code
......@@ -20,10 +18,11 @@ git checkout release/v2.6.0
```bash
cd Paddle-Lite
./lite/tools/build.sh x86
./lite/tools/build.sh --build_python=ON x86
# other optional build flags
# --with_log=OFF      disable LOG output
# --build_python=OFF  skip building the Python inference library
```
### Build output description
......@@ -53,8 +52,17 @@ x86编译结果位于 `build.lite.x86/inference_lite_lib`
- `mobilenetv1_full` : C++ demo that runs mobilenet_v1 inference with the full_api
- `mobilenetv1_light` : C++ demo that runs mobilenet_v1 inference with the light_api
5、 `demo/python` folder: Python demos for the x86 inference library
- `mobilenetv1_full_api.py` : Python demo that runs mobilenet_v1 inference with the full_api
- `mobilenetv1_light_api.py` : Python demo that runs mobilenet_v1 inference with the light_api
6、 `python` folder: Python library files and the corresponding .whl package
- `install` folder: the built .whl package is located at `install/dist/*.whl`
- `lib` folder: library files the .whl package depends on
**(If the Python inference library is not needed, replace the build command with `./lite/tools/build.sh x86`.)**
### x86 inference API example
......@@ -64,7 +72,8 @@ x86编译结果位于 `build.lite.x86/inference_lite_lib`
mobilenetv1_full/
|-- CMakeLists.txt
|-- build.sh
`-- mobilenet_full_api.cc
|-- build.bat
`-- mobilenet_full_api.cc
```
This demo is built with cmake: `CMakeLists.txt` is the cmake script, `mobilenet_full_api.cc` is the x86 example source, and `build.sh` is the build script.
......@@ -168,8 +177,8 @@ int main(int argc, char** argv) {
#### Build environment requirements
- Windows 10 Professional
- GPU mode is not yet supported on Windows
- *Python 2.7/3.5.1+/3.6/3.7 (64 bit)*
- GPU builds are not yet supported on Windows
- *Python 2.7/3.5.1+ (64 bit)*
- *pip or pip3 9.0.1+ (64 bit)*
- *Visual Studio 2015 Update3*
......@@ -187,15 +196,15 @@ int main(int argc, char** argv) {
```bash
git clone https://github.com/PaddlePaddle/Paddle-Lite.git
# switch to the release branch
git checkout release/v2.3
git checkout release/v2.6.0
```
2、 Build from source
2、 Build from source (enter the corresponding parameters when prompted)
```bash
```dos
cd Paddle-Lite
lite/tools/build_windows.bat with_extra with_python with_profile
lite\tools\build_windows.bat with_extra with_python with_profile
```
The build script `lite/tools/build.bat` takes the following additional parameters:
The build script `build_windows.bat` takes the following additional parameters:
| Parameter | Description | Value |
|-----------|-------------|-------------|
......@@ -203,40 +212,62 @@ lite/tools/build_windows.bat with_extra with_python with_profile
| with_python | Optional. Whether to build the Python inference library (default OFF). | `ON`/`OFF` |
| with_profile | Optional. Whether to enable profiler support (default OFF). | `ON`/`OFF` |
### Build result
### Build output description
The x86 build output is located in `build.lite.x86/inference_lite_lib`
**Contents**:
1、 `bin` folder: executable tool `test_model_bin`
2、 `cxx` folder: C++ library files and the corresponding headers
1、 `cxx` folder: C++ library files and the corresponding headers
- `include` : headers
- `lib` : library files
- Bundled static libraries:
- Static libraries:
- `libpaddle_api_full_bundled.lib` : full_api static library
- `libpaddle_api_light_bundled.lib` : light_api static library
3、 `third_party` folder: third-party library files
2、 `third_party` folder: the third-party dependency mklml
- mklml : the mklml math library that the Paddle-Lite inference library depends on
3、 `demo/cxx` folder: C++ demos for the x86 inference library
- `mobilenetv1_full` : C++ demo that runs mobilenet_v1 inference with the full_api
- `mobilenetv1_light` : C++ demo that runs mobilenet_v1 inference with the light_api
4、 `demo/python`: Python demos for the x86 inference library
- `mobilenetv1_full_api.py`: Python demo that runs mobilenet_v1 inference with the full_api
- `mobilenetv1_light_api.py`: Python demo that runs mobilenet_v1 inference with the light_api
5、 `python` folder: Python library files and the corresponding .whl package
- `install` folder: the built .whl package is located at `install/dist/*.whl`
- `lib` folder: library files the .whl package depends on
### x86 inference API example
1、 We provide a Windows x86 API example that runs mobilenet_v1: [mobilenet_full_x86demo](https://paddlelite-data.bj.bcebos.com/x86/mobilenet_full_x86demo.zip). After downloading and extracting, the contents are as follows:
1、 `mobilenetv1_full` directory layout
![](https://paddlelite-data.bj.bcebos.com/x86/x86-doc/demo.png)
```bash
mobilenetv1_full/
|-- CMakeLists.txt
|-- build.sh
|-- build.bat
`-- mobilenet_full_api.cc
```
`mobilenet_v1` is the model, `lib` and `include` are the Paddle-Lite inference library and headers, `third_party` contains the build-time third-party dependency `mklml`, `mobilenet_full_api.cc` is the x86 example source, and `build.bat` is the build script.
This demo is built with cmake: `CMakeLists.txt` is the cmake script, `mobilenet_full_api.cc` is the x86 example source, `build.sh` is the Linux x86 build script, and `build.bat` is the Windows x86 build script.
2、 Demo contents and usage
2、 Demo usage
``` bash
# 1. Build (run this script in the VS2015 command prompt)
# 1. Build
cd mobilenetv1_full
build.bat
cd build
```
The build output is `Release\\mobilenet_full_api.exe` in the current directory
``` bash
The build output is `Release\mobilenet_full_api.exe` in the current directory
``` dos
# 2. Run inference
Release\\mobilenet_full_api.exe ..\mobilenet_v1
Release\mobilenet_full_api.exe mobilenet_v1
```
`mobilenet_v1` is the model path, and `mobilenet_full_api.exe` is the executable built in step 1
Download and extract the [`mobilenet_v1`](http://paddle-inference-dist.bj.bcebos.com/mobilenet_v1.tar.gz) model into the current `build` directory, then run the command above to perform inference
......@@ -47,6 +47,7 @@ Welcome to Paddle-Lite's documentation!
demo_guides/cpp_demo
demo_guides/java_demo
demo_guides/python_demo
demo_guides/android_app_demo
demo_guides/ios_app_demo
demo_guides/x86
......
# `build_extra` parameter:
# `with_extra` parameter:
The Lite inference library comes in two flavors: the **basic library** and the **full library (with_extra)**. The basic library contains only the basic CV operators (OPs) and is smaller; the full library contains all Lite operators, is larger, and supports more models.
......
......@@ -3,49 +3,48 @@
## Build variants
- ARM_Version=`armv7/armv8` ARM version; choose armv7 or armv8
- arch=`armv7/armv8` ARM version; choose armv7 or armv8
- arm_os=`android\ios\armlinux` target platform; supported ARM mobile platforms are `ios`, `armlinux`, and `android`
- toolchain=`gcc/clang` compiler for the source build; defaults to `gcc`
- android_stl=`c++_static/c++_shared` how the Lite inference library links the STL; static and dynamic linking are supported
- with_extra=`ON/OFF` whether to build the full operator set; when OFF only the basic CV-related OPs are built, [parameter details](library)
- with_cv=`ON/OFF` whether to build the Paddle-Lite CV APIs
- arm_os=`android\ios\ios64\armlinux` target platform; supported ARM mobile platforms are `ios\ios64`, `armlinux`, and `android`
- arm_lang=`gcc/clang` compiler for the source build; defaults to `gcc`
## Android(toolchain=gcc)
- arm_stl=`c++_static/c++_shared` how the Lite inference library links the STL; static and dynamic linking are supported
- build_extra=`ON/OFF` whether to build the full operator set; when OFF only the basic CV-related OPs are built, [parameter details](library)
- `tiny_publish/full_publish` build mode: `tiny_publish` builds the mobile deployment library; `full_publish` builds the deployment library together with the third-party dependencies
## Android
|ARM Version|build_extra|arm_stl|target|Download|
| Arch |with_extra|arm_stl|with_cv|Download|
|:-------:|:-----:|:-----:|:-----:|:-------:|
|armv7|OFF|c++_static|tiny_publish|[release/v2.3](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.3.0/inference_lite_lib.android.armv7.gcc.c++_static.tiny_publish.tar.gz)|
|armv7|OFF|c++_static|full_publish|[release/v2.3](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.3.0/inference_lite_lib.android.armv7.gcc.c++_static.full_publish.tar.gz)|
|armv7|OFF|c++_shared|tiny_publish|[release/v2.3](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.3.0/inference_lite_lib.android.armv7.gcc.c++_shared.tiny_publish.tar.gz)|
|armv7|OFF|c++_shared|full_publish|[release/v2.3](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.3.0/inference_lite_lib.android.armv7.gcc.c++_shared.full_publish.tar.gz)|
|armv7|ON|c++_static|tiny_publish|[release/v2.3](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.3.0/inference_lite_lib.android.armv7.gcc.c++_static.with_extra.tiny_publish.tar.gz)|
|armv7|ON|c++_static|full_publish|[release/v2.3](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.3.0/inference_lite_lib.android.armv7.gcc.c++_static.with_extra.full_publish.tar.gz)|
|armv7|ON|c++_shared|tiny_publish|[release/v2.3](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.3.0/inference_lite_lib.android.armv7.gcc.c++_shared.with_extra.tiny_publish.tar.gz)|
|armv7|ON|c++_shared|full_publish|[release/v2.3](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.3.0/inference_lite_lib.android.armv7.gcc.c++_shared.with_extra.full_publish.tar.gz)|
|armv8|OFF|c++_static|tiny_publish|[release/v2.3](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.3.0/inference_lite_lib.android.armv8.gcc.c++_static.tiny_publish.tar.gz)|
|armv8|OFF|c++_static|full_publish|[release/v2.3](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.3.0/inference_lite_lib.android.armv8.gcc.c++_static.full_publish.tar.gz)|
|armv8|OFF|c++_shared|tiny_publish|[release/v2.3](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.3.0/inference_lite_lib.android.armv8.gcc.c++_shared.tiny_publish.tar.gz)|
|armv8|OFF|c++_shared|full_publish|[release/v2.3](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.3.0/inference_lite_lib.android.armv8.gcc.c++_shared.full_publish.tar.gz)|
|armv8|ON|c++_static|tiny_publish|[release/v2.3](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.3.0/inference_lite_lib.android.armv8.gcc.c++_static.with_extra.tiny_publish.tar.gz)|
|armv8|ON|c++_static|full_publish|[release/v2.3](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.3.0/inference_lite_lib.android.armv8.gcc.c++_static.with_extra.full_publish.tar.gz)|
|armv8|ON|c++_shared|tiny_publish|[release/v2.3](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.3.0/inference_lite_lib.android.armv8.gcc.c++_shared.with_extra.tiny_publish.tar.gz)|
|armv8|ON|c++_shared|full_publish|[release/v2.3](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.3.0/inference_lite_lib.android.armv8.gcc.c++_shared.with_extra.full_publish.tar.gz)|
|armv7|OFF|c++_shared|OFF|[release/v2.6](https://paddlelite-data.bj.bcebos.com/Release/2.6.0/Android/inference_lite_lib.android.armv7.gcc.c++_shared.tiny_publish.tar.gz)|
|armv7|OFF|c++_shared|ON|[release/v2.6](https://paddlelite-data.bj.bcebos.com/Release/2.6.0/Android/inference_lite_lib.android.armv7.gcc.c++_shared.with_cv.tiny_publish.tar.gz)|
|armv7|ON|c++_shared|OFF|[release/v2.6](https://paddlelite-data.bj.bcebos.com/Release/2.6.0/Android/inference_lite_lib.android.armv7.gcc.c++_shared.with_extra.tiny_publish.tar.gz)|
|armv7|ON|c++_shared|ON|[release/v2.6](https://paddlelite-data.bj.bcebos.com/Release/2.6.0/Android/inference_lite_lib.android.armv7.gcc.c++_shared.with_extra.with_cv.tiny_publish.tar.gz)|
|armv7|OFF|c++_static|OFF|[release/v2.6](https://paddlelite-data.bj.bcebos.com/Release/2.6.0/Android/inference_lite_lib.android.armv7.gcc.c++_static.tiny_publish.tar.gz)|
|armv7|OFF|c++_static|ON|[release/v2.6](https://paddlelite-data.bj.bcebos.com/Release/2.6.0/Android/inference_lite_lib.android.armv7.gcc.c++_static.with_cv.tiny_publish.tar.gz)|
|armv7|ON|c++_static|OFF|[release/v2.6](https://paddlelite-data.bj.bcebos.com/Release/2.6.0/Android/inference_lite_lib.android.armv7.gcc.c++_static.with_extra.tiny_publish.tar.gz)|
|armv7|ON|c++_static|ON|[release/v2.6](https://paddlelite-data.bj.bcebos.com/Release/2.6.0/Android/inference_lite_lib.android.armv7.gcc.c++_static.with_extra.with_cv.tiny_publish.tar.gz)|
|armv8|OFF|c++_shared|OFF|[release/v2.6](https://paddlelite-data.bj.bcebos.com/Release/2.6.0/Android/inference_lite_lib.android.armv8.gcc.c++_shared.tiny_publish.tar.gz)|
|armv8|OFF|c++_shared|ON|[release/v2.6](https://paddlelite-data.bj.bcebos.com/Release/2.6.0/Android/inference_lite_lib.android.armv8.gcc.c++_shared.with_cv.tiny_publish.tar.gz)|
|armv8|ON|c++_shared|OFF|[release/v2.6](https://paddlelite-data.bj.bcebos.com/Release/2.6.0/Android/inference_lite_lib.android.armv8.gcc.c++_shared.with_extra.tiny_publish.tar.gz)|
|armv8|ON|c++_shared|ON|[release/v2.6](https://paddlelite-data.bj.bcebos.com/Release/2.6.0/Android/inference_lite_lib.android.armv8.gcc.c++_shared.with_extra.with_cv.tiny_publish.tar.gz)|
|armv8|OFF|c++_static|OFF|[release/v2.6](https://paddlelite-data.bj.bcebos.com/Release/2.6.0/Android/inference_lite_lib.android.armv8.gcc.c++_static.tiny_publish.tar.gz)|
|armv8|OFF|c++_static|ON|[release/v2.6](https://paddlelite-data.bj.bcebos.com/Release/2.6.0/Android/inference_lite_lib.android.armv8.gcc.c++_static.with_cv.tiny_publish.tar.gz)|
|armv8|ON|c++_static|OFF|[release/v2.6](https://paddlelite-data.bj.bcebos.com/Release/2.6.0/Android/inference_lite_lib.android.armv8.gcc.c++_static.with_extra.tiny_publish.tar.gz)|
|armv8|ON|c++_static|ON|[release/v2.6](https://paddlelite-data.bj.bcebos.com/Release/2.6.0/Android/inference_lite_lib.android.armv8.gcc.c++_static.with_extra.with_cv.tiny_publish.tar.gz)|
## iOS
|ARM Version|arm_os|with_extra|Download|
|ARM Version|with_extra|with_cv|Download|
|:-------:|:-----:|:-----:|:-----:|
|armv7|ios|OFF|[release/v2.3](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.3.0/inference_lite_lib.ios.armv7.tar.gz)|
|armv7|ios|ON|[release/v2.3](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.3.0/inference_lite_lib.ios.armv7.with_extra.tar.gz)|
|armv8|ios64|OFF|[release/v2.3](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.3.0/inference_lite_lib.ios64.armv8.tar.gz)|
|armv8|ios64|ON|[release/v2.3](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.3.0/inference_lite_lib.ios64.armv8.with_extra.tar.gz)|
|armv7|OFF|OFF|[release/v2.6](https://paddlelite-data.bj.bcebos.com/Release/2.6.0/iOS/inference_lite_lib.ios.armv7.tiny_publish.tar.gz)|
|armv7|OFF|ON|[release/v2.6](https://paddlelite-data.bj.bcebos.com/Release/2.6.0/iOS/inference_lite_lib.ios.armv7.with_cv.tiny_publish.tar.gz)|
|armv7|ON|OFF|[release/v2.6](https://paddlelite-data.bj.bcebos.com/Release/2.6.0/iOS/inference_lite_lib.ios.armv7.with_cv.with_extra.tiny_publish.tar.gz)|
|armv7|ON|ON|[release/v2.6](https://paddlelite-data.bj.bcebos.com/Release/2.6.0/iOS/inference_lite_lib.ios.armv7.with_extra.tiny_publish.tar.gz)|
|armv8|OFF|OFF|[release/v2.6](https://paddlelite-data.bj.bcebos.com/Release/2.6.0/iOS/inference_lite_lib.ios.armv8.tiny_publish.tar.gz)|
|armv8|OFF|ON|[release/v2.6](https://paddlelite-data.bj.bcebos.com/Release/2.6.0/iOS/inference_lite_lib.ios.armv8.with_cv.tiny_publish.tar.gz)|
|armv8|ON|OFF|[release/v2.6](https://paddlelite-data.bj.bcebos.com/Release/2.6.0/iOS/inference_lite_lib.ios.armv8.with_cv.with_extra.tiny_publish.tar.gz)|
|armv8|ON|ON|[release/v2.6](https://paddlelite-data.bj.bcebos.com/Release/2.6.0/iOS/inference_lite_lib.ios.armv8.with_extra.tiny_publish.tar.gz)|
## opt tool
......@@ -55,7 +54,13 @@
| Linux | [release/v2.3](https://paddlelite-data.bj.bcebos.com/model_optimize_tool/opt) |
| MacOs | [release/v2.3](https://paddlelite-data.bj.bcebos.com/model_optimize_tool/opt_mac) |
## Installing the Paddle-Lite Python library
- Supported platforms: Windows 10, Ubuntu, Mac
- Python versions: 2.7, 3.5, 3.6, 3.7
```
pip install paddlelite
```
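To verify the installation, a quick import check is enough (a sketch; it only confirms the package loads and exposes the `Opt` class used elsewhere in these docs):
```python
import paddlelite.lite as lite

opt = lite.Opt()  # constructing Opt confirms the native extension loaded
print("paddlelite imported OK")
```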
## Corresponding source build instructions
......
......@@ -10,11 +10,12 @@ PaddleLite 提供了移动端的一键源码编译脚本 `lite/tools/build.sh`
## 1. Environment preparation
Three build environments are currently supported:
Four build environments are currently supported:
1. Docker container environment,
2. Linux (Ubuntu 16.04 recommended),
3. Mac OS.
3. Mac OS,
4. [Windows environment](../demo_guides/x86.html#windows)
### 1. Docker development environment
......
......@@ -224,11 +224,11 @@ if (LITE_WITH_X86)
add_dependencies(publish_inference publish_inference_x86_cxx_lib)
add_custom_target(publish_inference_x86_cxx_demos ${TARGET}
COMMAND ${CMAKE_COMMAND} -E make_directory "${INFER_LITE_PUBLISH_ROOT}/third_party"
COMMAND ${CMAKE_COMMAND} -E copy_directory "${CMAKE_BINARY_DIR}/third_party/install" "${INFER_LITE_PUBLISH_ROOT}/third_party"
COMMAND ${CMAKE_COMMAND} -E copy_directory "${CMAKE_BINARY_DIR}/third_party/eigen3" "${INFER_LITE_PUBLISH_ROOT}/third_party"
COMMAND ${CMAKE_COMMAND} -E make_directory "${INFER_LITE_PUBLISH_ROOT}/third_party/mklml"
COMMAND ${CMAKE_COMMAND} -E copy_directory "${CMAKE_BINARY_DIR}/third_party/install/mklml" "${INFER_LITE_PUBLISH_ROOT}/third_party/mklml"
COMMAND ${CMAKE_COMMAND} -E make_directory "${INFER_LITE_PUBLISH_ROOT}/demo/cxx"
COMMAND ${CMAKE_COMMAND} -E copy_directory "${CMAKE_SOURCE_DIR}/lite/demo/cxx" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx"
COMMAND ${CMAKE_COMMAND} -E copy_directory "${CMAKE_SOURCE_DIR}/lite/demo/cxx/x86_mobilenetv1_light_demo" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx/mobilenetv1_light"
COMMAND ${CMAKE_COMMAND} -E copy_directory "${CMAKE_SOURCE_DIR}/lite/demo/cxx/x86_mobilenetv1_full_demo" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx/mobilenetv1_full"
)
add_dependencies(publish_inference_x86_cxx_lib publish_inference_x86_cxx_demos)
add_dependencies(publish_inference_x86_cxx_demos paddle_api_full_bundled eigen3)
......@@ -327,7 +327,6 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)
add_dependencies(publish_inference tiny_publish_cxx_lib)
if(NOT "${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
add_custom_command(TARGET tiny_publish_cxx_lib POST_BUILD
COMMAND ${CMAKE_STRIP} "-s" ${INFER_LITE_PUBLISH_ROOT}/cxx/lib/libpaddle_api_light_bundled.a
COMMAND ${CMAKE_STRIP} "-s" ${INFER_LITE_PUBLISH_ROOT}/cxx/lib/libpaddle_light_api_shared.so)
endif()
endif()
......
if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK OR (NOT LITE_WITH_LOG))
lite_cc_library(place SRCS paddle_place.cc DEPS logging)
else()
......@@ -282,17 +281,7 @@ if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND WITH_TESTING)
--model_dir=${LITE_MODEL_DIR}/resnet50 SERIAL)
add_dependencies(test_resnet50 extern_lite_download_resnet50_tar_gz)
lite_cc_test(test_ssd_fpga SRCS test_ssd_fpga.cc
DEPS ${lite_model_test_DEPS}
CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels})
lite_cc_test(test_ssd_fpga SRCS test_ssd_fpga.cc
DEPS ${lite_model_test_DEPS}
CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels})
lite_cc_test(test_inceptionv3_fpga SRCS inceptionv3_test_fpga.cc
lite_cc_test(test_resnet50_fpga SRCS resnet50_test_fpga.cc
DEPS ${lite_model_test_DEPS}
CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels})
......@@ -304,10 +293,6 @@ if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND WITH_TESTING)
--model_dir=${LITE_MODEL_DIR}/inception_v4 SERIAL)
add_dependencies(test_inceptionv4 extern_lite_download_inception_v4_simple_tar_gz)
lite_cc_test(test_ocr_attention_fpga SRCS ocr_attention_test_fpga.cc
DEPS ${lite_model_test_DEPS})
# brief: we comment ocr_test_ut because we do not supply ocr model to test, it is the reference to infer nlp model
# lite_cc_test(test_ocr_attention SRCS ocr_attention_test.cc
# DEPS ${lite_model_test_DEPS})
......
......@@ -91,6 +91,8 @@ void OutputOptModel(const std::string& save_optimized_model_dir) {
}
std::vector<Place> vaild_places = {
Place{TARGET(kARM), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kInt32)},
Place{TARGET(kARM), PRECISION(kInt64)},
};
config.set_valid_places(vaild_places);
auto predictor = lite_api::CreatePaddlePredictor(config);
......
......@@ -7,20 +7,8 @@ if(WIN32)
lite_cc_library(lite_pybind SHARED SRCS pybind.cc DEPS ${PYBIND_DEPS})
get_property (os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
target_link_libraries(lite_pybind ${os_dependency_modules})
elseif(APPLE)
lite_cc_library(lite_pybind SHARED SRCS pybind.cc DEPS ${PYBIND_DEPS})
set(LINK_MAP_FILE "${PADDLE_SOURCE_DIR}/lite/core/exported_symbols.lds")
set(LINK_FLAGS "-Wl,-exported_symbols_list, ${LINK_MAP_FILE}")
add_custom_command(OUTPUT ${LINK_MAP_FILE} COMMAND ...)
set_target_properties(lite_pybind PROPERTIES LINK_FLAGS ${LINK_FLAGS})
add_dependencies(lite_pybind custom_linker_map)
else()
lite_cc_library(lite_pybind SHARED SRCS pybind.cc DEPS ${PYBIND_DEPS})
set(LINK_MAP_FILE "${PADDLE_SOURCE_DIR}/lite/core/lite.map")
set(LINK_FLAGS "-Wl,--version-script ${LINK_MAP_FILE}")
add_custom_command(OUTPUT ${LINK_MAP_FILE} COMMAND ...)
set_target_properties(lite_pybind PROPERTIES LINK_FLAGS ${LINK_FLAGS})
add_dependencies(lite_pybind custom_linker_map)
endif(WIN32)
if (LITE_ON_TINY_PUBLISH)
......
......@@ -47,7 +47,7 @@ if '${WITH_MKL}' == 'ON':
PACKAGE_DATA['paddlelite.libs'] += ['libmklml.dylib', 'libiomp5.dylib']
# link lite.so to paddlelite.libs
COMMAND = "install_name_tool -id \"@loader_path/libs/\" ${PADDLE_BINARY_DIR}\
COMMAND = "install_name_tool -add_rpath \"@loader_path/libs/\" ${PADDLE_BINARY_DIR}\
/inference_lite_lib/python/install/lite/lite.so"
if os.system(COMMAND) != 0:
raise Exception("patch third_party libs failed, command: %s" % COMMAND)
......
......@@ -30,7 +30,8 @@ void ConvElementwiseFuser::BuildPattern() {
auto* bias = VarNode("bias")
->assert_is_op_input("elementwise_add", "Y")
->AsInput()
->assert_is_persistable_var();
->assert_is_persistable_var()
->assert_only_one_output();
// create op nodes
auto* conv2d = OpNode("conv2d", conv_type_)->assert_is_op(conv_type_);
......
......@@ -225,8 +225,8 @@ void DequantOpFuser::InsertNewNode(SSAGraph* graph,
#ifndef LITE_WITH_FPGA
op_desc.SetAttr("enable_int8", true);
#endif
op_desc.SetAttr("weight_scale", weight_scale);
// change the weight from the float type to int8 type.
......
......@@ -105,6 +105,7 @@ class ChannelWiseDequantOpFuser : public FuseBase {
*/
class DeleteQuantDequantOpFuser : public FuseBase {
public:
void BuildPattern() override;
void InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) override;
......@@ -118,6 +119,7 @@ class DynamicQuantDequantOpFuser : public FuseBase {
const std::string& op_type,
int i)
: op_type_(op_type), quant_type_(quantized_op_type), times_(i) {}
void BuildPattern() override;
void InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) override;
......
......@@ -364,6 +364,11 @@ PMNode *PMNode::assert_is_op() {
return this;
}
PMNode *PMNode::assert_only_one_output() {
asserts_.emplace_back([](const Node *x) { return x->outlinks.size() == 1; });
return this;
}
PMNode *PMNode::assert_is_op(const std::string &op_type) {
asserts_.emplace_back([op_type](const Node *x) {
if (x && x->IsStmt()) {
......
......@@ -127,6 +127,7 @@ struct PMNode {
PMNode* assert_is_var();
PMNode* assert_var_not_persistable();
PMNode* assert_is_persistable_var();
PMNode* assert_only_one_output();
PMNode* assert_is_op_output(const std::string& op_type);
PMNode* assert_is_op_input(const std::string& op_type);
PMNode* assert_is_op_input(const std::string& op_type,
......
......@@ -178,7 +178,6 @@ void SSAGraph::Build(const Program &program,
arg_node->AsArg(name, node_storage_.size() - 1);
arg_update_node_map_[name] = arg_node;
}
if (var_types.count(name)) {
if (!arg_node->arg()->type) {
arg_node->arg()->type = LiteType::GetTensorTy(
......@@ -192,7 +191,6 @@ void SSAGraph::Build(const Program &program,
"data_type", static_cast<int>(var_types[name]));
}
}
if (is_weights(name)) arg_node->AsArg().is_weight = true;
CHECK(arg_node->IsRoleSet());
DirectedLink(arg_node, op_node);
......@@ -202,12 +200,10 @@ void SSAGraph::Build(const Program &program,
auto *arg_node = &node_storage_.back();
arg_node->AsArg(name, node_storage_.size() - 1);
arg_update_node_map_[name] = arg_node;
/*
if (var_types.count(name) && !arg_node->arg()->type) {
arg_node->arg()->type = LiteType::GetTensorTy(
TARGET(kUnk), var_types[name], DATALAYOUT(kUnk));
}
*/
if (is_weights(name)) arg_node->AsArg().is_weight = true;
CHECK(arg_node->IsRoleSet());
......
......@@ -134,6 +134,7 @@ class Optimizer {
"mlu_postprocess_pass"}};
if (passes.size() == 1) {
// multi_stream_analysis_pass must be in the front of
// runtime_context_assign_pass
......
......@@ -24,10 +24,16 @@ namespace profile {
namespace {
auto op_comp = [](const OpCharacter& c1, const OpCharacter& c2) {
return (c1.target < c2.target) || (c1.op_type < c2.op_type) ||
(c1.kernel_name < c2.kernel_name) || (c1.remark < c2.remark);
if (c1.kernel_func_name == "NotImpl" && c2.kernel_func_name == "NotImpl") {
return (c1.target < c2.target) || (c1.op_type < c2.op_type) ||
(c1.kernel_name < c2.kernel_name) || (c1.remark < c2.remark);
} else { // compare with ch.kernel_func_name
return (c1.target < c2.target) || (c1.op_type < c2.op_type) ||
(c1.kernel_name < c2.kernel_name) ||
(c1.kernel_func_name < c2.kernel_func_name);
}
};
}
} // namespace
std::map<Type, std::string> TypeStr{
{Type::kUnk, "Unknown"},
......@@ -88,6 +94,36 @@ void Profiler::StopTiming(Type type, const int index, KernelContext* ctx) {
#endif
}
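// Count how many profiled units ran this kernel function; units whose kernel
// never set a kernel_func_name ("NotImpl") are matched by op_type instead.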
int Profiler::GetKernelFuncCalledTimes(const std::string& op_type,
const std::string& kernel_func_name) {
int count = 0;
for (size_t i = 0; i < units_.size(); ++i) {
if ((units_[i].character.kernel_func_name == kernel_func_name) &&
(units_[i].character.kernel_func_name != "NotImpl")) {
++count;
} else if ((units_[i].character.kernel_func_name == "NotImpl") &&
(units_[i].character.op_type == op_type)) {
++count;
}
}
return count;
}
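// Accumulate MACs with the same matching rule as above and report the total in GOPs.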
float Profiler::GetKernelFuncSummaryGOPs(const std::string& op_type,
const std::string& kernel_func_name) {
float GOPs = 0;
for (size_t i = 0; i < units_.size(); ++i) {
if ((units_[i].character.kernel_func_name == kernel_func_name) &&
(units_[i].character.kernel_func_name != "NotImpl")) {
GOPs += units_[i].character.macs;
} else if ((units_[i].character.kernel_func_name == "NotImpl") &&
(units_[i].character.op_type == op_type)) {
GOPs += units_[i].character.macs;
}
}
return GOPs * 1e-9f;
}
std::string Profiler::Summary(Type type, bool concise, size_t w) {
using std::setw;
using std::left;
......@@ -108,13 +144,11 @@ std::string Profiler::Summary(Type type, bool concise, size_t w) {
<< " warm-ups =====" << std::endl;
}
ss << setw(20) << left << "OperatorType"
<< " " << setw(30) << left << "KerneAttr";
if (!concise) {
ss << " " << setw(24) << left << "KernelName";
}
ss << " " << setw(16) << left << "Remark";
<< " " << setw(30) << left << "KerneAttr(Place)"
<< " " << setw(24) << left << "KernelFuncName";
if (!concise) {
ss << " " << setw(15) << left << "InDim"
ss << " " << setw(26) << left << "Remark"
<< " " << setw(15) << left << "InDim"
<< " " << setw(15) << left << "FilterDim"
<< " " << setw(15) << left << "OutDim";
}
......@@ -124,10 +158,13 @@ std::string Profiler::Summary(Type type, bool concise, size_t w) {
if (!concise) {
ss << " " << setw(7) << left << "Last(ms)";
}
ss << " " << setw(7) << left << "Avg(%)";
ss << " " << setw(7) << left << "Avg(%)"
<< " " << setw(7) << left << "GOPs";
if (!concise) {
ss << " " << setw(7) << left << "GOPs"
<< " " << setw(7) << left << "GOPS";
ss << " " << setw(7) << left << "GOPS";
}
if (concise) {
ss << " " << setw(11) << left << "CalledTimes";
}
#ifdef LITE_WITH_OPENCL
ss << " " << setw(9) << left << "clAvg(ms)"
......@@ -185,14 +222,20 @@ std::string Profiler::Summary(Type type, bool concise, size_t w) {
// clang-format off
ss << setw(20) << left << fixed << item.first.op_type
<< " " << setw(30) << left << fixed << item.first.kernel_attr
<< " " << setw(16) << left << fixed << item.first.remark
<< " " << setw(24) << left << fixed << item.first.kernel_func_name
<< " " << setw(7) << left << fixed << setprecision(3)
<< item.second.avg
<< " " << setw(7) << left << fixed << setprecision(3)
<< item.second.min
<< " " << setw(7) << left << fixed << setprecision(3)
<< item.second.max
<< " " << setprecision(2) << percent << "% ";
<< item.second.max
<< " " << setprecision(2) << percent << "% "
<< " " << setw(7) << left << fixed << setprecision(3)
<< GetKernelFuncSummaryGOPs(item.first.op_type,
item.first.kernel_func_name)
<< " " << setw(11) << left << fixed
<< GetKernelFuncCalledTimes(item.first.op_type,
item.first.kernel_func_name);
#ifdef LITE_WITH_OPENCL
float cl_percent = 0;
if (cl_total > 0) {
......@@ -204,7 +247,7 @@ std::string Profiler::Summary(Type type, bool concise, size_t w) {
<< item.second.cl_min
<< " " << setw(9) << left << fixed << setprecision(3)
<< item.second.cl_max
<< " " << left << fixed <<setprecision(2) << cl_percent << "% ";
<< " " << left << fixed << setprecision(2) << cl_percent << "% ";
#endif
ss << std::endl;
// clang-format on
......@@ -244,7 +287,7 @@ std::string Profiler::Summary(Type type, bool concise, size_t w) {
<< " " << setw(30) << left << fixed << unit.Character().kernel_attr
<< " " << setw(24) << left << fixed
<< unit.Character().kernel_func_name
<< " " << setw(16) << left << fixed << unit.Character().remark
<< " " << setw(26) << left << fixed << unit.Character().remark
<< " " << setw(15) << left << fixed << unit.Character().input_shape
<< " " << setw(15) << left << fixed << unit.Character().filter_shape
<< " " << setw(15) << left << fixed << unit.Character().output_shape
......@@ -253,7 +296,7 @@ std::string Profiler::Summary(Type type, bool concise, size_t w) {
<< " " << setw(7) << left << fixed << setprecision(3) << times.Max(w)
<< " " << setw(7) << left << fixed << setprecision(3) << times.Last(w)
<< " " << left << setprecision(2) << percent << "% "
<< " " << setw(7) << left << fixed << setprecision(2)
<< " " << setw(7) << left << fixed << setprecision(3)
<< 1e-9f * unit.Character().macs
<< " " << setw(7) << left << fixed << setprecision(2)
<< 1e-6f * unit.Character().macs / times.Avg(w);
......
......@@ -101,6 +101,10 @@ class Profiler final {
void StartTiming(Type type, const int index, KernelContext* ctx);
void StopTiming(Type type, const int index, KernelContext* ctx);
std::string Summary(Type type, bool concise = true, size_t warm_up = 10);
int GetKernelFuncCalledTimes(const std::string& op_type,
const std::string& kernel_func_name);
float GetKernelFuncSummaryGOPs(const std::string& op_type,
const std::string& kernel_func_name);
OpCharacter* GetOpCharacter(const size_t index);
private:
......
......@@ -73,7 +73,7 @@ void RuntimeProgram::UpdateVarsOfProgram(cpp::ProgramDesc* desc) {
std::unordered_map<std::string, cpp::VarDesc> origin_var_maps;
auto& main_block = *desc->GetBlock<cpp::BlockDesc>(0);
auto var_size = main_block.VarsSize();
for (size_t i = 0; i < var_size; i++) {
for (int i = 0; i < var_size; i++) {
auto v = main_block.GetVar<cpp::VarDesc>(i);
auto name = v->Name();
origin_var_maps.emplace(name, *v);
......@@ -86,16 +86,12 @@ void RuntimeProgram::UpdateVarsOfProgram(cpp::ProgramDesc* desc) {
auto* scope = op->scope();
auto in_names = op->op_info()->input_names();
auto out_names = op->op_info()->output_names();
std::vector<std::string> var_names;
var_names.insert(var_names.end(), in_names.begin(), in_names.end());
var_names.insert(var_names.end(), out_names.begin(), out_names.end());
std::sort(var_names.begin(), var_names.end());
var_names.erase(std::unique(var_names.begin(), var_names.end()),
var_names.end());
for (auto& var_name : var_names) {
auto it = origin_var_maps.find(var_name);
in_names.insert(in_names.end(), out_names.begin(), out_names.end());
std::sort(in_names.begin(), in_names.end());
in_names.erase(std::unique(in_names.begin(), in_names.end()),
in_names.end());
for (auto& in_name : in_names) {
auto it = origin_var_maps.find(in_name);
if (it != origin_var_maps.end()) {
auto* v = main_block.AddVar<cpp::VarDesc>();
v->SetName((it->second).Name());
......@@ -108,30 +104,37 @@ void RuntimeProgram::UpdateVarsOfProgram(cpp::ProgramDesc* desc) {
} else {
// New created vars must be LOD_TENSOR
auto* v = main_block.AddVar<cpp::VarDesc>();
v->SetName(var_name);
v->SetName(in_name);
v->SetType(cpp::VarDesc::Type::LOD_TENSOR);
std::string in_arg_name;
op->op_info()->GetInputArgname(var_name, &in_arg_name);
auto type = kernel->GetInputDeclType(in_arg_name);
const Type* type;
if (op->op_info()->GetInputArgname(in_name, &in_arg_name)) {
type = kernel->GetInputDeclType(in_arg_name);
} else {
op->op_info()->GetOutputArgname(in_name, &in_arg_name);
type = kernel->GetOutputDeclType(in_arg_name);
}
if (type->IsTensor()) {
auto tensor = scope->FindVar(var_name)->GetMutable<Tensor>();
auto tensor = scope->FindVar(in_name)->GetMutable<Tensor>();
v->SetPersistable(tensor->persistable());
if ((it->second).Name() != "feed" && (it->second).Name() != "fetch") {
if (in_name != "feed" && in_name != "fetch") {
v->SetShape(tensor->dims().data());
switch (tensor->precision()) {
#define SET_DATATYPE(precision__, data_type) \
case PrecisionType::precision__: \
v->SetDataType(data_type); \
#define SET_DATATYPE(precision__, data_type) \
case PrecisionType::precision__: \
v->SetDataType(data_type); \
LOG(INFO) << "update var" << (it->second).Name() << "done"; \
break
SET_DATATYPE(kBool, VarDescAPI::VarDataType::BOOL);
SET_DATATYPE(kFloat, VarDescAPI::VarDataType::FP32);
SET_DATATYPE(kFP16, VarDescAPI::VarDataType::FP16);
SET_DATATYPE(kInt8, VarDescAPI::VarDataType::INT8);
SET_DATATYPE(kInt16, VarDescAPI::VarDataType::INT16);
SET_DATATYPE(kInt32, VarDescAPI::VarDataType::INT32);
SET_DATATYPE(kInt64, VarDescAPI::VarDataType::INT64);
#undef SET_DATATYPE
default:
LOG(FATAL) << "unknown precision type";
VLOG(4) << "warning! unknown precision type";
}
}
} else {
......@@ -141,7 +144,6 @@ void RuntimeProgram::UpdateVarsOfProgram(cpp::ProgramDesc* desc) {
}
}
}
void RuntimeProgram::Run() {
#ifdef LITE_WITH_PRECISION_PROFILE
auto inst_precision_profiler = paddle::lite::profile::PrecisionProfiler();
......@@ -153,12 +155,6 @@ void RuntimeProgram::Run() {
#ifndef LITE_WITH_FPGA
if (inst.is_feed_fetch_op()) continue;
#endif
std::string op_type = inst.op()->op_info()->Type();
VLOG(4) << ">> Running kernel: " << inst.op()->op_info()->Repr()
<< " on Target " << TargetToStr(inst.kernel()->target());
#ifdef LITE_WITH_CUDA
if (inst.need_sync()) {
inst.Sync();
......
......@@ -6,16 +6,44 @@ set(TARGET mobilenet_full_api)
set(LITE_DIR "${PROJECT_SOURCE_DIR}/../../../cxx")
set(MKLML_DIR "${PROJECT_SOURCE_DIR}/../../../third_party/mklml/")
if (WIN32)
add_definitions("/DGOOGLE_GLOG_DLL_DECL=")
option(MSVC_STATIC_CRT "use static C Runtime library by default" ON)
if (MSVC_STATIC_CRT)
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT")
endif()
endif()
# 2. link mklml and Paddle-Lite directory
link_directories(${LITE_DIR}/lib ${MKLML_DIR}/lib)
include_directories(${LITE_DIR}/include/ ${MKLML_DIR}/include)
# 3. compile options
add_definitions(-std=c++11 -g -O3 -pthread)
set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR})
if (NOT WIN32)
add_definitions(-std=c++11 -g -O3 -pthread)
set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR})
endif()
# 4.add executable output
add_executable(${TARGET} ${TARGET}.cc)
target_link_libraries(${TARGET} -lpaddle_full_api_shared)
target_link_libraries(${TARGET} -liomp5)
target_link_libraries(${TARGET} -ldl)
if (WIN32)
set(MATH_LIB ${MKLML_DIR}/lib/mklml${CMAKE_STATIC_LIBRARY_SUFFIX}
${MKLML_DIR}/lib/libiomp5md${CMAKE_STATIC_LIBRARY_SUFFIX})
target_link_libraries(${TARGET} libpaddle_api_full_bundled.lib)
target_link_libraries(${TARGET} shlwapi.lib)
target_link_libraries(${TARGET} ${MATH_LIB})
add_custom_command(TARGET ${TARGET} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${MKLML_DIR}/lib/mklml.dll ${CMAKE_BINARY_DIR}/Release
COMMAND ${CMAKE_COMMAND} -E copy ${MKLML_DIR}/lib/libiomp5md.dll ${CMAKE_BINARY_DIR}/Release
)
else()
target_link_libraries(${TARGET} -lpaddle_full_api_shared)
target_link_libraries(${TARGET} -liomp5)
target_link_libraries(${TARGET} -ldl)
endif()
@echo off
setlocal
setlocal enabledelayedexpansion
set source_path=%~dp0
set build_directory=%source_path%\build
if EXIST "%build_directory%" (
call:rm_rebuild_dir "%build_directory%"
)
md "%build_directory%"
set vcvarsall_dir=C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat
IF NOT EXIST "%vcvarsall_dir%" (
goto set_vcvarsall_dir
) else (
goto cmake
)
:set_vcvarsall_dir
SET /P vcvarsall_dir="Please input the path of visual studio command Prompt, such as C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat =======>"
set tmp_var=!vcvarsall_dir!
call:remove_space
set vcvarsall_dir=!tmp_var!
IF NOT EXIST "!vcvarsall_dir!" (
echo "------------!vcvarsall_dir! not exist------------"
goto set_vcvarsall_dir
)
:cmake
D:
cd "%build_directory%"
cmake .. -G "Visual Studio 14 2015 Win64" -T host=x64
call "%vcvarsall_dir%" amd64
msbuild /maxcpucount:8 /p:Configuration=Release mobilenet_full_api.vcxproj
goto:eof
:rm_rebuild_dir
del /f /s /q "%~1\*.*" >nul 2>&1
rd /s /q "%~1" >nul 2>&1
goto:eof
:remove_space
:remove_left_space
if "%tmp_var:~0,1%"==" " (
set "tmp_var=%tmp_var:~1%"
goto remove_left_space
)
:remove_right_space
if "%tmp_var:~-1%"==" " (
set "tmp_var=%tmp_var:~0,-1%"
goto remove_right_space
)
goto:eof
......@@ -16,6 +16,11 @@
#include <vector>
#include "paddle_api.h" // NOLINT
#ifdef _WIN32
#include "paddle_use_kernels.h" // NOLINT
#include "paddle_use_ops.h" // NOLINT
#endif
using namespace paddle::lite_api; // NOLINT
int64_t ShapeProduction(const shape_t& shape) {
......
......@@ -6,16 +6,44 @@ set(TARGET mobilenet_light_api)
set(LITE_DIR "${PROJECT_SOURCE_DIR}/../../../cxx")
set(MKLML_DIR "${PROJECT_SOURCE_DIR}/../../../third_party/mklml/")
if (WIN32)
add_definitions("/DGOOGLE_GLOG_DLL_DECL=")
option(MSVC_STATIC_CRT "use static C Runtime library by default" ON)
if (MSVC_STATIC_CRT)
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT")
endif()
endif()
# 2. link mklml and Paddle-Lite directory
link_directories(${LITE_DIR}/lib ${MKLML_DIR}/lib)
include_directories(${LITE_DIR}/include/ ${MKLML_DIR}/include)
# 3. compile options
add_definitions(-std=c++11 -g -O3 -pthread)
set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR})
if (NOT WIN32)
add_definitions(-std=c++11 -g -O3 -pthread)
set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR})
endif()
# 4.add executable output
add_executable(${TARGET} ${TARGET}.cc)
target_link_libraries(${TARGET} -lpaddle_light_api_shared)
target_link_libraries(${TARGET} -liomp5)
target_link_libraries(${TARGET} -ldl)
if (WIN32)
set(MATH_LIB ${MKLML_DIR}/lib/mklml${CMAKE_STATIC_LIBRARY_SUFFIX}
${MKLML_DIR}/lib/libiomp5md${CMAKE_STATIC_LIBRARY_SUFFIX})
target_link_libraries(${TARGET} libpaddle_api_light_bundled.lib)
target_link_libraries(${TARGET} shlwapi.lib)
target_link_libraries(${TARGET} ${MATH_LIB})
add_custom_command(TARGET ${TARGET} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${MKLML_DIR}/lib/mklml.dll ${CMAKE_BINARY_DIR}/Release
COMMAND ${CMAKE_COMMAND} -E copy ${MKLML_DIR}/lib/libiomp5md.dll ${CMAKE_BINARY_DIR}/Release
)
else()
target_link_libraries(${TARGET} -lpaddle_light_api_shared)
target_link_libraries(${TARGET} -liomp5)
target_link_libraries(${TARGET} -ldl)
endif()
@echo off
setlocal
setlocal enabledelayedexpansion
set source_path=%~dp0
set build_directory=%source_path%\build
if EXIST "%build_directory%" (
call:rm_rebuild_dir "%build_directory%"
)
md "%build_directory%"
set vcvarsall_dir=C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat
IF NOT EXIST "%vcvarsall_dir%" (
goto set_vcvarsall_dir
) else (
goto cmake
)
:set_vcvarsall_dir
SET /P vcvarsall_dir="Please input the path of visual studio command Prompt, such as C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat =======>"
set tmp_var=!vcvarsall_dir!
call:remove_space
set vcvarsall_dir=!tmp_var!
IF NOT EXIST "!vcvarsall_dir!" (
echo "------------!vcvarsall_dir! not exist------------"
goto set_vcvarsall_dir
)
:cmake
D:
cd "%build_directory%"
cmake .. -G "Visual Studio 14 2015 Win64" -T host=x64
call "%vcvarsall_dir%" amd64
msbuild /maxcpucount:8 /p:Configuration=Release mobilenet_light_api.vcxproj
goto:eof
:rm_rebuild_dir
del /f /s /q "%~1\*.*" >nul 2>&1
rd /s /q "%~1" >nul 2>&1
goto:eof
:remove_space
:remove_left_space
if "%tmp_var:~0,1%"==" " (
set "tmp_var=%tmp_var:~1%"
goto remove_left_space
)
:remove_right_space
if "%tmp_var:~-1%"==" " (
set "tmp_var=%tmp_var:~0,-1%"
goto remove_right_space
)
goto:eof
......@@ -15,6 +15,9 @@
#pragma once
#include "lite/backends/arm/math/funcs.h"
#include "lite/core/kernel.h"
#ifdef LITE_WITH_PROFILE
#include "lite/core/profile/profiler.h"
#endif
namespace paddle {
namespace lite {
......@@ -36,6 +39,13 @@ class ConvCompute : public KernelLite<TARGET(kARM), Ptype> {
impl_->Run();
}
#ifdef LITE_WITH_PROFILE
virtual void SetProfileRuntimeKernelInfo(
paddle::lite::profile::OpCharacter* ch) {
impl_->SetProfileRuntimeKernelInfo(ch);
}
#endif
~ConvCompute() {
if (impl_ != nullptr) {
delete impl_;
......
......@@ -50,6 +50,9 @@ void DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
flag_trans_weights_ = true;
}
impl_ = lite::arm::math::conv_depthwise_3x3_fp32;
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_depthwise_3x3_fp32";
#endif
} else if (kw == 5) {
// VLOG(5) << "invoke 5x5 dw conv fp32";
auto strides = param.strides;
......@@ -67,6 +70,9 @@ void DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
w_data_in, w_data, oc, 1, cblock, kh * kw);
flag_trans_weights_ = true;
impl_ = lite::arm::math::conv_depthwise_5x5_fp32;
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_depthwise_5x5_fp32";
#endif
} else {
LOG(FATAL)
<< "5x5 depthwise conv only support stride == 1 or stride == 2";
......@@ -103,6 +109,9 @@ void DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>::PrepareForRun() {
// trans weights
// VLOG(5) << "invoke 3x3 dw conv int8 kernel fp32 out";
impl_ = lite::arm::math::conv_depthwise_3x3_int8_fp32;
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_depthwise_3x3_int8_fp32";
#endif
int cround = ROUNDUP(w_dims[0], 8);
weights_.Resize({cround / 8, 1, kh * kw, 8});
auto wptr = param.filter->data<int8_t>();
......@@ -113,6 +122,9 @@ void DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>::PrepareForRun() {
// trans weights
// VLOG(5) << "invoke 5x5 dw conv int8 kernel fp32 out";
impl_ = lite::arm::math::conv_depthwise_5x5_int8_fp32;
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_depthwise_5x5_int8_fp32";
#endif
int cround = ROUNDUP(w_dims[0], 8);
weights_.Resize({cround / 8, 1, kh * kw, 8});
auto wptr = param.filter->data<int8_t>();
......@@ -162,6 +174,9 @@ void DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
// trans weights
// VLOG(5) << "invoke 3x3 dw conv int8 kernel int8 out";
impl_ = lite::arm::math::conv_depthwise_3x3_int8_int8;
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_depthwise_3x3_int8_int8";
#endif
int cround = ROUNDUP(w_dims[0], 8);
weights_.Resize({cround / 8, 1, kh * kw, 8});
auto wptr = param.filter->data<int8_t>();
......@@ -172,6 +187,9 @@ void DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
// trans weights
// VLOG(5) << "invoke 5x5 dw conv int8 kernel int8 out";
impl_ = lite::arm::math::conv_depthwise_5x5_int8_int8;
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_depthwise_5x5_int8_int8";
#endif
int cround = ROUNDUP(w_dims[0], 8);
weights_.Resize({cround / 8, 1, kh * kw, 8});
auto wptr = param.filter->data<int8_t>();
......@@ -183,6 +201,14 @@ void DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
}
}
#ifdef LITE_WITH_PROFILE
template <>
void DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>::
SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
#endif
template <>
void DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
auto& param = this->Param<param_t>();
......@@ -225,6 +251,14 @@ void DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
w_scale_.data());
}
#ifdef LITE_WITH_PROFILE
template <>
void DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>::
SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
#endif
template <>
void DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
auto& param = this->Param<param_t>();
......@@ -267,6 +301,14 @@ void DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
w_scale_.data());
}
#ifdef LITE_WITH_PROFILE
template <>
void DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>::
SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
#endif
template <>
void DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
auto& param = this->Param<param_t>();
......
......@@ -15,6 +15,7 @@
#pragma once
#include <cmath>
#include <string>
#include <vector>
#include "lite/backends/arm/math/conv_impl.h"
#include "lite/core/context.h"
......@@ -48,6 +49,15 @@ class DepthwiseConv : public KernelLite<TARGET(kARM), Ptype> {
virtual void PrepareForRun();
virtual void Run();
#ifdef LITE_WITH_PROFILE
virtual void SetProfileRuntimeKernelInfo(
paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
std::string kernel_func_name_{"NotImplForConvDw"};
#endif
private:
using param_t = operators::ConvParam;
Tensor weights_;
......
......@@ -19,6 +19,14 @@ namespace lite {
namespace kernels {
namespace arm {
#ifdef LITE_WITH_PROFILE
template <>
void DirectConv<PRECISION(kFloat), PRECISION(kFloat)>::
SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
#endif
template <>
void DirectConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
auto& param = this->Param<param_t>();
......@@ -62,6 +70,9 @@ void DirectConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
b_data,
param,
&ctx);
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_3x3s1_direct_fp32";
#endif
} else {
lite::arm::math::conv_3x3s2_direct_fp32(i_data,
o_data,
......@@ -76,9 +87,20 @@ void DirectConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
b_data,
param,
&ctx);
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_3x3s2_direct_fp32";
#endif
}
}
#ifdef LITE_WITH_PROFILE
template <>
void DirectConv<PRECISION(kInt8), PRECISION(kFloat)>::
SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
#endif
template <>
void DirectConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
auto& param = this->Param<param_t>();
......@@ -117,6 +139,9 @@ void DirectConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
param,
&ctx,
w_scale_.data());
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_3x3s1_direct_int8";
#endif
} else {
lite::arm::math::conv_3x3s2_direct_int8(i_data,
o_data,
......@@ -132,9 +157,20 @@ void DirectConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
param,
&ctx,
w_scale_.data());
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_3x3s2_direct_int8";
#endif
}
}
#ifdef LITE_WITH_PROFILE
template <>
void DirectConv<PRECISION(kInt8), PRECISION(kInt8)>::
SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
#endif
template <>
void DirectConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
auto& param = this->Param<param_t>();
......@@ -173,6 +209,9 @@ void DirectConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
param,
&ctx,
w_scale_.data());
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_3x3s1_direct_int8";
#endif
} else {
lite::arm::math::conv_3x3s2_direct_int8(i_data,
o_data,
......@@ -188,6 +227,9 @@ void DirectConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
param,
&ctx,
w_scale_.data());
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_3x3s2_direct_int8";
#endif
}
}
......
......@@ -15,6 +15,7 @@
#pragma once
#include <cmath>
#include <string>
#include <vector>
#include "lite/backends/arm/math/funcs.h"
#include "lite/core/context.h"
......@@ -180,6 +181,15 @@ class DirectConv : public KernelLite<TARGET(kARM), Ptype> {
virtual void Run();
#ifdef LITE_WITH_PROFILE
virtual void SetProfileRuntimeKernelInfo(
paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
std::string kernel_func_name_{"NotImplForConvDirect"};
#endif
/// todo, support inplace weights transform
protected:
Tensor weights_;
......
......@@ -81,6 +81,14 @@ void GemmLikeConv<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
}
}
#ifdef LITE_WITH_PROFILE
template <>
void GemmLikeConv<PRECISION(kFloat), PRECISION(kFloat)>::
SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
#endif
template <>
void GemmLikeConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
auto& param = this->Param<param_t>();
......@@ -111,12 +119,26 @@ void GemmLikeConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
if (flag_1x1gemm_) {
lite::arm::math::conv1x1s1_gemm(
din, dout, bs, oc, oh, ow, ic, ih, iw, weights, bias, param, &ctx);
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv1x1s1_gemm";
#endif
} else {
lite::arm::math::conv_im2col_gemm(
din, dout, bs, oc, oh, ow, ic, ih, iw, weights, bias, param, &ctx);
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_im2col_gemm";
#endif
}
}
#ifdef LITE_WITH_PROFILE
template <>
void GemmLikeConv<PRECISION(kInt8), PRECISION(kFloat)>::
SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
#endif
template <>
void GemmLikeConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
auto& param = this->Param<param_t>();
......@@ -159,6 +181,9 @@ void GemmLikeConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
param,
&ctx,
w_scale_.data());
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv1x1s1_gemm_int8";
#endif
} else {
lite::arm::math::conv_im2col_gemm_int8(din,
dout,
......@@ -174,9 +199,20 @@ void GemmLikeConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
param,
&ctx,
w_scale_.data());
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_im2col_gemm_int8";
#endif
}
}
#ifdef LITE_WITH_PROFILE
template <>
void GemmLikeConv<PRECISION(kInt8), PRECISION(kInt8)>::
SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
#endif
template <>
void GemmLikeConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
auto& param = this->Param<param_t>();
......@@ -219,6 +255,9 @@ void GemmLikeConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
param,
&ctx,
w_scale_.data());
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv1x1s1_gemm_int8";
#endif
} else {
lite::arm::math::conv_im2col_gemm_int8(din,
dout,
......@@ -234,6 +273,9 @@ void GemmLikeConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
param,
&ctx,
w_scale_.data());
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_im2col_gemm_int8";
#endif
}
}
......
......@@ -15,6 +15,7 @@
#pragma once
#include <cmath>
#include <string>
#include <vector>
#include "lite/backends/arm/math/conv_impl.h"
#include "lite/backends/arm/math/funcs.h"
......@@ -94,6 +95,15 @@ class GemmLikeConv : public KernelLite<TARGET(kARM), Ptype> {
virtual void PrepareForRun();
virtual void Run();
#ifdef LITE_WITH_PROFILE
virtual void SetProfileRuntimeKernelInfo(
paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
std::string kernel_func_name_{"NotImplForConvGemm"};
#endif
/// TODO: support in-place weights transform
protected:
using param_t = operators::ConvParam;
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#pragma once
#include <string>
#include "lite/backends/arm/math/funcs.h"
#include "lite/core/kernel.h"
#include "lite/operators/conv_transpose_op.h"
......@@ -33,6 +34,14 @@ class Conv2DTransposeCompute
~Conv2DTransposeCompute() = default;
#ifdef LITE_WITH_PROFILE
virtual void SetProfileRuntimeKernelInfo(
paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
std::string kernel_func_name_{"NotImplForConvTranspose"};
#endif
protected:
int workspace_size_{0};
};
......
......@@ -94,6 +94,14 @@ void WinogradConv<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
ReInitWhenNeeded();
}
#ifdef LITE_WITH_PROFILE
template <>
void WinogradConv<PRECISION(kFloat), PRECISION(kFloat)>::
SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
#endif
template <>
void WinogradConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
auto& param = this->Param<param_t>();
......@@ -130,6 +138,9 @@ void WinogradConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
b_data,
param,
&ctx);
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_compute_6x6_3x3";
#endif
} else {
int tile_block = 8;
int block_count =
......@@ -148,6 +159,9 @@ void WinogradConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
b_data,
param,
&ctx);
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_compute_2x2_3x3";
#endif
} else {
lite::arm::math::conv_compute_2x2_3x3_small(i_data,
o_data,
......@@ -162,6 +176,9 @@ void WinogradConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
b_data,
param,
&ctx);
#ifdef LITE_WITH_PROFILE
kernel_func_name_ = "conv_compute_2x2_3x3_small";
#endif
}
}
}
......
......@@ -15,6 +15,7 @@
#pragma once
#include <cmath>
#include <string>
#include "lite/backends/arm/math/conv_impl.h"
#include "lite/core/context.h"
#include "lite/core/kernel.h"
......@@ -34,6 +35,13 @@ class WinogradConv : public KernelLite<TARGET(kARM), Ptype> {
virtual void PrepareForRun();
virtual void ReInitWhenNeeded();
virtual void Run();
#ifdef LITE_WITH_PROFILE
virtual void SetProfileRuntimeKernelInfo(
paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
std::string kernel_func_name_{"NotImplForConvWino"};
#endif
protected:
using param_t = operators::ConvParam;
......
......@@ -66,5 +66,5 @@ REGISTER_LITE_KERNEL(fill_constant,
{LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
.BindInput("ShapeTensorList",
{LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny))})
.Finalize();
# if ((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_PYTHON) AND (NOT LITE_WITH_FPGA))
# return()
# endif()
if ((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_PYTHON) AND (NOT LITE_WITH_FPGA))
return()
endif()
set(fpga_deps fpga_target_wrapper kernel_fpga)
......
......@@ -5,11 +5,6 @@ add_kernel(fetch_compute_host Host basic SRCS fetch_compute.cc DEPS ${lite_kerne
add_kernel(reshape_compute_host Host basic SRCS reshape_compute.cc DEPS ${lite_kernel_deps})
add_kernel(unsqueeze_compute_host Host basic SRCS unsqueeze_compute.cc DEPS ${lite_kernel_deps})
add_kernel(multiclass_nms_compute_host Host basic SRCS multiclass_nms_compute.cc DEPS ${lite_kernel_deps})
add_kernel(one_hot_compute_host Host extra SRCS one_hot_compute.cc DEPS ${lite_kernel_deps})
#lite_cc_test(test_reshape_compute_host SRCS reshape_compute_test.cc DEPS reshape_compute_host any)
#lite_cc_test(test_multiclass_nms_compute_host SRCS multiclass_nms_compute_test.cc DEPS multiclass_nms_compute_host any)
add_kernel(expand_compute_host Host basic SRCS expand_compute.cc DEPS ${lite_kernel_deps})
add_kernel(shape_compute_host Host extra SRCS shape_compute.cc DEPS ${lite_kernel_deps})
add_kernel(is_empty_compute_host Host extra SRCS is_empty_compute.cc DEPS ${lite_kernel_deps})
......@@ -20,3 +15,4 @@ add_kernel(ctc_align_compute_host Host extra SRCS ctc_align_compute.cc DEPS ${li
add_kernel(write_to_array_compute_host Host extra SRCS write_to_array_compute.cc DEPS ${lite_kernel_deps})
add_kernel(read_from_array_compute_host Host extra SRCS read_from_array_compute.cc DEPS ${lite_kernel_deps})
add_kernel(assign_compute_host Host extra SRCS assign_compute.cc DEPS ${lite_kernel_deps})
add_kernel(one_hot_compute_host Host extra SRCS one_hot_compute.cc DEPS ${lite_kernel_deps})
......@@ -13,10 +13,6 @@
// limitations under the License.
#include "lite/kernels/host/assign_compute.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
#include "lite/core/target_wrapper.h"
#include "lite/core/type_system.h"
namespace paddle {
namespace lite {
......
......@@ -34,8 +34,6 @@ int DropoutConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto x_name = op_info->Input("X").front();
auto x = scope->FindMutableTensor(x_name);
auto x_dims = x->dims();
auto x_rank = x_dims.size();
CHECK_GE(x_rank, 2);
auto out_name = op_info->Output("Out").front();
......@@ -45,9 +43,6 @@ int DropoutConverter(void* ctx, OpLite* op, KernelBase* kernel) {
if (dropout_implementation == "upscale_in_train") {
scale = 1.f;
}
// HiAI only support [n, c, 1, 1] for the shape of scale
std::vector<int64_t> scale_shape = {
1, x_rank < 3 ? 1 : x_dims[x_rank - 3], 1, 1};
// X node
std::shared_ptr<Node> x_node = nullptr;
......@@ -61,11 +56,7 @@ int DropoutConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto scale_node = graph->Add<ge::op::Scale>(out_name);
auto scale_op = scale_node->data<ge::op::Scale>();
scale_op->set_input_x(*x_node->data());
scale_op->set_attr_axis(1);
// Add filter node(fill with scale)
auto filter_node = graph->Add(out_name + "/filter", scale, scale_shape);
scale_op->set_input_filter(*filter_node->data());
scale_op->set_attr_filler_value(scale);
return REBUILD_WHEN_SHAPE_CHANGED;
}
......
......@@ -120,7 +120,9 @@ int SubgraphEngine::BuildDeviceProgram() {
return subgraph::FAILED;
}
auto device_program = std::make_shared<device_program_t>(device_client);
device_program_map_[inputs_shape_] = device_program;
if (!inputs_shape_.empty()) {
device_program_map_[inputs_shape_] = device_program;
}
// Query and check the dimensions of valid input and output tensors
std::vector<hiai::TensorDimension> device_idims, device_odims;
......
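The added `inputs_shape_.empty()` check keeps the shape-keyed program cache from being populated under an empty key. A hedged standalone illustration of the same guard with a plain `std::map` — `DeviceProgram` is a stand-in for the real `device_program_t`:

```cpp
// Illustration of the shape-key guard added above.
#include <cstdint>
#include <map>
#include <memory>
#include <vector>

struct DeviceProgram {};  // stand-in for device_program_t

int main() {
  using Shapes = std::vector<std::vector<int64_t>>;
  std::map<Shapes, std::shared_ptr<DeviceProgram>> device_program_map;

  Shapes inputs_shape;  // stays empty if input dims were never resolved
  auto device_program = std::make_shared<DeviceProgram>();

  // Mirrors the added check: only cache a program under a concrete key,
  // so an empty shape never aliases a real entry in the map.
  if (!inputs_shape.empty()) {
    device_program_map[inputs_shape] = device_program;
  }
  return 0;
}
```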
......@@ -324,7 +324,7 @@ void SaveCombinedParamsPb(const std::string &path,
std::sort(paramlist.begin(), paramlist.end());
// Load vars
std::ofstream file(path);
std::ofstream file(path, std::ios::binary);
CHECK(file.is_open());
for (size_t i = 0; i < paramlist.size(); ++i) {
SerializeTensor(file, exec_scope, paramlist[i]);
......
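The `std::ios::binary` flag matters because in text mode on Windows every `0x0A` byte written is expanded to `0x0D 0x0A`, silently corrupting serialized tensor bytes; on POSIX both modes behave identically. A small demonstration (file names are illustrative):

```cpp
// Text mode vs binary mode when writing raw bytes.
#include <fstream>

int main() {
  const char bytes[] = {0x0A, 0x42};  // payload containing a '\n' byte
  // Text mode: on Windows the 0x0A is written as 0x0D 0x0A (3 bytes total).
  std::ofstream text_file("params_text.bin");
  text_file.write(bytes, sizeof(bytes));
  text_file.close();
  // Binary mode: the file is always exactly 2 bytes, on every platform.
  std::ofstream bin_file("params_bin.bin", std::ios::binary);
  bin_file.write(bytes, sizeof(bytes));
  bin_file.close();
  return 0;
}
```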
......@@ -158,8 +158,6 @@ add_operator(__xpu__multi_encoder_op extra SRCS __xpu__multi_encoder_op.cc DEPS
add_operator(__xpu__embedding_with_eltwise_add_op extra SRCS __xpu__embedding_with_eltwise_add_op.cc DEPS ${op_DEPS})
add_operator(__xpu__fc_op extra SRCS __xpu__fc_op.cc DEPS ${op_DEPS})
add_operator(one_hot basic SRCS one_hot_op.cc DEPS ${op_DEPS})
if (NOT LITE_WITH_X86)
lite_cc_test(test_fc_op SRCS fc_op_test.cc
DEPS fc_op memory
......
......@@ -15,6 +15,9 @@
#pragma once
#include <string>
#include "lite/core/op_lite.h"
#ifdef LITE_WITH_PROFILE
#include "lite/api/paddle_place.h"
#endif
namespace paddle {
namespace lite {
......@@ -34,6 +37,58 @@ class ActivationOp : public OpLite {
std::string DebugString() const override { return "activation_op"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter* ch) {
auto input_dims = param_.X->dims();
auto output_dims = param_.Out->dims();
ch->input_shape = ch->DimToStr(input_dims);
ch->output_shape = ch->DimToStr(output_dims);
ch->remark = ActivationTypeToStr(param_.active_type);
switch (param_.active_type) {
case lite_api::ActivationType::kRelu:
ch->macs = param_.X->numel();
break;
case lite_api::ActivationType::kRelu6:
ch->macs = param_.X->numel() * 2.0;
break;
case lite_api::ActivationType::kLeakyRelu:
ch->macs = param_.X->numel() * 2.0;
break;
case lite_api::ActivationType::kPRelu:
ch->macs = param_.X->numel() * 2.0;
break;
case lite_api::ActivationType::kSwish:
ch->macs = param_.X->numel() * 4.0;
break;
case lite_api::ActivationType::kSigmoid:
ch->macs = param_.X->numel() * 3.0;
break;
case lite_api::ActivationType::kTanh:
ch->macs = param_.X->numel() * 5.0;
break;
case lite_api::ActivationType::kExp:
ch->macs = param_.X->numel();
break;
case lite_api::ActivationType::kAbs:
ch->macs = param_.X->numel();
break;
case lite_api::ActivationType::kHardSwish:
ch->macs = param_.X->numel() * 5.0;
break;
case lite_api::ActivationType::kReciprocal:
ch->macs = param_.X->numel();
break;
case lite_api::ActivationType::kIndentity:
break;
default:
LOG(FATAL) << "This Type of Activation:"
<< static_cast<int>(param_.active_type)
<< ActivationTypeToStr(param_.active_type)
<< " doesn't support";
}
}
#endif
private:
mutable operators::ActivationParam param_;
};
......
......@@ -39,6 +39,17 @@ class AffineChannelOpLite : public OpLite {
std::string DebugString() const override { return "affine_channel"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
auto input_dims = param_.X->dims();
auto output_dims = param_.Out->dims();
ch->input_shape = ch->DimToStr(input_dims);
ch->output_shape = ch->DimToStr(output_dims);
ch->remark = param_.data_layout;
ch->macs = param_.X->numel() * 2.0;
}
#endif
private:
mutable AffineChannelParam param_;
};
......
......@@ -39,6 +39,27 @@ class ArgmaxOpLite : public OpLite {
std::string DebugString() const override { return "argmax"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
auto input_dims = param_.X->dims();
auto output_dims = param_.Out->dims();
ch->input_shape = ch->DimToStr(input_dims);
ch->output_shape = ch->DimToStr(output_dims);
ch->remark = "axis" + std::to_string(param_.Axis);
auto axis = param_.Axis;
if (axis < 0) {
axis += input_dims.size();
}
int max_num = 1;
for (int64_t i = axis + 1; i < input_dims.size(); i++)
max_num *= input_dims[i];
float gops = 1.0f;
for (int i = 1; i <= max_num; i++) gops *= i;
ch->macs = gops * output_dims.production();
}
#endif
private:
mutable ArgmaxParam param_;
};
......
......@@ -37,6 +37,17 @@ class AssignOpLite : public OpLite {
void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
std::string DebugString() const override { return "assign"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
auto input_dims = param_.X->dims();
auto output_dims = param_.Out->dims();
ch->input_shape = ch->DimToStr(input_dims);
ch->output_shape = ch->DimToStr(output_dims);
// ch->remark = "";
ch->macs = param_.X->numel() * 1.0;
}
#endif
private:
mutable AssignParam param_;
};
......
......@@ -39,6 +39,17 @@ class AssignValueOpLite : public OpLite {
std::string DebugString() const override { return "assign value"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
// auto input_dims = param_.X->dims();
auto output_dims = param_.Out->dims();
// ch->input_shape = ch->DimToStr(input_dims);
ch->output_shape = ch->DimToStr(output_dims);
ch->remark = "dtype" + std::to_string(param_.dtype);
ch->macs = param_.Out->numel() * 1.0;
}
#endif
private:
mutable AssignValueParam param_;
};
......
......@@ -39,6 +39,17 @@ class AxpyOpLite : public OpLite {
std::string DebugString() const override { return "axpy"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
auto input_dims = param_.X->dims();
auto output_dims = param_.Out->dims();
ch->input_shape = ch->DimToStr(input_dims);
ch->output_shape = ch->DimToStr(output_dims);
// ch->remark = "";
ch->macs = param_.X->numel() * 2.0;
}
#endif
private:
mutable AxpyParam param_;
};
......
......@@ -37,6 +37,17 @@ class BatchNormOp : public OpLite {
void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
std::string DebugString() const override { return "batch_norm"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
auto input_dims = param_.x->dims();
auto output_dims = param_.y->dims();
ch->input_shape = ch->DimToStr(input_dims);
ch->output_shape = ch->DimToStr(output_dims);
// ch->remark = "";
ch->macs = param_.y->numel() * 2.0;
}
#endif
private:
mutable BatchNormParam param_;
};
......
......@@ -39,6 +39,17 @@ class BoxClipOpLite : public OpLite {
std::string DebugString() const override { return "box clip"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
auto input_dims = param_.Input->dims();
auto output_dims = param_.Output->dims();
ch->input_shape = ch->DimToStr(input_dims);
ch->output_shape = ch->DimToStr(output_dims);
// ch->remark = "";
ch->macs = param_.Output->numel() * 2.0;
}
#endif
private:
mutable BoxClipParam param_;
};
......
......@@ -34,8 +34,21 @@ class BoxCoderOpLite : public OpLite {
bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override;
void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
std::string DebugString() const override { return "box_coder"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
// auto input_dims = param_.Input->dims();
// auto output_dims = param_.Output->dims();
// ch->input_shape = ch->DimToStr(input_dims);
// ch->output_shape = ch->DimToStr(output_dims);
ch->remark = "proposals" + std::to_string(param_.proposals->dims()[0]) +
"x" + std::to_string(param_.proposals->dims()[1]);
ch->macs = param_.proposals->dims()[0] * param_.proposals->dims()[1] * 30.f;
}
#endif
private:
mutable BoxCoderParam param_;
};
......
......@@ -50,6 +50,17 @@ class CalibOpLite : public OpLite {
std::string DebugString() const override { return "calib"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
auto input_dims = param_.input->dims();
auto output_dims = param_.output->dims();
ch->input_shape = ch->DimToStr(input_dims);
ch->output_shape = ch->DimToStr(output_dims);
ch->remark = "scale" + std::to_string(param_.scale);
ch->macs = param_.output->numel() * 1.0f;
}
#endif
private:
mutable CalibParam param_;
};
......
......@@ -38,6 +38,18 @@ class CompareOp : public OpLite {
std::string DebugString() const override { return "binary logical"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
auto output_dims = param_.Out->dims();
ch->input_shape = "X:" + ch->DimToStr(param_.X->dims()) + "Y:" +
ch->DimToStr(param_.Y->dims());
ch->output_shape = ch->DimToStr(output_dims);
ch->remark = "axis" + std::to_string(param_.axis) + "force_cpu" +
std::to_string(param_.force_cpu);
ch->macs = param_.Out->numel() * 1.0f;
}
#endif
private:
mutable CompareParam param_;
};
......
......@@ -37,6 +37,21 @@ class ConcatOpLite : public OpLite {
void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
std::string DebugString() const override { return "concat"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
auto output_dims = param_.output->dims();
std::string inputs_shape = "";
for (size_t i = 0; i < param_.x.size(); ++i) {
inputs_shape += ch->DimToStr(param_.x[i]->dims());
if (i != param_.x.size() - 1) inputs_shape += "/";
}
ch->input_shape = inputs_shape;
ch->output_shape = ch->DimToStr(output_dims);
ch->remark = "axis" + std::to_string(param_.axis);
ch->macs = 0.f; // no computation, only memory movement
}
#endif
private:
mutable ConcatParam param_;
};
......
......@@ -22,6 +22,9 @@
#include "lite/core/tensor.h"
#include "lite/operators/op_params.h"
#include "lite/utils/all.h"
#ifdef LITE_WITH_PROFILE
#include "lite/api/paddle_place.h"
#endif
namespace paddle {
namespace lite {
......@@ -44,12 +47,13 @@ class ConvOpLite : public OpLite {
ch->input_shape = ch->DimToStr(input_dims);
ch->output_shape = ch->DimToStr(output_dims);
ch->filter_shape = ch->DimToStr(filter_dims);
ch->remark = std::to_string(filter_dims[2]) + "x" +
std::to_string(filter_dims[3]) + "p" +
std::to_string((*param_.paddings)[0]) + "s" +
std::to_string(param_.strides[0]) + "g" +
std::to_string(param_.groups) + "d" +
std::to_string((*param_.dilations)[0]);
ch->remark =
std::to_string(filter_dims[2]) + "x" + std::to_string(filter_dims[3]) +
"p" + std::to_string((*param_.paddings)[0]) + "s" +
std::to_string(param_.strides[0]) + "g" +
std::to_string(param_.groups) + "d" +
std::to_string((*param_.dilations)[0]) + (param_.bias ? "Bias" : "") +
ActivationTypeToStr(param_.activation_param.active_type);
// MACs = 2.f * kw * kh * batchsize * out_c * out_h * out_w * in_c / group
// GMACs = 1e-9f * MACs
// GMACPS = 1e-6f * MACs / predict_ms
......
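A worked instance of the MACs comment above, with illustrative (not benchmarked) shapes: a 3x3, group=1 convolution on a 1x64x56x56 input producing a 1x128x56x56 output:

```cpp
// Worked example of: MACs = 2.f * kw * kh * batchsize * out_c * out_h *
// out_w * in_c / group, plus the derived GMACs and GMACPS figures.
#include <cstdio>

int main() {
  float kw = 3.f, kh = 3.f;
  float batch = 1.f, out_c = 128.f, out_h = 56.f, out_w = 56.f;
  float in_c = 64.f, group = 1.f;
  float macs = 2.f * kw * kh * batch * out_c * out_h * out_w * in_c / group;
  printf("MACs   = %.0f\n", macs);                 // 462422016
  printf("GMACs  = %.3f\n", 1e-9f * macs);         // ~0.462
  // GMACPS = 1e-6f * MACs / predict_ms; e.g. at predict_ms = 10:
  printf("GMACPS = %.2f\n", 1e-6f * macs / 10.f);  // ~46.24
  return 0;
}
```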
......@@ -21,6 +21,9 @@
#include "lite/core/tensor.h"
#include "lite/operators/op_params.h"
#include "lite/utils/all.h"
#ifdef LITE_WITH_PROFILE
#include "lite/api/paddle_place.h"
#endif
namespace paddle {
namespace lite {
......@@ -42,6 +45,29 @@ class ConvTransposeOpLite : public OpLite {
std::string DebugString() const override { return "conv_transpose"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
auto filter_dims = param_.filter->dims();
auto input_dims = param_.x->dims();
auto output_dims = param_.output->dims();
ch->input_shape = ch->DimToStr(input_dims);
ch->output_shape = ch->DimToStr(output_dims);
ch->filter_shape = ch->DimToStr(filter_dims);
ch->remark =
std::to_string(filter_dims[2]) + "x" + std::to_string(filter_dims[3]) +
"p" + std::to_string((*param_.paddings)[0]) + "s" +
std::to_string(param_.strides[0]) + "g" +
std::to_string(param_.groups) + "d" +
std::to_string((*param_.dilations)[0]) + (param_.bias ? "Bias" : "") +
ActivationTypeToStr(param_.activation_param.active_type);
// MACs = 2.f * kw * kh * batchsize * out_c * out_h * out_w * in_c / group
// GMACs = 1e-9f * MACs
// GMACPS = 1e-6f * MACs / predict_ms
ch->macs = 2.f * filter_dims[2] * filter_dims[3] *
output_dims.production() * input_dims[1] / param_.groups;
}
#endif
private:
mutable ConvParam param_;
std::string padding_algorithm_{""};
......
......@@ -35,6 +35,17 @@ class ElementwiseOp : public OpLite {
std::string DebugString() const override { return "elementwise_op"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter* ch) {
auto output_dims = param_.Out->dims();
ch->input_shape = "X" + ch->DimToStr(param_.X->dims()) + "Y" +
ch->DimToStr(param_.Y->dims());
ch->output_shape = ch->DimToStr(output_dims);
ch->remark = "axis" + std::to_string(param_.axis);
ch->macs = 1.0f * param_.Out->numel();
}
#endif
private:
mutable operators::ElementwiseParam param_;
};
......
......@@ -43,6 +43,17 @@ class FcOpLite : public OpLite {
std::string DebugString() const override { return "fc"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
auto m = param_.input->dims().count(0, param_.in_num_col_dims);
ch->input_shape = ch->DimToStr(param_.input->dims());
ch->filter_shape = ch->DimToStr(param_.w->dims());
ch->output_shape = ch->DimToStr(param_.output->dims());
ch->remark = (param_.bias ? "Bias" : "") + param_.activation_type;
ch->macs = m * param_.w->dims()[0] * param_.w->dims()[1] * 3.0f;
}
#endif
private:
mutable FcParam param_;
};
......
......@@ -38,6 +38,15 @@ class IncrementOp : public OpLite {
std::string DebugString() const override { return "increment"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
ch->input_shape = ch->DimToStr(param_.X->dims());
ch->output_shape = ch->DimToStr(param_.Out->dims());
ch->remark = "step" + std::to_string(param_.step);
ch->macs = param_.X->numel() * 1.0f;
}
#endif
private:
mutable IncrementParam param_;
};
......
......@@ -36,8 +36,22 @@ class InstanceNormOp : public OpLite {
bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override;
void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
std::string DebugString() const override { return "instance_norm"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
ch->input_shape = ch->DimToStr(param_.x->dims());
ch->output_shape = ch->DimToStr(param_.out->dims());
// ch->remark = "";
auto x_dims = param_.x->dims();
auto nc = x_dims[0] * x_dims[1];
auto hw = x_dims[2] * x_dims[3];
auto nchw = x_dims.production();
ch->macs = 5.f * nchw + 3.f * (nc + hw);
}
#endif
private:
mutable InstanceNormParam param_;
};
......
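A quick numeric check of the instance_norm cost model above (macs = 5*nchw + 3*(nc + hw)) for an illustrative 1x32x28x28 input:

```cpp
// Numeric check of the instance_norm MACs estimate.
#include <cstdio>

int main() {
  long n = 1, c = 32, h = 28, w = 28;
  long nc = n * c, hw = h * w, nchw = n * c * h * w;
  float macs = 5.f * nchw + 3.f * (nc + hw);
  printf("macs = %.0f\n", macs);  // 5*25088 + 3*(32 + 784) = 127888
  return 0;
}
```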
......@@ -36,8 +36,18 @@ class InterpolateOp : public OpLite {
bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override;
void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
std::string DebugString() const override { return "interpolate"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
ch->input_shape = ch->DimToStr(param_.X->dims());
ch->output_shape = ch->DimToStr(param_.Out->dims());
ch->remark = param_.interp_method;
ch->macs = param_.Out->numel() * 14.f;
}
#endif
private:
mutable InterpolateParam param_;
};
......
......@@ -38,6 +38,15 @@ class LayerNormOp : public OpLite {
std::string DebugString() const override { return "layer_norm"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
ch->input_shape = ch->DimToStr(param_.X->dims());
ch->output_shape = ch->DimToStr(param_.Y->dims());
ch->remark = "begin_norm_axis" + std::to_string(param_.begin_norm_axis);
ch->macs = param_.Y->numel() * 7.f;
}
#endif
private:
mutable LayerNormParam param_;
};
......
......@@ -38,6 +38,16 @@ class BinaryLogicalOp : public OpLite {
std::string DebugString() const override { return "binary logical"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
ch->input_shape = "X" + ch->DimToStr(param_.X->dims()) + "Y" +
ch->DimToStr(param_.Y->dims());
ch->output_shape = ch->DimToStr(param_.Out->dims());
// ch->remark = "";
ch->macs = param_.Out->numel() * 3.f;
}
#endif
private:
mutable LogicalParam param_;
};
......@@ -57,6 +67,16 @@ class UnaryLogicalOp : public OpLite {
std::string DebugString() const override { return "unary logical"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
ch->input_shape = "X" + ch->DimToStr(param_.X->dims()) + "Y" +
ch->DimToStr(param_.Y->dims());
ch->output_shape = ch->DimToStr(param_.Out->dims());
// ch->remark = "";
ch->macs = param_.Out->numel() * 3.f;
}
#endif
private:
mutable LogicalParam param_;
};
......
......@@ -33,8 +33,18 @@ class LrnOpLite : public OpLite {
bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override;
void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
std::string DebugString() const override { return "lrn"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
ch->input_shape = ch->DimToStr(param_.X->dims());
ch->output_shape = ch->DimToStr(param_.Out->dims());
ch->remark = "n" + std::to_string(param_.n) + param_.norm_region;
ch->macs = param_.Out->numel() * param_.k * 2.f;
}
#endif
private:
mutable LrnParam param_;
};
......
......@@ -41,6 +41,31 @@ class MatMulOpLite : public OpLite {
std::string DebugString() const override { return "matmul"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
ch->input_shape = ch->DimToStr(param_.X->dims());
ch->filter_shape = ch->DimToStr(param_.Y->dims());
ch->output_shape = ch->DimToStr(param_.Out->dims());
ch->remark = "alpha" + std::to_string(param_.alpha) + "trans_x" +
std::to_string(param_.transpose_X) + "trans_y" +
std::to_string(param_.transpose_Y);
auto x_dims = param_.X->dims();
auto y_dims = param_.Y->dims();
auto m = x_dims[x_dims.size() - 2];
auto k = x_dims[x_dims.size() - 1];
auto n = y_dims[y_dims.size() - 1];
if (param_.transpose_X) {
m = x_dims[x_dims.size() - 1];
k = x_dims[x_dims.size() - 2];
}
if (param_.transpose_Y) {
n = y_dims[y_dims.size() - 2];
}
ch->macs = 3.f * m * n * k;
}
#endif
private:
mutable MatMulParam param_;
};
......
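The M/N/K selection above is easy to sanity-check in isolation. This sketch mirrors the diff's logic, including its 3.f cost factor, for illustrative shapes X = [8, 16], Y = [16, 32] with no transpose:

```cpp
// Sanity check of the matmul M/N/K selection under the transpose flags.
#include <cstdio>
#include <vector>

int main() {
  std::vector<long> x_dims{8, 16}, y_dims{16, 32};
  bool transpose_X = false, transpose_Y = false;
  long m = x_dims[x_dims.size() - 2];
  long k = x_dims[x_dims.size() - 1];
  long n = y_dims[y_dims.size() - 1];
  if (transpose_X) {  // X is used as X^T: swap its two trailing dims
    m = x_dims[x_dims.size() - 1];
    k = x_dims[x_dims.size() - 2];
  }
  if (transpose_Y) {  // Y is used as Y^T: N comes from the other dim
    n = y_dims[y_dims.size() - 2];
  }
  printf("macs = %.0f\n", 3.f * m * n * k);  // 3 * 8 * 32 * 16 = 12288
  return 0;
}
```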
......@@ -35,6 +35,15 @@ class MeanOp : public OpLite {
std::string DebugString() const override { return "mean"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
ch->input_shape = ch->DimToStr(param_.X->dims());
ch->output_shape = ch->DimToStr(param_.Out->dims());
// ch->remark = "";
ch->macs = param_.X->numel() * 1.f;
}
#endif
private:
mutable operators::MeanParam param_;
};
......
......@@ -63,6 +63,20 @@ class MulOpLite : public OpLite {
std::string DebugString() const override { return "mul"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
ch->input_shape = ch->DimToStr(param_.x->dims());
ch->filter_shape = ch->DimToStr(param_.y->dims());
ch->output_shape = ch->DimToStr(param_.output->dims());
// ch->remark = "";
auto x_dims = param_.x->dims();
auto y_dims = param_.y->dims();
auto x_mat_dims = x_dims.Flatten2D(param_.x_num_col_dims);
auto y_mat_dims = y_dims.Flatten2D(param_.y_num_col_dims);
ch->macs = 1.f * x_mat_dims[0] * x_mat_dims[1] * y_mat_dims[1];
}
#endif
private:
mutable MulParam param_;
};
......
......@@ -35,8 +35,18 @@ class NegativeOpLite : public OpLite {
bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override;
void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
std::string DebugString() const override { return "negative"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
ch->input_shape = ch->DimToStr(param_.X->dims());
ch->output_shape = ch->DimToStr(param_.Out->dims());
// ch->remark = "";
ch->macs = 1.f * param_.Out->numel();
}
#endif
private:
mutable NegativeParam param_;
};
......
......@@ -36,8 +36,18 @@ class PowerOp : public OpLite {
bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override;
void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
std::string DebugString() const override { return "power"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
ch->input_shape = ch->DimToStr(param_.X->dims());
ch->output_shape = ch->DimToStr(param_.Out->dims());
// ch->remark = "";
ch->macs = param_.Out->numel() * 3.0f;
}
#endif
private:
mutable PowerParam param_;
};
......
......@@ -32,8 +32,29 @@ class ReduceMaxOp : public OpLite {
bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override;
void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
std::string DebugString() const override { return "reduce_max"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
ch->input_shape = ch->DimToStr(param_.X->dims());
ch->output_shape = ch->DimToStr(param_.Out->dims());
ch->remark = "keep_dim" + std::to_string(param_.keep_dim);
auto dims = param_.dim;
auto in_sum = param_.X->numel();
if (dims.size() == 0 || dims.size() == 1) {
ch->macs = 1.f * in_sum;
} else if (dims.size() == 2) {
ch->macs = 2.f * in_sum;
} else {
LOG(FATAL) << "This dims size of ReduceMaxParm: " << dims.size()
<< " doesn't support";
ch->macs = 0.f;
}
}
#endif
private:
mutable ReduceMaxParam param_;
};
......
......@@ -26,14 +26,41 @@ namespace operators {
class ReduceMeanOp : public OpLite {
public:
ReduceMeanOp() {}
explicit ReduceMeanOp(const std::string &op_type) : OpLite(op_type) {}
bool CheckShape() const override;
bool InferShapeImpl() const override;
bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override;
void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
std::string DebugString() const override { return "reduce_mean"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
ch->input_shape = ch->DimToStr(param_.X->dims());
ch->output_shape = ch->DimToStr(param_.Out->dims());
ch->remark = "keep_dim" + std::to_string(param_.keep_dim);
auto dims = param_.dim;
auto in_sum = param_.X->numel();
if (dims.size() == 0) {
ch->macs = 1.f * in_sum;
} else if (dims.size() == 1) {
ch->macs = 2.f * in_sum;
} else if (dims.size() == 2) {
ch->macs = 4.f * in_sum;
} else {
LOG(FATAL) << "This dims size of ReduceMean: " << dims.size()
<< " doesn't support";
ch->macs = 0.f;
}
}
#endif
private:
mutable ReduceMeanParam param_;
};
......
......@@ -37,6 +37,27 @@ class ReduceProdOpLite : public OpLite {
std::string DebugString() const override { return "reduce_prod"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
ch->input_shape = ch->DimToStr(param_.x->dims());
ch->output_shape = ch->DimToStr(param_.output->dims());
ch->remark = "keep_dim" + std::to_string(param_.keep_dim) + "reduce_all" +
std::to_string(param_.reduce_all);
auto dims = param_.dim;
auto in_sum = param_.x->numel();
if (dims.size() == 0 || dims.size() == 1) {
ch->macs = 1.f * in_sum;
} else if (dims.size() == 2) {
ch->macs = 2.f * in_sum;
} else {
LOG(FATAL) << "This dims size of ReduceProd: " << dims.size()
<< " doesn't support";
ch->macs = 0.f;
}
}
#endif
private:
mutable ReduceParam param_;
};
......
......@@ -18,6 +18,9 @@
#include "lite/core/op_lite.h"
#include "lite/core/scope.h"
#include "lite/utils/all.h"
#ifdef LITE_WITH_PROFILE
#include "lite/api/paddle_place.h"
#endif
namespace paddle {
namespace lite {
......@@ -35,8 +38,61 @@ class ReluOp : public OpLite {
bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override;
void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
std::string DebugString() const override { return "relu"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
auto input_dims = param_.X->dims();
auto output_dims = param_.Out->dims();
ch->input_shape = ch->DimToStr(input_dims);
ch->output_shape = ch->DimToStr(output_dims);
ch->remark = ActivationTypeToStr(param_.active_type);
switch (param_.active_type) {
case lite_api::ActivationType::kRelu:
ch->macs = param_.X->numel();
break;
case lite_api::ActivationType::kRelu6:
ch->macs = param_.X->numel() * 2.0;
break;
case lite_api::ActivationType::kLeakyRelu:
ch->macs = param_.X->numel() * 2.0;
break;
case lite_api::ActivationType::kPRelu:
ch->macs = param_.X->numel() * 2.0;
break;
case lite_api::ActivationType::kSwish:
ch->macs = param_.X->numel() * 4.0;
break;
case lite_api::ActivationType::kSigmoid:
ch->macs = param_.X->numel() * 3.0;
break;
case lite_api::ActivationType::kTanh:
ch->macs = param_.X->numel() * 5.0;
break;
case lite_api::ActivationType::kExp:
ch->macs = param_.X->numel();
break;
case lite_api::ActivationType::kAbs:
ch->macs = param_.X->numel();
break;
case lite_api::ActivationType::kHardSwish:
ch->macs = param_.X->numel() * 5.0;
break;
case lite_api::ActivationType::kReciprocal:
ch->macs = param_.X->numel();
break;
case lite_api::ActivationType::kIndentity:
break;
default:
LOG(FATAL) << "This Type of Activation:"
<< static_cast<int>(param_.active_type)
<< ActivationTypeToStr(param_.active_type)
<< " doesn't support";
}
}
#endif
private:
mutable ActivationParam param_;
};
......
......@@ -35,8 +35,19 @@ class ScaleOp : public OpLite {
bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override;
void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
std::string DebugString() const override { return "scale"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
ch->input_shape = ch->DimToStr(param_.x->dims());
ch->output_shape = ch->DimToStr(param_.output->dims());
ch->remark =
param_.activation_type + "alpha" + std::to_string(param_.alpha);
ch->macs = param_.x->numel() * 1.f;
}
#endif
private:
mutable ScaleParam param_;
};
......
......@@ -27,17 +27,48 @@ class SearchAlignedMatMulOpLite : public OpLite {
public:
SearchAlignedMatMulOpLite() {}
explicit SearchAlignedMatMulOpLite(const std::string &type) : OpLite(type) {}
explicit SearchAlignedMatMulOpLite(const std::string& type) : OpLite(type) {}
bool CheckShape() const override;
bool InferShapeImpl() const override;
void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
void AttachKernel(KernelBase* kernel) override { kernel->SetParam(param_); }
bool AttachImpl(const cpp::OpDesc& op_desc, lite::Scope* scope) override;
bool AttachImpl(const cpp::OpDesc &op_desc, lite::Scope *scope) override;
std::string DebugString() const override { return "search_aligned_mat_mul"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter* ch) {
ch->input_shape = ch->DimToStr(param_.X->dims());
ch->filter_shape = ch->DimToStr(param_.Y->dims());
ch->output_shape = ch->DimToStr(param_.Out->dims());
ch->remark = "alpha" + std::to_string(param_.alpha) + "trans_x" +
std::to_string(param_.transpose_X) + "trans_y" +
std::to_string(param_.transpose_Y);
const auto x_dims = param_.X->dims();
const auto y_dims = param_.Y->dims();
const auto& x_lod = param_.X->lod();
const auto& y_lod = param_.Y->lod();
const auto& x_lod_0 = x_lod[0];
const auto& y_lod_0 = y_lod[0];
int x_inner_size = x_dims[1];
int y_inner_size = y_dims[1];
int x_batch_size = x_lod_0[1];
int y_batch_size = y_lod_0[1];
int M = param_.transpose_X ? x_inner_size : x_batch_size;
int N = param_.transpose_Y ? y_batch_size : y_inner_size;
int X_K = param_.transpose_X ? x_batch_size : x_inner_size;
int Y_K = param_.transpose_Y ? y_inner_size : y_batch_size;
CHECK_EQ(X_K, Y_K) << "K of Input(X) and Input(Y) is not equal";
int K = X_K;
ch->macs = 2.0 * M * N * K;
}
#endif
private:
mutable MatMulParam param_;
};
......
......@@ -35,8 +35,21 @@ class SearchFcOpLite : public OpLite {
bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override;
void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
std::string DebugString() const override { return "search_fc"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
ch->input_shape = ch->DimToStr(param_.X->dims());
ch->filter_shape = ch->DimToStr(param_.W->dims());
ch->output_shape = ch->DimToStr(param_.Out->dims());
ch->remark = "out_size" + std::to_string(param_.out_size);
auto x_dims = param_.X->dims();
auto w_dims = param_.W->dims();
ch->macs = 2.f * x_dims[0] * x_dims[1] * w_dims[0];
}
#endif
private:
mutable SearchFcParam param_;
};
......
......@@ -36,8 +36,21 @@ class SearchSeqFcOpLite : public OpLite {
void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
bool AttachImpl(const cpp::OpDesc &op_desc, lite::Scope *scope) override;
std::string DebugString() const override { return "search_seq_fc"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
ch->input_shape = ch->DimToStr(param_.x->dims());
ch->filter_shape = ch->DimToStr(param_.w->dims());
ch->output_shape = ch->DimToStr(param_.out->dims());
ch->remark = "out_size" + std::to_string(param_.out_size);
auto x_dims = param_.x->dims();
auto w_dims = param_.w->dims();
ch->macs = 2.f * x_dims[0] * x_dims[1] * w_dims[0];
}
#endif
private:
mutable SearchSeqFcParam param_;
};
......
......@@ -36,8 +36,20 @@ class SearchSeqSoftmaxOp : public OpLite {
bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override;
void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
std::string DebugString() const override { return "search_seq_softmax_op"; }
#ifdef LITE_WITH_PROFILE
void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
auto input_dims = param_.x->dims();
auto output_dims = param_.output->dims();
ch->input_shape = ch->DimToStr(input_dims);
ch->output_shape = ch->DimToStr(output_dims);
ch->remark = "axis" + std::to_string(param_.axis);
ch->macs = 4.f * param_.x->numel();
}
#endif
private:
mutable SoftmaxParam param_;
};
......
......@@ -38,10 +38,17 @@ static bool IsFileExists(const std::string& path) {
// ARM mobile does not support mkdir in C++
static void MkDirRecur(const std::string& path) {
#ifndef LITE_WITH_ARM
#ifdef _WIN32
if (system(string_format("md %s", path.c_str()).c_str()) != 0) {
LOG(ERROR) << "Cann't mkdir " << path;
}
#else
if (system(string_format("mkdir -p %s", path.c_str()).c_str()) != 0) {
LOG(ERROR) << "Cann't mkdir " << path;
}
#else // On ARM
#endif // _WIN32
#else // On ARM
CHECK_NE(mkdir(path.c_str(), S_IRWXU), -1) << "Can't mkdir " << path;
#endif
}
......
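The hunk above shells out to `md` on Windows and `mkdir -p` elsewhere. On toolchains where C++17 is available, `std::filesystem::create_directories` gives the same recursive behavior without `system()`. A hedged alternative sketch, not what the source uses (older Android NDK toolchains such as ndk-r17c lack `<filesystem>`):

```cpp
// Portable recursive mkdir via std::filesystem (C++17).
#include <filesystem>
#include <iostream>
#include <system_error>

int main() {
  std::error_code ec;
  // create_directories is recursive like "mkdir -p". It returns false
  // without setting ec when the path already exists; it returns false
  // and sets ec on a real failure.
  if (!std::filesystem::create_directories("a/b/c", ec) && ec) {
    std::cerr << "Can't mkdir a/b/c: " << ec.message() << "\n";
    return 1;
  }
  return 0;
}
```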