Commit abb4bb07 authored by Yao,kun

Merge remote-tracking branch 'upstream/develop' into develop

# Conflicts:
#	src/common/types.h
[submodule "src/operators/kernel/mali/ACL_Android"]
path = src/operators/kernel/mali/ACL_Android
url = https://github.com/halsay/ACL_Android.git
cmake_minimum_required(VERSION 3.0)
project(paddle-mobile)

option(DEBUGING "enable debug mode" ON)
option(USE_OPENMP "openmp support" OFF)
option(USE_EXCEPTION "use std exception" ON)
option(LOG_PROFILE "log profile" ON)

# select the platform to build
option(CPU "armv7 with neon" ON)
option(MALI_GPU "mali gpu" ON)
option(FPGA "fpga" OFF)
if (CPU)
add_definitions(-DPADDLE_MOBILE_CPU)
endif()
if (MALI_GPU)
add_definitions(-DPADDLE_MOBILE_MALI_GPU)
add_definitions(-DUSE_ACL=1)
add_definitions(-DUSE_OPENCL)
set(ACL_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/src/operators/kernel/mali/ACL_Android)
include_directories(${ACL_ROOT} ${ACL_ROOT}/include)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -L${ACL_ROOT}/build")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -larm_compute")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -larm_compute_core")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -larm_compute_graph")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -L${ACL_ROOT}/build/opencl-1.2-stubs")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lOpenCL")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_ACL=1")
endif()
if(FPGA)
add_definitions(-DPADDLE_MOBILE_FPGA)
endif()
set(CMAKE_CXX_FLAGS "-std=c++14 -O3 -s ${CMAKE_CXX_FLAGS}")
if (DEBUGING)
message(STATUS "debug")
set(CMAKE_BUILD_TYPE Debug)
set(CMAKE_CXX_FLAGS_DEBUG "-g -DNDEBUG")
add_definitions(-DPADDLE_MOBILE_DEBUG)
if (ANDROID_NDK_TOOLCHAIN_INCLUDED)
add_definitions(-DARMV7)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -llog")
endif ()
else ()
set(CMAKE_BUILD_TYPE Release)
set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG")
add_definitions(-fvisibility=hidden -fvisibility-inlines-hidden)
endif ()
if (USE_EXCEPTION)
message(STATUS "use exception")
add_definitions(-DENABLE_EXCEPTION)
add_definitions(-fexceptions)
else()
add_definitions(-fno-exceptions)
endif ()
if (LOG_PROFILE)
add_definitions(-DPADDLE_MOBILE_PROFILE)
endif()
if(USE_OPENMP)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
add_definitions(-DPADDLE_MOBILE_USE_OPENMP)
endif()
file(GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c)
file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h)

if (NOT ANDROID_NDK_TOOLCHAIN_INCLUDED)
    list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/jni/*.cpp)
    list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/jni/*.h)
    list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/operators/math/math_func_neon.h)
endif ()

# include headers
include_directories(src/)

set(CMAKE_VERBOSE_MAKEFILE ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY build)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY build)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY build)

include("${CMAKE_CURRENT_LIST_DIR}/tools/op.cmake")

# if (IS_IOS)
#   add_library(paddle-mobile STATIC ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})

if (ANDROID_NDK_TOOLCHAIN_INCLUDED)
    list(REMOVE_DUPLICATES CMAKE_CXX_FLAGS)
    add_library(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
else ()
    add_library(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
endif ()

if (DEBUGING)
    add_subdirectory(test)
endif()
# Contributing Code

We welcome contributions to the Paddle-Mobile project and sincerely appreciate your help. This document describes how we work and what our workflow looks like. Paddle-Mobile lives under the PaddlePaddle org and follows essentially the same coding conventions as the server-side Paddle project, so contributors can also refer to the related Paddle documentation.

## Workflow

The models used during Paddle-Mobile development can be downloaded from this link: [click here](https://mms-mis.cdn.bcebos.com/paddle-mobile/models.zip).

The main steps for contributing code are described below.

### Fork

* Paddle-Mobile accepts code through Pull Requests; pushing directly to the repository is not allowed, and all code must be reviewed by a person. Start by forking the Paddle-Mobile repository with the ["Fork" button](https://help.github.com/articles/fork-a-repo/).
* Go to the [Paddle-Mobile](https://github.com/PaddlePaddle/paddle-mobile) GitHub home page and click the `Fork` button to create a copy under your own account, e.g. <https://github.com/your-username/paddle-mobile>

### Clone

Clone the remote repository to your local machine:
```bash
➜  git clone https://github.com/your-username/paddle-mobile
cd paddle-mobile
```
### Create a Local Branch

Like Paddle, Paddle-Mobile currently uses the [Git branching model](http://nvie.com/posts/a-successful-git-branching-model/) for development, testing, releases, and maintenance; see the [Paddle branching conventions](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/releasing_process.md#paddle-分支规范) for details.

All feature and bug-fix work should be done on a new branch, usually created from the `develop` branch.

Use `git checkout -b` to create and switch to a new branch.
```bash
➜ git checkout -b my-cool-stuff
```
Note that the working tree should be clean before you check out the new branch; otherwise untracked files will be carried over to it. You can verify this with `git status`.

### Use the `pre-commit` Hook

Paddle developers use the [pre-commit](http://pre-commit.com/) tool to manage Git pre-commit hooks. It helps us format source code (C++, Python) and automatically check some basic things before each commit (such as having a single EOL per file and not adding large files to Git).

The `pre-commit` checks are part of the unit tests run in Travis CI, and PRs that do not satisfy the hooks cannot be merged into Paddle. Install it first and then run it in the current directory:
```bash
pip install pre-commit
pre-commit run -a -v
```
Paddle-Mobile uses `clang-format` to format its C/C++ source code, and different `clang-format` versions can produce different results. Unlike Paddle, Paddle-Mobile developers use the newer 5.0 release of the LLVM toolset, so to avoid CI failures please make sure your `clang-format` version is 5.0.

> Also note: the `yapf` installed via `pip install pre-commit` differs slightly from the one installed via `conda install -c conda-forge pre-commit`; Paddle developers use `pip install pre-commit`.

## Start Development

In this example, I removed a line from README.md and created a new file.

Use `git status` to view the current state, which shows the changes in the current directory; `git diff` shows the exact modifications made to each file.
```bash
➜ git status
On branch test
Changes not staged for commit:
(use "git add <file>..." to update what will be committed)
(use "git checkout -- <file>..." to discard changes in working directory)
modified: README.md
Untracked files:
(use "git add <file>..." to include in what will be committed)
test
no changes added to commit (use "git add" and/or "git commit -a")
```
## Build

paddle-mobile is developed for mobile devices, most of which are ARM based, so we cross-compile for the ARM platform. Taking the CPU build as an example:

1. Install the latest NDK
2. Set the ANDROID_NDK and NDK_ROOT environment variables
3. Develop, and write unit tests
4. Run `sh build.sh` (see the sketch below)
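A minimal sketch of steps 2-4 on a Linux/macOS shell might look like the following; the NDK install location is only an assumed example path, and the exact `build.sh` options depend on the script in your checkout:

```bash
# assumed NDK location -- adjust to wherever you unpacked the NDK
export ANDROID_NDK=/opt/android-ndk-r17b
export NDK_ROOT=${ANDROID_NDK}

# build from the repository root
cd paddle-mobile
sh build.sh
```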
## Commit

Next we discard the change to README.md and commit the newly added test file.
```bash
➜ git checkout -- README.md
➜ git status
On branch test
Untracked files:
(use "git add <file>..." to include in what will be committed)
test
nothing added to commit but untracked files present (use "git add" to track)
➜ git add test
```
Every Git commit needs a commit message that tells other people what the commit changes; this is done with `git commit`.
```bash
▶ pre-commit run -a -v
[remove-crlf] CRLF end-lines remover........................................Passed
[remove-tabs] Tabs remover..................................................Passed
[check-added-large-files] Check for added large files.......................Passed
[check-merge-conflict] Check for merge conflicts............................Passed
[check-symlinks] Check for broken symlinks..................................Passed
[detect-private-key] Detect Private Key.....................................Passed
[end-of-file-fixer] Fix End of Files........................................Passed
[trailing-whitespace] Trim Trailing Whitespace..............................Passed
[copyright] copyright.......................................................Passed
[clang-format] clang-format.................................................Passed
[cpplint] cpplint...........................................................Passed
hookid: cpplint
Ignoring build_bak.sh; not a valid file name (c, cc, h, hpp, c++, h++, cu, cpp, hxx, cxx, cuh)
Done processing build_bak.sh
Ignoring build_bak.sh; not a valid file name (c, cc, h, hpp, c++, h++, cu, cpp, hxx, cxx, cuh)
Done processing build_bak.sh
```
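With the hooks passing, the commit itself might then be made with a message like the following (the message text is just an illustration):

```bash
➜  git commit -m "add test file"
```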
## Keep Your Local Repository Up to Date

Before opening a Pull Request, you need to sync the latest code from the original repository (<https://github.com/PaddlePaddle/paddle-mobile>).

First check the name of the current remote repository with `git remote`.
```bash
➜ git remote
origin
➜ git remote -v
origin https://github.com/USERNAME/paddle-mobile (fetch)
origin https://github.com/USERNAME/paddle-mobile (push)
```
Here origin is the name of the remote repository we cloned, i.e. the paddle-mobile under your own account. Next we add a remote for the original paddle-mobile repository and name it upstream.
```bash
➜ git remote add upstream https://github.com/PaddlePaddle/paddle-mobile
➜ git remote
origin
upstream
```
Fetch the latest code from upstream and update the current branch.
```bash
➜ git fetch upstream
➜ git pull upstream develop
```
## Push to the Remote Repository

Push your local changes to GitHub, i.e. https://github.com/USERNAME/paddle-mobile.
```bash
# push to the my-cool-stuff branch of the remote repository origin
➜ git push origin my-cool-stuff
```
## Create an Issue and Open a Pull Request

Create an Issue describing the problem and note its number.

Switch to the branch you created, then click `New pull request`.

In the PR description, write `resolve #<issue-number>` so that the corresponding Issue is closed automatically once the PR is merged.

> See <https://help.github.com/articles/closing-issues-via-commit-messages/> for details.

## Review

Once the PR is received, you can see CI running on the PR page. If it fails, click Details to inspect the run on the Travis platform.
![](http://otkwwi4x8.bkt.clouddn.com/2018-06-20-15294833030073.jpg)
More detailed information is available on Travis.
![](http://otkwwi4x8.bkt.clouddn.com/2018-06-20-15294833651326.jpg)
Then wait for review; if changes are requested, update the corresponding branch in origin by following the steps above.

## Delete the Remote Branch

After the PR has been merged into the main repository, you can delete the remote branch from the PR page.
<img width="775" alt="screen shot 2017-04-26 at 9 18 24 pm" src="https://cloud.githubusercontent.com/assets/11692045/25436457/e4cdd472-2ac5-11e7-9272-badc76c4a23e.png">
You can also delete the remote branch with `git push origin :branch-name`, for example:
```bash
➜ git push origin :my-cool-stuff
```
## Delete the Local Branch

Finally, delete the local branch.

```bash
# switch to the develop branch
➜  git checkout develop
# delete the my-cool-stuff branch
➜  git branch -D my-cool-stuff
```
At this point, we have completed one full code-contribution cycle.

## Conventions for Submitting Code

So that reviewers can focus on the code itself, please follow these conventions every time you submit code:

1. Make sure the unit tests in Travis-CI pass. If they do not, the submitted code has problems and reviewers will generally not review it.
2. Before submitting a Pull Request:
   - Watch the number of commits:
     - Reason: if you modify only one file but submit a dozen commits, each with a tiny change, this is very painful for reviewers, who have to look through every commit to see what changed, not to mention that commits may overwrite one another.
     - Suggestion: keep the number of commits as small as possible each time you submit; you can use `git commit --amend` to fold changes into the previous commit. For multiple commits that have already been pushed to the remote repository, see [squash commits after push](http://stackoverflow.com/questions/5667884/how-to-squash-commits-in-git-after-they-have-been-pushed).
   - Watch the name of each commit: it should reflect the content of the commit and not be chosen casually.
3. If the PR solves an Issue, add `fix #issue_number` to the **first** comment box of the Pull Request so that the Issue is closed automatically when the PR is merged. Valid keywords include: close, closes, closed, fix, fixes, fixed, resolve, resolves, resolved; choose the appropriate one. See [Closing issues via commit messages](https://help.github.com/articles/closing-issues-via-commit-messages) for details.

In addition, when replying to reviewers' comments, please follow these conventions:

1. Every reviewer comment must be replied to (this is basic courtesy in the open-source community -- when someone helps, say thank you):
   - If you agree with a comment and have addressed it, a simple `Done` is enough;
   - If you disagree, please explain your reasoning.
2. If there are many review comments:
   - Give an overall summary of the changes you made.
   - Reply using [start a review](https://help.github.com/articles/reviewing-proposed-changes-in-a-pull-request/) rather than replying to each comment directly, because every direct reply sends an email and causes an email flood.
FROM ubuntu:16.04
RUN echo '\
deb <mirror> <version> main restricted universe multiverse\n\
deb <mirror> <version>-updates main restricted universe multiverse\n\
deb <mirror> <version>-backports main restricted universe multiverse\n\
deb <mirror> <version>-security main restricted universe multiverse\n'\
> /etc/apt/sources.list
RUN sed -ie 's|<mirror>|http://mirrors.tuna.tsinghua.edu.cn/ubuntu/|' /etc/apt/sources.list
RUN sed -ie 's|<version>|xenial|' /etc/apt/sources.list
RUN apt-get update && apt-get upgrade -y
RUN apt-get install -y --no-install-recommends \
curl \
unzip \
git \
make \
cmake \
cmake-curses-gui \
python \
python-pip \
python-setuptools \
clang-format-5.0 \
graphviz \
g++-arm-linux-gnueabi \
gcc-arm-linux-gnueabi
RUN apt-get autoremove -y && apt-get clean
RUN pip install --upgrade pip
RUN pip install wheel && pip install pre-commit
RUN ln -s clang-format-5.0 /usr/bin/clang-format
# RUN cd /tmp && curl -O http://mirrors.neusoft.edu.cn/android/repository/android-ndk-r17b-linux-x86_64.zip
# RUN cd /opt && unzip /tmp/android-ndk-r17b-linux-x86_64.zip
# ENV NDK_ROOT /opt/android-ndk-r17b
# Environment Setup

## Using Docker

### 1. Install Docker

To install Docker, refer to the official documentation: [https://docs.docker.com/install/](https://docs.docker.com/install/)

### 2. Set Up the Build Environment with Docker

First, change into the paddle-mobile directory and run `docker build`.

Using Linux/Mac as an example (on Windows it is recommended to run this in the 'Docker Quickstart Terminal'):
```
$ docker build -t paddle-mobile:dev - < Dockerfile
```
Use `docker images` to see the newly created image:
```
$ docker images
REPOSITORY TAG IMAGE ID CREATED SIZE
paddle-mobile dev 33b146787711 45 hours ago 372MB
```
### 3. Build with Docker

Change into the paddle-mobile directory and run `docker run`:
```
$ docker run -it --mount type=bind,source=$PWD,target=/paddle-mobile paddle-mobile:dev
root@5affd29d4fc5:/ # cd /paddle-mobile
# generate the Makefile for the android build
root@5affd29d4fc5:/ # rm CMakeCache.txt
root@5affd29d4fc5:/ # cmake -DCMAKE_TOOLCHAIN_FILE=tools/toolchains/arm-android-neon.cmake
# generate the Makefile for the linux build
root@5affd29d4fc5:/ # rm CMakeCache.txt
root@5affd29d4fc5:/ # cmake -DCMAKE_TOOLCHAIN_FILE=tools/toolchains/arm-linux-gnueabi.cmake
```
### 4. Configure Build Options

Build options can be set with ccmake:
```
root@5affd29d4fc5:/ # ccmake .
Page 1 of 1
CMAKE_ASM_FLAGS
CMAKE_ASM_FLAGS_DEBUG
CMAKE_ASM_FLAGS_RELEASE
CMAKE_BUILD_TYPE
CMAKE_INSTALL_PREFIX /usr/local
CMAKE_TOOLCHAIN_FILE /paddle-mobile/tools/toolchains/arm-android-neon.cmake
CPU ON
DEBUGING ON
FPGA OFF
LOG_PROFILE ON
MALI_GPU OFF
NET googlenet
USE_EXCEPTION ON
USE_OPENMP OFF
```
After changing options, press `c` and then `g` to regenerate the Makefile. The same options can also be set non-interactively, as in the sketch below.
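For example, a non-interactive configuration with some of the options listed above might look like this (shown with the android toolchain file from step 3; the particular option values are only illustrative):

```
root@5affd29d4fc5:/ # cd /paddle-mobile
root@5affd29d4fc5:/ # cmake -DCMAKE_TOOLCHAIN_FILE=tools/toolchains/arm-android-neon.cmake -DCPU=ON -DMALI_GPU=OFF -DNET=googlenet .
```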
### 5. Build

Build with the make command:
```
root@5affd29d4fc5:/ # make
```
### 6. Locate the Build Output

The build output can be inspected on the host machine: inside the paddle-mobile directory, look under build and test/build. The artifacts can be copied to the device with adb or scp and executed there.

## Without Docker

Without docker, you can simply generate the makefiles with cmake and build. Building the android targets with the NDK requires NDK_ROOT to be set correctly. Building the linux targets requires arm-linux-gnueabi-gcc or a similar cross-compiler; you may need to set the CC and CXX environment variables, modify arm-linux-gnueabi.cmake in tools/toolchains/, or add your own toolchain file. A sketch of a typical invocation follows.
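A minimal sketch, assuming the NDK and cross-compilers are already installed (the NDK path is an assumed example; the `rm CMakeCache.txt` step mirrors the docker instructions above, since the toolchain cannot be changed in an already-configured cache):

```
# android build with the NDK toolchain file (assumed NDK install path)
export NDK_ROOT=/opt/android-ndk-r17b
cd paddle-mobile
rm -f CMakeCache.txt
cmake -DCMAKE_TOOLCHAIN_FILE=tools/toolchains/arm-android-neon.cmake .
make

# linux (arm) build with a cross-compiler
rm -f CMakeCache.txt
CC=arm-linux-gnueabi-gcc CXX=arm-linux-gnueabi-g++ \
  cmake -DCMAKE_TOOLCHAIN_FILE=tools/toolchains/arm-linux-gnueabi.cmake .
make
```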
#!/usr/bin/env sh
push_fn () {
MODELS_PATH="../test/models/*"
EXE_FILE="../test/build/*"
EXE_DIR="data/local/tmp/bin"
MODELS_DIR="data/local/tmp/models"
LIB_PATH="../build/release/arm-v7a/build/*"
adb push ${EXE_FILE} ${EXE_DIR}
adb push ${LIB_PATH} ${EXE_DIR}
adb push ${MODELS_PATH} ${MODELS_DIR}
echo "test files sync completed"
}
push_fn
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_EXECUTOR_MULTITHREAD
#include <algorithm>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "framework/operator.h"
namespace paddle_mobile {
class depCore {
public:
template <typename Dtype>
void analysisDep(
const std::vector<std::shared_ptr<framework::OperatorBase<Dtype>>>& ops) {
std::unordered_map<std::string, int> vars;
size_t nop = ops.size();
deps.resize(nop);
next.resize(nop);
for (size_t i = 0; i < nop; i++) {
const auto& op = ops[i];
for (const auto& kv : op->Inputs()) {
for (const auto& v : kv.second) {
if (vars.find(v) == vars.end()) {
continue;
}
int di = vars[v];
if (di == i) {
continue;
}
if (std::find(deps[i].begin(), deps[i].end(), di) != deps[i].end()) {
continue;
}
deps[i].push_back(di);
next[di].push_back(i);
}
}
for (const auto& kv : op->Outputs()) {
for (const auto& v : kv.second) {
vars[v] = i;
}
}
}
}
const std::vector<int>& getNext(int i) { return next[i]; }
const std::vector<int>& getDeps(int i) { return deps[i]; }
std::vector<std::vector<int>> deps;
std::vector<std::vector<int>> next;
};
} // namespace paddle_mobile
#endif
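The class above records, for each operator, which earlier operators produce its inputs (`deps`) and which later operators consume its outputs (`next`). A simplified, standalone sketch of the same dependency-analysis idea (using a plain struct instead of `framework::OperatorBase`, so it compiles on its own) might look like this:

```cpp
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

// Simplified stand-in for an operator: just named inputs and outputs.
struct FakeOp {
  std::vector<std::string> inputs;
  std::vector<std::string> outputs;
};

int main() {
  // op0 produces "a", op1 produces "b" from "a", op2 consumes "a" and "b".
  std::vector<FakeOp> ops = {{{}, {"a"}}, {{"a"}, {"b"}}, {{"a", "b"}, {"c"}}};

  std::unordered_map<std::string, int> last_writer;  // var name -> producing op
  std::vector<std::vector<int>> deps(ops.size()), next(ops.size());

  for (int i = 0; i < static_cast<int>(ops.size()); ++i) {
    for (const auto& v : ops[i].inputs) {
      auto it = last_writer.find(v);
      if (it == last_writer.end() || it->second == i) continue;
      deps[i].push_back(it->second);  // op i depends on the producer of v
      next[it->second].push_back(i);  // the producer unblocks op i
    }
    for (const auto& v : ops[i].outputs) last_writer[v] = i;
  }

  for (size_t i = 0; i < ops.size(); ++i) {
    std::cout << "op " << i << " depends on " << deps[i].size()
              << " op(s) and unblocks " << next[i].size() << " op(s)\n";
  }
  return 0;
}
```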
@@ -17,8 +17,6 @@ limitations under the License. */

 #ifdef ENABLE_EXCEPTION
 #include <stdio.h>
 #include <exception>
-#include <sstream>
-#include <stdexcept>
 #include <string>
 #endif

@@ -32,12 +30,11 @@ struct PaddleMobileException : public std::exception {
   PaddleMobileException(const char *header, const char *detail,
                         const char *file, const int line) {
-    std::stringstream ss;
-    ss << exception_prefix << "| " << header << "\n";
-    ss << "| [in file] : " << file << " \n";
-    ss << "| [on line] : " << line << " \n";
-    ss << "| [detail] : " << detail;
-    message = ss.str();
+    char buffer[1500];
+    snprintf(buffer, sizeof(buffer),
+             "%s| %s \n| [in file] : %s\n| [on line] : %d\n| [detail] : %s\n",
+             exception_prefix.c_str(), header, file, line, detail);
+    message = std::string(buffer);
   }
   const char *what() const noexcept { return message.c_str(); }
 };
......
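For reference, the new snprintf-based constructor produces a message laid out as in the sketch below; this standalone program just replays the same format string with example values (the `exception_prefix` text and the sample arguments are assumptions for illustration):

```cpp
#include <cstdio>
#include <string>

int main() {
  const std::string exception_prefix = "paddle mobile C++ Exception:\n";  // assumed prefix text
  char buffer[1500];
  snprintf(buffer, sizeof(buffer),
           "%s| %s \n| [in file] : %s\n| [on line] : %d\n| [detail] : %s\n",
           exception_prefix.c_str(), "Custom Exception", "src/common/types.cpp",
           42, "op type not found");
  std::puts(buffer);  // prints the multi-line exception message
  return 0;
}
```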
@@ -16,15 +16,43 @@ limitations under the License. */

#include <vector>

#ifdef PADDLE_MOBILE_DEBUG
#include <cstring>
#include <iostream>
#include <sstream>
#include <string>
#endif

#ifdef ANDROID
#include <android/log.h>
#endif

namespace paddle_mobile {

#ifdef PADDLE_MOBILE_DEBUG

#ifdef ANDROID

extern const char *ANDROID_LOG_TAG;

#define ANDROIDLOGI(...)                                               \
  __android_log_print(ANDROID_LOG_INFO, ANDROID_LOG_TAG, __VA_ARGS__); \
  printf(__VA_ARGS__)
#define ANDROIDLOGW(...)                                               \
  __android_log_print(ANDROID_LOG_WARN, ANDROID_LOG_TAG, __VA_ARGS__); \
  printf(__VA_ARGS__)
#define ANDROIDLOGD(...)                                                \
  __android_log_print(ANDROID_LOG_DEBUG, ANDROID_LOG_TAG, __VA_ARGS__); \
  printf(__VA_ARGS__)
#define ANDROIDLOGE(...)                                                \
  __android_log_print(ANDROID_LOG_ERROR, ANDROID_LOG_TAG, __VA_ARGS__); \
  printf(__VA_ARGS__)

#else

#define ANDROIDLOGI(...)
#define ANDROIDLOGW(...)
#define ANDROIDLOGD(...)
#define ANDROIDLOGE(...)

#endif

enum LogLevel {
  kNO_LOG,
  kLOG_ERROR,

@@ -88,26 +116,29 @@ struct ToLog {
  Print printer_;
};

#define LOG(level)                                                           \
  if (level > paddle_mobile::log_level) {                                    \
  } else                                                                     \
    paddle_mobile::ToLog(                                                    \
        level, static_cast<std::stringstream &>(                             \
                   std::stringstream()                                       \
                   << "[file: "                                              \
                   << (strrchr(__FILE__, '/') ? (strrchr(__FILE__, '/') + 1) \
                                              : __FILE__)                    \
                   << "] [line: " << __LINE__ << "] ")                       \
                   .str())

#define DLOG                                                          \
  if (paddle_mobile::kLOG_DEBUG > paddle_mobile::log_level) {         \
  } else                                                              \
    paddle_mobile::ToLog(                                             \
        paddle_mobile::kLOG_DEBUG,                                    \
        static_cast<std::stringstream &>(                             \
            std::stringstream()                                       \
            << "[file: "                                              \
            << (strrchr(__FILE__, '/') ? (strrchr(__FILE__, '/') + 1) \
                                       : __FILE__)                    \
            << "] [line: " << __LINE__ << "] ")                       \
            .str())

#define LOGF(level, format, ...) \

@@ -122,6 +153,11 @@ struct ToLog {

#else

#define ANDROIDLOGI(...)
#define ANDROIDLOGW(...)
#define ANDROIDLOGD(...)
#define ANDROIDLOGE(...)

enum LogLevel {
  kNO_LOG,
  kLOG_ERROR,
......
@@ -12,6 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "log.h"
+#pragma once

-namespace paddle_mobile {}
+#define EXPORT __attribute__((visibility("default")))
@@ -12,14 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#pragma once
-
-// Disable the copy and assignment operator for a class.
-#ifndef DISABLE_COPY_AND_ASSIGN
-#define DISABLE_COPY_AND_ASSIGN(classname)          \
- private:                                           \
-  classname(const classname &) = delete;            \
-  classname(classname &&) = delete;                 \
-  classname &operator=(const classname &) = delete; \
-  classname &operator=(classname &&) = delete
+#ifdef PADDLE_MOBILE_USE_OPENMP
+/**
+ * android-ndk-r17 has a problem when linking with openmp.
+ * if paddle-mobile enables -fopenmp, but didn't use those omp_* functions,
+ * after linking another binary with libpaddle-mobile.so, the omp_get_thread_num
+ * will not work. see test/common/test_openmp.cc the detailed reason is still
+ * unclear, but this trick will work. a better solution is hacking the linker,
+ * try some flags to make it link omp_* functions, but I didn't find out how to
+ * make it work.
+ */
+#include <omp.h>
+static int _ = omp_get_num_procs();
 #endif
@@ -798,76 +798,6 @@ uint32_t protobuf_c_version_number(void);
 */
#define PROTOBUF_C_MIN_COMPILER_VERSION 1000000
/**
* Look up a `ProtobufCEnumValue` from a `ProtobufCEnumDescriptor` by name.
*
* \param desc
* The `ProtobufCEnumDescriptor` object.
* \param name
* The `name` field from the corresponding `ProtobufCEnumValue` object to
* match.
* \return
* A `ProtobufCEnumValue` object.
* \retval NULL
* If not found or if the optimize_for = CODE_SIZE option was set.
*/
PROTOBUF_C__API
const ProtobufCEnumValue *protobuf_c_enum_descriptor_get_value_by_name(
const ProtobufCEnumDescriptor *desc, const char *name);
/**
* Look up a `ProtobufCEnumValue` from a `ProtobufCEnumDescriptor` by numeric
* value.
*
* \param desc
* The `ProtobufCEnumDescriptor` object.
* \param value
* The `value` field from the corresponding `ProtobufCEnumValue` object to
* match.
*
* \return
* A `ProtobufCEnumValue` object.
* \retval NULL
* If not found.
*/
PROTOBUF_C__API
const ProtobufCEnumValue *protobuf_c_enum_descriptor_get_value(
const ProtobufCEnumDescriptor *desc, int value);
/**
* Look up a `ProtobufCFieldDescriptor` from a `ProtobufCMessageDescriptor` by
* the name of the field.
*
* \param desc
* The `ProtobufCMessageDescriptor` object.
* \param name
* The name of the field.
* \return
* A `ProtobufCFieldDescriptor` object.
* \retval NULL
* If not found or if the optimize_for = CODE_SIZE option was set.
*/
PROTOBUF_C__API
const ProtobufCFieldDescriptor *protobuf_c_message_descriptor_get_field_by_name(
const ProtobufCMessageDescriptor *desc, const char *name);
/**
* Look up a `ProtobufCFieldDescriptor` from a `ProtobufCMessageDescriptor` by
* the tag value of the field.
*
* \param desc
* The `ProtobufCMessageDescriptor` object.
* \param value
* The tag value of the field.
* \return
* A `ProtobufCFieldDescriptor` object.
* \retval NULL
* If not found.
*/
PROTOBUF_C__API
const ProtobufCFieldDescriptor *protobuf_c_message_descriptor_get_field(
const ProtobufCMessageDescriptor *desc, unsigned value);
/**
 * Determine the number of bytes required to store the serialised message.
 *
@@ -947,33 +877,6 @@ PROTOBUF_C__API
void protobuf_c_message_init(const ProtobufCMessageDescriptor *descriptor,
                             void *message);
/**
* Free a service.
*
* \param service
* The service object to free.
*/
PROTOBUF_C__API
void protobuf_c_service_destroy(ProtobufCService *service);
/**
* Look up a `ProtobufCMethodDescriptor` by name.
*
* \param desc
* Service descriptor.
* \param name
* Name of the method.
*
* \return
* A `ProtobufCMethodDescriptor` object.
* \retval NULL
* If not found or if the optimize_for = CODE_SIZE option was set.
*/
PROTOBUF_C__API
const ProtobufCMethodDescriptor *
protobuf_c_service_descriptor_get_method_by_name(
const ProtobufCServiceDescriptor *desc, const char *name);
/**
 * Initialise a `ProtobufCBufferSimple` object.
 */
@@ -1011,18 +914,6 @@ PROTOBUF_C__API
void protobuf_c_buffer_simple_append(ProtobufCBuffer *buffer, size_t len,
                                     const unsigned char *data);
PROTOBUF_C__API
void protobuf_c_service_generated_init(
ProtobufCService *service, const ProtobufCServiceDescriptor *descriptor,
ProtobufCServiceDestroy destroy);
PROTOBUF_C__API
void protobuf_c_service_invoke_internal(ProtobufCService *service,
unsigned method_index,
const ProtobufCMessage *input,
ProtobufCClosure closure,
void *closure_data);
/**@}*/

PROTOBUF_C__END_DECLS
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <condition_variable>
#include <functional>
#include <future>
#include <memory>
#include <mutex>
#include <queue>
#include <stdexcept>
#include <thread>
#include <vector>
namespace paddle_mobile {
class ThreadPool {
public:
static ThreadPool& getThreadPool();
static int getThreadPoolThreadId();
explicit ThreadPool(size_t);
template <class F, class... Args>
auto enqueue(F&& f, Args&&... args)
-> std::future<typename std::result_of<F(Args...)>::type>;
~ThreadPool();
int getTid(const std::thread::id& id) {
for (int i = 0; i < workers.size(); i++) {
if (workers[i].get_id() == id) {
return i;
}
}
return -1;
}
private:
// need to keep track of threads so we can join them
std::vector<std::thread> workers;
// the task queue
std::queue<std::function<void()>> tasks;
// synchronization
std::mutex queue_mutex;
std::condition_variable condition;
bool stop;
};
// the constructor just launches some amount of workers
inline ThreadPool::ThreadPool(size_t threads) : stop(false) {
for (size_t i = 0; i < threads; ++i)
workers.emplace_back([this] {
for (;;) {
std::function<void()> task;
{
std::unique_lock<std::mutex> lock(this->queue_mutex);
this->condition.wait(
lock, [this] { return this->stop || !this->tasks.empty(); });
// for (;;) {
// if (this->stop || !this->tasks.empty()) {
// break;
// }
// lock.unlock();
// lock.lock();
// }
if (this->stop && this->tasks.empty()) return;
task = std::move(this->tasks.front());
this->tasks.pop();
}
task();
}
});
}
// add new work item to the pool
template <class F, class... Args>
auto ThreadPool::enqueue(F&& f, Args&&... args)
-> std::future<typename std::result_of<F(Args...)>::type> {
using return_type = typename std::result_of<F(Args...)>::type;
auto task = std::make_shared<std::packaged_task<return_type()>>(
std::bind(std::forward<F>(f), std::forward<Args>(args)...));
std::future<return_type> res = task->get_future();
{
std::unique_lock<std::mutex> lock(queue_mutex);
// don't allow enqueueing after stopping the pool
// if(stop)
// throw std::runtime_error("enqueue on stopped ThreadPool");
tasks.emplace([task]() { (*task)(); });
}
condition.notify_one();
return res;
}
// the destructor joins all threads
inline ThreadPool::~ThreadPool() {
{
std::unique_lock<std::mutex> lock(queue_mutex);
stop = true;
}
condition.notify_all();
for (std::thread& worker : workers) worker.join();
}
// defined in a header, so these must be inline to avoid duplicate symbols
inline ThreadPool& ThreadPool::getThreadPool() {
  static ThreadPool threadPool(3);
  return threadPool;
}

inline int ThreadPool::getThreadPoolThreadId() {
  return getThreadPool().getTid(std::this_thread::get_id());
}
} // namespace paddle_mobile
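A minimal usage sketch of the pool above (the include path is an assumption; adjust it to wherever this header lives in the source tree):

```cpp
#include <future>
#include <iostream>
#include <vector>

#include "common/threadpool.h"  // assumed include path for the ThreadPool above

int main() {
  auto &pool = paddle_mobile::ThreadPool::getThreadPool();

  // enqueue() returns a std::future for each task's result
  std::vector<std::future<int>> results;
  for (int i = 0; i < 8; ++i) {
    results.push_back(pool.enqueue([i] { return i * i; }));
  }

  for (auto &r : results) {
    std::cout << r.get() << " ";  // 0 1 4 9 ...
  }
  std::cout << std::endl;
  return 0;
}
```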
@@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#pragma once;
+#pragma once

 #include <functional>
 #include <map>
 #include <string>
 #include <unordered_set>
 #include <vector>

 #include "framework/attribute.h"
 #include "framework/scope.h"

@@ -40,13 +40,6 @@ using OpCreator = std::function<framework::OperatorBase<Dtype> *(
     const framework::AttributeMap & /*attrs*/,
     std::shared_ptr<framework::Scope> /*scope*/)>;

-using GradOpMakerFN =
-    std::function<std::vector<std::unique_ptr<framework::OpDesc>>(
-        const framework::OpDesc &,
-        const std::unordered_set<std::string> & /*no_grad_set*/,
-        std::unordered_map<std::string, std::string> * /*grad_to_var*/,
-        const std::vector<framework::BlockDesc *> &grad_block)>;

 using InferVarTypeFN = std::function<void(const framework::OpDesc & /*op_desc*/,
                                           framework::BlockDesc * /*block*/)>;
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "common/types.h"
#include <vector>
namespace paddle_mobile {
const std::string G_OP_TYPE_CONV = "conv2d";
const std::string G_OP_TYPE_BATCHNORM = "batch_norm";
const std::string G_OP_TYPE_BOX_CODER = "box_coder";
const std::string G_OP_TYPE_CONCAT = "concat";
const std::string G_OP_TYPE_ELEMENTWISE_ADD = "elementwise_add";
const std::string G_OP_TYPE_FUSION_CONV_ADD_RELU = "fusion_conv_add_relu";
const std::string G_OP_TYPE_FC = "fc";
const std::string G_OP_TYPE_CONV_ADD = "conv_add";
const std::string G_OP_TYPE_LRN = "lrn";
const std::string G_OP_TYPE_MUL = "mul";
const std::string G_OP_TYPE_MULTICLASS_NMS = "multiclass_nms";
const std::string G_OP_TYPE_POOL2D = "pool2d";
const std::string G_OP_TYPE_PRIOR_BOX = "prior_box";
const std::string G_OP_TYPE_RELU = "relu";
const std::string G_OP_TYPE_RESHAPE = "reshape";
const std::string G_OP_TYPE_SIGMOID = "sigmoid";
const std::string G_OP_TYPE_SOFTMAX = "softmax";
const std::string G_OP_TYPE_TRANSPOSE = "transpose";
const std::string G_OP_TYPE_SPLIT = "split";
const std::string G_OP_TYPE_FEED = "feed";
const std::string G_OP_TYPE_FETCH = "fetch";
const std::string G_OP_TYPE_DEPTHWISE_CONV = "depthwise_conv2d";
const std::string G_OP_TYPE_IM2SEQUENCE = "im2sequence";
std::unordered_map<
std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
op_input_output_key = {
{G_OP_TYPE_CONV, {{"Input"}, {"Output"}}},
{G_OP_TYPE_CONV_ADD, {{"Input"}, {"Out"}}},
{G_OP_TYPE_RELU, {{"X"}, {"Out"}}},
{G_OP_TYPE_SOFTMAX, {{"X"}, {"Out"}}},
{G_OP_TYPE_MUL, {{"X"}, {"Out"}}},
{G_OP_TYPE_ELEMENTWISE_ADD, {{"X", "Y"}, {"Out"}}},
{G_OP_TYPE_POOL2D, {{"X"}, {"Out"}}},
{G_OP_TYPE_BATCHNORM, {{"X"}, {"Y"}}},
{G_OP_TYPE_LRN, {{"X"}, {"Out"}}},
{G_OP_TYPE_CONCAT, {{"X"}, {"Out"}}},
{G_OP_TYPE_SPLIT, {{"X"}, {"Out"}}},
{G_OP_TYPE_FEED, {{"X"}, {"Out"}}},
{G_OP_TYPE_FETCH, {{"X"}, {"Out"}}},
{G_OP_TYPE_TRANSPOSE, {{"X"}, {"Out"}}},
{G_OP_TYPE_BOX_CODER,
{{"PriorBox", "PriorBoxVar", "TargetBox"}, {"OutputBox"}}},
{G_OP_TYPE_PRIOR_BOX, {{"Image", "Input"}, {"Boxes", "Variances"}}},
{G_OP_TYPE_MULTICLASS_NMS, {{"BBoxes", "Scores"}, {"Out"}}},
{G_OP_TYPE_FC, {{"X", "Y", "Z"}, {"Out"}}},
{G_OP_TYPE_RESHAPE, {{"X"}, {"Out"}}},
{G_OP_TYPE_DEPTHWISE_CONV, {{"Input"}, {"Output"}}},
{G_OP_TYPE_FUSION_CONV_ADD_RELU, {{"Input"}, {"Out"}}},
{G_OP_TYPE_IM2SEQUENCE, {{"X"}, {"Out"}}}};
} // namespace paddle_mobile
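A small sketch of how the table above might be consulted (hypothetical usage for illustration; the include path is assumed and the real callers live elsewhere in the framework):

```cpp
#include <iostream>

#include "common/types.h"  // assumed include path for the declarations above

int main() {
  using paddle_mobile::G_OP_TYPE_CONV;
  using paddle_mobile::op_input_output_key;

  // look up the parameter names used for conv2d inputs and outputs
  const auto &io = op_input_output_key.at(G_OP_TYPE_CONV);
  std::cout << "conv2d input keys:";
  for (const auto &in : io.first) std::cout << " " << in;
  std::cout << "\nconv2d output keys:";
  for (const auto &out : io.second) std::cout << " " << out;
  std::cout << std::endl;
  return 0;
}
```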
@@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <string>
#include <unordered_map>
#include <vector>

namespace paddle_mobile {
enum class Precision : int { FP32 = 0 };

@@ -72,50 +72,32 @@ enum PMStatus {
  PMWrongDevice = 0x08 /*!< un-correct device. */
};

extern const std::string G_OP_TYPE_CONV;
extern const std::string G_OP_TYPE_BATCHNORM;
extern const std::string G_OP_TYPE_BOX_CODER;
extern const std::string G_OP_TYPE_CONCAT;
extern const std::string G_OP_TYPE_ELEMENTWISE_ADD;
extern const std::string G_OP_TYPE_FUSION_CONV_ADD_RELU;
extern const std::string G_OP_TYPE_FC;
extern const std::string G_OP_TYPE_CONV_ADD;
extern const std::string G_OP_TYPE_LRN;
extern const std::string G_OP_TYPE_MUL;
extern const std::string G_OP_TYPE_MULTICLASS_NMS;
extern const std::string G_OP_TYPE_POOL2D;
extern const std::string G_OP_TYPE_PRIOR_BOX;
extern const std::string G_OP_TYPE_RELU;
extern const std::string G_OP_TYPE_RESHAPE;
extern const std::string G_OP_TYPE_SIGMOID;
extern const std::string G_OP_TYPE_SOFTMAX;
extern const std::string G_OP_TYPE_TRANSPOSE;
extern const std::string G_OP_TYPE_SPLIT;
extern const std::string G_OP_TYPE_FEED;
extern const std::string G_OP_TYPE_FETCH;
extern const std::string G_OP_TYPE_DEPTHWISE_CONV;
extern const std::string G_OP_TYPE_IM2SEQUENCE;

extern std::unordered_map<
    std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
    op_input_output_key;

}  // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
@@ -12,8 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include <iostream>
+#include "common/enforce.h"
 #include "common/log.h"
 #pragma once

@@ -57,15 +56,11 @@ class RawData {
   char data[size];
   RawData() {}
   RawData(const RawData &raw_data) { strcpy(data, raw_data.data); }
-  // void operator=(const RawData &raw_data){
-  //    strcpy(data, raw_data.data);
-  // }
 };

 template <typename... Ts>
 struct Variant {
   Variant(const Variant &variant) {
-    // std::cout << " copy constructor " << std::endl;
     type_id = variant.type_id;
     data = variant.data;
   }

@@ -87,8 +82,7 @@ struct Variant {
     if (type_id == typeid(T).hash_code()) {
       return *const_cast<T *>(reinterpret_cast<const T *>(&data));
     } else {
-      // std::cout << " bad cast in variant " << std::endl;
-      throw std::bad_cast();
+      PADDLE_MOBILE_THROW_EXCEPTION(" bad cast in variant ");
     }
   }
......
@@ -17,14 +17,8 @@ limitations under the License. */

 namespace paddle_mobile {
 namespace framework {

-/*
- * Variant<int, float, std::string, std::vector<int>, std::vector<float>,
-          std::vector<std::string>, bool, std::vector<bool>, BlockDesc *,
-          int64_t>
- * */
 struct PrintVistor : Vistor<Print &> {
-  PrintVistor(Print &printer) : printer_(printer) {}
+  explicit PrintVistor(Print &printer) : printer_(printer) {}
   template <typename T>
   Print &operator()(const T &value) {
     printer_ << value;
......
@@ -14,7 +14,11 @@ limitations under the License. */

#pragma once

#include <string>
#include <typeinfo>
#include <unordered_map>
#include <vector>

#include "common/enforce.h"
#include "common/log.h"
#include "common/variant.h"

@@ -22,28 +26,15 @@ limitations under the License. */

namespace paddle_mobile {
namespace framework {

using std::string;
using std::vector;

class BlockDesc;

class Attribute {
 public:
-  /*
-   * PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INT = 0,
-     PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__FLOAT = 1,
-     PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__STRING = 2,
-     PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INTS = 3,
-     PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__FLOATS = 4,
-     PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__STRINGS = 5,
-     PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BOOLEAN = 6,
-     PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BOOLEANS = 7,
-     PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCK = 8,
-     PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__LONG = 9
-     PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE)
-   * */
  static Attribute GetAttrValue(
      PaddleMobile__Framework__Proto__OpDesc__Attr *attr_desc) {
-    // std::cout << "begin get attr value" << std::endl;
    Attribute attr;
    switch (attr_desc->type) {
      case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BOOLEAN: {

@@ -63,35 +54,35 @@ class Attribute {
        break;
      }
      case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BOOLEANS: {
        vector<bool> val(attr_desc->n_bools);
        for (int i = 0; i < attr_desc->n_bools; ++i) {
          val[i] = attr_desc->bools[i];
        }
        attr.Set<vector<bool>>(val);
        break;
      }
      case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INTS: {
        vector<int> val(attr_desc->n_ints);
        for (int i = 0; i < attr_desc->n_ints; ++i) {
          val[i] = attr_desc->ints[i];
        }
        attr.Set<vector<int>>(val);
        break;
      }
      case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__FLOATS: {
        vector<float> val(attr_desc->n_floats);
        for (int i = 0; i < attr_desc->n_floats; ++i) {
          val[i] = attr_desc->floats[i];
        }
        attr.Set<vector<float>>(val);
        break;
      }
      case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__STRINGS: {
        vector<string> val(attr_desc->n_strings);
        for (int i = 0; i < attr_desc->n_strings; ++i) {
          val[i] = attr_desc->strings[i];
        }
        attr.Set<vector<string>>(val);
        break;
      }
      case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__LONG: {

@@ -122,47 +113,41 @@ class Attribute {
      return vistor(attr.variant_.Get<int>());
    } else if (attr.variant_.TypeId() == typeid(float).hash_code()) {
      return vistor(attr.variant_.Get<float>());
    } else if (attr.variant_.TypeId() == typeid(string).hash_code()) {
      return vistor(attr.variant_.Get<string>());
    } else if (attr.variant_.TypeId() == typeid(vector<int>).hash_code()) {
      return vistor(attr.variant_.Get<vector<int>>());
    } else if (attr.variant_.TypeId() == typeid(vector<float>).hash_code()) {
      return vistor(attr.variant_.Get<vector<float>>());
    } else if (attr.variant_.TypeId() == typeid(vector<string>).hash_code()) {
      return vistor(attr.variant_.Get<vector<string>>());
    } else if (attr.variant_.TypeId() == typeid(bool).hash_code()) {
      return vistor(attr.variant_.Get<bool>());
    } else if (attr.variant_.TypeId() == typeid(vector<bool>).hash_code()) {
      return vistor(attr.variant_.Get<vector<bool>>());
    } else if (attr.variant_.TypeId() == typeid(int64_t).hash_code()) {
      return vistor(attr.variant_.Get<int64_t>());
    } else {
      PADDLE_MOBILE_THROW_EXCEPTION("type not support");
    }
  }

 private:
  Variant<int, float, string, vector<int>, vector<float>, vector<string>, bool,
          vector<bool>, BlockDesc *, int64_t>
      variant_;
};

using AttributeMap = std::unordered_map<string, Attribute>;

class AttrReader {
 public:
  explicit AttrReader(const AttributeMap &attrs) : attrs_(attrs) {}

  template <typename T>
  inline T Get(const string &name) const {
    PADDLE_MOBILE_ENFORCE(attrs_.count(name) != 0,
                          "%s should be in AttributeMap", name);
    return ((Attribute)attrs_.at(name)).Get<T>();
  }
@@ -15,7 +15,6 @@ limitations under the License. */

 #pragma once

 #include <cctype>
-#include <iostream>
 #include <string>

 namespace paddle_mobile {

@@ -40,7 +39,7 @@ inline DataLayout StringToDataLayout(const std::string &str) {
   } else if (s == "ANYLAYOUT") {
     return DataLayout::kAnyLayout;
   } else {
-    // std::cout << "Unknown storage order string: %s", s;
+    PADDLE_MOBILE_THROW_EXCEPTION("Unknown storage order string: %s", s.c_str())
   }
 }

@@ -54,14 +53,8 @@ inline std::string DataLayoutToString(const DataLayout &data_layout) {
       return "ANY_LAYOUT";
     default:
       break;
-      // std::cout << "unknown DataLayou %d", data_layout;
   }
 }

-inline std::ostream &operator<<(std::ostream &out, const DataLayout &l) {
-  out << DataLayoutToString(l);
-  return out;
-}

 }  // namespace framework
 }  // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "framework/data_transform.h"
namespace paddle_mobile {
namespace framework {
static void PassTensorData(Tensor *from, Tensor *to) {
to->ShareDataWith(*from);
*from = Tensor();
}
void DataTransform(const OpKernelType &expected_kernel_type,
const OpKernelType &kernel_type_for_var,
const Tensor &input_tensor, Tensor *output_tensor) {
bool transformed = false;
Tensor in;
in.ShareDataWith(input_tensor);
Tensor out;
// // do layout transform
// if (NeedTransformLayout(expected_kernel_type.data_layout_,
// kernel_type_for_var.data_layout_)) {
// TransDataLayout(kernel_type_for_var, expected_kernel_type, in,
// &out);
// transformed = true;
// PassTensorData(&out, &in);
// }
//
// // do data type transform
// if (expected_kernel_type.data_type_ !=
// kernel_type_for_var.data_type_) {
// TransDataType(kernel_type_for_var, expected_kernel_type, in,
// &out);
// transformed = true;
// PassTensorData(&out, &in);
// }
//
// // do device transform
// if (!platform::is_same_place(kernel_type_for_var.place_,
// expected_kernel_type.place_)) {
// TransDataDevice(in, expected_kernel_type.place_, &out);
// transformed = true;
// PassTensorData(&out, &in);
// }
//
// PADDLE_ENFORCE(transformed, "No transform is applied, please
// check!");
// get output data
output_tensor->ShareDataWith(in);
}
void CopyVariableWithTensor(const Variable &in_var, const Tensor &tensor,
Variable *out_var) {
// if (in_var.IsType<LoDTensor>()) {
// auto& in_lod_tensor = in_var.Get<LoDTensor>();
// auto* tran_lod_tensor = out_var.GetMutable<LoDTensor>();
// tran_lod_tensor->set_lod(in_lod_tensor.lod());
// tran_lod_tensor->set_layout(in_lod_tensor.layout());
// tran_lod_tensor->ShareDataWith(tensor);
// } else if (in_var.IsType<SelectedRows>()) {
// auto& in_selected_rows = in_var.Get<SelectedRows>();
// auto* trans_selected_rows =
// out_var.GetMutable<SelectedRows>();
// trans_selected_rows->set_height(in_selected_rows.height());
// trans_selected_rows->set_rows(in_selected_rows.rows());
// trans_selected_rows->mutable_value()->ShareDataWith(tensor);
// } else {
// PADDLE_THROW("unknown var type");
// }
}
} // namespace framework
} // namespace paddle_mobile
@@ -63,9 +63,6 @@ void make_ddim(DDim &ddim, const int64_t *dims, int n) {
       ddim = make_dim<9>(dims);
       break;
     default:
-      // std::cout << "Dynamic dimensions must have between [1,
-      // 9]
-      // dimensions.";
       break;
   }
 }

@@ -133,9 +130,6 @@ int64_t DDim::operator[](int idx) const {
 int DDim::size() const { return arity(*this); }

 bool DDim::operator==(DDim d) const {
-  // if (var.which() != d.getVar().which()) {
-  //   return false;
-  // } else {
   std::vector<int64_t> v1 = vectorize(*this);
   std::vector<int64_t> v2 = vectorize(d);

@@ -157,7 +151,7 @@ DDim DDim::operator+(DDim d) const {
   std::vector<int64_t> v3;

-  assert(v1.size() == v2.size());
+  PADDLE_MOBILE_ENFORCE(v1.size() == v2.size(), "v1.size() != v2.size()");

   for (unsigned int i = 0; i < v1.size(); i++) {
     v3.push_back(v1[i] + v2[i]);

@@ -172,7 +166,7 @@ DDim DDim::operator*(DDim d) const {
   std::vector<int64_t> v3;

-  assert(v1.size() == v2.size());
+  PADDLE_MOBILE_ENFORCE(v1.size() == v2.size(), "v1.size() == v2.size()");

   for (unsigned int i = 0; i < v1.size(); i++) {
     v3.push_back(v1[i] * v2[i]);

@@ -183,7 +177,7 @@ DDim DDim::operator*(DDim d) const {
 int64_t get(const DDim &ddim, int idx) { return ddim[idx]; }

-void set(DDim &ddim, int idx, int value) { ddim[idx] = value; }
+void set(DDim *ddim, int idx, int value) { (*ddim)[idx] = value; }

 /// @cond HIDDEN
 struct VectorizeVisitor : Vistor<void> {

@@ -235,13 +229,10 @@ struct SliceVectorizeVisitor : Vistor<void> {
   SliceVectorizeVisitor(std::vector<int64_t> &v, int b, int e)
       : vector(v), begin(b), end(e) {
-    // PADDLE_ENFORCE(begin < end,
-    //                "Begin index must be less than end index in ddim slice.");
-    // PADDLE_ENFORCE(begin >= 0,
-    //                "Begin index can't be less than zero in ddim slice.");
+    PADDLE_MOBILE_ENFORCE(
+        begin < end, "Begin index must be less than end index in ddim slice.");
+    PADDLE_MOBILE_ENFORCE(begin >= 0,
+                          "Begin index can't be less than zero in ddim slice.");
   }

   template <int S>

@@ -267,9 +258,7 @@ DDim slice_ddim(const DDim &ddim, int begin, int end) {
   std::vector<int64_t> vec;
   vec.reserve(end - begin);
   SliceVectorizeVisitor visitor(vec, begin, end);
-  // boost::apply_visitor(visitor, dim);
   DDim::ApplyVistor(visitor, ddim);
-  // visitor(ddim.var.Get<Dim<4>>());
   return make_ddim(vec);
 }

@@ -287,31 +276,19 @@ struct ArityVisitor : Vistor<int> {
 int arity(const DDim &d) {
   ArityVisitor arityVisitor = ArityVisitor();
   return DDim::ApplyVistor(arityVisitor, d);
-  // return arityVisitor(d.var.Get<Dim<4>>());
-  // return boost::apply_visitor(ArityVisitor(), d);
 }

-/// \cond HIDDEN
-/// \endcond
-
-struct OSVistor : Vistor<std::ostream &> {
-  OSVistor(std::ostream &os) : os_(os) {}
-
-  template <int D>
-  std::ostream &operator()(Dim<D> dim) const {
-    return os_ << dim;
-  }
-
- private:
-  std::ostream &os_;
-};
-
-std::ostream &operator<<(std::ostream &os, const DDim &ddim) {
-  auto vistor = OSVistor(os);
-  DDim::ApplyVistor(vistor, ddim);
-  return os;
-}
+#ifdef PADDLE_MOBILE_DEBUG
+Print &operator<<(Print &printer, const DDim &ddim) {
+  for (int j = 0; j < ddim.size(); ++j) {
+    printer << ddim[j] << " ";
+  }
+  return printer;
+}
+#endif

 DDim::DDim(std::initializer_list<int64_t> init_list) {
   *this = make_ddim(init_list);
 }
......
...@@ -14,10 +14,10 @@ limitations under the License. */ ...@@ -14,10 +14,10 @@ limitations under the License. */
#pragma once #pragma once
#include <assert.h>
#include <initializer_list> #include <initializer_list>
#include <stdexcept> #include <typeinfo>
#include <vector> #include <vector>
#include "common/enforce.h"
#include "common/variant.h" #include "common/variant.h"
#include "dim.h" #include "dim.h"
...@@ -58,9 +58,7 @@ struct DDim { ...@@ -58,9 +58,7 @@ struct DDim {
} else if (d.var.TypeId() == typeid(Dim<9>).hash_code()) { } else if (d.var.TypeId() == typeid(Dim<9>).hash_code()) {
return vistor(d.var.Get<Dim<9>>()); return vistor(d.var.Get<Dim<9>>());
} else { } else {
printf(" dim not support \n"); DLOG << " dim not support";
throw std::bad_exception();
// return typename Vistor::type_t();
} }
} }
...@@ -83,17 +81,6 @@ struct DDim { ...@@ -83,17 +81,6 @@ struct DDim {
int64_t operator[](int idx) const; int64_t operator[](int idx) const;
// template <typename Visitor>
// typename Visitor::result_type apply_visitor(Visitor& visitor) {
// return var.apply_visitor(visitor);
// }
//
// template <typename Visitor>
// typename Visitor::result_type apply_visitor(Visitor& visitor)
// const {
// return var.apply_visitor(visitor);
// }
DDimVar getVar() { return var; } DDimVar getVar() { return var; }
bool operator==(DDim d) const; bool operator==(DDim d) const;
...@@ -126,7 +113,7 @@ DDim make_ddim(std::initializer_list<int64_t> dims); ...@@ -126,7 +113,7 @@ DDim make_ddim(std::initializer_list<int64_t> dims);
int64_t get(const DDim &dim, int idx); int64_t get(const DDim &dim, int idx);
void set(DDim &dim, int idx, int val); void set(DDim *dim, int idx, int val);
std::vector<int64_t> vectorize(const DDim &ddim); std::vector<int64_t> vectorize(const DDim &ddim);
...@@ -151,8 +138,6 @@ DDim slice_ddim(const DDim &dim, int begin, int end); ...@@ -151,8 +138,6 @@ DDim slice_ddim(const DDim &dim, int begin, int end);
int arity(const DDim &ddim); int arity(const DDim &ddim);
std::ostream &operator<<(std::ostream &, const DDim &);
// Reshape a tensor to a matrix. The matrix's first dimension(column // Reshape a tensor to a matrix. The matrix's first dimension(column
// length) // length)
// will be the product of tensor's first `num_col_dims` dimensions. // will be the product of tensor's first `num_col_dims` dimensions.
...@@ -163,5 +148,9 @@ DDim flatten_to_1d(const DDim &src); ...@@ -163,5 +148,9 @@ DDim flatten_to_1d(const DDim &src);
DDim stride(const DDim &ddim); DDim stride(const DDim &ddim);
DDim stride_numel(const DDim &ddim); DDim stride_numel(const DDim &ddim);
#ifdef PADDLE_MOBILE_DEBUG
Print &operator<<(Print &printer, const DDim &ddim);
#endif
} // namespace framework } // namespace framework
} // namespace paddle_mobile } // namespace paddle_mobile
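Taken together, the ddim changes above replace assert/std exceptions with the project's enforce macros, change set() to take a pointer, and route printing through Print/DLOG instead of std::ostream. A small usage sketch under those assumptions (the shape values are illustrative):
// Sketch: build a 4-D shape, read and mutate entries, slice it, and log it.
void DDimUsageSketch() {
  using namespace paddle_mobile::framework;
  DDim shape = make_ddim({1, 3, 224, 224});
  int64_t channels = get(shape, 1);   // 3
  set(&shape, 0, 2);                  // set() now takes DDim* instead of DDim&
  DDim hw = slice_ddim(shape, 2, 4);  // {224, 224}
#ifdef PADDLE_MOBILE_DEBUG
  DLOG << "shape: " << shape;         // operator<< now targets Print and only exists in debug builds
#endif
}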
...@@ -14,13 +14,7 @@ limitations under the License. */ ...@@ -14,13 +14,7 @@ limitations under the License. */
#pragma once #pragma once
#include <iostream> #include "common/enforce.h"
#include <sstream>
#include <stdexcept>
#include <type_traits>
#include "platform/hostdevice.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
...@@ -30,42 +24,35 @@ struct Dim { ...@@ -30,42 +24,35 @@ struct Dim {
static constexpr int dimensions = i; static constexpr int dimensions = i;
template <typename... Args> template <typename... Args>
HOSTDEVICE Dim(int64_t _head, Args... _tail) : head(_head), tail(_tail...) { Dim(int64_t _head, Args... _tail) : head(_head), tail(_tail...) {
static_assert(sizeof...(_tail) == i - 1, static_assert(sizeof...(_tail) == i - 1,
"Dim initialized with the wrong number of parameters"); "Dim initialized with the wrong number of parameters");
} }
HOSTDEVICE
Dim(int64_t _head, const Dim<i - 1> &_tail) : head(_head), tail(_tail) {} Dim(int64_t _head, const Dim<i - 1> &_tail) : head(_head), tail(_tail) {}
HOSTDEVICE
Dim() : head(0), tail() {} Dim() : head(0), tail() {}
/** Construct a Dim from a linear index and size. Uses Fortran /** Construct a Dim from a linear index and size. Uses Fortran
* order * order
* indexing. */ * indexing. */
HOSTDEVICE
Dim(int64_t idx, const Dim<i> &size) Dim(int64_t idx, const Dim<i> &size)
: head(idx % size.head), tail(idx / size.head, size.tail) {} : head(idx % size.head), tail(idx / size.head, size.tail) {}
/** Construct a Dim with each dimension set to the given index */ /** Construct a Dim with each dimension set to the given index */
HOSTDEVICE
Dim(int64_t idx) : head(idx), tail(idx) {} Dim(int64_t idx) : head(idx), tail(idx) {}
HOSTDEVICE
bool operator==(const Dim<i> &o) const { bool operator==(const Dim<i> &o) const {
return (head == o.head) && (tail == o.tail); return (head == o.head) && (tail == o.tail);
} }
HOSTDEVICE
bool operator!=(const Dim<i> &o) const { return !(*this == o); } bool operator!=(const Dim<i> &o) const { return !(*this == o); }
HOSTDEVICE
int64_t &operator[](int idx); int64_t &operator[](int idx);
HOSTDEVICE
int64_t operator[](int idx) const; int64_t operator[](int idx) const;
HOST std::string to_string() const; std::string to_string() const;
int64_t head; int64_t head;
Dim<i - 1> tail; Dim<i - 1> tail;
...@@ -76,32 +63,22 @@ template <> ...@@ -76,32 +63,22 @@ template <>
struct Dim<0> { struct Dim<0> {
static constexpr int dimensions = 0; static constexpr int dimensions = 0;
HOSTDEVICE
Dim(int64_t _head) {} Dim(int64_t _head) {}
HOSTDEVICE
Dim() {} Dim() {}
HOSTDEVICE
Dim(int idx, const Dim<0> &size) { Dim(int idx, const Dim<0> &size) {
#ifndef __CUDA_ARCH__
if (idx > 0) { if (idx > 0) {
throw std::invalid_argument("Index out of range."); PADDLE_MOBILE_THROW_EXCEPTION("Index out of range.")
} }
#else
PADDLE_ASSERT(idx == 0);
#endif
} }
HOSTDEVICE
bool operator==(const Dim<0> &o) const { return true; } bool operator==(const Dim<0> &o) const { return true; }
HOSTDEVICE
bool operator!=(const Dim<0> &o) const { return false; } bool operator!=(const Dim<0> &o) const { return false; }
HOSTDEVICE
int64_t &operator[](int idx); int64_t &operator[](int idx);
HOSTDEVICE
int64_t operator[](int idx) const; int64_t operator[](int idx) const;
}; };
...@@ -112,12 +89,12 @@ template <int i> ...@@ -112,12 +89,12 @@ template <int i>
struct DimGetter { struct DimGetter {
// Return a copy if Dim is const // Return a copy if Dim is const
template <typename D> template <typename D>
HOSTDEVICE static int64_t impl(const D &d) { static int64_t impl(const D &d) {
return DimGetter<i - 1>::impl(d.tail); return DimGetter<i - 1>::impl(d.tail);
} }
// Return a reference if Dim is mutable // Return a reference if Dim is mutable
template <typename D> template <typename D>
HOSTDEVICE static int64_t &impl(D &d) { static int64_t &impl(D &d) {
return DimGetter<i - 1>::impl(d.tail); return DimGetter<i - 1>::impl(d.tail);
} }
}; };
...@@ -127,25 +104,22 @@ template <> ...@@ -127,25 +104,22 @@ template <>
struct DimGetter<0> { struct DimGetter<0> {
// Return a copy if Dim is const // Return a copy if Dim is const
template <typename D> template <typename D>
HOSTDEVICE static int64_t impl(const D &d) { static int64_t impl(const D &d) {
return d.head; return d.head;
} }
// Return a reference if Dim is mutable // Return a reference if Dim is mutable
template <typename D> template <typename D>
HOSTDEVICE static int64_t &impl(D &d) { static int64_t &impl(D &d) {
return d.head; return d.head;
} }
}; };
template <int D> template <int D>
HOSTDEVICE int64_t &indexer(Dim<D> &dim, int idx) { int64_t &indexer(Dim<D> &dim, int idx) {
#ifndef __CUDA_ARCH__
if (idx < 0) { if (idx < 0) {
throw std::invalid_argument("Tried to access a negative dimension"); PADDLE_MOBILE_THROW_EXCEPTION("Tried to access a negative dimension")
} }
#else
PADDLE_ASSERT(idx >= 0);
#endif
if (idx == 0) { if (idx == 0) {
return dim.head; return dim.head;
} }
...@@ -153,31 +127,15 @@ HOSTDEVICE int64_t &indexer(Dim<D> &dim, int idx) { ...@@ -153,31 +127,15 @@ HOSTDEVICE int64_t &indexer(Dim<D> &dim, int idx) {
} }
template <> template <>
HOSTDEVICE int64_t &indexer<0>(Dim<0> &dim, int idx) { int64_t &indexer<0>(Dim<0> &dim, int idx) {
#ifndef __CUDA_ARCH__ PADDLE_MOBILE_THROW_EXCEPTION("Invalid index")
throw std::invalid_argument("Invalid index");
#else
PADDLE_ASSERT(false);
#if CUDA_VERSION < 8000
// On CUDA versions previous to 8.0, only __shared__ variables
// could be declared as static in the device code.
int64_t head = 0;
#else
static int64_t head = 0;
#endif
return head;
#endif
} }
template <int D> template <int D>
HOSTDEVICE int64_t indexer(const Dim<D> &dim, int idx) { int64_t indexer(const Dim<D> &dim, int idx) {
#ifndef __CUDA_ARCH__
if (idx < 0) { if (idx < 0) {
throw std::invalid_argument("Tried to access a negative dimension"); PADDLE_MOBILE_THROW_EXCEPTION("Tried to access a negative dimension")
} }
#else
PADDLE_ASSERT(idx >= 0);
#endif
if (idx == 0) { if (idx == 0) {
return dim.head; return dim.head;
} }
...@@ -185,102 +143,84 @@ HOSTDEVICE int64_t indexer(const Dim<D> &dim, int idx) { ...@@ -185,102 +143,84 @@ HOSTDEVICE int64_t indexer(const Dim<D> &dim, int idx) {
} }
template <> template <>
HOSTDEVICE int64_t indexer<0>(const Dim<0> &dim, int idx) { int64_t indexer<0>(const Dim<0> &dim, int idx) {
#ifndef __CUDA_ARCH__ PADDLE_MOBILE_THROW_EXCEPTION("Invalid index")
throw std::invalid_argument("Invalid index");
#else
PADDLE_ASSERT(false);
#if CUDA_VERSION < 8000
// On CUDA versions previous to 8.0, only __shared__ variables
// could be declared as static in the device code.
int64_t head = 0;
#else
static int64_t head = 0;
#endif
return head;
#endif
} }
} // namespace } // namespace
// Static access to constant Dim // Static access to constant Dim
template <int i, int l> template <int i, int l>
HOSTDEVICE int64_t get(const Dim<l> &d) { int64_t get(const Dim<l> &d) {
return DimGetter<i>::impl(d); return DimGetter<i>::impl(d);
} }
// Static access to mutable Dim // Static access to mutable Dim
template <int i, int l> template <int i, int l>
HOSTDEVICE int64_t &get(Dim<l> &d) { int64_t &get(Dim<l> &d) {
return DimGetter<i>::impl(d); return DimGetter<i>::impl(d);
} }
// Dynamic access to constant Dim // Dynamic access to constant Dim
template <int l> template <int l>
HOSTDEVICE int64_t Dim<l>::operator[](int i) const { int64_t Dim<l>::operator[](int i) const {
// std::cout << "l: " << l << std::endl; // std::cout << "l: " << l << std::endl;
return indexer(*this, i); return indexer(*this, i);
} }
// Dynamic access to mutable Dim // Dynamic access to mutable Dim
template <int l> template <int l>
HOSTDEVICE int64_t &Dim<l>::operator[](int i) { int64_t &Dim<l>::operator[](int i) {
return indexer(*this, i); return indexer(*this, i);
} }
// Dynamic access to constant Dim // Dynamic access to constant Dim
inline HOSTDEVICE int64_t Dim<0>::operator[](int i) const { inline int64_t Dim<0>::operator[](int i) const { return indexer(*this, i); }
return indexer(*this, i);
}
// Dynamic access to mutable Dim // Dynamic access to mutable Dim
inline HOSTDEVICE int64_t &Dim<0>::operator[](int i) { inline int64_t &Dim<0>::operator[](int i) { return indexer(*this, i); }
return indexer(*this, i);
}
// Dynamic access to constant Dim // Dynamic access to constant Dim
// without std::enable_if will try to instantiate this on get<0>(d) // without std::enable_if will try to instantiate this on get<0>(d)
template <int l> template <int l>
HOSTDEVICE typename std::enable_if<(l > 0), int64_t>::type get(const Dim<l> &d, typename std::enable_if<(l > 0), int64_t>::type get(const Dim<l> &d, int i) {
int i) {
return d[i]; return d[i];
} }
// Dynamic access to mutable Dim // Dynamic access to mutable Dim
template <int l> template <int l>
HOSTDEVICE typename std::enable_if<(l > 0), int64_t &>::type get(Dim<l> &d, typename std::enable_if<(l > 0), int64_t &>::type get(Dim<l> &d, int i) {
int i) {
return d[i]; return d[i];
} }
// Dot product of two dims // Dot product of two dims
template <int i> template <int i>
HOSTDEVICE int64_t linearize(const Dim<i> &a, const Dim<i> &b) { int64_t linearize(const Dim<i> &a, const Dim<i> &b) {
return a.head * b.head + linearize(a.tail, b.tail); return a.head * b.head + linearize(a.tail, b.tail);
} }
// Base case dot product of two Dims // Base case dot product of two Dims
// Notice it is inline because it is no longer a template // Notice it is inline because it is no longer a template
template <> template <>
HOSTDEVICE inline int64_t linearize(const Dim<0> &a, const Dim<0> &b) { inline int64_t linearize(const Dim<0> &a, const Dim<0> &b) {
return 0; return 0;
} }
// Product of a Dim // Product of a Dim
template <int i> template <int i>
HOSTDEVICE int64_t product(const Dim<i> &a, int prod = 1) { int64_t product(const Dim<i> &a, int prod = 1) {
return prod * a.head * product(a.tail); return prod * a.head * product(a.tail);
} }
// Base case product of a Dim // Base case product of a Dim
// Notice it is inline because it is no longer a template // Notice it is inline because it is no longer a template
template <> template <>
HOSTDEVICE inline int64_t product(const Dim<0> &a, int prod) { inline int64_t product(const Dim<0> &a, int prod) {
return prod; return prod;
} }
// Is 0 <= idx_i < size_i for all i? // Is 0 <= idx_i < size_i for all i?
template <int i> template <int i>
HOSTDEVICE bool contained(const Dim<i> &idx, const Dim<i> &size) { bool contained(const Dim<i> &idx, const Dim<i> &size) {
return ((0 <= idx.head) && (idx.head < size.head) && return ((0 <= idx.head) && (idx.head < size.head) &&
contained(idx.tail, size.tail)); contained(idx.tail, size.tail));
} }
...@@ -288,7 +228,7 @@ HOSTDEVICE bool contained(const Dim<i> &idx, const Dim<i> &size) { ...@@ -288,7 +228,7 @@ HOSTDEVICE bool contained(const Dim<i> &idx, const Dim<i> &size) {
// Base case of is 0 <= idx_i < size_i ? // Base case of is 0 <= idx_i < size_i ?
// Notice it is inline because it is no longer a template // Notice it is inline because it is no longer a template
template <> template <>
HOSTDEVICE inline bool contained(const Dim<0> &idx, const Dim<0> &size) { inline bool contained(const Dim<0> &idx, const Dim<0> &size) {
return true; return true;
} }
...@@ -296,7 +236,7 @@ HOSTDEVICE inline bool contained(const Dim<0> &idx, const Dim<0> &size) { ...@@ -296,7 +236,7 @@ HOSTDEVICE inline bool contained(const Dim<0> &idx, const Dim<0> &size) {
* \brief Compute exclusive prefix-multiply of a Dim. * \brief Compute exclusive prefix-multiply of a Dim.
*/ */
template <int i> template <int i>
HOSTDEVICE Dim<i> ex_prefix_mul(const Dim<i> &src, int mul = 1) { Dim<i> ex_prefix_mul(const Dim<i> &src, int mul = 1) {
return Dim<i>(mul, ex_prefix_mul(src.tail, mul * src.head)); return Dim<i>(mul, ex_prefix_mul(src.tail, mul * src.head));
} }
...@@ -304,7 +244,7 @@ HOSTDEVICE Dim<i> ex_prefix_mul(const Dim<i> &src, int mul = 1) { ...@@ -304,7 +244,7 @@ HOSTDEVICE Dim<i> ex_prefix_mul(const Dim<i> &src, int mul = 1) {
// Base case of ex_prefix_mul // Base case of ex_prefix_mul
// Notice it is inline because it is no longer a template // Notice it is inline because it is no longer a template
template <> template <>
HOSTDEVICE inline Dim<0> ex_prefix_mul(const Dim<0> &src, int mul) { inline Dim<0> ex_prefix_mul(const Dim<0> &src, int mul) {
return Dim<0>(); return Dim<0>();
} }
///\endcond ///\endcond
...@@ -313,18 +253,18 @@ HOSTDEVICE inline Dim<0> ex_prefix_mul(const Dim<0> &src, int mul) { ...@@ -313,18 +253,18 @@ HOSTDEVICE inline Dim<0> ex_prefix_mul(const Dim<0> &src, int mul) {
* Add two dimensions together * Add two dimensions together
*/ */
template <int i> template <int i>
HOSTDEVICE Dim<i> dim_plus(const Dim<i> &a, const Dim<i> &b) { Dim<i> dim_plus(const Dim<i> &a, const Dim<i> &b) {
return Dim<i>(a.head + b.head, dim_plus(a.tail, b.tail)); return Dim<i>(a.head + b.head, dim_plus(a.tail, b.tail));
} }
// Base case // Base case
template <> template <>
HOSTDEVICE inline Dim<0> dim_plus(const Dim<0> &a, const Dim<0> &b) { inline Dim<0> dim_plus(const Dim<0> &a, const Dim<0> &b) {
return Dim<0>(); return Dim<0>();
} }
template <int i> template <int i>
HOSTDEVICE Dim<i> operator+(const Dim<i> &lhs, const Dim<i> &rhs) { Dim<i> operator+(const Dim<i> &lhs, const Dim<i> &rhs) {
return dim_plus(lhs, rhs); return dim_plus(lhs, rhs);
} }
...@@ -332,18 +272,18 @@ HOSTDEVICE Dim<i> operator+(const Dim<i> &lhs, const Dim<i> &rhs) { ...@@ -332,18 +272,18 @@ HOSTDEVICE Dim<i> operator+(const Dim<i> &lhs, const Dim<i> &rhs) {
* Multiply two dimensions together * Multiply two dimensions together
*/ */
template <int i> template <int i>
HOSTDEVICE Dim<i> dim_mult(const Dim<i> &a, const Dim<i> &b) { Dim<i> dim_mult(const Dim<i> &a, const Dim<i> &b) {
return Dim<i>(a.head * b.head, dim_mult(a.tail, b.tail)); return Dim<i>(a.head * b.head, dim_mult(a.tail, b.tail));
} }
// Base case // Base case
template <> template <>
HOSTDEVICE inline Dim<0> dim_mult(const Dim<0> &a, const Dim<0> &b) { inline Dim<0> dim_mult(const Dim<0> &a, const Dim<0> &b) {
return Dim<0>(); return Dim<0>();
} }
template <int i> template <int i>
HOSTDEVICE Dim<i> operator*(const Dim<i> &lhs, const Dim<i> &rhs) { Dim<i> operator*(const Dim<i> &lhs, const Dim<i> &rhs) {
return dim_mult(lhs, rhs); return dim_mult(lhs, rhs);
} }
...@@ -358,7 +298,7 @@ HOSTDEVICE Dim<i> operator*(const Dim<i> &lhs, const Dim<i> &rhs) { ...@@ -358,7 +298,7 @@ HOSTDEVICE Dim<i> operator*(const Dim<i> &lhs, const Dim<i> &rhs) {
*/ */
template <int i> template <int i>
HOSTDEVICE Dim<i> normalize_strides(const Dim<i> &size, const Dim<i> &stride) { Dim<i> normalize_strides(const Dim<i> &size, const Dim<i> &stride) {
int norm_stride = size.head == 1 ? 0 : stride.head; int norm_stride = size.head == 1 ? 0 : stride.head;
return Dim<i>(norm_stride, normalize_strides(size.tail, stride.tail)); return Dim<i>(norm_stride, normalize_strides(size.tail, stride.tail));
} }
...@@ -366,8 +306,7 @@ HOSTDEVICE Dim<i> normalize_strides(const Dim<i> &size, const Dim<i> &stride) { ...@@ -366,8 +306,7 @@ HOSTDEVICE Dim<i> normalize_strides(const Dim<i> &size, const Dim<i> &stride) {
///\cond HIDDEN ///\cond HIDDEN
template <> template <>
HOSTDEVICE inline Dim<0> normalize_strides(const Dim<0> &size, inline Dim<0> normalize_strides(const Dim<0> &size, const Dim<0> &stride) {
const Dim<0> &stride) {
return Dim<0>(); return Dim<0>();
} }
...@@ -382,54 +321,9 @@ HOSTDEVICE inline Dim<0> normalize_strides(const Dim<0> &size, ...@@ -382,54 +321,9 @@ HOSTDEVICE inline Dim<0> normalize_strides(const Dim<0> &size,
*/ */
template <typename... Args> template <typename... Args>
HOSTDEVICE Dim<sizeof...(Args)> make_dim(Args... idxes) { Dim<sizeof...(Args)> make_dim(Args... idxes) {
return Dim<sizeof...(Args)>(idxes...); return Dim<sizeof...(Args)>(idxes...);
} }
// Allows us to output a Dim
// XXX For some reason, overloading fails to resolve this correctly
template <int i>
typename std::enable_if<(i > 1), std::ostream &>::type operator<<(
std::ostream &os, const Dim<i> &d) {
os << d.head << ", " << d.tail;
return os;
}
// Base case that allows us to output a Dim
// XXX I wish this could be an overload instead of a template
template <int i>
typename std::enable_if<(i == 1), std::ostream &>::type operator<<(
std::ostream &os, const Dim<i> &d) {
os << d.head;
return os;
}
inline std::ostream &operator<<(std::ostream &os, const Dim<0> &d) {
return os;
}
template <int i>
HOST std::string Dim<i>::to_string() const {
std::stringstream stream;
stream << *this;
return stream.str();
}
template <int D>
HOSTDEVICE Dim<D> linear_to_dimension(int linear_index, Dim<D> extents) {
Dim<D> result;
for (int i = 0; i < D - 1; ++i) {
result[i] = linear_index % extents[i];
linear_index /= extents[i];
}
result[D - 1] = linear_index;
return result;
}
} // namespace framework } // namespace framework
} // namespace paddle_mobile } // namespace paddle_mobile
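The Dim<i> helpers above are head/tail recursive; with the HOSTDEVICE markers gone they are plain host-side templates. A short sketch of how they compose (values are illustrative):
// Sketch: a 3-D Dim and the recursive helpers defined above.
void DimHelpersSketch() {
  using paddle_mobile::framework::Dim;
  Dim<3> extents = paddle_mobile::framework::make_dim(2, 3, 4);
  int64_t total = paddle_mobile::framework::product(extents);         // 2 * 3 * 4 = 24
  Dim<3> strides = paddle_mobile::framework::ex_prefix_mul(extents);  // {1, 2, 6}, Fortran-order strides
  Dim<3> summed = extents + extents;                                  // element-wise dim_plus -> {4, 6, 8}
  int64_t mid = paddle_mobile::framework::get<1>(extents);            // static access -> 3
  int64_t last = extents[2];                                          // dynamic access -> 4, throws on a bad index
}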
...@@ -13,72 +13,56 @@ See the License for the specific language governing permissions and ...@@ -13,72 +13,56 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "lod_tensor.h" #include "lod_tensor.h"
#include <stdint.h>
#include <string.h>
#include <algorithm> #include <algorithm>
#include <iterator>
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
std::ostream &operator<<(std::ostream &os, const LoD &lod) { // std::ostream &operator<<(std::ostream &os, const LoD &lod) {
os << "{"; // os << "{";
for (auto &v : lod) { // for (auto &v : lod) {
os << "{"; // os << "{";
bool is_first = true; // bool is_first = true;
for (auto &i : v) { // for (auto &i : v) {
if (is_first) { // if (is_first) {
os << i; // os << i;
is_first = false; // is_first = false;
} else { // } else {
os << ", " << i; // os << ", " << i;
} // }
} // }
os << "}"; // os << "}";
} // }
os << "}"; // os << "}";
//
return os; // return os;
} //}
//
std::ostream &operator<<(std::ostream &os, const LoDTensor &t) { // std::ostream &operator<<(std::ostream &os, const LoDTensor &t) {
// PADDLE_ENFORCE(t.type().hash_code() == // PADDLE_MOBILE_ENFORCE(t.type().hash_code() == typeid(float).hash_code(),
// typeid(float).hash_code()); // "t.type() is not float");
// os << "dim: " << t.dims() << "\n";
// if (!platform::is_cpu_place(t.place())) { // os << "lod: " << t.lod() << "\n";
// LoDTensor tt; // // only print first ten elements
// framework::TensorCopy(t, platform::CPUPlace(), &tt); // int64_t size = t.numel() < 10 ? t.numel() : 10;
// platform::DeviceContextPool &pool = // for (int64_t i = 0; i < size; ++i) {
// platform::DeviceContextPool::Instance(); auto &dev_ctx = // os << t.data<float>()[i] << " ";
// *pool.Get(t.place()); dev_ctx.Wait(); // }
// //
// os << tt; // return os;
// return os; //}
// }
// std::string LoDToString(const LoD &lod) {
os << "dim: " << t.dims() << "\n"; // std::ostringstream stream;
os << "lod: " << t.lod() << "\n"; // stream << lod;
// return stream.str();
// only print first ten elements //}
int64_t size = t.numel() < 10 ? t.numel() : 10;
for (int64_t i = 0; i < size; ++i) {
os << t.data<float>()[i] << " ";
}
return os;
}
std::string LoDToString(const LoD &lod) {
std::ostringstream stream;
stream << lod;
return stream.str();
}
LoD SliceInLevel(const LoD &in, size_t level, size_t elem_begin, LoD SliceInLevel(const LoD &in, size_t level, size_t elem_begin,
size_t elem_end) { size_t elem_end) {
// PADDLE_ENFORCE_LT(level, in.size()); PADDLE_MOBILE_ENFORCE(level < in.size(), "level >= in.size()");
// PADDLE_ENFORCE_LT(elem_end, in[level].size()); PADDLE_MOBILE_ENFORCE(elem_end < in[level].size(),
"elem_end >= in[level].size()");
LoD res; LoD res;
res.resize(in.size() - level); res.resize(in.size() - level);
// copy the first level // copy the first level
...@@ -152,7 +136,7 @@ bool CheckLoD(const LoD &in, int tensor_height) { ...@@ -152,7 +136,7 @@ bool CheckLoD(const LoD &in, int tensor_height) {
if (a < b) return true; if (a < b) return true;
return false; return false;
})) { })) {
std::cout << "ascending error"; PADDLE_MOBILE_THROW_EXCEPTION("ascending error")
return false; return false;
} }
} }
...@@ -211,8 +195,9 @@ LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD &lod, size_t start_idx, ...@@ -211,8 +195,9 @@ LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD &lod, size_t start_idx,
LoD sub_lod; LoD sub_lod;
for (size_t level_idx = start_level; level_idx < lod.size(); ++level_idx) { for (size_t level_idx = start_level; level_idx < lod.size(); ++level_idx) {
// PADDLE_ENFORCE_LE(start_idx, end_idx); PADDLE_MOBILE_ENFORCE(start_idx <= end_idx, "start_idx > end_idx");
// PADDLE_ENFORCE_LT(end_idx, lod[level_idx].size()); PADDLE_MOBILE_ENFORCE(end_idx < lod[level_idx].size(),
"end_idx >= lod[level_idx].size()");
std::vector<size_t> level_lens; std::vector<size_t> level_lens;
for (size_t i = start_idx; i < end_idx; ++i) { for (size_t i = start_idx; i < end_idx; ++i) {
level_lens.push_back(lod[level_idx][i + 1] - lod[level_idx][i]); level_lens.push_back(lod[level_idx][i + 1] - lod[level_idx][i]);
...@@ -226,10 +211,9 @@ LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD &lod, size_t start_idx, ...@@ -226,10 +211,9 @@ LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD &lod, size_t start_idx,
} }
void AppendLoD(LoD *lod, const LoD &lod_length) { void AppendLoD(LoD *lod, const LoD &lod_length) {
// PADDLE_ENFORCE( PADDLE_MOBILE_ENFORCE(
// lod->empty() || lod->size() == lod_length.size(), lod->empty() || lod->size() == lod_length.size(),
// "The lod_length should has the same size with the appended "The lod_length should has the same size with the appended lod.");
// lod.");
if (lod->empty()) { if (lod->empty()) {
for (size_t i = 0; i < lod_length.size(); ++i) { for (size_t i = 0; i < lod_length.size(); ++i) {
lod->emplace_back(1, 0); // size = 1, value = 0; lod->emplace_back(1, 0); // size = 1, value = 0;
......
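For context, a hedged sketch of the LoD helpers touched above, assuming LoD is the usual vector of offset vectors used elsewhere in the framework (the numbers are made up):
// Sketch: a two-level LoD describing 2 sequences split into 4 sub-sequences over 10 rows.
void LoDHelpersSketch() {
  paddle_mobile::framework::LoD lod = {{0, 2, 4}, {0, 3, 5, 8, 10}};
  // CheckLoD verifies each level is ascending; violations now raise the project's own exception.
  bool ok = paddle_mobile::framework::CheckLoD(lod, 10);
  // Both slice bounds are now guarded with PADDLE_MOBILE_ENFORCE instead of the old fluid macros.
  paddle_mobile::framework::LoD sub =
      paddle_mobile::framework::SliceInLevel(lod, /*level=*/0, /*elem_begin=*/0, /*elem_end=*/2);
}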
...@@ -16,7 +16,6 @@ limitations under the License. */ ...@@ -16,7 +16,6 @@ limitations under the License. */
#include <memory> #include <memory>
#include <string> #include <string>
#include <utility>
#include <vector> #include <vector>
#include "tensor.h" #include "tensor.h"
#include "tensor_util.h" #include "tensor_util.h"
......
...@@ -25,9 +25,8 @@ template <typename Dtype> ...@@ -25,9 +25,8 @@ template <typename Dtype>
struct OpInfo { struct OpInfo {
OpCreator<Dtype> creator_; OpCreator<Dtype> creator_;
const OpCreator<Dtype> &Creator() const { const OpCreator<Dtype> &Creator() const {
// PADDLE_ENFORCE_NOT_NULL(creator_, PADDLE_MOBILE_ENFORCE(creator_ != nullptr,
// "Operator Creator has not been "Operator Creator has not been registered");
// registered");
return creator_; return creator_;
} }
}; };
...@@ -48,17 +47,15 @@ class OpInfoMap { ...@@ -48,17 +47,15 @@ class OpInfoMap {
} }
void Insert(const std::string &type, const OpInfo<Dtype> &info) { void Insert(const std::string &type, const OpInfo<Dtype> &info) {
// PADDLE_ENFORCE(!Has(type), "Operator %s has been PADDLE_MOBILE_ENFORCE(!Has(type), "Operator %s has been registered",
// registered", type); type.c_str());
map_.insert({type, info}); map_.insert({type, info});
} }
const OpInfo<Dtype> &Get(const std::string &type) const { const OpInfo<Dtype> &Get(const std::string &type) const {
auto op_info_ptr = GetNullable(type); auto op_info_ptr = GetNullable(type);
// PADDLE_ENFORCE_NOT_NULL(op_info_ptr, "Operator %s has not PADDLE_MOBILE_ENFORCE(op_info_ptr != nullptr,
// been "Operator %s has not been registered", type.c_str());
// registered",
// type);
return *op_info_ptr; return *op_info_ptr;
} }
......
...@@ -96,24 +96,39 @@ class OpRegistry { ...@@ -96,24 +96,39 @@ class OpRegistry {
} }
}; };
#define REGISTER_OPERATOR(op_type, op_class) \ #define REGISTER_OPERATOR(op_type, op_class, device_name, device_type) \
template <typename Dtype, typename T> \ template <typename Dtype, typename T> \
class _OpClass_##op_type##_ : public op_class<Dtype, T> { \ class _OpClass_##op_type##_##device_name : public op_class<Dtype, T> { \
public: \ public: \
DEFINE_OP_CONSTRUCTOR(_OpClass_##op_type##_, op_class); \ DEFINE_OP_CONSTRUCTOR(_OpClass_##op_type##_##device_name, op_class); \
}; \ }; \
static paddle_mobile::framework::OperatorRegistrar< \ static paddle_mobile::framework::OperatorRegistrar< \
paddle_mobile::CPU, _OpClass_##op_type##_<paddle_mobile::CPU, float>> \ device_type, _OpClass_##op_type##_##device_name<device_type, float>> \
__op_registrar_##op_type##__(#op_type); \ __op_registrar_##op_type##_##device_name(#op_type); \
int TouchOpRegistrar_##op_type() { \ int TouchOpRegistrar_##op_type##_##device_name() { \
__op_registrar_##op_type##__.Touch(); \ __op_registrar_##op_type##_##device_name.Touch(); \
return 0; \ return 0; \
} }
#define USE_OP(op_type) \ #define REGISTER_OPERATOR_CPU(op_type, op_class) \
extern int TouchOpRegistrar_##op_type(); \ REGISTER_OPERATOR(op_type, op_class, cpu, paddle_mobile::CPU);
static int use_op_itself_##op_type##_ __attribute__((unused)) = \
TouchOpRegistrar_##op_type() #define REGISTER_OPERATOR_MALI_GPU(op_type, op_class) \
REGISTER_OPERATOR(op_type, op_class, mali_gpu, paddle_mobile::GPU_MALI);
#define REGISTER_OPERATOR_FPGA(op_type, op_class) \
REGISTER_OPERATOR(op_type, op_class, fpga, paddle_mobile::FPGA);
#define USE_OP(op_type, device_name) \
extern int TouchOpRegistrar_##op_type##_##device_name(); \
static int use_op_itself_##op_type##_##device_name __attribute__((unused)) = \
TouchOpRegistrar_##op_type##_##device_name()
#define USE_OP_CPU(op_type) USE_OP(op_type, cpu);
#define USE_OP_MALI_GPU(op_type) USE_OP(op_type, mali_gpu);
#define USE_OP_FPGA(op_type) USE_OP(op_type, fpga);
} // namespace framework } // namespace framework
} // namespace paddle_mobile } // namespace paddle_mobile
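The registration macro is now parameterized by device, with thin per-device wrappers and matching USE_OP_* macros. A hedged sketch of how an operator would be registered and then linked into a binary under the new scheme (the relu op and ops::ReluOp class are placeholders, not part of this patch):
// In the op's implementation file (hypothetical op):
REGISTER_OPERATOR_CPU(relu, ops::ReluOp);
REGISTER_OPERATOR_MALI_GPU(relu, ops::ReluOp);
// In the code that needs the op pulled into the final binary:
USE_OP_CPU(relu);
USE_OP_MALI_GPU(relu);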
...@@ -58,7 +58,8 @@ void OperatorBase<Dtype>::Run() const { ...@@ -58,7 +58,8 @@ void OperatorBase<Dtype>::Run() const {
} }
template class OperatorBase<CPU>; template class OperatorBase<CPU>;
template class OperatorWithKernel<CPU>; template class OperatorBase<FPGA>;
template class OperatorBase<GPU_MALI>;
} // namespace framework } // namespace framework
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -16,7 +16,6 @@ limitations under the License. */ ...@@ -16,7 +16,6 @@ limitations under the License. */
#include <map> #include <map>
#include <string> #include <string>
#include <utility>
#include <vector> #include <vector>
#include "common/enforce.h" #include "common/enforce.h"
...@@ -27,7 +26,6 @@ limitations under the License. */ ...@@ -27,7 +26,6 @@ limitations under the License. */
#include "framework/op_info.h" #include "framework/op_info.h"
#include "framework/op_kernel_type.h" #include "framework/op_kernel_type.h"
#include "framework/op_registry.h" #include "framework/op_registry.h"
#include "framework/paddle_mobile_object.h"
#include "framework/program/block_desc.h" #include "framework/program/block_desc.h"
#include "framework/program/program-optimize/node.h" #include "framework/program/program-optimize/node.h"
#include "framework/scope.h" #include "framework/scope.h"
...@@ -52,7 +50,7 @@ static T *GetVarValue(const string &key, const VariableNameMap &var_map, ...@@ -52,7 +50,7 @@ static T *GetVarValue(const string &key, const VariableNameMap &var_map,
} }
template <typename Dtype> template <typename Dtype>
class OperatorBase : PaddleMobileObject { class OperatorBase {
public: public:
/* /*
* @b Instantiation method of the op base class; the op receives its inputs, attributes, and the pre-allocated output tensors * @b Instantiation method of the op base class; the op receives its inputs, attributes, and the pre-allocated output tensors
...@@ -65,6 +63,7 @@ class OperatorBase : PaddleMobileObject { ...@@ -65,6 +63,7 @@ class OperatorBase : PaddleMobileObject {
std::vector<string> GetOutKeys() const; std::vector<string> GetOutKeys() const;
virtual void RunImpl() const = 0; virtual void RunImpl() const = 0;
virtual void Init() const = 0;
/* /*
* @b Inputs required by the op's computation, e.g. the previous layer's output or a convolution kernel * @b Inputs required by the op's computation, e.g. the previous layer's output or a convolution kernel
* */ * */
...@@ -105,31 +104,55 @@ class OperatorBase : PaddleMobileObject { ...@@ -105,31 +104,55 @@ class OperatorBase : PaddleMobileObject {
/* /*
* @b Parent class of every op that performs computation; such ops inherit from OperatorBase * @b Parent class of every op that performs computation; such ops inherit from OperatorBase
* */ * */
template <typename Dtype> template <typename Dtype, typename ParamType, typename KernelType>
class OperatorWithKernel : public OperatorBase<Dtype> { class OperatorWithKernel : public OperatorBase<Dtype> {
public: public:
OperatorWithKernel(const std::string &type, const VariableNameMap &inputs, OperatorWithKernel(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs, const VariableNameMap &outputs, const AttributeMap &attrs,
std::shared_ptr<Scope> scope) std::shared_ptr<Scope> scope)
: OperatorBase<Dtype>(type, inputs, outputs, attrs, scope) {} : OperatorBase<Dtype>(type, inputs, outputs, attrs, scope),
param_(inputs, outputs, attrs, *scope) {}
virtual void RunImpl() const { this->kernel_.Compute(this->param_); }
virtual void RunImpl() const = 0;
virtual void InferShape() const = 0; virtual void InferShape() const = 0;
void Init() const {
PADDLE_MOBILE_ENFORCE(kernel_.Init(param_), " %s kernel init failed",
this->type_.c_str());
}
protected:
KernelType kernel_;
ParamType param_;
}; };
/* /*
* @b Parent class of all kernels * @b Parent class of all kernels
* */ * */
template <typename Dtype, typename P> template <typename Dtype, typename P>
class OpKernelBase : PaddleMobileObject { class OpKernelBase {
public: public:
/* /*
* @b Every kernel must implement the Compute method * @b Every kernel must implement the Compute method
* @p para A struct holding all parameters the kernel needs for its computation; * @p para A struct holding all parameters the kernel needs for its computation;
* all such structs live in: paddle-mobile/src/operators/op_param.h * all such structs live in: paddle-mobile/src/operators/op_param.h
* */ * */
#ifdef PADDLE_MOBILE_MALI_GPU
OpKernelBase() { acl_op_ = nullptr; }
void *GetAclOp() const { return acl_op_; }
void SetAclOp(void *op, void *ob) const {
reinterpret_cast<OpKernelBase<Dtype, P> *>(ob)->acl_op_ = op;
}
#endif
virtual void Compute(const P &para) const = 0; virtual void Compute(const P &para) const = 0;
virtual bool Init(const P &para) const { return true; };
virtual ~OpKernelBase() = default; virtual ~OpKernelBase() = default;
private:
#ifdef PADDLE_MOBILE_MALI_GPU
void *acl_op_;
#endif
}; };
#define DEFINE_OP_CONSTRUCTOR(cls, parent_cls) \ #define DEFINE_OP_CONSTRUCTOR(cls, parent_cls) \
...@@ -139,20 +162,23 @@ class OpKernelBase : PaddleMobileObject { ...@@ -139,20 +162,23 @@ class OpKernelBase : PaddleMobileObject {
std::shared_ptr<::paddle_mobile::framework::Scope> scope) \ std::shared_ptr<::paddle_mobile::framework::Scope> scope) \
: parent_cls<Dtype, T>(type, inputs, outputs, attrs, scope) {} : parent_cls<Dtype, T>(type, inputs, outputs, attrs, scope) {}
class FusionOpMatcher : PaddleMobileObject { class FusionOpMatcher {
public: public:
FusionOpMatcher() {} FusionOpMatcher() {}
virtual std::string Type() = 0; virtual std::string Type() = 0;
virtual void FolderNodes(Node *node) { virtual void FolderNodes(
node->Folder(node_.Depth(), Type(), {}); Node *node,
std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
node->Folder(node_.Depth(), Type(), {}, removed_nodes);
} }
virtual Node &BeginNode() { return node_; } virtual Node &BeginNode() { return node_; }
std::string BeginType() { return node_.Type(); } std::string BeginType() { return node_.Type(); }
// virtual bool Fusion();
protected: protected:
Node node_; Node node_;
std::string type_; std::string type_;
......
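The OperatorWithKernel template now owns its parameter struct and kernel, and its Init() simply forwards to the kernel's new Init() hook. A minimal sketch of a concrete operator written against these interfaces; ReluParam, ReluKernel and ReluOp are hypothetical names used only for illustration, and the code is assumed to sit inside the paddle_mobile::framework namespace:
// Hypothetical parameter struct: built once from the op's inputs/outputs/attrs and scope.
struct ReluParam {
  ReluParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
            const AttributeMap &attrs, const Scope &scope) { /* look up the tensors here */ }
};
// Hypothetical kernel: Init() may reject a configuration, Compute() does the actual work.
template <typename DeviceType, typename T>
class ReluKernel : public OpKernelBase<DeviceType, ReluParam> {
 public:
  bool Init(const ReluParam &param) const { return true; }
  void Compute(const ReluParam &param) const { /* elementwise max(x, 0) */ }
};
// Hypothetical operator: param_ and kernel_ live in OperatorWithKernel, so RunImpl()
// no longer has to be written by hand for every op.
template <typename DeviceType, typename T>
class ReluOp : public OperatorWithKernel<DeviceType, ReluParam,
                                         ReluKernel<DeviceType, T>> {
 public:
  ReluOp(const std::string &type, const VariableNameMap &inputs,
         const VariableNameMap &outputs, const AttributeMap &attrs,
         std::shared_ptr<Scope> scope)
      : OperatorWithKernel<DeviceType, ReluParam, ReluKernel<DeviceType, T>>(
            type, inputs, outputs, attrs, scope) {}
  void InferShape() const { /* output dims == input dims */ }
};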
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle_mobile_object.h"
...@@ -13,17 +13,12 @@ See the License for the specific language governing permissions and ...@@ -13,17 +13,12 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "block_desc.h" #include "block_desc.h"
#include <algorithm>
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
std::vector<std::shared_ptr<VarDesc>> BlockDesc::Vars() const { std::vector<std::shared_ptr<VarDesc>> BlockDesc::Vars() const { return vars_; }
std::vector<std::shared_ptr<VarDesc>> res;
for (const auto &p : vars_) {
res.push_back(p.second);
}
return res;
}
std::vector<std::shared_ptr<OpDesc>> BlockDesc::Ops() const { return ops_; } std::vector<std::shared_ptr<OpDesc>> BlockDesc::Ops() const { return ops_; }
...@@ -31,10 +26,14 @@ BlockDesc::BlockDesc(PaddleMobile__Framework__Proto__BlockDesc *desc) ...@@ -31,10 +26,14 @@ BlockDesc::BlockDesc(PaddleMobile__Framework__Proto__BlockDesc *desc)
: index_(desc->idx), parent_index_(desc->idx) { : index_(desc->idx), parent_index_(desc->idx) {
for (int i = 0; i < desc->n_vars; ++i) { for (int i = 0; i < desc->n_vars; ++i) {
PaddleMobile__Framework__Proto__VarDesc *var_desc = desc->vars[i]; PaddleMobile__Framework__Proto__VarDesc *var_desc = desc->vars[i];
vars_[std::string(var_desc->name)] = vars_.emplace_back(std::shared_ptr<VarDesc>(new VarDesc(var_desc)));
std::shared_ptr<VarDesc>(new VarDesc(var_desc));
} }
std::sort(vars_.begin(), vars_.end(),
[](std::shared_ptr<VarDesc> left, std::shared_ptr<VarDesc> right) {
return left->Name() < right->Name();
});
for (int j = 0; j < desc->n_ops; ++j) { for (int j = 0; j < desc->n_ops; ++j) {
PaddleMobile__Framework__Proto__OpDesc *op_desc = desc->ops[j]; PaddleMobile__Framework__Proto__OpDesc *op_desc = desc->ops[j];
ops_.emplace_back(new framework::OpDesc(op_desc)); ops_.emplace_back(new framework::OpDesc(op_desc));
......
...@@ -15,14 +15,13 @@ limitations under the License. */ ...@@ -15,14 +15,13 @@ limitations under the License. */
#pragma once #pragma once
#include "framework/framework.pb-c.h" #include "framework/framework.pb-c.h"
#include "framework/paddle_mobile_object.h"
#include "framework/program/op_desc.h" #include "framework/program/op_desc.h"
#include "framework/program/var_desc.h" #include "framework/program/var_desc.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
class BlockDesc : PaddleMobileObject { class BlockDesc {
public: public:
friend class Node; friend class Node;
friend class ProgramOptimize; friend class ProgramOptimize;
...@@ -35,10 +34,9 @@ class BlockDesc : PaddleMobileObject { ...@@ -35,10 +34,9 @@ class BlockDesc : PaddleMobileObject {
ops_.push_back(copy_op_desc); ops_.push_back(copy_op_desc);
} }
for (auto &var_desc : block_desc.vars_) { for (int i = 0; i < block_desc.vars_.size(); ++i) {
std::shared_ptr<VarDesc> copy_var_desc = auto &var_desc = block_desc.vars_[i];
std::make_shared<VarDesc>(*var_desc.second); vars_.emplace_back(std::make_shared<VarDesc>(*var_desc));
vars_[var_desc.first] = copy_var_desc;
} }
} }
...@@ -64,7 +62,7 @@ class BlockDesc : PaddleMobileObject { ...@@ -64,7 +62,7 @@ class BlockDesc : PaddleMobileObject {
bool multi_thread_; bool multi_thread_;
int parent_index_; int parent_index_;
std::vector<std::shared_ptr<OpDesc>> ops_; std::vector<std::shared_ptr<OpDesc>> ops_;
std::unordered_map<std::string, std::shared_ptr<VarDesc>> vars_; std::vector<std::shared_ptr<VarDesc>> vars_;
}; };
} // namespace framework } // namespace framework
......
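BlockDesc now keeps its VarDescs in a name-sorted vector instead of an unordered_map, so Vars() is a plain copy and iteration order is deterministic across runs. A small hedged sketch (block_desc is assumed to be a std::shared_ptr<BlockDesc> obtained from the program):
// Sketch: variables now come back sorted by name.
for (const auto &var_desc : block_desc->Vars()) {
  DLOG << var_desc->Name();  // VarDesc::Name(), the same accessor used by the sort predicate above
}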
...@@ -20,12 +20,11 @@ limitations under the License. */ ...@@ -20,12 +20,11 @@ limitations under the License. */
#include "common/log.h" #include "common/log.h"
#include "common/type_define.h" #include "common/type_define.h"
#include "framework/framework.pb-c.h" #include "framework/framework.pb-c.h"
#include "framework/paddle_mobile_object.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
class OpDesc : PaddleMobileObject { class OpDesc {
public: public:
friend class ProgramOptimize; friend class ProgramOptimize;
friend class FusionOpMatcher; friend class FusionOpMatcher;
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "fusion_op_register.h"
...@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <sstream>
#include "framework/operator.h"
#include "framework/program/program-optimize/node.h" #include "framework/program/program-optimize/node.h"
#include <algorithm>
#include "framework/operator.h"
namespace paddle_mobile { namespace paddle_mobile {
...@@ -45,54 +44,13 @@ bool Node::operator==(const Node &in) { ...@@ -45,54 +44,13 @@ bool Node::operator==(const Node &in) {
return true; return true;
} }
bool Node::CanSplit(std::unordered_set<std::string> complex_compute_set) { std::vector<std::shared_ptr<framework::OpDesc>> Node::OpDescs(int size) {
bool split = false;
CanSplit(&split, false, 0, &complex_compute_set, this);
return split;
}
void Node::CanSplit(bool *split, bool spliting, int complex_count,
std::unordered_set<std::string> *complex_compute_set,
Node *pre_node) {
if (spliting) {
if (complex_compute_set->find(this->type_) != complex_compute_set->end()) {
complex_count++;
}
}
if (inputs_.size() > 1 && pre_node != inputs_.back()) {
return;
}
if (inputs_.size() > 1 && pre_node == inputs_.back()) {
if (complex_count > 1) {
*split = true;
return;
}
}
// multi output, to check
if (outputs_.size() > 1) {
spliting = true;
complex_compute_set = 0;
} else {
if (spliting == true && inputs_.size() > 0) {
spliting = false;
} else {
}
}
for (auto &output : outputs_) {
output->CanSplit(split, spliting, complex_count, complex_compute_set, this);
}
}
std::vector<std::shared_ptr<framework::OpDesc>> Node::OpDescs(uint size) {
std::vector<std::shared_ptr<framework::OpDesc>> op_descs; std::vector<std::shared_ptr<framework::OpDesc>> op_descs;
OpDescs(size - 1, &op_descs); OpDescs(size - 1, &op_descs);
return op_descs; return op_descs;
} }
void Node::OpDescs(uint index, void Node::OpDescs(int index,
std::vector<std::shared_ptr<framework::OpDesc>> *op_desc) { std::vector<std::shared_ptr<framework::OpDesc>> *op_desc) {
if (index == 0) { if (index == 0) {
return; return;
...@@ -103,107 +61,6 @@ void Node::OpDescs(uint index, ...@@ -103,107 +61,6 @@ void Node::OpDescs(uint index,
} }
} }
void Node::OpDescs(std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
Node *node, bool adding_thread, int thread_num) {
if (outputs_.size() > 1) {
adding_thread = false;
}
bool can_add_split = false;
// Only supported when the current node has multiple outputs and the op_desc_ of the current node has exactly one output key
if (outputs_.size() > 1 &&
op_input_output_key[op_desc_->type_].second.size() == 1) {
can_add_split = true;
// Walk the output nodes of the current node
for (const auto &output : outputs_) {
// An output that itself has outputs is not supported
if (output->outputs_.size() > 0) {
can_add_split = false;
break;
}
// The OpDesc associated with this node
std::shared_ptr<framework::OpDesc> &op_desc = output->op_desc_;
// Get this op's input keys and output keys
auto inputs_and_outputs = op_input_output_key[op_desc->type_];
// Check whether this op type is known
// and whether its input key count and output key count are both 1
if (op_input_output_key.find(op_desc->type_) !=
op_input_output_key.end() &&
inputs_and_outputs.first.size() == 1 &&
inputs_and_outputs.second.size() == 1) {
auto inputs_of_output = op_desc->Input(inputs_and_outputs.first[0]);
auto outputs_of_output = op_desc->Output(inputs_and_outputs.second[0]);
// If no input name collides with an output name, splitting is supported
for (int i = 0; i < inputs_of_output.size(); ++i) {
std::string input_of_output = inputs_of_output[i];
for (int j = 0; j < outputs_of_output.size(); ++j) {
std::string output_of_output = outputs_of_output[j];
if (input_of_output == output_of_output) {
DLOG << "output的 output 包含 input" << input_of_output;
can_add_split = false;
break;
}
}
}
} else { // If the model contains an unknown op, adding a split op is not supported
DLOG << "cannot find this op type: " << output->op_desc_->type_;
can_add_split = false;
}
}
}
if (inputs_.size() > 1 && node != inputs_.back()) {
return;
} else if (inputs_.size() > 1 && node == inputs_.back()) {
adding_thread = false;
op_desc->push_back(this->op_desc_);
} else {
op_desc->push_back(this->op_desc_);
}
if (adding_thread) {
Attribute attr;
attr.Set<int>(thread_num);
this->op_desc_->attrs_["thread"] = attr;
}
if (can_add_split) {
adding_thread = true;
std::shared_ptr<OpDesc> split_op_desc = std::make_shared<OpDesc>();
split_op_desc->type_ = G_OP_TYPE_SPLIT;
auto outputs = this->op_desc_->Output(
op_input_output_key[this->op_desc_->Type()].second[0]);
split_op_desc->inputs_ = {
{op_input_output_key[G_OP_TYPE_SPLIT].first[0], outputs}};
auto &split_outputs =
split_op_desc->outputs_[op_input_output_key[G_OP_TYPE_SPLIT].second[0]];
for (const auto &output : outputs_) {
split_outputs.push_back(outputs[0]);
}
DLOG << "add split";
op_desc->push_back(split_op_desc);
}
for (int i = 0; i < outputs_.size(); ++i) {
auto &output = outputs_[i];
if (can_add_split) {
output->OpDescs(op_desc, this, adding_thread, i);
} else {
output->OpDescs(op_desc, this, adding_thread, thread_num);
}
}
}
std::vector<std::shared_ptr<framework::OpDesc>> Node::OpDescs() {
std::vector<std::shared_ptr<framework::OpDesc>> op_descs;
OpDescs(&op_descs, this, false, 0);
return op_descs;
}
std::shared_ptr<Node> Node::To(int size) { std::shared_ptr<Node> Node::To(int size) {
std::shared_ptr<Node> node = std::make_shared<Node>(); std::shared_ptr<Node> node = std::make_shared<Node>();
this->To(size - 1, node); this->To(size - 1, node);
...@@ -224,24 +81,25 @@ void Node::To(int index, std::shared_ptr<Node> node) { ...@@ -224,24 +81,25 @@ void Node::To(int index, std::shared_ptr<Node> node) {
} }
} }
uint Node::Depth(uint begin) { int Node::Depth(int begin) {
uint depth = 0; int depth = 0;
begin++; begin++;
for (int i = 0; i < outputs_.size(); ++i) { for (int i = 0; i < outputs_.size(); ++i) {
uint output_depth = outputs_[i]->Depth(begin); int output_depth = outputs_[i]->Depth(begin);
depth = output_depth > depth ? output_depth : depth; depth = output_depth > depth ? output_depth : depth;
} }
return begin > depth ? begin : depth; return begin > depth ? begin : depth;
} }
Node &Node::Folder( Node &Node::Folder(
uint size, std::string type, int size, std::string type,
std::map<std::string, std::pair<std::string, std::string>> change) { std::map<std::string, std::pair<std::string, std::string>> change,
std::vector<std::shared_ptr<Node>> *removed_nodes) {
std::shared_ptr<framework::OpDesc> op_desc = std::shared_ptr<framework::OpDesc> op_desc =
std::make_shared<framework::OpDesc>(); std::make_shared<framework::OpDesc>();
op_desc->inputs_ = this->op_desc_->inputs_; op_desc->inputs_ = this->op_desc_->inputs_;
std::vector<std::shared_ptr<Node>> outputs; std::vector<std::shared_ptr<Node>> outputs;
this->Folder(op_desc, &outputs, size - 1, &change, this); this->Folder(op_desc, &outputs, size - 1, &change, this, removed_nodes);
this->outputs_ = outputs; this->outputs_ = outputs;
this->type_ = type; this->type_ = type;
this->op_desc_ = op_desc; this->op_desc_ = op_desc;
...@@ -251,9 +109,9 @@ Node &Node::Folder( ...@@ -251,9 +109,9 @@ Node &Node::Folder(
void Node::Folder( void Node::Folder(
std::shared_ptr<framework::OpDesc> op_desc, std::shared_ptr<framework::OpDesc> op_desc,
std::vector<std::shared_ptr<Node>> *outputs, uint index, std::vector<std::shared_ptr<Node>> *outputs, int index,
std::map<std::string, std::pair<std::string, std::string>> *change, std::map<std::string, std::pair<std::string, std::string>> *change,
Node *begin_node) { Node *begin_node, std::vector<std::shared_ptr<Node>> *removed_nodes) {
if (change->find(this->type_) != change->end()) { if (change->find(this->type_) != change->end()) {
auto change_pair = (*change)[this->type_]; auto change_pair = (*change)[this->type_];
op_desc->GetInputs()[change_pair.second] = op_desc->GetInputs()[change_pair.second] =
...@@ -266,7 +124,9 @@ void Node::Folder( ...@@ -266,7 +124,9 @@ void Node::Folder(
if (index > 0) { if (index > 0) {
--index; --index;
for (auto output : outputs_) { for (auto output : outputs_) {
output->Folder(op_desc, outputs, index, change, begin_node); removed_nodes->push_back(output);
output->Folder(op_desc, outputs, index, change, begin_node,
removed_nodes);
} }
} else { } else {
for (auto &op_output : this->op_desc_->outputs_) { for (auto &op_output : this->op_desc_->outputs_) {
...@@ -285,7 +145,7 @@ void Node::Folder( ...@@ -285,7 +145,7 @@ void Node::Folder(
} }
} }
} }
#ifdef PADDLE_MOBILE_DEBUG
std::string Node::ToString(std::string blank, const Node *node) const { std::string Node::ToString(std::string blank, const Node *node) const {
std::stringstream ss; std::stringstream ss;
ss << type_ << "-> \n"; ss << type_ << "-> \n";
...@@ -316,6 +176,7 @@ Print &operator<<(Print &printer, const Node &node) { ...@@ -316,6 +176,7 @@ Print &operator<<(Print &printer, const Node &node) {
printer << node.ToString(); printer << node.ToString();
return printer; return printer;
} }
#endif
} // namespace framework } // namespace framework
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -14,20 +14,17 @@ limitations under the License. */ ...@@ -14,20 +14,17 @@ limitations under the License. */
#pragma once #pragma once
#include <cinttypes>
#include <map> #include <map>
#include <string> #include <string>
#include <unordered_set>
#include <utility>
#include <vector> #include <vector>
#include "common/log.h" #include "common/log.h"
#include "framework/paddle_mobile_object.h"
#include "framework/program/op_desc.h" #include "framework/program/op_desc.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
class Node : PaddleMobileObject { class Node {
friend class ProgramOptimize; friend class ProgramOptimize;
public: public:
...@@ -37,35 +34,34 @@ class Node : PaddleMobileObject { ...@@ -37,35 +34,34 @@ class Node : PaddleMobileObject {
: op_desc_(op_desc), type_(op_desc->Type()) {} : op_desc_(op_desc), type_(op_desc->Type()) {}
Node &operator>(std::shared_ptr<Node> node); Node &operator>(std::shared_ptr<Node> node);
bool operator==(const Node &in); bool operator==(const Node &in);
bool CanSplit(std::unordered_set<std::string> complex_compute_set);
#ifdef PADDLE_MOBILE_DEBUG
std::string ToString() const; std::string ToString() const;
void Description();
#endif
std::shared_ptr<Node> To(int size); std::shared_ptr<Node> To(int size);
uint Depth(uint begin = 0); int Depth(int begin = 0);
Node &Folder( Node &Folder(
uint size, std::string type, int size, std::string type,
std::map<std::string, std::pair<std::string, std::string>> change_map); std::map<std::string, std::pair<std::string, std::string>> change_map,
std::vector<std::shared_ptr<framework::OpDesc>> OpDescs(uint size); std::vector<std::shared_ptr<Node>> *removed_nodes);
std::vector<std::shared_ptr<framework::OpDesc>> OpDescs(); std::vector<std::shared_ptr<framework::OpDesc>> OpDescs(int size);
std::shared_ptr<framework::OpDesc> OpDescOfNode() { return op_desc_; } std::shared_ptr<framework::OpDesc> OpDescOfNode() { return op_desc_; }
std::string Type() { return type_; } std::string Type() { return type_; }
void Description();
private: private:
void CanSplit(bool *split, bool spliting, int complex_count, void OpDescs(int size,
std::unordered_set<std::string> *complex_compute_set,
Node *pre_node);
void OpDescs(std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
Node *node, bool adding_thread, int thread_num);
void OpDescs(uint size,
std::vector<std::shared_ptr<framework::OpDesc>> *op_desc); std::vector<std::shared_ptr<framework::OpDesc>> *op_desc);
void To(int index, std::shared_ptr<Node>); void To(int index, std::shared_ptr<Node>);
void Folder( void Folder(
std::shared_ptr<framework::OpDesc> op_desc, std::shared_ptr<framework::OpDesc> op_desc,
std::vector<std::shared_ptr<Node>> *outputs, uint index, std::vector<std::shared_ptr<Node>> *outputs, int index,
std::map<std::string, std::pair<std::string, std::string>> *change, std::map<std::string, std::pair<std::string, std::string>> *change,
Node *begin_node); Node *begin_node, std::vector<std::shared_ptr<Node>> *removed_nodes);
std::shared_ptr<framework::OpDesc> op_desc_; std::shared_ptr<framework::OpDesc> op_desc_;
#ifdef PADDLE_MOBILE_DEBUG
std::string ToString(std::string blank, const Node *node) const; std::string ToString(std::string blank, const Node *node) const;
#endif
std::vector<std::shared_ptr<Node>> outputs_; std::vector<std::shared_ptr<Node>> outputs_;
std::vector<Node *> inputs_; std::vector<Node *> inputs_;
std::string type_; std::string type_;
......
...@@ -13,13 +13,14 @@ See the License for the specific language governing permissions and ...@@ -13,13 +13,14 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "framework/program/program-optimize/program_optimize.h" #include "framework/program/program-optimize/program_optimize.h"
#include <algorithm>
#include "framework/program/program-optimize/fusion_op_register.h" #include "framework/program/program-optimize/fusion_op_register.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
std::shared_ptr<ProgramDesc> ProgramOptimize::FushionOptimize( std::shared_ptr<ProgramDesc> ProgramOptimize::FusionOptimize(
std::shared_ptr<ProgramDesc> ori_des, bool add_split) { std::shared_ptr<ProgramDesc> ori_des, bool add_split) {
// ProgramDesc *optimize_program = new ProgramDesc(*ori_des); // ProgramDesc *optimize_program = new ProgramDesc(*ori_des);
std::shared_ptr<ProgramDesc> optimize_program = std::shared_ptr<ProgramDesc> optimize_program =
...@@ -31,6 +32,8 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FushionOptimize( ...@@ -31,6 +32,8 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FushionOptimize(
std::unordered_map<std::string, std::vector<std::shared_ptr<Node>>> std::unordered_map<std::string, std::vector<std::shared_ptr<Node>>>
type_map; type_map;
std::vector<std::shared_ptr<Node>> nodes;
std::shared_ptr<Node> begin_node; std::shared_ptr<Node> begin_node;
auto block = optimize_program->Block(i); auto block = optimize_program->Block(i);
// DLOG << " ops size: " << block->Ops().size(); // DLOG << " ops size: " << block->Ops().size();
...@@ -38,11 +41,13 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FushionOptimize( ...@@ -38,11 +41,13 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FushionOptimize(
auto op = block->Ops()[j]; auto op = block->Ops()[j];
auto op_type = op->Type(); auto op_type = op->Type();
if (op_input_output_key.find(op->Type()) == op_input_output_key.end()) { if (op_input_output_key.find(op->Type()) == op_input_output_key.end()) {
LOG(kLOG_ERROR) << "return null "; LOG(kLOG_ERROR) << "has not support op return null "
<< " op type: " << op->Type();
return nullptr; return nullptr;
} }
std::shared_ptr<Node> node = std::make_shared<Node>(op); std::shared_ptr<Node> node = std::make_shared<Node>(op);
nodes.push_back(node);
// //
type_map[op->Type()].push_back(node); type_map[op->Type()].push_back(node);
...@@ -87,21 +92,29 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FushionOptimize( ...@@ -87,21 +92,29 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FushionOptimize(
// DLOG << " match success " << " fusion node: \n" << // DLOG << " match success " << " fusion node: \n" <<
// matcher->BeginNode() << "\nsub node: \n" << *sub_node; // matcher->BeginNode() << "\nsub node: \n" << *sub_node;
// DLOG << "match node\n"<< *match_node; // DLOG << "match node\n"<< *match_node;
matcher->FolderNodes(match_node.get());
// DLOG << " after match node\n"<< *match_node;
// match_node->Description();
// DLOG << "begin node: \n" << *begin_node; std::vector<std::shared_ptr<Node>> removed_nodes;
matcher->FolderNodes(match_node.get(), &removed_nodes);
for (int j = 0; j < removed_nodes.size(); ++j) {
auto removed_node = removed_nodes[j];
auto removed_ite =
std::find(nodes.begin(), nodes.end(), removed_node);
nodes.erase(removed_ite);
}
} }
} }
} }
// DLOG << "node: \n" << *begin_node;
std::vector<std::shared_ptr<framework::OpDesc>> op_descs; std::vector<std::shared_ptr<framework::OpDesc>> op_descs;
// bool can_splite = begin_node->CanSplit({G_OP_TYPE_CONV, if (add_split) {
// G_OP_TYPE_BATCHNORM, G_OP_TYPE_DEPTHWISE_CONV}); GenerateOps(&op_descs, begin_node.get(), add_split);
GenerateOps(&op_descs, begin_node.get()); } else {
for (int m = 0; m < nodes.size(); ++m) {
auto &node = nodes[m];
op_descs.push_back(node->op_desc_);
}
}
block->ops_ = op_descs; block->ops_ = op_descs;
} }
...@@ -118,6 +131,14 @@ void ProgramOptimize::GenerateOps( ...@@ -118,6 +131,14 @@ void ProgramOptimize::GenerateOps(
Node *current_node) { Node *current_node) {
if (current_node->inputs_.size() > 1 && if (current_node->inputs_.size() > 1 &&
input_node != current_node->inputs_.back()) { input_node != current_node->inputs_.back()) {
DLOG << " current type " << current_node->type_;
DLOG << " inputs size of current node > 0 ";
for (int i = 0; i < current_node->inputs_.size(); ++i) {
DLOG << " input i: " << current_node->inputs_[i]->type_;
}
return; return;
} else if (current_node->inputs_.size() > 1 && } else if (current_node->inputs_.size() > 1 &&
input_node == current_node->inputs_.back()) { input_node == current_node->inputs_.back()) {
...@@ -250,12 +271,12 @@ void ProgramOptimize::GenerateOps( ...@@ -250,12 +271,12 @@ void ProgramOptimize::GenerateOps(
} }
void ProgramOptimize::GenerateOps( void ProgramOptimize::GenerateOps(
std::vector<std::shared_ptr<framework::OpDesc>> *op_descs, std::vector<std::shared_ptr<framework::OpDesc>> *op_descs, Node *begin_node,
Node *begin_node) { bool can_add_split) {
// std::vector<std::shared_ptr<framework::OpDesc>> *op_desc, // std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
// Node *input_node, Node *current_node, bool adding_thread, int // Node *input_node, Node *current_node, bool adding_thread, int
// thread_num // thread_num
if (false) { if (can_add_split) {
this->GenerateOps(op_descs, begin_node, begin_node, false, -1, nullptr); this->GenerateOps(op_descs, begin_node, begin_node, false, -1, nullptr);
} else { } else {
this->GenerateOps(op_descs, begin_node, begin_node); this->GenerateOps(op_descs, begin_node, begin_node);
......
...@@ -27,14 +27,14 @@ namespace framework { ...@@ -27,14 +27,14 @@ namespace framework {
class ProgramOptimize { class ProgramOptimize {
public: public:
ProgramOptimize() {} ProgramOptimize() {}
std::shared_ptr<ProgramDesc> FushionOptimize( std::shared_ptr<ProgramDesc> FusionOptimize(
std::shared_ptr<ProgramDesc> ori_des, bool add_split = false); std::shared_ptr<ProgramDesc> ori_des, bool add_split = false);
private: private:
int current_block_; int current_block_;
std::vector<std::shared_ptr<BlockDesc>> new_blocks_; std::vector<std::shared_ptr<BlockDesc>> new_blocks_;
void GenerateOps(std::vector<std::shared_ptr<framework::OpDesc>> *op_descs, void GenerateOps(std::vector<std::shared_ptr<framework::OpDesc>> *op_descs,
Node *begin_node); Node *begin_node, bool can_add_split);
void GenerateOps(std::vector<std::shared_ptr<framework::OpDesc>> *op_desc, void GenerateOps(std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
Node *input_node, Node *current_node); Node *input_node, Node *current_node);
void GenerateOps(std::vector<std::shared_ptr<framework::OpDesc>> *op_desc, void GenerateOps(std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
......
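For reference, a minimal sketch of driving the renamed FusionOptimize entry point declared above; the optimizer header path and the fallback-to-original policy are assumptions for illustration, not part of this patch.

#include <memory>
#include "framework/program/program_desc.h"
#include "framework/program/program-optimize/program_optimize.h"  // header path assumed

// Returns the fused program, or the original one when FusionOptimize meets an
// op type it does not recognize (it returns nullptr in that case, see above).
std::shared_ptr<paddle_mobile::framework::ProgramDesc> OptimizeOrFallback(
    std::shared_ptr<paddle_mobile::framework::ProgramDesc> origin_desc,
    bool add_split = false) {
  paddle_mobile::framework::ProgramOptimize optimizer;
  auto optimized = optimizer.FusionOptimize(origin_desc, add_split);
  return optimized != nullptr ? optimized : origin_desc;
}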
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
namespace paddle_mobile {
namespace framework {}
} // namespace paddle_mobile
...@@ -15,7 +15,6 @@ limitations under the License. */ ...@@ -15,7 +15,6 @@ limitations under the License. */
#pragma once #pragma once
#include "common/types.h" #include "common/types.h"
#include "framework/paddle_mobile_object.h"
#include "framework/program/program_desc.h" #include "framework/program/program_desc.h"
#include "framework/scope.h" #include "framework/scope.h"
...@@ -23,12 +22,14 @@ namespace paddle_mobile { ...@@ -23,12 +22,14 @@ namespace paddle_mobile {
namespace framework { namespace framework {
template <typename Dtype, Precision P = Precision::FP32> template <typename Dtype, Precision P = Precision::FP32>
class Program : PaddleMobileObject { class Program {
public: public:
std::shared_ptr<ProgramDesc> originProgram; std::shared_ptr<ProgramDesc> originProgram;
std::shared_ptr<ProgramDesc> optimizeProgram; std::shared_ptr<ProgramDesc> optimizeProgram;
std::shared_ptr<Scope> scope; std::shared_ptr<Scope> scope;
std::string model_path; std::string model_path;
std::string para_path;
bool is_commbine = false;
private: private:
}; };
......
...@@ -18,13 +18,12 @@ limitations under the License. */ ...@@ -18,13 +18,12 @@ limitations under the License. */
#include "common/types.h" #include "common/types.h"
#include "framework/framework.pb-c.h" #include "framework/framework.pb-c.h"
#include "framework/paddle_mobile_object.h"
#include "framework/program/block_desc.h" #include "framework/program/block_desc.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
class ProgramDesc : PaddleMobileObject { class ProgramDesc {
public: public:
friend class Node; friend class Node;
friend class ProgramOptimize; friend class ProgramOptimize;
......
...@@ -14,40 +14,14 @@ limitations under the License. */ ...@@ -14,40 +14,14 @@ limitations under the License. */
#pragma once #pragma once
#include <string>
#include "framework/framework.pb-c.h" #include "framework/framework.pb-c.h"
#include "framework/paddle_mobile_object.h"
#include "framework/program/tensor_desc.h" #include "framework/program/tensor_desc.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
/*
PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__BOOL = 0,
PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__INT16 = 1,
PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__INT32 = 2,
PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__INT64 = 3,
PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FP16 = 4,
PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FP32 = 5,
PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FP64 = 6,
PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__LOD_TENSOR = 7,
PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__SELECTED_ROWS = 8,
PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FEED_MINIBATCH = 9,
PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FETCH_LIST = 10,
PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__STEP_SCOPES = 11,
PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__LOD_RANK_TABLE = 12,
PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__LOD_TENSOR_ARRAY = 13,
PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__PLACE_LIST = 14,
PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__READER = 15,
PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__CHANNEL = 16,
PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__RAW = 17,
PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__TUPLE = 18
*/
class VarDesc { class VarDesc {
public: public:
VarDesc(const VarDesc &var_desc) { VarDesc(const VarDesc &var_desc) {
...@@ -56,14 +30,6 @@ class VarDesc { ...@@ -56,14 +30,6 @@ class VarDesc {
this->persistable_ = var_desc.persistable_; this->persistable_ = var_desc.persistable_;
this->tensor_desc_ = var_desc.tensor_desc_; this->tensor_desc_ = var_desc.tensor_desc_;
this->type_ = var_desc.type_; this->type_ = var_desc.type_;
/*
*
* std::string name_;
bool persistable_;
TensorDesc tensor_desc_;
VarType_Type type_;
VarType_Type data_type_;
* */
} }
VarDesc(PaddleMobile__Framework__Proto__VarDesc *desc) { VarDesc(PaddleMobile__Framework__Proto__VarDesc *desc) {
type_ = (VarType_Type)desc->type->type; type_ = (VarType_Type)desc->type->type;
...@@ -102,39 +68,6 @@ class VarDesc { ...@@ -102,39 +68,6 @@ class VarDesc {
const TensorDesc &Tensor_desc() const { return tensor_desc_; } const TensorDesc &Tensor_desc() const { return tensor_desc_; }
// const proto::VarType::ChannelDesc &channel_desc() const {
// switch (desc_.type().type()) {
// case proto::VarType::CHANNEL:
// return desc_.type().channel();
// default:
// break;
// }
// }
// proto::VarType::Type GetDataType() const {
// switch (desc_.type().type()) {
// case proto::VarType::CHANNEL:
// return channel_desc().data_type();
// break;
// default:
// return tensor_desc().data_type();
// }
// }
// template <typename T>
// std::vector<T> RepeatedToVector(
// const google::protobuf::RepeatedField<T> &repeated_field) const {
// std::vector<T> ret;
// ret.reserve(repeated_field.size());
// std::copy(repeated_field.begin(), repeated_field.end(),
// std::back_inserter(ret));
// return ret;
// }
// std::vector<int64_t> GetShape() const {
// return this->RepeatedToVector(tensor_desc().dims());
// }
private: private:
std::string name_; std::string name_;
bool persistable_; bool persistable_;
......
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#include "framework/scope.h" #include "framework/scope.h"
#include <algorithm>
#include <set> #include <set>
#include <string> #include <string>
#include <vector> #include <vector>
...@@ -22,7 +23,6 @@ namespace paddle_mobile { ...@@ -22,7 +23,6 @@ namespace paddle_mobile {
namespace framework { namespace framework {
Scope &Scope::NewScope() const { Scope &Scope::NewScope() const {
std::unique_lock<std::mutex> lock(mutex_);
kids_.push_back(new Scope(this)); kids_.push_back(new Scope(this));
return *kids_.back(); return *kids_.back();
} }
...@@ -72,11 +72,9 @@ std::vector<std::string> Scope::LocalVarNames() const { ...@@ -72,11 +72,9 @@ std::vector<std::string> Scope::LocalVarNames() const {
} }
void Scope::DeleteScope(Scope *scope) const { void Scope::DeleteScope(Scope *scope) const {
std::unique_lock<std::mutex> lock(mutex_);
auto it = std::find(kids_.begin(), kids_.end(), scope); auto it = std::find(kids_.begin(), kids_.end(), scope);
kids_.erase(it); kids_.erase(it);
delete scope; delete scope;
// deferent
} }
void Scope::EraseVars(const std::vector<std::string> &var_names) { void Scope::EraseVars(const std::vector<std::string> &var_names) {
...@@ -104,14 +102,6 @@ void Scope::Rename(const std::string &origin_name, ...@@ -104,14 +102,6 @@ void Scope::Rename(const std::string &origin_name,
vars_[new_name] = origin_it->second; vars_[new_name] = origin_it->second;
vars_.erase(origin_it); vars_.erase(origin_it);
} }
//
// std::string Scope::Rename(const std::string& origin_name)
// const {
// auto var_name = string::Sprintf("%p.%d", this,
// vars_.size());
// Rename(origin_name, var_name);
// return var_name;
// }
Variable *Scope::FindVarLocally(const std::string &name) const { Variable *Scope::FindVarLocally(const std::string &name) const {
auto it = vars_.find(name); auto it = vars_.find(name);
......
...@@ -14,17 +14,16 @@ limitations under the License. */ ...@@ -14,17 +14,16 @@ limitations under the License. */
#pragma once #pragma once
#include <list> //std::list #include <list>
#include <mutex> //std::mutex #include <unordered_map>
#include <unordered_map> //std::unordered_map
#include "variable.h" #include "variable.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
class Scope { class Scope {
public: public:
Scope() {} Scope() = default;
~Scope() {} ~Scope() = default;
Scope &NewScope() const; Scope &NewScope() const;
...@@ -70,8 +69,6 @@ class Scope { ...@@ -70,8 +69,6 @@ class Scope {
mutable std::unordered_map<std::string, Variable *> vars_; mutable std::unordered_map<std::string, Variable *> vars_;
mutable std::list<Scope *> kids_; mutable std::list<Scope *> kids_;
Scope const *parent_{nullptr}; Scope const *parent_{nullptr};
mutable std::mutex mutex_;
}; };
} // namespace framework } // namespace framework
} // namespace paddle_mobile } // namespace paddle_mobile
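Since the internal std::mutex is removed here, Scope::NewScope() and Scope::DeleteScope() are no longer synchronized. A minimal sketch of guarding them externally if several threads ever share one root scope; the wrapper type and the threading scenario are assumptions, not something this diff introduces.

#include <mutex>
#include "framework/scope.h"

class GuardedScope {
 public:
  explicit GuardedScope(paddle_mobile::framework::Scope *root) : root_(root) {}

  paddle_mobile::framework::Scope &NewChild() {
    std::lock_guard<std::mutex> lock(mu_);  // serialize mutation of kids_
    return root_->NewScope();
  }

  void DeleteChild(paddle_mobile::framework::Scope *child) {
    std::lock_guard<std::mutex> lock(mu_);
    root_->DeleteScope(child);
  }

 private:
  paddle_mobile::framework::Scope *root_;
  std::mutex mu_;
};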
...@@ -14,14 +14,15 @@ limitations under the License. */ ...@@ -14,14 +14,15 @@ limitations under the License. */
#pragma once #pragma once
#include <common/enforce.h>
#include <cstdint> #include <cstdint>
#include <cstring> #include <cstring>
#include <memory> #include <memory>
#include <type_traits> #include <type_traits>
#include <typeindex> #include <typeindex>
#include <vector> #include <vector>
#include "common/enforce.h"
#include "common/enforce.h"
#include "framework/data_layout.h" #include "framework/data_layout.h"
#include "framework/ddim.h" #include "framework/ddim.h"
#include "memory/t_malloc.h" #include "memory/t_malloc.h"
...@@ -84,6 +85,12 @@ class Tensor { ...@@ -84,6 +85,12 @@ class Tensor {
} }
} }
Tensor(const Tensor &inTensor) {
this->dims_ = inTensor.dims_;
this->holder_ = inTensor.holder_;
this->offset_ = inTensor.offset_;
}
/*! Return a pointer to mutable memory block. */ /*! Return a pointer to mutable memory block. */
template <typename T> template <typename T>
inline T *data() { inline T *data() {
...@@ -130,7 +137,6 @@ class Tensor { ...@@ -130,7 +137,6 @@ class Tensor {
} }
PADDLE_MOBILE_ENFORCE(numel() >= 0, "the Tensor'snumel must >=0.") PADDLE_MOBILE_ENFORCE(numel() >= 0, "the Tensor'snumel must >=0.")
int64_t size = numel() * SizeOfType(type); int64_t size = numel() * SizeOfType(type);
/* some versions of boost::variant don't have operator!= */
if (holder_ == nullptr || holder_->size() < size + offset_) { if (holder_ == nullptr || holder_->size() < size + offset_) {
holder_.reset(new PlaceholderImpl(size, type)); holder_.reset(new PlaceholderImpl(size, type));
offset_ = 0; offset_ = 0;
...@@ -169,7 +175,9 @@ class Tensor { ...@@ -169,7 +175,9 @@ class Tensor {
/*! The internal of two tensors share the same memory block. */ /*! The internal of two tensors share the same memory block. */
inline Tensor &ShareDataWith(const Tensor &src) { inline Tensor &ShareDataWith(const Tensor &src) {
src.check_memory_size(); src.check_memory_size();
*this = src; if (holder_.get() != src.holder_.get()) {
*this = src;
}
return *this; return *this;
} }
...@@ -198,7 +206,6 @@ class Tensor { ...@@ -198,7 +206,6 @@ class Tensor {
size_t base = numel() / dims_[0]; size_t base = numel() / dims_[0];
Tensor dst; Tensor dst;
dst.holder_ = holder_; dst.holder_ = holder_;
dst.set_layout(layout_);
DDim dst_dims = dims_; DDim dst_dims = dims_;
dst_dims[0] = end_idx - begin_idx; dst_dims[0] = end_idx - begin_idx;
dst.Resize(dst_dims); dst.Resize(dst_dims);
...@@ -227,10 +234,6 @@ class Tensor { ...@@ -227,10 +234,6 @@ class Tensor {
"Tensor's dims_ is out of bound. "); "Tensor's dims_ is out of bound. ");
} }
inline DataLayout layout() const { return layout_; }
inline void set_layout(const DataLayout layout) { layout_ = layout; }
private: private:
/** /**
* @note Placeholder hides type T, so it doesn't appear as a * @note Placeholder hides type T, so it doesn't appear as a
...@@ -288,21 +291,6 @@ class Tensor { ...@@ -288,21 +291,6 @@ class Tensor {
DDim dims_; DDim dims_;
/**
* @brief the layout of memory block, default is NHWC.
*
* @note the memory allocation order, describe how weight/data is
* stored
* For example, in 4-D Tensor(rank=4), there are three
* commonly
* used layout. They are
* NCHW, NHWC, CHWN.
* N,C,H,W for respectively the batch size, the number of
* feature maps, the height, the width.
*/
DataLayout layout_ = DataLayout::kNHWC;
/** /**
* @brief A PlaceHolder may be shared by more than one tensor. * @brief A PlaceHolder may be shared by more than one tensor.
* *
......
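A short sketch of what the new copy constructor and the guarded ShareDataWith() above mean for callers: copies share the existing holder_ instead of allocating fresh memory, and self-sharing becomes a no-op. The shape used here is purely illustrative.

#include "framework/tensor.h"

void TensorSharingSketch() {
  using paddle_mobile::framework::Tensor;
  using paddle_mobile::framework::make_ddim;

  Tensor a;
  a.Resize(make_ddim({1, 3, 8, 8}));
  a.mutable_data<float>();   // allocates the holder that will be shared

  Tensor b(a);               // copy ctor: same holder_, dims_ and offset_
  Tensor c;
  c.ShareDataWith(a);        // shares a's holder_ instead of copying bytes
  c.ShareDataWith(c);        // the holder_ comparison skips the self-assignment
}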
...@@ -13,137 +13,18 @@ See the License for the specific language governing permissions and ...@@ -13,137 +13,18 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "tensor_util.h" #include "tensor_util.h"
#include <algorithm>
#include <limits>
#include <vector>
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
void TensorCopy(const Tensor &src, Tensor *dst) { void TensorCopy(const Tensor &src, Tensor *dst) {
// VLOG(3) << "TensorCopy " << src.dims() << " from " <<
// src.place() << " to
// "
// << dst_place;
src.check_memory_size();
dst->Resize(src.dims());
dst->set_layout(src.layout());
auto src_ptr = src.data<void>();
auto dst_ptr = dst->mutable_data(src.type());
auto size = src.numel() * SizeOfType(src.type());
memory::Copy(dst_ptr, src_ptr, size);
}
void TensorCopySync(const Tensor &src, Tensor *dst) {
// VLOG(3) << "TensorCopySync " << src.dims() << " from " <<
// src.place()
// << " to " << dst_place;
src.check_memory_size(); src.check_memory_size();
dst->Resize(src.dims()); dst->Resize(src.dims());
dst->set_layout(src.layout());
auto src_ptr = src.data<void>(); auto src_ptr = src.data<void>();
auto dst_ptr = dst->mutable_data(src.type()); auto dst_ptr = dst->mutable_data(src.type());
auto size = src.numel() * SizeOfType(src.type()); auto size = src.numel() * SizeOfType(src.type());
memory::Copy(dst_ptr, src_ptr, size); memory::Copy(dst_ptr, src_ptr, size);
} }
template <typename Predicate>
struct AnyDTypeVisitor {
Predicate predicate_;
const Tensor &tensor_;
Tensor *out_;
AnyDTypeVisitor(Predicate predicate, const Tensor &tensor, Tensor *out)
: predicate_(predicate), tensor_(tensor), out_(out) {}
template <typename T>
void operator()() const {
// auto t = EigenVector<T>::Flatten(tensor_);
// auto o = EigenScalar<bool>::From(*out_);
// return any of predicate_(t) is true.
// o.device(*ctx_.eigen_device()) = predicate_(t).any();
}
};
template <typename Predicate>
inline void AnyImpl(Predicate predicate, const Tensor &tensor,
framework::Tensor *out) {
VisitDataType(ToDataType(tensor.type()),
AnyDTypeVisitor<Predicate>(predicate, tensor, out));
}
template <typename Predicate>
struct AnyVisitor {
const framework::Tensor &tensor_;
Predicate predicate_;
AnyVisitor(const framework::Tensor &tensor, Predicate predicate)
: tensor_(tensor), predicate_(std::move(predicate)) {}
bool operator()(void) const {
framework::Tensor out;
out.Resize({1});
out.mutable_data<bool>();
AnyImpl(predicate_, tensor_, &out);
return this->GetResult(out);
}
bool GetResult(const framework::Tensor &out) const {
return *out.data<bool>();
}
};
template <typename Predicate>
inline bool Any(const framework::Tensor &tensor, Predicate predicate) {
AnyVisitor<Predicate> visitor(tensor, predicate);
// return platform::VisitPlace(visitor);
return visitor();
}
struct ContainsNANPredicate {
template <typename T>
auto operator()(const T &eigen_vec) const
-> decltype(std::declval<T>().isnan()) {
// Cast eigen_vector to vector of bool. true if is inf.
return eigen_vec.isnan();
}
};
bool TensorContainsNAN(const framework::Tensor &tensor) {
ContainsNANPredicate predicate;
return Any(tensor, predicate);
}
struct ContainsInfPredicate {
template <typename T>
auto operator()(const T &eigen_vec) const
-> decltype(std::declval<T>().isinf()) {
// Cast eigen_vector to vector of bool. true if is inf.
return eigen_vec.isinf();
}
};
bool TensorContainsInf(const framework::Tensor &tensor) {
ContainsInfPredicate predicate;
return Any(tensor, predicate);
}
struct DeserializedDataFunctor {
DeserializedDataFunctor(void **buf, Tensor *tensor)
: buf_(buf), tensor_(tensor) {}
template <typename T>
void operator()() {
*buf_ = tensor_->mutable_data<T>();
}
void **buf_;
Tensor *tensor_;
};
} // namespace framework } // namespace framework
} // namespace paddle_mobile } // namespace paddle_mobile
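After this cleanup TensorCopy() is the only copy helper left in tensor_util; a minimal usage sketch, assuming the header is reachable as framework/tensor_util.h and using an illustrative shape.

#include "framework/tensor_util.h"

void TensorCopySketch() {
  using namespace paddle_mobile::framework;

  Tensor src;
  src.Resize(make_ddim({2, 4}));
  src.mutable_data<float>();   // TensorCopy requires an allocated source

  Tensor dst;
  TensorCopy(src, &dst);       // resizes dst and copies src's raw bytes into it
}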
...@@ -15,51 +15,12 @@ limitations under the License. */ ...@@ -15,51 +15,12 @@ limitations under the License. */
#pragma once #pragma once
#include <vector> #include <vector>
#include "memory/t_malloc.h" #include "memory/t_malloc.h"
#include "platform/data_type.h"
#include "tensor.h" #include "tensor.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
void TensorCopy(const Tensor &src, Tensor *dst); void TensorCopy(const Tensor &src, Tensor *dst);
void TensorCopySync(const Tensor &src, Tensor *dst);
template <typename T>
void TensorFromVector(const std::vector<T> &src, Tensor *dst);
template <typename T>
void TesnorToVector(const Tensor &src, std::vector<T> *dst);
bool TensorContainsNAN(const framework::Tensor &tensor);
bool TensorContainsInf(const framework::Tensor &tensor);
void TensorToStream(std::ostream &os, const Tensor &tensor);
void TensorFromStream(std::istream &is, Tensor *tensor);
//
// The implementation of template functions.
//
template <typename T>
void TensorFromVector(const std::vector<T> &src, Tensor *dst) {
auto src_ptr = static_cast<const void *>(src.data());
dst->Resize({static_cast<int64_t>(src.size())});
auto dst_ptr = static_cast<void *>(dst->mutable_data<T>());
auto size = src.size() * sizeof(T);
memory::Copy(dst_ptr, src_ptr, size);
}
template <typename T>
void TensorToVector(const Tensor &src, std::vector<T> *dst) {
auto src_ptr = static_cast<const void *>(src.data<T>());
auto size = src.numel() * sizeof(T);
dst->resize(src.numel());
auto dst_ptr = static_cast<void *>(dst->data());
memory::Copy(dst_ptr, src_ptr, size);
}
} // namespace framework } // namespace framework
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -14,19 +14,17 @@ limitations under the License. */ ...@@ -14,19 +14,17 @@ limitations under the License. */
#pragma once #pragma once
#include <iostream>
#include <memory> #include <memory>
#include <string> #include <string>
#include <typeindex> #include <typeindex>
#include <typeinfo> #include <typeinfo>
#include "../common/variant.h" #include "../common/variant.h"
#include "paddle_mobile_object.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
using std::string; using std::string;
class Variable : public PaddleMobileObject { class Variable {
public: public:
template <typename T> template <typename T>
const T *Get() const { const T *Get() const {
......
...@@ -12,12 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,12 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "io.h" #include "io/io.h"
#include <fstream> #include <algorithm>
#include <vector> #include <vector>
#include "common/log.h"
#include "common/enforce.h" #include "common/enforce.h"
#include "common/log.h"
#include "framework/framework.pb-c.h" #include "framework/framework.pb-c.h"
#include "framework/lod_tensor.h" #include "framework/lod_tensor.h"
#include "framework/operator.h" #include "framework/operator.h"
...@@ -26,20 +25,29 @@ limitations under the License. */ ...@@ -26,20 +25,29 @@ limitations under the License. */
#include "framework/program/var_desc.h" #include "framework/program/var_desc.h"
#include "framework/scope.h" #include "framework/scope.h"
#include "framework/tensor.h" #include "framework/tensor.h"
#ifdef PADDLE_EXECUTOR_MULTITHREAD
#include <queue>
#include <utility>
#include "common/threadpool.h"
#endif
namespace paddle_mobile { namespace paddle_mobile {
using framework::Variable; using framework::Variable;
void ReadBinaryFile(const std::string &filename, std::string *contents) { char *Get_binary_data(std::string filename) {
std::ifstream fin(filename, std::ios::in | std::ios::binary); FILE *file = fopen(filename.c_str(), "rb");
PADDLE_MOBILE_ENFORCE(fin.is_open(), "open file: %s failed", PADDLE_MOBILE_ENFORCE(file != nullptr, "can't open file: %s ",
filename.c_str()); filename.c_str());
fin.seekg(0, std::ios::end); fseek(file, 0, SEEK_END);
contents->clear(); long size = ftell(file);
contents->resize(fin.tellg()); PADDLE_MOBILE_ENFORCE(size > 0, "size is too small");
fin.seekg(0, std::ios::beg); rewind(file);
fin.read(&(contents->at(0)), contents->size()); char *data = new char[size];
fin.close(); size_t bytes_read = fread(data, 1, size, file);
PADDLE_MOBILE_ENFORCE(bytes_read == size,
"read binary file bytes do not match with fseek");
fclose(file);
return data;
} }
static size_t ReadBuffer(const char *file_name, uint8_t **out) { static size_t ReadBuffer(const char *file_name, uint8_t **out) {
...@@ -66,110 +74,28 @@ static size_t ReadBuffer(const char *file_name, uint8_t **out) { ...@@ -66,110 +74,28 @@ static size_t ReadBuffer(const char *file_name, uint8_t **out) {
} }
template <typename Dtype, Precision P> template <typename Dtype, Precision P>
void Loader<Dtype, P>::LoadVar(framework::Variable *variable, const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
const framework::VarDesc &var_desc, const std::string &dirname, bool optimize, bool can_add_split) {
const std::string &file_path) { auto program =
auto tensor = variable->GetMutable<framework::LoDTensor>(); this->LoadProgram(dirname + "/__model__", optimize, can_add_split);
std::ifstream is(file_path); program.model_path = dirname;
PADDLE_MOBILE_ENFORCE(is.is_open(), "open file: %s failed", return program;
file_path.c_str());
std::fpos<mbstate_t> pos;
pos = is.tellg(); // save current position
is.seekg(0, std::ios::end);
is.seekg(pos); // restore saved position
// 1. version
uint32_t version;
is.read(reinterpret_cast<char *>(&version), sizeof(version));
// 2 Lod information
uint64_t lod_level;
is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
auto &lod = *tensor->mutable_lod();
lod.resize(lod_level);
for (uint64_t i = 0; i < lod_level; ++i) {
uint64_t size;
is.read(reinterpret_cast<char *>(&size), sizeof(size));
std::vector<size_t> tmp(size / sizeof(size_t));
is.read(reinterpret_cast<char *>(tmp.data()),
static_cast<std::streamsize>(size));
for (auto j : tmp) {
LOG(kLOG_DEBUG1) << " lod - " << j;
}
lod[i] = tmp;
}
// 3. tensor version
uint32_t tensor_version;
is.read(reinterpret_cast<char *>(&tensor_version), sizeof(tensor_version));
// 4. tensor desc
int32_t size;
is.read(reinterpret_cast<char *>(&size), sizeof(size));
std::unique_ptr<char[]> buf(new char[size]);
is.read(reinterpret_cast<char *>(buf.get()), size);
const framework::TensorDesc &desc = var_desc.Tensor_desc();
PaddleMobile__Framework__Proto__VarType__TensorDesc *tensor_desc = NULL;
// void *v;
// PaddleMobile__Framework__Proto__VarType__TensorDesc_Closure()(tensor_desc,
// buf.get());
// DLOG << "PaddleMobile__Framework__Proto__VarType__TensorDesc_Closure- " <<
// tensor_desc;
// framework::TensorDesc &tensor_desc = variable->
// PaddleMobile__Framework__Proto__ProgramDesc *c_program;
// uint8_t *proto_buf = NULL;
// size_t read_size = ReadBuffer(file_path.c_str(), &proto_buf);
// c_program = paddle_mobile__framework__proto__program_desc__unpack(NULL,
// read_size, buf);
// paddle_mobile__framework__proto__var_type__tensor_desc__init()
int memory_size = 1;
for (auto l : desc.Dims()) {
memory_size *= l;
}
tensor->Resize(framework::make_ddim(desc.Dims()));
void *memory = tensor;
int type_size = 0;
switch (desc.DataType()) {
case framework::VARTYPE_TYPE_FP16:
type_size = 2;
break;
case framework::VARTYPE_TYPE_FP32:
type_size = 4;
memory = tensor->mutable_data<float>();
break;
case framework::VARTYPE_TYPE_FP64:
type_size = 8;
break;
case framework::VARTYPE_TYPE_INT32:
type_size = 4;
break;
case framework::VARTYPE_TYPE_INT64:
type_size = 8;
break;
case framework::VARTYPE_TYPE_BOOL:
type_size = 1;
break;
default:
break;
}
is.read(static_cast<char *>(memory), memory_size * type_size);
is.close();
} }
template <typename Dtype, Precision P> template <typename Dtype, Precision P>
const framework::Program<Dtype, P> Loader<Dtype, P>::Load( const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
const std::string &dirname, bool optimize) { const std::string &model_path, const std::string &para_path,
std::string model_filename = dirname + "/__model__"; bool optimize) {
auto program = this->LoadProgram(model_path, optimize);
program.para_path = para_path;
program.is_commbine = true;
return program;
}
template <typename Dtype, Precision P>
const framework::Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
const std::string &model_path, bool optimize, bool can_add_split) {
std::string model_filename = model_path;
PaddleMobile__Framework__Proto__ProgramDesc *c_program; PaddleMobile__Framework__Proto__ProgramDesc *c_program;
uint8_t *buf = NULL; uint8_t *buf = NULL;
size_t read_size = ReadBuffer(model_filename.c_str(), &buf); size_t read_size = ReadBuffer(model_filename.c_str(), &buf);
...@@ -183,22 +109,16 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::Load( ...@@ -183,22 +109,16 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
// //
DLOG << "n_ops: " << (*c_program->blocks)->n_ops; DLOG << "n_ops: " << (*c_program->blocks)->n_ops;
// //
std::shared_ptr<framework::ProgramDesc> originProgramDesc = auto originProgramDesc = std::make_shared<framework::ProgramDesc>(c_program);
std::make_shared<framework::ProgramDesc>(c_program);
framework::Program<Dtype, P> program; framework::Program<Dtype, P> program;
program.model_path = dirname;
program.originProgram = originProgramDesc; program.originProgram = originProgramDesc;
std::shared_ptr<framework::Scope> scope = auto scope = std::make_shared<framework::Scope>();
std::make_shared<framework::Scope>();
program.scope = scope; program.scope = scope;
originProgramDesc->Block(0);
for (const auto &block : originProgramDesc->Blocks()) { for (const auto &block : originProgramDesc->Blocks()) {
for (int i = 0; i < block->Vars().size(); ++i) { for (auto var_desc : block->Vars()) {
std::shared_ptr<framework::VarDesc> var_desc = block->Vars()[i];
// DLOG << "var name-- " << var_desc->Name();
auto var = scope->Var(var_desc->Name()); auto var = scope->Var(var_desc->Name());
if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) { if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
...@@ -224,7 +144,7 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::Load( ...@@ -224,7 +144,7 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
if (optimize) { if (optimize) {
framework::ProgramOptimize program_optimize; framework::ProgramOptimize program_optimize;
program.optimizeProgram = program.optimizeProgram =
program_optimize.FushionOptimize(originProgramDesc); program_optimize.FusionOptimize(originProgramDesc, can_add_split);
} }
if (optimize) { if (optimize) {
program.optimizeProgram->Description("optimize: "); program.optimizeProgram->Description("optimize: ");
...@@ -237,9 +157,10 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::Load( ...@@ -237,9 +157,10 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
} }
template class Loader<CPU, Precision::FP32>; template class Loader<CPU, Precision::FP32>;
template class Loader<FPGA, Precision::FP32>;
template class Loader<GPU_MALI, Precision::FP32>;
#pragma mark - executor #pragma mark - executor
template <typename Dtype, Precision P> template <typename Dtype, Precision P>
Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size, Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
bool use_optimize) bool use_optimize)
...@@ -253,6 +174,9 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size, ...@@ -253,6 +174,9 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
variable_ptr[0].SetValue<int>(batch_size); variable_ptr[0].SetValue<int>(batch_size);
const std::vector<std::shared_ptr<framework::BlockDesc>> blocks = const std::vector<std::shared_ptr<framework::BlockDesc>> blocks =
to_predict_program_->Blocks(); to_predict_program_->Blocks();
#ifdef PADDLE_EXECUTOR_MULTITHREAD
depManager.resize(blocks.size());
#endif
for (int i = 0; i < blocks.size(); ++i) { for (int i = 0; i < blocks.size(); ++i) {
std::shared_ptr<framework::BlockDesc> block_desc = blocks[i]; std::shared_ptr<framework::BlockDesc> block_desc = blocks[i];
std::vector<std::shared_ptr<framework::OpDesc>> ops = block_desc->Ops(); std::vector<std::shared_ptr<framework::OpDesc>> ops = block_desc->Ops();
...@@ -263,40 +187,54 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size, ...@@ -263,40 +187,54 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
op->Type(), op->GetInputs(), op->GetOutputs(), op->GetAttrMap(), op->Type(), op->GetInputs(), op->GetOutputs(), op->GetAttrMap(),
program_.scope); program_.scope);
op_base->InferShape(); op_base->InferShape();
ops_of_block_[*block_desc.get()].push_back(op_base); ops_of_block_[*block_desc.get()].push_back(op_base);
#ifdef PADDLE_EXECUTOR_MULTITHREAD
depManager[i].analysisDep(ops_of_block_[*block_desc.get()]);
#endif
} }
} }
InitMemory(); if (program_.is_commbine) {
InitCombineMemory();
} else {
InitMemory();
}
std::shared_ptr<framework::BlockDesc> to_predict_block =
to_predict_program_->Block(0);
auto &ops = ops_of_block_[*to_predict_block.get()];
for (const auto &op : ops) {
op->Init();
}
} }
template <typename Dtype, Precision P> template <typename Dtype, Precision P>
void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc, void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
framework::LoDTensor *tensor, framework::LoDTensor *tensor, char *&data) {
const std::string &file_path) {
std::ifstream is(file_path);
PADDLE_MOBILE_ENFORCE(is.is_open(), "open file: %s failed",
file_path.c_str());
std::fpos<mbstate_t> pos;
pos = is.tellg(); // save current position
is.seekg(0, std::ios::end);
is.seekg(pos); // restore saved position
// 1. version // 1. version
uint32_t version; uint32_t version = *(uint32_t *)data;
is.read(reinterpret_cast<char *>(&version), sizeof(version)); data += sizeof(uint32_t);
// 2 Lod information // 2 Lod information
uint64_t lod_level; uint64_t *lod_level_ptr = new uint64_t();
is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level)); memcpy(lod_level_ptr, data, sizeof(uint64_t));
uint64_t lod_level = *lod_level_ptr;
delete lod_level_ptr;
data += sizeof(uint64_t);
auto &lod = *tensor->mutable_lod(); auto &lod = *tensor->mutable_lod();
lod.resize(lod_level); lod.resize(lod_level);
for (uint64_t i = 0; i < lod_level; ++i) { for (uint64_t i = 0; i < lod_level; ++i) {
uint64_t size; uint64_t size = *(uint64_t *)data;
is.read(reinterpret_cast<char *>(&size), sizeof(size)); data += sizeof(uint64_t);
DLOG << "lod size: " << i << size;
std::vector<size_t> tmp(size / sizeof(size_t)); std::vector<size_t> tmp(size / sizeof(size_t));
is.read(reinterpret_cast<char *>(tmp.data()),
static_cast<std::streamsize>(size)); for (int k = 0; k < tmp.size(); ++k) {
tmp[k] = *(size_t *)data;
DLOG << "tmp[k]: " << k << *(size_t *)data;
data += sizeof(size_t);
}
for (auto j : tmp) { for (auto j : tmp) {
LOG(kLOG_DEBUG1) << " lod - " << j; LOG(kLOG_DEBUG1) << " lod - " << j;
} }
...@@ -304,17 +242,20 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc, ...@@ -304,17 +242,20 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
} }
// 3. tensor version // 3. tensor version
uint32_t tensor_version; uint32_t tensor_version = *(uint32_t *)data;
is.read(reinterpret_cast<char *>(&tensor_version), sizeof(tensor_version)); data += sizeof(uint32_t);
// 4. tensor desc // 4. tensor desc
int32_t size; int32_t size = *(int32_t *)data;
is.read(reinterpret_cast<char *>(&size), sizeof(size)); data += sizeof(int32_t);
std::unique_ptr<char[]> buf(new char[size]); std::unique_ptr<char[]> buf(new char[size]);
is.read(reinterpret_cast<char *>(buf.get()), size); for (int m = 0; m < size; ++m) {
buf.get()[m] = data[m];
}
data += (sizeof(char) * size);
const framework::TensorDesc &desc = var_desc.Tensor_desc(); const framework::TensorDesc &desc = var_desc.Tensor_desc();
int memory_size = 1; int memory_size = 1;
for (auto l : desc.Dims()) { for (auto l : desc.Dims()) {
memory_size *= l; memory_size *= l;
...@@ -348,8 +289,10 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc, ...@@ -348,8 +289,10 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
break; break;
} }
is.read(static_cast<char *>(memory), memory_size * type_size); for (int n = 0; n < memory_size * type_size; ++n) {
is.close(); static_cast<char *>(memory)[n] = data[n];
}
data += (sizeof(char) * memory_size * type_size);
} }
template <typename Dtype, Precision P> template <typename Dtype, Precision P>
...@@ -362,8 +305,12 @@ void Executor<Dtype, P>::InitMemory() { ...@@ -362,8 +305,12 @@ void Executor<Dtype, P>::InitMemory() {
if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") { if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
continue; continue;
} }
LoadMemory(*var_desc, tensor,
program_.model_path + "/" + var_desc->Name()); char *origin_data =
Get_binary_data(program_.model_path + "/" + var_desc->Name());
char *data = origin_data;
LoadMemory(*var_desc, tensor, data);
delete[] origin_data;
} else { } else {
if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) { if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
auto tensor = var->template GetMutable<framework::LoDTensor>(); auto tensor = var->template GetMutable<framework::LoDTensor>();
...@@ -375,6 +322,32 @@ void Executor<Dtype, P>::InitMemory() { ...@@ -375,6 +322,32 @@ void Executor<Dtype, P>::InitMemory() {
} }
} }
template <typename Dtype, Precision P>
void Executor<Dtype, P>::InitCombineMemory() {
LOG(kLOG_INFO) << " begin init combine memory";
char *origin_data = Get_binary_data(program_.para_path);
char *data = origin_data;
for (const auto &block : to_predict_program_->Blocks()) {
for (const auto &var_desc : block->Vars()) {
auto var = program_.scope->Var(var_desc->Name());
if (var_desc->Persistable()) {
auto tensor = var->template GetMutable<framework::LoDTensor>();
if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
continue;
}
LoadMemory(*var_desc, tensor, data);
} else {
if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
auto tensor = var->template GetMutable<framework::LoDTensor>();
tensor->template mutable_data<Ptype>();
}
}
}
}
delete[] origin_data;
LOG(kLOG_INFO) << " end init combine memory ";
}
template <typename Dtype, Precision P> template <typename Dtype, Precision P>
std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict( std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
const framework::Tensor &t) { const framework::Tensor &t) {
...@@ -385,19 +358,135 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict( ...@@ -385,19 +358,135 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
feed_tensor->ShareDataWith(t); feed_tensor->ShareDataWith(t);
std::shared_ptr<framework::BlockDesc> to_predict_block = std::shared_ptr<framework::BlockDesc> to_predict_block =
to_predict_program_->Block(0); to_predict_program_->Block(0);
for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) { auto &ops = ops_of_block_[*to_predict_block.get()];
auto op = ops_of_block_[*to_predict_block.get()][j]; #ifdef PADDLE_MOBILE_PROFILE
op->Run(); std::vector<ProfInfo> profile(ops.size());
#endif
#ifdef PADDLE_EXECUTOR_MULTITHREAD
std::mutex m;
std::condition_variable cv;
std::queue<int> next;
next.push(0);
int rsize = ops.size();
std::vector<int> status(rsize, 0);
auto &threadPool = ThreadPool::getThreadPool();
auto &dep = depManager[0];
auto finishF = [&ops, &m, &cv, &next, &status, &rsize, &dep](int opi) {
std::lock_guard<std::mutex> lk(m);
rsize--;
status[opi] = 2;
for (int i : dep.getNext(opi)) {
bool ok = true;
for (int j : dep.getDeps(i)) {
if (status[j] != 2) {
ok = false;
break;
}
}
if (ok && (status[i] == 0)) {
next.push(i);
}
}
cv.notify_one();
};
for (;;) {
std::unique_lock<std::mutex> lk(m);
cv.wait(lk, [&next, &rsize] { return rsize == 0 || !next.empty(); });
if (rsize == 0) {
break;
}
while (next.size() > 0) {
int opi = next.front();
next.pop();
status[opi] = 1;
threadPool.enqueue([opi, &ops, &finishF, &profile] {
auto &op = ops[opi];
#ifdef PADDLE_MOBILE_PROFILE
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
profile[opi].runBegin = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
profile[opi].tid = ThreadPool::getThreadPoolThreadId();
#endif
ops[opi]->Run();
#ifdef PADDLE_MOBILE_PROFILE
clock_gettime(CLOCK_MONOTONIC, &ts);
profile[opi].runEnd = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
#endif
finishF(opi);
});
}
} }
auto ops = ops_of_block_[*to_predict_program_->Block(0)]; #else
for (int i = 0; i < ops.size(); i++) {
#ifdef PADDLE_MOBILE_PROFILE
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
profile[i].runBegin = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
#endif
// to Run
ops[i]->Run();
#ifdef PADDLE_MOBILE_PROFILE
clock_gettime(CLOCK_MONOTONIC, &ts);
profile[i].runEnd = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
#endif
}
#endif
auto last_op = ops.rbegin(); auto last_op = ops.rbegin();
auto output_map = (*last_op)->Outputs(); auto output_map = (*last_op)->Outputs();
std::vector<std::string> out_keys = (*last_op)->GetOutKeys(); std::vector<std::string> out_keys = (*last_op)->GetOutKeys();
PADDLE_MOBILE_ENFORCE(out_keys.size() > 0, "the last op contains no output"); PADDLE_MOBILE_ENFORCE(out_keys.size() > 0, "the last op contains no output");
framework::LoDTensor *output_tensor = framework::LoDTensor *output_tensor =
framework::GetVarValue<framework::LoDTensor>(out_keys[0], output_map, framework::GetVarValue<framework::LoDTensor>(out_keys[0], output_map,
*(program_.scope)); *(program_.scope));
return std::shared_ptr<framework::Tensor>(output_tensor); #ifdef PADDLE_MOBILE_PROFILE
#ifdef PADDLE_EXECUTOR_MULTITHREAD
// TODO: expose profile info through an interface so users can retrieve it and analyze
// the performance of their network.
FILE *df = fopen("net.dot", "w");
fprintf(df, "digraph {\n");
for (int i = 0; i < ops.size(); i++) {
for (int j : dep.getNext(i)) {
fprintf(df, "op_%d -> op_%d\n", i, j);
}
}
for (int i = 0; i < ops.size(); i++) {
fprintf(df, "op_%d[label=\"%s (%d)\"]\n", i, ops[i]->Type().c_str(), i);
}
fprintf(df, "}\n");
fclose(df);
#endif
FILE *pf = fopen("profile.out", "w");
std::unordered_map<std::string, uint64_t> _tp;
for (int i = 0; i < profile.size(); i++) {
const auto &pInfo = profile[i];
uint64_t timeCost = pInfo.runEnd - pInfo.runBegin;
_tp[ops[i]->Type()] += timeCost;
fprintf(pf, "%d\t%s\t%d\t%llu\t%llu\t%llu\n", i, ops[i]->Type().c_str(),
pInfo.tid, pInfo.runBegin, pInfo.runEnd, timeCost);
}
fclose(pf);
printf("====================[ profile ]======================\n");
using prof_t = std::pair<std::string, uint64_t>;
std::vector<prof_t> _tv(_tp.begin(), _tp.end());
uint64_t _ptotal = 0;
for (auto const &p : _tv) {
_ptotal += p.second;
}
auto compf = [](const prof_t &a, const prof_t &b) {
return a.second > b.second;
};
std::sort(_tv.begin(), _tv.end(), compf);
_tv.push_back(std::make_pair("total", _ptotal));
for (auto const &p : _tv) {
printf("%-16s\t%-10.0f\t%-2.4f\n", p.first.c_str(), (float)p.second,
(float)p.second / _ptotal * 100.0);
}
printf("====================[---------]======================\n");
#endif
return std::make_shared<framework::Tensor>(framework::Tensor(*output_tensor));
} }
template <typename Dtype, Precision P> template <typename Dtype, Precision P>
std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict( std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
...@@ -420,5 +509,7 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict( ...@@ -420,5 +509,7 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict(
} }
template class Executor<CPU, Precision::FP32>; template class Executor<CPU, Precision::FP32>;
template class Executor<FPGA, Precision::FP32>;
template class Executor<GPU_MALI, Precision::FP32>;
} // namespace paddle_mobile } // namespace paddle_mobile
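Get_binary_data() above hands back a buffer allocated with new[], and LoadMemory() walks a raw cursor through it. A minimal sketch of the same calling convention with explicit ownership via std::unique_ptr; the helper name is hypothetical and only mirrors what InitMemory()/InitCombineMemory() do.

#include <memory>
#include <string>

namespace paddle_mobile {

char *Get_binary_data(std::string filename);  // defined in io/io.cpp above

void LoadParamsSketch(const std::string &para_path) {
  // unique_ptr<char[]> pairs the new[] inside Get_binary_data with delete[].
  std::unique_ptr<char[]> origin(Get_binary_data(para_path));
  char *cursor = origin.get();
  // LoadMemory(var_desc, tensor, cursor) would be called here once per
  // persistable variable, each call advancing the cursor past that variable.
  (void)cursor;
}

}  // namespace paddle_mobile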
...@@ -14,51 +14,80 @@ limitations under the License. */ ...@@ -14,51 +14,80 @@ limitations under the License. */
#pragma once #pragma once
#include <memory.h>
#include <map> #include <map>
#include <memory>
#include <string> #include <string>
#include <vector> #include <vector>
#include "common/types.h" #include "common/types.h"
#include "framework/lod_tensor.h" #include "framework/lod_tensor.h"
#include "framework/operator.h" #include "framework/operator.h"
#include "framework/paddle_mobile_object.h"
#include "framework/program/program.h" #include "framework/program/program.h"
#include "framework/tensor.h" #include "framework/tensor.h"
#ifdef PADDLE_EXECUTOR_MULTITHREAD
#include <condition_variable>
#include <mutex>
#include <thread>
#include "common/dep_core.h"
#endif
namespace paddle_mobile { namespace paddle_mobile {
template <typename Dtype, Precision P = Precision::FP32> template <typename Dtype = CPU, Precision P = Precision::FP32>
class Loader : PaddleMobileObject { class Loader {
public: public:
/*
* @b load separate format fluid model
* @b 加载分开形式的 fluid 模型
* */
const framework::Program<Dtype, P> Load(const std::string &dirname, const framework::Program<Dtype, P> Load(const std::string &dirname,
bool optimize = true); bool optimize = false,
bool can_add_split = false);
/*
* @b load combine format fluid mode
* @b 加载结合在一起格式的模型
* */
const framework::Program<Dtype, P> Load(const std::string &model_path,
const std::string &para_path,
bool optimize = false);
private: private:
void LoadVar(framework::Variable *variable, const framework::Program<Dtype, P> LoadProgram(const std::string &model_path,
const framework::VarDesc &var_desc, bool optimize = false,
const std::string &file_path); bool can_add_split = false);
}; };
template <typename Dtype, Precision P = Precision::FP32> template <typename Dtype = CPU, Precision P = Precision::FP32>
class Executor { class Executor {
public: public:
typedef typename PrecisionTrait<P>::ptype Ptype; typedef typename PrecisionTrait<P>::ptype Ptype;
/*
* @b init executor with program load by Loader class
* @b 用 loader load 的 program 实例化 executor
* */
Executor(const framework::Program<Dtype> p, int batch_size = 1, Executor(const framework::Program<Dtype> p, int batch_size = 1,
bool use_optimize = true); bool use_optimize = true);
/*
* @b to predict
* */
std::shared_ptr<framework::Tensor> Predict(const framework::Tensor &t); std::shared_ptr<framework::Tensor> Predict(const framework::Tensor &t);
/*
* @b to predict with vector and dim
*
* @b 使用 输入 和 输入的维度信息 进行预测
* */
std::vector<Ptype> Predict(const std::vector<Ptype> &input, std::vector<Ptype> Predict(const std::vector<Ptype> &input,
const std::vector<int64_t> &dims); const std::vector<int64_t> &dims);
protected: protected:
Executor() = default; Executor() = default;
void InitMemory(); void InitMemory();
void LoadMemory(const framework::VarDesc var_desc, void LoadMemory(const framework::VarDesc var_desc,
framework::LoDTensor *tensor, const std::string &file_path); framework::LoDTensor *tensor, char *&data);
void InitCombineMemory();
framework::Program<Dtype> program_; framework::Program<Dtype> program_;
int batch_size_ = 1; int batch_size_ = 1;
std::shared_ptr<framework::ProgramDesc> to_predict_program_; std::shared_ptr<framework::ProgramDesc> to_predict_program_;
...@@ -68,6 +97,16 @@ class Executor { ...@@ -68,6 +97,16 @@ class Executor {
std::vector<std::shared_ptr<framework::OperatorBase<Dtype>>>> std::vector<std::shared_ptr<framework::OperatorBase<Dtype>>>>
ops_of_block_; ops_of_block_;
bool use_optimize_ = false; bool use_optimize_ = false;
#ifdef PADDLE_EXECUTOR_MULTITHREAD
std::vector<depCore> depManager;
#endif
#ifdef PADDLE_MOBILE_PROFILE
struct ProfInfo {
int tid = 0;
uint64_t runBegin = 0UL;
uint64_t runEnd = 0UL;
};
#endif
}; };
} // namespace paddle_mobile } // namespace paddle_mobile
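An end-to-end usage sketch of the Loader/Executor API declared above, covering both the separate-file and the combined (model plus params) formats; the paths, input shape, and flag values are assumptions for illustration.

#include <cstdint>
#include <vector>
#include "io/io.h"

void RunModelSketch() {
  paddle_mobile::Loader<paddle_mobile::CPU> loader;

  // Separate format: one parameter file per persistable variable under dirname.
  auto program = loader.Load("./mobilenet", /*optimize=*/true);

  // Combined format: a single model file plus one params blob.
  // auto program = loader.Load("./mobilenet/model", "./mobilenet/params", true);

  paddle_mobile::Executor<paddle_mobile::CPU> executor(program, /*batch_size=*/1,
                                                       /*use_optimize=*/true);

  std::vector<float> input(1 * 3 * 224 * 224, 0.0f);
  std::vector<int64_t> dims{1, 3, 224, 224};
  std::vector<float> output = executor.Predict(input, dims);
  (void)output;
}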
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ANDROID
#include "paddle_mobile_jni.h"
#ifdef __cplusplus
extern "C" {
#endif
namespace paddle_mobile {
namespace jni {
using framework::DDim;
using framework::Program;
using framework::Tensor;
using paddle_mobile::CPU;
using std::string;
extern const char *ANDROID_LOG_TAG =
"paddle_mobile LOG built on " __DATE__ " " __TIME__;
static Executor<CPU> *shared_executor_instance = nullptr;
// toDo mutex lock
// static std::mutex shared_mutex;
Executor<CPU> *getExecutorInstance(const Program<CPU> p, int batch_size,
bool use_optimize) {
if (nullptr == shared_executor_instance) {
shared_executor_instance = new Executor<CPU>(p, batch_size, use_optimize);
}
return shared_executor_instance;
}
string jstring2cppstring(JNIEnv *env, jstring jstr) {
const char *cstr = env->GetStringUTFChars(jstr, 0);
string cppstr(cstr);
env->ReleaseStringUTFChars(jstr, cstr);
return cppstr;
}
JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_load(JNIEnv *env,
jclass thiz,
jstring modelPath) {
paddle_mobile::Loader<paddle_mobile::CPU> loader;
bool optimize = true;
auto program = loader.Load(jstring2cppstring(env, modelPath), optimize);
shared_executor_instance = getExecutorInstance(program, 1, optimize);
return shared_executor_instance != nullptr ? JNI_TRUE : JNI_FALSE;
}
JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictImage(
JNIEnv *env, jclass thiz, jfloatArray buf) {
jfloatArray result = NULL;
int count = 0;
float *dataPointer = nullptr;
if (nullptr != buf) {
dataPointer = env->GetFloatArrayElements(buf, NULL);
}
framework::Tensor input;
framework::DDim ddim = framework::make_ddim({1, 3, 224, 224});
input.Resize(ddim);
auto input_ptr = input.mutable_data<float>();
for (int i = 0; i < framework::product(ddim); i++) {
input_ptr[i] = dataPointer[i];
}
auto output = shared_executor_instance->Predict(input);
count = output->numel();
result = env->NewFloatArray(count);
env->SetFloatArrayRegion(result, 0, count, output->data<float>());
return result;
}
JNIEXPORT void JNICALL Java_com_baidu_paddle_PML_clear(JNIEnv *env,
jclass thiz) {}
} // namespace jni
} // namespace paddle_mobile
#ifdef __cplusplus
}
#endif
#endif
...@@ -13,25 +13,39 @@ See the License for the specific language governing permissions and ...@@ -13,25 +13,39 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#ifdef ANDROID
#include <jni.h>
#include "common/log.h"
#include "framework/tensor.h"
#include "io/io.h"
#ifdef __cplusplus
extern "C" {
#endif
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace jni {
/**
// inline proto::VarType::Type ToDataType(std::type_index type) { * load model & params of the net for android
// using namespace paddle_mobile::framework::proto; */
// if (typeid(float).hash_code() == type.hash_code()) { JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_load(JNIEnv *env,
// return proto::VarType::FP32; jclass thiz,
// } else if (typeid(double).hash_code() == type.hash_code()) { jstring modelPath);
// return proto::VarType::FP64;
// } else if (typeid(int).hash_code() == type.hash_code()) { /**
// return proto::VarType::INT32; * object detection for android
// } else if (typeid(int64_t).hash_code() == type.hash_code()) { */
// return proto::VarType::INT64; JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictImage(
// } else if (typeid(bool).hash_code() == type.hash_code()) { JNIEnv *env, jclass thiz, jfloatArray buf);
// return proto::VarType::BOOL;
// } else { /**
//// PADDLE_THROW("Not supported"); * clear data of the net when it is destroyed, for android
// } */
// } JNIEXPORT void JNICALL Java_com_baidu_paddle_PML_clear(JNIEnv *env,
} // namespace framework jclass thiz);
} // namespace jni
} // namespace paddle_mobile } // namespace paddle_mobile
#ifdef __cplusplus
}
#endif
#endif
(The remaining file diffs in this commit are collapsed and not shown.)